Merge branches 'hammer2' and 'master' of ssh://crater.dragonflybsd.org/repository...
[dragonfly.git] / sys / vfs / hammer2 / hammer2_inode.c
1 /*
2  * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
39 #include <sys/lock.h>
40 #include <sys/uuid.h>
41
42 #include "hammer2.h"
43
44 /*
45  * Adding a ref to an inode is only legal if the inode already has at least
46  * one ref.
47  */
48 void
49 hammer2_inode_ref(hammer2_inode_t *ip)
50 {
51         hammer2_chain_ref(ip->hmp, &ip->chain);
52 }
53
54 /*
55  * Drop an inode reference, freeing the inode when the last reference goes
56  * away.
57  */
58 void
59 hammer2_inode_drop(hammer2_inode_t *ip)
60 {
61         hammer2_chain_drop(ip->hmp, &ip->chain);
62 }
63
64 /*
65  * Get the vnode associated with the given inode, allocating the vnode if
66  * necessary.  The vnode will be returned exclusively locked.
67  *
68  * The caller must lock the inode (shared or exclusive).
69  *
70  * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
71  * races.
72  */
73 struct vnode *
74 hammer2_igetv(hammer2_inode_t *ip, int *errorp)
75 {
76         struct vnode *vp;
77         hammer2_pfsmount_t *pmp;
78         ccms_state_t ostate;
79
80         pmp = ip->pmp;
81         KKASSERT(pmp != NULL);
82         *errorp = 0;
83
84         for (;;) {
85                 /*
86                  * Attempt to reuse an existing vnode assignment.  It is
87                  * possible to race a reclaim so the vget() may fail.  The
88                  * inode must be unlocked during the vget() to avoid a
89                  * deadlock against a reclaim.
90                  */
91                 vp = ip->vp;
92                 if (vp) {
93                         /*
94                          * Inode must be unlocked during the vget() to avoid
95                          * possible deadlocks, vnode is held to prevent
96                          * destruction during the vget().  The vget() can
97                          * still fail if we lost a reclaim race on the vnode.
98                          */
99                         vhold_interlocked(vp);
100                         ccms_thread_unlock(&ip->chain.cst);
101                         if (vget(vp, LK_EXCLUSIVE)) {
102                                 vdrop(vp);
103                                 ccms_thread_lock(&ip->chain.cst,
104                                                  CCMS_STATE_EXCLUSIVE);
105                                 continue;
106                         }
107                         ccms_thread_lock(&ip->chain.cst, CCMS_STATE_EXCLUSIVE);
108                         vdrop(vp);
109                         /* vp still locked and ref from vget */
110                         *errorp = 0;
111                         break;
112                 }
113
114                 /*
115                  * No vnode exists, allocate a new vnode.  Beware of
116                  * allocation races.  This function will return an
117                  * exclusively locked and referenced vnode.
118                  */
119                 *errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
120                 if (*errorp) {
121                         vp = NULL;
122                         break;
123                 }
124
125                 /*
126                  * Lock the inode and check for an allocation race.
127                  */
128                 ostate = ccms_thread_lock_upgrade(&ip->chain.cst);
129                 if (ip->vp != NULL) {
130                         vp->v_type = VBAD;
131                         vx_put(vp);
132                         ccms_thread_lock_restore(&ip->chain.cst, ostate);
133                         continue;
134                 }
135
136                 switch (ip->ip_data.type) {
137                 case HAMMER2_OBJTYPE_DIRECTORY:
138                         vp->v_type = VDIR;
139                         break;
140                 case HAMMER2_OBJTYPE_REGFILE:
141                         vp->v_type = VREG;
142                         vinitvmio(vp, ip->ip_data.size,
143                                   HAMMER2_LBUFSIZE,
144                                   (int)ip->ip_data.size & HAMMER2_LBUFMASK);
145                         break;
146                 case HAMMER2_OBJTYPE_SOFTLINK:
147                         /*
148                          * XXX for now we are using the generic file_read
149                          * and file_write code so we need a buffer cache
150                          * association.
151                          */
152                         vp->v_type = VLNK;
153                         vinitvmio(vp, ip->ip_data.size,
154                                   HAMMER2_LBUFSIZE,
155                                   (int)ip->ip_data.size & HAMMER2_LBUFMASK);
156                         break;
157                 /* XXX FIFO */
158                 default:
159                         panic("hammer2: unhandled objtype %d",
160                               ip->ip_data.type);
161                         break;
162                 }
163
164                 if (ip == pmp->iroot)
165                         vsetflags(vp, VROOT);
166
167                 vp->v_data = ip;
168                 ip->vp = vp;
169                 hammer2_chain_ref(ip->hmp, &ip->chain); /* vp association */
170                 ccms_thread_lock_restore(&ip->chain.cst, ostate);
171                 break;
172         }
173
174         /*
175          * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
176          */
177         if (hammer2_debug & 0x0002) {
178                 kprintf("igetv vp %p refs %d aux %d\n",
179                         vp, vp->v_sysref.refcnt, vp->v_auxrefs);
180         }
181         return (vp);
182 }
183
184 /*
185  * Create a new inode in the specified directory using the vattr to
186  * figure out the type of inode.
187  *
188  * If no error occurs the new inode with its chain locked is returned in
189  * *nipp, otherwise an error is returned and *nipp is set to NULL.
190  *
191  * If vap and/or cred are NULL the related fields are not set and the
192  * inode type defaults to a directory.  This is used when creating PFSs
193  * under the super-root, so the inode number is set to 1 in this case.
194  */
195 int
196 hammer2_inode_create(hammer2_inode_t *dip,
197                      struct vattr *vap, struct ucred *cred,
198                      const uint8_t *name, size_t name_len,
199                      hammer2_inode_t **nipp)
200 {
201         hammer2_mount_t *hmp = dip->hmp;
202         hammer2_chain_t *chain;
203         hammer2_chain_t *parent;
204         hammer2_inode_t *nip;
205         hammer2_key_t lhc;
206         int error;
207         uid_t xuid;
208
209         lhc = hammer2_dirhash(name, name_len);
210
211         /*
212          * Locate the inode or indirect block to create the new
213          * entry in.  At the same time check for key collisions
214          * and iterate until we don't get one.
215          */
216         parent = &dip->chain;
217         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
218
219         error = 0;
220         while (error == 0) {
221                 chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
222                 if (chain == NULL)
223                         break;
224                 if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
225                         error = ENOSPC;
226                 if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
227                         error = ENOSPC;
228                 hammer2_chain_unlock(hmp, chain);
229                 chain = NULL;
230                 ++lhc;
231         }
232         if (error == 0) {
233                 chain = hammer2_chain_create(hmp, parent, NULL, lhc, 0,
234                                              HAMMER2_BREF_TYPE_INODE,
235                                              HAMMER2_INODE_BYTES);
236                 if (chain == NULL)
237                         error = EIO;
238         }
239         hammer2_chain_unlock(hmp, parent);
240
241         /*
242          * Handle the error case
243          */
244         if (error) {
245                 KKASSERT(chain == NULL);
246                 *nipp = NULL;
247                 return (error);
248         }
249
250         /*
251          * Set up the new inode
252          */
253         nip = chain->u.ip;
254         *nipp = nip;
255
256         hammer2_voldata_lock(hmp);
257         if (vap) {
258                 nip->ip_data.type = hammer2_get_obj_type(vap->va_type);
259                 nip->ip_data.inum = hmp->voldata.alloc_tid++;
260                 /* XXX modify/lock */
261         } else {
262                 nip->ip_data.type = HAMMER2_OBJTYPE_DIRECTORY;
263                 nip->ip_data.inum = 1;
264         }
265         hammer2_voldata_unlock(hmp);
266         nip->ip_data.version = HAMMER2_INODE_VERSION_ONE;
267         hammer2_update_time(&nip->ip_data.ctime);
268         nip->ip_data.mtime = nip->ip_data.ctime;
269         if (vap)
270                 nip->ip_data.mode = vap->va_mode;
271         nip->ip_data.nlinks = 1;
272         if (vap) {
273                 if (dip) {
274                         xuid = hammer2_to_unix_xid(&dip->ip_data.uid);
275                         xuid = vop_helper_create_uid(dip->pmp->mp,
276                                                      dip->ip_data.mode,
277                                                      xuid,
278                                                      cred,
279                                                      &vap->va_mode);
280                 } else {
281                         xuid = 0;
282                 }
283                 if (vap->va_vaflags & VA_UID_UUID_VALID)
284                         nip->ip_data.uid = vap->va_uid_uuid;
285                 else if (vap->va_uid != (uid_t)VNOVAL)
286                         hammer2_guid_to_uuid(&nip->ip_data.uid, vap->va_uid);
287                 else
288                         hammer2_guid_to_uuid(&nip->ip_data.uid, xuid);
289
290                 if (vap->va_vaflags & VA_GID_UUID_VALID)
291                         nip->ip_data.gid = vap->va_gid_uuid;
292                 else if (vap->va_gid != (gid_t)VNOVAL)
293                         hammer2_guid_to_uuid(&nip->ip_data.gid, vap->va_gid);
294                 else if (dip)
295                         nip->ip_data.gid = dip->ip_data.gid;
296         }
297
298         /*
299          * Regular files and softlinks allow a small amount of data to be
300          * directly embedded in the inode.  This flag will be cleared if
301          * the size is extended past the embedded limit.
302          */
303         if (nip->ip_data.type == HAMMER2_OBJTYPE_REGFILE ||
304             nip->ip_data.type == HAMMER2_OBJTYPE_SOFTLINK) {
305                 nip->ip_data.op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
306         }
307
308         KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
309         bcopy(name, nip->ip_data.filename, name_len);
310         nip->ip_data.name_key = lhc;
311         nip->ip_data.name_len = name_len;
312
313         return (0);
314 }
315
316 /*
317  * Duplicate the specified existing inode in the specified target directory.
318  * If name is NULL the inode is duplicated as a hidden directory entry.
319  *
320  * Returns the new inode.  The old inode is left alone.
321  *
322  * XXX name needs to be NULL for now.
323  */
324 int
325 hammer2_inode_duplicate(hammer2_inode_t *dip, hammer2_inode_t *oip,
326                         hammer2_inode_t **nipp,
327                         const uint8_t *name, size_t name_len)
328 {
329         hammer2_mount_t *hmp = dip->hmp;
330         hammer2_inode_t *nip;
331         hammer2_chain_t *parent;
332         hammer2_chain_t *chain;
333         hammer2_key_t lhc;
334         int error;
335
336         if (name) {
337                 lhc = hammer2_dirhash(name, name_len);
338         } else {
339                 lhc = oip->ip_data.inum;
340                 KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);
341         }
342
343         /*
344          * Locate the inode or indirect block to create the new
345          * entry in.  At the same time check for key collisions
346          * and iterate until we don't get one.
347          */
348         nip = NULL;
349         parent = &dip->chain;
350         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
351
352         error = 0;
353         while (error == 0) {
354                 chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
355                 if (chain == NULL)
356                         break;
357                 /* XXX bcmp name if not NULL */
358                 if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
359                         error = ENOSPC;
360                 if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0) /* shouldn't happen */
361                         error = ENOSPC;
362                 hammer2_chain_unlock(hmp, chain);
363                 chain = NULL;
364                 ++lhc;
365         }
366
367         /*
368          * Create entry in common parent directory.
369          */
370         if (error == 0) {
371                 chain = hammer2_chain_create(hmp, parent, NULL, lhc, 0,
372                                              HAMMER2_BREF_TYPE_INODE /* n/a */,
373                                              HAMMER2_INODE_BYTES);   /* n/a */
374                 if (chain == NULL)
375                         error = EIO;
376         }
377         hammer2_chain_unlock(hmp, parent);
378
379         /*
380          * Handle the error case
381          */
382         if (error) {
383                 KKASSERT(chain == NULL);
384                 return (error);
385         }
386
387         /*
388          * XXX This is currently a horrible hack.  Well, if we wanted to
389          *     duplicate a file, i.e. as in a snapshot, we definitely
390          *     would have to flush it first.
391          *
392          *     For hardlink target generation we can theoretically move any
393          *     active chain structures without flushing, but that gets really
394          *     iffy for code which follows chain->parent and ip->pip links.
395          *
396          * XXX only works with files.  Duplicating a directory hierarchy
397          *     requires a flush but doesn't deal with races post-flush.
398          *     Well, it would work I guess, but you might catch some files
399          *     mid-operation.
400          *
401          * We cannot leave oip with any in-memory chains because (for a
402          * hardlink), oip will become a OBJTYPE_HARDLINK which is just a
403          * pointer to the real hardlink's inum and can't have any sub-chains.
404          * XXX might be 0-ref chains left.
405          */
406         hammer2_inode_lock_ex(oip);
407         hammer2_chain_flush(hmp, &oip->chain, 0);
408         hammer2_inode_unlock_ex(oip);
409         /*KKASSERT(RB_EMPTY(&oip->chain.rbhead));*/
410
411         nip = chain->u.ip;
412         hammer2_chain_modify(hmp, chain, 0);
413         nip->ip_data = oip->ip_data;    /* sync media data after flush */
414
415         if (name) {
416                 /*
417                  * Directory entries are inodes so if the name has changed
418                  * we have to update the inode.
419                  */
420                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
421                 bcopy(name, nip->ip_data.filename, name_len);
422                 nip->ip_data.name_key = lhc;
423                 nip->ip_data.name_len = name_len;
424         } else {
425                 /*
426                  * Directory entries are inodes but this is a hidden hardlink
427                  * target.  The name isn't used but to ease debugging give it
428                  * a name after its inode number.
429                  */
430                 ksnprintf(nip->ip_data.filename, sizeof(nip->ip_data.filename),
431                           "0x%016jx", (intmax_t)nip->ip_data.inum);
432                 nip->ip_data.name_len = strlen(nip->ip_data.filename);
433                 nip->ip_data.name_key = lhc;
434         }
435         *nipp = nip;
436
437         return (0);
438 }
439
440
441 /*
442  * Connect inode (oip) to the specified directory using the specified name.
443  * (oip) must be locked.
444  *
445  * If (oip) is not currently connected we simply connect it up.
446  *
447  * If (oip) is already connected we create a OBJTYPE_HARDLINK entry which
448  * points to (oip)'s inode number.  (oip) is expected to be the terminus of
449  * the hardlink sitting as a hidden file in a common parent directory
450  * in this situation (thus the lock order is correct).
451  */
452 int
453 hammer2_inode_connect(hammer2_inode_t *dip, hammer2_inode_t *oip,
454                       const uint8_t *name, size_t name_len)
455 {
456         hammer2_mount_t *hmp = dip->hmp;
457         hammer2_chain_t *chain;
458         hammer2_chain_t *parent;
459         hammer2_inode_t *nip;
460         hammer2_key_t lhc;
461         int error;
462         int hlink;
463
464         lhc = hammer2_dirhash(name, name_len);
465         hlink = (oip->chain.parent != NULL);
466
467         /*
468          * In fake mode flush oip so we can just snapshot it downbelow.
469          */
470         if (hlink && hammer2_hardlink_enable < 0)
471                 hammer2_chain_flush(hmp, &oip->chain, 0);
472
473         /*
474          * Locate the inode or indirect block to create the new
475          * entry in.  At the same time check for key collisions
476          * and iterate until we don't get one.
477          */
478         parent = &dip->chain;
479         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
480
481         error = 0;
482         while (error == 0) {
483                 chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
484                 if (chain == NULL)
485                         break;
486                 if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
487                         error = ENOSPC;
488                 hammer2_chain_unlock(hmp, chain);
489                 chain = NULL;
490                 ++lhc;
491         }
492
493         /*
494          * Passing a non-NULL chain to hammer2_chain_create() reconnects the
495          * existing chain instead of creating a new one.  The chain's bref
496          * will be properly updated.
497          */
498         if (error == 0) {
499                 if (hlink) {
500                         chain = hammer2_chain_create(hmp, parent,
501                                                      NULL, lhc, 0,
502                                                      HAMMER2_BREF_TYPE_INODE,
503                                                      HAMMER2_INODE_BYTES);
504                 } else {
505                         chain = hammer2_chain_create(hmp, parent,
506                                                      &oip->chain, lhc, 0,
507                                                      HAMMER2_BREF_TYPE_INODE,
508                                                      HAMMER2_INODE_BYTES);
509                         if (chain)
510                                 KKASSERT(chain == &oip->chain);
511                 }
512                 if (chain == NULL)
513                         error = EIO;
514         }
515         hammer2_chain_unlock(hmp, parent);
516
517         /*
518          * Handle the error case
519          */
520         if (error) {
521                 KKASSERT(chain == NULL);
522                 return (error);
523         }
524
525         /*
526          * Directory entries are inodes so if the name has changed we have
527          * to update the inode.
528          *
529          * When creating an OBJTYPE_HARDLINK entry remember to unlock the
530          * chain, the caller will access the hardlink via the actual hardlink
531          * target file and not the hardlink pointer entry.
532          */
533         if (hlink && hammer2_hardlink_enable >= 0) {
534                 /*
535                  * Create the HARDLINK pointer.  oip represents the hardlink
536                  * target in this situation.
537                  */
538                 nip = chain->u.ip;
539                 hammer2_chain_modify(hmp, chain, 0);
540                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
541                 bcopy(name, nip->ip_data.filename, name_len);
542                 nip->ip_data.name_key = lhc;
543                 nip->ip_data.name_len = name_len;
544                 nip->ip_data.target_type = oip->ip_data.type;
545                 nip->ip_data.type = HAMMER2_OBJTYPE_HARDLINK;
546                 nip->ip_data.inum = oip->ip_data.inum;
547                 nip->ip_data.nlinks = 1;
548                 kprintf("created hardlink %*.*s\n",
549                         (int)name_len, (int)name_len, name);
550                 hammer2_chain_unlock(hmp, chain);
551         } else if (hlink && hammer2_hardlink_enable < 0) {
552                 /*
553                  * Create a snapshot (hardlink fake mode for debugging).
554                  */
555                 nip = chain->u.ip;
556                 nip->ip_data = oip->ip_data;
557                 hammer2_chain_modify(hmp, chain, 0);
558                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
559                 bcopy(name, nip->ip_data.filename, name_len);
560                 nip->ip_data.name_key = lhc;
561                 nip->ip_data.name_len = name_len;
562                 kprintf("created fake hardlink %*.*s\n",
563                         (int)name_len, (int)name_len, name);
564                 hammer2_chain_unlock(hmp, chain);
565         } else {
566                 /*
567                  * Normally disconnected inode (e.g. during a rename) that
568                  * was reconnected.  We must fixup the name stored in
569                  * oip.
570                  *
571                  * We are using oip as chain, already locked by caller,
572                  * do not unlock it.
573                  */
574                 hammer2_chain_modify(hmp, chain, 0);
575                 if (oip->ip_data.name_len != name_len ||
576                     bcmp(oip->ip_data.filename, name, name_len) != 0) {
577                         KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
578                         bcopy(name, oip->ip_data.filename, name_len);
579                         oip->ip_data.name_key = lhc;
580                         oip->ip_data.name_len = name_len;
581                 }
582                 oip->ip_data.nlinks = 1;
583         }
584
585         return (0);
586 }
587
588 /*
589  * Unlink the file from the specified directory inode.  The directory inode
590  * does not need to be locked.
591  *
592  * isdir determines whether a directory/non-directory check should be made.
593  * No check is made if isdir is set to -1.
594  */
595 int
596 hammer2_unlink_file(hammer2_inode_t *dip,
597                     const uint8_t *name, size_t name_len,
598                     int isdir, hammer2_inode_t *retain_ip)
599 {
600         hammer2_mount_t *hmp;
601         hammer2_chain_t *parent;
602         hammer2_chain_t *chain;
603         hammer2_chain_t *dparent;
604         hammer2_chain_t *dchain;
605         hammer2_key_t lhc;
606         hammer2_inode_t *ip;
607         hammer2_inode_t *oip;
608         int error;
609         uint8_t type;
610
611         error = 0;
612         oip = NULL;
613         hmp = dip->hmp;
614         lhc = hammer2_dirhash(name, name_len);
615
616         /*
617          * Search for the filename in the directory
618          */
619         parent = &dip->chain;
620         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
621         chain = hammer2_chain_lookup(hmp, &parent,
622                                      lhc, lhc + HAMMER2_DIRHASH_LOMASK,
623                                      0);
624         while (chain) {
625                 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
626                     chain->u.ip &&
627                     name_len == chain->data->ipdata.name_len &&
628                     bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
629                         break;
630                 }
631                 chain = hammer2_chain_next(hmp, &parent, chain,
632                                            lhc, lhc + HAMMER2_DIRHASH_LOMASK,
633                                            0);
634         }
635
636         /*
637          * Not found or wrong type (isdir < 0 disables the type check).
638          */
639         if (chain == NULL) {
640                 hammer2_chain_unlock(hmp, parent);
641                 return ENOENT;
642         }
643         if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK)
644                 type = chain->data->ipdata.target_type;
645
646         if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
647                 error = ENOTDIR;
648                 goto done;
649         }
650         if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
651                 error = EISDIR;
652                 goto done;
653         }
654
655         /*
656          * Hardlink must be resolved.  We can't hold parent locked while we
657          * do this or we could deadlock.
658          */
659         if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
660                 hammer2_chain_unlock(hmp, parent);
661                 parent = NULL;
662                 error = hammer2_hardlink_find(dip, &chain, &oip);
663         }
664
665         /*
666          * If this is a directory the directory must be empty.  However, if
667          * isdir < 0 we are doing a rename and the directory does not have
668          * to be empty.
669          *
670          * NOTE: We check the full key range here which covers both visible
671          *       and invisible entries.  Theoretically there should be no
672          *       invisible (hardlink target) entries if there are no visible
673          *       entries.
674          */
675         if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir >= 0) {
676                 dparent = chain;
677                 hammer2_chain_lock(hmp, dparent, HAMMER2_RESOLVE_ALWAYS);
678                 dchain = hammer2_chain_lookup(hmp, &dparent,
679                                               0, (hammer2_key_t)-1,
680                                               HAMMER2_LOOKUP_NODATA);
681                 if (dchain) {
682                         hammer2_chain_unlock(hmp, dchain);
683                         hammer2_chain_unlock(hmp, dparent);
684                         error = ENOTEMPTY;
685                         goto done;
686                 }
687                 hammer2_chain_unlock(hmp, dparent);
688                 dparent = NULL;
689                 /* dchain NULL */
690         }
691
692         /*
693          * Ok, we can now unlink the chain.  We always decrement nlinks even
694          * if the entry can be deleted in case someone has the file open and
695          * does an fstat().
696          *
697          * The chain itself will no longer be in the on-media topology but
698          * can still be flushed to the media (e.g. if an open descriptor
699          * remains).  When the last vnode/ip ref goes away the chain will
700          * be marked unmodified, avoiding any further (now unnecesary) I/O.
701          */
702         if (oip) {
703                 /*
704                  * If this was a hardlink we first delete the hardlink
705                  * pointer entry.
706                  */
707                 parent = oip->chain.parent;
708                 hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
709                 hammer2_chain_lock(hmp, &oip->chain, HAMMER2_RESOLVE_ALWAYS);
710                 hammer2_chain_delete(hmp, parent, &oip->chain,
711                                     (retain_ip == oip));
712                 hammer2_chain_unlock(hmp, &oip->chain);
713                 hammer2_chain_unlock(hmp, parent);
714                 parent = NULL;
715
716                 /*
717                  * Then decrement nlinks on hardlink target.
718                  */
719                 ip = chain->u.ip;
720                 if (ip->ip_data.nlinks == 1) {
721                         dparent = chain->parent;
722                         hammer2_chain_ref(hmp, chain);
723                         hammer2_chain_unlock(hmp, chain);
724                         hammer2_chain_lock(hmp, dparent,
725                                            HAMMER2_RESOLVE_ALWAYS);
726                         hammer2_chain_lock(hmp, chain, HAMMER2_RESOLVE_ALWAYS);
727                         hammer2_chain_drop(hmp, chain);
728                         hammer2_chain_modify(hmp, chain, 0);
729                         --ip->ip_data.nlinks;
730                         hammer2_chain_delete(hmp, dparent, chain, 0);
731                         hammer2_chain_unlock(hmp, dparent);
732                 } else {
733                         hammer2_chain_modify(hmp, chain, 0);
734                         --ip->ip_data.nlinks;
735                 }
736         } else {
737                 /*
738                  * Otherwise this was not a hardlink and we can just
739                  * remove the entry and decrement nlinks.
740                  */
741                 ip = chain->u.ip;
742                 hammer2_chain_modify(hmp, chain, 0);
743                 --ip->ip_data.nlinks;
744                 hammer2_chain_delete(hmp, parent, chain,
745                                      (retain_ip == ip));
746         }
747
748         error = 0;
749
750 done:
751         if (chain)
752                 hammer2_chain_unlock(hmp, chain);
753         if (parent)
754                 hammer2_chain_unlock(hmp, parent);
755         if (oip)
756                 hammer2_chain_drop(oip->hmp, &oip->chain);
757
758         return error;
759 }
760
761 /*
762  * Calculate the allocation size for the file fragment straddling EOF
763  */
764 int
765 hammer2_inode_calc_alloc(hammer2_key_t filesize)
766 {
767         int frag = (int)filesize & HAMMER2_PBUFMASK;
768         int radix;
769
770         if (frag == 0)
771                 return(0);
772         for (radix = HAMMER2_MINALLOCRADIX; frag > (1 << radix); ++radix)
773                 ;
774         return (radix);
775 }
776
777 void
778 hammer2_inode_lock_nlinks(hammer2_inode_t *ip)
779 {
780         hammer2_chain_ref(ip->hmp, &ip->chain);
781 }
782
783 void
784 hammer2_inode_unlock_nlinks(hammer2_inode_t *ip)
785 {
786         hammer2_chain_drop(ip->hmp, &ip->chain);
787 }
788
789 /*
790  * Consolidate for hard link creation.  This moves the specified terminal
791  * hardlink inode to a directory common to its current directory and tdip
792  * if necessary, replacing *ipp with the new inode chain element and
793  * modifying the original inode chain element to OBJTYPE_HARDLINK.
794  *
795  * If the original inode chain element was a prior incarnation of a hidden
796  * inode it can simply be deleted instead of converted.
797  *
798  * (*ipp)'s nlinks field is locked on entry and the new (*ipp)'s nlinks
799  * field will be locked on return (with the original's unlocked).
800  *
801  * The link count is bumped if requested.
802  */
803 int
804 hammer2_hardlink_consolidate(hammer2_inode_t **ipp, hammer2_inode_t *tdip)
805 {
806         hammer2_mount_t *hmp;
807         hammer2_inode_t *oip = *ipp;
808         hammer2_inode_t *nip = NULL;
809         hammer2_inode_t *fdip;
810         hammer2_chain_t *parent;
811         int error;
812
813         hmp = tdip->hmp;
814
815         if (hammer2_hardlink_enable < 0)
816                 return (0);
817         if (hammer2_hardlink_enable == 0)
818                 return (ENOTSUP);
819
820         /*
821          * Find the common parent directory
822          */
823         fdip = oip->pip;
824         while (fdip->depth > tdip->depth) {
825                 fdip = fdip->pip;
826                 KKASSERT(fdip != NULL);
827         }
828         while (tdip->depth > fdip->depth) {
829                 tdip = tdip->pip;
830                 KKASSERT(tdip != NULL);
831         }
832         while (fdip != tdip) {
833                 fdip = fdip->pip;
834                 tdip = tdip->pip;
835                 KKASSERT(fdip != NULL);
836                 KKASSERT(tdip != NULL);
837         }
838
839         /*
840          * Nothing to do (except bump the link count) if the hardlink has
841          * already been consolidated in the correct place.
842          */
843         if (oip->pip == fdip &&
844             (oip->ip_data.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
845                 kprintf("hardlink already consolidated correctly\n");
846                 nip = oip;
847                 hammer2_inode_lock_ex(nip);
848                 hammer2_chain_modify(hmp, &nip->chain, 0);
849                 ++nip->ip_data.nlinks;
850                 hammer2_inode_unlock_ex(nip);
851                 return (0);
852         }
853
854         /*
855          * Create a hidden inode directory entry in the parent, copying
856          * (*oip)'s state.  Then replace oip with OBJTYPE_HARDLINK.
857          *
858          * The duplication function will either flush or move any chains
859          * under oip to the new hardlink target inode, retiring all chains
860          * related to oip before returning.  XXX vp->ip races.
861          */
862         error = hammer2_inode_duplicate(fdip, oip, &nip, NULL, 0);
863         if (error == 0) {
864                 /*
865                  * Bump nlinks on duplicated hidden inode.
866                  */
867                 kprintf("hardlink consolidation success in parent dir %s\n",
868                         fdip->ip_data.filename);
869                 hammer2_inode_lock_nlinks(nip);
870                 hammer2_inode_unlock_nlinks(oip);
871                 hammer2_chain_modify(hmp, &nip->chain, 0);
872                 ++nip->ip_data.nlinks;
873                 hammer2_inode_unlock_ex(nip);
874
875                 if (oip->ip_data.name_key & HAMMER2_DIRHASH_VISIBLE) {
876                         /*
877                          * Replace the old inode with an OBJTYPE_HARDLINK
878                          * pointer.
879                          */
880                         hammer2_inode_lock_ex(oip);
881                         hammer2_chain_modify(hmp, &oip->chain, 0);
882                         oip->ip_data.target_type = oip->ip_data.type;
883                         oip->ip_data.type = HAMMER2_OBJTYPE_HARDLINK;
884                         oip->ip_data.uflags = 0;
885                         oip->ip_data.rmajor = 0;
886                         oip->ip_data.rminor = 0;
887                         oip->ip_data.ctime = 0;
888                         oip->ip_data.mtime = 0;
889                         oip->ip_data.atime = 0;
890                         oip->ip_data.btime = 0;
891                         bzero(&oip->ip_data.uid, sizeof(oip->ip_data.uid));
892                         bzero(&oip->ip_data.gid, sizeof(oip->ip_data.gid));
893                         oip->ip_data.op_flags = HAMMER2_OPFLAG_DIRECTDATA;
894                         oip->ip_data.cap_flags = 0;
895                         oip->ip_data.mode = 0;
896                         oip->ip_data.size = 0;
897                         oip->ip_data.nlinks = 1;
898                         oip->ip_data.iparent = 0;       /* XXX */
899                         oip->ip_data.pfs_type = 0;
900                         oip->ip_data.pfs_inum = 0;
901                         bzero(&oip->ip_data.pfs_id,
902                               sizeof(oip->ip_data.pfs_id));
903                         bzero(&oip->ip_data.pfs_fsid,
904                               sizeof(oip->ip_data.pfs_fsid));
905                         oip->ip_data.data_quota = 0;
906                         oip->ip_data.data_count = 0;
907                         oip->ip_data.inode_quota = 0;
908                         oip->ip_data.inode_count = 0;
909                         oip->ip_data.attr_tid = 0;
910                         oip->ip_data.dirent_tid = 0;
911                         bzero(&oip->ip_data.u, sizeof(oip->ip_data.u));
912                         /* XXX transaction ids */
913
914                         hammer2_inode_unlock_ex(oip);
915                 } else {
916                         /*
917                          * The old inode was a hardlink target, which we
918                          * have now moved.  We must delete it so the new
919                          * hardlink target at a higher directory level
920                          * becomes the only hardlink target for this inode.
921                          */
922                         kprintf("DELETE INVISIBLE\n");
923                         parent = oip->chain.parent;
924                         hammer2_chain_lock(hmp, parent,
925                                            HAMMER2_RESOLVE_ALWAYS);
926                         hammer2_chain_lock(hmp, &oip->chain,
927                                            HAMMER2_RESOLVE_ALWAYS);
928                         hammer2_chain_delete(hmp, parent, &oip->chain, 0);
929                         hammer2_chain_unlock(hmp, &oip->chain);
930                         hammer2_chain_unlock(hmp, parent);
931                 }
932                 *ipp = nip;
933         } else {
934                 KKASSERT(nip == NULL);
935         }
936
937         return (error);
938 }
939
940 /*
941  * If (*ipp) is non-NULL it points to the forward OBJTYPE_HARDLINK inode while
942  * (*chainp) points to the resolved (hidden hardlink target) inode.  In this
943  * situation when nlinks is 1 we wish to deconsolidate the hardlink, moving
944  * it back to the directory that now represents the only remaining link.
945  */
946 int
947 hammer2_hardlink_deconsolidate(hammer2_inode_t *dip, hammer2_chain_t **chainp,
948                                hammer2_inode_t **ipp)
949 {
950         if (*ipp == NULL)
951                 return (0);
952         /* XXX */
953         return (0);
954 }
955
956 /*
957  * When presented with a (*chainp) representing an inode of type
958  * OBJTYPE_HARDLINK this code will save the original inode (with a ref)
959  * in (*ipp), and then locate the hidden hardlink target in (dip) or
960  * any parent directory above (dip).  The locked (*chainp) is replaced
961  * with a new locked (*chainp) representing the hardlink target.
962  */
963 int
964 hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp,
965                       hammer2_inode_t **ipp)
966 {
967         hammer2_mount_t *hmp = dip->hmp;
968         hammer2_chain_t *chain = *chainp;
969         hammer2_chain_t *parent;
970         hammer2_inode_t *pip;
971         hammer2_key_t lhc;
972
973         *ipp = chain->u.ip;
974         hammer2_inode_ref(chain->u.ip);
975         lhc = chain->u.ip->ip_data.inum;
976
977         hammer2_inode_unlock_ex(chain->u.ip);
978         pip = chain->u.ip->pip;
979
980         chain = NULL;
981         while (pip) {
982                 parent = &pip->chain;
983                 KKASSERT(parent->bref.type == HAMMER2_BREF_TYPE_INODE);
984
985                 hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
986                 chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
987                 hammer2_chain_unlock(hmp, parent);
988                 if (chain)
989                         break;
990                 pip = pip->pip;
991         }
992         *chainp = chain;
993         if (chain) {
994                 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
995                 /* already locked */
996                 return (0);
997         } else {
998                 return (EIO);
999         }
1000 }