Merge branches 'hammer2' and 'master' of ssh://crater.dragonflybsd.org/repository...
[dragonfly.git] / sys / vfs / hammer2 / hammer2_inode.c
1 /*
2  * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
39 #include <sys/lock.h>
40 #include <sys/uuid.h>
41
42 #include "hammer2.h"
43
44 /*
45  * Adding a ref to an inode is only legal if the inode already has at least
46  * one ref.
47  */
48 void
49 hammer2_inode_ref(hammer2_inode_t *ip)
50 {
51         hammer2_chain_ref(ip->hmp, &ip->chain);
52 }
53
54 /*
55  * Drop an inode reference, freeing the inode when the last reference goes
56  * away.
57  */
58 void
59 hammer2_inode_drop(hammer2_inode_t *ip)
60 {
61         hammer2_chain_drop(ip->hmp, &ip->chain);
62 }
63
64 /*
65  * Get the vnode associated with the given inode, allocating the vnode if
66  * necessary.
67  *
68  * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
69  * races.
70  *
71  * The vnode will be returned exclusively locked and referenced.  The
72  * reference on the vnode prevents it from being reclaimed.
73  *
74  * The inode (ip) must be referenced by the caller and not locked to avoid
75  * it getting ripped out from under us or deadlocked.
76  */
77 struct vnode *
78 hammer2_igetv(hammer2_inode_t *ip, int *errorp)
79 {
80         struct vnode *vp;
81         hammer2_pfsmount_t *pmp;
82
83         pmp = ip->pmp;
84         KKASSERT(pmp != NULL);
85         *errorp = 0;
86
87         for (;;) {
88                 /*
89                  * Attempt to reuse an existing vnode assignment.  It is
90                  * possible to race a reclaim so the vget() may fail.  The
91                  * inode must be unlocked during the vget() to avoid a
92                  * deadlock against a reclaim.
93                  */
94                 vp = ip->vp;
95                 if (vp) {
96                         /*
97                          * Lock the inode and check for a reclaim race
98                          */
99                         hammer2_inode_lock_ex(ip);
100                         if (ip->vp != vp) {
101                                 hammer2_inode_unlock_ex(ip);
102                                 continue;
103                         }
104
105                         /*
106                          * Inode must be unlocked during the vget() to avoid
107                          * possible deadlocks, vnode is held to prevent
108                          * destruction during the vget().  The vget() can
109                          * still fail if we lost a reclaim race on the vnode.
110                          */
111                         vhold_interlocked(vp);
112                         hammer2_inode_unlock_ex(ip);
113                         if (vget(vp, LK_EXCLUSIVE)) {
114                                 vdrop(vp);
115                                 continue;
116                         }
117                         vdrop(vp);
118                         /* vp still locked and ref from vget */
119                         *errorp = 0;
120                         break;
121                 }
122
123                 /*
124                  * No vnode exists, allocate a new vnode.  Beware of
125                  * allocation races.  This function will return an
126                  * exclusively locked and referenced vnode.
127                  */
128                 *errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
129                 if (*errorp) {
130                         vp = NULL;
131                         break;
132                 }
133
134                 /*
135                  * Lock the inode and check for an allocation race.
136                  */
137                 hammer2_inode_lock_ex(ip);
138                 if (ip->vp != NULL) {
139                         vp->v_type = VBAD;
140                         vx_put(vp);
141                         hammer2_inode_unlock_ex(ip);
142                         continue;
143                 }
144
145                 switch (ip->ip_data.type) {
146                 case HAMMER2_OBJTYPE_DIRECTORY:
147                         vp->v_type = VDIR;
148                         break;
149                 case HAMMER2_OBJTYPE_REGFILE:
150                         vp->v_type = VREG;
151                         vinitvmio(vp, ip->ip_data.size,
152                                   HAMMER2_LBUFSIZE,
153                                   (int)ip->ip_data.size & HAMMER2_LBUFMASK);
154                         break;
155                 case HAMMER2_OBJTYPE_SOFTLINK:
156                         /*
157                          * XXX for now we are using the generic file_read
158                          * and file_write code so we need a buffer cache
159                          * association.
160                          */
161                         vp->v_type = VLNK;
162                         vinitvmio(vp, ip->ip_data.size,
163                                   HAMMER2_LBUFSIZE,
164                                   (int)ip->ip_data.size & HAMMER2_LBUFMASK);
165                         break;
166                 /* XXX FIFO */
167                 default:
168                         panic("hammer2: unhandled objtype %d",
169                               ip->ip_data.type);
170                         break;
171                 }
172
173                 if (ip == pmp->iroot)
174                         vsetflags(vp, VROOT);
175
176                 vp->v_data = ip;
177                 ip->vp = vp;
178                 hammer2_chain_ref(ip->hmp, &ip->chain); /* vp association */
179                 hammer2_inode_unlock_ex(ip);
180                 break;
181         }
182
183         /*
184          * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
185          */
186         if (hammer2_debug & 0x0002) {
187                 kprintf("igetv vp %p refs %d aux %d\n",
188                         vp, vp->v_sysref.refcnt, vp->v_auxrefs);
189         }
190         return (vp);
191 }
192
193 /*
194  * Create a new inode in the specified directory using the vattr to
195  * figure out the type of inode.
196  *
197  * If no error occurs the new inode with its chain locked is returned in
198  * *nipp, otherwise an error is returned and *nipp is set to NULL.
199  *
200  * If vap and/or cred are NULL the related fields are not set and the
201  * inode type defaults to a directory.  This is used when creating PFSs
202  * under the super-root, so the inode number is set to 1 in this case.
203  */
204 int
205 hammer2_inode_create(hammer2_inode_t *dip,
206                      struct vattr *vap, struct ucred *cred,
207                      const uint8_t *name, size_t name_len,
208                      hammer2_inode_t **nipp)
209 {
210         hammer2_mount_t *hmp = dip->hmp;
211         hammer2_chain_t *chain;
212         hammer2_chain_t *parent;
213         hammer2_inode_t *nip;
214         hammer2_key_t lhc;
215         int error;
216         uid_t xuid;
217
218         lhc = hammer2_dirhash(name, name_len);
219
220         /*
221          * Locate the inode or indirect block to create the new
222          * entry in.  At the same time check for key collisions
223          * and iterate until we don't get one.
224          */
225         parent = &dip->chain;
226         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
227
228         error = 0;
229         while (error == 0) {
230                 chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
231                 if (chain == NULL)
232                         break;
233                 if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
234                         error = ENOSPC;
235                 if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
236                         error = ENOSPC;
237                 hammer2_chain_unlock(hmp, chain);
238                 chain = NULL;
239                 ++lhc;
240         }
241         if (error == 0) {
242                 chain = hammer2_chain_create(hmp, parent, NULL, lhc, 0,
243                                              HAMMER2_BREF_TYPE_INODE,
244                                              HAMMER2_INODE_BYTES);
245                 if (chain == NULL)
246                         error = EIO;
247         }
248         hammer2_chain_unlock(hmp, parent);
249
250         /*
251          * Handle the error case
252          */
253         if (error) {
254                 KKASSERT(chain == NULL);
255                 *nipp = NULL;
256                 return (error);
257         }
258
259         /*
260          * Set up the new inode
261          */
262         nip = chain->u.ip;
263         *nipp = nip;
264
265         hammer2_voldata_lock(hmp);
266         if (vap) {
267                 nip->ip_data.type = hammer2_get_obj_type(vap->va_type);
268                 nip->ip_data.inum = hmp->voldata.alloc_tid++;
269                 /* XXX modify/lock */
270         } else {
271                 nip->ip_data.type = HAMMER2_OBJTYPE_DIRECTORY;
272                 nip->ip_data.inum = 1;
273         }
274         hammer2_voldata_unlock(hmp);
275         nip->ip_data.version = HAMMER2_INODE_VERSION_ONE;
276         hammer2_update_time(&nip->ip_data.ctime);
277         nip->ip_data.mtime = nip->ip_data.ctime;
278         if (vap)
279                 nip->ip_data.mode = vap->va_mode;
280         nip->ip_data.nlinks = 1;
281         if (vap) {
282                 if (dip) {
283                         xuid = hammer2_to_unix_xid(&dip->ip_data.uid);
284                         xuid = vop_helper_create_uid(dip->pmp->mp,
285                                                      dip->ip_data.mode,
286                                                      xuid,
287                                                      cred,
288                                                      &vap->va_mode);
289                 } else {
290                         xuid = 0;
291                 }
292                 if (vap->va_vaflags & VA_UID_UUID_VALID)
293                         nip->ip_data.uid = vap->va_uid_uuid;
294                 else if (vap->va_uid != (uid_t)VNOVAL)
295                         hammer2_guid_to_uuid(&nip->ip_data.uid, vap->va_uid);
296                 else
297                         hammer2_guid_to_uuid(&nip->ip_data.uid, xuid);
298
299                 if (vap->va_vaflags & VA_GID_UUID_VALID)
300                         nip->ip_data.gid = vap->va_gid_uuid;
301                 else if (vap->va_gid != (gid_t)VNOVAL)
302                         hammer2_guid_to_uuid(&nip->ip_data.gid, vap->va_gid);
303                 else if (dip)
304                         nip->ip_data.gid = dip->ip_data.gid;
305         }
306
307         /*
308          * Regular files and softlinks allow a small amount of data to be
309          * directly embedded in the inode.  This flag will be cleared if
310          * the size is extended past the embedded limit.
311          */
312         if (nip->ip_data.type == HAMMER2_OBJTYPE_REGFILE ||
313             nip->ip_data.type == HAMMER2_OBJTYPE_SOFTLINK) {
314                 nip->ip_data.op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
315         }
316
317         KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
318         bcopy(name, nip->ip_data.filename, name_len);
319         nip->ip_data.name_key = lhc;
320         nip->ip_data.name_len = name_len;
321
322         return (0);
323 }
324
325 /*
326  * Duplicate the specified existing inode in the specified target directory.
327  * If name is NULL the inode is duplicated as a hidden directory entry.
328  *
329  * Returns the new inode.  The old inode is left alone.
330  *
331  * XXX name needs to be NULL for now.
332  */
333 int
334 hammer2_inode_duplicate(hammer2_inode_t *dip, hammer2_inode_t *oip,
335                         hammer2_inode_t **nipp,
336                         const uint8_t *name, size_t name_len)
337 {
338         hammer2_mount_t *hmp = dip->hmp;
339         hammer2_inode_t *nip;
340         hammer2_chain_t *parent;
341         hammer2_chain_t *chain;
342         hammer2_key_t lhc;
343         int error;
344
345         if (name) {
346                 lhc = hammer2_dirhash(name, name_len);
347         } else {
348                 lhc = oip->ip_data.inum;
349                 KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);
350         }
351
352         /*
353          * Locate the inode or indirect block to create the new
354          * entry in.  At the same time check for key collisions
355          * and iterate until we don't get one.
356          */
357         nip = NULL;
358         parent = &dip->chain;
359         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
360
361         error = 0;
362         while (error == 0) {
363                 chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
364                 if (chain == NULL)
365                         break;
366                 /* XXX bcmp name if not NULL */
367                 if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
368                         error = ENOSPC;
369                 if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0) /* shouldn't happen */
370                         error = ENOSPC;
371                 hammer2_chain_unlock(hmp, chain);
372                 chain = NULL;
373                 ++lhc;
374         }
375
376         /*
377          * Create entry in common parent directory.
378          */
379         if (error == 0) {
380                 chain = hammer2_chain_create(hmp, parent, NULL, lhc, 0,
381                                              HAMMER2_BREF_TYPE_INODE /* n/a */,
382                                              HAMMER2_INODE_BYTES);   /* n/a */
383                 if (chain == NULL)
384                         error = EIO;
385         }
386         hammer2_chain_unlock(hmp, parent);
387
388         /*
389          * Handle the error case
390          */
391         if (error) {
392                 KKASSERT(chain == NULL);
393                 return (error);
394         }
395
396         /*
397          * XXX This is currently a horrible hack.  Well, if we wanted to
398          *     duplicate a file, i.e. as in a snapshot, we definitely
399          *     would have to flush it first.
400          *
401          *     For hardlink target generation we can theoretically move any
402          *     active chain structures without flushing, but that gets really
403          *     iffy for code which follows chain->parent and ip->pip links.
404          *
405          * XXX only works with files.  Duplicating a directory hierarchy
406          *     requires a flush but doesn't deal with races post-flush.
407          *     Well, it would work I guess, but you might catch some files
408          *     mid-operation.
409          *
410          * We cannot leave oip with any in-memory chains because (for a
411          * hardlink), oip will become a OBJTYPE_HARDLINK which is just a
412          * pointer to the real hardlink's inum and can't have any sub-chains.
413          */
414         hammer2_inode_lock_ex(oip);
415         hammer2_chain_flush(hmp, &oip->chain, 0);
416         hammer2_inode_unlock_ex(oip);
417         KKASSERT(SPLAY_EMPTY(&oip->chain.shead));
418
419         nip = chain->u.ip;
420         hammer2_chain_modify(hmp, chain, 0);
421         nip->ip_data = oip->ip_data;    /* sync media data after flush */
422
423         if (name) {
424                 /*
425                  * Directory entries are inodes so if the name has changed
426                  * we have to update the inode.
427                  */
428                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
429                 bcopy(name, nip->ip_data.filename, name_len);
430                 nip->ip_data.name_key = lhc;
431                 nip->ip_data.name_len = name_len;
432         } else {
433                 /*
434                  * Directory entries are inodes but this is a hidden hardlink
435                  * target.  The name isn't used but to ease debugging give it
436                  * a name after its inode number.
437                  */
438                 ksnprintf(nip->ip_data.filename, sizeof(nip->ip_data.filename),
439                           "0x%016jx", (intmax_t)nip->ip_data.inum);
440                 nip->ip_data.name_len = strlen(nip->ip_data.filename);
441                 nip->ip_data.name_key = lhc;
442         }
443         *nipp = nip;
444
445         return (0);
446 }
447
448
449 /*
450  * Connect inode (oip) to the specified directory using the specified name.
451  * (oip) must be locked.
452  *
453  * If (oip) is not currently connected we simply connect it up.
454  *
455  * If (oip) is already connected we create a OBJTYPE_HARDLINK entry which
456  * points to (oip)'s inode number.  (oip) is expected to be the terminus of
457  * the hardlink sitting as a hidden file in a common parent directory
458  * in this situation (thus the lock order is correct).
459  */
460 int
461 hammer2_inode_connect(hammer2_inode_t *dip, hammer2_inode_t *oip,
462                       const uint8_t *name, size_t name_len)
463 {
464         hammer2_mount_t *hmp = dip->hmp;
465         hammer2_chain_t *chain;
466         hammer2_chain_t *parent;
467         hammer2_inode_t *nip;
468         hammer2_key_t lhc;
469         int error;
470         int hlink;
471
472         lhc = hammer2_dirhash(name, name_len);
473         hlink = (oip->chain.parent != NULL);
474
475         /*
476          * In fake mode flush oip so we can just snapshot it downbelow.
477          */
478         if (hlink && hammer2_hardlink_enable < 0)
479                 hammer2_chain_flush(hmp, &oip->chain, 0);
480
481         /*
482          * Locate the inode or indirect block to create the new
483          * entry in.  At the same time check for key collisions
484          * and iterate until we don't get one.
485          */
486         parent = &dip->chain;
487         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
488
489         error = 0;
490         while (error == 0) {
491                 chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
492                 if (chain == NULL)
493                         break;
494                 if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
495                         error = ENOSPC;
496                 hammer2_chain_unlock(hmp, chain);
497                 chain = NULL;
498                 ++lhc;
499         }
500
501         /*
502          * Passing a non-NULL chain to hammer2_chain_create() reconnects the
503          * existing chain instead of creating a new one.  The chain's bref
504          * will be properly updated.
505          */
506         if (error == 0) {
507                 if (hlink) {
508                         chain = hammer2_chain_create(hmp, parent,
509                                                      NULL, lhc, 0,
510                                                      HAMMER2_BREF_TYPE_INODE,
511                                                      HAMMER2_INODE_BYTES);
512                 } else {
513                         chain = hammer2_chain_create(hmp, parent,
514                                                      &oip->chain, lhc, 0,
515                                                      HAMMER2_BREF_TYPE_INODE,
516                                                      HAMMER2_INODE_BYTES);
517                         if (chain)
518                                 KKASSERT(chain == &oip->chain);
519                 }
520                 if (chain == NULL)
521                         error = EIO;
522         }
523         hammer2_chain_unlock(hmp, parent);
524
525         /*
526          * Handle the error case
527          */
528         if (error) {
529                 KKASSERT(chain == NULL);
530                 return (error);
531         }
532
533         /*
534          * Directory entries are inodes so if the name has changed we have
535          * to update the inode.
536          *
537          * When creating an OBJTYPE_HARDLINK entry remember to unlock the
538          * chain, the caller will access the hardlink via the actual hardlink
539          * target file and not the hardlink pointer entry.
540          */
541         if (hlink && hammer2_hardlink_enable >= 0) {
542                 /*
543                  * Create the HARDLINK pointer.  oip represents the hardlink
544                  * target in this situation.
545                  */
546                 nip = chain->u.ip;
547                 hammer2_chain_modify(hmp, chain, 0);
548                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
549                 bcopy(name, nip->ip_data.filename, name_len);
550                 nip->ip_data.name_key = lhc;
551                 nip->ip_data.name_len = name_len;
552                 nip->ip_data.target_type = oip->ip_data.type;
553                 nip->ip_data.type = HAMMER2_OBJTYPE_HARDLINK;
554                 nip->ip_data.inum = oip->ip_data.inum;
555                 nip->ip_data.nlinks = 1;
556                 kprintf("created hardlink %*.*s\n",
557                         (int)name_len, (int)name_len, name);
558                 hammer2_chain_unlock(hmp, chain);
559         } else if (hlink && hammer2_hardlink_enable < 0) {
560                 /*
561                  * Create a snapshot (hardlink fake mode for debugging).
562                  */
563                 nip = chain->u.ip;
564                 nip->ip_data = oip->ip_data;
565                 hammer2_chain_modify(hmp, chain, 0);
566                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
567                 bcopy(name, nip->ip_data.filename, name_len);
568                 nip->ip_data.name_key = lhc;
569                 nip->ip_data.name_len = name_len;
570                 kprintf("created fake hardlink %*.*s\n",
571                         (int)name_len, (int)name_len, name);
572                 hammer2_chain_unlock(hmp, chain);
573         } else {
574                 /*
575                  * Normally disconnected inode (e.g. during a rename) that
576                  * was reconnected.  We must fixup the name stored in
577                  * oip.
578                  *
579                  * We are using oip as chain, already locked by caller,
580                  * do not unlock it.
581                  */
582                 hammer2_chain_modify(hmp, chain, 0);
583                 if (oip->ip_data.name_len != name_len ||
584                     bcmp(oip->ip_data.filename, name, name_len) != 0) {
585                         KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
586                         bcopy(name, oip->ip_data.filename, name_len);
587                         oip->ip_data.name_key = lhc;
588                         oip->ip_data.name_len = name_len;
589                 }
590                 oip->ip_data.nlinks = 1;
591         }
592
593         return (0);
594 }
595
596 /*
597  * Unlink the file from the specified directory inode.  The directory inode
598  * does not need to be locked.
599  *
600  * isdir determines whether a directory/non-directory check should be made.
601  * No check is made if isdir is set to -1.
602  */
603 int
604 hammer2_unlink_file(hammer2_inode_t *dip,
605                     const uint8_t *name, size_t name_len,
606                     int isdir, hammer2_inode_t *retain_ip)
607 {
608         hammer2_mount_t *hmp;
609         hammer2_chain_t *parent;
610         hammer2_chain_t *chain;
611         hammer2_chain_t *dparent;
612         hammer2_chain_t *dchain;
613         hammer2_key_t lhc;
614         hammer2_inode_t *ip;
615         hammer2_inode_t *oip;
616         int error;
617         uint8_t type;
618
619         error = 0;
620         oip = NULL;
621         hmp = dip->hmp;
622         lhc = hammer2_dirhash(name, name_len);
623
624         /*
625          * Search for the filename in the directory
626          */
627         parent = &dip->chain;
628         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
629         chain = hammer2_chain_lookup(hmp, &parent,
630                                      lhc, lhc + HAMMER2_DIRHASH_LOMASK,
631                                      0);
632         while (chain) {
633                 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
634                     chain->u.ip &&
635                     name_len == chain->data->ipdata.name_len &&
636                     bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
637                         break;
638                 }
639                 chain = hammer2_chain_next(hmp, &parent, chain,
640                                            lhc, lhc + HAMMER2_DIRHASH_LOMASK,
641                                            0);
642         }
643
644         /*
645          * Not found or wrong type (isdir < 0 disables the type check).
646          */
647         if (chain == NULL) {
648                 hammer2_chain_unlock(hmp, parent);
649                 return ENOENT;
650         }
651         if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK)
652                 type = chain->data->ipdata.target_type;
653
654         if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
655                 error = ENOTDIR;
656                 goto done;
657         }
658         if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
659                 error = EISDIR;
660                 goto done;
661         }
662
663         /*
664          * Hardlink must be resolved.  We can't hold parent locked while we
665          * do this or we could deadlock.
666          */
667         if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
668                 hammer2_chain_unlock(hmp, parent);
669                 parent = NULL;
670                 error = hammer2_hardlink_find(dip, &chain, &oip);
671         }
672
673         /*
674          * If this is a directory the directory must be empty.  However, if
675          * isdir < 0 we are doing a rename and the directory does not have
676          * to be empty.
677          *
678          * NOTE: We check the full key range here which covers both visible
679          *       and invisible entries.  Theoretically there should be no
680          *       invisible (hardlink target) entries if there are no visible
681          *       entries.
682          */
683         if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir >= 0) {
684                 dparent = chain;
685                 hammer2_chain_lock(hmp, dparent, HAMMER2_RESOLVE_ALWAYS);
686                 dchain = hammer2_chain_lookup(hmp, &dparent,
687                                               0, (hammer2_key_t)-1,
688                                               HAMMER2_LOOKUP_NODATA);
689                 if (dchain) {
690                         hammer2_chain_unlock(hmp, dchain);
691                         hammer2_chain_unlock(hmp, dparent);
692                         error = ENOTEMPTY;
693                         goto done;
694                 }
695                 hammer2_chain_unlock(hmp, dparent);
696                 dparent = NULL;
697                 /* dchain NULL */
698         }
699
700         /*
701          * Ok, we can now unlink the chain.  We always decrement nlinks even
702          * if the entry can be deleted in case someone has the file open and
703          * does an fstat().
704          *
705          * The chain itself will no longer be in the on-media topology but
706          * can still be flushed to the media (e.g. if an open descriptor
707          * remains).  When the last vnode/ip ref goes away the chain will
708          * be marked unmodified, avoiding any further (now unnecesary) I/O.
709          */
710         if (oip) {
711                 /*
712                  * If this was a hardlink we first delete the hardlink
713                  * pointer entry.
714                  */
715                 parent = oip->chain.parent;
716                 hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
717                 hammer2_chain_lock(hmp, &oip->chain, HAMMER2_RESOLVE_ALWAYS);
718                 hammer2_chain_delete(hmp, parent, &oip->chain,
719                                     (retain_ip == oip));
720                 hammer2_chain_unlock(hmp, &oip->chain);
721                 hammer2_chain_unlock(hmp, parent);
722                 parent = NULL;
723
724                 /*
725                  * Then decrement nlinks on hardlink target.
726                  */
727                 ip = chain->u.ip;
728                 if (ip->ip_data.nlinks == 1) {
729                         dparent = chain->parent;
730                         hammer2_chain_ref(hmp, chain);
731                         hammer2_chain_unlock(hmp, chain);
732                         hammer2_chain_lock(hmp, dparent,
733                                            HAMMER2_RESOLVE_ALWAYS);
734                         hammer2_chain_lock(hmp, chain, HAMMER2_RESOLVE_ALWAYS);
735                         hammer2_chain_drop(hmp, chain);
736                         hammer2_chain_modify(hmp, chain, 0);
737                         --ip->ip_data.nlinks;
738                         hammer2_chain_delete(hmp, dparent, chain, 0);
739                         hammer2_chain_unlock(hmp, dparent);
740                 } else {
741                         hammer2_chain_modify(hmp, chain, 0);
742                         --ip->ip_data.nlinks;
743                 }
744         } else {
745                 /*
746                  * Otherwise this was not a hardlink and we can just
747                  * remove the entry and decrement nlinks.
748                  */
749                 ip = chain->u.ip;
750                 hammer2_chain_modify(hmp, chain, 0);
751                 --ip->ip_data.nlinks;
752                 hammer2_chain_delete(hmp, parent, chain,
753                                      (retain_ip == ip));
754         }
755
756         error = 0;
757
758 done:
759         if (chain)
760                 hammer2_chain_unlock(hmp, chain);
761         if (parent)
762                 hammer2_chain_unlock(hmp, parent);
763         if (oip)
764                 hammer2_chain_drop(oip->hmp, &oip->chain);
765
766         return error;
767 }
768
769 /*
770  * Calculate the allocation size for the file fragment straddling EOF
771  */
772 int
773 hammer2_inode_calc_alloc(hammer2_key_t filesize)
774 {
775         int frag = (int)filesize & HAMMER2_PBUFMASK;
776         int radix;
777
778         if (frag == 0)
779                 return(0);
780         for (radix = HAMMER2_MINALLOCRADIX; frag > (1 << radix); ++radix)
781                 ;
782         return (radix);
783 }
784
785 void
786 hammer2_inode_lock_nlinks(hammer2_inode_t *ip)
787 {
788         hammer2_chain_ref(ip->hmp, &ip->chain);
789 }
790
791 void
792 hammer2_inode_unlock_nlinks(hammer2_inode_t *ip)
793 {
794         hammer2_chain_drop(ip->hmp, &ip->chain);
795 }
796
797 /*
798  * Consolidate for hard link creation.  This moves the specified terminal
799  * hardlink inode to a directory common to its current directory and tdip
800  * if necessary, replacing *ipp with the new inode chain element and
801  * modifying the original inode chain element to OBJTYPE_HARDLINK.
802  *
803  * If the original inode chain element was a prior incarnation of a hidden
804  * inode it can simply be deleted instead of converted.
805  *
806  * (*ipp)'s nlinks field is locked on entry and the new (*ipp)'s nlinks
807  * field will be locked on return (with the original's unlocked).
808  *
809  * The link count is bumped if requested.
810  */
811 int
812 hammer2_hardlink_consolidate(hammer2_inode_t **ipp, hammer2_inode_t *tdip)
813 {
814         hammer2_mount_t *hmp;
815         hammer2_inode_t *oip = *ipp;
816         hammer2_inode_t *nip = NULL;
817         hammer2_inode_t *fdip;
818         hammer2_chain_t *parent;
819         int error;
820
821         hmp = tdip->hmp;
822
823         if (hammer2_hardlink_enable < 0)
824                 return (0);
825         if (hammer2_hardlink_enable == 0)
826                 return (ENOTSUP);
827
828         /*
829          * Find the common parent directory
830          */
831         fdip = oip->pip;
832         while (fdip->depth > tdip->depth) {
833                 fdip = fdip->pip;
834                 KKASSERT(fdip != NULL);
835         }
836         while (tdip->depth > fdip->depth) {
837                 tdip = tdip->pip;
838                 KKASSERT(tdip != NULL);
839         }
840         while (fdip != tdip) {
841                 fdip = fdip->pip;
842                 tdip = tdip->pip;
843                 KKASSERT(fdip != NULL);
844                 KKASSERT(tdip != NULL);
845         }
846
847         /*
848          * Nothing to do (except bump the link count) if the hardlink has
849          * already been consolidated in the correct place.
850          */
851         if (oip->pip == fdip &&
852             (oip->ip_data.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
853                 kprintf("hardlink already consolidated correctly\n");
854                 nip = oip;
855                 hammer2_inode_lock_ex(nip);
856                 hammer2_chain_modify(hmp, &nip->chain, 0);
857                 ++nip->ip_data.nlinks;
858                 hammer2_inode_unlock_ex(nip);
859                 return (0);
860         }
861
862         /*
863          * Create a hidden inode directory entry in the parent, copying
864          * (*oip)'s state.  Then replace oip with OBJTYPE_HARDLINK.
865          *
866          * The duplication function will either flush or move any chains
867          * under oip to the new hardlink target inode, retiring all chains
868          * related to oip before returning.  XXX vp->ip races.
869          */
870         error = hammer2_inode_duplicate(fdip, oip, &nip, NULL, 0);
871         if (error == 0) {
872                 /*
873                  * Bump nlinks on duplicated hidden inode.
874                  */
875                 kprintf("hardlink consolidation success in parent dir %s\n",
876                         fdip->ip_data.filename);
877                 hammer2_inode_lock_nlinks(nip);
878                 hammer2_inode_unlock_nlinks(oip);
879                 hammer2_chain_modify(hmp, &nip->chain, 0);
880                 ++nip->ip_data.nlinks;
881                 hammer2_inode_unlock_ex(nip);
882
883                 if (oip->ip_data.name_key & HAMMER2_DIRHASH_VISIBLE) {
884                         /*
885                          * Replace the old inode with an OBJTYPE_HARDLINK
886                          * pointer.
887                          */
888                         hammer2_inode_lock_ex(oip);
889                         hammer2_chain_modify(hmp, &oip->chain, 0);
890                         oip->ip_data.target_type = oip->ip_data.type;
891                         oip->ip_data.type = HAMMER2_OBJTYPE_HARDLINK;
892                         oip->ip_data.uflags = 0;
893                         oip->ip_data.rmajor = 0;
894                         oip->ip_data.rminor = 0;
895                         oip->ip_data.ctime = 0;
896                         oip->ip_data.mtime = 0;
897                         oip->ip_data.atime = 0;
898                         oip->ip_data.btime = 0;
899                         bzero(&oip->ip_data.uid, sizeof(oip->ip_data.uid));
900                         bzero(&oip->ip_data.gid, sizeof(oip->ip_data.gid));
901                         oip->ip_data.op_flags = HAMMER2_OPFLAG_DIRECTDATA;
902                         oip->ip_data.cap_flags = 0;
903                         oip->ip_data.mode = 0;
904                         oip->ip_data.size = 0;
905                         oip->ip_data.nlinks = 1;
906                         oip->ip_data.iparent = 0;       /* XXX */
907                         oip->ip_data.pfs_type = 0;
908                         oip->ip_data.pfs_inum = 0;
909                         bzero(&oip->ip_data.pfs_id,
910                               sizeof(oip->ip_data.pfs_id));
911                         bzero(&oip->ip_data.pfs_fsid,
912                               sizeof(oip->ip_data.pfs_fsid));
913                         oip->ip_data.data_quota = 0;
914                         oip->ip_data.data_count = 0;
915                         oip->ip_data.inode_quota = 0;
916                         oip->ip_data.inode_count = 0;
917                         oip->ip_data.attr_tid = 0;
918                         oip->ip_data.dirent_tid = 0;
919                         bzero(&oip->ip_data.u, sizeof(oip->ip_data.u));
920                         /* XXX transaction ids */
921
922                         hammer2_inode_unlock_ex(oip);
923                 } else {
924                         /*
925                          * The old inode was a hardlink target, which we
926                          * have now moved.  We must delete it so the new
927                          * hardlink target at a higher directory level
928                          * becomes the only hardlink target for this inode.
929                          */
930                         kprintf("DELETE INVISIBLE\n");
931                         parent = oip->chain.parent;
932                         hammer2_chain_lock(hmp, parent,
933                                            HAMMER2_RESOLVE_ALWAYS);
934                         hammer2_chain_lock(hmp, &oip->chain,
935                                            HAMMER2_RESOLVE_ALWAYS);
936                         hammer2_chain_delete(hmp, parent, &oip->chain, 0);
937                         hammer2_chain_unlock(hmp, &oip->chain);
938                         hammer2_chain_unlock(hmp, parent);
939                 }
940                 *ipp = nip;
941         } else {
942                 KKASSERT(nip == NULL);
943         }
944
945         return (error);
946 }
947
948 /*
949  * If (*ipp) is non-NULL it points to the forward OBJTYPE_HARDLINK inode while
950  * (*chainp) points to the resolved (hidden hardlink target) inode.  In this
951  * situation when nlinks is 1 we wish to deconsolidate the hardlink, moving
952  * it back to the directory that now represents the only remaining link.
953  */
954 int
955 hammer2_hardlink_deconsolidate(hammer2_inode_t *dip, hammer2_chain_t **chainp,
956                                hammer2_inode_t **ipp)
957 {
958         if (*ipp == NULL)
959                 return (0);
960         /* XXX */
961         return (0);
962 }
963
964 /*
965  * When presented with a (*chainp) representing an inode of type
966  * OBJTYPE_HARDLINK this code will save the original inode (with a ref)
967  * in (*ipp), and then locate the hidden hardlink target in (dip) or
968  * any parent directory above (dip).  The locked (*chainp) is replaced
969  * with a new locked (*chainp) representing the hardlink target.
970  */
971 int
972 hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp,
973                       hammer2_inode_t **ipp)
974 {
975         hammer2_mount_t *hmp = dip->hmp;
976         hammer2_chain_t *chain = *chainp;
977         hammer2_chain_t *parent;
978         hammer2_inode_t *pip;
979         hammer2_key_t lhc;
980
981         *ipp = chain->u.ip;
982         hammer2_inode_ref(chain->u.ip);
983         lhc = chain->u.ip->ip_data.inum;
984
985         hammer2_inode_unlock_ex(chain->u.ip);
986         pip = chain->u.ip->pip;
987
988         chain = NULL;
989         while (pip) {
990                 parent = &pip->chain;
991                 KKASSERT(parent->bref.type == HAMMER2_BREF_TYPE_INODE);
992
993                 hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
994                 chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
995                 hammer2_chain_unlock(hmp, parent);
996                 if (chain)
997                         break;
998                 pip = pip->pip;
999         }
1000         *chainp = chain;
1001         if (chain) {
1002                 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
1003                 /* already locked */
1004                 return (0);
1005         } else {
1006                 return (EIO);
1007         }
1008 }