hammer2 - Starting refactoring PFS management in mount
[dragonfly.git] / sys / vfs / hammer2 / hammer2_inode.c
1 /*
2  * Copyright (c) 2011-2014 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
39 #include <sys/lock.h>
40 #include <sys/uuid.h>
41
42 #include "hammer2.h"
43
44 #define INODE_DEBUG     0
45
/*
 * Forward declaration: relocates a deleted-but-still-referenced inode
 * into the PFS's hidden directory (defined later in this file).
 */
static void hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
                                         hammer2_cluster_t **cparentp,
                                         hammer2_cluster_t **clusterp,
                                         hammer2_tid_t inum);

/*
 * Generate the red-black tree support functions for the per-PFS
 * in-memory inode tree, keyed by inode number (inum).
 */
RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
             hammer2_tid_t, inum);
53
54 int
55 hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
56 {
57         if (ip1->inum < ip2->inum)
58                 return(-1);
59         if (ip1->inum > ip2->inum)
60                 return(1);
61         return(0);
62 }
63
64 /*
65  * HAMMER2 inode locks
66  *
67  * HAMMER2 offers shared locks and exclusive locks on inodes.
68  *
69  * The standard exclusive inode lock always resolves the inode meta-data,
70  * but there is a bypass version used by the vnode reclamation code that
71  * avoids the I/O.
72  *
73  * The inode locking function locks the inode itself, resolves any stale
74  * chains in the inode's cluster, and allocates a fresh copy of the
75  * cluster with 1 ref and all the underlying chains locked.  Duplication
76  * races are handled by this function.
77  *
78  * ip->cluster will be stable while the inode is locked.
79  *
80  * NOTE: We don't combine the inode/chain lock because putting away an
81  *       inode would otherwise confuse multiple lock holders of the inode.
82  *
83  * NOTE: In-memory inodes always point to hardlink targets (the actual file),
84  *       and never point to a hardlink pointer.
85  *
86  * NOTE: Caller must not passed HAMMER2_RESOLVE_NOREF because we use it
87  *       internally and refs confusion will ensue.
88  */
89 hammer2_cluster_t *
90 hammer2_inode_lock_ex(hammer2_inode_t *ip)
91 {
92         return hammer2_inode_lock_nex(ip, HAMMER2_RESOLVE_ALWAYS);
93 }
94
/*
 * Exclusively lock the inode and return a locked, referenced copy of
 * its cluster resolved according to 'how' (HAMMER2_RESOLVE_* flags).
 *
 * The caller must dispose of both via hammer2_inode_unlock_ex().
 */
hammer2_cluster_t *
hammer2_inode_lock_nex(hammer2_inode_t *ip, int how)
{
        hammer2_cluster_t *cluster;

        /* NOREF is reserved for internal use in the cluster lock below */
        KKASSERT((how & HAMMER2_RESOLVE_NOREF) == 0);

        /* ref first so the inode cannot be freed while we block on the mtx */
        hammer2_inode_ref(ip);
        hammer2_mtx_ex(&ip->lock);

        /*
         * Create a copy of ip->cluster and lock it.  Note that the copy
         * will have a ref on the cluster AND its chains and we don't want
         * a second ref to either when we lock it.
         *
         * The copy will not have a focus until it is locked.
         *
         * We save the focused chain in our embedded ip->cluster for now XXX.
         */
        cluster = hammer2_cluster_copy(&ip->cluster);
        hammer2_cluster_lock(cluster, how | HAMMER2_RESOLVE_NOREF);
        ip->cluster.focus = cluster->focus;

        /*
         * Returned cluster must resolve hardlink pointers.  In-memory
         * inodes always point at the hardlink target, never the pointer,
         * so the assertion below must hold (old resolution code kept
         * commented out for reference).
         */
        if ((how & HAMMER2_RESOLVE_MASK) == HAMMER2_RESOLVE_ALWAYS) {
                const hammer2_inode_data_t *ripdata;
                ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
                KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
                /*
                if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK &&
                    (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) {
                        error = hammer2_hardlink_find(ip->pip, NULL, cluster);
                        KKASSERT(error == 0);
                }
                */
        }
        return (cluster);
}
135
136 void
137 hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
138 {
139         if (cluster)
140                 hammer2_cluster_unlock(cluster);
141         hammer2_mtx_unlock(&ip->lock);
142         hammer2_inode_drop(ip);
143 }
144
145 /*
146  * Standard shared inode lock always resolves the inode meta-data.
147  *
148  * NOTE: We don't combine the inode/chain lock because putting away an
149  *       inode would otherwise confuse multiple lock holders of the inode.
150  *
151  *       Shared locks are especially sensitive to having too many shared
152  *       lock counts (from the same thread) on certain paths which might
153  *       need to upgrade them.  Only one count of a shared lock can be
154  *       upgraded.
155  */
/*
 * Shared-lock the inode and return a locked, referenced copy of its
 * cluster with meta-data resolved.  Release with
 * hammer2_inode_unlock_sh().
 */
hammer2_cluster_t *
hammer2_inode_lock_sh(hammer2_inode_t *ip)
{
        const hammer2_inode_data_t *ripdata;
        hammer2_cluster_t *cluster;

        /* ref first so the inode cannot be freed while we block on the mtx */
        hammer2_inode_ref(ip);
        hammer2_mtx_sh(&ip->lock);

        /*
         * Create a copy of ip->cluster and lock it.  Note that the copy
         * will have a ref on the cluster AND its chains and we don't want
         * a second ref to either when we lock it.
         *
         * The copy will not have a focus until it is locked.
         */
        cluster = hammer2_cluster_copy(&ip->cluster);
        hammer2_cluster_lock(cluster, HAMMER2_RESOLVE_ALWAYS |
                                      HAMMER2_RESOLVE_SHARED |
                                      HAMMER2_RESOLVE_NOREF);
        /* do not update ip->cluster.focus on a shared inode lock! */
        /*ip->cluster.focus = cluster->focus;*/

        /*
         * Returned cluster must resolve hardlink pointers.  In-memory
         * inodes always point at the hardlink target, so the assertion
         * must hold (old resolution code kept commented out for reference).
         */
        ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
        KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
        /*
        if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK &&
            (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) {
                error = hammer2_hardlink_find(ip->pip, NULL, cluster);
                KKASSERT(error == 0);
        }
        */

        return (cluster);
}
194
195 void
196 hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
197 {
198         if (cluster)
199                 hammer2_cluster_unlock(cluster);
200         hammer2_mtx_unlock(&ip->lock);
201         hammer2_inode_drop(ip);
202 }
203
204 /*
205  * Temporarily release a lock held shared or exclusive.  Caller must
206  * hold the lock shared or exclusive on call and lock will be released
207  * on return.
208  *
209  * Restore a lock that was temporarily released.
210  */
211 hammer2_mtx_state_t
212 hammer2_inode_lock_temp_release(hammer2_inode_t *ip)
213 {
214         return hammer2_mtx_temp_release(&ip->lock);
215 }
216
/*
 * Re-acquire ip->lock in the state (ostate) returned by a prior
 * hammer2_inode_lock_temp_release().
 */
void
hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, hammer2_mtx_state_t ostate)
{
        hammer2_mtx_temp_restore(&ip->lock, ostate);
}
222
223 /*
224  * Upgrade a shared inode lock to exclusive and return.  If the inode lock
225  * is already held exclusively this is a NOP.
226  *
227  * The caller MUST hold the inode lock either shared or exclusive on call
228  * and will own the lock exclusively on return.
229  *
230  * Returns non-zero if the lock was already exclusive prior to the upgrade.
231  */
232 int
233 hammer2_inode_lock_upgrade(hammer2_inode_t *ip)
234 {
235         int wasexclusive;
236
237         if (mtx_islocked_ex(&ip->lock)) {
238                 wasexclusive = 1;
239         } else {
240                 hammer2_mtx_unlock(&ip->lock);
241                 hammer2_mtx_ex(&ip->lock);
242                 wasexclusive = 0;
243         }
244         return wasexclusive;
245 }
246
247 /*
248  * Downgrade an inode lock from exclusive to shared only if the inode
249  * lock was previously shared.  If the inode lock was previously exclusive,
250  * this is a NOP.
251  */
252 void
253 hammer2_inode_lock_downgrade(hammer2_inode_t *ip, int wasexclusive)
254 {
255         if (wasexclusive == 0)
256                 mtx_downgrade(&ip->lock);
257 }
258
259 /*
260  * Lookup an inode by inode number
261  */
262 hammer2_inode_t *
263 hammer2_inode_lookup(hammer2_pfs_t *pmp, hammer2_tid_t inum)
264 {
265         hammer2_inode_t *ip;
266
267         KKASSERT(pmp);
268         if (pmp->spmp_hmp) {
269                 ip = NULL;
270         } else {
271                 hammer2_spin_ex(&pmp->inum_spin);
272                 ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
273                 if (ip)
274                         hammer2_inode_ref(ip);
275                 hammer2_spin_unex(&pmp->inum_spin);
276         }
277         return(ip);
278 }
279
280 /*
281  * Adding a ref to an inode is only legal if the inode already has at least
282  * one ref.
283  *
284  * (can be called with spinlock held)
285  */
/*
 * Add a reference to the inode.  Only legal if the caller already
 * holds at least one ref (the count is never 0->1 here).
 *
 * (can be called with spinlock held)
 */
void
hammer2_inode_ref(hammer2_inode_t *ip)
{
        atomic_add_int(&ip->refs, 1);
}
291
292 /*
293  * Drop an inode reference, freeing the inode when the last reference goes
294  * away.
295  */
/*
 * Drop an inode reference, freeing the inode on the 1->0 transition.
 * Loops on ip->pip to dispose of the implied parent reference without
 * recursing.
 */
void
hammer2_inode_drop(hammer2_inode_t *ip)
{
        hammer2_pfs_t *pmp;
        hammer2_inode_t *pip;
        u_int refs;

        while (ip) {
                refs = ip->refs;
                cpu_ccfence();  /* prevent compiler re-reads of ip->refs */
                if (refs == 1) {
                        /*
                         * Transition to zero, must interlock with
                         * the inode inumber lookup tree (if applicable).
                         * It should not be possible for anyone to race
                         * the transition to 0.
                         *
                         */
                        pmp = ip->pmp;
                        KKASSERT(pmp);
                        hammer2_spin_ex(&pmp->inum_spin);

                        if (atomic_cmpset_int(&ip->refs, 1, 0)) {
                                /* no lock holders may remain at this point */
                                KKASSERT(hammer2_mtx_refs(&ip->lock) == 0);
                                if (ip->flags & HAMMER2_INODE_ONRBTREE) {
                                        atomic_clear_int(&ip->flags,
                                                     HAMMER2_INODE_ONRBTREE);
                                        RB_REMOVE(hammer2_inode_tree,
                                                  &pmp->inum_tree, ip);
                                }
                                hammer2_spin_unex(&pmp->inum_spin);

                                pip = ip->pip;
                                ip->pip = NULL;
                                ip->pmp = NULL;

                                /*
                                 * Cleaning out ip->cluster isn't entirely
                                 * trivial.
                                 */
                                hammer2_inode_repoint(ip, NULL, NULL);

                                /*
                                 * We have to drop pip (if non-NULL) to
                                 * dispose of our implied reference from
                                 * ip->pip.  We can simply loop on it.
                                 */
                                kfree(ip, pmp->minode);
                                atomic_add_long(&pmp->inmem_inodes, -1);
                                ip = pip;
                                /* continue with pip (can be NULL) */
                        } else {
                                /* lost the cmpset race; retry the loop */
                                hammer2_spin_unex(&ip->pmp->inum_spin);
                        }
                } else {
                        /*
                         * Non zero transition: simple decrement, retry
                         * on cmpset failure.
                         */
                        if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
                                break;
                }
        }
}
359
360 /*
361  * Get the vnode associated with the given inode, allocating the vnode if
362  * necessary.  The vnode will be returned exclusively locked.
363  *
364  * The caller must lock the inode (shared or exclusive).
365  *
366  * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
367  * races.
368  */
/*
 * Get the vnode associated with the given inode, allocating the vnode
 * if necessary.  Returns the vnode exclusively locked and referenced,
 * or NULL with *errorp set on failure.
 *
 * The caller must hold the inode locked (shared or exclusive) and must
 * pass the locked cluster (cparent) whose inode data describes the
 * object type and size.
 */
struct vnode *
hammer2_igetv(hammer2_inode_t *ip, hammer2_cluster_t *cparent, int *errorp)
{
        const hammer2_inode_data_t *ripdata;
        hammer2_pfs_t *pmp;
        struct vnode *vp;

        pmp = ip->pmp;
        KKASSERT(pmp != NULL);
        *errorp = 0;

        /* NOTE(review): read once up-front; assumes cparent stays locked
         * by the caller for the duration of this call — confirm. */
        ripdata = &hammer2_cluster_rdata(cparent)->ipdata;

        for (;;) {
                /*
                 * Attempt to reuse an existing vnode assignment.  It is
                 * possible to race a reclaim so the vget() may fail.  The
                 * inode must be unlocked during the vget() to avoid a
                 * deadlock against a reclaim.
                 */
                int wasexclusive;

                vp = ip->vp;
                if (vp) {
                        /*
                         * Inode must be unlocked during the vget() to avoid
                         * possible deadlocks, but leave the ip ref intact.
                         *
                         * vnode is held to prevent destruction during the
                         * vget().  The vget() can still fail if we lost
                         * a reclaim race on the vnode.
                         */
                        hammer2_mtx_state_t ostate;

                        vhold(vp);
                        ostate = hammer2_inode_lock_temp_release(ip);
                        if (vget(vp, LK_EXCLUSIVE)) {
                                /* lost reclaim race; retry from scratch */
                                vdrop(vp);
                                hammer2_inode_lock_temp_restore(ip, ostate);
                                continue;
                        }
                        hammer2_inode_lock_temp_restore(ip, ostate);
                        vdrop(vp);
                        /* vp still locked and ref from vget */
                        if (ip->vp != vp) {
                                /* association changed while unlocked */
                                kprintf("hammer2: igetv race %p/%p\n",
                                        ip->vp, vp);
                                vput(vp);
                                continue;
                        }
                        *errorp = 0;
                        break;
                }

                /*
                 * No vnode exists, allocate a new vnode.  Beware of
                 * allocation races.  This function will return an
                 * exclusively locked and referenced vnode.
                 */
                *errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
                if (*errorp) {
                        kprintf("hammer2: igetv getnewvnode failed %d\n",
                                *errorp);
                        vp = NULL;
                        break;
                }

                /*
                 * Lock the inode and check for an allocation race.
                 */
                wasexclusive = hammer2_inode_lock_upgrade(ip);
                if (ip->vp != NULL) {
                        /* someone else associated a vnode first; discard */
                        vp->v_type = VBAD;
                        vx_put(vp);
                        hammer2_inode_lock_downgrade(ip, wasexclusive);
                        continue;
                }

                /* initialize the vnode type/ops from the on-media type */
                switch (ripdata->type) {
                case HAMMER2_OBJTYPE_DIRECTORY:
                        vp->v_type = VDIR;
                        break;
                case HAMMER2_OBJTYPE_REGFILE:
                        vp->v_type = VREG;
                        vinitvmio(vp, ripdata->size,
                                  HAMMER2_LBUFSIZE,
                                  (int)ripdata->size & HAMMER2_LBUFMASK);
                        break;
                case HAMMER2_OBJTYPE_SOFTLINK:
                        /*
                         * XXX for now we are using the generic file_read
                         * and file_write code so we need a buffer cache
                         * association.
                         */
                        vp->v_type = VLNK;
                        vinitvmio(vp, ripdata->size,
                                  HAMMER2_LBUFSIZE,
                                  (int)ripdata->size & HAMMER2_LBUFMASK);
                        break;
                case HAMMER2_OBJTYPE_CDEV:
                        vp->v_type = VCHR;
                        /* fall through */
                case HAMMER2_OBJTYPE_BDEV:
                        vp->v_ops = &pmp->mp->mnt_vn_spec_ops;
                        if (ripdata->type != HAMMER2_OBJTYPE_CDEV)
                                vp->v_type = VBLK;
                        addaliasu(vp, ripdata->rmajor, ripdata->rminor);
                        break;
                case HAMMER2_OBJTYPE_FIFO:
                        vp->v_type = VFIFO;
                        vp->v_ops = &pmp->mp->mnt_vn_fifo_ops;
                        break;
                default:
                        panic("hammer2: unhandled objtype %d", ripdata->type);
                        break;
                }

                if (ip == pmp->iroot)
                        vsetflags(vp, VROOT);

                vp->v_data = ip;
                ip->vp = vp;
                hammer2_inode_ref(ip);          /* vp association */
                hammer2_inode_lock_downgrade(ip, wasexclusive);
                break;
        }

        /*
         * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
         */
        if (hammer2_debug & 0x0002) {
                kprintf("igetv vp %p refs 0x%08x aux 0x%08x\n",
                        vp, vp->v_refcnt, vp->v_auxrefs);
        }
        return (vp);
}
505
506 /*
507  * Returns the inode associated with the passed-in cluster, creating the
508  * inode if necessary and synchronizing it to the passed-in cluster otherwise.
509  *
510  * The passed-in chain must be locked and will remain locked on return.
511  * The returned inode will be locked and the caller may dispose of both
512  * via hammer2_inode_unlock_ex().  However, if the caller needs to resolve
513  * a hardlink it must ref/unlock/relock/drop the inode.
514  *
515  * The hammer2_inode structure regulates the interface between the high level
516  * kernel VNOPS API and the filesystem backend (the chains).
517  */
/*
 * Return the in-memory inode for the passed-in cluster, allocating and
 * indexing a new one if it does not already exist.  The returned inode
 * is referenced and exclusively locked (state compatible with
 * hammer2_inode_lock_ex()).
 *
 * cluster may be NULL during initial PFS allocation; in that case the
 * inode number defaults to 1 and no cluster is associated yet.
 */
hammer2_inode_t *
hammer2_inode_get(hammer2_pfs_t *pmp, hammer2_inode_t *dip,
                  hammer2_cluster_t *cluster)
{
        hammer2_inode_t *nip;
        const hammer2_inode_data_t *iptmp;
        const hammer2_inode_data_t *nipdata;

        KKASSERT(cluster == NULL ||
                 hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
        KKASSERT(pmp);

        /*
         * Interlocked lookup/ref of the inode.  This code is only needed
         * when looking up inodes with nlinks != 0 (TODO: optimize out
         * otherwise and test for duplicates).
         *
         * Cluster can be NULL during the initial pfs allocation.
         */
again:
        while (cluster) {
                iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
                nip = hammer2_inode_lookup(pmp, iptmp->inum);
                if (nip == NULL)
                        break;

                hammer2_mtx_ex(&nip->lock);

                /*
                 * Handle SMP race (not applicable to the super-root spmp
                 * which can't index inodes due to duplicative inode numbers).
                 */
                if (pmp->spmp_hmp == NULL &&
                    (nip->flags & HAMMER2_INODE_ONRBTREE) == 0) {
                        /* inode was removed from the tree while we blocked */
                        hammer2_mtx_unlock(&nip->lock);
                        hammer2_inode_drop(nip);
                        continue;
                }
                hammer2_inode_repoint(nip, NULL, cluster);
                return nip;
        }

        /*
         * We couldn't find the inode number, create a new inode.
         */
        nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO);
        atomic_add_long(&pmp->inmem_inodes, 1);
        hammer2_pfs_memory_inc(pmp);
        hammer2_pfs_memory_wakeup(pmp);
        if (pmp->spmp_hmp)
                nip->flags = HAMMER2_INODE_SROOT;

        /*
         * Initialize nip's cluster.  A cluster is provided for normal
         * inodes but typically not for the super-root or PFS inodes.
         */
        nip->cluster.refs = 1;
        nip->cluster.pmp = pmp;
        nip->cluster.flags |= HAMMER2_CLUSTER_INODE;
        if (cluster) {
                hammer2_cluster_replace(&nip->cluster, cluster);
                nipdata = &hammer2_cluster_rdata(cluster)->ipdata;
                nip->inum = nipdata->inum;
                nip->size = nipdata->size;
                nip->mtime = nipdata->mtime;
                hammer2_inode_repoint(nip, NULL, cluster);
        } else {
                nip->inum = 1;                  /* PFS inum is always 1 XXX */
                /* mtime will be updated when a cluster is available */
        }

        nip->pip = dip;                         /* can be NULL */
        if (dip)
                hammer2_inode_ref(dip); /* ref dip for nip->pip */

        nip->pmp = pmp;

        /*
         * ref and lock on nip gives it state compatible to after a
         * hammer2_inode_lock_ex() call.
         */
        nip->refs = 1;
        hammer2_mtx_init(&nip->lock, "h2inode");
        hammer2_mtx_ex(&nip->lock);
        /* combination of thread lock and chain lock == inode lock */

        /*
         * Attempt to add the inode.  If it fails we raced another inode
         * get.  Undo all the work and try again.
         */
        if (pmp->spmp_hmp == NULL) {
                hammer2_spin_ex(&pmp->inum_spin);
                if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
                        hammer2_spin_unex(&pmp->inum_spin);
                        hammer2_mtx_unlock(&nip->lock);
                        hammer2_inode_drop(nip);
                        goto again;
                }
                atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
                hammer2_spin_unex(&pmp->inum_spin);
        }

        return (nip);
}
622
623 /*
624  * Create a new inode in the specified directory using the vattr to
625  * figure out the type of inode.
626  *
627  * If no error occurs the new inode with its cluster locked is returned in
628  * *nipp, otherwise an error is returned and *nipp is set to NULL.
629  *
630  * If vap and/or cred are NULL the related fields are not set and the
631  * inode type defaults to a directory.  This is used when creating PFSs
632  * under the super-root, so the inode number is set to 1 in this case.
633  *
634  * dip is not locked on entry.
635  *
636  * NOTE: When used to create a snapshot, the inode is temporarily associated
637  *       with the super-root spmp. XXX should pass new pmp for snapshot.
638  */
/*
 * Create a new inode under directory dip, using vap to determine the
 * inode type.  Returns the new inode (locked, state from
 * hammer2_inode_get()) with its locked cluster in *clusterp, or NULL
 * with *errorp set.
 *
 * If vap is NULL the inode type defaults to a directory with inum 1
 * (PFS creation under the super-root).  dip is not locked on entry.
 */
hammer2_inode_t *
hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
                     struct vattr *vap, struct ucred *cred,
                     const uint8_t *name, size_t name_len,
                     hammer2_cluster_t **clusterp,
                     int flags, int *errorp)
{
        const hammer2_inode_data_t *dipdata;
        hammer2_inode_data_t *nipdata;
        hammer2_cluster_t *cluster;
        hammer2_cluster_t *cparent;
        hammer2_inode_t *nip;
        hammer2_key_t key_dummy;
        hammer2_key_t lhc;
        int error;
        uid_t xuid;
        uuid_t dip_uid;
        uuid_t dip_gid;
        uint32_t dip_mode;
        uint8_t dip_comp_algo;
        uint8_t dip_check_algo;
        int ddflag;

        lhc = hammer2_dirhash(name, name_len);
        *errorp = 0;

        /*
         * Locate the inode or indirect block to create the new
         * entry in.  At the same time check for key collisions
         * and iterate until we don't get one.
         *
         * NOTE: hidden inodes do not have iterators.
         */
retry:
        cparent = hammer2_inode_lock_ex(dip);
        dipdata = &hammer2_cluster_rdata(cparent)->ipdata;
        /* snapshot the fields inherited from the parent directory */
        dip_uid = dipdata->uid;
        dip_gid = dipdata->gid;
        dip_mode = dipdata->mode;
        dip_comp_algo = dipdata->comp_algo;
        dip_check_algo = dipdata->check_algo;

        error = 0;
        while (error == 0) {
                cluster = hammer2_cluster_lookup(cparent, &key_dummy,
                                                 lhc, lhc, 0, &ddflag);
                if (cluster == NULL)
                        break;
                /* collision: only visible keys with iterator room may retry */
                if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
                        error = ENOSPC;
                if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
                        error = ENOSPC;
                hammer2_cluster_unlock(cluster);
                cluster = NULL;
                ++lhc;
        }

        if (error == 0) {
                error = hammer2_cluster_create(trans, cparent, &cluster,
                                             lhc, 0,
                                             HAMMER2_BREF_TYPE_INODE,
                                             HAMMER2_INODE_BYTES,
                                             flags);
        }
#if INODE_DEBUG
        kprintf("CREATE INODE %*.*s chain=%p\n",
                (int)name_len, (int)name_len, name,
                (cluster ? cluster->focus : NULL));
#endif

        /*
         * Cleanup and handle retries.  EAGAIN from the cluster create
         * means we must wait on the cluster and start over.
         */
        if (error == EAGAIN) {
                hammer2_cluster_ref(cparent);
                hammer2_inode_unlock_ex(dip, cparent);
                hammer2_cluster_wait(cparent);
                hammer2_cluster_drop(cparent);
                goto retry;
        }
        hammer2_inode_unlock_ex(dip, cparent);
        cparent = NULL;

        if (error) {
                KKASSERT(cluster == NULL);
                *errorp = error;
                return (NULL);
        }

        /*
         * Set up the new inode.
         *
         * NOTE: *_get() integrates chain's lock into the inode lock.
         *
         * NOTE: Only one new inode can currently be created per
         *       transaction.  If the need arises we can adjust
         *       hammer2_trans_init() to allow more.
         *
         * NOTE: nipdata will have chain's blockset data.
         */
        KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_MODIFIED);
        nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
        nipdata->inum = trans->inode_tid;
        hammer2_cluster_modsync(cluster);
        nip = hammer2_inode_get(dip->pmp, dip, cluster);
        nipdata = &hammer2_cluster_wdata(cluster)->ipdata;

        if (vap) {
                KKASSERT(trans->inodes_created == 0);
                nipdata->type = hammer2_get_obj_type(vap->va_type);
                nipdata->inum = trans->inode_tid;
                ++trans->inodes_created;

                switch (nipdata->type) {
                case HAMMER2_OBJTYPE_CDEV:
                case HAMMER2_OBJTYPE_BDEV:
                        /* device nodes record their major/minor numbers */
                        nipdata->rmajor = vap->va_rmajor;
                        nipdata->rminor = vap->va_rminor;
                        break;
                default:
                        break;
                }
        } else {
                /* no vattr: PFS-style directory inode under super-root */
                nipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
                nipdata->inum = 1;
        }

        /* Inherit parent's inode compression mode. */
        nip->comp_heuristic = 0;
        nipdata->comp_algo = dip_comp_algo;
        nipdata->check_algo = dip_check_algo;
        nipdata->version = HAMMER2_INODE_VERSION_ONE;
        hammer2_update_time(&nipdata->ctime);
        nipdata->mtime = nipdata->ctime;
        if (vap)
                nipdata->mode = vap->va_mode;
        nipdata->nlinks = 1;
        if (vap) {
                /*
                 * Resolve ownership: explicit UUIDs or ids from vap win,
                 * otherwise fall back to the inherited parent values.
                 */
                if (dip && dip->pmp) {
                        xuid = hammer2_to_unix_xid(&dip_uid);
                        xuid = vop_helper_create_uid(dip->pmp->mp,
                                                     dip_mode,
                                                     xuid,
                                                     cred,
                                                     &vap->va_mode);
                } else {
                        /* super-root has no dip and/or pmp */
                        xuid = 0;
                }
                if (vap->va_vaflags & VA_UID_UUID_VALID)
                        nipdata->uid = vap->va_uid_uuid;
                else if (vap->va_uid != (uid_t)VNOVAL)
                        hammer2_guid_to_uuid(&nipdata->uid, vap->va_uid);
                else
                        hammer2_guid_to_uuid(&nipdata->uid, xuid);

                if (vap->va_vaflags & VA_GID_UUID_VALID)
                        nipdata->gid = vap->va_gid_uuid;
                else if (vap->va_gid != (gid_t)VNOVAL)
                        hammer2_guid_to_uuid(&nipdata->gid, vap->va_gid);
                else if (dip)
                        nipdata->gid = dip_gid;
        }

        /*
         * Regular files and softlinks allow a small amount of data to be
         * directly embedded in the inode.  This flag will be cleared if
         * the size is extended past the embedded limit.
         */
        if (nipdata->type == HAMMER2_OBJTYPE_REGFILE ||
            nipdata->type == HAMMER2_OBJTYPE_SOFTLINK) {
                nipdata->op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
        }

        KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
        bcopy(name, nipdata->filename, name_len);
        nipdata->name_key = lhc;
        nipdata->name_len = name_len;
        hammer2_cluster_modsync(cluster);
        *clusterp = cluster;

        return (nip);
}
822
823 /*
824  * The cluster has been removed from the original directory and replaced
825  * with a hardlink pointer.  Move the cluster to the specified parent
826  * directory, change the filename to "0xINODENUMBER", and adjust the key.
827  * The cluster becomes our invisible hardlink target.
828  *
829  * The original cluster must be deleted on entry.
830  */
static
void
hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_cluster_t *cluster,
			hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
			int nlinks, int *errorp)
{
	const hammer2_inode_data_t *iptmp;
	hammer2_inode_data_t *nipdata;
	hammer2_cluster_t *xcluster;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	hammer2_blockref_t bref;
	int ddflag;

	/*
	 * The hidden hardlink target is keyed by its inode number.  Such
	 * keys never have the DIRHASH_VISIBLE bit set, which is what keeps
	 * the entry out of normal directory scans.
	 */
	iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
	lhc = iptmp->inum;
	KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);

	/*
	 * Locate the inode or indirect block to create the new
	 * entry in.  lhc represents the inode number so there is
	 * no collision iteration.
	 *
	 * There should be no key collisions with invisible inode keys.
	 *
	 * WARNING! Must use inode_lock_ex() on dip to handle a stale
	 *          dip->cluster cache.
	 */
	*errorp = 0;
	xcluster = hammer2_cluster_lookup(dcluster, &key_dummy,
				      lhc, lhc, 0, &ddflag);
	if (xcluster) {
		/*
		 * Unexpected: an entry already exists at the inode-number
		 * key.  Report ENOSPC and fall into the panic below.
		 */
		kprintf("X3 chain %p dip %p dchain %p dip->chain %p\n",
			xcluster->focus, dip, dcluster->focus,
			dip->cluster.focus);
		hammer2_cluster_unlock(xcluster);
		xcluster = NULL;
		*errorp = ENOSPC;
#if 0
		Debugger("X3");
#endif
	}

	/*
	 * Handle the error case
	 *
	 * NOTE(review): the panic() makes this path fatal in practice; the
	 * KKASSERT/return after it are only reachable if the panic is ever
	 * removed.  Presumably left in as a debugging tripwire — confirm
	 * before relying on the error return.
	 */
	if (*errorp) {
		panic("error2");
		KKASSERT(xcluster == NULL);
		return;
	}

	/*
	 * Use xcluster as a placeholder for (lhc).  Duplicate cluster to the
	 * same target bref as xcluster and then delete xcluster.  The
	 * duplication occurs after xcluster in flush order even though
	 * xcluster is deleted after the duplication. XXX
	 *
	 * WARNING! Duplications (to a different parent) can cause indirect
	 *          blocks to be inserted, refactor xcluster.
	 *
	 * WARNING! Only key and keybits is extracted from a passed-in bref.
	 */
	hammer2_cluster_bref(cluster, &bref);
	bref.key = lhc;			/* invisible dir entry key */
	bref.keybits = 0;
	hammer2_cluster_rename(trans, &bref, dcluster, cluster, 0);

	/*
	 * cluster is now 'live' again.. adjust the filename.
	 *
	 * Directory entries are inodes but this is a hidden hardlink
	 * target.  The name isn't used but to ease debugging give it
	 * a name after its inode number.
	 */
	hammer2_cluster_modify(trans, cluster, 0);
	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
	ksnprintf(nipdata->filename, sizeof(nipdata->filename),
		  "0x%016jx", (intmax_t)nipdata->inum);
	nipdata->name_len = strlen(nipdata->filename);
	nipdata->name_key = lhc;
	nipdata->nlinks += nlinks;	/* caller-supplied link count delta */
	hammer2_cluster_modsync(cluster);
}
915
916 /*
917  * Connect the target inode represented by (cluster) to the media topology
918  * at (dip, name, len).  The caller can pass a rough *chainp, this function
919  * will issue lookup()s to position the parent chain properly for the
920  * chain insertion.
921  *
922  * If hlink is TRUE this function creates an OBJTYPE_HARDLINK directory
923  * entry instead of connecting (cluster).
924  *
925  * If hlink is FALSE this function expects (cluster) to be unparented.
926  */
927 int
928 hammer2_inode_connect(hammer2_trans_t *trans,
929                       hammer2_cluster_t **clusterp, int hlink,
930                       hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
931                       const uint8_t *name, size_t name_len,
932                       hammer2_key_t lhc)
933 {
934         hammer2_inode_data_t *wipdata;
935         hammer2_cluster_t *ocluster;
936         hammer2_cluster_t *ncluster;
937         hammer2_key_t key_dummy;
938         int ddflag;
939         int error;
940
941         /*
942          * Since ocluster is either disconnected from the topology or
943          * represents a hardlink terminus which is always a parent of or
944          * equal to dip, we should be able to safely lock dip->chain for
945          * our setup.
946          *
947          * WARNING! Must use inode_lock_ex() on dip to handle a stale
948          *          dip->cluster.
949          *
950          * If name is non-NULL we calculate lhc, else we use the passed-in
951          * lhc.
952          */
953         ocluster = *clusterp;
954
955         if (name) {
956                 lhc = hammer2_dirhash(name, name_len);
957
958                 /*
959                  * Locate the inode or indirect block to create the new
960                  * entry in.  At the same time check for key collisions
961                  * and iterate until we don't get one.
962                  */
963                 error = 0;
964                 while (error == 0) {
965                         ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
966                                                       lhc, lhc,
967                                                       0, &ddflag);
968                         if (ncluster == NULL)
969                                 break;
970                         if ((lhc & HAMMER2_DIRHASH_LOMASK) ==
971                             HAMMER2_DIRHASH_LOMASK) {
972                                 error = ENOSPC;
973                         }
974                         hammer2_cluster_unlock(ncluster);
975                         ncluster = NULL;
976                         ++lhc;
977                 }
978         } else {
979                 /*
980                  * Reconnect to specific key (used when moving
981                  * unlinked-but-open files into the hidden directory).
982                  */
983                 ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
984                                                   lhc, lhc,
985                                                   0, &ddflag);
986                 KKASSERT(ncluster == NULL);
987         }
988
989         if (error == 0) {
990                 if (hlink) {
991                         /*
992                          * Hardlink pointer needed, create totally fresh
993                          * directory entry.
994                          *
995                          * We must refactor ocluster because it might have
996                          * been shifted into an indirect cluster by the
997                          * create.
998                          */
999                         KKASSERT(ncluster == NULL);
1000                         error = hammer2_cluster_create(trans,
1001                                                        dcluster, &ncluster,
1002                                                        lhc, 0,
1003                                                        HAMMER2_BREF_TYPE_INODE,
1004                                                        HAMMER2_INODE_BYTES,
1005                                                        0);
1006                 } else {
1007                         /*
1008                          * Reconnect the original cluster under the new name.
1009                          * Original cluster must have already been deleted by
1010                          * teh caller.
1011                          *
1012                          * WARNING! Can cause held-over clusters to require a
1013                          *          refactor.  Fortunately we have none (our
1014                          *          locked clusters are passed into and
1015                          *          modified by the call).
1016                          */
1017                         ncluster = ocluster;
1018                         ocluster = NULL;
1019                         error = hammer2_cluster_create(trans,
1020                                                        dcluster, &ncluster,
1021                                                        lhc, 0,
1022                                                        HAMMER2_BREF_TYPE_INODE,
1023                                                        HAMMER2_INODE_BYTES,
1024                                                        0);
1025                 }
1026         }
1027
1028         /*
1029          * Unlock stuff.
1030          */
1031         KKASSERT(error != EAGAIN);
1032
1033         /*
1034          * ncluster should be NULL on error, leave ocluster
1035          * (ocluster == *clusterp) alone.
1036          */
1037         if (error) {
1038                 KKASSERT(ncluster == NULL);
1039                 return (error);
1040         }
1041
1042         /*
1043          * Directory entries are inodes so if the name has changed we have
1044          * to update the inode.
1045          *
1046          * When creating an OBJTYPE_HARDLINK entry remember to unlock the
1047          * cluster, the caller will access the hardlink via the actual hardlink
1048          * target file and not the hardlink pointer entry, so we must still
1049          * return ocluster.
1050          */
1051         if (hlink && hammer2_hardlink_enable >= 0) {
1052                 /*
1053                  * Create the HARDLINK pointer.  oip represents the hardlink
1054                  * target in this situation.
1055                  *
1056                  * We will return ocluster (the hardlink target).
1057                  */
1058                 hammer2_cluster_modify(trans, ncluster, 0);
1059                 hammer2_cluster_clr_chainflags(ncluster,
1060                                                HAMMER2_CHAIN_UNLINKED);
1061                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1062                 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1063                 bcopy(name, wipdata->filename, name_len);
1064                 wipdata->name_key = lhc;
1065                 wipdata->name_len = name_len;
1066                 wipdata->target_type =
1067                                 hammer2_cluster_rdata(ocluster)->ipdata.type;
1068                 wipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1069                 wipdata->inum = hammer2_cluster_rdata(ocluster)->ipdata.inum;
1070                 wipdata->version = HAMMER2_INODE_VERSION_ONE;
1071                 wipdata->nlinks = 1;
1072                 wipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1073                 hammer2_cluster_modsync(ncluster);
1074                 hammer2_cluster_unlock(ncluster);
1075                 ncluster = ocluster;
1076                 ocluster = NULL;
1077         } else {
1078                 /*
1079                  * ncluster is a duplicate of ocluster at the new location.
1080                  * We must fixup the name stored in the inode data.
1081                  * The bref key has already been adjusted by inode_connect().
1082                  */
1083                 hammer2_cluster_modify(trans, ncluster, 0);
1084                 hammer2_cluster_clr_chainflags(ncluster,
1085                                                HAMMER2_CHAIN_UNLINKED);
1086                 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1087
1088                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1089                 bcopy(name, wipdata->filename, name_len);
1090                 wipdata->name_key = lhc;
1091                 wipdata->name_len = name_len;
1092                 wipdata->nlinks = 1;
1093                 hammer2_cluster_modsync(ncluster);
1094         }
1095
1096         /*
1097          * We are replacing ocluster with ncluster, unlock ocluster.  In the
1098          * case where ocluster is left unchanged the code above sets
1099          * ncluster to ocluster and ocluster to NULL, resulting in a NOP here.
1100          */
1101         if (ocluster)
1102                 hammer2_cluster_unlock(ocluster);
1103         *clusterp = ncluster;
1104
1105         return (0);
1106 }
1107
1108 /*
1109  * Repoint ip->cluster's chains to cluster's chains and fixup the default
1110  * focus.
1111  *
1112  * Caller must hold the inode exclusively locked and cluster, if not NULL,
1113  * must also be locked.
1114  *
1115  * Cluster may be NULL to clean out any chains in ip->cluster.
1116  */
1117 void
1118 hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
1119                       hammer2_cluster_t *cluster)
1120 {
1121         hammer2_chain_t *ochain;
1122         hammer2_chain_t *nchain;
1123         hammer2_inode_t *opip;
1124         int i;
1125
1126         /*
1127          * Replace chains in ip->cluster with chains from cluster and
1128          * adjust the focus if necessary.
1129          *
1130          * NOTE: nchain and/or ochain can be NULL due to gaps
1131          *       in the cluster arrays.
1132          */
1133         ip->cluster.focus = NULL;
1134         for (i = 0; cluster && i < cluster->nchains; ++i) {
1135                 nchain = cluster->array[i].chain;
1136                 if (i < ip->cluster.nchains) {
1137                         ochain = ip->cluster.array[i].chain;
1138                         if (ochain == nchain) {
1139                                 if (ip->cluster.focus == NULL)
1140                                         ip->cluster.focus = nchain;
1141                                 continue;
1142                         }
1143                 } else {
1144                         ochain = NULL;
1145                 }
1146
1147                 /*
1148                  * Make adjustments
1149                  */
1150                 ip->cluster.array[i].chain = nchain;
1151                 if (ip->cluster.focus == NULL)
1152                         ip->cluster.focus = nchain;
1153                 if (nchain)
1154                         hammer2_chain_ref(nchain);
1155                 if (ochain)
1156                         hammer2_chain_drop(ochain);
1157         }
1158
1159         /*
1160          * Release any left-over chains in ip->cluster.
1161          */
1162         while (i < ip->cluster.nchains) {
1163                 nchain = ip->cluster.array[i].chain;
1164                 if (nchain) {
1165                         ip->cluster.array[i].chain = NULL;
1166                         hammer2_chain_drop(nchain);
1167                 }
1168                 ++i;
1169         }
1170         ip->cluster.nchains = cluster ? cluster->nchains : 0;
1171
1172         /*
1173          * Repoint ip->pip if requested (non-NULL pip).
1174          */
1175         if (pip && ip->pip != pip) {
1176                 opip = ip->pip;
1177                 hammer2_inode_ref(pip);
1178                 ip->pip = pip;
1179                 if (opip)
1180                         hammer2_inode_drop(opip);
1181         }
1182 }
1183
1184 /*
1185  * Unlink the file from the specified directory inode.  The directory inode
1186  * does not need to be locked.
1187  *
1188  * isdir determines whether a directory/non-directory check should be made.
1189  * No check is made if isdir is set to -1.
1190  *
1191  * isopen specifies whether special unlink-with-open-descriptor handling
1192  * must be performed.  If set to -1 the caller is deleting a PFS and we
1193  * check whether the chain is mounted or not (chain->pmp != NULL).  1 is
1194  * implied if it is mounted.
1195  *
1196  * If isopen is 1 and nlinks drops to 0 this function must move the chain
1197  * to a special hidden directory until last-close occurs on the file.
1198  *
1199  * NOTE!  The underlying file can still be active with open descriptors
1200  *        or if the chain is being manually held (e.g. for rename).
1201  *
1202  *        The caller is responsible for fixing up ip->chain if e.g. a
1203  *        rename occurs (see chain_duplicate()).
1204  *
1205  * NOTE!  The chain is not deleted if it is moved to the hidden directory,
1206  *        but otherwise will be deleted.
1207  */
int
hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
		    const uint8_t *name, size_t name_len,
		    int isdir, int *hlinkp, struct nchandle *nch,
		    int nlinks)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_data_t *wipdata;
	hammer2_cluster_t *cparent;	/* locked parent of (cluster) */
	hammer2_cluster_t *hcluster;	/* hardlink target, if any */
	hammer2_cluster_t *hparent;	/* parent of hardlink target */
	hammer2_cluster_t *cluster;	/* directory entry being removed */
	hammer2_cluster_t *dparent;
	hammer2_cluster_t *dcluster;
	hammer2_key_t key_dummy;
	hammer2_key_t key_next;
	hammer2_key_t lhc;
	int error;
	int ddflag;
	int hlink;
	uint8_t type;

	error = 0;
	hlink = 0;
	hcluster = NULL;
	hparent = NULL;
	lhc = hammer2_dirhash(name, name_len);

again:
	/*
	 * Search for the filename in the directory.  The dirhash only
	 * fixes the high bits of the key, so iterate the LOMASK range
	 * comparing the stored name against (name, name_len).
	 */
	cparent = hammer2_inode_lock_ex(dip);
	cluster = hammer2_cluster_lookup(cparent, &key_next,
				     lhc, lhc + HAMMER2_DIRHASH_LOMASK,
				     0, &ddflag);
	while (cluster) {
		if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
			ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
			if (ripdata->name_len == name_len &&
			    bcmp(ripdata->filename, name, name_len) == 0) {
				break;
			}
		}
		cluster = hammer2_cluster_next(cparent, cluster, &key_next,
					       key_next,
					       lhc + HAMMER2_DIRHASH_LOMASK,
					       0);
	}
	hammer2_inode_unlock_ex(dip, NULL);	/* retain cparent */

	/*
	 * Not found or wrong type (isdir < 0 disables the type check).
	 * If a hardlink pointer, type checks use the hardlink target.
	 */
	if (cluster == NULL) {
		error = ENOENT;
		goto done;
	}
	ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
	type = ripdata->type;
	if (type == HAMMER2_OBJTYPE_HARDLINK) {
		hlink = 1;
		type = ripdata->target_type;
	}

	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
		error = ENOTDIR;
		goto done;
	}
	if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir >= 1) {
		error = EISDIR;
		goto done;
	}

	/*
	 * Hardlink must be resolved.  We can't hold the parent locked
	 * while we do this or we could deadlock.  The physical file will
	 * be located at or above the current directory.
	 *
	 * We loop to reacquire the hardlink origination.
	 *
	 * NOTE: hammer2_hardlink_find() will locate the hardlink target,
	 *       returning a modified hparent and hcluster.
	 */
	if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) {
		if (hcluster == NULL) {
			hcluster = cluster;
			cluster = NULL;	/* safety */
			hammer2_cluster_unlock(cparent);
			cparent = NULL;	/* safety */
			ripdata = NULL;	/* safety (associated w/cparent) */
			error = hammer2_hardlink_find(dip, &hparent, hcluster);

			/*
			 * If we couldn't find the hardlink target then some
			 * parent directory containing the hardlink pointer
			 * probably got renamed to above the original target,
			 * a case not yet handled by H2.
			 */
			if (error) {
				kprintf("H2 unlink_file: hardlink target for "
					"\"%s\" not found\n",
					name);
				kprintf("(likely due to known directory "
					"rename bug)\n");
				goto done;
			}
			/* re-run the lookup with the hardlink resolved */
			goto again;
		}
	}

	/*
	 * If this is a directory the directory must be empty.  However, if
	 * isdir < 0 we are doing a rename and the directory does not have
	 * to be empty, and if isdir > 1 we are deleting a PFS/snapshot
	 * and the directory does not have to be empty.
	 *
	 * NOTE: We check the full key range here which covers both visible
	 *       and invisible entries.  Theoretically there should be no
	 *       invisible (hardlink target) entries if there are no visible
	 *       entries.
	 */
	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
		dparent = hammer2_cluster_lookup_init(cluster, 0);
		dcluster = hammer2_cluster_lookup(dparent, &key_dummy,
						  0, (hammer2_key_t)-1,
						  HAMMER2_LOOKUP_NODATA,
						  &ddflag);
		if (dcluster) {
			/* any entry at all means the directory is not empty */
			hammer2_cluster_unlock(dcluster);
			hammer2_cluster_lookup_done(dparent);
			error = ENOTEMPTY;
			goto done;
		}
		hammer2_cluster_lookup_done(dparent);
		dparent = NULL;
		/* dcluster NULL */
	}

	/*
	 * If this was a hardlink then (cparent, cluster) is the hardlink
	 * pointer, which we can simply destroy outright.  Discard the
	 * clusters and replace with the hardlink target.
	 */
	if (hcluster) {
		hammer2_cluster_delete(trans, cparent, cluster,
				       HAMMER2_DELETE_PERMANENT);
		hammer2_cluster_unlock(cparent);
		hammer2_cluster_unlock(cluster);
		cparent = hparent;
		cluster = hcluster;
		hparent = NULL;
		hcluster = NULL;
	}

	/*
	 * This leaves us with the hardlink target or non-hardlinked file
	 * or directory in (cparent, cluster).
	 *
	 * Delete the target when nlinks reaches 0 with special handling
	 * if (isopen) is set.
	 *
	 * NOTE! In DragonFly the vnops function calls cache_unlink() after
	 *       calling us here to clean out the namecache association,
	 *       (which does not represent a ref for the open-test), and to
	 *       force finalization of the vnode if/when the last ref gets
	 *       dropped.
	 *
	 * NOTE! Files are unlinked by rename and then relinked.  nch will be
	 *       passed as NULL in this situation.  hammer2_inode_connect()
	 *       will bump nlinks.
	 */
	KKASSERT(cluster != NULL);

	/* apply the caller-supplied link-count adjustment (nlinks) */
	hammer2_cluster_modify(trans, cluster, 0);
	wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
	ripdata = wipdata;
	wipdata->nlinks += nlinks;
	if ((int64_t)wipdata->nlinks < 0) {	/* XXX debugging */
		wipdata->nlinks = 0;
	}
	hammer2_cluster_modsync(cluster);

	if (wipdata->nlinks == 0) {
		/*
		 * Target nlinks has reached 0, file now unlinked (but may
		 * still be open).
		 */
		/* XXX need interlock if mounted
		if ((cluster->focus->flags & HAMMER2_CHAIN_PFSROOT) &&
		    cluster->pmp) {
			error = EINVAL;
			kprintf("hammer2: PFS \"%s\" cannot be deleted "
				"while still mounted\n",
				wipdata->filename);
			goto done;
		}
		*/
		hammer2_cluster_set_chainflags(cluster, HAMMER2_CHAIN_UNLINKED);
		if (nch && cache_isopen(nch)) {
			/* file still open: park it in the hidden directory */
			hammer2_inode_move_to_hidden(trans, &cparent, &cluster,
						     wipdata->inum);
		} else {
			/*
			 * This won't get everything if a vnode is still
			 * present, but the cache_unlink() call the caller
			 * makes will.
			 */
			hammer2_cluster_delete(trans, cparent, cluster,
					       HAMMER2_DELETE_PERMANENT);
		}
	} else if (hlink == 0) {
		/*
		 * In this situation a normal non-hardlinked file (which can
		 * only have nlinks == 1) still has a non-zero nlinks, the
		 * caller must be doing a RENAME operation and so is passing
		 * a nlinks adjustment of 0, and only wishes to remove file
		 * in order to be able to reconnect it under a different name.
		 *
		 * In this situation we do a non-permanent deletion of the
		 * chain in order to allow the file to be reconnected in
		 * a different location.
		 */
		KKASSERT(nlinks == 0);
		hammer2_cluster_delete(trans, cparent, cluster, 0);
	}
	error = 0;
done:
	/* unwind whichever locks are still held on this path */
	if (cparent)
		hammer2_cluster_unlock(cparent);
	if (cluster)
		hammer2_cluster_unlock(cluster);
	if (hparent)
		hammer2_cluster_unlock(hparent);
	if (hcluster)
		hammer2_cluster_unlock(hcluster);
	if (hlinkp)
		*hlinkp = hlink;	/* report whether a hardlink was involved */

	return error;
}
1449
1450 /*
1451  * This is called from the mount code to initialize pmp->ihidden
1452  */
1453 void
1454 hammer2_inode_install_hidden(hammer2_pfs_t *pmp)
1455 {
1456         hammer2_trans_t trans;
1457         hammer2_cluster_t *cparent;
1458         hammer2_cluster_t *cluster;
1459         hammer2_cluster_t *scan;
1460         const hammer2_inode_data_t *ripdata;
1461         hammer2_inode_data_t *wipdata;
1462         hammer2_key_t key_dummy;
1463         hammer2_key_t key_next;
1464         int ddflag;
1465         int error;
1466         int count;
1467         int dip_check_algo;
1468         int dip_comp_algo;
1469
1470         if (pmp->ihidden)
1471                 return;
1472
1473         /*
1474          * Find the hidden directory
1475          */
1476         bzero(&key_dummy, sizeof(key_dummy));
1477         hammer2_trans_init(&trans, pmp, 0);
1478
1479         /*
1480          * Setup for lookup, retrieve iroot's check and compression
1481          * algorithm request which was likely generated by newfs_hammer2.
1482          *
1483          * The check/comp fields will probably never be used since inodes
1484          * are renamed into the hidden directory and not created relative to
1485          * the hidden directory, chain creation inherits from bref.methods,
1486          * and data chains inherit from their respective file inode *_algo
1487          * fields.
1488          */
1489         cparent = hammer2_inode_lock_ex(pmp->iroot);
1490         ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
1491         dip_check_algo = ripdata->check_algo;
1492         dip_comp_algo = ripdata->comp_algo;
1493         ripdata = NULL;
1494
1495         cluster = hammer2_cluster_lookup(cparent, &key_dummy,
1496                                          HAMMER2_INODE_HIDDENDIR,
1497                                          HAMMER2_INODE_HIDDENDIR,
1498                                          0, &ddflag);
1499         if (cluster) {
1500                 pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
1501                 hammer2_inode_ref(pmp->ihidden);
1502
1503                 /*
1504                  * Remove any unlinked files which were left open as-of
1505                  * any system crash.
1506                  *
1507                  * Don't pass NODATA, we need the inode data so the delete
1508                  * can do proper statistics updates.
1509                  */
1510                 count = 0;
1511                 scan = hammer2_cluster_lookup(cluster, &key_next,
1512                                               0, HAMMER2_TID_MAX,
1513                                               0, &ddflag);
1514                 while (scan) {
1515                         if (hammer2_cluster_type(scan) ==
1516                             HAMMER2_BREF_TYPE_INODE) {
1517                                 hammer2_cluster_delete(&trans, cluster, scan,
1518                                                    HAMMER2_DELETE_PERMANENT);
1519                                 ++count;
1520                         }
1521                         scan = hammer2_cluster_next(cluster, scan, &key_next,
1522                                                     0, HAMMER2_TID_MAX, 0);
1523                 }
1524
1525                 hammer2_inode_unlock_ex(pmp->ihidden, cluster);
1526                 hammer2_inode_unlock_ex(pmp->iroot, cparent);
1527                 hammer2_trans_done(&trans);
1528                 kprintf("hammer2: PFS loaded hidden dir, "
1529                         "removed %d dead entries\n", count);
1530                 return;
1531         }
1532
1533         /*
1534          * Create the hidden directory
1535          */
1536         error = hammer2_cluster_create(&trans, cparent, &cluster,
1537                                        HAMMER2_INODE_HIDDENDIR, 0,
1538                                        HAMMER2_BREF_TYPE_INODE,
1539                                        HAMMER2_INODE_BYTES,
1540                                        0);
1541         hammer2_inode_unlock_ex(pmp->iroot, cparent);
1542
1543         hammer2_cluster_modify(&trans, cluster, 0);
1544         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1545         wipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
1546         wipdata->inum = HAMMER2_INODE_HIDDENDIR;
1547         wipdata->nlinks = 1;
1548         wipdata->comp_algo = dip_comp_algo;
1549         wipdata->check_algo = dip_check_algo;
1550         hammer2_cluster_modsync(cluster);
1551         kprintf("hammer2: PFS root missing hidden directory, creating\n");
1552
1553         pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
1554         hammer2_inode_ref(pmp->ihidden);
1555         hammer2_inode_unlock_ex(pmp->ihidden, cluster);
1556         hammer2_trans_done(&trans);
1557 }
1558
1559 /*
1560  * If an open file is unlinked H2 needs to retain the file in the topology
1561  * to ensure that its backing store is not recovered by the bulk free scan.
1562  * This also allows us to avoid having to special-case the CHAIN_DELETED flag.
1563  *
1564  * To do this the file is moved to a hidden directory in the PFS root and
1565  * renamed.  The hidden directory must be created if it does not exist.
1566  */
1567 static
1568 void
1569 hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
1570                              hammer2_cluster_t **cparentp,
1571                              hammer2_cluster_t **clusterp,
1572                              hammer2_tid_t inum)
1573 {
1574         hammer2_cluster_t *dcluster;
1575         hammer2_pfs_t *pmp;
1576         int error;
1577
1578         pmp = (*clusterp)->pmp;
1579         KKASSERT(pmp != NULL);
1580         KKASSERT(pmp->ihidden != NULL);
1581
1582         hammer2_cluster_delete(trans, *cparentp, *clusterp, 0);
1583         dcluster = hammer2_inode_lock_ex(pmp->ihidden);
1584         error = hammer2_inode_connect(trans, clusterp, 0,
1585                                       pmp->ihidden, dcluster,
1586                                       NULL, 0, inum);
1587         hammer2_inode_unlock_ex(pmp->ihidden, dcluster);
1588         KKASSERT(error == 0);
1589 }
1590
1591 /*
1592  * Given an exclusively locked inode and cluster we consolidate the cluster
1593  * for hardlink creation, adding (nlinks) to the file's link count and
1594  * potentially relocating the inode to (cdip) which is a parent directory
1595  * common to both the current location of the inode and the intended new
1596  * hardlink.
1597  *
1598  * Replaces (*clusterp) if consolidation occurred, unlocking the old cluster
1599  * and returning a new locked cluster.
1600  *
1601  * NOTE!  This function will also replace ip->cluster.
1602  */
1603 int
1604 hammer2_hardlink_consolidate(hammer2_trans_t *trans,
1605                              hammer2_inode_t *ip,
1606                              hammer2_cluster_t **clusterp,
1607                              hammer2_inode_t *cdip,
1608                              hammer2_cluster_t *cdcluster,
1609                              int nlinks)
1610 {
1611         const hammer2_inode_data_t *ripdata;
1612         hammer2_inode_data_t *wipdata;
1613         hammer2_cluster_t *cluster;
1614         hammer2_cluster_t *cparent;
1615         int error;
1616
1617         cluster = *clusterp;
1618         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1619         if (nlinks == 0 &&                      /* no hardlink needed */
1620             (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE)) {
1621                 return (0);
1622         }
1623
1624         if (hammer2_hardlink_enable == 0) {     /* disallow hardlinks */
1625                 hammer2_cluster_unlock(cluster);
1626                 *clusterp = NULL;
1627                 return (ENOTSUP);
1628         }
1629
1630         cparent = NULL;
1631
1632         /*
1633          * If no change in the hardlink's target directory is required and
1634          * this is already a hardlink target, all we need to do is adjust
1635          * the link count.
1636          */
1637         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1638         if (cdip == ip->pip &&
1639             (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
1640                 if (nlinks) {
1641                         hammer2_cluster_modify(trans, cluster, 0);
1642                         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1643                         wipdata->nlinks += nlinks;
1644                         hammer2_cluster_modsync(cluster);
1645                         ripdata = wipdata;
1646                 }
1647                 error = 0;
1648                 goto done;
1649         }
1650
1651         /*
1652          * Cluster is the real inode.  The originating directory is locked
1653          * by the caller so we can manipulate it without worrying about races
1654          * against other lookups.
1655          *
1656          * If cluster is visible we need to delete it from the current
1657          * location and create a hardlink pointer in its place.  If it is
1658          * not visible we need only delete it.  Then later cluster will be
1659          * renamed to a parent directory and converted (if necessary) to
1660          * a hidden inode (via shiftup).
1661          *
1662          * NOTE! We must hold cparent locked through the delete/create/rename
1663          *       operation to ensure that other threads block resolving to
1664          *       the same hardlink, otherwise the other threads may not see
1665          *       the hardlink.
1666          */
1667         KKASSERT((cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0);
1668         cparent = hammer2_cluster_parent(cluster);
1669
1670         hammer2_cluster_delete(trans, cparent, cluster, 0);
1671
1672         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1673         KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
1674         if (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) {
1675                 hammer2_cluster_t *ncluster;
1676                 hammer2_key_t lhc;
1677
1678                 ncluster = NULL;
1679                 lhc = cluster->focus->bref.key;
1680                 error = hammer2_cluster_create(trans, cparent, &ncluster,
1681                                              lhc, 0,
1682                                              HAMMER2_BREF_TYPE_INODE,
1683                                              HAMMER2_INODE_BYTES,
1684                                              0);
1685                 hammer2_cluster_modify(trans, ncluster, 0);
1686                 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1687
1688                 /* wipdata->comp_algo = ripdata->comp_algo; */
1689                 wipdata->comp_algo = 0;
1690                 wipdata->check_algo = 0;
1691                 wipdata->version = HAMMER2_INODE_VERSION_ONE;
1692                 wipdata->inum = ripdata->inum;
1693                 wipdata->target_type = ripdata->type;
1694                 wipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1695                 wipdata->uflags = 0;
1696                 wipdata->rmajor = 0;
1697                 wipdata->rminor = 0;
1698                 wipdata->ctime = 0;
1699                 wipdata->mtime = 0;
1700                 wipdata->atime = 0;
1701                 wipdata->btime = 0;
1702                 bzero(&wipdata->uid, sizeof(wipdata->uid));
1703                 bzero(&wipdata->gid, sizeof(wipdata->gid));
1704                 wipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1705                 wipdata->cap_flags = 0;
1706                 wipdata->mode = 0;
1707                 wipdata->size = 0;
1708                 wipdata->nlinks = 1;
1709                 wipdata->iparent = 0;   /* XXX */
1710                 wipdata->pfs_type = 0;
1711                 wipdata->pfs_inum = 0;
1712                 bzero(&wipdata->pfs_clid, sizeof(wipdata->pfs_clid));
1713                 bzero(&wipdata->pfs_fsid, sizeof(wipdata->pfs_fsid));
1714                 wipdata->data_quota = 0;
1715                 wipdata->data_count = 0;
1716                 wipdata->inode_quota = 0;
1717                 wipdata->inode_count = 0;
1718                 wipdata->attr_tid = 0;
1719                 wipdata->dirent_tid = 0;
1720                 bzero(&wipdata->u, sizeof(wipdata->u));
1721                 bcopy(ripdata->filename, wipdata->filename, ripdata->name_len);
1722                 wipdata->name_key = ncluster->focus->bref.key;
1723                 wipdata->name_len = ripdata->name_len;
1724                 /* XXX transaction ids */
1725                 hammer2_cluster_modsync(ncluster);
1726                 hammer2_cluster_unlock(ncluster);
1727         }
1728         ripdata = wipdata;
1729
1730         /*
1731          * cluster represents the hardlink target and is now flagged deleted.
1732          * duplicate it to the parent directory and adjust nlinks.
1733          *
1734          * WARNING! The shiftup() call can cause ncluster to be moved into
1735          *          an indirect block, and our ncluster will wind up pointing
1736          *          to the older/original version.
1737          */
1738         KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_DELETED);
1739         hammer2_hardlink_shiftup(trans, cluster, cdip, cdcluster,
1740                                  nlinks, &error);
1741
1742         if (error == 0)
1743                 hammer2_inode_repoint(ip, cdip, cluster);
1744
1745 done:
1746         /*
1747          * Cleanup, cluster/ncluster already dealt with.
1748          *
1749          * Return the shifted cluster in *clusterp.
1750          */
1751         if (cparent)
1752                 hammer2_cluster_unlock(cparent);
1753         *clusterp = cluster;
1754
1755         return (error);
1756 }
1757
1758 /*
1759  * If (*ochainp) is non-NULL it points to the forward OBJTYPE_HARDLINK
1760  * inode while (*chainp) points to the resolved (hidden hardlink
1761  * target) inode.  In this situation when nlinks is 1 we wish to
1762  * deconsolidate the hardlink, moving it back to the directory that now
1763  * represents the only remaining link.
1764  */
int
hammer2_hardlink_deconsolidate(hammer2_trans_t *trans,
			       hammer2_inode_t *dip,
			       hammer2_chain_t **chainp,
			       hammer2_chain_t **ochainp)
{
	/*
	 * No forward OBJTYPE_HARDLINK inode was supplied, nothing to
	 * deconsolidate.
	 */
	if (*ochainp == NULL)
		return (0);
	/* XXX deconsolidation is not yet implemented; silently succeed */
	return (0);
}
1776
1777 /*
1778  * The caller presents a locked cluster with an obj_type of
1779  * HAMMER2_OBJTYPE_HARDLINK.  This routine will locate and replace the
1780  * cluster with the target hardlink, also locked.
1781  *
1782  * If cparentp is not NULL a locked cluster representing the hardlink's
1783  * parent is also returned.
1784  *
1785  * If we are unable to locate the hardlink target EIO is returned and
1786  * (*cparentp) is set to NULL.  The passed-in cluster still needs to be
1787  * unlocked by the caller but will be degenerate... not have any chains.
1788  */
int
hammer2_hardlink_find(hammer2_inode_t *dip,
		      hammer2_cluster_t **cparentp, hammer2_cluster_t *cluster)
{
	const hammer2_inode_data_t *ipdata;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *rcluster;
	hammer2_inode_t *ip;
	hammer2_inode_t *pip;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	int ddflag;

	pip = dip;
	hammer2_inode_ref(pip);		/* for loop */

	/*
	 * Locate the hardlink.  pip is referenced and not locked.
	 * The target is looked up by the inode number stored in the
	 * forwarding inode.
	 */
	ipdata = &hammer2_cluster_rdata(cluster)->ipdata;
	lhc = ipdata->inum;

	/*
	 * We don't need the cluster's chains, but we need to retain the
	 * cluster structure itself so we can load the hardlink search
	 * result into it.  The extra ref keeps the structure alive across
	 * the unlock; zeroing nchains detaches the old chains.
	 */
	KKASSERT(cluster->refs == 1);
	atomic_add_int(&cluster->refs, 1);
	hammer2_cluster_unlock(cluster);	/* hack */
	cluster->nchains = 0;			/* hack */

	rcluster = NULL;
	cparent = NULL;

	/*
	 * Walk up the directory topology from dip, hand-over-hand
	 * (ref parent before unlocking child), probing each directory
	 * for an entry keyed exactly (lhc).
	 */
	while ((ip = pip) != NULL) {
		cparent = hammer2_inode_lock_ex(ip);
		hammer2_inode_drop(ip);			/* loop */
		KKASSERT(hammer2_cluster_type(cparent) ==
			 HAMMER2_BREF_TYPE_INODE);
		rcluster = hammer2_cluster_lookup(cparent, &key_dummy,
					     lhc, lhc, 0, &ddflag);
		if (rcluster)
			break;
		hammer2_cluster_lookup_done(cparent);	/* discard parent */
		cparent = NULL;				/* safety */
		pip = ip->pip;		/* safe, ip held locked */
		if (pip)
			hammer2_inode_ref(pip);		/* loop */
		hammer2_inode_unlock_ex(ip, NULL);
	}

	/*
	 * On success load the found target into the caller's (degenerate)
	 * cluster and drop our temporary result.  Hand the locked parent
	 * back via *cparentp when requested, otherwise release it along
	 * with ip's lock.
	 *
	 * NOTE(review): the old comment here referred to "*ipp", which is
	 * not a parameter of this function — presumably leftover from an
	 * earlier signature.
	 */
	if (rcluster) {
		hammer2_cluster_replace(cluster, rcluster);
		hammer2_cluster_drop(rcluster);
		if (cparentp) {
			*cparentp = cparent;
			hammer2_inode_unlock_ex(ip, NULL);
		} else {
			hammer2_inode_unlock_ex(ip, cparent);
		}
		return (0);
	} else {
		/* target not found anywhere on the path to the root */
		if (cparentp)
			*cparentp = NULL;
		if (ip)
			hammer2_inode_unlock_ex(ip, cparent);
		return (EIO);
	}
}
1865
1866 /*
1867  * Find the directory common to both fdip and tdip.
1868  *
1869  * Returns a held but not locked inode.  Caller typically locks the inode,
1870  * and when through unlocks AND drops it.
1871  */
1872 hammer2_inode_t *
1873 hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
1874 {
1875         hammer2_inode_t *scan1;
1876         hammer2_inode_t *scan2;
1877
1878         /*
1879          * We used to have a depth field but it complicated matters too
1880          * much for directory renames.  So now its ugly.  Check for
1881          * simple cases before giving up and doing it the expensive way.
1882          *
1883          * XXX need a bottom-up topology stability lock
1884          */
1885         if (fdip == tdip || fdip == tdip->pip) {
1886                 hammer2_inode_ref(fdip);
1887                 return(fdip);
1888         }
1889         if (fdip->pip == tdip) {
1890                 hammer2_inode_ref(tdip);
1891                 return(tdip);
1892         }
1893
1894         /*
1895          * XXX not MPSAFE
1896          */
1897         for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
1898                 scan2 = tdip;
1899                 while (scan2->pmp == tdip->pmp) {
1900                         if (scan1 == scan2) {
1901                                 hammer2_inode_ref(scan1);
1902                                 return(scan1);
1903                         }
1904                         scan2 = scan2->pip;
1905                         if (scan2 == NULL)
1906                                 break;
1907                 }
1908         }
1909         panic("hammer2_inode_common_parent: no common parent %p %p\n",
1910               fdip, tdip);
1911         /* NOT REACHED */
1912         return(NULL);
1913 }
1914
1915 /*
1916  * Synchronize the inode's frontend state with the chain state prior
1917  * to any explicit flush of the inode or any strategy write call.
1918  *
1919  * Called with a locked inode.
1920  */
void
hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip, 
		    hammer2_cluster_t *cparent)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_data_t *wipdata;
	hammer2_cluster_t *dparent;
	hammer2_cluster_t *cluster;
	hammer2_key_t lbase;
	hammer2_key_t key_next;
	int dosync = 0;		/* set when media inode data was modified */
	int ddflag;

	ripdata = &hammer2_cluster_rdata(cparent)->ipdata;    /* target file */

	/*
	 * Propagate a frontend-cached mtime into the media inode.
	 */
	if (ip->flags & HAMMER2_INODE_MTIME) {
		wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
		atomic_clear_int(&ip->flags, HAMMER2_INODE_MTIME);
		wipdata->mtime = ip->mtime;
		dosync = 1;
		ripdata = wipdata;
	}
	/*
	 * File was truncated: shrink the media inode's size and prune
	 * data chains past the new EOF.
	 */
	if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size < ripdata->size) {
		wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
		wipdata->size = ip->size;
		dosync = 1;
		ripdata = wipdata;
		atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);

		/*
		 * We must delete any chains beyond the EOF.  The chain
		 * straddling the EOF will be pending in the bioq.
		 */
		lbase = (ripdata->size + HAMMER2_PBUFMASK64) &
			~HAMMER2_PBUFMASK64;
		dparent = hammer2_cluster_lookup_init(&ip->cluster, 0);
		cluster = hammer2_cluster_lookup(dparent, &key_next,
						 lbase, (hammer2_key_t)-1,
						 HAMMER2_LOOKUP_NODATA,
						 &ddflag);
		while (cluster) {
			/*
			 * Degenerate embedded case, nothing to loop on
			 */
			switch (hammer2_cluster_type(cluster)) {
			case HAMMER2_BREF_TYPE_INODE:
				/* data embedded in the inode itself */
				hammer2_cluster_unlock(cluster);
				cluster = NULL;
				break;
			case HAMMER2_BREF_TYPE_DATA:
				hammer2_cluster_delete(trans, dparent, cluster,
						   HAMMER2_DELETE_PERMANENT);
				/* fall through */
			default:
				cluster = hammer2_cluster_next(dparent, cluster,
						   &key_next,
						   key_next, (hammer2_key_t)-1,
						   HAMMER2_LOOKUP_NODATA);
				break;
			}
		}
		hammer2_cluster_lookup_done(dparent);
	} else
	/*
	 * File was extended: grow the media inode's size.
	 */
	if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size > ripdata->size) {
		wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
		wipdata->size = ip->size;
		atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);

		/*
		 * When resizing larger we may not have any direct-data
		 * available.
		 */
		if ((wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
		    ip->size > HAMMER2_EMBEDDED_BYTES) {
			wipdata->op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
			bzero(&wipdata->u.blockset,
			      sizeof(wipdata->u.blockset));
		}
		dosync = 1;
		ripdata = wipdata;
	}
	/* flush any media inode modifications made above */
	if (dosync)
		hammer2_cluster_modsync(cparent);
}