hammer2 - locking revamp
[dragonfly.git] / sys / vfs / hammer2 / hammer2_inode.c
1 /*
2  * Copyright (c) 2011-2014 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
39 #include <sys/lock.h>
40 #include <sys/uuid.h>
41
42 #include "hammer2.h"
43
44 #define INODE_DEBUG     0
45
/*
 * Forward declaration (defined later in this file).  Presumably moves a
 * deleted-but-still-referenced inode into the PFS's hidden directory --
 * TODO confirm against the definition below this chunk.
 */
static void hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
					 hammer2_cluster_t **cparentp,
					 hammer2_cluster_t **clusterp,
					 hammer2_tid_t inum);

/*
 * Generate the red-black tree support functions for the per-PFS in-memory
 * inode tree (pmp->inum_tree), keyed by inode number via
 * hammer2_inode_cmp() and looked up with RB_LOOKUP(..., inum).
 */
RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
	     hammer2_tid_t, inum);
53
54 int
55 hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
56 {
57         if (ip1->inum < ip2->inum)
58                 return(-1);
59         if (ip1->inum > ip2->inum)
60                 return(1);
61         return(0);
62 }
63
64 /*
65  * HAMMER2 inode locks
66  *
67  * HAMMER2 offers shared locks and exclusive locks on inodes.
68  *
69  * The standard exclusive inode lock always resolves the inode meta-data,
70  * but there is a bypass version used by the vnode reclamation code that
71  * avoids the I/O.
72  *
73  * The inode locking function locks the inode itself, resolves any stale
74  * chains in the inode's cluster, and allocates a fresh copy of the
75  * cluster with 1 ref and all the underlying chains locked.  Duplication
76  * races are handled by this function.
77  *
78  * ip->cluster will be stable while the inode is locked.
79  *
80  * NOTE: We don't combine the inode/chain lock because putting away an
81  *       inode would otherwise confuse multiple lock holders of the inode.
82  *
83  * NOTE: In-memory inodes always point to hardlink targets (the actual file),
84  *       and never point to a hardlink pointer.
85  */
86 hammer2_cluster_t *
87 hammer2_inode_lock_ex(hammer2_inode_t *ip)
88 {
89         return hammer2_inode_lock_nex(ip, HAMMER2_RESOLVE_ALWAYS);
90 }
91
hammer2_cluster_t *
hammer2_inode_lock_nex(hammer2_inode_t *ip, int how)
{
	hammer2_cluster_t *cluster;
	hammer2_chain_t *chain;
	int i;

	/*
	 * Ref and exclusively lock the inode first so ip->cluster is
	 * stable, then make a private structural copy of the cluster
	 * (no chain refs yet) which is returned to the caller.
	 */
	hammer2_inode_ref(ip);
	hammer2_mtx_ex(&ip->lock, "h2ino");
	cluster = hammer2_cluster_copy(&ip->cluster,
				       HAMMER2_CLUSTER_COPY_NOCHAINS);

	/* Both focus pointers are recomputed from the chains below. */
	ip->cluster.focus = NULL;
	cluster->focus = NULL;

	for (i = 0; i < cluster->nchains; ++i) {
		chain = ip->cluster.array[i];
		if (chain == NULL) {
			/* degenerate cluster element; skip it */
			kprintf("inode_lock: %p: missing chain\n", ip);
			continue;
		}

		/* Lock each chain per caller's 'how' and install it. */
		hammer2_chain_lock(chain, how);
		cluster->array[i] = chain;
		if (cluster->focus == NULL)
			cluster->focus = chain;
		if (ip->cluster.focus == NULL)
			ip->cluster.focus = chain;
	}

	/*
	 * Returned cluster must resolve hardlink pointers.  When meta-data
	 * was resolved, assert that the cluster does not point at a
	 * hardlink pointer (in-memory inodes always point at the hardlink
	 * target, never the pointer -- see the NOTE above this function).
	 */
	if ((how & HAMMER2_RESOLVE_MASK) == HAMMER2_RESOLVE_ALWAYS) {
		const hammer2_inode_data_t *ripdata;
		ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
		KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
		/*
		if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK &&
		    (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) {
			error = hammer2_hardlink_find(ip->pip, NULL, cluster);
			KKASSERT(error == 0);
		}
		*/
	}
	return (cluster);
}
139
140 void
141 hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
142 {
143         if (cluster)
144                 hammer2_cluster_unlock(cluster);
145         hammer2_mtx_unlock(&ip->lock);
146         hammer2_inode_drop(ip);
147 }
148
149 /*
150  * Standard shared inode lock always resolves the inode meta-data.
151  *
152  * NOTE: We don't combine the inode/chain lock because putting away an
153  *       inode would otherwise confuse multiple lock holders of the inode.
154  *
155  *       Shared locks are especially sensitive to having too many shared
156  *       lock counts (from the same thread) on certain paths which might
157  *       need to upgrade them.  Only one count of a shared lock can be
158  *       upgraded.
159  */
160 hammer2_cluster_t *
161 hammer2_inode_lock_sh(hammer2_inode_t *ip)
162 {
163         const hammer2_inode_data_t *ripdata;
164         hammer2_cluster_t *cluster;
165         hammer2_chain_t *chain;
166         int i;
167
168         hammer2_inode_ref(ip);
169         cluster = hammer2_cluster_copy(&ip->cluster,
170                                        HAMMER2_CLUSTER_COPY_NOCHAINS);
171         hammer2_mtx_sh(&ip->lock, "h2ino");
172
173         cluster->focus = NULL;
174
175         for (i = 0; i < cluster->nchains; ++i) {
176                 chain = ip->cluster.array[i];
177
178                 if (chain == NULL) {
179                         kprintf("inode_lock: %p: missing chain\n", ip);
180                         continue;
181                 }
182
183                 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS |
184                                           HAMMER2_RESOLVE_SHARED);
185                 cluster->array[i] = chain;
186                 if (cluster->focus == NULL)
187                         cluster->focus = chain;
188         }
189
190         /*
191          * Returned cluster must resolve hardlink pointers
192          */
193         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
194         KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
195         /*
196         if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK &&
197             (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) {
198                 error = hammer2_hardlink_find(ip->pip, NULL, cluster);
199                 KKASSERT(error == 0);
200         }
201         */
202
203         return (cluster);
204 }
205
206 void
207 hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
208 {
209         if (cluster)
210                 hammer2_cluster_unlock(cluster);
211         hammer2_mtx_unlock(&ip->lock);
212         hammer2_inode_drop(ip);
213 }
214
215 /*
216  * Temporarily release a lock held shared or exclusive.  Caller must
217  * hold the lock shared or exclusive on call and lock will be released
218  * on return.
219  *
220  * Restore a lock that was temporarily released.
221  */
hammer2_mtx_state_t
hammer2_inode_lock_temp_release(hammer2_inode_t *ip)
{
	/*
	 * Returns the prior lock state so the caller can restore it via
	 * hammer2_inode_lock_temp_restore().  The inode ref is NOT
	 * released -- only the mutex.
	 */
	return hammer2_mtx_temp_release(&ip->lock);
}
227
void
hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, hammer2_mtx_state_t ostate)
{
	/* Re-acquire ip->lock in the state saved by temp_release(). */
	hammer2_mtx_temp_restore(&ip->lock, "h2ino", ostate);
}
233
234 /*
235  * Upgrade a shared inode lock to exclusive and return.  If the inode lock
236  * is already held exclusively this is a NOP.
237  *
238  * The caller MUST hold the inode lock either shared or exclusive on call
239  * and will own the lock exclusively on return.
240  *
241  * Returns non-zero if the lock was already exclusive prior to the upgrade.
242  */
int
hammer2_inode_lock_upgrade(hammer2_inode_t *ip)
{
	int wasexclusive;

	if (mtx_islocked_ex(&ip->lock)) {
		/* already exclusive, nothing to do */
		wasexclusive = 1;
	} else {
		/*
		 * Held shared: release and re-acquire exclusively.
		 * NOTE: there is a window here where the lock is not held
		 * at all, so callers must re-validate any state derived
		 * under the shared lock (e.g. hammer2_igetv() re-checks
		 * ip->vp after upgrading).
		 */
		hammer2_mtx_unlock(&ip->lock);
		hammer2_mtx_ex(&ip->lock, "h2upg");
		wasexclusive = 0;
	}
	return wasexclusive;
}
257
258 /*
259  * Downgrade an inode lock from exclusive to shared only if the inode
260  * lock was previously shared.  If the inode lock was previously exclusive,
261  * this is a NOP.
262  */
263 void
264 hammer2_inode_lock_downgrade(hammer2_inode_t *ip, int wasexclusive)
265 {
266         if (wasexclusive == 0)
267                 mtx_downgrade(&ip->lock);
268 }
269
270 /*
271  * Lookup an inode by inode number
272  */
273 hammer2_inode_t *
274 hammer2_inode_lookup(hammer2_pfsmount_t *pmp, hammer2_tid_t inum)
275 {
276         hammer2_inode_t *ip;
277
278         KKASSERT(pmp);
279         if (pmp->spmp_hmp) {
280                 ip = NULL;
281         } else {
282                 hammer2_spin_ex(&pmp->inum_spin);
283                 ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
284                 if (ip)
285                         hammer2_inode_ref(ip);
286                 hammer2_spin_unex(&pmp->inum_spin);
287         }
288         return(ip);
289 }
290
291 /*
292  * Adding a ref to an inode is only legal if the inode already has at least
293  * one ref.
294  *
295  * (can be called with spinlock held)
296  */
void
hammer2_inode_ref(hammer2_inode_t *ip)
{
	/*
	 * Plain atomic increment; legal only when the caller already
	 * guarantees at least one existing ref (see comment above).
	 */
	atomic_add_int(&ip->refs, 1);
}
302
303 /*
304  * Drop an inode reference, freeing the inode when the last reference goes
305  * away.
306  */
void
hammer2_inode_drop(hammer2_inode_t *ip)
{
	hammer2_pfsmount_t *pmp;
	hammer2_inode_t *pip;
	u_int refs;

	/*
	 * Lockless ref-drop loop.  Dropping the last ref implies an
	 * implied ref-drop on the parent (ip->pip), which is handled
	 * iteratively by looping with ip = pip.
	 */
	while (ip) {
		refs = ip->refs;
		cpu_ccfence();	/* force refs to be re-read each pass */
		if (refs == 1) {
			/*
			 * Transition to zero, must interlock with
			 * the inode inumber lookup tree (if applicable).
			 * It should not be possible for anyone to race
			 * the transition to 0.
			 *
			 */
			pmp = ip->pmp;
			KKASSERT(pmp);
			hammer2_spin_ex(&pmp->inum_spin);

			if (atomic_cmpset_int(&ip->refs, 1, 0)) {
				/* last ref: lock must not be held */
				KKASSERT(hammer2_mtx_refs(&ip->lock) == 0);
				if (ip->flags & HAMMER2_INODE_ONRBTREE) {
					atomic_clear_int(&ip->flags,
						     HAMMER2_INODE_ONRBTREE);
					RB_REMOVE(hammer2_inode_tree,
						  &pmp->inum_tree, ip);
				}
				hammer2_spin_unex(&pmp->inum_spin);

				pip = ip->pip;
				ip->pip = NULL;
				ip->pmp = NULL;

				/*
				 * Cleaning out ip->cluster isn't entirely
				 * trivial.
				 */
				hammer2_inode_repoint(ip, NULL, NULL);

				/*
				 * We have to drop pip (if non-NULL) to
				 * dispose of our implied reference from
				 * ip->pip.  We can simply loop on it.
				 */
				kfree(ip, pmp->minode);
				atomic_add_long(&pmp->inmem_inodes, -1);
				ip = pip;
				/* continue with pip (can be NULL) */
			} else {
				/* lost the cmpset race; retry the loop */
				hammer2_spin_unex(&ip->pmp->inum_spin);
			}
		} else {
			/*
			 * Non zero transition
			 */
			if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
				break;
		}
	}
}
370
371 /*
372  * Get the vnode associated with the given inode, allocating the vnode if
373  * necessary.  The vnode will be returned exclusively locked.
374  *
375  * The caller must lock the inode (shared or exclusive).
376  *
377  * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
378  * races.
379  */
struct vnode *
hammer2_igetv(hammer2_inode_t *ip, hammer2_cluster_t *cparent, int *errorp)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_pfsmount_t *pmp;
	struct vnode *vp;

	pmp = ip->pmp;
	KKASSERT(pmp != NULL);
	*errorp = 0;

	/* meta-data for typing the new vnode comes from the locked cluster */
	ripdata = &hammer2_cluster_rdata(cparent)->ipdata;

	for (;;) {
		/*
		 * Attempt to reuse an existing vnode assignment.  It is
		 * possible to race a reclaim so the vget() may fail.  The
		 * inode must be unlocked during the vget() to avoid a
		 * deadlock against a reclaim.
		 */
		int wasexclusive;

		vp = ip->vp;
		if (vp) {
			/*
			 * Inode must be unlocked during the vget() to avoid
			 * possible deadlocks, but leave the ip ref intact.
			 *
			 * vnode is held to prevent destruction during the
			 * vget().  The vget() can still fail if we lost
			 * a reclaim race on the vnode.
			 */
			hammer2_mtx_state_t ostate;

			vhold(vp);
			ostate = hammer2_inode_lock_temp_release(ip);
			if (vget(vp, LK_EXCLUSIVE)) {
				/* lost reclaim race; retry from the top */
				vdrop(vp);
				hammer2_inode_lock_temp_restore(ip, ostate);
				continue;
			}
			hammer2_inode_lock_temp_restore(ip, ostate);
			vdrop(vp);
			/* vp still locked and ref from vget */
			if (ip->vp != vp) {
				/* ip->vp changed while unlocked; retry */
				kprintf("hammer2: igetv race %p/%p\n",
					ip->vp, vp);
				vput(vp);
				continue;
			}
			*errorp = 0;
			break;
		}

		/*
		 * No vnode exists, allocate a new vnode.  Beware of
		 * allocation races.  This function will return an
		 * exclusively locked and referenced vnode.
		 */
		*errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
		if (*errorp) {
			kprintf("hammer2: igetv getnewvnode failed %d\n",
				*errorp);
			vp = NULL;
			break;
		}

		/*
		 * Lock the inode and check for an allocation race.
		 * (the upgrade may briefly release the lock, hence the
		 * ip->vp re-check).
		 */
		wasexclusive = hammer2_inode_lock_upgrade(ip);
		if (ip->vp != NULL) {
			/* lost the race; discard our new vnode and retry */
			vp->v_type = VBAD;
			vx_put(vp);
			hammer2_inode_lock_downgrade(ip, wasexclusive);
			continue;
		}

		/* Type the vnode from the on-media object type. */
		switch (ripdata->type) {
		case HAMMER2_OBJTYPE_DIRECTORY:
			vp->v_type = VDIR;
			break;
		case HAMMER2_OBJTYPE_REGFILE:
			vp->v_type = VREG;
			vinitvmio(vp, ripdata->size,
				  HAMMER2_LBUFSIZE,
				  (int)ripdata->size & HAMMER2_LBUFMASK);
			break;
		case HAMMER2_OBJTYPE_SOFTLINK:
			/*
			 * XXX for now we are using the generic file_read
			 * and file_write code so we need a buffer cache
			 * association.
			 */
			vp->v_type = VLNK;
			vinitvmio(vp, ripdata->size,
				  HAMMER2_LBUFSIZE,
				  (int)ripdata->size & HAMMER2_LBUFMASK);
			break;
		case HAMMER2_OBJTYPE_CDEV:
			vp->v_type = VCHR;
			/* fall through */
		case HAMMER2_OBJTYPE_BDEV:
			vp->v_ops = &pmp->mp->mnt_vn_spec_ops;
			if (ripdata->type != HAMMER2_OBJTYPE_CDEV)
				vp->v_type = VBLK;
			addaliasu(vp, ripdata->rmajor, ripdata->rminor);
			break;
		case HAMMER2_OBJTYPE_FIFO:
			vp->v_type = VFIFO;
			vp->v_ops = &pmp->mp->mnt_vn_fifo_ops;
			break;
		default:
			panic("hammer2: unhandled objtype %d", ripdata->type);
			break;
		}

		if (ip == pmp->iroot)
			vsetflags(vp, VROOT);

		/* bind the vnode and inode together; vp holds an ip ref */
		vp->v_data = ip;
		ip->vp = vp;
		hammer2_inode_ref(ip);		/* vp association */
		hammer2_inode_lock_downgrade(ip, wasexclusive);
		break;
	}

	/*
	 * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
	 */
	if (hammer2_debug & 0x0002) {
		kprintf("igetv vp %p refs 0x%08x aux 0x%08x\n",
			vp, vp->v_refcnt, vp->v_auxrefs);
	}
	return (vp);
}
516
517 /*
518  * Returns the inode associated with the passed-in cluster, creating the
519  * inode if necessary and synchronizing it to the passed-in cluster otherwise.
520  *
521  * The passed-in chain must be locked and will remain locked on return.
522  * The returned inode will be locked and the caller may dispose of both
523  * via hammer2_inode_unlock_ex().  However, if the caller needs to resolve
524  * a hardlink it must ref/unlock/relock/drop the inode.
525  *
526  * The hammer2_inode structure regulates the interface between the high level
527  * kernel VNOPS API and the filesystem backend (the chains).
528  */
hammer2_inode_t *
hammer2_inode_get(hammer2_pfsmount_t *pmp, hammer2_inode_t *dip,
		  hammer2_cluster_t *cluster)
{
	hammer2_inode_t *nip;
	const hammer2_inode_data_t *iptmp;
	const hammer2_inode_data_t *nipdata;

	KKASSERT(hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
	KKASSERT(pmp);

	/*
	 * Interlocked lookup/ref of the inode.  This code is only needed
	 * when looking up inodes with nlinks != 0 (TODO: optimize out
	 * otherwise and test for duplicates).
	 */
again:
	for (;;) {
		iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
		nip = hammer2_inode_lookup(pmp, iptmp->inum);
		if (nip == NULL)
			break;

		hammer2_mtx_ex(&nip->lock, "h2ino");

		/*
		 * Handle SMP race (not applicable to the super-root spmp
		 * which can't index inodes due to duplicative inode numbers).
		 */
		if (pmp->spmp_hmp == NULL &&
		    (nip->flags & HAMMER2_INODE_ONRBTREE) == 0) {
			/* raced a removal from the tree; retry the lookup */
			hammer2_mtx_unlock(&nip->lock);
			hammer2_inode_drop(nip);
			continue;
		}
		/* found existing inode; sync it to the passed-in cluster */
		hammer2_inode_repoint(nip, NULL, cluster);
		return nip;
	}

	/*
	 * We couldn't find the inode number, create a new inode.
	 */
	nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO);
	atomic_add_long(&pmp->inmem_inodes, 1);
	hammer2_pfs_memory_inc(pmp);
	hammer2_pfs_memory_wakeup(pmp);
	if (pmp->spmp_hmp)
		nip->flags = HAMMER2_INODE_SROOT;

	/*
	 * Initialize nip's cluster
	 */
	nip->cluster.refs = 1;
	nip->cluster.pmp = pmp;
	nip->cluster.flags |= HAMMER2_CLUSTER_INODE;
	hammer2_cluster_replace(&nip->cluster, cluster);

	/* cache frequently used meta-data fields in the in-memory inode */
	nipdata = &hammer2_cluster_rdata(cluster)->ipdata;
	nip->inum = nipdata->inum;
	nip->size = nipdata->size;
	nip->mtime = nipdata->mtime;
	hammer2_inode_repoint(nip, NULL, cluster);
	nip->pip = dip;				/* can be NULL */
	if (dip)
		hammer2_inode_ref(dip);	/* ref dip for nip->pip */

	nip->pmp = pmp;

	/*
	 * ref and lock on nip gives it state compatible to after a
	 * hammer2_inode_lock_ex() call.
	 */
	nip->refs = 1;
	hammer2_mtx_init(&nip->lock, "h2ino");
	hammer2_mtx_ex(&nip->lock, "h2ino");
	/* combination of thread lock and chain lock == inode lock */

	/*
	 * Attempt to add the inode.  If it fails we raced another inode
	 * get.  Undo all the work and try again.
	 */
	if (pmp->spmp_hmp == NULL) {
		hammer2_spin_ex(&pmp->inum_spin);
		if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
			/* insert collision: drop our inode and retry */
			hammer2_spin_unex(&pmp->inum_spin);
			hammer2_mtx_unlock(&nip->lock);
			hammer2_inode_drop(nip);
			goto again;
		}
		atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
		hammer2_spin_unex(&pmp->inum_spin);
	}

	return (nip);
}
624
625 /*
626  * Create a new inode in the specified directory using the vattr to
627  * figure out the type of inode.
628  *
629  * If no error occurs the new inode with its cluster locked is returned in
630  * *nipp, otherwise an error is returned and *nipp is set to NULL.
631  *
632  * If vap and/or cred are NULL the related fields are not set and the
633  * inode type defaults to a directory.  This is used when creating PFSs
634  * under the super-root, so the inode number is set to 1 in this case.
635  *
636  * dip is not locked on entry.
637  *
638  * NOTE: When used to create a snapshot, the inode is temporarily associated
639  *       with the super-root spmp. XXX should pass new pmp for snapshot.
640  */
hammer2_inode_t *
hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
		     struct vattr *vap, struct ucred *cred,
		     const uint8_t *name, size_t name_len,
		     hammer2_cluster_t **clusterp, int *errorp)
{
	const hammer2_inode_data_t *dipdata;
	hammer2_inode_data_t *nipdata;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *cparent;
	hammer2_inode_t *nip;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	int error;
	uid_t xuid;
	uuid_t dip_uid;
	uuid_t dip_gid;
	uint32_t dip_mode;
	uint8_t dip_comp_algo;
	uint8_t dip_check_algo;
	int ddflag;

	lhc = hammer2_dirhash(name, name_len);
	*errorp = 0;

	/*
	 * Locate the inode or indirect block to create the new
	 * entry in.  At the same time check for key collisions
	 * and iterate until we don't get one.
	 *
	 * NOTE: hidden inodes do not have iterators.
	 */
retry:
	cparent = hammer2_inode_lock_ex(dip);
	dipdata = &hammer2_cluster_rdata(cparent)->ipdata;
	/*
	 * Snapshot the parent fields we inherit from, so they remain
	 * valid after dip is unlocked below.
	 */
	dip_uid = dipdata->uid;
	dip_gid = dipdata->gid;
	dip_mode = dipdata->mode;
	dip_comp_algo = dipdata->comp_algo;
	dip_check_algo = dipdata->check_algo;

	/*
	 * Iterate lhc while the key slot is occupied; only the iterator
	 * bits below DIRHASH_LOMASK may be bumped, and the VISIBLE bit
	 * must remain set, otherwise the directory hash space for this
	 * name is exhausted (ENOSPC).
	 */
	error = 0;
	while (error == 0) {
		cluster = hammer2_cluster_lookup(cparent, &key_dummy,
						 lhc, lhc, 0, &ddflag);
		if (cluster == NULL)
			break;
		if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
			error = ENOSPC;
		if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
			error = ENOSPC;
		hammer2_cluster_unlock(cluster);
		cluster = NULL;
		++lhc;
	}

	if (error == 0) {
		error = hammer2_cluster_create(trans, cparent, &cluster,
					     lhc, 0,
					     HAMMER2_BREF_TYPE_INODE,
					     HAMMER2_INODE_BYTES,
					     0);
	}
#if INODE_DEBUG
	kprintf("CREATE INODE %*.*s chain=%p\n",
		(int)name_len, (int)name_len, name,
		(cluster ? cluster->focus : NULL));
#endif

	/*
	 * Cleanup and handle retries.  EAGAIN from cluster_create means
	 * we must wait for the cluster and retry from the top; hold an
	 * extra ref across the inode unlock so cparent stays valid for
	 * the wait.
	 */
	if (error == EAGAIN) {
		hammer2_cluster_ref(cparent);
		hammer2_inode_unlock_ex(dip, cparent);
		hammer2_cluster_wait(cparent);
		hammer2_cluster_drop(cparent);
		goto retry;
	}
	hammer2_inode_unlock_ex(dip, cparent);
	cparent = NULL;

	if (error) {
		KKASSERT(cluster == NULL);
		*errorp = error;
		return (NULL);
	}

	/*
	 * Set up the new inode.
	 *
	 * NOTE: *_get() integrates chain's lock into the inode lock.
	 *
	 * NOTE: Only one new inode can currently be created per
	 *       transaction.  If the need arises we can adjust
	 *       hammer2_trans_init() to allow more.
	 *
	 * NOTE: nipdata will have chain's blockset data.
	 */
	KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_MODIFIED);
	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
	nipdata->inum = trans->inode_tid;
	hammer2_cluster_modsync(cluster);
	nip = hammer2_inode_get(dip->pmp, dip, cluster);
	/* re-resolve wdata after inode_get may have synced the cluster */
	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;

	if (vap) {
		KKASSERT(trans->inodes_created == 0);
		nipdata->type = hammer2_get_obj_type(vap->va_type);
		nipdata->inum = trans->inode_tid;
		++trans->inodes_created;

		switch (nipdata->type) {
		case HAMMER2_OBJTYPE_CDEV:
		case HAMMER2_OBJTYPE_BDEV:
			/* device nodes record their major/minor numbers */
			nipdata->rmajor = vap->va_rmajor;
			nipdata->rminor = vap->va_rminor;
			break;
		default:
			break;
		}
	} else {
		/* no vattr: PFS creation under the super-root, inum 1 */
		nipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
		nipdata->inum = 1;
	}

	/* Inherit parent's inode compression mode. */
	nip->comp_heuristic = 0;
	nipdata->comp_algo = dip_comp_algo;
	nipdata->check_algo = dip_check_algo;
	nipdata->version = HAMMER2_INODE_VERSION_ONE;
	hammer2_update_time(&nipdata->ctime);
	nipdata->mtime = nipdata->ctime;
	if (vap)
		nipdata->mode = vap->va_mode;
	nipdata->nlinks = 1;
	if (vap) {
		/*
		 * Resolve ownership: explicit uuid/uid from the vattr wins,
		 * otherwise derive from the parent directory via the
		 * standard create-uid helper (or 0 for the super-root).
		 */
		if (dip && dip->pmp) {
			xuid = hammer2_to_unix_xid(&dip_uid);
			xuid = vop_helper_create_uid(dip->pmp->mp,
						     dip_mode,
						     xuid,
						     cred,
						     &vap->va_mode);
		} else {
			/* super-root has no dip and/or pmp */
			xuid = 0;
		}
		if (vap->va_vaflags & VA_UID_UUID_VALID)
			nipdata->uid = vap->va_uid_uuid;
		else if (vap->va_uid != (uid_t)VNOVAL)
			hammer2_guid_to_uuid(&nipdata->uid, vap->va_uid);
		else
			hammer2_guid_to_uuid(&nipdata->uid, xuid);

		if (vap->va_vaflags & VA_GID_UUID_VALID)
			nipdata->gid = vap->va_gid_uuid;
		else if (vap->va_gid != (gid_t)VNOVAL)
			hammer2_guid_to_uuid(&nipdata->gid, vap->va_gid);
		else if (dip)
			nipdata->gid = dip_gid;
	}

	/*
	 * Regular files and softlinks allow a small amount of data to be
	 * directly embedded in the inode.  This flag will be cleared if
	 * the size is extended past the embedded limit.
	 */
	if (nipdata->type == HAMMER2_OBJTYPE_REGFILE ||
	    nipdata->type == HAMMER2_OBJTYPE_SOFTLINK) {
		nipdata->op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
	}

	/* record the directory-entry name in the inode itself */
	KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
	bcopy(name, nipdata->filename, name_len);
	nipdata->name_key = lhc;
	nipdata->name_len = name_len;
	hammer2_cluster_modsync(cluster);
	*clusterp = cluster;

	return (nip);
}
823
824 /*
825  * The cluster has been removed from the original directory and replaced
826  * with a hardlink pointer.  Move the cluster to the specified parent
827  * directory, change the filename to "0xINODENUMBER", and adjust the key.
828  * The cluster becomes our invisible hardlink target.
829  *
830  * The original cluster must be deleted on entry.
831  */
static
void
hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_cluster_t *cluster,
			hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
			int nlinks, int *errorp)
{
	const hammer2_inode_data_t *iptmp;
	hammer2_inode_data_t *nipdata;
	hammer2_cluster_t *xcluster;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	hammer2_blockref_t bref;
	int ddflag;

	/*
	 * The hidden hardlink target is keyed by its inode number.  Inode
	 * numbers never have the VISIBLE bit set, so the target cannot
	 * collide with visible (dirhash-keyed) directory entries.
	 */
	iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
	lhc = iptmp->inum;
	KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);

	/*
	 * Locate the inode or indirect block to create the new
	 * entry in.  lhc represents the inode number so there is
	 * no collision iteration.
	 *
	 * There should be no key collisions with invisible inode keys.
	 *
	 * WARNING! Must use inode_lock_ex() on dip to handle a stale
	 *	    dip->cluster cache.
	 */
	*errorp = 0;
	xcluster = hammer2_cluster_lookup(dcluster, &key_dummy,
				      lhc, lhc, 0, &ddflag);
	if (xcluster) {
		/*
		 * Unexpected collision on the invisible key.  Report and
		 * fail with ENOSPC.  NOTE(review): the panic() in the
		 * error path below makes this effectively fatal; it looks
		 * like leftover debugging scaffolding -- confirm intent.
		 */
		kprintf("X3 chain %p dip %p dchain %p dip->chain %p\n",
			xcluster->focus, dip, dcluster->focus,
			dip->cluster.focus);
		hammer2_cluster_unlock(xcluster);
		xcluster = NULL;
		*errorp = ENOSPC;
#if 0
		Debugger("X3");
#endif
	}

	/*
	 * Handle the error case
	 */
	if (*errorp) {
		panic("error2");
		KKASSERT(xcluster == NULL);
		return;
	}

	/*
	 * Use xcluster as a placeholder for (lhc).  Duplicate cluster to the
	 * same target bref as xcluster and then delete xcluster.  The
	 * duplication occurs after xcluster in flush order even though
	 * xcluster is deleted after the duplication. XXX
	 *
	 * WARNING! Duplications (to a different parent) can cause indirect
	 *	    blocks to be inserted, refactor xcluster.
	 *
	 * WARNING! Only key and keybits is extracted from a passed-in bref.
	 */
	hammer2_cluster_bref(cluster, &bref);
	bref.key = lhc;			/* invisible dir entry key */
	bref.keybits = 0;
	hammer2_cluster_rename(trans, &bref, dcluster, cluster, 0);

	/*
	 * cluster is now 'live' again.. adjust the filename.
	 *
	 * Directory entries are inodes but this is a hidden hardlink
	 * target.  The name isn't used but to ease debugging give it
	 * a name after its inode number.
	 */
	hammer2_cluster_modify(trans, cluster, 0);
	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
	ksnprintf(nipdata->filename, sizeof(nipdata->filename),
		  "0x%016jx", (intmax_t)nipdata->inum);
	nipdata->name_len = strlen(nipdata->filename);
	nipdata->name_key = lhc;
	nipdata->nlinks += nlinks;	/* caller-supplied link adjustment */
	hammer2_cluster_modsync(cluster);
}
916
917 /*
918  * Connect the target inode represented by (cluster) to the media topology
919  * at (dip, name, len).  The caller can pass a rough *chainp, this function
920  * will issue lookup()s to position the parent chain properly for the
921  * chain insertion.
922  *
923  * If hlink is TRUE this function creates an OBJTYPE_HARDLINK directory
924  * entry instead of connecting (cluster).
925  *
926  * If hlink is FALSE this function expects (cluster) to be unparented.
927  */
928 int
929 hammer2_inode_connect(hammer2_trans_t *trans,
930                       hammer2_cluster_t **clusterp, int hlink,
931                       hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
932                       const uint8_t *name, size_t name_len,
933                       hammer2_key_t lhc)
934 {
935         hammer2_inode_data_t *wipdata;
936         hammer2_cluster_t *ocluster;
937         hammer2_cluster_t *ncluster;
938         hammer2_key_t key_dummy;
939         int ddflag;
940         int error;
941
942         /*
943          * Since ocluster is either disconnected from the topology or
944          * represents a hardlink terminus which is always a parent of or
945          * equal to dip, we should be able to safely lock dip->chain for
946          * our setup.
947          *
948          * WARNING! Must use inode_lock_ex() on dip to handle a stale
949          *          dip->cluster.
950          *
951          * If name is non-NULL we calculate lhc, else we use the passed-in
952          * lhc.
953          */
954         ocluster = *clusterp;
955
956         if (name) {
957                 lhc = hammer2_dirhash(name, name_len);
958
959                 /*
960                  * Locate the inode or indirect block to create the new
961                  * entry in.  At the same time check for key collisions
962                  * and iterate until we don't get one.
963                  */
964                 error = 0;
965                 while (error == 0) {
966                         ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
967                                                       lhc, lhc,
968                                                       0, &ddflag);
969                         if (ncluster == NULL)
970                                 break;
971                         if ((lhc & HAMMER2_DIRHASH_LOMASK) ==
972                             HAMMER2_DIRHASH_LOMASK) {
973                                 error = ENOSPC;
974                         }
975                         hammer2_cluster_unlock(ncluster);
976                         ncluster = NULL;
977                         ++lhc;
978                 }
979         } else {
980                 /*
981                  * Reconnect to specific key (used when moving
982                  * unlinked-but-open files into the hidden directory).
983                  */
984                 ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
985                                                   lhc, lhc,
986                                                   0, &ddflag);
987                 KKASSERT(ncluster == NULL);
988         }
989
990         if (error == 0) {
991                 if (hlink) {
992                         /*
993                          * Hardlink pointer needed, create totally fresh
994                          * directory entry.
995                          *
996                          * We must refactor ocluster because it might have
997                          * been shifted into an indirect cluster by the
998                          * create.
999                          */
1000                         KKASSERT(ncluster == NULL);
1001                         error = hammer2_cluster_create(trans,
1002                                                        dcluster, &ncluster,
1003                                                        lhc, 0,
1004                                                        HAMMER2_BREF_TYPE_INODE,
1005                                                        HAMMER2_INODE_BYTES,
1006                                                        0);
1007                 } else {
1008                         /*
1009                          * Reconnect the original cluster under the new name.
1010                          * Original cluster must have already been deleted by
1011                          * teh caller.
1012                          *
1013                          * WARNING! Can cause held-over clusters to require a
1014                          *          refactor.  Fortunately we have none (our
1015                          *          locked clusters are passed into and
1016                          *          modified by the call).
1017                          */
1018                         ncluster = ocluster;
1019                         ocluster = NULL;
1020                         error = hammer2_cluster_create(trans,
1021                                                        dcluster, &ncluster,
1022                                                        lhc, 0,
1023                                                        HAMMER2_BREF_TYPE_INODE,
1024                                                        HAMMER2_INODE_BYTES,
1025                                                        0);
1026                 }
1027         }
1028
1029         /*
1030          * Unlock stuff.
1031          */
1032         KKASSERT(error != EAGAIN);
1033
1034         /*
1035          * ncluster should be NULL on error, leave ocluster
1036          * (ocluster == *clusterp) alone.
1037          */
1038         if (error) {
1039                 KKASSERT(ncluster == NULL);
1040                 return (error);
1041         }
1042
1043         /*
1044          * Directory entries are inodes so if the name has changed we have
1045          * to update the inode.
1046          *
1047          * When creating an OBJTYPE_HARDLINK entry remember to unlock the
1048          * cluster, the caller will access the hardlink via the actual hardlink
1049          * target file and not the hardlink pointer entry, so we must still
1050          * return ocluster.
1051          */
1052         if (hlink && hammer2_hardlink_enable >= 0) {
1053                 /*
1054                  * Create the HARDLINK pointer.  oip represents the hardlink
1055                  * target in this situation.
1056                  *
1057                  * We will return ocluster (the hardlink target).
1058                  */
1059                 hammer2_cluster_modify(trans, ncluster, 0);
1060                 hammer2_cluster_clr_chainflags(ncluster,
1061                                                HAMMER2_CHAIN_UNLINKED);
1062                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1063                 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1064                 bcopy(name, wipdata->filename, name_len);
1065                 wipdata->name_key = lhc;
1066                 wipdata->name_len = name_len;
1067                 wipdata->target_type =
1068                                 hammer2_cluster_rdata(ocluster)->ipdata.type;
1069                 wipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1070                 wipdata->inum = hammer2_cluster_rdata(ocluster)->ipdata.inum;
1071                 wipdata->version = HAMMER2_INODE_VERSION_ONE;
1072                 wipdata->nlinks = 1;
1073                 wipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1074                 hammer2_cluster_modsync(ncluster);
1075                 hammer2_cluster_unlock(ncluster);
1076                 ncluster = ocluster;
1077                 ocluster = NULL;
1078         } else {
1079                 /*
1080                  * ncluster is a duplicate of ocluster at the new location.
1081                  * We must fixup the name stored in the inode data.
1082                  * The bref key has already been adjusted by inode_connect().
1083                  */
1084                 hammer2_cluster_modify(trans, ncluster, 0);
1085                 hammer2_cluster_clr_chainflags(ncluster,
1086                                                HAMMER2_CHAIN_UNLINKED);
1087                 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1088
1089                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1090                 bcopy(name, wipdata->filename, name_len);
1091                 wipdata->name_key = lhc;
1092                 wipdata->name_len = name_len;
1093                 wipdata->nlinks = 1;
1094                 hammer2_cluster_modsync(ncluster);
1095         }
1096
1097         /*
1098          * We are replacing ocluster with ncluster, unlock ocluster.  In the
1099          * case where ocluster is left unchanged the code above sets
1100          * ncluster to ocluster and ocluster to NULL, resulting in a NOP here.
1101          */
1102         if (ocluster)
1103                 hammer2_cluster_unlock(ocluster);
1104         *clusterp = ncluster;
1105
1106         return (0);
1107 }
1108
1109 /*
1110  * Repoint ip->cluster's chains to cluster's chains.  Caller must hold
1111  * the inode exclusively locked.  cluster may be NULL to clean out any
1112  * chains in ip->cluster.
1113  */
1114 void
1115 hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
1116                       hammer2_cluster_t *cluster)
1117 {
1118         hammer2_chain_t *ochain;
1119         hammer2_chain_t *nchain;
1120         hammer2_inode_t *opip;
1121         int i;
1122
1123         /*
1124          * Replace chains in ip->cluster with chains from cluster and
1125          * adjust the focus if necessary.
1126          *
1127          * NOTE: nchain and/or ochain can be NULL due to gaps
1128          *       in the cluster arrays.
1129          */
1130         ip->cluster.focus = NULL;
1131         for (i = 0; cluster && i < cluster->nchains; ++i) {
1132                 nchain = cluster->array[i];
1133                 if (i < ip->cluster.nchains) {
1134                         ochain = ip->cluster.array[i];
1135                         if (ochain == nchain) {
1136                                 if (ip->cluster.focus == NULL)
1137                                         ip->cluster.focus = nchain;
1138                                 continue;
1139                         }
1140                 } else {
1141                         ochain = NULL;
1142                 }
1143
1144                 /*
1145                  * Make adjustments
1146                  */
1147                 ip->cluster.array[i] = nchain;
1148                 if (ip->cluster.focus == NULL)
1149                         ip->cluster.focus = nchain;
1150                 if (nchain)
1151                         hammer2_chain_ref(nchain);
1152                 if (ochain)
1153                         hammer2_chain_drop(ochain);
1154         }
1155
1156         /*
1157          * Release any left-over chains in ip->cluster.
1158          */
1159         while (i < ip->cluster.nchains) {
1160                 nchain = ip->cluster.array[i];
1161                 if (nchain) {
1162                         ip->cluster.array[i] = NULL;
1163                         hammer2_chain_drop(nchain);
1164                 }
1165                 ++i;
1166         }
1167         ip->cluster.nchains = cluster ? cluster->nchains : 0;
1168
1169         /*
1170          * Repoint ip->pip if requested (non-NULL pip).
1171          */
1172         if (pip && ip->pip != pip) {
1173                 opip = ip->pip;
1174                 hammer2_inode_ref(pip);
1175                 ip->pip = pip;
1176                 if (opip)
1177                         hammer2_inode_drop(opip);
1178         }
1179 }
1180
1181 /*
1182  * Unlink the file from the specified directory inode.  The directory inode
1183  * does not need to be locked.
1184  *
1185  * isdir determines whether a directory/non-directory check should be made.
1186  * No check is made if isdir is set to -1.
1187  *
1188  * isopen specifies whether special unlink-with-open-descriptor handling
1189  * must be performed.  If set to -1 the caller is deleting a PFS and we
1190  * check whether the chain is mounted or not (chain->pmp != NULL).  1 is
1191  * implied if it is mounted.
1192  *
1193  * If isopen is 1 and nlinks drops to 0 this function must move the chain
1194  * to a special hidden directory until last-close occurs on the file.
1195  *
1196  * NOTE!  The underlying file can still be active with open descriptors
1197  *        or if the chain is being manually held (e.g. for rename).
1198  *
1199  *        The caller is responsible for fixing up ip->chain if e.g. a
1200  *        rename occurs (see chain_duplicate()).
1201  *
1202  * NOTE!  The chain is not deleted if it is moved to the hidden directory,
1203  *        but otherwise will be deleted.
1204  */
int
hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
		    const uint8_t *name, size_t name_len,
		    int isdir, int *hlinkp, struct nchandle *nch,
		    int nlinks)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_data_t *wipdata;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *hcluster;	/* hardlink pointer, once resolved */
	hammer2_cluster_t *hparent;	/* parent of the hardlink target */
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *dparent;
	hammer2_cluster_t *dcluster;
	hammer2_key_t key_dummy;
	hammer2_key_t key_next;
	hammer2_key_t lhc;
	int error;
	int ddflag;
	int hlink;
	uint8_t type;

	error = 0;
	hlink = 0;
	hcluster = NULL;
	hparent = NULL;
	lhc = hammer2_dirhash(name, name_len);

again:
	/*
	 * Search for the filename in the directory.  The dirhash only
	 * fixes the high bits, so iterate the LOMASK range and compare
	 * names to resolve collisions.
	 */
	cparent = hammer2_inode_lock_ex(dip);
	cluster = hammer2_cluster_lookup(cparent, &key_next,
				     lhc, lhc + HAMMER2_DIRHASH_LOMASK,
				     0, &ddflag);
	while (cluster) {
		if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
			ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
			if (ripdata->name_len == name_len &&
			    bcmp(ripdata->filename, name, name_len) == 0) {
				break;
			}
		}
		cluster = hammer2_cluster_next(cparent, cluster, &key_next,
					       key_next,
					       lhc + HAMMER2_DIRHASH_LOMASK,
					       0);
	}
	hammer2_inode_unlock_ex(dip, NULL);	/* retain cparent */

	/*
	 * Not found or wrong type (isdir < 0 disables the type check).
	 * If a hardlink pointer, type checks use the hardlink target.
	 */
	if (cluster == NULL) {
		error = ENOENT;
		goto done;
	}
	ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
	type = ripdata->type;
	if (type == HAMMER2_OBJTYPE_HARDLINK) {
		hlink = 1;
		type = ripdata->target_type;
	}

	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
		error = ENOTDIR;
		goto done;
	}
	if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir >= 1) {
		error = EISDIR;
		goto done;
	}

	/*
	 * Hardlink must be resolved.  We can't hold the parent locked
	 * while we do this or we could deadlock.  The physical file will
	 * be located at or above the current directory.
	 *
	 * We loop to reacquire the hardlink origination.
	 *
	 * NOTE: hammer2_hardlink_find() will locate the hardlink target,
	 *	 returning a modified hparent and hcluster.
	 */
	if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) {
		if (hcluster == NULL) {
			hcluster = cluster;
			cluster = NULL;	/* safety */
			hammer2_cluster_unlock(cparent);
			cparent = NULL;	/* safety */
			ripdata = NULL;	/* safety (associated w/cparent) */
			error = hammer2_hardlink_find(dip, &hparent, hcluster);

			/*
			 * If we couldn't find the hardlink target then some
			 * parent directory containing the hardlink pointer
			 * probably got renamed to above the original target,
			 * a case not yet handled by H2.
			 */
			if (error) {
				kprintf("H2 unlink_file: hardlink target for "
					"\"%s\" not found\n",
					name);
				kprintf("(likely due to known directory "
					"rename bug)\n");
				goto done;
			}
			goto again;
		}
	}

	/*
	 * If this is a directory the directory must be empty.  However, if
	 * isdir < 0 we are doing a rename and the directory does not have
	 * to be empty, and if isdir > 1 we are deleting a PFS/snapshot
	 * and the directory does not have to be empty.
	 *
	 * NOTE: We check the full key range here which covers both visible
	 *	 and invisible entries.  Theoretically there should be no
	 *	 invisible (hardlink target) entries if there are no visible
	 *	 entries.
	 */
	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
		dparent = hammer2_cluster_lookup_init(cluster, 0);
		dcluster = hammer2_cluster_lookup(dparent, &key_dummy,
						  0, (hammer2_key_t)-1,
						  HAMMER2_LOOKUP_NODATA,
						  &ddflag);
		if (dcluster) {
			/* any entry at all means the directory is not empty */
			hammer2_cluster_unlock(dcluster);
			hammer2_cluster_lookup_done(dparent);
			error = ENOTEMPTY;
			goto done;
		}
		hammer2_cluster_lookup_done(dparent);
		dparent = NULL;
		/* dcluster NULL */
	}

	/*
	 * If this was a hardlink then (cparent, cluster) is the hardlink
	 * pointer, which we can simply destroy outright.  Discard the
	 * clusters and replace with the hardlink target.
	 */
	if (hcluster) {
		hammer2_cluster_delete(trans, cparent, cluster,
				       HAMMER2_DELETE_PERMANENT);
		hammer2_cluster_unlock(cparent);
		hammer2_cluster_unlock(cluster);
		cparent = hparent;
		cluster = hcluster;
		hparent = NULL;
		hcluster = NULL;
	}

	/*
	 * This leaves us with the hardlink target or non-hardlinked file
	 * or directory in (cparent, cluster).
	 *
	 * Delete the target when nlinks reaches 0 with special handling
	 * if (isopen) is set.
	 *
	 * NOTE! In DragonFly the vnops function calls cache_unlink() after
	 *	 calling us here to clean out the namecache association,
	 *	 (which does not represent a ref for the open-test), and to
	 *	 force finalization of the vnode if/when the last ref gets
	 *	 dropped.
	 *
	 * NOTE! Files are unlinked by rename and then relinked.  nch will be
	 *	 passed as NULL in this situation.  hammer2_inode_connect()
	 *	 will bump nlinks.
	 */
	KKASSERT(cluster != NULL);
	hammer2_cluster_modify(trans, cluster, 0);
	wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
	ripdata = wipdata;	/* keep read pointer valid after modify */
	wipdata->nlinks += nlinks;	/* nlinks is typically negative here */
	if ((int64_t)wipdata->nlinks < 0) {	/* XXX debugging */
		wipdata->nlinks = 0;
	}
	hammer2_cluster_modsync(cluster);

	if (wipdata->nlinks == 0) {
		/*
		 * Target nlinks has reached 0, file now unlinked (but may
		 * still be open).
		 */
		/* XXX need interlock if mounted
		if ((cluster->focus->flags & HAMMER2_CHAIN_PFSROOT) &&
		    cluster->pmp) {
			error = EINVAL;
			kprintf("hammer2: PFS \"%s\" cannot be deleted "
				"while still mounted\n",
				wipdata->filename);
			goto done;
		}
		*/
		hammer2_cluster_set_chainflags(cluster, HAMMER2_CHAIN_UNLINKED);
		if (nch && cache_isopen(nch)) {
			/* still open: park in hidden dir until last close */
			hammer2_inode_move_to_hidden(trans, &cparent, &cluster,
						     wipdata->inum);
		} else {
			/*
			 * This won't get everything if a vnode is still
			 * present, but the cache_unlink() call the caller
			 * makes will.
			 */
			hammer2_cluster_delete(trans, cparent, cluster,
					       HAMMER2_DELETE_PERMANENT);
		}
	} else if (hlink == 0) {
		/*
		 * In this situation a normal non-hardlinked file (which can
		 * only have nlinks == 1) still has a non-zero nlinks, the
		 * caller must be doing a RENAME operation and so is passing
		 * a nlinks adjustment of 0, and only wishes to remove file
		 * in order to be able to reconnect it under a different name.
		 *
		 * In this situation we do a non-permanent deletion of the
		 * chain in order to allow the file to be reconnected in
		 * a different location.
		 */
		KKASSERT(nlinks == 0);
		hammer2_cluster_delete(trans, cparent, cluster, 0);
	}
	error = 0;
done:
	/* unlock whichever clusters are still held on this exit path */
	if (cparent)
		hammer2_cluster_unlock(cparent);
	if (cluster)
		hammer2_cluster_unlock(cluster);
	if (hparent)
		hammer2_cluster_unlock(hparent);
	if (hcluster)
		hammer2_cluster_unlock(hcluster);
	if (hlinkp)
		*hlinkp = hlink;

	return error;
}
1446
1447 /*
1448  * This is called from the mount code to initialize pmp->ihidden
1449  */
void
hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp)
{
	hammer2_trans_t trans;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *scan;
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_data_t *wipdata;
	hammer2_key_t key_dummy;
	hammer2_key_t key_next;
	int ddflag;
	int error;
	int count;
	int dip_check_algo;
	int dip_comp_algo;

	/* already installed, nothing to do */
	if (pmp->ihidden)
		return;

	/*
	 * Find the hidden directory
	 */
	bzero(&key_dummy, sizeof(key_dummy));
	hammer2_trans_init(&trans, pmp, 0);

	/*
	 * Setup for lookup, retrieve iroot's check and compression
	 * algorithm request which was likely generated by newfs_hammer2.
	 *
	 * The check/comp fields will probably never be used since inodes
	 * are renamed into the hidden directory and not created relative to
	 * the hidden directory, chain creation inherits from bref.methods,
	 * and data chains inherit from their respective file inode *_algo
	 * fields.
	 */
	cparent = hammer2_inode_lock_ex(pmp->iroot);
	ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
	dip_check_algo = ripdata->check_algo;
	dip_comp_algo = ripdata->comp_algo;
	ripdata = NULL;

	cluster = hammer2_cluster_lookup(cparent, &key_dummy,
					 HAMMER2_INODE_HIDDENDIR,
					 HAMMER2_INODE_HIDDENDIR,
					 0, &ddflag);
	if (cluster) {
		/* hidden directory already exists on media, adopt it */
		pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
		hammer2_inode_ref(pmp->ihidden);

		/*
		 * Remove any unlinked files which were left open as-of
		 * any system crash.
		 *
		 * Don't pass NODATA, we need the inode data so the delete
		 * can do proper statistics updates.
		 */
		count = 0;
		scan = hammer2_cluster_lookup(cluster, &key_next,
					      0, HAMMER2_TID_MAX,
					      0, &ddflag);
		while (scan) {
			if (hammer2_cluster_type(scan) ==
			    HAMMER2_BREF_TYPE_INODE) {
				hammer2_cluster_delete(&trans, cluster, scan,
						   HAMMER2_DELETE_PERMANENT);
				++count;
			}
			scan = hammer2_cluster_next(cluster, scan, &key_next,
						    0, HAMMER2_TID_MAX, 0);
		}

		hammer2_inode_unlock_ex(pmp->ihidden, cluster);
		hammer2_inode_unlock_ex(pmp->iroot, cparent);
		hammer2_trans_done(&trans);
		kprintf("hammer2: PFS loaded hidden dir, "
			"removed %d dead entries\n", count);
		return;
	}

	/*
	 * Create the hidden directory
	 *
	 * NOTE(review): the error return is assigned but never checked;
	 * a failed create would dereference a questionable cluster below.
	 * Confirm whether cluster_create can fail in this path.
	 */
	error = hammer2_cluster_create(&trans, cparent, &cluster,
				       HAMMER2_INODE_HIDDENDIR, 0,
				       HAMMER2_BREF_TYPE_INODE,
				       HAMMER2_INODE_BYTES,
				       0);
	hammer2_inode_unlock_ex(pmp->iroot, cparent);

	/* initialize the freshly created hidden directory inode */
	hammer2_cluster_modify(&trans, cluster, 0);
	wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
	wipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
	wipdata->inum = HAMMER2_INODE_HIDDENDIR;
	wipdata->nlinks = 1;
	wipdata->comp_algo = dip_comp_algo;
	wipdata->check_algo = dip_check_algo;
	hammer2_cluster_modsync(cluster);
	kprintf("hammer2: PFS root missing hidden directory, creating\n");

	pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
	hammer2_inode_ref(pmp->ihidden);
	hammer2_inode_unlock_ex(pmp->ihidden, cluster);
	hammer2_trans_done(&trans);
}
1555
1556 /*
1557  * If an open file is unlinked H2 needs to retain the file in the topology
1558  * to ensure that its backing store is not recovered by the bulk free scan.
1559  * This also allows us to avoid having to special-case the CHAIN_DELETED flag.
1560  *
1561  * To do this the file is moved to a hidden directory in the PFS root and
1562  * renamed.  The hidden directory must be created if it does not exist.
1563  */
1564 static
1565 void
1566 hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
1567                              hammer2_cluster_t **cparentp,
1568                              hammer2_cluster_t **clusterp,
1569                              hammer2_tid_t inum)
1570 {
1571         hammer2_cluster_t *dcluster;
1572         hammer2_pfsmount_t *pmp;
1573         int error;
1574
1575         pmp = (*clusterp)->pmp;
1576         KKASSERT(pmp != NULL);
1577         KKASSERT(pmp->ihidden != NULL);
1578
1579         hammer2_cluster_delete(trans, *cparentp, *clusterp, 0);
1580         dcluster = hammer2_inode_lock_ex(pmp->ihidden);
1581         error = hammer2_inode_connect(trans, clusterp, 0,
1582                                       pmp->ihidden, dcluster,
1583                                       NULL, 0, inum);
1584         hammer2_inode_unlock_ex(pmp->ihidden, dcluster);
1585         KKASSERT(error == 0);
1586 }
1587
1588 /*
1589  * Given an exclusively locked inode and cluster we consolidate the cluster
1590  * for hardlink creation, adding (nlinks) to the file's link count and
1591  * potentially relocating the inode to (cdip) which is a parent directory
1592  * common to both the current location of the inode and the intended new
1593  * hardlink.
1594  *
1595  * Replaces (*clusterp) if consolidation occurred, unlocking the old cluster
1596  * and returning a new locked cluster.
1597  *
1598  * NOTE!  This function will also replace ip->cluster.
1599  */
1600 int
1601 hammer2_hardlink_consolidate(hammer2_trans_t *trans,
1602                              hammer2_inode_t *ip,
1603                              hammer2_cluster_t **clusterp,
1604                              hammer2_inode_t *cdip,
1605                              hammer2_cluster_t *cdcluster,
1606                              int nlinks)
1607 {
1608         const hammer2_inode_data_t *ripdata;
1609         hammer2_inode_data_t *wipdata;
1610         hammer2_cluster_t *cluster;
1611         hammer2_cluster_t *cparent;
1612         int error;
1613
1614         cluster = *clusterp;
1615         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1616         if (nlinks == 0 &&                      /* no hardlink needed */
1617             (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE)) {
1618                 return (0);
1619         }
1620
1621         if (hammer2_hardlink_enable == 0) {     /* disallow hardlinks */
1622                 hammer2_cluster_unlock(cluster);
1623                 *clusterp = NULL;
1624                 return (ENOTSUP);
1625         }
1626
1627         cparent = NULL;
1628
1629         /*
1630          * If no change in the hardlink's target directory is required and
1631          * this is already a hardlink target, all we need to do is adjust
1632          * the link count.
1633          */
1634         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1635         if (cdip == ip->pip &&
1636             (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
1637                 if (nlinks) {
1638                         hammer2_cluster_modify(trans, cluster, 0);
1639                         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1640                         wipdata->nlinks += nlinks;
1641                         hammer2_cluster_modsync(cluster);
1642                         ripdata = wipdata;
1643                 }
1644                 error = 0;
1645                 goto done;
1646         }
1647
1648         /*
1649          * Cluster is the real inode.  The originating directory is locked
1650          * by the caller so we can manipulate it without worrying about races
1651          * against other lookups.
1652          *
1653          * If cluster is visible we need to delete it from the current
1654          * location and create a hardlink pointer in its place.  If it is
1655          * not visible we need only delete it.  Then later cluster will be
1656          * renamed to a parent directory and converted (if necessary) to
1657          * a hidden inode (via shiftup).
1658          *
1659          * NOTE! We must hold cparent locked through the delete/create/rename
1660          *       operation to ensure that other threads block resolving to
1661          *       the same hardlink, otherwise the other threads may not see
1662          *       the hardlink.
1663          */
1664         KKASSERT((cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0);
1665         cparent = hammer2_cluster_parent(cluster);
1666
1667         hammer2_cluster_delete(trans, cparent, cluster, 0);
1668
1669         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1670         KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
1671         if (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) {
1672                 hammer2_cluster_t *ncluster;
1673                 hammer2_key_t lhc;
1674
1675                 ncluster = NULL;
1676                 lhc = cluster->focus->bref.key;
1677                 error = hammer2_cluster_create(trans, cparent, &ncluster,
1678                                              lhc, 0,
1679                                              HAMMER2_BREF_TYPE_INODE,
1680                                              HAMMER2_INODE_BYTES,
1681                                              0);
1682                 hammer2_cluster_modify(trans, ncluster, 0);
1683                 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1684
1685                 /* wipdata->comp_algo = ripdata->comp_algo; */
1686                 wipdata->comp_algo = 0;
1687                 wipdata->check_algo = 0;
1688                 wipdata->version = HAMMER2_INODE_VERSION_ONE;
1689                 wipdata->inum = ripdata->inum;
1690                 wipdata->target_type = ripdata->type;
1691                 wipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1692                 wipdata->uflags = 0;
1693                 wipdata->rmajor = 0;
1694                 wipdata->rminor = 0;
1695                 wipdata->ctime = 0;
1696                 wipdata->mtime = 0;
1697                 wipdata->atime = 0;
1698                 wipdata->btime = 0;
1699                 bzero(&wipdata->uid, sizeof(wipdata->uid));
1700                 bzero(&wipdata->gid, sizeof(wipdata->gid));
1701                 wipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1702                 wipdata->cap_flags = 0;
1703                 wipdata->mode = 0;
1704                 wipdata->size = 0;
1705                 wipdata->nlinks = 1;
1706                 wipdata->iparent = 0;   /* XXX */
1707                 wipdata->pfs_type = 0;
1708                 wipdata->pfs_inum = 0;
1709                 bzero(&wipdata->pfs_clid, sizeof(wipdata->pfs_clid));
1710                 bzero(&wipdata->pfs_fsid, sizeof(wipdata->pfs_fsid));
1711                 wipdata->data_quota = 0;
1712                 wipdata->data_count = 0;
1713                 wipdata->inode_quota = 0;
1714                 wipdata->inode_count = 0;
1715                 wipdata->attr_tid = 0;
1716                 wipdata->dirent_tid = 0;
1717                 bzero(&wipdata->u, sizeof(wipdata->u));
1718                 bcopy(ripdata->filename, wipdata->filename, ripdata->name_len);
1719                 wipdata->name_key = ncluster->focus->bref.key;
1720                 wipdata->name_len = ripdata->name_len;
1721                 /* XXX transaction ids */
1722                 hammer2_cluster_modsync(ncluster);
1723                 hammer2_cluster_unlock(ncluster);
1724         }
1725         ripdata = wipdata;
1726
1727         /*
1728          * cluster represents the hardlink target and is now flagged deleted.
1729          * duplicate it to the parent directory and adjust nlinks.
1730          *
1731          * WARNING! The shiftup() call can cause ncluster to be moved into
1732          *          an indirect block, and our ncluster will wind up pointing
1733          *          to the older/original version.
1734          */
1735         KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_DELETED);
1736         hammer2_hardlink_shiftup(trans, cluster, cdip, cdcluster,
1737                                  nlinks, &error);
1738
1739         if (error == 0)
1740                 hammer2_inode_repoint(ip, cdip, cluster);
1741
1742 done:
1743         /*
1744          * Cleanup, cluster/ncluster already dealt with.
1745          *
1746          * Return the shifted cluster in *clusterp.
1747          */
1748         if (cparent)
1749                 hammer2_cluster_unlock(cparent);
1750         *clusterp = cluster;
1751
1752         return (error);
1753 }
1754
1755 /*
1756  * If (*ochainp) is non-NULL it points to the forward OBJTYPE_HARDLINK
1757  * inode while (*chainp) points to the resolved (hidden hardlink
1758  * target) inode.  In this situation when nlinks is 1 we wish to
1759  * deconsolidate the hardlink, moving it back to the directory that now
1760  * represents the only remaining link.
1761  */
1762 int
1763 hammer2_hardlink_deconsolidate(hammer2_trans_t *trans,
1764                                hammer2_inode_t *dip,
1765                                hammer2_chain_t **chainp,
1766                                hammer2_chain_t **ochainp)
1767 {
1768         if (*ochainp == NULL)
1769                 return (0);
1770         /* XXX */
1771         return (0);
1772 }
1773
1774 /*
1775  * The caller presents a locked cluster with an obj_type of
1776  * HAMMER2_OBJTYPE_HARDLINK.  This routine will locate and replace the
1777  * cluster with the target hardlink, also locked.
1778  *
1779  * If cparentp is not NULL a locked cluster representing the hardlink's
1780  * parent is also returned.
1781  *
1782  * If we are unable to locate the hardlink target EIO is returned and
1783  * (*cparentp) is set to NULL.  The passed-in cluster still needs to be
1784  * unlocked by the caller but will be degenerate... not have any chains.
1785  */
1786 int
1787 hammer2_hardlink_find(hammer2_inode_t *dip,
1788                       hammer2_cluster_t **cparentp, hammer2_cluster_t *cluster)
1789 {
1790         const hammer2_inode_data_t *ipdata;
1791         hammer2_cluster_t *cparent;
1792         hammer2_cluster_t *rcluster;
1793         hammer2_inode_t *ip;
1794         hammer2_inode_t *pip;
1795         hammer2_key_t key_dummy;
1796         hammer2_key_t lhc;
1797         int ddflag;
1798
1799         pip = dip;
1800         hammer2_inode_ref(pip);         /* for loop */
1801
1802         /*
1803          * Locate the hardlink.  pip is referenced and not locked.
1804          */
1805         ipdata = &hammer2_cluster_rdata(cluster)->ipdata;
1806         lhc = ipdata->inum;
1807
1808         /*
1809          * We don't need the cluster's chains, but we need to retain the
1810          * cluster structure itself so we can load the hardlink search
1811          * result into it.
1812          */
1813         KKASSERT(cluster->refs == 1);
1814         atomic_add_int(&cluster->refs, 1);
1815         hammer2_cluster_unlock(cluster);        /* hack */
1816         cluster->nchains = 0;                   /* hack */
1817
1818         rcluster = NULL;
1819         cparent = NULL;
1820
1821         while ((ip = pip) != NULL) {
1822                 cparent = hammer2_inode_lock_ex(ip);
1823                 hammer2_inode_drop(ip);                 /* loop */
1824                 KKASSERT(hammer2_cluster_type(cparent) ==
1825                          HAMMER2_BREF_TYPE_INODE);
1826                 rcluster = hammer2_cluster_lookup(cparent, &key_dummy,
1827                                              lhc, lhc, 0, &ddflag);
1828                 if (rcluster)
1829                         break;
1830                 hammer2_cluster_lookup_done(cparent);   /* discard parent */
1831                 cparent = NULL;                         /* safety */
1832                 pip = ip->pip;          /* safe, ip held locked */
1833                 if (pip)
1834                         hammer2_inode_ref(pip);         /* loop */
1835                 hammer2_inode_unlock_ex(ip, NULL);
1836         }
1837
1838         /*
1839          * chain is locked, ip is locked.  Unlock ip, return the locked
1840          * chain.  *ipp is already set w/a ref count and not locked.
1841          *
1842          * (cparent is already unlocked).
1843          */
1844         if (rcluster) {
1845                 hammer2_cluster_replace(cluster, rcluster);
1846                 hammer2_cluster_drop(rcluster);
1847                 if (cparentp) {
1848                         *cparentp = cparent;
1849                         hammer2_inode_unlock_ex(ip, NULL);
1850                 } else {
1851                         hammer2_inode_unlock_ex(ip, cparent);
1852                 }
1853                 return (0);
1854         } else {
1855                 if (cparentp)
1856                         *cparentp = NULL;
1857                 if (ip)
1858                         hammer2_inode_unlock_ex(ip, cparent);
1859                 return (EIO);
1860         }
1861 }
1862
1863 /*
1864  * Find the directory common to both fdip and tdip.
1865  *
1866  * Returns a held but not locked inode.  Caller typically locks the inode,
1867  * and when through unlocks AND drops it.
1868  */
1869 hammer2_inode_t *
1870 hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
1871 {
1872         hammer2_inode_t *scan1;
1873         hammer2_inode_t *scan2;
1874
1875         /*
1876          * We used to have a depth field but it complicated matters too
1877          * much for directory renames.  So now its ugly.  Check for
1878          * simple cases before giving up and doing it the expensive way.
1879          *
1880          * XXX need a bottom-up topology stability lock
1881          */
1882         if (fdip == tdip || fdip == tdip->pip) {
1883                 hammer2_inode_ref(fdip);
1884                 return(fdip);
1885         }
1886         if (fdip->pip == tdip) {
1887                 hammer2_inode_ref(tdip);
1888                 return(tdip);
1889         }
1890
1891         /*
1892          * XXX not MPSAFE
1893          */
1894         for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
1895                 scan2 = tdip;
1896                 while (scan2->pmp == tdip->pmp) {
1897                         if (scan1 == scan2) {
1898                                 hammer2_inode_ref(scan1);
1899                                 return(scan1);
1900                         }
1901                         scan2 = scan2->pip;
1902                         if (scan2 == NULL)
1903                                 break;
1904                 }
1905         }
1906         panic("hammer2_inode_common_parent: no common parent %p %p\n",
1907               fdip, tdip);
1908         /* NOT REACHED */
1909         return(NULL);
1910 }
1911
1912 /*
1913  * Synchronize the inode's frontend state with the chain state prior
1914  * to any explicit flush of the inode or any strategy write call.
1915  *
1916  * Called with a locked inode.
1917  */
1918 void
1919 hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip, 
1920                     hammer2_cluster_t *cparent)
1921 {
1922         const hammer2_inode_data_t *ripdata;
1923         hammer2_inode_data_t *wipdata;
1924         hammer2_cluster_t *dparent;
1925         hammer2_cluster_t *cluster;
1926         hammer2_key_t lbase;
1927         hammer2_key_t key_next;
1928         int dosync = 0;
1929         int ddflag;
1930
1931         ripdata = &hammer2_cluster_rdata(cparent)->ipdata;    /* target file */
1932
1933         if (ip->flags & HAMMER2_INODE_MTIME) {
1934                 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
1935                 atomic_clear_int(&ip->flags, HAMMER2_INODE_MTIME);
1936                 wipdata->mtime = ip->mtime;
1937                 dosync = 1;
1938                 ripdata = wipdata;
1939         }
1940         if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size < ripdata->size) {
1941                 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
1942                 wipdata->size = ip->size;
1943                 dosync = 1;
1944                 ripdata = wipdata;
1945                 atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);
1946
1947                 /*
1948                  * We must delete any chains beyond the EOF.  The chain
1949                  * straddling the EOF will be pending in the bioq.
1950                  */
1951                 lbase = (ripdata->size + HAMMER2_PBUFMASK64) &
1952                         ~HAMMER2_PBUFMASK64;
1953                 dparent = hammer2_cluster_lookup_init(&ip->cluster, 0);
1954                 cluster = hammer2_cluster_lookup(dparent, &key_next,
1955                                                  lbase, (hammer2_key_t)-1,
1956                                                  HAMMER2_LOOKUP_NODATA,
1957                                                  &ddflag);
1958                 while (cluster) {
1959                         /*
1960                          * Degenerate embedded case, nothing to loop on
1961                          */
1962                         switch (hammer2_cluster_type(cluster)) {
1963                         case HAMMER2_BREF_TYPE_INODE:
1964                                 hammer2_cluster_unlock(cluster);
1965                                 cluster = NULL;
1966                                 break;
1967                         case HAMMER2_BREF_TYPE_DATA:
1968                                 hammer2_cluster_delete(trans, dparent, cluster,
1969                                                    HAMMER2_DELETE_PERMANENT);
1970                                 /* fall through */
1971                         default:
1972                                 cluster = hammer2_cluster_next(dparent, cluster,
1973                                                    &key_next,
1974                                                    key_next, (hammer2_key_t)-1,
1975                                                    HAMMER2_LOOKUP_NODATA);
1976                                 break;
1977                         }
1978                 }
1979                 hammer2_cluster_lookup_done(dparent);
1980         } else
1981         if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size > ripdata->size) {
1982                 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
1983                 wipdata->size = ip->size;
1984                 atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);
1985
1986                 /*
1987                  * When resizing larger we may not have any direct-data
1988                  * available.
1989                  */
1990                 if ((wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
1991                     ip->size > HAMMER2_EMBEDDED_BYTES) {
1992                         wipdata->op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
1993                         bzero(&wipdata->u.blockset,
1994                               sizeof(wipdata->u.blockset));
1995                 }
1996                 dosync = 1;
1997                 ripdata = wipdata;
1998         }
1999         if (dosync)
2000                 hammer2_cluster_modsync(cparent);
2001 }