hammer2 - Retool flushing and use of mirror_tid, more cluster work.
[dragonfly.git] / sys / vfs / hammer2 / hammer2_inode.c
1 /*
2  * Copyright (c) 2011-2014 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
39 #include <sys/lock.h>
40 #include <sys/uuid.h>
41
42 #include "hammer2.h"
43
44 #define INODE_DEBUG     0
45
46 static void hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
47                                          hammer2_cluster_t **cparentp,
48                                          hammer2_cluster_t **clusterp,
49                                          hammer2_tid_t inum);
50
51 RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
52              hammer2_tid_t, inum);
53
54 int
55 hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
56 {
57         if (ip1->inum < ip2->inum)
58                 return(-1);
59         if (ip1->inum > ip2->inum)
60                 return(1);
61         return(0);
62 }
63
64 /*
65  * HAMMER2 inode locks
66  *
67  * HAMMER2 offers shared and exclusive locks on inodes.  Pass a mask of
68  * flags for options:
69  *
70  *      - pass HAMMER2_RESOLVE_SHARED if a shared lock is desired.  The
71  *        inode locking function will automatically set the RDONLY flag.
72  *
73  *      - pass HAMMER2_RESOLVE_ALWAYS if you need the inode's meta-data.
74  *        Most front-end inode locks do.
75  *
76  *      - pass HAMMER2_RESOLVE_NEVER if you do not want to require that
77  *        the inode data be resolved.  This is used by the syncthr because
78  *        it can run on an unresolved/out-of-sync cluster, and also by the
79  *        vnode reclamation code to avoid unnecessary I/O (particularly when
80  *        disposing of hundreds of thousands of cached vnodes).
81  *
82  * The inode locking function locks the inode itself, resolves any stale
83  * chains in the inode's cluster, and allocates a fresh copy of the
84  * cluster with 1 ref and all the underlying chains locked.
85  *
86  * ip->cluster will be stable while the inode is locked.
87  *
88  * NOTE: We don't combine the inode/chain lock because putting away an
89  *       inode would otherwise confuse multiple lock holders of the inode.
90  *
91  * NOTE: In-memory inodes always point to hardlink targets (the actual file),
92  *       and never point to a hardlink pointer.
93  *
94  * NOTE: If caller passes HAMMER2_RESOLVE_RDONLY the exclusive locking code
95  *       will feel free to reduce the chain set in the cluster as an
96  *       optimization.  It will still be validated against the quorum if
97  *       appropriate, but the optimization might be able to reduce data
98  *       accesses to one node.  This flag is automatically set if the inode
99  *       is locked with HAMMER2_RESOLVE_SHARED.
100  */
hammer2_cluster_t *
hammer2_inode_lock(hammer2_inode_t *ip, int how)
{
	hammer2_cluster_t *cluster;

	/*
	 * Ref the inode first so it cannot be destroyed while we hold
	 * only the mutex; the ref is dropped in hammer2_inode_unlock().
	 */
	hammer2_inode_ref(ip);

	/*
	 * Inode structure mutex.  A shared request implies RDONLY so the
	 * exclusive/cluster code is free to reduce the chain set as an
	 * optimization (see function header comment).
	 */
	if (how & HAMMER2_RESOLVE_SHARED) {
		how |= HAMMER2_RESOLVE_RDONLY;
		hammer2_mtx_sh(&ip->lock);
	} else {
		hammer2_mtx_ex(&ip->lock);
	}

	/*
	 * Create a copy of ip->cluster and lock it.  Note that the copy
	 * will have a ref on the cluster AND its chains and we don't want
	 * a second ref to either when we lock it.
	 *
	 * The copy will not have a focus until it is locked.
	 *
	 * Exclusive inode locks set the template focus chain in (ip)
	 * as a hint.  Cluster locks can ALWAYS replace the focus in the
	 * working copy if the hint does not work out, so beware.
	 */
	cluster = hammer2_cluster_copy(&ip->cluster);
	hammer2_cluster_lock(cluster, how);

	/*
	 * cluster->focus will be set if resolving RESOLVE_ALWAYS, but
	 * only update the cached focus in the inode structure when taking
	 * out an exclusive lock.
	 */
	if ((how & HAMMER2_RESOLVE_SHARED) == 0)
		ip->cluster.focus = cluster->focus;

	/*
	 * Returned cluster must resolve hardlink pointers.
	 * XXX remove me.
	 */
	if ((how & HAMMER2_RESOLVE_MASK) == HAMMER2_RESOLVE_ALWAYS &&
	    cluster->error == 0 &&
	    cluster->focus) {
		const hammer2_inode_data_t *ripdata;

		/*
		 * Sanity check: in-memory inodes always point at the
		 * hardlink target, never at a hardlink pointer.
		 */
		ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
		KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
	}
	return (cluster);
}
154
155 void
156 hammer2_inode_unlock(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
157 {
158         if (cluster) {
159                 hammer2_cluster_unlock(cluster);
160                 hammer2_cluster_drop(cluster);
161         }
162         hammer2_mtx_unlock(&ip->lock);
163         hammer2_inode_drop(ip);
164 }
165
166 /*
167  * Temporarily release a lock held shared or exclusive.  Caller must
168  * hold the lock shared or exclusive on call and lock will be released
169  * on return.
170  *
171  * Restore a lock that was temporarily released.
172  */
173 hammer2_mtx_state_t
174 hammer2_inode_lock_temp_release(hammer2_inode_t *ip)
175 {
176         return hammer2_mtx_temp_release(&ip->lock);
177 }
178
179 void
180 hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, hammer2_mtx_state_t ostate)
181 {
182         hammer2_mtx_temp_restore(&ip->lock, ostate);
183 }
184
185 /*
186  * Upgrade a shared inode lock to exclusive and return.  If the inode lock
187  * is already held exclusively this is a NOP.
188  *
189  * The caller MUST hold the inode lock either shared or exclusive on call
190  * and will own the lock exclusively on return.
191  *
192  * Returns non-zero if the lock was already exclusive prior to the upgrade.
193  */
194 int
195 hammer2_inode_lock_upgrade(hammer2_inode_t *ip)
196 {
197         int wasexclusive;
198
199         if (mtx_islocked_ex(&ip->lock)) {
200                 wasexclusive = 1;
201         } else {
202                 hammer2_mtx_unlock(&ip->lock);
203                 hammer2_mtx_ex(&ip->lock);
204                 wasexclusive = 0;
205         }
206         return wasexclusive;
207 }
208
209 /*
210  * Downgrade an inode lock from exclusive to shared only if the inode
211  * lock was previously shared.  If the inode lock was previously exclusive,
212  * this is a NOP.
213  */
214 void
215 hammer2_inode_lock_downgrade(hammer2_inode_t *ip, int wasexclusive)
216 {
217         if (wasexclusive == 0)
218                 mtx_downgrade(&ip->lock);
219 }
220
221 /*
222  * Lookup an inode by inode number
223  */
224 hammer2_inode_t *
225 hammer2_inode_lookup(hammer2_pfs_t *pmp, hammer2_tid_t inum)
226 {
227         hammer2_inode_t *ip;
228
229         KKASSERT(pmp);
230         if (pmp->spmp_hmp) {
231                 ip = NULL;
232         } else {
233                 hammer2_spin_ex(&pmp->inum_spin);
234                 ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
235                 if (ip)
236                         hammer2_inode_ref(ip);
237                 hammer2_spin_unex(&pmp->inum_spin);
238         }
239         return(ip);
240 }
241
242 /*
243  * Adding a ref to an inode is only legal if the inode already has at least
244  * one ref.
245  *
246  * (can be called with spinlock held)
247  */
void
hammer2_inode_ref(hammer2_inode_t *ip)
{
	/*
	 * Unconditional atomic increment.  Legal only because the caller
	 * guarantees an existing ref (see comment above), so we can never
	 * race a 1->0 transition in hammer2_inode_drop().
	 */
	atomic_add_int(&ip->refs, 1);
}
253
254 /*
255  * Drop an inode reference, freeing the inode when the last reference goes
256  * away.
257  */
void
hammer2_inode_drop(hammer2_inode_t *ip)
{
	hammer2_pfs_t *pmp;
	hammer2_inode_t *pip;
	u_int refs;

	/*
	 * Loop instead of recursing so that dropping the implied parent
	 * ref (ip->pip) on final destruction cannot blow the stack on a
	 * deep chain of last-ref parents.
	 */
	while (ip) {
		refs = ip->refs;
		cpu_ccfence();	/* prevent compiler re-fetch of ip->refs */
		if (refs == 1) {
			/*
			 * Transition to zero, must interlock with
			 * the inode inumber lookup tree (if applicable).
			 * It should not be possible for anyone to race
			 * the transition to 0.
			 *
			 */
			pmp = ip->pmp;
			KKASSERT(pmp);
			hammer2_spin_ex(&pmp->inum_spin);

			if (atomic_cmpset_int(&ip->refs, 1, 0)) {
				/* no lock holders may remain at final drop */
				KKASSERT(hammer2_mtx_refs(&ip->lock) == 0);
				if (ip->flags & HAMMER2_INODE_ONRBTREE) {
					atomic_clear_int(&ip->flags,
						     HAMMER2_INODE_ONRBTREE);
					RB_REMOVE(hammer2_inode_tree,
						  &pmp->inum_tree, ip);
				}
				hammer2_spin_unex(&pmp->inum_spin);

				pip = ip->pip;
				ip->pip = NULL;
				ip->pmp = NULL;

				/*
				 * Cleaning out ip->cluster isn't entirely
				 * trivial.
				 */
				hammer2_inode_repoint(ip, NULL, NULL);

				/*
				 * We have to drop pip (if non-NULL) to
				 * dispose of our implied reference from
				 * ip->pip.  We can simply loop on it.
				 */
				kfree(ip, pmp->minode);
				atomic_add_long(&pmp->inmem_inodes, -1);
				ip = pip;
				/* continue with pip (can be NULL) */
			} else {
				/* lost the cmpset race; retry the loop */
				hammer2_spin_unex(&ip->pmp->inum_spin);
			}
		} else {
			/*
			 * Non zero transition
			 */
			if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
				break;
		}
	}
}
321
322 /*
323  * Get the vnode associated with the given inode, allocating the vnode if
324  * necessary.  The vnode will be returned exclusively locked.
325  *
326  * The caller must lock the inode (shared or exclusive).
327  *
328  * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
329  * races.
330  */
331 struct vnode *
332 hammer2_igetv(hammer2_inode_t *ip, hammer2_cluster_t *cparent, int *errorp)
333 {
334         const hammer2_inode_data_t *ripdata;
335         hammer2_pfs_t *pmp;
336         struct vnode *vp;
337
338         pmp = ip->pmp;
339         KKASSERT(pmp != NULL);
340         *errorp = 0;
341
342         ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
343
344         for (;;) {
345                 /*
346                  * Attempt to reuse an existing vnode assignment.  It is
347                  * possible to race a reclaim so the vget() may fail.  The
348                  * inode must be unlocked during the vget() to avoid a
349                  * deadlock against a reclaim.
350                  */
351                 int wasexclusive;
352
353                 vp = ip->vp;
354                 if (vp) {
355                         /*
356                          * Inode must be unlocked during the vget() to avoid
357                          * possible deadlocks, but leave the ip ref intact.
358                          *
359                          * vnode is held to prevent destruction during the
360                          * vget().  The vget() can still fail if we lost
361                          * a reclaim race on the vnode.
362                          */
363                         hammer2_mtx_state_t ostate;
364
365                         vhold(vp);
366                         ostate = hammer2_inode_lock_temp_release(ip);
367                         if (vget(vp, LK_EXCLUSIVE)) {
368                                 vdrop(vp);
369                                 hammer2_inode_lock_temp_restore(ip, ostate);
370                                 continue;
371                         }
372                         hammer2_inode_lock_temp_restore(ip, ostate);
373                         vdrop(vp);
374                         /* vp still locked and ref from vget */
375                         if (ip->vp != vp) {
376                                 kprintf("hammer2: igetv race %p/%p\n",
377                                         ip->vp, vp);
378                                 vput(vp);
379                                 continue;
380                         }
381                         *errorp = 0;
382                         break;
383                 }
384
385                 /*
386                  * No vnode exists, allocate a new vnode.  Beware of
387                  * allocation races.  This function will return an
388                  * exclusively locked and referenced vnode.
389                  */
390                 *errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
391                 if (*errorp) {
392                         kprintf("hammer2: igetv getnewvnode failed %d\n",
393                                 *errorp);
394                         vp = NULL;
395                         break;
396                 }
397
398                 /*
399                  * Lock the inode and check for an allocation race.
400                  */
401                 wasexclusive = hammer2_inode_lock_upgrade(ip);
402                 if (ip->vp != NULL) {
403                         vp->v_type = VBAD;
404                         vx_put(vp);
405                         hammer2_inode_lock_downgrade(ip, wasexclusive);
406                         continue;
407                 }
408
409                 switch (ripdata->type) {
410                 case HAMMER2_OBJTYPE_DIRECTORY:
411                         vp->v_type = VDIR;
412                         break;
413                 case HAMMER2_OBJTYPE_REGFILE:
414                         vp->v_type = VREG;
415                         vinitvmio(vp, ripdata->size,
416                                   HAMMER2_LBUFSIZE,
417                                   (int)ripdata->size & HAMMER2_LBUFMASK);
418                         break;
419                 case HAMMER2_OBJTYPE_SOFTLINK:
420                         /*
421                          * XXX for now we are using the generic file_read
422                          * and file_write code so we need a buffer cache
423                          * association.
424                          */
425                         vp->v_type = VLNK;
426                         vinitvmio(vp, ripdata->size,
427                                   HAMMER2_LBUFSIZE,
428                                   (int)ripdata->size & HAMMER2_LBUFMASK);
429                         break;
430                 case HAMMER2_OBJTYPE_CDEV:
431                         vp->v_type = VCHR;
432                         /* fall through */
433                 case HAMMER2_OBJTYPE_BDEV:
434                         vp->v_ops = &pmp->mp->mnt_vn_spec_ops;
435                         if (ripdata->type != HAMMER2_OBJTYPE_CDEV)
436                                 vp->v_type = VBLK;
437                         addaliasu(vp, ripdata->rmajor, ripdata->rminor);
438                         break;
439                 case HAMMER2_OBJTYPE_FIFO:
440                         vp->v_type = VFIFO;
441                         vp->v_ops = &pmp->mp->mnt_vn_fifo_ops;
442                         break;
443                 default:
444                         panic("hammer2: unhandled objtype %d", ripdata->type);
445                         break;
446                 }
447
448                 if (ip == pmp->iroot)
449                         vsetflags(vp, VROOT);
450
451                 vp->v_data = ip;
452                 ip->vp = vp;
453                 hammer2_inode_ref(ip);          /* vp association */
454                 hammer2_inode_lock_downgrade(ip, wasexclusive);
455                 break;
456         }
457
458         /*
459          * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
460          */
461         if (hammer2_debug & 0x0002) {
462                 kprintf("igetv vp %p refs 0x%08x aux 0x%08x\n",
463                         vp, vp->v_refcnt, vp->v_auxrefs);
464         }
465         return (vp);
466 }
467
468 /*
469  * Returns the inode associated with the passed-in cluster, creating the
470  * inode if necessary and synchronizing it to the passed-in cluster otherwise.
471  *
472  * The passed-in cluster must be locked and will remain locked on return.
473  * The returned inode will be locked and the caller may dispose of both
474  * via hammer2_inode_unlock_ex().  However, if the caller needs to resolve
475  * a hardlink it must ref/unlock/relock/drop the inode.
476  *
477  * The hammer2_inode structure regulates the interface between the high level
478  * kernel VNOPS API and the filesystem backend (the chains).
479  *
480  * On return the inode is locked with the supplied cluster.
481  */
hammer2_inode_t *
hammer2_inode_get(hammer2_pfs_t *pmp, hammer2_inode_t *dip,
		  hammer2_cluster_t *cluster)
{
	hammer2_inode_t *nip;
	const hammer2_inode_data_t *iptmp;
	const hammer2_inode_data_t *nipdata;

	KKASSERT(cluster == NULL ||
		 hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
	KKASSERT(pmp);

	/*
	 * Interlocked lookup/ref of the inode.  This code is only needed
	 * when looking up inodes with nlinks != 0 (TODO: optimize out
	 * otherwise and test for duplicates).
	 *
	 * Cluster can be NULL during the initial pfs allocation.
	 */
again:
	while (cluster) {
		iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
		nip = hammer2_inode_lookup(pmp, iptmp->inum);
		if (nip == NULL)
			break;

		hammer2_mtx_ex(&nip->lock);

		/*
		 * Handle SMP race (not applicable to the super-root spmp
		 * which can't index inodes due to duplicative inode numbers).
		 */
		if (pmp->spmp_hmp == NULL &&
		    (nip->flags & HAMMER2_INODE_ONRBTREE) == 0) {
			/* raced a destruction; drop and redo the lookup */
			hammer2_mtx_unlock(&nip->lock);
			hammer2_inode_drop(nip);
			continue;
		}
		/* synchronize the existing inode to the passed-in cluster */
		hammer2_inode_repoint(nip, NULL, cluster);

		/* returned locked (mtx held) with the lookup's ref intact */
		return nip;
	}

	/*
	 * We couldn't find the inode number, create a new inode.
	 */
	nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO);
	atomic_add_long(&pmp->inmem_inodes, 1);
	hammer2_pfs_memory_inc(pmp);
	hammer2_pfs_memory_wakeup(pmp);
	if (pmp->spmp_hmp)
		nip->flags = HAMMER2_INODE_SROOT;

	/*
	 * Initialize nip's cluster.  A cluster is provided for normal
	 * inodes but typically not for the super-root or PFS inodes.
	 */
	nip->cluster.refs = 1;
	nip->cluster.pmp = pmp;
	nip->cluster.flags |= HAMMER2_CLUSTER_INODE;
	if (cluster) {
		/* cache frequently accessed meta-data fields in the inode */
		nipdata = &hammer2_cluster_rdata(cluster)->ipdata;
		nip->inum = nipdata->inum;
		nip->size = nipdata->size;
		nip->mtime = nipdata->mtime;
		hammer2_inode_repoint(nip, NULL, cluster);
	} else {
		nip->inum = 1;			/* PFS inum is always 1 XXX */
		/* mtime will be updated when a cluster is available */
	}

	nip->pip = dip;				/* can be NULL */
	if (dip)
		hammer2_inode_ref(dip);	/* ref dip for nip->pip */

	nip->pmp = pmp;

	/*
	 * ref and lock on nip gives it state compatible to after a
	 * hammer2_inode_lock() call.
	 */
	nip->refs = 1;
	hammer2_mtx_init(&nip->lock, "h2inode");
	hammer2_mtx_ex(&nip->lock);
	/* combination of thread lock and chain lock == inode lock */

	/*
	 * Attempt to add the inode.  If it fails we raced another inode
	 * get.  Undo all the work and try again.
	 */
	if (pmp->spmp_hmp == NULL) {
		hammer2_spin_ex(&pmp->inum_spin);
		if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
			hammer2_spin_unex(&pmp->inum_spin);
			hammer2_mtx_unlock(&nip->lock);
			hammer2_inode_drop(nip);
			goto again;
		}
		atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
		hammer2_spin_unex(&pmp->inum_spin);
	}

	return (nip);
}
586
587 /*
588  * Create a new inode in the specified directory using the vattr to
589  * figure out the type of inode.
590  *
591  * If no error occurs the new inode with its cluster locked is returned in
592  * *nipp, otherwise an error is returned and *nipp is set to NULL.
593  *
594  * If vap and/or cred are NULL the related fields are not set and the
595  * inode type defaults to a directory.  This is used when creating PFSs
596  * under the super-root, so the inode number is set to 1 in this case.
597  *
598  * dip is not locked on entry.
599  *
600  * NOTE: When used to create a snapshot, the inode is temporarily associated
601  *       with the super-root spmp. XXX should pass new pmp for snapshot.
602  */
hammer2_inode_t *
hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
		     struct vattr *vap, struct ucred *cred,
		     const uint8_t *name, size_t name_len,
		     hammer2_cluster_t **clusterp,
		     int flags, int *errorp)
{
	const hammer2_inode_data_t *dipdata;
	hammer2_inode_data_t *nipdata;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *cparent;
	hammer2_inode_t *nip;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	int error;
	uid_t xuid;
	uuid_t dip_uid;
	uuid_t dip_gid;
	uint32_t dip_mode;
	uint8_t dip_comp_algo;
	uint8_t dip_check_algo;

	lhc = hammer2_dirhash(name, name_len);
	*errorp = 0;

	/*
	 * Locate the inode or indirect block to create the new
	 * entry in.  At the same time check for key collisions
	 * and iterate until we don't get one.
	 *
	 * NOTE: hidden inodes do not have iterators.
	 */
retry:
	cparent = hammer2_inode_lock(dip, HAMMER2_RESOLVE_ALWAYS);
	dipdata = &hammer2_cluster_rdata(cparent)->ipdata;
	/*
	 * Snapshot the directory fields we need while cparent is locked;
	 * dipdata becomes stale once the directory is unlocked below.
	 */
	dip_uid = dipdata->uid;
	dip_gid = dipdata->gid;
	dip_mode = dipdata->mode;
	dip_comp_algo = dipdata->comp_algo;
	dip_check_algo = dipdata->check_algo;

	/*
	 * Iterate lhc over dirhash collisions; give up when the
	 * low-order iterator bits are exhausted.
	 */
	error = 0;
	while (error == 0) {
		cluster = hammer2_cluster_lookup(cparent, &key_dummy,
						 lhc, lhc, 0);
		if (cluster == NULL)
			break;
		if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
			error = ENOSPC;
		if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
			error = ENOSPC;
		hammer2_cluster_unlock(cluster);
		hammer2_cluster_drop(cluster);
		cluster = NULL;
		++lhc;
	}

	if (error == 0) {
		error = hammer2_cluster_create(trans, cparent, &cluster,
					     lhc, 0,
					     HAMMER2_BREF_TYPE_INODE,
					     HAMMER2_INODE_BYTES,
					     flags);
	}
#if INODE_DEBUG
	kprintf("CREATE INODE %*.*s chain=%p\n",
		(int)name_len, (int)name_len, name,
		(cluster ? cluster->focus : NULL));
#endif

	/*
	 * Cleanup and handle retries.  On EAGAIN the directory must be
	 * unlocked before waiting on the cluster, then the whole lookup
	 * is redone from scratch.
	 */
	if (error == EAGAIN) {
		hammer2_cluster_ref(cparent);
		hammer2_inode_unlock(dip, cparent);
		hammer2_cluster_wait(cparent);
		hammer2_cluster_drop(cparent);
		goto retry;
	}
	hammer2_inode_unlock(dip, cparent);
	cparent = NULL;

	if (error) {
		KKASSERT(cluster == NULL);
		*errorp = error;
		return (NULL);
	}

	/*
	 * Set up the new inode.
	 *
	 * NOTE: *_get() integrates chain's lock into the inode lock.
	 *
	 * NOTE: Only one new inode can currently be created per
	 *       transaction.  If the need arises we can adjust
	 *       hammer2_trans_init() to allow more.
	 *
	 * NOTE: nipdata will have chain's blockset data.
	 */
	KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_MODIFIED);
	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
	nipdata->inum = trans->inode_tid;
	hammer2_cluster_modsync(cluster);
	nip = hammer2_inode_get(dip->pmp, dip, cluster);
	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;

	if (vap) {
		KKASSERT(trans->inodes_created == 0);
		nipdata->type = hammer2_get_obj_type(vap->va_type);
		nipdata->inum = trans->inode_tid;
		++trans->inodes_created;

		switch (nipdata->type) {
		case HAMMER2_OBJTYPE_CDEV:
		case HAMMER2_OBJTYPE_BDEV:
			nipdata->rmajor = vap->va_rmajor;
			nipdata->rminor = vap->va_rminor;
			break;
		default:
			break;
		}
	} else {
		/* no vattr: PFS creation under the super-root */
		nipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
		nipdata->inum = 1;
	}

	/* Inherit parent's inode compression mode. */
	nip->comp_heuristic = 0;
	nipdata->comp_algo = dip_comp_algo;
	nipdata->check_algo = dip_check_algo;
	nipdata->version = HAMMER2_INODE_VERSION_ONE;
	hammer2_update_time(&nipdata->ctime);
	nipdata->mtime = nipdata->ctime;
	if (vap)
		nipdata->mode = vap->va_mode;
	nipdata->nlinks = 1;
	if (vap) {
		/*
		 * uid/gid selection: explicit UUID from vap, then numeric
		 * id from vap, then inherited/derived from the parent dir.
		 */
		if (dip && dip->pmp) {
			xuid = hammer2_to_unix_xid(&dip_uid);
			xuid = vop_helper_create_uid(dip->pmp->mp,
						     dip_mode,
						     xuid,
						     cred,
						     &vap->va_mode);
		} else {
			/* super-root has no dip and/or pmp */
			xuid = 0;
		}
		if (vap->va_vaflags & VA_UID_UUID_VALID)
			nipdata->uid = vap->va_uid_uuid;
		else if (vap->va_uid != (uid_t)VNOVAL)
			hammer2_guid_to_uuid(&nipdata->uid, vap->va_uid);
		else
			hammer2_guid_to_uuid(&nipdata->uid, xuid);

		if (vap->va_vaflags & VA_GID_UUID_VALID)
			nipdata->gid = vap->va_gid_uuid;
		else if (vap->va_gid != (gid_t)VNOVAL)
			hammer2_guid_to_uuid(&nipdata->gid, vap->va_gid);
		else if (dip)
			nipdata->gid = dip_gid;
	}

	/*
	 * Regular files and softlinks allow a small amount of data to be
	 * directly embedded in the inode.  This flag will be cleared if
	 * the size is extended past the embedded limit.
	 */
	if (nipdata->type == HAMMER2_OBJTYPE_REGFILE ||
	    nipdata->type == HAMMER2_OBJTYPE_SOFTLINK) {
		nipdata->op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
	}

	KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
	bcopy(name, nipdata->filename, name_len);
	nipdata->name_key = lhc;
	nipdata->name_len = name_len;
	hammer2_cluster_modsync(cluster);
	*clusterp = cluster;

	return (nip);
}
786
787 /*
788  * The cluster has been removed from the original directory and replaced
789  * with a hardlink pointer.  Move the cluster to the specified parent
790  * directory, change the filename to "0xINODENUMBER", and adjust the key.
791  * The cluster becomes our invisible hardlink target.
792  *
793  * The original cluster must be deleted on entry.
794  */
795 static
796 void
797 hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_cluster_t *cluster,
798                         hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
799                         int nlinks, int *errorp)
800 {
801         const hammer2_inode_data_t *iptmp;
802         hammer2_inode_data_t *nipdata;
803         hammer2_cluster_t *xcluster;
804         hammer2_key_t key_dummy;
805         hammer2_key_t lhc;
806         hammer2_blockref_t bref;
807
808         iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
809         lhc = iptmp->inum;
810         KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);
811
812         /*
813          * Locate the inode or indirect block to create the new
814          * entry in.  lhc represents the inode number so there is
815          * no collision iteration.
816          *
817          * There should be no key collisions with invisible inode keys.
818          *
819          * WARNING! Must use inode_lock_ex() on dip to handle a stale
820          *          dip->cluster cache.
821          */
822         *errorp = 0;
823         xcluster = hammer2_cluster_lookup(dcluster, &key_dummy,
824                                       lhc, lhc, 0);
825         if (xcluster) {
826                 kprintf("X3 chain %p dip %p dchain %p dip->chain %p\n",
827                         xcluster->focus, dip, dcluster->focus,
828                         dip->cluster.focus);
829                 hammer2_cluster_unlock(xcluster);
830                 hammer2_cluster_drop(xcluster);
831                 xcluster = NULL;
832                 *errorp = ENOSPC;
833 #if 0
834                 Debugger("X3");
835 #endif
836         }
837
838         /*
839          * Handle the error case
840          */
841         if (*errorp) {
842                 panic("error2");
843                 KKASSERT(xcluster == NULL);
844                 return;
845         }
846
847         /*
848          * Use xcluster as a placeholder for (lhc).  Duplicate cluster to the
849          * same target bref as xcluster and then delete xcluster.  The
850          * duplication occurs after xcluster in flush order even though
851          * xcluster is deleted after the duplication. XXX
852          *
853          * WARNING! Duplications (to a different parent) can cause indirect
854          *          blocks to be inserted, refactor xcluster.
855          *
856          * WARNING! Only key and keybits is extracted from a passed-in bref.
857          */
858         hammer2_cluster_bref(cluster, &bref);
859         bref.key = lhc;                 /* invisible dir entry key */
860         bref.keybits = 0;
861         hammer2_cluster_rename(trans, &bref, dcluster, cluster, 0);
862
863         /*
864          * cluster is now 'live' again.. adjust the filename.
865          *
866          * Directory entries are inodes but this is a hidden hardlink
867          * target.  The name isn't used but to ease debugging give it
868          * a name after its inode number.
869          */
870         hammer2_cluster_modify(trans, cluster, 0);
871         nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
872         ksnprintf(nipdata->filename, sizeof(nipdata->filename),
873                   "0x%016jx", (intmax_t)nipdata->inum);
874         nipdata->name_len = strlen(nipdata->filename);
875         nipdata->name_key = lhc;
876         nipdata->nlinks += nlinks;
877         hammer2_cluster_modsync(cluster);
878 }
879
880 /*
881  * Connect the target inode represented by (cluster) to the media topology
882  * at (dip, name, len).  The caller can pass a rough *chainp, this function
883  * will issue lookup()s to position the parent chain properly for the
884  * chain insertion.
885  *
886  * If hlink is TRUE this function creates an OBJTYPE_HARDLINK directory
887  * entry instead of connecting (cluster).
888  *
889  * If hlink is FALSE this function expects (cluster) to be unparented.
890  */
891 int
892 hammer2_inode_connect(hammer2_trans_t *trans,
893                       hammer2_cluster_t **clusterp, int hlink,
894                       hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
895                       const uint8_t *name, size_t name_len,
896                       hammer2_key_t lhc)
897 {
898         hammer2_inode_data_t *wipdata;
899         hammer2_cluster_t *ocluster;
900         hammer2_cluster_t *ncluster;
901         hammer2_key_t key_dummy;
902         int error;
903
904         /*
905          * Since ocluster is either disconnected from the topology or
906          * represents a hardlink terminus which is always a parent of or
907          * equal to dip, we should be able to safely lock dip->chain for
908          * our setup.
909          *
910          * WARNING! Must use inode_lock_ex() on dip to handle a stale
911          *          dip->cluster.
912          *
913          * If name is non-NULL we calculate lhc, else we use the passed-in
914          * lhc.
915          */
916         ocluster = *clusterp;
917
918         if (name) {
919                 lhc = hammer2_dirhash(name, name_len);
920
921                 /*
922                  * Locate the inode or indirect block to create the new
923                  * entry in.  At the same time check for key collisions
924                  * and iterate until we don't get one.
925                  */
926                 error = 0;
927                 while (error == 0) {
928                         ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
929                                                       lhc, lhc, 0);
930                         if (ncluster == NULL)
931                                 break;
932                         if ((lhc & HAMMER2_DIRHASH_LOMASK) ==
933                             HAMMER2_DIRHASH_LOMASK) {
934                                 error = ENOSPC;
935                         }
936                         hammer2_cluster_unlock(ncluster);
937                         hammer2_cluster_drop(ncluster);
938                         ncluster = NULL;
939                         ++lhc;
940                 }
941         } else {
942                 /*
943                  * Reconnect to specific key (used when moving
944                  * unlinked-but-open files into the hidden directory).
945                  */
946                 ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
947                                                   lhc, lhc, 0);
948                 KKASSERT(ncluster == NULL);
949         }
950
951         if (error == 0) {
952                 if (hlink) {
953                         /*
954                          * Hardlink pointer needed, create totally fresh
955                          * directory entry.
956                          *
957                          * We must refactor ocluster because it might have
958                          * been shifted into an indirect cluster by the
959                          * create.
960                          */
961                         KKASSERT(ncluster == NULL);
962                         error = hammer2_cluster_create(trans,
963                                                        dcluster, &ncluster,
964                                                        lhc, 0,
965                                                        HAMMER2_BREF_TYPE_INODE,
966                                                        HAMMER2_INODE_BYTES,
967                                                        0);
968                 } else {
969                         /*
970                          * Reconnect the original cluster under the new name.
971                          * Original cluster must have already been deleted by
972                          * teh caller.
973                          *
974                          * WARNING! Can cause held-over clusters to require a
975                          *          refactor.  Fortunately we have none (our
976                          *          locked clusters are passed into and
977                          *          modified by the call).
978                          */
979                         ncluster = ocluster;
980                         ocluster = NULL;
981                         error = hammer2_cluster_create(trans,
982                                                        dcluster, &ncluster,
983                                                        lhc, 0,
984                                                        HAMMER2_BREF_TYPE_INODE,
985                                                        HAMMER2_INODE_BYTES,
986                                                        0);
987                 }
988         }
989
990         /*
991          * Unlock stuff.
992          */
993         KKASSERT(error != EAGAIN);
994
995         /*
996          * ncluster should be NULL on error, leave ocluster
997          * (ocluster == *clusterp) alone.
998          */
999         if (error) {
1000                 KKASSERT(ncluster == NULL);
1001                 return (error);
1002         }
1003
1004         /*
1005          * Directory entries are inodes so if the name has changed we have
1006          * to update the inode.
1007          *
1008          * When creating an OBJTYPE_HARDLINK entry remember to unlock the
1009          * cluster, the caller will access the hardlink via the actual hardlink
1010          * target file and not the hardlink pointer entry, so we must still
1011          * return ocluster.
1012          */
1013         if (hlink && hammer2_hardlink_enable >= 0) {
1014                 /*
1015                  * Create the HARDLINK pointer.  oip represents the hardlink
1016                  * target in this situation.
1017                  *
1018                  * We will return ocluster (the hardlink target).
1019                  */
1020                 hammer2_cluster_modify(trans, ncluster, 0);
1021                 hammer2_cluster_clr_chainflags(ncluster,
1022                                                HAMMER2_CHAIN_UNLINKED);
1023                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1024                 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1025                 bcopy(name, wipdata->filename, name_len);
1026                 wipdata->name_key = lhc;
1027                 wipdata->name_len = name_len;
1028                 wipdata->target_type =
1029                                 hammer2_cluster_rdata(ocluster)->ipdata.type;
1030                 wipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1031                 wipdata->inum = hammer2_cluster_rdata(ocluster)->ipdata.inum;
1032                 wipdata->version = HAMMER2_INODE_VERSION_ONE;
1033                 wipdata->nlinks = 1;
1034                 wipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1035                 hammer2_cluster_modsync(ncluster);
1036                 hammer2_cluster_unlock(ncluster);
1037                 hammer2_cluster_drop(ncluster);
1038                 ncluster = ocluster;
1039                 ocluster = NULL;
1040         } else {
1041                 /*
1042                  * ncluster is a duplicate of ocluster at the new location.
1043                  * We must fixup the name stored in the inode data.
1044                  * The bref key has already been adjusted by inode_connect().
1045                  */
1046                 hammer2_cluster_modify(trans, ncluster, 0);
1047                 hammer2_cluster_clr_chainflags(ncluster,
1048                                                HAMMER2_CHAIN_UNLINKED);
1049                 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1050
1051                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1052                 bcopy(name, wipdata->filename, name_len);
1053                 wipdata->name_key = lhc;
1054                 wipdata->name_len = name_len;
1055                 wipdata->nlinks = 1;
1056                 hammer2_cluster_modsync(ncluster);
1057         }
1058
1059         /*
1060          * We are replacing ocluster with ncluster, unlock ocluster.  In the
1061          * case where ocluster is left unchanged the code above sets
1062          * ncluster to ocluster and ocluster to NULL, resulting in a NOP here.
1063          */
1064         if (ocluster) {
1065                 hammer2_cluster_unlock(ocluster);
1066                 hammer2_cluster_drop(ocluster);
1067         }
1068         *clusterp = ncluster;
1069
1070         return (0);
1071 }
1072
1073 /*
1074  * Repoint ip->cluster's chains to cluster's chains and fixup the default
1075  * focus.
1076  *
1077  * Caller must hold the inode and cluster exclusive locked, if not NULL,
1078  * must also be locked.
1079  *
1080  * Cluster may be NULL to clean out any chains in ip->cluster.
1081  */
1082 void
1083 hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
1084                       hammer2_cluster_t *cluster)
1085 {
1086         hammer2_chain_t *ochain;
1087         hammer2_chain_t *nchain;
1088         hammer2_inode_t *opip;
1089         int i;
1090
1091         /*
1092          * Replace chains in ip->cluster with chains from cluster and
1093          * adjust the focus if necessary.
1094          *
1095          * NOTE: nchain and/or ochain can be NULL due to gaps
1096          *       in the cluster arrays.
1097          */
1098         for (i = 0; cluster && i < cluster->nchains; ++i) {
1099                 nchain = cluster->array[i].chain;
1100                 if (i < ip->cluster.nchains) {
1101                         ochain = ip->cluster.array[i].chain;
1102                         if (ochain == nchain)
1103                                 continue;
1104                 } else {
1105                         ochain = NULL;
1106                 }
1107
1108                 /*
1109                  * Make adjustments
1110                  */
1111                 ip->cluster.array[i].chain = nchain;
1112                 if (nchain)
1113                         hammer2_chain_ref(nchain);
1114                 if (ochain)
1115                         hammer2_chain_drop(ochain);
1116         }
1117
1118         /*
1119          * Release any left-over chains in ip->cluster.
1120          */
1121         while (i < ip->cluster.nchains) {
1122                 nchain = ip->cluster.array[i].chain;
1123                 if (nchain) {
1124                         ip->cluster.array[i].chain = NULL;
1125                         hammer2_chain_drop(nchain);
1126                 }
1127                 ++i;
1128         }
1129
1130         /*
1131          * Fixup fields.  Note that the inode-embedded cluster is never
1132          * directly locked.
1133          */
1134         if (cluster) {
1135                 ip->cluster.nchains = cluster->nchains;
1136                 ip->cluster.focus = cluster->focus;
1137                 ip->cluster.flags = cluster->flags & ~HAMMER2_CLUSTER_LOCKED;
1138         } else {
1139                 ip->cluster.nchains = 0;
1140                 ip->cluster.focus = NULL;
1141                 ip->cluster.flags &= ~HAMMER2_CLUSTER_ZFLAGS;
1142         }
1143
1144         /*
1145          * Repoint ip->pip if requested (non-NULL pip).
1146          */
1147         if (pip && ip->pip != pip) {
1148                 opip = ip->pip;
1149                 hammer2_inode_ref(pip);
1150                 ip->pip = pip;
1151                 if (opip)
1152                         hammer2_inode_drop(opip);
1153         }
1154 }
1155
1156 /*
1157  * Unlink the file from the specified directory inode.  The directory inode
1158  * does not need to be locked.
1159  *
1160  * isdir determines whether a directory/non-directory check should be made.
1161  * No check is made if isdir is set to -1.
1162  *
1163  * isopen specifies whether special unlink-with-open-descriptor handling
1164  * must be performed.  If set to -1 the caller is deleting a PFS and we
1165  * check whether the chain is mounted or not (chain->pmp != NULL).  1 is
1166  * implied if it is mounted.
1167  *
1168  * If isopen is 1 and nlinks drops to 0 this function must move the chain
1169  * to a special hidden directory until last-close occurs on the file.
1170  *
1171  * NOTE!  The underlying file can still be active with open descriptors
1172  *        or if the chain is being manually held (e.g. for rename).
1173  *
1174  *        The caller is responsible for fixing up ip->chain if e.g. a
1175  *        rename occurs (see chain_duplicate()).
1176  *
1177  * NOTE!  The chain is not deleted if it is moved to the hidden directory,
1178  *        but otherwise will be deleted.
1179  */
1180 int
1181 hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
1182                     const uint8_t *name, size_t name_len,
1183                     int isdir, int *hlinkp, struct nchandle *nch,
1184                     int nlinks)
1185 {
1186         const hammer2_inode_data_t *ripdata;
1187         hammer2_inode_data_t *wipdata;
1188         hammer2_cluster_t *cparent;
1189         hammer2_cluster_t *hcluster;
1190         hammer2_cluster_t *hparent;
1191         hammer2_cluster_t *cluster;
1192         hammer2_cluster_t *dparent;
1193         hammer2_cluster_t *dcluster;
1194         hammer2_key_t key_dummy;
1195         hammer2_key_t key_next;
1196         hammer2_key_t lhc;
1197         int error;
1198         int hlink;
1199         uint8_t type;
1200
1201         error = 0;
1202         hlink = 0;
1203         hcluster = NULL;
1204         hparent = NULL;
1205         lhc = hammer2_dirhash(name, name_len);
1206
1207 again:
1208         /*
1209          * Search for the filename in the directory
1210          */
1211         cparent = hammer2_inode_lock(dip, HAMMER2_RESOLVE_ALWAYS);
1212         cluster = hammer2_cluster_lookup(cparent, &key_next,
1213                                      lhc, lhc + HAMMER2_DIRHASH_LOMASK, 0);
1214         while (cluster) {
1215                 if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
1216                         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1217                         if (ripdata->name_len == name_len &&
1218                             bcmp(ripdata->filename, name, name_len) == 0) {
1219                                 break;
1220                         }
1221                 }
1222                 cluster = hammer2_cluster_next(cparent, cluster, &key_next,
1223                                                key_next,
1224                                                lhc + HAMMER2_DIRHASH_LOMASK,
1225                                                0);
1226         }
1227         hammer2_inode_unlock(dip, NULL);        /* retain cparent */
1228
1229         /*
1230          * Not found or wrong type (isdir < 0 disables the type check).
1231          * If a hardlink pointer, type checks use the hardlink target.
1232          */
1233         if (cluster == NULL) {
1234                 error = ENOENT;
1235                 goto done;
1236         }
1237         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1238         type = ripdata->type;
1239         if (type == HAMMER2_OBJTYPE_HARDLINK) {
1240                 hlink = 1;
1241                 type = ripdata->target_type;
1242         }
1243
1244         if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
1245                 error = ENOTDIR;
1246                 goto done;
1247         }
1248         if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir >= 1) {
1249                 error = EISDIR;
1250                 goto done;
1251         }
1252
1253         /*
1254          * Hardlink must be resolved.  We can't hold the parent locked
1255          * while we do this or we could deadlock.  The physical file will
1256          * be located at or above the current directory.
1257          *
1258          * We loop to reacquire the hardlink origination.
1259          *
1260          * NOTE: hammer2_hardlink_find() will locate the hardlink target,
1261          *       returning a modified hparent and hcluster.
1262          */
1263         if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) {
1264                 if (hcluster == NULL) {
1265                         hcluster = cluster;
1266                         cluster = NULL; /* safety */
1267                         hammer2_cluster_unlock(cparent);
1268                         hammer2_cluster_drop(cparent);
1269                         cparent = NULL; /* safety */
1270                         ripdata = NULL; /* safety (associated w/cparent) */
1271                         error = hammer2_hardlink_find(dip, &hparent, &hcluster);
1272
1273                         /*
1274                          * If we couldn't find the hardlink target then some
1275                          * parent directory containing the hardlink pointer
1276                          * probably got renamed to above the original target,
1277                          * a case not yet handled by H2.
1278                          */
1279                         if (error) {
1280                                 kprintf("H2 unlink_file: hardlink target for "
1281                                         "\"%s\" not found\n",
1282                                         name);
1283                                 kprintf("(likely due to known directory "
1284                                         "rename bug)\n");
1285                                 goto done;
1286                         }
1287                         goto again;
1288                 }
1289         }
1290
1291         /*
1292          * If this is a directory the directory must be empty.  However, if
1293          * isdir < 0 we are doing a rename and the directory does not have
1294          * to be empty, and if isdir > 1 we are deleting a PFS/snapshot
1295          * and the directory does not have to be empty.
1296          *
1297          * NOTE: We check the full key range here which covers both visible
1298          *       and invisible entries.  Theoretically there should be no
1299          *       invisible (hardlink target) entries if there are no visible
1300          *       entries.
1301          */
1302         if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
1303                 dparent = hammer2_cluster_lookup_init(cluster, 0);
1304                 dcluster = hammer2_cluster_lookup(dparent, &key_dummy,
1305                                                   0, (hammer2_key_t)-1,
1306                                                   HAMMER2_LOOKUP_NODATA);
1307                 if (dcluster) {
1308                         hammer2_cluster_unlock(dcluster);
1309                         hammer2_cluster_drop(dcluster);
1310                         hammer2_cluster_lookup_done(dparent);
1311                         error = ENOTEMPTY;
1312                         goto done;
1313                 }
1314                 hammer2_cluster_lookup_done(dparent);
1315                 dparent = NULL;
1316                 /* dcluster NULL */
1317         }
1318
1319         /*
1320          * If this was a hardlink then (cparent, cluster) is the hardlink
1321          * pointer, which we can simply destroy outright.  Discard the
1322          * clusters and replace with the hardlink target.
1323          */
1324         if (hcluster) {
1325                 hammer2_cluster_delete(trans, cparent, cluster,
1326                                        HAMMER2_DELETE_PERMANENT);
1327                 hammer2_cluster_unlock(cparent);
1328                 hammer2_cluster_drop(cparent);
1329                 hammer2_cluster_unlock(cluster);
1330                 hammer2_cluster_drop(cluster);
1331                 cparent = hparent;
1332                 cluster = hcluster;
1333                 hparent = NULL;
1334                 hcluster = NULL;
1335         }
1336
1337         /*
1338          * This leaves us with the hardlink target or non-hardlinked file
1339          * or directory in (cparent, cluster).
1340          *
1341          * Delete the target when nlinks reaches 0 with special handling
1342          * if (isopen) is set.
1343          *
1344          * NOTE! In DragonFly the vnops function calls cache_unlink() after
1345          *       calling us here to clean out the namecache association,
1346          *       (which does not represent a ref for the open-test), and to
1347          *       force finalization of the vnode if/when the last ref gets
1348          *       dropped.
1349          *
1350          * NOTE! Files are unlinked by rename and then relinked.  nch will be
1351          *       passed as NULL in this situation.  hammer2_inode_connect()
1352          *       will bump nlinks.
1353          */
1354         KKASSERT(cluster != NULL);
1355         hammer2_cluster_modify(trans, cluster, 0);
1356         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1357         ripdata = wipdata;
1358         wipdata->nlinks += nlinks;
1359         if ((int64_t)wipdata->nlinks < 0) {     /* XXX debugging */
1360                 wipdata->nlinks = 0;
1361         }
1362         hammer2_cluster_modsync(cluster);
1363
1364         if (wipdata->nlinks == 0) {
1365                 /*
1366                  * Target nlinks has reached 0, file now unlinked (but may
1367                  * still be open).
1368                  */
1369                 /* XXX need interlock if mounted
1370                 if ((cluster->focus->flags & HAMMER2_CHAIN_PFSROOT) &&
1371                     cluster->pmp) {
1372                         error = EINVAL;
1373                         kprintf("hammer2: PFS \"%s\" cannot be deleted "
1374                                 "while still mounted\n",
1375                                 wipdata->filename);
1376                         goto done;
1377                 }
1378                 */
1379                 hammer2_cluster_set_chainflags(cluster, HAMMER2_CHAIN_UNLINKED);
1380                 if (nch && cache_isopen(nch)) {
1381                         hammer2_inode_move_to_hidden(trans, &cparent, &cluster,
1382                                                      wipdata->inum);
1383                 } else {
1384                         /*
1385                          * This won't get everything if a vnode is still
1386                          * present, but the cache_unlink() call the caller
1387                          * makes will.
1388                          */
1389                         hammer2_cluster_delete(trans, cparent, cluster,
1390                                                HAMMER2_DELETE_PERMANENT);
1391                 }
1392         } else if (hlink == 0) {
1393                 /*
1394                  * In this situation a normal non-hardlinked file (which can
1395                  * only have nlinks == 1) still has a non-zero nlinks, the
1396                  * caller must be doing a RENAME operation and so is passing
1397                  * a nlinks adjustment of 0, and only wishes to remove file
1398                  * in order to be able to reconnect it under a different name.
1399                  *
1400                  * In this situation we do a non-permanent deletion of the
1401                  * chain in order to allow the file to be reconnected in
1402                  * a different location.
1403                  */
1404                 KKASSERT(nlinks == 0);
1405                 hammer2_cluster_delete(trans, cparent, cluster, 0);
1406         }
1407         error = 0;
1408 done:
1409         if (cparent) {
1410                 hammer2_cluster_unlock(cparent);
1411                 hammer2_cluster_drop(cparent);
1412         }
1413         if (cluster) {
1414                 hammer2_cluster_unlock(cluster);
1415                 hammer2_cluster_drop(cluster);
1416         }
1417         if (hparent) {
1418                 hammer2_cluster_unlock(hparent);
1419                 hammer2_cluster_drop(hparent);
1420         }
1421         if (hcluster) {
1422                 hammer2_cluster_unlock(hcluster);
1423                 hammer2_cluster_drop(hcluster);
1424         }
1425         if (hlinkp)
1426                 *hlinkp = hlink;
1427
1428         return error;
1429 }
1430
1431 /*
1432  * This is called from the mount code to initialize pmp->ihidden
1433  */
1434 void
1435 hammer2_inode_install_hidden(hammer2_pfs_t *pmp)
1436 {
1437         hammer2_trans_t trans;
1438         hammer2_cluster_t *cparent;
1439         hammer2_cluster_t *cluster;
1440         hammer2_cluster_t *scan;
1441         const hammer2_inode_data_t *ripdata;
1442         hammer2_inode_data_t *wipdata;
1443         hammer2_key_t key_dummy;
1444         hammer2_key_t key_next;
1445         int error;
1446         int count;
1447         int dip_check_algo;
1448         int dip_comp_algo;
1449
1450         if (pmp->ihidden)
1451                 return;
1452
1453         /*
1454          * Find the hidden directory
1455          */
1456         bzero(&key_dummy, sizeof(key_dummy));
1457         hammer2_trans_init(&trans, pmp, 0);
1458
1459         /*
1460          * Setup for lookup, retrieve iroot's check and compression
1461          * algorithm request which was likely generated by newfs_hammer2.
1462          *
1463          * The check/comp fields will probably never be used since inodes
1464          * are renamed into the hidden directory and not created relative to
1465          * the hidden directory, chain creation inherits from bref.methods,
1466          * and data chains inherit from their respective file inode *_algo
1467          * fields.
1468          */
1469         cparent = hammer2_inode_lock(pmp->iroot, HAMMER2_RESOLVE_ALWAYS);
1470         ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
1471         dip_check_algo = ripdata->check_algo;
1472         dip_comp_algo = ripdata->comp_algo;
1473         ripdata = NULL;
1474
1475         cluster = hammer2_cluster_lookup(cparent, &key_dummy,
1476                                          HAMMER2_INODE_HIDDENDIR,
1477                                          HAMMER2_INODE_HIDDENDIR,
1478                                          0);
1479         if (cluster) {
1480                 pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
1481                 hammer2_inode_ref(pmp->ihidden);
1482
1483                 /*
1484                  * Remove any unlinked files which were left open as-of
1485                  * any system crash.
1486                  *
1487                  * Don't pass NODATA, we need the inode data so the delete
1488                  * can do proper statistics updates.
1489                  */
1490                 count = 0;
1491                 scan = hammer2_cluster_lookup(cluster, &key_next,
1492                                               0, HAMMER2_TID_MAX, 0);
1493                 while (scan) {
1494                         if (hammer2_cluster_type(scan) ==
1495                             HAMMER2_BREF_TYPE_INODE) {
1496                                 hammer2_cluster_delete(&trans, cluster, scan,
1497                                                    HAMMER2_DELETE_PERMANENT);
1498                                 ++count;
1499                         }
1500                         scan = hammer2_cluster_next(cluster, scan, &key_next,
1501                                                     0, HAMMER2_TID_MAX, 0);
1502                 }
1503
1504                 hammer2_inode_unlock(pmp->ihidden, cluster);
1505                 hammer2_inode_unlock(pmp->iroot, cparent);
1506                 hammer2_trans_done(&trans);
1507                 kprintf("hammer2: PFS loaded hidden dir, "
1508                         "removed %d dead entries\n", count);
1509                 return;
1510         }
1511
1512         /*
1513          * Create the hidden directory
1514          */
1515         error = hammer2_cluster_create(&trans, cparent, &cluster,
1516                                        HAMMER2_INODE_HIDDENDIR, 0,
1517                                        HAMMER2_BREF_TYPE_INODE,
1518                                        HAMMER2_INODE_BYTES,
1519                                        0);
1520         hammer2_inode_unlock(pmp->iroot, cparent);
1521
1522         hammer2_cluster_modify(&trans, cluster, 0);
1523         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1524         wipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
1525         wipdata->inum = HAMMER2_INODE_HIDDENDIR;
1526         wipdata->nlinks = 1;
1527         wipdata->comp_algo = dip_comp_algo;
1528         wipdata->check_algo = dip_check_algo;
1529         hammer2_cluster_modsync(cluster);
1530         kprintf("hammer2: PFS root missing hidden directory, creating\n");
1531
1532         pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
1533         hammer2_inode_ref(pmp->ihidden);
1534         hammer2_inode_unlock(pmp->ihidden, cluster);
1535         hammer2_trans_done(&trans);
1536 }
1537
1538 /*
1539  * If an open file is unlinked H2 needs to retain the file in the topology
1540  * to ensure that its backing store is not recovered by the bulk free scan.
1541  * This also allows us to avoid having to special-case the CHAIN_DELETED flag.
1542  *
1543  * To do this the file is moved to a hidden directory in the PFS root and
1544  * renamed.  The hidden directory must be created if it does not exist.
1545  */
static
void
hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
			     hammer2_cluster_t **cparentp,
			     hammer2_cluster_t **clusterp,
			     hammer2_tid_t inum)
{
	hammer2_cluster_t *dcluster;
	hammer2_pfs_t *pmp;
	int error;

	/*
	 * The hidden directory must already exist; it is created at
	 * mount/PFS-load time, so its absence here is a logic error.
	 */
	pmp = (*clusterp)->pmp;
	KKASSERT(pmp != NULL);
	KKASSERT(pmp->ihidden != NULL);

	/*
	 * Delete the file from its current directory first, then lock
	 * the hidden directory and reconnect the inode there under its
	 * inode number (inum) as the key.  This ordering keeps the
	 * caller-held cparent lock meaningful during the move.
	 */
	hammer2_cluster_delete(trans, *cparentp, *clusterp, 0);
	dcluster = hammer2_inode_lock(pmp->ihidden, HAMMER2_RESOLVE_ALWAYS);
	error = hammer2_inode_connect(trans, clusterp, 0,
				      pmp->ihidden, dcluster,
				      NULL, 0, inum);
	hammer2_inode_unlock(pmp->ihidden, dcluster);
	/* connect into the hidden dir must not fail */
	KKASSERT(error == 0);
}
1569
1570 /*
1571  * Given an exclusively locked inode and cluster we consolidate the cluster
1572  * for hardlink creation, adding (nlinks) to the file's link count and
1573  * potentially relocating the inode to (cdip) which is a parent directory
1574  * common to both the current location of the inode and the intended new
1575  * hardlink.
1576  *
1577  * Replaces (*clusterp) if consolidation occurred, unlocking the old cluster
1578  * and returning a new locked cluster.
1579  *
1580  * NOTE!  This function will also replace ip->cluster.
1581  */
1582 int
1583 hammer2_hardlink_consolidate(hammer2_trans_t *trans,
1584                              hammer2_inode_t *ip,
1585                              hammer2_cluster_t **clusterp,
1586                              hammer2_inode_t *cdip,
1587                              hammer2_cluster_t *cdcluster,
1588                              int nlinks)
1589 {
1590         const hammer2_inode_data_t *ripdata;
1591         hammer2_inode_data_t *wipdata;
1592         hammer2_cluster_t *cluster;
1593         hammer2_cluster_t *cparent;
1594         int error;
1595
1596         cluster = *clusterp;
1597         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1598         if (nlinks == 0 &&                      /* no hardlink needed */
1599             (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE)) {
1600                 return (0);
1601         }
1602
1603         if (hammer2_hardlink_enable == 0) {     /* disallow hardlinks */
1604                 hammer2_cluster_unlock(cluster);
1605                 hammer2_cluster_drop(cluster);
1606                 *clusterp = NULL;
1607                 return (ENOTSUP);
1608         }
1609
1610         cparent = NULL;
1611
1612         /*
1613          * If no change in the hardlink's target directory is required and
1614          * this is already a hardlink target, all we need to do is adjust
1615          * the link count.
1616          */
1617         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1618         if (cdip == ip->pip &&
1619             (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
1620                 if (nlinks) {
1621                         hammer2_cluster_modify(trans, cluster, 0);
1622                         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1623                         wipdata->nlinks += nlinks;
1624                         hammer2_cluster_modsync(cluster);
1625                         ripdata = wipdata;
1626                 }
1627                 error = 0;
1628                 goto done;
1629         }
1630
1631         /*
1632          * Cluster is the real inode.  The originating directory is locked
1633          * by the caller so we can manipulate it without worrying about races
1634          * against other lookups.
1635          *
1636          * If cluster is visible we need to delete it from the current
1637          * location and create a hardlink pointer in its place.  If it is
1638          * not visible we need only delete it.  Then later cluster will be
1639          * renamed to a parent directory and converted (if necessary) to
1640          * a hidden inode (via shiftup).
1641          *
1642          * NOTE! We must hold cparent locked through the delete/create/rename
1643          *       operation to ensure that other threads block resolving to
1644          *       the same hardlink, otherwise the other threads may not see
1645          *       the hardlink.
1646          */
1647         KKASSERT((cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0);
1648         cparent = hammer2_cluster_parent(cluster);
1649
1650         hammer2_cluster_delete(trans, cparent, cluster, 0);
1651
1652         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1653         KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
1654         if (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) {
1655                 hammer2_cluster_t *ncluster;
1656                 hammer2_key_t lhc;
1657
1658                 ncluster = NULL;
1659                 lhc = cluster->focus->bref.key;
1660                 error = hammer2_cluster_create(trans, cparent, &ncluster,
1661                                              lhc, 0,
1662                                              HAMMER2_BREF_TYPE_INODE,
1663                                              HAMMER2_INODE_BYTES,
1664                                              0);
1665                 hammer2_cluster_modify(trans, ncluster, 0);
1666                 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1667
1668                 /* wipdata->comp_algo = ripdata->comp_algo; */
1669                 wipdata->comp_algo = 0;
1670                 wipdata->check_algo = 0;
1671                 wipdata->version = HAMMER2_INODE_VERSION_ONE;
1672                 wipdata->inum = ripdata->inum;
1673                 wipdata->target_type = ripdata->type;
1674                 wipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1675                 wipdata->uflags = 0;
1676                 wipdata->rmajor = 0;
1677                 wipdata->rminor = 0;
1678                 wipdata->ctime = 0;
1679                 wipdata->mtime = 0;
1680                 wipdata->atime = 0;
1681                 wipdata->btime = 0;
1682                 bzero(&wipdata->uid, sizeof(wipdata->uid));
1683                 bzero(&wipdata->gid, sizeof(wipdata->gid));
1684                 wipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1685                 wipdata->cap_flags = 0;
1686                 wipdata->mode = 0;
1687                 wipdata->size = 0;
1688                 wipdata->nlinks = 1;
1689                 wipdata->iparent = 0;   /* XXX */
1690                 wipdata->pfs_type = 0;
1691                 wipdata->pfs_inum = 0;
1692                 bzero(&wipdata->pfs_clid, sizeof(wipdata->pfs_clid));
1693                 bzero(&wipdata->pfs_fsid, sizeof(wipdata->pfs_fsid));
1694                 wipdata->data_quota = 0;
1695                 wipdata->data_count = 0;
1696                 wipdata->inode_quota = 0;
1697                 wipdata->inode_count = 0;
1698                 wipdata->attr_tid = 0;
1699                 wipdata->dirent_tid = 0;
1700                 bzero(&wipdata->u, sizeof(wipdata->u));
1701                 bcopy(ripdata->filename, wipdata->filename, ripdata->name_len);
1702                 wipdata->name_key = ncluster->focus->bref.key;
1703                 wipdata->name_len = ripdata->name_len;
1704                 /* XXX transaction ids */
1705                 hammer2_cluster_modsync(ncluster);
1706                 hammer2_cluster_unlock(ncluster);
1707                 hammer2_cluster_drop(ncluster);
1708         }
1709         ripdata = wipdata;
1710
1711         /*
1712          * cluster represents the hardlink target and is now flagged deleted.
1713          * duplicate it to the parent directory and adjust nlinks.
1714          *
1715          * WARNING! The shiftup() call can cause ncluster to be moved into
1716          *          an indirect block, and our ncluster will wind up pointing
1717          *          to the older/original version.
1718          */
1719         KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_DELETED);
1720         hammer2_hardlink_shiftup(trans, cluster, cdip, cdcluster,
1721                                  nlinks, &error);
1722
1723         if (error == 0)
1724                 hammer2_inode_repoint(ip, cdip, cluster);
1725
1726 done:
1727         /*
1728          * Cleanup, cluster/ncluster already dealt with.
1729          *
1730          * Return the shifted cluster in *clusterp.
1731          */
1732         if (cparent) {
1733                 hammer2_cluster_unlock(cparent);
1734                 hammer2_cluster_drop(cparent);
1735         }
1736         *clusterp = cluster;
1737
1738         return (error);
1739 }
1740
1741 /*
1742  * If (*ochainp) is non-NULL it points to the forward OBJTYPE_HARDLINK
1743  * inode while (*chainp) points to the resolved (hidden hardlink
1744  * target) inode.  In this situation when nlinks is 1 we wish to
1745  * deconsolidate the hardlink, moving it back to the directory that now
1746  * represents the only remaining link.
1747  */
1748 int
1749 hammer2_hardlink_deconsolidate(hammer2_trans_t *trans,
1750                                hammer2_inode_t *dip,
1751                                hammer2_chain_t **chainp,
1752                                hammer2_chain_t **ochainp)
1753 {
1754         if (*ochainp == NULL)
1755                 return (0);
1756         /* XXX */
1757         return (0);
1758 }
1759
1760 /*
1761  * The caller presents a locked cluster with an obj_type of
1762  * HAMMER2_OBJTYPE_HARDLINK in (*clusterp).  This routine will locate
1763  * the inode and replace (*clusterp) with a new locked cluster containing
1764  * the target hardlink, also locked.  The original cluster will be
1765  * unlocked and released.
1766  *
1767  * If cparentp is not NULL a locked cluster representing the hardlink's
1768  * parent is also returned.
1769  *
1770  * If we are unable to locate the hardlink target EIO is returned,
1771  * (*cparentp) is set to NULL, the original passed-in (*clusterp)
1772  * will be unlocked and released and (*clusterp) will be set to NULL
1773  * as well.
1774  */
int
hammer2_hardlink_find(hammer2_inode_t *dip,
		      hammer2_cluster_t **cparentp,
		      hammer2_cluster_t **clusterp)
{
	const hammer2_inode_data_t *ipdata;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *rcluster;
	hammer2_inode_t *ip;
	hammer2_inode_t *pip;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;

	cluster = *clusterp;
	pip = dip;
	hammer2_inode_ref(pip);		/* for loop */

	/*
	 * Locate the hardlink.  pip is referenced and not locked.
	 * Unlock and release (*clusterp) after extracting the needed
	 * data.
	 */
	ipdata = &hammer2_cluster_rdata(cluster)->ipdata;
	lhc = ipdata->inum;		/* target key == hardlink inode number */
	ipdata = NULL;			/* safety */
	hammer2_cluster_unlock(cluster);
	hammer2_cluster_drop(cluster);
	*clusterp = NULL;		/* safety */

	rcluster = NULL;
	cparent = NULL;

	/*
	 * Walk up the directory topology starting at dip, locking each
	 * directory inode in turn and searching it for key lhc.  Each
	 * iteration transfers the loop ref into the lock, refs the next
	 * parent before unlocking the current inode, and stops at the
	 * first directory that contains the hardlink target.
	 */
	while ((ip = pip) != NULL) {
		cparent = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
		hammer2_inode_drop(ip);			/* loop */
		KKASSERT(hammer2_cluster_type(cparent) ==
			 HAMMER2_BREF_TYPE_INODE);
		rcluster = hammer2_cluster_lookup(cparent, &key_dummy,
					     lhc, lhc, 0);
		if (rcluster)
			break;
		hammer2_cluster_lookup_done(cparent);	/* discard parent */
		cparent = NULL;				/* safety */
		pip = ip->pip;		/* safe, ip held locked */
		if (pip)
			hammer2_inode_ref(pip);		/* loop */
		hammer2_inode_unlock(ip, NULL);
	}

	/*
	 * On success rcluster is the locked hardlink target and cparent
	 * is the still-locked directory it was found in; ip is the inode
	 * we broke out of the loop on and is still locked here.  Return
	 * cparent to the caller if requested, otherwise release it along
	 * with ip's lock.
	 *
	 * On failure (loop exhausted, ip == NULL, cparent == NULL) return
	 * EIO with (*clusterp) and (*cparentp) cleared.
	 */
	*clusterp = rcluster;
	if (rcluster) {
		if (cparentp) {
			*cparentp = cparent;
			hammer2_inode_unlock(ip, NULL);
		} else {
			hammer2_inode_unlock(ip, cparent);
		}
		return (0);
	} else {
		if (cparentp)
			*cparentp = NULL;
		if (ip)
			hammer2_inode_unlock(ip, cparent);
		return (EIO);
	}
}
1848
1849 /*
1850  * Find the directory common to both fdip and tdip.
1851  *
1852  * Returns a held but not locked inode.  Caller typically locks the inode,
1853  * and when through unlocks AND drops it.
1854  */
1855 hammer2_inode_t *
1856 hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
1857 {
1858         hammer2_inode_t *scan1;
1859         hammer2_inode_t *scan2;
1860
1861         /*
1862          * We used to have a depth field but it complicated matters too
1863          * much for directory renames.  So now its ugly.  Check for
1864          * simple cases before giving up and doing it the expensive way.
1865          *
1866          * XXX need a bottom-up topology stability lock
1867          */
1868         if (fdip == tdip || fdip == tdip->pip) {
1869                 hammer2_inode_ref(fdip);
1870                 return(fdip);
1871         }
1872         if (fdip->pip == tdip) {
1873                 hammer2_inode_ref(tdip);
1874                 return(tdip);
1875         }
1876
1877         /*
1878          * XXX not MPSAFE
1879          */
1880         for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
1881                 scan2 = tdip;
1882                 while (scan2->pmp == tdip->pmp) {
1883                         if (scan1 == scan2) {
1884                                 hammer2_inode_ref(scan1);
1885                                 return(scan1);
1886                         }
1887                         scan2 = scan2->pip;
1888                         if (scan2 == NULL)
1889                                 break;
1890                 }
1891         }
1892         panic("hammer2_inode_common_parent: no common parent %p %p\n",
1893               fdip, tdip);
1894         /* NOT REACHED */
1895         return(NULL);
1896 }
1897
1898 /*
1899  * Synchronize the inode's frontend state with the chain state prior
1900  * to any explicit flush of the inode or any strategy write call.
1901  *
1902  * Called with a locked inode.
1903  */
void
hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip, 
		    hammer2_cluster_t *cparent)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_data_t *wipdata;
	hammer2_cluster_t *dparent;
	hammer2_cluster_t *cluster;
	hammer2_key_t lbase;
	hammer2_key_t key_next;
	int dosync = 0;

	ripdata = &hammer2_cluster_rdata(cparent)->ipdata;    /* target file */

	/*
	 * Propagate a frontend mtime update into the media inode.
	 */
	if (ip->flags & HAMMER2_INODE_MTIME) {
		wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
		atomic_clear_int(&ip->flags, HAMMER2_INODE_MTIME);
		wipdata->mtime = ip->mtime;
		dosync = 1;
		ripdata = wipdata;
	}
	/*
	 * File was truncated: shrink the media size and prune data
	 * chains entirely beyond the new EOF.
	 */
	if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size < ripdata->size) {
		wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
		wipdata->size = ip->size;
		dosync = 1;
		ripdata = wipdata;
		atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);

		/*
		 * We must delete any chains beyond the EOF.  The chain
		 * straddling the EOF will be pending in the bioq.
		 */
		lbase = (ripdata->size + HAMMER2_PBUFMASK64) &
			~HAMMER2_PBUFMASK64;
		dparent = hammer2_cluster_lookup_init(&ip->cluster, 0);
		cluster = hammer2_cluster_lookup(dparent, &key_next,
						 lbase, (hammer2_key_t)-1,
						 HAMMER2_LOOKUP_NODATA);
		while (cluster) {
			/*
			 * Degenerate embedded case, nothing to loop on
			 */
			switch (hammer2_cluster_type(cluster)) {
			case HAMMER2_BREF_TYPE_INODE:
				/* data embedded in the inode itself */
				hammer2_cluster_unlock(cluster);
				hammer2_cluster_drop(cluster);
				cluster = NULL;
				break;
			case HAMMER2_BREF_TYPE_DATA:
				hammer2_cluster_delete(trans, dparent, cluster,
						   HAMMER2_DELETE_PERMANENT);
				/* fall through */
			default:
				cluster = hammer2_cluster_next(dparent, cluster,
						   &key_next,
						   key_next, (hammer2_key_t)-1,
						   HAMMER2_LOOKUP_NODATA);
				break;
			}
		}
		hammer2_cluster_lookup_done(dparent);
	} else
	/*
	 * File was extended: grow the media size and, if the file now
	 * exceeds the embedded-data capacity, switch the inode from
	 * direct-data mode to blockset (indirect) mode.
	 */
	if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size > ripdata->size) {
		wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
		wipdata->size = ip->size;
		atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);

		/*
		 * When resizing larger we may not have any direct-data
		 * available.
		 */
		if ((wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
		    ip->size > HAMMER2_EMBEDDED_BYTES) {
			wipdata->op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
			bzero(&wipdata->u.blockset,
			      sizeof(wipdata->u.blockset));
		}
		dosync = 1;
		ripdata = wipdata;
	}
	/* flush the modified inode data back through the cluster */
	if (dosync)
		hammer2_cluster_modsync(cparent);
}