hammer2 - update documentation, begin working on callback I/O
[dragonfly.git] / sys / vfs / hammer2 / hammer2_inode.c
1 /*
2  * Copyright (c) 2011-2014 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
39 #include <sys/lock.h>
40 #include <sys/uuid.h>
41
42 #include "hammer2.h"
43
/*
 * Compile-time switch: when non-zero, hammer2_inode_create() emits a
 * "CREATE INODE" kprintf for every inode it creates.
 */
44 #define INODE_DEBUG     0
45
/*
 * Forward declaration of a file-local helper so it can be referenced
 * before its definition.
 */
46 static void hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
47                                          hammer2_cluster_t **cparentp,
48                                          hammer2_cluster_t **clusterp,
49                                          hammer2_tid_t inum);
50
/*
 * Instantiate the red-black tree code for hammer2_inode_tree.  Nodes are
 * hammer2_inode structures linked through their rbnode field, ordered by
 * hammer2_inode_cmp(), and directly searchable by the hammer2_tid_t
 * inum field (see the RB_LOOKUP() in hammer2_inode_lookup()).
 */
51 RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
52              hammer2_tid_t, inum);
53
54 int
55 hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
56 {
57         if (ip1->inum < ip2->inum)
58                 return(-1);
59         if (ip1->inum > ip2->inum)
60                 return(1);
61         return(0);
62 }
63
64 /*
65  * HAMMER2 inode locks
66  *
67  * HAMMER2 offers shared locks and exclusive locks on inodes.
68  *
69  * The inode locking function locks the inode itself, resolves any stale
70  * chains in the inode's cluster, and allocates a fresh copy of the
71  * cluster with 1 ref and all the underlying chains locked.  Duplication
72  * races are handled by this function.
73  *
74  * ip->cluster will be stable while the inode is locked.
75  *
76  * NOTE: We don't combine the inode/chain lock because putting away an
77  *       inode would otherwise confuse multiple lock holders of the inode.
78  *
79  * NOTE: In-memory inodes always point to hardlink targets (the actual file),
80  *       and never point to a hardlink pointer.
81  */
82 hammer2_cluster_t *
83 hammer2_inode_lock_ex(hammer2_inode_t *ip)
84 {
85         const hammer2_inode_data_t *ripdata;
86         hammer2_cluster_t *cluster;
87         hammer2_chain_t *chain;
88         int i;
89
90         hammer2_inode_ref(ip);
91         ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
92         cluster = hammer2_cluster_copy(&ip->cluster,
93                                        HAMMER2_CLUSTER_COPY_NOCHAINS);
94
95         ip->cluster.focus = NULL;
96         cluster->focus = NULL;
97
98         for (i = 0; i < cluster->nchains; ++i) {
99                 chain = ip->cluster.array[i];
100                 if (chain == NULL) {
101                         kprintf("inode_lock: %p: missing chain\n", ip);
102                         continue;
103                 }
104
105                 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
106                 cluster->array[i] = chain;
107                 if (cluster->focus == NULL)
108                         cluster->focus = chain;
109                 if (ip->cluster.focus == NULL)
110                         ip->cluster.focus = chain;
111         }
112
113         /*
114          * Returned cluster must resolve hardlink pointers
115          */
116         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
117         KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
118         /*
119         if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK &&
120             (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) {
121                 error = hammer2_hardlink_find(ip->pip, NULL, cluster);
122                 KKASSERT(error == 0);
123         }
124         */
125
126         return (cluster);
127 }
128
129 void
130 hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
131 {
132         if (cluster)
133                 hammer2_cluster_unlock(cluster);
134         ccms_thread_unlock(&ip->topo_cst);
135         hammer2_inode_drop(ip);
136 }
137
138 /*
139  * NOTE: We don't combine the inode/chain lock because putting away an
140  *       inode would otherwise confuse multiple lock holders of the inode.
141  *
142  *       Shared locks are especially sensitive to having too many shared
143  *       lock counts (from the same thread) on certain paths which might
144  *       need to upgrade them.  Only one count of a shared lock can be
145  *       upgraded.
146  */
147 hammer2_cluster_t *
148 hammer2_inode_lock_sh(hammer2_inode_t *ip)
149 {
150         const hammer2_inode_data_t *ripdata;
151         hammer2_cluster_t *cluster;
152         hammer2_chain_t *chain;
153         int i;
154
155         hammer2_inode_ref(ip);
156         cluster = hammer2_cluster_copy(&ip->cluster,
157                                        HAMMER2_CLUSTER_COPY_NOCHAINS);
158         ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED);
159
160         cluster->focus = NULL;
161
162         for (i = 0; i < cluster->nchains; ++i) {
163                 chain = ip->cluster.array[i];
164
165                 if (chain == NULL) {
166                         kprintf("inode_lock: %p: missing chain\n", ip);
167                         continue;
168                 }
169
170                 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS |
171                                           HAMMER2_RESOLVE_SHARED);
172                 cluster->array[i] = chain;
173                 if (cluster->focus == NULL)
174                         cluster->focus = chain;
175         }
176
177         /*
178          * Returned cluster must resolve hardlink pointers
179          */
180         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
181         KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
182         /*
183         if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK &&
184             (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) {
185                 error = hammer2_hardlink_find(ip->pip, NULL, cluster);
186                 KKASSERT(error == 0);
187         }
188         */
189
190         return (cluster);
191 }
192
193 void
194 hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
195 {
196         if (cluster)
197                 hammer2_cluster_unlock(cluster);
198         ccms_thread_unlock(&ip->topo_cst);
199         hammer2_inode_drop(ip);
200 }
201
202 ccms_state_t
203 hammer2_inode_lock_temp_release(hammer2_inode_t *ip)
204 {
205         return(ccms_thread_lock_temp_release(&ip->topo_cst));
206 }
207
208 void
209 hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, ccms_state_t ostate)
210 {
211         ccms_thread_lock_temp_restore(&ip->topo_cst, ostate);
212 }
213
214 ccms_state_t
215 hammer2_inode_lock_upgrade(hammer2_inode_t *ip)
216 {
217         return(ccms_thread_lock_upgrade(&ip->topo_cst));
218 }
219
220 void
221 hammer2_inode_lock_downgrade(hammer2_inode_t *ip, ccms_state_t ostate)
222 {
223         ccms_thread_lock_downgrade(&ip->topo_cst, ostate);
224 }
225
226 /*
227  * Lookup an inode by inode number
228  */
229 hammer2_inode_t *
230 hammer2_inode_lookup(hammer2_pfsmount_t *pmp, hammer2_tid_t inum)
231 {
232         hammer2_inode_t *ip;
233
234         KKASSERT(pmp);
235         if (pmp->spmp_hmp) {
236                 ip = NULL;
237         } else {
238                 spin_lock(&pmp->inum_spin);
239                 ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
240                 if (ip)
241                         hammer2_inode_ref(ip);
242                 spin_unlock(&pmp->inum_spin);
243         }
244         return(ip);
245 }
246
247 /*
248  * Adding a ref to an inode is only legal if the inode already has at least
249  * one ref.
250  *
251  * (can be called with spinlock held)
252  */
253 void
254 hammer2_inode_ref(hammer2_inode_t *ip)
255 {
256         atomic_add_int(&ip->refs, 1);
257 }
258
259 /*
260  * Drop an inode reference, freeing the inode when the last reference goes
261  * away.
262  */
263 void
264 hammer2_inode_drop(hammer2_inode_t *ip)
265 {
266         hammer2_pfsmount_t *pmp;
267         hammer2_inode_t *pip;
268         u_int refs;
269
270         while (ip) {
271                 refs = ip->refs;
272                 cpu_ccfence();
273                 if (refs == 1) {
274                         /*
275                          * Transition to zero, must interlock with
276                          * the inode inumber lookup tree (if applicable).
277                          */
278                         pmp = ip->pmp;
279                         KKASSERT(pmp);
280                         spin_lock(&pmp->inum_spin);
281
282                         if (atomic_cmpset_int(&ip->refs, 1, 0)) {
283                                 KKASSERT(ip->topo_cst.count == 0);
284                                 if (ip->flags & HAMMER2_INODE_ONRBTREE) {
285                                         atomic_clear_int(&ip->flags,
286                                                      HAMMER2_INODE_ONRBTREE);
287                                         RB_REMOVE(hammer2_inode_tree,
288                                                   &pmp->inum_tree, ip);
289                                 }
290                                 spin_unlock(&pmp->inum_spin);
291
292                                 pip = ip->pip;
293                                 ip->pip = NULL;
294                                 ip->pmp = NULL;
295
296                                 /*
297                                  * Cleaning out ip->cluster isn't entirely
298                                  * trivial.
299                                  */
300                                 hammer2_inode_repoint(ip, NULL, NULL);
301
302                                 /*
303                                  * We have to drop pip (if non-NULL) to
304                                  * dispose of our implied reference from
305                                  * ip->pip.  We can simply loop on it.
306                                  */
307                                 kfree(ip, pmp->minode);
308                                 atomic_add_long(&pmp->inmem_inodes, -1);
309                                 ip = pip;
310                                 /* continue with pip (can be NULL) */
311                         } else {
312                                 spin_unlock(&ip->pmp->inum_spin);
313                         }
314                 } else {
315                         /*
316                          * Non zero transition
317                          */
318                         if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
319                                 break;
320                 }
321         }
322 }
323
324 /*
325  * Get the vnode associated with the given inode, allocating the vnode if
326  * necessary.  The vnode will be returned exclusively locked.
327  *
328  * The caller must lock the inode (shared or exclusive).
329  *
330  * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
331  * races.
332  */
333 struct vnode *
334 hammer2_igetv(hammer2_inode_t *ip, hammer2_cluster_t *cparent, int *errorp)
335 {
336         const hammer2_inode_data_t *ripdata;
337         hammer2_pfsmount_t *pmp;
338         struct vnode *vp;
339         ccms_state_t ostate;
340
341         pmp = ip->pmp;
342         KKASSERT(pmp != NULL);
343         *errorp = 0;
344
345         ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
346
347         for (;;) {
348                 /*
349                  * Attempt to reuse an existing vnode assignment.  It is
350                  * possible to race a reclaim so the vget() may fail.  The
351                  * inode must be unlocked during the vget() to avoid a
352                  * deadlock against a reclaim.
353                  */
354                 vp = ip->vp;
355                 if (vp) {
356                         /*
357                          * Inode must be unlocked during the vget() to avoid
358                          * possible deadlocks, but leave the ip ref intact.
359                          *
360                          * vnode is held to prevent destruction during the
361                          * vget().  The vget() can still fail if we lost
362                          * a reclaim race on the vnode.
363                          */
364                         vhold(vp);
365                         ostate = hammer2_inode_lock_temp_release(ip);
366                         if (vget(vp, LK_EXCLUSIVE)) {
367                                 vdrop(vp);
368                                 hammer2_inode_lock_temp_restore(ip, ostate);
369                                 continue;
370                         }
371                         hammer2_inode_lock_temp_restore(ip, ostate);
372                         vdrop(vp);
373                         /* vp still locked and ref from vget */
374                         if (ip->vp != vp) {
375                                 kprintf("hammer2: igetv race %p/%p\n",
376                                         ip->vp, vp);
377                                 vput(vp);
378                                 continue;
379                         }
380                         *errorp = 0;
381                         break;
382                 }
383
384                 /*
385                  * No vnode exists, allocate a new vnode.  Beware of
386                  * allocation races.  This function will return an
387                  * exclusively locked and referenced vnode.
388                  */
389                 *errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
390                 if (*errorp) {
391                         kprintf("hammer2: igetv getnewvnode failed %d\n",
392                                 *errorp);
393                         vp = NULL;
394                         break;
395                 }
396
397                 /*
398                  * Lock the inode and check for an allocation race.
399                  */
400                 ostate = hammer2_inode_lock_upgrade(ip);
401                 if (ip->vp != NULL) {
402                         vp->v_type = VBAD;
403                         vx_put(vp);
404                         hammer2_inode_lock_downgrade(ip, ostate);
405                         continue;
406                 }
407
408                 switch (ripdata->type) {
409                 case HAMMER2_OBJTYPE_DIRECTORY:
410                         vp->v_type = VDIR;
411                         break;
412                 case HAMMER2_OBJTYPE_REGFILE:
413                         vp->v_type = VREG;
414                         vinitvmio(vp, ripdata->size,
415                                   HAMMER2_LBUFSIZE,
416                                   (int)ripdata->size & HAMMER2_LBUFMASK);
417                         break;
418                 case HAMMER2_OBJTYPE_SOFTLINK:
419                         /*
420                          * XXX for now we are using the generic file_read
421                          * and file_write code so we need a buffer cache
422                          * association.
423                          */
424                         vp->v_type = VLNK;
425                         vinitvmio(vp, ripdata->size,
426                                   HAMMER2_LBUFSIZE,
427                                   (int)ripdata->size & HAMMER2_LBUFMASK);
428                         break;
429                 case HAMMER2_OBJTYPE_CDEV:
430                         vp->v_type = VCHR;
431                         /* fall through */
432                 case HAMMER2_OBJTYPE_BDEV:
433                         vp->v_ops = &pmp->mp->mnt_vn_spec_ops;
434                         if (ripdata->type != HAMMER2_OBJTYPE_CDEV)
435                                 vp->v_type = VBLK;
436                         addaliasu(vp, ripdata->rmajor, ripdata->rminor);
437                         break;
438                 case HAMMER2_OBJTYPE_FIFO:
439                         vp->v_type = VFIFO;
440                         vp->v_ops = &pmp->mp->mnt_vn_fifo_ops;
441                         break;
442                 default:
443                         panic("hammer2: unhandled objtype %d", ripdata->type);
444                         break;
445                 }
446
447                 if (ip == pmp->iroot)
448                         vsetflags(vp, VROOT);
449
450                 vp->v_data = ip;
451                 ip->vp = vp;
452                 hammer2_inode_ref(ip);          /* vp association */
453                 hammer2_inode_lock_downgrade(ip, ostate);
454                 break;
455         }
456
457         /*
458          * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
459          */
460         if (hammer2_debug & 0x0002) {
461                 kprintf("igetv vp %p refs 0x%08x aux 0x%08x\n",
462                         vp, vp->v_refcnt, vp->v_auxrefs);
463         }
464         return (vp);
465 }
466
467 /*
468  * Returns the inode associated with the passed-in cluster, creating the
469  * inode if necessary and synchronizing it to the passed-in cluster otherwise.
470  *
471  * The passed-in chain must be locked and will remain locked on return.
472  * The returned inode will be locked and the caller may dispose of both
473  * via hammer2_inode_unlock_ex().  However, if the caller needs to resolve
474  * a hardlink it must ref/unlock/relock/drop the inode.
475  *
476  * The hammer2_inode structure regulates the interface between the high level
477  * kernel VNOPS API and the filesystem backend (the chains).
478  */
479 hammer2_inode_t *
480 hammer2_inode_get(hammer2_pfsmount_t *pmp, hammer2_inode_t *dip,
481                   hammer2_cluster_t *cluster)
482 {
483         hammer2_inode_t *nip;
484         const hammer2_inode_data_t *iptmp;
485         const hammer2_inode_data_t *nipdata;
486
487         KKASSERT(hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
488         KKASSERT(pmp);
489
490         /*
491          * Interlocked lookup/ref of the inode.  This code is only needed
492          * when looking up inodes with nlinks != 0 (TODO: optimize out
493          * otherwise and test for duplicates).
494          */
495 again:
496         for (;;) {
497                 iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
498                 nip = hammer2_inode_lookup(pmp, iptmp->inum);
499                 if (nip == NULL)
500                         break;
501
502                 ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
503
504                 /*
505                  * Handle SMP race (not applicable to the super-root spmp
506                  * which can't index inodes due to duplicative inode numbers).
507                  */
508                 if (pmp->spmp_hmp == NULL &&
509                     (nip->flags & HAMMER2_INODE_ONRBTREE) == 0) {
510                         ccms_thread_unlock(&nip->topo_cst);
511                         hammer2_inode_drop(nip);
512                         continue;
513                 }
514                 hammer2_inode_repoint(nip, NULL, cluster);
515                 return nip;
516         }
517
518         /*
519          * We couldn't find the inode number, create a new inode.
520          */
521         nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO);
522         atomic_add_long(&pmp->inmem_inodes, 1);
523         hammer2_pfs_memory_inc(pmp);
524         hammer2_pfs_memory_wakeup(pmp);
525         if (pmp->spmp_hmp)
526                 nip->flags = HAMMER2_INODE_SROOT;
527
528         /*
529          * Initialize nip's cluster
530          */
531         nip->cluster.refs = 1;
532         nip->cluster.pmp = pmp;
533         nip->cluster.flags |= HAMMER2_CLUSTER_INODE;
534         hammer2_cluster_replace(&nip->cluster, cluster);
535
536         nipdata = &hammer2_cluster_rdata(cluster)->ipdata;
537         nip->inum = nipdata->inum;
538         nip->size = nipdata->size;
539         nip->mtime = nipdata->mtime;
540         hammer2_inode_repoint(nip, NULL, cluster);
541         nip->pip = dip;                         /* can be NULL */
542         if (dip)
543                 hammer2_inode_ref(dip); /* ref dip for nip->pip */
544
545         nip->pmp = pmp;
546
547         /*
548          * ref and lock on nip gives it state compatible to after a
549          * hammer2_inode_lock_ex() call.
550          */
551         nip->refs = 1;
552         ccms_cst_init(&nip->topo_cst, &nip->cluster);
553         ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
554         /* combination of thread lock and chain lock == inode lock */
555
556         /*
557          * Attempt to add the inode.  If it fails we raced another inode
558          * get.  Undo all the work and try again.
559          */
560         if (pmp->spmp_hmp == NULL) {
561                 spin_lock(&pmp->inum_spin);
562                 if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
563                         spin_unlock(&pmp->inum_spin);
564                         ccms_thread_unlock(&nip->topo_cst);
565                         hammer2_inode_drop(nip);
566                         goto again;
567                 }
568                 atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
569                 spin_unlock(&pmp->inum_spin);
570         }
571
572         return (nip);
573 }
574
575 /*
576  * Create a new inode in the specified directory using the vattr to
577  * figure out the type of inode.
578  *
579  * If no error occurs the new inode with its cluster locked is returned in
580  * *nipp, otherwise an error is returned and *nipp is set to NULL.
581  *
582  * If vap and/or cred are NULL the related fields are not set and the
583  * inode type defaults to a directory.  This is used when creating PFSs
584  * under the super-root, so the inode number is set to 1 in this case.
585  *
586  * dip is not locked on entry.
587  *
588  * NOTE: When used to create a snapshot, the inode is temporarily associated
589  *       with the super-root spmp. XXX should pass new pmp for snapshot.
590  */
591 hammer2_inode_t *
592 hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
593                      struct vattr *vap, struct ucred *cred,
594                      const uint8_t *name, size_t name_len,
595                      hammer2_cluster_t **clusterp, int *errorp)
596 {
597         const hammer2_inode_data_t *dipdata;
598         hammer2_inode_data_t *nipdata;
599         hammer2_cluster_t *cluster;
600         hammer2_cluster_t *cparent;
601         hammer2_inode_t *nip;
602         hammer2_key_t key_dummy;
603         hammer2_key_t lhc;
604         int error;
605         uid_t xuid;
606         uuid_t dip_uid;
607         uuid_t dip_gid;
608         uint32_t dip_mode;
609         uint8_t dip_comp_algo;
610         uint8_t dip_check_algo;
611         int ddflag;
612
613         lhc = hammer2_dirhash(name, name_len);
614         *errorp = 0;
615
616         /*
617          * Locate the inode or indirect block to create the new
618          * entry in.  At the same time check for key collisions
619          * and iterate until we don't get one.
620          *
621          * NOTE: hidden inodes do not have iterators.
622          */
623 retry:
624         cparent = hammer2_inode_lock_ex(dip);
625         dipdata = &hammer2_cluster_rdata(cparent)->ipdata;
626         dip_uid = dipdata->uid;
627         dip_gid = dipdata->gid;
628         dip_mode = dipdata->mode;
629         dip_comp_algo = dipdata->comp_algo;
630         dip_check_algo = dipdata->check_algo;
631
632         error = 0;
633         while (error == 0) {
634                 cluster = hammer2_cluster_lookup(cparent, &key_dummy,
635                                                  lhc, lhc, 0, &ddflag);
636                 if (cluster == NULL)
637                         break;
638                 if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
639                         error = ENOSPC;
640                 if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
641                         error = ENOSPC;
642                 hammer2_cluster_unlock(cluster);
643                 cluster = NULL;
644                 ++lhc;
645         }
646
647         if (error == 0) {
648                 error = hammer2_cluster_create(trans, cparent, &cluster,
649                                              lhc, 0,
650                                              HAMMER2_BREF_TYPE_INODE,
651                                              HAMMER2_INODE_BYTES,
652                                              0);
653         }
654 #if INODE_DEBUG
655         kprintf("CREATE INODE %*.*s chain=%p\n",
656                 (int)name_len, (int)name_len, name,
657                 (cluster ? cluster->focus : NULL));
658 #endif
659
660         /*
661          * Cleanup and handle retries.
662          */
663         if (error == EAGAIN) {
664                 hammer2_cluster_ref(cparent);
665                 hammer2_inode_unlock_ex(dip, cparent);
666                 hammer2_cluster_wait(cparent);
667                 hammer2_cluster_drop(cparent);
668                 goto retry;
669         }
670         hammer2_inode_unlock_ex(dip, cparent);
671         cparent = NULL;
672
673         if (error) {
674                 KKASSERT(cluster == NULL);
675                 *errorp = error;
676                 return (NULL);
677         }
678
679         /*
680          * Set up the new inode.
681          *
682          * NOTE: *_get() integrates chain's lock into the inode lock.
683          *
684          * NOTE: Only one new inode can currently be created per
685          *       transaction.  If the need arises we can adjust
686          *       hammer2_trans_init() to allow more.
687          *
688          * NOTE: nipdata will have chain's blockset data.
689          */
690         KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_MODIFIED);
691         nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
692         nipdata->inum = trans->inode_tid;
693         hammer2_cluster_modsync(cluster);
694         nip = hammer2_inode_get(dip->pmp, dip, cluster);
695         nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
696
697         if (vap) {
698                 KKASSERT(trans->inodes_created == 0);
699                 nipdata->type = hammer2_get_obj_type(vap->va_type);
700                 nipdata->inum = trans->inode_tid;
701                 ++trans->inodes_created;
702
703                 switch (nipdata->type) {
704                 case HAMMER2_OBJTYPE_CDEV:
705                 case HAMMER2_OBJTYPE_BDEV:
706                         nipdata->rmajor = vap->va_rmajor;
707                         nipdata->rminor = vap->va_rminor;
708                         break;
709                 default:
710                         break;
711                 }
712         } else {
713                 nipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
714                 nipdata->inum = 1;
715         }
716         
717         /* Inherit parent's inode compression mode. */
718         nip->comp_heuristic = 0;
719         nipdata->comp_algo = dip_comp_algo;
720         nipdata->check_algo = dip_check_algo;
721         nipdata->version = HAMMER2_INODE_VERSION_ONE;
722         hammer2_update_time(&nipdata->ctime);
723         nipdata->mtime = nipdata->ctime;
724         if (vap)
725                 nipdata->mode = vap->va_mode;
726         nipdata->nlinks = 1;
727         if (vap) {
728                 if (dip && dip->pmp) {
729                         xuid = hammer2_to_unix_xid(&dip_uid);
730                         xuid = vop_helper_create_uid(dip->pmp->mp,
731                                                      dip_mode,
732                                                      xuid,
733                                                      cred,
734                                                      &vap->va_mode);
735                 } else {
736                         /* super-root has no dip and/or pmp */
737                         xuid = 0;
738                 }
739                 if (vap->va_vaflags & VA_UID_UUID_VALID)
740                         nipdata->uid = vap->va_uid_uuid;
741                 else if (vap->va_uid != (uid_t)VNOVAL)
742                         hammer2_guid_to_uuid(&nipdata->uid, vap->va_uid);
743                 else
744                         hammer2_guid_to_uuid(&nipdata->uid, xuid);
745
746                 if (vap->va_vaflags & VA_GID_UUID_VALID)
747                         nipdata->gid = vap->va_gid_uuid;
748                 else if (vap->va_gid != (gid_t)VNOVAL)
749                         hammer2_guid_to_uuid(&nipdata->gid, vap->va_gid);
750                 else if (dip)
751                         nipdata->gid = dip_gid;
752         }
753
754         /*
755          * Regular files and softlinks allow a small amount of data to be
756          * directly embedded in the inode.  This flag will be cleared if
757          * the size is extended past the embedded limit.
758          */
759         if (nipdata->type == HAMMER2_OBJTYPE_REGFILE ||
760             nipdata->type == HAMMER2_OBJTYPE_SOFTLINK) {
761                 nipdata->op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
762         }
763
764         KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
765         bcopy(name, nipdata->filename, name_len);
766         nipdata->name_key = lhc;
767         nipdata->name_len = name_len;
768         hammer2_cluster_modsync(cluster);
769         *clusterp = cluster;
770
771         return (nip);
772 }
773
774 /*
775  * The cluster has been removed from the original directory and replaced
776  * with a hardlink pointer.  Move the cluster to the specified parent
777  * directory, change the filename to "0xINODENUMBER", and adjust the key.
778  * The cluster becomes our invisible hardlink target.
779  *
780  * The original cluster must be deleted on entry.
781  */
782 static
783 void
784 hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_cluster_t *cluster,
785                         hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
786                         int nlinks, int *errorp)
787 {
788         const hammer2_inode_data_t *iptmp;
789         hammer2_inode_data_t *nipdata;
790         hammer2_cluster_t *xcluster;
791         hammer2_key_t key_dummy;
792         hammer2_key_t lhc;
793         hammer2_blockref_t bref;
794         int ddflag;
795
796         iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
797         lhc = iptmp->inum;
798         KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);
799
800         /*
801          * Locate the inode or indirect block to create the new
802          * entry in.  lhc represents the inode number so there is
803          * no collision iteration.
804          *
805          * There should be no key collisions with invisible inode keys.
806          *
807          * WARNING! Must use inode_lock_ex() on dip to handle a stale
808          *          dip->cluster cache.
809          */
810         *errorp = 0;
811         xcluster = hammer2_cluster_lookup(dcluster, &key_dummy,
812                                       lhc, lhc, 0, &ddflag);
813         if (xcluster) {
814                 kprintf("X3 chain %p dip %p dchain %p dip->chain %p\n",
815                         xcluster->focus, dip, dcluster->focus,
816                         dip->cluster.focus);
817                 hammer2_cluster_unlock(xcluster);
818                 xcluster = NULL;
819                 *errorp = ENOSPC;
820 #if 0
821                 Debugger("X3");
822 #endif
823         }
824
825         /*
826          * Handle the error case
827          */
828         if (*errorp) {
829                 panic("error2");
830                 KKASSERT(xcluster == NULL);
831                 return;
832         }
833
834         /*
835          * Use xcluster as a placeholder for (lhc).  Duplicate cluster to the
836          * same target bref as xcluster and then delete xcluster.  The
837          * duplication occurs after xcluster in flush order even though
838          * xcluster is deleted after the duplication. XXX
839          *
840          * WARNING! Duplications (to a different parent) can cause indirect
841          *          blocks to be inserted, refactor xcluster.
842          *
843          * WARNING! Only key and keybits is extracted from a passed-in bref.
844          */
845         hammer2_cluster_bref(cluster, &bref);
846         bref.key = lhc;                 /* invisible dir entry key */
847         bref.keybits = 0;
848         hammer2_cluster_rename(trans, &bref, dcluster, cluster, 0);
849
850         /*
851          * cluster is now 'live' again.. adjust the filename.
852          *
853          * Directory entries are inodes but this is a hidden hardlink
854          * target.  The name isn't used but to ease debugging give it
855          * a name after its inode number.
856          */
857         hammer2_cluster_modify(trans, cluster, 0);
858         nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
859         ksnprintf(nipdata->filename, sizeof(nipdata->filename),
860                   "0x%016jx", (intmax_t)nipdata->inum);
861         nipdata->name_len = strlen(nipdata->filename);
862         nipdata->name_key = lhc;
863         nipdata->nlinks += nlinks;
864         hammer2_cluster_modsync(cluster);
865 }
866
867 /*
868  * Connect the target inode represented by (cluster) to the media topology
869  * at (dip, name, len).  The caller can pass a rough *chainp, this function
870  * will issue lookup()s to position the parent chain properly for the
871  * chain insertion.
872  *
873  * If hlink is TRUE this function creates an OBJTYPE_HARDLINK directory
874  * entry instead of connecting (cluster).
875  *
876  * If hlink is FALSE this function expects (cluster) to be unparented.
877  */
878 int
879 hammer2_inode_connect(hammer2_trans_t *trans,
880                       hammer2_cluster_t **clusterp, int hlink,
881                       hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
882                       const uint8_t *name, size_t name_len,
883                       hammer2_key_t lhc)
884 {
885         hammer2_inode_data_t *wipdata;
886         hammer2_cluster_t *ocluster;
887         hammer2_cluster_t *ncluster;
888         hammer2_key_t key_dummy;
889         int ddflag;
890         int error;
891
892         /*
893          * Since ocluster is either disconnected from the topology or
894          * represents a hardlink terminus which is always a parent of or
895          * equal to dip, we should be able to safely lock dip->chain for
896          * our setup.
897          *
898          * WARNING! Must use inode_lock_ex() on dip to handle a stale
899          *          dip->cluster.
900          *
901          * If name is non-NULL we calculate lhc, else we use the passed-in
902          * lhc.
903          */
904         ocluster = *clusterp;
905
906         if (name) {
907                 lhc = hammer2_dirhash(name, name_len);
908
909                 /*
910                  * Locate the inode or indirect block to create the new
911                  * entry in.  At the same time check for key collisions
912                  * and iterate until we don't get one.
913                  */
914                 error = 0;
915                 while (error == 0) {
916                         ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
917                                                       lhc, lhc,
918                                                       0, &ddflag);
919                         if (ncluster == NULL)
920                                 break;
921                         if ((lhc & HAMMER2_DIRHASH_LOMASK) ==
922                             HAMMER2_DIRHASH_LOMASK) {
923                                 error = ENOSPC;
924                         }
925                         hammer2_cluster_unlock(ncluster);
926                         ncluster = NULL;
927                         ++lhc;
928                 }
929         } else {
930                 /*
931                  * Reconnect to specific key (used when moving
932                  * unlinked-but-open files into the hidden directory).
933                  */
934                 ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
935                                                   lhc, lhc,
936                                                   0, &ddflag);
937                 KKASSERT(ncluster == NULL);
938         }
939
940         if (error == 0) {
941                 if (hlink) {
942                         /*
943                          * Hardlink pointer needed, create totally fresh
944                          * directory entry.
945                          *
946                          * We must refactor ocluster because it might have
947                          * been shifted into an indirect cluster by the
948                          * create.
949                          */
950                         KKASSERT(ncluster == NULL);
951                         error = hammer2_cluster_create(trans,
952                                                        dcluster, &ncluster,
953                                                        lhc, 0,
954                                                        HAMMER2_BREF_TYPE_INODE,
955                                                        HAMMER2_INODE_BYTES,
956                                                        0);
957                 } else {
958                         /*
959                          * Reconnect the original cluster under the new name.
960                          * Original cluster must have already been deleted by
961                          * teh caller.
962                          *
963                          * WARNING! Can cause held-over clusters to require a
964                          *          refactor.  Fortunately we have none (our
965                          *          locked clusters are passed into and
966                          *          modified by the call).
967                          */
968                         ncluster = ocluster;
969                         ocluster = NULL;
970                         error = hammer2_cluster_create(trans,
971                                                        dcluster, &ncluster,
972                                                        lhc, 0,
973                                                        HAMMER2_BREF_TYPE_INODE,
974                                                        HAMMER2_INODE_BYTES,
975                                                        0);
976                 }
977         }
978
979         /*
980          * Unlock stuff.
981          */
982         KKASSERT(error != EAGAIN);
983
984         /*
985          * ncluster should be NULL on error, leave ocluster
986          * (ocluster == *clusterp) alone.
987          */
988         if (error) {
989                 KKASSERT(ncluster == NULL);
990                 return (error);
991         }
992
993         /*
994          * Directory entries are inodes so if the name has changed we have
995          * to update the inode.
996          *
997          * When creating an OBJTYPE_HARDLINK entry remember to unlock the
998          * cluster, the caller will access the hardlink via the actual hardlink
999          * target file and not the hardlink pointer entry, so we must still
1000          * return ocluster.
1001          */
1002         if (hlink && hammer2_hardlink_enable >= 0) {
1003                 /*
1004                  * Create the HARDLINK pointer.  oip represents the hardlink
1005                  * target in this situation.
1006                  *
1007                  * We will return ocluster (the hardlink target).
1008                  */
1009                 hammer2_cluster_modify(trans, ncluster, 0);
1010                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1011                 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1012                 bcopy(name, wipdata->filename, name_len);
1013                 wipdata->name_key = lhc;
1014                 wipdata->name_len = name_len;
1015                 wipdata->target_type =
1016                                 hammer2_cluster_rdata(ocluster)->ipdata.type;
1017                 wipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1018                 wipdata->inum = hammer2_cluster_rdata(ocluster)->ipdata.inum;
1019                 wipdata->version = HAMMER2_INODE_VERSION_ONE;
1020                 wipdata->nlinks = 1;
1021                 wipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1022                 hammer2_cluster_modsync(ncluster);
1023                 hammer2_cluster_unlock(ncluster);
1024                 ncluster = ocluster;
1025                 ocluster = NULL;
1026         } else {
1027                 /*
1028                  * ncluster is a duplicate of ocluster at the new location.
1029                  * We must fixup the name stored in the inode data.
1030                  * The bref key has already been adjusted by inode_connect().
1031                  */
1032                 hammer2_cluster_modify(trans, ncluster, 0);
1033                 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1034
1035                 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1036                 bcopy(name, wipdata->filename, name_len);
1037                 wipdata->name_key = lhc;
1038                 wipdata->name_len = name_len;
1039                 wipdata->nlinks = 1;
1040                 hammer2_cluster_modsync(ncluster);
1041         }
1042
1043         /*
1044          * We are replacing ocluster with ncluster, unlock ocluster.  In the
1045          * case where ocluster is left unchanged the code above sets
1046          * ncluster to ocluster and ocluster to NULL, resulting in a NOP here.
1047          */
1048         if (ocluster)
1049                 hammer2_cluster_unlock(ocluster);
1050         *clusterp = ncluster;
1051
1052         return (0);
1053 }
1054
1055 /*
1056  * Repoint ip->cluster's chains to cluster's chains.  Caller must hold
1057  * the inode exclusively locked.  cluster may be NULL to clean out any
1058  * chains in ip->cluster.
1059  */
1060 void
1061 hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
1062                       hammer2_cluster_t *cluster)
1063 {
1064         hammer2_chain_t *ochain;
1065         hammer2_chain_t *nchain;
1066         hammer2_inode_t *opip;
1067         int i;
1068
1069         /*
1070          * Replace chains in ip->cluster with chains from cluster and
1071          * adjust the focus if necessary.
1072          *
1073          * NOTE: nchain and/or ochain can be NULL due to gaps
1074          *       in the cluster arrays.
1075          */
1076         ip->cluster.focus = NULL;
1077         for (i = 0; cluster && i < cluster->nchains; ++i) {
1078                 nchain = cluster->array[i];
1079                 if (i < ip->cluster.nchains) {
1080                         ochain = ip->cluster.array[i];
1081                         if (ochain == nchain) {
1082                                 if (ip->cluster.focus == NULL)
1083                                         ip->cluster.focus = nchain;
1084                                 continue;
1085                         }
1086                 } else {
1087                         ochain = NULL;
1088                 }
1089
1090                 /*
1091                  * Make adjustments
1092                  */
1093                 ip->cluster.array[i] = nchain;
1094                 if (ip->cluster.focus == NULL)
1095                         ip->cluster.focus = nchain;
1096                 if (nchain)
1097                         hammer2_chain_ref(nchain);
1098                 if (ochain)
1099                         hammer2_chain_drop(ochain);
1100         }
1101
1102         /*
1103          * Release any left-over chains in ip->cluster.
1104          */
1105         while (i < ip->cluster.nchains) {
1106                 nchain = ip->cluster.array[i];
1107                 if (nchain) {
1108                         ip->cluster.array[i] = NULL;
1109                         hammer2_chain_drop(nchain);
1110                 }
1111                 ++i;
1112         }
1113         ip->cluster.nchains = cluster ? cluster->nchains : 0;
1114
1115         /*
1116          * Repoint ip->pip if requested (non-NULL pip).
1117          */
1118         if (pip && ip->pip != pip) {
1119                 opip = ip->pip;
1120                 hammer2_inode_ref(pip);
1121                 ip->pip = pip;
1122                 if (opip)
1123                         hammer2_inode_drop(opip);
1124         }
1125 }
1126
1127 /*
1128  * Unlink the file from the specified directory inode.  The directory inode
1129  * does not need to be locked.
1130  *
1131  * isdir determines whether a directory/non-directory check should be made.
1132  * No check is made if isdir is set to -1.
1133  *
1134  * isopen specifies whether special unlink-with-open-descriptor handling
1135  * must be performed.  If set to -1 the caller is deleting a PFS and we
1136  * check whether the chain is mounted or not (chain->pmp != NULL).  1 is
1137  * implied if it is mounted.
1138  *
1139  * If isopen is 1 and nlinks drops to 0 this function must move the chain
1140  * to a special hidden directory until last-close occurs on the file.
1141  *
1142  * NOTE!  The underlying file can still be active with open descriptors
1143  *        or if the chain is being manually held (e.g. for rename).
1144  *
1145  *        The caller is responsible for fixing up ip->chain if e.g. a
1146  *        rename occurs (see chain_duplicate()).
1147  *
1148  * NOTE!  The chain is not deleted if it is moved to the hidden directory,
1149  *        but otherwise will be deleted.
1150  */
1151 int
1152 hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
1153                     const uint8_t *name, size_t name_len,
1154                     int isdir, int *hlinkp, struct nchandle *nch,
1155                     int nlinks)
1156 {
1157         const hammer2_inode_data_t *ripdata;
1158         hammer2_inode_data_t *wipdata;
1159         hammer2_cluster_t *cparent;
1160         hammer2_cluster_t *hcluster;
1161         hammer2_cluster_t *hparent;
1162         hammer2_cluster_t *cluster;
1163         hammer2_cluster_t *dparent;
1164         hammer2_cluster_t *dcluster;
1165         hammer2_key_t key_dummy;
1166         hammer2_key_t key_next;
1167         hammer2_key_t lhc;
1168         int error;
1169         int ddflag;
1170         int hlink;
1171         uint8_t type;
1172
1173         error = 0;
1174         hlink = 0;
1175         hcluster = NULL;
1176         hparent = NULL;
1177         lhc = hammer2_dirhash(name, name_len);
1178
1179 again:
1180         /*
1181          * Search for the filename in the directory
1182          */
1183         cparent = hammer2_inode_lock_ex(dip);
1184         cluster = hammer2_cluster_lookup(cparent, &key_next,
1185                                      lhc, lhc + HAMMER2_DIRHASH_LOMASK,
1186                                      0, &ddflag);
1187         while (cluster) {
1188                 if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
1189                         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1190                         if (ripdata->name_len == name_len &&
1191                             bcmp(ripdata->filename, name, name_len) == 0) {
1192                                 break;
1193                         }
1194                 }
1195                 cluster = hammer2_cluster_next(cparent, cluster, &key_next,
1196                                                key_next,
1197                                                lhc + HAMMER2_DIRHASH_LOMASK,
1198                                                0);
1199         }
1200         hammer2_inode_unlock_ex(dip, NULL);     /* retain cparent */
1201
1202         /*
1203          * Not found or wrong type (isdir < 0 disables the type check).
1204          * If a hardlink pointer, type checks use the hardlink target.
1205          */
1206         if (cluster == NULL) {
1207                 error = ENOENT;
1208                 goto done;
1209         }
1210         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1211         type = ripdata->type;
1212         if (type == HAMMER2_OBJTYPE_HARDLINK) {
1213                 hlink = 1;
1214                 type = ripdata->target_type;
1215         }
1216
1217         if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
1218                 error = ENOTDIR;
1219                 goto done;
1220         }
1221         if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir >= 1) {
1222                 error = EISDIR;
1223                 goto done;
1224         }
1225
1226         /*
1227          * Hardlink must be resolved.  We can't hold the parent locked
1228          * while we do this or we could deadlock.  The physical file will
1229          * be located at or above the current directory.
1230          *
1231          * We loop to reacquire the hardlink origination.
1232          *
1233          * NOTE: hammer2_hardlink_find() will locate the hardlink target,
1234          *       returning a modified hparent and hcluster.
1235          */
1236         if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) {
1237                 if (hcluster == NULL) {
1238                         hcluster = cluster;
1239                         cluster = NULL; /* safety */
1240                         hammer2_cluster_unlock(cparent);
1241                         cparent = NULL; /* safety */
1242                         ripdata = NULL; /* safety (associated w/cparent) */
1243                         error = hammer2_hardlink_find(dip, &hparent, hcluster);
1244
1245                         /*
1246                          * If we couldn't find the hardlink target then some
1247                          * parent directory containing the hardlink pointer
1248                          * probably got renamed to above the original target,
1249                          * a case not yet handled by H2.
1250                          */
1251                         if (error) {
1252                                 kprintf("H2 unlink_file: hardlink target for "
1253                                         "\"%s\" not found\n",
1254                                         name);
1255                                 kprintf("(likely due to known directory "
1256                                         "rename bug)\n");
1257                                 goto done;
1258                         }
1259                         goto again;
1260                 }
1261         }
1262
1263         /*
1264          * If this is a directory the directory must be empty.  However, if
1265          * isdir < 0 we are doing a rename and the directory does not have
1266          * to be empty, and if isdir > 1 we are deleting a PFS/snapshot
1267          * and the directory does not have to be empty.
1268          *
1269          * NOTE: We check the full key range here which covers both visible
1270          *       and invisible entries.  Theoretically there should be no
1271          *       invisible (hardlink target) entries if there are no visible
1272          *       entries.
1273          */
1274         if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
1275                 dparent = hammer2_cluster_lookup_init(cluster, 0);
1276                 dcluster = hammer2_cluster_lookup(dparent, &key_dummy,
1277                                                   0, (hammer2_key_t)-1,
1278                                                   HAMMER2_LOOKUP_NODATA,
1279                                                   &ddflag);
1280                 if (dcluster) {
1281                         hammer2_cluster_unlock(dcluster);
1282                         hammer2_cluster_lookup_done(dparent);
1283                         error = ENOTEMPTY;
1284                         goto done;
1285                 }
1286                 hammer2_cluster_lookup_done(dparent);
1287                 dparent = NULL;
1288                 /* dcluster NULL */
1289         }
1290
1291         /*
1292          * If this was a hardlink then (cparent, cluster) is the hardlink
1293          * pointer, which we can simply destroy outright.  Discard the
1294          * clusters and replace with the hardlink target.
1295          */
1296         if (hcluster) {
1297                 hammer2_cluster_delete(trans, cparent, cluster,
1298                                        HAMMER2_DELETE_PERMANENT);
1299                 hammer2_cluster_unlock(cparent);
1300                 hammer2_cluster_unlock(cluster);
1301                 cparent = hparent;
1302                 cluster = hcluster;
1303                 hparent = NULL;
1304                 hcluster = NULL;
1305         }
1306
1307         /*
1308          * This leaves us with the hardlink target or non-hardlinked file
1309          * or directory in (cparent, cluster).
1310          *
1311          * Delete the target when nlinks reaches 0 with special handling
1312          * if (isopen) is set.
1313          *
1314          * NOTE! In DragonFly the vnops function calls cache_unlink() after
1315          *       calling us here to clean out the namecache association,
1316          *       (which does not represent a ref for the open-test), and to
1317          *       force finalization of the vnode if/when the last ref gets
1318          *       dropped.
1319          *
1320          * NOTE! Files are unlinked by rename and then relinked.  nch will be
1321          *       passed as NULL in this situation.  hammer2_inode_connect()
1322          *       will bump nlinks.
1323          */
1324         KKASSERT(cluster != NULL);
1325         hammer2_cluster_modify(trans, cluster, 0);
1326         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1327         ripdata = wipdata;
1328         wipdata->nlinks += nlinks;
1329         if ((int64_t)wipdata->nlinks < 0) {     /* XXX debugging */
1330                 wipdata->nlinks = 0;
1331         }
1332         hammer2_cluster_modsync(cluster);
1333
1334         if (wipdata->nlinks == 0) {
1335                 /*
1336                  * Target nlinks has reached 0, file now unlinked (but may
1337                  * still be open).
1338                  */
1339                 /* XXX need interlock if mounted
1340                 if ((cluster->focus->flags & HAMMER2_CHAIN_PFSROOT) &&
1341                     cluster->pmp) {
1342                         error = EINVAL;
1343                         kprintf("hammer2: PFS \"%s\" cannot be deleted "
1344                                 "while still mounted\n",
1345                                 wipdata->filename);
1346                         goto done;
1347                 }
1348                 */
1349                 if (nch && cache_isopen(nch)) {
1350                         hammer2_inode_move_to_hidden(trans, &cparent, &cluster,
1351                                                      wipdata->inum);
1352                 } else {
1353                         /*
1354                          * This won't get everything if a vnode is still
1355                          * present, but the cache_unlink() call the caller
1356                          * makes will.
1357                          */
1358                         hammer2_cluster_delete(trans, cparent, cluster,
1359                                                HAMMER2_DELETE_PERMANENT);
1360                 }
1361         } else if (hlink == 0) {
1362                 /*
1363                  * In this situation a normal non-hardlinked file (which can
1364                  * only have nlinks == 1) still has a non-zero nlinks, the
1365                  * caller must be doing a RENAME operation and so is passing
1366                  * a nlinks adjustment of 0, and only wishes to remove file
1367                  * in order to be able to reconnect it under a different name.
1368                  *
1369                  * In this situation we do a non-permanent deletion of the
1370                  * chain in order to allow the file to be reconnected in
1371                  * a different location.
1372                  */
1373                 KKASSERT(nlinks == 0);
1374                 hammer2_cluster_delete(trans, cparent, cluster, 0);
1375         }
1376         error = 0;
1377 done:
1378         if (cparent)
1379                 hammer2_cluster_unlock(cparent);
1380         if (cluster)
1381                 hammer2_cluster_unlock(cluster);
1382         if (hparent)
1383                 hammer2_cluster_unlock(hparent);
1384         if (hcluster)
1385                 hammer2_cluster_unlock(hcluster);
1386         if (hlinkp)
1387                 *hlinkp = hlink;
1388
1389         return error;
1390 }
1391
1392 /*
1393  * This is called from the mount code to initialize pmp->ihidden
1394  */
1395 void
1396 hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp)
1397 {
1398         hammer2_trans_t trans;
1399         hammer2_cluster_t *cparent;
1400         hammer2_cluster_t *cluster;
1401         hammer2_cluster_t *scan;
1402         const hammer2_inode_data_t *ripdata;
1403         hammer2_inode_data_t *wipdata;
1404         hammer2_key_t key_dummy;
1405         hammer2_key_t key_next;
1406         int ddflag;
1407         int error;
1408         int count;
1409         int dip_check_algo;
1410         int dip_comp_algo;
1411
1412         if (pmp->ihidden)
1413                 return;
1414
1415         /*
1416          * Find the hidden directory
1417          */
1418         bzero(&key_dummy, sizeof(key_dummy));
1419         hammer2_trans_init(&trans, pmp, 0);
1420
1421         /*
1422          * Setup for lookup, retrieve iroot's check and compression
1423          * algorithm request which was likely generated by newfs_hammer2.
1424          *
1425          * The check/comp fields will probably never be used since inodes
1426          * are renamed into the hidden directory and not created relative to
1427          * the hidden directory, chain creation inherits from bref.methods,
1428          * and data chains inherit from their respective file inode *_algo
1429          * fields.
1430          */
1431         cparent = hammer2_inode_lock_ex(pmp->iroot);
1432         ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
1433         dip_check_algo = ripdata->check_algo;
1434         dip_comp_algo = ripdata->comp_algo;
1435         ripdata = NULL;
1436
1437         cluster = hammer2_cluster_lookup(cparent, &key_dummy,
1438                                          HAMMER2_INODE_HIDDENDIR,
1439                                          HAMMER2_INODE_HIDDENDIR,
1440                                          0, &ddflag);
1441         if (cluster) {
1442                 pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
1443                 hammer2_inode_ref(pmp->ihidden);
1444
1445                 /*
1446                  * Remove any unlinked files which were left open as-of
1447                  * any system crash.
1448                  *
1449                  * Don't pass NODATA, we need the inode data so the delete
1450                  * can do proper statistics updates.
1451                  */
1452                 count = 0;
1453                 scan = hammer2_cluster_lookup(cluster, &key_next,
1454                                               0, HAMMER2_TID_MAX,
1455                                               0, &ddflag);
1456                 while (scan) {
1457                         if (hammer2_cluster_type(scan) ==
1458                             HAMMER2_BREF_TYPE_INODE) {
1459                                 hammer2_cluster_delete(&trans, cluster, scan,
1460                                                    HAMMER2_DELETE_PERMANENT);
1461                                 ++count;
1462                         }
1463                         scan = hammer2_cluster_next(cluster, scan, &key_next,
1464                                                     0, HAMMER2_TID_MAX, 0);
1465                 }
1466
1467                 hammer2_inode_unlock_ex(pmp->ihidden, cluster);
1468                 hammer2_inode_unlock_ex(pmp->iroot, cparent);
1469                 hammer2_trans_done(&trans);
1470                 kprintf("hammer2: PFS loaded hidden dir, "
1471                         "removed %d dead entries\n", count);
1472                 return;
1473         }
1474
1475         /*
1476          * Create the hidden directory
1477          */
1478         error = hammer2_cluster_create(&trans, cparent, &cluster,
1479                                        HAMMER2_INODE_HIDDENDIR, 0,
1480                                        HAMMER2_BREF_TYPE_INODE,
1481                                        HAMMER2_INODE_BYTES,
1482                                        0);
1483         hammer2_inode_unlock_ex(pmp->iroot, cparent);
1484
1485         hammer2_cluster_modify(&trans, cluster, 0);
1486         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1487         wipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
1488         wipdata->inum = HAMMER2_INODE_HIDDENDIR;
1489         wipdata->nlinks = 1;
1490         wipdata->comp_algo = dip_comp_algo;
1491         wipdata->check_algo = dip_check_algo;
1492         hammer2_cluster_modsync(cluster);
1493         kprintf("hammer2: PFS root missing hidden directory, creating\n");
1494
1495         pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
1496         hammer2_inode_ref(pmp->ihidden);
1497         hammer2_inode_unlock_ex(pmp->ihidden, cluster);
1498         hammer2_trans_done(&trans);
1499 }
1500
1501 /*
1502  * If an open file is unlinked H2 needs to retain the file in the topology
1503  * to ensure that its backing store is not recovered by the bulk free scan.
1504  * This also allows us to avoid having to special-case the CHAIN_DELETED flag.
1505  *
1506  * To do this the file is moved to a hidden directory in the PFS root and
1507  * renamed.  The hidden directory must be created if it does not exist.
1508  */
1509 static
1510 void
1511 hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
1512                              hammer2_cluster_t **cparentp,
1513                              hammer2_cluster_t **clusterp,
1514                              hammer2_tid_t inum)
1515 {
1516         hammer2_cluster_t *dcluster;
1517         hammer2_pfsmount_t *pmp;
1518         int error;
1519
1520         pmp = (*clusterp)->pmp;
1521         KKASSERT(pmp != NULL);
1522         KKASSERT(pmp->ihidden != NULL);
1523
1524         hammer2_cluster_delete(trans, *cparentp, *clusterp, 0);
1525         dcluster = hammer2_inode_lock_ex(pmp->ihidden);
1526         error = hammer2_inode_connect(trans, clusterp, 0,
1527                                       pmp->ihidden, dcluster,
1528                                       NULL, 0, inum);
1529         hammer2_inode_unlock_ex(pmp->ihidden, dcluster);
1530         KKASSERT(error == 0);
1531 }
1532
1533 /*
1534  * Given an exclusively locked inode and cluster we consolidate the cluster
1535  * for hardlink creation, adding (nlinks) to the file's link count and
1536  * potentially relocating the inode to (cdip) which is a parent directory
1537  * common to both the current location of the inode and the intended new
1538  * hardlink.
1539  *
1540  * Replaces (*clusterp) if consolidation occurred, unlocking the old cluster
1541  * and returning a new locked cluster.
1542  *
1543  * NOTE!  This function will also replace ip->cluster.
1544  */
1545 int
1546 hammer2_hardlink_consolidate(hammer2_trans_t *trans,
1547                              hammer2_inode_t *ip,
1548                              hammer2_cluster_t **clusterp,
1549                              hammer2_inode_t *cdip,
1550                              hammer2_cluster_t *cdcluster,
1551                              int nlinks)
1552 {
1553         const hammer2_inode_data_t *ripdata;
1554         hammer2_inode_data_t *wipdata;
1555         hammer2_cluster_t *cluster;
1556         hammer2_cluster_t *cparent;
1557         int error;
1558
1559         cluster = *clusterp;
1560         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1561         if (nlinks == 0 &&                      /* no hardlink needed */
1562             (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE)) {
1563                 return (0);
1564         }
1565
1566         if (hammer2_hardlink_enable == 0) {     /* disallow hardlinks */
1567                 hammer2_cluster_unlock(cluster);
1568                 *clusterp = NULL;
1569                 return (ENOTSUP);
1570         }
1571
1572         cparent = NULL;
1573
1574         /*
1575          * If no change in the hardlink's target directory is required and
1576          * this is already a hardlink target, all we need to do is adjust
1577          * the link count.
1578          */
1579         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1580         if (cdip == ip->pip &&
1581             (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
1582                 if (nlinks) {
1583                         hammer2_cluster_modify(trans, cluster, 0);
1584                         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1585                         wipdata->nlinks += nlinks;
1586                         hammer2_cluster_modsync(cluster);
1587                         ripdata = wipdata;
1588                 }
1589                 error = 0;
1590                 goto done;
1591         }
1592
1593         /*
1594          * Cluster is the real inode.  The originating directory is locked
1595          * by the caller so we can manipulate it without worrying about races
1596          * against other lookups.
1597          *
1598          * If cluster is visible we need to delete it from the current
1599          * location and create a hardlink pointer in its place.  If it is
1600          * not visible we need only delete it.  Then later cluster will be
1601          * renamed to a parent directory and converted (if necessary) to
1602          * a hidden inode (via shiftup).
1603          *
1604          * NOTE! We must hold cparent locked through the delete/create/rename
1605          *       operation to ensure that other threads block resolving to
1606          *       the same hardlink, otherwise the other threads may not see
1607          *       the hardlink.
1608          */
1609         KKASSERT((cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0);
1610         cparent = hammer2_cluster_parent(cluster);
1611
1612         hammer2_cluster_delete(trans, cparent, cluster, 0);
1613
1614         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1615         KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
1616         if (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) {
1617                 hammer2_cluster_t *ncluster;
1618                 hammer2_key_t lhc;
1619
1620                 ncluster = NULL;
1621                 lhc = cluster->focus->bref.key;
1622                 error = hammer2_cluster_create(trans, cparent, &ncluster,
1623                                              lhc, 0,
1624                                              HAMMER2_BREF_TYPE_INODE,
1625                                              HAMMER2_INODE_BYTES,
1626                                              0);
1627                 hammer2_cluster_modify(trans, ncluster, 0);
1628                 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1629
1630                 /* wipdata->comp_algo = ripdata->comp_algo; */
1631                 wipdata->comp_algo = 0;
1632                 wipdata->check_algo = 0;
1633                 wipdata->version = HAMMER2_INODE_VERSION_ONE;
1634                 wipdata->inum = ripdata->inum;
1635                 wipdata->target_type = ripdata->type;
1636                 wipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1637                 wipdata->uflags = 0;
1638                 wipdata->rmajor = 0;
1639                 wipdata->rminor = 0;
1640                 wipdata->ctime = 0;
1641                 wipdata->mtime = 0;
1642                 wipdata->atime = 0;
1643                 wipdata->btime = 0;
1644                 bzero(&wipdata->uid, sizeof(wipdata->uid));
1645                 bzero(&wipdata->gid, sizeof(wipdata->gid));
1646                 wipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1647                 wipdata->cap_flags = 0;
1648                 wipdata->mode = 0;
1649                 wipdata->size = 0;
1650                 wipdata->nlinks = 1;
1651                 wipdata->iparent = 0;   /* XXX */
1652                 wipdata->pfs_type = 0;
1653                 wipdata->pfs_inum = 0;
1654                 bzero(&wipdata->pfs_clid, sizeof(wipdata->pfs_clid));
1655                 bzero(&wipdata->pfs_fsid, sizeof(wipdata->pfs_fsid));
1656                 wipdata->data_quota = 0;
1657                 wipdata->data_count = 0;
1658                 wipdata->inode_quota = 0;
1659                 wipdata->inode_count = 0;
1660                 wipdata->attr_tid = 0;
1661                 wipdata->dirent_tid = 0;
1662                 bzero(&wipdata->u, sizeof(wipdata->u));
1663                 bcopy(ripdata->filename, wipdata->filename, ripdata->name_len);
1664                 wipdata->name_key = ncluster->focus->bref.key;
1665                 wipdata->name_len = ripdata->name_len;
1666                 /* XXX transaction ids */
1667                 hammer2_cluster_modsync(ncluster);
1668                 hammer2_cluster_unlock(ncluster);
1669         }
1670         ripdata = wipdata;
1671
1672         /*
1673          * cluster represents the hardlink target and is now flagged deleted.
1674          * duplicate it to the parent directory and adjust nlinks.
1675          *
1676          * WARNING! The shiftup() call can cause ncluster to be moved into
1677          *          an indirect block, and our ncluster will wind up pointing
1678          *          to the older/original version.
1679          */
1680         KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_DELETED);
1681         hammer2_hardlink_shiftup(trans, cluster, cdip, cdcluster,
1682                                  nlinks, &error);
1683
1684         if (error == 0)
1685                 hammer2_inode_repoint(ip, cdip, cluster);
1686
1687 done:
1688         /*
1689          * Cleanup, cluster/ncluster already dealt with.
1690          *
1691          * Return the shifted cluster in *clusterp.
1692          */
1693         if (cparent)
1694                 hammer2_cluster_unlock(cparent);
1695         *clusterp = cluster;
1696
1697         return (error);
1698 }
1699
1700 /*
1701  * If (*ochainp) is non-NULL it points to the forward OBJTYPE_HARDLINK
1702  * inode while (*chainp) points to the resolved (hidden hardlink
1703  * target) inode.  In this situation when nlinks is 1 we wish to
1704  * deconsolidate the hardlink, moving it back to the directory that now
1705  * represents the only remaining link.
1706  */
1707 int
1708 hammer2_hardlink_deconsolidate(hammer2_trans_t *trans,
1709                                hammer2_inode_t *dip,
1710                                hammer2_chain_t **chainp,
1711                                hammer2_chain_t **ochainp)
1712 {
1713         if (*ochainp == NULL)
1714                 return (0);
1715         /* XXX */
1716         return (0);
1717 }
1718
1719 /*
1720  * The caller presents a locked cluster with an obj_type of
1721  * HAMMER2_OBJTYPE_HARDLINK.  This routine will locate and replace the
1722  * cluster with the target hardlink, also locked.
1723  *
1724  * If cparentp is not NULL a locked cluster representing the hardlink's
1725  * parent is also returned.
1726  *
1727  * If we are unable to locate the hardlink target EIO is returned and
1728  * (*cparentp) is set to NULL.  The passed-in cluster still needs to be
1729  * unlocked by the caller but will be degenerate... not have any chains.
1730  */
1731 int
1732 hammer2_hardlink_find(hammer2_inode_t *dip,
1733                       hammer2_cluster_t **cparentp, hammer2_cluster_t *cluster)
1734 {
1735         const hammer2_inode_data_t *ipdata;
1736         hammer2_cluster_t *cparent;
1737         hammer2_cluster_t *rcluster;
1738         hammer2_inode_t *ip;
1739         hammer2_inode_t *pip;
1740         hammer2_key_t key_dummy;
1741         hammer2_key_t lhc;
1742         int ddflag;
1743
1744         pip = dip;
1745         hammer2_inode_ref(pip);         /* for loop */
1746
1747         /*
1748          * Locate the hardlink.  pip is referenced and not locked.
1749          */
1750         ipdata = &hammer2_cluster_rdata(cluster)->ipdata;
1751         lhc = ipdata->inum;
1752
1753         /*
1754          * We don't need the cluster's chains, but we need to retain the
1755          * cluster structure itself so we can load the hardlink search
1756          * result into it.
1757          */
1758         KKASSERT(cluster->refs == 1);
1759         atomic_add_int(&cluster->refs, 1);
1760         hammer2_cluster_unlock(cluster);        /* hack */
1761         cluster->nchains = 0;                   /* hack */
1762
1763         rcluster = NULL;
1764         cparent = NULL;
1765
1766         while ((ip = pip) != NULL) {
1767                 cparent = hammer2_inode_lock_ex(ip);
1768                 hammer2_inode_drop(ip);                 /* loop */
1769                 KKASSERT(hammer2_cluster_type(cparent) ==
1770                          HAMMER2_BREF_TYPE_INODE);
1771                 rcluster = hammer2_cluster_lookup(cparent, &key_dummy,
1772                                              lhc, lhc, 0, &ddflag);
1773                 if (rcluster)
1774                         break;
1775                 hammer2_cluster_lookup_done(cparent);   /* discard parent */
1776                 cparent = NULL;                         /* safety */
1777                 pip = ip->pip;          /* safe, ip held locked */
1778                 if (pip)
1779                         hammer2_inode_ref(pip);         /* loop */
1780                 hammer2_inode_unlock_ex(ip, NULL);
1781         }
1782
1783         /*
1784          * chain is locked, ip is locked.  Unlock ip, return the locked
1785          * chain.  *ipp is already set w/a ref count and not locked.
1786          *
1787          * (cparent is already unlocked).
1788          */
1789         if (rcluster) {
1790                 hammer2_cluster_replace(cluster, rcluster);
1791                 hammer2_cluster_drop(rcluster);
1792                 if (cparentp) {
1793                         *cparentp = cparent;
1794                         hammer2_inode_unlock_ex(ip, NULL);
1795                 } else {
1796                         hammer2_inode_unlock_ex(ip, cparent);
1797                 }
1798                 return (0);
1799         } else {
1800                 if (cparentp)
1801                         *cparentp = NULL;
1802                 if (ip)
1803                         hammer2_inode_unlock_ex(ip, cparent);
1804                 return (EIO);
1805         }
1806 }
1807
1808 /*
1809  * Find the directory common to both fdip and tdip.
1810  *
1811  * Returns a held but not locked inode.  Caller typically locks the inode,
1812  * and when through unlocks AND drops it.
1813  */
1814 hammer2_inode_t *
1815 hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
1816 {
1817         hammer2_inode_t *scan1;
1818         hammer2_inode_t *scan2;
1819
1820         /*
1821          * We used to have a depth field but it complicated matters too
1822          * much for directory renames.  So now its ugly.  Check for
1823          * simple cases before giving up and doing it the expensive way.
1824          *
1825          * XXX need a bottom-up topology stability lock
1826          */
1827         if (fdip == tdip || fdip == tdip->pip) {
1828                 hammer2_inode_ref(fdip);
1829                 return(fdip);
1830         }
1831         if (fdip->pip == tdip) {
1832                 hammer2_inode_ref(tdip);
1833                 return(tdip);
1834         }
1835
1836         /*
1837          * XXX not MPSAFE
1838          */
1839         for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
1840                 scan2 = tdip;
1841                 while (scan2->pmp == tdip->pmp) {
1842                         if (scan1 == scan2) {
1843                                 hammer2_inode_ref(scan1);
1844                                 return(scan1);
1845                         }
1846                         scan2 = scan2->pip;
1847                         if (scan2 == NULL)
1848                                 break;
1849                 }
1850         }
1851         panic("hammer2_inode_common_parent: no common parent %p %p\n",
1852               fdip, tdip);
1853         /* NOT REACHED */
1854         return(NULL);
1855 }
1856
1857 /*
1858  * Synchronize the inode's frontend state with the chain state prior
1859  * to any explicit flush of the inode or any strategy write call.
1860  *
1861  * Called with a locked inode.
1862  */
1863 void
1864 hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip, 
1865                     hammer2_cluster_t *cparent)
1866 {
1867         const hammer2_inode_data_t *ripdata;
1868         hammer2_inode_data_t *wipdata;
1869         hammer2_cluster_t *dparent;
1870         hammer2_cluster_t *cluster;
1871         hammer2_key_t lbase;
1872         hammer2_key_t key_next;
1873         int dosync = 0;
1874         int ddflag;
1875
1876         ripdata = &hammer2_cluster_rdata(cparent)->ipdata;    /* target file */
1877
1878         if (ip->flags & HAMMER2_INODE_MTIME) {
1879                 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
1880                 atomic_clear_int(&ip->flags, HAMMER2_INODE_MTIME);
1881                 wipdata->mtime = ip->mtime;
1882                 dosync = 1;
1883                 ripdata = wipdata;
1884         }
1885         if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size < ripdata->size) {
1886                 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
1887                 wipdata->size = ip->size;
1888                 dosync = 1;
1889                 ripdata = wipdata;
1890                 atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);
1891
1892                 /*
1893                  * We must delete any chains beyond the EOF.  The chain
1894                  * straddling the EOF will be pending in the bioq.
1895                  */
1896                 lbase = (ripdata->size + HAMMER2_PBUFMASK64) &
1897                         ~HAMMER2_PBUFMASK64;
1898                 dparent = hammer2_cluster_lookup_init(&ip->cluster, 0);
1899                 cluster = hammer2_cluster_lookup(dparent, &key_next,
1900                                                  lbase, (hammer2_key_t)-1,
1901                                                  HAMMER2_LOOKUP_NODATA,
1902                                                  &ddflag);
1903                 while (cluster) {
1904                         /*
1905                          * Degenerate embedded case, nothing to loop on
1906                          */
1907                         switch (hammer2_cluster_type(cluster)) {
1908                         case HAMMER2_BREF_TYPE_INODE:
1909                                 hammer2_cluster_unlock(cluster);
1910                                 cluster = NULL;
1911                                 break;
1912                         case HAMMER2_BREF_TYPE_DATA:
1913                                 hammer2_cluster_delete(trans, dparent, cluster,
1914                                                    HAMMER2_DELETE_PERMANENT);
1915                                 /* fall through */
1916                         default:
1917                                 cluster = hammer2_cluster_next(dparent, cluster,
1918                                                    &key_next,
1919                                                    key_next, (hammer2_key_t)-1,
1920                                                    HAMMER2_LOOKUP_NODATA);
1921                                 break;
1922                         }
1923                 }
1924                 hammer2_cluster_lookup_done(dparent);
1925         } else
1926         if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size > ripdata->size) {
1927                 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
1928                 wipdata->size = ip->size;
1929                 atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);
1930
1931                 /*
1932                  * When resizing larger we may not have any direct-data
1933                  * available.
1934                  */
1935                 if ((wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
1936                     ip->size > HAMMER2_EMBEDDED_BYTES) {
1937                         wipdata->op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
1938                         bzero(&wipdata->u.blockset,
1939                               sizeof(wipdata->u.blockset));
1940                 }
1941                 dosync = 1;
1942                 ripdata = wipdata;
1943         }
1944         if (dosync)
1945                 hammer2_cluster_modsync(cparent);
1946 }