hammer2 - Refactor frontend part 5/many
dragonfly.git: sys/vfs/hammer2/hammer2_inode.c
/*
 * Copyright (c) 2011-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/uuid.h>

#include "hammer2.h"

#define INODE_DEBUG     0

static void hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
                                         hammer2_cluster_t **cparentp,
                                         hammer2_cluster_t **clusterp,
                                         hammer2_tid_t inum);

RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
             hammer2_tid_t, meta.inum);

int
hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
{
        if (ip1->meta.inum < ip2->meta.inum)
                return(-1);
        if (ip1->meta.inum > ip2->meta.inum)
                return(1);
        return(0);
}

/*
 * HAMMER2 inode locks
 *
 * HAMMER2 offers shared and exclusive locks on inodes.  Pass a mask of
 * flags for options:
 *
 *      - pass HAMMER2_RESOLVE_SHARED if a shared lock is desired.  The
 *        inode locking function will automatically set the RDONLY flag.
 *
 *      - pass HAMMER2_RESOLVE_ALWAYS if you need the inode's meta-data.
 *        Most front-end inode locks do.
 *
 *      - pass HAMMER2_RESOLVE_NEVER if you do not want to require that
 *        the inode data be resolved.  This is used by the syncthr because
 *        it can run on an unresolved/out-of-sync cluster, and also by the
 *        vnode reclamation code to avoid unnecessary I/O (particularly when
 *        disposing of hundreds of thousands of cached vnodes).
 *
 * The inode locking function locks the inode itself, resolves any stale
 * chains in the inode's cluster, and allocates a fresh copy of the
 * cluster with 1 ref and all the underlying chains locked.
 *
 * ip->cluster will be stable while the inode is locked.
 *
 * NOTE: We don't combine the inode/chain lock because putting away an
 *       inode would otherwise confuse multiple lock holders of the inode.
 *
 * NOTE: In-memory inodes always point to hardlink targets (the actual file),
 *       and never point to a hardlink pointer.
 *
 * NOTE: If caller passes HAMMER2_RESOLVE_RDONLY the exclusive locking code
 *       will feel free to reduce the chain set in the cluster as an
 *       optimization.  It will still be validated against the quorum if
 *       appropriate, but the optimization might be able to reduce data
 *       accesses to one node.  This flag is automatically set if the inode
 *       is locked with HAMMER2_RESOLVE_SHARED.
 */
void
hammer2_inode_lock(hammer2_inode_t *ip, int how)
{
        hammer2_inode_ref(ip);

        /*
         * Inode structure mutex
         */
        if (how & HAMMER2_RESOLVE_SHARED) {
                how |= HAMMER2_RESOLVE_RDONLY;
                hammer2_mtx_sh(&ip->lock);
        } else {
                hammer2_mtx_ex(&ip->lock);
        }
}

/*
 * Create a locked copy of ip->cluster.  Note that the copy will have a
 * ref on the cluster AND its chains and we don't want a second ref to
 * either when we lock it.
 *
 * Exclusive inode locks set the template focus chain in (ip)
 * as a hint.  Cluster locks can ALWAYS replace the focus in the
 * working copy if the hint does not work out, so beware.
 */
hammer2_cluster_t *
hammer2_inode_cluster(hammer2_inode_t *ip, int how)
{
        hammer2_cluster_t *cluster;

        cluster = hammer2_cluster_copy(&ip->cluster);
        hammer2_cluster_lock(cluster, how);
        hammer2_cluster_resolve(cluster);

        /*
         * cluster->focus will be set if resolving RESOLVE_ALWAYS, but
         * only update the cached focus in the inode structure when taking
         * out an exclusive lock.
         */
        if ((how & HAMMER2_RESOLVE_SHARED) == 0)
                ip->cluster.focus = cluster->focus;

        return cluster;
}

void
hammer2_inode_unlock(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
{
        if (cluster) {
                hammer2_cluster_unlock(cluster);
                hammer2_cluster_drop(cluster);
        }
        hammer2_mtx_unlock(&ip->lock);
        hammer2_inode_drop(ip);
}
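
/*
 * Example (sketch; hypothetical caller, not part of the original file):
 * a typical front-end path locks the inode, materializes a locked copy
 * of its cluster, operates on the cluster data, then puts both away in
 * a single call:
 *
 *      hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
 *      cluster = hammer2_inode_cluster(ip, HAMMER2_RESOLVE_ALWAYS);
 *      ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
 *      ...read-only access to ripdata...
 *      hammer2_inode_unlock(ip, cluster);
 *
 * Read-only paths would also pass HAMMER2_RESOLVE_SHARED, which makes
 * the lock shared and implicitly sets HAMMER2_RESOLVE_RDONLY.
 */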

/*
 * Temporarily release a lock held shared or exclusive.  Caller must
 * hold the lock shared or exclusive on call and lock will be released
 * on return.
 *
 * Restore a lock that was temporarily released.
 */
hammer2_mtx_state_t
hammer2_inode_lock_temp_release(hammer2_inode_t *ip)
{
        return hammer2_mtx_temp_release(&ip->lock);
}

void
hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, hammer2_mtx_state_t ostate)
{
        hammer2_mtx_temp_restore(&ip->lock, ostate);
}
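
/*
 * Example (sketch; hypothetical caller, not part of the original file):
 * the temp release/restore pair brackets an operation that may block
 * and must not hold ip->lock, such as the vget() in hammer2_igetv():
 *
 *      hammer2_mtx_state_t ostate;
 *
 *      ostate = hammer2_inode_lock_temp_release(ip);
 *      ...potentially blocking operation...
 *      hammer2_inode_lock_temp_restore(ip, ostate);
 *
 * The saved state restores the lock shared or exclusive, whichever way
 * it was originally held.
 */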

/*
 * Upgrade a shared inode lock to exclusive and return.  If the inode lock
 * is already held exclusively this is a NOP.
 *
 * The caller MUST hold the inode lock either shared or exclusive on call
 * and will own the lock exclusively on return.
 *
 * Returns non-zero if the lock was already exclusive prior to the upgrade.
 */
int
hammer2_inode_lock_upgrade(hammer2_inode_t *ip)
{
        int wasexclusive;

        if (mtx_islocked_ex(&ip->lock)) {
                wasexclusive = 1;
        } else {
                hammer2_mtx_unlock(&ip->lock);
                hammer2_mtx_ex(&ip->lock);
                wasexclusive = 0;
        }
        return wasexclusive;
}

/*
 * Downgrade an inode lock from exclusive to shared only if the inode
 * lock was previously shared.  If the inode lock was previously exclusive,
 * this is a NOP.
 */
void
hammer2_inode_lock_downgrade(hammer2_inode_t *ip, int wasexclusive)
{
        if (wasexclusive == 0)
                mtx_downgrade(&ip->lock);
}
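
/*
 * Example (sketch, mirroring the pattern hammer2_igetv() actually uses):
 * the upgrade/downgrade pair restores the caller's original lock state
 * via the returned wasexclusive token:
 *
 *      int wasexclusive;
 *
 *      wasexclusive = hammer2_inode_lock_upgrade(ip);
 *      ...modifications requiring the exclusive lock...
 *      hammer2_inode_lock_downgrade(ip, wasexclusive);
 */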

/*
 * Lookup an inode by inode number
 */
hammer2_inode_t *
hammer2_inode_lookup(hammer2_pfs_t *pmp, hammer2_tid_t inum)
{
        hammer2_inode_t *ip;

        KKASSERT(pmp);
        if (pmp->spmp_hmp) {
                ip = NULL;
        } else {
                hammer2_spin_ex(&pmp->inum_spin);
                ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
                if (ip)
                        hammer2_inode_ref(ip);
                hammer2_spin_unex(&pmp->inum_spin);
        }
        return(ip);
}
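
/*
 * Example (sketch; hypothetical caller, not part of the original file):
 * the lookup returns a referenced but unlocked inode, so a consumer
 * must lock it and drop the lookup ref separately:
 *
 *      ip = hammer2_inode_lookup(pmp, inum);
 *      if (ip) {
 *              hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
 *              ...
 *              hammer2_inode_unlock(ip, NULL);
 *              hammer2_inode_drop(ip);         /* lookup ref */
 *      }
 *
 * The super-root spmp cannot index inodes (inode numbers are duplicated
 * across PFSs), so the lookup always returns NULL in that case.
 */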

/*
 * Adding a ref to an inode is only legal if the inode already has at least
 * one ref.
 *
 * (can be called with spinlock held)
 */
void
hammer2_inode_ref(hammer2_inode_t *ip)
{
        atomic_add_int(&ip->refs, 1);
}

/*
 * Drop an inode reference, freeing the inode when the last reference goes
 * away.
 */
void
hammer2_inode_drop(hammer2_inode_t *ip)
{
        hammer2_pfs_t *pmp;
        hammer2_inode_t *pip;
        u_int refs;

        while (ip) {
                refs = ip->refs;
                cpu_ccfence();
                if (refs == 1) {
                        /*
                         * Transition to zero, must interlock with
                         * the inode inumber lookup tree (if applicable).
                         * It should not be possible for anyone to race
                         * the transition to 0.
                         */
                        pmp = ip->pmp;
                        KKASSERT(pmp);
                        hammer2_spin_ex(&pmp->inum_spin);

                        if (atomic_cmpset_int(&ip->refs, 1, 0)) {
                                KKASSERT(hammer2_mtx_refs(&ip->lock) == 0);
                                if (ip->flags & HAMMER2_INODE_ONRBTREE) {
                                        atomic_clear_int(&ip->flags,
                                                     HAMMER2_INODE_ONRBTREE);
                                        RB_REMOVE(hammer2_inode_tree,
                                                  &pmp->inum_tree, ip);
                                }
                                hammer2_spin_unex(&pmp->inum_spin);

                                pip = ip->pip;
                                ip->pip = NULL;
                                ip->pmp = NULL;

                                /*
                                 * Cleaning out ip->cluster isn't entirely
                                 * trivial.
                                 */
                                hammer2_inode_repoint(ip, NULL, NULL);

                                /*
                                 * We have to drop pip (if non-NULL) to
                                 * dispose of our implied reference from
                                 * ip->pip.  We can simply loop on it.
                                 */
                                kfree(ip, pmp->minode);
                                atomic_add_long(&pmp->inmem_inodes, -1);
                                ip = pip;
                                /* continue with pip (can be NULL) */
                        } else {
                                hammer2_spin_unex(&ip->pmp->inum_spin);
                        }
                } else {
                        /*
                         * Non-zero transition
                         */
                        if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
                                break;
                }
        }
}
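
/*
 * Note (editorial sketch, not part of the original file): refs and
 * drops must balance.  Because ip->pip holds an implied reference, the
 * final drop of an inode loops upward and may free a whole chain of
 * parent inodes:
 *
 *      hammer2_inode_ref(ip);
 *      ...use ip...
 *      hammer2_inode_drop(ip);         /* may cascade to ip->pip */
 */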

/*
 * Get the vnode associated with the given inode, allocating the vnode if
 * necessary.  The vnode will be returned exclusively locked.
 *
 * The caller must lock the inode (shared or exclusive).
 *
 * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
 * races.
 */
struct vnode *
hammer2_igetv(hammer2_inode_t *ip, hammer2_cluster_t *cparent, int *errorp)
{
        const hammer2_inode_data_t *ripdata;
        hammer2_pfs_t *pmp;
        struct vnode *vp;

        pmp = ip->pmp;
        KKASSERT(pmp != NULL);
        *errorp = 0;

        ripdata = &hammer2_cluster_rdata(cparent)->ipdata;

        for (;;) {
                /*
                 * Attempt to reuse an existing vnode assignment.  It is
                 * possible to race a reclaim so the vget() may fail.  The
                 * inode must be unlocked during the vget() to avoid a
                 * deadlock against a reclaim.
                 */
                int wasexclusive;

                vp = ip->vp;
                if (vp) {
                        /*
                         * Inode must be unlocked during the vget() to avoid
                         * possible deadlocks, but leave the ip ref intact.
                         *
                         * vnode is held to prevent destruction during the
                         * vget().  The vget() can still fail if we lost
                         * a reclaim race on the vnode.
                         */
                        hammer2_mtx_state_t ostate;

                        vhold(vp);
                        ostate = hammer2_inode_lock_temp_release(ip);
                        if (vget(vp, LK_EXCLUSIVE)) {
                                vdrop(vp);
                                hammer2_inode_lock_temp_restore(ip, ostate);
                                continue;
                        }
                        hammer2_inode_lock_temp_restore(ip, ostate);
                        vdrop(vp);
                        /* vp still locked and ref from vget */
                        if (ip->vp != vp) {
                                kprintf("hammer2: igetv race %p/%p\n",
                                        ip->vp, vp);
                                vput(vp);
                                continue;
                        }
                        *errorp = 0;
                        break;
                }

                /*
                 * No vnode exists, allocate a new vnode.  Beware of
                 * allocation races.  This function will return an
                 * exclusively locked and referenced vnode.
                 */
                *errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
                if (*errorp) {
                        kprintf("hammer2: igetv getnewvnode failed %d\n",
                                *errorp);
                        vp = NULL;
                        break;
                }

                /*
                 * Lock the inode and check for an allocation race.
                 */
                wasexclusive = hammer2_inode_lock_upgrade(ip);
                if (ip->vp != NULL) {
                        vp->v_type = VBAD;
                        vx_put(vp);
                        hammer2_inode_lock_downgrade(ip, wasexclusive);
                        continue;
                }

                switch (ripdata->meta.type) {
                case HAMMER2_OBJTYPE_DIRECTORY:
                        vp->v_type = VDIR;
                        break;
                case HAMMER2_OBJTYPE_REGFILE:
                        vp->v_type = VREG;
                        vinitvmio(vp, ripdata->meta.size,
                                  HAMMER2_LBUFSIZE,
                                  (int)ripdata->meta.size & HAMMER2_LBUFMASK);
                        break;
                case HAMMER2_OBJTYPE_SOFTLINK:
                        /*
                         * XXX for now we are using the generic file_read
                         * and file_write code so we need a buffer cache
                         * association.
                         */
                        vp->v_type = VLNK;
                        vinitvmio(vp, ripdata->meta.size,
                                  HAMMER2_LBUFSIZE,
                                  (int)ripdata->meta.size & HAMMER2_LBUFMASK);
                        break;
                case HAMMER2_OBJTYPE_CDEV:
                        vp->v_type = VCHR;
                        /* fall through */
                case HAMMER2_OBJTYPE_BDEV:
                        vp->v_ops = &pmp->mp->mnt_vn_spec_ops;
                        if (ripdata->meta.type != HAMMER2_OBJTYPE_CDEV)
                                vp->v_type = VBLK;
                        addaliasu(vp,
                                  ripdata->meta.rmajor,
                                  ripdata->meta.rminor);
                        break;
                case HAMMER2_OBJTYPE_FIFO:
                        vp->v_type = VFIFO;
                        vp->v_ops = &pmp->mp->mnt_vn_fifo_ops;
                        break;
                default:
                        panic("hammer2: unhandled objtype %d",
                              ripdata->meta.type);
                        break;
                }

                if (ip == pmp->iroot)
                        vsetflags(vp, VROOT);

                vp->v_data = ip;
                ip->vp = vp;
                hammer2_inode_ref(ip);          /* vp association */
                hammer2_inode_lock_downgrade(ip, wasexclusive);
                break;
        }

        /*
         * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
         */
        if (hammer2_debug & 0x0002) {
                kprintf("igetv vp %p refs 0x%08x aux 0x%08x\n",
                        vp, vp->v_refcnt, vp->v_auxrefs);
        }
        return (vp);
}
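
/*
 * Example (sketch; hypothetical VOP caller, not part of the original
 * file): igetv is called with the inode locked and a resolved cluster,
 * and returns an exclusively locked, referenced vnode:
 *
 *      hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS |
 *                             HAMMER2_RESOLVE_SHARED);
 *      cluster = hammer2_inode_cluster(ip, HAMMER2_RESOLVE_ALWAYS |
 *                                          HAMMER2_RESOLVE_SHARED);
 *      vp = hammer2_igetv(ip, cluster, &error);
 *      hammer2_inode_unlock(ip, cluster);
 */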

/*
 * Returns the inode associated with the passed-in cluster, creating the
 * inode if necessary and synchronizing it to the passed-in cluster otherwise.
 *
 * The passed-in cluster must be locked and will remain locked on return.
 * The returned inode will be locked and the caller may dispose of both
 * via hammer2_inode_unlock().  However, if the caller needs to resolve
 * a hardlink it must ref/unlock/relock/drop the inode.
 *
 * The hammer2_inode structure regulates the interface between the high level
 * kernel VNOPS API and the filesystem backend (the chains).
 *
 * On return the inode is locked with the supplied cluster.
 */
hammer2_inode_t *
hammer2_inode_get(hammer2_pfs_t *pmp, hammer2_inode_t *dip,
                  hammer2_cluster_t *cluster)
{
        hammer2_inode_t *nip;
        const hammer2_inode_data_t *iptmp;
        const hammer2_inode_data_t *nipdata;

        KKASSERT(cluster == NULL ||
                 hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
        KKASSERT(pmp);

        /*
         * Interlocked lookup/ref of the inode.  This code is only needed
         * when looking up inodes with nlinks != 0 (TODO: optimize out
         * otherwise and test for duplicates).
         *
         * Cluster can be NULL during the initial pfs allocation.
         */
again:
        while (cluster) {
                iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
                nip = hammer2_inode_lookup(pmp, iptmp->meta.inum);
                if (nip == NULL)
                        break;

                hammer2_mtx_ex(&nip->lock);

                /*
                 * Handle SMP race (not applicable to the super-root spmp
                 * which can't index inodes due to duplicative inode numbers).
                 */
                if (pmp->spmp_hmp == NULL &&
                    (nip->flags & HAMMER2_INODE_ONRBTREE) == 0) {
                        hammer2_mtx_unlock(&nip->lock);
                        hammer2_inode_drop(nip);
                        continue;
                }
                hammer2_inode_repoint(nip, NULL, cluster);

                return nip;
        }

        /*
         * We couldn't find the inode number, create a new inode.
         */
        nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO);
        spin_init(&nip->cluster_spin, "h2clspin");
        atomic_add_long(&pmp->inmem_inodes, 1);
        hammer2_pfs_memory_inc(pmp);
        hammer2_pfs_memory_wakeup(pmp);
        if (pmp->spmp_hmp)
                nip->flags = HAMMER2_INODE_SROOT;

        /*
         * Initialize nip's cluster.  A cluster is provided for normal
         * inodes but typically not for the super-root or PFS inodes.
         */
        nip->cluster.refs = 1;
        nip->cluster.pmp = pmp;
        nip->cluster.flags |= HAMMER2_CLUSTER_INODE;
        if (cluster) {
                nipdata = &hammer2_cluster_rdata(cluster)->ipdata;
                nip->meta = nipdata->meta;
                hammer2_cluster_bref(cluster, &nip->bref);
                atomic_set_int(&nip->flags, HAMMER2_INODE_METAGOOD);
                hammer2_inode_repoint(nip, NULL, cluster);
        } else {
                nip->meta.inum = 1;             /* PFS inum is always 1 XXX */
                /* mtime will be updated when a cluster is available */
                atomic_set_int(&nip->flags, HAMMER2_INODE_METAGOOD);/*XXX*/
        }

        nip->pip = dip;                         /* can be NULL */
        if (dip)
                hammer2_inode_ref(dip); /* ref dip for nip->pip */

        nip->pmp = pmp;

        /*
         * ref and lock on nip gives it state compatible to after a
         * hammer2_inode_lock() call.
         */
        nip->refs = 1;
        hammer2_mtx_init(&nip->lock, "h2inode");
        hammer2_mtx_ex(&nip->lock);
        /* combination of thread lock and chain lock == inode lock */

        /*
         * Attempt to add the inode.  If it fails we raced another inode
         * get.  Undo all the work and try again.
         */
        if (pmp->spmp_hmp == NULL) {
                hammer2_spin_ex(&pmp->inum_spin);
                if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
                        hammer2_spin_unex(&pmp->inum_spin);
                        hammer2_mtx_unlock(&nip->lock);
                        hammer2_inode_drop(nip);
                        goto again;
                }
                atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
                hammer2_spin_unex(&pmp->inum_spin);
        }

        return (nip);
}
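
/*
 * Example (sketch; hypothetical caller, not part of the original file):
 * after looking up an inode chain in a directory the front-end wraps it
 * in an in-memory inode.  The cluster remains locked across the call:
 *
 *      cluster = hammer2_cluster_lookup(cparent, &key_dummy,
 *                                       lhc, lhc, 0);
 *      if (cluster) {
 *              nip = hammer2_inode_get(dip->pmp, dip, cluster);
 *              ...nip is returned locked...
 *      }
 */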

/*
 * Create a new inode in the specified directory using the vattr to
 * figure out the type of inode.
 *
 * If no error occurs the new inode is returned with its cluster locked
 * in *clusterp, otherwise *errorp is set and NULL is returned.
 *
 * If vap and/or cred are NULL the related fields are not set and the
 * inode type defaults to a directory.  This is used when creating PFSs
 * under the super-root, so the inode number is set to 1 in this case.
 *
 * dip is not locked on entry.
 *
 * NOTE: When used to create a snapshot, the inode is temporarily associated
 *       with the super-root spmp. XXX should pass new pmp for snapshot.
 */
hammer2_inode_t *
hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
                     struct vattr *vap, struct ucred *cred,
                     const uint8_t *name, size_t name_len,
                     hammer2_cluster_t **clusterp,
                     int flags, int *errorp)
{
        const hammer2_inode_data_t *dipdata;
        hammer2_inode_data_t *nipdata;
        hammer2_cluster_t *cluster;
        hammer2_cluster_t *cparent;
        hammer2_inode_t *nip;
        hammer2_key_t key_dummy;
        hammer2_key_t lhc;
        int error;
        uid_t xuid;
        uuid_t dip_uid;
        uuid_t dip_gid;
        uint32_t dip_mode;
        uint8_t dip_comp_algo;
        uint8_t dip_check_algo;

        lhc = hammer2_dirhash(name, name_len);
        *errorp = 0;

        /*
         * Locate the inode or indirect block to create the new
         * entry in.  At the same time check for key collisions
         * and iterate until we don't get one.
         *
         * NOTE: hidden inodes do not have iterators.
         */
retry:
        hammer2_inode_lock(dip, HAMMER2_RESOLVE_ALWAYS);
        cparent = hammer2_inode_cluster(dip, HAMMER2_RESOLVE_ALWAYS);
        dipdata = &hammer2_cluster_rdata(cparent)->ipdata;
        dip_uid = dipdata->meta.uid;
        dip_gid = dipdata->meta.gid;
        dip_mode = dipdata->meta.mode;
        dip_comp_algo = dipdata->meta.comp_algo;
        dip_check_algo = dipdata->meta.check_algo;

        error = 0;
        while (error == 0) {
                cluster = hammer2_cluster_lookup(cparent, &key_dummy,
                                                 lhc, lhc, 0);
                if (cluster == NULL)
                        break;
                if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
                        error = ENOSPC;
                if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
                        error = ENOSPC;
                hammer2_cluster_unlock(cluster);
                hammer2_cluster_drop(cluster);
                cluster = NULL;
                ++lhc;
        }

        if (error == 0) {
                error = hammer2_cluster_create(trans, cparent, &cluster,
                                               lhc, 0,
                                               HAMMER2_BREF_TYPE_INODE,
                                               HAMMER2_INODE_BYTES,
                                               flags);
        }
#if INODE_DEBUG
        kprintf("CREATE INODE %*.*s chain=%p\n",
                (int)name_len, (int)name_len, name,
                (cluster ? cluster->focus : NULL));
#endif

        /*
         * Cleanup and handle retries.
         */
        if (error == EAGAIN) {
                hammer2_cluster_ref(cparent);
                hammer2_inode_unlock(dip, cparent);
                hammer2_cluster_wait(cparent);
                hammer2_cluster_drop(cparent);
                goto retry;
        }
        hammer2_inode_unlock(dip, cparent);
        cparent = NULL;

        if (error) {
                KKASSERT(cluster == NULL);
                *errorp = error;
                return (NULL);
        }

        /*
         * Set up the new inode.
         *
         * NOTE: *_get() integrates chain's lock into the inode lock.
         *
         * NOTE: Only one new inode can currently be created per
         *       transaction.  If the need arises we can adjust
         *       hammer2_trans_init() to allow more.
         *
         * NOTE: nipdata will have chain's blockset data.
         */
        KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_MODIFIED);
        nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
        nipdata->meta.inum = trans->inode_tid;
        hammer2_cluster_modsync(cluster);
        nip = hammer2_inode_get(dip->pmp, dip, cluster);
        nipdata = &hammer2_cluster_wdata(cluster)->ipdata;

        if (vap) {
                KKASSERT(trans->inodes_created == 0);
                nipdata->meta.type = hammer2_get_obj_type(vap->va_type);
                nipdata->meta.inum = trans->inode_tid;
                ++trans->inodes_created;

                switch (nipdata->meta.type) {
                case HAMMER2_OBJTYPE_CDEV:
                case HAMMER2_OBJTYPE_BDEV:
                        nipdata->meta.rmajor = vap->va_rmajor;
                        nipdata->meta.rminor = vap->va_rminor;
                        break;
                default:
                        break;
                }
        } else {
                nipdata->meta.type = HAMMER2_OBJTYPE_DIRECTORY;
                nipdata->meta.inum = 1;
        }

        /* Inherit parent's inode compression mode. */
        nip->comp_heuristic = 0;
        nipdata->meta.comp_algo = dip_comp_algo;
        nipdata->meta.check_algo = dip_check_algo;
        nipdata->meta.version = HAMMER2_INODE_VERSION_ONE;
        hammer2_update_time(&nipdata->meta.ctime);
        nipdata->meta.mtime = nipdata->meta.ctime;
        if (vap)
                nipdata->meta.mode = vap->va_mode;
        nipdata->meta.nlinks = 1;
        if (vap) {
                if (dip && dip->pmp) {
                        xuid = hammer2_to_unix_xid(&dip_uid);
                        xuid = vop_helper_create_uid(dip->pmp->mp,
                                                     dip_mode,
                                                     xuid,
                                                     cred,
                                                     &vap->va_mode);
                } else {
                        /* super-root has no dip and/or pmp */
                        xuid = 0;
                }
                if (vap->va_vaflags & VA_UID_UUID_VALID)
                        nipdata->meta.uid = vap->va_uid_uuid;
                else if (vap->va_uid != (uid_t)VNOVAL)
                        hammer2_guid_to_uuid(&nipdata->meta.uid, vap->va_uid);
                else
                        hammer2_guid_to_uuid(&nipdata->meta.uid, xuid);

                if (vap->va_vaflags & VA_GID_UUID_VALID)
                        nipdata->meta.gid = vap->va_gid_uuid;
                else if (vap->va_gid != (gid_t)VNOVAL)
                        hammer2_guid_to_uuid(&nipdata->meta.gid, vap->va_gid);
                else if (dip)
                        nipdata->meta.gid = dip_gid;
        }

        /*
         * Regular files and softlinks allow a small amount of data to be
         * directly embedded in the inode.  This flag will be cleared if
         * the size is extended past the embedded limit.
         */
        if (nipdata->meta.type == HAMMER2_OBJTYPE_REGFILE ||
            nipdata->meta.type == HAMMER2_OBJTYPE_SOFTLINK) {
                nipdata->meta.op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
        }

        KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
        bcopy(name, nipdata->filename, name_len);
        nipdata->meta.name_key = lhc;
        nipdata->meta.name_len = name_len;
        nip->meta = nipdata->meta;
        hammer2_cluster_modsync(cluster);
        *clusterp = cluster;

        return (nip);
}
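
/*
 * Example (sketch; hypothetical create-style VOP caller, not part of
 * the original file):
 *
 *      nip = hammer2_inode_create(trans, dip, vap, cred,
 *                                 name, name_len, &cluster, 0, &error);
 *      if (nip) {
 *              ...
 *              hammer2_inode_unlock(nip, cluster);
 *      }
 *
 * On success the new inode and its locked cluster are returned together
 * and are eventually put away with a single hammer2_inode_unlock().
 */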

/*
 * The cluster has been removed from the original directory and replaced
 * with a hardlink pointer.  Move the cluster to the specified parent
 * directory, change the filename to "0xINODENUMBER", and adjust the key.
 * The cluster becomes our invisible hardlink target.
 *
 * The original cluster must be deleted on entry.
 */
static
void
hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_cluster_t *cluster,
                        hammer2_inode_t *ip, hammer2_inode_t *dip,
                        hammer2_cluster_t *dcluster,
                        int nlinks, int *errorp)
{
        const hammer2_inode_data_t *iptmp;
        hammer2_inode_data_t *nipdata;
        hammer2_cluster_t *xcluster;
        hammer2_key_t key_dummy;
        hammer2_key_t lhc;
        hammer2_blockref_t bref;

        iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
        lhc = iptmp->meta.inum;
        KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);

        /*
         * Locate the inode or indirect block to create the new
         * entry in.  lhc represents the inode number so there is
         * no collision iteration.
         *
         * There should be no key collisions with invisible inode keys.
         *
         * WARNING! Must use hammer2_inode_lock() on dip to handle a stale
         *          dip->cluster cache.
         */
        *errorp = 0;
        xcluster = hammer2_cluster_lookup(dcluster, &key_dummy,
                                      lhc, lhc, 0);
        if (xcluster) {
                kprintf("X3 chain %p dip %p dchain %p dip->chain %p\n",
                        xcluster->focus, dip, dcluster->focus,
                        dip->cluster.focus);
                hammer2_cluster_unlock(xcluster);
                hammer2_cluster_drop(xcluster);
                xcluster = NULL;
                *errorp = ENOSPC;
#if 0
                Debugger("X3");
#endif
        }

        /*
         * Handle the error case
         */
        if (*errorp) {
                panic("error2");
                KKASSERT(xcluster == NULL);
                return;
        }

        /*
         * Use xcluster as a placeholder for (lhc).  Duplicate cluster to the
         * same target bref as xcluster and then delete xcluster.  The
         * duplication occurs after xcluster in flush order even though
         * xcluster is deleted after the duplication. XXX
         *
         * WARNING! Duplications (to a different parent) can cause indirect
         *          blocks to be inserted, refactor xcluster.
         *
         * WARNING! Only key and keybits are extracted from a passed-in bref.
         */
        hammer2_cluster_bref(cluster, &bref);
        bref.key = lhc;                 /* invisible dir entry key */
        bref.keybits = 0;
        hammer2_cluster_rename(trans, &bref, dcluster, cluster, 0);

        /*
         * cluster is now 'live' again.  Adjust the filename.
         *
         * Directory entries are inodes but this is a hidden hardlink
         * target.  The name isn't used but to ease debugging give it
         * a name after its inode number.
         */
        hammer2_cluster_modify(trans, cluster, 0);
        nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
        ksnprintf(nipdata->filename, sizeof(nipdata->filename),
                  "0x%016jx", (intmax_t)nipdata->meta.inum);
        nipdata->meta.name_len = strlen(nipdata->filename);
        nipdata->meta.name_key = lhc;
        nipdata->meta.nlinks += nlinks;

        /*
         * Resync ip->meta.  Some fields have to be retained.
         */
        nipdata->meta.size = ip->meta.size;
        nipdata->meta.mtime = ip->meta.mtime;
        ip->meta = nipdata->meta;

        hammer2_cluster_modsync(cluster);
}

/*
 * Connect the target inode represented by (cluster) to the media topology
 * at (dip, name, len).  The caller can pass a rough *clusterp; this
 * function will issue lookup()s to position the parent chain properly
 * for the chain insertion.
 *
 * If hlink is TRUE this function creates an OBJTYPE_HARDLINK directory
 * entry instead of connecting (cluster).
 *
 * If hlink is FALSE this function expects (cluster) to be unparented.
 */
int
hammer2_inode_connect(hammer2_trans_t *trans,
                      hammer2_inode_t *ip, hammer2_cluster_t **clusterp,
                      int hlink,
                      hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
                      const uint8_t *name, size_t name_len,
                      hammer2_key_t lhc)
{
        hammer2_inode_data_t *wipdata;
        hammer2_cluster_t *ocluster;
        hammer2_cluster_t *ncluster;
        hammer2_key_t key_dummy;
        int error;

        /*
         * Since ocluster is either disconnected from the topology or
         * represents a hardlink terminus which is always a parent of or
         * equal to dip, we should be able to safely lock dip->chain for
         * our setup.
         *
         * WARNING! Must use hammer2_inode_lock() on dip to handle a stale
         *          dip->cluster.
         *
         * If name is non-NULL we calculate lhc, else we use the passed-in
         * lhc.
         */
        ocluster = *clusterp;

        if (name) {
                lhc = hammer2_dirhash(name, name_len);

                /*
                 * Locate the inode or indirect block to create the new
                 * entry in.  At the same time check for key collisions
                 * and iterate until we don't get one.
                 */
                error = 0;
                while (error == 0) {
                        ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
                                                      lhc, lhc, 0);
                        if (ncluster == NULL)
                                break;
                        if ((lhc & HAMMER2_DIRHASH_LOMASK) ==
                            HAMMER2_DIRHASH_LOMASK) {
                                error = ENOSPC;
                        }
                        hammer2_cluster_unlock(ncluster);
                        hammer2_cluster_drop(ncluster);
                        ncluster = NULL;
                        ++lhc;
                }
        } else {
                /*
                 * Reconnect to specific key (used when moving
                 * unlinked-but-open files into the hidden directory).
                 */
                ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
                                                  lhc, lhc, 0);
                KKASSERT(ncluster == NULL);
                error = 0;
        }

        if (error == 0) {
                if (hlink) {
                        /*
                         * Hardlink pointer needed, create totally fresh
                         * directory entry.
                         *
                         * We must refactor ocluster because it might have
                         * been shifted into an indirect cluster by the
                         * create.
                         */
                        KKASSERT(ncluster == NULL);
                        error = hammer2_cluster_create(trans,
                                                       dcluster, &ncluster,
                                                       lhc, 0,
                                                       HAMMER2_BREF_TYPE_INODE,
                                                       HAMMER2_INODE_BYTES,
                                                       0);
                } else {
                        /*
                         * Reconnect the original cluster under the new name.
                         * Original cluster must have already been deleted by
                         * the caller.
                         *
                         * WARNING! Can cause held-over clusters to require a
                         *          refactor.  Fortunately we have none (our
                         *          locked clusters are passed into and
                         *          modified by the call).
                         */
                        ncluster = ocluster;
                        ocluster = NULL;
                        error = hammer2_cluster_create(trans,
                                                       dcluster, &ncluster,
                                                       lhc, 0,
                                                       HAMMER2_BREF_TYPE_INODE,
                                                       HAMMER2_INODE_BYTES,
                                                       0);
                }
        }

        /*
         * Unlock stuff.
         */
        KKASSERT(error != EAGAIN);

        /*
         * ncluster should be NULL on error, leave ocluster
         * (ocluster == *clusterp) alone.
         */
        if (error) {
                KKASSERT(ncluster == NULL);
                return (error);
        }

        /*
         * Directory entries are inodes so if the name has changed we have
         * to update the inode.
         *
         * When creating an OBJTYPE_HARDLINK entry remember to unlock the
         * cluster, the caller will access the hardlink via the actual hardlink
         * target file and not the hardlink pointer entry, so we must still
         * return ocluster.
         */
        if (hlink && hammer2_hardlink_enable >= 0) {
                /*
                 * Create the HARDLINK pointer.  oip represents the hardlink
                 * target in this situation.
                 *
                 * We will return ocluster (the hardlink target).
                 */
                hammer2_cluster_modify(trans, ncluster, 0);
                KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
                wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
                bcopy(name, wipdata->filename, name_len);
                wipdata->meta.name_key = lhc;
                wipdata->meta.name_len = name_len;
                wipdata->meta.target_type =
                            hammer2_cluster_rdata(ocluster)->ipdata.meta.type;
                wipdata->meta.type = HAMMER2_OBJTYPE_HARDLINK;
                wipdata->meta.inum =
                            hammer2_cluster_rdata(ocluster)->ipdata.meta.inum;
                wipdata->meta.version = HAMMER2_INODE_VERSION_ONE;
                wipdata->meta.nlinks = 1;
                wipdata->meta.op_flags = HAMMER2_OPFLAG_DIRECTDATA;
                hammer2_cluster_modsync(ncluster);
                hammer2_cluster_unlock(ncluster);
                hammer2_cluster_drop(ncluster);
                ncluster = ocluster;
                ocluster = NULL;
        } else {
                /*
                 * ncluster is a duplicate of ocluster at the new location.
                 * We must fixup the name stored in the inode data.
                 * The bref key has already been adjusted by inode_connect().
                 */
                hammer2_cluster_modify(trans, ncluster, 0);
                wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;

                KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
                bcopy(name, wipdata->filename, name_len);
                wipdata->meta.name_key = lhc;
                wipdata->meta.name_len = name_len;
                wipdata->meta.nlinks = 1;
                hammer2_cluster_modsync(ncluster);

                /*
                 * Resync the in-memory inode, some fields must be retained.
                 */
                if (ip) {       /* XXX move_to_hidden passes NULL */
                        wipdata->meta.size = ip->meta.size;
                        wipdata->meta.mtime = ip->meta.mtime;
                        ip->meta = wipdata->meta;
                }
        }

        /*
         * We are replacing ocluster with ncluster, unlock ocluster.  In the
         * case where ocluster is left unchanged the code above sets
         * ncluster to ocluster and ocluster to NULL, resulting in a NOP here.
         */
        if (ocluster) {
                hammer2_cluster_unlock(ocluster);
                hammer2_cluster_drop(ocluster);
        }
        *clusterp = ncluster;

        return (0);
}
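
/*
 * Example (sketch; hypothetical rename-style caller, not part of the
 * original file): reconnecting a deleted, unparented cluster under a
 * new name uses hlink == 0; creating a hardlink pointer entry uses
 * hlink == 1:
 *
 *      error = hammer2_inode_connect(trans, ip, &cluster, 0,
 *                                    tdip, tdcluster,
 *                                    name, name_len, 0);
 *
 * (tdip/tdcluster are placeholders for the target directory.)  On
 * success *clusterp is replaced with the connected cluster; for
 * hardlinks it remains the hardlink target, not the pointer entry.
 */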

/*
 * Repoint ip->cluster's chains to cluster's chains and fixup the default
 * focus.  Only valid elements are repointed.  Invalid elements have to be
 * adjusted by the appropriate slave sync threads.
 *
 * Caller must hold the inode exclusively locked; the cluster, if not
 * NULL, must also be locked.
 *
 * Cluster may be NULL to clean out any chains in ip->cluster.
 */
void
hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
                      hammer2_cluster_t *cluster)
{
        hammer2_chain_t *dropch[HAMMER2_MAXCLUSTER];
        hammer2_chain_t *ochain;
        hammer2_chain_t *nchain;
        hammer2_inode_t *opip;
        int i;

        bzero(dropch, sizeof(dropch));

        /*
         * Replace chains in ip->cluster with chains from cluster and
         * adjust the focus if necessary.
         *
         * NOTE: nchain and/or ochain can be NULL due to gaps
         *       in the cluster arrays.
         */
        hammer2_spin_ex(&ip->cluster_spin);
        for (i = 0; cluster && i < cluster->nchains; ++i) {
                /*
                 * Do not replace invalid elements as this might race
                 * syncthr replacements.
                 */
                if (cluster->array[i].flags & HAMMER2_CITEM_INVALID)
                        continue;

                /*
                 * Do not replace elements which are the same.  Also handle
                 * element count discrepancies.
                 */
                nchain = cluster->array[i].chain;
                if (i < ip->cluster.nchains) {
                        ochain = ip->cluster.array[i].chain;
                        if (ochain == nchain)
                                continue;
                } else {
                        ochain = NULL;
                }

                /*
                 * Make adjustments
                 */
                ip->cluster.array[i].chain = nchain;
                ip->cluster.array[i].flags &= ~HAMMER2_CITEM_INVALID;
                ip->cluster.array[i].flags |= cluster->array[i].flags &
                                              HAMMER2_CITEM_INVALID;
                if (nchain)
                        hammer2_chain_ref(nchain);
                dropch[i] = ochain;
        }

        /*
         * Release any left-over chains in ip->cluster.
         */
        while (i < ip->cluster.nchains) {
                nchain = ip->cluster.array[i].chain;
                if (nchain) {
                        ip->cluster.array[i].chain = NULL;
                        ip->cluster.array[i].flags |= HAMMER2_CITEM_INVALID;
                }
                dropch[i] = nchain;
                ++i;
        }

        /*
         * Fixup fields.  Note that the inode-embedded cluster is never
         * directly locked.
         */
        if (cluster) {
                ip->cluster.nchains = cluster->nchains;
                ip->cluster.focus = cluster->focus;
                ip->cluster.flags = cluster->flags & ~HAMMER2_CLUSTER_LOCKED;
        } else {
                ip->cluster.nchains = 0;
                ip->cluster.focus = NULL;
                ip->cluster.flags &= ~HAMMER2_CLUSTER_ZFLAGS;
        }

        /*
         * Repoint ip->pip if requested (non-NULL pip).
         */
        if (pip && ip->pip != pip) {
                opip = ip->pip;
                hammer2_inode_ref(pip);
                ip->pip = pip;
        } else {
                opip = NULL;
        }
        hammer2_spin_unex(&ip->cluster_spin);

        /*
         * Cleanup outside of spinlock
         */
        while (--i >= 0) {
                if (dropch[i])
                        hammer2_chain_drop(dropch[i]);
        }
        if (opip)
                hammer2_inode_drop(opip);
}
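
/*
 * Usage note (editorial sketch, not part of the original file):
 * hammer2_inode_get() uses this to synchronize an existing in-memory
 * inode with a newer cluster:
 *
 *      hammer2_inode_repoint(nip, NULL, cluster);
 *
 * Passing a NULL cluster instead disconnects all chains, which is how
 * hammer2_inode_drop() cleans out ip->cluster on final free.
 */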

/*
 * Repoint a single element from the cluster to the ip.  Used by the
 * synchronization threads to piecemeal update inodes.  Does not change
 * focus and requires inode to be re-locked to clean-up flags (XXX).
 */
void
hammer2_inode_repoint_one(hammer2_inode_t *ip, hammer2_cluster_t *cluster,
                          int idx)
{
        hammer2_chain_t *ochain;
        hammer2_chain_t *nchain;
        int i;

        hammer2_spin_ex(&ip->cluster_spin);
        KKASSERT(idx < cluster->nchains);
        if (idx < ip->cluster.nchains) {
                ochain = ip->cluster.array[idx].chain;
                nchain = cluster->array[idx].chain;
        } else {
                ochain = NULL;
                nchain = cluster->array[idx].chain;
                /*
                 * Zero out the gap entries before extending the element
                 * count (extending the count first would leave the loop
                 * below with nothing to do).
                 */
                for (i = ip->cluster.nchains; i <= idx; ++i) {
                        bzero(&ip->cluster.array[i],
                              sizeof(ip->cluster.array[i]));
                        ip->cluster.array[i].flags |= HAMMER2_CITEM_INVALID;
                }
                ip->cluster.nchains = idx + 1;
        }
        if (ochain != nchain) {
                /*
                 * Make adjustments.
                 */
                ip->cluster.array[idx].chain = nchain;
                ip->cluster.array[idx].flags &= ~HAMMER2_CITEM_INVALID;
                ip->cluster.array[idx].flags |= cluster->array[idx].flags &
                                                HAMMER2_CITEM_INVALID;
        }
        hammer2_spin_unex(&ip->cluster_spin);
        if (ochain != nchain) {
                if (nchain)
                        hammer2_chain_ref(nchain);
                if (ochain)
                        hammer2_chain_drop(ochain);
        }
}

/*
 * Unlink the file from the specified directory inode.  The directory inode
 * does not need to be locked.
 *
 * isdir determines whether a directory/non-directory check should be made.
 * No check is made if isdir is set to -1.
 *
 * isopen specifies whether special unlink-with-open-descriptor handling
 * must be performed.  If set to -1 the caller is deleting a PFS and we
 * check whether the chain is mounted or not (chain->pmp != NULL).  1 is
 * implied if it is mounted.
 *
 * If isopen is 1 and nlinks drops to 0 this function must move the chain
 * to a special hidden directory until last-close occurs on the file.
 *
 * NOTE!  The underlying file can still be active with open descriptors
 *        or if the chain is being manually held (e.g. for rename).
 *
 *        The caller is responsible for fixing up ip->chain if e.g. a
 *        rename occurs (see chain_duplicate()).
 *
 * NOTE!  The chain is not deleted if it is moved to the hidden directory,
 *        but otherwise will be deleted.
 */
int
hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
                    const uint8_t *name, size_t name_len,
                    int isdir, int *hlinkp, struct nchandle *nch,
                    int nlinks)
{
        const hammer2_inode_data_t *ripdata;
        hammer2_inode_data_t *wipdata;
        hammer2_cluster_t *cparent;
        hammer2_cluster_t *hcluster;
        hammer2_cluster_t *hparent;
        hammer2_cluster_t *cluster;
        hammer2_cluster_t *dparent;
        hammer2_cluster_t *dcluster;
        hammer2_key_t key_dummy;
        hammer2_key_t key_next;
        hammer2_key_t lhc;
        int last_link;
        int error;
        int hlink;
        uint8_t type;

        error = 0;
        hlink = 0;
        hcluster = NULL;
        hparent = NULL;
        lhc = hammer2_dirhash(name, name_len);

again:
        /*
         * Search for the filename in the directory
         */
        hammer2_inode_lock(dip, HAMMER2_RESOLVE_ALWAYS);
        cparent = hammer2_inode_cluster(dip, HAMMER2_RESOLVE_ALWAYS);
        cluster = hammer2_cluster_lookup(cparent, &key_next,
                                     lhc, lhc + HAMMER2_DIRHASH_LOMASK, 0);
        while (cluster) {
                if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
                        ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
                        if (ripdata->meta.name_len == name_len &&
                            bcmp(ripdata->filename, name, name_len) == 0) {
                                break;
                        }
                }
                cluster = hammer2_cluster_next(cparent, cluster, &key_next,
                                               key_next,
                                               lhc + HAMMER2_DIRHASH_LOMASK,
                                               0);
        }
        hammer2_inode_unlock(dip, NULL);        /* retain cparent */

        /*
         * Not found or wrong type (isdir < 0 disables the type check).
         * If a hardlink pointer, type checks use the hardlink target.
         */
        if (cluster == NULL) {
                error = ENOENT;
                goto done;
        }
        ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
        type = ripdata->meta.type;
        if (type == HAMMER2_OBJTYPE_HARDLINK) {
                hlink = 1;
                type = ripdata->meta.target_type;
        }
        if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
                error = EISDIR;
                goto done;
        }
        if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir >= 1) {
                error = ENOTDIR;
                goto done;
        }
1345
1346         /*
1347          * Hardlink must be resolved.  We can't hold the parent locked
1348          * while we do this or we could deadlock.  The physical file will
1349          * be located at or above the current directory.
1350          *
1351          * We loop to reacquire the hardlink origination.
1352          *
1353          * NOTE: hammer2_hardlink_find() will locate the hardlink target,
1354          *       returning a modified hparent and hcluster.
1355          */
1356         if (ripdata->meta.type == HAMMER2_OBJTYPE_HARDLINK) {
1357                 if (hcluster == NULL) {
1358                         hcluster = cluster;
1359                         cluster = NULL; /* safety */
1360                         hammer2_cluster_unlock(cparent);
1361                         hammer2_cluster_drop(cparent);
1362                         cparent = NULL; /* safety */
1363                         ripdata = NULL; /* safety (associated w/cparent) */
1364                         error = hammer2_hardlink_find(dip, &hparent, &hcluster);
1365
1366                         /*
1367                          * If we couldn't find the hardlink target then some
1368                          * parent directory containing the hardlink pointer
1369                          * probably got renamed to above the original target,
1370                          * a case not yet handled by H2.
1371                          */
1372                         if (error) {
1373                                 kprintf("H2 unlink_file: hardlink target for "
1374                                         "\"%s\" not found\n",
1375                                         name);
1376                                 kprintf("(likely due to known directory "
1377                                         "rename bug)\n");
1378                                 goto done;
1379                         }
1380                         goto again;
1381                 }
1382         }
1383
1384         /*
1385          * If this is a directory the directory must be empty.  However, if
1386          * isdir < 0 we are doing a rename and the directory does not have
1387          * to be empty, and if isdir > 1 we are deleting a PFS/snapshot
1388          * and the directory does not have to be empty.
1389          *
1390          * NOTE: We check the full key range here which covers both visible
1391          *       and invisible entries.  Theoretically there should be no
1392          *       invisible (hardlink target) entries if there are no visible
1393          *       entries.
1394          */
1395         if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
1396                 dparent = hammer2_cluster_lookup_init(cluster, 0);
1397                 dcluster = hammer2_cluster_lookup(dparent, &key_dummy,
1398                                                   0, (hammer2_key_t)-1,
1399                                                   HAMMER2_LOOKUP_NODATA);
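                     /*
                      * Any chain found in the full key range means the
                      * directory still has entries, visible or invisible.
                      */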
1400                 if (dcluster) {
1401                         hammer2_cluster_unlock(dcluster);
1402                         hammer2_cluster_drop(dcluster);
1403                         hammer2_cluster_lookup_done(dparent);
1404                         error = ENOTEMPTY;
1405                         goto done;
1406                 }
1407                 hammer2_cluster_lookup_done(dparent);
1408                 dparent = NULL;
1409                 /* dcluster NULL */
1410         }
1411
1412         /*
1413          * If this was a hardlink then (cparent, cluster) is the hardlink
1414          * pointer, which we can simply destroy outright.  Discard the
1415          * clusters and replace with the hardlink target.
1416          */
1417         if (hcluster) {
1418                 hammer2_cluster_delete(trans, cparent, cluster,
1419                                        HAMMER2_DELETE_PERMANENT);
1420                 hammer2_cluster_unlock(cparent);
1421                 hammer2_cluster_drop(cparent);
1422                 hammer2_cluster_unlock(cluster);
1423                 hammer2_cluster_drop(cluster);
1424                 cparent = hparent;
1425                 cluster = hcluster;
1426                 hparent = NULL;
1427                 hcluster = NULL;
1428         }
1429
1430         /*
1431          * This leaves us with the hardlink target or non-hardlinked file
1432          * or directory in (cparent, cluster).
1433          *
1434          * Delete the target when nlinks reaches 0 with special handling
1435          * to avoid I/O (to avoid actually updating the inode) for the 1->0
1436          * transition, if possible.  This optimization makes rm -rf very
1437          * fast.
1438          *
1439          * NOTE! In DragonFly the vnops function calls cache_unlink() after
1440          *       calling us here to clean out the namecache association,
1441          *       (which does not represent a ref for the open-test), and to
1442          *       force finalization of the vnode if/when the last ref gets
1443          *       dropped.
1444          *
1445          * NOTE! Files are unlinked by rename and then relinked.  nch will be
1446          *       passed as NULL in this situation.  hammer2_inode_connect()
1447          *       will bump nlinks.
1448          */
1449         KKASSERT(cluster != NULL);
1450
1451         /*
1452          * Note: nlinks is negative when decrementing, positive when
1453          *       incrementing.
1454          */
1455         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1456         last_link = (ripdata->meta.nlinks + nlinks == 0);
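             /*
              * Example: a normal remove() of a file with nlinks == 1
              * arrives here with nlinks == -1, so 1 + (-1) == 0 and
              * last_link is set.
              */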
1457
1458         if (last_link) {
1459                 /*
1460                  * Target nlinks has reached 0, file now unlinked (but may
1461                  * still be open).
1462                  *
1463                  * nlinks will be -1 for a normal remove().  If this is the
1464                  * last link we must flag the inode on deactivation. XXX race ?
1465                  */
1466                 hammer2_inode_t *ip;
1467
1468                 if (nlinks == -1) {
1469                         ip = hammer2_inode_lookup(trans->pmp,
1470                                                   ripdata->meta.inum);
1471                         if (ip) {
1472                                 atomic_set_int(&ip->flags,
1473                                                HAMMER2_INODE_ISUNLINKED);
1474                                 hammer2_inode_drop(ip);
1475                         }
1476                 }
1477
1478                 if (nch && cache_isopen(nch)) {
1479                         /*
1480                          * If an unlinked file is still open we must update
1481                          * the inode's link count.
1482                          */
1483                         hammer2_cluster_modify(trans, cluster, 0);
1484                         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1485                         ripdata = wipdata;
1486                         wipdata->meta.nlinks += nlinks;
1487                         /* XXX race */
1488                         /* XXX debugging */
1489                         if ((int64_t)wipdata->meta.nlinks < 0) {
1490                                 wipdata->meta.nlinks = 0;
1491                         }
1492                         hammer2_cluster_modsync(cluster);
1493                         hammer2_inode_move_to_hidden(trans, &cparent, &cluster,
1494                                                      wipdata->meta.inum);
1495                 } else {
1496                         /*
1497                          * This won't get everything if a vnode is still
1498                          * present, but the cache_unlink() call the caller
1499                          * makes will.
1500                          */
1501                         hammer2_cluster_delete(trans, cparent, cluster,
1502                                                HAMMER2_DELETE_PERMANENT);
1503                 }
1504         } else if (hlink == 0) {
1505                 /*
1506                  * In this situation a normal non-hardlinked file (which can
1507                  * only have nlinks == 1) still has a non-zero nlinks, so the
1508                  * caller must be doing a RENAME operation and is passing an
1509                  * nlinks adjustment of 0, wishing only to remove the file in
1510                  * order to be able to reconnect it under a different name.
1511                  *
1512                  * In this situation we do a temporary deletion of the
1513                  * chain in order to allow the file to be reconnected in
1514                  * a different location.
1515                  */
1516                 KKASSERT(nlinks == 0);
1517                 hammer2_cluster_delete(trans, cparent, cluster, 0);
1518         } else {
1519                 /*
1520                  * Links remain, must update the inode link count.
1521                  */
1522                 hammer2_cluster_modify(trans, cluster, 0);
1523                 wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1524                 ripdata = wipdata;
1525                 wipdata->meta.nlinks += nlinks;
1526                 /* XXX debugging */
1527                 if ((int64_t)wipdata->meta.nlinks < 0) {
1528                         wipdata->meta.nlinks = 0;
1529                 }
1530                 hammer2_cluster_modsync(cluster);
1531         }
1532
1533         error = 0;
1534 done:
1535         if (cparent) {
1536                 hammer2_cluster_unlock(cparent);
1537                 hammer2_cluster_drop(cparent);
1538         }
1539         if (cluster) {
1540                 hammer2_cluster_unlock(cluster);
1541                 hammer2_cluster_drop(cluster);
1542         }
1543         if (hparent) {
1544                 hammer2_cluster_unlock(hparent);
1545                 hammer2_cluster_drop(hparent);
1546         }
1547         if (hcluster) {
1548                 hammer2_cluster_unlock(hcluster);
1549                 hammer2_cluster_drop(hcluster);
1550         }
1551         if (hlinkp)
1552                 *hlinkp = hlink;
1553
1554         return error;
1555 }
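     /*
      * Usage sketch (hypothetical, for illustration only): a front-end
      * remove operation would wrap this routine in a transaction and
      * then call cache_unlink() as noted above.  The call below assumes
      * this routine's signature matches the parameters used in its body:
      *
      *      hammer2_trans_init(&trans, dip->pmp, 0);
      *      error = hammer2_unlink_file(&trans, dip, name, name_len,
      *                                  0, &hlink, nch, -1);
      *      hammer2_trans_done(&trans);
      *      if (error == 0)
      *              cache_unlink(nch);
      */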
1556
1557 /*
1558  * This is called from the mount code to initialize pmp->ihidden
1559  */
1560 void
1561 hammer2_inode_install_hidden(hammer2_pfs_t *pmp)
1562 {
1563         hammer2_trans_t trans;
1564         hammer2_cluster_t *cparent;
1565         hammer2_cluster_t *cluster;
1566         hammer2_cluster_t *scan;
1567         const hammer2_inode_data_t *ripdata;
1568         hammer2_inode_data_t *wipdata;
1569         hammer2_key_t key_dummy;
1570         hammer2_key_t key_next;
1571         int error;
1572         int count;
1573         int dip_check_algo;
1574         int dip_comp_algo;
1575
1576         if (pmp->ihidden)
1577                 return;
1578
1579         /*
1580          * Find the hidden directory
1581          */
1582         bzero(&key_dummy, sizeof(key_dummy));
1583         hammer2_trans_init(&trans, pmp, 0);
1584
1585         /*
1586          * Set up the lookup and retrieve iroot's check and compression
1587          * algorithm requests, which were likely generated by newfs_hammer2.
1588          *
1589          * The check/comp fields will probably never be used since inodes
1590          * are renamed into the hidden directory and not created relative to
1591          * the hidden directory, chain creation inherits from bref.methods,
1592          * and data chains inherit from their respective file inode *_algo
1593          * fields.
1594          */
1595         hammer2_inode_lock(pmp->iroot, HAMMER2_RESOLVE_ALWAYS);
1596         cparent = hammer2_inode_cluster(pmp->iroot, HAMMER2_RESOLVE_ALWAYS);
1597         ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
1598         dip_check_algo = ripdata->meta.check_algo;
1599         dip_comp_algo = ripdata->meta.comp_algo;
1600         ripdata = NULL;
1601
1602         cluster = hammer2_cluster_lookup(cparent, &key_dummy,
1603                                          HAMMER2_INODE_HIDDENDIR,
1604                                          HAMMER2_INODE_HIDDENDIR,
1605                                          0);
1606         if (cluster) {
1607                 pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
1608                 hammer2_inode_ref(pmp->ihidden);
1609
1610                 /*
1611                  * Remove any unlinked files which were left open as of
1612                  * a prior system crash.
1613                  *
1614                  * Don't pass NODATA, we need the inode data so the delete
1615                  * can do proper statistics updates.
1616                  */
1617                 count = 0;
1618                 scan = hammer2_cluster_lookup(cluster, &key_next,
1619                                               0, HAMMER2_TID_MAX, 0);
1620                 while (scan) {
1621                         if (hammer2_cluster_type(scan) ==
1622                             HAMMER2_BREF_TYPE_INODE) {
1623                                 hammer2_cluster_delete(&trans, cluster, scan,
1624                                                    HAMMER2_DELETE_PERMANENT);
1625                                 ++count;
1626                         }
1627                         scan = hammer2_cluster_next(cluster, scan, &key_next,
1628                                                     0, HAMMER2_TID_MAX, 0);
1629                 }
1630
1631                 hammer2_inode_unlock(pmp->ihidden, cluster);
1632                 hammer2_inode_unlock(pmp->iroot, cparent);
1633                 hammer2_trans_done(&trans);
1634                 kprintf("hammer2: PFS loaded hidden dir, "
1635                         "removed %d dead entries\n", count);
1636                 return;
1637         }
1638
1639         /*
1640          * Create the hidden directory
1641          */
1642         error = hammer2_cluster_create(&trans, cparent, &cluster,
1643                                        HAMMER2_INODE_HIDDENDIR, 0,
1644                                        HAMMER2_BREF_TYPE_INODE,
1645                                        HAMMER2_INODE_BYTES,
1646                                        0);
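             /*
              * Note: creation of the hidden directory on the PFS root is
              * expected to succeed here; the returned error is not
              * otherwise acted upon.
              */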
1647         hammer2_inode_unlock(pmp->iroot, cparent);
1648
1649         hammer2_cluster_modify(&trans, cluster, 0);
1650         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1651         wipdata->meta.type = HAMMER2_OBJTYPE_DIRECTORY;
1652         wipdata->meta.inum = HAMMER2_INODE_HIDDENDIR;
1653         wipdata->meta.nlinks = 1;
1654         wipdata->meta.comp_algo = dip_comp_algo;
1655         wipdata->meta.check_algo = dip_check_algo;
1656         hammer2_cluster_modsync(cluster);
1657         kprintf("hammer2: PFS root missing hidden directory, creating\n");
1658
1659         pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
1660         hammer2_inode_ref(pmp->ihidden);
1661         hammer2_inode_unlock(pmp->ihidden, cluster);
1662         hammer2_trans_done(&trans);
1663 }
1664
1665 /*
1666  * If an open file is unlinked H2 needs to retain the file in the topology
1667  * to ensure that its backing store is not recovered by the bulk free scan.
1668  * This also allows us to avoid having to special-case the CHAIN_DELETED flag.
1669  *
1670  * To do this the file is moved to a hidden directory in the PFS root and
1671  * renamed.  The hidden directory must be created if it does not exist.
1672  */
1673 static
1674 void
1675 hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
1676                              hammer2_cluster_t **cparentp,
1677                              hammer2_cluster_t **clusterp,
1678                              hammer2_tid_t inum)
1679 {
1680         hammer2_cluster_t *dcluster;
1681         hammer2_pfs_t *pmp;
1682         int error;
1683
1684         pmp = (*clusterp)->pmp;
1685         KKASSERT(pmp != NULL);
1686         KKASSERT(pmp->ihidden != NULL);
1687
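             /*
              * The move is a deletion from the original location followed
              * by a reconnect under pmp->ihidden, keyed by the inode
              * number so unlinked-but-open files cannot collide.
              */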
1688         hammer2_cluster_delete(trans, *cparentp, *clusterp, 0);
1689         hammer2_inode_lock(pmp->ihidden, HAMMER2_RESOLVE_ALWAYS);
1690         dcluster = hammer2_inode_cluster(pmp->ihidden, HAMMER2_RESOLVE_ALWAYS);
1691         error = hammer2_inode_connect(trans,
1692                                       NULL/*XXX*/, clusterp, 0,
1693                                       pmp->ihidden, dcluster,
1694                                       NULL, 0, inum);
1695         hammer2_inode_unlock(pmp->ihidden, dcluster);
1696         KKASSERT(error == 0);
1697 }
1698
1699 /*
1700  * Given an exclusively locked inode and cluster we consolidate the cluster
1701  * for hardlink creation, adding (nlinks) to the file's link count and
1702  * potentially relocating the inode to (cdip) which is a parent directory
1703  * common to both the current location of the inode and the intended new
1704  * hardlink.
1705  *
1706  * Replaces (*clusterp) if consolidation occurred, unlocking the old cluster
1707  * and returning a new locked cluster.
1708  *
1709  * NOTE!  This function will also replace ip->cluster.
1710  */
1711 int
1712 hammer2_hardlink_consolidate(hammer2_trans_t *trans,
1713                              hammer2_inode_t *ip,
1714                              hammer2_cluster_t **clusterp,
1715                              hammer2_inode_t *cdip,
1716                              hammer2_cluster_t *cdcluster,
1717                              int nlinks)
1718 {
1719         const hammer2_inode_data_t *ripdata;
1720         hammer2_inode_data_t *wipdata;
1721         hammer2_cluster_t *cluster;
1722         hammer2_cluster_t *cparent;
1723         int error;
1724
1725         cluster = *clusterp;
1726         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
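             /*
              * An inode whose name_key has HAMMER2_DIRHASH_VISIBLE set is
              * a normal, directly-linked file, so if no link count
              * adjustment is requested there is nothing to consolidate.
              */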
1727         if (nlinks == 0 &&                      /* no hardlink needed */
1728             (ripdata->meta.name_key & HAMMER2_DIRHASH_VISIBLE)) {
1729                 return (0);
1730         }
1731
1732         if (hammer2_hardlink_enable == 0) {     /* disallow hardlinks */
1733                 hammer2_cluster_unlock(cluster);
1734                 hammer2_cluster_drop(cluster);
1735                 *clusterp = NULL;
1736                 return (ENOTSUP);
1737         }
1738
1739         cparent = NULL;
1740
1741         /*
1742          * If no change in the hardlink's target directory is required and
1743          * this is already a hardlink target, all we need to do is adjust
1744          * the link count.
1745          */
1746         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1747         if (cdip == ip->pip &&
1748             (ripdata->meta.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
1749                 if (nlinks) {
1750                         hammer2_cluster_modify(trans, cluster, 0);
1751                         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1752                         wipdata->meta.nlinks += nlinks;
1753                         hammer2_cluster_modsync(cluster);
1754                         ripdata = wipdata;
1755                 }
1756                 error = 0;
1757                 goto done;
1758         }
1759
1760         /*
1761          * Cluster is the real inode.  The originating directory is locked
1762          * by the caller so we can manipulate it without worrying about races
1763          * against other lookups.
1764          *
1765          * If cluster is visible we need to delete it from the current
1766          * location and create a hardlink pointer in its place.  If it is
1767          * not visible we need only delete it.  Later the cluster will be
1768          * renamed to a parent directory and converted (if necessary) to
1769          * a hidden inode (via shiftup).
1770          *
1771          * NOTE! We must hold cparent locked through the delete/create/rename
1772          *       operation to ensure that other threads block resolving to
1773          *       the same hardlink, otherwise the other threads may not see
1774          *       the hardlink.
1775          */
1776         KKASSERT((cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0);
1777         cparent = hammer2_cluster_parent(cluster);
1778
1779         hammer2_cluster_delete(trans, cparent, cluster, 0);
1780
1781         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1782         KKASSERT(ripdata->meta.type != HAMMER2_OBJTYPE_HARDLINK);
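             /*
              * The replacement hardlink pointer created below is a mostly
              * zeroed-out inode retaining just the filename, the inode
              * number, and the target type needed to resolve back to the
              * real inode.
              */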
1783         if (ripdata->meta.name_key & HAMMER2_DIRHASH_VISIBLE) {
1784                 hammer2_cluster_t *ncluster;
1785                 hammer2_key_t lhc;
1786
1787                 ncluster = NULL;
1788                 lhc = cluster->focus->bref.key;
1789                 error = hammer2_cluster_create(trans, cparent, &ncluster,
1790                                              lhc, 0,
1791                                              HAMMER2_BREF_TYPE_INODE,
1792                                              HAMMER2_INODE_BYTES,
1793                                              0);
1794                 hammer2_cluster_modify(trans, ncluster, 0);
1795                 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1796
1797                 /* wipdata->comp_algo = ripdata->comp_algo; */
1798                 wipdata->meta.comp_algo = 0;
1799                 wipdata->meta.check_algo = 0;
1800                 wipdata->meta.version = HAMMER2_INODE_VERSION_ONE;
1801                 wipdata->meta.inum = ripdata->meta.inum;
1802                 wipdata->meta.target_type = ripdata->meta.type;
1803                 wipdata->meta.type = HAMMER2_OBJTYPE_HARDLINK;
1804                 wipdata->meta.uflags = 0;
1805                 wipdata->meta.rmajor = 0;
1806                 wipdata->meta.rminor = 0;
1807                 wipdata->meta.ctime = 0;
1808                 wipdata->meta.mtime = 0;
1809                 wipdata->meta.atime = 0;
1810                 wipdata->meta.btime = 0;
1811                 bzero(&wipdata->meta.uid, sizeof(wipdata->meta.uid));
1812                 bzero(&wipdata->meta.gid, sizeof(wipdata->meta.gid));
1813                 wipdata->meta.op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1814                 wipdata->meta.cap_flags = 0;
1815                 wipdata->meta.mode = 0;
1816                 wipdata->meta.size = 0;
1817                 wipdata->meta.nlinks = 1;
1818                 wipdata->meta.iparent = 0;      /* XXX */
1819                 wipdata->meta.pfs_type = 0;
1820                 wipdata->meta.pfs_inum = 0;
1821                 bzero(&wipdata->meta.pfs_clid, sizeof(wipdata->meta.pfs_clid));
1822                 bzero(&wipdata->meta.pfs_fsid, sizeof(wipdata->meta.pfs_fsid));
1823                 wipdata->meta.data_quota = 0;
1824                 /* wipdata->data_count = 0; */
1825                 wipdata->meta.inode_quota = 0;
1826                 /* wipdata->inode_count = 0; */
1827                 wipdata->meta.attr_tid = 0;
1828                 wipdata->meta.dirent_tid = 0;
1829                 bzero(&wipdata->u, sizeof(wipdata->u));
1830                 bcopy(ripdata->filename, wipdata->filename,
1831                       ripdata->meta.name_len);
1832                 wipdata->meta.name_key = ncluster->focus->bref.key;
1833                 wipdata->meta.name_len = ripdata->meta.name_len;
1834                 /* XXX transaction ids */
1835                 hammer2_cluster_modsync(ncluster);
1836                 hammer2_cluster_unlock(ncluster);
1837                 hammer2_cluster_drop(ncluster);
1838                 ripdata = wipdata;      /* wipdata only valid here */
1839         }
1840
1841         /*
1842          * cluster represents the hardlink target and is now flagged deleted.
1843          * Duplicate it to the parent directory and adjust nlinks.
1844          *
1845          * WARNING! The shiftup() call can cause the cluster to be moved
1846          *          into an indirect block, and our cluster will wind up
1847          *          pointing to the older/original version.
1848          */
1849         KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_DELETED);
1850         hammer2_hardlink_shiftup(trans, cluster, ip, cdip, cdcluster,
1851                                  nlinks, &error);
1852
1853         if (error == 0)
1854                 hammer2_inode_repoint(ip, cdip, cluster);
1855
1856 done:
1857         /*
1858          * Cleanup, cluster/ncluster already dealt with.
1859          *
1860          * Return the shifted cluster in *clusterp.
1861          */
1862         if (cparent) {
1863                 hammer2_cluster_unlock(cparent);
1864                 hammer2_cluster_drop(cparent);
1865         }
1866         *clusterp = cluster;
1867
1868         return (error);
1869 }
1870
1871 /*
1872  * If (*ochainp) is non-NULL it points to the forward OBJTYPE_HARDLINK
1873  * inode while (*chainp) points to the resolved (hidden hardlink
1874  * target) inode.  In this situation when nlinks is 1 we wish to
1875  * deconsolidate the hardlink, moving it back to the directory that now
1876  * represents the only remaining link.
1877  */
1878 int
1879 hammer2_hardlink_deconsolidate(hammer2_trans_t *trans,
1880                                hammer2_inode_t *dip,
1881                                hammer2_chain_t **chainp,
1882                                hammer2_chain_t **ochainp)
1883 {
1884         if (*ochainp == NULL)
1885                 return (0);
1886         /* XXX */
1887         return (0);
1888 }
1889
1890 /*
1891  * The caller presents a locked cluster with an obj_type of
1892  * HAMMER2_OBJTYPE_HARDLINK in (*clusterp).  This routine will locate
1893  * the inode and replace (*clusterp) with a new locked cluster containing
1894  * the target hardlink, also locked.  The original cluster will be
1895  * unlocked and released.
1896  *
1897  * If cparentp is not NULL a locked cluster representing the hardlink's
1898  * parent is also returned.
1899  *
1900  * If we are unable to locate the hardlink target EIO is returned,
1901  * (*cparentp) is set to NULL, the original passed-in (*clusterp)
1902  * will be unlocked and released and (*clusterp) will be set to NULL
1903  * as well.
1904  */
1905 int
1906 hammer2_hardlink_find(hammer2_inode_t *dip,
1907                       hammer2_cluster_t **cparentp,
1908                       hammer2_cluster_t **clusterp)
1909 {
1910         const hammer2_inode_data_t *ipdata;
1911         hammer2_cluster_t *cluster;
1912         hammer2_cluster_t *cparent;
1913         hammer2_cluster_t *rcluster;
1914         hammer2_inode_t *ip;
1915         hammer2_inode_t *pip;
1916         hammer2_key_t key_dummy;
1917         hammer2_key_t lhc;
1918
1919         cluster = *clusterp;
1920         pip = dip;
1921         hammer2_inode_ref(pip);         /* for loop */
1922
1923         /*
1924          * Locate the hardlink.  pip is referenced and not locked.
1925          * Unlock and release (*clusterp) after extracting the needed
1926          * data.
1927          */
1928         ipdata = &hammer2_cluster_rdata(cluster)->ipdata;
1929         lhc = ipdata->meta.inum;
1930         ipdata = NULL;                  /* safety */
1931         hammer2_cluster_unlock(cluster);
1932         hammer2_cluster_drop(cluster);
1933         *clusterp = NULL;               /* safety */
1934
1935         rcluster = NULL;
1936         cparent = NULL;
1937
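             /*
              * Scan upward from dip: each iteration locks the current
              * directory and looks for an inode keyed exactly at the
              * target inode number, stopping at the first hit.
              */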
1938         while ((ip = pip) != NULL) {
1939                 hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
1940                 cparent = hammer2_inode_cluster(ip, HAMMER2_RESOLVE_ALWAYS);
1941                 hammer2_inode_drop(ip);                 /* loop */
1942                 KKASSERT(hammer2_cluster_type(cparent) ==
1943                          HAMMER2_BREF_TYPE_INODE);
1944                 rcluster = hammer2_cluster_lookup(cparent, &key_dummy,
1945                                              lhc, lhc, 0);
1946                 if (rcluster)
1947                         break;
1948                 hammer2_cluster_lookup_done(cparent);   /* discard parent */
1949                 cparent = NULL;                         /* safety */
1950                 pip = ip->pip;          /* safe, ip held locked */
1951                 if (pip)
1952                         hammer2_inode_ref(pip);         /* loop */
1953                 hammer2_inode_unlock(ip, NULL);
1954         }
1955
1956         /*
1957          * rcluster is locked, ip is locked.  Unlock ip and return the
1958          * locked cluster in (*clusterp).
1959          *
1960          * (cparent is returned locked in *cparentp, or unlocked here).
1961          */
1962         *clusterp = rcluster;
1963         if (rcluster) {
1964                 if (cparentp) {
1965                         *cparentp = cparent;
1966                         hammer2_inode_unlock(ip, NULL);
1967                 } else {
1968                         hammer2_inode_unlock(ip, cparent);
1969                 }
1970                 return (0);
1971         } else {
1972                 if (cparentp)
1973                         *cparentp = NULL;
1974                 if (ip)
1975                         hammer2_inode_unlock(ip, cparent);
1976                 return (EIO);
1977         }
1978 }
1979
1980 /*
1981  * Find the directory common to both fdip and tdip.
1982  *
1983  * Returns a held but not locked inode.  Caller typically locks the inode,
1984  * and when through unlocks AND drops it.
1985  */
1986 hammer2_inode_t *
1987 hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
1988 {
1989         hammer2_inode_t *scan1;
1990         hammer2_inode_t *scan2;
1991
1992         /*
1993          * We used to have a depth field but it complicated matters too
1994          * much for directory renames.  So now it's ugly.  Check for
1995          * simple cases before giving up and doing it the expensive way.
1996          *
1997          * XXX need a bottom-up topology stability lock
1998          */
1999         if (fdip == tdip || fdip == tdip->pip) {
2000                 hammer2_inode_ref(fdip);
2001                 return(fdip);
2002         }
2003         if (fdip->pip == tdip) {
2004                 hammer2_inode_ref(tdip);
2005                 return(tdip);
2006         }
2007
2008         /*
2009          * XXX not MPSAFE
2010          */
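             /*
              * Brute force O(depth^2) walk: for each ancestor of fdip,
              * scan all ancestors of tdip for a match.  E.g. fdip at
              * /a/b/c and tdip at /a/d meet at /a.
              */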
2011         for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
2012                 scan2 = tdip;
2013                 while (scan2->pmp == tdip->pmp) {
2014                         if (scan1 == scan2) {
2015                                 hammer2_inode_ref(scan1);
2016                                 return(scan1);
2017                         }
2018                         scan2 = scan2->pip;
2019                         if (scan2 == NULL)
2020                                 break;
2021                 }
2022         }
2023         panic("hammer2_inode_common_parent: no common parent %p %p\n",
2024               fdip, tdip);
2025         /* NOT REACHED */
2026         return(NULL);
2027 }
2028
2029 /*
2030  * Synchronize the inode's frontend state with the chain state prior
2031  * to any explicit flush of the inode or any strategy write call.
2032  *
2033  * Called with a locked inode.
2034  */
2035 void
2036 hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip, 
2037                     hammer2_cluster_t *cparent)
2038 {
2039         const hammer2_inode_data_t *ripdata;
2040         hammer2_inode_data_t *wipdata;
2041         hammer2_cluster_t *dparent;
2042         hammer2_cluster_t *cluster;
2043         hammer2_key_t lbase;
2044         hammer2_key_t key_next;
2045         int dosync = 0;
2046
2047         ripdata = &hammer2_cluster_rdata(cparent)->ipdata;    /* target file */
2048
2049         if (ip->flags & HAMMER2_INODE_MTIME) {
2050                 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
2051                 atomic_clear_int(&ip->flags, HAMMER2_INODE_MTIME);
2052                 wipdata->meta.mtime = ip->meta.mtime;
2053                 dosync = 1;
2054                 ripdata = wipdata;
2055         }
2056         if ((ip->flags & HAMMER2_INODE_RESIZED) &&
2057             ip->meta.size < ripdata->meta.size) {
2058                 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
2059                 wipdata->meta.size = ip->meta.size;
2060                 dosync = 1;
2061                 ripdata = wipdata;
2062                 atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);
2063
2064                 /*
2065                  * We must delete any chains beyond the EOF.  The chain
2066                  * straddling the EOF will be pending in the bioq.
2067                  */
2068                 lbase = (ripdata->meta.size + HAMMER2_PBUFMASK64) &
2069                         ~HAMMER2_PBUFMASK64;
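                     /*
                      * lbase is the new EOF rounded up to a physical
                      * buffer boundary (e.g. with 64KB buffers a 100000
                      * byte file yields lbase 131072).  Whole chains at
                      * or beyond lbase are deleted outright.
                      */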
2070                 dparent = hammer2_cluster_lookup_init(&ip->cluster, 0);
2071                 cluster = hammer2_cluster_lookup(dparent, &key_next,
2072                                                  lbase, (hammer2_key_t)-1,
2073                                                  HAMMER2_LOOKUP_NODATA);
2074                 while (cluster) {
2075                         /*
2076                          * Degenerate embedded case, nothing to loop on
2077                          */
2078                         switch (hammer2_cluster_type(cluster)) {
2079                         case HAMMER2_BREF_TYPE_INODE:
2080                                 hammer2_cluster_unlock(cluster);
2081                                 hammer2_cluster_drop(cluster);
2082                                 cluster = NULL;
2083                                 break;
2084                         case HAMMER2_BREF_TYPE_DATA:
2085                                 hammer2_cluster_delete(trans, dparent, cluster,
2086                                                    HAMMER2_DELETE_PERMANENT);
2087                                 /* fall through */
2088                         default:
2089                                 cluster = hammer2_cluster_next(dparent, cluster,
2090                                                    &key_next,
2091                                                    key_next, (hammer2_key_t)-1,
2092                                                    HAMMER2_LOOKUP_NODATA);
2093                                 break;
2094                         }
2095                 }
2096                 hammer2_cluster_lookup_done(dparent);
2097         } else
2098         if ((ip->flags & HAMMER2_INODE_RESIZED) &&
2099             ip->meta.size > ripdata->meta.size) {
2100                 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
2101                 wipdata->meta.size = ip->meta.size;
2102                 atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);
2103
2104                 /*
2105                  * When resizing larger we may not have any direct-data
2106                  * available.
2107                  */
2108                 if ((wipdata->meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
2109                     ip->meta.size > HAMMER2_EMBEDDED_BYTES) {
2110                         wipdata->meta.op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
2111                         bzero(&wipdata->u.blockset,
2112                               sizeof(wipdata->u.blockset));
2113                 }
2114                 dosync = 1;
2115                 ripdata = wipdata;
2116         }
2117         if (dosync)
2118                 hammer2_cluster_modsync(cparent);
2119 }