6f97bc0d1e9b99df262a0bf909a5e9a27e556bbb
[dragonfly.git] / sys / vfs / hammer2 / hammer2_inode.c
1 /*
2  * Copyright (c) 2011-2014 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
39 #include <sys/lock.h>
40 #include <sys/uuid.h>
41
42 #include "hammer2.h"
43
44 #define INODE_DEBUG     0
45
46 static void hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
47                                          hammer2_cluster_t **cparentp,
48                                          hammer2_cluster_t **clusterp,
49                                          hammer2_tid_t inum);
50
51 RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
52              hammer2_tid_t, meta.inum);
53
54 int
55 hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
56 {
57         if (ip1->meta.inum < ip2->meta.inum)
58                 return(-1);
59         if (ip1->meta.inum > ip2->meta.inum)
60                 return(1);
61         return(0);
62 }
63
64 /*
65  * HAMMER2 inode locks
66  *
67  * HAMMER2 offers shared and exclusive locks on inodes.  Pass a mask of
68  * flags for options:
69  *
70  *      - pass HAMMER2_RESOLVE_SHARED if a shared lock is desired.  The
71  *        inode locking function will automatically set the RDONLY flag.
72  *
73  *      - pass HAMMER2_RESOLVE_ALWAYS if you need the inode's meta-data.
74  *        Most front-end inode locks do.
75  *
76  *      - pass HAMMER2_RESOLVE_NEVER if you do not want to require that
77  *        the inode data be resolved.  This is used by the syncthr because
78  *        it can run on an unresolved/out-of-sync cluster, and also by the
79  *        vnode reclamation code to avoid unnecessary I/O (particularly when
80  *        disposing of hundreds of thousands of cached vnodes).
81  *
82  * The inode locking function locks the inode itself, resolves any stale
83  * chains in the inode's cluster, and allocates a fresh copy of the
84  * cluster with 1 ref and all the underlying chains locked.
85  *
86  * ip->cluster will be stable while the inode is locked.
87  *
88  * NOTE: We don't combine the inode/chain lock because putting away an
89  *       inode would otherwise confuse multiple lock holders of the inode.
90  *
91  * NOTE: In-memory inodes always point to hardlink targets (the actual file),
92  *       and never point to a hardlink pointer.
93  *
94  * NOTE: If caller passes HAMMER2_RESOLVE_RDONLY the exclusive locking code
95  *       will feel free to reduce the chain set in the cluster as an
96  *       optimization.  It will still be validated against the quorum if
97  *       appropriate, but the optimization might be able to reduce data
98  *       accesses to one node.  This flag is automatically set if the inode
99  *       is locked with HAMMER2_RESOLVE_SHARED.
100  */
101 hammer2_cluster_t *
102 hammer2_inode_lock(hammer2_inode_t *ip, int how)
103 {
104         hammer2_cluster_t *cluster;
105
106         hammer2_inode_ref(ip);
107
108         /* 
109          * Inode structure mutex
110          */
111         if (how & HAMMER2_RESOLVE_SHARED) {
112                 how |= HAMMER2_RESOLVE_RDONLY;
113                 hammer2_mtx_sh(&ip->lock);
114         } else {
115                 hammer2_mtx_ex(&ip->lock);
116         }
117
118         /*
119          * Create a copy of ip->cluster and lock it.  Note that the copy
120          * will have a ref on the cluster AND its chains and we don't want
121          * a second ref to either when we lock it.
122          *
123          * The copy will not have a focus until it is locked.
124          *
125          * Exclusive inode locks set the template focus chain in (ip)
126          * as a hint.  Cluster locks can ALWAYS replace the focus in the
127          * working copy if the hint does not work out, so beware.
128          */
129         cluster = hammer2_cluster_copy(&ip->cluster);
130         hammer2_cluster_lock(cluster, how);
131         hammer2_cluster_resolve(cluster);
132
133         /*
134          * cluster->focus will be set if resolving RESOLVE_ALWAYS, but
135          * only update the cached focus in the inode structure when taking
136          * out an exclusive lock.
137          */
138         if ((how & HAMMER2_RESOLVE_SHARED) == 0)
139                 ip->cluster.focus = cluster->focus;
140
141         /*
142          * Initialize pmp->inode_tid and pmp->modify_tid on first access
143          * to the root of mount that resolves good.
144          * XXX probably not the best place for this.
145          */
146         if (ip->pmp->inode_tid == 0 &&
147             cluster->error == 0 && cluster->focus) {
148                 const hammer2_inode_data_t *ripdata;
149                 hammer2_pfs_t *pmp = ip->pmp;
150                 hammer2_blockref_t bref;
151
152                 ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
153                 hammer2_cluster_bref(cluster, &bref);
154                 pmp->inode_tid = ripdata->meta.pfs_inum + 1;
155                 pmp->modify_tid = bref.modify_tid;
156                 kprintf("PMP focus good set nextino=%ld mod=%016jx\n",
157                         pmp->inode_tid, pmp->modify_tid);
158
159         }
160         return (cluster);
161 }
162
163 void
164 hammer2_inode_unlock(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
165 {
166         if (cluster) {
167                 hammer2_cluster_unlock(cluster);
168                 hammer2_cluster_drop(cluster);
169         }
170         hammer2_mtx_unlock(&ip->lock);
171         hammer2_inode_drop(ip);
172 }
173
174 /*
175  * Temporarily release a lock held shared or exclusive.  Caller must
176  * hold the lock shared or exclusive on call and lock will be released
177  * on return.
178  *
179  * Restore a lock that was temporarily released.
180  */
181 hammer2_mtx_state_t
182 hammer2_inode_lock_temp_release(hammer2_inode_t *ip)
183 {
184         return hammer2_mtx_temp_release(&ip->lock);
185 }
186
/*
 * Re-acquire the inode lock in the state (ostate) previously returned
 * by hammer2_inode_lock_temp_release().
 */
void
hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, hammer2_mtx_state_t ostate)
{
	hammer2_mtx_temp_restore(&ip->lock, ostate);
}
192
193 /*
194  * Upgrade a shared inode lock to exclusive and return.  If the inode lock
195  * is already held exclusively this is a NOP.
196  *
197  * The caller MUST hold the inode lock either shared or exclusive on call
198  * and will own the lock exclusively on return.
199  *
200  * Returns non-zero if the lock was already exclusive prior to the upgrade.
201  */
202 int
203 hammer2_inode_lock_upgrade(hammer2_inode_t *ip)
204 {
205         int wasexclusive;
206
207         if (mtx_islocked_ex(&ip->lock)) {
208                 wasexclusive = 1;
209         } else {
210                 hammer2_mtx_unlock(&ip->lock);
211                 hammer2_mtx_ex(&ip->lock);
212                 wasexclusive = 0;
213         }
214         return wasexclusive;
215 }
216
217 /*
218  * Downgrade an inode lock from exclusive to shared only if the inode
219  * lock was previously shared.  If the inode lock was previously exclusive,
220  * this is a NOP.
221  */
222 void
223 hammer2_inode_lock_downgrade(hammer2_inode_t *ip, int wasexclusive)
224 {
225         if (wasexclusive == 0)
226                 mtx_downgrade(&ip->lock);
227 }
228
229 /*
230  * Lookup an inode by inode number
231  */
232 hammer2_inode_t *
233 hammer2_inode_lookup(hammer2_pfs_t *pmp, hammer2_tid_t inum)
234 {
235         hammer2_inode_t *ip;
236
237         KKASSERT(pmp);
238         if (pmp->spmp_hmp) {
239                 ip = NULL;
240         } else {
241                 hammer2_spin_ex(&pmp->inum_spin);
242                 ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
243                 if (ip)
244                         hammer2_inode_ref(ip);
245                 hammer2_spin_unex(&pmp->inum_spin);
246         }
247         return(ip);
248 }
249
250 /*
251  * Adding a ref to an inode is only legal if the inode already has at least
252  * one ref.
253  *
254  * (can be called with spinlock held)
255  */
/*
 * Add a ref to an inode.  Only legal if the inode already has at least
 * one ref (the 1->0 transition in hammer2_inode_drop() is final).
 *
 * (can be called with spinlock held)
 */
void
hammer2_inode_ref(hammer2_inode_t *ip)
{
	atomic_add_int(&ip->refs, 1);
}
261
262 /*
263  * Drop an inode reference, freeing the inode when the last reference goes
264  * away.
265  */
/*
 * Drop an inode reference, freeing the inode when the last reference goes
 * away.
 *
 * The while loop exists so that freeing an inode can continue with its
 * parent (pip), disposing of the implied ref held via ip->pip without
 * recursing.
 */
void
hammer2_inode_drop(hammer2_inode_t *ip)
{
	hammer2_pfs_t *pmp;
	hammer2_inode_t *pip;
	u_int refs;

	while (ip) {
		refs = ip->refs;
		cpu_ccfence();	/* force refs to be re-read each iteration */
		if (refs == 1) {
			/*
			 * Transition to zero, must interlock with
			 * the inode inumber lookup tree (if applicable).
			 * It should not be possible for anyone to race
			 * the transition to 0.
			 *
			 * The inum spinlock is taken BEFORE the cmpset so
			 * a concurrent lookup cannot ref the inode between
			 * our 1->0 transition and the RB_REMOVE below.
			 */
			pmp = ip->pmp;
			KKASSERT(pmp);
			hammer2_spin_ex(&pmp->inum_spin);

			if (atomic_cmpset_int(&ip->refs, 1, 0)) {
				/* no lock holders may remain at this point */
				KKASSERT(hammer2_mtx_refs(&ip->lock) == 0);
				if (ip->flags & HAMMER2_INODE_ONRBTREE) {
					atomic_clear_int(&ip->flags,
						     HAMMER2_INODE_ONRBTREE);
					RB_REMOVE(hammer2_inode_tree,
						  &pmp->inum_tree, ip);
				}
				hammer2_spin_unex(&pmp->inum_spin);

				pip = ip->pip;
				ip->pip = NULL;
				ip->pmp = NULL;

				/*
				 * Cleaning out ip->cluster isn't entirely
				 * trivial.
				 */
				hammer2_inode_repoint(ip, NULL, NULL);

				/*
				 * We have to drop pip (if non-NULL) to
				 * dispose of our implied reference from
				 * ip->pip.  We can simply loop on it.
				 */
				kfree(ip, pmp->minode);
				atomic_add_long(&pmp->inmem_inodes, -1);
				ip = pip;
				/* continue with pip (can be NULL) */
			} else {
				/* lost the race; retry from the top */
				hammer2_spin_unex(&ip->pmp->inum_spin);
			}
		} else {
			/*
			 * Non zero transition; plain atomic decrement,
			 * retry if another cpu changed refs under us.
			 */
			if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
				break;
		}
	}
}
329
330 /*
331  * Get the vnode associated with the given inode, allocating the vnode if
332  * necessary.  The vnode will be returned exclusively locked.
333  *
334  * The caller must lock the inode (shared or exclusive).
335  *
336  * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
337  * races.
338  */
/*
 * Get the vnode associated with the given inode, allocating the vnode if
 * necessary.  The vnode will be returned exclusively locked.
 *
 * The caller must lock the inode (shared or exclusive).
 *
 * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
 * races.
 *
 * Returns non-NULL vp with *errorp == 0, or NULL vp with *errorp set.
 * The inode meta-data is read from (cparent), which the caller supplies
 * locked along with the inode.
 */
struct vnode *
hammer2_igetv(hammer2_inode_t *ip, hammer2_cluster_t *cparent, int *errorp)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_pfs_t *pmp;
	struct vnode *vp;

	pmp = ip->pmp;
	KKASSERT(pmp != NULL);
	*errorp = 0;

	ripdata = &hammer2_cluster_rdata(cparent)->ipdata;

	for (;;) {
		/*
		 * Attempt to reuse an existing vnode assignment.  It is
		 * possible to race a reclaim so the vget() may fail.  The
		 * inode must be unlocked during the vget() to avoid a
		 * deadlock against a reclaim.
		 */
		int wasexclusive;

		vp = ip->vp;
		if (vp) {
			/*
			 * Inode must be unlocked during the vget() to avoid
			 * possible deadlocks, but leave the ip ref intact.
			 *
			 * vnode is held to prevent destruction during the
			 * vget().  The vget() can still fail if we lost
			 * a reclaim race on the vnode.
			 */
			hammer2_mtx_state_t ostate;

			vhold(vp);
			ostate = hammer2_inode_lock_temp_release(ip);
			if (vget(vp, LK_EXCLUSIVE)) {
				/* lost reclaim race; retry from the top */
				vdrop(vp);
				hammer2_inode_lock_temp_restore(ip, ostate);
				continue;
			}
			hammer2_inode_lock_temp_restore(ip, ostate);
			vdrop(vp);
			/* vp still locked and ref from vget */
			if (ip->vp != vp) {
				/* vnode was reassigned while unlocked */
				kprintf("hammer2: igetv race %p/%p\n",
					ip->vp, vp);
				vput(vp);
				continue;
			}
			*errorp = 0;
			break;
		}

		/*
		 * No vnode exists, allocate a new vnode.  Beware of
		 * allocation races.  This function will return an
		 * exclusively locked and referenced vnode.
		 */
		*errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
		if (*errorp) {
			kprintf("hammer2: igetv getnewvnode failed %d\n",
				*errorp);
			vp = NULL;
			break;
		}

		/*
		 * Lock the inode and check for an allocation race.
		 * Another thread may have associated a vnode while ours
		 * was being allocated; discard ours and retry.
		 */
		wasexclusive = hammer2_inode_lock_upgrade(ip);
		if (ip->vp != NULL) {
			vp->v_type = VBAD;
			vx_put(vp);
			hammer2_inode_lock_downgrade(ip, wasexclusive);
			continue;
		}

		/*
		 * Initialize the vnode type and per-type state from the
		 * inode meta-data.
		 */
		switch (ripdata->meta.type) {
		case HAMMER2_OBJTYPE_DIRECTORY:
			vp->v_type = VDIR;
			break;
		case HAMMER2_OBJTYPE_REGFILE:
			vp->v_type = VREG;
			vinitvmio(vp, ripdata->meta.size,
				  HAMMER2_LBUFSIZE,
				  (int)ripdata->meta.size & HAMMER2_LBUFMASK);
			break;
		case HAMMER2_OBJTYPE_SOFTLINK:
			/*
			 * XXX for now we are using the generic file_read
			 * and file_write code so we need a buffer cache
			 * association.
			 */
			vp->v_type = VLNK;
			vinitvmio(vp, ripdata->meta.size,
				  HAMMER2_LBUFSIZE,
				  (int)ripdata->meta.size & HAMMER2_LBUFMASK);
			break;
		case HAMMER2_OBJTYPE_CDEV:
			vp->v_type = VCHR;
			/* fall through */
		case HAMMER2_OBJTYPE_BDEV:
			vp->v_ops = &pmp->mp->mnt_vn_spec_ops;
			if (ripdata->meta.type != HAMMER2_OBJTYPE_CDEV)
				vp->v_type = VBLK;
			addaliasu(vp,
				  ripdata->meta.rmajor,
				  ripdata->meta.rminor);
			break;
		case HAMMER2_OBJTYPE_FIFO:
			vp->v_type = VFIFO;
			vp->v_ops = &pmp->mp->mnt_vn_fifo_ops;
			break;
		default:
			panic("hammer2: unhandled objtype %d",
			      ripdata->meta.type);
			break;
		}

		if (ip == pmp->iroot)
			vsetflags(vp, VROOT);

		vp->v_data = ip;
		ip->vp = vp;
		hammer2_inode_ref(ip);		/* vp association */
		hammer2_inode_lock_downgrade(ip, wasexclusive);
		break;
	}

	/*
	 * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
	 */
	if (hammer2_debug & 0x0002) {
		kprintf("igetv vp %p refs 0x%08x aux 0x%08x\n",
			vp, vp->v_refcnt, vp->v_auxrefs);
	}
	return (vp);
}
478
479 /*
480  * Returns the inode associated with the passed-in cluster, creating the
481  * inode if necessary and synchronizing it to the passed-in cluster otherwise.
482  *
483  * The passed-in cluster must be locked and will remain locked on return.
484  * The returned inode will be locked and the caller may dispose of both
485  * via hammer2_inode_unlock_ex().  However, if the caller needs to resolve
486  * a hardlink it must ref/unlock/relock/drop the inode.
487  *
488  * The hammer2_inode structure regulates the interface between the high level
489  * kernel VNOPS API and the filesystem backend (the chains).
490  *
491  * On return the inode is locked with the supplied cluster.
492  */
/*
 * Returns the inode associated with the passed-in cluster, creating the
 * inode if necessary and synchronizing it to the passed-in cluster otherwise.
 *
 * The passed-in cluster must be locked and will remain locked on return.
 * The returned inode will be locked and the caller may dispose of both
 * via hammer2_inode_unlock_ex().  However, if the caller needs to resolve
 * a hardlink it must ref/unlock/relock/drop the inode.
 *
 * The hammer2_inode structure regulates the interface between the high level
 * kernel VNOPS API and the filesystem backend (the chains).
 *
 * On return the inode is locked with the supplied cluster.
 */
hammer2_inode_t *
hammer2_inode_get(hammer2_pfs_t *pmp, hammer2_inode_t *dip,
		  hammer2_cluster_t *cluster)
{
	hammer2_inode_t *nip;
	const hammer2_inode_data_t *iptmp;
	const hammer2_inode_data_t *nipdata;

	KKASSERT(cluster == NULL ||
		 hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
	KKASSERT(pmp);

	/*
	 * Interlocked lookup/ref of the inode.  This code is only needed
	 * when looking up inodes with nlinks != 0 (TODO: optimize out
	 * otherwise and test for duplicates).
	 *
	 * Cluster can be NULL during the initial pfs allocation.
	 */
again:
	while (cluster) {
		iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
		nip = hammer2_inode_lookup(pmp, iptmp->meta.inum);
		if (nip == NULL)
			break;		/* not indexed; allocate a new inode */

		hammer2_mtx_ex(&nip->lock);

		/*
		 * Handle SMP race (not applicable to the super-root spmp
		 * which can't index inodes due to duplicative inode numbers).
		 * If the inode was removed from the tree between lookup and
		 * lock, drop it and retry the lookup.
		 */
		if (pmp->spmp_hmp == NULL &&
		    (nip->flags & HAMMER2_INODE_ONRBTREE) == 0) {
			hammer2_mtx_unlock(&nip->lock);
			hammer2_inode_drop(nip);
			continue;
		}
		/* resync the existing inode to the supplied cluster */
		hammer2_inode_repoint(nip, NULL, cluster);

		return nip;
	}

	/*
	 * We couldn't find the inode number, create a new inode.
	 */
	nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO);
	spin_init(&nip->cluster_spin, "h2clspin");
	atomic_add_long(&pmp->inmem_inodes, 1);
	hammer2_pfs_memory_inc(pmp);
	hammer2_pfs_memory_wakeup(pmp);
	if (pmp->spmp_hmp)
		nip->flags = HAMMER2_INODE_SROOT;

	/*
	 * Initialize nip's cluster.  A cluster is provided for normal
	 * inodes but typically not for the super-root or PFS inodes.
	 */
	nip->cluster.refs = 1;
	nip->cluster.pmp = pmp;
	nip->cluster.flags |= HAMMER2_CLUSTER_INODE;
	if (cluster) {
		nipdata = &hammer2_cluster_rdata(cluster)->ipdata;
		nip->meta = nipdata->meta;
		atomic_set_int(&nip->flags, HAMMER2_INODE_METAGOOD);
		hammer2_inode_repoint(nip, NULL, cluster);
	} else {
		nip->meta.inum = 1;		/* PFS inum is always 1 XXX */
		/* mtime will be updated when a cluster is available */
		atomic_set_int(&nip->flags, HAMMER2_INODE_METAGOOD);
	}

	nip->pip = dip;				/* can be NULL */
	if (dip)
		hammer2_inode_ref(dip);	/* ref dip for nip->pip */

	nip->pmp = pmp;

	/*
	 * ref and lock on nip gives it state compatible to after a
	 * hammer2_inode_lock() call.
	 */
	nip->refs = 1;
	hammer2_mtx_init(&nip->lock, "h2inode");
	hammer2_mtx_ex(&nip->lock);
	/* combination of thread lock and chain lock == inode lock */

	/*
	 * Attempt to add the inode.  If it fails we raced another inode
	 * get.  Undo all the work and try again.
	 */
	if (pmp->spmp_hmp == NULL) {
		hammer2_spin_ex(&pmp->inum_spin);
		if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
			hammer2_spin_unex(&pmp->inum_spin);
			hammer2_mtx_unlock(&nip->lock);
			hammer2_inode_drop(nip);
			goto again;
		}
		atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
		hammer2_spin_unex(&pmp->inum_spin);
	}

	return (nip);
}
598
599 /*
600  * Create a new inode in the specified directory using the vattr to
601  * figure out the type of inode.
602  *
603  * If no error occurs the new inode with its cluster locked is returned in
604  * *nipp, otherwise an error is returned and *nipp is set to NULL.
605  *
606  * If vap and/or cred are NULL the related fields are not set and the
607  * inode type defaults to a directory.  This is used when creating PFSs
608  * under the super-root, so the inode number is set to 1 in this case.
609  *
610  * dip is not locked on entry.
611  *
612  * NOTE: When used to create a snapshot, the inode is temporarily associated
613  *       with the super-root spmp. XXX should pass new pmp for snapshot.
614  */
/*
 * Create a new inode in the specified directory using the vattr to
 * figure out the type of inode.
 *
 * If no error occurs the new inode with its cluster locked is returned in
 * *nipp, otherwise an error is returned and *nipp is set to NULL.
 *
 * If vap and/or cred are NULL the related fields are not set and the
 * inode type defaults to a directory.  This is used when creating PFSs
 * under the super-root, so the inode number is set to 1 in this case.
 *
 * dip is not locked on entry.
 *
 * NOTE: When used to create a snapshot, the inode is temporarily associated
 *       with the super-root spmp. XXX should pass new pmp for snapshot.
 */
hammer2_inode_t *
hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
		     struct vattr *vap, struct ucred *cred,
		     const uint8_t *name, size_t name_len,
		     hammer2_cluster_t **clusterp,
		     int flags, int *errorp)
{
	const hammer2_inode_data_t *dipdata;
	hammer2_inode_data_t *nipdata;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *cparent;
	hammer2_inode_t *nip;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	int error;
	uid_t xuid;
	uuid_t dip_uid;
	uuid_t dip_gid;
	uint32_t dip_mode;
	uint8_t dip_comp_algo;
	uint8_t dip_check_algo;

	lhc = hammer2_dirhash(name, name_len);
	*errorp = 0;

	/*
	 * Locate the inode or indirect block to create the new
	 * entry in.  At the same time check for key collisions
	 * and iterate until we don't get one.
	 *
	 * NOTE: hidden inodes do not have iterators.
	 */
retry:
	cparent = hammer2_inode_lock(dip, HAMMER2_RESOLVE_ALWAYS);
	dipdata = &hammer2_cluster_rdata(cparent)->ipdata;
	/*
	 * Snapshot the parent-directory fields needed below; they must
	 * remain usable after the directory is unlocked.
	 */
	dip_uid = dipdata->meta.uid;
	dip_gid = dipdata->meta.gid;
	dip_mode = dipdata->meta.mode;
	dip_comp_algo = dipdata->meta.comp_algo;
	dip_check_algo = dipdata->meta.check_algo;

	/*
	 * Key-collision iteration: bump lhc until an unused key is found
	 * or the low hash-iteration space is exhausted (ENOSPC).
	 */
	error = 0;
	while (error == 0) {
		cluster = hammer2_cluster_lookup(cparent, &key_dummy,
						 lhc, lhc, 0);
		if (cluster == NULL)
			break;
		if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
			error = ENOSPC;
		if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
			error = ENOSPC;
		hammer2_cluster_unlock(cluster);
		hammer2_cluster_drop(cluster);
		cluster = NULL;
		++lhc;
	}

	if (error == 0) {
		error = hammer2_cluster_create(trans, cparent, &cluster,
					     lhc, 0,
					     HAMMER2_BREF_TYPE_INODE,
					     HAMMER2_INODE_BYTES,
					     flags);
	}
#if INODE_DEBUG
	kprintf("CREATE INODE %*.*s chain=%p\n",
		(int)name_len, (int)name_len, name,
		(cluster ? cluster->focus : NULL));
#endif

	/*
	 * Cleanup and handle retries.  EAGAIN means the create blocked
	 * on the cluster; wait for it and start over from the lookup.
	 */
	if (error == EAGAIN) {
		hammer2_cluster_ref(cparent);
		hammer2_inode_unlock(dip, cparent);
		hammer2_cluster_wait(cparent);
		hammer2_cluster_drop(cparent);
		goto retry;
	}
	hammer2_inode_unlock(dip, cparent);
	cparent = NULL;

	if (error) {
		KKASSERT(cluster == NULL);
		*errorp = error;
		return (NULL);
	}

	/*
	 * Set up the new inode.
	 *
	 * NOTE: *_get() integrates chain's lock into the inode lock.
	 *
	 * NOTE: Only one new inode can currently be created per
	 *       transaction.  If the need arises we can adjust
	 *       hammer2_trans_init() to allow more.
	 *
	 * NOTE: nipdata will have chain's blockset data.
	 */
	KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_MODIFIED);
	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
	nipdata->meta.inum = trans->inode_tid;
	hammer2_cluster_modsync(cluster);
	nip = hammer2_inode_get(dip->pmp, dip, cluster);
	/* re-resolve the writable data pointer after inode_get */
	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;

	if (vap) {
		KKASSERT(trans->inodes_created == 0);
		nipdata->meta.type = hammer2_get_obj_type(vap->va_type);
		nipdata->meta.inum = trans->inode_tid;
		++trans->inodes_created;

		switch (nipdata->meta.type) {
		case HAMMER2_OBJTYPE_CDEV:
		case HAMMER2_OBJTYPE_BDEV:
			nipdata->meta.rmajor = vap->va_rmajor;
			nipdata->meta.rminor = vap->va_rminor;
			break;
		default:
			break;
		}
	} else {
		nipdata->meta.type = HAMMER2_OBJTYPE_DIRECTORY;
		nipdata->meta.inum = 1;
	}

	/* Inherit parent's inode compression mode. */
	nip->comp_heuristic = 0;
	nipdata->meta.comp_algo = dip_comp_algo;
	nipdata->meta.check_algo = dip_check_algo;
	nipdata->meta.version = HAMMER2_INODE_VERSION_ONE;
	hammer2_update_time(&nipdata->meta.ctime);
	nipdata->meta.mtime = nipdata->meta.ctime;
	if (vap)
		nipdata->meta.mode = vap->va_mode;
	nipdata->meta.nlinks = 1;
	if (vap) {
		/*
		 * Resolve ownership: explicit uuid/uid/gid from the vattr
		 * win; otherwise derive from the parent directory via the
		 * vop helper (or 0 for the super-root which has no dip/pmp).
		 */
		if (dip && dip->pmp) {
			xuid = hammer2_to_unix_xid(&dip_uid);
			xuid = vop_helper_create_uid(dip->pmp->mp,
						     dip_mode,
						     xuid,
						     cred,
						     &vap->va_mode);
		} else {
			/* super-root has no dip and/or pmp */
			xuid = 0;
		}
		if (vap->va_vaflags & VA_UID_UUID_VALID)
			nipdata->meta.uid = vap->va_uid_uuid;
		else if (vap->va_uid != (uid_t)VNOVAL)
			hammer2_guid_to_uuid(&nipdata->meta.uid, vap->va_uid);
		else
			hammer2_guid_to_uuid(&nipdata->meta.uid, xuid);

		if (vap->va_vaflags & VA_GID_UUID_VALID)
			nipdata->meta.gid = vap->va_gid_uuid;
		else if (vap->va_gid != (gid_t)VNOVAL)
			hammer2_guid_to_uuid(&nipdata->meta.gid, vap->va_gid);
		else if (dip)
			nipdata->meta.gid = dip_gid;
	}

	/*
	 * Regular files and softlinks allow a small amount of data to be
	 * directly embedded in the inode.  This flag will be cleared if
	 * the size is extended past the embedded limit.
	 */
	if (nipdata->meta.type == HAMMER2_OBJTYPE_REGFILE ||
	    nipdata->meta.type == HAMMER2_OBJTYPE_SOFTLINK) {
		nipdata->meta.op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
	}

	KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
	bcopy(name, nipdata->filename, name_len);
	nipdata->meta.name_key = lhc;
	nipdata->meta.name_len = name_len;
	/* cache the meta-data in the in-memory inode and flush media copy */
	nip->meta = nipdata->meta;
	hammer2_cluster_modsync(cluster);
	*clusterp = cluster;

	return (nip);
}
799
800 /*
801  * The cluster has been removed from the original directory and replaced
802  * with a hardlink pointer.  Move the cluster to the specified parent
803  * directory, change the filename to "0xINODENUMBER", and adjust the key.
804  * The cluster becomes our invisible hardlink target.
805  *
806  * The original cluster must be deleted on entry.
807  */
static
void
hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_cluster_t *cluster,
			hammer2_inode_t *ip, hammer2_inode_t *dip,
			hammer2_cluster_t *dcluster,
			int nlinks, int *errorp)
{
	const hammer2_inode_data_t *iptmp;
	hammer2_inode_data_t *nipdata;
	hammer2_cluster_t *xcluster;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	hammer2_blockref_t bref;

	/*
	 * The invisible hardlink target is keyed by its inode number.
	 * Inode numbers never have the DIRHASH_VISIBLE bit set, so they
	 * cannot collide with normal (visible) directory entry keys.
	 */
	iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
	lhc = iptmp->meta.inum;
	KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);

	/*
	 * Locate the inode or indirect block to create the new
	 * entry in.  lhc represents the inode number so there is
	 * no collision iteration.
	 *
	 * There should be no key collisions with invisible inode keys.
	 *
	 * WARNING! Must use inode_lock_ex() on dip to handle a stale
	 *	    dip->cluster cache.
	 */
	*errorp = 0;
	xcluster = hammer2_cluster_lookup(dcluster, &key_dummy,
				      lhc, lhc, 0);
	if (xcluster) {
		/*
		 * Unexpected collision on the invisible key.  Report it,
		 * release the colliding cluster and fail with ENOSPC.
		 */
		kprintf("X3 chain %p dip %p dchain %p dip->chain %p\n",
			xcluster->focus, dip, dcluster->focus,
			dip->cluster.focus);
		hammer2_cluster_unlock(xcluster);
		hammer2_cluster_drop(xcluster);
		xcluster = NULL;
		*errorp = ENOSPC;
#if 0
		Debugger("X3");
#endif
	}

	/*
	 * Handle the error case
	 */
	if (*errorp) {
		/*
		 * NOTE(review): debugging panic - the ENOSPC path above is
		 * not expected to occur and is not gracefully recovered.
		 */
		panic("error2");
		KKASSERT(xcluster == NULL);
		return;
	}

	/*
	 * Use xcluster as a placeholder for (lhc).  Duplicate cluster to the
	 * same target bref as xcluster and then delete xcluster.  The
	 * duplication occurs after xcluster in flush order even though
	 * xcluster is deleted after the duplication. XXX
	 *
	 * WARNING! Duplications (to a different parent) can cause indirect
	 *	    blocks to be inserted, refactor xcluster.
	 *
	 * WARNING! Only key and keybits is extracted from a passed-in bref.
	 */
	hammer2_cluster_bref(cluster, &bref);
	bref.key = lhc;			/* invisible dir entry key */
	bref.keybits = 0;
	hammer2_cluster_rename(trans, &bref, dcluster, cluster, 0);

	/*
	 * cluster is now 'live' again.. adjust the filename.
	 *
	 * Directory entries are inodes but this is a hidden hardlink
	 * target.  The name isn't used but to ease debugging give it
	 * a name after its inode number.
	 */
	hammer2_cluster_modify(trans, cluster, 0);
	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
	ksnprintf(nipdata->filename, sizeof(nipdata->filename),
		  "0x%016jx", (intmax_t)nipdata->meta.inum);
	nipdata->meta.name_len = strlen(nipdata->filename);
	nipdata->meta.name_key = lhc;
	nipdata->meta.nlinks += nlinks;

	/*
	 * Resync ip->meta.  Some fields have to be retained.
	 * (size and mtime are authoritative in the in-memory inode.)
	 */
	nipdata->meta.size = ip->meta.size;
	nipdata->meta.mtime = ip->meta.mtime;
	ip->meta = nipdata->meta;

	hammer2_cluster_modsync(cluster);
}
901
902 /*
903  * Connect the target inode represented by (cluster) to the media topology
 * at (dip, name, name_len).  The caller can pass a rough *clusterp; this
 * function will issue lookup()s to position the parent cluster properly
 * for the chain insertion.
907  *
908  * If hlink is TRUE this function creates an OBJTYPE_HARDLINK directory
909  * entry instead of connecting (cluster).
910  *
911  * If hlink is FALSE this function expects (cluster) to be unparented.
912  */
int
hammer2_inode_connect(hammer2_trans_t *trans,
		      hammer2_inode_t *ip, hammer2_cluster_t **clusterp,
		      int hlink,
		      hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
		      const uint8_t *name, size_t name_len,
		      hammer2_key_t lhc)
{
	hammer2_inode_data_t *wipdata;
	hammer2_cluster_t *ocluster;
	hammer2_cluster_t *ncluster;
	hammer2_key_t key_dummy;
	int error;

	/*
	 * Since ocluster is either disconnected from the topology or
	 * represents a hardlink terminus which is always a parent of or
	 * equal to dip, we should be able to safely lock dip->chain for
	 * our setup.
	 *
	 * WARNING! Must use inode_lock_ex() on dip to handle a stale
	 *	    dip->cluster.
	 *
	 * If name is non-NULL we calculate lhc, else we use the passed-in
	 * lhc.
	 */
	ocluster = *clusterp;

	if (name) {
		lhc = hammer2_dirhash(name, name_len);

		/*
		 * Locate the inode or indirect block to create the new
		 * entry in.  At the same time check for key collisions
		 * and iterate until we don't get one.
		 *
		 * The low bits of the key serve as an iterator; if all
		 * of them are consumed the directory hash bucket is full.
		 */
		error = 0;
		while (error == 0) {
			ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
						      lhc, lhc, 0);
			if (ncluster == NULL)
				break;
			if ((lhc & HAMMER2_DIRHASH_LOMASK) ==
			    HAMMER2_DIRHASH_LOMASK) {
				error = ENOSPC;
			}
			hammer2_cluster_unlock(ncluster);
			hammer2_cluster_drop(ncluster);
			ncluster = NULL;
			++lhc;
		}
	} else {
		/*
		 * Reconnect to specific key (used when moving
		 * unlinked-but-open files into the hidden directory).
		 */
		ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
						  lhc, lhc, 0);
		KKASSERT(ncluster == NULL);
		error = 0;
	}

	if (error == 0) {
		if (hlink) {
			/*
			 * Hardlink pointer needed, create totally fresh
			 * directory entry.
			 *
			 * We must refactor ocluster because it might have
			 * been shifted into an indirect cluster by the
			 * create.
			 */
			KKASSERT(ncluster == NULL);
			error = hammer2_cluster_create(trans,
						       dcluster, &ncluster,
						       lhc, 0,
						       HAMMER2_BREF_TYPE_INODE,
						       HAMMER2_INODE_BYTES,
						       0);
		} else {
			/*
			 * Reconnect the original cluster under the new name.
			 * Original cluster must have already been deleted by
			 * the caller.
			 *
			 * WARNING! Can cause held-over clusters to require a
			 *	    refactor.  Fortunately we have none (our
			 *	    locked clusters are passed into and
			 *	    modified by the call).
			 */
			ncluster = ocluster;
			ocluster = NULL;
			error = hammer2_cluster_create(trans,
						       dcluster, &ncluster,
						       lhc, 0,
						       HAMMER2_BREF_TYPE_INODE,
						       HAMMER2_INODE_BYTES,
						       0);
		}
	}

	/*
	 * Unlock stuff.
	 */
	KKASSERT(error != EAGAIN);

	/*
	 * ncluster should be NULL on error, leave ocluster
	 * (ocluster == *clusterp) alone.
	 */
	if (error) {
		KKASSERT(ncluster == NULL);
		return (error);
	}

	/*
	 * Directory entries are inodes so if the name has changed we have
	 * to update the inode.
	 *
	 * When creating an OBJTYPE_HARDLINK entry remember to unlock the
	 * cluster, the caller will access the hardlink via the actual hardlink
	 * target file and not the hardlink pointer entry, so we must still
	 * return ocluster.
	 */
	if (hlink && hammer2_hardlink_enable >= 0) {
		/*
		 * Create the HARDLINK pointer.  oip represents the hardlink
		 * target in this situation.
		 *
		 * We will return ocluster (the hardlink target).
		 */
		hammer2_cluster_modify(trans, ncluster, 0);
		KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
		wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
		bcopy(name, wipdata->filename, name_len);
		wipdata->meta.name_key = lhc;
		wipdata->meta.name_len = name_len;
		wipdata->meta.target_type =
			    hammer2_cluster_rdata(ocluster)->ipdata.meta.type;
		wipdata->meta.type = HAMMER2_OBJTYPE_HARDLINK;
		wipdata->meta.inum =
			    hammer2_cluster_rdata(ocluster)->ipdata.meta.inum;
		wipdata->meta.version = HAMMER2_INODE_VERSION_ONE;
		wipdata->meta.nlinks = 1;
		wipdata->meta.op_flags = HAMMER2_OPFLAG_DIRECTDATA;
		hammer2_cluster_modsync(ncluster);
		hammer2_cluster_unlock(ncluster);
		hammer2_cluster_drop(ncluster);
		ncluster = ocluster;
		ocluster = NULL;
	} else {
		/*
		 * ncluster is a duplicate of ocluster at the new location.
		 * We must fixup the name stored in the inode data.
		 * The bref key has already been adjusted by inode_connect().
		 */
		hammer2_cluster_modify(trans, ncluster, 0);
		wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;

		KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
		bcopy(name, wipdata->filename, name_len);
		wipdata->meta.name_key = lhc;
		wipdata->meta.name_len = name_len;
		wipdata->meta.nlinks = 1;
		hammer2_cluster_modsync(ncluster);

		/*
		 * Resync the in-memory inode, some fields must be retained.
		 * (size and mtime are authoritative in the in-memory inode.)
		 */
		if (ip) {	/* XXX move_to_hidden passes NULL */
			wipdata->meta.size = ip->meta.size;
			wipdata->meta.mtime = ip->meta.mtime;
			ip->meta = wipdata->meta;
		}
	}

	/*
	 * We are replacing ocluster with ncluster, unlock ocluster.  In the
	 * case where ocluster is left unchanged the code above sets
	 * ncluster to ocluster and ocluster to NULL, resulting in a NOP here.
	 */
	if (ocluster) {
		hammer2_cluster_unlock(ocluster);
		hammer2_cluster_drop(ocluster);
	}
	*clusterp = ncluster;

	return (0);
}
1102
1103 /*
1104  * Repoint ip->cluster's chains to cluster's chains and fixup the default
1105  * focus.  Only valid elements are repointed.  Invalid elements have to be
1106  * adjusted by the appropriate slave sync threads.
1107  *
 * Caller must hold the inode exclusive locked.  The cluster, if not NULL,
 * must also be exclusive locked.
1110  *
1111  * Cluster may be NULL to clean out any chains in ip->cluster.
1112  */
void
hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
		      hammer2_cluster_t *cluster)
{
	hammer2_chain_t *dropch[HAMMER2_MAXCLUSTER];
	hammer2_chain_t *ochain;
	hammer2_chain_t *nchain;
	hammer2_inode_t *opip;
	int i;

	/* Collect replaced chains; dropped after the spinlock is released */
	bzero(dropch, sizeof(dropch));

	/*
	 * Replace chains in ip->cluster with chains from cluster and
	 * adjust the focus if necessary.
	 *
	 * NOTE: nchain and/or ochain can be NULL due to gaps
	 *	 in the cluster arrays.
	 */
	hammer2_spin_ex(&ip->cluster_spin);
	for (i = 0; cluster && i < cluster->nchains; ++i) {
		/*
		 * Do not replace invalid elements as this might race
		 * syncthr replacements.
		 */
		if (cluster->array[i].flags & HAMMER2_CITEM_INVALID)
			continue;

		/*
		 * Do not replace elements which are the same.  Also handle
		 * element count discrepancies.
		 */
		nchain = cluster->array[i].chain;
		if (i < ip->cluster.nchains) {
			ochain = ip->cluster.array[i].chain;
			if (ochain == nchain)
				continue;
		} else {
			ochain = NULL;
		}

		/*
		 * Make adjustments.  The new element inherits the INVALID
		 * flag from the source cluster.  The old chain's drop is
		 * deferred (dropch) until outside the spinlock.
		 */
		ip->cluster.array[i].chain = nchain;
		ip->cluster.array[i].flags &= ~HAMMER2_CITEM_INVALID;
		ip->cluster.array[i].flags |= cluster->array[i].flags &
					      HAMMER2_CITEM_INVALID;
		if (nchain)
			hammer2_chain_ref(nchain);
		dropch[i] = ochain;
	}

	/*
	 * Release any left-over chains in ip->cluster.
	 * (Elements beyond the new cluster's nchains, or all of them
	 * when cluster is NULL.)
	 */
	while (i < ip->cluster.nchains) {
		nchain = ip->cluster.array[i].chain;
		if (nchain) {
			ip->cluster.array[i].chain = NULL;
			ip->cluster.array[i].flags |= HAMMER2_CITEM_INVALID;
		}
		dropch[i] = nchain;
		++i;
	}

	/*
	 * Fixup fields.  Note that the inode-embedded cluster is never
	 * directly locked.
	 */
	if (cluster) {
		ip->cluster.nchains = cluster->nchains;
		ip->cluster.focus = cluster->focus;
		ip->cluster.flags = cluster->flags & ~HAMMER2_CLUSTER_LOCKED;
	} else {
		ip->cluster.nchains = 0;
		ip->cluster.focus = NULL;
		ip->cluster.flags &= ~HAMMER2_CLUSTER_ZFLAGS;
	}

	/*
	 * Repoint ip->pip if requested (non-NULL pip).  The old parent
	 * reference, if any, is dropped outside the spinlock.
	 */
	if (pip && ip->pip != pip) {
		opip = ip->pip;
		hammer2_inode_ref(pip);
		ip->pip = pip;
	} else {
		opip = NULL;
	}
	hammer2_spin_unex(&ip->cluster_spin);

	/*
	 * Cleanup outside of spinlock
	 */
	while (--i >= 0) {
		if (dropch[i])
			hammer2_chain_drop(dropch[i]);
	}
	if (opip)
		hammer2_inode_drop(opip);
}
1215
1216 /*
1217  * Repoint a single element from the cluster to the ip.  Used by the
1218  * synchronization threads to piecemeal update inodes.  Does not change
1219  * focus and requires inode to be re-locked to clean-up flags (XXX).
1220  */
1221 void
1222 hammer2_inode_repoint_one(hammer2_inode_t *ip, hammer2_cluster_t *cluster,
1223                           int idx)
1224 {
1225         hammer2_chain_t *ochain;
1226         hammer2_chain_t *nchain;
1227         int i;
1228
1229         hammer2_spin_ex(&ip->cluster_spin);
1230         KKASSERT(idx < cluster->nchains);
1231         if (idx < ip->cluster.nchains) {
1232                 ochain = ip->cluster.array[idx].chain;
1233                 nchain = cluster->array[idx].chain;
1234         } else {
1235                 ochain = NULL;
1236                 nchain = cluster->array[idx].chain;
1237                 ip->cluster.nchains = idx + 1;
1238                 for (i = ip->cluster.nchains; i <= idx; ++i) {
1239                         bzero(&ip->cluster.array[i],
1240                               sizeof(ip->cluster.array[i]));
1241                         ip->cluster.array[i].flags |= HAMMER2_CITEM_INVALID;
1242                 }
1243         }
1244         if (ochain != nchain) {
1245                 /*
1246                  * Make adjustments.
1247                  */
1248                 ip->cluster.array[idx].chain = nchain;
1249                 ip->cluster.array[idx].flags &= ~HAMMER2_CITEM_INVALID;
1250                 ip->cluster.array[idx].flags |= cluster->array[idx].flags &
1251                                                 HAMMER2_CITEM_INVALID;
1252         }
1253         hammer2_spin_unex(&ip->cluster_spin);
1254         if (ochain != nchain) {
1255                 if (nchain)
1256                         hammer2_chain_ref(nchain);
1257                 if (ochain)
1258                         hammer2_chain_drop(ochain);
1259         }
1260 }
1261
1262 /*
1263  * Unlink the file from the specified directory inode.  The directory inode
1264  * does not need to be locked.
1265  *
1266  * isdir determines whether a directory/non-directory check should be made.
1267  * No check is made if isdir is set to -1.
1268  *
1269  * isopen specifies whether special unlink-with-open-descriptor handling
1270  * must be performed.  If set to -1 the caller is deleting a PFS and we
1271  * check whether the chain is mounted or not (chain->pmp != NULL).  1 is
1272  * implied if it is mounted.
1273  *
1274  * If isopen is 1 and nlinks drops to 0 this function must move the chain
1275  * to a special hidden directory until last-close occurs on the file.
1276  *
1277  * NOTE!  The underlying file can still be active with open descriptors
1278  *        or if the chain is being manually held (e.g. for rename).
1279  *
1280  *        The caller is responsible for fixing up ip->chain if e.g. a
1281  *        rename occurs (see chain_duplicate()).
1282  *
1283  * NOTE!  The chain is not deleted if it is moved to the hidden directory,
1284  *        but otherwise will be deleted.
1285  */
1286 int
1287 hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
1288                     const uint8_t *name, size_t name_len,
1289                     int isdir, int *hlinkp, struct nchandle *nch,
1290                     int nlinks)
1291 {
1292         const hammer2_inode_data_t *ripdata;
1293         hammer2_inode_data_t *wipdata;
1294         hammer2_cluster_t *cparent;
1295         hammer2_cluster_t *hcluster;
1296         hammer2_cluster_t *hparent;
1297         hammer2_cluster_t *cluster;
1298         hammer2_cluster_t *dparent;
1299         hammer2_cluster_t *dcluster;
1300         hammer2_key_t key_dummy;
1301         hammer2_key_t key_next;
1302         hammer2_key_t lhc;
1303         int last_link;
1304         int error;
1305         int hlink;
1306         uint8_t type;
1307
1308         error = 0;
1309         hlink = 0;
1310         hcluster = NULL;
1311         hparent = NULL;
1312         lhc = hammer2_dirhash(name, name_len);
1313
1314 again:
1315         /*
1316          * Search for the filename in the directory
1317          */
1318         cparent = hammer2_inode_lock(dip, HAMMER2_RESOLVE_ALWAYS);
1319         cluster = hammer2_cluster_lookup(cparent, &key_next,
1320                                      lhc, lhc + HAMMER2_DIRHASH_LOMASK, 0);
1321         while (cluster) {
1322                 if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
1323                         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1324                         if (ripdata->meta.name_len == name_len &&
1325                             bcmp(ripdata->filename, name, name_len) == 0) {
1326                                 break;
1327                         }
1328                 }
1329                 cluster = hammer2_cluster_next(cparent, cluster, &key_next,
1330                                                key_next,
1331                                                lhc + HAMMER2_DIRHASH_LOMASK,
1332                                                0);
1333         }
1334         hammer2_inode_unlock(dip, NULL);        /* retain cparent */
1335
1336         /*
1337          * Not found or wrong type (isdir < 0 disables the type check).
1338          * If a hardlink pointer, type checks use the hardlink target.
1339          */
1340         if (cluster == NULL) {
1341                 error = ENOENT;
1342                 goto done;
1343         }
1344         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1345         type = ripdata->meta.type;
1346         if (type == HAMMER2_OBJTYPE_HARDLINK) {
1347                 hlink = 1;
1348                 type = ripdata->meta.target_type;
1349         }
1350
1351         if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
1352                 error = ENOTDIR;
1353                 goto done;
1354         }
1355         if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir >= 1) {
1356                 error = EISDIR;
1357                 goto done;
1358         }
1359
1360         /*
1361          * Hardlink must be resolved.  We can't hold the parent locked
1362          * while we do this or we could deadlock.  The physical file will
1363          * be located at or above the current directory.
1364          *
1365          * We loop to reacquire the hardlink origination.
1366          *
1367          * NOTE: hammer2_hardlink_find() will locate the hardlink target,
1368          *       returning a modified hparent and hcluster.
1369          */
1370         if (ripdata->meta.type == HAMMER2_OBJTYPE_HARDLINK) {
1371                 if (hcluster == NULL) {
1372                         hcluster = cluster;
1373                         cluster = NULL; /* safety */
1374                         hammer2_cluster_unlock(cparent);
1375                         hammer2_cluster_drop(cparent);
1376                         cparent = NULL; /* safety */
1377                         ripdata = NULL; /* safety (associated w/cparent) */
1378                         error = hammer2_hardlink_find(dip, &hparent, &hcluster);
1379
1380                         /*
1381                          * If we couldn't find the hardlink target then some
1382                          * parent directory containing the hardlink pointer
1383                          * probably got renamed to above the original target,
1384                          * a case not yet handled by H2.
1385                          */
1386                         if (error) {
1387                                 kprintf("H2 unlink_file: hardlink target for "
1388                                         "\"%s\" not found\n",
1389                                         name);
1390                                 kprintf("(likely due to known directory "
1391                                         "rename bug)\n");
1392                                 goto done;
1393                         }
1394                         goto again;
1395                 }
1396         }
1397
1398         /*
1399          * If this is a directory the directory must be empty.  However, if
1400          * isdir < 0 we are doing a rename and the directory does not have
1401          * to be empty, and if isdir > 1 we are deleting a PFS/snapshot
1402          * and the directory does not have to be empty.
1403          *
1404          * NOTE: We check the full key range here which covers both visible
1405          *       and invisible entries.  Theoretically there should be no
1406          *       invisible (hardlink target) entries if there are no visible
1407          *       entries.
1408          */
1409         if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
1410                 dparent = hammer2_cluster_lookup_init(cluster, 0);
1411                 dcluster = hammer2_cluster_lookup(dparent, &key_dummy,
1412                                                   0, (hammer2_key_t)-1,
1413                                                   HAMMER2_LOOKUP_NODATA);
1414                 if (dcluster) {
1415                         hammer2_cluster_unlock(dcluster);
1416                         hammer2_cluster_drop(dcluster);
1417                         hammer2_cluster_lookup_done(dparent);
1418                         error = ENOTEMPTY;
1419                         goto done;
1420                 }
1421                 hammer2_cluster_lookup_done(dparent);
1422                 dparent = NULL;
1423                 /* dcluster NULL */
1424         }
1425
1426         /*
1427          * If this was a hardlink then (cparent, cluster) is the hardlink
1428          * pointer, which we can simply destroy outright.  Discard the
1429          * clusters and replace with the hardlink target.
1430          */
1431         if (hcluster) {
1432                 hammer2_cluster_delete(trans, cparent, cluster,
1433                                        HAMMER2_DELETE_PERMANENT);
1434                 hammer2_cluster_unlock(cparent);
1435                 hammer2_cluster_drop(cparent);
1436                 hammer2_cluster_unlock(cluster);
1437                 hammer2_cluster_drop(cluster);
1438                 cparent = hparent;
1439                 cluster = hcluster;
1440                 hparent = NULL;
1441                 hcluster = NULL;
1442         }
1443
1444         /*
1445          * This leaves us with the hardlink target or non-hardlinked file
1446          * or directory in (cparent, cluster).
1447          *
1448          * Delete the target when nlinks reaches 0 with special handling
1449          * to avoid I/O (to avoid actually updating the inode) for the 1->0
1450          * transition, if possible.  This optimization makes rm -rf very
1451          * fast.
1452          *
1453          * NOTE! In DragonFly the vnops function calls cache_unlink() after
1454          *       calling us here to clean out the namecache association,
1455          *       (which does not represent a ref for the open-test), and to
1456          *       force finalization of the vnode if/when the last ref gets
1457          *       dropped.
1458          *
1459          * NOTE! Files are unlinked by rename and then relinked.  nch will be
1460          *       passed as NULL in this situation.  hammer2_inode_connect()
1461          *       will bump nlinks.
1462          */
1463         KKASSERT(cluster != NULL);
1464
1465         /*
1466          * Note: nlinks is negative when decrementing, positive when
1467          *       incrementing.
1468          */
1469         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1470         last_link = (ripdata->meta.nlinks + nlinks == 0);
1471
1472         if (last_link) {
1473                 /*
1474                  * Target nlinks has reached 0, file now unlinked (but may
1475                  * still be open).
1476                  *
1477                  * nlinks will be -1 for a normal remove().  If this is the
1478                  * last link we must flag the inode on deactivation. XXX race ?
1479                  */
1480                 hammer2_inode_t *ip;
1481
1482                 if (nlinks == -1) {
1483                         ip = hammer2_inode_lookup(trans->pmp,
1484                                                   ripdata->meta.inum);
1485                         if (ip) {
1486                                 atomic_set_int(&ip->flags,
1487                                                HAMMER2_INODE_ISUNLINKED);
1488                                 hammer2_inode_drop(ip);
1489                         }
1490                 }
1491
1492                 if (nch && cache_isopen(nch)) {
1493                         /*
1494                          * If an unlinked file is still open we must update
1495                          * the inodes link count.
1496                          */
1497                         hammer2_cluster_modify(trans, cluster, 0);
1498                         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1499                         ripdata = wipdata;
1500                         wipdata->meta.nlinks += nlinks;
1501                         /* XXX race */
1502                         /* XXX debugging */
1503                         if ((int64_t)wipdata->meta.nlinks < 0) {
1504                                 wipdata->meta.nlinks = 0;
1505                         }
1506                         hammer2_cluster_modsync(cluster);
1507                         hammer2_inode_move_to_hidden(trans, &cparent, &cluster,
1508                                                      wipdata->meta.inum);
1509                 } else {
1510                         /*
1511                          * This won't get everything if a vnode is still
1512                          * present, but the cache_unlink() call the caller
1513                          * makes will.
1514                          */
1515                         hammer2_cluster_delete(trans, cparent, cluster,
1516                                                HAMMER2_DELETE_PERMANENT);
1517                 }
1518         } else if (hlink == 0) {
1519                 /*
1520                  * In this situation a normal non-hardlinked file (which can
1521                  * only have nlinks == 1) still has a non-zero nlinks, the
1522                  * caller must be doing a RENAME operation and so is passing
1523                  * a nlinks adjustment of 0, and only wishes to remove file
1524                  * in order to be able to reconnect it under a different name.
1525                  *
1526                  * In this situation we do a temporary deletion of the
1527                  * chain in order to allow the file to be reconnected in
1528                  * a different location.
1529                  */
1530                 KKASSERT(nlinks == 0);
1531                 hammer2_cluster_delete(trans, cparent, cluster, 0);
1532         } else {
1533                 /*
1534                  * Links remain, must update the inode link count.
1535                  */
1536                 hammer2_cluster_modify(trans, cluster, 0);
1537                 wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1538                 ripdata = wipdata;
1539                 wipdata->meta.nlinks += nlinks;
1540                 /* XXX debugging */
1541                 if ((int64_t)wipdata->meta.nlinks < 0) {
1542                         wipdata->meta.nlinks = 0;
1543                 }
1544                 hammer2_cluster_modsync(cluster);
1545         }
1546
1547         error = 0;
1548 done:
1549         if (cparent) {
1550                 hammer2_cluster_unlock(cparent);
1551                 hammer2_cluster_drop(cparent);
1552         }
1553         if (cluster) {
1554                 hammer2_cluster_unlock(cluster);
1555                 hammer2_cluster_drop(cluster);
1556         }
1557         if (hparent) {
1558                 hammer2_cluster_unlock(hparent);
1559                 hammer2_cluster_drop(hparent);
1560         }
1561         if (hcluster) {
1562                 hammer2_cluster_unlock(hcluster);
1563                 hammer2_cluster_drop(hcluster);
1564         }
1565         if (hlinkp)
1566                 *hlinkp = hlink;
1567
1568         return error;
1569 }
1570
1571 /*
1572  * This is called from the mount code to initialize pmp->ihidden
1573  */
1574 void
1575 hammer2_inode_install_hidden(hammer2_pfs_t *pmp)
1576 {
1577         hammer2_trans_t trans;
1578         hammer2_cluster_t *cparent;
1579         hammer2_cluster_t *cluster;
1580         hammer2_cluster_t *scan;
1581         const hammer2_inode_data_t *ripdata;
1582         hammer2_inode_data_t *wipdata;
1583         hammer2_key_t key_dummy;
1584         hammer2_key_t key_next;
1585         int error;
1586         int count;
1587         int dip_check_algo;
1588         int dip_comp_algo;
1589
1590         if (pmp->ihidden)
1591                 return;
1592
1593         /*
1594          * Find the hidden directory
1595          */
1596         bzero(&key_dummy, sizeof(key_dummy));
1597         hammer2_trans_init(&trans, pmp, 0);
1598
1599         /*
1600          * Setup for lookup, retrieve iroot's check and compression
1601          * algorithm request which was likely generated by newfs_hammer2.
1602          *
1603          * The check/comp fields will probably never be used since inodes
1604          * are renamed into the hidden directory and not created relative to
1605          * the hidden directory, chain creation inherits from bref.methods,
1606          * and data chains inherit from their respective file inode *_algo
1607          * fields.
1608          */
1609         cparent = hammer2_inode_lock(pmp->iroot, HAMMER2_RESOLVE_ALWAYS);
1610         ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
1611         dip_check_algo = ripdata->meta.check_algo;
1612         dip_comp_algo = ripdata->meta.comp_algo;
1613         ripdata = NULL;
1614
1615         cluster = hammer2_cluster_lookup(cparent, &key_dummy,
1616                                          HAMMER2_INODE_HIDDENDIR,
1617                                          HAMMER2_INODE_HIDDENDIR,
1618                                          0);
1619         if (cluster) {
1620                 pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
1621                 hammer2_inode_ref(pmp->ihidden);
1622
1623                 /*
1624                  * Remove any unlinked files which were left open as-of
1625                  * any system crash.
1626                  *
1627                  * Don't pass NODATA, we need the inode data so the delete
1628                  * can do proper statistics updates.
1629                  */
1630                 count = 0;
1631                 scan = hammer2_cluster_lookup(cluster, &key_next,
1632                                               0, HAMMER2_TID_MAX, 0);
1633                 while (scan) {
1634                         if (hammer2_cluster_type(scan) ==
1635                             HAMMER2_BREF_TYPE_INODE) {
1636                                 hammer2_cluster_delete(&trans, cluster, scan,
1637                                                    HAMMER2_DELETE_PERMANENT);
1638                                 ++count;
1639                         }
1640                         scan = hammer2_cluster_next(cluster, scan, &key_next,
1641                                                     0, HAMMER2_TID_MAX, 0);
1642                 }
1643
1644                 hammer2_inode_unlock(pmp->ihidden, cluster);
1645                 hammer2_inode_unlock(pmp->iroot, cparent);
1646                 hammer2_trans_done(&trans);
1647                 kprintf("hammer2: PFS loaded hidden dir, "
1648                         "removed %d dead entries\n", count);
1649                 return;
1650         }
1651
1652         /*
1653          * Create the hidden directory
1654          */
1655         error = hammer2_cluster_create(&trans, cparent, &cluster,
1656                                        HAMMER2_INODE_HIDDENDIR, 0,
1657                                        HAMMER2_BREF_TYPE_INODE,
1658                                        HAMMER2_INODE_BYTES,
1659                                        0);
1660         hammer2_inode_unlock(pmp->iroot, cparent);
1661
1662         hammer2_cluster_modify(&trans, cluster, 0);
1663         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1664         wipdata->meta.type = HAMMER2_OBJTYPE_DIRECTORY;
1665         wipdata->meta.inum = HAMMER2_INODE_HIDDENDIR;
1666         wipdata->meta.nlinks = 1;
1667         wipdata->meta.comp_algo = dip_comp_algo;
1668         wipdata->meta.check_algo = dip_check_algo;
1669         hammer2_cluster_modsync(cluster);
1670         kprintf("hammer2: PFS root missing hidden directory, creating\n");
1671
1672         pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
1673         hammer2_inode_ref(pmp->ihidden);
1674         hammer2_inode_unlock(pmp->ihidden, cluster);
1675         hammer2_trans_done(&trans);
1676 }
1677
1678 /*
1679  * If an open file is unlinked H2 needs to retain the file in the topology
1680  * to ensure that its backing store is not recovered by the bulk free scan.
1681  * This also allows us to avoid having to special-case the CHAIN_DELETED flag.
1682  *
1683  * To do this the file is moved to a hidden directory in the PFS root and
1684  * renamed.  The hidden directory must be created if it does not exist.
1685  */
1686 static
1687 void
1688 hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
1689                              hammer2_cluster_t **cparentp,
1690                              hammer2_cluster_t **clusterp,
1691                              hammer2_tid_t inum)
1692 {
1693         hammer2_cluster_t *dcluster;
1694         hammer2_pfs_t *pmp;
1695         int error;
1696
1697         pmp = (*clusterp)->pmp;
1698         KKASSERT(pmp != NULL);
1699         KKASSERT(pmp->ihidden != NULL);
1700
1701         hammer2_cluster_delete(trans, *cparentp, *clusterp, 0);
1702         dcluster = hammer2_inode_lock(pmp->ihidden, HAMMER2_RESOLVE_ALWAYS);
1703         error = hammer2_inode_connect(trans,
1704                                       NULL/*XXX*/, clusterp, 0,
1705                                       pmp->ihidden, dcluster,
1706                                       NULL, 0, inum);
1707         hammer2_inode_unlock(pmp->ihidden, dcluster);
1708         KKASSERT(error == 0);
1709 }
1710
1711 /*
1712  * Given an exclusively locked inode and cluster we consolidate the cluster
1713  * for hardlink creation, adding (nlinks) to the file's link count and
1714  * potentially relocating the inode to (cdip) which is a parent directory
1715  * common to both the current location of the inode and the intended new
1716  * hardlink.
1717  *
1718  * Replaces (*clusterp) if consolidation occurred, unlocking the old cluster
1719  * and returning a new locked cluster.
1720  *
1721  * NOTE!  This function will also replace ip->cluster.
1722  */
1723 int
1724 hammer2_hardlink_consolidate(hammer2_trans_t *trans,
1725                              hammer2_inode_t *ip,
1726                              hammer2_cluster_t **clusterp,
1727                              hammer2_inode_t *cdip,
1728                              hammer2_cluster_t *cdcluster,
1729                              int nlinks)
1730 {
1731         const hammer2_inode_data_t *ripdata;
1732         hammer2_inode_data_t *wipdata;
1733         hammer2_cluster_t *cluster;
1734         hammer2_cluster_t *cparent;
1735         int error;
1736
1737         cluster = *clusterp;
1738         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1739         if (nlinks == 0 &&                      /* no hardlink needed */
1740             (ripdata->meta.name_key & HAMMER2_DIRHASH_VISIBLE)) {
1741                 return (0);
1742         }
1743
1744         if (hammer2_hardlink_enable == 0) {     /* disallow hardlinks */
1745                 hammer2_cluster_unlock(cluster);
1746                 hammer2_cluster_drop(cluster);
1747                 *clusterp = NULL;
1748                 return (ENOTSUP);
1749         }
1750
1751         cparent = NULL;
1752
1753         /*
1754          * If no change in the hardlink's target directory is required and
1755          * this is already a hardlink target, all we need to do is adjust
1756          * the link count.
1757          */
1758         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1759         if (cdip == ip->pip &&
1760             (ripdata->meta.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
1761                 if (nlinks) {
1762                         hammer2_cluster_modify(trans, cluster, 0);
1763                         wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1764                         wipdata->meta.nlinks += nlinks;
1765                         hammer2_cluster_modsync(cluster);
1766                         ripdata = wipdata;
1767                 }
1768                 error = 0;
1769                 goto done;
1770         }
1771
1772         /*
1773          * Cluster is the real inode.  The originating directory is locked
1774          * by the caller so we can manipulate it without worrying about races
1775          * against other lookups.
1776          *
1777          * If cluster is visible we need to delete it from the current
1778          * location and create a hardlink pointer in its place.  If it is
1779          * not visible we need only delete it.  Then later cluster will be
1780          * renamed to a parent directory and converted (if necessary) to
1781          * a hidden inode (via shiftup).
1782          *
1783          * NOTE! We must hold cparent locked through the delete/create/rename
1784          *       operation to ensure that other threads block resolving to
1785          *       the same hardlink, otherwise the other threads may not see
1786          *       the hardlink.
1787          */
1788         KKASSERT((cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0);
1789         cparent = hammer2_cluster_parent(cluster);
1790
1791         hammer2_cluster_delete(trans, cparent, cluster, 0);
1792
1793         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1794         KKASSERT(ripdata->meta.type != HAMMER2_OBJTYPE_HARDLINK);
1795         if (ripdata->meta.name_key & HAMMER2_DIRHASH_VISIBLE) {
1796                 hammer2_cluster_t *ncluster;
1797                 hammer2_key_t lhc;
1798
1799                 ncluster = NULL;
1800                 lhc = cluster->focus->bref.key;
1801                 error = hammer2_cluster_create(trans, cparent, &ncluster,
1802                                              lhc, 0,
1803                                              HAMMER2_BREF_TYPE_INODE,
1804                                              HAMMER2_INODE_BYTES,
1805                                              0);
1806                 hammer2_cluster_modify(trans, ncluster, 0);
1807                 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1808
1809                 /* wipdata->comp_algo = ripdata->comp_algo; */
1810                 wipdata->meta.comp_algo = 0;
1811                 wipdata->meta.check_algo = 0;
1812                 wipdata->meta.version = HAMMER2_INODE_VERSION_ONE;
1813                 wipdata->meta.inum = ripdata->meta.inum;
1814                 wipdata->meta.target_type = ripdata->meta.type;
1815                 wipdata->meta.type = HAMMER2_OBJTYPE_HARDLINK;
1816                 wipdata->meta.uflags = 0;
1817                 wipdata->meta.rmajor = 0;
1818                 wipdata->meta.rminor = 0;
1819                 wipdata->meta.ctime = 0;
1820                 wipdata->meta.mtime = 0;
1821                 wipdata->meta.atime = 0;
1822                 wipdata->meta.btime = 0;
1823                 bzero(&wipdata->meta.uid, sizeof(wipdata->meta.uid));
1824                 bzero(&wipdata->meta.gid, sizeof(wipdata->meta.gid));
1825                 wipdata->meta.op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1826                 wipdata->meta.cap_flags = 0;
1827                 wipdata->meta.mode = 0;
1828                 wipdata->meta.size = 0;
1829                 wipdata->meta.nlinks = 1;
1830                 wipdata->meta.iparent = 0;      /* XXX */
1831                 wipdata->meta.pfs_type = 0;
1832                 wipdata->meta.pfs_inum = 0;
1833                 bzero(&wipdata->meta.pfs_clid, sizeof(wipdata->meta.pfs_clid));
1834                 bzero(&wipdata->meta.pfs_fsid, sizeof(wipdata->meta.pfs_fsid));
1835                 wipdata->meta.data_quota = 0;
1836                 /* wipdata->data_count = 0; */
1837                 wipdata->meta.inode_quota = 0;
1838                 /* wipdata->inode_count = 0; */
1839                 wipdata->meta.attr_tid = 0;
1840                 wipdata->meta.dirent_tid = 0;
1841                 bzero(&wipdata->u, sizeof(wipdata->u));
1842                 bcopy(ripdata->filename, wipdata->filename,
1843                       ripdata->meta.name_len);
1844                 wipdata->meta.name_key = ncluster->focus->bref.key;
1845                 wipdata->meta.name_len = ripdata->meta.name_len;
1846                 /* XXX transaction ids */
1847                 hammer2_cluster_modsync(ncluster);
1848                 hammer2_cluster_unlock(ncluster);
1849                 hammer2_cluster_drop(ncluster);
1850         }
1851         ripdata = wipdata;
1852
1853         /*
1854          * cluster represents the hardlink target and is now flagged deleted.
1855          * duplicate it to the parent directory and adjust nlinks.
1856          *
1857          * WARNING! The shiftup() call can cause ncluster to be moved into
1858          *          an indirect block, and our ncluster will wind up pointing
1859          *          to the older/original version.
1860          */
1861         KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_DELETED);
1862         hammer2_hardlink_shiftup(trans, cluster, ip, cdip, cdcluster,
1863                                  nlinks, &error);
1864
1865         if (error == 0)
1866                 hammer2_inode_repoint(ip, cdip, cluster);
1867
1868 done:
1869         /*
1870          * Cleanup, cluster/ncluster already dealt with.
1871          *
1872          * Return the shifted cluster in *clusterp.
1873          */
1874         if (cparent) {
1875                 hammer2_cluster_unlock(cparent);
1876                 hammer2_cluster_drop(cparent);
1877         }
1878         *clusterp = cluster;
1879
1880         return (error);
1881 }
1882
1883 /*
1884  * If (*ochainp) is non-NULL it points to the forward OBJTYPE_HARDLINK
1885  * inode while (*chainp) points to the resolved (hidden hardlink
1886  * target) inode.  In this situation when nlinks is 1 we wish to
1887  * deconsolidate the hardlink, moving it back to the directory that now
1888  * represents the only remaining link.
1889  */
1890 int
1891 hammer2_hardlink_deconsolidate(hammer2_trans_t *trans,
1892                                hammer2_inode_t *dip,
1893                                hammer2_chain_t **chainp,
1894                                hammer2_chain_t **ochainp)
1895 {
1896         if (*ochainp == NULL)
1897                 return (0);
1898         /* XXX */
1899         return (0);
1900 }
1901
1902 /*
1903  * The caller presents a locked cluster with an obj_type of
1904  * HAMMER2_OBJTYPE_HARDLINK in (*clusterp).  This routine will locate
1905  * the inode and replace (*clusterp) with a new locked cluster containing
1906  * the target hardlink, also locked.  The original cluster will be
1907  * unlocked and released.
1908  *
1909  * If cparentp is not NULL a locked cluster representing the hardlink's
1910  * parent is also returned.
1911  *
1912  * If we are unable to locate the hardlink target EIO is returned,
1913  * (*cparentp) is set to NULL, the original passed-in (*clusterp)
1914  * will be unlocked and released and (*clusterp) will be set to NULL
1915  * as well.
1916  */
1917 int
1918 hammer2_hardlink_find(hammer2_inode_t *dip,
1919                       hammer2_cluster_t **cparentp,
1920                       hammer2_cluster_t **clusterp)
1921 {
1922         const hammer2_inode_data_t *ipdata;
1923         hammer2_cluster_t *cluster;
1924         hammer2_cluster_t *cparent;
1925         hammer2_cluster_t *rcluster;
1926         hammer2_inode_t *ip;
1927         hammer2_inode_t *pip;
1928         hammer2_key_t key_dummy;
1929         hammer2_key_t lhc;
1930
1931         cluster = *clusterp;
1932         pip = dip;
1933         hammer2_inode_ref(pip);         /* for loop */
1934
1935         /*
1936          * Locate the hardlink.  pip is referenced and not locked.
1937          * Unlock and release (*clusterp) after extracting the needed
1938          * data.
1939          */
1940         ipdata = &hammer2_cluster_rdata(cluster)->ipdata;
1941         lhc = ipdata->meta.inum;
1942         ipdata = NULL;                  /* safety */
1943         hammer2_cluster_unlock(cluster);
1944         hammer2_cluster_drop(cluster);
1945         *clusterp = NULL;               /* safety */
1946
1947         rcluster = NULL;
1948         cparent = NULL;
1949
1950         while ((ip = pip) != NULL) {
1951                 cparent = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
1952                 hammer2_inode_drop(ip);                 /* loop */
1953                 KKASSERT(hammer2_cluster_type(cparent) ==
1954                          HAMMER2_BREF_TYPE_INODE);
1955                 rcluster = hammer2_cluster_lookup(cparent, &key_dummy,
1956                                              lhc, lhc, 0);
1957                 if (rcluster)
1958                         break;
1959                 hammer2_cluster_lookup_done(cparent);   /* discard parent */
1960                 cparent = NULL;                         /* safety */
1961                 pip = ip->pip;          /* safe, ip held locked */
1962                 if (pip)
1963                         hammer2_inode_ref(pip);         /* loop */
1964                 hammer2_inode_unlock(ip, NULL);
1965         }
1966
1967         /*
1968          * chain is locked, ip is locked.  Unlock ip, return the locked
1969          * chain.  *ipp is already set w/a ref count and not locked.
1970          *
1971          * (cparent is already unlocked).
1972          */
1973         *clusterp = rcluster;
1974         if (rcluster) {
1975                 if (cparentp) {
1976                         *cparentp = cparent;
1977                         hammer2_inode_unlock(ip, NULL);
1978                 } else {
1979                         hammer2_inode_unlock(ip, cparent);
1980                 }
1981                 return (0);
1982         } else {
1983                 if (cparentp)
1984                         *cparentp = NULL;
1985                 if (ip)
1986                         hammer2_inode_unlock(ip, cparent);
1987                 return (EIO);
1988         }
1989 }
1990
1991 /*
1992  * Find the directory common to both fdip and tdip.
1993  *
1994  * Returns a held but not locked inode.  Caller typically locks the inode,
1995  * and when through unlocks AND drops it.
1996  */
1997 hammer2_inode_t *
1998 hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
1999 {
2000         hammer2_inode_t *scan1;
2001         hammer2_inode_t *scan2;
2002
2003         /*
2004          * We used to have a depth field but it complicated matters too
2005          * much for directory renames.  So now its ugly.  Check for
2006          * simple cases before giving up and doing it the expensive way.
2007          *
2008          * XXX need a bottom-up topology stability lock
2009          */
2010         if (fdip == tdip || fdip == tdip->pip) {
2011                 hammer2_inode_ref(fdip);
2012                 return(fdip);
2013         }
2014         if (fdip->pip == tdip) {
2015                 hammer2_inode_ref(tdip);
2016                 return(tdip);
2017         }
2018
2019         /*
2020          * XXX not MPSAFE
2021          */
2022         for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
2023                 scan2 = tdip;
2024                 while (scan2->pmp == tdip->pmp) {
2025                         if (scan1 == scan2) {
2026                                 hammer2_inode_ref(scan1);
2027                                 return(scan1);
2028                         }
2029                         scan2 = scan2->pip;
2030                         if (scan2 == NULL)
2031                                 break;
2032                 }
2033         }
2034         panic("hammer2_inode_common_parent: no common parent %p %p\n",
2035               fdip, tdip);
2036         /* NOT REACHED */
2037         return(NULL);
2038 }
2039
2040 /*
2041  * Synchronize the inode's frontend state with the chain state prior
2042  * to any explicit flush of the inode or any strategy write call.
2043  *
2044  * Called with a locked inode.
2045  */
2046 void
2047 hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip, 
2048                     hammer2_cluster_t *cparent)
2049 {
2050         const hammer2_inode_data_t *ripdata;
2051         hammer2_inode_data_t *wipdata;
2052         hammer2_cluster_t *dparent;
2053         hammer2_cluster_t *cluster;
2054         hammer2_key_t lbase;
2055         hammer2_key_t key_next;
2056         int dosync = 0;
2057
2058         ripdata = &hammer2_cluster_rdata(cparent)->ipdata;    /* target file */
2059
2060         if (ip->flags & HAMMER2_INODE_MTIME) {
2061                 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
2062                 atomic_clear_int(&ip->flags, HAMMER2_INODE_MTIME);
2063                 wipdata->meta.mtime = ip->meta.mtime;
2064                 dosync = 1;
2065                 ripdata = wipdata;
2066         }
2067         if ((ip->flags & HAMMER2_INODE_RESIZED) &&
2068             ip->meta.size < ripdata->meta.size) {
2069                 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
2070                 wipdata->meta.size = ip->meta.size;
2071                 dosync = 1;
2072                 ripdata = wipdata;
2073                 atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);
2074
2075                 /*
2076                  * We must delete any chains beyond the EOF.  The chain
2077                  * straddling the EOF will be pending in the bioq.
2078                  */
2079                 lbase = (ripdata->meta.size + HAMMER2_PBUFMASK64) &
2080                         ~HAMMER2_PBUFMASK64;
2081                 dparent = hammer2_cluster_lookup_init(&ip->cluster, 0);
2082                 cluster = hammer2_cluster_lookup(dparent, &key_next,
2083                                                  lbase, (hammer2_key_t)-1,
2084                                                  HAMMER2_LOOKUP_NODATA);
2085                 while (cluster) {
2086                         /*
2087                          * Degenerate embedded case, nothing to loop on
2088                          */
2089                         switch (hammer2_cluster_type(cluster)) {
2090                         case HAMMER2_BREF_TYPE_INODE:
2091                                 hammer2_cluster_unlock(cluster);
2092                                 hammer2_cluster_drop(cluster);
2093                                 cluster = NULL;
2094                                 break;
2095                         case HAMMER2_BREF_TYPE_DATA:
2096                                 hammer2_cluster_delete(trans, dparent, cluster,
2097                                                    HAMMER2_DELETE_PERMANENT);
2098                                 /* fall through */
2099                         default:
2100                                 cluster = hammer2_cluster_next(dparent, cluster,
2101                                                    &key_next,
2102                                                    key_next, (hammer2_key_t)-1,
2103                                                    HAMMER2_LOOKUP_NODATA);
2104                                 break;
2105                         }
2106                 }
2107                 hammer2_cluster_lookup_done(dparent);
2108         } else
2109         if ((ip->flags & HAMMER2_INODE_RESIZED) &&
2110             ip->meta.size > ripdata->meta.size) {
2111                 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
2112                 wipdata->meta.size = ip->meta.size;
2113                 atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);
2114
2115                 /*
2116                  * When resizing larger we may not have any direct-data
2117                  * available.
2118                  */
2119                 if ((wipdata->meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
2120                     ip->meta.size > HAMMER2_EMBEDDED_BYTES) {
2121                         wipdata->meta.op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
2122                         bzero(&wipdata->u.blockset,
2123                               sizeof(wipdata->u.blockset));
2124                 }
2125                 dosync = 1;
2126                 ripdata = wipdata;
2127         }
2128         if (dosync)
2129                 hammer2_cluster_modsync(cparent);
2130 }