2 * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
/*
 * Instantiate the red-black tree code for hammer2_inode_tree.  Nodes are
 * hammer2_inode structures linked through their rbnode field and ordered
 * by hammer2_inode_cmp(); lookups take a hammer2_tid_t key that is matched
 * against meta.inum.
 */
46 RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
47 hammer2_tid_t, meta.inum);
/*
 * Ordering callback for the inode red-black tree.  The two inodes are
 * compared solely by inode number (meta.inum): one test for ip1 sorting
 * before ip2 and one for ip1 sorting after ip2.
 */
50 hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
52 if (ip1->meta.inum < ip2->meta.inum)
54 if (ip1->meta.inum > ip2->meta.inum)
62 * HAMMER2 offers shared and exclusive locks on inodes. Pass a mask of
65 * - pass HAMMER2_RESOLVE_SHARED if a shared lock is desired. The
66 * inode locking function will automatically set the RDONLY flag.
68 * - pass HAMMER2_RESOLVE_ALWAYS if you need the inode's meta-data.
69 * Most front-end inode locks do.
71 * - pass HAMMER2_RESOLVE_NEVER if you do not want to require that
72 * the inode data be resolved. This is used by the syncthr because
73 * it can run on an unresolved/out-of-sync cluster, and also by the
74 * vnode reclamation code to avoid unnecessary I/O (particularly when
75 * disposing of hundreds of thousands of cached vnodes).
77 * The inode locking function locks the inode itself, resolves any stale
78 * chains in the inode's cluster, and allocates a fresh copy of the
79 * cluster with 1 ref and all the underlying chains locked.
81 * ip->cluster will be stable while the inode is locked.
83 * NOTE: We don't combine the inode/chain lock because putting away an
84 * inode would otherwise confuse multiple lock holders of the inode.
86 * NOTE: In-memory inodes always point to hardlink targets (the actual file),
87 * and never point to a hardlink pointer.
89 * NOTE: If caller passes HAMMER2_RESOLVE_RDONLY the exclusive locking code
90 * will feel free to reduce the chain set in the cluster as an
91 * optimization. It will still be validated against the quorum if
92 * appropriate, but the optimization might be able to reduce data
93 * accesses to one node. This flag is automatically set if the inode
94 * is locked with HAMMER2_RESOLVE_SHARED.
/*
 * Reference and lock an inode.
 *
 * A reference is always taken first.  The inode mutex is then acquired
 * shared when HAMMER2_RESOLVE_SHARED is present in (how); the exclusive
 * acquisition is the alternative path.  hammer2_inode_unlock() undoes
 * both the lock and the reference.
 */
97 hammer2_inode_lock(hammer2_inode_t *ip, int how)
99 hammer2_inode_ref(ip);
102 * Inode structure mutex
104 if (how & HAMMER2_RESOLVE_SHARED) {
105 /*how |= HAMMER2_RESOLVE_RDONLY; not used */
106 hammer2_mtx_sh(&ip->lock);
108 hammer2_mtx_ex(&ip->lock);
113 * Create a locked copy of ip->cluster. Note that the copy will have a
114 * ref on the cluster AND its chains and we don't want a second ref to
115 * either when we lock it.
117 * Exclusive inode locks set the template focus chain in (ip)
118 * as a hint. Cluster locks can ALWAYS replace the focus in the
119 * working copy if the hint does not work out, so beware.
/*
 * Produce a locked, resolved working copy of ip->cluster.
 *
 * The embedded cluster is duplicated with hammer2_cluster_copy(), the
 * copy is locked using (how) and then resolved.  Only when (how) does not
 * request a shared lock is the focus chosen for the copy written back to
 * ip->cluster.focus as a hint for later callers.
 *
 * NOTE(review): presumably the copy is what gets returned to the caller;
 * the return statement is not visible here -- confirm.
 */
122 hammer2_inode_cluster(hammer2_inode_t *ip, int how)
124 hammer2_cluster_t *cluster;
126 cluster = hammer2_cluster_copy(&ip->cluster);
127 hammer2_cluster_lock(cluster, how);
128 hammer2_cluster_resolve(cluster);
131 * cluster->focus will be set if resolving RESOLVE_ALWAYS, but
132 * only update the cached focus in the inode structure when taking
133 * out an exclusive lock.
135 if ((how & HAMMER2_RESOLVE_SHARED) == 0)
136 ip->cluster.focus = cluster->focus;
142 * Select a chain out of an inode's cluster and lock it.
144 * The inode does not have to be locked.
/*
 * Fetch the chain stored in slot (clindex) of ip->cluster and lock it.
 *
 * The slot is read under the shared cluster spinlock.  The chain is
 * referenced while the spinlock is still held so it cannot go away, the
 * spinlock is released, and only then is the (possibly blocking) chain
 * lock taken with (how).  The second spin release below serves the path
 * on which no chain was obtained.
 *
 * NOTE(review): callers such as hammer2_inode_xop_create() test the
 * result against NULL, so an out-of-range clindex or empty slot
 * presumably yields NULL -- confirm.
 */
147 hammer2_inode_chain(hammer2_inode_t *ip, int clindex, int how)
149 hammer2_chain_t *chain;
151 hammer2_spin_sh(&ip->cluster_spin);
152 if (clindex >= ip->cluster.nchains)
155 chain = ip->cluster.array[clindex].chain;
157 hammer2_chain_ref(chain);
158 hammer2_spin_unsh(&ip->cluster_spin);
159 hammer2_chain_lock(chain, how);
161 hammer2_spin_unsh(&ip->cluster_spin);
167 hammer2_inode_chain_and_parent(hammer2_inode_t *ip, int clindex,
168 hammer2_chain_t **parentp, int how)
170 hammer2_chain_t *chain;
171 hammer2_chain_t *parent;
174 hammer2_spin_sh(&ip->cluster_spin);
175 if (clindex >= ip->cluster.nchains)
178 chain = ip->cluster.array[clindex].chain;
180 hammer2_chain_ref(chain);
181 hammer2_spin_unsh(&ip->cluster_spin);
182 hammer2_chain_lock(chain, how);
184 hammer2_spin_unsh(&ip->cluster_spin);
188 * Get parent, lock order must be (parent, chain).
190 parent = chain->parent;
191 hammer2_chain_ref(parent);
192 hammer2_chain_unlock(chain);
193 hammer2_chain_lock(parent, how);
194 hammer2_chain_lock(chain, how);
195 if (ip->cluster.array[clindex].chain == chain &&
196 chain->parent == parent) {
203 hammer2_chain_unlock(chain);
204 hammer2_chain_drop(chain);
205 hammer2_chain_unlock(parent);
206 hammer2_chain_drop(parent);
/*
 * Counterpart of hammer2_inode_lock(): release the inode mutex and then
 * drop the reference that the lock call acquired.
 */
214 hammer2_inode_unlock(hammer2_inode_t *ip)
216 hammer2_mtx_unlock(&ip->lock);
217 hammer2_inode_drop(ip);
221 * Temporarily release a lock held shared or exclusive. Caller must
222 * hold the lock shared or exclusive on call and lock will be released on return.
225 * Restore a lock that was temporarily released.
/*
 * Thin wrapper around hammer2_mtx_temp_release() for the inode mutex.
 * The returned state value is what the caller later hands to
 * hammer2_inode_lock_temp_restore() (see hammer2_igetv()).  The inode
 * reference count is not touched.
 */
228 hammer2_inode_lock_temp_release(hammer2_inode_t *ip)
230 return hammer2_mtx_temp_release(&ip->lock);
/*
 * Thin wrapper around hammer2_mtx_temp_restore() for the inode mutex.
 * (ostate) is the value previously returned by
 * hammer2_inode_lock_temp_release() for the same inode.
 */
234 hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, hammer2_mtx_state_t ostate)
236 hammer2_mtx_temp_restore(&ip->lock, ostate);
240 * Upgrade a shared inode lock to exclusive and return. If the inode lock
241 * is already held exclusively this is a NOP.
243 * The caller MUST hold the inode lock either shared or exclusive on call
244 * and will own the lock exclusively on return.
246 * Returns non-zero if the lock was already exclusive prior to the upgrade.
/*
 * Ensure the inode mutex is held exclusively.
 *
 * An already-exclusive lock is detected with mtx_islocked_ex().  The
 * unlock + exclusive re-lock pair below means the mutex is momentarily
 * not held during the transition, so state examined before the call has
 * to be re-validated afterwards (hammer2_igetv() re-checks ip->vp for
 * exactly this reason).
 */
249 hammer2_inode_lock_upgrade(hammer2_inode_t *ip)
253 if (mtx_islocked_ex(&ip->lock)) {
256 hammer2_mtx_unlock(&ip->lock);
257 hammer2_mtx_ex(&ip->lock);
264 * Downgrade an inode lock from exclusive to shared only if the inode
265 * lock was previously shared. If the inode lock was previously exclusive,
/*
 * Undo a prior hammer2_inode_lock_upgrade().  (wasexclusive) is the value
 * that call returned; the mutex is downgraded to shared only when it is
 * zero, i.e. when the lock was shared before the upgrade.
 */
269 hammer2_inode_lock_downgrade(hammer2_inode_t *ip, int wasexclusive)
271 if (wasexclusive == 0)
272 mtx_downgrade(&ip->lock);
276 * Lookup an inode by inode number
/*
 * Search pmp->inum_tree for (inum).
 *
 * The lookup and the reference acquisition both happen while
 * pmp->inum_spin is held.  hammer2_inode_drop() takes the same spinlock
 * around the final 1->0 reference transition, so an inode found here
 * cannot be freed before its reference is added.
 */
279 hammer2_inode_lookup(hammer2_pfs_t *pmp, hammer2_tid_t inum)
287 hammer2_spin_ex(&pmp->inum_spin);
288 ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
290 hammer2_inode_ref(ip);
291 hammer2_spin_unex(&pmp->inum_spin);
297 * Adding a ref to an inode is only legal if the inode already has at least one ref.
300 * (can be called with spinlock held)
/*
 * Atomically add one to ip->refs.  Nothing else is done, which is why the
 * function is usable while a spinlock is held.
 */
303 hammer2_inode_ref(hammer2_inode_t *ip)
305 atomic_add_int(&ip->refs, 1);
309 * Drop an inode reference, freeing the inode when the last reference goes away.
/*
 * Release one reference on ip.
 *
 * The final 1->0 transition is made with atomic_cmpset_int() while
 * pmp->inum_spin is held, which interlocks with hammer2_inode_lookup()
 * taking a reference under the same spinlock.  When that transition
 * succeeds the inode mutex is asserted to be unreferenced, the inode is
 * removed from the inum tree if HAMMER2_INODE_ONRBTREE is set, its
 * embedded cluster is emptied with hammer2_inode_repoint(ip, NULL, NULL),
 * and the structure is freed back to pmp->minode with the in-memory
 * inode counter decremented.  When the compare-and-set loses, the
 * spinlock is simply released.
 *
 * Any other transition is a plain compare-and-set decrement of ip->refs.
 *
 * NOTE(review): per the in-body comments the loop then continues with the
 * parent inode (pip) to release the reference implied by ip->pip --
 * confirm against the complete function.
 */
313 hammer2_inode_drop(hammer2_inode_t *ip)
316 hammer2_inode_t *pip;
324 * Transition to zero, must interlock with
325 * the inode inumber lookup tree (if applicable).
326 * It should not be possible for anyone to race
327 * the transition to 0.
331 hammer2_spin_ex(&pmp->inum_spin);
333 if (atomic_cmpset_int(&ip->refs, 1, 0)) {
334 KKASSERT(hammer2_mtx_refs(&ip->lock) == 0);
335 if (ip->flags & HAMMER2_INODE_ONRBTREE) {
336 atomic_clear_int(&ip->flags,
337 HAMMER2_INODE_ONRBTREE);
338 RB_REMOVE(hammer2_inode_tree,
339 &pmp->inum_tree, ip);
341 hammer2_spin_unex(&pmp->inum_spin);
348 * Cleaning out ip->cluster isn't entirely
351 hammer2_inode_repoint(ip, NULL, NULL);
354 * We have to drop pip (if non-NULL) to
355 * dispose of our implied reference from
356 * ip->pip. We can simply loop on it.
358 kfree(ip, pmp->minode);
359 atomic_add_long(&pmp->inmem_inodes, -1);
361 /* continue with pip (can be NULL) */
363 hammer2_spin_unex(&ip->pmp->inum_spin);
367 * Non zero transition
369 if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
376 * Get the vnode associated with the given inode, allocating the vnode if
377 * necessary. The vnode will be returned exclusively locked.
379 * The caller must lock the inode (shared or exclusive).
381 * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
/*
 * Return the vnode associated with ip, creating one with getnewvnode()
 * when no usable vnode exists.
 *
 * Existing-vnode path: the inode lock is temporarily released around
 * vget(vp, LK_EXCLUSIVE) and restored afterwards on both the failure and
 * the success path, so the blocking vnode lock is never taken while the
 * inode lock is held.
 *
 * New-vnode path: the status of getnewvnode() is stored in *errorp.  The
 * inode lock is then upgraded to exclusive and ip->vp is checked again,
 * because the upgrade may drop the lock and another thread can attach a
 * vnode in that window; in that case the lock is downgraded again.
 * Otherwise the vnode is set up according to ip->meta.type (VM I/O for
 * regular files and softlinks, the mount spec ops for devices, the mount
 * fifo ops for fifos, panic on an unknown type), VROOT is set when ip is
 * pmp->iroot, the inode gains a reference for the vp association, and the
 * lock is downgraded if it was not exclusive on entry.
 */
385 hammer2_igetv(hammer2_inode_t *ip, int *errorp)
391 KKASSERT(pmp != NULL);
396 * Attempt to reuse an existing vnode assignment. It is
397 * possible to race a reclaim so the vget() may fail. The
398 * inode must be unlocked during the vget() to avoid a
399 * deadlock against a reclaim.
406 * Inode must be unlocked during the vget() to avoid
407 * possible deadlocks, but leave the ip ref intact.
409 * vnode is held to prevent destruction during the
410 * vget(). The vget() can still fail if we lost
411 * a reclaim race on the vnode.
413 hammer2_mtx_state_t ostate;
416 ostate = hammer2_inode_lock_temp_release(ip);
417 if (vget(vp, LK_EXCLUSIVE)) {
419 hammer2_inode_lock_temp_restore(ip, ostate);
422 hammer2_inode_lock_temp_restore(ip, ostate);
424 /* vp still locked and ref from vget */
426 kprintf("hammer2: igetv race %p/%p\n",
436 * No vnode exists, allocate a new vnode. Beware of
437 * allocation races. This function will return an
438 * exclusively locked and referenced vnode.
440 *errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
442 kprintf("hammer2: igetv getnewvnode failed %d\n",
449 * Lock the inode and check for an allocation race.
451 wasexclusive = hammer2_inode_lock_upgrade(ip);
452 if (ip->vp != NULL) {
455 hammer2_inode_lock_downgrade(ip, wasexclusive);
459 switch (ip->meta.type) {
460 case HAMMER2_OBJTYPE_DIRECTORY:
463 case HAMMER2_OBJTYPE_REGFILE:
465 vinitvmio(vp, ip->meta.size,
467 (int)ip->meta.size & HAMMER2_LBUFMASK);
469 case HAMMER2_OBJTYPE_SOFTLINK:
471 * XXX for now we are using the generic file_read
472 * and file_write code so we need a buffer cache
476 vinitvmio(vp, ip->meta.size,
478 (int)ip->meta.size & HAMMER2_LBUFMASK);
480 case HAMMER2_OBJTYPE_CDEV:
483 case HAMMER2_OBJTYPE_BDEV:
484 vp->v_ops = &pmp->mp->mnt_vn_spec_ops;
485 if (ip->meta.type != HAMMER2_OBJTYPE_CDEV)
491 case HAMMER2_OBJTYPE_FIFO:
493 vp->v_ops = &pmp->mp->mnt_vn_fifo_ops;
496 panic("hammer2: unhandled objtype %d",
501 if (ip == pmp->iroot)
502 vsetflags(vp, VROOT);
506 hammer2_inode_ref(ip); /* vp association */
507 hammer2_inode_lock_downgrade(ip, wasexclusive);
512 * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
514 if (hammer2_debug & 0x0002) {
515 kprintf("igetv vp %p refs 0x%08x aux 0x%08x\n",
516 vp, vp->v_refcnt, vp->v_auxrefs);
522 * Returns the inode associated with the passed-in cluster, creating the
523 * inode if necessary and synchronizing it to the passed-in cluster otherwise.
525 * The passed-in cluster must be locked and will remain locked on return.
526 * The returned inode will be locked and the caller may dispose of both
527 * via hammer2_inode_unlock() + hammer2_inode_drop(). However, if the caller
528 * needs to resolve a hardlink it must ref/unlock/relock/drop the inode.
530 * The hammer2_inode structure regulates the interface between the high level
531 * kernel VNOPS API and the filesystem backend (the chains).
533 * On return the inode is locked with the supplied cluster.
/*
 * Find or create the in-memory inode that fronts (cluster).
 *
 * Lookup: the inode number is read from the inode data of the cluster
 * and passed to hammer2_inode_lookup().  A hit is locked exclusively and
 * repointed at the cluster.  If the inode turns out to have left the
 * tree in the meantime (ONRBTREE clear while spmp_hmp is NULL) it is
 * unlocked and dropped instead.
 *
 * Creation: a zeroed inode is allocated from pmp->minode, the PFS memory
 * accounting is bumped, and the embedded cluster is initialized with one
 * ref and the CLUSTER_INODE flag.  Meta-data is copied from the cluster
 * and the chains are repointed; the alternative branch hard-wires inum 1
 * (presumably the no-cluster case -- confirm).  Both branches set
 * METAGOOD.  dip is recorded as nip->pip with a reference, and the new
 * inode is locked exclusively.  When spmp_hmp is NULL the inode is
 * inserted into pmp->inum_tree under pmp->inum_spin; a non-zero
 * RB_INSERT result means another thread inserted the same inum first,
 * so the new inode is unlocked and dropped.
 *
 * NOTE(review): the in-body comments indicate both losing-race paths
 * retry the lookup -- confirm against the complete function.
 */
536 hammer2_inode_get(hammer2_pfs_t *pmp, hammer2_inode_t *dip,
537 hammer2_cluster_t *cluster)
539 hammer2_inode_t *nip;
540 const hammer2_inode_data_t *iptmp;
541 const hammer2_inode_data_t *nipdata;
543 KKASSERT(cluster == NULL ||
544 hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
548 * Interlocked lookup/ref of the inode. This code is only needed
549 * when looking up inodes with nlinks != 0 (TODO: optimize out
550 * otherwise and test for duplicates).
552 * Cluster can be NULL during the initial pfs allocation.
556 iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
557 nip = hammer2_inode_lookup(pmp, iptmp->meta.inum);
561 hammer2_mtx_ex(&nip->lock);
564 * Handle SMP race (not applicable to the super-root spmp
565 * which can't index inodes due to duplicative inode numbers).
567 if (pmp->spmp_hmp == NULL &&
568 (nip->flags & HAMMER2_INODE_ONRBTREE) == 0) {
569 hammer2_mtx_unlock(&nip->lock);
570 hammer2_inode_drop(nip);
573 hammer2_inode_repoint(nip, NULL, cluster);
579 * We couldn't find the inode number, create a new inode.
581 nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO);
582 spin_init(&nip->cluster_spin, "h2clspin");
583 atomic_add_long(&pmp->inmem_inodes, 1);
584 hammer2_pfs_memory_inc(pmp);
585 hammer2_pfs_memory_wakeup(pmp);
587 nip->flags = HAMMER2_INODE_SROOT;
590 * Initialize nip's cluster. A cluster is provided for normal
591 * inodes but typically not for the super-root or PFS inodes.
593 nip->cluster.refs = 1;
594 nip->cluster.pmp = pmp;
595 nip->cluster.flags |= HAMMER2_CLUSTER_INODE;
597 nipdata = &hammer2_cluster_rdata(cluster)->ipdata;
598 nip->meta = nipdata->meta;
599 atomic_set_int(&nip->flags, HAMMER2_INODE_METAGOOD);
600 hammer2_inode_repoint(nip, NULL, cluster);
602 nip->meta.inum = 1; /* PFS inum is always 1 XXX */
603 /* mtime will be updated when a cluster is available */
604 atomic_set_int(&nip->flags, HAMMER2_INODE_METAGOOD);/*XXX*/
607 nip->pip = dip; /* can be NULL */
609 hammer2_inode_ref(dip); /* ref dip for nip->pip */
614 * ref and lock on nip gives it state compatible to after a
615 * hammer2_inode_lock() call.
618 hammer2_mtx_init(&nip->lock, "h2inode");
619 hammer2_mtx_ex(&nip->lock);
620 /* combination of thread lock and chain lock == inode lock */
623 * Attempt to add the inode. If it fails we raced another inode
624 * get. Undo all the work and try again.
626 if (pmp->spmp_hmp == NULL) {
627 hammer2_spin_ex(&pmp->inum_spin);
628 if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
629 hammer2_spin_unex(&pmp->inum_spin);
630 hammer2_mtx_unlock(&nip->lock);
631 hammer2_inode_drop(nip);
634 atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
635 hammer2_spin_unex(&pmp->inum_spin);
642 * Create a new inode in the specified directory using the vattr to
643 * figure out the type of inode.
645 * If no error occurs the new inode with its cluster locked is returned in
646 * *nipp, otherwise an error is returned and *nipp is set to NULL.
648 * If vap and/or cred are NULL the related fields are not set and the
649 * inode type defaults to a directory. This is used when creating PFSs
650 * under the super-root, so the inode number is set to 1 in this case.
652 * dip is not locked on entry.
654 * NOTE: When used to create a snapshot, the inode is temporarily associated
655 * with the super-root spmp. XXX should pass new pmp for snapshot.
/*
 * Create a new inode under dip through the xop_create backend operation.
 *
 * Outline of the visible steps:
 *  - dip is locked exclusively (how == 0) and the uid, gid, mode,
 *    comp_algo and check_algo fields of its meta-data are snapshotted;
 *  - a scanlhc xop is collected over the directory to locate an unused
 *    key in the collision space (per the in-body comment);
 *  - xop->meta is zeroed and filled in: object type (with rmajor/rminor
 *    for device nodes), target_type, inum, the compression and check
 *    algorithms inherited from dip, version, ctime/mtime, mode, nlinks
 *    and the uid/gid derived from vap, cred and the directory values;
 *  - regular files, softlinks and hardlinks get HAMMER2_OPFLAG_DIRECTDATA;
 *  - the name, name_len and name_key are recorded and
 *    hammer2_inode_xop_create is started and collected;
 *  - unless the type is HAMMER2_OBJTYPE_HARDLINK the in-memory inode for
 *    the resulting cluster is obtained via hammer2_inode_get();
 *  - the xop is retired and dip is unlocked.
 */
658 hammer2_inode_create(hammer2_inode_t *dip,
659 struct vattr *vap, struct ucred *cred,
660 const uint8_t *name, size_t name_len, hammer2_key_t lhc,
661 hammer2_key_t inum, uint8_t type, uint8_t target_type,
662 int flags, int *errorp)
664 hammer2_xop_create_t *xop;
665 hammer2_inode_t *nip;
671 uint8_t dip_comp_algo;
672 uint8_t dip_check_algo;
675 lhc = hammer2_dirhash(name, name_len);
680 * Locate the inode or indirect block to create the new
681 * entry in. At the same time check for key collisions
682 * and iterate until we don't get one.
684 * NOTE: hidden inodes do not have iterators.
686 * Lock the directory exclusively for now to guarantee that
687 * we can find an unused lhc for the name. Due to collisions,
688 * two different creates can end up with the same lhc so we
689 * cannot depend on the OS to prevent the collision.
691 hammer2_inode_lock(dip, 0);
693 dip_uid = dip->meta.uid;
694 dip_gid = dip->meta.gid;
695 dip_mode = dip->meta.mode;
696 dip_comp_algo = dip->meta.comp_algo;
697 dip_check_algo = dip->meta.check_algo;
700 * If name specified, locate an unused key in the collision space.
701 * Otherwise use the passed-in lhc directly.
704 hammer2_xop_scanlhc_t *sxop;
705 hammer2_key_t lhcbase;
708 sxop = &hammer2_xop_alloc(dip)->xop_scanlhc;
710 hammer2_xop_start(&sxop->head, hammer2_xop_scanlhc);
711 while ((error = hammer2_xop_collect(&sxop->head, 0)) == 0) {
712 if (lhc != sxop->head.cluster.focus->bref.key)
716 hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);
724 if ((lhcbase ^ lhc) & ~HAMMER2_DIRHASH_LOMASK) {
731 * Create the inode with the lhc as the key.
733 xop = &hammer2_xop_alloc(dip)->xop_create;
736 bzero(&xop->meta, sizeof(xop->meta));
739 xop->meta.type = hammer2_get_obj_type(vap->va_type);
741 switch (xop->meta.type) {
742 case HAMMER2_OBJTYPE_CDEV:
743 case HAMMER2_OBJTYPE_BDEV:
744 xop->meta.rmajor = vap->va_rmajor;
745 xop->meta.rminor = vap->va_rminor;
750 type = xop->meta.type;
752 xop->meta.type = type;
753 xop->meta.target_type = target_type;
755 xop->meta.inum = inum;
757 /* Inherit parent's inode compression mode. */
758 xop->meta.comp_algo = dip_comp_algo;
759 xop->meta.check_algo = dip_check_algo;
760 xop->meta.version = HAMMER2_INODE_VERSION_ONE;
761 hammer2_update_time(&xop->meta.ctime);
762 xop->meta.mtime = xop->meta.ctime;
764 xop->meta.mode = vap->va_mode;
765 xop->meta.nlinks = 1;
767 if (dip && dip->pmp) {
768 xuid = hammer2_to_unix_xid(&dip_uid);
769 xuid = vop_helper_create_uid(dip->pmp->mp,
775 /* super-root has no dip and/or pmp */
778 if (vap->va_vaflags & VA_UID_UUID_VALID)
779 xop->meta.uid = vap->va_uid_uuid;
780 else if (vap->va_uid != (uid_t)VNOVAL)
781 hammer2_guid_to_uuid(&xop->meta.uid, vap->va_uid);
783 hammer2_guid_to_uuid(&xop->meta.uid, xuid);
785 if (vap->va_vaflags & VA_GID_UUID_VALID)
786 xop->meta.gid = vap->va_gid_uuid;
787 else if (vap->va_gid != (gid_t)VNOVAL)
788 hammer2_guid_to_uuid(&xop->meta.gid, vap->va_gid);
790 xop->meta.gid = dip_gid;
794 * Regular files and softlinks allow a small amount of data to be
795 * directly embedded in the inode. This flag will be cleared if
796 * the size is extended past the embedded limit.
798 if (xop->meta.type == HAMMER2_OBJTYPE_REGFILE ||
799 xop->meta.type == HAMMER2_OBJTYPE_SOFTLINK ||
800 xop->meta.type == HAMMER2_OBJTYPE_HARDLINK) {
801 xop->meta.op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
804 hammer2_xop_setname(&xop->head, name, name_len);
805 xop->meta.name_len = name_len;
806 xop->meta.name_key = lhc;
807 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
809 hammer2_xop_start(&xop->head, hammer2_inode_xop_create);
811 error = hammer2_xop_collect(&xop->head, 0);
813 kprintf("CREATE INODE %*.*s\n",
814 (int)name_len, (int)name_len, name);
823 * Set up the new inode if not a hardlink pointer.
825 * NOTE: *_get() integrates chain's lock into the inode lock.
827 * NOTE: Only one new inode can currently be created per
828 * transaction. If the need arises we can adjust
829 * hammer2_trans_init() to allow more.
831 * NOTE: nipdata will have chain's blockset data.
833 if (type != HAMMER2_OBJTYPE_HARDLINK) {
834 nip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster);
835 nip->comp_heuristic = 0;
841 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
843 hammer2_inode_unlock(dip);
849 * Connect the disconnected inode (ip) to the directory (dip) with the
850 * specified (name, name_len). If name is NULL, (lhc) will be used as
851 * the directory key and the inode's embedded name will not be modified
852 * for future recovery purposes.
854 * dip and ip must both be locked exclusively (dip in particular to avoid
/*
 * Insert the disconnected inode ip into directory dip.
 *
 * With a name, lhc is seeded from hammer2_dirhash() and a scanlhc xop is
 * collected over dip to resolve the collision space.  If ip->pip is not
 * already dip, a reference is taken on dip and the previous parent is
 * dropped.  The directory entry itself is made by the
 * hammer2_inode_xop_connect backend, which receives the name and ip as
 * its second inode.  Afterwards the inode is marked modified and
 * ip->meta.name_key (and name_len) are brought in line with the new
 * entry so the in-memory meta-data matches what the backend wrote.
 */
858 hammer2_inode_connect(hammer2_inode_t *dip, hammer2_inode_t *ip,
859 const char *name, size_t name_len,
862 hammer2_xop_scanlhc_t *sxop;
863 hammer2_xop_connect_t *xop;
864 hammer2_inode_t *opip;
865 hammer2_key_t lhcbase;
869 * Calculate the lhc and resolve the collision space.
872 lhc = lhcbase = hammer2_dirhash(name, name_len);
873 sxop = &hammer2_xop_alloc(dip)->xop_scanlhc;
875 hammer2_xop_start(&sxop->head, hammer2_xop_scanlhc);
876 while ((error = hammer2_xop_collect(&sxop->head, 0)) == 0) {
877 if (lhc != sxop->head.cluster.focus->bref.key)
881 hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);
889 if ((lhcbase ^ lhc) & ~HAMMER2_DIRHASH_LOMASK) {
898 * Formally reconnect the in-memory structure. ip must
899 * be locked exclusively to safely change ip->pip.
901 if (ip->pip != dip) {
902 hammer2_inode_ref(dip);
906 hammer2_inode_drop(opip);
912 xop = &hammer2_xop_alloc(dip)->xop_connect;
914 hammer2_xop_setname(&xop->head, name, name_len);
915 hammer2_xop_setip2(&xop->head, ip);
917 hammer2_xop_start(&xop->head, hammer2_inode_xop_connect);
918 error = hammer2_xop_collect(&xop->head, 0);
919 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
922 * On success make the same adjustments to ip->meta or the
923 * next flush may blow up the chain.
926 hammer2_inode_modify(ip);
927 ip->meta.name_key = lhc;
929 ip->meta.name_len = name_len;
936 * Repoint ip->cluster's chains to cluster's chains and fixup the default
937 * focus. Only valid elements are repointed. Invalid elements have to be
938 * adjusted by the appropriate slave sync threads.
940 * Caller must hold the inode exclusively locked. The cluster, if not NULL,
941 * must also be locked.
943 * Cluster may be NULL to clean out any chains in ip->cluster.
/*
 * Make ip->cluster track the chains held by (cluster), and optionally
 * switch the parent inode to (pip).
 *
 * All updates to ip->cluster and ip->pip are made while ip->cluster_spin
 * is held exclusively.  Elements flagged HAMMER2_CITEM_INVALID in the
 * source cluster are skipped, identical chains are left alone, and each
 * newly installed chain gains a reference.  Slots beyond the source
 * cluster are cleared and marked invalid.  nchains, focus and flags are
 * then copied from the source (minus HAMMER2_CLUSTER_LOCKED), or reset
 * when no cluster was supplied.
 *
 * dropch[] is the list of chains handed to hammer2_chain_drop() once the
 * spinlock has been released; a replaced parent inode is likewise
 * dropped only after the unlock.
 */
946 hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
947 hammer2_cluster_t *cluster)
949 hammer2_chain_t *dropch[HAMMER2_MAXCLUSTER];
950 hammer2_chain_t *ochain;
951 hammer2_chain_t *nchain;
952 hammer2_inode_t *opip;
955 bzero(dropch, sizeof(dropch));
958 * Replace chains in ip->cluster with chains from cluster and
959 * adjust the focus if necessary.
961 * NOTE: nchain and/or ochain can be NULL due to gaps
962 * in the cluster arrays.
964 hammer2_spin_ex(&ip->cluster_spin);
965 for (i = 0; cluster && i < cluster->nchains; ++i) {
967 * Do not replace invalid elements as this might race
968 * syncthr replacements.
970 if (cluster->array[i].flags & HAMMER2_CITEM_INVALID)
974 * Do not replace elements which are the same. Also handle
975 * element count discrepancies.
977 nchain = cluster->array[i].chain;
978 if (i < ip->cluster.nchains) {
979 ochain = ip->cluster.array[i].chain;
980 if (ochain == nchain)
989 ip->cluster.array[i].chain = nchain;
990 ip->cluster.array[i].flags &= ~HAMMER2_CITEM_INVALID;
991 ip->cluster.array[i].flags |= cluster->array[i].flags &
992 HAMMER2_CITEM_INVALID;
994 hammer2_chain_ref(nchain);
999 * Release any left-over chains in ip->cluster.
1001 while (i < ip->cluster.nchains) {
1002 nchain = ip->cluster.array[i].chain;
1004 ip->cluster.array[i].chain = NULL;
1005 ip->cluster.array[i].flags |= HAMMER2_CITEM_INVALID;
1012 * Fixup fields. Note that the inode-embedded cluster is never
1016 ip->cluster.nchains = cluster->nchains;
1017 ip->cluster.focus = cluster->focus;
1018 ip->cluster.flags = cluster->flags & ~HAMMER2_CLUSTER_LOCKED;
1020 ip->cluster.nchains = 0;
1021 ip->cluster.focus = NULL;
1022 ip->cluster.flags &= ~HAMMER2_CLUSTER_ZFLAGS;
1026 * Repoint ip->pip if requested (non-NULL pip).
1028 if (pip && ip->pip != pip) {
1030 hammer2_inode_ref(pip);
1035 hammer2_spin_unex(&ip->cluster_spin);
1038 * Cleanup outside of spinlock
1042 hammer2_chain_drop(dropch[i]);
1045 hammer2_inode_drop(opip);
1049 * Repoint a single element from the cluster to the ip. Used by the
1050 * synchronization threads to piecemeal update inodes. Does not change
1051 * focus and requires inode to be re-locked to clean-up flags (XXX).
1054 hammer2_inode_repoint_one(hammer2_inode_t *ip, hammer2_cluster_t *cluster,
1057 hammer2_chain_t *ochain;
1058 hammer2_chain_t *nchain;
1061 hammer2_spin_ex(&ip->cluster_spin);
1062 KKASSERT(idx < cluster->nchains);
1063 if (idx < ip->cluster.nchains) {
1064 ochain = ip->cluster.array[idx].chain;
1065 nchain = cluster->array[idx].chain;
1068 nchain = cluster->array[idx].chain;
1069 ip->cluster.nchains = idx + 1;
1070 for (i = ip->cluster.nchains; i <= idx; ++i) {
1071 bzero(&ip->cluster.array[i],
1072 sizeof(ip->cluster.array[i]));
1073 ip->cluster.array[i].flags |= HAMMER2_CITEM_INVALID;
1076 if (ochain != nchain) {
1080 ip->cluster.array[idx].chain = nchain;
1081 ip->cluster.array[idx].flags &= ~HAMMER2_CITEM_INVALID;
1082 ip->cluster.array[idx].flags |= cluster->array[idx].flags &
1083 HAMMER2_CITEM_INVALID;
1085 hammer2_spin_unex(&ip->cluster_spin);
1086 if (ochain != nchain) {
1088 hammer2_chain_ref(nchain);
1090 hammer2_chain_drop(ochain);
1095 * Called with a locked inode to finish unlinking an inode after xop_unlink
1096 * had been run. This function is responsible for decrementing nlinks and
1097 * moving deleted inodes to the hidden directory if they are still open.
1099 * We don't bother decrementing nlinks if the file is not open and this was
1102 * If the inode is a hardlink target its chain has not yet been deleted,
1103 * otherwise its chain has been deleted.
1105 * If isopen then any prior deletion was not permanent and the inode must
1106 * be moved to the hidden directory.
/*
 * Post-unlink bookkeeping for a locked inode.
 *
 * HAMMER2_INODE_ISUNLINKED is set when the link being removed is the last
 * one (nlinks == 1).  The inode is marked modified and a link count that
 * went negative is clamped to 0.  If links remain, the name_key
 * visibility bit is consulted and a diagnostic may be printed.  Once the
 * count is zero the inode is connected into pmp->ihidden using its inode
 * number as the directory key, with ihidden locked around the connect.
 *
 * NOTE(review): per the header comment the move to the hidden directory
 * applies only to inodes that are still open (isopen) -- the guard itself
 * is not visible here, confirm.
 */
1109 hammer2_inode_unlink_finisher(hammer2_inode_t *ip, int isopen)
1117 * Decrement nlinks. If this is the last link and the file is
1118 * not open, the chain has already been removed and we don't bother
1119 * dirtying the inode.
1121 if (ip->meta.nlinks == 1) {
1122 atomic_set_int(&ip->flags, HAMMER2_INODE_ISUNLINKED);
1127 hammer2_inode_modify(ip);
1129 if ((int64_t)ip->meta.nlinks < 0)
1130 ip->meta.nlinks = 0; /* safety */
1133 * If nlinks is not zero we are done. However, this should only be
1134 * possible with a hardlink target. If the inode is an embedded
1135 * hardlink nlinks should have dropped to zero, warn and proceed
1136 * with the next step.
1138 if (ip->meta.nlinks) {
1139 if ((ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE) == 0)
1141 kprintf("hammer2_inode_unlink: nlinks was not 0 (%jd)\n",
1142 (intmax_t)ip->meta.nlinks);
1147 * nlinks is now zero, the inode should have already been deleted.
1148 * If the file is open it was deleted non-permanently and must be
1149 * moved to the hidden directory.
1151 * When moving to the hidden directory we force the name_key to the
1152 * inode number to avoid collisions.
1155 hammer2_inode_lock(pmp->ihidden, 0);
1156 error = hammer2_inode_connect(pmp->ihidden, ip,
1157 NULL, 0, ip->meta.inum);
1158 hammer2_inode_unlock(pmp->ihidden);
1166 * This is called from the mount code to initialize pmp->ihidden
/*
 * Locate, or create, the hidden directory under pmp->iroot and record it
 * in pmp->ihidden.
 *
 * The work runs inside a transaction with iroot locked exclusively.  A
 * lookup xop keyed on HAMMER2_INODE_HIDDENDIR finds an existing
 * directory; when the lookup reports ENOENT a new directory inode is
 * created with hammer2_inode_create() using HAMMER2_INODE_HIDDENDIR as
 * both key and inode number.  In either case an extra reference is taken
 * on behalf of pmp->ihidden before the inode is unlocked, because the
 * unlock drops the reference that came with the lock.
 *
 * Finally an unlinkall xop covering HAMMER2_KEY_MIN..HAMMER2_KEY_MAX is
 * collected against the hidden directory to destroy its contents.
 */
1169 hammer2_inode_install_hidden(hammer2_pfs_t *pmp)
1176 hammer2_trans_init(pmp, 0);
1177 hammer2_inode_lock(pmp->iroot, 0);
1180 * Find the hidden directory
1183 hammer2_xop_lookup_t *xop;
1185 xop = &hammer2_xop_alloc(pmp->iroot)->xop_lookup;
1186 xop->lhc = HAMMER2_INODE_HIDDENDIR;
1187 hammer2_xop_start(&xop->head, hammer2_xop_lookup);
1188 error = hammer2_xop_collect(&xop->head, 0);
1192 * Found the hidden directory
1194 kprintf("PFS FOUND HIDDEN DIR\n");
1195 pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot,
1196 &xop->head.cluster);
1197 hammer2_inode_ref(pmp->ihidden);
1198 hammer2_inode_unlock(pmp->ihidden);
1200 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
1204 * Create the hidden directory if it could not be found.
1206 if (error == ENOENT) {
1207 kprintf("PFS CREATE HIDDEN DIR\n");
1209 pmp->ihidden = hammer2_inode_create(pmp->iroot, NULL, NULL,
1211 /* lhc */ HAMMER2_INODE_HIDDENDIR,
1212 /* inum */ HAMMER2_INODE_HIDDENDIR,
1213 /* type */ HAMMER2_OBJTYPE_DIRECTORY,
1214 /* target_type */ 0,
1218 hammer2_inode_ref(pmp->ihidden);
1219 hammer2_inode_unlock(pmp->ihidden);
1222 kprintf("PFS CREATE ERROR %d\n", error);
1226 * Scan the hidden directory on-mount and destroy its contents
1229 hammer2_xop_unlinkall_t *xop;
1231 hammer2_inode_lock(pmp->ihidden, 0);
1232 xop = &hammer2_xop_alloc(pmp->ihidden)->xop_unlinkall;
1233 xop->key_beg = HAMMER2_KEY_MIN;
1234 xop->key_end = HAMMER2_KEY_MAX;
1235 hammer2_xop_start(&xop->head, hammer2_inode_xop_unlinkall);
1237 while ((error = hammer2_xop_collect(&xop->head, 0)) == 0) {
1240 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
1241 hammer2_inode_unlock(pmp->ihidden);
1244 hammer2_inode_unlock(pmp->iroot);
1245 hammer2_trans_done(pmp);
1249 * Find the directory common to both fdip and tdip.
1251 * Returns a held but not locked inode. Caller typically locks the inode,
1252 * and when through unlocks AND drops it.
1255 hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
1257 hammer2_inode_t *scan1;
1258 hammer2_inode_t *scan2;
1261 * We used to have a depth field but it complicated matters too
1262 * much for directory renames. So now its ugly. Check for
1263 * simple cases before giving up and doing it the expensive way.
1265 * XXX need a bottom-up topology stability lock
1267 if (fdip == tdip || fdip == tdip->pip) {
1268 hammer2_inode_ref(fdip);
1271 if (fdip->pip == tdip) {
1272 hammer2_inode_ref(tdip);
1279 for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
1281 while (scan2->pmp == tdip->pmp) {
1282 if (scan1 == scan2) {
1283 hammer2_inode_ref(scan1);
1291 panic("hammer2_inode_common_parent: no common parent %p %p\n",
1298 * Set an inode's cluster modified, marking the related chains RW and
1299 * duplicating them if necessary.
1301 * The passed-in chain is a localized copy of the chain previously acquired
1302 * when the inode was locked (and possibly replaced in the mean time), and
1303 * must also be updated. In fact, we update it first and then synchronize
1304 * the inode's cluster cache.
/*
 * Flag the inode as having modified meta-data by atomically setting
 * HAMMER2_INODE_MODIFIED, and mark the associated vnode dirty.  The flag
 * is consumed by hammer2_inode_fsync().
 *
 * NOTE(review): vsetisdirty() is presumably applied only when ip->vp is
 * non-NULL; the guard is not visible here -- confirm.
 */
1307 hammer2_inode_modify(hammer2_inode_t *ip)
1309 atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED);
1311 vsetisdirty(ip->vp);
1315 * Synchronize the inode's frontend state with the chain state prior
1316 * to any explicit flush of the inode or any strategy write call.
1318 * Called with a locked inode inside a transaction.
/*
 * Push modified frontend inode state to the backend.
 *
 * Nothing is done unless HAMMER2_INODE_RESIZED or HAMMER2_INODE_MODIFIED
 * is set.  Otherwise an fsync xop is loaded with a snapshot of ip->flags
 * and ip->meta.  For a resize, HAMMER2_OPFLAG_DIRECTDATA is cleared (and
 * clear_directdata requested) once the size exceeds
 * HAMMER2_EMBEDDED_BYTES, and the old size is passed along in osize.
 * Both flags are cleared on the inode before the xop is started, so
 * modifications made while the xop runs set them again.  ENOENT from the
 * collect is special-cased and a failure is reported with kprintf().
 */
1321 hammer2_inode_fsync(hammer2_inode_t *ip)
1323 if (ip->flags & (HAMMER2_INODE_RESIZED | HAMMER2_INODE_MODIFIED)) {
1324 hammer2_xop_fsync_t *xop;
1327 xop = &hammer2_xop_alloc(ip)->xop_fsync;
1328 xop->clear_directdata = 0;
1329 if (ip->flags & HAMMER2_INODE_RESIZED) {
1330 if ((ip->meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
1331 ip->meta.size > HAMMER2_EMBEDDED_BYTES) {
1332 ip->meta.op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
1333 xop->clear_directdata = 1;
1335 xop->osize = ip->osize;
1337 xop->osize = ip->meta.size; /* safety */
1339 xop->ipflags = ip->flags;
1340 xop->meta = ip->meta;
1342 atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED |
1343 HAMMER2_INODE_MODIFIED);
1344 hammer2_xop_start(&xop->head, hammer2_inode_xop_fsync);
1345 error = hammer2_xop_collect(&xop->head, 0);
1346 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
1347 if (error == ENOENT)
1350 kprintf("hammer2: unable to fsync inode %p\n", ip);
1352 atomic_set_int(&ip->flags,
1353 xop->ipflags & (HAMMER2_INODE_RESIZED |
1354 HAMMER2_INODE_MODIFIED));
1356 /* XXX return error somehow? */
1362 * This handles unlinked open files after the vnode is finally dereferenced.
1363 * To avoid deadlocks it cannot be called from the normal vnode recycling
1364 * path, so we call it (1) after an unlink, rmdir, or rename, (2) on every
1365 * flush, and (3) on umount.
1367 * Caller must be in a transaction.
/*
 * Drain pmp->unlinkq.
 *
 * The queue is first tested for emptiness without taking the spinlock.
 * Entries are then removed under pmp->list_spin, but the spinlock is
 * released while each one is processed because the processing blocks:
 * the queue entry is freed, the inode is locked exclusively, a destroy
 * xop is run on it, and the inode is unlocked and the reference that was
 * held by the queue entry is dropped.  The spinlock is re-acquired
 * before the next entry is fetched.
 */
1370 hammer2_inode_run_unlinkq(hammer2_pfs_t *pmp)
1372 hammer2_xop_destroy_t *xop;
1373 hammer2_inode_unlink_t *ipul;
1374 hammer2_inode_t *ip;
1377 if (TAILQ_EMPTY(&pmp->unlinkq))
1381 hammer2_spin_ex(&pmp->list_spin);
1382 while ((ipul = TAILQ_FIRST(&pmp->unlinkq)) != NULL) {
1383 TAILQ_REMOVE(&pmp->unlinkq, ipul, entry);
1384 hammer2_spin_unex(&pmp->list_spin);
1386 kfree(ipul, pmp->minode);
1388 hammer2_inode_lock(ip, 0);
1389 xop = &hammer2_xop_alloc(ip)->xop_destroy;
1390 hammer2_xop_start(&xop->head, hammer2_inode_xop_destroy);
1391 error = hammer2_xop_collect(&xop->head, 0);
1392 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
1394 hammer2_inode_unlock(ip);
1395 hammer2_inode_drop(ip); /* ipul ref */
1397 hammer2_spin_ex(&pmp->list_spin);
1399 hammer2_spin_unex(&pmp->list_spin);
1404 * Inode create helper (threaded, backend)
1406 * Used by ncreate, nmknod, nsymlink, nmkdir.
1407 * Used by nlink and rename to create HARDLINK pointers.
1409 * Frontend holds the parent directory ip locked exclusively. We
1410 * create the inode and feed the exclusively locked chain to the
/*
 * hammer2_inode_xop_create(arg, clindex)
 *
 * Backend for inode creation on cluster index clindex.  Visible steps:
 *   - obtain the chain of xop->head.ip as the parent,
 *   - issue a hammer2_chain_lookup() under that parent,
 *   - hammer2_chain_create() a chain of type HAMMER2_BREF_TYPE_INODE
 *     and size HAMMER2_INODE_BYTES,
 *   - mark the new chain modified and fill in its inode data from
 *     xop->meta, the optional name in xop->head, and xop->lhc,
 *   - swap the lock on the new chain for a shared one,
 *   - release the parent, feed the chain plus error to the frontend via
 *     hammer2_xop_feed(), and drop the chain.
 *
 * NOTE(review): the lookup and create argument lists, the handling of
 * the lookup result, and the error paths are only partly visible in
 * this listing - confirm against the complete file.
 */
1414 hammer2_inode_xop_create(hammer2_xop_t *arg, int clindex)
1416 hammer2_xop_create_t *xop = &arg->xop_create;
1417 hammer2_chain_t *parent;
1418 hammer2_chain_t *chain;
1419 hammer2_key_t key_next;
1420 int cache_index = -1;
1424 parent = hammer2_inode_chain(xop->head.ip, clindex,
1425 HAMMER2_RESOLVE_ALWAYS);
1426 if (parent == NULL) {
1430 chain = hammer2_chain_lookup(&parent, &key_next,
1434 hammer2_chain_unlock(chain);
1439 error = hammer2_chain_create(&parent, &chain,
1442 HAMMER2_BREF_TYPE_INODE,
1443 HAMMER2_INODE_BYTES,
/* Populate the inode data of the newly created chain. */
1446 hammer2_chain_modify(chain, 0);
1447 chain->data->ipdata.meta = xop->meta;
/* The name is optional; name_len is only set when one is supplied. */
1448 if (xop->head.name) {
1449 bcopy(xop->head.name,
1450 chain->data->ipdata.filename,
1451 xop->head.name_len);
1452 chain->data->ipdata.meta.name_len = xop->head.name_len;
1454 chain->data->ipdata.meta.name_key = xop->lhc;
/* Unlock and relock with HAMMER2_RESOLVE_SHARED before feeding. */
1456 hammer2_chain_unlock(chain);
1457 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS |
1458 HAMMER2_RESOLVE_SHARED);
1461 hammer2_chain_unlock(parent);
1462 hammer2_chain_drop(parent);
/* The result of the feed replaces error. */
1464 error = hammer2_xop_feed(&xop->head, chain, clindex, error);
1466 hammer2_chain_drop(chain);
1470 * Inode delete helper (backend, threaded)
1472 * Generally used by hammer2_inode_run_unlinkq()
/*
 * hammer2_inode_xop_destroy(arg, clindex)
 *
 * Backend for inode destruction on cluster index clindex.  Visible
 * steps:
 *   - fetch the inode chain into parent and apply
 *     hammer2_chain_getparent() to it,
 *   - fetch the inode chain again into chain,
 *   - hammer2_chain_delete(parent, chain, 0),
 *   - feed a NULL chain plus error to the frontend,
 *   - unlock and drop parent and chain.
 *
 * NOTE(review): the assignment of ip, the bodies of the NULL checks and
 * any guards around the final unlock/drop pairs are not visible in this
 * listing - confirm against the complete file.
 */
1475 hammer2_inode_xop_destroy(hammer2_xop_t *arg, int clindex)
1477 hammer2_xop_destroy_t *xop = &arg->xop_destroy;
1479 hammer2_chain_t *parent;
1480 hammer2_chain_t *chain;
1481 hammer2_inode_t *ip;
1485 * We need the precise parent chain to issue the deletion.
/*
 * parent starts out as the chain of the inode itself;
 * hammer2_chain_getparent() presumably replaces it with the chain
 * above it - TODO confirm.
 */
1491 parent = hammer2_inode_chain(ip, clindex, HAMMER2_RESOLVE_ALWAYS);
1493 hammer2_chain_getparent(&parent, HAMMER2_RESOLVE_ALWAYS);
1494 if (parent == NULL) {
1498 chain = hammer2_inode_chain(ip, clindex, HAMMER2_RESOLVE_ALWAYS);
1499 if (chain == NULL) {
1503 hammer2_chain_delete(parent, chain, 0);
/* No chain is handed back to the frontend, only the error code. */
1506 hammer2_xop_feed(&xop->head, NULL, clindex, error);
1508 hammer2_chain_unlock(parent);
1509 hammer2_chain_drop(parent);
1512 hammer2_chain_unlock(chain);
1513 hammer2_chain_drop(chain);
/*
 * hammer2_inode_xop_unlinkall(arg, clindex)
 *
 * Backend that deletes chains found under the chain of xop->head.ip in
 * the key range xop->key_beg .. xop->key_end on cluster index clindex.
 * Each chain returned by the lookup/next calls is deleted with
 * HAMMER2_DELETE_PERMANENT, relocked shared, and fed to the frontend
 * together with chain->error.  A final feed of a NULL chain with ENOENT
 * follows, then parent and chain are unlocked and dropped.
 *
 * NOTE(review): the loop construct around the delete/feed/next sequence
 * and any guards around the final unlock/drop pairs are not visible in
 * this listing - confirm against the complete file.
 */
1518 hammer2_inode_xop_unlinkall(hammer2_xop_t *arg, int clindex)
1520 hammer2_xop_unlinkall_t *xop = &arg->xop_unlinkall;
1521 hammer2_chain_t *parent;
1522 hammer2_chain_t *chain;
1523 hammer2_key_t key_next;
1524 int cache_index = -1;
1527 * We need the precise parent chain to issue the deletion.
1529 parent = hammer2_inode_chain(xop->head.ip, clindex,
1530 HAMMER2_RESOLVE_ALWAYS);
1531 chain = hammer2_chain_lookup(&parent, &key_next,
1532 xop->key_beg, xop->key_end,
1534 HAMMER2_LOOKUP_ALWAYS);
1536 hammer2_chain_delete(parent, chain, HAMMER2_DELETE_PERMANENT);
/* Unlock and relock with HAMMER2_RESOLVE_SHARED before feeding. */
1537 hammer2_chain_unlock(chain);
1538 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS |
1539 HAMMER2_RESOLVE_SHARED);
1540 hammer2_xop_feed(&xop->head, chain, clindex, chain->error);
/* Advance from key_next; HAMMER2_LOOKUP_NOUNLOCK is passed here. */
1541 chain = hammer2_chain_next(&parent, chain, &key_next,
1542 key_next, xop->key_end,
1544 HAMMER2_LOOKUP_ALWAYS |
1545 HAMMER2_LOOKUP_NOUNLOCK);
/* Terminating feed: NULL chain with ENOENT. */
1547 hammer2_xop_feed(&xop->head, NULL, clindex, ENOENT);
1549 hammer2_chain_unlock(parent);
1550 hammer2_chain_drop(parent);
1553 hammer2_chain_unlock(chain);
1554 hammer2_chain_drop(chain);
/*
 * hammer2_inode_xop_connect(arg, clindex)
 *
 * Backend that connects the inode xop->head.ip2 under the directory
 * xop->head.ip on cluster index clindex.  Visible steps:
 *   1. obtain the directory chain as parent and issue a
 *      hammer2_chain_lookup() under it,
 *   2. obtain the chain of ip2, mark it modified, and call
 *      hammer2_inode_modify() on ip2,
 *   3. when xop->head.name is set, zero the filename field and copy the
 *      new name and name_len in; set meta.name_key from xop->lhc,
 *   4. hammer2_chain_create() the chain under parent as type
 *      HAMMER2_BREF_TYPE_INODE with size HAMMER2_INODE_BYTES,
 *   5. feed a NULL chain plus error to the frontend, then unlock and
 *      drop parent and chain.
 *
 * NOTE(review): the declarations of pmp and error, the lookup and
 * create argument lists, the handling of the lookup result and the
 * error paths are only partly visible in this listing - confirm
 * against the complete file.
 */
1559 hammer2_inode_xop_connect(hammer2_xop_t *arg, int clindex)
1561 hammer2_xop_connect_t *xop = &arg->xop_connect;
1562 hammer2_inode_data_t *wipdata;
1563 hammer2_chain_t *parent;
1564 hammer2_chain_t *chain;
1566 hammer2_key_t key_dummy;
1567 int cache_index = -1;
1571 * Get directory, then issue a lookup to prime the parent chain
1572 * for the create. The lookup is expected to fail.
1574 pmp = xop->head.ip->pmp;
1575 parent = hammer2_inode_chain(xop->head.ip, clindex,
1576 HAMMER2_RESOLVE_ALWAYS);
1577 if (parent == NULL) {
1582 chain = hammer2_chain_lookup(&parent, &key_dummy,
1586 hammer2_chain_unlock(chain);
1587 hammer2_chain_drop(chain);
1594 * Adjust the filename in the inode, set the name key.
1596 * NOTE: Frontend must also adjust ip2->meta on success, we can't
1599 chain = hammer2_inode_chain(xop->head.ip2, clindex,
1600 HAMMER2_RESOLVE_ALWAYS);
1601 hammer2_chain_modify(chain, 0);
1602 wipdata = &chain->data->ipdata;
1604 hammer2_inode_modify(xop->head.ip2);
1605 if (xop->head.name) {
1606 bzero(wipdata->filename, sizeof(wipdata->filename));
1607 bcopy(xop->head.name, wipdata->filename, xop->head.name_len);
1608 wipdata->meta.name_len = xop->head.name_len;
1610 wipdata->meta.name_key = xop->lhc;
1613 * Reconnect the chain to the new parent directory
1615 error = hammer2_chain_create(&parent, &chain, pmp,
1617 HAMMER2_BREF_TYPE_INODE,
1618 HAMMER2_INODE_BYTES,
1625 hammer2_xop_feed(&xop->head, NULL, clindex, error);
1627 hammer2_chain_unlock(parent);
1628 hammer2_chain_drop(parent);
1631 hammer2_chain_unlock(chain);
1632 hammer2_chain_drop(chain);
/*
 * hammer2_inode_xop_fsync(arg, clindex)
 *
 * Backend half of the inode meta-data sync on cluster index clindex.
 * The chain of xop->head.ip is held in the local named parent.  Visible
 * steps:
 *   - pick up parent->error when it is set,
 *   - when the snapshot flags include HAMMER2_INODE_RESIZED and the new
 *     size is smaller than the old size, walk the chains from the
 *     rounded-up new size to HAMMER2_KEY_MAX and permanently delete
 *     those of type HAMMER2_BREF_TYPE_DATA,
 *   - mark the inode chain modified, copy xop->meta into it and, when
 *     xop->clear_directdata is set, zero the blockset area,
 *   - unlock and drop chain and parent, then feed a NULL chain plus
 *     error to the frontend.
 *
 * NOTE(review): the loop construct, the body of the INODE case, the
 * bodies of the early error checks and any guards around the final
 * unlock/drop pairs are not visible in this listing - confirm against
 * the complete file.
 */
1637 hammer2_inode_xop_fsync(hammer2_xop_t *arg, int clindex)
1639 hammer2_xop_fsync_t *xop = &arg->xop_fsync;
1640 hammer2_chain_t *parent;
1641 hammer2_chain_t *chain;
1644 parent = hammer2_inode_chain(xop->head.ip, clindex,
1645 HAMMER2_RESOLVE_ALWAYS);
1647 if (parent == NULL) {
1651 if (parent->error) {
1652 error = parent->error;
/* xop->ipflags is the flag snapshot stored in the XOP by the caller. */
1658 if ((xop->ipflags & HAMMER2_INODE_RESIZED) == 0) {
1659 /* osize must be ignored */
1660 } else if (xop->meta.size < xop->osize) {
1662 * We must delete any chains beyond the EOF. The chain
1663 * straddling the EOF will be pending in the bioq.
1665 hammer2_key_t lbase;
1666 hammer2_key_t key_next;
1667 int cache_index = -1;
/* Round the new size up using the HAMMER2_PBUFMASK64 mask. */
1669 lbase = (xop->meta.size + HAMMER2_PBUFMASK64) &
1670 ~HAMMER2_PBUFMASK64;
1671 chain = hammer2_chain_lookup(&parent, &key_next,
1672 lbase, HAMMER2_KEY_MAX,
1674 HAMMER2_LOOKUP_NODATA |
1675 HAMMER2_LOOKUP_NODIRECT);
1678 * Degenerate embedded case, nothing to loop on
1680 switch (chain->bref.type) {
1681 case HAMMER2_BREF_TYPE_INODE:
/* Data chains in the truncated range are deleted permanently. */
1684 case HAMMER2_BREF_TYPE_DATA:
1685 hammer2_chain_delete(parent, chain,
1686 HAMMER2_DELETE_PERMANENT);
1689 chain = hammer2_chain_next(&parent, chain, &key_next,
1690 key_next, HAMMER2_KEY_MAX,
1692 HAMMER2_LOOKUP_NODATA |
1693 HAMMER2_LOOKUP_NODIRECT);
1698 * Sync the inode meta-data, potentially clear the blockset area
1699 * of direct data so it can be used for blockrefs.
1701 hammer2_chain_modify(parent, 0);
1702 parent->data->ipdata.meta = xop->meta;
1703 if (xop->clear_directdata) {
1704 bzero(&parent->data->ipdata.u.blockset,
1705 sizeof(parent->data->ipdata.u.blockset));
1709 hammer2_chain_unlock(chain);
1710 hammer2_chain_drop(chain);
1713 hammer2_chain_unlock(parent);
1714 hammer2_chain_drop(parent);
/* No chain is handed back to the frontend, only the error code. */
1716 hammer2_xop_feed(&xop->head, NULL, clindex, error);