hammer2 - serialized flush work part 1
author Matthew Dillon <dillon@apollo.backplane.com>
Thu, 31 Jan 2013 04:53:27 +0000 (20:53 -0800)
committer Matthew Dillon <dillon@apollo.backplane.com>
Thu, 31 Jan 2013 04:53:27 +0000 (20:53 -0800)
This is preliminary work required to support chain structure replication
for the purposes of recording modifications which are then separated by
serialization points (by transaction id).  Ultimately this will allow the
flush code to flush to an exact serialization point and in the process
operate completely asynchronously from any further modifications being
made on the frontend after that serialization point.

* Separate hammer2_inode from hammer2_chain.

* Split the locking APIs for inodes and chains into their own functions.

* Move ip_data into chain->data->ipdata (kmalloc'd), instead of embedding
  it in hammer2_inode.  This allows the inode structure to disconnect from
  the chain.

sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_ccms.c
sys/vfs/hammer2/hammer2_ccms.h
sys/vfs/hammer2/hammer2_chain.c
sys/vfs/hammer2/hammer2_flush.c
sys/vfs/hammer2/hammer2_inode.c
sys/vfs/hammer2/hammer2_ioctl.c
sys/vfs/hammer2/hammer2_subr.c
sys/vfs/hammer2/hammer2_vfsops.c
sys/vfs/hammer2/hammer2_vnops.c

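diff --git a/sys/vfs/hammer2/hammer2.h b/sys/vfs/hammer2/hammer2.h
index 08450bb..9e74734 100644
--- a/sys/vfs/hammer2/hammer2.h
+++ b/sys/vfs/hammer2/hammer2.h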
@@ -78,9 +78,6 @@ struct hammer2_span;
 struct hammer2_state;
 struct hammer2_msg;
 
-struct hammer2_indblock;
-struct hammer2_data;
-
 /*
  * The chain structure tracks blockref recursions all the way to
  * the root volume.  These consist of indirect blocks, inodes,
@@ -117,10 +114,8 @@ struct hammer2_chain {
        RB_ENTRY(hammer2_chain) rbnode;
        TAILQ_ENTRY(hammer2_chain) flush_node;  /* flush deferral list */
        union {
-               struct hammer2_inode *ip;
-               struct hammer2_indblock *np;
-               struct hammer2_data *dp;
-               void *mem;
+               void *mem;                      /* generic */
+               struct hammer2_inode *ip;       /* high-level h2 structure */
        } u;
 
        struct buf      *bp;            /* buffer cache (ro) */
@@ -147,7 +142,7 @@ RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
  *        a block reallocation).
  */
 #define HAMMER2_CHAIN_MODIFIED         0x00000001      /* active mods */
-#define HAMMER2_CHAIN_DIRTYEMBED       0x00000002      /* inode embedded */
+#define HAMMER2_CHAIN_UNUSED0002       0x00000002
 #define HAMMER2_CHAIN_DIRTYBP          0x00000004      /* dirty on unlock */
 #define HAMMER2_CHAIN_SUBMODIFIED      0x00000008      /* 1+ subs modified */
 #define HAMMER2_CHAIN_DELETED          0x00000010      /* deleted chain */
@@ -241,37 +236,23 @@ RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
  *      is embedded in the chain (chain.cst) and aliased w/ attr_cst.
  */
 struct hammer2_inode {
+       ccms_cst_t              topo_cst;       /* directory topology cst */
        struct hammer2_mount    *hmp;           /* Global mount */
        struct hammer2_pfsmount *pmp;           /* PFS mount */
        struct hammer2_inode    *pip;           /* parent inode */
        struct vnode            *vp;
-       ccms_cst_t              topo_cst;       /* directory topology cst */
-       hammer2_chain_t         chain;
-       struct hammer2_inode_data ip_data;
+       hammer2_chain_t         *chain;
        struct lockf            advlock;
-       hammer2_off_t           delta_dcount;   /* adjust data_count */
-       hammer2_off_t           delta_icount;   /* adjust inode_count */
+       struct lock             lk;
+       u_int                   flags;
+       u_int                   refs;           /* +vpref, +flushref */
 };
 
 typedef struct hammer2_inode hammer2_inode_t;
 
-/*
- * A hammer2 indirect block
- */
-struct hammer2_indblock {
-       hammer2_chain_t         chain;
-};
-
-typedef struct hammer2_indblock hammer2_indblock_t;
-
-/*
- * A hammer2 data block
- */
-struct hammer2_data {
-       hammer2_chain_t         chain;
-};
-
-typedef struct hammer2_data hammer2_data_t;
+#define HAMMER2_INODE_MODIFIED         0x0001
+#define HAMMER2_INODE_DIRTYEMBED       0x0002
+#define HAMMER2_INODE_DELETED          0x0004
 
 /*
  * XXX
@@ -321,6 +302,7 @@ struct hammer2_pfsmount {
        struct hammer2_mount    *hmp;           /* device global mount */
        hammer2_chain_t         *rchain;        /* PFS root chain */
        hammer2_inode_t         *iroot;         /* PFS root inode */
+       hammer2_off_t           inode_count;    /* copy of inode_count */
        ccms_domain_t           ccms_dom;
        struct netexport        export;         /* nfs export */
        int                     ronly;          /* read-only mount */
@@ -382,20 +364,21 @@ extern long hammer2_ioa_volu_write;
 #define hammer2_icrc32c(buf, size, crc)        iscsi_crc32_ext((buf), (size), (crc))
 
 void hammer2_inode_lock_ex(hammer2_inode_t *ip);
-void hammer2_inode_unlock_ex(hammer2_inode_t *ip);
 void hammer2_inode_lock_sh(hammer2_inode_t *ip);
+void hammer2_inode_unlock_ex(hammer2_inode_t *ip);
 void hammer2_inode_unlock_sh(hammer2_inode_t *ip);
-void hammer2_inode_busy(hammer2_inode_t *ip);
-void hammer2_inode_unbusy(hammer2_inode_t *ip);
 void hammer2_voldata_lock(hammer2_mount_t *hmp);
 void hammer2_voldata_unlock(hammer2_mount_t *hmp);
+ccms_state_t hammer2_inode_lock_temp_release(hammer2_inode_t *ip);
+ccms_state_t hammer2_inode_lock_upgrade(hammer2_inode_t *ip);
+void hammer2_inode_lock_restore(hammer2_inode_t *ip, ccms_state_t ostate);
 
 void hammer2_mount_exlock(hammer2_mount_t *hmp);
 void hammer2_mount_shlock(hammer2_mount_t *hmp);
 void hammer2_mount_unlock(hammer2_mount_t *hmp);
 
-int hammer2_get_dtype(hammer2_inode_t *ip);
-int hammer2_get_vtype(hammer2_inode_t *ip);
+int hammer2_get_dtype(hammer2_chain_t *chain);
+int hammer2_get_vtype(hammer2_chain_t *chain);
 u_int8_t hammer2_get_obj_type(enum vtype vtype);
 void hammer2_time_to_timespec(u_int64_t xtime, struct timespec *ts);
 u_int64_t hammer2_timespec_to_time(struct timespec *ts);
@@ -416,7 +399,9 @@ struct vnode *hammer2_igetv(hammer2_inode_t *ip, int *errorp);
 
 void hammer2_inode_lock_nlinks(hammer2_inode_t *ip);
 void hammer2_inode_unlock_nlinks(hammer2_inode_t *ip);
-hammer2_inode_t *hammer2_inode_alloc(hammer2_pfsmount_t *pmp, void *data);
+hammer2_inode_t *hammer2_inode_get(hammer2_pfsmount_t *pmp,
+                       hammer2_inode_t *dip, hammer2_chain_t *chain);
+void hammer2_inode_put(hammer2_inode_t *ip);
 void hammer2_inode_free(hammer2_inode_t *ip);
 void hammer2_inode_ref(hammer2_inode_t *ip);
 void hammer2_inode_drop(hammer2_inode_t *ip);
@@ -440,9 +425,9 @@ int hammer2_unlink_file(hammer2_inode_t *dip,
                        int isdir, hammer2_inode_t *retain_ip);
 int hammer2_hardlink_consolidate(hammer2_inode_t **ipp, hammer2_inode_t *tdip);
 int hammer2_hardlink_deconsolidate(hammer2_inode_t *dip,
-                       hammer2_chain_t **chainp, hammer2_inode_t **ipp);
-int hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp,
-                       hammer2_inode_t **ipp);
+                       hammer2_chain_t **chainp, hammer2_chain_t **ochainp);
+int hammer2_hardlink_find(hammer2_inode_t *dip,
+                       hammer2_chain_t **chainp, hammer2_chain_t **ochainp);
 
 /*
  * hammer2_chain.c
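diff --git a/sys/vfs/hammer2/hammer2_ccms.c b/sys/vfs/hammer2/hammer2_ccms.c
index 771d921..ab21d84 100644
--- a/sys/vfs/hammer2/hammer2_ccms.c
+++ b/sys/vfs/hammer2/hammer2_ccms.c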
@@ -256,6 +256,20 @@ ccms_thread_lock_nonblock(ccms_cst_t *cst, ccms_state_t state)
        return(0);
 }
 
+ccms_state_t
+ccms_thread_lock_temp_release(ccms_cst_t *cst)
+{
+       if (cst->count < 0) {
+               ccms_thread_unlock(cst);
+               return(CCMS_STATE_EXCLUSIVE);
+       }
+       if (cst->count > 0) {
+               ccms_thread_unlock(cst);
+               return(CCMS_STATE_SHARED);
+       }
+       return (CCMS_STATE_INVALID);
+}
+
 /*
  * Temporarily upgrade a thread lock for making local structural changes.
  * No new shared or exclusive locks can be acquired by others while we are
@@ -320,6 +334,9 @@ void
 ccms_thread_unlock(ccms_cst_t *cst)
 {
        if (cst->count < 0) {
+               /*
+                * Exclusive
+                */
                KKASSERT(cst->td == curthread);
                if (cst->count < -1) {
                        ++cst->count;
@@ -337,6 +354,9 @@ ccms_thread_unlock(ccms_cst_t *cst)
                }
                spin_unlock(&cst->spin);
        } else if (cst->count > 0) {
+               /*
+                * Shared
+                */
                spin_lock(&cst->spin);
                if (--cst->count == 0 && cst->blocked) {
                        cst->blocked = 0;
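diff --git a/sys/vfs/hammer2/hammer2_ccms.h b/sys/vfs/hammer2/hammer2_ccms.h
index 4e6eb07..0b010c9 100644
--- a/sys/vfs/hammer2/hammer2_ccms.h
+++ b/sys/vfs/hammer2/hammer2_ccms.h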
@@ -235,6 +235,7 @@ void ccms_cst_uninit(ccms_cst_t *cst);
 
 void ccms_thread_lock(ccms_cst_t *cst, ccms_state_t state);
 int ccms_thread_lock_nonblock(ccms_cst_t *cst, ccms_state_t state);
+ccms_state_t ccms_thread_lock_temp_release(ccms_cst_t *cst);
 ccms_state_t ccms_thread_lock_upgrade(ccms_cst_t *cst);
 void ccms_thread_lock_restore(ccms_cst_t *cst, ccms_state_t ostate);
 void ccms_thread_unlock(ccms_cst_t *cst);
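diff --git a/sys/vfs/hammer2/hammer2_chain.c b/sys/vfs/hammer2/hammer2_chain.c
index 6c3c9af..5b1d700 100644
--- a/sys/vfs/hammer2/hammer2_chain.c
+++ b/sys/vfs/hammer2/hammer2_chain.c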
@@ -115,6 +115,7 @@ hammer2_chain_parent_setsubmod(hammer2_mount_t *hmp, hammer2_chain_t *chain)
 /*
  * Allocate a new disconnected chain element representing the specified
  * bref.  The chain element is locked exclusively and refs is set to 1.
+ * Media data (data) and meta-structure (u) pointers are left NULL.
  *
  * This essentially allocates a system memory structure representing one
  * of the media structure types, including inodes.
@@ -123,9 +124,6 @@ hammer2_chain_t *
 hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_blockref_t *bref)
 {
        hammer2_chain_t *chain;
-       hammer2_inode_t *ip;
-       hammer2_indblock_t *np;
-       hammer2_data_t *dp;
        u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
 
        /*
@@ -133,23 +131,12 @@ hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_blockref_t *bref)
         */
        switch(bref->type) {
        case HAMMER2_BREF_TYPE_INODE:
-               ip = kmalloc(sizeof(*ip), hmp->minode, M_WAITOK | M_ZERO);
-               chain = &ip->chain;
-               chain->u.ip = ip;
-               ip->hmp = hmp;
-               break;
        case HAMMER2_BREF_TYPE_INDIRECT:
        case HAMMER2_BREF_TYPE_FREEMAP_ROOT:
        case HAMMER2_BREF_TYPE_FREEMAP_NODE:
-               np = kmalloc(sizeof(*np), hmp->mchain, M_WAITOK | M_ZERO);
-               chain = &np->chain;
-               chain->u.np = np;
-               break;
        case HAMMER2_BREF_TYPE_DATA:
        case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
-               dp = kmalloc(sizeof(*dp), hmp->mchain, M_WAITOK | M_ZERO);
-               chain = &dp->chain;
-               chain->u.dp = dp;
+               chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO);
                break;
        case HAMMER2_BREF_TYPE_VOLUME:
                chain = NULL;
@@ -193,20 +180,15 @@ hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_blockref_t *bref)
 static void
 hammer2_chain_dealloc(hammer2_mount_t *hmp, hammer2_chain_t *chain)
 {
-       hammer2_inode_t *ip;
        hammer2_chain_t *parent;
        hammer2_chain_t *child;
 
        KKASSERT(chain->refs == 0);
        KKASSERT(chain->flushing == 0);
+       KKASSERT(chain->u.mem == NULL);
        KKASSERT((chain->flags &
                  (HAMMER2_CHAIN_MOVED | HAMMER2_CHAIN_MODIFIED)) == 0);
 
-       if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
-               ip = chain->u.ip;
-       else
-               ip = NULL;
-
        /*
         * If the sub-tree is not empty all the elements on it must have
         * 0 refs and be deallocatable.
@@ -229,18 +211,9 @@ hammer2_chain_dealloc(hammer2_mount_t *hmp, hammer2_chain_t *chain)
                parent = chain->parent;
                RB_REMOVE(hammer2_chain_tree, &parent->rbhead, chain);
                atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
-               if (ip)
-                       ip->pip = NULL;
                chain->parent = NULL;
                spin_unlock(&chain->cst.spin);
        }
-
-       /*
-        * When cleaning out a hammer2_inode we must
-        * also clean out the related ccms_inode.
-        */
-       if (ip)
-               ccms_cst_uninit(&ip->topo_cst);
        hammer2_chain_free(hmp, chain);
 }
 
@@ -250,28 +223,31 @@ hammer2_chain_dealloc(hammer2_mount_t *hmp, hammer2_chain_t *chain)
 void
 hammer2_chain_free(hammer2_mount_t *hmp, hammer2_chain_t *chain)
 {
-       void *mem;
-
-       if (chain->bref.type == HAMMER2_BREF_TYPE_INODE ||
-           chain->bref.type == HAMMER2_BREF_TYPE_VOLUME) {
+       switch(chain->bref.type) {
+       case HAMMER2_BREF_TYPE_VOLUME:
                chain->data = NULL;
+               break;
+       case HAMMER2_BREF_TYPE_INODE:
+               if (chain->data) {
+                       kfree(chain->data, hmp->minode);
+                       chain->data = NULL;
+               }
+               break;
+       default:
+               KKASSERT(chain->data == NULL);
+               break;
        }
 
        KKASSERT(chain->bp == NULL);
-       KKASSERT(chain->data == NULL);
        KKASSERT(chain->bref.type != HAMMER2_BREF_TYPE_INODE ||
-                chain->u.ip->vp == NULL);
+                chain->u.ip == NULL);
+
        ccms_thread_unlock(&chain->cst);
        KKASSERT(chain->cst.count == 0);
        KKASSERT(chain->cst.upgrade == 0);
+       KKASSERT(chain->u.mem == NULL);
 
-       if ((mem = chain->u.mem) != NULL) {
-               chain->u.mem = NULL;
-               if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
-                       kfree(mem, hmp->minode);
-               else
-                       kfree(mem, hmp->mchain);
-       }
+       kfree(chain, hmp->mchain);
 }
 
 /*
@@ -625,8 +601,10 @@ hammer2_chain_lock(hammer2_mount_t *hmp, hammer2_chain_t *chain, int how)
                 * Copy data from bp to embedded buffer, do not retain the
                 * device buffer.
                 */
-               bcopy(bdata, &chain->u.ip->ip_data, chain->bytes);
-               chain->data = (void *)&chain->u.ip->ip_data;
+               KKASSERT(chain->bytes == sizeof(chain->data->ipdata));
+               chain->data = kmalloc(sizeof(chain->data->ipdata),
+                                     hmp->minode, M_WAITOK | M_ZERO);
+               bcopy(bdata, &chain->data->ipdata, chain->bytes);
                bqrelse(chain->bp);
                chain->bp = NULL;
                break;
@@ -681,7 +659,8 @@ hammer2_chain_unlock(hammer2_mount_t *hmp, hammer2_chain_t *chain)
        /*
         * Shortcut the case if the data is embedded or not resolved.
         *
-        * Do NOT null-out pointers to embedded data (e.g. inode).
+        * Do NOT NULL out chain->data (e.g. inode data), it might be
+        * dirty.
         *
         * The DIRTYBP flag is non-applicable in this situation and can
         * be cleared to keep the flags state clean.
@@ -863,7 +842,7 @@ hammer2_chain_resize(hammer2_inode_t *ip, hammer2_chain_t *chain,
        chain->bref.data_off = hammer2_freemap_alloc(hmp, chain->bref.type,
                                                     nbytes);
        chain->bytes = nbytes;
-       ip->delta_dcount += (ssize_t)(nbytes - obytes); /* XXX atomic */
+       /*ip->delta_dcount += (ssize_t)(nbytes - obytes);*/ /* XXX atomic */
 
        /*
         * The device buffer may be larger than the allocation size.
@@ -1146,7 +1125,6 @@ hammer2_chain_get(hammer2_mount_t *hmp, hammer2_chain_t *parent,
                  int index, int flags)
 {
        hammer2_blockref_t *bref;
-       hammer2_inode_t *ip;
        hammer2_chain_t *chain;
        hammer2_chain_t dummy;
        int how;
@@ -1258,6 +1236,7 @@ hammer2_chain_get(hammer2_mount_t *hmp, hammer2_chain_t *parent,
        atomic_add_int(&parent->refs, 1);       /* for red-black entry */
        ccms_thread_lock_restore(&parent->cst, ostate);
 
+#if 0
        /*
         * Additional linkage for inodes.  Reuse the parent pointer to
         * find the parent directory.
@@ -1275,6 +1254,7 @@ hammer2_chain_get(hammer2_mount_t *hmp, hammer2_chain_t *parent,
                        ccms_cst_init(&ip->topo_cst, &ip->chain);
                }
        }
+#endif
 
        /*
         * Our new chain structure has already been referenced and locked
@@ -1693,7 +1673,7 @@ again2:
  * The element may or may not have a data area associated with it:
  *
  *     VOLUME          not allowed here
- *     INODE           embedded data are will be set-up
+ *     INODE           kmalloc()'d data area is set up
  *     INDIRECT        not allowed here
  *     DATA            no data area will be set-up (caller is expected
  *                     to have logical buffers, we don't want to alias
@@ -1749,7 +1729,8 @@ hammer2_chain_create(hammer2_mount_t *hmp, hammer2_chain_t *parent,
                        break;
                case HAMMER2_BREF_TYPE_INODE:
                        KKASSERT(bytes == HAMMER2_INODE_BYTES);
-                       chain->data = (void *)&chain->u.ip->ip_data;
+                       chain->data = kmalloc(sizeof(chain->data->ipdata),
+                                             hmp->minode, M_WAITOK | M_ZERO);
                        break;
                case HAMMER2_BREF_TYPE_INDIRECT:
                        panic("hammer2_chain_create: cannot be used to"
@@ -1781,7 +1762,7 @@ again:
         */
        switch(parent->bref.type) {
        case HAMMER2_BREF_TYPE_INODE:
-               KKASSERT((parent->u.ip->ip_data.op_flags &
+               KKASSERT((parent->data->ipdata.op_flags &
                          HAMMER2_OPFLAG_DIRECTDATA) == 0);
                KKASSERT(parent->data != NULL);
                base = &parent->data->ipdata.u.blockset.blockref[0];
@@ -1880,6 +1861,7 @@ again:
        KKASSERT(parent->refs > 0);
        atomic_add_int(&parent->refs, 1);
 
+#if 0
        /*
         * Additional linkage for inodes.  Reuse the parent pointer to
         * find the parent directory.
@@ -1905,7 +1887,7 @@ again:
                        ccms_cst_init(&ip->topo_cst, &ip->chain);
                }
        }
-
+#endif
        /*
         * (allocated) indicates that this is a newly-created chain element
         * rather than a renamed chain element.  In this situation we want
@@ -2427,7 +2409,6 @@ hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t *parent,
                     hammer2_chain_t *chain, int retain)
 {
        hammer2_blockref_t *base;
-       hammer2_inode_t *ip;
        int count;
 
        if (chain->parent != parent)
@@ -2505,20 +2486,7 @@ hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t *parent,
         */
        if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0 &&
            chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
-               ip = chain->u.ip;
-               if (ip->pip) {
-                       /* XXX SMP, pip chain not necessarily parent chain */
-                       ip->pip->delta_icount -= ip->ip_data.inode_count;
-                       ip->pip->delta_dcount -= ip->ip_data.data_count;
-                       ip->ip_data.inode_count += ip->delta_icount;
-                       ip->ip_data.data_count += ip->delta_dcount;
-                       ip->delta_icount = 0;
-                       ip->delta_dcount = 0;
-                       --ip->pip->delta_icount;
-                       spin_lock(&chain->cst.spin); /* XXX */
-                       ip->pip = NULL;
-                       spin_unlock(&chain->cst.spin);
-               }
+               KKASSERT(chain->u.ip == NULL);
        }
 
        /*
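diff --git a/sys/vfs/hammer2/hammer2_flush.c b/sys/vfs/hammer2/hammer2_flush.c
index 5490b2e..487355b 100644
--- a/sys/vfs/hammer2/hammer2_flush.c
+++ b/sys/vfs/hammer2/hammer2_flush.c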
@@ -255,6 +255,9 @@ hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain,
        hammer2_chain_unlock(hmp, parent);
 }
 
+/*
+ * chain is locked by the caller and remains locked on return.
+ */
 static void
 hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *chain,
                          hammer2_flush_info_t *info)
@@ -540,9 +543,12 @@ hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *chain,
                goto done;
        }
 
+#if 0
        /*
         * Synchronize cumulative data and inode count adjustments to
         * the inode and propagate the deltas upward to the parent.
+        *
+        * XXX removed atm
         */
        if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
                hammer2_inode_t *ip;
@@ -557,6 +563,7 @@ hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *chain,
                ip->delta_icount = 0;
                ip->delta_dcount = 0;
        }
+#endif
 
        /*
         * Flush if MODIFIED or MODIFIED_AUX is set.  MODIFIED_AUX is only
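diff --git a/sys/vfs/hammer2/hammer2_inode.c b/sys/vfs/hammer2/hammer2_inode.c
index 1b4fb89..31d802f 100644
--- a/sys/vfs/hammer2/hammer2_inode.c
+++ b/sys/vfs/hammer2/hammer2_inode.c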
@@ -48,7 +48,7 @@
 void
 hammer2_inode_ref(hammer2_inode_t *ip)
 {
-       hammer2_chain_ref(ip->hmp, &ip->chain);
+       atomic_add_int(&ip->refs, 1);
 }
 
 /*
@@ -58,7 +58,27 @@ hammer2_inode_ref(hammer2_inode_t *ip)
 void
 hammer2_inode_drop(hammer2_inode_t *ip)
 {
-       hammer2_chain_drop(ip->hmp, &ip->chain);
+       u_int refs;
+       hammer2_mount_t *hmp;
+
+       for (;;) {
+               refs = ip->refs;
+               cpu_ccfence();
+               if (refs == 1) {
+                       if (atomic_cmpset_int(&ip->refs, 1, 0)) {
+                               kprintf("hammer2_inode_drop: 1->0 %p\n", ip);
+                               KKASSERT(ip->topo_cst.count == 0);
+                               KKASSERT(ip->chain == NULL);
+                               hmp = ip->hmp;
+                               ip->hmp = NULL;
+                               kfree(ip, hmp->minode);
+                               break;
+                       }
+               } else {
+                       if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
+                               break;
+               }
+       }
 }
 
 /*
@@ -73,13 +93,15 @@ hammer2_inode_drop(hammer2_inode_t *ip)
 struct vnode *
 hammer2_igetv(hammer2_inode_t *ip, int *errorp)
 {
-       struct vnode *vp;
+       hammer2_inode_data_t *ipdata;
        hammer2_pfsmount_t *pmp;
+       struct vnode *vp;
        ccms_state_t ostate;
 
        pmp = ip->pmp;
        KKASSERT(pmp != NULL);
        *errorp = 0;
+       ipdata = &ip->chain->data->ipdata;
 
        for (;;) {
                /*
@@ -92,19 +114,20 @@ hammer2_igetv(hammer2_inode_t *ip, int *errorp)
                if (vp) {
                        /*
                         * Inode must be unlocked during the vget() to avoid
-                        * possible deadlocks, vnode is held to prevent
-                        * destruction during the vget().  The vget() can
-                        * still fail if we lost a reclaim race on the vnode.
+                        * possible deadlocks, but leave the ip ref intact.
+                        *
+                        * vnode is held to prevent destruction during the
+                        * vget().  The vget() can still fail if we lost
+                        * a reclaim race on the vnode.
                         */
                        vhold_interlocked(vp);
-                       ccms_thread_unlock(&ip->chain.cst);
+                       ostate = hammer2_inode_lock_temp_release(ip);
                        if (vget(vp, LK_EXCLUSIVE)) {
                                vdrop(vp);
-                               ccms_thread_lock(&ip->chain.cst,
-                                                CCMS_STATE_EXCLUSIVE);
+                               hammer2_inode_lock_restore(ip, ostate);
                                continue;
                        }
-                       ccms_thread_lock(&ip->chain.cst, CCMS_STATE_EXCLUSIVE);
+                       hammer2_inode_lock_restore(ip, ostate);
                        vdrop(vp);
                        /* vp still locked and ref from vget */
                        if (ip->vp != vp) {
@@ -133,23 +156,23 @@ hammer2_igetv(hammer2_inode_t *ip, int *errorp)
                /*
                 * Lock the inode and check for an allocation race.
                 */
-               ostate = ccms_thread_lock_upgrade(&ip->chain.cst);
+               ostate = hammer2_inode_lock_upgrade(ip);
                if (ip->vp != NULL) {
                        vp->v_type = VBAD;
                        vx_put(vp);
-                       ccms_thread_lock_restore(&ip->chain.cst, ostate);
+                       hammer2_inode_lock_restore(ip, ostate);
                        continue;
                }
 
-               switch (ip->ip_data.type) {
+               switch (ipdata->type) {
                case HAMMER2_OBJTYPE_DIRECTORY:
                        vp->v_type = VDIR;
                        break;
                case HAMMER2_OBJTYPE_REGFILE:
                        vp->v_type = VREG;
-                       vinitvmio(vp, ip->ip_data.size,
+                       vinitvmio(vp, ipdata->size,
                                  HAMMER2_LBUFSIZE,
-                                 (int)ip->ip_data.size & HAMMER2_LBUFMASK);
+                                 (int)ipdata->size & HAMMER2_LBUFMASK);
                        break;
                case HAMMER2_OBJTYPE_SOFTLINK:
                        /*
@@ -158,14 +181,13 @@ hammer2_igetv(hammer2_inode_t *ip, int *errorp)
                         * association.
                         */
                        vp->v_type = VLNK;
-                       vinitvmio(vp, ip->ip_data.size,
+                       vinitvmio(vp, ipdata->size,
                                  HAMMER2_LBUFSIZE,
-                                 (int)ip->ip_data.size & HAMMER2_LBUFMASK);
+                                 (int)ipdata->size & HAMMER2_LBUFMASK);
                        break;
                /* XXX FIFO */
                default:
-                       panic("hammer2: unhandled objtype %d",
-                             ip->ip_data.type);
+                       panic("hammer2: unhandled objtype %d", ipdata->type);
                        break;
                }
 
@@ -174,8 +196,8 @@ hammer2_igetv(hammer2_inode_t *ip, int *errorp)
 
                vp->v_data = ip;
                ip->vp = vp;
-               hammer2_chain_ref(ip->hmp, &ip->chain); /* vp association */
-               ccms_thread_lock_restore(&ip->chain.cst, ostate);
+               hammer2_inode_ref(ip);          /* vp association */
+               hammer2_inode_lock_restore(ip, ostate);
                break;
        }
 
@@ -190,6 +212,89 @@ hammer2_igetv(hammer2_inode_t *ip, int *errorp)
 }
 
 /*
+ * Return an exclusively locked inode associated with the specified
+ * chain.  The chain must be a BREF_TYPE_INODE, and (dip) must properly
+ * specify the inode's position in the topology.
+ *
+ * The passed-in chain must be locked and the returned inode will also be
+ * locked.
+ *
+ * WARNING!  This routine sucks up the chain's lock (makes it part of the
+ *          inode lock), so callers need to be careful.
+ *
+ * WARNING!  The mount code is allowed to pass dip == NULL for iroot.
+ */
+hammer2_inode_t *
+hammer2_inode_get(hammer2_pfsmount_t *pmp, hammer2_inode_t *dip,
+                 hammer2_chain_t *chain)
+{
+       hammer2_mount_t *hmp = pmp->hmp;
+       hammer2_inode_t *nip;
+
+       KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
+       if (chain->u.ip) {
+               nip = chain->u.ip;
+               KKASSERT(nip->pip == dip);
+               KKASSERT(nip->pmp == pmp);
+       } else {
+               nip = kmalloc(sizeof(*nip), hmp->minode, M_WAITOK | M_ZERO);
+               nip->chain = chain;
+               nip->pip = dip; /* can be NULL */
+               if (dip)
+                       hammer2_inode_ref(dip);
+               nip->pmp = pmp;
+               nip->hmp = hmp;
+               nip->refs = 1;
+               ccms_cst_init(&nip->topo_cst, &nip->chain);
+               hammer2_chain_ref(hmp, chain);
+               chain->u.ip = nip;
+       }
+       ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
+       /* combination of thread lock and chain lock == inode lock */
+
+       return (nip);
+}
+
+/*
+ * Put away an inode, disconnecting it from its chain.  The inode must be
+ * exclusively locked.
+ *
+ * The inode will be unlocked by this function.  Note however that any related
+ * chain returned by the hammer2_inode_lock_*() call will NOT be unlocked
+ * by this function.
+ */
+void
+hammer2_inode_put(hammer2_inode_t *ip)
+{
+       hammer2_mount_t *hmp = ip->hmp;
+       hammer2_chain_t *chain = ip->chain;
+       hammer2_inode_t *pip;
+
+       KKASSERT(chain);
+       KKASSERT(chain->u.ip == ip);
+       KKASSERT(ip->topo_cst.count == -1);     /* one excl lock allowed */
+       chain->u.ip = NULL;
+       ip->chain = NULL;
+       hammer2_chain_drop(ip->hmp, chain);     /* ref */
+
+       /*
+        * Disconnect ip from pip & related parent ref.
+        *
+        * We have to unlock the chain manually because
+        * the ip->chain pointer has already been NULL'd out.
+        */
+       if ((pip = ip->pip) != NULL) {
+               ip->pip = NULL;
+               hammer2_inode_unlock_ex(ip);
+               hammer2_chain_unlock(hmp, chain);
+               hammer2_inode_drop(pip);
+       } else {
+               hammer2_inode_unlock_ex(ip);
+               hammer2_chain_unlock(hmp, chain);
+       }
+}
+
+/*
  * Create a new inode in the specified directory using the vattr to
  * figure out the type of inode.
  *
@@ -199,6 +304,8 @@ hammer2_igetv(hammer2_inode_t *ip, int *errorp)
  * If vap and/or cred are NULL the related fields are not set and the
  * inode type defaults to a directory.  This is used when creating PFSs
  * under the super-root, so the inode number is set to 1 in this case.
+ *
+ * dip is not locked on entry.
  */
 int
 hammer2_inode_create(hammer2_inode_t *dip,
@@ -206,14 +313,19 @@ hammer2_inode_create(hammer2_inode_t *dip,
                     const uint8_t *name, size_t name_len,
                     hammer2_inode_t **nipp)
 {
-       hammer2_mount_t *hmp = dip->hmp;
+       hammer2_inode_data_t *nipdata;
+       hammer2_mount_t *hmp;
        hammer2_chain_t *chain;
        hammer2_chain_t *parent;
        hammer2_inode_t *nip;
        hammer2_key_t lhc;
        int error;
        uid_t xuid;
+       uuid_t dip_uid;
+       uuid_t dip_gid;
+       uint32_t dip_mode;
 
+       hmp = dip->hmp;
        lhc = hammer2_dirhash(name, name_len);
 
        /*
@@ -221,8 +333,9 @@ hammer2_inode_create(hammer2_inode_t *dip,
         * entry in.  At the same time check for key collisions
         * and iterate until we don't get one.
         */
+       hammer2_inode_lock_ex(dip);
 retry:
-       parent = &dip->chain;
+       parent = dip->chain;
        hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
 
        error = 0;
@@ -246,6 +359,12 @@ retry:
        }
        hammer2_chain_unlock(hmp, parent);
 
+       dip_uid = dip->chain->data->ipdata.uid;
+       dip_gid = dip->chain->data->ipdata.gid;
+       dip_mode = dip->chain->data->ipdata.mode;
+
+       hammer2_inode_unlock_ex(dip);
+
        /*
         * Handle the error case
         */
@@ -260,32 +379,36 @@ retry:
        }
 
        /*
-        * Set up the new inode
+        * Set up the new inode.
+        *
+        * NOTE: *_get() integrates chain's lock into the inode lock.
         */
-       nip = chain->u.ip;
+       nip = hammer2_inode_get(dip->pmp, dip, chain);
+       kprintf("nip %p chain %p\n", nip, nip->chain);
        *nipp = nip;
+       nipdata = &nip->chain->data->ipdata;
 
        hammer2_voldata_lock(hmp);
        if (vap) {
-               nip->ip_data.type = hammer2_get_obj_type(vap->va_type);
-               nip->ip_data.inum = hmp->voldata.alloc_tid++;
+               nipdata->type = hammer2_get_obj_type(vap->va_type);
+               nipdata->inum = hmp->voldata.alloc_tid++;
                /* XXX modify/lock */
        } else {
-               nip->ip_data.type = HAMMER2_OBJTYPE_DIRECTORY;
-               nip->ip_data.inum = 1;
+               nipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
+               nipdata->inum = 1;
        }
        hammer2_voldata_unlock(hmp);
-       nip->ip_data.version = HAMMER2_INODE_VERSION_ONE;
-       hammer2_update_time(&nip->ip_data.ctime);
-       nip->ip_data.mtime = nip->ip_data.ctime;
+       nipdata->version = HAMMER2_INODE_VERSION_ONE;
+       hammer2_update_time(&nipdata->ctime);
+       nipdata->mtime = nipdata->ctime;
        if (vap)
-               nip->ip_data.mode = vap->va_mode;
-       nip->ip_data.nlinks = 1;
+               nipdata->mode = vap->va_mode;
+       nipdata->nlinks = 1;
        if (vap) {
                if (dip) {
-                       xuid = hammer2_to_unix_xid(&dip->ip_data.uid);
+                       xuid = hammer2_to_unix_xid(&dip_uid);
                        xuid = vop_helper_create_uid(dip->pmp->mp,
-                                                    dip->ip_data.mode,
+                                                    dip_mode,
                                                     xuid,
                                                     cred,
                                                     &vap->va_mode);
@@ -293,18 +416,18 @@ retry:
                        xuid = 0;
                }
                if (vap->va_vaflags & VA_UID_UUID_VALID)
-                       nip->ip_data.uid = vap->va_uid_uuid;
+                       nipdata->uid = vap->va_uid_uuid;
                else if (vap->va_uid != (uid_t)VNOVAL)
-                       hammer2_guid_to_uuid(&nip->ip_data.uid, vap->va_uid);
+                       hammer2_guid_to_uuid(&nipdata->uid, vap->va_uid);
                else
-                       hammer2_guid_to_uuid(&nip->ip_data.uid, xuid);
+                       hammer2_guid_to_uuid(&nipdata->uid, xuid);
 
                if (vap->va_vaflags & VA_GID_UUID_VALID)
-                       nip->ip_data.gid = vap->va_gid_uuid;
+                       nipdata->gid = vap->va_gid_uuid;
                else if (vap->va_gid != (gid_t)VNOVAL)
-                       hammer2_guid_to_uuid(&nip->ip_data.gid, vap->va_gid);
+                       hammer2_guid_to_uuid(&nipdata->gid, vap->va_gid);
                else if (dip)
-                       nip->ip_data.gid = dip->ip_data.gid;
+                       nipdata->gid = dip_gid;
        }
 
        /*
@@ -312,15 +435,15 @@ retry:
         * directly embedded in the inode.  This flag will be cleared if
         * the size is extended past the embedded limit.
         */
-       if (nip->ip_data.type == HAMMER2_OBJTYPE_REGFILE ||
-           nip->ip_data.type == HAMMER2_OBJTYPE_SOFTLINK) {
-               nip->ip_data.op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
+       if (nipdata->type == HAMMER2_OBJTYPE_REGFILE ||
+           nipdata->type == HAMMER2_OBJTYPE_SOFTLINK) {
+               nipdata->op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
        }
 
        KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
-       bcopy(name, nip->ip_data.filename, name_len);
-       nip->ip_data.name_key = lhc;
-       nip->ip_data.name_len = name_len;
+       bcopy(name, nipdata->filename, name_len);
+       nipdata->name_key = lhc;
+       nipdata->name_len = name_len;
 
        return (0);
 }
@@ -338,17 +461,21 @@ hammer2_inode_duplicate(hammer2_inode_t *dip, hammer2_inode_t *oip,
                        hammer2_inode_t **nipp,
                        const uint8_t *name, size_t name_len)
 {
-       hammer2_mount_t *hmp = dip->hmp;
+       hammer2_inode_data_t *nipdata;
+       hammer2_mount_t *hmp;
        hammer2_inode_t *nip;
        hammer2_chain_t *parent;
        hammer2_chain_t *chain;
        hammer2_key_t lhc;
        int error;
 
+       hmp = dip->hmp;
        if (name) {
                lhc = hammer2_dirhash(name, name_len);
        } else {
-               lhc = oip->ip_data.inum;
+               hammer2_inode_lock_ex(oip);
+               lhc = oip->chain->data->ipdata.inum;
+               hammer2_inode_unlock_ex(oip);
                KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);
        }
 
@@ -358,8 +485,9 @@ hammer2_inode_duplicate(hammer2_inode_t *dip, hammer2_inode_t *oip,
         * and iterate until we don't get one.
         */
        nip = NULL;
+       hammer2_inode_lock_ex(dip);
 retry:
-       parent = &dip->chain;
+       parent = dip->chain;
        hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
 
        error = 0;
@@ -387,6 +515,7 @@ retry:
                                             &error);
        }
        hammer2_chain_unlock(hmp, parent);
+       hammer2_inode_unlock_ex(dip);
 
        /*
         * Handle the error case
@@ -420,13 +549,20 @@ retry:
         * XXX might be 0-ref chains left.
         */
        hammer2_inode_lock_ex(oip);
-       hammer2_chain_flush(hmp, &oip->chain, 0);
-       hammer2_inode_unlock_ex(oip);
+       hammer2_chain_flush(hmp, oip->chain, 0);
        /*KKASSERT(RB_EMPTY(&oip->chain.rbhead));*/
 
-       nip = chain->u.ip;
+       /*
+        * nip is a duplicate of oip.  Meta-data will be synchronized to
+        * media when nip is flushed.
+        *
+        * NOTE: *_get() integrates chain's lock into the inode lock.
+        */
+       nip = hammer2_inode_get(dip->pmp, dip, chain);
        hammer2_chain_modify(hmp, chain, 0);
-       nip->ip_data = oip->ip_data;    /* sync media data after flush */
+       nipdata = &chain->data->ipdata;
+       *nipdata = oip->chain->data->ipdata;
+       hammer2_inode_unlock_ex(oip);
 
        if (name) {
                /*
@@ -434,19 +570,19 @@ retry:
                 * we have to update the inode.
                 */
                KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
-               bcopy(name, nip->ip_data.filename, name_len);
-               nip->ip_data.name_key = lhc;
-               nip->ip_data.name_len = name_len;
+               bcopy(name, nipdata->filename, name_len);
+               nipdata->name_key = lhc;
+               nipdata->name_len = name_len;
        } else {
                /*
                 * Directory entries are inodes but this is a hidden hardlink
                 * target.  The name isn't used but to ease debugging give it
                 * a name after its inode number.
                 */
-               ksnprintf(nip->ip_data.filename, sizeof(nip->ip_data.filename),
-                         "0x%016jx", (intmax_t)nip->ip_data.inum);
-               nip->ip_data.name_len = strlen(nip->ip_data.filename);
-               nip->ip_data.name_key = lhc;
+               ksnprintf(nipdata->filename, sizeof(nipdata->filename),
+                         "0x%016jx", (intmax_t)nipdata->inum);
+               nipdata->name_len = strlen(nipdata->filename);
+               nipdata->name_key = lhc;
        }
        *nipp = nip;
 
@@ -469,7 +605,8 @@ int
 hammer2_inode_connect(hammer2_inode_t *dip, hammer2_inode_t *oip,
                      const uint8_t *name, size_t name_len)
 {
-       hammer2_mount_t *hmp = dip->hmp;
+       hammer2_inode_data_t *nipdata;
+       hammer2_mount_t *hmp;
        hammer2_chain_t *chain;
        hammer2_chain_t *parent;
        hammer2_inode_t *nip;
@@ -487,24 +624,26 @@ hammer2_inode_connect(hammer2_inode_t *dip, hammer2_inode_t *oip,
         * For now the caller deals with this for us by locking dip in
         * that case (and our lock here winds up just being recursive)
         */
+       hmp = dip->hmp;
 retry:
-       parent = &dip->chain;
        if (oip->pip == dip) {
-               hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
-               hammer2_chain_lock(hmp, &oip->chain, HAMMER2_RESOLVE_ALWAYS);
+               hammer2_inode_lock_ex(dip);
+               hammer2_inode_lock_ex(oip);
        } else {
-               hammer2_chain_lock(hmp, &oip->chain, HAMMER2_RESOLVE_ALWAYS);
-               hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
+               hammer2_inode_lock_ex(oip);
+               hammer2_inode_lock_ex(dip);
        }
+       parent = dip->chain;
+       hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
 
        lhc = hammer2_dirhash(name, name_len);
-       hlink = (oip->chain.parent != NULL);
+       hlink = (oip->chain->parent != NULL);
 
        /*
         * In fake mode flush oip so we can just snapshot it downbelow.
         */
        if (hlink && hammer2_hardlink_enable < 0)
-               hammer2_chain_flush(hmp, &oip->chain, 0);
+               hammer2_chain_flush(hmp, oip->chain, 0);
 
        /*
         * Locate the inode or indirect block to create the new
@@ -537,15 +676,23 @@ retry:
                                                     &error);
                } else {
                        chain = hammer2_chain_create(hmp, parent,
-                                                    &oip->chain, lhc, 0,
+                                                    oip->chain, lhc, 0,
                                                     HAMMER2_BREF_TYPE_INODE,
                                                     HAMMER2_INODE_BYTES,
                                                     &error);
                        if (chain)
-                               KKASSERT(chain == &oip->chain);
+                               KKASSERT(chain == oip->chain);
                }
        }
+
+       /*
+        * Unlock stuff.  This is a bit messy: on EAGAIN we take an extra ref
+        * on parent so we can wait on it after unlocking, then retry.
+        */
+       if (error == EAGAIN)
+               hammer2_chain_ref(hmp, parent);
        hammer2_chain_unlock(hmp, parent);
+       hammer2_inode_unlock_ex(dip);
 
        /*
         * Handle the error case
@@ -554,10 +701,11 @@ retry:
                KKASSERT(chain == NULL);
                if (error == EAGAIN) {
                        hammer2_chain_wait(hmp, parent);
-                       hammer2_chain_unlock(hmp, &oip->chain);
+                       hammer2_chain_drop(hmp, parent);
+                       hammer2_inode_unlock_ex(oip);
                        goto retry;
                }
-               hammer2_chain_unlock(hmp, &oip->chain);
+               hammer2_inode_unlock_ex(oip);
                return (error);
        }
 
@@ -573,34 +721,40 @@ retry:
                /*
                 * Create the HARDLINK pointer.  oip represents the hardlink
                 * target in this situation.
+                *
+                * NOTE: *_get() integrates chain's lock into the inode lock.
                 */
-               nip = chain->u.ip;
+               KKASSERT(chain->u.ip == NULL);
+               nip = hammer2_inode_get(dip->pmp, dip, chain);
                hammer2_chain_modify(hmp, chain, 0);
                KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
-               bcopy(name, nip->ip_data.filename, name_len);
-               nip->ip_data.name_key = lhc;
-               nip->ip_data.name_len = name_len;
-               nip->ip_data.target_type = oip->ip_data.type;
-               nip->ip_data.type = HAMMER2_OBJTYPE_HARDLINK;
-               nip->ip_data.inum = oip->ip_data.inum;
-               nip->ip_data.nlinks = 1;
+               nipdata = &nip->chain->data->ipdata;
+               bcopy(name, nipdata->filename, name_len);
+               nipdata->name_key = lhc;
+               nipdata->name_len = name_len;
+               nipdata->target_type = oip->chain->data->ipdata.type;
+               nipdata->type = HAMMER2_OBJTYPE_HARDLINK;
+               nipdata->inum = oip->chain->data->ipdata.inum;
+               nipdata->nlinks = 1;
                kprintf("created hardlink %*.*s\n",
                        (int)name_len, (int)name_len, name);
-               hammer2_chain_unlock(hmp, chain);
        } else if (hlink && hammer2_hardlink_enable < 0) {
                /*
                 * Create a snapshot (hardlink fake mode for debugging).
+                *
+                * NOTE: *_get() integrates chain's lock into the inode lock.
                 */
-               nip = chain->u.ip;
-               nip->ip_data = oip->ip_data;
+               KKASSERT(chain->u.ip == NULL);
+               nip = hammer2_inode_get(dip->pmp, dip, chain);
                hammer2_chain_modify(hmp, chain, 0);
                KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
-               bcopy(name, nip->ip_data.filename, name_len);
-               nip->ip_data.name_key = lhc;
-               nip->ip_data.name_len = name_len;
+               nipdata = &nip->chain->data->ipdata;
+               *nipdata = oip->chain->data->ipdata;
+               bcopy(name, nipdata->filename, name_len);
+               nipdata->name_key = lhc;
+               nipdata->name_len = name_len;
                kprintf("created fake hardlink %*.*s\n",
                        (int)name_len, (int)name_len, name);
-               hammer2_chain_unlock(hmp, chain);
        } else {
                /*
                 * Normally disconnected inode (e.g. during a rename) that
@@ -610,17 +764,20 @@ retry:
                 * We are using oip as chain, already locked by caller,
                 * do not unlock it.
                 */
+               KKASSERT(chain->u.ip != NULL);
                hammer2_chain_modify(hmp, chain, 0);
-               if (oip->ip_data.name_len != name_len ||
-                   bcmp(oip->ip_data.filename, name, name_len) != 0) {
+               nipdata = &oip->chain->data->ipdata;
+
+               if (nipdata->name_len != name_len ||
+                   bcmp(nipdata->filename, name, name_len) != 0) {
                        KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
-                       bcopy(name, oip->ip_data.filename, name_len);
-                       oip->ip_data.name_key = lhc;
-                       oip->ip_data.name_len = name_len;
+                       bcopy(name, nipdata->filename, name_len);
+                       nipdata->name_key = lhc;
+                       nipdata->name_len = name_len;
                }
-               oip->ip_data.nlinks = 1;
+               nipdata->nlinks = 1;
        }
-       hammer2_chain_unlock(hmp, &oip->chain);
+       hammer2_inode_unlock_ex(oip);
        return (0);
 }
 
@@ -639,8 +796,10 @@ hammer2_unlink_file(hammer2_inode_t *dip,
                    const uint8_t *name, size_t name_len,
                    int isdir, hammer2_inode_t *retain_ip)
 {
+       hammer2_inode_data_t *ipdata;
        hammer2_mount_t *hmp;
        hammer2_chain_t *parent;
+       hammer2_chain_t *ochain;
        hammer2_chain_t *chain;
        hammer2_chain_t *dparent;
        hammer2_chain_t *dchain;
@@ -651,21 +810,22 @@ hammer2_unlink_file(hammer2_inode_t *dip,
        uint8_t type;
 
        error = 0;
-       oip = NULL;
+       ip = NULL;
+       ochain = NULL;
        hmp = dip->hmp;
        lhc = hammer2_dirhash(name, name_len);
 
        /*
         * Search for the filename in the directory
         */
-       parent = &dip->chain;
+       hammer2_inode_lock_ex(dip);
+       parent = dip->chain;
        hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
        chain = hammer2_chain_lookup(hmp, &parent,
                                     lhc, lhc + HAMMER2_DIRHASH_LOMASK,
                                     0);
        while (chain) {
                if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
-                   chain->u.ip &&
                    name_len == chain->data->ipdata.name_len &&
                    bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
                        break;
@@ -674,6 +834,7 @@ hammer2_unlink_file(hammer2_inode_t *dip,
                                           lhc, lhc + HAMMER2_DIRHASH_LOMASK,
                                           0);
        }
+       hammer2_inode_unlock_ex(dip);
 
        /*
         * Not found or wrong type (isdir < 0 disables the type check).
@@ -697,11 +858,15 @@ hammer2_unlink_file(hammer2_inode_t *dip,
        /*
         * Hardlink must be resolved.  We can't hold parent locked while we
         * do this or we could deadlock.
+        *
+        * On success chain will be adjusted to point at the hardlink target
+        * and ochain will point to the hardlink pointer in the original
+        * directory.  Otherwise chain remains pointing to the original.
         */
        if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
                hammer2_chain_unlock(hmp, parent);
                parent = NULL;
-               error = hammer2_hardlink_find(dip, &chain, &oip);
+               error = hammer2_hardlink_find(dip, &chain, &ochain);
        }
 
        /*
@@ -740,32 +905,50 @@ hammer2_unlink_file(hammer2_inode_t *dip,
         * can still be flushed to the media (e.g. if an open descriptor
         * remains).  When the last vnode/ip ref goes away the chain will
         * be marked unmodified, avoiding any further (now unnecesary) I/O.
+        *
+        * A non-NULL ochain indicates a hardlink.
         */
-       if (oip) {
+       if (ochain) {
                /*
-                * If this was a hardlink we first delete the hardlink
-                * pointer entry.  parent is NULL on entry due to the oip
-                * path.
+                * Delete the original hardlink pointer.
                 */
-               parent = oip->chain.parent;
-               hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
-               hammer2_chain_lock(hmp, &oip->chain, HAMMER2_RESOLVE_ALWAYS);
-               if (oip == retain_ip && oip->chain.flushing) {
-                       hammer2_chain_unlock(hmp, &oip->chain);
+               hammer2_chain_lock(hmp, ochain, HAMMER2_RESOLVE_ALWAYS);
+               for (;;) {
+                       parent = ochain->parent;
+                       hammer2_chain_ref(hmp, parent);
+                       hammer2_chain_unlock(hmp, ochain);
+                       hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
+                       hammer2_chain_lock(hmp, ochain, HAMMER2_RESOLVE_ALWAYS);
+                       if (ochain->parent == parent)
+                               break;
+                       hammer2_chain_unlock(hmp, parent);
+                       hammer2_chain_drop(hmp, parent);
+               }
+
+               if (ochain == retain_ip->chain && ochain->flushing) {
+                       hammer2_chain_unlock(hmp, ochain);
                        error = EAGAIN;
                        goto done;
                }
-               hammer2_chain_delete(hmp, parent, &oip->chain,
-                                    (retain_ip == oip));
-               hammer2_chain_unlock(hmp, &oip->chain);
+               if ((oip = ochain->u.ip) != NULL) {
+                       hammer2_inode_lock_ex(oip);
+                       oip->flags |= HAMMER2_INODE_DELETED;
+                       if (oip->vp || oip->refs > 1)
+                               hammer2_inode_unlock_ex(oip);
+                       else
+                               hammer2_inode_put(oip);
+               }
+               hammer2_chain_delete(hmp, parent, ochain,
+                                    (ochain == retain_ip->chain));
                hammer2_chain_unlock(hmp, parent);
+               hammer2_chain_drop(hmp, parent);
                parent = NULL;
 
                /*
-                * Then decrement nlinks on hardlink target.
+                * Then decrement nlinks on hardlink target, deleting
+                * the target when nlinks drops to 0.
                 */
-               ip = chain->u.ip;
-               if (ip->ip_data.nlinks == 1) {
+               if (chain->data->ipdata.nlinks == 1) {
                        dparent = chain->parent;
                        hammer2_chain_ref(hmp, chain);
                        hammer2_chain_unlock(hmp, chain);
@@ -775,27 +958,46 @@ hammer2_unlink_file(hammer2_inode_t *dip,
                                           HAMMER2_RESOLVE_ALWAYS);
                        hammer2_chain_drop(hmp, chain);
                        hammer2_chain_modify(hmp, chain, 0);
-                       --ip->ip_data.nlinks;
+                       --chain->data->ipdata.nlinks;
+                       if ((ip = chain->u.ip) != NULL) {
+                               hammer2_inode_lock_ex(ip);
+                               ip->flags |= HAMMER2_INODE_DELETED;
+                               if (ip->vp)
+                                       hammer2_inode_unlock_ex(ip);
+                               else
+                                       hammer2_inode_put(ip);
+                       }
                        hammer2_chain_delete(hmp, dparent, chain, 0);
                        hammer2_chain_unlock(hmp, dparent);
                } else {
                        hammer2_chain_modify(hmp, chain, 0);
-                       --ip->ip_data.nlinks;
+                       --chain->data->ipdata.nlinks;
                }
        } else {
                /*
                 * Otherwise this was not a hardlink and we can just
                 * remove the entry and decrement nlinks.
+                *
+                * NOTE: *_get() integrates chain's lock into the inode lock.
                 */
-               ip = chain->u.ip;
+               ip = hammer2_inode_get(dip->pmp, dip, chain);
+               ipdata = &ip->chain->data->ipdata;
                if (ip == retain_ip && chain->flushing) {
+                       hammer2_inode_unlock_ex(ip);
+                       chain = NULL;   /* inode_unlock eats chain */
                        error = EAGAIN;
                        goto done;
                }
                hammer2_chain_modify(hmp, chain, 0);
-               --ip->ip_data.nlinks;
+               --ipdata->nlinks;
+               ip->flags |= HAMMER2_INODE_DELETED;
                hammer2_chain_delete(hmp, parent, chain,
                                     (retain_ip == ip));
+               if (ip->vp)
+                       hammer2_inode_unlock_ex(ip);
+               else
+                       hammer2_inode_put(ip);
+               chain = NULL;   /* inode_unlock eats chain */
        }
 
        error = 0;
@@ -803,10 +1005,12 @@ hammer2_unlink_file(hammer2_inode_t *dip,
 done:
        if (chain)
                hammer2_chain_unlock(hmp, chain);
-       if (parent)
+       if (parent) {
                hammer2_chain_unlock(hmp, parent);
-       if (oip)
-               hammer2_chain_drop(oip->hmp, &oip->chain);
+               hammer2_chain_drop(hmp, parent);
+       }
+       if (ochain)
+               hammer2_chain_drop(hmp, ochain);
 
        return error;
 }
@@ -827,18 +1031,6 @@ hammer2_inode_calc_alloc(hammer2_key_t filesize)
        return (radix);
 }
 
-void
-hammer2_inode_lock_nlinks(hammer2_inode_t *ip)
-{
-       hammer2_chain_ref(ip->hmp, &ip->chain);
-}
-
-void
-hammer2_inode_unlock_nlinks(hammer2_inode_t *ip)
-{
-       hammer2_chain_drop(ip->hmp, &ip->chain);
-}
-
 /*
  * Consolidate for hard link creation.  This moves the specified terminal
  * hardlink inode to a directory common to its current directory and tdip
@@ -848,23 +1040,27 @@ hammer2_inode_unlock_nlinks(hammer2_inode_t *ip)
  * If the original inode chain element was a prior incarnation of a hidden
  * inode it can simply be deleted instead of converted.
  *
- * (*ipp)'s nlinks field is locked on entry and the new (*ipp)'s nlinks
- * field will be locked on return (with the original's unlocked).
+ * (*ipp) must be referenced on entry and the new (*ipp) will be referenced
+ * on return (with the original dropped).  (*ipp) must not be locked.
  *
  * The link count is bumped if requested.
  */
 int
 hammer2_hardlink_consolidate(hammer2_inode_t **ipp, hammer2_inode_t *tdip)
 {
+       hammer2_inode_data_t *oipdata;
        hammer2_mount_t *hmp;
-       hammer2_inode_t *oip = *ipp;
-       hammer2_inode_t *nip = NULL;
+       hammer2_inode_t *oip;
+       hammer2_inode_t *nip;
        hammer2_inode_t *fdip;
        hammer2_inode_t *cdip;
+       hammer2_chain_t *ochain;
        hammer2_chain_t *parent;
        int error;
 
        hmp = tdip->hmp;
+       oip = *ipp;
+       nip = NULL;
 
        if (hammer2_hardlink_enable < 0)
                return (0);
@@ -878,16 +1074,17 @@ hammer2_hardlink_consolidate(hammer2_inode_t **ipp, hammer2_inode_t *tdip)
         * Nothing to do (except bump the link count) if the hardlink has
         * already been consolidated in the correct place.
         */
-       if (cdip == fdip &&
-           (oip->ip_data.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
-               kprintf("hardlink already consolidated correctly\n");
-               nip = oip;
-               hammer2_inode_lock_ex(nip);
-               hammer2_chain_modify(hmp, &nip->chain, 0);
-               ++nip->ip_data.nlinks;
-               hammer2_inode_unlock_ex(nip);
-               hammer2_inode_drop(cdip);
-               return (0);
+       if (cdip == fdip) {
+               hammer2_inode_lock_ex(oip);
+               if ((oip->chain->data->ipdata.name_key &
+                    HAMMER2_DIRHASH_VISIBLE) == 0) {
+                       hammer2_chain_modify(hmp, oip->chain, 0);
+                       ++oip->chain->data->ipdata.nlinks;
+                       hammer2_inode_unlock_ex(oip);
+                       hammer2_inode_drop(cdip);
+                       return(0);
+               }
+               hammer2_inode_unlock_ex(oip);
        }
 
        /*
@@ -903,51 +1100,49 @@ hammer2_hardlink_consolidate(hammer2_inode_t **ipp, hammer2_inode_t *tdip)
                /*
                 * Bump nlinks on duplicated hidden inode.
                 */
-               kprintf("hardlink consolidation success in parent dir %s\n",
-                       cdip->ip_data.filename);
-               hammer2_inode_lock_nlinks(nip);
-               hammer2_inode_unlock_nlinks(oip);
-               hammer2_chain_modify(hmp, &nip->chain, 0);
-               ++nip->ip_data.nlinks;
+               hammer2_inode_ref(nip);                 /* ref new *ipp */
+               hammer2_chain_modify(hmp, nip->chain, 0);
+               ++nip->chain->data->ipdata.nlinks;
                hammer2_inode_unlock_ex(nip);
+               hammer2_inode_lock_ex(oip);
+               hammer2_inode_drop(oip);                /* unref old *ipp */
 
-               if (oip->ip_data.name_key & HAMMER2_DIRHASH_VISIBLE) {
+               if (oip->chain->data->ipdata.name_key &
+                   HAMMER2_DIRHASH_VISIBLE) {
                        /*
                         * Replace the old inode with an OBJTYPE_HARDLINK
                         * pointer.
                         */
-                       hammer2_inode_lock_ex(oip);
-                       hammer2_chain_modify(hmp, &oip->chain, 0);
-                       oip->ip_data.target_type = oip->ip_data.type;
-                       oip->ip_data.type = HAMMER2_OBJTYPE_HARDLINK;
-                       oip->ip_data.uflags = 0;
-                       oip->ip_data.rmajor = 0;
-                       oip->ip_data.rminor = 0;
-                       oip->ip_data.ctime = 0;
-                       oip->ip_data.mtime = 0;
-                       oip->ip_data.atime = 0;
-                       oip->ip_data.btime = 0;
-                       bzero(&oip->ip_data.uid, sizeof(oip->ip_data.uid));
-                       bzero(&oip->ip_data.gid, sizeof(oip->ip_data.gid));
-                       oip->ip_data.op_flags = HAMMER2_OPFLAG_DIRECTDATA;
-                       oip->ip_data.cap_flags = 0;
-                       oip->ip_data.mode = 0;
-                       oip->ip_data.size = 0;
-                       oip->ip_data.nlinks = 1;
-                       oip->ip_data.iparent = 0;       /* XXX */
-                       oip->ip_data.pfs_type = 0;
-                       oip->ip_data.pfs_inum = 0;
-                       bzero(&oip->ip_data.pfs_clid,
-                             sizeof(oip->ip_data.pfs_clid));
-                       bzero(&oip->ip_data.pfs_fsid,
-                             sizeof(oip->ip_data.pfs_fsid));
-                       oip->ip_data.data_quota = 0;
-                       oip->ip_data.data_count = 0;
-                       oip->ip_data.inode_quota = 0;
-                       oip->ip_data.inode_count = 0;
-                       oip->ip_data.attr_tid = 0;
-                       oip->ip_data.dirent_tid = 0;
-                       bzero(&oip->ip_data.u, sizeof(oip->ip_data.u));
+                       hammer2_chain_modify(hmp, oip->chain, 0);
+                       oipdata = &oip->chain->data->ipdata;
+                       oipdata->target_type = oipdata->type;
+                       oipdata->type = HAMMER2_OBJTYPE_HARDLINK;
+                       oipdata->uflags = 0;
+                       oipdata->rmajor = 0;
+                       oipdata->rminor = 0;
+                       oipdata->ctime = 0;
+                       oipdata->mtime = 0;
+                       oipdata->atime = 0;
+                       oipdata->btime = 0;
+                       bzero(&oipdata->uid, sizeof(oipdata->uid));
+                       bzero(&oipdata->gid, sizeof(oipdata->gid));
+                       oipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
+                       oipdata->cap_flags = 0;
+                       oipdata->mode = 0;
+                       oipdata->size = 0;
+                       oipdata->nlinks = 1;
+                       oipdata->iparent = 0;   /* XXX */
+                       oipdata->pfs_type = 0;
+                       oipdata->pfs_inum = 0;
+                       bzero(&oipdata->pfs_clid, sizeof(oipdata->pfs_clid));
+                       bzero(&oipdata->pfs_fsid, sizeof(oipdata->pfs_fsid));
+                       oipdata->data_quota = 0;
+                       oipdata->data_count = 0;
+                       oipdata->inode_quota = 0;
+                       oipdata->inode_count = 0;
+                       oipdata->attr_tid = 0;
+                       oipdata->dirent_tid = 0;
+                       bzero(&oipdata->u, sizeof(oipdata->u));
                        /* XXX transaction ids */
 
                        hammer2_inode_unlock_ex(oip);
@@ -959,14 +1154,24 @@ hammer2_hardlink_consolidate(hammer2_inode_t **ipp, hammer2_inode_t *tdip)
                         * becomes the only hardlink target for this inode.
                         */
                        kprintf("DELETE INVISIBLE\n");
-                       parent = oip->chain.parent;
-                       hammer2_chain_lock(hmp, parent,
-                                          HAMMER2_RESOLVE_ALWAYS);
-                       hammer2_chain_lock(hmp, &oip->chain,
-                                          HAMMER2_RESOLVE_ALWAYS);
-                       hammer2_chain_delete(hmp, parent, &oip->chain, 0);
-                       hammer2_chain_unlock(hmp, &oip->chain);
+                       for (;;) {
+                               parent = oip->chain->parent;
+                               hammer2_chain_ref(hmp, parent);
+                               hammer2_inode_unlock_ex(oip);
+                               hammer2_chain_lock(hmp, parent,
+                                                  HAMMER2_RESOLVE_ALWAYS);
+                               hammer2_inode_lock_ex(oip);
+                               if (oip->chain->parent == parent)
+                                       break;
+                               hammer2_chain_unlock(hmp, parent);
+                               hammer2_chain_drop(hmp, parent);
+                       }
+                       oip->flags |= HAMMER2_INODE_DELETED;
+                       ochain = oip->chain;
+                       hammer2_chain_delete(hmp, parent, ochain, 0);
+                       hammer2_inode_put(oip);
                        hammer2_chain_unlock(hmp, parent);
+                       hammer2_chain_drop(hmp, parent);
                }
                *ipp = nip;
        } else {
@@ -978,57 +1183,84 @@ hammer2_hardlink_consolidate(hammer2_inode_t **ipp, hammer2_inode_t *tdip)
 }
 
 /*
- * If (*ipp) is non-NULL it points to the forward OBJTYPE_HARDLINK inode while
- * (*chainp) points to the resolved (hidden hardlink target) inode.  In this
- * situation when nlinks is 1 we wish to deconsolidate the hardlink, moving
- * it back to the directory that now represents the only remaining link.
+ * If (*ochainp) is non-NULL it points to the forward OBJTYPE_HARDLINK
+ * inode while (*chainp) points to the resolved (hidden hardlink
+ * target) inode.  In this situation when nlinks is 1 we wish to
+ * deconsolidate the hardlink, moving it back to the directory that now
+ * represents the only remaining link.
  */
 int
-hammer2_hardlink_deconsolidate(hammer2_inode_t *dip, hammer2_chain_t **chainp,
-                              hammer2_inode_t **ipp)
+hammer2_hardlink_deconsolidate(hammer2_inode_t *dip,
+                              hammer2_chain_t **chainp,
+                              hammer2_chain_t **ochainp)
 {
-       if (*ipp == NULL)
+       if (*ochainp == NULL)
                return (0);
        /* XXX */
        return (0);
 }
 
 /*
- * When presented with a (*chainp) representing an inode of type
- * OBJTYPE_HARDLINK this code will save the original inode (with a ref)
- * in (*ipp), and then locate the hidden hardlink target in (dip) or
- * any parent directory above (dip).  The locked (*chainp) is replaced
- * with a new locked (*chainp) representing the hardlink target.
+ * The caller presents a locked *chainp pointing to a HAMMER2_BREF_TYPE_INODE
+ * with an obj_type of HAMMER2_OBJTYPE_HARDLINK.  This routine will gobble
+ * the *chainp and return a new locked *chainp representing the file target
+ * (the original *chainp will be unlocked).
+ *
+ * When a match is found the chain representing the original HARDLINK
+ * will be returned in *ochainp with a ref, but not locked.
+ *
+ * When no match is found *chainp is set to NULL and EIO is returned.
+ * (*ochainp) will still be set to the original chain with a ref but not
+ * locked.
  */
 int
 hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp,
-                     hammer2_inode_t **ipp)
+                     hammer2_chain_t **ochainp)
 {
        hammer2_mount_t *hmp = dip->hmp;
        hammer2_chain_t *chain = *chainp;
        hammer2_chain_t *parent;
+       hammer2_inode_t *ip;
        hammer2_inode_t *pip;
        hammer2_key_t lhc;
 
-       *ipp = chain->u.ip;
-       hammer2_inode_ref(chain->u.ip);
-       lhc = chain->u.ip->ip_data.inum;
+       pip = dip;
+       hammer2_inode_ref(pip);         /* for loop */
+       hammer2_chain_ref(hmp, chain);  /* for (*ochainp) */
 
-       hammer2_inode_unlock_ex(chain->u.ip);
-       pip = chain->u.ip->pip;
+       *ochainp = chain;
 
+       /*
+        * Locate the hardlink target.  pip is referenced and not
+        * locked.
+        *
+        * chain is reused.
+        */
+       lhc = chain->data->ipdata.inum;
+       hammer2_chain_unlock(hmp, chain);
        chain = NULL;
-       while (pip) {
-               parent = &pip->chain;
-               KKASSERT(parent->bref.type == HAMMER2_BREF_TYPE_INODE);
 
+       while ((ip = pip) != NULL) {
+               hammer2_inode_lock_ex(ip);
+               hammer2_inode_drop(ip);
+               parent = ip->chain;
+               KKASSERT(parent->bref.type == HAMMER2_BREF_TYPE_INODE);
                hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
                chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
                hammer2_chain_unlock(hmp, parent);
                if (chain)
                        break;
-               pip = pip->pip; /* XXX SMP RACE */
+               pip = ip->pip;          /* safe, ip held locked */
+               if (pip)
+                       hammer2_inode_ref(pip);
+               hammer2_inode_unlock_ex(ip);
        }
+
+       /*
+        * On a match chain and ip are both locked.  Unlock ip, return the
+        * locked chain.  *ochainp already holds a ref and is not locked.
+        */
+       hammer2_inode_unlock_ex(ip);
        *chainp = chain;
        if (chain) {
                KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
index 56aaab2..b34e086 100644 (file)
@@ -319,13 +319,16 @@ hammer2_ioctl_socket_set(hammer2_inode_t *ip, void *data)
 static int
 hammer2_ioctl_pfs_get(hammer2_inode_t *ip, void *data)
 {
-       hammer2_mount_t *hmp = ip->hmp;
-       hammer2_ioc_pfs_t *pfs = data;
+       hammer2_inode_data_t *ipdata;
+       hammer2_mount_t *hmp;
+       hammer2_ioc_pfs_t *pfs;
        hammer2_chain_t *parent;
        hammer2_chain_t *chain;
-       hammer2_inode_t *xip;
-       int error = 0;
+       int error;
 
+       error = 0;
+       hmp = ip->hmp;
+       pfs = data;
        parent = hmp->schain;
        error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
        if (error)
@@ -350,15 +353,15 @@ hammer2_ioctl_pfs_get(hammer2_inode_t *ip, void *data)
                /*
                 * Load the data being returned by the ioctl.
                 */
-               xip = chain->u.ip;
-               pfs->name_key = xip->ip_data.name_key;
-               pfs->pfs_type = xip->ip_data.pfs_type;
-               pfs->pfs_clid = xip->ip_data.pfs_clid;
-               pfs->pfs_fsid = xip->ip_data.pfs_fsid;
-               KKASSERT(xip->ip_data.name_len < sizeof(pfs->name));
-               bcopy(xip->ip_data.filename, pfs->name,
-                     xip->ip_data.name_len);
-               pfs->name[xip->ip_data.name_len] = 0;
+               ipdata = &chain->data->ipdata;
+               pfs->name_key = ipdata->name_key;
+               pfs->pfs_type = ipdata->pfs_type;
+               pfs->pfs_clid = ipdata->pfs_clid;
+               pfs->pfs_fsid = ipdata->pfs_fsid;
+               KKASSERT(ipdata->name_len < sizeof(pfs->name));
+               bcopy(ipdata->filename, pfs->name, ipdata->name_len);
+               pfs->name[ipdata->name_len] = 0;
+               ipdata = NULL;  /* safety */
 
                /*
                 * Calculate the next field
@@ -368,7 +371,7 @@ hammer2_ioctl_pfs_get(hammer2_inode_t *ip, void *data)
                                             0, (hammer2_key_t)-1, 0);
                } while (chain && chain->bref.type != HAMMER2_BREF_TYPE_INODE);
                if (chain) {
-                       pfs->name_next = chain->u.ip->ip_data.name_key;
+                       pfs->name_next = chain->data->ipdata.name_key;
                        hammer2_chain_unlock(hmp, chain);
                } else {
                        pfs->name_next = (hammer2_key_t)-1;
@@ -388,15 +391,18 @@ done:
 static int
 hammer2_ioctl_pfs_lookup(hammer2_inode_t *ip, void *data)
 {
-       hammer2_mount_t *hmp = ip->hmp;
-       hammer2_ioc_pfs_t *pfs = data;
+       hammer2_inode_data_t *ipdata;
+       hammer2_mount_t *hmp;
+       hammer2_ioc_pfs_t *pfs;
        hammer2_chain_t *parent;
        hammer2_chain_t *chain;
-       hammer2_inode_t *xip;
        hammer2_key_t lhc;
-       int error = 0;
+       int error;
        size_t len;
 
+       error = 0;
+       hmp = ip->hmp;
+       pfs = data;
        parent = hmp->schain;
        error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS |
                                                HAMMER2_RESOLVE_SHARED);
@@ -426,11 +432,12 @@ hammer2_ioctl_pfs_lookup(hammer2_inode_t *ip, void *data)
         * Load the data being returned by the ioctl.
         */
        if (chain) {
-               xip = chain->u.ip;
-               pfs->name_key = xip->ip_data.name_key;
-               pfs->pfs_type = xip->ip_data.pfs_type;
-               pfs->pfs_clid = xip->ip_data.pfs_clid;
-               pfs->pfs_fsid = xip->ip_data.pfs_fsid;
+               ipdata = &chain->data->ipdata;
+               pfs->name_key = ipdata->name_key;
+               pfs->pfs_type = ipdata->pfs_type;
+               pfs->pfs_clid = ipdata->pfs_clid;
+               pfs->pfs_fsid = ipdata->pfs_fsid;
+               ipdata = NULL;
 
                hammer2_chain_unlock(hmp, chain);
        } else {
@@ -447,21 +454,27 @@ done:
 static int
 hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
 {
-       hammer2_mount_t *hmp = ip->hmp;
-       hammer2_ioc_pfs_t *pfs = data;
-       hammer2_inode_t *nip = NULL;
+       hammer2_inode_data_t *nipdata;
+       hammer2_mount_t *hmp;
+       hammer2_ioc_pfs_t *pfs;
+       hammer2_inode_t *nip;
        int error;
 
+       hmp = ip->hmp;
+       pfs = data;
+       nip = NULL;
+
        pfs->name[sizeof(pfs->name) - 1] = 0;   /* ensure 0-termination */
        error = hammer2_inode_create(hmp->schain->u.ip, NULL, NULL,
                                     pfs->name, strlen(pfs->name),
                                     &nip);
        if (error == 0) {
-               hammer2_chain_modify(hmp, &nip->chain, 0);
-               nip->ip_data.pfs_type = pfs->pfs_type;
-               nip->ip_data.pfs_clid = pfs->pfs_clid;
-               nip->ip_data.pfs_fsid = pfs->pfs_fsid;
-               hammer2_chain_unlock(hmp, &nip->chain);
+               hammer2_chain_modify(hmp, nip->chain, 0);
+               nipdata = &nip->chain->data->ipdata;
+               nipdata->pfs_type = pfs->pfs_type;
+               nipdata->pfs_clid = pfs->pfs_clid;
+               nipdata->pfs_fsid = pfs->pfs_fsid;
+               hammer2_inode_unlock_ex(nip);
        }
        return (error);
 }
@@ -491,7 +504,7 @@ hammer2_ioctl_inode_get(hammer2_inode_t *ip, void *data)
        hammer2_ioc_inode_t *ino = data;
 
        hammer2_inode_lock_sh(ip);
-       ino->ip_data = ip->ip_data;
+       ino->ip_data = ip->chain->data->ipdata;
        ino->kdata = ip;
        hammer2_inode_unlock_sh(ip);
        return (0);
index 262ee25..ae34049 100644 (file)
 /*
  * HAMMER2 inode locks
  *
- * HAMMER2 offers shared locks, update locks, and exclusive locks on inodes.
+ * HAMMER2 offers shared locks and exclusive locks on inodes.
  *
- * Shared locks allow concurrent access to an inode's fields, but exclude
- * access by concurrent exclusive locks.
+ * An inode's ip->chain pointer is resolved and stable while an inode is
+ * locked, and can be cleaned out at any time (become NULL) when an inode
+ * is not locked.
  *
- * Update locks are interesting -- an update lock will be taken after all
- * shared locks on an inode are released, but once it is in place, shared
- * locks may proceed. The update field is signalled by a busy flag in the
- * inode. Only one update lock may be in place at a given time on an inode.
+ * The underlying chain is also locked.
  *
- * Exclusive locks prevent concurrent access to the inode.
- *
- * XXX: What do we use each for? How is visibility to the inode controlled?
+ * NOTE: We don't combine the inode/chain lock because putting away an
+ *       inode would otherwise confuse multiple lock holders of the inode.
  */
-
-
 void
 hammer2_inode_lock_ex(hammer2_inode_t *ip)
 {
-       hammer2_chain_lock(ip->hmp, &ip->chain, HAMMER2_RESOLVE_ALWAYS);
+       hammer2_inode_ref(ip);
+       ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
+       KKASSERT(ip->chain != NULL);    /* for now */
+       hammer2_chain_lock(ip->hmp, ip->chain, HAMMER2_RESOLVE_ALWAYS);
 }
 
 void
 hammer2_inode_unlock_ex(hammer2_inode_t *ip)
 {
-       hammer2_chain_unlock(ip->hmp, &ip->chain);
+       if (ip->chain)
+               hammer2_chain_unlock(ip->hmp, ip->chain);
+       ccms_thread_unlock(&ip->topo_cst);
+       hammer2_inode_drop(ip);
 }
 
 void
 hammer2_inode_lock_sh(hammer2_inode_t *ip)
 {
-       KKASSERT(ip->chain.refs > 0);
-       hammer2_chain_lock(ip->hmp, &ip->chain, HAMMER2_RESOLVE_ALWAYS |
-                                               HAMMER2_RESOLVE_SHARED);
+       hammer2_inode_ref(ip);
+       ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED);
+       KKASSERT(ip->chain != NULL);    /* for now */
+       hammer2_chain_lock(ip->hmp, ip->chain, HAMMER2_RESOLVE_ALWAYS |
+                                              HAMMER2_RESOLVE_SHARED);
 }
 
 void
 hammer2_inode_unlock_sh(hammer2_inode_t *ip)
 {
-       hammer2_chain_unlock(ip->hmp, &ip->chain);
+       if (ip->chain)
+               hammer2_chain_unlock(ip->hmp, ip->chain);
+       ccms_thread_unlock(&ip->topo_cst);
+       hammer2_inode_drop(ip);
 }
 
-#if 0
-/*
- * Soft-busy an inode.
- *
- * The inode must be exclusively locked while soft-busying or soft-unbusying
- * an inode.  Once busied or unbusied the caller can release the lock.
- */
-void
-hammer2_inode_busy(hammer2_inode_t *ip)
+ccms_state_t
+hammer2_inode_lock_temp_release(hammer2_inode_t *ip)
 {
-       if (ip->chain.busy++ == 0)
-               hammer2_chain_ref(ip->hmp, &ip->chain, 0);
+       return(ccms_thread_lock_temp_release(&ip->topo_cst));
 }
 
-void
-hammer2_inode_unbusy(hammer2_inode_t *ip)
+ccms_state_t
+hammer2_inode_lock_upgrade(hammer2_inode_t *ip)
 {
-       if (--ip->chain.busy == 0)
-               hammer2_chain_drop(ip->hmp, &ip->chain);
+       return(ccms_thread_lock_upgrade(&ip->topo_cst));
 }
 
-#endif
+void
+hammer2_inode_lock_restore(hammer2_inode_t *ip, ccms_state_t ostate)
+{
+       ccms_thread_lock_restore(&ip->topo_cst, ostate);
+}
 
 /*
  * Mount-wide locks
@@ -145,15 +146,19 @@ hammer2_voldata_unlock(hammer2_mount_t *hmp)
 }
 
 /*
- * Return the directory entry type for an inode
+ * Return the directory entry type for an inode.
+ *
+ * ip must be locked sh/ex.
  */
 int
-hammer2_get_dtype(hammer2_inode_t *ip)
+hammer2_get_dtype(hammer2_chain_t *chain)
 {
        uint8_t type;
 
-       if ((type = ip->ip_data.type) == HAMMER2_OBJTYPE_HARDLINK)
-               type = ip->ip_data.target_type;
+       KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
+
+       if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK)
+               type = chain->data->ipdata.target_type;
 
        switch(type) {
        case HAMMER2_OBJTYPE_UNKNOWN:
@@ -186,9 +191,11 @@ hammer2_get_dtype(hammer2_inode_t *ip)
  * Return the directory entry type for an inode
  */
 int
-hammer2_get_vtype(hammer2_inode_t *ip)
+hammer2_get_vtype(hammer2_chain_t *chain)
 {
-       switch(ip->ip_data.type) {
+       KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
+
+       switch(chain->data->ipdata.type) {
        case HAMMER2_OBJTYPE_UNKNOWN:
                return (VBAD);
        case HAMMER2_OBJTYPE_DIRECTORY:
@@ -369,17 +376,21 @@ hammer2_allocsize(size_t bytes)
        return (radix);
 }
 
+/*
+ * ip must be locked sh/ex
+ */
 int
 hammer2_calc_logical(hammer2_inode_t *ip, hammer2_off_t uoff,
                     hammer2_key_t *lbasep, hammer2_key_t *leofp)
 {
+       hammer2_inode_data_t *ipdata = &ip->chain->data->ipdata;
        int radix;
 
        *lbasep = uoff & ~HAMMER2_PBUFMASK64;
-       *leofp = ip->ip_data.size & ~HAMMER2_PBUFMASK64;
+       *leofp = ipdata->size & ~HAMMER2_PBUFMASK64;
        KKASSERT(*lbasep <= *leofp);
        if (*lbasep == *leofp /*&& *leofp < 1024 * 1024*/) {
-               radix = hammer2_allocsize((size_t)(ip->ip_data.size - *leofp));
+               radix = hammer2_allocsize((size_t)(ipdata->size - *leofp));
                if (radix < HAMMER2_MINALLOCRADIX)
                        radix = HAMMER2_MINALLOCRADIX;
                *leofp += 1U << radix;
index 4a39b15..7d1e899 100644 (file)
@@ -447,7 +447,6 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                                      0);
        while (rchain) {
                if (rchain->bref.type == HAMMER2_BREF_TYPE_INODE &&
-                   rchain->u.ip &&
                    strcmp(label, rchain->data->ipdata.filename) == 0) {
                        break;
                }
@@ -469,11 +468,14 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
        }
        atomic_set_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED);
 
-       hammer2_chain_ref(hmp, rchain); /* for pmp->rchain */
-       hammer2_chain_unlock(hmp, rchain);
-       pmp->rchain = rchain;           /* left held & unlocked */
-       pmp->iroot = rchain->u.ip;      /* implied hold from rchain */
-       pmp->iroot->pmp = pmp;
+       /*
+        * NOTE: *_get() integrates chain's lock into the inode lock.
+        */
+       hammer2_chain_ref(hmp, rchain);         /* for pmp->rchain */
+       pmp->rchain = rchain;                   /* left held & unlocked */
+       pmp->iroot = hammer2_inode_get(pmp, NULL, rchain);
+       hammer2_inode_ref(pmp->iroot);          /* ref for pmp->iroot */
+       hammer2_inode_unlock_ex(pmp->iroot);    /* iroot & its chain */
 
        kprintf("iroot %p\n", pmp->iroot);
 
@@ -582,7 +584,14 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags)
         * Cleanup the root and super-root chain elements (which should be
         * clean).
         */
-       pmp->iroot = NULL;
+       if (pmp->iroot) {
+               hammer2_inode_lock_ex(pmp->iroot);
+               hammer2_inode_put(pmp->iroot);
+               /* lock destroyed by the put */
+               KKASSERT(pmp->iroot->refs == 1);
+               hammer2_inode_drop(pmp->iroot);
+               pmp->iroot = NULL;
+       }
        if (pmp->rchain) {
                atomic_clear_int(&pmp->rchain->flags, HAMMER2_CHAIN_MOUNTED);
                KKASSERT(pmp->rchain->refs == 1);
@@ -661,11 +670,9 @@ hammer2_vfs_root(struct mount *mp, struct vnode **vpp)
                *vpp = NULL;
                error = EINVAL;
        } else {
-               hammer2_chain_lock(hmp, &pmp->iroot->chain,
-                                  HAMMER2_RESOLVE_ALWAYS |
-                                  HAMMER2_RESOLVE_SHARED);
+               hammer2_inode_lock_sh(pmp->iroot);
                vp = hammer2_igetv(pmp->iroot, &error);
-               hammer2_chain_unlock(hmp, &pmp->iroot->chain);
+               hammer2_inode_unlock_sh(pmp->iroot);
                *vpp = vp;
                if (vp == NULL)
                        kprintf("vnodefail\n");
@@ -678,7 +685,7 @@ hammer2_vfs_root(struct mount *mp, struct vnode **vpp)
 /*
  * Filesystem status
  *
- * XXX incorporate pmp->iroot->ip_data.inode_quota and data_quota
+ * XXX incorporate ipdata->inode_quota and data_quota
  */
 static
 int
@@ -690,8 +697,7 @@ hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
        pmp = MPTOPMP(mp);
        hmp = MPTOHMP(mp);
 
-       mp->mnt_stat.f_files = pmp->iroot->ip_data.inode_count +
-                              pmp->iroot->delta_icount;
+       mp->mnt_stat.f_files = pmp->inode_count;
        mp->mnt_stat.f_ffree = 0;
        mp->mnt_stat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE;
        mp->mnt_stat.f_bfree = (hmp->voldata.allocator_size -
@@ -713,8 +719,7 @@ hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred)
        hmp = MPTOHMP(mp);
 
        mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE;
-       mp->mnt_vstat.f_files = pmp->iroot->ip_data.inode_count +
-                               pmp->iroot->delta_icount;
+       mp->mnt_vstat.f_files = pmp->inode_count;
        mp->mnt_vstat.f_ffree = 0;
        mp->mnt_vstat.f_blocks = hmp->voldata.allocator_size / HAMMER2_PBUFSIZE;
        mp->mnt_vstat.f_bfree = (hmp->voldata.allocator_size -
@@ -860,8 +865,7 @@ hammer2_sync_scan1(struct mount *mp, struct vnode *vp, void *data)
 
        ip = VTOI(vp);
        if (vp->v_type == VNON || ip == NULL ||
-           ((ip->chain.flags & (HAMMER2_CHAIN_MODIFIED |
-                                HAMMER2_CHAIN_DIRTYEMBED)) == 0 &&
+           ((ip->flags & HAMMER2_INODE_MODIFIED) == 0 &&
             RB_EMPTY(&vp->v_rbdirty_tree))) {
                return(-1);
        }
@@ -877,9 +881,8 @@ hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
 
        ip = VTOI(vp);
        if (vp->v_type == VNON || vp->v_type == VBAD ||
-           ((ip->chain.flags & (HAMMER2_CHAIN_MODIFIED |
-                                HAMMER2_CHAIN_DIRTYEMBED)) == 0 &&
-           RB_EMPTY(&vp->v_rbdirty_tree))) {
+           ((ip->flags & HAMMER2_INODE_MODIFIED) == 0 &&
+            RB_EMPTY(&vp->v_rbdirty_tree))) {
                return(0);
        }
        error = VOP_FSYNC(vp, MNT_NOWAIT, 0);
@@ -1018,6 +1021,7 @@ hammer2_install_volume_header(hammer2_mount_t *hmp)
 void
 hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp)
 {
+       hammer2_inode_data_t *ipdata;
        size_t name_len;
 
        /*
@@ -1030,11 +1034,14 @@ hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp)
        /*
         * Setup LNK_CONN fields for autoinitiated state machine
         */
-       pmp->iocom.auto_lnk_conn.pfs_clid = pmp->iroot->ip_data.pfs_clid;
-       pmp->iocom.auto_lnk_conn.pfs_fsid = pmp->iroot->ip_data.pfs_fsid;
-       pmp->iocom.auto_lnk_conn.pfs_type = pmp->iroot->ip_data.pfs_type;
+       hammer2_inode_lock_ex(pmp->iroot);
+       ipdata = &pmp->iroot->chain->data->ipdata;
+       pmp->iocom.auto_lnk_conn.pfs_clid = ipdata->pfs_clid;
+       pmp->iocom.auto_lnk_conn.pfs_fsid = ipdata->pfs_fsid;
+       pmp->iocom.auto_lnk_conn.pfs_type = ipdata->pfs_type;
        pmp->iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1;
        pmp->iocom.auto_lnk_conn.peer_type = pmp->hmp->voldata.peer_type;
+       hammer2_inode_unlock_ex(pmp->iroot);
 
        /*
         * Filter adjustment.  Clients do not need visibility into other
@@ -1043,7 +1050,7 @@ hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp)
         */
        pmp->iocom.auto_lnk_conn.peer_mask = 1LLU << HAMMER2_PEER_HAMMER2;
        pmp->iocom.auto_lnk_conn.pfs_mask = (uint64_t)-1;
-       switch (pmp->iroot->ip_data.pfs_type) {
+       switch (ipdata->pfs_type) {
        case DMSG_PFSTYPE_CLIENT:
                pmp->iocom.auto_lnk_conn.peer_mask &=
                                ~(1LLU << DMSG_PFSTYPE_CLIENT);
@@ -1052,10 +1059,10 @@ hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp)
                break;
        }
 
-       name_len = pmp->iroot->ip_data.name_len;
+       name_len = ipdata->name_len;
        if (name_len >= sizeof(pmp->iocom.auto_lnk_conn.fs_label))
                name_len = sizeof(pmp->iocom.auto_lnk_conn.fs_label) - 1;
-       bcopy(pmp->iroot->ip_data.filename,
+       bcopy(ipdata->filename,
              pmp->iocom.auto_lnk_conn.fs_label,
              name_len);
        pmp->iocom.auto_lnk_conn.fs_label[name_len] = 0;
@@ -1063,15 +1070,15 @@ hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp)
        /*
         * Setup LNK_SPAN fields for autoinitiated state machine
         */
-       pmp->iocom.auto_lnk_span.pfs_clid = pmp->iroot->ip_data.pfs_clid;
-       pmp->iocom.auto_lnk_span.pfs_fsid = pmp->iroot->ip_data.pfs_fsid;
-       pmp->iocom.auto_lnk_span.pfs_type = pmp->iroot->ip_data.pfs_type;
+       pmp->iocom.auto_lnk_span.pfs_clid = ipdata->pfs_clid;
+       pmp->iocom.auto_lnk_span.pfs_fsid = ipdata->pfs_fsid;
+       pmp->iocom.auto_lnk_span.pfs_type = ipdata->pfs_type;
        pmp->iocom.auto_lnk_span.peer_type = pmp->hmp->voldata.peer_type;
        pmp->iocom.auto_lnk_span.proto_version = DMSG_SPAN_PROTO_1;
-       name_len = pmp->iroot->ip_data.name_len;
+       name_len = ipdata->name_len;
        if (name_len >= sizeof(pmp->iocom.auto_lnk_span.fs_label))
                name_len = sizeof(pmp->iocom.auto_lnk_span.fs_label) - 1;
-       bcopy(pmp->iroot->ip_data.filename,
+       bcopy(ipdata->filename,
              pmp->iocom.auto_lnk_span.fs_label,
              name_len);
        pmp->iocom.auto_lnk_span.fs_label[name_len] = 0;
index 89e5711..a56120d 100644 (file)
@@ -95,17 +95,17 @@ hammer2_vop_inactive(struct vop_inactive_args *ap)
         * the strategy code.  Simply mark the inode modified so it gets
         * picked up by our normal flush.
         */
-       if (ip->chain.flags & HAMMER2_CHAIN_DIRTYEMBED) {
+       if (ip->flags & HAMMER2_INODE_DIRTYEMBED) {
                hammer2_inode_lock_ex(ip);
-               atomic_clear_int(&ip->chain.flags, HAMMER2_CHAIN_DIRTYEMBED);
-               hammer2_chain_modify(ip->hmp, &ip->chain, 0);
+               atomic_clear_int(&ip->flags, HAMMER2_INODE_DIRTYEMBED);
+               hammer2_chain_modify(ip->hmp, ip->chain, 0);
                hammer2_inode_unlock_ex(ip);
        }
 
        /*
         * Check for deleted inodes and recycle immediately.
         */
-       if (ip->chain.flags & HAMMER2_CHAIN_DELETED) {
+       if (ip->flags & HAMMER2_INODE_DELETED) {
                vrecycle(vp);
        }
        return (0);
@@ -136,13 +136,17 @@ hammer2_vop_reclaim(struct vop_reclaim_args *ap)
        hammer2_inode_lock_ex(ip);
        vp->v_data = NULL;
        ip->vp = NULL;
-       if (ip->chain.flags & HAMMER2_CHAIN_DELETED) {
-               atomic_set_int(&ip->chain.flags, HAMMER2_CHAIN_DESTROYED |
+       if (ip->flags & HAMMER2_INODE_DELETED) {
+               KKASSERT(ip->chain->flags & HAMMER2_CHAIN_DELETED);
+               atomic_set_int(&ip->chain->flags, HAMMER2_CHAIN_DESTROYED |
                                                 HAMMER2_CHAIN_SUBMODIFIED);
        }
-       hammer2_chain_flush(hmp, &ip->chain, 0);
-       hammer2_inode_unlock_ex(ip);
-       hammer2_chain_drop(hmp, &ip->chain);    /* vp ref */
+       hammer2_chain_flush(hmp, ip->chain, 0);
+       if (ip->refs > 2)                       /* (our lock + vp ref) */
+               hammer2_inode_unlock_ex(ip);    /* unlock */
+       else
+               hammer2_inode_put(ip);          /* unlock & disconnect */
+       hammer2_inode_drop(ip);                 /* vp ref */
 
        /*
         * XXX handle background sync when ip dirty, kernel will no longer
@@ -173,9 +177,9 @@ hammer2_vop_fsync(struct vop_fsync_args *ap)
         * the strategy code.  Simply mark the inode modified so it gets
         * picked up by our normal flush.
         */
-       if (ip->chain.flags & HAMMER2_CHAIN_DIRTYEMBED) {
-               atomic_clear_int(&ip->chain.flags, HAMMER2_CHAIN_DIRTYEMBED);
-               hammer2_chain_modify(hmp, &ip->chain, 0);
+       if (ip->flags & HAMMER2_INODE_DIRTYEMBED) {
+               atomic_clear_int(&ip->flags, HAMMER2_INODE_DIRTYEMBED);
+               hammer2_chain_modify(hmp, ip->chain, 0);
        }
 
        /*
@@ -187,7 +191,7 @@ hammer2_vop_fsync(struct vop_fsync_args *ap)
         * on the volume root as a catch-all, which is far more optimal.
         */
        if (ap->a_flags & VOP_FSYNC_SYSCALL)
-               hammer2_chain_flush(hmp, &ip->chain, 0);
+               hammer2_chain_flush(hmp, ip->chain, 0);
        hammer2_inode_unlock_ex(ip);
        return (0);
 }
@@ -197,15 +201,18 @@ int
 hammer2_vop_access(struct vop_access_args *ap)
 {
        hammer2_inode_t *ip = VTOI(ap->a_vp);
+       hammer2_inode_data_t *ipdata;
        uid_t uid;
        gid_t gid;
        int error;
 
-       uid = hammer2_to_unix_xid(&ip->ip_data.uid);
-       gid = hammer2_to_unix_xid(&ip->ip_data.gid);
+       hammer2_inode_lock_sh(ip);
+       ipdata = &ip->chain->data->ipdata;
+       uid = hammer2_to_unix_xid(&ipdata->uid);
+       gid = hammer2_to_unix_xid(&ipdata->gid);
+       error = vop_helper_access(ap, uid, gid, ipdata->mode, ipdata->uflags);
+       hammer2_inode_unlock_sh(ip);
 
-       error = vop_helper_access(ap, uid, gid, ip->ip_data.mode,
-                                 ip->ip_data.uflags);
        return (error);
 }
 
@@ -213,6 +220,7 @@ static
 int
 hammer2_vop_getattr(struct vop_getattr_args *ap)
 {
+       hammer2_inode_data_t *ipdata;
        hammer2_pfsmount_t *pmp;
        hammer2_inode_t *ip;
        struct vnode *vp;
@@ -225,27 +233,28 @@ hammer2_vop_getattr(struct vop_getattr_args *ap)
        pmp = ip->pmp;
 
        hammer2_inode_lock_sh(ip);
+       ipdata = &ip->chain->data->ipdata;
 
        vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
-       vap->va_fileid = ip->ip_data.inum;
-       vap->va_mode = ip->ip_data.mode;
-       vap->va_nlink = ip->ip_data.nlinks;
-       vap->va_uid = hammer2_to_unix_xid(&ip->ip_data.uid);
-       vap->va_gid = hammer2_to_unix_xid(&ip->ip_data.gid);
+       vap->va_fileid = ipdata->inum;
+       vap->va_mode = ipdata->mode;
+       vap->va_nlink = ipdata->nlinks;
+       vap->va_uid = hammer2_to_unix_xid(&ipdata->uid);
+       vap->va_gid = hammer2_to_unix_xid(&ipdata->gid);
        vap->va_rmajor = 0;
        vap->va_rminor = 0;
-       vap->va_size = ip->ip_data.size;
+       vap->va_size = ipdata->size;
        vap->va_blocksize = HAMMER2_PBUFSIZE;
-       vap->va_flags = ip->ip_data.uflags;
-       hammer2_time_to_timespec(ip->ip_data.ctime, &vap->va_ctime);
-       hammer2_time_to_timespec(ip->ip_data.mtime, &vap->va_mtime);
-       hammer2_time_to_timespec(ip->ip_data.mtime, &vap->va_atime);
+       vap->va_flags = ipdata->uflags;
+       hammer2_time_to_timespec(ipdata->ctime, &vap->va_ctime);
+       hammer2_time_to_timespec(ipdata->mtime, &vap->va_mtime);
+       hammer2_time_to_timespec(ipdata->mtime, &vap->va_atime);
        vap->va_gen = 1;
        vap->va_bytes = vap->va_size;   /* XXX */
-       vap->va_type = hammer2_get_vtype(ip);
+       vap->va_type = hammer2_get_vtype(ip->chain);
        vap->va_filerev = 0;
-       vap->va_uid_uuid = ip->ip_data.uid;
-       vap->va_gid_uuid = ip->ip_data.gid;
+       vap->va_uid_uuid = ipdata->uid;
+       vap->va_gid_uuid = ipdata->gid;
        vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
                          VA_FSID_UUID_VALID;
 
@@ -258,6 +267,7 @@ static
 int
 hammer2_vop_setattr(struct vop_setattr_args *ap)
 {
+       hammer2_inode_data_t *ipdata;
        hammer2_mount_t *hmp;
        hammer2_inode_t *ip;
        struct vnode *vp;
@@ -278,37 +288,38 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
                return(EROFS);
 
        hammer2_inode_lock_ex(ip);
+       ipdata = &ip->chain->data->ipdata;
        error = 0;
 
        if (vap->va_flags != VNOVAL) {
                u_int32_t flags;
 
-               flags = ip->ip_data.uflags;
+               flags = ipdata->uflags;
                error = vop_helper_setattr_flags(&flags, vap->va_flags,
-                                        hammer2_to_unix_xid(&ip->ip_data.uid),
+                                        hammer2_to_unix_xid(&ipdata->uid),
                                         ap->a_cred);
                if (error == 0) {
-                       if (ip->ip_data.uflags != flags) {
-                               hammer2_chain_modify(hmp, &ip->chain, 0);
-                               ip->ip_data.uflags = flags;
-                               ip->ip_data.ctime = ctime;
+                       if (ipdata->uflags != flags) {
+                               hammer2_chain_modify(hmp, ip->chain, 0);
+                               ipdata->uflags = flags;
+                               ipdata->ctime = ctime;
                                kflags |= NOTE_ATTRIB;
                        }
-                       if (ip->ip_data.uflags & (IMMUTABLE | APPEND)) {
+                       if (ipdata->uflags & (IMMUTABLE | APPEND)) {
                                error = 0;
                                goto done;
                        }
                }
                goto done;
        }
-       if (ip->ip_data.uflags & (IMMUTABLE | APPEND)) {
+       if (ipdata->uflags & (IMMUTABLE | APPEND)) {
                error = EPERM;
                goto done;
        }
        if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
-               mode_t cur_mode = ip->ip_data.mode;
-               uid_t cur_uid = hammer2_to_unix_xid(&ip->ip_data.uid);
-               gid_t cur_gid = hammer2_to_unix_xid(&ip->ip_data.gid);
+               mode_t cur_mode = ipdata->mode;
+               uid_t cur_uid = hammer2_to_unix_xid(&ipdata->uid);
+               gid_t cur_gid = hammer2_to_unix_xid(&ipdata->gid);
                uuid_t uuid_uid;
                uuid_t uuid_gid;
 
@@ -318,17 +329,15 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
                if (error == 0) {
                        hammer2_guid_to_uuid(&uuid_uid, cur_uid);
                        hammer2_guid_to_uuid(&uuid_gid, cur_gid);
-                       if (bcmp(&uuid_uid, &ip->ip_data.uid,
-                                sizeof(uuid_uid)) ||
-                           bcmp(&uuid_gid, &ip->ip_data.gid,
-                                sizeof(uuid_gid)) ||
-                           ip->ip_data.mode != cur_mode
+                       if (bcmp(&uuid_uid, &ipdata->uid, sizeof(uuid_uid)) ||
+                           bcmp(&uuid_gid, &ipdata->gid, sizeof(uuid_gid)) ||
+                           ipdata->mode != cur_mode
                        ) {
-                               hammer2_chain_modify(hmp, &ip->chain, 0);
-                               ip->ip_data.uid = uuid_uid;
-                               ip->ip_data.gid = uuid_gid;
-                               ip->ip_data.mode = cur_mode;
-                               ip->ip_data.ctime = ctime;
+                               hammer2_chain_modify(hmp, ip->chain, 0);
+                               ipdata->uid = uuid_uid;
+                               ipdata->gid = uuid_gid;
+                               ipdata->mode = cur_mode;
+                               ipdata->ctime = ctime;
                        }
                        kflags |= NOTE_ATTRIB;
                }
@@ -337,12 +346,12 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
        /*
         * Resize the file
         */
-       if (vap->va_size != VNOVAL && ip->ip_data.size != vap->va_size) {
+       if (vap->va_size != VNOVAL && ipdata->size != vap->va_size) {
                switch(vp->v_type) {
                case VREG:
-                       if (vap->va_size == ip->ip_data.size)
+                       if (vap->va_size == ipdata->size)
                                break;
-                       if (vap->va_size < ip->ip_data.size) {
+                       if (vap->va_size < ipdata->size) {
                                hammer2_truncate_file(ip, vap->va_size);
                        } else {
                                hammer2_extend_file(ip, vap->va_size);
@@ -357,26 +366,26 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
 #if 0
        /* atime not supported */
        if (vap->va_atime.tv_sec != VNOVAL) {
-               hammer2_chain_modify(hmp, &ip->chain, 0);
-               ip->ip_data.atime = hammer2_timespec_to_time(&vap->va_atime);
+               hammer2_chain_modify(hmp, ip->chain, 0);
+               ipdata->atime = hammer2_timespec_to_time(&vap->va_atime);
                kflags |= NOTE_ATTRIB;
        }
 #endif
        if (vap->va_mtime.tv_sec != VNOVAL) {
-               hammer2_chain_modify(hmp, &ip->chain, 0);
-               ip->ip_data.mtime = hammer2_timespec_to_time(&vap->va_mtime);
+               hammer2_chain_modify(hmp, ip->chain, 0);
+               ipdata->mtime = hammer2_timespec_to_time(&vap->va_mtime);
                kflags |= NOTE_ATTRIB;
        }
        if (vap->va_mode != (mode_t)VNOVAL) {
-               mode_t cur_mode = ip->ip_data.mode;
-               uid_t cur_uid = hammer2_to_unix_xid(&ip->ip_data.uid);
-               gid_t cur_gid = hammer2_to_unix_xid(&ip->ip_data.gid);
+               mode_t cur_mode = ipdata->mode;
+               uid_t cur_uid = hammer2_to_unix_xid(&ipdata->uid);
+               gid_t cur_gid = hammer2_to_unix_xid(&ipdata->gid);
 
                error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
                                         cur_uid, cur_gid, &cur_mode);
-               if (error == 0 && ip->ip_data.mode != cur_mode) {
-                       ip->ip_data.mode = cur_mode;
-                       ip->ip_data.ctime = ctime;
+               if (error == 0 && ipdata->mode != cur_mode) {
+                       ipdata->mode = cur_mode;
+                       ipdata->ctime = ctime;
                        kflags |= NOTE_ATTRIB;
                }
        }
@@ -389,11 +398,13 @@ static
 int
 hammer2_vop_readdir(struct vop_readdir_args *ap)
 {
+       hammer2_inode_data_t *ipdata;
        hammer2_mount_t *hmp;
        hammer2_inode_t *ip;
        hammer2_inode_t *xip;
        hammer2_chain_t *parent;
        hammer2_chain_t *chain;
+       hammer2_tid_t inum;
        hammer2_key_t lkey;
        struct uio *uio;
        off_t *cookies;
@@ -423,6 +434,9 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
        }
        cookie_index = 0;
 
+       hammer2_inode_lock_sh(ip);
+       ipdata = &ip->chain->data->ipdata;
+
        /*
         * Handle artificial entries.  To ensure that only positive 64 bit
         * quantities are returned to userland we always strip off bit 63.
@@ -436,10 +450,8 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
        chain = (void *)(intptr_t)-1;   /* non-NULL for early goto done case */
 
        if (saveoff == 0) {
-               r = vop_write_dirent(&error, uio,
-                                    ip->ip_data.inum &
-                                       HAMMER2_DIRHASH_USERMSK,
-                                    DT_DIR, 1, ".");
+               inum = ipdata->inum & HAMMER2_DIRHASH_USERMSK;
+               r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, ".");
                if (r)
                        goto done;
                if (cookies)
@@ -449,16 +461,28 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
                if (cookie_index == ncookies)
                        goto done;
        }
+
        if (saveoff == 1) {
-               if (ip->pip == NULL || ip == ip->pmp->iroot)
-                       xip = ip;
-               else
+               /*
+                * Be careful with lockorder when accessing ".."
+                */
+               inum = ip->chain->data->ipdata.inum & HAMMER2_DIRHASH_USERMSK;
+               while (ip->pip != NULL && ip != ip->pmp->iroot) {
                        xip = ip->pip;
-
-               r = vop_write_dirent(&error, uio,
-                                    xip->ip_data.inum &
-                                     HAMMER2_DIRHASH_USERMSK,
-                                    DT_DIR, 2, "..");
+                       hammer2_inode_ref(xip);
+                       hammer2_inode_unlock_sh(ip);
+                       hammer2_inode_lock_sh(xip);
+                       hammer2_inode_lock_sh(ip);
+                       hammer2_inode_drop(xip);
+                       if (xip == ip->pip) {
+                               inum = xip->chain->data->ipdata.inum &
+                                      HAMMER2_DIRHASH_USERMSK;
+                               hammer2_inode_unlock_sh(xip);
+                               break;
+                       }
+                       hammer2_inode_unlock_sh(xip);
+               }
+               r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, "..");
                if (r)
                        goto done;
                if (cookies)
@@ -471,11 +495,10 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
 
        lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
 
-       parent = &ip->chain;
+       parent = ip->chain;
        error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS |
                                                HAMMER2_RESOLVE_SHARED);
        if (error) {
-               hammer2_chain_unlock(hmp, parent);
                goto done;
        }
        chain = hammer2_chain_lookup(hmp, &parent, lkey, lkey,
@@ -487,13 +510,14 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
        }
        while (chain) {
                if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
-                       dtype = hammer2_get_dtype(chain->u.ip);
+                       dtype = hammer2_get_dtype(chain);
                        saveoff = chain->bref.key & HAMMER2_DIRHASH_USERMSK;
                        r = vop_write_dirent(&error, uio,
-                                            chain->u.ip->ip_data.inum &
+                                            chain->data->ipdata.inum &
                                              HAMMER2_DIRHASH_USERMSK,
-                                            dtype, chain->u.ip->ip_data.name_len,
-                                            chain->u.ip->ip_data.filename);
+                                            dtype,
+                                            chain->data->ipdata.name_len,
+                                            chain->data->ipdata.filename);
                        if (r)
                                break;
                        if (cookies)
@@ -527,6 +551,7 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
                hammer2_chain_unlock(hmp, chain);
        hammer2_chain_unlock(hmp, parent);
 done:
+       hammer2_inode_unlock_sh(ip);
        if (ap->a_eofflag)
                *ap->a_eofflag = (chain == NULL);
        uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
@@ -662,11 +687,14 @@ hammer2_vop_write(struct vop_write_args *ap)
 /*
  * Perform read operations on a file or symlink given an UNLOCKED
  * inode and uio.
+ *
+ * The passed ip is not locked.
  */
 static
 int
 hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
 {
+       hammer2_inode_data_t *ipdata;
        struct buf *bp;
        int error;
 
@@ -675,7 +703,9 @@ hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
        /*
         * UIO read loop
         */
-       while (uio->uio_resid > 0 && uio->uio_offset < ip->ip_data.size) {
+       hammer2_inode_lock_sh(ip);
+       ipdata = &ip->chain->data->ipdata;
+       while (uio->uio_resid > 0 && uio->uio_offset < ipdata->size) {
                hammer2_key_t lbase;
                hammer2_key_t leof;
                int lblksize;
@@ -695,12 +725,16 @@ hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
                n = lblksize - loff;
                if (n > uio->uio_resid)
                        n = uio->uio_resid;
-               if (n > ip->ip_data.size - uio->uio_offset)
-                       n = (int)(ip->ip_data.size - uio->uio_offset);
+               if (n > ipdata->size - uio->uio_offset)
+                       n = (int)(ipdata->size - uio->uio_offset);
                bp->b_flags |= B_AGE;
+               hammer2_inode_unlock_sh(ip);
                uiomove((char *)bp->b_data + loff, n, uio);
                bqrelse(bp);
+               hammer2_inode_lock_sh(ip);
+               ipdata = &ip->chain->data->ipdata;      /* reload */
        }
+       hammer2_inode_unlock_sh(ip);
        return (error);
 }
 
@@ -713,6 +747,7 @@ int
 hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
                   int ioflag, int seqcount)
 {
+       hammer2_inode_data_t *ipdata;
        hammer2_key_t old_eof;
        struct buf *bp;
        int kflags;
@@ -722,8 +757,9 @@ hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
        /*
         * Setup if append
         */
+       ipdata = &ip->chain->data->ipdata;
        if (ioflag & IO_APPEND)
-               uio->uio_offset = ip->ip_data.size;
+               uio->uio_offset = ipdata->size;
        kflags = 0;
        error = 0;
 
@@ -735,8 +771,8 @@ hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
         * Doing this now makes it easier to calculate buffer sizes in
         * the loop.
         */
-       old_eof = ip->ip_data.size;
-       if (uio->uio_offset + uio->uio_resid > ip->ip_data.size) {
+       old_eof = ipdata->size;
+       if (uio->uio_offset + uio->uio_resid > ipdata->size) {
                modified = 1;
                hammer2_extend_file(ip, uio->uio_offset + uio->uio_resid);
                kflags |= NOTE_EXTEND;
@@ -762,10 +798,10 @@ hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
                         * XXX should try to leave this unlocked through
                         *      the whole loop
                         */
-                       hammer2_chain_unlock(ip->hmp, &ip->chain);
+                       hammer2_inode_unlock_ex(ip);
                        bwillwrite(HAMMER2_PBUFSIZE);
-                       hammer2_chain_lock(ip->hmp, &ip->chain,
-                                          HAMMER2_RESOLVE_ALWAYS);
+                       hammer2_inode_lock_ex(ip);
+                       ipdata = &ip->chain->data->ipdata; /* reload */
                }
 
                /* XXX bigwrite & signal check test */
@@ -788,7 +824,7 @@ hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
                n = lblksize - loff;
                if (n > uio->uio_resid) {
                        n = uio->uio_resid;
-                       if (uio->uio_offset + n == ip->ip_data.size)
+                       if (uio->uio_offset + n == ipdata->size)
                                trivial = 1;
                } else if (loff == 0) {
                        trivial = 1;
@@ -855,9 +891,10 @@ hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
                /*
                 * Ok, copy the data in
                 */
-               hammer2_chain_unlock(ip->hmp, &ip->chain);
+               hammer2_inode_unlock_ex(ip);
                error = uiomove(bp->b_data + loff, n, uio);
-               hammer2_chain_lock(ip->hmp, &ip->chain, HAMMER2_RESOLVE_ALWAYS);
+               hammer2_inode_lock_ex(ip);
+               ipdata = &ip->chain->data->ipdata;      /* reload */
                kflags |= NOTE_WRITE;
                modified = 1;
 
@@ -899,11 +936,11 @@ hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
         * Cleanup.  If we extended the file EOF but failed to write through
         * the entire write is a failure and we have to back-up.
         */
-       if (error && ip->ip_data.size != old_eof) {
+       if (error && ipdata->size != old_eof) {
                hammer2_truncate_file(ip, old_eof);
        } else if (modified) {
-               hammer2_chain_modify(ip->hmp, &ip->chain, 0);
-               hammer2_update_time(&ip->ip_data.mtime);
+               hammer2_chain_modify(ip->hmp, ip->chain, 0);
+               hammer2_update_time(&ipdata->mtime);
        }
        hammer2_knote(ip->vp, kflags);
        return error;
@@ -934,9 +971,10 @@ hammer2_assign_physical(hammer2_inode_t *ip, hammer2_key_t lbase,
         * logical buffer cache buffer.
         */
        hmp = ip->hmp;
-retry:
        *errorp = 0;
-       parent = &ip->chain;
+       hammer2_inode_lock_ex(ip);
+retry:
+       parent = ip->chain;
        hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
 
        chain = hammer2_chain_lookup(hmp, &parent,
@@ -961,7 +999,7 @@ retry:
                }
 
                pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
-               ip->delta_dcount += lblksize;
+               /*ip->delta_dcount += lblksize;*/
        } else {
                switch (chain->bref.type) {
                case HAMMER2_BREF_TYPE_INODE:
@@ -1001,7 +1039,7 @@ retry:
 /*
  * Truncate the size of a file.
  *
- * This routine adjusts ip->ip_data.size smaller, destroying any related
+ * This routine adjusts ipdata->size smaller, destroying any related
  * data beyond the new EOF and potentially resizing the block straddling
  * the EOF.
  *
@@ -1011,6 +1049,7 @@ static
 void
 hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
 {
+       hammer2_inode_data_t *ipdata;
        hammer2_chain_t *parent;
        hammer2_chain_t *chain;
        hammer2_mount_t *hmp = ip->hmp;
@@ -1022,8 +1061,9 @@ hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
        int oblksize;
        int nblksize;
 
-       hammer2_chain_modify(hmp, &ip->chain, 0);
+       hammer2_chain_modify(hmp, ip->chain, 0);
        bp = NULL;
+       ipdata = &ip->chain->data->ipdata;
 
        /*
         * Destroy any logical buffer cache buffers beyond the file EOF.
@@ -1041,7 +1081,7 @@ hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
        /*
         * Setup for lookup/search
         */
-       parent = &ip->chain;
+       parent = ip->chain;
        error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
        if (error) {
                hammer2_chain_unlock(hmp, parent);
@@ -1061,7 +1101,7 @@ hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
                error = bread(ip->vp, lbase, oblksize, &bp);
                KKASSERT(error == 0);
        }
-       ip->ip_data.size = nsize;
+       ipdata->size = nsize;
        nblksize = hammer2_calc_logical(ip, nsize, &lbase, &leof);
 
        /*
@@ -1168,7 +1208,7 @@ hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
                 * Delete physical data blocks past the file EOF.
                 */
                if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
-                       ip->delta_dcount -= chain->bytes;
+                       /*ip->delta_dcount -= chain->bytes;*/
                        hammer2_chain_delete(hmp, parent, chain, 0);
                }
                /* XXX check parent if empty indirect block & delete */
@@ -1188,6 +1228,7 @@ static
 void
 hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
 {
+       hammer2_inode_data_t *ipdata;
        hammer2_mount_t *hmp;
        hammer2_chain_t *parent;
        hammer2_chain_t *chain;
@@ -1204,16 +1245,17 @@ hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
        KKASSERT(ip->vp);
        hmp = ip->hmp;
 
-       hammer2_chain_modify(hmp, &ip->chain, 0);
+       hammer2_chain_modify(hmp, ip->chain, 0);
+       ipdata = &ip->chain->data->ipdata;
 
        /*
         * Nothing to do if the direct-data case is still intact
         */
-       if ((ip->ip_data.op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
+       if ((ipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
            nsize <= HAMMER2_EMBEDDED_BYTES) {
-               ip->ip_data.size = nsize;
+               ipdata->size = nsize;
                nvextendbuf(ip->vp,
-                           ip->ip_data.size, nsize,
+                           ipdata->size, nsize,
                            0, HAMMER2_EMBEDDED_BYTES,
                            0, (int)nsize,
                            1);
@@ -1224,9 +1266,9 @@ hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
         * Calculate the blocksize at the original EOF and resize the block
         * if necessary.  Adjust the file size in the inode.
         */
-       osize = ip->ip_data.size;
+       osize = ipdata->size;
        oblksize = hammer2_calc_logical(ip, osize, &obase, &leof);
-       ip->ip_data.size = nsize;
+       ipdata->size = nsize;
        nblksize = hammer2_calc_logical(ip, osize, &nbase, &leof);
 
        /*
@@ -1234,7 +1276,7 @@ hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
         * buffer straddling the orignal EOF.
         */
        nvextendbuf(ip->vp,
-                   ip->ip_data.size, nsize,
+                   ipdata->size, nsize,
                    0, nblksize,
                    0, (int)nsize & HAMMER2_PBUFMASK,
                    1);
@@ -1243,7 +1285,7 @@ hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
         * Early return if we have no more work to do.
         */
        if (obase == nbase && oblksize == nblksize &&
-           (ip->ip_data.op_flags & HAMMER2_OPFLAG_DIRECTDATA) == 0) {
+           (ipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) == 0) {
                return;
        }
 
@@ -1270,9 +1312,9 @@ hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
         * with the data, then converting the inode data area into the
         * inode indirect block array area.
         */
-       if (ip->ip_data.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
-               ip->ip_data.op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
-               bzero(&ip->ip_data.u.blockset, sizeof(ip->ip_data.u.blockset));
+       if (ipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
+               ipdata->op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
+               bzero(&ipdata->u.blockset, sizeof(ipdata->u.blockset));
        }
 
        /*
@@ -1280,7 +1322,7 @@ hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
         */
        if (((int)osize & HAMMER2_PBUFMASK)) {
 retry:
-               parent = &ip->chain;
+               parent = ip->chain;
                error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
                KKASSERT(error == 0);
 
@@ -1299,7 +1341,7 @@ retry:
                                hammer2_chain_unlock(hmp, parent);
                                goto retry;
                        }
-                       ip->delta_dcount += nblksize;
+                       /*ip->delta_dcount += nblksize;*/
                } else {
                        KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA);
                        hammer2_chain_resize(ip, chain, nradix,
@@ -1320,10 +1362,10 @@ int
 hammer2_vop_nresolve(struct vop_nresolve_args *ap)
 {
        hammer2_inode_t *dip;
-       hammer2_inode_t *ip;
        hammer2_mount_t *hmp;
        hammer2_chain_t *parent;
        hammer2_chain_t *chain;
+       hammer2_chain_t *ochain;
        struct namecache *ncp;
        const uint8_t *name;
        size_t name_len;
@@ -1341,7 +1383,8 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap)
        /*
         * Note: In DragonFly the kernel handles '.' and '..'.
         */
-       parent = &dip->chain;
+       hammer2_inode_lock_sh(dip);
+       parent = dip->chain;
        hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS |
                                        HAMMER2_RESOLVE_SHARED);
        chain = hammer2_chain_lookup(hmp, &parent,
@@ -1359,6 +1402,7 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap)
                                           HAMMER2_LOOKUP_SHARED);
        }
        hammer2_chain_unlock(hmp, parent);
+       hammer2_inode_unlock_sh(dip);
 
        /*
         * If the inode represents a forwarding entry for a hardlink we have
@@ -1370,9 +1414,9 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap)
         *
         * XXX what kind of chain lock?
         */
-       ip = NULL;
-       if (chain && chain->u.ip->ip_data.type == HAMMER2_OBJTYPE_HARDLINK) {
-               error = hammer2_hardlink_find(dip, &chain, &ip);
+       ochain = NULL;
+       if (chain && chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
+               error = hammer2_hardlink_find(dip, &chain, &ochain);
                if (error) {
                        kprintf("hammer2: unable to find hardlink\n");
                        if (chain) {
@@ -1389,10 +1433,11 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap)
         *
         * XXX upgrade shared lock?
         */
-       if (ip && chain && chain->u.ip->ip_data.nlinks == 1 && !hmp->ronly) {
+       if (ochain && chain && chain->data->ipdata.nlinks == 1 && !hmp->ronly) {
                kprintf("hammer2: need to unconsolidate hardlink for %s\n",
-                       chain->u.ip->ip_data.filename);
-               hammer2_hardlink_deconsolidate(dip, &chain, &ip);
+                       chain->data->ipdata.filename);
+               /* XXX retain shared lock on dip? (currently not held) */
+               hammer2_hardlink_deconsolidate(dip, &chain, &ochain);
        }
 
        /*
@@ -1420,8 +1465,8 @@ failed:
        KASSERT(error || ap->a_nch->ncp->nc_vp != NULL,
                ("resolve error %d/%p chain %p ap %p\n",
                 error, ap->a_nch->ncp->nc_vp, chain, ap));
-       if (ip)
-               hammer2_inode_drop(ip);
+       if (ochain)
+               hammer2_chain_drop(hmp, ochain);
        return error;
 }
 
@@ -1441,9 +1486,9 @@ hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
                *ap->a_vpp = NULL;
                return ENOENT;
        }
-       hammer2_chain_lock(hmp, &ip->chain, HAMMER2_RESOLVE_ALWAYS);
+       hammer2_inode_lock_ex(ip);
        *ap->a_vpp = hammer2_igetv(ip, &error);
-       hammer2_chain_unlock(hmp, &ip->chain);
+       hammer2_inode_unlock_ex(ip);
 
        return error;
 }
@@ -1477,7 +1522,7 @@ hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
                return error;
        }
        *ap->a_vpp = hammer2_igetv(nip, &error);
-       hammer2_chain_unlock(hmp, &nip->chain);
+       hammer2_inode_unlock_ex(nip);
 
        if (error == 0) {
                cache_setunresolved(ap->a_nch);
@@ -1535,7 +1580,8 @@ hammer2_vop_bmap(struct vop_bmap_args *ap)
                lend = lbeg;
        loff = ap->a_loffset & HAMMER2_OFF_MASK_LO;
 
-       parent = &ip->chain;
+       hammer2_inode_lock_sh(ip);
+       parent = ip->chain;
        hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS |
                                        HAMMER2_RESOLVE_SHARED);
        chain = hammer2_chain_lookup(hmp, &parent,
@@ -1545,6 +1591,7 @@ hammer2_vop_bmap(struct vop_bmap_args *ap)
        if (chain == NULL) {
                *ap->a_doffsetp = ZFOFFSET;
                hammer2_chain_unlock(hmp, parent);
+               hammer2_inode_unlock_sh(ip);
                return (0);
        }
 
@@ -1561,6 +1608,7 @@ hammer2_vop_bmap(struct vop_bmap_args *ap)
                                           HAMMER2_LOOKUP_SHARED);
        }
        hammer2_chain_unlock(hmp, parent);
+       hammer2_inode_unlock_sh(ip);
 
        /*
         * If the requested loffset is not mappable physically we can't
@@ -1605,8 +1653,12 @@ int
 hammer2_vop_advlock(struct vop_advlock_args *ap)
 {
        hammer2_inode_t *ip = VTOI(ap->a_vp);
+       hammer2_off_t size;
 
-       return (lf_advlock(ap, &ip->advlock, ip->ip_data.size));
+       hammer2_inode_lock_sh(ip);
+       size = ip->chain->data->ipdata.size;
+       hammer2_inode_unlock_sh(ip);
+       return (lf_advlock(ap, &ip->advlock, size));
 }
 
 
@@ -1644,7 +1696,7 @@ hammer2_vop_nlink(struct vop_nlink_args *ap)
         * (ip) is the inode we are linking to.
         */
        ip = oip = VTOI(ap->a_vp);
-       hammer2_inode_lock_nlinks(ip);
+       hammer2_inode_ref(ip);
 
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
@@ -1666,7 +1718,7 @@ hammer2_vop_nlink(struct vop_nlink_args *ap)
         * XXX this can race against concurrent vnode ops.
         */
        if (oip != ip) {
-               hammer2_chain_ref(hmp, &ip->chain);
+               hammer2_inode_ref(ip);          /* vp ref+ */
                hammer2_inode_lock_ex(ip);
                hammer2_inode_lock_ex(oip);
                ip->vp = ap->a_vp;
@@ -1674,7 +1726,7 @@ hammer2_vop_nlink(struct vop_nlink_args *ap)
                oip->vp = NULL;
                hammer2_inode_unlock_ex(oip);
                hammer2_inode_unlock_ex(ip);
-               hammer2_chain_drop(hmp, &oip->chain);
+               hammer2_inode_drop(oip);        /* vp ref- */
        }
 
        /*
@@ -1690,7 +1742,7 @@ hammer2_vop_nlink(struct vop_nlink_args *ap)
                cache_setvp(ap->a_nch, ap->a_vp);
        }
 done:
-       hammer2_inode_unlock_nlinks(ip);
+       hammer2_inode_drop(ip);
        return error;
 }
 
@@ -1729,7 +1781,7 @@ hammer2_vop_ncreate(struct vop_ncreate_args *ap)
                return error;
        }
        *ap->a_vpp = hammer2_igetv(nip, &error);
-       hammer2_chain_unlock(hmp, &nip->chain);
+       hammer2_inode_unlock_ex(nip);
 
        if (error == 0) {
                cache_setunresolved(ap->a_nch);
@@ -1780,14 +1832,16 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
                size_t bytes;
                struct uio auio;
                struct iovec aiov;
+               hammer2_inode_data_t *nipdata;
 
+               nipdata = &nip->chain->data->ipdata;
                bytes = strlen(ap->a_target);
 
                if (bytes <= HAMMER2_EMBEDDED_BYTES) {
-                       KKASSERT(nip->ip_data.op_flags &
+                       KKASSERT(nipdata->op_flags &
                                 HAMMER2_OPFLAG_DIRECTDATA);
-                       bcopy(ap->a_target, nip->ip_data.u.data, bytes);
-                       nip->ip_data.size = bytes;
+                       bcopy(ap->a_target, nipdata->u.data, bytes);
+                       nipdata->size = bytes;
                } else {
                        bzero(&auio, sizeof(auio));
                        bzero(&aiov, sizeof(aiov));
@@ -1804,7 +1858,7 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
                        error = 0;
                }
        }
-       hammer2_chain_unlock(hmp, &nip->chain);
+       hammer2_inode_unlock_ex(nip);
 
        /*
         * Finalize namecache
@@ -1932,7 +1986,7 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
         *       unlinking elements from their directories.  Locking
         *       the nlinks field does not lock the whole inode.
         */
-       hammer2_inode_lock_nlinks(ip);
+       hammer2_inode_ref(ip);
 
        /*
         * Remove target if it exists
@@ -1957,10 +2011,14 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
         * If (ip) does not have multiple links we can just copy the physical
         * contents of the inode.
         */
-       if (ip->ip_data.nlinks > 1) {
+       hammer2_inode_lock_sh(ip);
+       if (ip->chain->data->ipdata.nlinks > 1) {
+               hammer2_inode_unlock_sh(ip);
                error = hammer2_hardlink_consolidate(&ip, tdip);
                if (error)
                        goto done;
+       } else {
+               hammer2_inode_unlock_sh(ip);
        }
 
        /*
@@ -1989,7 +2047,7 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
                cache_rename(ap->a_fnch, ap->a_tnch);
        }
 done:
-       hammer2_inode_unlock_nlinks(ip);
+       hammer2_inode_drop(ip);
 
        return (error);
 }
@@ -2058,7 +2116,8 @@ hammer2_strategy_read(struct vop_strategy_args *ap)
         * includes both small-block allocations and inode-embedded data.
         */
        if (nbio->bio_offset == NOOFFSET) {
-               parent = &ip->chain;
+               hammer2_inode_lock_sh(ip);
+               parent = ip->chain;
                hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS |
                                                HAMMER2_RESOLVE_SHARED);
 
@@ -2089,6 +2148,7 @@ hammer2_strategy_read(struct vop_strategy_args *ap)
                        panic("hammer2_strategy_read: unknown bref type");
                }
                hammer2_chain_unlock(hmp, parent);
+               hammer2_inode_unlock_sh(ip);
        }
 
        if (hammer2_debug & 0x0020) {
@@ -2146,9 +2206,14 @@ hammer2_strategy_write(struct vop_strategy_args *ap)
        if (nbio->bio_offset == NOOFFSET) {
                /*
                 * Must be embedded in the inode.
+                *
+                * Because the inode is dirty, the chain must exist whether
+                * the inode is locked or not. XXX
                 */
                KKASSERT(bio->bio_offset == 0);
-               bcopy(bp->b_data, ip->ip_data.u.data, HAMMER2_EMBEDDED_BYTES);
+               KKASSERT(ip->chain && ip->chain->data);
+               bcopy(bp->b_data, ip->chain->data->ipdata.u.data,
+                     HAMMER2_EMBEDDED_BYTES);
                bp->b_resid = 0;
                bp->b_error = 0;
                biodone(nbio);
@@ -2158,7 +2223,7 @@ hammer2_strategy_write(struct vop_strategy_args *ap)
                 * because we might deadlock on ip.  Instead we depend on
                 * VOP_FSYNC() to detect the case.
                 */
-               atomic_set_int(&ip->chain.flags, HAMMER2_CHAIN_DIRTYEMBED);
+               atomic_set_int(&ip->flags, HAMMER2_INODE_DIRTYEMBED);
        } else {
                /*
                 * Forward direct IO to the device