hammer2 - serialized flush work part 2
author Matthew Dillon <dillon@apollo.backplane.com>
Thu, 31 Jan 2013 06:41:52 +0000 (22:41 -0800)
committer Matthew Dillon <dillon@apollo.backplane.com>
Thu, 31 Jan 2013 06:41:52 +0000 (22:41 -0800)
* Adjust the hammer2_inode_lock*() API to separate the inode lock from
  the chain lock (the lock call still acquires both).  The locked chain
  associated with the inode is now returned to the caller; it can change
  during search iterations and is eventually handed back via
  hammer2_inode_unlock*().

* Fixes issues related to threads obtaining more than 1 shared lock on the
  same chain, which can deadlock deeper procedures which might need to
  temporarily upgrade the lock.

* Continue working through bugs.

sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_inode.c
sys/vfs/hammer2/hammer2_ioctl.c
sys/vfs/hammer2/hammer2_subr.c
sys/vfs/hammer2/hammer2_vfsops.c
sys/vfs/hammer2/hammer2_vnops.c

index 9e74734..4a8a124 100644 (file)
@@ -363,10 +363,10 @@ extern long hammer2_ioa_volu_write;
 #define hammer2_icrc32(buf, size)      iscsi_crc32((buf), (size))
 #define hammer2_icrc32c(buf, size, crc)        iscsi_crc32_ext((buf), (size), (crc))
 
-void hammer2_inode_lock_ex(hammer2_inode_t *ip);
-void hammer2_inode_lock_sh(hammer2_inode_t *ip);
-void hammer2_inode_unlock_ex(hammer2_inode_t *ip);
-void hammer2_inode_unlock_sh(hammer2_inode_t *ip);
+hammer2_chain_t *hammer2_inode_lock_ex(hammer2_inode_t *ip);
+hammer2_chain_t *hammer2_inode_lock_sh(hammer2_inode_t *ip);
+void hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_chain_t *chain);
+void hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_chain_t *chain);
 void hammer2_voldata_lock(hammer2_mount_t *hmp);
 void hammer2_voldata_unlock(hammer2_mount_t *hmp);
 ccms_state_t hammer2_inode_lock_temp_release(hammer2_inode_t *ip);
@@ -401,7 +401,7 @@ void hammer2_inode_lock_nlinks(hammer2_inode_t *ip);
 void hammer2_inode_unlock_nlinks(hammer2_inode_t *ip);
 hammer2_inode_t *hammer2_inode_get(hammer2_pfsmount_t *pmp,
                        hammer2_inode_t *dip, hammer2_chain_t *chain);
-void hammer2_inode_put(hammer2_inode_t *ip);
+void hammer2_inode_put(hammer2_inode_t *ip, hammer2_chain_t *chain);
 void hammer2_inode_free(hammer2_inode_t *ip);
 void hammer2_inode_ref(hammer2_inode_t *ip);
 void hammer2_inode_drop(hammer2_inode_t *ip);
@@ -410,10 +410,10 @@ int hammer2_inode_calc_alloc(hammer2_key_t filesize);
 int hammer2_inode_create(hammer2_inode_t *dip,
                        struct vattr *vap, struct ucred *cred,
                        const uint8_t *name, size_t name_len,
-                       hammer2_inode_t **nipp);
+                       hammer2_inode_t **nipp, hammer2_chain_t **nchainp);
 
-int hammer2_inode_duplicate(hammer2_inode_t *dip,
-                       hammer2_inode_t *oip, hammer2_inode_t **nipp,
+int hammer2_inode_duplicate(hammer2_inode_t *dip, hammer2_inode_t *oip,
+                       hammer2_inode_t **nipp, hammer2_chain_t **nchainp,
                        const uint8_t *name, size_t name_len);
 int hammer2_inode_connect(hammer2_inode_t *dip, hammer2_inode_t *oip,
                        const uint8_t *name, size_t name_len);
index 31d802f..195d8fb 100644 (file)
@@ -264,10 +264,9 @@ hammer2_inode_get(hammer2_pfsmount_t *pmp, hammer2_inode_t *dip,
  * by this function.
  */
 void
-hammer2_inode_put(hammer2_inode_t *ip)
+hammer2_inode_put(hammer2_inode_t *ip, hammer2_chain_t *chain)
 {
        hammer2_mount_t *hmp = ip->hmp;
-       hammer2_chain_t *chain = ip->chain;
        hammer2_inode_t *pip;
 
        KKASSERT(chain);
@@ -275,7 +274,7 @@ hammer2_inode_put(hammer2_inode_t *ip)
        KKASSERT(ip->topo_cst.count == -1);     /* one excl lock allowed */
        chain->u.ip = NULL;
        ip->chain = NULL;
-       hammer2_chain_drop(ip->hmp, chain);     /* ref */
+       hammer2_chain_drop(hmp, chain);         /* ref */
 
        /*
         * Disconnect ip from pip & related parent ref.
@@ -285,12 +284,10 @@ hammer2_inode_put(hammer2_inode_t *ip)
         */
        if ((pip = ip->pip) != NULL) {
                ip->pip = NULL;
-               hammer2_inode_unlock_ex(ip);
-               hammer2_chain_unlock(hmp, chain);
+               hammer2_inode_unlock_ex(ip, chain);
                hammer2_inode_drop(pip);
        } else {
-               hammer2_inode_unlock_ex(ip);
-               hammer2_chain_unlock(hmp, chain);
+               hammer2_inode_unlock_ex(ip, chain);
        }
 }
 
@@ -311,7 +308,7 @@ int
 hammer2_inode_create(hammer2_inode_t *dip,
                     struct vattr *vap, struct ucred *cred,
                     const uint8_t *name, size_t name_len,
-                    hammer2_inode_t **nipp)
+                    hammer2_inode_t **nipp, hammer2_chain_t **nchainp)
 {
        hammer2_inode_data_t *nipdata;
        hammer2_mount_t *hmp;
@@ -333,10 +330,12 @@ hammer2_inode_create(hammer2_inode_t *dip,
         * entry in.  At the same time check for key collisions
         * and iterate until we don't get one.
         */
-       hammer2_inode_lock_ex(dip);
 retry:
-       parent = dip->chain;
-       hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
+       parent = hammer2_inode_lock_ex(dip);
+
+       dip_uid = parent->data->ipdata.uid;
+       dip_gid = parent->data->ipdata.gid;
+       dip_mode = parent->data->ipdata.mode;
 
        error = 0;
        while (error == 0) {
@@ -357,13 +356,8 @@ retry:
                                             HAMMER2_INODE_BYTES,
                                             &error);
        }
-       hammer2_chain_unlock(hmp, parent);
 
-       dip_uid = dip->chain->data->ipdata.uid;
-       dip_gid = dip->chain->data->ipdata.gid;
-       dip_mode = dip->chain->data->ipdata.mode;
-
-       hammer2_inode_unlock_ex(dip);
+       hammer2_inode_unlock_ex(dip, parent);
 
        /*
         * Handle the error case
@@ -375,6 +369,7 @@ retry:
                        goto retry;
                }
                *nipp = NULL;
+               *nchainp = NULL;
                return (error);
        }
 
@@ -386,7 +381,8 @@ retry:
        nip = hammer2_inode_get(dip->pmp, dip, chain);
        kprintf("nip %p chain %p\n", nip, nip->chain);
        *nipp = nip;
-       nipdata = &nip->chain->data->ipdata;
+       *nchainp = chain;
+       nipdata = &chain->data->ipdata;
 
        hammer2_voldata_lock(hmp);
        if (vap) {
@@ -458,7 +454,7 @@ retry:
  */
 int
 hammer2_inode_duplicate(hammer2_inode_t *dip, hammer2_inode_t *oip,
-                       hammer2_inode_t **nipp,
+                       hammer2_inode_t **nipp, hammer2_chain_t **nchainp,
                        const uint8_t *name, size_t name_len)
 {
        hammer2_inode_data_t *nipdata;
@@ -473,9 +469,9 @@ hammer2_inode_duplicate(hammer2_inode_t *dip, hammer2_inode_t *oip,
        if (name) {
                lhc = hammer2_dirhash(name, name_len);
        } else {
-               hammer2_inode_lock_ex(oip);
-               lhc = oip->chain->data->ipdata.inum;
-               hammer2_inode_unlock_ex(oip);
+               parent = hammer2_inode_lock_ex(oip);
+               lhc = parent->data->ipdata.inum;
+               hammer2_inode_unlock_ex(oip, parent);
                KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);
        }
 
@@ -485,10 +481,8 @@ hammer2_inode_duplicate(hammer2_inode_t *dip, hammer2_inode_t *oip,
         * and iterate until we don't get one.
         */
        nip = NULL;
-       hammer2_inode_lock_ex(dip);
 retry:
-       parent = dip->chain;
-       hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
+       parent = hammer2_inode_lock_ex(dip);
 
        error = 0;
        while (error == 0) {
@@ -514,8 +508,14 @@ retry:
                                             HAMMER2_INODE_BYTES,     /* n/a */
                                             &error);
        }
-       hammer2_chain_unlock(hmp, parent);
-       hammer2_inode_unlock_ex(dip);
+
+       /*
+        * Clean up, but we need to retain a ref on parent so we can wait
+        * on it for certain errors.
+        */
+       if (error == EAGAIN)
+               hammer2_chain_ref(hmp, parent);
+       hammer2_inode_unlock_ex(dip, parent);
 
        /*
         * Handle the error case
@@ -524,6 +524,7 @@ retry:
                KKASSERT(chain == NULL);
                if (error == EAGAIN) {
                        hammer2_chain_wait(hmp, parent);
+                       hammer2_chain_drop(hmp, parent);
                        goto retry;
                }
                return (error);
@@ -548,8 +549,8 @@ retry:
         * pointer to the real hardlink's inum and can't have any sub-chains.
         * XXX might be 0-ref chains left.
         */
-       hammer2_inode_lock_ex(oip);
-       hammer2_chain_flush(hmp, oip->chain, 0);
+       parent = hammer2_inode_lock_ex(oip);
+       hammer2_chain_flush(hmp, parent, 0);
        /*KKASSERT(RB_EMPTY(&oip->chain.rbhead));*/
 
        /*
@@ -561,8 +562,8 @@ retry:
        nip = hammer2_inode_get(dip->pmp, dip, chain);
        hammer2_chain_modify(hmp, chain, 0);
        nipdata = &chain->data->ipdata;
-       *nipdata = oip->chain->data->ipdata;
-       hammer2_inode_unlock_ex(oip);
+       *nipdata = parent->data->ipdata;
+       hammer2_inode_unlock_ex(oip, parent);
 
        if (name) {
                /*
@@ -585,6 +586,7 @@ retry:
                nipdata->name_key = lhc;
        }
        *nipp = nip;
+       *nchainp = chain;
 
        return (0);
 }
@@ -609,6 +611,7 @@ hammer2_inode_connect(hammer2_inode_t *dip, hammer2_inode_t *oip,
        hammer2_mount_t *hmp;
        hammer2_chain_t *chain;
        hammer2_chain_t *parent;
+       hammer2_chain_t *ochain;
        hammer2_inode_t *nip;
        hammer2_key_t lhc;
        int error;
@@ -627,23 +630,21 @@ hammer2_inode_connect(hammer2_inode_t *dip, hammer2_inode_t *oip,
        hmp = dip->hmp;
 retry:
        if (oip->pip == dip) {
-               hammer2_inode_lock_ex(dip);
-               hammer2_inode_lock_ex(oip);
+               parent = hammer2_inode_lock_ex(dip);
+               ochain = hammer2_inode_lock_ex(oip);
        } else {
-               hammer2_inode_lock_ex(oip);
-               hammer2_inode_lock_ex(dip);
+               ochain = hammer2_inode_lock_ex(oip);
+               parent = hammer2_inode_lock_ex(dip);
        }
-       parent = dip->chain;
-       hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
 
        lhc = hammer2_dirhash(name, name_len);
-       hlink = (oip->chain->parent != NULL);
+       hlink = (ochain->parent != NULL);
 
        /*
         * In fake mode flush oip so we can just snapshot it downbelow.
         */
        if (hlink && hammer2_hardlink_enable < 0)
-               hammer2_chain_flush(hmp, oip->chain, 0);
+               hammer2_chain_flush(hmp, ochain, 0);
 
        /*
         * Locate the inode or indirect block to create the new
@@ -680,8 +681,10 @@ retry:
                                                     HAMMER2_BREF_TYPE_INODE,
                                                     HAMMER2_INODE_BYTES,
                                                     &error);
-                       if (chain)
+                       if (chain) {
                                KKASSERT(chain == oip->chain);
+                               KKASSERT(ochain == oip->chain);
+                       }
                }
        }
 
@@ -691,10 +694,11 @@ retry:
         */
        if (error == EAGAIN)
                hammer2_chain_ref(hmp, parent);
-       hammer2_chain_unlock(hmp, parent);
-       hammer2_inode_unlock_ex(dip);
+       hammer2_inode_unlock_ex(dip, parent);
 
        /*
+        * oip/ochain still active.
+        *
         * Handle the error case
         */
        if (error) {
@@ -702,10 +706,10 @@ retry:
                if (error == EAGAIN) {
                        hammer2_chain_wait(hmp, parent);
                        hammer2_chain_drop(hmp, parent);
-                       hammer2_inode_unlock_ex(oip);
+                       hammer2_inode_unlock_ex(oip, ochain);
                        goto retry;
                }
-               hammer2_inode_unlock_ex(oip);
+               hammer2_inode_unlock_ex(oip, ochain);
                return (error);
        }
 
@@ -732,9 +736,9 @@ retry:
                bcopy(name, nipdata->filename, name_len);
                nipdata->name_key = lhc;
                nipdata->name_len = name_len;
-               nipdata->target_type = oip->chain->data->ipdata.type;
+               nipdata->target_type = ochain->data->ipdata.type;
                nipdata->type = HAMMER2_OBJTYPE_HARDLINK;
-               nipdata->inum = oip->chain->data->ipdata.inum;
+               nipdata->inum = ochain->data->ipdata.inum;
                nipdata->nlinks = 1;
                kprintf("created hardlink %*.*s\n",
                        (int)name_len, (int)name_len, name);
@@ -749,7 +753,7 @@ retry:
                hammer2_chain_modify(hmp, chain, 0);
                KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
                nipdata = &nip->chain->data->ipdata;
-               *nipdata = oip->chain->data->ipdata;
+               *nipdata = ochain->data->ipdata;
                bcopy(name, nipdata->filename, name_len);
                nipdata->name_key = lhc;
                nipdata->name_len = name_len;
@@ -766,7 +770,7 @@ retry:
                 */
                KKASSERT(chain->u.ip != NULL);
                hammer2_chain_modify(hmp, chain, 0);
-               nipdata = &oip->chain->data->ipdata;
+               nipdata = &ochain->data->ipdata;
 
                if (nipdata->name_len != name_len ||
                    bcmp(nipdata->filename, name, name_len) != 0) {
@@ -777,7 +781,7 @@ retry:
                }
                nipdata->nlinks = 1;
        }
-       hammer2_inode_unlock_ex(oip);
+       hammer2_inode_unlock_ex(oip, ochain);
        return (0);
 }
 
@@ -803,12 +807,15 @@ hammer2_unlink_file(hammer2_inode_t *dip,
        hammer2_chain_t *chain;
        hammer2_chain_t *dparent;
        hammer2_chain_t *dchain;
+       hammer2_chain_t *tmpchain;
        hammer2_key_t lhc;
        hammer2_inode_t *ip;
        hammer2_inode_t *oip;
        int error;
+       int parent_ref;
        uint8_t type;
 
+       parent_ref = 0;
        error = 0;
        ip = NULL;
        ochain = NULL;
@@ -818,9 +825,7 @@ hammer2_unlink_file(hammer2_inode_t *dip,
        /*
         * Search for the filename in the directory
         */
-       hammer2_inode_lock_ex(dip);
-       parent = dip->chain;
-       hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
+       parent = hammer2_inode_lock_ex(dip);
        chain = hammer2_chain_lookup(hmp, &parent,
                                     lhc, lhc + HAMMER2_DIRHASH_LOMASK,
                                     0);
@@ -834,14 +839,14 @@ hammer2_unlink_file(hammer2_inode_t *dip,
                                           lhc, lhc + HAMMER2_DIRHASH_LOMASK,
                                           0);
        }
-       hammer2_inode_unlock_ex(dip);
+       hammer2_inode_unlock_ex(dip, NULL);     /* retain parent */
 
        /*
         * Not found or wrong type (isdir < 0 disables the type check).
         */
        if (chain == NULL) {
-               hammer2_chain_unlock(hmp, parent);
-               return ENOENT;
+               error = ENOENT;
+               goto done;
        }
        if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK)
                type = chain->data->ipdata.target_type;
@@ -864,6 +869,7 @@ hammer2_unlink_file(hammer2_inode_t *dip,
         * directory.  Otherwise chain remains pointing to the original.
         */
        if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
+               KKASSERT(parent_ref == 0);
                hammer2_chain_unlock(hmp, parent);
                parent = NULL;
                error = hammer2_hardlink_find(dip, &chain, &ochain);
@@ -911,8 +917,12 @@ hammer2_unlink_file(hammer2_inode_t *dip,
        if (ochain) {
                /*
                 * Delete the original hardlink pointer.
+                *
+                * NOTE: parent from above is NULL when ochain != NULL
+                *       so we can reuse it.
                 */
                hammer2_chain_lock(hmp, ochain, HAMMER2_RESOLVE_ALWAYS);
+               parent_ref = 1;
                for (;;) {
                        parent = ochain->parent;
                        hammer2_chain_ref(hmp, parent);
@@ -931,12 +941,14 @@ hammer2_unlink_file(hammer2_inode_t *dip,
                        goto done;
                }
                if ((oip = ochain->u.ip) != NULL) {
-                       hammer2_inode_lock_ex(oip);
+                       tmpchain = hammer2_inode_lock_ex(oip);
                        oip->flags |= HAMMER2_INODE_DELETED;
                        if (oip->vp || oip->refs > 1)
-                               hammer2_inode_unlock_ex(oip);
+                               hammer2_inode_unlock_ex(oip, tmpchain);
                        else
-                               hammer2_inode_put(oip);
+                               hammer2_inode_put(oip, tmpchain);
+                       KKASSERT(tmpchain == ochain);
+                       /* ochain still actively locked */
                }
                hammer2_chain_delete(hmp, parent, ochain,
                                     (ochain == retain_ip->chain));
@@ -960,12 +972,13 @@ hammer2_unlink_file(hammer2_inode_t *dip,
                        hammer2_chain_modify(hmp, chain, 0);
                        --chain->data->ipdata.nlinks;
                        if ((ip = chain->u.ip) != NULL) {
-                               hammer2_inode_lock_ex(ip);
+                               parent = hammer2_inode_lock_ex(ip);
                                ip->flags |= HAMMER2_INODE_DELETED;
                                if (ip->vp)
-                                       hammer2_inode_unlock_ex(ip);
+                                       hammer2_inode_unlock_ex(ip, parent);
                                else
-                                       hammer2_inode_put(ip);
+                                       hammer2_inode_put(ip, parent);
+                               parent = NULL;
                        }
                        hammer2_chain_delete(hmp, dparent, chain, 0);
                        hammer2_chain_unlock(hmp, dparent);
@@ -983,7 +996,7 @@ hammer2_unlink_file(hammer2_inode_t *dip,
                ip = hammer2_inode_get(dip->pmp, dip, chain);
                ipdata = &ip->chain->data->ipdata;
                if (ip == retain_ip && chain->flushing) {
-                       hammer2_inode_unlock_ex(ip);
+                       hammer2_inode_unlock_ex(ip, chain);
                        chain = NULL;   /* inode_unlock eats chain */
                        error = EAGAIN;
                        goto done;
@@ -994,9 +1007,9 @@ hammer2_unlink_file(hammer2_inode_t *dip,
                hammer2_chain_delete(hmp, parent, chain,
                                     (retain_ip == ip));
                if (ip->vp)
-                       hammer2_inode_unlock_ex(ip);
+                       hammer2_inode_unlock_ex(ip, chain);
                else
-                       hammer2_inode_put(ip);
+                       hammer2_inode_put(ip, chain);
                chain = NULL;   /* inode_unlock eats chain */
        }
 
@@ -1007,7 +1020,8 @@ done:
                hammer2_chain_unlock(hmp, chain);
        if (parent) {
                hammer2_chain_unlock(hmp, parent);
-               hammer2_chain_drop(hmp, parent);
+               if (parent_ref)
+                       hammer2_chain_drop(hmp, parent);
        }
        if (ochain)
                hammer2_chain_drop(hmp, ochain);
@@ -1055,6 +1069,7 @@ hammer2_hardlink_consolidate(hammer2_inode_t **ipp, hammer2_inode_t *tdip)
        hammer2_inode_t *fdip;
        hammer2_inode_t *cdip;
        hammer2_chain_t *ochain;
+       hammer2_chain_t *nchain;
        hammer2_chain_t *parent;
        int error;
 
@@ -1075,16 +1090,16 @@ hammer2_hardlink_consolidate(hammer2_inode_t **ipp, hammer2_inode_t *tdip)
         * already been consolidated in the correct place.
         */
        if (cdip == fdip) {
-               hammer2_inode_lock_ex(oip);
-               if ((oip->chain->data->ipdata.name_key &
+               ochain = hammer2_inode_lock_ex(oip);
+               if ((ochain->data->ipdata.name_key &
                     HAMMER2_DIRHASH_VISIBLE) == 0) {
-                       hammer2_chain_modify(hmp, oip->chain, 0);
-                       ++oip->chain->data->ipdata.nlinks;
-                       hammer2_inode_unlock_ex(oip);
+                       hammer2_chain_modify(hmp, ochain, 0);
+                       ++ochain->data->ipdata.nlinks;
+                       hammer2_inode_unlock_ex(oip, ochain);
                        hammer2_inode_drop(cdip);
                        return(0);
                }
-               hammer2_inode_unlock_ex(oip);
+               hammer2_inode_unlock_ex(oip, ochain);
        }
 
        /*
@@ -1095,26 +1110,25 @@ hammer2_hardlink_consolidate(hammer2_inode_t **ipp, hammer2_inode_t *tdip)
         * under oip to the new hardlink target inode, retiring all chains
         * related to oip before returning.  XXX vp->ip races.
         */
-       error = hammer2_inode_duplicate(cdip, oip, &nip, NULL, 0);
+       error = hammer2_inode_duplicate(cdip, oip, &nip, &nchain, NULL, 0);
        if (error == 0) {
                /*
                 * Bump nlinks on duplicated hidden inode.
                 */
                hammer2_inode_ref(nip);                 /* ref new *ipp */
-               hammer2_chain_modify(hmp, nip->chain, 0);
-               ++nip->chain->data->ipdata.nlinks;
-               hammer2_inode_unlock_ex(nip);
-               hammer2_inode_lock_ex(oip);
+               hammer2_chain_modify(hmp, nchain, 0);
+               ++nchain->data->ipdata.nlinks;
+               hammer2_inode_unlock_ex(nip, nchain);
+               ochain = hammer2_inode_lock_ex(oip);
                hammer2_inode_drop(oip);                /* unref old *ipp */
 
-               if (oip->chain->data->ipdata.name_key &
-                   HAMMER2_DIRHASH_VISIBLE) {
+               if (ochain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE) {
                        /*
                         * Replace the old inode with an OBJTYPE_HARDLINK
                         * pointer.
                         */
-                       hammer2_chain_modify(hmp, oip->chain, 0);
-                       oipdata = &oip->chain->data->ipdata;
+                       hammer2_chain_modify(hmp, ochain, 0);
+                       oipdata = &ochain->data->ipdata;
                        oipdata->target_type = oipdata->type;
                        oipdata->type = HAMMER2_OBJTYPE_HARDLINK;
                        oipdata->uflags = 0;
@@ -1145,7 +1159,7 @@ hammer2_hardlink_consolidate(hammer2_inode_t **ipp, hammer2_inode_t *tdip)
                        bzero(&oipdata->u, sizeof(oipdata->u));
                        /* XXX transaction ids */
 
-                       hammer2_inode_unlock_ex(oip);
+                       hammer2_inode_unlock_ex(oip, ochain);
                } else {
                        /*
                         * The old inode was a hardlink target, which we
@@ -1155,21 +1169,20 @@ hammer2_hardlink_consolidate(hammer2_inode_t **ipp, hammer2_inode_t *tdip)
                         */
                        kprintf("DELETE INVISIBLE\n");
                        for (;;) {
-                               parent = oip->chain->parent;
+                               parent = ochain->parent;
                                hammer2_chain_ref(hmp, parent);
-                               hammer2_inode_unlock_ex(oip);
+                               hammer2_inode_unlock_ex(oip, ochain);
                                hammer2_chain_lock(hmp, parent,
                                                   HAMMER2_RESOLVE_ALWAYS);
-                               hammer2_inode_lock_ex(oip);
+                               ochain = hammer2_inode_lock_ex(oip);
                                if (oip->chain->parent == parent)
                                        break;
                                hammer2_chain_unlock(hmp, parent);
                                hammer2_chain_drop(hmp, parent);
                        }
                        oip->flags |= HAMMER2_INODE_DELETED;
-                       ochain = oip->chain;
                        hammer2_chain_delete(hmp, parent, ochain, 0);
-                       hammer2_inode_put(oip);
+                       hammer2_inode_put(oip, ochain); /* unconditional */
                        hammer2_chain_unlock(hmp, parent);
                        hammer2_chain_drop(hmp, parent);
                }
@@ -1241,11 +1254,9 @@ hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp,
        chain = NULL;
 
        while ((ip = pip) != NULL) {
-               hammer2_inode_lock_ex(ip);
+               parent = hammer2_inode_lock_ex(ip);
                hammer2_inode_drop(ip);
-               parent = ip->chain;
                KKASSERT(parent->bref.type == HAMMER2_BREF_TYPE_INODE);
-               hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
                chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
                hammer2_chain_unlock(hmp, parent);
                if (chain)
@@ -1253,14 +1264,16 @@ hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp,
                pip = ip->pip;          /* safe, ip held locked */
                if (pip)
                        hammer2_inode_ref(pip);
-               hammer2_inode_unlock_ex(ip);
+               hammer2_inode_unlock_ex(ip, NULL);
        }
 
        /*
         * chain is locked, ip is locked.  Unlock ip, return the locked
         * chain.  *ipp is already set w/a ref count and not locked.
+        *
+        * (parent is already unlocked).
         */
-       hammer2_inode_unlock_ex(ip);
+       hammer2_inode_unlock_ex(ip, NULL);
        *chainp = chain;
        if (chain) {
                KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
index b34e086..f2d0088 100644 (file)
@@ -455,6 +455,7 @@ static int
 hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
 {
        hammer2_inode_data_t *nipdata;
+       hammer2_chain_t *nchain;
        hammer2_mount_t *hmp;
        hammer2_ioc_pfs_t *pfs;
        hammer2_inode_t *nip;
@@ -467,14 +468,14 @@ hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
        pfs->name[sizeof(pfs->name) - 1] = 0;   /* ensure 0-termination */
        error = hammer2_inode_create(hmp->schain->u.ip, NULL, NULL,
                                     pfs->name, strlen(pfs->name),
-                                    &nip);
+                                    &nip, &nchain);
        if (error == 0) {
-               hammer2_chain_modify(hmp, nip->chain, 0);
-               nipdata = &nip->chain->data->ipdata;
+               hammer2_chain_modify(hmp, nchain, 0);
+               nipdata = &nchain->data->ipdata;
                nipdata->pfs_type = pfs->pfs_type;
                nipdata->pfs_clid = pfs->pfs_clid;
                nipdata->pfs_fsid = pfs->pfs_fsid;
-               hammer2_inode_unlock_ex(nip);
+               hammer2_inode_unlock_ex(nip, nchain);
        }
        return (error);
 }
@@ -502,11 +503,12 @@ static int
 hammer2_ioctl_inode_get(hammer2_inode_t *ip, void *data)
 {
        hammer2_ioc_inode_t *ino = data;
+       hammer2_chain_t *chain;
 
-       hammer2_inode_lock_sh(ip);
-       ino->ip_data = ip->chain->data->ipdata;
+       chain = hammer2_inode_lock_sh(ip);
+       ino->ip_data = chain->data->ipdata;
        ino->kdata = ip;
-       hammer2_inode_unlock_sh(ip);
+       hammer2_inode_unlock_sh(ip, chain);
        return (0);
 }
 
@@ -514,15 +516,16 @@ static int
 hammer2_ioctl_inode_set(hammer2_inode_t *ip, void *data)
 {
        hammer2_ioc_inode_t *ino = data;
+       hammer2_chain_t *chain;
        int error = EINVAL;
 
-       hammer2_inode_lock_ex(ip);
+       chain = hammer2_inode_lock_ex(ip);
        if (ino->flags & HAMMER2IOC_INODE_FLAG_IQUOTA) {
        }
        if (ino->flags & HAMMER2IOC_INODE_FLAG_DQUOTA) {
        }
        if (ino->flags & HAMMER2IOC_INODE_FLAG_COPIES) {
        }
-       hammer2_inode_unlock_ex(ip);
+       hammer2_inode_unlock_ex(ip, chain);
        return (error);
 }
index ae34049..01dfe47 100644 (file)
  * locked, and can be cleaned out at any time (become NULL) when an inode
  * is not locked.
  *
- * The underlying chain is also locked.
+ * The underlying chain is also locked and returned.
  *
  * NOTE: We don't combine the inode/chain lock because putting away an
  *       inode would otherwise confuse multiple lock holders of the inode.
  */
-void
+hammer2_chain_t *
 hammer2_inode_lock_ex(hammer2_inode_t *ip)
 {
+       hammer2_chain_t *chain;
+
        hammer2_inode_ref(ip);
        ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
-       KKASSERT(ip->chain != NULL);    /* for now */
-       hammer2_chain_lock(ip->hmp, ip->chain, HAMMER2_RESOLVE_ALWAYS);
+
+       chain = ip->chain;
+       KKASSERT(chain != NULL);        /* for now */
+       hammer2_chain_lock(ip->hmp, chain, HAMMER2_RESOLVE_ALWAYS);
+
+       return (chain);
 }
 
 void
-hammer2_inode_unlock_ex(hammer2_inode_t *ip)
+hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_chain_t *chain)
 {
-       if (ip->chain)
-               hammer2_chain_unlock(ip->hmp, ip->chain);
+       if (chain)
+               hammer2_chain_unlock(ip->hmp, chain);
        ccms_thread_unlock(&ip->topo_cst);
        hammer2_inode_drop(ip);
 }
 
-void
+/*
+ * NOTE: We don't combine the inode/chain lock because putting away an
+ *       inode would otherwise confuse multiple lock holders of the inode.
+ *
+ *      Shared locks are especially sensitive to having too many shared
+ *      lock counts (from the same thread) on certain paths which might
+ *      need to upgrade them.  Only one count of a shared lock can be
+ *      upgraded.
+ */
+hammer2_chain_t *
 hammer2_inode_lock_sh(hammer2_inode_t *ip)
 {
+       hammer2_chain_t *chain;
+
        hammer2_inode_ref(ip);
        ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED);
-       KKASSERT(ip->chain != NULL);    /* for now */
-       hammer2_chain_lock(ip->hmp, ip->chain, HAMMER2_RESOLVE_ALWAYS |
-                                              HAMMER2_RESOLVE_SHARED);
+
+       chain = ip->chain;
+       KKASSERT(chain != NULL);        /* for now */
+       hammer2_chain_lock(ip->hmp, chain, HAMMER2_RESOLVE_ALWAYS |
+                                          HAMMER2_RESOLVE_SHARED);
+       return (chain);
 }
 
 void
-hammer2_inode_unlock_sh(hammer2_inode_t *ip)
+hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_chain_t *chain)
 {
-       if (ip->chain)
-               hammer2_chain_unlock(ip->hmp, ip->chain);
+       if (chain)
+               hammer2_chain_unlock(ip->hmp, chain);
        ccms_thread_unlock(&ip->topo_cst);
        hammer2_inode_drop(ip);
 }
index 7d1e899..201e9c3 100644 (file)
@@ -475,7 +475,7 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
        pmp->rchain = rchain;                   /* left held & unlocked */
        pmp->iroot = hammer2_inode_get(pmp, NULL, rchain);
        hammer2_inode_ref(pmp->iroot);          /* ref for pmp->iroot */
-       hammer2_inode_unlock_ex(pmp->iroot);    /* iroot & its chain */
+       hammer2_inode_unlock_ex(pmp->iroot, rchain); /* iroot & its chain */
 
        kprintf("iroot %p\n", pmp->iroot);
 
@@ -528,6 +528,7 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags)
 {
        hammer2_pfsmount_t *pmp;
        hammer2_mount_t *hmp;
+       hammer2_chain_t *chain;
        int flags;
        int error = 0;
        int ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
@@ -585,8 +586,8 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags)
         * clean).
         */
        if (pmp->iroot) {
-               hammer2_inode_lock_ex(pmp->iroot);
-               hammer2_inode_put(pmp->iroot);
+               chain = hammer2_inode_lock_ex(pmp->iroot);
+               hammer2_inode_put(pmp->iroot, chain);
                /* lock destroyed by the put */
                KKASSERT(pmp->iroot->refs == 1);
                hammer2_inode_drop(pmp->iroot);
@@ -659,6 +660,7 @@ int
 hammer2_vfs_root(struct mount *mp, struct vnode **vpp)
 {
        hammer2_pfsmount_t *pmp;
+       hammer2_chain_t *ichain;
        hammer2_mount_t *hmp;
        int error;
        struct vnode *vp;
@@ -670,9 +672,9 @@ hammer2_vfs_root(struct mount *mp, struct vnode **vpp)
                *vpp = NULL;
                error = EINVAL;
        } else {
-               hammer2_inode_lock_sh(pmp->iroot);
+               ichain = hammer2_inode_lock_sh(pmp->iroot);
                vp = hammer2_igetv(pmp->iroot, &error);
-               hammer2_inode_unlock_sh(pmp->iroot);
+               hammer2_inode_unlock_sh(pmp->iroot, ichain);
                *vpp = vp;
                if (vp == NULL)
                        kprintf("vnodefail\n");
@@ -1021,6 +1023,7 @@ hammer2_install_volume_header(hammer2_mount_t *hmp)
 void
 hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp)
 {
+       hammer2_chain_t *chain;
        hammer2_inode_data_t *ipdata;
        size_t name_len;
 
@@ -1034,14 +1037,14 @@ hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp)
        /*
         * Setup LNK_CONN fields for autoinitiated state machine
         */
-       hammer2_inode_lock_ex(pmp->iroot);
-       ipdata = &pmp->iroot->chain->data->ipdata;
+       chain = hammer2_inode_lock_ex(pmp->iroot);
+       ipdata = &chain->data->ipdata;
        pmp->iocom.auto_lnk_conn.pfs_clid = ipdata->pfs_clid;
        pmp->iocom.auto_lnk_conn.pfs_fsid = ipdata->pfs_fsid;
        pmp->iocom.auto_lnk_conn.pfs_type = ipdata->pfs_type;
        pmp->iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1;
        pmp->iocom.auto_lnk_conn.peer_type = pmp->hmp->voldata.peer_type;
-       hammer2_inode_unlock_ex(pmp->iroot);
+       hammer2_inode_unlock_ex(pmp->iroot, chain);
 
        /*
         * Filter adjustment.  Clients do not need visibility into other
index a56120d..4059662 100644 (file)
@@ -51,8 +51,8 @@
 
 static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio,
                                int seqcount);
-static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio, int ioflag,
-                             int seqcount);
+static int hammer2_write_file(hammer2_inode_t *ip, hammer2_chain_t **chainp,
+                               struct uio *uio, int ioflag, int seqcount);
 static hammer2_off_t hammer2_assign_physical(hammer2_inode_t *ip,
                                hammer2_key_t lbase, int lblksize, int *errorp);
 static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize);
@@ -73,8 +73,9 @@ static
 int
 hammer2_vop_inactive(struct vop_inactive_args *ap)
 {
+       hammer2_chain_t *chain;
+       hammer2_inode_t *ip;
        struct vnode *vp;
-       struct hammer2_inode *ip;
 #if 0
        struct hammer2_mount *hmp;
 #endif
@@ -96,10 +97,10 @@ hammer2_vop_inactive(struct vop_inactive_args *ap)
         * picked up by our normal flush.
         */
        if (ip->flags & HAMMER2_INODE_DIRTYEMBED) {
-               hammer2_inode_lock_ex(ip);
+               chain = hammer2_inode_lock_ex(ip);
                atomic_clear_int(&ip->flags, HAMMER2_INODE_DIRTYEMBED);
-               hammer2_chain_modify(ip->hmp, ip->chain, 0);
-               hammer2_inode_unlock_ex(ip);
+               hammer2_chain_modify(ip->hmp, chain, 0);
+               hammer2_inode_unlock_ex(ip, chain);
        }
 
        /*
@@ -119,8 +120,9 @@ static
 int
 hammer2_vop_reclaim(struct vop_reclaim_args *ap)
 {
-       struct hammer2_inode *ip;
-       struct hammer2_mount *hmp;
+       hammer2_chain_t *chain;
+       hammer2_inode_t *ip;
+       hammer2_mount_t *hmp;
        struct vnode *vp;
 
        vp = ap->a_vp;
@@ -133,20 +135,20 @@ hammer2_vop_reclaim(struct vop_reclaim_args *ap)
         * Set SUBMODIFIED so we can detect and propagate the DESTROYED
         * bit in the flush code.
         */
-       hammer2_inode_lock_ex(ip);
+       chain = hammer2_inode_lock_ex(ip);
        vp->v_data = NULL;
        ip->vp = NULL;
        if (ip->flags & HAMMER2_INODE_DELETED) {
-               KKASSERT(ip->chain->flags & HAMMER2_CHAIN_DELETED);
-               atomic_set_int(&ip->chain->flags, HAMMER2_CHAIN_DESTROYED |
-                                                HAMMER2_CHAIN_SUBMODIFIED);
+               KKASSERT(chain->flags & HAMMER2_CHAIN_DELETED);
+               atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROYED |
+                                             HAMMER2_CHAIN_SUBMODIFIED);
        }
-       hammer2_chain_flush(hmp, ip->chain, 0);
+       hammer2_chain_flush(hmp, chain, 0);
        if (ip->refs > 2)                       /* (our lock + vp ref) */
-               hammer2_inode_unlock_ex(ip);    /* unlock */
+               hammer2_inode_unlock_ex(ip, chain); /* unlock */
        else
-               hammer2_inode_put(ip);          /* unlock & disconnect */
-       hammer2_inode_drop(ip);                 /* vp ref */
+               hammer2_inode_put(ip, chain);       /* unlock & disconnect */
+       hammer2_inode_drop(ip);                     /* vp ref */
 
        /*
         * XXX handle background sync when ip dirty, kernel will no longer
@@ -161,15 +163,16 @@ static
 int
 hammer2_vop_fsync(struct vop_fsync_args *ap)
 {
-       struct hammer2_inode *ip;
-       struct hammer2_mount *hmp;
+       hammer2_chain_t *chain;
+       hammer2_inode_t *ip;
+       hammer2_mount_t *hmp;
        struct vnode *vp;
 
        vp = ap->a_vp;
        ip = VTOI(vp);
        hmp = ip->hmp;
 
-       hammer2_inode_lock_ex(ip);
+       chain = hammer2_inode_lock_ex(ip);
        vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
 
        /*
@@ -179,7 +182,7 @@ hammer2_vop_fsync(struct vop_fsync_args *ap)
         */
        if (ip->flags & HAMMER2_INODE_DIRTYEMBED) {
                atomic_clear_int(&ip->flags, HAMMER2_INODE_DIRTYEMBED);
-               hammer2_chain_modify(hmp, ip->chain, 0);
+               hammer2_chain_modify(hmp, chain, 0);
        }
 
        /*
@@ -191,8 +194,8 @@ hammer2_vop_fsync(struct vop_fsync_args *ap)
         * on the volume root as a catch-all, which is far more optimal.
         */
        if (ap->a_flags & VOP_FSYNC_SYSCALL)
-               hammer2_chain_flush(hmp, ip->chain, 0);
-       hammer2_inode_unlock_ex(ip);
+               hammer2_chain_flush(hmp, chain, 0);
+       hammer2_inode_unlock_ex(ip, chain);
        return (0);
 }
 
@@ -201,17 +204,18 @@ int
 hammer2_vop_access(struct vop_access_args *ap)
 {
        hammer2_inode_t *ip = VTOI(ap->a_vp);
+       hammer2_chain_t *chain;
        hammer2_inode_data_t *ipdata;
        uid_t uid;
        gid_t gid;
        int error;
 
-       hammer2_inode_lock_sh(ip);
-       ipdata = &ip->chain->data->ipdata;
+       chain = hammer2_inode_lock_sh(ip);
+       ipdata = &chain->data->ipdata;
        uid = hammer2_to_unix_xid(&ipdata->uid);
        gid = hammer2_to_unix_xid(&ipdata->gid);
        error = vop_helper_access(ap, uid, gid, ipdata->mode, ipdata->uflags);
-       hammer2_inode_unlock_sh(ip);
+       hammer2_inode_unlock_sh(ip, chain);
 
        return (error);
 }
@@ -223,6 +227,7 @@ hammer2_vop_getattr(struct vop_getattr_args *ap)
        hammer2_inode_data_t *ipdata;
        hammer2_pfsmount_t *pmp;
        hammer2_inode_t *ip;
+       hammer2_chain_t *chain;
        struct vnode *vp;
        struct vattr *vap;
 
@@ -232,8 +237,8 @@ hammer2_vop_getattr(struct vop_getattr_args *ap)
        ip = VTOI(vp);
        pmp = ip->pmp;
 
-       hammer2_inode_lock_sh(ip);
-       ipdata = &ip->chain->data->ipdata;
+       chain = hammer2_inode_lock_sh(ip);
+       ipdata = &chain->data->ipdata;
 
        vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
        vap->va_fileid = ipdata->inum;
@@ -251,14 +256,14 @@ hammer2_vop_getattr(struct vop_getattr_args *ap)
        hammer2_time_to_timespec(ipdata->mtime, &vap->va_atime);
        vap->va_gen = 1;
        vap->va_bytes = vap->va_size;   /* XXX */
-       vap->va_type = hammer2_get_vtype(ip->chain);
+       vap->va_type = hammer2_get_vtype(chain);
        vap->va_filerev = 0;
        vap->va_uid_uuid = ipdata->uid;
        vap->va_gid_uuid = ipdata->gid;
        vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
                          VA_FSID_UUID_VALID;
 
-       hammer2_inode_unlock_sh(ip);
+       hammer2_inode_unlock_sh(ip, chain);
 
        return (0);
 }
@@ -268,8 +273,9 @@ int
 hammer2_vop_setattr(struct vop_setattr_args *ap)
 {
        hammer2_inode_data_t *ipdata;
-       hammer2_mount_t *hmp;
+       hammer2_chain_t *chain;
        hammer2_inode_t *ip;
+       hammer2_mount_t *hmp;
        struct vnode *vp;
        struct vattr *vap;
        int error;
@@ -287,8 +293,8 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
        if (hmp->ronly)
                return(EROFS);
 
-       hammer2_inode_lock_ex(ip);
-       ipdata = &ip->chain->data->ipdata;
+       chain = hammer2_inode_lock_ex(ip);
+       ipdata = &chain->data->ipdata;
        error = 0;
 
        if (vap->va_flags != VNOVAL) {
@@ -300,7 +306,7 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
                                         ap->a_cred);
                if (error == 0) {
                        if (ipdata->uflags != flags) {
-                               hammer2_chain_modify(hmp, ip->chain, 0);
+                               hammer2_chain_modify(hmp, chain, 0);
                                ipdata->uflags = flags;
                                ipdata->ctime = ctime;
                                kflags |= NOTE_ATTRIB;
@@ -333,7 +339,7 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
                            bcmp(&uuid_gid, &ipdata->gid, sizeof(uuid_gid)) ||
                            ipdata->mode != cur_mode
                        ) {
-                               hammer2_chain_modify(hmp, ip->chain, 0);
+                               hammer2_chain_modify(hmp, chain, 0);
                                ipdata->uid = uuid_uid;
                                ipdata->gid = uuid_gid;
                                ipdata->mode = cur_mode;
@@ -366,13 +372,13 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
 #if 0
        /* atime not supported */
        if (vap->va_atime.tv_sec != VNOVAL) {
-               hammer2_chain_modify(hmp, ip->chain, 0);
+               hammer2_chain_modify(hmp, chain, 0);
                ipdata->atime = hammer2_timespec_to_time(&vap->va_atime);
                kflags |= NOTE_ATTRIB;
        }
 #endif
        if (vap->va_mtime.tv_sec != VNOVAL) {
-               hammer2_chain_modify(hmp, ip->chain, 0);
+               hammer2_chain_modify(hmp, chain, 0);
                ipdata->mtime = hammer2_timespec_to_time(&vap->va_mtime);
                kflags |= NOTE_ATTRIB;
        }
@@ -384,13 +390,14 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
                error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
                                         cur_uid, cur_gid, &cur_mode);
                if (error == 0 && ipdata->mode != cur_mode) {
+                       hammer2_chain_modify(hmp, chain, 0);
                        ipdata->mode = cur_mode;
                        ipdata->ctime = ctime;
                        kflags |= NOTE_ATTRIB;
                }
        }
 done:
-       hammer2_inode_unlock_ex(ip);
+       hammer2_inode_unlock_ex(ip, chain);
        return (error);
 }
 
@@ -403,6 +410,7 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
        hammer2_inode_t *ip;
        hammer2_inode_t *xip;
        hammer2_chain_t *parent;
+       hammer2_chain_t *xparent;
        hammer2_chain_t *chain;
        hammer2_tid_t inum;
        hammer2_key_t lkey;
@@ -434,8 +442,8 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
        }
        cookie_index = 0;
 
-       hammer2_inode_lock_sh(ip);
-       ipdata = &ip->chain->data->ipdata;
+       parent = hammer2_inode_lock_sh(ip);
+       ipdata = &parent->data->ipdata;
 
        /*
         * Handle artificial entries.  To ensure that only positive 64 bit
@@ -465,22 +473,24 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
        if (saveoff == 1) {
                /*
                 * Be careful with lockorder when accessing ".."
+                *
+                * (parent is the current dir's chain.  xip is the parent dir).
                 */
-               inum = ip->chain->data->ipdata.inum & HAMMER2_DIRHASH_USERMSK;
+               inum = parent->data->ipdata.inum & HAMMER2_DIRHASH_USERMSK;
                while (ip->pip != NULL && ip != ip->pmp->iroot) {
                        xip = ip->pip;
                        hammer2_inode_ref(xip);
-                       hammer2_inode_unlock_sh(ip);
-                       hammer2_inode_lock_sh(xip);
-                       hammer2_inode_lock_sh(ip);
+                       hammer2_inode_unlock_sh(ip, parent);
+                       xparent = hammer2_inode_lock_sh(xip);
+                       parent = hammer2_inode_lock_sh(ip);
                        hammer2_inode_drop(xip);
                        if (xip == ip->pip) {
-                               inum = xip->chain->data->ipdata.inum &
+                               inum = xparent->data->ipdata.inum &
                                       HAMMER2_DIRHASH_USERMSK;
-                               hammer2_inode_unlock_sh(xip);
+                               hammer2_inode_unlock_sh(xip, xparent);
                                break;
                        }
-                       hammer2_inode_unlock_sh(xip);
+                       hammer2_inode_unlock_sh(xip, xparent);
                }
                r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, "..");
                if (r)
@@ -495,9 +505,10 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
 
        lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
 
-       parent = ip->chain;
-       error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS |
-                                               HAMMER2_RESOLVE_SHARED);
+       /*
+        * parent is the inode chain, already locked for us.  Don't
+        * double lock shared locks as this will screw up upgrades.
+        */
        if (error) {
                goto done;
        }
@@ -549,9 +560,8 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
        }
        if (chain)
                hammer2_chain_unlock(hmp, chain);
-       hammer2_chain_unlock(hmp, parent);
 done:
-       hammer2_inode_unlock_sh(ip);
+       hammer2_inode_unlock_sh(ip, parent);
        if (ap->a_eofflag)
                *ap->a_eofflag = (chain == NULL);
        uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
@@ -630,10 +640,11 @@ static
 int
 hammer2_vop_write(struct vop_write_args *ap)
 {
-       thread_t td;
-       struct vnode *vp;
+       hammer2_chain_t *chain;
        hammer2_mount_t *hmp;
        hammer2_inode_t *ip;
+       thread_t td;
+       struct vnode *vp;
        struct uio *uio;
        int error;
        int seqcount;
@@ -678,9 +689,9 @@ hammer2_vop_write(struct vop_write_args *ap)
         * ip must be marked modified, particularly because the write
         * might wind up being copied into the embedded data area.
         */
-       hammer2_inode_lock_ex(ip);
-       error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount);
-       hammer2_inode_unlock_ex(ip);
+       chain = hammer2_inode_lock_ex(ip);
+       error = hammer2_write_file(ip, &chain, uio, ap->a_ioflag, seqcount);
+       hammer2_inode_unlock_ex(ip, chain);
        return (error);
 }
 
@@ -694,18 +705,30 @@ static
 int
 hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
 {
-       hammer2_inode_data_t *ipdata;
+       hammer2_chain_t *chain;
+       hammer2_off_t size;
        struct buf *bp;
        int error;
 
        error = 0;
 
        /*
-        * UIO read loop
+        * UIO read loop.
+        *
+        * We can't hold a shared lock on ip's chain across file bread() calls
+        * because the bread operation will itself obtain a shared lock,
+        * resulting in one thread holding 2 shared refs.  This will deadlock
+        * against temporary lock upgrades.  Temporary lock upgrades are
+        * needed to insert new chain structures into a parent's RB tree.
+        *
+        * We should be able to safely retain the shared lock on ip itself.
         */
-       hammer2_inode_lock_sh(ip);
-       ipdata = &ip->chain->data->ipdata;
-       while (uio->uio_resid > 0 && uio->uio_offset < ipdata->size) {
+       chain = hammer2_inode_lock_sh(ip);
+       size = chain->data->ipdata.size;
+       hammer2_chain_unlock(ip->hmp, chain);
+       chain = NULL;
+
+       while (uio->uio_resid > 0 && uio->uio_offset < size) {
                hammer2_key_t lbase;
                hammer2_key_t leof;
                int lblksize;
@@ -725,16 +748,13 @@ hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
                n = lblksize - loff;
                if (n > uio->uio_resid)
                        n = uio->uio_resid;
-               if (n > ipdata->size - uio->uio_offset)
-                       n = (int)(ipdata->size - uio->uio_offset);
+               if (n > size - uio->uio_offset)
+                       n = (int)(size - uio->uio_offset);
                bp->b_flags |= B_AGE;
-               hammer2_inode_unlock_sh(ip);
                uiomove((char *)bp->b_data + loff, n, uio);
                bqrelse(bp);
-               hammer2_inode_lock_sh(ip);
-               ipdata = &ip->chain->data->ipdata;      /* reload */
        }
-       hammer2_inode_unlock_sh(ip);
+       hammer2_inode_unlock_sh(ip, chain);
        return (error);
 }
 
@@ -744,7 +764,8 @@ hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
  */
 static
 int
-hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
+hammer2_write_file(hammer2_inode_t *ip, hammer2_chain_t **chainp,
+                  struct uio *uio,
                   int ioflag, int seqcount)
 {
        hammer2_inode_data_t *ipdata;
@@ -798,10 +819,10 @@ hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
                         * XXX should try to leave this unlocked through
                         *      the whole loop
                         */
-                       hammer2_inode_unlock_ex(ip);
+                       hammer2_inode_unlock_ex(ip, *chainp);
                        bwillwrite(HAMMER2_PBUFSIZE);
-                       hammer2_inode_lock_ex(ip);
-                       ipdata = &ip->chain->data->ipdata; /* reload */
+                       *chainp = hammer2_inode_lock_ex(ip);
+                       ipdata = &(*chainp)->data->ipdata;      /* reload */
                }
 
                /* XXX bigwrite & signal check test */
@@ -891,10 +912,10 @@ hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
                /*
                 * Ok, copy the data in
                 */
-               hammer2_inode_unlock_ex(ip);
+               hammer2_inode_unlock_ex(ip, *chainp);
                error = uiomove(bp->b_data + loff, n, uio);
-               hammer2_inode_lock_ex(ip);
-               ipdata = &ip->chain->data->ipdata;      /* reload */
+               *chainp = hammer2_inode_lock_ex(ip);
+               ipdata = &(*chainp)->data->ipdata;      /* reload */
                kflags |= NOTE_WRITE;
                modified = 1;
 
@@ -939,7 +960,8 @@ hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
        if (error && ipdata->size != old_eof) {
                hammer2_truncate_file(ip, old_eof);
        } else if (modified) {
-               hammer2_chain_modify(ip->hmp, ip->chain, 0);
+               KKASSERT(ip->chain == *chainp);
+               hammer2_chain_modify(ip->hmp, *chainp, 0);
                hammer2_update_time(&ipdata->mtime);
        }
        hammer2_knote(ip->vp, kflags);
@@ -972,10 +994,8 @@ hammer2_assign_physical(hammer2_inode_t *ip, hammer2_key_t lbase,
         */
        hmp = ip->hmp;
        *errorp = 0;
-       hammer2_inode_lock_ex(ip);
 retry:
-       parent = ip->chain;
-       hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
+       parent = hammer2_inode_lock_ex(ip);
 
        chain = hammer2_chain_lookup(hmp, &parent,
                                     lbase, lbase,
@@ -994,7 +1014,7 @@ retry:
                                             lblksize, errorp);
                if (chain == NULL) {
                        KKASSERT(*errorp == EAGAIN); /* XXX */
-                       hammer2_chain_unlock(hmp, parent);
+                       hammer2_inode_unlock_ex(ip, parent);
                        goto retry;
                }
 
@@ -1031,7 +1051,7 @@ retry:
 
        if (chain)
                hammer2_chain_unlock(hmp, chain);
-       hammer2_chain_unlock(hmp, parent);
+       hammer2_inode_unlock_ex(ip, parent);
 
        return (pbase);
 }
@@ -1361,6 +1381,7 @@ static
 int
 hammer2_vop_nresolve(struct vop_nresolve_args *ap)
 {
+       hammer2_inode_t *ip;
        hammer2_inode_t *dip;
        hammer2_mount_t *hmp;
        hammer2_chain_t *parent;
@@ -1383,16 +1404,12 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap)
        /*
         * Note: In DragonFly the kernel handles '.' and '..'.
         */
-       hammer2_inode_lock_sh(dip);
-       parent = dip->chain;
-       hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS |
-                                       HAMMER2_RESOLVE_SHARED);
+       parent = hammer2_inode_lock_sh(dip);
        chain = hammer2_chain_lookup(hmp, &parent,
                                     lhc, lhc + HAMMER2_DIRHASH_LOMASK,
                                     HAMMER2_LOOKUP_SHARED);
        while (chain) {
                if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
-                   chain->u.ip &&
                    name_len == chain->data->ipdata.name_len &&
                    bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
                        break;
@@ -1401,8 +1418,7 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap)
                                           lhc, lhc + HAMMER2_DIRHASH_LOMASK,
                                           HAMMER2_LOOKUP_SHARED);
        }
-       hammer2_chain_unlock(hmp, parent);
-       hammer2_inode_unlock_sh(dip);
+       hammer2_inode_unlock_sh(dip, parent);
 
        /*
         * If the inode represents a forwarding entry for a hardlink we have
@@ -1448,7 +1464,8 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap)
         *       leave the namecache unresolved.
         */
        if (chain) {
-               vp = hammer2_igetv(chain->u.ip, &error);
+               ip = hammer2_inode_get(dip->pmp, dip, chain);
+               vp = hammer2_igetv(ip, &error);
                if (error == 0) {
                        vn_unlock(vp);
                        cache_setvp(ap->a_nch, vp);
@@ -1456,6 +1473,12 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap)
                } else if (error == ENOENT) {
                        cache_setvp(ap->a_nch, NULL);
                }
+               /*
+                * Don't break the API: chain is locked shared, so unlock
+                * it separately even though unlock_ex() currently doesn't
+                * care.
+                */
+               hammer2_inode_unlock_ex(ip, NULL);
                hammer2_chain_unlock(hmp, chain);
        } else {
                error = ENOENT;
@@ -1474,6 +1497,7 @@ static
 int
 hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
 {
+       hammer2_chain_t *chain;
        hammer2_inode_t *dip;
        hammer2_inode_t *ip;
        hammer2_mount_t *hmp;
@@ -1486,9 +1510,9 @@ hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
                *ap->a_vpp = NULL;
                return ENOENT;
        }
-       hammer2_inode_lock_ex(ip);
+       chain = hammer2_inode_lock_ex(ip);
        *ap->a_vpp = hammer2_igetv(ip, &error);
-       hammer2_inode_unlock_ex(ip);
+       hammer2_inode_unlock_ex(ip, chain);
 
        return error;
 }
@@ -1497,6 +1521,7 @@ static
 int
 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
 {
+       hammer2_chain_t *nchain;
        hammer2_mount_t *hmp;
        hammer2_inode_t *dip;
        hammer2_inode_t *nip;
@@ -1515,14 +1540,14 @@ hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
        name_len = ncp->nc_nlen;
 
        error = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
-                                    name, name_len, &nip);
+                                    name, name_len, &nip, &nchain);
        if (error) {
                KKASSERT(nip == NULL);
                *ap->a_vpp = NULL;
                return error;
        }
        *ap->a_vpp = hammer2_igetv(nip, &error);
-       hammer2_inode_unlock_ex(nip);
+       hammer2_inode_unlock_ex(nip, nchain);
 
        if (error == 0) {
                cache_setunresolved(ap->a_nch);
@@ -1580,18 +1605,14 @@ hammer2_vop_bmap(struct vop_bmap_args *ap)
                lend = lbeg;
        loff = ap->a_loffset & HAMMER2_OFF_MASK_LO;
 
-       hammer2_inode_lock_sh(ip);
-       parent = ip->chain;
-       hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS |
-                                       HAMMER2_RESOLVE_SHARED);
+       parent = hammer2_inode_lock_sh(ip);
        chain = hammer2_chain_lookup(hmp, &parent,
                                     lbeg, lend,
                                     HAMMER2_LOOKUP_NODATA |
                                     HAMMER2_LOOKUP_SHARED);
        if (chain == NULL) {
                *ap->a_doffsetp = ZFOFFSET;
-               hammer2_chain_unlock(hmp, parent);
-               hammer2_inode_unlock_sh(ip);
+               hammer2_inode_unlock_sh(ip, parent);
                return (0);
        }
 
@@ -1607,8 +1628,7 @@ hammer2_vop_bmap(struct vop_bmap_args *ap)
                                           HAMMER2_LOOKUP_NODATA |
                                           HAMMER2_LOOKUP_SHARED);
        }
-       hammer2_chain_unlock(hmp, parent);
-       hammer2_inode_unlock_sh(ip);
+       hammer2_inode_unlock_sh(ip, parent);
 
        /*
         * If the requested loffset is not mappable physically we can't
@@ -1653,11 +1673,12 @@ int
 hammer2_vop_advlock(struct vop_advlock_args *ap)
 {
        hammer2_inode_t *ip = VTOI(ap->a_vp);
+       hammer2_chain_t *chain;
        hammer2_off_t size;
 
-       hammer2_inode_lock_sh(ip);
-       size = ip->chain->data->ipdata.size;
-       hammer2_inode_unlock_sh(ip);
+       chain = hammer2_inode_lock_sh(ip);
+       size = chain->data->ipdata.size;
+       hammer2_inode_unlock_sh(ip, chain);
        return (lf_advlock(ap, &ip->advlock, size));
 }
 
@@ -1682,6 +1703,8 @@ hammer2_vop_nlink(struct vop_nlink_args *ap)
        hammer2_inode_t *ip;    /* inode we are hardlinking to */
        hammer2_inode_t *oip;
        hammer2_mount_t *hmp;
+       hammer2_chain_t *chain;
+       hammer2_chain_t *ochain;
        struct namecache *ncp;
        const uint8_t *name;
        size_t name_len;
@@ -1719,13 +1742,13 @@ hammer2_vop_nlink(struct vop_nlink_args *ap)
         */
        if (oip != ip) {
                hammer2_inode_ref(ip);          /* vp ref+ */
-               hammer2_inode_lock_ex(ip);
-               hammer2_inode_lock_ex(oip);
+               chain = hammer2_inode_lock_ex(ip);
+               ochain = hammer2_inode_lock_ex(oip);
                ip->vp = ap->a_vp;
                ap->a_vp->v_data = ip;
                oip->vp = NULL;
-               hammer2_inode_unlock_ex(oip);
-               hammer2_inode_unlock_ex(ip);
+               hammer2_inode_unlock_ex(oip, ochain);
+               hammer2_inode_unlock_ex(ip, chain);
                hammer2_inode_drop(oip);        /* vp ref- */
        }
 
@@ -1759,6 +1782,7 @@ hammer2_vop_ncreate(struct vop_ncreate_args *ap)
        hammer2_mount_t *hmp;
        hammer2_inode_t *dip;
        hammer2_inode_t *nip;
+       hammer2_chain_t *nchain;
        struct namecache *ncp;
        const uint8_t *name;
        size_t name_len;
@@ -1774,14 +1798,14 @@ hammer2_vop_ncreate(struct vop_ncreate_args *ap)
        name_len = ncp->nc_nlen;
 
        error = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
-                                    name, name_len, &nip);
+                                    name, name_len, &nip, &nchain);
        if (error) {
                KKASSERT(nip == NULL);
                *ap->a_vpp = NULL;
                return error;
        }
        *ap->a_vpp = hammer2_igetv(nip, &error);
-       hammer2_inode_unlock_ex(nip);
+       hammer2_inode_unlock_ex(nip, nchain);
 
        if (error == 0) {
                cache_setunresolved(ap->a_nch);
@@ -1800,6 +1824,7 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
        hammer2_mount_t *hmp;
        hammer2_inode_t *dip;
        hammer2_inode_t *nip;
+       hammer2_chain_t *nchain;
        struct namecache *ncp;
        const uint8_t *name;
        size_t name_len;
@@ -1817,7 +1842,7 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
        ap->a_vap->va_type = VLNK;      /* enforce type */
 
        error = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
-                                    name, name_len, &nip);
+                                    name, name_len, &nip, &nchain);
        if (error) {
                KKASSERT(nip == NULL);
                *ap->a_vpp = NULL;
@@ -1834,7 +1859,7 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
                struct iovec aiov;
                hammer2_inode_data_t *nipdata;
 
-               nipdata = &nip->chain->data->ipdata;
+               nipdata = &nchain->data->ipdata;
                bytes = strlen(ap->a_target);
 
                if (bytes <= HAMMER2_EMBEDDED_BYTES) {
@@ -1853,12 +1878,13 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
                        auio.uio_td = curthread;
                        aiov.iov_base = ap->a_target;
                        aiov.iov_len = bytes;
-                       error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
+                       error = hammer2_write_file(nip, &nchain,
+                                                  &auio, IO_APPEND, 0);
                        /* XXX handle error */
                        error = 0;
                }
        }
-       hammer2_inode_unlock_ex(nip);
+       hammer2_inode_unlock_ex(nip, nchain);
 
        /*
         * Finalize namecache
@@ -1945,6 +1971,7 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
        hammer2_inode_t *fdip;
        hammer2_inode_t *tdip;
        hammer2_inode_t *ip;
+       hammer2_chain_t *chain;
        hammer2_mount_t *hmp;
        const uint8_t *fname;
        size_t fname_len;
@@ -2011,14 +2038,14 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
         * If (ip) does not have multiple links we can just copy the physical
         * contents of the inode.
         */
-       hammer2_inode_lock_sh(ip);
-       if (ip->chain->data->ipdata.nlinks > 1) {
-               hammer2_inode_unlock_sh(ip);
+       chain = hammer2_inode_lock_sh(ip);
+       if (chain->data->ipdata.nlinks > 1) {
+               hammer2_inode_unlock_sh(ip, chain);
                error = hammer2_hardlink_consolidate(&ip, tdip);
                if (error)
                        goto done;
        } else {
-               hammer2_inode_unlock_sh(ip);
+               hammer2_inode_unlock_sh(ip, chain);
        }
 
        /*
@@ -2116,10 +2143,7 @@ hammer2_strategy_read(struct vop_strategy_args *ap)
         * includes both small-block allocations and inode-embedded data.
         */
        if (nbio->bio_offset == NOOFFSET) {
-               hammer2_inode_lock_sh(ip);
-               parent = ip->chain;
-               hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS |
-                                               HAMMER2_RESOLVE_SHARED);
+               parent = hammer2_inode_lock_sh(ip);
 
                chain = hammer2_chain_lookup(hmp, &parent, lbase, lbase,
                                             HAMMER2_LOOKUP_NODATA |
@@ -2147,8 +2171,7 @@ hammer2_strategy_read(struct vop_strategy_args *ap)
                } else {
                        panic("hammer2_strategy_read: unknown bref type");
                }
-               hammer2_chain_unlock(hmp, parent);
-               hammer2_inode_unlock_sh(ip);
+               hammer2_inode_unlock_sh(ip, parent);
        }
 
        if (hammer2_debug & 0x0020) {