hammer2 - refactor filesystem sync 5/N
author    Matthew Dillon <dillon@apollo.backplane.com>
          Sat, 1 Dec 2018 21:57:35 +0000 (13:57 -0800)
committer Matthew Dillon <dillon@apollo.backplane.com>
          Wed, 5 Dec 2018 18:28:39 +0000 (10:28 -0800)
* Dependency ops need a single atomic wrapper.  Adjust the use of
  pmp->list_spin so the entire dependency operation runs under one
  spinlock hold.
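
  As a minimal sketch of the pattern (the function name and shape here
  are illustrative, not code from the tree; hammer2_spin_ex(),
  hammer2_spin_unex(), and the types are the real ones), the flag and
  queue manipulation for all involved inodes now happens under a single
  pmp->list_spin hold rather than acquiring and releasing the spinlock
  per branch:

	/* illustrative sketch only */
	static void
	depend_atomic_sketch(hammer2_pfs_t *pmp, hammer2_inode_t **ips,
			     int count)
	{
		int i;

		hammer2_spin_ex(&pmp->list_spin);	/* one wrapper...  */
		for (i = 0; i < count; ++i) {
			/*
			 * Examine and adjust ips[i]->flags and the
			 * SIDEQ/SYNCQ membership of ips[i] here; all
			 * of it is now atomic vs the filesystem sync.
			 */
		}
		hammer2_spin_unex(&pmp->list_spin);	/* ...one release */
	}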

* Reorder the hammer2_inode_depend() call from after hammer2_igetv()
  to before it, because hammer2_igetv() can temporarily release the
  inode lock and the dependency code assumes that this does not
  happen.
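
  For example, in the vnops create paths (see the hammer2_vnops.c
  hunks below) the reorder looks like this, with surrounding code
  elided:

	/*
	 * Old order: hammer2_igetv() first.  igetv() can transiently
	 * drop the inode lock before the dependency is recorded.
	 *
	 *	*ap->a_vpp = hammer2_igetv(nip, &error);
	 *	hammer2_inode_depend(dip, nip);
	 */

	/*
	 * New order: record the dependency while the inode lock is
	 * known to be continuously held, then acquire the vnode.
	 */
	hammer2_inode_depend(dip, nip);		/* before igetv */
	*ap->a_vpp = hammer2_igetv(nip, &error);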

* Cleanup

* This code is still not 100% correct: dependency pairs that wound up
  on the sideq earlier can be split if an overlapping dependency
  occurs later and flags PASS2.
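
  A hypothetical timeline of how such a split can happen (illustration
  only, inferred from the PASS2 restart behavior in
  hammer2_vfs_sync_pmp()):

	/*
	 * 1. hammer2_inode_depend(A, B) places A and B together on
	 *    the sideq as a dependency pair.
	 * 2. A later overlapping dependency (via inode_depend() or
	 *    inode_lock4()) flags B, but not A, with SYNCQ_PASS2.
	 * 3. The sync's restart pass moves only PASS2-flagged inodes
	 *    from SIDEQ to SYNCQ, so B migrates without A and the
	 *    pair is split across flush passes.
	 */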

sys/vfs/hammer2/hammer2_inode.c
sys/vfs/hammer2/hammer2_vfsops.c
sys/vfs/hammer2/hammer2_vnops.c

diff --git a/sys/vfs/hammer2/hammer2_inode.c b/sys/vfs/hammer2/hammer2_inode.c
index 68d2834..0943786 100644
@@ -78,10 +78,8 @@ hammer2_inode_delayed_sideq(hammer2_inode_t *ip)
                        atomic_set_int(&ip->flags, HAMMER2_INODE_SIDEQ);
                        TAILQ_INSERT_TAIL(&pmp->sideq, ip, entry);
                        ++pmp->sideq_count;
-                       hammer2_spin_unex(&pmp->list_spin);
-               } else {
-                       hammer2_spin_unex(&pmp->list_spin);
                }
+               hammer2_spin_unex(&pmp->list_spin);
        }
 }
 
@@ -218,46 +216,56 @@ hammer2_inode_lock4(hammer2_inode_t *ip1, hammer2_inode_t *ip2,
                hammer2_inode_ref(ips[i]);
 
 restart:
-       dosyncq = 0;
-
        /*
         * Lock the inodes in order
         */
        for (i = 0; i < count; ++i) {
                iptmp = ips[i];
                hammer2_mtx_ex(&iptmp->lock);
-               if (iptmp->flags & HAMMER2_INODE_SYNCQ)
-                       dosyncq |= 1;
-               if (iptmp->flags & HAMMER2_INODE_SYNCQ_PASS2)
-                       dosyncq |= 2;
        }
 
        /*
         * If any of the inodes are part of a filesystem sync then we
         * have to make sure they ALL are, because their modifications
         * depend on each other (e.g. inode vs dirent).
+        *
+        * All PASS2 flags must be set atomically with the spinlock held
+        * to ensure that they are flushed together.
         */
-       for (i = 0; (dosyncq & 3) && i < count; ++i) {
+       hammer2_spin_ex(&pmp->list_spin);
+       dosyncq = 0;
+       for (i = 0; i < count; ++i) {
                iptmp = ips[i];
-               hammer2_spin_ex(&pmp->list_spin);
-               atomic_set_int(&iptmp->flags, HAMMER2_INODE_SYNCQ_WAKEUP);
-               if (iptmp->flags & HAMMER2_INODE_SYNCQ) {
-                       TAILQ_REMOVE(&pmp->syncq, iptmp, entry);
-                       TAILQ_INSERT_HEAD(&pmp->syncq, iptmp, entry);
-               } else if (iptmp->flags & HAMMER2_INODE_SIDEQ) {
-                       atomic_set_int(&iptmp->flags,
-                                      HAMMER2_INODE_SYNCQ_PASS2);
-                       hammer2_trans_setflags(pmp, HAMMER2_TRANS_RESCAN);
-               } else {
+               if (iptmp->flags & HAMMER2_INODE_SYNCQ)
+                       dosyncq |= 1;
+               if (iptmp->flags & HAMMER2_INODE_SYNCQ_PASS2)
+                       dosyncq |= 2;
+       }
+       if (dosyncq & 3) {
+               for (i = 0; i < count; ++i) {
+                       iptmp = ips[i];
                        atomic_set_int(&iptmp->flags,
-                                      HAMMER2_INODE_SIDEQ |
-                                      HAMMER2_INODE_SYNCQ_PASS2);
-                       TAILQ_INSERT_TAIL(&pmp->sideq, iptmp, entry);
-                       hammer2_inode_ref(iptmp);
-                       hammer2_trans_setflags(pmp, HAMMER2_TRANS_RESCAN);
+                                      HAMMER2_INODE_SYNCQ_WAKEUP);
+                       if (iptmp->flags & HAMMER2_INODE_SYNCQ) {
+                               TAILQ_REMOVE(&pmp->syncq, iptmp, entry);
+                               TAILQ_INSERT_HEAD(&pmp->syncq, iptmp, entry);
+                       } else if (iptmp->flags & HAMMER2_INODE_SIDEQ) {
+                               atomic_set_int(&iptmp->flags,
+                                              HAMMER2_INODE_SYNCQ_PASS2);
+                               hammer2_trans_setflags(pmp,
+                                                      HAMMER2_TRANS_RESCAN);
+                       } else {
+                               atomic_set_int(&iptmp->flags,
+                                              HAMMER2_INODE_SIDEQ |
+                                              HAMMER2_INODE_SYNCQ_PASS2);
+                               TAILQ_INSERT_TAIL(&pmp->sideq, iptmp, entry);
+                               hammer2_inode_ref(iptmp);
+                               hammer2_trans_setflags(pmp,
+                                                      HAMMER2_TRANS_RESCAN);
+                       }
                }
-               hammer2_spin_unex(&pmp->list_spin);
        }
+       hammer2_spin_unex(&pmp->list_spin);
 
        /*
         * Block and retry if any of the inodes are on SYNCQ.  It is
@@ -284,8 +292,10 @@ restart:
                 * We have to accept the inode if it's got more than one
                 * exclusive count because we can't safely unlock it.
                 */
-               if (hammer2_mtx_refs(&iptmp->lock) > 1)
+               if (hammer2_mtx_refs(&iptmp->lock) > 1) {
+                       kprintf("hammer2: exclcount > 1: %p %ld\n", iptmp, hammer2_mtx_refs(&iptmp->lock));
                        continue;
+               }
 
                /*
                 * Unlock everything (including the current index) and wait
@@ -337,18 +347,20 @@ hammer2_inode_depend(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
 
        pmp = ip1->pmp;
 
+       hammer2_spin_ex(&pmp->list_spin);
        if (((ip1->flags | ip2->flags) & (HAMMER2_INODE_SYNCQ |
                                          HAMMER2_INODE_SYNCQ_PASS2)) == 0) {
+               hammer2_spin_unex(&pmp->list_spin);
                return;
        }
        if ((ip1->flags & (HAMMER2_INODE_SYNCQ |
                           HAMMER2_INODE_SYNCQ_PASS2)) &&
            (ip2->flags & (HAMMER2_INODE_SYNCQ |
                           HAMMER2_INODE_SYNCQ_PASS2))) {
+               hammer2_spin_unex(&pmp->list_spin);
                return;
        }
        KKASSERT(pmp == ip2->pmp);
-       hammer2_spin_ex(&pmp->list_spin);
        if ((ip1->flags & (HAMMER2_INODE_SYNCQ |
                           HAMMER2_INODE_SYNCQ_PASS2)) == 0) {
                if (ip1->flags & HAMMER2_INODE_SIDEQ) {
@@ -1083,7 +1095,7 @@ hammer2_inode_create_normal(hammer2_inode_t *pip,
 
        *errorp = 0;
 
-       hammer2_inode_lock(dip, 0);
+       /*hammer2_inode_lock(dip, 0);*/
 
        pip_uid = pip->meta.uid;
        pip_gid = pip->meta.gid;
@@ -1202,7 +1214,7 @@ hammer2_inode_create_normal(hammer2_inode_t *pip,
        hammer2_inode_repoint(nip, NULL, &xop->head.cluster);
 done:
        hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
-       hammer2_inode_unlock(dip);
+       /*hammer2_inode_unlock(dip);*/
 
        return (nip);
 }
@@ -1212,6 +1224,8 @@ done:
  * and OBJTYPE (type).
  *
  * This returns a UNIX errno code, not a HAMMER2_ERROR_* code.
+ *
+ * Caller must hold dip locked.
  */
 int
 hammer2_dirent_create(hammer2_inode_t *dip, const char *name, size_t name_len,
@@ -1237,7 +1251,6 @@ hammer2_dirent_create(hammer2_inode_t *dip, const char *name, size_t name_len,
         * two different creates can end up with the same lhc so we
         * cannot depend on the OS to prevent the collision.
         */
-       hammer2_inode_lock(dip, 0);
        hammer2_inode_modify(dip);
 
        /*
@@ -1291,7 +1304,6 @@ hammer2_dirent_create(hammer2_inode_t *dip, const char *name, size_t name_len,
        hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
 done2:
        error = hammer2_error_to_errno(error);
-       hammer2_inode_unlock(dip);
 
        return error;
 }
diff --git a/sys/vfs/hammer2/hammer2_vfsops.c b/sys/vfs/hammer2/hammer2_vfsops.c
index 719c976..449cc86 100644
@@ -2447,8 +2447,8 @@ hammer2_vfs_sync_pmp(hammer2_pfs_t *pmp, int waitfor)
         * Move all inodes on sideq to syncq.  This will clear sideq.
         * This should represent all flushable inodes.  These inodes
         * will already have refs due to being on syncq or sideq.  We
-        * must do this all at once to ensure that inode dependencies
-        * are part of the same flush.
+        * must do this all at once with the spinlock held to ensure that
+        * all inode dependencies are part of the same flush.
         *
         * We should be able to do this asynchronously from frontend
         * operations because we will be locking the inodes later on
@@ -2457,7 +2457,9 @@ hammer2_vfs_sync_pmp(hammer2_pfs_t *pmp, int waitfor)
         * inode and we will block, or it has not yet locked the inode
         * and it will block until we are finished flushing that inode.
         *
-        * When restarting, only move the inodes flagged as PASS2.
+        * When restarting, only move the inodes flagged as PASS2 from
+        * SIDEQ to SYNCQ.  PASS2 propagation by inode_lock4() and
+        * inode_depend() is atomic with the spin-lock.
         */
        hammer2_trans_init(pmp, HAMMER2_TRANS_ISFLUSH);
 #ifdef HAMMER2_DEBUG_SYNC
@@ -2470,8 +2472,8 @@ restart:
 #endif
        hammer2_trans_setflags(pmp, HAMMER2_TRANS_COPYQ);
        hammer2_trans_clearflags(pmp, HAMMER2_TRANS_RESCAN);
-       hammer2_spin_ex(&pmp->list_spin);
 
+       hammer2_spin_ex(&pmp->list_spin);
        ipdrop = TAILQ_FIRST(&pmp->sideq);
        while ((ip = ipdrop) != NULL) {
                ipdrop = TAILQ_NEXT(ip, entry);
@@ -2553,8 +2555,10 @@ restart:
                if (vp) {
                        if (vget(vp, LK_EXCLUSIVE|LK_NOWAIT)) {
                                /*
-                                * Failed, move to SIDEQ.  It may already be
-                                * on the SIDEQ if we lost a race.
+                                * Failed to get the vnode, we have to
+                                * make sure the inode is on SYNCQ or SIDEQ.
+                                * It is already flagged PASS2. Then unlock,
+                                * possibly sleep, and retry later.
                                 */
                                vp = NULL;
                                dorestart = 1;
@@ -2565,21 +2569,12 @@ restart:
                                hammer2_spin_ex(&pmp->list_spin);
                                if ((ip->flags & (HAMMER2_INODE_SYNCQ |
                                                  HAMMER2_INODE_SIDEQ)) == 0) {
-                                       /* XXX PASS2 redundant */
                                        atomic_set_int(&ip->flags,
-                                                  HAMMER2_INODE_SIDEQ |
-                                                  HAMMER2_INODE_SYNCQ_PASS2);
+                                                  HAMMER2_INODE_SIDEQ);
                                        TAILQ_INSERT_TAIL(&pmp->sideq, ip,
                                                          entry);
                                        hammer2_spin_unex(&pmp->list_spin);
                                        hammer2_mtx_unlock(&ip->lock);
-                               } else if (ip->flags & HAMMER2_INODE_SIDEQ) {
-                                       /* XXX PASS2 redundant */
-                                       atomic_set_int(&ip->flags,
-                                                  HAMMER2_INODE_SYNCQ_PASS2);
-                                       hammer2_spin_unex(&pmp->list_spin);
-                                       hammer2_mtx_unlock(&ip->lock);
-                                       hammer2_inode_drop(ip);
                                } else {
                                        hammer2_spin_unex(&pmp->list_spin);
                                        hammer2_mtx_unlock(&ip->lock);
diff --git a/sys/vfs/hammer2/hammer2_vnops.c b/sys/vfs/hammer2/hammer2_vnops.c
index eb8afd7..083e586 100644
@@ -1415,8 +1415,12 @@ hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
                }
                *ap->a_vpp = NULL;
        } else {
+               /*
+                * inode_depend() must occur before the igetv() because
+                * the igetv() can temporarily release the inode lock.
+                */
+               hammer2_inode_depend(dip, nip); /* before igetv */
                *ap->a_vpp = hammer2_igetv(nip, &error);
-               hammer2_inode_depend(dip, nip);
                hammer2_inode_unlock(nip);
        }
 
@@ -1623,8 +1627,8 @@ hammer2_vop_ncreate(struct vop_ncreate_args *ap)
                }
                *ap->a_vpp = NULL;
        } else {
+               hammer2_inode_depend(dip, nip); /* before igetv */
                *ap->a_vpp = hammer2_igetv(nip, &error);
-               hammer2_inode_depend(dip, nip);
                hammer2_inode_unlock(nip);
        }
 
@@ -1701,8 +1705,8 @@ hammer2_vop_nmknod(struct vop_nmknod_args *ap)
                }
                *ap->a_vpp = NULL;
        } else {
+               hammer2_inode_depend(dip, nip); /* before igetv */
                *ap->a_vpp = hammer2_igetv(nip, &error);
-               hammer2_inode_depend(dip, nip);
                hammer2_inode_unlock(nip);
        }
 
@@ -1785,8 +1789,8 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
                hammer2_trans_done(dip->pmp, HAMMER2_TRANS_SIDEQ);
                return error;
        }
+       hammer2_inode_depend(dip, nip); /* before igetv */
        *ap->a_vpp = hammer2_igetv(nip, &error);
-       hammer2_inode_depend(dip, nip);
 
        /*
         * Build the softlink (~like file data) and finalize the namecache.
@@ -1984,7 +1988,7 @@ hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
                hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
                if (ip) {
                        hammer2_inode_unlink_finisher(ip, isopen);
-                       hammer2_inode_depend(dip, ip);
+                       hammer2_inode_depend(dip, ip);  /* after modified */
                        hammer2_inode_unlock(ip);
                }
        } else {