hammer2 - refactor filesystem sync 4/N
author: Matthew Dillon <dillon@apollo.backplane.com>
Tue, 13 Nov 2018 21:32:33 +0000 (13:32 -0800)
committer: Matthew Dillon <dillon@apollo.backplane.com>
Wed, 5 Dec 2018 18:28:39 +0000 (10:28 -0800)
* Save synchronized iroot blockmaps for snapshot code, and use them
  in the snapshot code.

* Improve dependency handling and syncq/sideq flagging for
  dependencies.  Also improve the hammer2_inode_t reordering
  code that allows the frontend to continue operating on dirty
  inodes simultaneously with a filesystem sync.

* Move inode deletion into the filesystem sync code (in addition to
  creation), for the same reason.

* Fix lost ref counts in the snapshot code which were causing umount
  panics.

* Stabilization pass on volume flush code.  Since flushes stop at
  inode boundaries, we must properly flush the superroot before
  flushing the volume header.  That is, the flush sequence is:

  - flush inodes for PFS  (flushes inode content)
  - flush PFS root inode  (flushes through to inodes)
  - flush superroot inode (flushes through to PFS root)
  - flush volume header   (flushes volume header to superroot)

  Theoretically this allows the filesystem to asynchronously write data
  and inodes flushed by the kernel's buffer cache and vnode code
  concurrent with a filesystem flush without messing up filesystem
  consistency, because these asynchronously flushed inodes are not
  included (or have already been flushed) in the filesystem flush that
  is already underway.

* Filesystem consistency is still not perfect: when testing with the
  snapshot-debug directive under heavy filesystem modification loads,
  directory entries are sometimes desynchronized from their inodes.

sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_flush.c
sys/vfs/hammer2/hammer2_inode.c
sys/vfs/hammer2/hammer2_ioctl.c
sys/vfs/hammer2/hammer2_vfsops.c
sys/vfs/hammer2/hammer2_vnops.c

index d3589f6..f32b8e8 100644 (file)
@@ -768,7 +768,7 @@ typedef struct hammer2_inode hammer2_inode_t;
 #define HAMMER2_INODE_RENAME_INPROG    0x0004
 #define HAMMER2_INODE_ONRBTREE         0x0008
 #define HAMMER2_INODE_RESIZED          0x0010  /* requires inode_fsync */
-#define HAMMER2_INODE_ISDELETED                0x0020  /* deleted */
+#define HAMMER2_INODE_UNUSED0020       0x0020
 #define HAMMER2_INODE_ISUNLINKED       0x0040
 #define HAMMER2_INODE_METAGOOD         0x0080  /* inode meta-data good */
 #define HAMMER2_INODE_SIDEQ            0x0100  /* on side processing queue */
@@ -804,6 +804,7 @@ typedef struct hammer2_trans hammer2_trans_t;
 #define HAMMER2_TRANS_SIDEQ            0x20000000      /* run sideq */
 #define HAMMER2_TRANS_COPYQ            0x10000000      /* sideq->syncq */
 #define HAMMER2_TRANS_WAITING          0x08000000      /* someone waiting */
+#define HAMMER2_TRANS_RESCAN           0x04000000      /* rescan sideq */
 #define HAMMER2_TRANS_MASK             0x00FFFFFF      /* count mask */
 
 #define HAMMER2_FREEMAP_HEUR_NRADIX    4       /* pwr 2 PBUFRADIX-MINIORADIX */
@@ -1098,6 +1099,7 @@ typedef struct hammer2_xop_group hammer2_xop_group_t;
 #define HAMMER2_XOP_INODE_STOP         0x00000004
 #define HAMMER2_XOP_VOLHDR             0x00000008
 #define HAMMER2_XOP_FSSYNC             0x00000010
+#define HAMMER2_XOP_IROOT              0x00000020
 
 /*
  * Global (per partition) management structure, represents a hard block
@@ -1208,6 +1210,7 @@ struct hammer2_pfs {
        uint8_t                 pfs_types[HAMMER2_MAXCLUSTER];
        char                    *pfs_names[HAMMER2_MAXCLUSTER];
        hammer2_dev_t           *pfs_hmps[HAMMER2_MAXCLUSTER];
+       hammer2_blockset_t      pfs_iroot_blocksets[HAMMER2_MAXCLUSTER];
        hammer2_trans_t         trans;
        struct lock             lock;           /* PFS lock for certain ops */
        struct lock             lock_nlink;     /* rename and nlink lock */
@@ -1524,6 +1527,7 @@ hammer2_inode_t *hammer2_inode_create_pfs(hammer2_pfs_t *spmp,
                        const uint8_t *name, size_t name_len,
                        int *errorp);
 int hammer2_inode_chain_ins(hammer2_inode_t *ip);
+int hammer2_inode_chain_des(hammer2_inode_t *ip);
 int hammer2_inode_chain_sync(hammer2_inode_t *ip);
 int hammer2_inode_chain_flush(hammer2_inode_t *ip, int flags);
 int hammer2_inode_unlink_finisher(hammer2_inode_t *ip, int isopen);
index 7df3e9d..8628510 100644 (file)
@@ -530,10 +530,10 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain,
         * flush-through such situations. XXX removed
         */
        if ((chain->flags & HAMMER2_CHAIN_PFSBOUNDARY) &&
-                  /* (chain->flags & HAMMER2_CHAIN_UPDATE) == 0 && */
-                  (flags & HAMMER2_FLUSH_ALL) == 0 &&
-                  (flags & HAMMER2_FLUSH_TOP) == 0 &&
-                  chain->pmp && chain->pmp->mp) {
+           /* (chain->flags & HAMMER2_CHAIN_UPDATE) == 0 && */
+           (flags & HAMMER2_FLUSH_ALL) == 0 &&
+           (flags & HAMMER2_FLUSH_TOP) == 0 &&
+           chain->pmp && chain->pmp->mp) {
                /*
                 * If FLUSH_ALL is not specified the caller does not want
                 * to recurse through PFS roots that have been mounted.
@@ -927,9 +927,6 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain,
 
                        KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0);
                        hammer2_chain_setcheck(chain, chain->data);
-
-                               hammer2_inode_data_t *ipdata;
-                       ipdata = &chain->data->ipdata;
                        break;
                default:
                        KKASSERT(chain->flags & HAMMER2_CHAIN_EMBEDDED);
@@ -1000,11 +997,20 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain,
         * occurs at the wrong time.
         */
        if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
+           (flags & HAMMER2_FLUSH_INODE_STOP) &&
            (flags & HAMMER2_FLUSH_FSSYNC) == 0 &&
            (flags & HAMMER2_FLUSH_ALL) == 0 &&
            chain->pmp && chain->pmp->mp) {
+#ifdef HAMMER2_DEBUG_SYNC
+               kprintf("inum %ld do not update parent, non-fssync\n",
+                       (long)chain->bref.key);
+#endif
                goto skipupdate;
        }
+#ifdef HAMMER2_DEBUG_SYNC
+       if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
+               kprintf("inum %ld update parent\n", (long)chain->bref.key);
+#endif
 
        /*
         * The chain may need its blockrefs updated in the parent, normal
@@ -1264,6 +1270,8 @@ hammer2_flush_recurse(hammer2_chain_t *child, void *data)
                        if (child->flags & HAMMER2_CHAIN_FLUSH_MASK) {
                                hammer2_chain_setflush(parent);
                        }
+                       kprintf("inum %ld do not dive root inode\n",
+                               (long)parent->bref.key);
                        goto done;
                }
        }
@@ -1332,7 +1340,9 @@ hammer2_xop_inode_flush(hammer2_xop_t *arg, void *scratch __unused, int clindex)
 {
        hammer2_xop_flush_t *xop = &arg->xop_flush;
        hammer2_chain_t *chain;
+       hammer2_inode_t *ip;
        hammer2_dev_t *hmp;
+       hammer2_pfs_t *pmp;
        int flush_error = 0;
        int fsync_error = 0;
        int total_error = 0;
@@ -1349,8 +1359,9 @@ hammer2_xop_inode_flush(hammer2_xop_t *arg, void *scratch __unused, int clindex)
        /*
         * Flush core chains
         */
-       chain = hammer2_inode_chain(xop->head.ip1, clindex,
-                                   HAMMER2_RESOLVE_ALWAYS);
+       ip = xop->head.ip1;
+       pmp = ip->pmp;
+       chain = hammer2_inode_chain(ip, clindex, HAMMER2_RESOLVE_ALWAYS);
        if (chain) {
                hmp = chain->hmp;
                if (chain->flags & HAMMER2_CHAIN_FLUSH_MASK) {
@@ -1364,6 +1375,14 @@ hammer2_xop_inode_flush(hammer2_xop_t *arg, void *scratch __unused, int clindex)
                                hammer2_chain_setflush(chain->parent);
                        hammer2_flush(chain, xflags);
 
+                       /* XXX cluster */
+                       if (ip == pmp->iroot && pmp != hmp->spmp) {
+                               hammer2_spin_ex(&pmp->inum_spin);
+                               pmp->pfs_iroot_blocksets[clindex] =
+                                       chain->data->ipdata.u.blockset;
+                               hammer2_spin_unex(&pmp->inum_spin);
+                       }
+
 #if 0
                        /*
                         * Propogate upwards but only cross an inode boundary
@@ -1401,7 +1420,7 @@ hammer2_xop_inode_flush(hammer2_xop_t *arg, void *scratch __unused, int clindex)
         * flush each hammer2_dev (hmp) once.
         */
        for (j = clindex - 1; j >= 0; --j) {
-               if ((chain = xop->head.ip1->cluster.array[j].chain) != NULL) {
+               if ((chain = ip->cluster.array[j].chain) != NULL) {
                        if (chain->hmp == hmp) {
                                chain = NULL;   /* safety */
                                goto skip;
@@ -1418,6 +1437,27 @@ hammer2_xop_inode_flush(hammer2_xop_t *arg, void *scratch __unused, int clindex)
         */
        hammer2_trans_init(hmp->spmp, HAMMER2_TRANS_ISFLUSH);
 
+       /*
+        * We must flush the superroot down to the PFS iroot.  Remember
+        * that hammer2_chain_setflush() stops at inode boundaries, so
+        * the pmp->iroot has been flushed and flagged down to the superroot,
+        * but the volume root (vchain) probably has not yet been flagged.
+        */
+       if (hmp->spmp->iroot) {
+               chain = hmp->spmp->iroot->cluster.array[0].chain;
+               if (chain) {
+                       hammer2_chain_ref(chain);
+                       hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
+                       flush_error |=
+                               hammer2_flush(chain,
+                                             HAMMER2_FLUSH_TOP |
+                                             HAMMER2_FLUSH_INODE_STOP |
+                                             HAMMER2_FLUSH_FSSYNC);
+                       hammer2_chain_unlock(chain);
+                       hammer2_chain_drop(chain);
+               }
+       }
+
        /*
         * Media mounts have two 'roots', vchain for the topology
         * and fchain for the free block table.  Flush both.
index 825c244..68d2834 100644 (file)
@@ -245,14 +245,16 @@ restart:
                        TAILQ_REMOVE(&pmp->syncq, iptmp, entry);
                        TAILQ_INSERT_HEAD(&pmp->syncq, iptmp, entry);
                } else if (iptmp->flags & HAMMER2_INODE_SIDEQ) {
-                       atomic_set_int(&iptmp->flags, HAMMER2_INODE_SYNCQ);
-                       atomic_clear_int(&iptmp->flags, HAMMER2_INODE_SIDEQ);
-                       TAILQ_REMOVE(&pmp->sideq, iptmp, entry);
-                       TAILQ_INSERT_HEAD(&pmp->syncq, iptmp, entry);
+                       atomic_set_int(&iptmp->flags,
+                                      HAMMER2_INODE_SYNCQ_PASS2);
+                       hammer2_trans_setflags(pmp, HAMMER2_TRANS_RESCAN);
                } else {
-                       atomic_set_int(&iptmp->flags, HAMMER2_INODE_SYNCQ);
+                       atomic_set_int(&iptmp->flags,
+                                      HAMMER2_INODE_SIDEQ |
+                                      HAMMER2_INODE_SYNCQ_PASS2);
+                       TAILQ_INSERT_TAIL(&pmp->sideq, iptmp, entry);
                        hammer2_inode_ref(iptmp);
-                       TAILQ_INSERT_HEAD(&pmp->syncq, iptmp, entry);
+                       hammer2_trans_setflags(pmp, HAMMER2_TRANS_RESCAN);
                }
                hammer2_spin_unex(&pmp->list_spin);
        }
@@ -317,8 +319,8 @@ hammer2_inode_unlock(hammer2_inode_t *ip)
 }
 
 /*
- * If either ip1 or ip2 are on SYNCQ, make sure the other one is too.
- * This ensure that dependencies (e.g. directory-v-inode) are flushed
+ * If either ip1 or ip2 have been tapped by the syncer, make sure that both
+ * are.  This ensures that dependencies (e.g. inode-vs-dirent) are synced
  * together.
  *
  * We must also check SYNCQ_PASS2, which occurs when the syncer cannot
@@ -334,8 +336,11 @@ hammer2_inode_depend(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
        hammer2_pfs_t *pmp;
 
        pmp = ip1->pmp;
-       if (((ip1->flags | ip2->flags) & HAMMER2_INODE_SYNCQ) == 0)
+
+       if (((ip1->flags | ip2->flags) & (HAMMER2_INODE_SYNCQ |
+                                         HAMMER2_INODE_SYNCQ_PASS2)) == 0) {
                return;
+       }
        if ((ip1->flags & (HAMMER2_INODE_SYNCQ |
                           HAMMER2_INODE_SYNCQ_PASS2)) &&
            (ip2->flags & (HAMMER2_INODE_SYNCQ |
@@ -347,28 +352,28 @@ hammer2_inode_depend(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
        if ((ip1->flags & (HAMMER2_INODE_SYNCQ |
                           HAMMER2_INODE_SYNCQ_PASS2)) == 0) {
                if (ip1->flags & HAMMER2_INODE_SIDEQ) {
-                       atomic_set_int(&ip1->flags, HAMMER2_INODE_SYNCQ);
-                       atomic_clear_int(&ip1->flags, HAMMER2_INODE_SIDEQ);
-                       TAILQ_REMOVE(&pmp->sideq, ip1, entry);
-                       TAILQ_INSERT_TAIL(&pmp->syncq, ip1, entry);
+                       atomic_set_int(&ip1->flags,
+                                      HAMMER2_INODE_SYNCQ_PASS2);
                } else {
-                       atomic_set_int(&ip1->flags, HAMMER2_INODE_SYNCQ);
+                       atomic_set_int(&ip1->flags, HAMMER2_INODE_SIDEQ |
+                                                   HAMMER2_INODE_SYNCQ_PASS2);
                        hammer2_inode_ref(ip1);
-                       TAILQ_INSERT_TAIL(&pmp->syncq, ip1, entry);
+                       TAILQ_INSERT_TAIL(&pmp->sideq, ip1, entry);
                }
+               hammer2_trans_setflags(pmp, HAMMER2_TRANS_RESCAN);
        }
        if ((ip2->flags & (HAMMER2_INODE_SYNCQ |
                           HAMMER2_INODE_SYNCQ_PASS2)) == 0) {
                if (ip2->flags & HAMMER2_INODE_SIDEQ) {
-                       atomic_set_int(&ip2->flags, HAMMER2_INODE_SYNCQ);
-                       atomic_clear_int(&ip2->flags, HAMMER2_INODE_SIDEQ);
-                       TAILQ_REMOVE(&pmp->sideq, ip2, entry);
-                       TAILQ_INSERT_TAIL(&pmp->syncq, ip2, entry);
+                       atomic_set_int(&ip2->flags,
+                                      HAMMER2_INODE_SYNCQ_PASS2);
                } else {
-                       atomic_set_int(&ip2->flags, HAMMER2_INODE_SYNCQ);
+                       atomic_set_int(&ip2->flags, HAMMER2_INODE_SIDEQ |
+                                                   HAMMER2_INODE_SYNCQ_PASS2);
                        hammer2_inode_ref(ip2);
-                       TAILQ_INSERT_TAIL(&pmp->syncq, ip2, entry);
+                       TAILQ_INSERT_TAIL(&pmp->sideq, ip2, entry);
                }
+               hammer2_trans_setflags(pmp, HAMMER2_TRANS_RESCAN);
        }
        hammer2_spin_unex(&pmp->list_spin);
 }
@@ -1233,6 +1238,7 @@ hammer2_dirent_create(hammer2_inode_t *dip, const char *name, size_t name_len,
         * cannot depend on the OS to prevent the collision.
         */
        hammer2_inode_lock(dip, 0);
+       hammer2_inode_modify(dip);
 
        /*
         * If name specified, locate an unused key in the collision space.
@@ -1489,18 +1495,27 @@ hammer2_inode_unlink_finisher(hammer2_inode_t *ip, int isopen)
         * decrement nlinks for the 1->0 transition
         *
         * Put the inode on the sideq to ensure that any disconnected chains
-        * get properly flushed (so they can be freed).
+        * get properly flushed (so they can be freed).  Defer the deletion
+        * to the sync code, doing it now will desynchronize the inode from
+        * related directory entries (which is bad).
+        *
+        * NOTE: killit can be reached without modifying the inode, so
+        *       make sure that it is on the SIDEQ.
         */
        if (isopen == 0) {
+#if 0
                hammer2_xop_destroy_t *xop;
+#endif
 
 killit:
+               atomic_set_int(&ip->flags, HAMMER2_INODE_DELETING);
                hammer2_inode_delayed_sideq(ip);
-               atomic_set_int(&ip->flags, HAMMER2_INODE_ISDELETED);
+#if 0
                xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
                hammer2_xop_start(&xop->head, &hammer2_inode_destroy_desc);
                error = hammer2_xop_collect(&xop->head, 0);
                hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
+#endif
        }
        error = 0;      /* XXX */
 
@@ -1621,6 +1636,41 @@ hammer2_inode_chain_ins(hammer2_inode_t *ip)
        return error;
 }
 
+/*
+ * When an inode is flagged INODE_DELETING it has been deleted (no directory
+ * entry or open refs are left, though as an optimization H2 might leave
+ * nlinks == 1 to avoid unnecessary block updates).  The backend flush then
+ * needs to actually remove it from the topology.
+ *
+ * If HAMMER2_INODE_DELETING is set on ip, the flag is cleared and the
+ * inode-destroy XOP is run and collected.  A HAMMER2_ERROR_ENOENT result
+ * is treated as success.
+ *
+ * Returns 0 if the flag was not set or the destroy succeeded, otherwise
+ * the error collected from the XOP (which is also logged).
+ *
+ * NOTE: backend flush must still sync and flush the deleted inode to clean
+ *      out related chains.
+ */
+int
+hammer2_inode_chain_des(hammer2_inode_t *ip)
+{
+       int error;
+
+       error = 0;
+       if (ip->flags & HAMMER2_INODE_DELETING) {
+               hammer2_xop_destroy_t *xop;
+
+               /* Clear the flag before issuing the destroy XOP */
+               atomic_clear_int(&ip->flags, HAMMER2_INODE_DELETING);
+               xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
+               hammer2_xop_start(&xop->head, &hammer2_inode_destroy_desc);
+               error = hammer2_xop_collect(&xop->head, 0);
+               hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
+
+               /* ENOENT from the destroy XOP is not reported as a failure */
+               if (error == HAMMER2_ERROR_ENOENT)
+                       error = 0;
+               if (error) {
+                       kprintf("hammer2: backend unable to "
+                               "delete inode %p %ld\n",
+                               ip, (long)ip->meta.inum);
+                       /* XXX return error somehow? */
+               }
+       }
+       return error;
+}
+
 /*
  * Flushes the inode's chain and its sub-topology to media.  Interlocks
  * HAMMER2_INODE_DIRTYDATA by clearing it prior to the flush.  Any strategy
@@ -1645,104 +1695,3 @@ hammer2_inode_chain_flush(hammer2_inode_t *ip, int flags)
 
        return error;
 }
-
-#if 0
-/*
- * The normal filesystem sync no longer has visibility to an inode structure
- * after its vnode has been reclaimed.  In this situation a dirty inode may
- * require additional processing to synchronize ip->meta to its underlying
- * cluster nodes.
- *
- * In particular, reclaims can occur in almost any state (for example, when
- * doing operations on unrelated vnodes) and flushing the reclaimed inode
- * in the reclaim path itself is a non-starter.
- *
- * Caller must be in a transaction.
- */
-void
-hammer2_inode_run_sideq(hammer2_pfs_t *pmp, int doall)
-{
-       hammer2_xop_destroy_t *xop;
-       hammer2_inode_sideq_t *ipul;
-       hammer2_inode_t *ip;
-       int error;
-
-       /*
-        * Nothing to do if sideq is empty or (if doall == 0) there just
-        * aren't very many sideq entries.
-        */
-       if (TAILQ_EMPTY(&pmp->sideq))
-               return;
-       if (doall == 0) {
-               if (pmp->sideq_count > (pmp->inum_count >> 3)) {
-                       if (hammer2_debug & 0x0001) {
-                               kprintf("hammer2: flush sideq %ld/%ld\n",
-                                       pmp->sideq_count, pmp->inum_count);
-                       }
-               }
-       }
-
-       if (doall == 0 && pmp->sideq_count <= (pmp->inum_count >> 3))
-               return;
-
-       hammer2_spin_ex(&pmp->list_spin);
-       while ((ipul = TAILQ_FIRST(&pmp->sideq)) != NULL) {
-               TAILQ_REMOVE(&pmp->sideq, ipul, entry);
-               --pmp->sideq_count;
-               ip = ipul->ip;
-               KKASSERT(ip->flags & HAMMER2_INODE_ONSIDEQ);
-               atomic_clear_int(&ip->flags, HAMMER2_INODE_ONSIDEQ);
-               hammer2_spin_unex(&pmp->list_spin);
-               kfree(ipul, pmp->minode);
-
-               hammer2_inode_lock(ip, 0);
-               if (ip->flags & HAMMER2_INODE_ISDELETED) {
-                       /*
-                        * The inode has already been deleted.  This is a
-                        * fairly rare circumstance.  For now we don't rock
-                        * the boat and synchronize it normally.
-                        */
-                       hammer2_inode_chain_sync(ip);
-                       hammer2_inode_chain_flush(ip);
-               } else if (ip->flags & HAMMER2_INODE_ISUNLINKED) {
-                       /*
-                        * The inode was unlinked while open.  The inode must
-                        * be deleted and destroyed.
-                        */
-                       xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
-                       hammer2_xop_start(&xop->head,
-                                         &hammer2_inode_destroy_desc);
-                       error = hammer2_xop_collect(&xop->head, 0);
-                       /* XXX error handling */
-                       hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
-               } else {
-                       /*
-                        * The inode was dirty as-of the reclaim, requiring
-                        * synchronization of ip->meta with its underlying
-                        * chains.
-                        */
-                       hammer2_inode_chain_sync(ip);
-                       hammer2_inode_chain_flush(ip);
-               }
-
-               hammer2_inode_unlock(ip);
-               hammer2_inode_drop(ip);                 /* ipul ref */
-
-               hammer2_spin_ex(&pmp->list_spin);
-
-               /*
-                * If doall is 0 the original sideq_count was greater than
-                * 1/8 the inode count.  Add some hysteresis in the loop,
-                * don't stop flushing until sideq_count drops below 1/16.
-                */
-               if (doall == 0 && pmp->sideq_count <= (pmp->inum_count >> 4)) {
-                       if (hammer2_debug & 0x0001) {
-                               kprintf("hammer2: flush sideq %ld/%ld (end)\n",
-                                       pmp->sideq_count, pmp->inum_count);
-                       }
-                       break;
-               }
-       }
-       hammer2_spin_unex(&pmp->list_spin);
-}
-#endif
index 960d6da..f7213c3 100644 (file)
@@ -621,7 +621,7 @@ hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
        nip = hammer2_inode_create_pfs(hmp->spmp, pfs->name, strlen(pfs->name),
                                       &error);
        if (error == 0) {
-               /* nip->flags |= HAMMER2_INODE_NOSIDEQ; */
+               atomic_set_int(&nip->flags, HAMMER2_INODE_NOSIDEQ);
                hammer2_inode_modify(nip);
                nchain = hammer2_inode_chain(nip, 0, HAMMER2_RESOLVE_ALWAYS);
                error = hammer2_chain_modify(nchain, mtid, 0, 0);
@@ -661,6 +661,7 @@ hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
                hammer2_inode_chain_flush(nip, HAMMER2_XOP_INODE_STOP |
                                               HAMMER2_XOP_FSSYNC);
                hammer2_inode_drop(nip);
+               /* nip is dead */
 
                /* 
                 * We still have a ref on the chain, relock and associate
@@ -676,7 +677,6 @@ hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
 
                hammer2_chain_unlock(nchain);
                hammer2_chain_drop(nchain);
-
        }
        hammer2_trans_done(hmp->spmp, HAMMER2_TRANS_ISFLUSH |
                                      HAMMER2_TRANS_SIDEQ);
@@ -784,7 +784,6 @@ hammer2_ioctl_pfs_delete(hammer2_inode_t *ip, void *data)
 static int
 hammer2_ioctl_pfs_snapshot(hammer2_inode_t *ip, void *data)
 {
-       const hammer2_inode_data_t *ripdata;
        hammer2_ioc_pfs_t *pfs = data;
        hammer2_dev_t   *hmp;
        hammer2_pfs_t   *pmp;
@@ -839,10 +838,6 @@ hammer2_ioctl_pfs_snapshot(hammer2_inode_t *ip, void *data)
        /*
         * Get the clid
         */
-       ripdata = &chain->data->ipdata;
-#if 0
-       opfs_clid = ripdata->meta.pfs_clid;
-#endif
        hmp = chain->hmp;
 
        /*
@@ -860,7 +855,6 @@ hammer2_ioctl_pfs_snapshot(hammer2_inode_t *ip, void *data)
        hammer2_chain_unlock(chain);
        nip = hammer2_inode_create_pfs(hmp->spmp, pfs->name, name_len, &error);
        hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
-       ripdata = &chain->data->ipdata;
 
        if (nip) {
                hammer2_dev_t *force_local;
@@ -868,7 +862,7 @@ hammer2_ioctl_pfs_snapshot(hammer2_inode_t *ip, void *data)
                hammer2_inode_data_t *wipdata;
                hammer2_key_t   starting_inum;
 
-               /* nip->flags |= HAMMER2_INODE_NOSIDEQ; */
+               atomic_set_int(&nip->flags, HAMMER2_INODE_NOSIDEQ);
                hammer2_inode_modify(nip);
                nchain = hammer2_inode_chain(nip, 0, HAMMER2_RESOLVE_ALWAYS);
                error = hammer2_chain_modify(nchain, mtid, 0, 0);
@@ -901,7 +895,9 @@ hammer2_ioctl_pfs_snapshot(hammer2_inode_t *ip, void *data)
                /* XXX hack blockset copy */
                /* XXX doesn't work with real cluster */
                wipdata->meta = nip->meta;
-               wipdata->u.blockset = ripdata->u.blockset;
+               hammer2_spin_ex(&pmp->inum_spin);
+               wipdata->u.blockset = pmp->pfs_iroot_blocksets[0];
+               hammer2_spin_unex(&pmp->inum_spin);
 
                KKASSERT(wipdata == &nchain->data->ipdata);
 
@@ -911,7 +907,9 @@ hammer2_ioctl_pfs_snapshot(hammer2_inode_t *ip, void *data)
                hammer2_inode_chain_sync(nip);
                hammer2_inode_chain_flush(nip, HAMMER2_XOP_INODE_STOP |
                                               HAMMER2_XOP_FSSYNC);
+                                              /* XXX | HAMMER2_XOP_VOLHDR */
                hammer2_inode_drop(nip);
+               /* nip is dead */
 
                force_local = (hmp->hflags & HMNT2_LOCAL) ? hmp : NULL;
 
index 88cac68..719c976 100644 (file)
@@ -110,6 +110,8 @@ long hammer2_iod_meta_write;
 long hammer2_iod_indr_write;
 long hammer2_iod_fmap_write;
 long hammer2_iod_volu_write;
+long hammer2_iod_inode_creates;
+long hammer2_iod_inode_deletes;
 
 MALLOC_DECLARE(M_HAMMER2_CBUFFER);
 MALLOC_DEFINE(M_HAMMER2_CBUFFER, "HAMMER2-compbuffer",
@@ -185,6 +187,10 @@ SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_fmap_write, CTLFLAG_RW,
           &hammer2_iod_fmap_write, 0, "");
 SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_volu_write, CTLFLAG_RW,
           &hammer2_iod_volu_write, 0, "");
+SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_inode_creates, CTLFLAG_RW,
+          &hammer2_iod_inode_creates, 0, "");
+SYSCTL_LONG(_vfs_hammer2, OID_AUTO, iod_inode_deletes, CTLFLAG_RW,
+          &hammer2_iod_inode_deletes, 0, "");
 
 long hammer2_process_icrc32;
 long hammer2_process_xxhash64;
@@ -488,6 +494,9 @@ hammer2_pfsalloc(hammer2_chain_t *chain,
                        pmp->pfs_types[j] = ripdata->meta.pfs_type;
                pmp->pfs_names[j] = kstrdup(ripdata->filename, M_HAMMER2);
                pmp->pfs_hmps[j] = chain->hmp;
+               hammer2_spin_ex(&pmp->inum_spin);
+               pmp->pfs_iroot_blocksets[j] = chain->data->ipdata.u.blockset;
+               hammer2_spin_unex(&pmp->inum_spin);
 
                /*
                 * If the PFS is already mounted we must account
@@ -659,6 +668,7 @@ hammer2_pfsfree(hammer2_pfs_t *pmp)
 {
        hammer2_inode_t *iroot;
        hammer2_chain_t *chain;
+       int chains_still_present = 0;
        int i;
        int j;
 
@@ -671,25 +681,6 @@ hammer2_pfsfree(hammer2_pfs_t *pmp)
        else
                TAILQ_REMOVE(&hammer2_pfslist, pmp, mntentry);
 
-       iroot = pmp->iroot;
-       if (iroot) {
-               for (i = 0; i < iroot->cluster.nchains; ++i) {
-                       hammer2_thr_delete(&pmp->sync_thrs[i]);
-                       for (j = 0; j < HAMMER2_XOPGROUPS; ++j)
-                               hammer2_thr_delete(&pmp->xop_groups[j].thrs[i]);
-               }
-#if REPORT_REFS_ERRORS
-               if (pmp->iroot->refs != 1)
-                       kprintf("PMP->IROOT %p REFS WRONG %d\n",
-                               pmp->iroot, pmp->iroot->refs);
-#else
-               KKASSERT(pmp->iroot->refs == 1);
-#endif
-               /* ref for pmp->iroot */
-               hammer2_inode_drop(pmp->iroot);
-               pmp->iroot = NULL;
-       }
-
        /*
         * Cleanup chains remaining on LRU list.
         */
@@ -708,12 +699,43 @@ hammer2_pfsfree(hammer2_pfs_t *pmp)
        hammer2_spin_unex(&pmp->lru_spin);
 
        /*
-        * Free remaining pmp resources
+        * Clean up iroot
         */
-       kmalloc_destroy(&pmp->mmsg);
-       kmalloc_destroy(&pmp->minode);
+       iroot = pmp->iroot;
+       if (iroot) {
+               for (i = 0; i < iroot->cluster.nchains; ++i) {
+                       hammer2_thr_delete(&pmp->sync_thrs[i]);
+                       for (j = 0; j < HAMMER2_XOPGROUPS; ++j)
+                               hammer2_thr_delete(&pmp->xop_groups[j].thrs[i]);
+                       chain = iroot->cluster.array[i].chain;
+                       if (chain && !RB_EMPTY(&chain->core.rbtree)) {
+                               kprintf("hammer2: Warning pmp %p still "
+                                       "has active chains\n", pmp);
+                               chains_still_present = 1;
+                       }
+               }
+#if REPORT_REFS_ERRORS
+               if (iroot->refs != 1)
+                       kprintf("PMP->IROOT %p REFS WRONG %d\n",
+                               iroot, iroot->refs);
+#else
+               KKASSERT(iroot->refs == 1);
+#endif
+               /* ref for iroot */
+               hammer2_inode_drop(iroot);
+               pmp->iroot = NULL;
+       }
 
-       kfree(pmp, M_HAMMER2);
+       /*
+        * Free remaining pmp resources
+        */
+       if (chains_still_present) {
+               kprintf("hammer2: cannot free pmp %p, still in use\n", pmp);
+       } else {
+               kmalloc_destroy(&pmp->mmsg);
+               kmalloc_destroy(&pmp->minode);
+               kfree(pmp, M_HAMMER2);
+       }
 }
 
 /*
@@ -728,7 +750,6 @@ hammer2_pfsfree_scan(hammer2_dev_t *hmp, int which)
        hammer2_pfs_t *pmp;
        hammer2_inode_t *iroot;
        hammer2_chain_t *rchain;
-       int didfreeze;
        int i;
        int j;
        struct hammer2_pfslist *wlist;
@@ -741,7 +762,6 @@ again:
        TAILQ_FOREACH(pmp, wlist, mntentry) {
                if ((iroot = pmp->iroot) == NULL)
                        continue;
-               hammer2_vfs_sync_pmp(pmp, MNT_WAIT);
 
                /*
                 * Determine if this PFS is affected.  If it is we must
@@ -755,73 +775,73 @@ again:
                        if (pmp->pfs_hmps[i] == hmp)
                                break;
                }
-               if (i != HAMMER2_MAXCLUSTER) {
-                       /*
-                        * Make sure all synchronization threads are locked
-                        * down.
-                        */
-                       for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
-                               if (pmp->pfs_hmps[i] == NULL)
-                                       continue;
-                               hammer2_thr_freeze_async(&pmp->sync_thrs[i]);
-                               for (j = 0; j < HAMMER2_XOPGROUPS; ++j) {
-                                       hammer2_thr_freeze_async(
-                                               &pmp->xop_groups[j].thrs[i]);
-                               }
+               if (i == HAMMER2_MAXCLUSTER)
+                       continue;
+
+               hammer2_vfs_sync_pmp(pmp, MNT_WAIT);
+
+               /*
+                * Make sure all synchronization threads are locked
+                * down.
+                */
+               for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
+                       if (pmp->pfs_hmps[i] == NULL)
+                               continue;
+                       hammer2_thr_freeze_async(&pmp->sync_thrs[i]);
+                       for (j = 0; j < HAMMER2_XOPGROUPS; ++j) {
+                               hammer2_thr_freeze_async(
+                                       &pmp->xop_groups[j].thrs[i]);
                        }
-                       for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
-                               if (pmp->pfs_hmps[i] == NULL)
-                                       continue;
-                               hammer2_thr_freeze(&pmp->sync_thrs[i]);
-                               for (j = 0; j < HAMMER2_XOPGROUPS; ++j) {
-                                       hammer2_thr_freeze(
-                                               &pmp->xop_groups[j].thrs[i]);
-                               }
+               }
+               for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
+                       if (pmp->pfs_hmps[i] == NULL)
+                               continue;
+                       hammer2_thr_freeze(&pmp->sync_thrs[i]);
+                       for (j = 0; j < HAMMER2_XOPGROUPS; ++j) {
+                               hammer2_thr_freeze(
+                                       &pmp->xop_groups[j].thrs[i]);
                        }
+               }
 
-                       /*
-                        * Lock the inode and clean out matching chains.
-                        * Note that we cannot use hammer2_inode_lock_*()
-                        * here because that would attempt to validate the
-                        * cluster that we are in the middle of ripping
-                        * apart.
-                        *
-                        * WARNING! We are working directly on the inodes
-                        *          embedded cluster.
-                        */
-                       hammer2_mtx_ex(&iroot->lock);
+               /*
+                * Lock the inode and clean out matching chains.
+                * Note that we cannot use hammer2_inode_lock_*()
+                * here because that would attempt to validate the
+                * cluster that we are in the middle of ripping
+                * apart.
+                *
+                * WARNING! We are working directly on the inodes
+                *          embedded cluster.
+                */
+               hammer2_mtx_ex(&iroot->lock);
 
-                       /*
-                        * Remove the chain from matching elements of the PFS.
-                        */
-                       for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
-                               if (pmp->pfs_hmps[i] != hmp)
-                                       continue;
-                               hammer2_thr_delete(&pmp->sync_thrs[i]);
-                               for (j = 0; j < HAMMER2_XOPGROUPS; ++j) {
-                                       hammer2_thr_delete(
-                                               &pmp->xop_groups[j].thrs[i]);
-                               }
-                               rchain = iroot->cluster.array[i].chain;
-                               iroot->cluster.array[i].chain = NULL;
-                               pmp->pfs_types[i] = 0;
-                               if (pmp->pfs_names[i]) {
-                                       kfree(pmp->pfs_names[i], M_HAMMER2);
-                                       pmp->pfs_names[i] = NULL;
-                               }
-                               if (rchain) {
-                                       hammer2_chain_drop(rchain);
-                                       /* focus hint */
-                                       if (iroot->cluster.focus == rchain)
-                                               iroot->cluster.focus = NULL;
-                               }
-                               pmp->pfs_hmps[i] = NULL;
+               /*
+                * Remove the chain from matching elements of the PFS.
+                */
+               for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
+                       if (pmp->pfs_hmps[i] != hmp)
+                               continue;
+                       hammer2_thr_delete(&pmp->sync_thrs[i]);
+                       for (j = 0; j < HAMMER2_XOPGROUPS; ++j) {
+                               hammer2_thr_delete(
+                                       &pmp->xop_groups[j].thrs[i]);
                        }
-                       hammer2_mtx_unlock(&iroot->lock);
-                       didfreeze = 1;  /* remaster, unfreeze down below */
-               } else {
-                       didfreeze = 0;
+                       rchain = iroot->cluster.array[i].chain;
+                       iroot->cluster.array[i].chain = NULL;
+                       pmp->pfs_types[i] = 0;
+                       if (pmp->pfs_names[i]) {
+                               kfree(pmp->pfs_names[i], M_HAMMER2);
+                               pmp->pfs_names[i] = NULL;
+                       }
+                       if (rchain) {
+                               hammer2_chain_drop(rchain);
+                               /* focus hint */
+                               if (iroot->cluster.focus == rchain)
+                                       iroot->cluster.focus = NULL;
+                       }
+                       pmp->pfs_hmps[i] = NULL;
                }
+               hammer2_mtx_unlock(&iroot->lock);
 
                /*
                 * Cleanup trailing chains.  Gaps may remain.
@@ -851,6 +871,7 @@ again:
                         * Free the pmp and restart the loop
                         */
                        KKASSERT(TAILQ_EMPTY(&pmp->sideq));
+                       KKASSERT(TAILQ_EMPTY(&pmp->syncq));
                        hammer2_pfsfree(pmp);
                        goto again;
                }
@@ -859,18 +880,16 @@ again:
                 * If elements still remain we need to set the REMASTER
                 * flag and unfreeze it.
                 */
-               if (didfreeze) {
-                       for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
-                               if (pmp->pfs_hmps[i] == NULL)
-                                       continue;
-                               hammer2_thr_remaster(&pmp->sync_thrs[i]);
-                               hammer2_thr_unfreeze(&pmp->sync_thrs[i]);
-                               for (j = 0; j < HAMMER2_XOPGROUPS; ++j) {
-                                       hammer2_thr_remaster(
-                                               &pmp->xop_groups[j].thrs[i]);
-                                       hammer2_thr_unfreeze(
-                                               &pmp->xop_groups[j].thrs[i]);
-                               }
+               for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
+                       if (pmp->pfs_hmps[i] == NULL)
+                               continue;
+                       hammer2_thr_remaster(&pmp->sync_thrs[i]);
+                       hammer2_thr_unfreeze(&pmp->sync_thrs[i]);
+                       for (j = 0; j < HAMMER2_XOPGROUPS; ++j) {
+                               hammer2_thr_remaster(
+                                       &pmp->xop_groups[j].thrs[i]);
+                               hammer2_thr_unfreeze(
+                                       &pmp->xop_groups[j].thrs[i]);
                        }
                }
        }
@@ -1722,7 +1741,9 @@ again:
 
        hammer2_bulkfree_uninit(hmp);
        hammer2_pfsfree_scan(hmp, 0);
+#if 0
        hammer2_dev_exlock(hmp);        /* XXX order */
+#endif
 
        /*
         * Cycle the volume data lock as a safety (probably not needed any
@@ -1833,7 +1854,9 @@ again:
        hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt, 'v', (u_int)-1);
        dumpcnt = 50;
        hammer2_dump_chain(&hmp->fchain, 0, &dumpcnt, 'f', (u_int)-1);
+#if 0
        hammer2_dev_unlock(hmp);
+#endif
        hammer2_chain_drop(&hmp->vchain);
 
        hammer2_io_cleanup(hmp, &hmp->iotree);
@@ -2409,9 +2432,8 @@ int
 hammer2_vfs_sync_pmp(hammer2_pfs_t *pmp, int waitfor)
 {
        struct mount *mp;
-       hammer2_xop_flush_t *xop;
+       /*hammer2_xop_flush_t *xop;*/
        /*struct hammer2_sync_info info;*/
-       hammer2_inode_t *iroot;
        hammer2_inode_t *ip;
        hammer2_inode_t *ipdrop;
        struct vnode *vp;
@@ -2420,9 +2442,6 @@ hammer2_vfs_sync_pmp(hammer2_pfs_t *pmp, int waitfor)
        int dorestart;
 
        mp = pmp->mp;
-       iroot = pmp->iroot;
-       KKASSERT(iroot);
-       KKASSERT(iroot->pmp == pmp);
 
        /*
         * Move all inodes on sideq to syncq.  This will clear sideq.
@@ -2441,30 +2460,30 @@ hammer2_vfs_sync_pmp(hammer2_pfs_t *pmp, int waitfor)
         * When restarting, only move the inodes flagged as PASS2.
         */
        hammer2_trans_init(pmp, HAMMER2_TRANS_ISFLUSH);
+#ifdef HAMMER2_DEBUG_SYNC
+       kprintf("FILESYSTEM SYNC BOUNDARY\n");
+#endif
+       dorestart = 0;
 restart:
+#ifdef HAMMER2_DEBUG_SYNC
+       kprintf("FILESYSTEM SYNC RESTART (%d)\n", dorestart);
+#endif
        hammer2_trans_setflags(pmp, HAMMER2_TRANS_COPYQ);
+       hammer2_trans_clearflags(pmp, HAMMER2_TRANS_RESCAN);
        hammer2_spin_ex(&pmp->list_spin);
-       if (dorestart == 0) {
-               TAILQ_FOREACH(ip, &pmp->sideq, entry) {
-                       KKASSERT(ip->flags & HAMMER2_INODE_SIDEQ);
+
+       ipdrop = TAILQ_FIRST(&pmp->sideq);
+       while ((ip = ipdrop) != NULL) {
+               ipdrop = TAILQ_NEXT(ip, entry);
+               KKASSERT(ip->flags & HAMMER2_INODE_SIDEQ);
+               if (dorestart == 0 ||
+                   (ip->flags & HAMMER2_INODE_SYNCQ_PASS2)) {
+                       TAILQ_REMOVE(&pmp->sideq, ip, entry);
+                       TAILQ_INSERT_TAIL(&pmp->syncq, ip, entry);
                        atomic_set_int(&ip->flags, HAMMER2_INODE_SYNCQ);
-                       atomic_clear_int(&ip->flags, HAMMER2_INODE_SIDEQ);
-               }
-               TAILQ_CONCAT(&pmp->syncq, &pmp->sideq, entry);
-               pmp->sideq_count = 0;
-       } else {
-               ipdrop = TAILQ_FIRST(&pmp->sideq);
-               while ((ip = ipdrop) != NULL) {
-                       ipdrop = TAILQ_NEXT(ip, entry);
-                       KKASSERT(ip->flags & HAMMER2_INODE_SIDEQ);
-                       if (ip->flags & HAMMER2_INODE_SYNCQ_PASS2) {
-                               TAILQ_REMOVE(&pmp->sideq, ip, entry);
-                               TAILQ_INSERT_TAIL(&pmp->syncq, ip, entry);
-                               atomic_set_int(&ip->flags, HAMMER2_INODE_SYNCQ);
-                               atomic_clear_int(&ip->flags,
-                                                HAMMER2_INODE_SIDEQ);
-                               --pmp->sideq_count;
-                       }
+                       atomic_clear_int(&ip->flags,
+                                        HAMMER2_INODE_SIDEQ);
+                       --pmp->sideq_count;
                }
        }
        hammer2_spin_unex(&pmp->list_spin);
@@ -2492,9 +2511,9 @@ restart:
                cpu_ccfence();
                if (atomic_cmpset_int(&ip->flags,
                              pass2,
-                             pass2 & ~(HAMMER2_INODE_SYNCQ |
-                                       HAMMER2_INODE_SYNCQ_WAKEUP |
-                                       HAMMER2_INODE_SYNCQ_PASS2)) == 0) {
+                             (pass2 & ~(HAMMER2_INODE_SYNCQ |
+                                       HAMMER2_INODE_SYNCQ_WAKEUP)) |
+                                       HAMMER2_INODE_SYNCQ_PASS2) == 0) {
                        continue;
                }
                if (pass2 & HAMMER2_INODE_SYNCQ_WAKEUP)
@@ -2534,13 +2553,19 @@ restart:
                if (vp) {
                        if (vget(vp, LK_EXCLUSIVE|LK_NOWAIT)) {
                                /*
-                                * Failed, move to SIDEQ
+                                * Failed, move to SIDEQ.  It may already be
+                                * on the SIDEQ if we lost a race.
                                 */
                                vp = NULL;
                                dorestart = 1;
+#ifdef HAMMER2_DEBUG_SYNC
+                               kprintf("inum %ld (sync delayed by vnode)\n",
+                                       (long)ip->meta.inum);
+#endif
                                hammer2_spin_ex(&pmp->list_spin);
                                if ((ip->flags & (HAMMER2_INODE_SYNCQ |
                                                  HAMMER2_INODE_SIDEQ)) == 0) {
+                                       /* XXX PASS2 redundant */
                                        atomic_set_int(&ip->flags,
                                                   HAMMER2_INODE_SIDEQ |
                                                   HAMMER2_INODE_SYNCQ_PASS2);
@@ -2548,6 +2573,13 @@ restart:
                                                          entry);
                                        hammer2_spin_unex(&pmp->list_spin);
                                        hammer2_mtx_unlock(&ip->lock);
+                               } else if (ip->flags & HAMMER2_INODE_SIDEQ) {
+                                       /* XXX PASS2 redundant */
+                                       atomic_set_int(&ip->flags,
+                                                  HAMMER2_INODE_SYNCQ_PASS2);
+                                       hammer2_spin_unex(&pmp->list_spin);
+                                       hammer2_mtx_unlock(&ip->lock);
+                                       hammer2_inode_drop(ip);
                                } else {
                                        hammer2_spin_unex(&pmp->list_spin);
                                        hammer2_mtx_unlock(&ip->lock);
@@ -2582,9 +2614,22 @@ restart:
                 * we must do so.  Then sync and flush it.  The flush should
                 * update the parent.
                 */
-               if (ip->flags & HAMMER2_INODE_CREATING) {
+               if (ip->flags & HAMMER2_INODE_DELETING) {
+#ifdef HAMMER2_DEBUG_SYNC
+                       kprintf("inum %ld destroy\n", (long)ip->meta.inum);
+#endif
+                       hammer2_inode_chain_des(ip);
+                       atomic_add_long(&hammer2_iod_inode_deletes, 1);
+               } else if (ip->flags & HAMMER2_INODE_CREATING) {
+#ifdef HAMMER2_DEBUG_SYNC
+                       kprintf("inum %ld insert\n", (long)ip->meta.inum);
+#endif
                        hammer2_inode_chain_ins(ip);
+                       atomic_add_long(&hammer2_iod_inode_creates, 1);
                }
+#ifdef HAMMER2_DEBUG_SYNC
+               kprintf("inum %ld chain-sync\n", (long)ip->meta.inum);
+#endif
                hammer2_inode_chain_sync(ip);
                hammer2_inode_chain_flush(ip, HAMMER2_XOP_INODE_STOP |
                                              HAMMER2_XOP_FSSYNC);
@@ -2600,6 +2645,7 @@ restart:
                        lwkt_reltoken(&vp->v_token);
                        vput(vp);
                }
+               atomic_clear_int(&ip->flags, HAMMER2_INODE_SYNCQ_PASS2);
                hammer2_inode_unlock(ip);       /* unlock+drop */
                /* ip pointer invalid */
 
@@ -2614,23 +2660,40 @@ restart:
                hammer2_inode_drop(ipdrop);
                ipdrop = NULL;
        }
-       if (dorestart)
+       if (dorestart || (pmp->trans.flags & HAMMER2_TRANS_RESCAN)) {
+#ifdef HAMMER2_DEBUG_SYNC
+               kprintf("FILESYSTEM SYNC STAGE 1 RESTART\n");
+               tsleep(&dorestart, 0, "h2STG1-R", hz*20);
+#endif
+               dorestart = 1;
                goto restart;
+       }
+#ifdef HAMMER2_DEBUG_SYNC
+       kprintf("FILESYSTEM SYNC STAGE 2 BEGIN\n");
+       tsleep(&dorestart, 0, "h2STG2", hz*20);
+#endif
 
        /*
-        * We have to flush iroot last, even if it does not appear to be
-        * dirty, because all the inodes in the PFS are indexed under the
-        * iroot.  The normal flushing of iroot above would only occur if
-        * directory entries under the root were changed.
+        * We have to flush the PFS root last, even if it does not appear to
+        * be dirty, because all the inodes in the PFS are indexed under it.
+        * The normal flushing of iroot above would only occur if directory
+        * entries under the root were changed.
+        *
+        * Specifying VOLHDR will cause an additional flush of hmp->spmp
+        * for the media making up the cluster.
         */
        if ((ip = pmp->iroot) != NULL) {
                hammer2_inode_ref(ip);
                hammer2_mtx_ex(&ip->lock);
                hammer2_inode_chain_sync(ip);
                hammer2_inode_chain_flush(ip, HAMMER2_XOP_INODE_STOP |
-                                             HAMMER2_XOP_FSSYNC);
+                                             HAMMER2_XOP_FSSYNC |
+                                             HAMMER2_XOP_VOLHDR);
                hammer2_inode_unlock(ip);       /* unlock+drop */
        }
+#ifdef HAMMER2_DEBUG_SYNC
+       kprintf("FILESYSTEM SYNC STAGE 2 DONE\n");
+#endif
 
        /*
         * device bioq sync
@@ -2646,7 +2709,7 @@ restart:
        info.waitfor = MNT_WAIT;
        vsyncscan(mp, flags, hammer2_sync_scan2, &info);
 #endif
-
+#if 0
        /*
         * Generally speaking we now want to flush the media topology from
         * the iroot through to the inodes.  The flush stops at any inode
@@ -2663,25 +2726,36 @@ restart:
         *
         * XXX For now wait for all flushes to complete.
         */
-       if (mp && iroot) {
+       if (mp && (ip = pmp->iroot) != NULL) {
                /*
                 * If unmounting try to flush everything including any
                 * sub-trees under inodes, just in case there is dangling
                 * modified data, as a safety.  Otherwise just flush up to
                 * the inodes in this stage.
                 */
+               kprintf("MP & IROOT\n");
+#ifdef HAMMER2_DEBUG_SYNC
+               kprintf("FILESYSTEM SYNC STAGE 3 IROOT BEGIN\n");
+#endif
                if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
-                       xop = hammer2_xop_alloc(iroot, HAMMER2_XOP_MODIFYING |
-                                                      HAMMER2_XOP_VOLHDR);
+                       xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING |
+                                                   HAMMER2_XOP_VOLHDR |
+                                                   HAMMER2_XOP_FSSYNC |
+                                                   HAMMER2_XOP_INODE_STOP);
                } else {
-                       xop = hammer2_xop_alloc(iroot, HAMMER2_XOP_MODIFYING |
-                                                      HAMMER2_XOP_INODE_STOP |
-                                                      HAMMER2_XOP_VOLHDR);
+                       xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING |
+                                                   HAMMER2_XOP_INODE_STOP |
+                                                   HAMMER2_XOP_VOLHDR |
+                                                   HAMMER2_XOP_FSSYNC |
+                                                   HAMMER2_XOP_INODE_STOP);
                }
                hammer2_xop_start(&xop->head, &hammer2_inode_flush_desc);
                error = hammer2_xop_collect(&xop->head,
                                            HAMMER2_XOP_COLLECT_WAITALL);
                hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
+#ifdef HAMMER2_DEBUG_SYNC
+               kprintf("FILESYSTEM SYNC STAGE 3 IROOT END\n");
+#endif
                if (error == HAMMER2_ERROR_ENOENT)
                        error = 0;
                else
@@ -2689,6 +2763,8 @@ restart:
        } else {
                error = 0;
        }
+#endif
+       error = 0;      /* XXX */
        hammer2_trans_done(pmp, HAMMER2_TRANS_ISFLUSH);
 
        return (error);
index c932fcf..eb8afd7 100644 (file)
@@ -162,21 +162,16 @@ hammer2_vop_reclaim(struct vop_reclaim_args *ap)
        vclrisdirty(vp);
 
        /*
-        * A modified inode may require chain synchronization.  This
-        * synchronization is usually handled by VOP_SYNC / VOP_FSYNC
-        * when vfsync() is called.  However, that requires a vnode.
+        * Modified inodes will already be on SIDEQ or SYNCQ, no further
+        * action is needed.
         *
-        * When the vnode is disassociated we must keep track of any modified
-        * inode to be flushed in a later filesystem sync.  We cannot safely
-        * synchronize the inode from inside the reclaim due to potentially
-        * deep locks held as-of when the reclaim occurs.
-        * Interactions and potential deadlocks abound.
-        *
-        * Place the inode on SIDEQ, unless it is already on the SIDEQ or
-        * SYNCQ.  It will be transfered to the SYNCQ in the next filesystem
-        * sync.  It is not safe to try to shoehorn it into the current fs
-        * sync.
+        * We cannot safely synchronize the inode from inside the reclaim
+        * due to potentially deep locks held as-of when the reclaim occurs.
+        * Interactions and potential deadlocks abound.  We also can't do it
+        * here without desynchronizing from the related directory entries.
         */
+       hammer2_inode_drop(ip);                 /* vp ref */
+#if 0
        if ((ip->flags & (HAMMER2_INODE_ISUNLINKED |
                          HAMMER2_INODE_MODIFIED |
                          HAMMER2_INODE_RESIZED |
@@ -200,6 +195,7 @@ hammer2_vop_reclaim(struct vop_reclaim_args *ap)
        } else {
                hammer2_inode_drop(ip);                 /* vp ref */
        }
+#endif
 
        /*
         * XXX handle background sync when ip dirty, kernel will no longer
@@ -1420,6 +1416,7 @@ hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
                *ap->a_vpp = NULL;
        } else {
                *ap->a_vpp = hammer2_igetv(nip, &error);
+               hammer2_inode_depend(dip, nip);
                hammer2_inode_unlock(nip);
        }
 
@@ -1627,6 +1624,7 @@ hammer2_vop_ncreate(struct vop_ncreate_args *ap)
                *ap->a_vpp = NULL;
        } else {
                *ap->a_vpp = hammer2_igetv(nip, &error);
+               hammer2_inode_depend(dip, nip);
                hammer2_inode_unlock(nip);
        }
 
@@ -1704,6 +1702,7 @@ hammer2_vop_nmknod(struct vop_nmknod_args *ap)
                *ap->a_vpp = NULL;
        } else {
                *ap->a_vpp = hammer2_igetv(nip, &error);
+               hammer2_inode_depend(dip, nip);
                hammer2_inode_unlock(nip);
        }
 
@@ -1787,6 +1786,7 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
                return error;
        }
        *ap->a_vpp = hammer2_igetv(nip, &error);
+       hammer2_inode_depend(dip, nip);
 
        /*
         * Build the softlink (~like file data) and finalize the namecache.
@@ -1905,8 +1905,8 @@ hammer2_vop_nremove(struct vop_nremove_args *ap)
                ip = hammer2_inode_get(dip->pmp, &xop->head, -1, -1);
                hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
                if (ip) {
-                       hammer2_inode_depend(dip, ip);
                        hammer2_inode_unlink_finisher(ip, isopen);
+                       hammer2_inode_depend(dip, ip); /* after modified */
                        hammer2_inode_unlock(ip);
                }
        } else {
@@ -1983,8 +1983,8 @@ hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
                ip = hammer2_inode_get(dip->pmp, &xop->head, -1, -1);
                hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
                if (ip) {
-                       hammer2_inode_depend(dip, ip);
                        hammer2_inode_unlink_finisher(ip, isopen);
+                       hammer2_inode_depend(dip, ip);
                        hammer2_inode_unlock(ip);
                }
        } else {