hammer2 - Major restructuring, part 6/several
author Matthew Dillon <dillon@apollo.backplane.com>
Fri, 3 May 2013 20:17:26 +0000 (13:17 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Fri, 3 May 2013 20:17:26 +0000 (13:17 -0700)
* Stabilization pass.

* Serialize vfs_sync with a master flush lock in order to protect the
  sequencing of the master volume header.  Any fsync scan issued by the
  thread running the sync reuses the sync's transaction id.

* Fix a bug where races could prevent SUBMODIFIED, once set deep in a
  chain, from propagating back up to the root.

* Limit debug dumps on unmount.

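* API adjustments: hammer2_trans_init() now takes the mount (hmp) as its
  first argument, and hammer2_dump_chain() takes a countdown pointer that
  bounds the amount of output.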

sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_flush.c
sys/vfs/hammer2/hammer2_inode.c
sys/vfs/hammer2/hammer2_ioctl.c
sys/vfs/hammer2/hammer2_vfsops.c
sys/vfs/hammer2/hammer2_vnops.c

index c62d5dd..3775e54 100644 (file)
@@ -323,7 +323,9 @@ struct hammer2_mount {
        hammer2_inode_t *sroot;         /* super-root inode */
        struct lock     alloclk;        /* lockmgr lock */
        struct lock     voldatalk;      /* lockmgr lock */
-
+       hammer2_tid_t   flush_tid;      /* (voldata locked, flush running) */
+       thread_t        flush_td;       /* vfs_sync cycle owner */
+       int             flush_wait;
        int             volhdrno;       /* last volhdrno written */
        hammer2_volume_data_t voldata;
        hammer2_volume_data_t volsync;  /* synchronized voldata */
@@ -530,7 +532,10 @@ void hammer2_chain_parent_setsubmod(hammer2_chain_t *chain);
 /*
  * hammer2_trans.c
  */
-void hammer2_trans_init(hammer2_trans_t *trans, hammer2_mount_t *hmp);
+void hammer2_trans_init_flush(hammer2_mount_t *hmp, hammer2_trans_t *trans,
+                             int master);
+void hammer2_trans_done_flush(hammer2_trans_t *trans, int master);
+void hammer2_trans_init(hammer2_mount_t *hmp, hammer2_trans_t *trans);
 void hammer2_trans_done(hammer2_trans_t *trans);
 
 /*
@@ -551,7 +556,7 @@ int hammer2_msg_adhoc_input(kdmsg_msg_t *msg);
 void hammer2_clusterctl_wakeup(kdmsg_iocom_t *iocom);
 void hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index);
 void hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp);
-void hammer2_dump_chain(hammer2_chain_t *chain, int tab);
+void hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp);
 
 /*
  * hammer2_freemap.c
index ae43cca..6a040a1 100644 (file)
@@ -72,21 +72,62 @@ static int hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data);
  *
  * Initializing a new transaction allocates a transaction ID.  We
  * don't bother marking the volume header MODIFIED.  Instead, the volume
- * header will be updated only if the operation actually makes modifications
- * (when then propagate to the root).
+ * will be synchronized at a later time as part of a larger flush sequence.
  *
  * WARNING! Modifications to the root volume cannot dup the root volume
  *         header to handle synchronization points, so alloc_tid can
  *         wind up (harmlessly) more advanced on flush.
+ *
+ * WARNING! Operations which might call inode_duplicate()/chain_duplicate()
+ *         depend heavily on having a unique sync_tid to avoid duplication
+ *         collisions (which key off of delete_tid).
  */
 void
-hammer2_trans_init(hammer2_trans_t *trans, hammer2_mount_t *hmp)
+hammer2_trans_init(hammer2_mount_t *hmp, hammer2_trans_t *trans)
 {
        bzero(trans, sizeof(*trans));
        trans->hmp = hmp;
        hammer2_voldata_lock(hmp);
        trans->sync_tid = hmp->voldata.alloc_tid++;
-       hammer2_voldata_unlock(hmp, 0); /* don't immediately mark modified */
+       hammer2_voldata_unlock(hmp, 0);
+}
+
+void
+hammer2_trans_init_flush(hammer2_mount_t *hmp, hammer2_trans_t *trans,
+                        int master)
+{
+       thread_t td = curthread;
+
+       bzero(trans, sizeof(*trans));
+       trans->hmp = hmp;
+
+       hammer2_voldata_lock(hmp);
+       if (master) {
+               /*
+                * New master flush (sync).
+                */
+               while (hmp->flush_td) {
+                       hmp->flush_wait = 1;
+                       lksleep(&hmp->flush_wait, &hmp->voldatalk,
+                               0, "h2sync", hz);
+               }
+               hmp->flush_td = td;
+               hmp->flush_tid = hmp->voldata.alloc_tid++;
+               trans->sync_tid = hmp->flush_tid;
+       } else if (hmp->flush_td == td) {
+               /*
+                * Part of a running master flush (sync->fsync)
+                */
+               trans->sync_tid = hmp->flush_tid;
+               KKASSERT(trans->sync_tid != 0);
+       } else {
+               /*
+                * Independent flush request, make sure the sync_tid
+                * covers all modifications made to date.
+                */
+               trans->sync_tid = hmp->voldata.alloc_tid++;
+       }
+       hammer2_voldata_unlock(hmp, 0);
 }
 
 void
@@ -95,6 +136,24 @@ hammer2_trans_done(hammer2_trans_t *trans)
        trans->hmp = NULL;
 }
 
+void
+hammer2_trans_done_flush(hammer2_trans_t *trans, int master)
+{
+       hammer2_mount_t *hmp = trans->hmp;
+
+       hammer2_voldata_lock(hmp);
+       if (master) {
+               hmp->flush_td = NULL;
+               if (hmp->flush_wait) {
+                       hmp->flush_wait = 0;
+                       wakeup(&hmp->flush_wait);
+               }
+       }
+       hammer2_voldata_unlock(hmp, 0);
+
+       trans->hmp = NULL;
+}
+
 /*
  * Flush the chain and all modified sub-chains through the specified
  * synchronization point (sync_tid), propagating parent chain modifications
@@ -1062,9 +1121,9 @@ finalize:
        kprintf("G child %08x act=%08x\n", child_flags, child->flags);
 #endif
        if (child_flags & (HAMMER2_CHAIN_MOVED |
-                           HAMMER2_CHAIN_DELETED /* |
+                           HAMMER2_CHAIN_DELETED |
                            HAMMER2_CHAIN_MODIFIED |
-                           HAMMER2_CHAIN_SUBMODIFIED*/)) {
+                           HAMMER2_CHAIN_SUBMODIFIED)) {
                atomic_set_int(&parent->flags, HAMMER2_CHAIN_SUBMODIFIED);
        }
 
index 9ddecec..15ea841 100644 (file)
@@ -1409,7 +1409,6 @@ hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp,
        pip = dip;
        hammer2_inode_ref(pip);         /* for loop */
        hammer2_chain_ref(chain);       /* for (*ochainp) */
-
        *ochainp = chain;
 
        /*
index 9aecf00..a8c1c77 100644 (file)
@@ -458,7 +458,7 @@ hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
 
        pfs->name[sizeof(pfs->name) - 1] = 0;   /* ensure 0-termination */
 
-       hammer2_trans_init(&trans, hmp);
+       hammer2_trans_init(hmp, &trans);
        nip = hammer2_inode_create(&trans, hmp->sroot, NULL, NULL,
                                     pfs->name, strlen(pfs->name),
                                     &error);
@@ -484,7 +484,7 @@ hammer2_ioctl_pfs_delete(hammer2_inode_t *ip, void *data)
        hammer2_trans_t trans;
        int error;
 
-       hammer2_trans_init(&trans, hmp);
+       hammer2_trans_init(hmp, &trans);
        error = hammer2_unlink_file(&trans, hmp->sroot,
                                    pfs->name, strlen(pfs->name),
                                    0, NULL);
index a179d4b..44fa6e8 100644 (file)
@@ -546,6 +546,7 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags)
        int flags;
        int error = 0;
        int ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
+       int dumpcnt;
        struct vnode *devvp;
 
        pmp = MPTOPMP(mp);
@@ -663,7 +664,8 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags)
        }
        hammer2_mount_unlock(hmp);
 
-       hammer2_dump_chain(&hmp->vchain, 0);
+       dumpcnt = 200;
+       hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt);
 
        /*
         * Final drop of embedded volume root chain to clean up
@@ -799,18 +801,23 @@ hammer2_vfs_sync(struct mount *mp, int waitfor)
        int flags;
        int error;
        int i;
+#if 0
+       int dumpcnt;
+#endif
 
        hmp = MPTOHMP(mp);
 #if 0
-       if ((waitfor & MNT_LAZY) == 0)
-               hammer2_dump_chain(&hmp->vchain, 0);
+       if ((waitfor & MNT_LAZY) == 0) {
+               dumpcnt = 50;
+               hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt);
+       }
 #endif
 
        flags = VMSC_GETVP;
        if (waitfor & MNT_LAZY)
                flags |= VMSC_ONEPASS;
 
-       hammer2_trans_init(&info.trans, hmp);
+       hammer2_trans_init_flush(hmp, &info.trans, 1);
        info.error = 0;
        info.waitfor = MNT_NOWAIT;
        vmntvnodescan(mp, flags | VMSC_NOWAIT,
@@ -836,7 +843,6 @@ hammer2_vfs_sync(struct mount *mp, int waitfor)
                hammer2_chain_flush(&info.trans, &hmp->vchain);
        }
        hammer2_chain_unlock(&hmp->vchain);
-       hammer2_trans_done(&info.trans);
 
        error = 0;
 
@@ -900,6 +906,7 @@ hammer2_vfs_sync(struct mount *mp, int waitfor)
                bawrite(bp);
                hmp->volhdrno = i;
        }
+       hammer2_trans_done_flush(&info.trans, 1);
        return (error);
 }
 
@@ -1249,10 +1256,17 @@ hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index)
 }
 
 void
-hammer2_dump_chain(hammer2_chain_t *chain, int tab)
+hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp)
 {
        hammer2_chain_t *scan;
 
+       --*countp;
+       if (*countp == 0) {
+               kprintf("%*.*s...\n", tab, tab, "");
+               return;
+       }
+       if (*countp < 0)
+               return;
        kprintf("%*.*schain[%d] %p.%d [%08x][core=%p] (%s) dl=%p refs=%d",
                tab, tab, "",
                chain->index, chain, chain->bref.type, chain->flags,
@@ -1265,7 +1279,7 @@ hammer2_dump_chain(hammer2_chain_t *chain, int tab)
        else
                kprintf(" {\n");
        RB_FOREACH(scan, hammer2_chain_tree, &chain->core->rbtree) {
-               hammer2_dump_chain(scan, tab + 4);
+               hammer2_dump_chain(scan, tab + 4, countp);
        }
        if (chain->core && !RB_EMPTY(&chain->core->rbtree)) {
                if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data)
index fcb36ad..0686d84 100644 (file)
@@ -111,7 +111,8 @@ hammer2_vop_inactive(struct vop_inactive_args *ap)
        KKASSERT(ip->chain);
        if (ip->flags & HAMMER2_INODE_DIRTYEMBED) {
                atomic_clear_int(&ip->flags, HAMMER2_INODE_DIRTYEMBED);
-               hammer2_trans_init(&trans, ip->hmp);
+               atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED);
+               hammer2_trans_init(ip->hmp, &trans);
                hammer2_chain_modify(&trans, ip->chain, 0);
                hammer2_trans_done(&trans);
        }
@@ -139,7 +140,9 @@ hammer2_vop_reclaim(struct vop_reclaim_args *ap)
        hammer2_chain_t *chain;
        hammer2_inode_t *ip;
        hammer2_mount_t *hmp;
+#if 0
        hammer2_trans_t trans;
+#endif
        struct vnode *vp;
 
        vp = ap->a_vp;
@@ -190,13 +193,18 @@ hammer2_vop_reclaim(struct vop_reclaim_args *ap)
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROYED |
                                              HAMMER2_CHAIN_SUBMODIFIED);
        }
+#if 0
+       /*
+        * XXX chains will be flushed on sync, no need to do it here.
+        */
        if (chain->flags & (HAMMER2_CHAIN_MODIFIED |
                            HAMMER2_CHAIN_DELETED |
                            HAMMER2_CHAIN_SUBMODIFIED)) {
-               hammer2_trans_init(&trans, ip->hmp);
+               hammer2_trans_init_flush(ip->hmp, &trans, 0);
                hammer2_chain_flush(&trans, chain);
-               hammer2_trans_done(&trans);
+               hammer2_trans_done_flush(&trans, 0);
        }
+#endif
        if (ip->refs > 2)                           /* (our lock + vp ref) */
                hammer2_inode_unlock_ex(ip);        /* unlock */
        else
@@ -218,14 +226,16 @@ static
 int
 hammer2_vop_fsync(struct vop_fsync_args *ap)
 {
+       hammer2_mount_t *hmp;
        hammer2_inode_t *ip;
        hammer2_trans_t trans;
        struct vnode *vp;
 
        vp = ap->a_vp;
        ip = VTOI(vp);
+       hmp = ip->hmp;
 
-       hammer2_trans_init(&trans, ip->hmp);
+       hammer2_trans_init_flush(hmp, &trans, 0);
        hammer2_inode_lock_ex(ip);
 
        vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
@@ -254,7 +264,8 @@ hammer2_vop_fsync(struct vop_fsync_args *ap)
                hammer2_chain_flush(&trans, ip->chain);
        }
        hammer2_inode_unlock_ex(ip);
-       hammer2_trans_done(&trans);
+       hammer2_trans_done_flush(&trans, 0);
+
        return (0);
 }
 
@@ -350,7 +361,7 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
        if (hmp->ronly)
                return(EROFS);
 
-       hammer2_trans_init(&trans, hmp);
+       hammer2_trans_init(hmp, &trans);
        hammer2_inode_lock_ex(ip);
        ipdata = &ip->chain->data->ipdata;
        error = 0;
@@ -756,7 +767,7 @@ hammer2_vop_write(struct vop_write_args *ap)
         * might wind up being copied into the embedded data area.
         */
        hammer2_inode_lock_ex(ip);
-       hammer2_trans_init(&trans, ip->hmp);
+       hammer2_trans_init(ip->hmp, &trans);
        error = hammer2_write_file(ip, &trans, uio, ap->a_ioflag, seqcount);
        hammer2_inode_unlock_ex(ip);
        hammer2_trans_done(&trans);
@@ -1547,7 +1558,7 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap)
                kprintf("hammer2: need to unconsolidate hardlink for %s\n",
                        chain->data->ipdata.filename);
                /* XXX retain shared lock on dip? (currently not held) */
-               hammer2_trans_init(&trans, dip->hmp);
+               hammer2_trans_init(dip->hmp, &trans);
                hammer2_hardlink_deconsolidate(&trans, dip, &chain, &ochain);
                hammer2_trans_done(&trans);
        }
@@ -1644,7 +1655,7 @@ hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;
 
-       hammer2_trans_init(&trans, hmp);
+       hammer2_trans_init(hmp, &trans);
        nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
                                   name, name_len, &error);
        if (error) {
@@ -1826,7 +1837,7 @@ hammer2_vop_nlink(struct vop_nlink_args *ap)
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;
-       hammer2_trans_init(&trans, hmp);
+       hammer2_trans_init(hmp, &trans);
 
        /*
         * ip represents the file being hardlinked.  The file could be a
@@ -1897,7 +1908,7 @@ hammer2_vop_ncreate(struct vop_ncreate_args *ap)
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;
-       hammer2_trans_init(&trans, hmp);
+       hammer2_trans_init(hmp, &trans);
 
        nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
                                   name, name_len, &error);
@@ -1941,7 +1952,7 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;
-       hammer2_trans_init(&trans, hmp);
+       hammer2_trans_init(hmp, &trans);
 
        ap->a_vap->va_type = VLNK;      /* enforce type */
 
@@ -2027,7 +2038,7 @@ hammer2_vop_nremove(struct vop_nremove_args *ap)
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;
-       hammer2_trans_init(&trans, hmp);
+       hammer2_trans_init(hmp, &trans);
        error = hammer2_unlink_file(&trans, dip, name, name_len, 0, NULL);
        hammer2_trans_done(&trans);
        if (error == 0) {
@@ -2060,7 +2071,7 @@ hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;
 
-       hammer2_trans_init(&trans, hmp);
+       hammer2_trans_init(hmp, &trans);
        error = hammer2_unlink_file(&trans, dip, name, name_len, 1, NULL);
        hammer2_trans_done(&trans);
        if (error == 0) {
@@ -2111,7 +2122,7 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
        tname = tncp->nc_name;
        tname_len = tncp->nc_nlen;
 
-       hammer2_trans_init(&trans, hmp);
+       hammer2_trans_init(hmp, &trans);
 
        /*
         * ip is the inode being renamed.  If this is a hardlink then