hammer2 - Revamp hammer2_cluster structure part 1
authorMatthew Dillon <dillon@apollo.backplane.com>
Mon, 30 Sep 2013 22:31:14 +0000 (15:31 -0700)
committerMatthew Dillon <dillon@apollo.backplane.com>
Mon, 30 Sep 2013 22:42:16 +0000 (15:42 -0700)
* Adjust the hammer2_cluster structure to track multiple chains (and thus
  multiple mount points).  Use cases still just pull out one chain.

* Remove schain from hammer2_mount, leaving just sroot.  The cached chain
  structure isn't useful because it doesn't track delete-duplicate
  operations.  Use sroot for basing functions.

* Mark pfs-mount chains with the PFSROOT flag and propagate through
  duplications to detect when a snapshot should inherit the PFS
  cluster id or not.

sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_chain.c
sys/vfs/hammer2/hammer2_flush.c
sys/vfs/hammer2/hammer2_inode.c
sys/vfs/hammer2/hammer2_ioctl.c
sys/vfs/hammer2/hammer2_vfsops.c
sys/vfs/hammer2/hammer2_vnops.c

index 96e760a..8518062 100644 (file)
@@ -212,6 +212,7 @@ RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
 #define HAMMER2_CHAIN_REPLACE          0x00040000      /* replace bref */
 #define HAMMER2_CHAIN_COUNTEDBREFS     0x00080000      /* counted brefs */
 #define HAMMER2_CHAIN_DUPLICATED       0x00100000      /* fwd delete-dup */
+#define HAMMER2_CHAIN_PFSROOT          0x00200000      /* in pfs->cluster */
 
 /*
  * Flags passed to hammer2_chain_lookup() and hammer2_chain_next()
@@ -420,10 +421,9 @@ struct hammer2_mount {
        struct malloc_type *mchain;
        int             nipstacks;
        int             maxipstacks;
-       hammer2_chain_t vchain;         /* anchor chain */
-       hammer2_chain_t fchain;         /* freemap chain special */
-       hammer2_chain_t *schain;        /* super-root */
-       hammer2_inode_t *sroot;         /* super-root inode */
+       hammer2_chain_t vchain;         /* anchor chain (topology) */
+       hammer2_chain_t fchain;         /* anchor chain (freemap) */
+       hammer2_inode_t *sroot;         /* super-root localized to media */
        struct lock     alloclk;        /* lockmgr lock */
        struct lock     voldatalk;      /* lockmgr lock */
        struct hammer2_trans_queue transq; /* all in-progress transactions */
@@ -448,24 +448,34 @@ typedef struct hammer2_mount hammer2_mount_t;
  *
  * A PFS may have several hammer2_cluster's associated with it.
  */
+#define HAMMER2_MAXCLUSTER     8
+
 struct hammer2_cluster {
-       struct hammer2_mount    *hmp;           /* device global mount */
-       hammer2_chain_t         *rchain;        /* PFS root chain */
+       int                     nchains;
+       int                     status;
+       hammer2_chain_t         *chains[HAMMER2_MAXCLUSTER];
 };
 
 typedef struct hammer2_cluster hammer2_cluster_t;
 
 /*
  * HAMMER2 PFS mount point structure (aka vp->v_mount->mnt_data).
+ * This has a 1:1 correspondence to struct mount (note that the
+ * hammer2_mount structure has a N:1 correspondence).
  *
  * This structure represents a cluster mount and not necessarily a
  * PFS under a specific device mount (HMP).  The distinction is important
  * because the elements backing a cluster mount can change on the fly.
+ *
+ * Usually the first element under the cluster represents the original
+ * user-requested mount that bootstraps the whole mess.  In significant
+ * setups the original is usually just a read-only media image (or
+ * representative file) that simply contains a bootstrap volume header
+ * listing the configuration.
  */
 struct hammer2_pfsmount {
-       struct mount            *mp;            /* kernel mount */
-       hammer2_cluster_t       *mount_cluster;
-       hammer2_cluster_t       *cluster;
+       struct mount            *mp;
+       hammer2_cluster_t       cluster;
        hammer2_inode_t         *iroot;         /* PFS root inode */
        hammer2_off_t           inode_count;    /* copy of inode_count */
        ccms_domain_t           ccms_dom;
@@ -548,13 +558,6 @@ MPTOPMP(struct mount *mp)
        return ((hammer2_pfsmount_t *)mp->mnt_data);
 }
 
-static __inline
-hammer2_mount_t *
-MPTOHMP(struct mount *mp)
-{
-       return (((hammer2_pfsmount_t *)mp->mnt_data)->cluster->hmp);
-}
-
 extern struct vop_ops hammer2_vnode_vops;
 extern struct vop_ops hammer2_spec_vops;
 extern struct vop_ops hammer2_fifo_vops;
@@ -716,8 +719,8 @@ int hammer2_chain_create(hammer2_trans_t *trans,
                                int type, size_t bytes);
 void hammer2_chain_duplicate(hammer2_trans_t *trans, hammer2_chain_t *parent,
                                hammer2_chain_t **chainp,
-                               hammer2_blockref_t *bref);
-int hammer2_chain_snapshot(hammer2_trans_t *trans, hammer2_inode_t *ip,
+                               hammer2_blockref_t *bref, int snapshot);
+int hammer2_chain_snapshot(hammer2_trans_t *trans, hammer2_chain_t *chain,
                                hammer2_ioc_pfs_t *pfs);
 void hammer2_chain_delete(hammer2_trans_t *trans, hammer2_chain_t *chain,
                                int flags);
index 66b0f97..f413186 100644 (file)
@@ -249,6 +249,13 @@ hammer2_chain_core_alloc(hammer2_trans_t *trans,
                TAILQ_INSERT_TAIL(&core->ownerq, nchain, core_entry);
        } else {
                /*
+                * Propagate the PFSROOT flag which we set on all subdirs
+                * under the super-root.
+                */
+               atomic_set_int(&nchain->flags,
+                              ochain->flags & HAMMER2_CHAIN_PFSROOT);
+
+               /*
                 * Multi-homing (delete-duplicate) sub-tree under ochain.
                 * Set the DUPLICATED flag on ochain but only if this is
                 * not a snapshot.  This flag governs forward iterations
@@ -2529,7 +2536,8 @@ static void hammer2_chain_dup_fixup(hammer2_chain_t *ochain,
 
 void
 hammer2_chain_duplicate(hammer2_trans_t *trans, hammer2_chain_t *parent,
-                       hammer2_chain_t **chainp, hammer2_blockref_t *bref)
+                       hammer2_chain_t **chainp, hammer2_blockref_t *bref,
+                       int snapshot)
 {
        hammer2_mount_t *hmp;
        hammer2_blockref_t *base;
@@ -2551,6 +2559,8 @@ hammer2_chain_duplicate(hammer2_trans_t *trans, hammer2_chain_t *parent,
        if (bref == NULL)
                bref = &ochain->bref;
        nchain = hammer2_chain_alloc(hmp, ochain->pmp, trans, bref);
+       if (snapshot)
+               atomic_set_int(&nchain->flags, HAMMER2_CHAIN_SNAPSHOT);
        hammer2_chain_core_alloc(trans, nchain, ochain);
        bytes = (hammer2_off_t)1 <<
                (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
@@ -2868,52 +2878,49 @@ hammer2_chain_dup_fixup(hammer2_chain_t *ochain, hammer2_chain_t *nchain)
 
 /*
  * Create a snapshot of the specified {parent, chain} with the specified
- * label.
- *
- * (a) We create a duplicate connected to the super-root as the specified
- *     label.
- *
- * (b) We issue a restricted flush using the current transaction on the
- *     duplicate.
- *
- * (c) We disconnect and reallocate the duplicate's core.
+ * label.  The originating hammer2_inode must be exclusively locked for
+ * safety.
  */
 int
-hammer2_chain_snapshot(hammer2_trans_t *trans, hammer2_inode_t *ip,
+hammer2_chain_snapshot(hammer2_trans_t *trans, hammer2_chain_t *ochain,
                       hammer2_ioc_pfs_t *pfs)
 {
-       hammer2_cluster_t *cluster;
        hammer2_mount_t *hmp;
-       hammer2_chain_t *chain;
        hammer2_chain_t *nchain;
+       hammer2_chain_t *chain;
        hammer2_chain_t *parent;
        hammer2_inode_data_t *ipdata;
        size_t name_len;
        hammer2_key_t key_dummy;
        hammer2_key_t lhc;
+       uuid_t opfs_clid;
        int error;
        int cache_index = -1;
 
        name_len = strlen(pfs->name);
        lhc = hammer2_dirhash(pfs->name, name_len);
-       cluster = ip->pmp->mount_cluster;
-       hmp = ip->chain->hmp;
-       KKASSERT(hmp == cluster->hmp);  /* XXX */
+
+       hmp = ochain->hmp;
+       opfs_clid = ochain->data->ipdata.pfs_clid;
+       KKASSERT((trans->flags & HAMMER2_TRANS_RESTRICTED) == 0);
 
        /*
-        * Create disconnected duplicate
+        * Get second lock for duplication to replace, original lock
+        * will be left intact (caller must unlock the original chain).
         */
-       KKASSERT((trans->flags & HAMMER2_TRANS_RESTRICTED) == 0);
-       nchain = ip->chain;
+       nchain = ochain;
        hammer2_chain_lock(nchain, HAMMER2_RESOLVE_MAYBE);
-       atomic_set_int(&nchain->flags, HAMMER2_CHAIN_RECYCLE |
-                                      HAMMER2_CHAIN_SNAPSHOT);
-       hammer2_chain_duplicate(trans, NULL, &nchain, NULL);
+
+       /*
+        * Create disconnected duplicate flagged as a snapshot
+        */
+       atomic_set_int(&nchain->flags, HAMMER2_CHAIN_RECYCLE);
+       hammer2_chain_duplicate(trans, NULL, &nchain, NULL, 1);
 
        /*
         * Create named entry in the super-root.
         */
-        parent = hammer2_chain_lookup_init(hmp->schain, 0);
+       parent = hammer2_inode_lock_ex(hmp->sroot);
        error = 0;
        while (error == 0) {
                chain = hammer2_chain_lookup(&parent, &key_dummy,
@@ -2930,7 +2937,7 @@ hammer2_chain_snapshot(hammer2_trans_t *trans, hammer2_inode_t *ip,
                             HAMMER2_BREF_TYPE_INODE,
                             HAMMER2_INODE_BYTES);
        hammer2_chain_modify(trans, &nchain, HAMMER2_MODIFY_ASSERTNOCOPY);
-       hammer2_chain_lookup_done(parent);
+       hammer2_inode_unlock_ex(hmp->sroot, parent);
        parent = NULL;  /* safety */
 
        /*
@@ -2949,10 +2956,11 @@ hammer2_chain_snapshot(hammer2_trans_t *trans, hammer2_inode_t *ip,
         */
        ipdata->pfs_type = HAMMER2_PFSTYPE_SNAPSHOT;
        kern_uuidgen(&ipdata->pfs_fsid, 1);
-       if (ip->chain == cluster->rchain)
-               ipdata->pfs_clid = ip->chain->data->ipdata.pfs_clid;
+       if (ochain->flags & HAMMER2_CHAIN_PFSROOT)
+               ipdata->pfs_clid = opfs_clid;
        else
                kern_uuidgen(&ipdata->pfs_clid, 1);
+       atomic_set_int(&nchain->flags, HAMMER2_CHAIN_PFSROOT);
 
        /*
         * Issue a restricted flush of the snapshot.  This is a synchronous
@@ -3294,7 +3302,7 @@ hammer2_chain_create_indirect(hammer2_trans_t *trans, hammer2_chain_t *parent,
                                                  HAMMER2_RESOLVE_NOREF);
                }
                hammer2_chain_delete(trans, chain, HAMMER2_DELETE_WILLDUP);
-               hammer2_chain_duplicate(trans, ichain, &chain, NULL);
+               hammer2_chain_duplicate(trans, ichain, &chain, NULL, 0);
                hammer2_chain_unlock(chain);
                KKASSERT(parent->refs > 0);
                chain = NULL;
index e7e9a7c..f1ad53c 100644 (file)
@@ -120,14 +120,12 @@ hammer2_updatestats(hammer2_flush_info_t *info, hammer2_blockref_t *bref,
 void
 hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp, int flags)
 {
-       hammer2_cluster_t *cluster;
        hammer2_mount_t *hmp;
        hammer2_trans_t *scan;
 
        bzero(trans, sizeof(*trans));
        trans->pmp = pmp;
-       cluster = pmp->cluster;
-       hmp = cluster->hmp;
+       hmp = pmp->cluster.chains[0]->hmp;      /* XXX */
 
        hammer2_voldata_lock(hmp);
        trans->flags = flags;
@@ -235,12 +233,10 @@ hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp, int flags)
 void
 hammer2_trans_done(hammer2_trans_t *trans)
 {
-       hammer2_cluster_t *cluster;
        hammer2_mount_t *hmp;
        hammer2_trans_t *scan;
 
-       cluster = trans->pmp->cluster;
-       hmp = cluster->hmp;
+       hmp = trans->pmp->cluster.chains[0]->hmp;
 
        hammer2_voldata_lock(hmp);
        TAILQ_REMOVE(&hmp->transq, trans, entry);
index 3de640e..ed72877 100644 (file)
@@ -823,7 +823,7 @@ retry:
        bref = tmp->bref;
        bref.key = lhc;                 /* invisible dir entry key */
        bref.keybits = 0;
-       hammer2_chain_duplicate(trans, parent, &tmp, &bref);
+       hammer2_chain_duplicate(trans, parent, &tmp, &bref, 0);
        hammer2_inode_unlock_ex(dip, parent);
        /*hammer2_chain_lookup_done(parent);*/
        hammer2_chain_unlock(nchain);   /* no longer needed */
@@ -931,7 +931,7 @@ hammer2_inode_connect(hammer2_trans_t *trans, int hlink,
                         */
                        nchain = ochain;
                        ochain = NULL;
-                       hammer2_chain_duplicate(trans, NULL, &nchain, NULL);
+                       hammer2_chain_duplicate(trans, NULL, &nchain, NULL, 0);
                        error = hammer2_chain_create(trans, &parent, &nchain,
                                                     lhc, 0,
                                                     HAMMER2_BREF_TYPE_INODE,
index b18768c..339b476 100644 (file)
@@ -154,7 +154,7 @@ hammer2_ioctl(hammer2_inode_t *ip, u_long com, void *data, int fflag,
 static int
 hammer2_ioctl_version_get(hammer2_inode_t *ip, void *data)
 {
-       hammer2_mount_t *hmp = ip->pmp->mount_cluster->hmp;
+       hammer2_mount_t *hmp = ip->pmp->cluster.chains[0]->hmp;
        hammer2_ioc_version_t *version = data;
 
        version->version = hmp->voldata.version;
@@ -183,7 +183,7 @@ hammer2_ioctl_recluster(hammer2_inode_t *ip, void *data)
 static int
 hammer2_ioctl_remote_scan(hammer2_inode_t *ip, void *data)
 {
-       hammer2_mount_t *hmp = ip->pmp->mount_cluster->hmp;
+       hammer2_mount_t *hmp = ip->pmp->cluster.chains[0]->hmp;
        hammer2_ioc_remote_t *remote = data;
        int copyid = remote->copyid;
 
@@ -224,7 +224,7 @@ hammer2_ioctl_remote_add(hammer2_inode_t *ip, void *data)
        if (copyid >= HAMMER2_COPYID_COUNT)
                return (EINVAL);
 
-       hmp = pmp->mount_cluster->hmp;
+       hmp = pmp->cluster.chains[0]->hmp; /* XXX */
        hammer2_voldata_lock(hmp);
        if (copyid < 0) {
                for (copyid = 1; copyid < HAMMER2_COPYID_COUNT; ++copyid) {
@@ -257,7 +257,7 @@ hammer2_ioctl_remote_del(hammer2_inode_t *ip, void *data)
        int copyid = remote->copyid;
        int error = 0;
 
-       hmp = pmp->mount_cluster->hmp;
+       hmp = pmp->cluster.chains[0]->hmp; /* XXX */
        if (copyid >= HAMMER2_COPYID_COUNT)
                return (EINVAL);
        remote->copy1.path[sizeof(remote->copy1.path) - 1] = 0;
@@ -294,7 +294,7 @@ hammer2_ioctl_remote_rep(hammer2_inode_t *ip, void *data)
        hammer2_mount_t *hmp;
        int copyid = remote->copyid;
 
-       hmp = ip->pmp->mount_cluster->hmp;
+       hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */
 
        if (copyid < 0 || copyid >= HAMMER2_COPYID_COUNT)
                return (EINVAL);
@@ -325,7 +325,7 @@ hammer2_ioctl_socket_set(hammer2_inode_t *ip, void *data)
        hammer2_mount_t *hmp;
        int copyid = remote->copyid;
 
-       hmp = ip->pmp->mount_cluster->hmp;
+       hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */
        if (copyid < 0 || copyid >= HAMMER2_COPYID_COUNT)
                return (EINVAL);
 
@@ -360,10 +360,10 @@ hammer2_ioctl_pfs_get(hammer2_inode_t *ip, void *data)
        int cache_index = -1;
 
        error = 0;
-       hmp = ip->pmp->mount_cluster->hmp;
+       hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */
        pfs = data;
-       parent = hammer2_chain_lookup_init(hmp->schain, 0);
-       rchain = ip->pmp->mount_cluster->rchain;
+       parent = hammer2_inode_lock_ex(hmp->sroot);
+       rchain = ip->pmp->cluster.chains[0];    /* XXX */
 
        /*
         * Search for the first key or specific key.  Remember that keys
@@ -420,7 +420,7 @@ hammer2_ioctl_pfs_get(hammer2_inode_t *ip, void *data)
                pfs->name_next = (hammer2_key_t)-1;
                error = ENOENT;
        }
-       hammer2_chain_lookup_done(parent);
+       hammer2_inode_unlock_ex(hmp->sroot, parent);
 
        return (error);
 }
@@ -443,9 +443,9 @@ hammer2_ioctl_pfs_lookup(hammer2_inode_t *ip, void *data)
        size_t len;
 
        error = 0;
-       hmp = ip->pmp->mount_cluster->hmp;
+       hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */
        pfs = data;
-       parent = hammer2_chain_lookup_init(hmp->schain, HAMMER2_LOOKUP_SHARED);
+       parent = hammer2_inode_lock_sh(hmp->sroot);
 
        pfs->name[sizeof(pfs->name) - 1] = 0;
        len = strlen(pfs->name);
@@ -481,7 +481,8 @@ hammer2_ioctl_pfs_lookup(hammer2_inode_t *ip, void *data)
        } else {
                error = ENOENT;
        }
-       hammer2_chain_lookup_done(parent);
+       hammer2_inode_unlock_sh(hmp->sroot, parent);
+
        return (error);
 }
 
@@ -499,7 +500,7 @@ hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
        hammer2_trans_t trans;
        int error;
 
-       hmp = ip->pmp->mount_cluster->hmp;
+       hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */
        pfs = data;
        nip = NULL;
 
@@ -541,7 +542,7 @@ hammer2_ioctl_pfs_delete(hammer2_inode_t *ip, void *data)
        hammer2_trans_t trans;
        int error;
 
-       hmp = ip->pmp->mount_cluster->hmp;
+       hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */
        hammer2_trans_init(&trans, ip->pmp, 0);
        error = hammer2_unlink_file(&trans, hmp->sroot,
                                    pfs->name, strlen(pfs->name),
@@ -566,7 +567,7 @@ hammer2_ioctl_pfs_snapshot(hammer2_inode_t *ip, void *data)
 
        hammer2_trans_init(&trans, ip->pmp, 0);
        parent = hammer2_inode_lock_ex(ip);
-       error = hammer2_chain_snapshot(&trans, ip, pfs);
+       error = hammer2_chain_snapshot(&trans, parent, pfs);
        hammer2_inode_unlock_ex(ip, parent);
        hammer2_trans_done(&trans);
 
index 9c169c1..daad3db 100644 (file)
@@ -171,7 +171,7 @@ static int hammer2_vfs_init(struct vfsconf *conf);
 static int hammer2_vfs_uninit(struct vfsconf *vfsp);
 static int hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                                struct ucred *cred);
-static int hammer2_remount(struct mount *, char *, struct vnode *,
+static int hammer2_remount(hammer2_mount_t *, char *, struct vnode *,
                                struct ucred *);
 static int hammer2_vfs_unmount(struct mount *mp, int mntflags);
 static int hammer2_vfs_root(struct mount *mp, struct vnode **vpp);
@@ -347,6 +347,7 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
        int ronly = 1;
        int error;
        int cache_index;
+       int i;
 
        hmp = NULL;
        pmp = NULL;
@@ -391,9 +392,14 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                if (mp->mnt_flag & MNT_UPDATE) {
                        /* Update mount */
                        /* HAMMER2 implements NFS export via mountctl */
-                       hmp = MPTOHMP(mp);
-                       devvp = hmp->devvp;
-                       error = hammer2_remount(mp, path, devvp, cred);
+                       pmp = MPTOPMP(mp);
+                       for (i = 0; i < pmp->cluster.nchains; ++i) {
+                               hmp = pmp->cluster.chains[i]->hmp;
+                               devvp = hmp->devvp;
+                               error = hammer2_remount(hmp, path, devvp, cred);
+                               if (error)
+                                       break;
+                       }
                        return error;
                }
        }
@@ -528,12 +534,16 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                        hammer2_vfs_unmount(mp, MNT_FORCE);
                        return EINVAL;
                }
-               hammer2_chain_ref(schain);      /* for hmp->schain */
-               hmp->schain = schain;           /* left locked for inode_get */
+
+               /*
+                * NOTE: inode_get sucks up schain's lock.
+                */
+               atomic_set_int(&schain->flags, HAMMER2_CHAIN_PFSROOT);
                hmp->sroot = hammer2_inode_get(NULL, NULL, schain);
-               hammer2_inode_ref(hmp->sroot);  /* for hmp->sroot */
+               hammer2_inode_ref(hmp->sroot);
                hammer2_inode_unlock_ex(hmp->sroot, schain);
                schain = NULL;
+               /* leave hmp->sroot with one ref */
                
                mtx_init(&hmp->wthread_mtx);
                bioq_init(&hmp->wthread_bioq);
@@ -553,14 +563,10 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
         * From this point on we have to call hammer2_unmount() on failure.
         */
        pmp = kmalloc(sizeof(*pmp), M_HAMMER2, M_WAITOK | M_ZERO);
-       pmp->mount_cluster = kmalloc(sizeof(hammer2_cluster_t), M_HAMMER2,
-                                    M_WAITOK | M_ZERO);
-       pmp->cluster = pmp->mount_cluster;
 
        kmalloc_create(&pmp->minode, "HAMMER2-inodes");
        kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg");
 
-       pmp->mount_cluster->hmp = hmp;
        spin_init(&pmp->inum_spin);
        RB_INIT(&pmp->inum_tree);
 
@@ -596,10 +602,9 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
        pmp->mp = mp;
 
        /*
-        * schain only has 1 ref now for its hmp->schain assignment.
-        * Setup for lookup (which will lock it).
+        * Lookup mount point under the media-localized super-root.
         */
-       parent = hammer2_chain_lookup_init(hmp->schain, 0);
+       parent = hammer2_inode_lock_ex(hmp->sroot);
        lhc = hammer2_dirhash(label, strlen(label));
        rchain = hammer2_chain_lookup(&parent, &key_next,
                                      lhc, lhc + HAMMER2_DIRHASH_LOMASK,
@@ -614,7 +619,8 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                                            lhc + HAMMER2_DIRHASH_LOMASK,
                                            &cache_index, 0);
        }
-       hammer2_chain_lookup_done(parent);
+       hammer2_inode_unlock_ex(hmp->sroot, parent);
+
        if (rchain == NULL) {
                kprintf("hammer2_mount: PFS label not found\n");
                hammer2_vfs_unmount(mp, MNT_FORCE);
@@ -638,7 +644,8 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
         * NOTE: *_get() integrates chain's lock into the inode lock.
         */
        hammer2_chain_ref(rchain);              /* for pmp->rchain */
-       pmp->mount_cluster->rchain = rchain;    /* left held & unlocked */
+       pmp->cluster.nchains = 1;
+       pmp->cluster.chains[0] = rchain;
        pmp->iroot = hammer2_inode_get(pmp, NULL, rchain);
        hammer2_inode_ref(pmp->iroot);          /* ref for pmp->iroot */
 
@@ -1339,7 +1346,7 @@ hammer2_write_bp(hammer2_chain_t *chain, struct buf *bp, int ioflag,
 
 static
 int
-hammer2_remount(struct mount *mp, char *path, struct vnode *devvp,
+hammer2_remount(hammer2_mount_t *hmp, char *path, struct vnode *devvp,
                 struct ucred *cred)
 {
        return (0);
@@ -1351,175 +1358,165 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags)
 {
        hammer2_pfsmount_t *pmp;
        hammer2_mount_t *hmp;
-       hammer2_cluster_t *cluster;
+       hammer2_chain_t *rchain;
        int flags;
        int error = 0;
        int ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
        int dumpcnt;
+       int i;
        struct vnode *devvp;
 
        pmp = MPTOPMP(mp);
-       cluster = pmp->mount_cluster;
-       hmp = cluster->hmp;
-       flags = 0;
 
-       if (mntflags & MNT_FORCE)
-               flags |= FORCECLOSE;
+       ccms_domain_uninit(&pmp->ccms_dom);
+       kdmsg_iocom_uninit(&pmp->iocom);        /* XXX chain dependency */
 
-       hammer2_mount_exlock(hmp);
+       for (i = 0; i < pmp->cluster.nchains; ++i) {
+               hmp = pmp->cluster.chains[i]->hmp;
 
-       /*
-        * If mount initialization proceeded far enough we must flush
-        * its vnodes.
-        */
-       if (pmp->iroot)
-               error = vflush(mp, 0, flags);
+               flags = 0;
 
-       if (error) {
-               hammer2_mount_unlock(hmp);
-               return error;
-       }
+               if (mntflags & MNT_FORCE)
+                       flags |= FORCECLOSE;
 
-       lockmgr(&hammer2_mntlk, LK_EXCLUSIVE);
-       --hmp->pmp_count;
-       kprintf("hammer2_unmount hmp=%p pmpcnt=%d\n", hmp, hmp->pmp_count);
+               hammer2_mount_exlock(hmp);
 
-       /*
-        * Flush any left over chains.  The voldata lock is only used
-        * to synchronize against HAMMER2_CHAIN_MODIFIED_AUX.
-        */
-       hammer2_voldata_lock(hmp);
-       if ((hmp->vchain.flags | hmp->fchain.flags) &
-           (HAMMER2_CHAIN_MODIFIED | HAMMER2_CHAIN_SUBMODIFIED)) {
-               hammer2_voldata_unlock(hmp, 0);
-               hammer2_vfs_sync(mp, MNT_WAIT);
-               hammer2_vfs_sync(mp, MNT_WAIT);
-       } else {
-               hammer2_voldata_unlock(hmp, 0);
-       }
-       if (hmp->pmp_count == 0) {
-               if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED |
-                                        HAMMER2_CHAIN_SUBMODIFIED)) {
-                       kprintf("hammer2_unmount: chains left over after "
-                               "final sync\n");
-                       if (hammer2_debug & 0x0010)
-                               Debugger("entered debugger");
+               /*
+                * If mount initialization proceeded far enough we must flush
+                * its vnodes.
+                */
+               if (pmp->iroot)
+                       error = vflush(mp, 0, flags);
+
+               if (error) {
+                       hammer2_mount_unlock(hmp);
+                       return error;
                }
-       }
 
-       /*
-        * Cleanup the root and super-root chain elements (which should be
-        * clean).
-        */
-       if (pmp->iroot) {
-#if REPORT_REFS_ERRORS
-               if (pmp->iroot->refs != 1)
-                       kprintf("PMP->IROOT %p REFS WRONG %d\n",
-                               pmp->iroot, pmp->iroot->refs);
-#else
-               KKASSERT(pmp->iroot->refs == 1);
-#endif
-               hammer2_inode_drop(pmp->iroot);     /* ref for pmp->iroot */
-               pmp->iroot = NULL;
-       }
-       if (cluster->rchain) {
-               atomic_clear_int(&cluster->rchain->flags,
-                                HAMMER2_CHAIN_MOUNTED);
+               lockmgr(&hammer2_mntlk, LK_EXCLUSIVE);
+               --hmp->pmp_count;
+               kprintf("hammer2_unmount hmp=%p pmpcnt=%d\n",
+                       hmp, hmp->pmp_count);
+
+               /*
+                * Flush any left over chains.  The voldata lock is only used
+                * to synchronize against HAMMER2_CHAIN_MODIFIED_AUX.
+                */
+               hammer2_voldata_lock(hmp);
+               if ((hmp->vchain.flags | hmp->fchain.flags) &
+                   (HAMMER2_CHAIN_MODIFIED | HAMMER2_CHAIN_SUBMODIFIED)) {
+                       hammer2_voldata_unlock(hmp, 0);
+                       hammer2_vfs_sync(mp, MNT_WAIT);
+                       hammer2_vfs_sync(mp, MNT_WAIT);
+               } else {
+                       hammer2_voldata_unlock(hmp, 0);
+               }
+               if (hmp->pmp_count == 0) {
+                       if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED |
+                                                HAMMER2_CHAIN_SUBMODIFIED)) {
+                               kprintf("hammer2_unmount: chains left over "
+                                       "after final sync\n");
+                               if (hammer2_debug & 0x0010)
+                                       Debugger("entered debugger");
+                       }
+               }
+
+               /*
+                * Cleanup the root and super-root chain elements
+                * (which should be clean).
+                */
+               if (pmp->iroot) {
 #if REPORT_REFS_ERRORS
-               if (cluster->rchain->refs != 1)
-                       kprintf("PMP->RCHAIN %p REFS WRONG %d\n",
-                               cluster->rchain, cluster->rchain->refs);
+                       if (pmp->iroot->refs != 1)
+                               kprintf("PMP->IROOT %p REFS WRONG %d\n",
+                                       pmp->iroot, pmp->iroot->refs);
 #else
-               KKASSERT(cluster->rchain->refs == 1);
+                       KKASSERT(pmp->iroot->refs == 1);
 #endif
-               hammer2_chain_drop(cluster->rchain);
-               cluster->rchain = NULL;
-       }
-       ccms_domain_uninit(&pmp->ccms_dom);
-
-       /*
-        * Kill cluster controller
-        */
-       kdmsg_iocom_uninit(&pmp->iocom);
-
-       /*
-        * If no PFS's left drop the master hammer2_mount for the device.
-        */
-       if (hmp->pmp_count == 0) {
-               if (hmp->sroot) {
-                       hammer2_inode_drop(hmp->sroot);
-                       hmp->sroot = NULL;
+                       /* ref for pmp->iroot */
+                       hammer2_inode_drop(pmp->iroot);
+                       pmp->iroot = NULL;
                }
-               if (hmp->schain) {
+
+               rchain = pmp->cluster.chains[i];
+               if (rchain) {
+                       atomic_clear_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED);
 #if REPORT_REFS_ERRORS
-                       if (hmp->schain->refs != 1)
-                               kprintf("HMP->SCHAIN %p REFS WRONG %d\n",
-                                       hmp->schain, hmp->schain->refs);
+                       if (rchain->refs != 1)
+                               kprintf("PMP->RCHAIN %p REFS WRONG %d\n",
+                                       rchain, rchain->refs);
 #else
-                       KKASSERT(hmp->schain->refs == 1);
+                       KKASSERT(rchain->refs == 1);
 #endif
-                       hammer2_chain_drop(hmp->schain);
-                       hmp->schain = NULL;
+                       hammer2_chain_drop(rchain);
+                       pmp->cluster.chains[i] = NULL;
                }
 
                /*
-                * Finish up with the device vnode
+                * If no PFS's left drop the master hammer2_mount for the
+                * device.
                 */
-               if ((devvp = hmp->devvp) != NULL) {
-                       vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0);
-                       hmp->devvp = NULL;
-                       VOP_CLOSE(devvp, (ronly ? FREAD : FREAD|FWRITE));
-                       vrele(devvp);
-                       devvp = NULL;
-               }
+               if (hmp->pmp_count == 0) {
+                       if (hmp->sroot) {
+                               hammer2_inode_drop(hmp->sroot);
+                               hmp->sroot = NULL;
+                       }
 
-               /*
-                * Final drop of embedded freemap root chain to clean up
-                * fchain.core (fchain structure is not flagged ALLOCATED
-                * so it is cleaned out and then left to rot).
-                */
-               hammer2_chain_drop(&hmp->fchain);
+                       /*
+                        * Finish up with the device vnode
+                        */
+                       if ((devvp = hmp->devvp) != NULL) {
+                               vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0);
+                               hmp->devvp = NULL;
+                               VOP_CLOSE(devvp,
+                                         (ronly ? FREAD : FREAD|FWRITE));
+                               vrele(devvp);
+                               devvp = NULL;
+                       }
 
-               /*
-                * Final drop of embedded volume root chain to clean up
-                * vchain.core (vchain structure is not flagged ALLOCATED
-                * so it is cleaned out and then left to rot).
-                */
-               dumpcnt = 50;
-               hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt);
-               hammer2_mount_unlock(hmp);
-               hammer2_chain_drop(&hmp->vchain);
-       } else {
-               hammer2_mount_unlock(hmp);
+                       /*
+                        * Final drop of embedded freemap root chain to clean up
+                        * fchain.core (fchain structure is not flagged ALLOCATED
+                        * so it is cleaned out and then left to rot).
+                        */
+                       hammer2_chain_drop(&hmp->fchain);
+
+                       /*
+                        * Final drop of embedded volume root chain to clean up
+                        * vchain.core (vchain structure is not flagged ALLOCATED
+                        * so it is cleaned out and then left to rot).
+                        */
+                       dumpcnt = 50;
+                       hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt);
+                       hammer2_mount_unlock(hmp);
+                       hammer2_chain_drop(&hmp->vchain);
+               } else {
+                       hammer2_mount_unlock(hmp);
+               }
+               if (hmp->pmp_count == 0) {
+                       mtx_lock(&hmp->wthread_mtx);
+                       hmp->wthread_destroy = 1;
+                       wakeup(&hmp->wthread_bioq);
+                       while (hmp->wthread_destroy != -1) {
+                               mtxsleep(&hmp->wthread_destroy,
+                                       &hmp->wthread_mtx, 0,
+                                       "umount-sleep", 0);
+                       }
+                       mtx_unlock(&hmp->wthread_mtx);
+
+                       TAILQ_REMOVE(&hammer2_mntlist, hmp, mntentry);
+                       kmalloc_destroy(&hmp->mchain);
+                       kfree(hmp, M_HAMMER2);
+               }
        }
 
        pmp->mp = NULL;
        mp->mnt_data = NULL;
 
-       pmp->mount_cluster = NULL;
-       pmp->cluster = NULL;            /* XXX */
-
        kmalloc_destroy(&pmp->mmsg);
        kmalloc_destroy(&pmp->minode);
 
-       cluster->hmp = NULL;
-
-       kfree(cluster, M_HAMMER2);
        kfree(pmp, M_HAMMER2);
-       if (hmp->pmp_count == 0) {
-               mtx_lock(&hmp->wthread_mtx);
-               hmp->wthread_destroy = 1;
-               wakeup(&hmp->wthread_bioq);
-               while (hmp->wthread_destroy != -1) {
-                       mtxsleep(&hmp->wthread_destroy, &hmp->wthread_mtx, 0,
-                               "umount-sleep", 0);
-               }
-               mtx_unlock(&hmp->wthread_mtx);
-               
-               TAILQ_REMOVE(&hammer2_mntlist, hmp, mntentry);
-               kmalloc_destroy(&hmp->mchain);
-               kfree(hmp, M_HAMMER2);
-       }
        lockmgr(&hammer2_mntlk, LK_RELEASE);
 
        return (error);
@@ -1572,7 +1569,8 @@ hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
        hammer2_mount_t *hmp;
 
        pmp = MPTOPMP(mp);
-       hmp = MPTOHMP(mp);
+       KKASSERT(pmp->cluster.nchains >= 1);
+       hmp = pmp->cluster.chains[0]->hmp;      /* XXX */
 
        mp->mnt_stat.f_files = pmp->inode_count;
        mp->mnt_stat.f_ffree = 0;
@@ -1592,7 +1590,8 @@ hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred)
        hammer2_mount_t *hmp;
 
        pmp = MPTOPMP(mp);
-       hmp = MPTOHMP(mp);
+       KKASSERT(pmp->cluster.nchains >= 1);
+       hmp = pmp->cluster.chains[0]->hmp;      /* XXX */
 
        mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE;
        mp->mnt_vstat.f_files = pmp->inode_count;
@@ -1627,10 +1626,10 @@ hammer2_vfs_sync(struct mount *mp, int waitfor)
 {
        struct hammer2_sync_info info;
        hammer2_pfsmount_t *pmp;
-       hammer2_cluster_t *cluster;
        hammer2_mount_t *hmp;
        int flags;
        int error;
+       int total_error;
        int i;
 
        pmp = MPTOPMP(mp);
@@ -1669,89 +1668,95 @@ hammer2_vfs_sync(struct mount *mp, int waitfor)
        }
 #endif
 
-       cluster = pmp->cluster;
-       hmp = cluster->hmp;
+       total_error = 0;
+       for (i = 0; i < pmp->cluster.nchains; ++i) {
+               hmp = pmp->cluster.chains[i]->hmp;
 
-       hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
-       if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED |
-                                 HAMMER2_CHAIN_SUBMODIFIED)) {
-               hammer2_chain_flush(&info.trans, &hmp->vchain);
-       }
-       hammer2_chain_unlock(&hmp->vchain);
+               hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
+               if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED |
+                                         HAMMER2_CHAIN_SUBMODIFIED)) {
+                       hammer2_chain_flush(&info.trans, &hmp->vchain);
+               }
+               hammer2_chain_unlock(&hmp->vchain);
 
 #if 1
-       /*
-        * Rollup flush.  The fsyncs above basically just flushed
-        * data blocks.  The flush below gets all the meta-data.
-        */
-       hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS);
-       if (hmp->fchain.flags & (HAMMER2_CHAIN_MODIFIED |
-                                HAMMER2_CHAIN_SUBMODIFIED)) {
-               /* this will modify vchain as a side effect */
-               hammer2_chain_flush(&info.trans, &hmp->fchain);
-       }
-       hammer2_chain_unlock(&hmp->fchain);
+               /*
+                * Rollup flush.  The fsyncs above basically just flushed
+                * data blocks.  The flush below gets all the meta-data.
+                */
+               hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS);
+               if (hmp->fchain.flags & (HAMMER2_CHAIN_MODIFIED |
+                                        HAMMER2_CHAIN_SUBMODIFIED)) {
+                       /* this will modify vchain as a side effect */
+                       hammer2_chain_flush(&info.trans, &hmp->fchain);
+               }
+               hammer2_chain_unlock(&hmp->fchain);
 #endif
 
-
-       error = 0;
-
-       /*
-        * We can't safely flush the volume header until we have
-        * flushed any device buffers which have built up.
-        *
-        * XXX this isn't being incremental
-        */
-       vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY);
-       error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0);
-       vn_unlock(hmp->devvp);
-
-       /*
-        * The flush code sets CHAIN_VOLUMESYNC to indicate that the
-        * volume header needs synchronization via hmp->volsync.
-        *
-        * XXX synchronize the flag & data with only this flush XXX
-        */
-       if (error == 0 && (hmp->vchain.flags & HAMMER2_CHAIN_VOLUMESYNC)) {
-               struct buf *bp;
+               error = 0;
 
                /*
-                * Synchronize the disk before flushing the volume
-                * header.
+                * We can't safely flush the volume header until we have
+                * flushed any device buffers which have built up.
+                *
+                * XXX this isn't being incremental
                 */
-               bp = getpbuf(NULL);
-               bp->b_bio1.bio_offset = 0;
-               bp->b_bufsize = 0;
-               bp->b_bcount = 0;
-               bp->b_cmd = BUF_CMD_FLUSH;
-               bp->b_bio1.bio_done = biodone_sync;
-               bp->b_bio1.bio_flags |= BIO_SYNC;
-               vn_strategy(hmp->devvp, &bp->b_bio1);
-               biowait(&bp->b_bio1, "h2vol");
-               relpbuf(bp, NULL);
+               vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY);
+               error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0);
+               vn_unlock(hmp->devvp);
 
                /*
-                * Then we can safely flush the version of the volume header
-                * synchronized by the flush code.
+                * The flush code sets CHAIN_VOLUMESYNC to indicate that the
+                * volume header needs synchronization via hmp->volsync.
+                *
+                * XXX synchronize the flag & data with only this flush XXX
                 */
-               i = hmp->volhdrno + 1;
-               if (i >= HAMMER2_NUM_VOLHDRS)
-                       i = 0;
-               if (i * HAMMER2_ZONE_BYTES64 + HAMMER2_SEGSIZE >
-                   hmp->volsync.volu_size) {
-                       i = 0;
+               if (error == 0 &&
+                   (hmp->vchain.flags & HAMMER2_CHAIN_VOLUMESYNC)) {
+                       struct buf *bp;
+
+                       /*
+                        * Synchronize the disk before flushing the volume
+                        * header.
+                        */
+                       bp = getpbuf(NULL);
+                       bp->b_bio1.bio_offset = 0;
+                       bp->b_bufsize = 0;
+                       bp->b_bcount = 0;
+                       bp->b_cmd = BUF_CMD_FLUSH;
+                       bp->b_bio1.bio_done = biodone_sync;
+                       bp->b_bio1.bio_flags |= BIO_SYNC;
+                       vn_strategy(hmp->devvp, &bp->b_bio1);
+                       biowait(&bp->b_bio1, "h2vol");
+                       relpbuf(bp, NULL);
+
+                       /*
+                        * Then we can safely flush the version of the
+                        * volume header synchronized by the flush code.
+                        */
+                       i = hmp->volhdrno + 1;
+                       if (i >= HAMMER2_NUM_VOLHDRS)
+                               i = 0;
+                       if (i * HAMMER2_ZONE_BYTES64 + HAMMER2_SEGSIZE >
+                           hmp->volsync.volu_size) {
+                               i = 0;
+                       }
+                       kprintf("sync volhdr %d %jd\n",
+                               i, (intmax_t)hmp->volsync.volu_size);
+                       bp = getblk(hmp->devvp, i * HAMMER2_ZONE_BYTES64,
+                                   HAMMER2_PBUFSIZE, 0, 0);
+                       atomic_clear_int(&hmp->vchain.flags,
+                                        HAMMER2_CHAIN_VOLUMESYNC);
+                       bcopy(&hmp->volsync, bp->b_data, HAMMER2_PBUFSIZE);
+                       bawrite(bp);
+                       hmp->volhdrno = i;
                }
-               kprintf("sync volhdr %d %jd\n",
-                       i, (intmax_t)hmp->volsync.volu_size);
-               bp = getblk(hmp->devvp, i * HAMMER2_ZONE_BYTES64,
-                           HAMMER2_PBUFSIZE, 0, 0);
-               atomic_clear_int(&hmp->vchain.flags, HAMMER2_CHAIN_VOLUMESYNC);
-               bcopy(&hmp->volsync, bp->b_data, HAMMER2_PBUFSIZE);
-               bawrite(bp);
-               hmp->volhdrno = i;
+               if (error)
+                       total_error = error;
        }
+
        hammer2_trans_done(&info.trans);
-       return (error);
+       return (total_error);
 }
 
 /*
@@ -1947,7 +1952,7 @@ hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp)
        hammer2_mount_t *hmp;
        size_t name_len;
 
-       hmp = pmp->mount_cluster->hmp;
+       hmp = pmp->cluster.chains[0]->hmp;      /* XXX */
 
        /*
         * Closes old comm descriptor, kills threads, cleans up
@@ -2060,7 +2065,7 @@ static void
 hammer2_autodmsg(kdmsg_msg_t *msg)
 {
        hammer2_pfsmount_t *pmp = msg->iocom->handle;
-       hammer2_mount_t *hmp = pmp->mount_cluster->hmp;
+       hammer2_mount_t *hmp = pmp->cluster.chains[0]->hmp; /* XXX */
        int copyid;
 
        /*
@@ -2103,7 +2108,7 @@ hammer2_autodmsg(kdmsg_msg_t *msg)
 void
 hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index)
 {
-       hammer2_mount_t *hmp = pmp->mount_cluster->hmp;
+       hammer2_mount_t *hmp = pmp->cluster.chains[0]->hmp;     /* XXX */
        kdmsg_msg_t *msg;
 
        /* XXX interlock against connection state termination */
index c06b57e..c53fd41 100644 (file)
@@ -2139,7 +2139,7 @@ hammer2_strategy_write(struct vop_strategy_args *ap)
        bio = ap->a_bio;
        bp = bio->bio_buf;
        ip = VTOI(ap->a_vp);
-       hmp = ip->pmp->mount_cluster->hmp;
+       hmp = ip->pmp->cluster.chains[0]->hmp;
        
        mtx_lock(&hmp->wthread_mtx);
        bioq_insert_tail(&hmp->wthread_bioq, ap->a_bio);