hammer2 - Starting refactoring PFS management in mount
author Matthew Dillon <dillon@apollo.backplane.com>
Sat, 28 Mar 2015 05:51:17 +0000 (22:51 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Sat, 28 Mar 2015 06:58:37 +0000 (23:58 -0700)
* Start removing single-device shims.

* Adjust data structures.  Rename hammer2_mount to hammer2_dev and
  rename hammer2_pfsmount to hammer2_pfs.  Refactor unmount.

* Integrate all available PFSs when a block device is mounted and
  de-integrate related PFSs when a block device is unmounted.

  At least one PFS (typically @LOCAL) must be mounted from a HAMMER2 block
  device for that device's PFSs to be used.  At least for now.  We could
  eventually trigger auto-mounting via the probe code.

* Add a synchronization thread abstraction for a PFS.

* Normalize chain->pmp to NULL for any chain which is part of the super-root
  topology, for later sanity assertions.

* The ioctl to create a PFS now adds it (delete and snapshot do not, yet).

14 files changed:
sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_bulkscan.c
sys/vfs/hammer2/hammer2_chain.c
sys/vfs/hammer2/hammer2_cluster.c
sys/vfs/hammer2/hammer2_disk.h
sys/vfs/hammer2/hammer2_flush.c
sys/vfs/hammer2/hammer2_freemap.c
sys/vfs/hammer2/hammer2_inode.c
sys/vfs/hammer2/hammer2_io.c
sys/vfs/hammer2/hammer2_iocom.c
sys/vfs/hammer2/hammer2_ioctl.c
sys/vfs/hammer2/hammer2_subr.c
sys/vfs/hammer2/hammer2_vfsops.c
sys/vfs/hammer2/hammer2_vnops.c

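diff --git a/sys/vfs/hammer2/hammer2.h b/sys/vfs/hammer2/hammer2.h
index b75e27a..6ab855d 100644
--- a/sys/vfs/hammer2/hammer2.h
+++ b/sys/vfs/hammer2/hammer2.h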
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2014 The DragonFly Project.  All rights reserved.
+ * Copyright (c) 2011-2015 The DragonFly Project.  All rights reserved.
  *
  * This code is derived from software contributed to The DragonFly Project
  * by Matthew Dillon <dillon@dragonflybsd.org>
@@ -98,8 +98,8 @@ struct hammer2_iocb;
 struct hammer2_chain;
 struct hammer2_cluster;
 struct hammer2_inode;
-struct hammer2_mount;
-struct hammer2_pfsmount;
+struct hammer2_dev;
+struct hammer2_pfs;
 struct hammer2_span;
 struct hammer2_state;
 struct hammer2_msg;
@@ -297,7 +297,7 @@ struct hammer2_io {
        RB_ENTRY(hammer2_io) rbnode;    /* indexed by device offset */
        struct h2_iocb_list iocbq;
        struct spinlock spin;
-       struct hammer2_mount *hmp;
+       struct hammer2_dev *hmp;
        struct buf      *bp;
        off_t           pbase;
        int             psize;
@@ -323,8 +323,8 @@ struct hammer2_chain {
        hammer2_blockref_t      bref;
        struct hammer2_chain    *parent;
        struct hammer2_state    *state;         /* if active cache msg */
-       struct hammer2_mount    *hmp;
-       struct hammer2_pfsmount *pmp;           /* (pfs-cluster pmp or spmp) */
+       struct hammer2_dev      *hmp;
+       struct hammer2_pfs      *pmp;           /* A PFS or super-root (spmp) */
 
        hammer2_xid_t   flush_xid;              /* flush sequencing */
        hammer2_key_t   data_count;             /* delta's to apply */
@@ -373,7 +373,7 @@ RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
 #define HAMMER2_CHAIN_UNUSED00000400   0x00000400
 #define HAMMER2_CHAIN_VOLUMESYNC       0x00000800      /* needs volume sync */
 #define HAMMER2_CHAIN_UNUSED00001000   0x00001000
-#define HAMMER2_CHAIN_MOUNTED          0x00002000      /* PFS is mounted */
+#define HAMMER2_CHAIN_UNUSED00002000   0x00002000
 #define HAMMER2_CHAIN_ONRBTREE         0x00004000      /* on parent RB tree */
 #define HAMMER2_CHAIN_SNAPSHOT         0x00008000      /* snapshot special */
 #define HAMMER2_CHAIN_EMBEDDED         0x00010000      /* embedded data */
@@ -431,6 +431,7 @@ RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
 #define HAMMER2_DELETE_NOSTATS         0x0002
 
 #define HAMMER2_INSERT_NOSTATS         0x0002
+#define HAMMER2_INSERT_PFSROOT         0x0004
 
 /*
  * Flags passed to hammer2_chain_delete_duplicate()
@@ -500,7 +501,7 @@ typedef struct hammer2_cluster_item hammer2_cluster_item_t;
 struct hammer2_cluster {
        int                     status;         /* operational status */
        int                     refs;           /* track for deallocation */
-       struct hammer2_pfsmount *pmp;
+       struct hammer2_pfs      *pmp;
        uint32_t                flags;
        int                     nchains;
        hammer2_iocb_t          iocb;
@@ -526,7 +527,7 @@ RB_HEAD(hammer2_inode_tree, hammer2_inode);
 struct hammer2_inode {
        RB_ENTRY(hammer2_inode) rbnode;         /* inumber lookup (HL) */
        hammer2_mtx_t           lock;           /* inode lock */
-       struct hammer2_pfsmount *pmp;           /* PFS mount */
+       struct hammer2_pfs      *pmp;           /* PFS mount */
        struct hammer2_inode    *pip;           /* parent inode */
        struct vnode            *vp;
        hammer2_cluster_t       cluster;
@@ -563,10 +564,38 @@ TAILQ_HEAD(h2_unlk_list, hammer2_inode_unlink);
 
 typedef struct hammer2_inode_unlink hammer2_inode_unlink_t;
 
+/*
+ * Cluster node synchronization thread element.
+ *
+ * Multiple syncthr's can hang off of a hammer2_pfs structure, typically one
+ * for each block device that is part of the PFS.  Synchronization threads
+ * for PFSs accessed over the network are handled by their respective hosts.
+ *
+ * Synchronization threads are responsible for keeping a local node
+ * synchronized to the greater cluster.
+ *
+ * A syncthr can also hang off each hammer2_dev's super-root PFS (spmp).
+ * This thread is responsible for automatic bulkfree and dedup scans.
+ */
+struct hammer2_syncthr {
+       TAILQ_ENTRY(hammer2_syncthr) entry;
+       hammer2_inode_t *iroot;
+       struct hammer2_pfs *pfs;
+       kdmsg_state_t   *span;
+       thread_t        td;
+       uint32_t        flags;
+};
+TAILQ_HEAD(h2_syncthr_list, hammer2_syncthr);
+
+#define HAMMER2_SYNCTHR_UNMOUNTING     0x0001  /* unmount request */
+#define HAMMER2_SYNCTHR_DEV            0x0002  /* related to dev, not pfs */
+#define HAMMER2_SYNCTHR_SPANNED                0x0004  /* LNK_SPAN active */
+
+
 /*
  * A hammer2 transaction and flush sequencing structure.
  *
- * This global structure is tied into hammer2_mount and is used
+ * This global structure is tied into hammer2_dev and is used
  * to sequence modifying operations and flushes.
  *
  * (a) Any modifying operations with sync_tid >= flush_tid will stall until
@@ -607,7 +636,7 @@ typedef struct hammer2_inode_unlink hammer2_inode_unlink_t;
  */
 struct hammer2_trans {
        TAILQ_ENTRY(hammer2_trans) entry;
-       struct hammer2_pfsmount *pmp;
+       struct hammer2_pfs      *pmp;
        hammer2_xid_t           sync_xid;
        hammer2_tid_t           inode_tid;      /* inode number assignment */
        thread_t                td;             /* pointer */
@@ -647,13 +676,21 @@ struct hammer2_trans_manage {
 typedef struct hammer2_trans_manage hammer2_trans_manage_t;
 
 /*
- * Global (per device) mount structure for device (aka vp->v_mount->hmp)
+ * Global (per partition) management structure, represents a hard block
+ * device.  Typically referenced by hammer2_chain structures when applicable.
+ * Typically not used for network-managed elements.
+ *
+ * Note that a single hammer2_dev can be indirectly tied to multiple system
+ * mount points.  There is no direct relationship.  System mounts are
+ * per-cluster-id, not per-block-device, and a single hard mount might contain
+ * many PFSs and those PFSs might combine together in various ways to form
+ * the set of available clusters.
  */
-struct hammer2_mount {
+struct hammer2_dev {
        struct vnode    *devvp;         /* device vnode */
        int             ronly;          /* read-only mount */
-       int             pmp_count;      /* PFS mounts backed by us */
-       TAILQ_ENTRY(hammer2_mount) mntentry; /* hammer2_mntlist */
+       int             pmp_count;      /* number of actively mounted PFSs */
+       TAILQ_ENTRY(hammer2_dev) mntentry; /* hammer2_mntlist */
 
        struct malloc_type *mchain;
        int             nipstacks;
@@ -666,7 +703,7 @@ struct hammer2_mount {
        hammer2_chain_t fchain;         /* anchor chain (freemap) */
        struct spinlock list_spin;
        struct h2_flush_list    flushq; /* flush seeds */
-       struct hammer2_pfsmount *spmp;  /* super-root pmp for transactions */
+       struct hammer2_pfs *spmp;       /* super-root pmp for transactions */
        struct lock     vollk;          /* lockmgr lock */
        hammer2_off_t   heur_freemap[HAMMER2_FREEMAP_HEUR];
        int             volhdrno;       /* last volhdrno written */
@@ -674,57 +711,47 @@ struct hammer2_mount {
        hammer2_volume_data_t volsync;  /* synchronized voldata */
 };
 
-typedef struct hammer2_mount hammer2_mount_t;
+typedef struct hammer2_dev hammer2_dev_t;
 
 /*
- * HAMMER2 PFS mount point structure (aka vp->v_mount->mnt_data).
- * This has a 1:1 correspondence to struct mount (note that the
- * hammer2_mount structure has a N:1 correspondence).
- *
- * This structure represents a cluster mount and not necessarily a
- * PFS under a specific device mount (HMP).  The distinction is important
- * because the elements backing a cluster mount can change on the fly.
- *
- * pfs_mode and pfs_nmasters critically describes how a HAMMER2 filesytem
- * mount should operate.  pfs_nmasters indicates how many master PFSs
- * exist for the filesystem (whether available or not).  pfs_mode is
- * a bitmask:
+ * Per-cluster management structure.  This structure will be tied to a
+ * system mount point if the system is mounting the PFS, but is also used
+ * to manage clusters encountered during the super-root scan or received
+ * via LNK_SPANs that might not be mounted.
  *
- *     XXX this should be automatic based on the 'primary' mount.. based on
- *     which target you are mounting.
+ * This structure is also used to represent the super-root that hangs off
+ * of a hard mount point.  The super-root is not really a cluster element.
+ * In this case the spmp_hmp field will be non-NULL.  It's just easier to do
+ * this than to special case super-root manipulation in the hammer2_chain*
+ * code as being only hammer2_dev-related.
  *
- *     HAMMER2_PFSMODE_QUORUM  - Validate against quorum of masters,
- *                               else operate unsynchronized.
+ * pfs_mode and pfs_nmasters are rollup fields which critically describe
+ * how elements of the cluster act on the cluster.  pfs_mode is only applicable
+ * when a PFS is mounted by the system.  pfs_nmasters is our best guess as to
+ * how many masters have been configured for a cluster and is always
+ * applicable.
  *
- *     HAMMER2_PFSMODE_RW      - Allow writing to the cluster,
- *                               else do not allow.
+ * WARNING! Portions of this structure have deferred initialization.  In
+ *         particular, if not mounted there will be no ihidden or wthread.
+ *         Unmounted network PFSs will also be missing iroot and numerous
+ *         other fields will not be initialized prior to mount.
  *
- *     When operating in quorum mode modifying operations flow into
- *     a quorum+ of masters and all other local PFS types are synchronized
- *     in the background.  Other PFS types will be used to improve or avoid
- *     network I/O only if they agree with a quorum of masters.
+ *         Synchronization threads are chain-specific and only applicable
+ *         to local hard PFS entries.  A hammer2_pfs structure may contain
+ *         more than one when multiple hard PFSs are present on the local
+ *         machine which require synchronization monitoring.  Most PFSs
+ *         (such as snapshots) are 1xMASTER PFSs which do not need a
+ *         synchronization thread.
  *
- *     When not operating in quorum mode modifying operations may only flow
- *     into a SOFT_MASTER and will be synchronized with the quorum in the
- *     background, and will not be cache-coherent with the quorum.  Think
- *     laptop-on-the-road.  Other PFS types will be used to improve or avoid
- *     network I/O only if they agree with the SOFT_MASTER.
- *
- *     When not operating in quorum mode a read-only mount can be used to
- *     access a particular PFS unsynchronized.
- *
- * Usually the first element under the cluster represents the original
- * user-requested mount that bootstraps the whole mess.  In significant
- * setups the original is usually just a read-only media image (or
- * representitive file) that simply contains a bootstrap volume header
- * listing the configuration.
+ * WARNING! The chains making up pfs->iroot's cluster are accounted for in
+ *         hammer2_dev->pmp_count when the pfs is associated with a mount
+ *         point.
  */
-struct hammer2_pfsmount {
+struct hammer2_pfs {
        struct mount            *mp;
-       TAILQ_ENTRY(hammer2_pfsmount) mntentry; /* hammer2_pfslist */
+       TAILQ_ENTRY(hammer2_pfs) mntentry;      /* hammer2_pfslist */
        uuid_t                  pfs_clid;
-       uuid_t                  pfs_fsid;
-       hammer2_mount_t         *spmp_hmp;      /* (spmp only) */
+       hammer2_dev_t           *spmp_hmp;      /* only if super-root pmp */
        hammer2_inode_t         *iroot;         /* PFS root inode */
        hammer2_inode_t         *ihidden;       /* PFS hidden directory */
        struct lock             lock;           /* PFS lock for certain ops */
@@ -748,13 +775,14 @@ struct hammer2_pfsmount {
        int                     count_lwinprog; /* logical write in prog */
        struct spinlock         list_spin;
        struct h2_unlk_list     unlinkq;        /* last-close unlink */
+       struct h2_syncthr_list  syncthrq;       /* synchronization threads */
        thread_t                wthread_td;     /* write thread td */
        struct bio_queue_head   wthread_bioq;   /* logical buffer bioq */
        hammer2_mtx_t           wthread_mtx;    /* interlock */
        int                     wthread_destroy;/* termination sequencing */
 };
 
-typedef struct hammer2_pfsmount hammer2_pfsmount_t;
+typedef struct hammer2_pfs hammer2_pfs_t;
 
 #define HAMMER2_DIRTYCHAIN_WAITING     0x80000000
 #define HAMMER2_DIRTYCHAIN_MASK                0x7FFFFFFF
@@ -819,10 +847,10 @@ hammer2_devblksize(size_t bytes)
 
 
 static __inline
-hammer2_pfsmount_t *
+hammer2_pfs_t *
 MPTOPMP(struct mount *mp)
 {
-       return ((hammer2_pfsmount_t *)mp->mnt_data);
+       return ((hammer2_pfs_t *)mp->mnt_data);
 }
 
 #define LOCKSTART      int __nlocks = curthread->td_locks
@@ -886,9 +914,9 @@ void hammer2_inode_lock_temp_restore(hammer2_inode_t *ip,
 int hammer2_inode_lock_upgrade(hammer2_inode_t *ip);
 void hammer2_inode_lock_downgrade(hammer2_inode_t *ip, int);
 
-void hammer2_mount_exlock(hammer2_mount_t *hmp);
-void hammer2_mount_shlock(hammer2_mount_t *hmp);
-void hammer2_mount_unlock(hammer2_mount_t *hmp);
+void hammer2_dev_exlock(hammer2_dev_t *hmp);
+void hammer2_dev_shlock(hammer2_dev_t *hmp);
+void hammer2_dev_unlock(hammer2_dev_t *hmp);
 
 int hammer2_get_dtype(const hammer2_inode_data_t *ipdata);
 int hammer2_get_vtype(const hammer2_inode_data_t *ipdata);
@@ -897,7 +925,7 @@ void hammer2_time_to_timespec(u_int64_t xtime, struct timespec *ts);
 u_int64_t hammer2_timespec_to_time(const struct timespec *ts);
 u_int32_t hammer2_to_unix_xid(const uuid_t *uuid);
 void hammer2_guid_to_uuid(uuid_t *uuid, u_int32_t guid);
-hammer2_xid_t hammer2_trans_newxid(hammer2_pfsmount_t *pmp);
+hammer2_xid_t hammer2_trans_newxid(hammer2_pfs_t *pmp);
 void hammer2_trans_manage_init(void);
 
 hammer2_key_t hammer2_dirhash(const unsigned char *name, size_t len);
@@ -918,22 +946,23 @@ struct vnode *hammer2_igetv(hammer2_inode_t *ip, hammer2_cluster_t *cparent,
                        int *errorp);
 void hammer2_inode_lock_nlinks(hammer2_inode_t *ip);
 void hammer2_inode_unlock_nlinks(hammer2_inode_t *ip);
-hammer2_inode_t *hammer2_inode_lookup(hammer2_pfsmount_t *pmp,
+hammer2_inode_t *hammer2_inode_lookup(hammer2_pfs_t *pmp,
                        hammer2_tid_t inum);
-hammer2_inode_t *hammer2_inode_get(hammer2_pfsmount_t *pmp,
+hammer2_inode_t *hammer2_inode_get(hammer2_pfs_t *pmp,
                        hammer2_inode_t *dip, hammer2_cluster_t *cluster);
 void hammer2_inode_free(hammer2_inode_t *ip);
 void hammer2_inode_ref(hammer2_inode_t *ip);
 void hammer2_inode_drop(hammer2_inode_t *ip);
 void hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
                        hammer2_cluster_t *cluster);
-void hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp);
+void hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfs_t *pmp);
 
 hammer2_inode_t *hammer2_inode_create(hammer2_trans_t *trans,
                        hammer2_inode_t *dip,
                        struct vattr *vap, struct ucred *cred,
                        const uint8_t *name, size_t name_len,
-                       hammer2_cluster_t **clusterp, int *errorp);
+                       hammer2_cluster_t **clusterp,
+                       int flags, int *errorp);
 int hammer2_inode_connect(hammer2_trans_t *trans,
                        hammer2_cluster_t **clusterp, int hlink,
                        hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
@@ -956,16 +985,16 @@ int hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_cluster_t **cparentp,
                        hammer2_cluster_t *cluster);
 int hammer2_parent_find(hammer2_cluster_t **cparentp,
                        hammer2_cluster_t *cluster);
-void hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp);
+void hammer2_inode_install_hidden(hammer2_pfs_t *pmp);
 
 /*
  * hammer2_chain.c
  */
-void hammer2_voldata_lock(hammer2_mount_t *hmp);
-void hammer2_voldata_unlock(hammer2_mount_t *hmp);
-void hammer2_voldata_modify(hammer2_mount_t *hmp);
-hammer2_chain_t *hammer2_chain_alloc(hammer2_mount_t *hmp,
-                               hammer2_pfsmount_t *pmp,
+void hammer2_voldata_lock(hammer2_dev_t *hmp);
+void hammer2_voldata_unlock(hammer2_dev_t *hmp);
+void hammer2_voldata_modify(hammer2_dev_t *hmp);
+hammer2_chain_t *hammer2_chain_alloc(hammer2_dev_t *hmp,
+                               hammer2_pfs_t *pmp,
                                hammer2_trans_t *trans,
                                hammer2_blockref_t *bref);
 void hammer2_chain_core_alloc(hammer2_trans_t *trans, hammer2_chain_t *chain);
@@ -1010,7 +1039,7 @@ hammer2_chain_t *hammer2_chain_scan(hammer2_chain_t *parent,
 
 int hammer2_chain_create(hammer2_trans_t *trans, hammer2_chain_t **parentp,
                                hammer2_chain_t **chainp,
-                               hammer2_pfsmount_t *pmp,
+                               hammer2_pfs_t *pmp,
                                hammer2_key_t key, int keybits,
                                int type, size_t bytes, int flags);
 void hammer2_chain_rename(hammer2_trans_t *trans, hammer2_blockref_t *bref,
@@ -1032,9 +1061,9 @@ void hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata);
 int hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata);
 
 
-void hammer2_pfs_memory_wait(hammer2_pfsmount_t *pmp);
-void hammer2_pfs_memory_inc(hammer2_pfsmount_t *pmp);
-void hammer2_pfs_memory_wakeup(hammer2_pfsmount_t *pmp);
+void hammer2_pfs_memory_wait(hammer2_pfs_t *pmp);
+void hammer2_pfs_memory_inc(hammer2_pfs_t *pmp);
+void hammer2_pfs_memory_wakeup(hammer2_pfs_t *pmp);
 
 void hammer2_base_delete(hammer2_trans_t *trans, hammer2_chain_t *chain,
                                hammer2_blockref_t *base, int count,
@@ -1046,9 +1075,9 @@ void hammer2_base_insert(hammer2_trans_t *trans, hammer2_chain_t *chain,
 /*
  * hammer2_trans.c
  */
-void hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp,
+void hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfs_t *pmp,
                                int flags);
-void hammer2_trans_spmp(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp);
+void hammer2_trans_spmp(hammer2_trans_t *trans, hammer2_pfs_t *pmp);
 void hammer2_trans_done(hammer2_trans_t *trans);
 
 /*
@@ -1061,20 +1090,20 @@ int hammer2_ioctl(hammer2_inode_t *ip, u_long com, void *data,
  * hammer2_io.c
  */
 void hammer2_io_putblk(hammer2_io_t **diop);
-void hammer2_io_cleanup(hammer2_mount_t *hmp, struct hammer2_io_tree *tree);
+void hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree);
 char *hammer2_io_data(hammer2_io_t *dio, off_t lbase);
-void hammer2_io_getblk(hammer2_mount_t *hmp, off_t lbase, int lsize,
+void hammer2_io_getblk(hammer2_dev_t *hmp, off_t lbase, int lsize,
                                hammer2_iocb_t *iocb);
 void hammer2_io_complete(hammer2_iocb_t *iocb);
 void hammer2_io_callback(struct bio *bio);
 void hammer2_iocb_wait(hammer2_iocb_t *iocb);
-int hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
+int hammer2_io_new(hammer2_dev_t *hmp, off_t lbase, int lsize,
                                hammer2_io_t **diop);
-int hammer2_io_newnz(hammer2_mount_t *hmp, off_t lbase, int lsize,
+int hammer2_io_newnz(hammer2_dev_t *hmp, off_t lbase, int lsize,
                                hammer2_io_t **diop);
-int hammer2_io_newq(hammer2_mount_t *hmp, off_t lbase, int lsize,
+int hammer2_io_newq(hammer2_dev_t *hmp, off_t lbase, int lsize,
                                hammer2_io_t **diop);
-int hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize,
+int hammer2_io_bread(hammer2_dev_t *hmp, off_t lbase, int lsize,
                                hammer2_io_t **diop);
 void hammer2_io_bawrite(hammer2_io_t **diop);
 void hammer2_io_bdwrite(hammer2_io_t **diop);
@@ -1095,20 +1124,24 @@ int hammer2_msg_adhoc_input(kdmsg_msg_t *msg);
  * hammer2_vfsops.c
  */
 void hammer2_clusterctl_wakeup(kdmsg_iocom_t *iocom);
-void hammer2_volconf_update(hammer2_mount_t *hmp, int index);
+void hammer2_volconf_update(hammer2_dev_t *hmp, int index);
 void hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp, char pfx);
-void hammer2_bioq_sync(hammer2_pfsmount_t *pmp);
+void hammer2_bioq_sync(hammer2_pfs_t *pmp);
 int hammer2_vfs_sync(struct mount *mp, int waitflags);
-void hammer2_lwinprog_ref(hammer2_pfsmount_t *pmp);
-void hammer2_lwinprog_drop(hammer2_pfsmount_t *pmp);
-void hammer2_lwinprog_wait(hammer2_pfsmount_t *pmp);
+hammer2_pfs_t *hammer2_pfsalloc(hammer2_cluster_t *cluster,
+                               const hammer2_inode_data_t *ripdata,
+                               hammer2_tid_t alloc_tid);
+
+void hammer2_lwinprog_ref(hammer2_pfs_t *pmp);
+void hammer2_lwinprog_drop(hammer2_pfs_t *pmp);
+void hammer2_lwinprog_wait(hammer2_pfs_t *pmp);
 
 /*
  * hammer2_freemap.c
  */
 int hammer2_freemap_alloc(hammer2_trans_t *trans, hammer2_chain_t *chain,
                                size_t bytes);
-void hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_mount_t *hmp,
+void hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_dev_t *hmp,
                                hammer2_blockref_t *bref, int how);
 
 /*
@@ -1128,7 +1161,7 @@ void hammer2_cluster_setflush(hammer2_trans_t *trans,
                        hammer2_cluster_t *cluster);
 void hammer2_cluster_setmethod_check(hammer2_trans_t *trans,
                        hammer2_cluster_t *cluster, int check_algo);
-hammer2_cluster_t *hammer2_cluster_alloc(hammer2_pfsmount_t *pmp,
+hammer2_cluster_t *hammer2_cluster_alloc(hammer2_pfs_t *pmp,
                        hammer2_trans_t *trans,
                        hammer2_blockref_t *bref);
 void hammer2_cluster_ref(hammer2_cluster_t *cluster);
@@ -1179,15 +1212,15 @@ hammer2_cluster_t *hammer2_cluster_parent(hammer2_cluster_t *cluster);
 int hammer2_bulk_scan(hammer2_trans_t *trans, hammer2_chain_t *parent,
                        int (*func)(hammer2_chain_t *chain, void *info),
                        void *info);
-int hammer2_bulkfree_pass(hammer2_mount_t *hmp,
+int hammer2_bulkfree_pass(hammer2_dev_t *hmp,
                        struct hammer2_ioc_bulkfree *bfi);
 
 /*
  * hammer2_iocom.c
  */
-void hammer2_iocom_init(hammer2_mount_t *hmp);
-void hammer2_iocom_uninit(hammer2_mount_t *hmp);
-void hammer2_cluster_reconnect(hammer2_mount_t *hmp, struct file *fp);
+void hammer2_iocom_init(hammer2_dev_t *hmp);
+void hammer2_iocom_uninit(hammer2_dev_t *hmp);
+void hammer2_cluster_reconnect(hammer2_dev_t *hmp, struct file *fp);
 
 #endif /* !_KERNEL */
 #endif /* !_VFS_HAMMER2_HAMMER2_H_ */
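diff --git a/sys/vfs/hammer2/hammer2_bulkscan.c b/sys/vfs/hammer2/hammer2_bulkscan.c
index c9af4a4..ed7694b 100644
--- a/sys/vfs/hammer2/hammer2_bulkscan.c
+++ b/sys/vfs/hammer2/hammer2_bulkscan.c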
@@ -189,7 +189,7 @@ hammer2_bulk_scan(hammer2_trans_t *trans, hammer2_chain_t *parent,
  * Bulkfree callback info
  */
 typedef struct hammer2_bulkfree_info {
-       hammer2_mount_t         *hmp;
+       hammer2_dev_t           *hmp;
        hammer2_trans_t         *trans;
        kmem_anon_desc_t        kp;
        hammer2_off_t           sbase;          /* sub-loop iteration */
@@ -210,7 +210,7 @@ static void h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t *cbinfo,
                        hammer2_bmap_data_t *live, hammer2_bmap_data_t *bmap);
 
 int
-hammer2_bulkfree_pass(hammer2_mount_t *hmp, hammer2_ioc_bulkfree_t *bfi)
+hammer2_bulkfree_pass(hammer2_dev_t *hmp, hammer2_ioc_bulkfree_t *bfi)
 {
        hammer2_trans_t trans;
        hammer2_bulkfree_info_t cbinfo;
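diff --git a/sys/vfs/hammer2/hammer2_chain.c b/sys/vfs/hammer2/hammer2_chain.c
index 4e28a95..88dc23b 100644
--- a/sys/vfs/hammer2/hammer2_chain.c
+++ b/sys/vfs/hammer2/hammer2_chain.c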
@@ -165,7 +165,7 @@ hammer2_chain_setflush(hammer2_trans_t *trans, hammer2_chain_t *chain)
  * NOTE: Returns a referenced but unlocked (because there is no core) chain.
  */
 hammer2_chain_t *
-hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_pfsmount_t *pmp,
+hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp,
                    hammer2_trans_t *trans, hammer2_blockref_t *bref)
 {
        hammer2_chain_t *chain;
@@ -200,7 +200,10 @@ hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_pfsmount_t *pmp,
        /*
         * Initialize the new chain structure.
         */
-       chain->pmp = pmp;
+       if (pmp == hmp->spmp)
+               chain->pmp = NULL;
+       else
+               chain->pmp = pmp;
        chain->hmp = hmp;
        chain->bref = *bref;
        chain->bytes = bytes;
@@ -361,8 +364,8 @@ static
 hammer2_chain_t *
 hammer2_chain_lastdrop(hammer2_chain_t *chain)
 {
-       hammer2_pfsmount_t *pmp;
-       hammer2_mount_t *hmp;
+       hammer2_pfs_t *pmp;
+       hammer2_dev_t *hmp;
        hammer2_chain_t *parent;
        hammer2_chain_t *rdrop;
 
@@ -493,7 +496,7 @@ hammer2_chain_lastdrop(hammer2_chain_t *chain)
 static void
 hammer2_chain_drop_data(hammer2_chain_t *chain, int lastdrop)
 {
-       /*hammer2_mount_t *hmp = chain->hmp;*/
+       /*hammer2_dev_t *hmp = chain->hmp;*/
 
        switch(chain->bref.type) {
        case HAMMER2_BREF_TYPE_VOLUME:
@@ -554,7 +557,7 @@ hammer2_chain_drop_data(hammer2_chain_t *chain, int lastdrop)
 int
 hammer2_chain_lock(hammer2_chain_t *chain, int how)
 {
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
        hammer2_blockref_t *bref;
        hammer2_mtx_state_t ostate;
        char *bdata;
@@ -935,7 +938,7 @@ hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip,
                     hammer2_chain_t *parent, hammer2_chain_t *chain,
                     int nradix, int flags)
 {
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
        size_t obytes;
        size_t nbytes;
 
@@ -995,7 +998,7 @@ void
 hammer2_chain_modify(hammer2_trans_t *trans, hammer2_chain_t *chain, int flags)
 {
        hammer2_blockref_t obref;
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
        hammer2_io_t *dio;
        int error;
        int wasinitial;
@@ -1033,7 +1036,7 @@ hammer2_chain_modify(hammer2_trans_t *trans, hammer2_chain_t *chain, int flags)
        if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) {
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
                hammer2_chain_ref(chain);
-               hammer2_pfs_memory_inc(chain->pmp);
+               hammer2_pfs_memory_inc(chain->pmp);     /* can be NULL */
                newmod = 1;
        } else {
                newmod = 0;
@@ -1202,19 +1205,19 @@ skip2:
  * Volume header data locks
  */
 void
-hammer2_voldata_lock(hammer2_mount_t *hmp)
+hammer2_voldata_lock(hammer2_dev_t *hmp)
 {
        lockmgr(&hmp->vollk, LK_EXCLUSIVE);
 }
 
 void
-hammer2_voldata_unlock(hammer2_mount_t *hmp)
+hammer2_voldata_unlock(hammer2_dev_t *hmp)
 {
        lockmgr(&hmp->vollk, LK_RELEASE);
 }
 
 void
-hammer2_voldata_modify(hammer2_mount_t *hmp)
+hammer2_voldata_modify(hammer2_dev_t *hmp)
 {
        if ((hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) == 0) {
                atomic_set_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED);
@@ -1377,14 +1380,14 @@ hammer2_chain_find_callback(hammer2_chain_t *child, void *data)
  * Caller must hold the parent locked shared or exclusive since we may
  * need the parent's bref array to find our block.
  *
- * WARNING! chain->pmp is left NULL if the bref represents a PFS mount
- *         point.
+ * WARNING! chain->pmp is always set to NULL for any chain representing
+ *         part of the super-root topology.
  */
 hammer2_chain_t *
 hammer2_chain_get(hammer2_chain_t *parent, int generation,
                  hammer2_blockref_t *bref)
 {
-       hammer2_mount_t *hmp = parent->hmp;
+       hammer2_dev_t *hmp = parent->hmp;
        hammer2_chain_t *chain;
        int error;
 
@@ -1519,7 +1522,7 @@ hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp,
                     hammer2_key_t key_beg, hammer2_key_t key_end,
                     int *cache_indexp, int flags, int *ddflagp)
 {
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
        hammer2_chain_t *parent;
        hammer2_chain_t *chain;
        hammer2_blockref_t *base;
@@ -1867,7 +1870,7 @@ hammer2_chain_t *
 hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t *chain,
                   int *cache_indexp, int flags)
 {
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
        hammer2_blockref_t *base;
        hammer2_blockref_t *bref;
        hammer2_blockref_t bcopy;
@@ -2077,14 +2080,17 @@ done:
  * locked chain to insert (else we create a new chain).  The function will
  * adjust (*parentp) as necessary, create or connect the chain, and
  * return an exclusively locked chain in *chainp.
+ *
+ * When creating a PFSROOT inode under the super-root, pmp is typically NULL
+ * and will be reassigned.
  */
 int
 hammer2_chain_create(hammer2_trans_t *trans, hammer2_chain_t **parentp,
-                    hammer2_chain_t **chainp, hammer2_pfsmount_t *pmp,
+                    hammer2_chain_t **chainp, hammer2_pfs_t *pmp,
                     hammer2_key_t key, int keybits, int type, size_t bytes,
                     int flags)
 {
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
        hammer2_chain_t *chain;
        hammer2_chain_t *parent;
        hammer2_blockref_t *base;
@@ -2193,6 +2199,10 @@ hammer2_chain_create(hammer2_trans_t *trans, hammer2_chain_t **parentp,
                        atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED);
                KKASSERT(chain->parent == NULL);
        }
+       if (flags & HAMMER2_INSERT_PFSROOT)
+               chain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT;
+       else
+               chain->bref.flags &= ~HAMMER2_BREF_FLAG_PFSROOT;
 
        /*
         * Calculate how many entries we have in the blockref array and
@@ -2384,7 +2394,7 @@ hammer2_chain_rename(hammer2_trans_t *trans, hammer2_blockref_t *bref,
                     hammer2_chain_t **parentp, hammer2_chain_t *chain,
                     int flags)
 {
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
        hammer2_chain_t *parent;
        size_t bytes;
 
@@ -2443,7 +2453,7 @@ _hammer2_chain_delete_helper(hammer2_trans_t *trans,
                             hammer2_chain_t *parent, hammer2_chain_t *chain,
                             int flags)
 {
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
 
        KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0);
        hmp = chain->hmp;
@@ -2655,7 +2665,7 @@ hammer2_chain_create_indirect(hammer2_trans_t *trans, hammer2_chain_t *parent,
                              hammer2_key_t create_key, int create_bits,
                              int for_type, int *errorp)
 {
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
        hammer2_blockref_t *base;
        hammer2_blockref_t *bref;
        hammer2_blockref_t bcopy;
index 0c13e78..8f64694 100644 (file)
@@ -269,7 +269,7 @@ hammer2_cluster_from_chain(hammer2_chain_t *chain)
  * XXX focus on first chain.
  */
 hammer2_cluster_t *
-hammer2_cluster_alloc(hammer2_pfsmount_t *pmp,
+hammer2_cluster_alloc(hammer2_pfs_t *pmp,
                      hammer2_trans_t *trans, hammer2_blockref_t *bref)
 {
        hammer2_cluster_t *cluster;
@@ -524,7 +524,7 @@ hammer2_cluster_replace_locked(hammer2_cluster_t *dst, hammer2_cluster_t *src)
 hammer2_cluster_t *
 hammer2_cluster_copy(hammer2_cluster_t *ocluster)
 {
-       hammer2_pfsmount_t *pmp = ocluster->pmp;
+       hammer2_pfs_t *pmp = ocluster->pmp;
        hammer2_cluster_t *ncluster;
        hammer2_chain_t *chain;
        int i;
@@ -747,7 +747,7 @@ hammer2_cluster_lookup(hammer2_cluster_t *cparent, hammer2_key_t *key_nextp,
                     hammer2_key_t key_beg, hammer2_key_t key_end,
                     int flags, int *ddflagp)
 {
-       hammer2_pfsmount_t *pmp;
+       hammer2_pfs_t *pmp;
        hammer2_cluster_t *cluster;
        hammer2_chain_t *chain;
        hammer2_key_t key_accum;
@@ -944,7 +944,7 @@ hammer2_cluster_create(hammer2_trans_t *trans, hammer2_cluster_t *cparent,
                     int type, size_t bytes, int flags)
 {
        hammer2_cluster_t *cluster;
-       hammer2_pfsmount_t *pmp;
+       hammer2_pfs_t *pmp;
        int error;
        int i;
 
@@ -1076,7 +1076,7 @@ int
 hammer2_cluster_snapshot(hammer2_trans_t *trans, hammer2_cluster_t *ocluster,
                       hammer2_ioc_pfs_t *pfs)
 {
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
        hammer2_cluster_t *ncluster;
        const hammer2_inode_data_t *ripdata;
        hammer2_inode_data_t *wipdata;
@@ -1119,7 +1119,8 @@ hammer2_cluster_snapshot(hammer2_trans_t *trans, hammer2_cluster_t *ocluster,
        ncluster = NULL;
        nip = hammer2_inode_create(trans, hmp->spmp->iroot, &vat,
                                   proc0.p_ucred, pfs->name, name_len,
-                                  &ncluster, &error);
+                                  &ncluster,
+                                  HAMMER2_INSERT_PFSROOT, &error);
 
        if (nip) {
                wipdata = hammer2_cluster_modify_ip(trans, nip, ncluster, 0);
@@ -1243,7 +1244,7 @@ hammer2_cluster_load_async(hammer2_cluster_t *cluster,
 {
        hammer2_chain_t *chain;
        hammer2_iocb_t *iocb;
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
        hammer2_blockref_t *bref;
        int i;
 
index ca027e2..15a2919 100644 (file)
@@ -910,30 +910,52 @@ typedef struct hammer2_inode_data hammer2_inode_data_t;
 #define HAMMER2_COPYID_COUNT           256
 
 /*
- * PFS types identify a PFS on media and in LNK_SPAN messages.
- * PFS types >= 16 belong to HAMMER, 0-15 are defined in sys/dmsg.h
- *
- * For example, a mount operating in SOFT_MASTER mode might have nodes
- * representing several MASTERs, CACHEs, and one SOFT_MASTER, and will
- * operate by modifying the SOFT_MASTER and allowing another thread
- * synchronize it to the MASTERs.  But if it were operating in MASTER
- * mode it would ignore the SOFT_MASTER and use the quorum protocol
- * on the MASTERs.
+ * PFS types identify the role of a PFS within a cluster.  The PFS type
+ * is stored on media and in LNK_SPAN messages and used in other places.
+ *
+ * The low 4 bits specify the current active type while the high 4 bits
+ * specify the transition target if the PFS is being upgraded or downgraded.
+ * If the upper 4 bits are not zero it may affect how a PFS is used during
+ * the transition.
+ *
+ * Generally speaking, downgrading a MASTER to a SLAVE cannot complete until
+ * at least all MASTERs have updated their pfs_nmasters field.  And upgrading
+ * a SLAVE to a MASTER cannot complete until the new prospective master has
+ * been fully synchronized (though theoretically full synchronization is
+ * not required if a (new) quorum of other masters are fully synchronized).
+ *
+ * It generally does not matter which PFS element you actually mount, you
+ * are mounting 'the cluster'.  So, for example, a network mount will mount
+ * a DUMMY PFS type on a memory filesystem.  However, there are two exceptions.
+ * In order to gain the benefits of a SOFT_MASTER or SOFT_SLAVE, those PFSs
+ * must be directly mounted.
  */
-/* 0-15 reserved by sys/dmsg.h */
-#define HAMMER2_PFSTYPE_NONE           0
-#define HAMMER2_PFSTYPE_CACHE          1
-#define HAMMER2_PFSTYPE_COPY           2
-#define HAMMER2_PFSTYPE_SLAVE          3
-#define HAMMER2_PFSTYPE_SOFT_SLAVE     4
-#define HAMMER2_PFSTYPE_SOFT_MASTER    5
-#define HAMMER2_PFSTYPE_MASTER         6
-#define HAMMER2_PFSTYPE_SNAPSHOT       7
-#define HAMMER2_PFSTYPE_SUPROOT                8
-#define HAMMER2_PFSTYPE_DUMMY          9
+#define HAMMER2_PFSTYPE_NONE           0x00
+#define HAMMER2_PFSTYPE_CACHE          0x01
+#define HAMMER2_PFSTYPE_COPY           0x02
+#define HAMMER2_PFSTYPE_SLAVE          0x03
+#define HAMMER2_PFSTYPE_SOFT_SLAVE     0x04
+#define HAMMER2_PFSTYPE_SOFT_MASTER    0x05
+#define HAMMER2_PFSTYPE_MASTER         0x06
+#define HAMMER2_PFSTYPE_SNAPSHOT       0x07
+#define HAMMER2_PFSTYPE_SUPROOT                0x08
+#define HAMMER2_PFSTYPE_DUMMY          0x09
 #define HAMMER2_PFSTYPE_MAX            16
 
-#define HAMMER2_PFSTYPE_MASK           0x0F
+#define HAMMER2_PFSTRAN_NONE           0x00    /* no transition in progress */
+#define HAMMER2_PFSTRAN_CACHE          0x10
+#define HAMMER2_PFSTRAN_COPY           0x20
+#define HAMMER2_PFSTRAN_SLAVE          0x30
+#define HAMMER2_PFSTRAN_SOFT_SLAVE     0x40
+#define HAMMER2_PFSTRAN_SOFT_MASTER    0x50
+#define HAMMER2_PFSTRAN_MASTER         0x60
+#define HAMMER2_PFSTRAN_SNAPSHOT       0x70
+#define HAMMER2_PFSTRAN_SUPROOT                0x80
+#define HAMMER2_PFSTRAN_DUMMY          0x90
+
+#define HAMMER2_PFS_DEC(n)             ((n) & 0x0F)
+#define HAMMER2_PFS_DEC_TRANSITION(n)  (((n) >> 4) & 0x0F)
+#define HAMMER2_PFS_ENC_TRANSITION(n)  (((n) & 0x0F) << 4)
 
 /*
  * PFS mode of operation is a bitmask.  This is typically not stored
index f8d5309..01edca7 100644 (file)
@@ -91,8 +91,8 @@ static int hammer2_flush_recurse(hammer2_chain_t *child, void *data);
  *
  * Transactions govern XID tracking on the physical media (the hmp), but they
  * also govern TID tracking which is per-PFS and thus might cross multiple
- * hmp's.  So we can't just stuff tmanage into hammer2_mount or
- * hammer2_pfsmount.
+ * hmp's.  So we can't just stuff tmanage into hammer2_dev or
+ * hammer2_pfs.
  */
 static hammer2_trans_manage_t  tmanage;
 
@@ -106,7 +106,7 @@ hammer2_trans_manage_init(void)
 }
 
 hammer2_xid_t
-hammer2_trans_newxid(hammer2_pfsmount_t *pmp __unused)
+hammer2_trans_newxid(hammer2_pfs_t *pmp __unused)
 {
        hammer2_xid_t xid;
 
@@ -139,7 +139,7 @@ hammer2_trans_newxid(hammer2_pfsmount_t *pmp __unused)
  * flush depending on its state.
  */
 void
-hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp, int flags)
+hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfs_t *pmp, int flags)
 {
        hammer2_trans_manage_t *tman;
        hammer2_trans_t *head;
@@ -271,7 +271,7 @@ hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp, int flags)
  * of the same transaction.
  */
 void
-hammer2_trans_spmp(hammer2_trans_t *trans, hammer2_pfsmount_t *spmp)
+hammer2_trans_spmp(hammer2_trans_t *trans, hammer2_pfs_t *spmp)
 {
        ++spmp->alloc_tid;
        spmp->flush_tid = spmp->alloc_tid;
@@ -485,8 +485,8 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain,
                   int deleting)
 {
        hammer2_chain_t *parent;
-       hammer2_mount_t *hmp;
-       hammer2_pfsmount_t *pmp;
+       hammer2_dev_t *hmp;
+       hammer2_pfs_t *pmp;
        int diddeferral;
 
        /*
index 4fd9b26..18da757 100644 (file)
@@ -58,9 +58,9 @@ typedef struct hammer2_fiterate hammer2_fiterate_t;
 static int hammer2_freemap_try_alloc(hammer2_trans_t *trans,
                        hammer2_chain_t **parentp, hammer2_blockref_t *bref,
                        int radix, hammer2_fiterate_t *iter);
-static void hammer2_freemap_init(hammer2_trans_t *trans, hammer2_mount_t *hmp,
+static void hammer2_freemap_init(hammer2_trans_t *trans, hammer2_dev_t *hmp,
                        hammer2_key_t key, hammer2_chain_t *chain);
-static int hammer2_bmap_alloc(hammer2_trans_t *trans, hammer2_mount_t *hmp,
+static int hammer2_bmap_alloc(hammer2_trans_t *trans, hammer2_dev_t *hmp,
                        hammer2_bmap_data_t *bmap, uint16_t class,
                        int n, int radix, hammer2_key_t *basep);
 static int hammer2_freemap_iterate(hammer2_trans_t *trans,
@@ -191,7 +191,7 @@ int
 hammer2_freemap_alloc(hammer2_trans_t *trans, hammer2_chain_t *chain,
                      size_t bytes)
 {
-       hammer2_mount_t *hmp = chain->hmp;
+       hammer2_dev_t *hmp = chain->hmp;
        hammer2_blockref_t *bref = &chain->bref;
        hammer2_chain_t *parent;
        int radix;
@@ -294,7 +294,7 @@ hammer2_freemap_try_alloc(hammer2_trans_t *trans, hammer2_chain_t **parentp,
                          hammer2_blockref_t *bref, int radix,
                          hammer2_fiterate_t *iter)
 {
-       hammer2_mount_t *hmp = (*parentp)->hmp;
+       hammer2_dev_t *hmp = (*parentp)->hmp;
        hammer2_off_t l0size;
        hammer2_off_t l1size;
        hammer2_off_t l1mask;
@@ -511,7 +511,7 @@ hammer2_freemap_try_alloc(hammer2_trans_t *trans, hammer2_chain_t **parentp,
  */
 static
 int
-hammer2_bmap_alloc(hammer2_trans_t *trans, hammer2_mount_t *hmp,
+hammer2_bmap_alloc(hammer2_trans_t *trans, hammer2_dev_t *hmp,
                   hammer2_bmap_data_t *bmap,
                   uint16_t class, int n, int radix, hammer2_key_t *basep)
 {
@@ -695,7 +695,7 @@ success:
 
 static
 void
-hammer2_freemap_init(hammer2_trans_t *trans, hammer2_mount_t *hmp,
+hammer2_freemap_init(hammer2_trans_t *trans, hammer2_dev_t *hmp,
                     hammer2_key_t key, hammer2_chain_t *chain)
 {
        hammer2_off_t l1size;
@@ -769,7 +769,7 @@ static int
 hammer2_freemap_iterate(hammer2_trans_t *trans, hammer2_chain_t **parentp,
                        hammer2_chain_t **chainp, hammer2_fiterate_t *iter)
 {
-       hammer2_mount_t *hmp = (*parentp)->hmp;
+       hammer2_dev_t *hmp = (*parentp)->hmp;
 
        iter->bnext &= ~(H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX) - 1);
        iter->bnext += H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX);
@@ -793,7 +793,7 @@ hammer2_freemap_iterate(hammer2_trans_t *trans, hammer2_chain_t **parentp,
  * and to do the actual free.
  */
 void
-hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_mount_t *hmp,
+hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_dev_t *hmp,
                       hammer2_blockref_t *bref, int how)
 {
        hammer2_off_t data_off = bref->data_off;
index a6f2e50..fc143a4 100644 (file)
@@ -260,7 +260,7 @@ hammer2_inode_lock_downgrade(hammer2_inode_t *ip, int wasexclusive)
  * Lookup an inode by inode number
  */
 hammer2_inode_t *
-hammer2_inode_lookup(hammer2_pfsmount_t *pmp, hammer2_tid_t inum)
+hammer2_inode_lookup(hammer2_pfs_t *pmp, hammer2_tid_t inum)
 {
        hammer2_inode_t *ip;
 
@@ -296,7 +296,7 @@ hammer2_inode_ref(hammer2_inode_t *ip)
 void
 hammer2_inode_drop(hammer2_inode_t *ip)
 {
-       hammer2_pfsmount_t *pmp;
+       hammer2_pfs_t *pmp;
        hammer2_inode_t *pip;
        u_int refs;
 
@@ -370,7 +370,7 @@ struct vnode *
 hammer2_igetv(hammer2_inode_t *ip, hammer2_cluster_t *cparent, int *errorp)
 {
        const hammer2_inode_data_t *ripdata;
-       hammer2_pfsmount_t *pmp;
+       hammer2_pfs_t *pmp;
        struct vnode *vp;
 
        pmp = ip->pmp;
@@ -516,23 +516,26 @@ hammer2_igetv(hammer2_inode_t *ip, hammer2_cluster_t *cparent, int *errorp)
  * kernel VNOPS API and the filesystem backend (the chains).
  */
 hammer2_inode_t *
-hammer2_inode_get(hammer2_pfsmount_t *pmp, hammer2_inode_t *dip,
+hammer2_inode_get(hammer2_pfs_t *pmp, hammer2_inode_t *dip,
                  hammer2_cluster_t *cluster)
 {
        hammer2_inode_t *nip;
        const hammer2_inode_data_t *iptmp;
        const hammer2_inode_data_t *nipdata;
 
-       KKASSERT(hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
+       KKASSERT(cluster == NULL ||
+                hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
        KKASSERT(pmp);
 
        /*
         * Interlocked lookup/ref of the inode.  This code is only needed
         * when looking up inodes with nlinks != 0 (TODO: optimize out
         * otherwise and test for duplicates).
+        *
+        * Cluster can be NULL during the initial pfs allocation.
         */
 again:
-       for (;;) {
+       while (cluster) {
                iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
                nip = hammer2_inode_lookup(pmp, iptmp->inum);
                if (nip == NULL)
@@ -565,18 +568,24 @@ again:
                nip->flags = HAMMER2_INODE_SROOT;
 
        /*
-        * Initialize nip's cluster
+        * Initialize nip's cluster.  A cluster is provided for normal
+        * inodes but typically not for the super-root or PFS inodes.
         */
        nip->cluster.refs = 1;
        nip->cluster.pmp = pmp;
        nip->cluster.flags |= HAMMER2_CLUSTER_INODE;
-       hammer2_cluster_replace(&nip->cluster, cluster);
+       if (cluster) {
+               hammer2_cluster_replace(&nip->cluster, cluster);
+               nipdata = &hammer2_cluster_rdata(cluster)->ipdata;
+               nip->inum = nipdata->inum;
+               nip->size = nipdata->size;
+               nip->mtime = nipdata->mtime;
+               hammer2_inode_repoint(nip, NULL, cluster);
+       } else {
+               nip->inum = 1;                  /* PFS inum is always 1 XXX */
+               /* mtime will be updated when a cluster is available */
+       }
 
-       nipdata = &hammer2_cluster_rdata(cluster)->ipdata;
-       nip->inum = nipdata->inum;
-       nip->size = nipdata->size;
-       nip->mtime = nipdata->mtime;
-       hammer2_inode_repoint(nip, NULL, cluster);
        nip->pip = dip;                         /* can be NULL */
        if (dip)
                hammer2_inode_ref(dip); /* ref dip for nip->pip */
@@ -631,7 +640,8 @@ hammer2_inode_t *
 hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
                     struct vattr *vap, struct ucred *cred,
                     const uint8_t *name, size_t name_len,
-                    hammer2_cluster_t **clusterp, int *errorp)
+                    hammer2_cluster_t **clusterp,
+                    int flags, int *errorp)
 {
        const hammer2_inode_data_t *dipdata;
        hammer2_inode_data_t *nipdata;
@@ -688,7 +698,7 @@ retry:
                                             lhc, 0,
                                             HAMMER2_BREF_TYPE_INODE,
                                             HAMMER2_INODE_BYTES,
-                                            0);
+                                            flags);
        }
 #if INODE_DEBUG
        kprintf("CREATE INODE %*.*s chain=%p\n",
@@ -1441,7 +1451,7 @@ done:
  * This is called from the mount code to initialize pmp->ihidden
  */
 void
-hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp)
+hammer2_inode_install_hidden(hammer2_pfs_t *pmp)
 {
        hammer2_trans_t trans;
        hammer2_cluster_t *cparent;
@@ -1562,7 +1572,7 @@ hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
                             hammer2_tid_t inum)
 {
        hammer2_cluster_t *dcluster;
-       hammer2_pfsmount_t *pmp;
+       hammer2_pfs_t *pmp;
        int error;
 
        pmp = (*clusterp)->pmp;
index ce34647..03e962b 100644 (file)
@@ -70,7 +70,7 @@ struct hammer2_cleanupcb_info {
  * Allocate/Locate the requested dio, reference it, issue or queue iocb.
  */
 void
-hammer2_io_getblk(hammer2_mount_t *hmp, off_t lbase, int lsize,
+hammer2_io_getblk(hammer2_dev_t *hmp, off_t lbase, int lsize,
                  hammer2_iocb_t *iocb)
 {
        hammer2_io_t *dio;
@@ -329,7 +329,7 @@ hammer2_iocb_wait(hammer2_iocb_t *iocb)
 void
 hammer2_io_putblk(hammer2_io_t **diop)
 {
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
        hammer2_io_t *dio;
        hammer2_iocb_t iocb;
        struct buf *bp;
@@ -481,7 +481,7 @@ hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
 }
 
 void
-hammer2_io_cleanup(hammer2_mount_t *hmp, struct hammer2_io_tree *tree)
+hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
 {
        hammer2_io_t *dio;
 
@@ -591,7 +591,7 @@ hammer2_iocb_new_callback(hammer2_iocb_t *iocb)
 
 static
 int
-_hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
+_hammer2_io_new(hammer2_dev_t *hmp, off_t lbase, int lsize,
                hammer2_io_t **diop, int flags)
 {
        hammer2_iocb_t iocb;
@@ -614,21 +614,21 @@ _hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
 }
 
 int
-hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
+hammer2_io_new(hammer2_dev_t *hmp, off_t lbase, int lsize,
               hammer2_io_t **diop)
 {
        return(_hammer2_io_new(hmp, lbase, lsize, diop, HAMMER2_IOCB_ZERO));
 }
 
 int
-hammer2_io_newnz(hammer2_mount_t *hmp, off_t lbase, int lsize,
+hammer2_io_newnz(hammer2_dev_t *hmp, off_t lbase, int lsize,
               hammer2_io_t **diop)
 {
        return(_hammer2_io_new(hmp, lbase, lsize, diop, 0));
 }
 
 int
-hammer2_io_newq(hammer2_mount_t *hmp, off_t lbase, int lsize,
+hammer2_io_newq(hammer2_dev_t *hmp, off_t lbase, int lsize,
               hammer2_io_t **diop)
 {
        return(_hammer2_io_new(hmp, lbase, lsize, diop, HAMMER2_IOCB_QUICK));
@@ -690,7 +690,7 @@ hammer2_iocb_bread_callback(hammer2_iocb_t *iocb)
 }
 
 int
-hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize,
+hammer2_io_bread(hammer2_dev_t *hmp, off_t lbase, int lsize,
                hammer2_io_t **diop)
 {
        hammer2_iocb_t iocb;
index c8dae08..85e7ddf 100644 (file)
@@ -64,7 +64,7 @@ static void hammer2_autodmsg(kdmsg_msg_t *msg);
 static int hammer2_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
 
 void
-hammer2_iocom_init(hammer2_mount_t *hmp)
+hammer2_iocom_init(hammer2_dev_t *hmp)
 {
        /*
         * Automatic LNK_CONN
@@ -79,7 +79,7 @@ hammer2_iocom_init(hammer2_mount_t *hmp)
 }
 
 void
-hammer2_iocom_uninit(hammer2_mount_t *hmp)
+hammer2_iocom_uninit(hammer2_dev_t *hmp)
 {
        /* XXX chain depend deadlck? */
        if (hmp->iocom.mmsg)
@@ -91,7 +91,7 @@ hammer2_iocom_uninit(hammer2_mount_t *hmp)
  * fp for us.
  */
 void
-hammer2_cluster_reconnect(hammer2_mount_t *hmp, struct file *fp)
+hammer2_cluster_reconnect(hammer2_dev_t *hmp, struct file *fp)
 {
        /*
         * Closes old comm descriptor, kills threads, cleans up
@@ -194,12 +194,12 @@ hammer2_rcvdmsg(kdmsg_msg_t *msg)
  *
  * We collect span state
  */
-static void hammer2_update_spans(hammer2_mount_t *hmp, kdmsg_state_t *state);
+static void hammer2_update_spans(hammer2_dev_t *hmp, kdmsg_state_t *state);
 
 static void
 hammer2_autodmsg(kdmsg_msg_t *msg)
 {
-       hammer2_mount_t *hmp = msg->state->iocom->handle;
+       hammer2_dev_t *hmp = msg->state->iocom->handle;
        int copyid;
 
        switch(msg->tcmd) {
@@ -281,12 +281,12 @@ hammer2_autodmsg(kdmsg_msg_t *msg)
  * Update LNK_SPAN state
  */
 static void
-hammer2_update_spans(hammer2_mount_t *hmp, kdmsg_state_t *state)
+hammer2_update_spans(hammer2_dev_t *hmp, kdmsg_state_t *state)
 {
        const hammer2_inode_data_t *ripdata;
        hammer2_cluster_t *cparent;
        hammer2_cluster_t *cluster;
-       hammer2_pfsmount_t *spmp;
+       hammer2_pfs_t *spmp;
        hammer2_key_t key_next;
        kdmsg_msg_t *rmsg;
        size_t name_len;
@@ -352,7 +352,7 @@ hammer2_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
  * daemon via the open LNK_CONN transaction.
  */
 void
-hammer2_volconf_update(hammer2_mount_t *hmp, int index)
+hammer2_volconf_update(hammer2_dev_t *hmp, int index)
 {
        kdmsg_msg_t *msg;
 
index d84be94..3d1fa80 100644 (file)
@@ -162,7 +162,7 @@ hammer2_ioctl(hammer2_inode_t *ip, u_long com, void *data, int fflag,
 static int
 hammer2_ioctl_version_get(hammer2_inode_t *ip, void *data)
 {
-       hammer2_mount_t *hmp = ip->pmp->iroot->cluster.focus->hmp;
+       hammer2_dev_t *hmp = ip->pmp->iroot->cluster.focus->hmp;
        hammer2_ioc_version_t *version = data;
 
        version->version = hmp->voldata.version;
@@ -201,7 +201,7 @@ hammer2_ioctl_recluster(hammer2_inode_t *ip, void *data)
 static int
 hammer2_ioctl_remote_scan(hammer2_inode_t *ip, void *data)
 {
-       hammer2_mount_t *hmp = ip->pmp->iroot->cluster.focus->hmp;
+       hammer2_dev_t *hmp = ip->pmp->iroot->cluster.focus->hmp;
        hammer2_ioc_remote_t *remote = data;
        int copyid = remote->copyid;
 
@@ -234,8 +234,8 @@ static int
 hammer2_ioctl_remote_add(hammer2_inode_t *ip, void *data)
 {
        hammer2_ioc_remote_t *remote = data;
-       hammer2_pfsmount_t *pmp = ip->pmp;
-       hammer2_mount_t *hmp;
+       hammer2_pfs_t *pmp = ip->pmp;
+       hammer2_dev_t *hmp;
        int copyid = remote->copyid;
        int error = 0;
 
@@ -270,8 +270,8 @@ static int
 hammer2_ioctl_remote_del(hammer2_inode_t *ip, void *data)
 {
        hammer2_ioc_remote_t *remote = data;
-       hammer2_pfsmount_t *pmp = ip->pmp;
-       hammer2_mount_t *hmp;
+       hammer2_pfs_t *pmp = ip->pmp;
+       hammer2_dev_t *hmp;
        int copyid = remote->copyid;
        int error = 0;
 
@@ -309,7 +309,7 @@ static int
 hammer2_ioctl_remote_rep(hammer2_inode_t *ip, void *data)
 {
        hammer2_ioc_remote_t *remote = data;
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
        int copyid = remote->copyid;
 
        hmp = ip->pmp->iroot->cluster.focus->hmp; /* XXX */
@@ -341,7 +341,7 @@ static int
 hammer2_ioctl_socket_set(hammer2_inode_t *ip, void *data)
 {
        hammer2_ioc_remote_t *remote = data;
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
        int copyid = remote->copyid;
 
        hmp = ip->pmp->iroot->cluster.focus->hmp; /* XXX */
@@ -369,7 +369,7 @@ static int
 hammer2_ioctl_pfs_get(hammer2_inode_t *ip, void *data)
 {
        const hammer2_inode_data_t *ripdata;
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
        hammer2_ioc_pfs_t *pfs;
        hammer2_cluster_t *cparent;
        hammer2_cluster_t *rcluster;
@@ -460,7 +460,7 @@ static int
 hammer2_ioctl_pfs_lookup(hammer2_inode_t *ip, void *data)
 {
        const hammer2_inode_data_t *ripdata;
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
        hammer2_ioc_pfs_t *pfs;
        hammer2_cluster_t *cparent;
        hammer2_cluster_t *cluster;
@@ -524,11 +524,12 @@ static int
 hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
 {
        hammer2_inode_data_t *nipdata;
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
        hammer2_ioc_pfs_t *pfs;
        hammer2_inode_t *nip;
        hammer2_cluster_t *ncluster;
        hammer2_trans_t trans;
+       hammer2_blockref_t bref;
        int error;
 
        hmp = ip->pmp->iroot->cluster.focus->hmp; /* XXX */
@@ -542,10 +543,11 @@ hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
        if (hammer2_ioctl_pfs_lookup(ip, pfs) == 0)
                return(EEXIST);
 
-       hammer2_trans_init(&trans, ip->pmp, HAMMER2_TRANS_NEWINODE);
+       hammer2_trans_init(&trans, hmp->spmp, HAMMER2_TRANS_NEWINODE);
        nip = hammer2_inode_create(&trans, hmp->spmp->iroot, NULL, NULL,
                                     pfs->name, strlen(pfs->name),
-                                    &ncluster, &error);
+                                    &ncluster,
+                                    HAMMER2_INSERT_PFSROOT, &error);
        if (error == 0) {
                nipdata = hammer2_cluster_modify_ip(&trans, nip, ncluster, 0);
                nipdata->pfs_type = pfs->pfs_type;
@@ -560,6 +562,12 @@ hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
                        nipdata->comp_algo = HAMMER2_ENC_ALGO(
                                                        HAMMER2_COMP_AUTOZERO);
                hammer2_cluster_modsync(ncluster);
+               hammer2_cluster_bref(ncluster, &bref);
+#if 1
+               kprintf("ADD LOCAL PFS (IOCTL): %s\n", nipdata->filename);
+               hammer2_pfsalloc(ncluster, nipdata, bref.mirror_tid);
+               /* XXX rescan */
+#endif
                hammer2_inode_unlock_ex(nip, ncluster);
        }
        hammer2_trans_done(&trans);
@@ -573,13 +581,13 @@ hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
 static int
 hammer2_ioctl_pfs_delete(hammer2_inode_t *ip, void *data)
 {
-       hammer2_mount_t *hmp;
+       hammer2_dev_t *hmp;
        hammer2_ioc_pfs_t *pfs = data;
        hammer2_trans_t trans;
        int error;
 
        hmp = ip->pmp->iroot->cluster.focus->hmp; /* XXX */
-       hammer2_trans_init(&trans, ip->pmp, 0);
+       hammer2_trans_init(&trans, hmp->spmp, 0);
        error = hammer2_unlink_file(&trans, hmp->spmp->iroot,
                                    pfs->name, strlen(pfs->name),
                                    2, NULL, NULL, -1);
@@ -706,7 +714,7 @@ int
 hammer2_ioctl_bulkfree_scan(hammer2_inode_t *ip, void *data)
 {
        hammer2_ioc_bulkfree_t *bfi = data;
-       hammer2_mount_t *hmp = ip->pmp->iroot->cluster.focus->hmp;
+       hammer2_dev_t *hmp = ip->pmp->iroot->cluster.focus->hmp;
        int error;
 
        /* XXX run local cluster targets only */
index b4c7c32..c7563be 100644 (file)
  * Mount-wide locks
  */
 void
-hammer2_mount_exlock(hammer2_mount_t *hmp)
+hammer2_dev_exlock(hammer2_dev_t *hmp)
 {
        hammer2_mtx_ex(&hmp->vchain.core.lock);
 }
 
 void
-hammer2_mount_shlock(hammer2_mount_t *hmp)
+hammer2_dev_shlock(hammer2_dev_t *hmp)
 {
        hammer2_mtx_sh(&hmp->vchain.core.lock);
 }
 
 void
-hammer2_mount_unlock(hammer2_mount_t *hmp)
+hammer2_dev_unlock(hammer2_dev_t *hmp)
 {
        hammer2_mtx_unlock(&hmp->vchain.core.lock);
 }
index e2f2685..fd8d020 100644 (file)
@@ -72,8 +72,8 @@ struct hammer2_sync_info {
        int waitfor;
 };
 
-TAILQ_HEAD(hammer2_mntlist, hammer2_mount);
-TAILQ_HEAD(hammer2_pfslist, hammer2_pfsmount);
+TAILQ_HEAD(hammer2_mntlist, hammer2_dev);
+TAILQ_HEAD(hammer2_pfslist, hammer2_pfs);
 static struct hammer2_mntlist hammer2_mntlist;
 static struct hammer2_pfslist hammer2_pfslist;
 static struct lock hammer2_mntlk;
@@ -177,9 +177,9 @@ static int hammer2_vfs_init(struct vfsconf *conf);
 static int hammer2_vfs_uninit(struct vfsconf *vfsp);
 static int hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                                struct ucred *cred);
-static int hammer2_remount(hammer2_mount_t *, struct mount *, char *,
+static int hammer2_remount(hammer2_dev_t *, struct mount *, char *,
                                struct vnode *, struct ucred *);
-static int hammer2_recovery(hammer2_mount_t *hmp);
+static int hammer2_recovery(hammer2_dev_t *hmp);
 static int hammer2_vfs_unmount(struct mount *mp, int mntflags);
 static int hammer2_vfs_root(struct mount *mp, struct vnode **vpp);
 static int hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp,
@@ -194,13 +194,15 @@ static int hammer2_vfs_vptofh(struct vnode *vp, struct fid *fhp);
 static int hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
                                int *exflagsp, struct ucred **credanonp);
 
-static int hammer2_install_volume_header(hammer2_mount_t *hmp);
+static int hammer2_install_volume_header(hammer2_dev_t *hmp);
 static int hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data);
 
+static void hammer2_update_pmps(hammer2_dev_t *hmp);
 static void hammer2_write_thread(void *arg);
 
-static void hammer2_vfs_unmount_hmp1(struct mount *mp, hammer2_mount_t *hmp);
-static void hammer2_vfs_unmount_hmp2(struct mount *mp, hammer2_mount_t *hmp);
+static void hammer2_mount_helper(struct mount *mp, hammer2_pfs_t *pmp);
+static void hammer2_unmount_helper(struct mount *mp, hammer2_pfs_t *pmp,
+                               hammer2_dev_t *hmp);
 
 /* 
  * Functions for compression in threads,
@@ -317,33 +319,205 @@ hammer2_vfs_uninit(struct vfsconf *vfsp __unused)
 /*
  * Core PFS allocator.  Used to allocate the pmp structure for PFS cluster
  * mounts and the spmp structure for media (hmp) structures.
+ *
+ * If ripdata is supplied, hammer2_pfslist is searched for an existing pmp
+ * whose pfs_clid matches ripdata->pfs_clid and that pmp is reused.  When
+ * no pmp is found (always the case when ripdata is NULL) a new pmp is
+ * allocated, initialized from alloc_tid and ripdata, and appended to
+ * hammer2_pfslist.  A pmp without an iroot gets one created here.
+ *
+ * If cluster is supplied, each of its chains is pointed at the pmp,
+ * referenced, and appended to pmp->iroot->cluster, up to
+ * HAMMER2_MAXCLUSTER entries.  Chains which do not fit are only reported
+ * via kprintf().
+ *
+ * Returns the located or newly allocated pmp.
+ *
+ * XXX check locking
 */
-static hammer2_pfsmount_t *
-hammer2_pfsalloc(const hammer2_inode_data_t *ripdata, hammer2_tid_t alloc_tid)
+hammer2_pfs_t *
+hammer2_pfsalloc(hammer2_cluster_t *cluster,
+                const hammer2_inode_data_t *ripdata,
+                hammer2_tid_t alloc_tid)
 {
-       hammer2_pfsmount_t *pmp;
-
-       pmp = kmalloc(sizeof(*pmp), M_HAMMER2, M_WAITOK | M_ZERO);
-       kmalloc_create(&pmp->minode, "HAMMER2-inodes");
-       kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg");
-       lockinit(&pmp->lock, "pfslk", 0, 0);
-       spin_init(&pmp->inum_spin, "hm2pfsalloc_inum");
-       RB_INIT(&pmp->inum_tree);
-       TAILQ_INIT(&pmp->unlinkq);
-       spin_init(&pmp->list_spin, "hm2pfsalloc_list");
-
-       pmp->alloc_tid = alloc_tid + 1;   /* our first media transaction id */
-       pmp->flush_tid = pmp->alloc_tid;
+       hammer2_chain_t *rchain;
+       hammer2_pfs_t *pmp;
+       int i;
+       int j;
+
+       /*
+        * Locate existing PFS if ripdata is present.  If ripdata is not
+        * present this is a spmp which is always unique and not listed.
+        *
+        * When the loop finds no match TAILQ_FOREACH leaves pmp NULL,
+        * which selects the allocation path below.
+        *
+        * NOTE(review): the allocation path below appends every new pmp
+        * to hammer2_pfslist, including one allocated without ripdata,
+        * so "not listed" only means the spmp is never looked up here.
+        */
        if (ripdata) {
-               pmp->inode_tid = ripdata->pfs_inum + 1;
-               pmp->pfs_clid = ripdata->pfs_clid;
+               TAILQ_FOREACH(pmp, &hammer2_pfslist, mntentry) {
+                       if (bcmp(&pmp->pfs_clid, &ripdata->pfs_clid,
+                                sizeof(pmp->pfs_clid)) == 0) {
+                                       break;
+                       }
+               }
+       } else {
+               pmp = NULL;
+       }
+
+       if (pmp == NULL) {
+               pmp = kmalloc(sizeof(*pmp), M_HAMMER2, M_WAITOK | M_ZERO);
+               kmalloc_create(&pmp->minode, "HAMMER2-inodes");
+               kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg");
+               lockinit(&pmp->lock, "pfslk", 0, 0);
+               spin_init(&pmp->inum_spin, "hm2pfsalloc_inum");
+               RB_INIT(&pmp->inum_tree);
+               TAILQ_INIT(&pmp->unlinkq);
+               TAILQ_INIT(&pmp->syncthrq);
+               spin_init(&pmp->list_spin, "hm2pfsalloc_list");
+
+               /* our first media transaction id */
+               pmp->alloc_tid = alloc_tid + 1;
+               pmp->flush_tid = pmp->alloc_tid;
+               if (ripdata) {
+                       pmp->inode_tid = ripdata->pfs_inum + 1;
+                       pmp->pfs_clid = ripdata->pfs_clid;
+               }
+               hammer2_mtx_init(&pmp->wthread_mtx, "h2wthr");
+               bioq_init(&pmp->wthread_bioq);
+               TAILQ_INSERT_TAIL(&hammer2_pfslist, pmp, mntentry);
+       }
+
+       /*
+        * Create the PFS's root inode.
+        *
+        * The inode is obtained without a cluster; chains are attached
+        * to it separately below.  The extra ref taken here is the one
+        * held by pmp->iroot itself.
+        */
+       if (pmp->iroot == NULL) {
+               pmp->iroot = hammer2_inode_get(pmp, NULL, NULL);
+               hammer2_inode_ref(pmp->iroot);
+               hammer2_inode_unlock_ex(pmp->iroot, NULL);
+       }
+
+       /*
+        * When a cluster is passed in we must add the cluster's chains
+        * to the PFS's root inode.
+        *
+        * New chains are appended starting at the current nchains (j).
+        * Each appended chain must not already belong to a pmp and
+        * gains a ref owned by the iroot's cluster array.
+        *
+        * XXX should fill empty array spots ?
+        */
+       if (cluster) {
+               hammer2_inode_ref(pmp->iroot);
+               hammer2_mtx_ex(&pmp->iroot->lock);
+               j = pmp->iroot->cluster.nchains;
+
+               kprintf("add PFS to pmp %p[%d]\n", pmp, j);
+
+               for (i = 0; i < cluster->nchains; ++i) {
+                       if (j == HAMMER2_MAXCLUSTER)
+                               break;
+                       rchain = cluster->array[i].chain;
+                       KKASSERT(rchain->pmp == NULL);
+                       rchain->pmp = pmp;
+                       hammer2_chain_ref(rchain);
+                       pmp->iroot->cluster.array[j].chain = rchain;
+                       ++j;
+               }
+               pmp->iroot->cluster.nchains = j;
+               hammer2_mtx_unlock(&pmp->iroot->lock);
+               hammer2_inode_drop(pmp->iroot);
+
+               if (i != cluster->nchains) {
+                       kprintf("hammer2_mount: cluster full!\n");
+                       /* XXX fatal error? */
+               }
        }
-       hammer2_mtx_init(&pmp->wthread_mtx, "h2wthr");
-       bioq_init(&pmp->wthread_bioq);
 
        return pmp;
 }
 
+/*
+ * Destroy a PFS, typically only occurs after the last mount on a device
+ * has gone away.
+ *
+ * The pmp is unlinked from hammer2_pfslist, its reference on iroot is
+ * dropped (that reference is expected to be the only one left), the
+ * per-PFS kmalloc pools are destroyed and the pmp itself is freed.  The
+ * caller must not use pmp afterwards.
+ */
+static void
+hammer2_pfsfree(hammer2_pfs_t *pmp)
+{
+       /*
+        * Unlist the pmp, then clean up our reference on iroot.  iroot
+        * should not be needed by the flush code.
+        */
+       TAILQ_REMOVE(&hammer2_pfslist, pmp, mntentry);
+
+       if (pmp->iroot) {
+#if REPORT_REFS_ERRORS
+               if (pmp->iroot->refs != 1)
+                       kprintf("PMP->IROOT %p REFS WRONG %d\n",
+                               pmp->iroot, pmp->iroot->refs);
+#else
+               KKASSERT(pmp->iroot->refs == 1);
+#endif
+               /* ref for pmp->iroot */
+               hammer2_inode_drop(pmp->iroot);
+               pmp->iroot = NULL;
+       }
+
+       kmalloc_destroy(&pmp->mmsg);
+       kmalloc_destroy(&pmp->minode);
+
+       kfree(pmp, M_HAMMER2);
+}
+
+/*
+ * Remove all references to hmp from the pfs list.  Free any PFS which
+ * becomes empty.
+ *
+ * Every pmp on hammer2_pfslist which has an iroot is examined.  Chains in
+ * pmp->iroot->cluster which belong to hmp are cleared from the array and
+ * dropped, trailing empty array slots are trimmed off of nchains, and a
+ * pmp left without any chains is destroyed via hammer2_pfsfree().  If the
+ * pmp is hmp's super-root pmp, hmp->spmp is cleared as well.
+ *
+ * XXX inefficient.
+ */
+static void
+hammer2_pfsfree_scan(hammer2_dev_t *hmp)
+{
+       hammer2_pfs_t *pmp;
+       hammer2_cluster_t *cluster;
+       hammer2_cluster_t *cparent;
+       hammer2_chain_t *rchain;
+       int i;
+
+again:
+       TAILQ_FOREACH(pmp, &hammer2_pfslist, mntentry) {
+               if (pmp->iroot == NULL)
+                       continue;
+               if (hmp->spmp == pmp) {
+                       kprintf("unmount hmp %p remove spmp %p\n",
+                               hmp, pmp);
+                       hmp->spmp = NULL;
+               }
+
+               /*
+                * The iroot lock is tracked per-pmp.  cparent must start
+                * out NULL for every pmp, otherwise the (already released)
+                * lock of an earlier pmp which survived the scan would be
+                * mistaken for a lock on this pmp's iroot.
+                */
+               cparent = NULL;
+               cluster = &pmp->iroot->cluster;
+               for (i = 0; i < cluster->nchains; ++i) {
+                       rchain = cluster->array[i].chain;
+                       if (rchain == NULL || rchain->hmp != hmp)
+                               continue;
+
+                       /*
+                        * If the pmp is possibly using hmp we need to
+                        * lock its iroot, there might be other mounts
+                        * using it which are still active.  Restart the
+                        * loop (i = -1 becomes 0 on the next iteration).
+                        */
+                       if (cparent == NULL) {
+                               cparent = hammer2_inode_lock_ex(pmp->iroot);
+                               i = -1;
+                               continue;
+                       }
+                       cluster->array[i].chain = NULL;
+                       hammer2_chain_drop(rchain);
+                       cluster->focus = NULL;
+               }
+               if (cparent)
+                       hammer2_inode_unlock_ex(pmp->iroot, cparent);
+
+               /*
+                * Cleanup trailing chains.  Do not reorder chains (for now).
+                * i equals nchains when the scan above completes.
+                */
+               while (i > 0) {
+                       if (cluster->array[i - 1].chain)
+                               break;
+                       --i;
+               }
+               cluster->nchains = i;
+
+               /*
+                * If the PMP has no elements remaining we can destroy it.
+                * Destroying it unlinks it from the list being iterated,
+                * so the scan is restarted from the top.
+                */
+               if (cluster->nchains == 0) {
+                       kprintf("unmount hmp %p last ref to PMP=%p\n",
+                               hmp, pmp);
+                       hammer2_pfsfree(pmp);
+                       goto again;
+               }
+       }
+}
+
 /*
  * Mount or remount HAMMER2 fileystem from physical media
  *
@@ -370,16 +544,15 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                  struct ucred *cred)
 {
        struct hammer2_mount_info info;
-       hammer2_pfsmount_t *pmp;
-       hammer2_pfsmount_t *spmp;
-       hammer2_mount_t *hmp;
+       hammer2_pfs_t *pmp;
+       hammer2_pfs_t *spmp;
+       hammer2_dev_t *hmp;
        hammer2_key_t key_next;
        hammer2_key_t key_dummy;
        hammer2_key_t lhc;
        struct vnode *devvp;
        struct nlookupdata nd;
        hammer2_chain_t *parent;
-       hammer2_chain_t *rchain;
        hammer2_cluster_t *cluster;
        hammer2_cluster_t *cparent;
        const hammer2_inode_data_t *ripdata;
@@ -579,9 +752,7 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                 */
                error = hammer2_install_volume_header(hmp);
                if (error) {
-                       ++hmp->pmp_count;
-                       hammer2_vfs_unmount_hmp1(mp, hmp);
-                       hammer2_vfs_unmount_hmp2(mp, hmp);
+                       hammer2_unmount_helper(mp, NULL, hmp);
                        lockmgr(&hammer2_mntlk, LK_RELEASE);
                        hammer2_vfs_unmount(mp, MNT_FORCE);
                        return error;
@@ -591,7 +762,8 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                 * Really important to get these right or flush will get
                 * confused.
                 */
-               hmp->spmp = hammer2_pfsalloc(NULL, hmp->voldata.mirror_tid);
+               hmp->spmp = hammer2_pfsalloc(NULL, NULL,
+                                            hmp->voldata.mirror_tid);
                kprintf("alloc spmp %p tid %016jx\n",
                        hmp->spmp, hmp->voldata.mirror_tid);
                spmp = hmp->spmp;
@@ -619,25 +791,29 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                hammer2_chain_lookup_done(parent);
                if (schain == NULL) {
                        kprintf("hammer2_mount: invalid super-root\n");
-                       ++hmp->pmp_count;
-                       hammer2_vfs_unmount_hmp1(mp, hmp);
-                       hammer2_vfs_unmount_hmp2(mp, hmp);
+                       hammer2_unmount_helper(mp, NULL, hmp);
                        lockmgr(&hammer2_mntlk, LK_RELEASE);
                        hammer2_vfs_unmount(mp, MNT_FORCE);
                        return EINVAL;
                }
 
                /*
-                * Sanity-check schain's pmp, finish initializing spmp.
+                * Sanity-check schain's pmp and finish initialization.
+                * Any chain belonging to the super-root topology should
+                * have a NULL pmp (not even set to spmp).
                 */
                ripdata = &hammer2_chain_rdata(schain)->ipdata;
-               KKASSERT(schain->pmp == spmp);
+               KKASSERT(schain->pmp == NULL);
                spmp->pfs_clid = ripdata->pfs_clid;
 
                /*
-                * NOTE: inode_get sucks up schain's lock.
+                * Replace the dummy spmp->iroot with a real one.  It's
+                * easier to just do a wholesale replacement than to try
+                * to update the chain and fixup the iroot fields.
                 */
                cluster = hammer2_cluster_from_chain(schain);
+               hammer2_inode_drop(spmp->iroot);
+               spmp->iroot = NULL;
                spmp->iroot = hammer2_inode_get(spmp, NULL, cluster);
                spmp->spmp_hmp = hmp;
                hammer2_inode_ref(spmp->iroot);
@@ -649,8 +825,7 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                        error = hammer2_recovery(hmp);
                        /* XXX do something with error */
                }
-               ++hmp->pmp_count;
-
+               hammer2_update_pmps(hmp);
                hammer2_iocom_init(hmp);
 
                /*
@@ -665,11 +840,12 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                }
        } else {
                spmp = hmp->spmp;
-               ++hmp->pmp_count;
        }
 
        /*
-        * Lookup mount point under the media-localized super-root.
+        * Lookup the mount point under the media-localized super-root.
+        * Scanning hammer2_pfslist doesn't help us because it represents
+        * PFS cluster ids which can aggregate several named PFSs together.
         *
         * cluster->pmp will incorrectly point to spmp and must be fixed
         * up later on.
@@ -691,156 +867,77 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
        }
        hammer2_inode_unlock_ex(spmp->iroot, cparent);
 
+       /*
+        * PFS could not be found?
+        */
        if (cluster == NULL) {
                kprintf("hammer2_mount: PFS label not found\n");
-               hammer2_vfs_unmount_hmp1(mp, hmp);
-               hammer2_vfs_unmount_hmp2(mp, hmp);
+               hammer2_unmount_helper(mp, NULL, hmp);
                lockmgr(&hammer2_mntlk, LK_RELEASE);
                hammer2_vfs_unmount(mp, MNT_FORCE);
-               return EINVAL;
-       }
 
-       for (i = 0; i < cluster->nchains; ++i) {
-               rchain = cluster->array[i].chain;
-               if (rchain->flags & HAMMER2_CHAIN_MOUNTED) {
-                       kprintf("hammer2_mount: PFS label already mounted!\n");
-                       hammer2_cluster_unlock(cluster);
-                       hammer2_vfs_unmount_hmp1(mp, hmp);
-                       hammer2_vfs_unmount_hmp2(mp, hmp);
-                       lockmgr(&hammer2_mntlk, LK_RELEASE);
-                       hammer2_vfs_unmount(mp, MNT_FORCE);
-                       return EBUSY;
-               }
-               KKASSERT(rchain->pmp == NULL);
-#if 0
-               if (rchain->flags & HAMMER2_CHAIN_RECYCLE) {
-                       kprintf("hammer2_mount: PFS label is recycling\n");
-                       hammer2_cluster_unlock(cluster);
-                       hammer2_vfs_unmount_hmp1(mp, hmp);
-                       hammer2_vfs_unmount_hmp2(mp, hmp);
-                       lockmgr(&hammer2_mntlk, LK_RELEASE);
-                       hammer2_vfs_unmount(mp, MNT_FORCE);
-                       return EBUSY;
-               }
-#endif
+               return EINVAL;
        }
 
        /*
-        * Check to see if the cluster id is already mounted at the mount
-        * point.  If it is, add us to the cluster.
+        * Acquire the pmp structure (it should have already been allocated
+        * via hammer2_update_pmps() so do not pass cluster in to add to
+        * available chains).
+        *
+        * Check if the cluster has already been mounted.  A cluster can
+        * only be mounted once, use null mounts to mount additional copies.
         */
        ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
        hammer2_cluster_bref(cluster, &bref);
-       TAILQ_FOREACH(pmp, &hammer2_pfslist, mntentry) {
-               if (pmp->spmp_hmp == NULL &&
-                   bcmp(&pmp->pfs_clid, &ripdata->pfs_clid,
-                        sizeof(pmp->pfs_clid)) == 0) {
-                       break;
-               }
-       }
-
-       if (pmp) {
-               int i;
-               int j;
-
-               /*
-                * Directly lock the inode->lock, do not run through
-                * hammer2_inode_lock*().
-                */
-               hammer2_inode_ref(pmp->iroot);
-               hammer2_mtx_ex(&pmp->iroot->lock);
-
-               if (pmp->iroot->cluster.nchains + cluster->nchains >
-                   HAMMER2_MAXCLUSTER) {
-                       kprintf("hammer2_mount: cluster full!\n");
-
-                       hammer2_mtx_unlock(&pmp->iroot->lock);
-                       hammer2_inode_drop(pmp->iroot);
+       pmp = hammer2_pfsalloc(NULL, ripdata, bref.mirror_tid);
+       hammer2_cluster_unlock(cluster);
 
-                       hammer2_cluster_unlock(cluster);
-                       hammer2_vfs_unmount_hmp1(mp, hmp);
-                       hammer2_vfs_unmount_hmp2(mp, hmp);
-                       lockmgr(&hammer2_mntlk, LK_RELEASE);
-                       hammer2_vfs_unmount(mp, MNT_FORCE);
-                       return EBUSY;
-               }
-               kprintf("hammer2_vfs_mount: Adding pfs to existing cluster\n");
-               j = pmp->iroot->cluster.nchains;
-               for (i = 0; i < cluster->nchains; ++i) {
-                       rchain = cluster->array[i].chain;
-                       KKASSERT(rchain->pmp == NULL);
-                       rchain->pmp = pmp;
-                       hammer2_chain_ref(rchain);
-                       pmp->iroot->cluster.array[j].chain = rchain;
-                       ++j;
-               }
-               pmp->iroot->cluster.nchains = j;
-               hammer2_mtx_unlock(&pmp->iroot->lock);
-               hammer2_inode_drop(pmp->iroot);
-               hammer2_cluster_unlock(cluster);
+       if (pmp->mp) {
+               kprintf("hammer2_mount: PFS already mounted!\n");
+               hammer2_unmount_helper(mp, NULL, hmp);
                lockmgr(&hammer2_mntlk, LK_RELEASE);
+               hammer2_vfs_unmount(mp, MNT_FORCE);
 
-               kprintf("ok\n");
-               hammer2_inode_install_hidden(pmp);
-
-               return ERANGE;
+               return EBUSY;
        }
 
        /*
-        * Block device opened successfully, finish initializing the
-        * mount structure.
-        *
-        * From this point on we have to call hammer2_unmount() on failure.
+        * Finish the mount
         */
-       pmp = hammer2_pfsalloc(ripdata, bref.mirror_tid);
-       kprintf("PMP mirror_tid is %016jx\n", bref.mirror_tid);
-       for (i = 0; i < cluster->nchains; ++i) {
-               rchain = cluster->array[i].chain;
-               KKASSERT(rchain->pmp == NULL);
-               rchain->pmp = pmp;
-               atomic_set_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED);
-       }
-       cluster->pmp = pmp;
-
-       TAILQ_INSERT_TAIL(&hammer2_pfslist, pmp, mntentry);
-       lockmgr(&hammer2_mntlk, LK_RELEASE);
-
-       kprintf("hammer2_mount hmp=%p pmp=%p pmpcnt=%d\n",
-               hmp, pmp, hmp->pmp_count);
-
-       mp->mnt_flag = MNT_LOCAL;
-       mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;   /* all entry pts are SMP */
-       mp->mnt_kern_flag |= MNTK_THR_SYNC;     /* new vsyncscan semantics */
+        kprintf("hammer2_mount hmp=%p pmp=%p\n", hmp, pmp);
+
+        mp->mnt_flag = MNT_LOCAL;
+        mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;   /* all entry pts are SMP */
+        mp->mnt_kern_flag |= MNTK_THR_SYNC;     /* new vsyncscan semantics */
+        /*
+         * required mount structure initializations
+         */
+        mp->mnt_stat.f_iosize = HAMMER2_PBUFSIZE;
+        mp->mnt_stat.f_bsize = HAMMER2_PBUFSIZE;
+        mp->mnt_vstat.f_frsize = HAMMER2_PBUFSIZE;
+        mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE;
+        /*
+         * Optional fields
+         */
+        mp->mnt_iosize_max = MAXPHYS;
 
        /*
-        * required mount structure initializations
+        * Connect up mount pointers.
         */
-       mp->mnt_stat.f_iosize = HAMMER2_PBUFSIZE;
-       mp->mnt_stat.f_bsize = HAMMER2_PBUFSIZE;
+       hammer2_mount_helper(mp, pmp);
 
-       mp->mnt_vstat.f_frsize = HAMMER2_PBUFSIZE;
-       mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE;
+        lockmgr(&hammer2_mntlk, LK_RELEASE);
 
        /*
-        * Optional fields
-        */
-       mp->mnt_iosize_max = MAXPHYS;
-       mp->mnt_data = (qaddr_t)pmp;
-       pmp->mp = mp;
-
-       /*
-        * After this point hammer2_vfs_unmount() has visibility on hmp
-        * and manual hmp1/hmp2 calls are not needed on fatal errors.
-        */
-       pmp->iroot = hammer2_inode_get(pmp, NULL, cluster);
-       hammer2_inode_ref(pmp->iroot);          /* ref for pmp->iroot */
-       hammer2_inode_unlock_ex(pmp->iroot, cluster);
-
-       /*
-        * The logical file buffer bio write thread handles things
-        * like physical block assignment and compression.
+        * A mounted PFS needs a write thread for logical buffers and
+        * a hidden directory for deletions of open files.  These features
+        * are not used by unmounted PFSs.
         *
-        * (only applicable to pfs mounts, not applicable to spmp)
+        * The logical file buffer bio write thread handles things like
+        * physical block assignment and compression.
         */
        pmp->wthread_destroy = 0;
        lwkt_create(hammer2_write_thread, pmp,
@@ -875,6 +972,51 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
        return 0;
 }
 
+/*
+ * Scan PFSs under the super-root and create hammer2_pfs structures.
+ *
+ * Every inode found under hmp->spmp->iroot is handed to hammer2_pfsalloc()
+ * together with its cluster, which locates or allocates the matching pmp
+ * and attaches the cluster's chains to it.  Elements which are not inodes
+ * are skipped.
+ */
+static
+void
+hammer2_update_pmps(hammer2_dev_t *hmp)
+{
+       const hammer2_inode_data_t *ripdata;
+       hammer2_cluster_t *cparent;
+       hammer2_cluster_t *cluster;
+       hammer2_blockref_t bref;
+       hammer2_pfs_t *spmp;
+       hammer2_pfs_t *pmp;
+       hammer2_key_t key_next;
+       int ddflag;
+
+       /*
+        * Lookup mount point under the media-localized super-root.
+        *
+        * cluster->pmp will incorrectly point to spmp and must be fixed
+        * up later on.
+        */
+       spmp = hmp->spmp;
+       cparent = hammer2_inode_lock_ex(spmp->iroot);
+       cluster = hammer2_cluster_lookup(cparent, &key_next,
+                                        HAMMER2_KEY_MIN,
+                                        HAMMER2_KEY_MAX,
+                                        0, &ddflag);
+       while (cluster) {
+               /*
+                * Only inodes represent PFSs.  Anything else is skipped,
+                * but the iterator must still be advanced below or the
+                * scan would spin on the same element forever.
+                */
+               if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
+                       ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
+                       hammer2_cluster_bref(cluster, &bref);
+                       kprintf("ADD LOCAL PFS: %s\n", ripdata->filename);
+
+                       pmp = hammer2_pfsalloc(cluster, ripdata,
+                                              bref.mirror_tid);
+               }
+               cluster = hammer2_cluster_next(cparent, cluster,
+                                              &key_next,
+                                              key_next,
+                                              HAMMER2_KEY_MAX,
+                                              0);
+       }
+       hammer2_inode_unlock_ex(spmp->iroot, cparent);
+}
+
 /*
  * Handle bioq for strategy write
  */
@@ -882,7 +1024,7 @@ static
 void
 hammer2_write_thread(void *arg)
 {
-       hammer2_pfsmount_t *pmp;
+       hammer2_pfs_t *pmp;
        struct bio *bio;
        struct buf *bp;
        hammer2_trans_t trans;
@@ -975,7 +1117,7 @@ hammer2_write_thread(void *arg)
 }
 
 void
-hammer2_bioq_sync(hammer2_pfsmount_t *pmp)
+hammer2_bioq_sync(hammer2_pfs_t *pmp)
 {
        struct bio sync_bio;
 
@@ -1552,7 +1694,7 @@ hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp, int ioflag,
 
 static
 int
-hammer2_remount(hammer2_mount_t *hmp, struct mount *mp, char *path,
+hammer2_remount(hammer2_dev_t *hmp, struct mount *mp, char *path,
                struct vnode *devvp, struct ucred *cred)
 {
        int error;
@@ -1569,13 +1711,9 @@ static
 int
 hammer2_vfs_unmount(struct mount *mp, int mntflags)
 {
-       hammer2_pfsmount_t *pmp;
-       hammer2_mount_t *hmp;
-       hammer2_chain_t *rchain;
-       hammer2_cluster_t *cluster;
+       hammer2_pfs_t *pmp;
        int flags;
        int error = 0;
-       int i;
 
        pmp = MPTOPMP(mp);
 
@@ -1583,11 +1721,12 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags)
                return(0);
 
        lockmgr(&hammer2_mntlk, LK_EXCLUSIVE);
-       TAILQ_REMOVE(&hammer2_pfslist, pmp, mntentry);
 
        /*
         * If mount initialization proceeded far enough we must flush
-        * its vnodes.
+        * its vnodes and sync the underlying mount points.  Three syncs
+        * are required to fully flush the filesystem (freemap updates lag
+        * by one flush, and one extra for safety).
         */
        if (mntflags & MNT_FORCE)
                flags = FORCECLOSE;
@@ -1597,6 +1736,9 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags)
                error = vflush(mp, 0, flags);
                if (error)
                        goto failed;
+               hammer2_vfs_sync(mp, MNT_WAIT);
+               hammer2_vfs_sync(mp, MNT_WAIT);
+               hammer2_vfs_sync(mp, MNT_WAIT);
        }
 
        if (pmp->wthread_td) {
@@ -1619,69 +1761,110 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags)
                hammer2_inode_drop(pmp->ihidden);
                pmp->ihidden = NULL;
        }
+       if (pmp->mp)
+               hammer2_unmount_helper(mp, pmp, NULL);
 
-       /*
-        * Cleanup our reference on iroot.  iroot is (should) not be needed
-        * by the flush code.
-        */
-       if (pmp->iroot) {
-               cluster = &pmp->iroot->cluster;
-               for (i = 0; i < pmp->iroot->cluster.nchains; ++i) {
-                       rchain = pmp->iroot->cluster.array[i].chain;
-                       if (rchain == NULL)
-                               continue;
-                       hmp = rchain->hmp;
-                       hammer2_vfs_unmount_hmp1(mp, hmp);
-
-                       atomic_clear_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED);
-#if REPORT_REFS_ERRORS
-                       if (rchain->refs != 1)
-                               kprintf("PMP->RCHAIN %p REFS WRONG %d\n",
-                                       rchain, rchain->refs);
-#else
-                       KKASSERT(rchain->refs == 1);
-#endif
-                       hammer2_chain_drop(rchain);
-                       cluster->array[i].chain = NULL;
-                       hammer2_vfs_unmount_hmp2(mp, hmp);
-               }
-               cluster->focus = NULL;
-
-#if REPORT_REFS_ERRORS
-               if (pmp->iroot->refs != 1)
-                       kprintf("PMP->IROOT %p REFS WRONG %d\n",
-                               pmp->iroot, pmp->iroot->refs);
-#else
-               KKASSERT(pmp->iroot->refs == 1);
-#endif
-               /* ref for pmp->iroot */
-               hammer2_inode_drop(pmp->iroot);
-               pmp->iroot = NULL;
-       }
-
-       pmp->mp = NULL;
-       mp->mnt_data = NULL;
-
-       kmalloc_destroy(&pmp->mmsg);
-       kmalloc_destroy(&pmp->minode);
-
-       kfree(pmp, M_HAMMER2);
        error = 0;
-
 failed:
        lockmgr(&hammer2_mntlk, LK_RELEASE);
 
        return (error);
 }
 
+/*
+ * Mount helper, hook the system mount into our PFS.
+ * The mount lock is held.
+ *
+ * mp->mnt_data and pmp->mp are pointed at each other, then pmp_count is
+ * incremented on the device (rchain->hmp) behind every non-NULL chain in
+ * pmp->iroot->cluster.  pmp->iroot must already be set up.
+ *
+ * We must bump the pmp_count on related devices for any
+ * mounted PFSs.
+ */
 static
 void
-hammer2_vfs_unmount_hmp1(struct mount *mp, hammer2_mount_t *hmp)
+hammer2_mount_helper(struct mount *mp, hammer2_pfs_t *pmp)
 {
-       hammer2_mount_exlock(hmp);
-       --hmp->pmp_count;
+       hammer2_cluster_t *cluster;
+       hammer2_chain_t *rchain;
+       int i;
+
+        mp->mnt_data = (qaddr_t)pmp;
+       pmp->mp = mp;
 
-       kprintf("hammer2_unmount hmp=%p pmpcnt=%d\n", hmp, hmp->pmp_count);
+       cluster = &pmp->iroot->cluster;
+       for (i = 0; i < cluster->nchains; ++i) {
+               rchain = cluster->array[i].chain;
+               if (rchain == NULL)
+                       continue;
+               ++rchain->hmp->pmp_count;
+               kprintf("hammer2_mount hmp=%p ++pmp_count=%d\n",
+                       rchain->hmp, rchain->hmp->pmp_count);
+       }
+}
+
+/*
+ * Mount helper, unhook the system mount from our PFS.
+ * The mount lock is held.
+ *
+ * If hmp is supplied a mount responsible for being the first to open
+ * the block device failed and the block device and all PFSs using the
+ * block device must be cleaned up.
+ *
+ * If pmp is supplied multiple devices might be backing the PFS and each
+ * must be disconnected.  This might not be the last PFS using some of the
+ * underlying devices.  Also, we have to adjust our hmp->pmp_count accounting
+ * for the devices backing the pmp which is now undergoing an unmount.
+ */
+static
+void
+hammer2_unmount_helper(struct mount *mp, hammer2_pfs_t *pmp, hammer2_dev_t *hmp)
+{
+       hammer2_cluster_t *cluster;
+       hammer2_chain_t *rchain;
+       struct vnode *devvp;
+       int dumpcnt;
+       int ronly = 0;
+       int i;
+
+       /*
+        * If no device supplied this is a high-level unmount and we have
+        * to disconnect the mount, adjust pmp_count, and locate devices that
+        * might now have no mounts.
+        */
+       if (pmp) {
+               KKASSERT(hmp == NULL);
+               KKASSERT((void *)(intptr_t)mp->mnt_data == pmp);
+               pmp->mp = NULL;
+               mp->mnt_data = NULL;
+
+               cluster = &pmp->iroot->cluster;
+               for (i = 0; i < cluster->nchains; ++i) {
+                       rchain = cluster->array[i].chain;
+                       if (rchain == NULL)
+                               continue;
+                       --rchain->hmp->pmp_count;
+                       kprintf("hammer2_unmount hmp=%p --pmp_count=%d\n",
+                               rchain->hmp, rchain->hmp->pmp_count);
+                       /* scrapping hmp now may invalidate the pmp */
+               }
+again:
+               TAILQ_FOREACH(hmp, &hammer2_mntlist, mntentry) {
+                       if (hmp->pmp_count == 0) {
+                               hammer2_unmount_helper(NULL, NULL, hmp);
+                               goto again;
+                       }
+               }
+               return;
+       }
+
+       /*
+        * Try to terminate the block device.  We can't terminate it if
+        * there are still PFSs referencing it.
+        */
+       kprintf("hammer2_unmount hmp=%p pmp_count=%d\n", hmp, hmp->pmp_count);
+       if (hmp->pmp_count)
+               return;
+
+       hammer2_pfsfree_scan(hmp);
+       hammer2_dev_exlock(hmp);        /* XXX order */
 
        /*
         * Cycle the volume data lock as a safety (probably not needed any
@@ -1693,136 +1876,96 @@ hammer2_vfs_unmount_hmp1(struct mount *mp, hammer2_mount_t *hmp)
         * The next mount's recovery scan can clean everything up but we want
         * to leave the filesystem in a 100% clean state on a normal unmount.
         */
+#if 0
        hammer2_voldata_lock(hmp);
        hammer2_voldata_unlock(hmp);
-       if (mp->mnt_data) {
-               hammer2_vfs_sync(mp, MNT_WAIT);
-               hammer2_vfs_sync(mp, MNT_WAIT);
-               hammer2_vfs_sync(mp, MNT_WAIT);
-       }
-
-       /*
-        * XXX chain depend deadlock?
-        */
+#endif
        hammer2_iocom_uninit(hmp);
 
-       if (hmp->pmp_count == 0) {
-               if ((hmp->vchain.flags | hmp->fchain.flags) &
-                   HAMMER2_CHAIN_FLUSH_MASK) {
-                       kprintf("hammer2_unmount: chains left over "
-                               "after final sync\n");
-                       kprintf("    vchain %08x\n", hmp->vchain.flags);
-                       kprintf("    fchain %08x\n", hmp->fchain.flags);
+       if ((hmp->vchain.flags | hmp->fchain.flags) &
+           HAMMER2_CHAIN_FLUSH_MASK) {
+               kprintf("hammer2_unmount: chains left over "
+                       "after final sync\n");
+               kprintf("    vchain %08x\n", hmp->vchain.flags);
+               kprintf("    fchain %08x\n", hmp->fchain.flags);
 
-                       if (hammer2_debug & 0x0010)
-                               Debugger("entered debugger");
-               }
+               if (hammer2_debug & 0x0010)
+                       Debugger("entered debugger");
        }
-}
 
-static
-void
-hammer2_vfs_unmount_hmp2(struct mount *mp, hammer2_mount_t *hmp)
-{
-       hammer2_pfsmount_t *spmp;
-       struct vnode *devvp;
-       int dumpcnt;
-       int ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
+       KKASSERT(hmp->spmp == NULL);
 
        /*
-        * If no PFS's left drop the master hammer2_mount for the
-        * device.
+        * Finish up with the device vnode
         */
-       if (hmp->pmp_count == 0) {
-               /*
-                * Clean up SPMP and the super-root inode
-                */
-               spmp = hmp->spmp;
-               if (spmp) {
-                       if (spmp->iroot) {
-                               hammer2_inode_drop(spmp->iroot);
-                               spmp->iroot = NULL;
-                       }
-                       hmp->spmp = NULL;
-                       kmalloc_destroy(&spmp->mmsg);
-                       kmalloc_destroy(&spmp->minode);
-                       kfree(spmp, M_HAMMER2);
-               }
-
-               /*
-                * Finish up with the device vnode
-                */
-               if ((devvp = hmp->devvp) != NULL) {
-                       vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
-                       vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0);
-                       hmp->devvp = NULL;
-                       VOP_CLOSE(devvp, (ronly ? FREAD : FREAD|FWRITE), NULL);
-                       vn_unlock(devvp);
-                       vrele(devvp);
-                       devvp = NULL;
-               }
-
-               /*
-                * Clear vchain/fchain flags that might prevent final cleanup
-                * of these chains.
-                */
-               if (hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) {
-                       atomic_clear_int(&hmp->vchain.flags,
-                                        HAMMER2_CHAIN_MODIFIED);
-                       hammer2_pfs_memory_wakeup(hmp->vchain.pmp);
-                       hammer2_chain_drop(&hmp->vchain);
-               }
-               if (hmp->vchain.flags & HAMMER2_CHAIN_UPDATE) {
-                       atomic_clear_int(&hmp->vchain.flags,
-                                        HAMMER2_CHAIN_UPDATE);
-                       hammer2_chain_drop(&hmp->vchain);
-               }
+       if ((devvp = hmp->devvp) != NULL) {
+               vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
+               vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0);
+               hmp->devvp = NULL;
+               VOP_CLOSE(devvp, (ronly ? FREAD : FREAD|FWRITE), NULL);
+               vn_unlock(devvp);
+               vrele(devvp);
+               devvp = NULL;
+       }
 
-               if (hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) {
-                       atomic_clear_int(&hmp->fchain.flags,
-                                        HAMMER2_CHAIN_MODIFIED);
-                       hammer2_pfs_memory_wakeup(hmp->fchain.pmp);
-                       hammer2_chain_drop(&hmp->fchain);
-               }
-               if (hmp->fchain.flags & HAMMER2_CHAIN_UPDATE) {
-                       atomic_clear_int(&hmp->fchain.flags,
-                                        HAMMER2_CHAIN_UPDATE);
-                       hammer2_chain_drop(&hmp->fchain);
-               }
+       /*
+        * Clear vchain/fchain flags that might prevent final cleanup
+        * of these chains.
+        */
+       if (hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) {
+               atomic_clear_int(&hmp->vchain.flags,
+                                HAMMER2_CHAIN_MODIFIED);
+               hammer2_pfs_memory_wakeup(hmp->vchain.pmp);
+               hammer2_chain_drop(&hmp->vchain);
+       }
+       if (hmp->vchain.flags & HAMMER2_CHAIN_UPDATE) {
+               atomic_clear_int(&hmp->vchain.flags,
+                                HAMMER2_CHAIN_UPDATE);
+               hammer2_chain_drop(&hmp->vchain);
+       }
 
-               /*
-                * Final drop of embedded freemap root chain to
-                * clean up fchain.core (fchain structure is not
-                * flagged ALLOCATED so it is cleaned out and then
-                * left to rot).
-                */
+       if (hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) {
+               atomic_clear_int(&hmp->fchain.flags,
+                                HAMMER2_CHAIN_MODIFIED);
+               hammer2_pfs_memory_wakeup(hmp->fchain.pmp);
                hammer2_chain_drop(&hmp->fchain);
+       }
+       if (hmp->fchain.flags & HAMMER2_CHAIN_UPDATE) {
+               atomic_clear_int(&hmp->fchain.flags,
+                                HAMMER2_CHAIN_UPDATE);
+               hammer2_chain_drop(&hmp->fchain);
+       }
 
-               /*
-                * Final drop of embedded volume root chain to clean
-                * up vchain.core (vchain structure is not flagged
-                * ALLOCATED so it is cleaned out and then left to
-                * rot).
-                */
-               dumpcnt = 50;
-               hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt, 'v');
-               dumpcnt = 50;
-               hammer2_dump_chain(&hmp->fchain, 0, &dumpcnt, 'f');
-               hammer2_mount_unlock(hmp);
-               hammer2_chain_drop(&hmp->vchain);
-
-               hammer2_io_cleanup(hmp, &hmp->iotree);
-               if (hmp->iofree_count) {
-                       kprintf("io_cleanup: %d I/O's left hanging\n",
-                               hmp->iofree_count);
-               }
+       /*
+        * Final drop of embedded freemap root chain to
+        * clean up fchain.core (fchain structure is not
+        * flagged ALLOCATED so it is cleaned out and then
+        * left to rot).
+        */
+       hammer2_chain_drop(&hmp->fchain);
 
-               TAILQ_REMOVE(&hammer2_mntlist, hmp, mntentry);
-               kmalloc_destroy(&hmp->mchain);
-               kfree(hmp, M_HAMMER2);
-       } else {
-               hammer2_mount_unlock(hmp);
+       /*
+        * Final drop of embedded volume root chain to clean
+        * up vchain.core (vchain structure is not flagged
+        * ALLOCATED so it is cleaned out and then left to
+        * rot).
+        */
+       dumpcnt = 50;
+       hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt, 'v');
+       dumpcnt = 50;
+       hammer2_dump_chain(&hmp->fchain, 0, &dumpcnt, 'f');
+       hammer2_dev_unlock(hmp);
+       hammer2_chain_drop(&hmp->vchain);
+
+       hammer2_io_cleanup(hmp, &hmp->iotree);
+       if (hmp->iofree_count) {
+               kprintf("io_cleanup: %d I/O's left hanging\n",
+                       hmp->iofree_count);
        }
+
+       TAILQ_REMOVE(&hammer2_mntlist, hmp, mntentry);
+       kmalloc_destroy(&hmp->mchain);
+       kfree(hmp, M_HAMMER2);
 }
 
 static
@@ -1838,7 +1981,7 @@ static
 int
 hammer2_vfs_root(struct mount *mp, struct vnode **vpp)
 {
-       hammer2_pfsmount_t *pmp;
+       hammer2_pfs_t *pmp;
        hammer2_cluster_t *cparent;
        int error;
        struct vnode *vp;
@@ -1868,8 +2011,8 @@ static
 int
 hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
 {
-       hammer2_pfsmount_t *pmp;
-       hammer2_mount_t *hmp;
+       hammer2_pfs_t *pmp;
+       hammer2_dev_t *hmp;
 
        pmp = MPTOPMP(mp);
        KKASSERT(pmp->iroot->cluster.nchains >= 1);
@@ -1889,8 +2032,8 @@ static
 int
 hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred)
 {
-       hammer2_pfsmount_t *pmp;
-       hammer2_mount_t *hmp;
+       hammer2_pfs_t *pmp;
+       hammer2_dev_t *hmp;
 
        pmp = MPTOPMP(mp);
        KKASSERT(pmp->iroot->cluster.nchains >= 1);
@@ -1931,7 +2074,7 @@ struct hammer2_recovery_info {
        int     depth;
 };
 
-static int hammer2_recovery_scan(hammer2_trans_t *trans, hammer2_mount_t *hmp,
+static int hammer2_recovery_scan(hammer2_trans_t *trans, hammer2_dev_t *hmp,
                        hammer2_chain_t *parent,
                        struct hammer2_recovery_info *info,
                        hammer2_tid_t sync_tid);
@@ -1940,7 +2083,7 @@ static int hammer2_recovery_scan(hammer2_trans_t *trans, hammer2_mount_t *hmp,
 
 static
 int
-hammer2_recovery(hammer2_mount_t *hmp)
+hammer2_recovery(hammer2_dev_t *hmp)
 {
        hammer2_trans_t trans;
        struct hammer2_recovery_info info;
@@ -1981,7 +2124,7 @@ hammer2_recovery(hammer2_mount_t *hmp)
 
 static
 int
-hammer2_recovery_scan(hammer2_trans_t *trans, hammer2_mount_t *hmp,
+hammer2_recovery_scan(hammer2_trans_t *trans, hammer2_dev_t *hmp,
                      hammer2_chain_t *parent,
                      struct hammer2_recovery_info *info,
                      hammer2_tid_t sync_tid)
@@ -2111,8 +2254,8 @@ hammer2_vfs_sync(struct mount *mp, int waitfor)
        hammer2_inode_t *iroot;
        hammer2_chain_t *chain;
        hammer2_chain_t *parent;
-       hammer2_pfsmount_t *pmp;
-       hammer2_mount_t *hmp;
+       hammer2_pfs_t *pmp;
+       hammer2_dev_t *hmp;
        int flags;
        int error;
        int total_error;
@@ -2449,7 +2592,7 @@ hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
  */
 static
 int
-hammer2_install_volume_header(hammer2_mount_t *hmp)
+hammer2_install_volume_header(hammer2_dev_t *hmp)
 {
        hammer2_volume_data_t *vd;
        struct buf *bp;
@@ -2543,13 +2686,13 @@ hammer2_install_volume_header(hammer2_mount_t *hmp)
  * This is a particular problem when compression is used.
  */
 void
-hammer2_lwinprog_ref(hammer2_pfsmount_t *pmp)
+hammer2_lwinprog_ref(hammer2_pfs_t *pmp)
 {
        atomic_add_int(&pmp->count_lwinprog, 1);
 }
 
 void
-hammer2_lwinprog_drop(hammer2_pfsmount_t *pmp)
+hammer2_lwinprog_drop(hammer2_pfs_t *pmp)
 {
        int lwinprog;
 
@@ -2563,7 +2706,7 @@ hammer2_lwinprog_drop(hammer2_pfsmount_t *pmp)
 }
 
 void
-hammer2_lwinprog_wait(hammer2_pfsmount_t *pmp)
+hammer2_lwinprog_wait(hammer2_pfs_t *pmp)
 {
        int lwinprog;
 
@@ -2586,7 +2729,7 @@ hammer2_lwinprog_wait(hammer2_pfsmount_t *pmp)
  * structures.
  */
 void
-hammer2_pfs_memory_wait(hammer2_pfsmount_t *pmp)
+hammer2_pfs_memory_wait(hammer2_pfs_t *pmp)
 {
        uint32_t waiting;
        uint32_t count;
@@ -2643,7 +2786,7 @@ hammer2_pfs_memory_wait(hammer2_pfsmount_t *pmp)
 }
 
 void
-hammer2_pfs_memory_inc(hammer2_pfsmount_t *pmp)
+hammer2_pfs_memory_inc(hammer2_pfs_t *pmp)
 {
        if (pmp) {
                atomic_add_int(&pmp->inmem_dirty_chains, 1);
@@ -2651,7 +2794,7 @@ hammer2_pfs_memory_inc(hammer2_pfsmount_t *pmp)
 }
 
 void
-hammer2_pfs_memory_wakeup(hammer2_pfsmount_t *pmp)
+hammer2_pfs_memory_wakeup(hammer2_pfs_t *pmp)
 {
        uint32_t waiting;
 
index ec6de17..e3fe502 100644 (file)
@@ -240,7 +240,7 @@ hammer2_vop_reclaim(struct vop_reclaim_args *ap)
 {
        hammer2_cluster_t *cluster;
        hammer2_inode_t *ip;
-       hammer2_pfsmount_t *pmp;
+       hammer2_pfs_t *pmp;
        struct vnode *vp;
 
        LOCKSTART;
@@ -388,7 +388,7 @@ hammer2_vop_getattr(struct vop_getattr_args *ap)
 {
        const hammer2_inode_data_t *ripdata;
        hammer2_cluster_t *cluster;
-       hammer2_pfsmount_t *pmp;
+       hammer2_pfs_t *pmp;
        hammer2_inode_t *ip;
        struct vnode *vp;
        struct vattr *vap;
@@ -1430,7 +1430,8 @@ hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
        hammer2_pfs_memory_wait(dip->pmp);
        hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE);
        nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
-                                  name, name_len, &cluster, &error);
+                                  name, name_len,
+                                  &cluster, 0, &error);
        if (error) {
                KKASSERT(nip == NULL);
                *ap->a_vpp = NULL;
@@ -1626,7 +1627,8 @@ hammer2_vop_ncreate(struct vop_ncreate_args *ap)
        ncluster = NULL;
 
        nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
-                                  name, name_len, &ncluster, &error);
+                                  name, name_len,
+                                  &ncluster, 0, &error);
        if (error) {
                KKASSERT(nip == NULL);
                *ap->a_vpp = NULL;
@@ -1675,7 +1677,8 @@ hammer2_vop_nmknod(struct vop_nmknod_args *ap)
        ncluster = NULL;
 
        nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
-                                  name, name_len, &ncluster, &error);
+                                  name, name_len,
+                                  &ncluster, 0, &error);
        if (error) {
                KKASSERT(nip == NULL);
                *ap->a_vpp = NULL;
@@ -1723,7 +1726,8 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
        ap->a_vap->va_type = VLNK;      /* enforce type */
 
        nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
-                                  name, name_len, &ncparent, &error);
+                                  name, name_len,
+                                  &ncparent, 0, &error);
        if (error) {
                KKASSERT(nip == NULL);
                *ap->a_vpp = NULL;
@@ -2317,7 +2321,7 @@ static
 int
 hammer2_strategy_write(struct vop_strategy_args *ap)
 {      
-       hammer2_pfsmount_t *pmp;
+       hammer2_pfs_t *pmp;
        struct bio *bio;
        struct buf *bp;
        hammer2_inode_t *ip;
@@ -2366,7 +2370,7 @@ int
 hammer2_vop_mountctl(struct vop_mountctl_args *ap)
 {
        struct mount *mp;
-       hammer2_pfsmount_t *pmp;
+       hammer2_pfs_t *pmp;
        int rc;
 
        LOCKSTART;
@@ -2396,7 +2400,7 @@ hammer2_vop_mountctl(struct vop_mountctl_args *ap)
  * flush, and (3) on umount.
  */
 void
-hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp)
+hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfs_t *pmp)
 {
        const hammer2_inode_data_t *ripdata;
        hammer2_inode_unlink_t *ipul;