hammer2 - Start adding internal cluster API
author Matthew Dillon <dillon@apollo.backplane.com>
Tue, 18 Mar 2014 16:35:21 +0000 (09:35 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Tue, 18 Mar 2014 16:35:21 +0000 (09:35 -0700)
Initial addition of the cluster API.  H2 will be non-operational until this
is stabilized.  Adding the cluster API will require a few stages.  This first
stage is to add the API and make it work under degenerate (single-target)
conditions.

The hammer2_cluster structure collects and manages an array of up to
8 chains representing mirrors / cluster nodes / copies of the same point
in the topology.

* Add hammer2_cluster.c, and hammer2_cluster_t

* Replace nearly all high-level (vnops, vfsops) chain calls with
  cluster calls.

13 files changed:
sys/vfs/hammer2/Makefile
sys/vfs/hammer2/TODO
sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_chain.c
sys/vfs/hammer2/hammer2_cluster.c [new file with mode: 0644]
sys/vfs/hammer2/hammer2_flush.c
sys/vfs/hammer2/hammer2_freemap.c
sys/vfs/hammer2/hammer2_inode.c
sys/vfs/hammer2/hammer2_io.c
sys/vfs/hammer2/hammer2_ioctl.c
sys/vfs/hammer2/hammer2_subr.c
sys/vfs/hammer2/hammer2_vfsops.c
sys/vfs/hammer2/hammer2_vnops.c

index 6bc7265..402664a 100644 (file)
@@ -6,7 +6,7 @@
 CFLAGS+= -DINVARIANTS -DSMP
 KMOD=  hammer2
 SRCS=  hammer2_vfsops.c hammer2_vnops.c hammer2_inode.c hammer2_ccms.c
-SRCS+= hammer2_chain.c hammer2_flush.c hammer2_freemap.c
+SRCS+= hammer2_chain.c hammer2_flush.c hammer2_freemap.c hammer2_cluster.c
 SRCS+= hammer2_ioctl.c hammer2_msgops.c hammer2_subr.c
 SRCS+=  hammer2_lz4.c hammer2_io.c
 SRCS+=  hammer2_zlib_adler32.c hammer2_zlib_deflate.c
index 089dcb1..e597f60 100644 (file)
@@ -1,4 +1,13 @@
 
+* transaction on cluster - multiple trans structures, subtrans
+
+* inode always contains target cluster/chain, not hardlink
+
+* cluster_modify_ip -> data returned mod to all chains
+* and hammer2_cluster_data() -> same thing
+
+* chain refs in cluster, cluster refs
+
 * check inode shared lock ... can end up in endless loop if following
   hardlink because ip->chain is not updated in the exclusive lock cycle
   when following hardlink.
index ce40e67..d6920e3 100644 (file)
  */
 
 /*
+ * HAMMER2 IN-MEMORY CACHE OF MEDIA STRUCTURES
+ *
  * This header file contains structures used internally by the HAMMER2
  * implementation.  See hammer2_disk.h for on-disk structures.
+ *
+ * There is an in-memory representation of all on-media data structures.
+ * Basically everything is represented by a hammer2_chain structure
+ * in-memory and other higher-level structures map to chains.
+ *
+ * A great deal of data is accessed simply via its buffer cache buffer,
+ * which is mapped for the duration of the chain's lock.  However, because
+ * chains may represent blocks smaller than the 16KB minimum we impose
+ * on buffer cache buffers, we cannot hold related buffer cache buffers
+ * locked for smaller blocks.  In these situations we kmalloc() a copy
+ * of the block.
+ *
+ * When modifications are made to a chain a new filesystem block must be
+ * allocated.  Multiple modifications do not necessarily allocate new
+ * blocks.  However, when a flush occurs a flush synchronization point
+ * is created and any new modifications made after this point will allocate
+ * a new block even if the chain is already in a modified state.
+ *
+ * The in-memory representation may remain cached (for example in order to
+ * placemark clustering locks) even after the related data has been
+ * detached.
+ *
+ *                             CORE SHARING
+ *
+ * In order to support concurrent flushes, a flush synchronization point
+ * represented by a transaction id is created.  Among other things,
+ * operations may move filesystem objects from one part of the topology
+ * to another (for example, if you rename a file or when indirect blocks
+ * are created or destroyed, and a few other things).  When this occurs
+ * across a flush synchronization point the flusher needs to be able to
+ * recurse down BOTH the 'before' version of the topology and the 'after'
+ * version.
+ *
+ * To facilitate this, modifications to chains do what is called a
+ * DELETE-DUPLICATE operation.  Chains are not actually moved in-memory.
+ * Instead the chain we wish to move is deleted and a new chain is created
+ * at the target location in the topology.  ANY SUBCHAINS PLACED UNDER THE
+ * CHAIN BEING MOVED HAVE TO EXIST IN BOTH PLACES.  To make this work
+ * all sub-chains are managed by the hammer2_chain_core structure.  This
+ * structure can be multi-homed, meaning that it can have more than one
+ * chain as its parent.  When a chain is delete-duplicated the chain's core
+ * becomes shared under both the old and new chain.
+ *
+ *                             STALE CHAINS
+ *
+ * When a chain is delete-duplicated the old chain typically becomes stale.
+ * This is detected via the HAMMER2_CHAIN_DUPLICATED flag in chain->flags.
+ * To avoid executing live filesystem operations on stale chains, the inode
+ * locking code will follow stale chains via core->ownerq until it finds
+ * the live chain.  The lock prevents ripups by other threads.  Lookups
+ * must properly order locking operations to prevent other threads from
+ * racing the lookup operation and will also follow stale chains when
+ * required.
  */
 
 #ifndef _VFS_HAMMER2_HAMMER2_H_
 #include <sys/buf.h>
 #include <sys/queue.h>
 #include <sys/limits.h>
-#include <sys/buf2.h>
 #include <sys/signal2.h>
 #include <sys/dmsg.h>
 #include <sys/mutex.h>
+#include <sys/kern_syscall.h>
+
+#include <sys/buf2.h>
 #include <sys/mutex2.h>
 
 #include "hammer2_disk.h"
 #include "hammer2_ccms.h"
 
 struct hammer2_chain;
+struct hammer2_cluster;
 struct hammer2_inode;
 struct hammer2_mount;
 struct hammer2_pfsmount;
@@ -179,8 +237,10 @@ struct hammer2_io {
        off_t           pbase;
        int             psize;
        void            (*callback)(struct hammer2_io *dio,
+                                   struct hammer2_cluster *cluster,
                                    struct hammer2_chain *chain,
                                    void *arg1, off_t arg2);
+       struct hammer2_cluster *arg_l;          /* INPROG I/O only */
        struct hammer2_chain *arg_c;            /* INPROG I/O only */
        void            *arg_p;                 /* INPROG I/O only */
        off_t           arg_o;                  /* INPROG I/O only */
@@ -352,61 +412,41 @@ RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
 #define HAMMER2_FREEMAP_DOREALFREE     3
 
 /*
- * HAMMER2 IN-MEMORY CACHE OF MEDIA STRUCTURES
- *
- * There is an in-memory representation of all on-media data structure.
- * Basically everything is represented by a hammer2_chain structure
- * in-memory and other higher-level structures map to chains.
- *
- * A great deal of data is accessed simply via its buffer cache buffer,
- * which is mapped for the duration of the chain's lock.  However, because
- * chains may represent blocks smaller than the 16KB minimum we impose
- * on buffer cache buffers, we cannot hold related buffer cache buffers
- * locked for smaller blocks.  In these situations we kmalloc() a copy
- * of the block.
+ * HAMMER2 cluster - A set of chains representing the same entity.
  *
- * When modifications are made to a chain a new filesystem block must be
- * allocated.  Multiple modifications do not necessarily allocate new
- * blocks.  However, when a flush occurs a flush synchronization point
- * is created and any new modifications made after this point will allocate
- * a new block even if the chain is already in a modified state.
- *
- * The in-memory representation may remain cached (for example in order to
- * placemark clustering locks) even after the related data has been
- * detached.
+ * The hammer2_pfsmount structure embeds a hammer2_cluster.  All other
+ * hammer2_cluster use cases use temporary allocations.
  *
- *                             CORE SHARING
+ * The cluster API mimics the chain API.  Except as used in the pfsmount,
+ * the cluster structure is a temporary 'working copy' of a set of chains
+ * representing targets compatible with the operation.  However, for
+ * performance reasons the cluster API does not necessarily issue concurrent
+ * requests to the underlying chain API for all compatible chains all the
+ * time.  This may sometimes necessitate revisiting parent cluster nodes
+ * to 'flesh out' (validate more chains).
  *
- * In order to support concurrent flushes a flush synchronization point
- * is created represented by a transaction id.  Among other things,
- * operations may move filesystem objects from one part of the topology
- * to another (for example, if you rename a file or when indirect blocks
- * are created or destroyed, and a few other things).  When this occurs
- * across a flush synchronization point the flusher needs to be able to
- * recurse down BOTH the 'before' version of the topology and the 'after'
- * version.
- *
- * To facilitate this modifications to chains do what is called a
- * DELETE-DUPLICATE operation.  Chains are not actually moved in-memory.
- * Instead the chain we wish to move is deleted and a new chain is created
- * at the target location in the topology.  ANY SUBCHAINS PLACED UNDER THE
- * CHAIN BEING MOVED HAVE TO EXIST IN BOTH PLACES.  To make this work
- * all sub-chains are managed by the hammer2_chain_core structure.  This
- * structure can be multi-homed, meaning that it can have more than one
- * chain as its parent.  When a chain is delete-duplicated the chain's core
- * becomes shared under both the old and new chain.
- *
- *                             STALE CHAINS
- *
- * When a chain is delete-duplicated the old chain typically becomes stale.
- * This is detected via the HAMMER2_CHAIN_DUPLICATED flag in chain->flags.
- * To avoid executing live filesystem operations on stale chains, the inode
- * locking code will follow stale chains via core->ownerq until it finds
- * the live chain.  The lock prevents ripups by other threads.  Lookups
- * must properly order locking operations to prevent other threads from
- * racing the lookup operation and will also follow stale chains when
- * required.
+ * If an insufficient number of chains remain in a working copy, the operation
+ * may have to be downgraded, retried, or stalled until the requisite number
+ * of chains is available.
  */
+#define HAMMER2_MAXCLUSTER     8
+
+struct hammer2_cluster {
+       int                     status;         /* operational status */
+       int                     refs;           /* track for deallocation */
+       struct hammer2_pfsmount *pmp;           /* PFS mount */
+       uint32_t                flags;          /* HAMMER2_CLUSTER_* */
+       int                     nchains;        /* entries in use in array[] */
+       hammer2_chain_t         *focus;         /* current focus (or mod) */
+       hammer2_chain_t         *array[HAMMER2_MAXCLUSTER]; /* member chains */
+       int                     cache_index[HAMMER2_MAXCLUSTER];
+};
+
+typedef struct hammer2_cluster hammer2_cluster_t;
+
+#define HAMMER2_CLUSTER_PFS    0x00000001      /* embedded in pfsmount */
+#define HAMMER2_CLUSTER_INODE  0x00000002      /* embedded in inode */
+
 
 RB_HEAD(hammer2_inode_tree, hammer2_inode);
 
@@ -422,7 +462,7 @@ struct hammer2_inode {
        struct hammer2_pfsmount *pmp;           /* PFS mount */
        struct hammer2_inode    *pip;           /* parent inode */
        struct vnode            *vp;
-       hammer2_chain_t         *chain;         /* NOTE: rehomed on rename */
+       hammer2_cluster_t       cluster;
        struct lockf            advlock;
        hammer2_tid_t           inum;
        u_int                   flags;
@@ -560,21 +600,6 @@ struct hammer2_mount {
 typedef struct hammer2_mount hammer2_mount_t;
 
 /*
- * HAMMER2 cluster - a device/root associated with a PFS.
- *
- * A PFS may have several hammer2_cluster's associated with it.
- */
-#define HAMMER2_MAXCLUSTER     8
-
-struct hammer2_cluster {
-       int                     nchains;
-       int                     status;
-       hammer2_chain_t         *chains[HAMMER2_MAXCLUSTER];
-};
-
-typedef struct hammer2_cluster hammer2_cluster_t;
-
-/*
  * HAMMER2 PFS mount point structure (aka vp->v_mount->mnt_data).
  * This has a 1:1 correspondence to struct mount (note that the
  * hammer2_mount structure has a N:1 correspondence).
@@ -714,11 +739,10 @@ extern mtx_t thread_protect;
 #define hammer2_icrc32(buf, size)      iscsi_crc32((buf), (size))
 #define hammer2_icrc32c(buf, size, crc)        iscsi_crc32_ext((buf), (size), (crc))
 
-hammer2_chain_t *hammer2_inode_lock_ex(hammer2_inode_t *ip);
-hammer2_chain_t *hammer2_inode_lock_sh(hammer2_inode_t *ip);
-void hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_chain_t *chain);
-void hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_chain_t *chain);
-void hammer2_chain_refactor(hammer2_chain_t **chainp);
+hammer2_cluster_t *hammer2_inode_lock_ex(hammer2_inode_t *ip);
+hammer2_cluster_t *hammer2_inode_lock_sh(hammer2_inode_t *ip);
+void hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_cluster_t *chain);
+void hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_cluster_t *chain);
 void hammer2_voldata_lock(hammer2_mount_t *hmp);
 void hammer2_voldata_unlock(hammer2_mount_t *hmp, int modify);
 ccms_state_t hammer2_inode_lock_temp_release(hammer2_inode_t *ip);
@@ -730,8 +754,8 @@ void hammer2_mount_exlock(hammer2_mount_t *hmp);
 void hammer2_mount_shlock(hammer2_mount_t *hmp);
 void hammer2_mount_unlock(hammer2_mount_t *hmp);
 
-int hammer2_get_dtype(hammer2_chain_t *chain);
-int hammer2_get_vtype(hammer2_chain_t *chain);
+int hammer2_get_dtype(hammer2_inode_data_t *ipdata);
+int hammer2_get_vtype(hammer2_inode_data_t *ipdata);
 u_int8_t hammer2_get_obj_type(enum vtype vtype);
 void hammer2_time_to_timespec(u_int64_t xtime, struct timespec *ts);
 u_int64_t hammer2_timespec_to_time(struct timespec *ts);
@@ -743,8 +767,10 @@ int hammer2_getradix(size_t bytes);
 
 int hammer2_calc_logical(hammer2_inode_t *ip, hammer2_off_t uoff,
                        hammer2_key_t *lbasep, hammer2_key_t *leofp);
-int hammer2_calc_physical(hammer2_inode_t *ip, hammer2_key_t lbase);
+int hammer2_calc_physical(hammer2_inode_t *ip, hammer2_inode_data_t *ipdata,
+                       hammer2_key_t lbase);
 void hammer2_update_time(uint64_t *timep);
+void hammer2_adjreadcounter(hammer2_blockref_t *bref, size_t bytes);
 
 /*
  * hammer2_inode.c
@@ -756,39 +782,38 @@ void hammer2_inode_unlock_nlinks(hammer2_inode_t *ip);
 hammer2_inode_t *hammer2_inode_lookup(hammer2_pfsmount_t *pmp,
                        hammer2_tid_t inum);
 hammer2_inode_t *hammer2_inode_get(hammer2_pfsmount_t *pmp,
-                       hammer2_inode_t *dip, hammer2_chain_t *chain);
+                       hammer2_inode_t *dip, hammer2_cluster_t *cluster);
 void hammer2_inode_free(hammer2_inode_t *ip);
 void hammer2_inode_ref(hammer2_inode_t *ip);
 void hammer2_inode_drop(hammer2_inode_t *ip);
 void hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
-                       hammer2_chain_t *chain);
+                       hammer2_cluster_t *cluster);
 void hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp);
 
 hammer2_inode_t *hammer2_inode_create(hammer2_trans_t *trans,
                        hammer2_inode_t *dip,
                        struct vattr *vap, struct ucred *cred,
                        const uint8_t *name, size_t name_len,
-                       hammer2_chain_t **chainp, int *errorp);
+                       hammer2_cluster_t **clusterp, int *errorp);
 int hammer2_inode_connect(hammer2_trans_t *trans,
-                       hammer2_chain_t **chainp, int hlink,
-                       hammer2_inode_t *dip, hammer2_chain_t **dchainp,
+                       hammer2_cluster_t **clusterp, int hlink,
+                       hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
                        const uint8_t *name, size_t name_len,
                        hammer2_key_t key);
 hammer2_inode_t *hammer2_inode_common_parent(hammer2_inode_t *fdip,
                        hammer2_inode_t *tdip);
 void hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip,
-                       hammer2_chain_t **parentp);
+                       hammer2_cluster_t *cparent);
 int hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
                        const uint8_t *name, size_t name_len, int isdir,
                        int *hlinkp, struct nchandle *nch);
 int hammer2_hardlink_consolidate(hammer2_trans_t *trans,
-                       hammer2_inode_t *ip, hammer2_chain_t **chainp,
-                       hammer2_inode_t *cdip, hammer2_chain_t **cdchainp,
+                       hammer2_inode_t *ip, hammer2_cluster_t **clusterp,
+                       hammer2_inode_t *cdip, hammer2_cluster_t *cdcluster,
                        int nlinks);
 int hammer2_hardlink_deconsolidate(hammer2_trans_t *trans, hammer2_inode_t *dip,
                        hammer2_chain_t **chainp, hammer2_chain_t **ochainp);
-int hammer2_hardlink_find(hammer2_inode_t *dip,
-                       hammer2_chain_t **chainp, hammer2_chain_t **ochainp);
+int hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_cluster_t *cluster);
 void hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp);
 
 /*
@@ -804,17 +829,15 @@ void hammer2_chain_core_alloc(hammer2_trans_t *trans, hammer2_chain_t *nchain,
 void hammer2_chain_ref(hammer2_chain_t *chain);
 void hammer2_chain_drop(hammer2_chain_t *chain);
 int hammer2_chain_lock(hammer2_chain_t *chain, int how);
-void hammer2_chain_load_async(hammer2_chain_t *chain,
+void hammer2_chain_load_async(hammer2_cluster_t *cluster,
                                void (*func)(hammer2_io_t *dio,
+                                            hammer2_cluster_t *cluster,
                                             hammer2_chain_t *chain,
                                             void *arg_p, off_t arg_o),
-                               void *arg_p, off_t arg_o);
+                               void *arg_p);
 void hammer2_chain_moved(hammer2_chain_t *chain);
 void hammer2_chain_modify(hammer2_trans_t *trans,
                                hammer2_chain_t **chainp, int flags);
-hammer2_inode_data_t *hammer2_chain_modify_ip(hammer2_trans_t *trans,
-                               hammer2_inode_t *ip, hammer2_chain_t **chainp,
-                               int flags);
 void hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip,
                                hammer2_chain_t *parent,
                                hammer2_chain_t **chainp,
@@ -828,7 +851,7 @@ void hammer2_chain_lookup_done(hammer2_chain_t *parent);
 hammer2_chain_t *hammer2_chain_lookup(hammer2_chain_t **parentp,
                                hammer2_key_t *key_nextp,
                                hammer2_key_t key_beg, hammer2_key_t key_end,
-                               int *cache_indexp, int flags);
+                               int *cache_indexp, int flags, int *ddflagp);
 hammer2_chain_t *hammer2_chain_next(hammer2_chain_t **parentp,
                                hammer2_chain_t *chain,
                                hammer2_key_t *key_nextp,
@@ -856,13 +879,13 @@ void hammer2_chain_delete_duplicate(hammer2_trans_t *trans,
 void hammer2_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp);
 void hammer2_chain_commit(hammer2_trans_t *trans, hammer2_chain_t *chain);
 void hammer2_chain_setsubmod(hammer2_trans_t *trans, hammer2_chain_t *chain);
-
-void hammer2_chain_memory_wait(hammer2_pfsmount_t *pmp);
-void hammer2_chain_memory_inc(hammer2_pfsmount_t *pmp);
-void hammer2_chain_memory_wakeup(hammer2_pfsmount_t *pmp);
 void hammer2_chain_countbrefs(hammer2_chain_t *chain,
                                hammer2_blockref_t *base, int count);
 
+void hammer2_pfs_memory_wait(hammer2_pfsmount_t *pmp);
+void hammer2_pfs_memory_inc(hammer2_pfsmount_t *pmp);
+void hammer2_pfs_memory_wakeup(hammer2_pfsmount_t *pmp);
+
 int hammer2_base_find(hammer2_chain_t *chain,
                                hammer2_blockref_t *base, int count,
                                int *cache_indexp, hammer2_key_t *key_nextp,
@@ -874,6 +897,7 @@ void hammer2_base_delete(hammer2_trans_t *trans, hammer2_chain_t *chain,
 void hammer2_base_insert(hammer2_trans_t *trans, hammer2_chain_t *chain,
                                hammer2_blockref_t *base, int count,
                                int *cache_indexp, hammer2_chain_t *child);
+void hammer2_chain_refactor(hammer2_chain_t **chainp);
 
 /*
  * hammer2_trans.c
@@ -906,8 +930,10 @@ int hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize,
                                hammer2_io_t **diop);
 void hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize,
                                void (*callback)(hammer2_io_t *dio,
+                                                hammer2_cluster_t *arg_l,
                                                 hammer2_chain_t *arg_c,
                                                 void *arg_p, off_t arg_o),
+                               hammer2_cluster_t *arg_l,
                                hammer2_chain_t *arg_c,
                                void *arg_p, off_t arg_o);
 void hammer2_io_bawrite(hammer2_io_t **diop);
@@ -946,6 +972,72 @@ int hammer2_freemap_alloc(hammer2_trans_t *trans, hammer2_chain_t *chain,
 void hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_mount_t *hmp,
                                hammer2_blockref_t *bref, int how);
 
+/*
+ * hammer2_cluster.c
+ */
+u_int hammer2_cluster_bytes(hammer2_cluster_t *cluster);
+uint8_t hammer2_cluster_type(hammer2_cluster_t *cluster);
+hammer2_media_data_t *hammer2_cluster_data(hammer2_cluster_t *cluster);
+hammer2_cluster_t *hammer2_cluster_from_chain(hammer2_chain_t *chain);
+int hammer2_cluster_modified(hammer2_cluster_t *cluster);
+int hammer2_cluster_unlinked(hammer2_cluster_t *cluster);
+int hammer2_cluster_duplicated(hammer2_cluster_t *cluster);
+void hammer2_cluster_set_chainflags(hammer2_cluster_t *cluster, uint32_t flags);
+void hammer2_cluster_bref(hammer2_cluster_t *cluster, hammer2_blockref_t *bref);
+void hammer2_cluster_setsubmod(hammer2_trans_t *trans,
+                       hammer2_cluster_t *cluster);
+hammer2_cluster_t *hammer2_cluster_alloc(hammer2_pfsmount_t *pmp,
+                       hammer2_trans_t *trans,
+                       hammer2_blockref_t *bref);
+void hammer2_cluster_core_alloc(hammer2_trans_t *trans,
+                       hammer2_cluster_t *ncluster,
+                       hammer2_cluster_t *ocluster);
+void hammer2_cluster_ref(hammer2_cluster_t *cluster);
+void hammer2_cluster_drop(hammer2_cluster_t *cluster);
+void hammer2_cluster_wait(hammer2_cluster_t *cluster);
+int hammer2_cluster_lock(hammer2_cluster_t *cluster, int how);
+void hammer2_cluster_replace(hammer2_cluster_t *dst, hammer2_cluster_t *src);
+void hammer2_cluster_replace_locked(hammer2_cluster_t *dst,
+                       hammer2_cluster_t *src);
+hammer2_cluster_t *hammer2_cluster_copy(hammer2_cluster_t *ocluster,
+                       int with_chains);
+void hammer2_cluster_refactor(hammer2_cluster_t *cluster);
+void hammer2_cluster_unlock(hammer2_cluster_t *cluster);
+void hammer2_cluster_resize(hammer2_trans_t *trans, hammer2_inode_t *ip,
+                       hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
+                       int nradix, int flags);
+hammer2_inode_data_t *hammer2_cluster_modify_ip(hammer2_trans_t *trans,
+                       hammer2_inode_t *ip, hammer2_cluster_t *cluster,
+                       int flags);
+void hammer2_cluster_modify(hammer2_trans_t *trans, hammer2_cluster_t *cluster,
+                       int flags);
+hammer2_cluster_t *hammer2_cluster_lookup_init(hammer2_cluster_t *cparent,
+                       int flags);
+void hammer2_cluster_lookup_done(hammer2_cluster_t *cparent);
+hammer2_cluster_t *hammer2_cluster_lookup(hammer2_cluster_t *cparent,
+                       hammer2_key_t *key_nextp,
+                       hammer2_key_t key_beg, hammer2_key_t key_end,
+                       int flags, int *ddflagp);
+hammer2_cluster_t *hammer2_cluster_next(hammer2_cluster_t *cparent,
+                       hammer2_cluster_t *cluster,
+                       hammer2_key_t *key_nextp,
+                       hammer2_key_t key_beg, hammer2_key_t key_end,
+                       int flags);
+hammer2_cluster_t *hammer2_cluster_scan(hammer2_cluster_t *cparent,
+                       hammer2_cluster_t *cluster, int flags);
+int hammer2_cluster_create(hammer2_trans_t *trans, hammer2_cluster_t *cparent,
+                       hammer2_cluster_t **clusterp,
+                       hammer2_key_t key, int keybits, int type, size_t bytes);
+void hammer2_cluster_duplicate(hammer2_trans_t *trans,
+                       hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
+                       hammer2_blockref_t *bref,
+                       int snapshot, int duplicate_reason);
+void hammer2_cluster_delete_duplicate(hammer2_trans_t *trans,
+                       hammer2_cluster_t *cluster, int flags);
+void hammer2_cluster_delete(hammer2_trans_t *trans, hammer2_cluster_t *cluster,
+                       int flags);
+int hammer2_cluster_snapshot(hammer2_trans_t *trans,
+                       hammer2_cluster_t *ocluster, hammer2_ioc_pfs_t *pfs);
 
 #endif /* !_KERNEL */
 #endif /* !_VFS_HAMMER2_HAMMER2_H_ */
index 0f232da..b031dd6 100644 (file)
@@ -86,7 +86,6 @@ static hammer2_chain_t *hammer2_chain_create_indirect(
                hammer2_trans_t *trans, hammer2_chain_t *parent,
                hammer2_key_t key, int keybits, int for_type, int *errorp);
 static void hammer2_chain_drop_data(hammer2_chain_t *chain, int lastdrop);
-static void adjreadcounter(hammer2_blockref_t *bref, size_t bytes);
 static hammer2_chain_t *hammer2_combined_find(
                hammer2_chain_t *parent,
                hammer2_blockref_t *base, int count,
@@ -896,7 +895,7 @@ hammer2_chain_lock(hammer2_chain_t *chain, int how)
        } else {
                error = hammer2_io_bread(hmp, bref->data_off, chain->bytes,
                                         &chain->dio);
-               adjreadcounter(&chain->bref, chain->bytes);
+               hammer2_adjreadcounter(&chain->bref, chain->bytes);
        }
 
        if (error) {
@@ -965,19 +964,37 @@ hammer2_chain_lock(hammer2_chain_t *chain, int how)
  * of the chain first to handle certain cases.
  */
 void
-hammer2_chain_load_async(hammer2_chain_t *chain,
+hammer2_chain_load_async(hammer2_cluster_t *cluster,
                         void (*callback)(hammer2_io_t *dio,
+                                         hammer2_cluster_t *cluster,
                                          hammer2_chain_t *chain,
                                          void *arg_p, off_t arg_o),
-                        void *arg_p, off_t arg_o)
+                        void *arg_p)
 {
+       hammer2_chain_t *chain;
        hammer2_mount_t *hmp;
        struct hammer2_io *dio;
        hammer2_blockref_t *bref;
        int error;
+       int i;
+
+       /*
+        * Prefer a chain in the cluster whose data is already available;
+        * otherwise begin an I/O iteration using the first chain.
+        */
+       chain = NULL;
+       for (i = 0; i < cluster->nchains; ++i) {
+               chain = cluster->array[i];
+               if (chain->data)
+                       break;
+       }
+       if (i == cluster->nchains) {
+               chain = cluster->array[0];
+               i = 0;
+       }
 
        if (chain->data) {
-               callback(NULL, chain, arg_p, arg_o);
+               callback(NULL, cluster, chain, arg_p, (off_t)i);
                return;
        }
 
@@ -1005,16 +1022,16 @@ hammer2_chain_load_async(hammer2_chain_t *chain,
            chain->bytes == hammer2_devblksize(chain->bytes)) {
                error = hammer2_io_new(hmp, bref->data_off, chain->bytes, &dio);
                KKASSERT(error == 0);
-               callback(dio, chain, arg_p, arg_o);
+               callback(dio, cluster, chain, arg_p, (off_t)i);
                return;
        }
 
        /*
         * Otherwise issue a read
         */
-       adjreadcounter(&chain->bref, chain->bytes);
+       hammer2_adjreadcounter(&chain->bref, chain->bytes);
        hammer2_io_breadcb(hmp, bref->data_off, chain->bytes,
-                          callback, chain, arg_p, arg_o);
+                          callback, cluster, chain, arg_p, (off_t)i);
 }
 
 /*
@@ -1291,7 +1308,11 @@ hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip,
        *chainp = chain;
 }
 
+#if 0
+
 /*
+ * REMOVED - see cluster code
+ *
  * Set a chain modified, making it read-write and duplicating it if necessary.
  * This function will assign a new physical block to the chain if necessary
  *
@@ -1322,6 +1343,8 @@ hammer2_chain_modify_ip(hammer2_trans_t *trans, hammer2_inode_t *ip,
        return(&ip->chain->data->ipdata);
 }
 
+#endif
+
 void
 hammer2_chain_modify(hammer2_trans_t *trans, hammer2_chain_t **chainp,
                     int flags)
@@ -1387,7 +1410,7 @@ hammer2_chain_modify(hammer2_trans_t *trans, hammer2_chain_t **chainp,
        if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) {
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
                hammer2_chain_ref(chain);
-               hammer2_chain_memory_inc(chain->pmp);
+               hammer2_pfs_memory_inc(chain->pmp);
        }
        if ((chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) == 0) {
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSH_CREATE);
@@ -1494,7 +1517,7 @@ hammer2_chain_modify(hammer2_trans_t *trans, hammer2_chain_t **chainp,
                        error = hammer2_io_bread(hmp, chain->bref.data_off,
                                                 chain->bytes, &dio);
                }
-               adjreadcounter(&chain->bref, chain->bytes);
+               hammer2_adjreadcounter(&chain->bref, chain->bytes);
                KKASSERT(error == 0);
 
                bdata = hammer2_io_data(dio, chain->bref.data_off);
@@ -1908,7 +1931,7 @@ hammer2_chain_getparent(hammer2_chain_t **parentp, int how)
 hammer2_chain_t *
 hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp,
                     hammer2_key_t key_beg, hammer2_key_t key_end,
-                    int *cache_indexp, int flags)
+                    int *cache_indexp, int flags, int *ddflagp)
 {
        hammer2_mount_t *hmp;
        hammer2_chain_t *parent;
@@ -1927,6 +1950,7 @@ hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp,
        int maxloops = 300000;
        int wasdup;
 
+       *ddflagp = 0;
        if (flags & HAMMER2_LOOKUP_ALWAYS) {
                how_maybe = how_always;
                how = HAMMER2_RESOLVE_ALWAYS;
@@ -1984,6 +2008,7 @@ again:
                        else
                                hammer2_chain_lock(parent, how_always);
                        *key_nextp = key_end + 1;
+                       *ddflagp = 1;
                        return (parent);
                }
                base = &parent->data->ipdata.u.blockset.blockref[0];
@@ -2192,6 +2217,7 @@ hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain,
 {
        hammer2_chain_t *parent;
        int how_maybe;
+       int ddflag;
 
        /*
         * Calculate locking flags for upward recursion.
@@ -2245,7 +2271,7 @@ hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain,
         */
        return (hammer2_chain_lookup(parentp, key_nextp,
                                     key_beg, key_end,
-                                    cache_indexp, flags));
+                                    cache_indexp, flags, &ddflag));
 }
 
 /*
@@ -3187,74 +3213,6 @@ hammer2_chain_delete_duplicate(hammer2_trans_t *trans, hammer2_chain_t **chainp,
 }
 
 /*
- * Create a snapshot of the specified {parent, ochain} with the specified
- * label.  The originating hammer2_inode must be exclusively locked for
- * safety.
- *
- * The ioctl code has already synced the filesystem.
- */
-int
-hammer2_chain_snapshot(hammer2_trans_t *trans, hammer2_chain_t **ochainp,
-                      hammer2_ioc_pfs_t *pfs)
-{
-       hammer2_mount_t *hmp;
-       hammer2_chain_t *ochain = *ochainp;
-       hammer2_chain_t *nchain;
-       hammer2_inode_data_t *ipdata;
-       hammer2_inode_t *nip;
-       size_t name_len;
-       hammer2_key_t lhc;
-       struct vattr vat;
-       uuid_t opfs_clid;
-       int error;
-
-       kprintf("snapshot %s ochain->refs %d ochain->flags %08x\n",
-               pfs->name, ochain->refs, ochain->flags);
-
-       name_len = strlen(pfs->name);
-       lhc = hammer2_dirhash(pfs->name, name_len);
-
-       hmp = ochain->hmp;
-       opfs_clid = ochain->data->ipdata.pfs_clid;
-
-       *ochainp = ochain;
-
-       /*
-        * Create the snapshot directory under the super-root
-        *
-        * Set PFS type, generate a unique filesystem id, and generate
-        * a cluster id.  Use the same clid when snapshotting a PFS root,
-        * which theoretically allows the snapshot to be used as part of
-        * the same cluster (perhaps as a cache).
-        *
-        * Copy the (flushed) ochain's blockref array.  Theoretically we
-        * could use chain_duplicate() but it becomes difficult to disentangle
-        * the shared core so for now just brute-force it.
-        */
-       VATTR_NULL(&vat);
-       vat.va_type = VDIR;
-       vat.va_mode = 0755;
-       nchain = NULL;
-       nip = hammer2_inode_create(trans, hmp->sroot, &vat, proc0.p_ucred,
-                                  pfs->name, name_len, &nchain, &error);
-
-       if (nip) {
-               ipdata = hammer2_chain_modify_ip(trans, nip, &nchain, 0);
-               ipdata->pfs_type = HAMMER2_PFSTYPE_SNAPSHOT;
-               kern_uuidgen(&ipdata->pfs_fsid, 1);
-               if (ochain->flags & HAMMER2_CHAIN_PFSROOT)
-                       ipdata->pfs_clid = opfs_clid;
-               else
-                       kern_uuidgen(&ipdata->pfs_clid, 1);
-               atomic_set_int(&nchain->flags, HAMMER2_CHAIN_PFSROOT);
-               ipdata->u.blockset = ochain->data->ipdata.u.blockset;
-
-               hammer2_inode_unlock_ex(nip, nchain);
-       }
-       return (error);
-}
-
-/*
  * Create an indirect block that covers one or more of the elements in the
  * current parent.  Either returns the existing parent with no locking or
  * ref changes or returns the new indirect block locked and referenced
@@ -4485,119 +4443,27 @@ hammer2_chain_wait(hammer2_chain_t *chain)
 }
 
 /*
- * Manage excessive memory resource use for chain and related
- * structures.
+ * chain may have been moved around by the create.
  */
 void
-hammer2_chain_memory_wait(hammer2_pfsmount_t *pmp)
-{
-       long waiting;
-       long count;
-       long limit;
-#if 0
-       static int zzticks;
-#endif
-
-       /*
-        * Atomic check condition and wait.  Also do an early speedup of
-        * the syncer to try to avoid hitting the wait.
-        */
-       for (;;) {
-               waiting = pmp->inmem_dirty_chains;
-               cpu_ccfence();
-               count = waiting & HAMMER2_DIRTYCHAIN_MASK;
-
-               limit = pmp->mp->mnt_nvnodelistsize / 10;
-               if (limit < hammer2_limit_dirty_chains)
-                       limit = hammer2_limit_dirty_chains;
-               if (limit < 1000)
-                       limit = 1000;
-
-#if 0
-               if ((int)(ticks - zzticks) > hz) {
-                       zzticks = ticks;
-                       kprintf("count %ld %ld\n", count, limit);
-               }
-#endif
-
-               /*
-                * Block if there are too many dirty chains present, wait
-                * for the flush to clean some out.
-                */
-               if (count > limit) {
-                       tsleep_interlock(&pmp->inmem_dirty_chains, 0);
-                       if (atomic_cmpset_long(&pmp->inmem_dirty_chains,
-                                              waiting,
-                                      waiting | HAMMER2_DIRTYCHAIN_WAITING)) {
-                               speedup_syncer(pmp->mp);
-                               tsleep(&pmp->inmem_dirty_chains, PINTERLOCKED,
-                                      "chnmem", hz);
-                       }
-                       continue;       /* loop on success or fail */
-               }
-
-               /*
-                * Try to start an early flush before we are forced to block.
-                */
-               if (count > limit * 7 / 10)
-                       speedup_syncer(pmp->mp);
-               break;
-       }
-}
-
-void
-hammer2_chain_memory_inc(hammer2_pfsmount_t *pmp)
-{
-       if (pmp)
-               atomic_add_long(&pmp->inmem_dirty_chains, 1);
-}
-
-void
-hammer2_chain_memory_wakeup(hammer2_pfsmount_t *pmp)
+hammer2_chain_refactor(hammer2_chain_t **chainp)
 {
-       long waiting;
-
-       if (pmp == NULL)
-               return;
-
-       for (;;) {
-               waiting = pmp->inmem_dirty_chains;
-               cpu_ccfence();
-               if (atomic_cmpset_long(&pmp->inmem_dirty_chains,
-                                      waiting,
-                                      (waiting - 1) &
-                                       ~HAMMER2_DIRTYCHAIN_WAITING)) {
-                       break;
-               }
-       }
-
-       if (waiting & HAMMER2_DIRTYCHAIN_WAITING)
-               wakeup(&pmp->inmem_dirty_chains);
-}
+       hammer2_chain_t *chain = *chainp;
+       hammer2_chain_core_t *core;
 
-static
-void
-adjreadcounter(hammer2_blockref_t *bref, size_t bytes)
-{
-       long *counterp;
+       core = chain->core;
+       while (chain->flags & HAMMER2_CHAIN_DUPLICATED) {
+               spin_lock(&core->cst.spin);
+               chain = TAILQ_NEXT(chain, core_entry);
+               while (chain->flags & HAMMER2_CHAIN_DUPLICATED)
+                       chain = TAILQ_NEXT(chain, core_entry);
+               hammer2_chain_ref(chain);
+               spin_unlock(&core->cst.spin);
+               KKASSERT(chain->core == core);
 
-       switch(bref->type) {
-       case HAMMER2_BREF_TYPE_DATA:
-               counterp = &hammer2_iod_file_read;
-               break;
-       case HAMMER2_BREF_TYPE_INODE:
-               counterp = &hammer2_iod_meta_read;
-               break;
-       case HAMMER2_BREF_TYPE_INDIRECT:
-               counterp = &hammer2_iod_indr_read;
-               break;
-       case HAMMER2_BREF_TYPE_FREEMAP_NODE:
-       case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
-               counterp = &hammer2_iod_fmap_read;
-               break;
-       default:
-               counterp = &hammer2_iod_volu_read;
-               break;
+               hammer2_chain_unlock(*chainp);
+               hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS |
+                                         HAMMER2_RESOLVE_NOREF); /* eat ref */
+               *chainp = chain;
        }
-       *counterp += bytes;
 }
diff --git a/sys/vfs/hammer2/hammer2_cluster.c b/sys/vfs/hammer2/hammer2_cluster.c
new file mode 100644 (file)
index 0000000..ad5a8fc
--- /dev/null
@@ -0,0 +1,784 @@
+/*
+ * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * The cluster module collects multiple chains representing the same
+ * information into a single entity.  It allows direct access to media
+ * data as long as it is not blockref array data.  Meaning, basically,
+ * just inode and file data.
+ *
+ * This module also handles I/O dispatch, status rollup, and various
+ * mastership arrangements including quorum operations.  It effectively
+ * presents one topology to the vnops layer.
+ *
+ * Many of the API calls mimic chain API calls but operate on clusters
+ * instead of chains.  Please see hammer2_chain.c for more complete code
+ * documentation of the API functions.
+ */
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/lock.h>
+#include <sys/uuid.h>
+
+#include "hammer2.h"
+
+/*
+ * Return the byte count recorded on the cluster's focus chain.
+ */
+u_int
+hammer2_cluster_bytes(hammer2_cluster_t *cluster)
+{
+       hammer2_chain_t *focus = cluster->focus;
+
+       return (focus->bytes);
+}
+
+/*
+ * Return the blockref type of the cluster's focus chain.
+ */
+uint8_t
+hammer2_cluster_type(hammer2_cluster_t *cluster)
+{
+       hammer2_chain_t *focus = cluster->focus;
+
+       return (focus->bref.type);
+}
+
+/*
+ * Return the data pointer of the cluster's focus chain.
+ */
+hammer2_media_data_t *
+hammer2_cluster_data(hammer2_cluster_t *cluster)
+{
+       hammer2_chain_t *focus = cluster->focus;
+
+       return (focus->data);
+}
+
+/*
+ * Return 1 if the focus chain has HAMMER2_CHAIN_MODIFIED set, else 0.
+ */
+int
+hammer2_cluster_modified(hammer2_cluster_t *cluster)
+{
+       if (cluster->focus->flags & HAMMER2_CHAIN_MODIFIED)
+               return (1);
+       return (0);
+}
+
+/*
+ * Return 1 if the focus chain has HAMMER2_CHAIN_UNLINKED set, else 0.
+ */
+int
+hammer2_cluster_unlinked(hammer2_cluster_t *cluster)
+{
+       if (cluster->focus->flags & HAMMER2_CHAIN_UNLINKED)
+               return (1);
+       return (0);
+}
+
+/*
+ * Copy the focus chain's blockref into (*bref), then zero the copy's
+ * data_off so the media offset is not exposed to the caller.
+ */
+void
+hammer2_cluster_bref(hammer2_cluster_t *cluster, hammer2_blockref_t *bref)
+{
+       hammer2_chain_t *focus = cluster->focus;
+
+       *bref = focus->bref;
+       bref->data_off = 0;     /* should be opaque to caller */
+}
+
+/*
+ * Atomically set (flags) in the flags word of every chain in the cluster.
+ */
+void
+hammer2_cluster_set_chainflags(hammer2_cluster_t *cluster, uint32_t flags)
+{
+       hammer2_chain_t *chain;
+       int n;
+
+       for (n = 0; n < cluster->nchains; ++n) {
+               chain = cluster->array[n];
+               atomic_set_int(&chain->flags, flags);
+       }
+}
+
+/*
+ * Call hammer2_chain_setsubmod() on every chain in the cluster.
+ */
+void
+hammer2_cluster_setsubmod(hammer2_trans_t *trans, hammer2_cluster_t *cluster)
+{
+       hammer2_chain_t *chain;
+       int n;
+
+       for (n = 0; n < cluster->nchains; ++n) {
+               chain = cluster->array[n];
+               hammer2_chain_setsubmod(trans, chain);
+       }
+}
+
+/*
+ * Allocates a cluster and its underlying chain structures.  One chain is
+ * allocated for each chain in pmp->cluster, against the hmp of the
+ * corresponding pmp chain, and is initialized from (bref).  The cluster
+ * and underlying chains will have one ref and the cluster's focus is set
+ * to array[0].
+ *
+ * Panics if (bref) is a VOLUME or FREEMAP type or an unrecognized type.
+ *
+ * NOTE(review): the original comment stated that the underlying chains
+ *              will be locked, but no lock call is visible in this
+ *              function; confirm against hammer2_chain_alloc().
+ */
+hammer2_cluster_t *
+hammer2_cluster_alloc(hammer2_pfsmount_t *pmp,
+                     hammer2_trans_t *trans, hammer2_blockref_t *bref)
+{
+       hammer2_cluster_t *cluster;
+       hammer2_chain_t *chain;
+       /* size is 2^radix, the radix being the low bits of data_off */
+       u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
+       int i;
+
+       KKASSERT(pmp != NULL);
+
+       /*
+        * Validate the blockref type.  Only the types in the first group
+        * are legal here; everything else panics.
+        */
+       switch(bref->type) {
+       case HAMMER2_BREF_TYPE_INODE:
+       case HAMMER2_BREF_TYPE_INDIRECT:
+       case HAMMER2_BREF_TYPE_FREEMAP_NODE:
+       case HAMMER2_BREF_TYPE_DATA:
+       case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
+               /*
+                * Chains are really only associated with the hmp but we
+                * maintain a pmp association for per-mount memory tracking
+                * purposes.
+                *
+                * NOTE(review): the original text said the pmp can be
+                *              NULL, but pmp is asserted non-NULL above.
+                */
+               break;
+       case HAMMER2_BREF_TYPE_VOLUME:
+       case HAMMER2_BREF_TYPE_FREEMAP:
+               chain = NULL;
+               panic("hammer2_cluster_alloc volume type illegal for op");
+       default:
+               chain = NULL;
+               panic("hammer2_cluster_alloc: unrecognized blockref type: %d",
+                     bref->type);
+       }
+
+       /* zeroed allocation; array[], nchains and focus are filled in below */
+       cluster = kmalloc(sizeof(*cluster), M_HAMMER2, M_WAITOK | M_ZERO);
+       cluster->refs = 1;
+
+       for (i = 0; i < pmp->cluster.nchains; ++i) {
+               chain = hammer2_chain_alloc(pmp->cluster.array[i]->hmp, pmp,
+                                           trans, bref);
+               /* (re)initialize the basic chain fields explicitly */
+               chain->pmp = pmp;
+               chain->hmp = pmp->cluster.array[i]->hmp;
+               chain->bref = *bref;
+               chain->bytes = bytes;
+               chain->refs = 1;
+               chain->flags = HAMMER2_CHAIN_ALLOCATED;
+               chain->delete_tid = HAMMER2_MAX_TID;
+
+               /*
+                * Set modify_tid if a transaction is creating the inode.
+                * Enforce update_lo = 0 so nearby transactions do not think
+                * it has been flushed when it hasn't.
+                *
+                * NOTE: When loading a chain from backing store or creating a
+                *       snapshot, trans will be NULL and the caller is
+                *       responsible for setting these fields.
+                */
+               if (trans) {
+                       chain->modify_tid = trans->sync_tid;
+                       chain->update_lo = 0;
+               }
+               cluster->array[i] = chain;
+       }
+       cluster->nchains = i;
+       cluster->pmp = pmp;
+       cluster->focus = cluster->array[0];
+
+       return (cluster);
+}
+
+/*
+ * Run hammer2_chain_core_alloc() on each chain of ncluster, pairing it
+ * with the chain at the same index in ocluster.  The loop is bounded by
+ * ocluster->nchains.
+ *
+ * The per-chain call either associates an existing core with the chain or
+ * allocates a new core.  The core is not locked and no additional refs
+ * on the chain are made.
+ *
+ * When chains are delete-duplicated during flushes we insert nchain on
+ * the ownerq after ochain instead of at the end in order to give the
+ * drop code visibility in the correct order, otherwise drops can be missed.
+ */
+void
+hammer2_cluster_core_alloc(hammer2_trans_t *trans,
+                          hammer2_cluster_t *ncluster,
+                          hammer2_cluster_t *ocluster)
+{
+       hammer2_chain_t *nchain;
+       hammer2_chain_t *ochain;
+       int n;
+
+       for (n = 0; n < ocluster->nchains; ++n) {
+               nchain = ncluster->array[n];
+               ochain = ocluster->array[n];
+               hammer2_chain_core_alloc(trans, nchain, ochain);
+       }
+}
+
+/*
+ * Bump the cluster's ref count and take a ref on every underlying chain.
+ *
+ * The chains are ref'd as well so that a ref/unlock sequence can later
+ * re-lock them.
+ */
+void
+hammer2_cluster_ref(hammer2_cluster_t *cluster)
+{
+       int n;
+
+       atomic_add_int(&cluster->refs, 1);
+
+       n = 0;
+       while (n < cluster->nchains) {
+               hammer2_chain_ref(cluster->array[n]);
+               ++n;
+       }
+}
+
+/*
+ * Drop the caller's reference to the cluster.  When the ref count drops to
+ * zero this function frees the cluster and drops all underlying chains.
+ *
+ * One ref is dropped on every non-NULL chain regardless of whether this
+ * is the last cluster ref.  NULL array entries are skipped: the lookup,
+ * next and scan functions store NULL into array[] for chains that were
+ * not found and call this function when every entry is NULL.
+ */
+void
+hammer2_cluster_drop(hammer2_cluster_t *cluster)
+{
+       hammer2_chain_t *chain;
+       int i;
+
+       for (i = 0; i < cluster->nchains; ++i) {
+               chain = cluster->array[i];
+               if (chain)
+                       hammer2_chain_drop(chain);
+               /* about to free the cluster, clear the slot for safety */
+               if (cluster->refs == 1)
+                       cluster->array[i] = NULL;
+       }
+       if (atomic_fetchadd_int(&cluster->refs, -1) != 1) {
+               /* not the last ref, the cluster stays allocated */
+               KKASSERT(cluster->refs > 0);
+               return;
+       }
+       kfree(cluster, M_HAMMER2);
+}
+
+/*
+ * Sleep on the address of the cluster's focus chain, passing tsleep() a
+ * timeout argument of 1.
+ */
+void
+hammer2_cluster_wait(hammer2_cluster_t *cluster)
+{
+       void *ident = cluster->focus;
+
+       tsleep(ident, 0, "h2clcw", 1);
+}
+
+/*
+ * Lock a cluster.  The cluster's ref count is bumped and then each chain
+ * is locked in array order via hammer2_chain_lock(chain, how).
+ *
+ * If a chain lock returns an error the chains locked so far are unlocked
+ * in reverse order, the cluster ref is backed out, and the error is
+ * returned.  Returns 0 on success.
+ */
+int
+hammer2_cluster_lock(hammer2_cluster_t *cluster, int how)
+{
+       int error = 0;
+       int n;
+
+       atomic_add_int(&cluster->refs, 1);
+
+       for (n = 0; n < cluster->nchains; ++n) {
+               error = hammer2_chain_lock(cluster->array[n], how);
+               if (error == 0)
+                       continue;
+
+               /* failure, back out everything acquired so far */
+               while (--n >= 0)
+                       hammer2_chain_unlock(cluster->array[n]);
+               atomic_add_int(&cluster->refs, -1);
+               break;
+       }
+       return (error);
+}
+
+/*
+ * Replace the contents of dst with src, adding a reference to src's chains.
+ * dst is assumed to already have a ref (asserted to be exactly one) and
+ * any chains present in dst are assumed to be locked and will be unlocked.
+ *
+ * On return dst has src's chain pointers, nchains and focus.  src itself
+ * is not modified.
+ *
+ * If the chains in src are locked, only one of (src) or (dst) should be
+ * considered locked by the caller after return, not both.
+ */
+void
+hammer2_cluster_replace(hammer2_cluster_t *dst, hammer2_cluster_t *src)
+{
+       int i;
+
+       KKASSERT(dst->refs == 1);
+
+       /*
+        * Slots covered by src: the src chain is ref'd before the old dst
+        * chain in the same slot is unlocked, then the slot is overwritten.
+        */
+       for (i = 0; i < src->nchains; ++i) {
+               hammer2_chain_ref(src->array[i]);
+               if (i < dst->nchains)
+                       hammer2_chain_unlock(dst->array[i]);
+               dst->array[i] = src->array[i];
+       }
+
+       /*
+        * Any remaining dst slots beyond src->nchains are unlocked and
+        * cleared.
+        */
+       while (i < dst->nchains) {
+               hammer2_chain_unlock(dst->array[i]);
+               dst->array[i] = NULL;
+               ++i;
+       }
+       dst->nchains = src->nchains;
+       dst->focus = src->focus;
+}
+
+/*
+ * Replace the contents of the locked destination with the contents of the
+ * locked source.  Destination must have one ref (asserted).
+ *
+ * Returns with the destination still with one ref and the copied chains
+ * with an additional lock (representing their state on the destination).
+ * The original chains associated with the destination are unlocked.
+ *
+ * On return dst has src's chain pointers, nchains and focus.  src itself
+ * is not modified.
+ */
+void
+hammer2_cluster_replace_locked(hammer2_cluster_t *dst, hammer2_cluster_t *src)
+{
+       int i;
+
+       KKASSERT(dst->refs == 1);
+
+       /*
+        * Slots covered by src: the src chain gets another lock (how == 0)
+        * before the old dst chain in the same slot is unlocked, then the
+        * slot is overwritten.
+        */
+       for (i = 0; i < src->nchains; ++i) {
+               hammer2_chain_lock(src->array[i], 0);
+               if (i < dst->nchains)
+                       hammer2_chain_unlock(dst->array[i]);
+               dst->array[i] = src->array[i];
+       }
+
+       /*
+        * Any remaining dst slots beyond src->nchains are unlocked and
+        * cleared.
+        */
+       while (i < dst->nchains) {
+               hammer2_chain_unlock(dst->array[i]);
+               dst->array[i] = NULL;
+               ++i;
+       }
+       dst->nchains = src->nchains;
+       dst->focus = src->focus;
+}
+
+/*
+ * Copy a cluster, returning a ref'd cluster (ref count of 1).  The new
+ * cluster inherits the pmp, nchains and focus of the original.
+ *
+ * If with_chains is non-zero the chain pointers are copied as well and
+ * all underlying chains are ref'd, but not locked.
+ *
+ * If with_chains is 0 the returned cluster has a ref count of 1 but
+ * no chains will be assigned (array[] is left zeroed).  Note that nchains
+ * and focus are still copied from the original in this case and no ref
+ * is taken on the focus chain.
+ */
+hammer2_cluster_t *
+hammer2_cluster_copy(hammer2_cluster_t *ocluster, int with_chains)
+{
+       hammer2_pfsmount_t *pmp = ocluster->pmp;
+       hammer2_cluster_t *ncluster;
+       int i;
+
+       ncluster = kmalloc(sizeof(*ncluster), M_HAMMER2, M_WAITOK | M_ZERO);
+       ncluster->pmp = pmp;
+       ncluster->nchains = ocluster->nchains;
+       ncluster->focus = ocluster->focus;
+
+       /*
+        * The caller always owns one ref on the returned cluster, with or
+        * without chains, so that a later unlock/drop frees it properly.
+        */
+       ncluster->refs = 1;
+
+       if (with_chains) {
+               for (i = 0; i < ocluster->nchains; ++i) {
+                       ncluster->array[i] = ocluster->array[i];
+                       hammer2_chain_ref(ncluster->array[i]);
+               }
+       }
+       return (ncluster);
+}
+
+/*
+ * Unlock and deref a cluster.  The cluster is destroyed if this is the
+ * last ref.
+ *
+ * Every non-NULL chain is unlocked.  NULL array entries are skipped: the
+ * lookup and next functions can store NULL into individual array[] slots
+ * of a cluster that is later handed to this function.
+ */
+void
+hammer2_cluster_unlock(hammer2_cluster_t *cluster)
+{
+       hammer2_chain_t *chain;
+       int i;
+
+       for (i = 0; i < cluster->nchains; ++i) {
+               chain = cluster->array[i];
+               if (chain)
+                       hammer2_chain_unlock(chain);
+       }
+       if (atomic_fetchadd_int(&cluster->refs, -1) == 1) {
+               /* last ref, clear the slots and free the cluster */
+               for (i = 0; i < cluster->nchains; ++i)  /* safety */
+                       cluster->array[i] = NULL;
+               kfree(cluster, M_HAMMER2);
+               return;
+       }
+       KKASSERT(cluster->refs > 0);
+}
+
+/*
+ * Run hammer2_chain_refactor() on every chain slot of a locked cluster.
+ * Each slot is passed by address, so the slot's chain pointer may be
+ * replaced.  The focus is re-derived from array[0] afterwards.
+ */
+void
+hammer2_cluster_refactor(hammer2_cluster_t *cluster)
+{
+       int n = 0;
+
+       while (n < cluster->nchains) {
+               hammer2_chain_refactor(&cluster->array[n]);
+               ++n;
+       }
+       cluster->focus = cluster->array[0];
+}
+
+/*
+ * Resize the physical storage allocation of each chain in the cluster
+ * via hammer2_chain_resize(), pairing it with the parent chain at the
+ * same index in cparent.  Chain slots are passed by address and may be
+ * replaced, so the focus is re-derived from array[0] afterwards.
+ *
+ * cparent and cluster must have the same pmp and the same number of
+ * chains (asserted).
+ */
+void
+hammer2_cluster_resize(hammer2_trans_t *trans, hammer2_inode_t *ip,
+                      hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
+                      int nradix, int flags)
+{
+       int n;
+
+       KKASSERT(cparent->pmp == cluster->pmp);         /* can be NULL */
+       KKASSERT(cparent->nchains == cluster->nchains);
+
+       n = 0;
+       while (n < cluster->nchains) {
+               hammer2_chain_resize(trans, ip, cparent->array[n],
+                                    &cluster->array[n], nradix, flags);
+               ++n;
+       }
+       cluster->focus = cluster->array[0];
+}
+
+/*
+ * Set an inode's cluster modified.
+ *
+ * The inode is flagged HAMMER2_INODE_MODIFIED, the cluster's chains are
+ * adjusted for modification via hammer2_cluster_modify(), and the inode
+ * is then repointed at the (possibly replaced) cluster contents.  If the
+ * inode has a vnode it is marked dirty.
+ *
+ * Returns a pointer to the inode data of the cluster's focus chain.
+ */
+hammer2_inode_data_t *
+hammer2_cluster_modify_ip(hammer2_trans_t *trans, hammer2_inode_t *ip,
+                         hammer2_cluster_t *cluster, int flags)
+{
+       hammer2_inode_data_t *ipdata;
+
+       atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED);
+       hammer2_cluster_modify(trans, cluster, flags);
+       hammer2_inode_repoint(ip, NULL, cluster);
+
+       if (ip->vp != NULL)
+               vsetisdirty(ip->vp);
+
+       ipdata = &hammer2_cluster_data(cluster)->ipdata;
+       return (ipdata);
+}
+
+/*
+ * Run hammer2_chain_modify() on every chain slot of the cluster so the
+ * chains can be modified.  Slots are passed by address and may be
+ * replaced, so the focus is re-derived from array[0] afterwards.
+ */
+void
+hammer2_cluster_modify(hammer2_trans_t *trans, hammer2_cluster_t *cluster,
+                      int flags)
+{
+       int n = 0;
+
+       while (n < cluster->nchains) {
+               hammer2_chain_modify(trans, &cluster->array[n], flags);
+               ++n;
+       }
+       cluster->focus = cluster->array[0];
+}
+
+/*
+ * Lookup initialization/completion API
+ *
+ * lookup_init allocates a new cluster that aliases the chains of
+ * (cparent) and locks it independently.  The lock is shared when
+ * HAMMER2_LOOKUP_SHARED is set in (flags), and data is always resolved.
+ * The return value of the lock call is not checked.
+ */
+hammer2_cluster_t *
+hammer2_cluster_lookup_init(hammer2_cluster_t *cparent, int flags)
+{
+       hammer2_cluster_t *cluster;
+       int how;
+       int n;
+
+       cluster = kmalloc(sizeof(*cluster), M_HAMMER2, M_WAITOK | M_ZERO);
+       cluster->pmp = cparent->pmp;                    /* can be NULL */
+       for (n = 0; n < cparent->nchains; ++n)
+               cluster->array[n] = cparent->array[n];
+       cluster->nchains = cparent->nchains;
+       cluster->focus = cluster->array[0];
+
+       /*
+        * Independently lock (this will also give cluster 1 ref)
+        */
+       how = HAMMER2_RESOLVE_ALWAYS;
+       if (flags & HAMMER2_LOOKUP_SHARED)
+               how |= HAMMER2_RESOLVE_SHARED;
+       hammer2_cluster_lock(cluster, how);
+
+       return (cluster);
+}
+
+/*
+ * Finish a lookup sequence by unlocking (cparent).  A NULL cparent is
+ * accepted and ignored.
+ */
+void
+hammer2_cluster_lookup_done(hammer2_cluster_t *cparent)
+{
+       if (cparent == NULL)
+               return;
+       hammer2_cluster_unlock(cparent);
+}
+
+/*
+ * Locate first match or overlap under parent, return a new cluster
+ *
+ * A hammer2_chain_lookup() is issued for each chain in (cparent), using
+ * the same key range and flags, and the results are collected into a
+ * newly allocated cluster with one ref.  The cparent chain slots are
+ * passed by address and may be replaced by the lookup.
+ *
+ * *key_nextp is set to the smallest of its original value and the
+ * key_next values returned by the individual lookups.  *ddflagp is set to
+ * the ddflag returned by the chain lookups, which is asserted to be the
+ * same for all chains.
+ *
+ * Returns NULL if every chain lookup returned NULL.  Otherwise individual
+ * array entries (including the focus, array[0]) may still be NULL.
+ */
+hammer2_cluster_t *
+hammer2_cluster_lookup(hammer2_cluster_t *cparent, hammer2_key_t *key_nextp,
+                    hammer2_key_t key_beg, hammer2_key_t key_end,
+                    int flags, int *ddflagp)
+{
+       hammer2_pfsmount_t *pmp;
+       hammer2_cluster_t *cluster;
+       hammer2_chain_t *chain;
+       hammer2_key_t key_accum;
+       hammer2_key_t key_next;
+       int null_count;
+       int ddflag;
+       int i;
+       uint8_t bref_type;
+       u_int bytes;
+
+       pmp = cparent->pmp;                             /* can be NULL */
+       key_accum = *key_nextp;
+       null_count = 0;
+       bref_type = 0;          /* 0 == not yet recorded */
+       bytes = 0;              /* 0 == not yet recorded */
+
+       cluster = kmalloc(sizeof(*cluster), M_HAMMER2, M_WAITOK | M_ZERO);
+       cluster->pmp = pmp;                             /* can be NULL */
+       cluster->refs = 1;
+       *ddflagp = 0;
+
+       for (i = 0; i < cparent->nchains; ++i) {
+               /* each lookup starts from the caller's original key_next */
+               key_next = *key_nextp;
+               chain = hammer2_chain_lookup(&cparent->array[i], &key_next,
+                                            key_beg, key_end,
+                                            &cparent->cache_index[i],
+                                            flags, &ddflag);
+               cluster->array[i] = chain;
+               if (chain == NULL) {
+                       ++null_count;
+               } else {
+                       /*
+                        * Record the type and size of the first chain
+                        * found and require all others to match.
+                        */
+                       if (bref_type == 0)
+                               bref_type = chain->bref.type;
+                       KKASSERT(bref_type == chain->bref.type);
+                       if (bytes == 0)
+                               bytes = chain->bytes;
+                       KKASSERT(bytes == chain->bytes);
+               }
+               /* accumulate the lowest next-key across all chains */
+               if (key_accum > key_next)
+                       key_accum = key_next;
+               /* all chains must agree on ddflag */
+               KKASSERT(i == 0 || *ddflagp == ddflag);
+               *ddflagp = ddflag;
+       }
+       *key_nextp = key_accum;
+       cluster->nchains = i;
+       cluster->focus = cluster->array[0];
+
+       /* nothing found on any chain, dispose of the cluster */
+       if (null_count == i) {
+               hammer2_cluster_drop(cluster);
+               cluster = NULL;
+       }
+
+       return (cluster);
+}
+
+/*
+ * Locate next match or overlap under parent, replace cluster
+ *
+ * A hammer2_chain_next() is issued for each chain in (cparent), passing
+ * the chain at the same index in (cluster) as the current position, and
+ * the result is stored back into (cluster).  The cparent chain slots are
+ * passed by address and may be replaced.
+ *
+ * *key_nextp is only read here; the lowest next-key is accumulated in a
+ * local but is not stored back.
+ *
+ * Returns (cluster) with its focus reset to array[0], or drops (cluster)
+ * and returns NULL if every chain iteration returned NULL.
+ */
+hammer2_cluster_t *
+hammer2_cluster_next(hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
+                    hammer2_key_t *key_nextp,
+                    hammer2_key_t key_beg, hammer2_key_t key_end, int flags)
+{
+       hammer2_chain_t *chain;
+       hammer2_key_t key_accum;
+       hammer2_key_t key_next;
+       int null_count;
+       int i;
+
+       key_accum = *key_nextp;
+       null_count = 0;
+
+       for (i = 0; i < cparent->nchains; ++i) {
+               /* each iteration starts from the caller's key_next */
+               key_next = *key_nextp;
+               chain = hammer2_chain_next(&cparent->array[i],
+                                          cluster->array[i],
+                                          &key_next,
+                                          key_beg, key_end,
+                                          &cparent->cache_index[i], flags);
+               cluster->array[i] = chain;
+               if (chain == NULL)
+                       ++null_count;
+               if (key_accum > key_next)
+                       key_accum = key_next;
+       }
+
+       /* iteration exhausted on every chain, dispose of the cluster */
+       if (null_count == i) {
+               hammer2_cluster_drop(cluster);
+               cluster = NULL;
+       } else {
+               cluster->focus = cluster->array[0];
+       }
+       return(cluster);
+}
+
+/*
+ * The raw scan function is similar to lookup/next but does not seek to a key.
+ * Blockrefs are iterated via first_chain = (parent, NULL) and
+ * next_chain = (parent, chain).
+ *
+ * A hammer2_chain_scan() is issued for each chain in (cparent), passing
+ * the chain at the same index in (cluster), and the result is stored back
+ * into (cluster).  If every scan returns NULL the cluster is dropped and
+ * NULL is returned.
+ *
+ * The passed-in parent must be locked and its data resolved.  The returned
+ * chain will be locked.  Pass chain == NULL to acquire the first sub-chain
+ * under parent and then iterate with the passed-in chain (which this
+ * function will unlock).
+ *
+ * NOTE(review): the paragraph above describes the chain-level API.  This
+ *              function dereferences (cluster) unconditionally, so a NULL
+ *              cluster cannot be passed here, and cluster->focus is not
+ *              updated after the scan; confirm intended usage.
+ */
+hammer2_cluster_t *
+hammer2_cluster_scan(hammer2_cluster_t *cparent, hammer2_cluster_t *cluster,
+                    int flags)
+{
+       hammer2_chain_t *chain;
+       int null_count;
+       int i;
+
+       null_count = 0;
+
+       for (i = 0; i < cparent->nchains; ++i) {
+               chain = hammer2_chain_scan(cparent->array[i],
+                                          cluster->array[i],
+                                          &cparent->cache_index[i], flags);
+               cluster->array[i] = chain;
+               if (chain == NULL)
+                       ++null_count;
+       }
+
+       /* scan exhausted on every chain, dispose of the cluster */
+       if (null_count == i) {
+               hammer2_cluster_drop(cluster);
+               cluster = NULL;
+       }
+       return(cluster);
+}
+
+/*
+ * Create a new cluster using the specified key
+ *
+ * If *clusterp is NULL a new cluster with one ref is allocated, otherwise
+ * the passed-in cluster is reused.  A hammer2_chain_create() is issued
+ * under each chain in (cparent), passing the chain currently in the
+ * cluster's slot (NULL for a new cluster), and the result is stored back
+ * into the slot.  The cparent chain slots are passed by address and may
+ * be replaced.
+ *
+ * On return the cluster's nchains matches cparent's, its focus is
+ * array[0], and *clusterp points at the cluster.  Per-chain creation
+ * errors are asserted against; the function returns the last error
+ * value, or 0 if cparent has no chains.
+ */
+int
+hammer2_cluster_create(hammer2_trans_t *trans, hammer2_cluster_t *cparent,
+                    hammer2_cluster_t **clusterp,
+                    hammer2_key_t key, int keybits, int type, size_t bytes)
+{
+       hammer2_cluster_t *cluster;
+       hammer2_chain_t *chain;
+       hammer2_pfsmount_t *pmp;
+       int error;
+       int i;
+
+       pmp = trans->pmp;                               /* can be NULL */
+       error = 0;              /* defined result if cparent is empty */
+
+       if ((cluster = *clusterp) == NULL) {
+               cluster = kmalloc(sizeof(*cluster), M_HAMMER2,
+                                 M_WAITOK | M_ZERO);
+               cluster->pmp = pmp;                     /* can be NULL */
+               cluster->refs = 1;
+       }
+       for (i = 0; i < cparent->nchains; ++i) {
+               chain = cluster->array[i];
+               error = hammer2_chain_create(trans, &cparent->array[i], &chain,
+                                            key, keybits, type, bytes);
+               KKASSERT(error == 0);
+               cluster->array[i] = chain;
+       }
+
+       /*
+        * Record the number of chains.  A freshly allocated cluster is
+        * zeroed, so without this every later per-chain loop over the
+        * cluster would iterate zero times.
+        */
+       cluster->nchains = i;
+       cluster->focus = cluster->array[0];
+       *clusterp = cluster;
+
+       return error;
+}
+
+/*
+ * Duplicate a cluster under a new parent by running
+ * hammer2_chain_duplicate() on each chain, pairing it with the parent
+ * chain at the same index in cparent.  Both slots are passed by address
+ * and may be replaced, so the focus is re-derived from array[0].
+ */
+void
+hammer2_cluster_duplicate(hammer2_trans_t *trans, hammer2_cluster_t *cparent,
+                         hammer2_cluster_t *cluster, hammer2_blockref_t *bref,
+                         int snapshot, int duplicate_reason)
+{
+       int n = 0;
+
+       while (n < cluster->nchains) {
+               hammer2_chain_duplicate(trans,
+                                       &cparent->array[n], &cluster->array[n],
+                                       bref, snapshot, duplicate_reason);
+               ++n;
+       }
+       cluster->focus = cluster->array[0];
+}
+
+/*
+ * Delete-duplicate a cluster in-place by running
+ * hammer2_chain_delete_duplicate() on each chain slot.  Slots are passed
+ * by address and may be replaced, so the focus is re-derived from
+ * array[0].
+ */
+void
+hammer2_cluster_delete_duplicate(hammer2_trans_t *trans,
+                                hammer2_cluster_t *cluster, int flags)
+{
+       int n = 0;
+
+       while (n < cluster->nchains) {
+               hammer2_chain_delete_duplicate(trans, &cluster->array[n],
+                                              flags);
+               ++n;
+       }
+       cluster->focus = cluster->array[0];
+}
+
+/*
+ * Mark a cluster deleted by running hammer2_chain_delete() on each of
+ * its chains.
+ */
+void
+hammer2_cluster_delete(hammer2_trans_t *trans, hammer2_cluster_t *cluster,
+                      int flags)
+{
+       hammer2_chain_t *chain;
+       int n;
+
+       for (n = 0; n < cluster->nchains; ++n) {
+               chain = cluster->array[n];
+               hammer2_chain_delete(trans, chain, flags);
+       }
+}
+
+/*
+ * Create a snapshot of the specified {parent, ochain} with the specified
+ * label.  The originating hammer2_inode must be exclusively locked for
+ * safety.
+ *
+ * A new directory inode named pfs->name is created under the super-root
+ * of the hmp of ocluster's focus chain, marked as a snapshot PFS root,
+ * and given a copy of the blockset of ocluster's focus chain.
+ *
+ * The ioctl code has already synced the filesystem.
+ *
+ * Returns the error value stored by hammer2_inode_create().
+ */
+int
+hammer2_cluster_snapshot(hammer2_trans_t *trans, hammer2_cluster_t *ocluster,
+                      hammer2_ioc_pfs_t *pfs)
+{
+       hammer2_mount_t *hmp;
+       hammer2_cluster_t *ncluster;
+       hammer2_inode_data_t *ipdata;
+       hammer2_inode_t *nip;
+       size_t name_len;
+       hammer2_key_t lhc;
+       struct vattr vat;
+       uuid_t opfs_clid;
+       int error;
+
+       kprintf("snapshot %s\n", pfs->name);
+
+       name_len = strlen(pfs->name);
+       /* NOTE(review): lhc is computed but not used below */
+       lhc = hammer2_dirhash(pfs->name, name_len);
+
+       /* save the original cluster id before ipdata is repointed below */
+       ipdata = &hammer2_cluster_data(ocluster)->ipdata;
+       opfs_clid = ipdata->pfs_clid;
+       hmp = ocluster->focus->hmp;
+
+       /*
+        * Create the snapshot directory under the super-root
+        *
+        * Set PFS type, generate a unique filesystem id, and generate
+        * a cluster id.  Use the same clid when snapshotting a PFS root,
+        * which theoretically allows the snapshot to be used as part of
+        * the same cluster (perhaps as a cache).
+        *
+        * Copy the (flushed) blockref array.  Theoretically we could use
+        * chain_duplicate() but it becomes difficult to disentangle
+        * the shared core so for now just brute-force it.
+        */
+       VATTR_NULL(&vat);
+       vat.va_type = VDIR;
+       vat.va_mode = 0755;
+       ncluster = NULL;
+       nip = hammer2_inode_create(trans, hmp->sroot, &vat, proc0.p_ucred,
+                                  pfs->name, name_len, &ncluster, &error);
+
+       if (nip) {
+               /* ipdata now refers to the new snapshot inode's data */
+               ipdata = hammer2_cluster_modify_ip(trans, nip, ncluster, 0);
+               ipdata->pfs_type = HAMMER2_PFSTYPE_SNAPSHOT;
+               kern_uuidgen(&ipdata->pfs_fsid, 1);
+               if (ocluster->focus->flags & HAMMER2_CHAIN_PFSROOT)
+                       ipdata->pfs_clid = opfs_clid;
+               else
+                       kern_uuidgen(&ipdata->pfs_clid, 1);
+               hammer2_cluster_set_chainflags(ncluster, HAMMER2_CHAIN_PFSROOT);
+
+               /* XXX hack blockset copy */
+               ipdata->u.blockset = ocluster->focus->data->ipdata.u.blockset;
+
+               hammer2_inode_unlock_ex(nip, ncluster);
+       }
+       return (error);
+}
index 887863b..f812b98 100644 (file)
@@ -148,7 +148,7 @@ hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp,
        if (pmp) {
                trans->pmp = pmp;
                KKASSERT(hmp == NULL);
-               hmp = pmp->cluster.chains[0]->hmp;      /* XXX */
+               hmp = pmp->cluster.focus->hmp;  /* XXX */
        } else {
                trans->hmp_single = hmp;
                KKASSERT(hmp);
@@ -265,7 +265,7 @@ hammer2_trans_done(hammer2_trans_t *trans)
        hammer2_trans_t *scan;
 
        if (trans->pmp)
-               hmp = trans->pmp->cluster.chains[0]->hmp;
+               hmp = trans->pmp->cluster.focus->hmp;
        else
                hmp = trans->hmp_single;
 
@@ -624,7 +624,7 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t **chainp,
                        if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
                                atomic_clear_int(&chain->flags,
                                                HAMMER2_CHAIN_MODIFIED);
-                               hammer2_chain_memory_wakeup(chain->pmp);
+                               hammer2_pfs_memory_wakeup(chain->pmp);
                                hammer2_chain_drop(chain);
                        }
 #if 0
@@ -819,7 +819,7 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t **chainp,
         */
        KKASSERT(chain->flags & HAMMER2_CHAIN_FLUSH_CREATE);
        atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
-       hammer2_chain_memory_wakeup(chain->pmp);
+       hammer2_pfs_memory_wakeup(chain->pmp);
 
        if ((chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) ||
            chain == &hmp->vchain ||
index 54d46b8..b7f29a3 100644 (file)
@@ -367,6 +367,7 @@ hammer2_freemap_try_alloc(hammer2_trans_t *trans, hammer2_chain_t **parentp,
        uint16_t class;
        int error = 0;
        int cache_index = -1;
+       int ddflag;
 
 
        /*
@@ -393,7 +394,7 @@ hammer2_freemap_try_alloc(hammer2_trans_t *trans, hammer2_chain_t **parentp,
        chain = hammer2_chain_lookup(parentp, &key_dummy, key, key + l1mask,
                                     &cache_index,
                                     HAMMER2_LOOKUP_ALWAYS |
-                                    HAMMER2_LOOKUP_MATCHIND);
+                                    HAMMER2_LOOKUP_MATCHIND, &ddflag);
 
        if (chain == NULL) {
                /*
@@ -821,6 +822,7 @@ hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_mount_t *hmp,
        int modified = 0;
        int cache_index = -1;
        int error;
+       int ddflag;
 
        radix = (int)data_off & HAMMER2_OFF_MASK_RADIX;
        data_off &= ~HAMMER2_OFF_MASK_RADIX;
@@ -856,7 +858,7 @@ hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_mount_t *hmp,
        chain = hammer2_chain_lookup(&parent, &key_dummy, key, key + l1mask,
                                     &cache_index,
                                     HAMMER2_LOOKUP_ALWAYS |
-                                    HAMMER2_LOOKUP_MATCHIND);
+                                    HAMMER2_LOOKUP_MATCHIND, &ddflag);
 
        /*
         * Stop early if we are trying to free something but no leaf exists.
index f6d4a30..c0fc670 100644 (file)
@@ -44,7 +44,7 @@
 #define INODE_DEBUG    0
 
 static void hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
-                                        hammer2_chain_t **chainp,
+                                        hammer2_cluster_t **clusterp,
                                         hammer2_tid_t inum);
 
 RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
@@ -65,66 +65,78 @@ hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
  *
  * HAMMER2 offers shared locks and exclusive locks on inodes.
  *
- * An inode's ip->chain pointer is resolved and stable while an inode is
- * locked, and can be cleaned out at any time (become NULL) when an inode
- * is not locked.
+ * The inode locking function locks the inode itself, resolves any stale
+ * chains in the inode's cluster, and allocates a fresh copy of the
+ * cluster with 1 ref and all the underlying chains locked.  Duplication
+ * races are handled by this function.
  *
- * This function handles duplication races and hardlink replacement races
- * which can cause ip's cached chain to become stale.
- *
- * The underlying chain is also locked and returned.
+ * ip->cluster will be stable while the inode is locked.
  *
  * NOTE: We don't combine the inode/chain lock because putting away an
  *       inode would otherwise confuse multiple lock holders of the inode.
+ *
+ * NOTE: Hardlinks are followed in the returned cluster but not in the
+ *      inode's internal cluster (ip->cluster).
  */
-hammer2_chain_t *
+hammer2_cluster_t *
 hammer2_inode_lock_ex(hammer2_inode_t *ip)
 {
+       hammer2_inode_data_t *ipdata;
+       hammer2_cluster_t *cluster;
        hammer2_chain_t *chain;
        hammer2_chain_t *ochain;
        hammer2_chain_core_t *core;
        int error;
+       int i;
 
        hammer2_inode_ref(ip);
        ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
-
-       chain = ip->chain;
-       core = chain->core;
-       for (;;) {
-               if (chain->flags & HAMMER2_CHAIN_DUPLICATED) {
-                       spin_lock(&core->cst.spin);
-                       while (chain->flags & HAMMER2_CHAIN_DUPLICATED)
-                               chain = TAILQ_NEXT(chain, core_entry);
-                       hammer2_chain_ref(chain);
-                       spin_unlock(&core->cst.spin);
-                       hammer2_inode_repoint(ip, NULL, chain);
-                       hammer2_chain_drop(chain);
+       cluster = hammer2_cluster_copy(&ip->cluster, 0);
+
+       for (i = 0; i < cluster->nchains; ++i) {
+               chain = ip->cluster.array[i];
+               core = chain->core;
+               for (;;) {
+                       if (chain->flags & HAMMER2_CHAIN_DUPLICATED) {
+                               spin_lock(&core->cst.spin);
+                               while (chain->flags & HAMMER2_CHAIN_DUPLICATED)
+                                       chain = TAILQ_NEXT(chain, core_entry);
+                               hammer2_chain_ref(chain);
+                               spin_unlock(&core->cst.spin);
+                               ochain = ip->cluster.array[i];
+                               ip->cluster.array[i] = chain;
+                               hammer2_chain_drop(ochain);
+                       }
+                       hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
+                       if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0)
+                               break;
+                       hammer2_chain_unlock(chain);
                }
-               hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
-               if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0)
-                       break;
-               hammer2_chain_unlock(chain);
+               cluster->array[i] = chain;
        }
-       if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK &&
-           (chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
-               error = hammer2_hardlink_find(ip->pip, &chain, &ochain);
-               hammer2_chain_drop(ochain);
-               KKASSERT((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0);
+       cluster->focus = cluster->array[0];
+
+       /*
+        * Returned cluster must resolve hardlink pointers
+        */
+       ipdata = &hammer2_cluster_data(cluster)->ipdata;
+       if (ipdata->type == HAMMER2_OBJTYPE_HARDLINK &&
+           (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) {
+               error = hammer2_hardlink_find(ip->pip, cluster);
+               KKASSERT((cluster->focus->flags &
+                         HAMMER2_CHAIN_DUPLICATED) == 0);
                KKASSERT(error == 0);
-               /* XXX error handling */
        }
-       return (chain);
+       cluster->focus = cluster->array[0];
+
+       return (cluster);
 }
 
 void
-hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_chain_t *chain)
+hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
 {
-       /*
-        * XXX this will catch parent directories too which we don't
-        *     really want.
-        */
-       if (chain)
-               hammer2_chain_unlock(chain);
+       if (cluster)
+               hammer2_cluster_unlock(cluster);
        ccms_thread_unlock(&ip->topo_cst);
        hammer2_inode_drop(ip);
 }
@@ -138,41 +150,71 @@ hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_chain_t *chain)
  *      need to upgrade them.  Only one count of a shared lock can be
  *      upgraded.
  */
-hammer2_chain_t *
+hammer2_cluster_t *
 hammer2_inode_lock_sh(hammer2_inode_t *ip)
 {
+       hammer2_inode_data_t *ipdata;
+       hammer2_cluster_t *cluster;
+       hammer2_chain_core_t *core;
        hammer2_chain_t *chain;
+       int error = 0;
+       int i;
 
        hammer2_inode_ref(ip);
-       for (;;) {
-               ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED);
+       cluster = hammer2_cluster_copy(&ip->cluster, 0);
+       ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED);
 
-               chain = ip->chain;
-               KKASSERT(chain != NULL);        /* for now */
+       for (i = 0; i < cluster->nchains; ++i) {
+               chain = ip->cluster.array[i];
+               core = chain->core;
+
+               if (chain->flags & HAMMER2_CHAIN_DUPLICATED)
+                       goto cycle_excl;
                hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS |
                                          HAMMER2_RESOLVE_SHARED);
+               if (chain->flags & HAMMER2_CHAIN_DUPLICATED) {
+                       hammer2_chain_unlock(chain);
 
-               /*
-                * Resolve duplication races, resolve hardlinks by giving
-                * up and cycling an exclusive lock.
-                */
-               if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0 &&
-                   chain->data->ipdata.type != HAMMER2_OBJTYPE_HARDLINK) {
-                       break;
+                       /*
+                        * Cycle exclusive inode lock and start the loop
+                        * over again.
+                        */
+cycle_excl:
+                       while (--i >= 0) {
+                               chain = cluster->array[i];
+                               cluster->array[i] = NULL;
+                               hammer2_chain_unlock(chain);
+                       }
+                       ccms_thread_unlock(&ip->topo_cst);
+                       hammer2_inode_unlock_ex(ip, hammer2_inode_lock_ex(ip));
+                       ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED);
+                       continue;       /* restart at i=-1 -> i=0 on loop */
                }
-               hammer2_chain_unlock(chain);
-               ccms_thread_unlock(&ip->topo_cst);
-               chain = hammer2_inode_lock_ex(ip);
-               hammer2_inode_unlock_ex(ip, chain);
+               cluster->array[i] = chain;
        }
-       return (chain);
+       cluster->focus = cluster->array[0];
+
+       /*
+        * Returned cluster must resolve hardlink pointers
+        */
+       ipdata = &hammer2_cluster_data(cluster)->ipdata;
+       if (ipdata->type == HAMMER2_OBJTYPE_HARDLINK &&
+           (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) {
+               error = hammer2_hardlink_find(ip->pip, cluster);
+               KKASSERT((cluster->focus->flags &
+                         HAMMER2_CHAIN_DUPLICATED) == 0);
+               KKASSERT(error == 0);
+       }
+       cluster->focus = cluster->array[0];
+
+       return (cluster);
 }
 
 void
-hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_chain_t *chain)
+hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
 {
-       if (chain)
-               hammer2_chain_unlock(chain);
+       if (cluster)
+               hammer2_cluster_unlock(cluster);
        ccms_thread_unlock(&ip->topo_cst);
        hammer2_inode_drop(ip);
 }
@@ -328,7 +370,8 @@ hammer2_igetv(hammer2_inode_t *ip, int *errorp)
        pmp = ip->pmp;
        KKASSERT(pmp != NULL);
        *errorp = 0;
-       ipdata = &ip->chain->data->ipdata;
+
+       ipdata = &hammer2_cluster_data(&ip->cluster)->ipdata;
 
        for (;;) {
                /*
@@ -451,27 +494,29 @@ hammer2_igetv(hammer2_inode_t *ip, int *errorp)
 }
 
 /*
- * The passed-in chain must be locked and the returned inode will also be
- * locked.  This routine typically locates or allocates the inode, assigns
- * ip->chain (adding a ref to chain if necessary), and returns the inode.
+ * Returns the inode associated with the passed-in cluster, creating the
+ * inode if necessary and synchronizing it to the passed-in cluster otherwise.
+ *
+ * The passed-in cluster must be locked and will remain locked on return.
+ * The returned inode will be locked and the caller may dispose of both
+ * via hammer2_inode_unlock_ex().  However, if the caller needs to resolve
+ * a hardlink it must ref/unlock/relock/drop the inode.
  *
  * The hammer2_inode structure regulates the interface between the high level
  * kernel VNOPS API and the filesystem backend (the chains).
  *
- * WARNING!  This routine sucks up the chain's lock (makes it part of the
- *          inode lock from the point of view of the inode lock API),
- *          so callers need to be careful.
- *
  * WARNING!  The mount code is allowed to pass dip == NULL for iroot and
  *          is allowed to pass pmp == NULL and dip == NULL for sroot.
  */
 hammer2_inode_t *
 hammer2_inode_get(hammer2_pfsmount_t *pmp, hammer2_inode_t *dip,
-                 hammer2_chain_t *chain)
+                 hammer2_cluster_t *cluster)
 {
        hammer2_inode_t *nip;
+       hammer2_inode_data_t *iptmp;
+       hammer2_inode_data_t *nipdata;
 
-       KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
+       KKASSERT(hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
 
        /*
         * Interlocked lookup/ref of the inode.  This code is only needed
@@ -480,22 +525,18 @@ hammer2_inode_get(hammer2_pfsmount_t *pmp, hammer2_inode_t *dip,
         */
 again:
        for (;;) {
-               nip = hammer2_inode_lookup(pmp, chain->data->ipdata.inum);
+               iptmp = &hammer2_cluster_data(cluster)->ipdata;
+               nip = hammer2_inode_lookup(pmp, iptmp->inum);
                if (nip == NULL)
                        break;
+
                ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
                if ((nip->flags & HAMMER2_INODE_ONRBTREE) == 0) { /* race */
                        ccms_thread_unlock(&nip->topo_cst);
                        hammer2_inode_drop(nip);
                        continue;
                }
-               if (nip->chain != chain)
-                       hammer2_inode_repoint(nip, NULL, chain);
-
-               /*
-                * Consolidated nip/nip->chain is locked (chain locked
-                * by caller).
-                */
+               hammer2_inode_repoint(nip, NULL, cluster);
                return nip;
        }
 
@@ -505,16 +546,24 @@ again:
        if (pmp) {
                nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO);
                atomic_add_long(&pmp->inmem_inodes, 1);
-               hammer2_chain_memory_inc(pmp);
-               hammer2_chain_memory_wakeup(pmp);
+               hammer2_pfs_memory_inc(pmp);
+               hammer2_pfs_memory_wakeup(pmp);
        } else {
                nip = kmalloc(sizeof(*nip), M_HAMMER2, M_WAITOK | M_ZERO);
                nip->flags = HAMMER2_INODE_SROOT;
        }
-       nip->inum = chain->data->ipdata.inum;
-       nip->size = chain->data->ipdata.size;
-       nip->mtime = chain->data->ipdata.mtime;
-       hammer2_inode_repoint(nip, NULL, chain);
+
+       /*
+        * Initialize nip's cluster (leave nip->flags, e.g. SROOT, intact)
+        */
+       nip->cluster.refs = 1;
+       nip->cluster.flags |= HAMMER2_CLUSTER_INODE;
+
+       nipdata = &hammer2_cluster_data(cluster)->ipdata;
+       nip->inum = nipdata->inum;
+       nip->size = nipdata->size;
+       nip->mtime = nipdata->mtime;
+       hammer2_inode_repoint(nip, NULL, cluster);
        nip->pip = dip;                         /* can be NULL */
        if (dip)
                hammer2_inode_ref(dip); /* ref dip for nip->pip */
@@ -526,7 +575,7 @@ again:
         * hammer2_inode_lock_ex() call.
         */
        nip->refs = 1;
-       ccms_cst_init(&nip->topo_cst, &nip->chain);
+       ccms_cst_init(&nip->topo_cst, &nip->cluster);
        ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
        /* combination of thread lock and chain lock == inode lock */
 
@@ -553,7 +602,7 @@ again:
  * Create a new inode in the specified directory using the vattr to
  * figure out the type of inode.
  *
- * If no error occurs the new inode with its chain locked is returned in
+ * If no error occurs the new inode with its cluster locked is returned in
  * *nipp, otherwise an error is returned and *nipp is set to NULL.
  *
  * If vap and/or cred are NULL the related fields are not set and the
@@ -566,12 +615,12 @@ hammer2_inode_t *
 hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
                     struct vattr *vap, struct ucred *cred,
                     const uint8_t *name, size_t name_len,
-                    hammer2_chain_t **chainp, int *errorp)
+                    hammer2_cluster_t **clusterp, int *errorp)
 {
        hammer2_inode_data_t *dipdata;
        hammer2_inode_data_t *nipdata;
-       hammer2_chain_t *chain;
-       hammer2_chain_t *parent;
+       hammer2_cluster_t *cluster;
+       hammer2_cluster_t *cparent;
        hammer2_inode_t *nip;
        hammer2_key_t key_dummy;
        hammer2_key_t lhc;
@@ -581,7 +630,7 @@ hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
        uuid_t dip_gid;
        uint32_t dip_mode;
        uint8_t dip_algo;
-       int cache_index = -1;
+       int ddflag;
 
        lhc = hammer2_dirhash(name, name_len);
        *errorp = 0;
@@ -594,8 +643,8 @@ hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
         * NOTE: hidden inodes do not have iterators.
         */
 retry:
-       parent = hammer2_inode_lock_ex(dip);
-       dipdata = &dip->chain->data->ipdata;
+       cparent = hammer2_inode_lock_ex(dip);
+       dipdata = &hammer2_cluster_data(cparent)->ipdata;
        dip_uid = dipdata->uid;
        dip_gid = dipdata->gid;
        dip_mode = dipdata->mode;
@@ -603,44 +652,46 @@ retry:
 
        error = 0;
        while (error == 0) {
-               chain = hammer2_chain_lookup(&parent, &key_dummy,
-                                            lhc, lhc, &cache_index, 0);
-               if (chain == NULL)
+               cluster = hammer2_cluster_lookup(cparent, &key_dummy,
+                                                lhc, lhc, 0, &ddflag);
+               if (cluster == NULL)
                        break;
                if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
                        error = ENOSPC;
                if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
                        error = ENOSPC;
-               hammer2_chain_unlock(chain);
-               chain = NULL;
+               hammer2_cluster_unlock(cluster);
+               cluster = NULL;
                ++lhc;
        }
 
        if (error == 0) {
-               error = hammer2_chain_create(trans, &parent, &chain,
+               error = hammer2_cluster_create(trans, cparent, &cluster,
                                             lhc, 0,
                                             HAMMER2_BREF_TYPE_INODE,
                                             HAMMER2_INODE_BYTES);
        }
 #if INODE_DEBUG
        kprintf("CREATE INODE %*.*s chain=%p\n",
-               (int)name_len, (int)name_len, name, chain);
+               (int)name_len, (int)name_len, name,
+               (cluster ? cluster->focus : NULL));
 #endif
 
        /*
         * Cleanup and handle retries.
         */
        if (error == EAGAIN) {
-               hammer2_chain_ref(parent);
-               hammer2_inode_unlock_ex(dip, parent);
-               hammer2_chain_wait(parent);
-               hammer2_chain_drop(parent);
+               hammer2_cluster_ref(cparent);
+               hammer2_inode_unlock_ex(dip, cparent);
+               hammer2_cluster_wait(cparent);
+               hammer2_cluster_drop(cparent);
                goto retry;
        }
-       hammer2_inode_unlock_ex(dip, parent);
+       hammer2_inode_unlock_ex(dip, cparent);
+       cparent = NULL;
 
        if (error) {
-               KKASSERT(chain == NULL);
+               KKASSERT(cluster == NULL);
                *errorp = error;
                return (NULL);
        }
@@ -656,9 +707,10 @@ retry:
         *
         * NOTE: nipdata will have chain's blockset data.
         */
-       chain->data->ipdata.inum = trans->inode_tid;
-       nip = hammer2_inode_get(dip->pmp, dip, chain);
-       nipdata = &chain->data->ipdata;
+       nipdata = &hammer2_cluster_data(cluster)->ipdata;
+       nipdata->inum = trans->inode_tid;
+       nip = hammer2_inode_get(dip->pmp, dip, cluster);
+       nipdata = &hammer2_cluster_data(cluster)->ipdata;
 
        if (vap) {
                KKASSERT(trans->inodes_created == 0);
@@ -730,38 +782,12 @@ retry:
        bcopy(name, nipdata->filename, name_len);
        nipdata->name_key = lhc;
        nipdata->name_len = name_len;
-       *chainp = chain;
+       *clusterp = cluster;
 
        return (nip);
 }
 
 /*
- * chain may have been moved around by the create.
- */
-void
-hammer2_chain_refactor(hammer2_chain_t **chainp)
-{
-       hammer2_chain_t *chain = *chainp;
-       hammer2_chain_core_t *core;
-
-       core = chain->core;
-       while (chain->flags & HAMMER2_CHAIN_DUPLICATED) {
-               spin_lock(&core->cst.spin);
-               chain = TAILQ_NEXT(chain, core_entry);
-               while (chain->flags & HAMMER2_CHAIN_DUPLICATED)
-                       chain = TAILQ_NEXT(chain, core_entry);
-               hammer2_chain_ref(chain);
-               spin_unlock(&core->cst.spin);
-               KKASSERT(chain->core == core);
-
-               hammer2_chain_unlock(*chainp);
-               hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS |
-                                         HAMMER2_RESOLVE_NOREF); /* eat ref */
-               *chainp = chain;
-       }
-}
-
-/*
  * Shift *chainp up to the specified directory, change the filename
  * to "0xINODENUMBER", and adjust the key.  The chain becomes the
  * invisible hardlink target.
@@ -770,20 +796,20 @@ hammer2_chain_refactor(hammer2_chain_t **chainp)
  */
 static
 void
-hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_chain_t **chainp,
-                       hammer2_inode_t *dip, hammer2_chain_t **dchainp,
+hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_cluster_t *cluster,
+                       hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
                        int nlinks, int *errorp)
 {
+       hammer2_inode_data_t *iptmp;
        hammer2_inode_data_t *nipdata;
-       hammer2_chain_t *chain;
-       hammer2_chain_t *xchain;
+       hammer2_cluster_t *xcluster;
        hammer2_key_t key_dummy;
        hammer2_key_t lhc;
        hammer2_blockref_t bref;
-       int cache_index = -1;
+       int ddflag;
 
-       chain = *chainp;
-       lhc = chain->data->ipdata.inum;
+       iptmp = &hammer2_cluster_data(cluster)->ipdata;
+       lhc = iptmp->inum;
        KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);
 
        /*
@@ -794,17 +820,18 @@ hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_chain_t **chainp,
         * There should be no key collisions with invisible inode keys.
         *
         * WARNING! Must use inode_lock_ex() on dip to handle a stale
-        *          dip->chain cache.
+        *          dip->cluster cache.
         */
 retry:
        *errorp = 0;
-       xchain = hammer2_chain_lookup(dchainp, &key_dummy,
-                                     lhc, lhc, &cache_index, 0);
-       if (xchain) {
+       xcluster = hammer2_cluster_lookup(dcluster, &key_dummy,
+                                     lhc, lhc, 0, &ddflag);
+       if (xcluster) {
                kprintf("X3 chain %p dip %p dchain %p dip->chain %p\n",
-                       xchain, dip, *dchainp, dip->chain);
-               hammer2_chain_unlock(xchain);
-               xchain = NULL;
+                       xcluster->focus, dip, dcluster->focus,
+                       dip->cluster.focus);
+               hammer2_cluster_unlock(xcluster);
+               xcluster = NULL;
                *errorp = ENOSPC;
 #if 0
                Debugger("X3");
@@ -815,18 +842,18 @@ retry:
         * Create entry in common parent directory using the seek position
         * calculated above.
         *
-        * We must refactor chain because it might have been shifted into
-        * an indirect chain by the create.
+        * We must refactor cluster because it might have been shifted into
+        * an indirect cluster by the create.
         */
        if (*errorp == 0) {
-               KKASSERT(xchain == NULL);
+               KKASSERT(xcluster == NULL);
 #if 0
-               *errorp = hammer2_chain_create(trans, dchainp, &xchain,
+               *errorp = hammer2_cluster_create(trans, dcluster, &xcluster,
                                               lhc, 0,
                                               HAMMER2_BREF_TYPE_INODE,/* n/a */
                                               HAMMER2_INODE_BYTES);   /* n/a */
 #endif
-               /*XXX this somehow isn't working on chain XXX*/
+               /*XXX this somehow isn't working on cluster XXX*/
                /*KKASSERT(xxx)*/
        }
 
@@ -835,8 +862,8 @@ retry:
         */
        if (*errorp == EAGAIN) {
                kprintf("R");
-               hammer2_chain_wait(*dchainp);
-               hammer2_chain_drop(*dchainp);
+               hammer2_cluster_wait(dcluster);
+               hammer2_cluster_drop(dcluster);
                goto retry;
        }
 
@@ -845,40 +872,38 @@ retry:
         */
        if (*errorp) {
                panic("error2");
-               KKASSERT(xchain == NULL);
+               KKASSERT(xcluster == NULL);
                return;
        }
 
        /*
-        * Use xchain as a placeholder for (lhc).  Duplicate chain to the
-        * same target bref as xchain and then delete xchain.  The duplication
-        * occurs after xchain in flush order even though xchain is deleted
-        * after the duplication. XXX
+        * Use xcluster as a placeholder for (lhc).  Duplicate cluster to the
+        * same target bref as xcluster and then delete xcluster.  The
+        * duplication occurs after xcluster in flush order even though
+        * xcluster is deleted after the duplication. XXX
         *
         * WARNING! Duplications (to a different parent) can cause indirect
-        *          blocks to be inserted, refactor xchain.
+        *          blocks to be inserted, refactor xcluster.
         */
-       bref = chain->bref;
+       hammer2_cluster_bref(cluster, &bref);
        bref.key = lhc;                 /* invisible dir entry key */
        bref.keybits = 0;
-       hammer2_chain_duplicate(trans, dchainp, &chain, &bref, 0, 2);
+       hammer2_cluster_duplicate(trans, dcluster, cluster, &bref, 0, 2);
 
        /*
-        * chain is now 'live' again.. adjust the filename.
+        * cluster is now 'live' again.. adjust the filename.
         *
         * Directory entries are inodes but this is a hidden hardlink
         * target.  The name isn't used but to ease debugging give it
         * a name after its inode number.
         */
-       hammer2_chain_modify(trans, &chain, 0);
-       nipdata = &chain->data->ipdata;
+       hammer2_cluster_modify(trans, cluster, 0);
+       nipdata = &hammer2_cluster_data(cluster)->ipdata;
        ksnprintf(nipdata->filename, sizeof(nipdata->filename),
                  "0x%016jx", (intmax_t)nipdata->inum);
        nipdata->name_len = strlen(nipdata->filename);
        nipdata->name_key = lhc;
        nipdata->nlinks += nlinks;
-
-       *chainp = chain;
 }
 
 /*
@@ -896,27 +921,28 @@ retry:
  */
 int
 hammer2_inode_connect(hammer2_trans_t *trans,
-                     hammer2_chain_t **chainp, int hlink,
-                     hammer2_inode_t *dip, hammer2_chain_t **dchainp,
+                     hammer2_cluster_t **clusterp, int hlink,
+                     hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
                      const uint8_t *name, size_t name_len,
                      hammer2_key_t lhc)
 {
        hammer2_inode_data_t *ipdata;
-       hammer2_chain_t *nchain;
-       hammer2_chain_t *ochain;
+       hammer2_cluster_t *ocluster;
+       hammer2_cluster_t *ncluster;
        hammer2_key_t key_dummy;
-       int cache_index = -1;
+       int ddflag;
        int error;
 
        /*
-        * Since ochain is either disconnected from the topology or represents
-        * a hardlink terminus which is always a parent of or equal to dip,
-        * we should be able to safely lock dip->chain for our setup.
+        * Since ocluster is either disconnected from the topology or
+        * represents a hardlink terminus which is always a parent of or
+        * equal to dip, we should be able to safely lock dip->cluster for
+        * our setup.
         *
         * WARNING! Must use inode_lock_ex() on dip to handle a stale
-        *          dip->chain cache.
+        *          dip->cluster.
         */
-       ochain = *chainp;
+       ocluster = *clusterp;
 
        /*
         * If name is non-NULL we calculate lhc, else we use the passed-in
@@ -932,17 +958,17 @@ hammer2_inode_connect(hammer2_trans_t *trans,
                 */
                error = 0;
                while (error == 0) {
-                       nchain = hammer2_chain_lookup(dchainp, &key_dummy,
+                       ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
                                                      lhc, lhc,
-                                                     &cache_index, 0);
-                       if (nchain == NULL)
+                                                     0, &ddflag);
+                       if (ncluster == NULL)
                                break;
                        if ((lhc & HAMMER2_DIRHASH_LOMASK) ==
                            HAMMER2_DIRHASH_LOMASK) {
                                error = ENOSPC;
                        }
-                       hammer2_chain_unlock(nchain);
-                       nchain = NULL;
+                       hammer2_cluster_unlock(ncluster);
+                       ncluster = NULL;
                        ++lhc;
                }
        } else {
@@ -950,9 +976,10 @@ hammer2_inode_connect(hammer2_trans_t *trans,
                 * Reconnect to specific key (used when moving
                 * unlinked-but-open files into the hidden directory).
                 */
-               nchain = hammer2_chain_lookup(dchainp, &key_dummy,
-                                             lhc, lhc, &cache_index, 0);
-               KKASSERT(nchain == NULL);
+               ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
+                                                 lhc, lhc,
+                                                 0, &ddflag);
+               KKASSERT(ncluster == NULL);
        }
 
        if (error == 0) {
@@ -961,40 +988,42 @@ hammer2_inode_connect(hammer2_trans_t *trans,
                         * Hardlink pointer needed, create totally fresh
                         * directory entry.
                         *
-                        * We must refactor ochain because it might have
-                        * been shifted into an indirect chain by the
+                        * We must refactor ocluster because it might have
+                        * been shifted into an indirect cluster by the
                         * create.
                         */
-                       KKASSERT(nchain == NULL);
-                       error = hammer2_chain_create(trans, dchainp, &nchain,
-                                                    lhc, 0,
-                                                    HAMMER2_BREF_TYPE_INODE,
-                                                    HAMMER2_INODE_BYTES);
-                       hammer2_chain_refactor(&ochain);
+                       KKASSERT(ncluster == NULL);
+                       error = hammer2_cluster_create(trans,
+                                                      dcluster, &ncluster,
+                                                      lhc, 0,
+                                                      HAMMER2_BREF_TYPE_INODE,
+                                                      HAMMER2_INODE_BYTES);
+                       hammer2_cluster_refactor(ocluster);
                } else {
                        /*
-                        * Reconnect the original chain and rename.  Use
-                        * chain_duplicate().  The caller will likely delete
+                        * Reconnect the original cluster and rename.  Use
+                        * cluster_duplicate().  The caller will likely delete
                         * or has already deleted the original chain in
                         * this case.
                         *
-                        * NOTE: chain_duplicate() generates a new chain
-                        *       with CHAIN_DELETED cleared (ochain typically
+                        * NOTE: cluster_duplicate() generates a new cluster
+                        *       with CHAIN_DELETED cleared (ocluster typically
                         *       has it set from the file unlink).
                         *
-                        * WARNING! Can cause held-over chains to require a
+                        * WARNING! Can cause held-over clusters to require a
                         *          refactor.  Fortunately we have none (our
-                        *          locked chains are passed into and
+                        *          locked clusters are passed into and
                         *          modified by the call).
                         */
-                       nchain = ochain;
-                       ochain = NULL;
-                       hammer2_chain_duplicate(trans, NULL, &nchain, NULL,
-                                               0, 3);
-                       error = hammer2_chain_create(trans, dchainp, &nchain,
-                                                    lhc, 0,
-                                                    HAMMER2_BREF_TYPE_INODE,
-                                                    HAMMER2_INODE_BYTES);
+                       ncluster = ocluster;
+                       ocluster = NULL;
+                       hammer2_cluster_duplicate(trans, NULL, ncluster, NULL,
+                                                 0, 3);
+                       error = hammer2_cluster_create(trans,
+                                                      dcluster, &ncluster,
+                                                      lhc, 0,
+                                                      HAMMER2_BREF_TYPE_INODE,
+                                                      HAMMER2_INODE_BYTES);
                }
        }
 
@@ -1004,10 +1033,11 @@ hammer2_inode_connect(hammer2_trans_t *trans,
        KKASSERT(error != EAGAIN);
 
        /*
-        * nchain should be NULL on error, leave ochain (== *chainp) alone.
+        * ncluster should be NULL on error, leave ocluster
+        * (ocluster == *clusterp) alone.
         */
        if (error) {
-               KKASSERT(nchain == NULL);
+               KKASSERT(ncluster == NULL);
                return (error);
        }
 
@@ -1016,58 +1046,39 @@ hammer2_inode_connect(hammer2_trans_t *trans,
         * to update the inode.
         *
         * When creating an OBJTYPE_HARDLINK entry remember to unlock the
-        * chain, the caller will access the hardlink via the actual hardlink
+        * cluster, the caller will access the hardlink via the actual hardlink
         * target file and not the hardlink pointer entry, so we must still
-        * return ochain.
+        * return ocluster.
         */
        if (hlink && hammer2_hardlink_enable >= 0) {
                /*
                 * Create the HARDLINK pointer.  oip represents the hardlink
                 * target in this situation.
                 *
-                * We will return ochain (the hardlink target).
+                * We will return ocluster (the hardlink target).
                 */
-               hammer2_chain_modify(trans, &nchain, 0);
+               hammer2_cluster_modify(trans, ncluster, 0);
                KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
-               ipdata = &nchain->data->ipdata;
+               ipdata = &hammer2_cluster_data(ncluster)->ipdata;
                bcopy(name, ipdata->filename, name_len);
                ipdata->name_key = lhc;
                ipdata->name_len = name_len;
-               ipdata->target_type = ochain->data->ipdata.type;
+               ipdata->target_type =
+                               hammer2_cluster_data(ocluster)->ipdata.type;
                ipdata->type = HAMMER2_OBJTYPE_HARDLINK;
-               ipdata->inum = ochain->data->ipdata.inum;
+               ipdata->inum = hammer2_cluster_data(ocluster)->ipdata.inum;
                ipdata->nlinks = 1;
-               hammer2_chain_unlock(nchain);
-               nchain = ochain;
-               ochain = NULL;
-       } else if (hlink && hammer2_hardlink_enable < 0) {
-               /*
-                * Create a snapshot (hardlink fake mode for debugging).
-                * (ochain already flushed above so we can just copy the
-                * bref XXX).
-                *
-                * Since this is a snapshot we return nchain in the fake
-                * hardlink case.
-                */
-               hammer2_chain_modify(trans, &nchain, 0);
-               KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
-               ipdata = &nchain->data->ipdata;
-               *ipdata = ochain->data->ipdata;
-               bcopy(name, ipdata->filename, name_len);
-               ipdata->name_key = lhc;
-               ipdata->name_len = name_len;
-               atomic_clear_int(&nchain->core->flags,
-                                HAMMER2_CORE_COUNTEDBREFS);
-               kprintf("created fake hardlink %*.*s\n",
-                       (int)name_len, (int)name_len, name);
+               hammer2_cluster_unlock(ncluster);
+               ncluster = ocluster;
+               ocluster = NULL;
        } else {
                /*
-                * nchain is a duplicate of ochain at the new location.
+                * ncluster is a duplicate of ocluster at the new location.
                 * We must fixup the name stored in oip.  The bref key
                 * has already been set up.
                 */
-               hammer2_chain_modify(trans, &nchain, 0);
-               ipdata = &nchain->data->ipdata;
+               hammer2_cluster_modify(trans, ncluster, 0);
+               ipdata = &hammer2_cluster_data(ncluster)->ipdata;
 
                KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
                bcopy(name, ipdata->filename, name_len);
@@ -1077,13 +1088,13 @@ hammer2_inode_connect(hammer2_trans_t *trans,
        }
 
        /*
-        * We are replacing ochain with nchain, unlock ochain.  In the
-        * case where ochain is left unchanged the code above sets
-        * nchain to ochain and ochain to NULL, resulting in a NOP here.
+        * We are replacing ocluster with ncluster, unlock ocluster.  In the
+        * case where ocluster is left unchanged the code above sets
+        * ncluster to ocluster and ocluster to NULL, resulting in a NOP here.
         */
-       if (ochain)
-               hammer2_chain_unlock(ochain);
-       *chainp = nchain;
+       if (ocluster)
+               hammer2_cluster_unlock(ocluster);
+       *clusterp = ncluster;
 
        return (0);
 }
@@ -1097,20 +1108,36 @@ hammer2_inode_connect(hammer2_trans_t *trans,
  */
 void
 hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
-                     hammer2_chain_t *nchain)
+                     hammer2_cluster_t *cluster)
 {
        hammer2_chain_t *ochain;
+       hammer2_chain_t *nchain;
        hammer2_inode_t *opip;
+       int i;
 
-       /*
-        * Repoint ip->chain if requested.
-        */
-       ochain = ip->chain;
-       ip->chain = nchain;
-       if (nchain)
-               hammer2_chain_ref(nchain);
-       if (ochain)
-               hammer2_chain_drop(ochain);
+       for (i = 0; i < cluster->nchains; ++i) {
+               /*
+                * Get possible replacement chain, loop if nothing to do.
+                */
+               nchain = cluster->array[i];
+               if (i < ip->cluster.nchains) {
+                       ochain = ip->cluster.array[i];
+                       if (ochain == nchain)
+                               continue;
+               } else {
+                       ochain = NULL;
+               }
+
+               /*
+                * Make adjustment
+                */
+               ip->cluster.array[i] = nchain;
+               if (nchain)
+                       hammer2_chain_ref(nchain);
+               if (ochain)
+                       hammer2_chain_drop(ochain);
+       }
+       ip->cluster.focus = ip->cluster.array[0];
 
        /*
         * Repoint ip->pip if requested (non-NULL pip).
@@ -1151,20 +1178,20 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
                    int isdir, int *hlinkp, struct nchandle *nch)
 {
        hammer2_inode_data_t *ipdata;
-       hammer2_chain_t *parent;
-       hammer2_chain_t *ochain;
-       hammer2_chain_t *chain;
-       hammer2_chain_t *dparent;
-       hammer2_chain_t *dchain;
+       hammer2_cluster_t *cparent;
+       hammer2_cluster_t *ocluster;
+       hammer2_cluster_t *cluster;
+       hammer2_cluster_t *dparent;
+       hammer2_cluster_t *dcluster;
        hammer2_key_t key_dummy;
        hammer2_key_t key_next;
        hammer2_key_t lhc;
        int error;
-       int cache_index = -1;
+       int ddflag;
        uint8_t type;
 
        error = 0;
-       ochain = NULL;
+       ocluster = NULL;
        lhc = hammer2_dirhash(name, name_len);
 
        /*
@@ -1172,20 +1199,22 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
         */
        if (hlinkp)
                *hlinkp = 0;
-       parent = hammer2_inode_lock_ex(dip);
-       chain = hammer2_chain_lookup(&parent, &key_next,
+       cparent = hammer2_inode_lock_ex(dip);
+       cluster = hammer2_cluster_lookup(cparent, &key_next,
                                     lhc, lhc + HAMMER2_DIRHASH_LOMASK,
-                                    &cache_index, 0);
-       while (chain) {
-               if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
-                   name_len == chain->data->ipdata.name_len &&
-                   bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
-                       break;
+                                    0, &ddflag);
+       while (cluster) {
+               if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
+                       ipdata = &hammer2_cluster_data(cluster)->ipdata;
+                       if (ipdata->name_len == name_len &&
+                           bcmp(ipdata->filename, name, name_len) == 0) {
+                               break;
+                       }
                }
-               chain = hammer2_chain_next(&parent, chain, &key_next,
-                                          key_next,
-                                          lhc + HAMMER2_DIRHASH_LOMASK,
-                                          &cache_index, 0);
+               cluster = hammer2_cluster_next(cparent, cluster, &key_next,
+                                              key_next,
+                                              lhc + HAMMER2_DIRHASH_LOMASK,
+                                              0);
        }
        hammer2_inode_unlock_ex(dip, NULL);     /* retain parent */
 
@@ -1193,14 +1222,16 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
         * Not found or wrong type (isdir < 0 disables the type check).
         * If a hardlink pointer, type checks use the hardlink target.
         */
-       if (chain == NULL) {
+       if (cluster == NULL) {
                error = ENOENT;
                goto done;
        }
-       if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK) {
+       ipdata = &hammer2_cluster_data(cluster)->ipdata;
+       type = ipdata->type;
+       if (type == HAMMER2_OBJTYPE_HARDLINK) {
                if (hlinkp)
                        *hlinkp = 1;
-               type = chain->data->ipdata.target_type;
+               type = ipdata->target_type;
        }
 
        if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
@@ -1216,14 +1247,21 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
         * Hardlink must be resolved.  We can't hold the parent locked
         * while we do this or we could deadlock.
         *
-        * On success chain will be adjusted to point at the hardlink target
-        * and ochain will point to the hardlink pointer in the original
-        * directory.  Otherwise chain remains pointing to the original.
+        * On success cluster will be adjusted to point at the hardlink target
+        * and ocluster will point to the hardlink pointer in the original
+        * directory.  Otherwise cluster remains pointing to the original.
+        *
+        * Lock ownership is transferred to cluster.  ocluster is merely
+        * referenced.
         */
-       if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
-               hammer2_chain_unlock(parent);
-               parent = NULL;
-               error = hammer2_hardlink_find(dip, &chain, &ochain);
+       if (ipdata->type == HAMMER2_OBJTYPE_HARDLINK) {
+               hammer2_cluster_unlock(cparent);
+               cparent = NULL;
+
+               ocluster = cluster;
+               cluster = hammer2_cluster_copy(ocluster, 1);
+               error = hammer2_hardlink_find(dip, cluster);
+               KKASSERT(error == 0);
        }
 
        /*
@@ -1238,46 +1276,46 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
         *       entries.
         */
        if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
-               dparent = hammer2_chain_lookup_init(chain, 0);
-               dchain = hammer2_chain_lookup(&dparent, &key_dummy,
-                                             0, (hammer2_key_t)-1,
-                                             &cache_index,
-                                             HAMMER2_LOOKUP_NODATA);
-               if (dchain) {
-                       hammer2_chain_unlock(dchain);
-                       hammer2_chain_lookup_done(dparent);
+               dparent = hammer2_cluster_lookup_init(cluster, 0);
+               dcluster = hammer2_cluster_lookup(dparent, &key_dummy,
+                                                 0, (hammer2_key_t)-1,
+                                                 HAMMER2_LOOKUP_NODATA,
+                                                 &ddflag);
+               if (dcluster) {
+                       hammer2_cluster_unlock(dcluster);
+                       hammer2_cluster_lookup_done(dparent);
                        error = ENOTEMPTY;
                        goto done;
                }
-               hammer2_chain_lookup_done(dparent);
+               hammer2_cluster_lookup_done(dparent);
                dparent = NULL;
-               /* dchain NULL */
+               /* dcluster NULL */
        }
 
        /*
-        * Ok, we can now unlink the chain.  We always decrement nlinks even
+        * Ok, we can now unlink the cluster.  We always decrement nlinks even
         * if the entry can be deleted in case someone has the file open and
         * does an fstat().
         *
-        * The chain itself will no longer be in the on-media topology but
+        * The cluster itself will no longer be in the on-media topology but
         * can still be flushed to the media (e.g. if an open descriptor
-        * remains).  When the last vnode/ip ref goes away the chain will
+        * remains).  When the last vnode/ip ref goes away the cluster will
         * be marked unmodified, avoiding any further (now unnecesary) I/O.
         *
-        * A non-NULL ochain indicates a hardlink.
+        * A non-NULL ocluster indicates a hardlink.
         */
-       if (ochain) {
+       if (ocluster) {
                /*
                 * Delete the original hardlink pointer unconditionally.
                 * (any open descriptors will migrate to the hardlink
                 * target and have no affect on this operation).
                 *
+                * NOTE: cparent from above is NULL when ocluster != NULL
+                * NOTE: parent from above is NULL when ocluster != NULL
                 *       so we can reuse it.
                 */
-               hammer2_chain_lock(ochain, HAMMER2_RESOLVE_ALWAYS);
-               hammer2_chain_delete(trans, ochain, 0);
-               hammer2_chain_unlock(ochain);
+               hammer2_cluster_lock(ocluster, HAMMER2_RESOLVE_ALWAYS);
+               hammer2_cluster_delete(trans, ocluster, 0);
+               hammer2_cluster_unlock(ocluster);
        }
 
        /*
@@ -1295,14 +1333,15 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
         *       passed as NULL in this situation.  hammer2_inode_connect()
         *       will bump nlinks.
         */
-       KKASSERT(chain != NULL);
-       hammer2_chain_modify(trans, &chain, 0);
-       ipdata = &chain->data->ipdata;
+       KKASSERT(cluster != NULL);
+       hammer2_cluster_modify(trans, cluster, 0);
+       ipdata = &hammer2_cluster_data(cluster)->ipdata;
        --ipdata->nlinks;
        if ((int64_t)ipdata->nlinks < 0)        /* XXX debugging */
                ipdata->nlinks = 0;
        if (ipdata->nlinks == 0) {
-               if ((chain->flags & HAMMER2_CHAIN_PFSROOT) && chain->pmp) {
+               if ((cluster->focus->flags & HAMMER2_CHAIN_PFSROOT) &&
+                   cluster->pmp) {
                        error = EINVAL;
                        kprintf("hammer2: PFS \"%s\" cannot be deleted "
                                "while still mounted\n",
@@ -1311,21 +1350,22 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
                }
                if (nch && cache_isopen(nch)) {
                        kprintf("WARNING: unlinking open file\n");
-                       atomic_set_int(&chain->flags, HAMMER2_CHAIN_UNLINKED);
-                       hammer2_inode_move_to_hidden(trans, &chain,
+                       hammer2_cluster_set_chainflags(cluster,
+                                                       HAMMER2_CHAIN_UNLINKED);
+                       hammer2_inode_move_to_hidden(trans, &cluster,
                                                     ipdata->inum);
                } else {
-                       hammer2_chain_delete(trans, chain, 0);
+                       hammer2_cluster_delete(trans, cluster, 0);
                }
        }
        error = 0;
 done:
-       if (chain)
-               hammer2_chain_unlock(chain);
-       if (parent)
-               hammer2_chain_lookup_done(parent);
-       if (ochain)
-               hammer2_chain_drop(ochain);
+       if (cluster)
+               hammer2_cluster_unlock(cluster);
+       if (cparent)
+               hammer2_cluster_lookup_done(cparent);
+       if (ocluster)
+               hammer2_cluster_drop(ocluster);
 
        return error;
 }
@@ -1337,13 +1377,13 @@ void
 hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp)
 {
        hammer2_trans_t trans;
-       hammer2_chain_t *parent;
-       hammer2_chain_t *chain;
-       hammer2_chain_t *scan;
+       hammer2_cluster_t *cparent;
+       hammer2_cluster_t *cluster;
+       hammer2_cluster_t *scan;
        hammer2_inode_data_t *ipdata;
        hammer2_key_t key_dummy;
        hammer2_key_t key_next;
-       int cache_index;
+       int ddflag;
        int error;
        int count;
 
@@ -1356,13 +1396,13 @@ hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp)
        bzero(&key_dummy, sizeof(key_dummy));
        hammer2_trans_init(&trans, pmp, NULL, 0);
 
-       parent = hammer2_inode_lock_ex(pmp->iroot);
-       chain = hammer2_chain_lookup(&parent, &key_dummy,
-                                    HAMMER2_INODE_HIDDENDIR,
-                                    HAMMER2_INODE_HIDDENDIR,
-                                    &cache_index, 0);
-       if (chain) {
-               pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, chain);
+       cparent = hammer2_inode_lock_ex(pmp->iroot);
+       cluster = hammer2_cluster_lookup(cparent, &key_dummy,
+                                        HAMMER2_INODE_HIDDENDIR,
+                                        HAMMER2_INODE_HIDDENDIR,
+                                        0, &ddflag);
+       if (cluster) {
+               pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
                hammer2_inode_ref(pmp->ihidden);
 
                /*
@@ -1370,23 +1410,22 @@ hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp)
                 * any system crash.
                 */
                count = 0;
-               scan = hammer2_chain_lookup(&chain, &key_next,
-                                           0, HAMMER2_MAX_TID,
-                                           &cache_index,
-                                           HAMMER2_LOOKUP_NODATA);
+               scan = hammer2_cluster_lookup(cluster, &key_next,
+                                             0, HAMMER2_MAX_TID,
+                                             HAMMER2_LOOKUP_NODATA, &ddflag);
                while (scan) {
-                       if (scan->bref.type == HAMMER2_BREF_TYPE_INODE) {
-                               hammer2_chain_delete(&trans, scan, 0);
+                       if (hammer2_cluster_type(scan) ==
+                           HAMMER2_BREF_TYPE_INODE) {
+                               hammer2_cluster_delete(&trans, scan, 0);
                                ++count;
                        }
-                       scan = hammer2_chain_next(&chain, scan, &key_next,
-                                                  0, HAMMER2_MAX_TID,
-                                                  &cache_index,
-                                                  HAMMER2_LOOKUP_NODATA);
+                       scan = hammer2_cluster_next(cluster, scan, &key_next,
+                                                   0, HAMMER2_MAX_TID,
+                                                   HAMMER2_LOOKUP_NODATA);
                }
 
-               hammer2_inode_unlock_ex(pmp->ihidden, chain);
-               hammer2_inode_unlock_ex(pmp->iroot, parent);
+               hammer2_inode_unlock_ex(pmp->ihidden, cluster);
+               hammer2_inode_unlock_ex(pmp->iroot, cparent);
                hammer2_trans_done(&trans);
                kprintf("hammer2: PFS loaded hidden dir, "
                        "removed %d dead entries\n", count);
@@ -1396,21 +1435,21 @@ hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp)
        /*
         * Create the hidden directory
         */
-       error = hammer2_chain_create(&trans, &parent, &chain,
-                                    HAMMER2_INODE_HIDDENDIR, 0,
-                                    HAMMER2_BREF_TYPE_INODE,
-                                    HAMMER2_INODE_BYTES);
-       hammer2_inode_unlock_ex(pmp->iroot, parent);
-       hammer2_chain_modify(&trans, &chain, 0);
-       ipdata = &chain->data->ipdata;
+       error = hammer2_cluster_create(&trans, cparent, &cluster,
+                                      HAMMER2_INODE_HIDDENDIR, 0,
+                                      HAMMER2_BREF_TYPE_INODE,
+                                      HAMMER2_INODE_BYTES);
+       hammer2_inode_unlock_ex(pmp->iroot, cparent);
+       hammer2_cluster_modify(&trans, cluster, 0);
+       ipdata = &hammer2_cluster_data(cluster)->ipdata;
        ipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
        ipdata->inum = HAMMER2_INODE_HIDDENDIR;
        ipdata->nlinks = 1;
        kprintf("hammer2: PFS root missing hidden directory, creating\n");
 
-       pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, chain);
+       pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
        hammer2_inode_ref(pmp->ihidden);
-       hammer2_inode_unlock_ex(pmp->ihidden, chain);
+       hammer2_inode_unlock_ex(pmp->ihidden, cluster);
        hammer2_trans_done(&trans);
 }
 
@@ -1424,61 +1463,59 @@ hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp)
  */
 static
 void
-hammer2_inode_move_to_hidden(hammer2_trans_t *trans, hammer2_chain_t **chainp,
-                            hammer2_tid_t inum)
+hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
+                            hammer2_cluster_t **clusterp, hammer2_tid_t inum)
 {
-       hammer2_chain_t *chain;
-       hammer2_chain_t *dchain;
+       hammer2_cluster_t *dcluster;
        hammer2_pfsmount_t *pmp;
        int error;
 
-       chain = *chainp;
-       pmp = chain->pmp;
+       pmp = (*clusterp)->pmp;
        KKASSERT(pmp != NULL);
        KKASSERT(pmp->ihidden != NULL);
-       hammer2_chain_delete(trans, chain, 0);
 
-       dchain = hammer2_inode_lock_ex(pmp->ihidden);
-        error = hammer2_inode_connect(trans, chainp, 0,
-                                      pmp->ihidden, &dchain,
+       hammer2_cluster_delete(trans, *clusterp, 0);
+       dcluster = hammer2_inode_lock_ex(pmp->ihidden);
+       error = hammer2_inode_connect(trans, clusterp, 0,
+                                     pmp->ihidden, dcluster,
                                      NULL, 0, inum);
-       hammer2_inode_unlock_ex(pmp->ihidden, dchain);
+       hammer2_inode_unlock_ex(pmp->ihidden, dcluster);
        KKASSERT(error == 0);
 }
 
 /*
- * Given an exclusively locked inode and chain we consolidate its chain
+ * Given an exclusively locked inode and cluster we consolidate its cluster
  * for hardlink creation, adding (nlinks) to the file's link count and
  * potentially relocating the inode to a directory common to ip->pip and tdip.
  *
- * Replaces (*chainp) if consolidation occurred, unlocking the old chain
- * and returning a new locked chain.
+ * Replaces (*clusterp) if consolidation occurred, unlocking the old cluster
+ * and returning a new locked cluster.
  *
- * NOTE!  This function will also replace ip->chain.
+ * NOTE!  This function will also replace ip->cluster.
  */
 int
 hammer2_hardlink_consolidate(hammer2_trans_t *trans,
-                            hammer2_inode_t *ip, hammer2_chain_t **chainp,
-                            hammer2_inode_t *cdip, hammer2_chain_t **cdchainp,
+                            hammer2_inode_t *ip,
+                            hammer2_cluster_t **clusterp,
+                            hammer2_inode_t *cdip,
+                            hammer2_cluster_t *cdcluster,
                             int nlinks)
 {
        hammer2_inode_data_t *ipdata;
-       hammer2_chain_t *chain;
-       hammer2_chain_t *nchain;
+       hammer2_cluster_t *cluster;
+       hammer2_cluster_t *ncluster;
        int error;
 
-       chain = *chainp;
+       cluster = *clusterp;
+       ipdata = &hammer2_cluster_data(cluster)->ipdata;
        if (nlinks == 0 &&                      /* no hardlink needed */
-           (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE)) {
-               return (0);
-       }
-       if (hammer2_hardlink_enable < 0) {      /* fake hardlinks */
+           (ipdata->name_key & HAMMER2_DIRHASH_VISIBLE)) {
                return (0);
        }
 
        if (hammer2_hardlink_enable == 0) {     /* disallow hardlinks */
-               hammer2_chain_unlock(chain);
-               *chainp = NULL;
+               hammer2_cluster_unlock(cluster);
+               *clusterp = NULL;
                return (ENOTSUP);
        }
 
@@ -1487,11 +1524,13 @@ hammer2_hardlink_consolidate(hammer2_trans_t *trans,
         * this is already a hardlink target, all we need to do is adjust
         * the link count.
         */
+       ipdata = &hammer2_cluster_data(cluster)->ipdata;
        if (cdip == ip->pip &&
-           (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
+           (ipdata->name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
                if (nlinks) {
-                       hammer2_chain_modify(trans, &chain, 0);
-                       chain->data->ipdata.nlinks += nlinks;
+                       hammer2_cluster_modify(trans, cluster, 0);
+                       ipdata = &hammer2_cluster_data(cluster)->ipdata;
+                       ipdata->nlinks += nlinks;
                }
                error = 0;
                goto done;
@@ -1499,27 +1538,29 @@ hammer2_hardlink_consolidate(hammer2_trans_t *trans,
 
 
        /*
-        * chain is the real inode.  If it's visible we have to convert it
+        * cluster is the real inode.  If it's visible we have to convert it
         * to a hardlink pointer.  If it is not visible then it is already
         * a hardlink target and only needs to be deleted.
         */
-       KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0);
-       KKASSERT(chain->data->ipdata.type != HAMMER2_OBJTYPE_HARDLINK);
-       if (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE) {
+       KKASSERT((cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0);
+       ipdata = &hammer2_cluster_data(cluster)->ipdata;
+       KKASSERT(ipdata->type != HAMMER2_OBJTYPE_HARDLINK);
+       if (ipdata->name_key & HAMMER2_DIRHASH_VISIBLE) {
                /*
-                * We are going to duplicate chain later, causing its
+                * We are going to duplicate cluster later, causing its
                 * media block to be shifted to the duplicate.  Even though
-                * we are delete-duplicating nchain here it might decide not
+                * we are delete-duplicating ncluster here it might decide not
                 * to reallocate the block.  Set FORCECOW to force it to.
                 */
-               nchain = chain;
-               hammer2_chain_lock(nchain, HAMMER2_RESOLVE_ALWAYS);
-               atomic_set_int(&nchain->flags, HAMMER2_CHAIN_FORCECOW);
-               hammer2_chain_delete_duplicate(trans, &nchain,
-                                              HAMMER2_DELDUP_RECORE);
-               KKASSERT((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0);
-
-               ipdata = &nchain->data->ipdata;
+               ncluster = cluster;
+               hammer2_cluster_lock(ncluster, HAMMER2_RESOLVE_ALWAYS);
+               hammer2_cluster_set_chainflags(ncluster,
+                                              HAMMER2_CHAIN_FORCECOW);
+               hammer2_cluster_delete_duplicate(trans, ncluster,
+                                                HAMMER2_DELDUP_RECORE);
+               KKASSERT((ncluster->focus->flags &
+                        HAMMER2_CHAIN_DUPLICATED) == 0);
+               ipdata = &hammer2_cluster_data(ncluster)->ipdata;
                ipdata->target_type = ipdata->type;
                ipdata->type = HAMMER2_OBJTYPE_HARDLINK;
                ipdata->uflags = 0;
@@ -1550,36 +1591,37 @@ hammer2_hardlink_consolidate(hammer2_trans_t *trans,
                bzero(&ipdata->u, sizeof(ipdata->u));
                /* XXX transaction ids */
        } else {
-               hammer2_chain_delete(trans, chain, 0);
-               nchain = NULL;
+               hammer2_cluster_delete(trans, cluster, 0);
+               ncluster = NULL;
        }
 
        /*
-        * chain represents the hardlink target and is now flagged deleted.
+        * cluster represents the hardlink target and is now flagged deleted.
         * duplicate it to the parent directory and adjust nlinks.
         *
-        * WARNING! The shiftup() call can cause nchain to be moved into
-        *          an indirect block, and our nchain will wind up pointing
+        * WARNING! The shiftup() call can cause ncluster to be moved into
+        *          an indirect block, and our ncluster will wind up pointing
         *          to the older/original version.
         */
-       KKASSERT(chain->flags & HAMMER2_CHAIN_DELETED);
-       hammer2_hardlink_shiftup(trans, &chain, cdip, cdchainp, nlinks, &error);
+       KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_DELETED);
+       hammer2_hardlink_shiftup(trans, cluster, cdip, cdcluster,
+                                nlinks, &error);
 
        if (error == 0)
-               hammer2_inode_repoint(ip, cdip, chain);
+               hammer2_inode_repoint(ip, cdip, cluster);
 
        /*
-        * Unlock the original chain last as the lock blocked races against
+        * Unlock the original cluster last as the lock blocked races against
         * the creation of the new hardlink target.
         */
-       if (nchain)
-               hammer2_chain_unlock(nchain);
+       if (ncluster)
+               hammer2_cluster_unlock(ncluster);
 
 done:
        /*
-        * Cleanup, chain/nchain already dealt with.
+        * Cleanup, cluster/ncluster already dealt with.
         */
-       *chainp = chain;
+       *clusterp = cluster;
        hammer2_inode_drop(cdip);
 
        return (error);
@@ -1618,40 +1660,47 @@ hammer2_hardlink_deconsolidate(hammer2_trans_t *trans,
  * locked.
  */
 int
-hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp,
-                     hammer2_chain_t **ochainp)
+hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_cluster_t *cluster)
 {
-       hammer2_chain_t *chain = *chainp;
-       hammer2_chain_t *parent;
+       hammer2_inode_data_t *ipdata;
+       hammer2_cluster_t *cparent;
+       hammer2_cluster_t *rcluster;
        hammer2_inode_t *ip;
        hammer2_inode_t *pip;
        hammer2_key_t key_dummy;
        hammer2_key_t lhc;
-       int cache_index = -1;
+       int ddflag;
 
        pip = dip;
        hammer2_inode_ref(pip);         /* for loop */
-       hammer2_chain_ref(chain);       /* for (*ochainp) */
-       *ochainp = chain;
 
        /*
-        * Locate the hardlink.  pip is referenced and not locked,
-        * ipp.
-        *
-        * chain is reused.
+        * Locate the hardlink.  pip is referenced and not locked.
         */
-       lhc = chain->data->ipdata.inum;
-       hammer2_chain_unlock(chain);
-       chain = NULL;
+       ipdata = &hammer2_cluster_data(cluster)->ipdata;
+       lhc = ipdata->inum;
+
+       /*
+        * We don't need the cluster's chains, but we need to retain the
+        * cluster structure itself so we can load the hardlink search
+        * result into it.
+        */
+       KKASSERT(cluster->refs == 1);
+       atomic_add_int(&cluster->refs, 1);
+       hammer2_cluster_unlock(cluster);        /* hack */
+       cluster->nchains = 0;                   /* hack */
+
+       rcluster = NULL;
 
        while ((ip = pip) != NULL) {
-               parent = hammer2_inode_lock_ex(ip);
+               cparent = hammer2_inode_lock_ex(ip);
                hammer2_inode_drop(ip);                 /* loop */
-               KKASSERT(parent->bref.type == HAMMER2_BREF_TYPE_INODE);
-               chain = hammer2_chain_lookup(&parent, &key_dummy,
-                                            lhc, lhc, &cache_index, 0);
-               hammer2_chain_lookup_done(parent);      /* discard parent */
-               if (chain)
+               KKASSERT(hammer2_cluster_type(cparent) ==
+                        HAMMER2_BREF_TYPE_INODE);
+               rcluster = hammer2_cluster_lookup(cparent, &key_dummy,
+                                            lhc, lhc, 0, &ddflag);
+               hammer2_cluster_lookup_done(cparent);   /* discard parent */
+               if (rcluster)
                        break;
                pip = ip->pip;          /* safe, ip held locked */
                if (pip)
@@ -1663,14 +1712,14 @@ hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp,
         * chain is locked, ip is locked.  Unlock ip, return the locked
         * chain.  *ipp is already set w/a ref count and not locked.
         *
-        * (parent is already unlocked).
+        * (cparent is already unlocked).
         */
        if (ip)
                hammer2_inode_unlock_ex(ip, NULL);
-       *chainp = chain;
-       if (chain) {
-               KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
-               /* already locked */
+
+       if (rcluster) {
+               hammer2_cluster_replace(cluster, rcluster);
+               hammer2_cluster_drop(rcluster);
                return (0);
        } else {
                return (EIO);
@@ -1732,24 +1781,24 @@ hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
  */
 void
 hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip, 
-                   hammer2_chain_t **chainp)
+                   hammer2_cluster_t *cparent)
 {
        hammer2_inode_data_t *ipdata;
-       hammer2_chain_t *parent;
-       hammer2_chain_t *chain;
+       hammer2_cluster_t *dparent;
+       hammer2_cluster_t *cluster;
        hammer2_key_t lbase;
        hammer2_key_t key_next;
-       int cache_index;
+       int ddflag;
 
-       ipdata = &ip->chain->data->ipdata;
+       ipdata = &hammer2_cluster_data(cparent)->ipdata;    /* target file */
 
        if (ip->flags & HAMMER2_INODE_MTIME) {
-               ipdata = hammer2_chain_modify_ip(trans, ip, chainp, 0);
+               ipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
                atomic_clear_int(&ip->flags, HAMMER2_INODE_MTIME);
                ipdata->mtime = ip->mtime;
        }
        if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size < ipdata->size) {
-               ipdata = hammer2_chain_modify_ip(trans, ip, chainp, 0);
+               ipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
                ipdata->size = ip->size;
                atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);
 
@@ -1759,31 +1808,35 @@ hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip,
                 */
                lbase = (ipdata->size + HAMMER2_PBUFMASK64) &
                        ~HAMMER2_PBUFMASK64;
-               parent = hammer2_chain_lookup_init(ip->chain, 0);
-               chain = hammer2_chain_lookup(&parent, &key_next,
-                                            lbase, (hammer2_key_t)-1,
-                                            &cache_index,
-                                            HAMMER2_LOOKUP_NODATA);
-               while (chain) {
+               dparent = hammer2_cluster_lookup_init(&ip->cluster, 0);
+               cluster = hammer2_cluster_lookup(dparent, &key_next,
+                                                lbase, (hammer2_key_t)-1,
+                                                HAMMER2_LOOKUP_NODATA,
+                                                &ddflag);
+               while (cluster) {
                        /*
                         * Degenerate embedded case, nothing to loop on
                         */
-                       if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
-                               hammer2_chain_unlock(chain);
+                       switch (hammer2_cluster_type(cluster)) {
+                       case HAMMER2_BREF_TYPE_INODE:
+                               hammer2_cluster_unlock(cluster);
+                               cluster = NULL;
                                break;
-                       }
-                       if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
-                               hammer2_chain_delete(trans, chain, 0);
-                       }
-                       chain = hammer2_chain_next(&parent, chain, &key_next,
+                       case HAMMER2_BREF_TYPE_DATA:
+                               hammer2_cluster_delete(trans, cluster, 0);
+                               /* fall through */
+                       default:
+                               cluster = hammer2_cluster_next(dparent, cluster,
+                                                  &key_next,
                                                   key_next, (hammer2_key_t)-1,
-                                                  &cache_index,
                                                   HAMMER2_LOOKUP_NODATA);
+                               break;
+                       }
                }
-               hammer2_chain_lookup_done(parent);
+               hammer2_cluster_lookup_done(dparent);
        } else
        if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size > ipdata->size) {
-               ipdata = hammer2_chain_modify_ip(trans, ip, chainp, 0);
+               ipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
                ipdata->size = ip->size;
                atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);
 
index 1b52ed5..b257aec 100644 (file)
@@ -451,9 +451,12 @@ hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize,
 
 void
 hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize,
-                 void (*callback)(hammer2_io_t *dio, hammer2_chain_t *arg_c,
+                 void (*callback)(hammer2_io_t *dio,
+                                  hammer2_cluster_t *arg_l,
+                                  hammer2_chain_t *arg_c,
                                   void *arg_p, off_t arg_o),
-                 hammer2_chain_t *arg_c, void *arg_p, off_t arg_o)
+                 hammer2_cluster_t *arg_l, hammer2_chain_t *arg_c,
+                 void *arg_p, off_t arg_o)
 {
        hammer2_io_t *dio;
        int owner;
@@ -462,6 +465,7 @@ hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize,
        dio = hammer2_io_getblk(hmp, lbase, lsize, &owner);
        if (owner) {
                dio->callback = callback;
+               dio->arg_l = arg_l;
                dio->arg_c = arg_c;
                dio->arg_p = arg_p;
                dio->arg_o = arg_o;
@@ -469,7 +473,7 @@ hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize,
                        hammer2_io_callback, dio);
        } else {
                error = 0;
-               callback(dio, arg_c, arg_p, arg_o);
+               callback(dio, arg_l, arg_c, arg_p, arg_o);
                hammer2_io_bqrelse(&dio);
        }
 }
@@ -491,7 +495,7 @@ hammer2_io_callback(struct bio *bio)
         * We still have the ref and DIO_GOOD is now set so nothing else
         * should mess with the callback fields until we release the dio.
         */
-       dio->callback(dio, dio->arg_c, dio->arg_p, dio->arg_o);
+       dio->callback(dio, dio->arg_l, dio->arg_c, dio->arg_p, dio->arg_o);
        hammer2_io_bqrelse(&dio);
        /* TODO: async load meta-data and assign chain->dio */
 }
index 05bedfc..0a5c9bc 100644 (file)
@@ -154,7 +154,7 @@ hammer2_ioctl(hammer2_inode_t *ip, u_long com, void *data, int fflag,
 static int
 hammer2_ioctl_version_get(hammer2_inode_t *ip, void *data)
 {
-       hammer2_mount_t *hmp = ip->pmp->cluster.chains[0]->hmp;
+       hammer2_mount_t *hmp = ip->pmp->cluster.focus->hmp;
        hammer2_ioc_version_t *version = data;
 
        version->version = hmp->voldata.version;
@@ -183,7 +183,7 @@ hammer2_ioctl_recluster(hammer2_inode_t *ip, void *data)
 static int
 hammer2_ioctl_remote_scan(hammer2_inode_t *ip, void *data)
 {
-       hammer2_mount_t *hmp = ip->pmp->cluster.chains[0]->hmp;
+       hammer2_mount_t *hmp = ip->pmp->cluster.focus->hmp;
        hammer2_ioc_remote_t *remote = data;
        int copyid = remote->copyid;
 
@@ -224,7 +224,7 @@ hammer2_ioctl_remote_add(hammer2_inode_t *ip, void *data)
        if (copyid >= HAMMER2_COPYID_COUNT)
                return (EINVAL);
 
-       hmp = pmp->cluster.chains[0]->hmp; /* XXX */
+       hmp = pmp->cluster.focus->hmp; /* XXX */
        hammer2_voldata_lock(hmp);
        if (copyid < 0) {
                for (copyid = 1; copyid < HAMMER2_COPYID_COUNT; ++copyid) {
@@ -257,7 +257,7 @@ hammer2_ioctl_remote_del(hammer2_inode_t *ip, void *data)
        int copyid = remote->copyid;
        int error = 0;
 
-       hmp = pmp->cluster.chains[0]->hmp; /* XXX */
+       hmp = pmp->cluster.focus->hmp; /* XXX */
        if (copyid >= HAMMER2_COPYID_COUNT)
                return (EINVAL);
        remote->copy1.path[sizeof(remote->copy1.path) - 1] = 0;
@@ -294,7 +294,7 @@ hammer2_ioctl_remote_rep(hammer2_inode_t *ip, void *data)
        hammer2_mount_t *hmp;
        int copyid = remote->copyid;
 
-       hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */
+       hmp = ip->pmp->cluster.focus->hmp; /* XXX */
 
        if (copyid < 0 || copyid >= HAMMER2_COPYID_COUNT)
                return (EINVAL);
@@ -325,7 +325,7 @@ hammer2_ioctl_socket_set(hammer2_inode_t *ip, void *data)
        hammer2_mount_t *hmp;
        int copyid = remote->copyid;
 
-       hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */
+       hmp = ip->pmp->cluster.focus->hmp; /* XXX */
        if (copyid < 0 || copyid >= HAMMER2_COPYID_COUNT)
                return (EINVAL);
 
@@ -352,49 +352,52 @@ hammer2_ioctl_pfs_get(hammer2_inode_t *ip, void *data)
        hammer2_inode_data_t *ipdata;
        hammer2_mount_t *hmp;
        hammer2_ioc_pfs_t *pfs;
-       hammer2_chain_t *parent;
-       hammer2_chain_t *chain;
-       hammer2_chain_t *rchain;
+       hammer2_cluster_t *cparent;
+       hammer2_cluster_t *rcluster;
+       hammer2_cluster_t *cluster;
        hammer2_key_t key_next;
        int error;
-       int cache_index = -1;
+       int ddflag;
 
        error = 0;
-       hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */
+       hmp = ip->pmp->cluster.focus->hmp; /* XXX */
        pfs = data;
-       parent = hammer2_inode_lock_ex(hmp->sroot);
-       rchain = hammer2_inode_lock_ex(ip->pmp->iroot);
+       cparent = hammer2_inode_lock_ex(hmp->sroot);
+       rcluster = hammer2_inode_lock_ex(ip->pmp->iroot);
 
        /*
         * Search for the first key or specific key.  Remember that keys
         * can be returned in any order.
         */
        if (pfs->name_key == 0) {
-               chain = hammer2_chain_lookup(&parent, &key_next,
-                                            0, (hammer2_key_t)-1,
-                                            &cache_index, 0);
+               cluster = hammer2_cluster_lookup(cparent, &key_next,
+                                                0, (hammer2_key_t)-1,
+                                                0, &ddflag);
        } else if (pfs->name_key == (hammer2_key_t)-1) {
-               chain = hammer2_chain_lookup(&parent, &key_next,
-                                            rchain->data->ipdata.name_key,
-                                            rchain->data->ipdata.name_key,
-                                            &cache_index, 0);
+               ipdata = &hammer2_cluster_data(rcluster)->ipdata;
+               cluster = hammer2_cluster_lookup(cparent, &key_next,
+                                                ipdata->name_key,
+                                                ipdata->name_key,
+                                                0, &ddflag);
+               ipdata = NULL;  /* safety */
        } else {
-               chain = hammer2_chain_lookup(&parent, &key_next,
-                                            pfs->name_key, pfs->name_key,
-                                            &cache_index, 0);
+               cluster = hammer2_cluster_lookup(cparent, &key_next,
+                                                pfs->name_key, pfs->name_key,
+                                                0, &ddflag);
        }
-       hammer2_inode_unlock_ex(ip->pmp->iroot, rchain);
+       hammer2_inode_unlock_ex(ip->pmp->iroot, rcluster);
 
-       while (chain && chain->bref.type != HAMMER2_BREF_TYPE_INODE) {
-               chain = hammer2_chain_next(&parent, chain, &key_next,
-                                          key_next, (hammer2_key_t)-1,
-                                          &cache_index, 0);
+       while (cluster &&
+              hammer2_cluster_type(cluster) != HAMMER2_BREF_TYPE_INODE) {
+               cluster = hammer2_cluster_next(cparent, cluster, &key_next,
+                                              key_next, (hammer2_key_t)-1,
+                                              0);
        }
-       if (chain) {
+       if (cluster) {
                /*
                 * Load the data being returned by the ioctl.
                 */
-               ipdata = &chain->data->ipdata;
+               ipdata = &hammer2_cluster_data(cluster)->ipdata;
                pfs->name_key = ipdata->name_key;
                pfs->pfs_type = ipdata->pfs_type;
                pfs->pfs_clid = ipdata->pfs_clid;
@@ -408,13 +411,17 @@ hammer2_ioctl_pfs_get(hammer2_inode_t *ip, void *data)
                 * Calculate the next field
                 */
                do {
-                       chain = hammer2_chain_next(&parent, chain, &key_next,
-                                                  0, (hammer2_key_t)-1,
-                                                  &cache_index, 0);
-               } while (chain && chain->bref.type != HAMMER2_BREF_TYPE_INODE);
-               if (chain) {
-                       pfs->name_next = chain->data->ipdata.name_key;
-                       hammer2_chain_unlock(chain);
+                       cluster = hammer2_cluster_next(cparent, cluster,
+                                                      &key_next,
+                                                      0, (hammer2_key_t)-1,
+                                                      0);
+               } while (cluster &&
+                        hammer2_cluster_type(cluster) !=
+                         HAMMER2_BREF_TYPE_INODE);
+               if (cluster) {
+                       ipdata = &hammer2_cluster_data(cluster)->ipdata;
+                       pfs->name_next = ipdata->name_key;
+                       hammer2_cluster_unlock(cluster);
                } else {
                        pfs->name_next = (hammer2_key_t)-1;
                }
@@ -422,7 +429,7 @@ hammer2_ioctl_pfs_get(hammer2_inode_t *ip, void *data)
                pfs->name_next = (hammer2_key_t)-1;
                error = ENOENT;
        }
-       hammer2_inode_unlock_ex(hmp->sroot, parent);
+       hammer2_inode_unlock_ex(hmp->sroot, cparent);
 
        return (error);
 }
@@ -436,54 +443,57 @@ hammer2_ioctl_pfs_lookup(hammer2_inode_t *ip, void *data)
        hammer2_inode_data_t *ipdata;
        hammer2_mount_t *hmp;
        hammer2_ioc_pfs_t *pfs;
-       hammer2_chain_t *parent;
-       hammer2_chain_t *chain;
+       hammer2_cluster_t *cparent;
+       hammer2_cluster_t *cluster;
        hammer2_key_t key_next;
        hammer2_key_t lhc;
        int error;
-       int cache_index = -1;
+       int ddflag;
        size_t len;
 
        error = 0;
-       hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */
+       hmp = ip->pmp->cluster.focus->hmp; /* XXX */
        pfs = data;
-       parent = hammer2_inode_lock_sh(hmp->sroot);
+       cparent = hammer2_inode_lock_sh(hmp->sroot);
 
        pfs->name[sizeof(pfs->name) - 1] = 0;
        len = strlen(pfs->name);
        lhc = hammer2_dirhash(pfs->name, len);
 
-       chain = hammer2_chain_lookup(&parent, &key_next,
-                                    lhc, lhc + HAMMER2_DIRHASH_LOMASK,
-                                    &cache_index, HAMMER2_LOOKUP_SHARED);
-       while (chain) {
-               if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
-                   len == chain->data->ipdata.name_len &&
-                   bcmp(pfs->name, chain->data->ipdata.filename, len) == 0) {
-                       break;
+       cluster = hammer2_cluster_lookup(cparent, &key_next,
+                                        lhc, lhc + HAMMER2_DIRHASH_LOMASK,
+                                        HAMMER2_LOOKUP_SHARED, &ddflag);
+       while (cluster) {
+               if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
+                       ipdata = &hammer2_cluster_data(cluster)->ipdata;
+                       if (ipdata->name_len == len &&
+                           bcmp(ipdata->filename, pfs->name, len) == 0) {
+                               break;
+                       }
+                       ipdata = NULL;  /* safety */
                }
-               chain = hammer2_chain_next(&parent, chain, &key_next,
+               cluster = hammer2_cluster_next(cparent, cluster, &key_next,
                                           key_next,
                                           lhc + HAMMER2_DIRHASH_LOMASK,
-                                          &cache_index, HAMMER2_LOOKUP_SHARED);
+                                          HAMMER2_LOOKUP_SHARED);
        }
 
        /*
         * Load the data being returned by the ioctl.
         */
-       if (chain) {
-               ipdata = &chain->data->ipdata;
+       if (cluster) {
+               ipdata = &hammer2_cluster_data(cluster)->ipdata;
                pfs->name_key = ipdata->name_key;
                pfs->pfs_type = ipdata->pfs_type;
                pfs->pfs_clid = ipdata->pfs_clid;
                pfs->pfs_fsid = ipdata->pfs_fsid;
                ipdata = NULL;
 
-               hammer2_chain_unlock(chain);
+               hammer2_cluster_unlock(cluster);
        } else {
                error = ENOENT;
        }
-       hammer2_inode_unlock_sh(hmp->sroot, parent);
+       hammer2_inode_unlock_sh(hmp->sroot, cparent);
 
        return (error);
 }
@@ -498,11 +508,11 @@ hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
        hammer2_mount_t *hmp;
        hammer2_ioc_pfs_t *pfs;
        hammer2_inode_t *nip;
-       hammer2_chain_t *nchain;
+       hammer2_cluster_t *ncluster;
        hammer2_trans_t trans;
        int error;
 
-       hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */
+       hmp = ip->pmp->cluster.focus->hmp; /* XXX */
        pfs = data;
        nip = NULL;
 
@@ -513,9 +523,9 @@ hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
        hammer2_trans_init(&trans, ip->pmp, NULL, HAMMER2_TRANS_NEWINODE);
        nip = hammer2_inode_create(&trans, hmp->sroot, NULL, NULL,
                                     pfs->name, strlen(pfs->name),
-                                    &nchain, &error);
+                                    &ncluster, &error);
        if (error == 0) {
-               nipdata = hammer2_chain_modify_ip(&trans, nip, &nchain,
+               nipdata = hammer2_cluster_modify_ip(&trans, nip, ncluster,
                                                  HAMMER2_MODIFY_ASSERTNOCOPY);
                nipdata->pfs_type = pfs->pfs_type;
                nipdata->pfs_clid = pfs->pfs_clid;
@@ -527,9 +537,10 @@ hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data)
                 */
                if (strcmp(pfs->name, "boot") == 0)
                        nipdata->comp_algo = HAMMER2_COMP_AUTOZERO;
-               hammer2_inode_unlock_ex(nip, nchain);
+               hammer2_inode_unlock_ex(nip, ncluster);
        }
        hammer2_trans_done(&trans);
+
        return (error);
 }
 
@@ -544,7 +555,7 @@ hammer2_ioctl_pfs_delete(hammer2_inode_t *ip, void *data)
        hammer2_trans_t trans;
        int error;
 
-       hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */
+       hmp = ip->pmp->cluster.focus->hmp; /* XXX */
        hammer2_trans_init(&trans, ip->pmp, NULL, 0);
        error = hammer2_unlink_file(&trans, hmp->sroot,
                                    pfs->name, strlen(pfs->name),
@@ -559,7 +570,7 @@ hammer2_ioctl_pfs_snapshot(hammer2_inode_t *ip, void *data)
 {
        hammer2_ioc_pfs_t *pfs = data;
        hammer2_trans_t trans;
-       hammer2_chain_t *parent;
+       hammer2_cluster_t *cparent;
        int error;
 
        if (pfs->name[0] == 0)
@@ -570,9 +581,9 @@ hammer2_ioctl_pfs_snapshot(hammer2_inode_t *ip, void *data)
        hammer2_vfs_sync(ip->pmp->mp, MNT_WAIT);
 
        hammer2_trans_init(&trans, ip->pmp, NULL, HAMMER2_TRANS_NEWINODE);
-       parent = hammer2_inode_lock_ex(ip);
-       error = hammer2_chain_snapshot(&trans, &parent, pfs);
-       hammer2_inode_unlock_ex(ip, parent);
+       cparent = hammer2_inode_lock_ex(ip);
+       error = hammer2_cluster_snapshot(&trans, cparent, pfs);
+       hammer2_inode_unlock_ex(ip, cparent);
        hammer2_trans_done(&trans);
 
        return (error);
@@ -585,12 +596,14 @@ static int
 hammer2_ioctl_inode_get(hammer2_inode_t *ip, void *data)
 {
        hammer2_ioc_inode_t *ino = data;
-       hammer2_chain_t *parent;
+       hammer2_inode_data_t *ipdata;   /* inode data reached through cparent */
+       hammer2_cluster_t *cparent;     /* cluster handed back by the shared lock */
 
-       parent = hammer2_inode_lock_sh(ip);
-       ino->ip_data = ip->chain->data->ipdata;
+       cparent = hammer2_inode_lock_sh(ip);    /* released by unlock_sh below */
+       ipdata = &hammer2_cluster_data(cparent)->ipdata;
+       ino->ip_data = *ipdata; /* struct copy into the caller's ioctl buffer */
        ino->kdata = ip;
-       hammer2_inode_unlock_sh(ip, parent);
+       hammer2_inode_unlock_sh(ip, cparent);   /* pairs with lock_sh above */
 
        return (0);
 }
@@ -604,15 +617,16 @@ hammer2_ioctl_inode_set(hammer2_inode_t *ip, void *data)
 {
        hammer2_inode_data_t *ipdata;
        hammer2_ioc_inode_t *ino = data;
-       hammer2_chain_t *chain;
+       hammer2_cluster_t *cparent;
        hammer2_trans_t trans;
        int error = 0;
 
        hammer2_trans_init(&trans, ip->pmp, NULL, 0);
-       chain = hammer2_inode_lock_ex(ip);
+       cparent = hammer2_inode_lock_ex(ip);
+       ipdata = &hammer2_cluster_data(cparent)->ipdata;
 
-       if (ino->ip_data.comp_algo != chain->data->ipdata.comp_algo) {
-               ipdata = hammer2_chain_modify_ip(&trans, ip, &chain, 0);
+       if (ino->ip_data.comp_algo != ipdata->comp_algo) {
+               ipdata = hammer2_cluster_modify_ip(&trans, ip, cparent, 0);
                ipdata->comp_algo = ino->ip_data.comp_algo;
        }
        ino->kdata = ip;
@@ -625,7 +639,7 @@ hammer2_ioctl_inode_set(hammer2_inode_t *ip, void *data)
        if (ino->flags & HAMMER2IOC_INODE_FLAG_COPIES) {
        }
        hammer2_trans_done(&trans);
-       hammer2_inode_unlock_ex(ip, chain);
+       hammer2_inode_unlock_ex(ip, cparent);
 
        return (error);
 }
index 5c934a4..0cb517a 100644 (file)
@@ -87,14 +87,12 @@ hammer2_voldata_unlock(hammer2_mount_t *hmp, int modify)
  * ip must be locked sh/ex.
  */
 int
-hammer2_get_dtype(hammer2_chain_t *chain)
+hammer2_get_dtype(hammer2_inode_data_t *ipdata)
 {
        uint8_t type;
 
-       KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
-
-       if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK)
-               type = chain->data->ipdata.target_type;
+       if ((type = ipdata->type) == HAMMER2_OBJTYPE_HARDLINK)
+               type = ipdata->target_type;
 
        switch(type) {
        case HAMMER2_OBJTYPE_UNKNOWN:
@@ -127,11 +125,9 @@ hammer2_get_dtype(hammer2_chain_t *chain)
  * Return the directory entry type for an inode
  */
 int
-hammer2_get_vtype(hammer2_chain_t *chain)
+hammer2_get_vtype(hammer2_inode_data_t *ipdata)
 {
-       KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
-
-       switch(chain->data->ipdata.type) {
+       switch(ipdata->type) {
        case HAMMER2_OBJTYPE_UNKNOWN:
                return (VBAD);
        case HAMMER2_OBJTYPE_DIRECTORY:
@@ -387,18 +383,19 @@ hammer2_calc_logical(hammer2_inode_t *ip, hammer2_off_t uoff,
  * Returns 0 if the requested base offset is beyond the file EOF.
  */
 int
-hammer2_calc_physical(hammer2_inode_t *ip, hammer2_key_t lbase)
+hammer2_calc_physical(hammer2_inode_t *ip, hammer2_inode_data_t *ipdata,
+                     hammer2_key_t lbase)
 {
        int lblksize;
        int pblksize;
        int eofbytes;
 
        lblksize = hammer2_calc_logical(ip, lbase, NULL, NULL);
-       if (lbase + lblksize <= ip->chain->data->ipdata.size)
+       if (lbase + lblksize <= ipdata->size)
                return (lblksize);
-       if (lbase >= ip->chain->data->ipdata.size)
+       if (lbase >= ipdata->size)
                return (0);
-       eofbytes = (int)(ip->chain->data->ipdata.size - lbase);
+       eofbytes = (int)(ipdata->size - lbase);
        pblksize = lblksize;
        while (pblksize >= eofbytes && pblksize >= HAMMER2_MIN_ALLOC)
                pblksize >>= 1;
@@ -415,3 +412,29 @@ hammer2_update_time(uint64_t *timep)
        getmicrotime(&tv);
        *timep = (unsigned long)tv.tv_sec * 1000000 + tv.tv_usec;
 }
+
+void   /* add 'bytes' to the hammer2_iod_*_read counter chosen by bref->type */
+hammer2_adjreadcounter(hammer2_blockref_t *bref, size_t bytes)
+{
+       long *counterp; /* points at the global counter selected below */
+
+       switch(bref->type) {    /* every path below assigns counterp */
+       case HAMMER2_BREF_TYPE_DATA:
+               counterp = &hammer2_iod_file_read;
+               break;
+       case HAMMER2_BREF_TYPE_INODE:
+               counterp = &hammer2_iod_meta_read;
+               break;
+       case HAMMER2_BREF_TYPE_INDIRECT:
+               counterp = &hammer2_iod_indr_read;
+               break;
+       case HAMMER2_BREF_TYPE_FREEMAP_NODE:    /* both freemap types share */
+       case HAMMER2_BREF_TYPE_FREEMAP_LEAF:    /* the fmap counter */
+               counterp = &hammer2_iod_fmap_read;
+               break;
+       default:        /* any other bref type lands in the volu counter */
+               counterp = &hammer2_iod_volu_read;
+               break;
+       }
+       *counterp += bytes;     /* plain, non-atomic accumulation */
+}
index 4edc569..2190873 100644 (file)
@@ -206,29 +206,29 @@ static void hammer2_vfs_unmount_hmp2(struct mount *mp, hammer2_mount_t *hmp);
 static void hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
                                hammer2_inode_t *ip,
                                hammer2_inode_data_t *ipdata,
-                               hammer2_chain_t **parentp,
+                               hammer2_cluster_t *cparent,
                                hammer2_key_t lbase, int ioflag, int pblksize,
                                int *errorp);
 static void hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
                                hammer2_inode_t *ip,
                                hammer2_inode_data_t *ipdata,
-                               hammer2_chain_t **parentp,
+                               hammer2_cluster_t *cparent,
                                hammer2_key_t lbase, int ioflag,
                                int pblksize, int *errorp, int comp_algo);
 static void hammer2_zero_check_and_write(struct buf *bp,
                                hammer2_trans_t *trans, hammer2_inode_t *ip,
                                hammer2_inode_data_t *ipdata,
-                               hammer2_chain_t **parentp,
+                               hammer2_cluster_t *cparent,
                                hammer2_key_t lbase,
                                int ioflag, int pblksize, int *errorp);
 static int test_block_zeros(const char *buf, size_t bytes);
 static void zero_write(struct buf *bp, hammer2_trans_t *trans,
                                hammer2_inode_t *ip,
                                hammer2_inode_data_t *ipdata,
-                               hammer2_chain_t **parentp, 
+                               hammer2_cluster_t *cparent,
                                hammer2_key_t lbase,
                                int *errorp);
-static void hammer2_write_bp(hammer2_chain_t *chain, struct buf *bp,
+static void hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp,
                                int ioflag, int pblksize, int *errorp);
 
 static int hammer2_rcvdmsg(kdmsg_msg_t *msg);
@@ -343,8 +343,10 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
        struct vnode *devvp;
        struct nlookupdata nd;
        hammer2_chain_t *parent;
-       hammer2_chain_t *schain;
        hammer2_chain_t *rchain;
+       hammer2_chain_t *schain;
+       hammer2_cluster_t *cluster;
+       hammer2_cluster_t *cparent;
        struct file *fp;
        char devstr[MNAMELEN];
        size_t size;
@@ -354,6 +356,7 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
        int ronly = 1;
        int error;
        int cache_index;
+       int ddflag;
        int i;
 
        hmp = NULL;
@@ -401,7 +404,7 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                        /* HAMMER2 implements NFS export via mountctl */
                        pmp = MPTOPMP(mp);
                        for (i = 0; i < pmp->cluster.nchains; ++i) {
-                               hmp = pmp->cluster.chains[i]->hmp;
+                               hmp = pmp->cluster.array[i]->hmp;
                                devvp = hmp->devvp;
                                error = hammer2_remount(hmp, mp, path,
                                                        devvp, cred);
@@ -556,7 +559,7 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                parent = hammer2_chain_lookup_init(&hmp->vchain, 0);
                schain = hammer2_chain_lookup(&parent, &key_dummy,
                                      HAMMER2_SROOT_KEY, HAMMER2_SROOT_KEY,
-                                     &cache_index, 0);
+                                     &cache_index, 0, &ddflag);
                hammer2_chain_lookup_done(parent);
                if (schain == NULL) {
                        kprintf("hammer2_mount: invalid super-root\n");
@@ -571,9 +574,10 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
                 * NOTE: inode_get sucks up schain's lock.
                 */
                atomic_set_int(&schain->flags, HAMMER2_CHAIN_PFSROOT);
-               hmp->sroot = hammer2_inode_get(NULL, NULL, schain);
+               cluster = hammer2_cluster_from_chain(schain);
+               hmp->sroot = hammer2_inode_get(NULL, NULL, cluster);
                hammer2_inode_ref(hmp->sroot);
-               hammer2_inode_unlock_ex(hmp->sroot, schain);
+               hammer2_inode_unlock_ex(hmp->sroot, cluster);
                schain = NULL;
                /* leave hmp->sroot with one ref */
 
@@ -598,6 +602,7 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
        RB_INIT(&pmp->inum_tree);
        TAILQ_INIT(&pmp->unlinkq);
        spin_init(&pmp->unlinkq_spin);
+       pmp->cluster.flags = HAMMER2_CLUSTER_PFS;
 
        kdmsg_iocom_init(&pmp->iocom, pmp,
                         KDMSG_IOCOMF_AUTOCONN |
@@ -634,67 +639,69 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
        /*
         * Lookup mount point under the media-localized super-root.
         */
-       parent = hammer2_inode_lock_ex(hmp->sroot);
+       cparent = hammer2_inode_lock_ex(hmp->sroot);
        lhc = hammer2_dirhash(label, strlen(label));
-       rchain = hammer2_chain_lookup(&parent, &key_next,
+       cluster = hammer2_cluster_lookup(cparent, &key_next,
                                      lhc, lhc + HAMMER2_DIRHASH_LOMASK,
-                                     &cache_index, 0);
-       while (rchain) {
-               if (rchain->bref.type == HAMMER2_BREF_TYPE_INODE &&
-                   strcmp(label, rchain->data->ipdata.filename) == 0) {
+                                     0, &ddflag);
+       while (cluster) {
+               if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE &&
+                   strcmp(label,
+                      hammer2_cluster_data(cluster)->ipdata.filename) == 0) {
                        break;
                }
-               rchain = hammer2_chain_next(&parent, rchain, &key_next,
+               cluster = hammer2_cluster_next(cparent, cluster, &key_next,
                                            key_next,
-                                           lhc + HAMMER2_DIRHASH_LOMASK,
-                                           &cache_index, 0);
+                                           lhc + HAMMER2_DIRHASH_LOMASK, 0);
        }
-       hammer2_inode_unlock_ex(hmp->sroot, parent);
+       hammer2_inode_unlock_ex(hmp->sroot, cparent);
 
-       if (rchain == NULL) {
+       if (cluster == NULL) {
                kprintf("hammer2_mount: PFS label not found\n");
                hammer2_vfs_unmount_hmp1(mp, hmp);
                hammer2_vfs_unmount_hmp2(mp, hmp);
                hammer2_vfs_unmount(mp, MNT_FORCE);
                return EINVAL;
        }
-       if (rchain->flags & HAMMER2_CHAIN_MOUNTED) {
-               hammer2_chain_unlock(rchain);
-               kprintf("hammer2_mount: PFS label already mounted!\n");
-               hammer2_vfs_unmount_hmp1(mp, hmp);
-               hammer2_vfs_unmount_hmp2(mp, hmp);
-               hammer2_vfs_unmount(mp, MNT_FORCE);
-               return EBUSY;
-       }
+
+       for (i = 0; i < cluster->nchains; ++i) {
+               rchain = cluster->array[i];
+               if (rchain->flags & HAMMER2_CHAIN_MOUNTED) {
+                       kprintf("hammer2_mount: PFS label already mounted!\n");
+                       hammer2_cluster_unlock(cluster);
+                       hammer2_vfs_unmount_hmp1(mp, hmp);
+                       hammer2_vfs_unmount_hmp2(mp, hmp);
+                       hammer2_vfs_unmount(mp, MNT_FORCE);
+                       return EBUSY;
+               }
 #if 0
-       if (rchain->flags & HAMMER2_CHAIN_RECYCLE) {
-               kprintf("hammer2_mount: PFS label currently recycling\n");
-               hammer2_vfs_unmount_hmp1(mp, hmp);
-               hammer2_vfs_unmount_hmp2(mp, hmp);
-               hammer2_vfs_unmount(mp, MNT_FORCE);
-               return EBUSY;
-       }
+               if (rchain->flags & HAMMER2_CHAIN_RECYCLE) {
+                       kprintf("hammer2_mount: PFS label is recycling\n");
+                       hammer2_cluster_unlock(cluster);
+                       hammer2_vfs_unmount_hmp1(mp, hmp);
+                       hammer2_vfs_unmount_hmp2(mp, hmp);
+                       hammer2_vfs_unmount(mp, MNT_FORCE);
+                       return EBUSY;
+               }
 #endif
+       }
+
        /*
         * After this point hammer2_vfs_unmount() has visibility on hmp
         * and manual hmp1/hmp2 calls are not needed on fatal errors.
         */
-
-       atomic_set_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED);
-
-       /*
-        * NOTE: *_get() integrates chain's lock into the inode lock.
-        */
-       hammer2_chain_ref(rchain);              /* for pmp->rchain */
-       pmp->cluster.nchains = 1;
-       pmp->cluster.chains[0] = rchain;
-       pmp->iroot = hammer2_inode_get(pmp, NULL, rchain);
+       pmp->cluster = *cluster;
+       KKASSERT(pmp->cluster.refs == 1);
+       for (i = 0; i < cluster->nchains; ++i) {
+               rchain = cluster->array[i];
+               KKASSERT(rchain->pmp == NULL);  /* tracking pmp for rchain */
+               rchain->pmp = pmp;
+               atomic_set_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED);
+               hammer2_chain_ref(rchain);      /* ref for pmp->cluster */
+       }
+       pmp->iroot = hammer2_inode_get(pmp, NULL, cluster);
        hammer2_inode_ref(pmp->iroot);          /* ref for pmp->iroot */
-
-       KKASSERT(rchain->pmp == NULL);          /* tracking pmp for rchain */
-       rchain->pmp = pmp;
-
-       hammer2_inode_unlock_ex(pmp->iroot, rchain);
+       hammer2_inode_unlock_ex(pmp->iroot, cluster);
 
        kprintf("iroot %p\n", pmp->iroot);
 
@@ -761,8 +768,7 @@ hammer2_write_thread(void *arg)
        hammer2_trans_t trans;
        struct vnode *vp;
        hammer2_inode_t *ip;
-       hammer2_chain_t *parent;
-       hammer2_chain_t **parentp;
+       hammer2_cluster_t *cparent;
        hammer2_inode_data_t *ipdata;
        hammer2_key_t lbase;
        int lblksize;
@@ -777,8 +783,7 @@ hammer2_write_thread(void *arg)
                        mtxsleep(&pmp->wthread_bioq, &pmp->wthread_mtx,
                                 0, "h2bioqw", 0);
                }
-               parent = NULL;
-               parentp = &parent;
+               cparent = NULL;
 
                hammer2_trans_init(&trans, pmp, NULL, HAMMER2_TRANS_BUFCACHE);
 
@@ -817,21 +822,21 @@ hammer2_write_thread(void *arg)
                         *       inode's meta-data state, it doesn't try
                         *       to flush underlying buffers or chains.
                         */
-                       parent = hammer2_inode_lock_ex(ip);
+                       cparent = hammer2_inode_lock_ex(ip);
                        if (ip->flags & (HAMMER2_INODE_RESIZED |
                                         HAMMER2_INODE_MTIME)) {
-                               hammer2_inode_fsync(&trans, ip, parentp);
+                               hammer2_inode_fsync(&trans, ip, cparent);
                        }
-                       ipdata = hammer2_chain_modify_ip(&trans, ip,
-                                                        parentp, 0);
+                       ipdata = hammer2_cluster_modify_ip(&trans, ip,
+                                                        cparent, 0);
                        lblksize = hammer2_calc_logical(ip, bio->bio_offset,
                                                        &lbase, NULL);
-                       pblksize = hammer2_calc_physical(ip, lbase);
+                       pblksize = hammer2_calc_physical(ip, ipdata, lbase);
                        hammer2_write_file_core(bp, &trans, ip, ipdata,
-                                               parentp,
+                                               cparent,
                                                lbase, IO_ASYNC,
                                                pblksize, &error);
-                       hammer2_inode_unlock_ex(ip, parent);
+                       hammer2_inode_unlock_ex(ip, cparent);
                        if (error) {
                                kprintf("hammer2: error in buffer write\n");
                                bp->b_flags |= B_ERROR;
@@ -869,17 +874,15 @@ hammer2_bioq_sync(hammer2_pfsmount_t *pmp)
  * and assigning its physical block.
  */
 static
-hammer2_chain_t *
+hammer2_cluster_t *
 hammer2_assign_physical(hammer2_trans_t *trans,
-                       hammer2_inode_t *ip, hammer2_chain_t **parentp,
+                       hammer2_inode_t *ip, hammer2_cluster_t *cparent,
                        hammer2_key_t lbase, int pblksize, int *errorp)
 {
-       hammer2_chain_t *parent;
-       hammer2_chain_t *chain;
-       hammer2_off_t pbase;
+       hammer2_cluster_t *cluster;
        hammer2_key_t key_dummy;
        int pradix = hammer2_getradix(pblksize);
-       int cache_index = -1;
+       int ddflag;
 
        /*
         * Locate the chain associated with lbase, return a locked chain.
@@ -890,34 +893,31 @@ hammer2_assign_physical(hammer2_trans_t *trans,
        *errorp = 0;
        KKASSERT(pblksize >= HAMMER2_MIN_ALLOC);
 retry:
-       parent = *parentp;
-       hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); /* extra lock */
-       chain = hammer2_chain_lookup(&parent, &key_dummy,
+       hammer2_cluster_lock(cparent, HAMMER2_RESOLVE_ALWAYS); /* extra lock */
+       cluster = hammer2_cluster_lookup(cparent, &key_dummy,
                                     lbase, lbase,
-                                    &cache_index, HAMMER2_LOOKUP_NODATA);
+                                    HAMMER2_LOOKUP_NODATA, &ddflag);
 
-       if (chain == NULL) {
+       if (cluster == NULL) {
                /*
                 * We found a hole, create a new chain entry.
                 *
                 * NOTE: DATA chains are created without device backing
                 *       store (nor do we want any).
                 */
-               *errorp = hammer2_chain_create(trans, &parent, &chain,
+               *errorp = hammer2_cluster_create(trans, cparent, &cluster,
                                               lbase, HAMMER2_PBUFRADIX,
                                               HAMMER2_BREF_TYPE_DATA,
                                               pblksize);
-               if (chain == NULL) {
-                       hammer2_chain_lookup_done(parent);
-                       panic("hammer2_chain_create: par=%p error=%d\n",
-                               parent, *errorp);
+               if (cluster == NULL) {
+                       hammer2_cluster_lookup_done(cparent);
+                       panic("hammer2_cluster_create: par=%p error=%d\n",
+                               cparent->focus, *errorp);
                        goto retry;
                }
-
-               pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
                /*ip->delta_dcount += pblksize;*/
        } else {
-               switch (chain->bref.type) {
+               switch (hammer2_cluster_type(cluster)) {
                case HAMMER2_BREF_TYPE_INODE:
                        /*
                         * The data is embedded in the inode.  The
@@ -925,43 +925,35 @@ retry:
                         * modified and copying the data to the embedded
                         * area.
                         */
-                       pbase = NOOFFSET;
                        break;
                case HAMMER2_BREF_TYPE_DATA:
-                       if (chain->bytes != pblksize) {
-                               hammer2_chain_resize(trans, ip,
-                                                    parent, &chain,
+                       if (hammer2_cluster_bytes(cluster) != pblksize) {
+                               hammer2_cluster_resize(trans, ip,
+                                                    cparent, cluster,
                                                     pradix,
                                                     HAMMER2_MODIFY_OPTDATA);
                        }
-                       hammer2_chain_modify(trans, &chain,
+                       hammer2_cluster_modify(trans, cluster,
                                             HAMMER2_MODIFY_OPTDATA);
-                       pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
                        break;
                default:
                        panic("hammer2_assign_physical: bad type");
                        /* NOT REACHED */
-                       pbase = NOOFFSET;
                        break;
                }
        }
 
        /*
         * Cleanup.  If chain wound up being the inode (i.e. DIRECTDATA),
-        * we might have to replace *parentp.
+        * we need to update cparent.  The caller expects cparent to not
+        * become stale.
         */
-       hammer2_chain_lookup_done(parent);
-       if (chain) {
-               if (*parentp != chain &&
-                   (*parentp)->core == chain->core) {
-                       parent = *parentp;
-                       *parentp = chain;               /* eats lock */
-                       hammer2_chain_unlock(parent);
-                       hammer2_chain_lock(chain, 0);   /* need another */
-               }
-               /* else chain already locked for return */
+       hammer2_cluster_lookup_done(cparent);
+       if (cluster && ddflag) {
+               kprintf("replace parent XXX\n");
+               hammer2_cluster_replace_locked(cparent, cluster);
        }
-       return (chain);
+       return (cluster);
 }
 
 /* 
@@ -973,11 +965,11 @@ static
 void
 hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
                        hammer2_inode_t *ip, hammer2_inode_data_t *ipdata,
-                       hammer2_chain_t **parentp,
+                       hammer2_cluster_t *cparent,
                        hammer2_key_t lbase, int ioflag, int pblksize,
                        int *errorp)
 {
-       hammer2_chain_t *chain;
+       hammer2_cluster_t *cluster;
 
        switch(HAMMER2_DEC_COMP(ipdata->comp_algo)) {
        case HAMMER2_COMP_NONE:
@@ -989,19 +981,19 @@ hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
                 * This can return NOOFFSET for inode-embedded data.
                 * The strategy code will take care of it in that case.
                 */
-               chain = hammer2_assign_physical(trans, ip, parentp,
+               cluster = hammer2_assign_physical(trans, ip, cparent,
                                                lbase, pblksize,
                                                errorp);
-               hammer2_write_bp(chain, bp, ioflag, pblksize, errorp);
-               if (chain)
-                       hammer2_chain_unlock(chain);
+               hammer2_write_bp(cluster, bp, ioflag, pblksize, errorp);
+               if (cluster)
+                       hammer2_cluster_unlock(cluster);
                break;
        case HAMMER2_COMP_AUTOZERO:
                /*
                 * Check for zero-fill only
                 */
                hammer2_zero_check_and_write(bp, trans, ip,
-                                   ipdata, parentp, lbase,
+                                   ipdata, cparent, lbase,
                                    ioflag, pblksize, errorp);
                break;
        case HAMMER2_COMP_LZ4:
@@ -1011,17 +1003,15 @@ hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans,
                 * Check for zero-fill and attempt compression.
                 */
                hammer2_compress_and_write(bp, trans, ip,
-                                          ipdata, parentp,
+                                          ipdata, cparent,
                                           lbase, ioflag,
                                           pblksize, errorp,
                                           ipdata->comp_algo);
                break;
        }
-       /* ipdata = &ip->chain->data->ipdata;  reload (not needed here) */
 }
 
 /*
- * From hammer2_vnops.c
  * Generic function that will perform the compression in compression
  * write path. The compression algorithm is determined by the settings
  * obtained from inode.
@@ -1030,17 +1020,19 @@ static
 void
 hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
        hammer2_inode_t *ip, hammer2_inode_data_t *ipdata,
-       hammer2_chain_t **parentp,
+       hammer2_cluster_t *cparent,
        hammer2_key_t lbase, int ioflag, int pblksize,
        int *errorp, int comp_algo)
 {
+       hammer2_cluster_t *cluster;
        hammer2_chain_t *chain;
        int comp_size;
        int comp_block_size;
+       int i;
        char *comp_buffer;
 
        if (test_block_zeros(bp->b_data, pblksize)) {
-               zero_write(bp, trans, ip, ipdata, parentp, lbase, errorp);
+               zero_write(bp, trans, ip, ipdata, cparent, lbase, errorp);
                return;
        }
 
@@ -1140,21 +1132,24 @@ hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
                }
        }
 
-       chain = hammer2_assign_physical(trans, ip, parentp,
-                                       lbase, comp_block_size,
-                                       errorp);
-       ipdata = &ip->chain->data->ipdata;      /* RELOAD */
+       cluster = hammer2_assign_physical(trans, ip, cparent,
+                                         lbase, comp_block_size,
+                                         errorp);
+       ipdata = &hammer2_cluster_data(&ip->cluster)->ipdata;
 
        if (*errorp) {
                kprintf("WRITE PATH: An error occurred while "
                        "assigning physical space.\n");
-               KKASSERT(chain == NULL);
-       } else {
-               /* Get device offset */
+               KKASSERT(cluster == NULL);
+               goto done;
+       }
+
+       for (i = 0; i < cluster->nchains; ++i) {
                hammer2_io_t *dio;
                char *bdata;
                int temp_check;
 
+               chain = cluster->array[i];
                KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);
 
                switch(chain->bref.type) {
@@ -1237,6 +1232,7 @@ hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans,
 
                hammer2_chain_unlock(chain);
        }
+done:
        if (comp_buffer)
                objcache_put(cache_buffer_write, comp_buffer);
 }
@@ -1249,19 +1245,19 @@ static
 void
 hammer2_zero_check_and_write(struct buf *bp, hammer2_trans_t *trans,
        hammer2_inode_t *ip, hammer2_inode_data_t *ipdata,
-       hammer2_chain_t **parentp,
+       hammer2_cluster_t *cparent,
        hammer2_key_t lbase, int ioflag, int pblksize, int *errorp)
 {
-       hammer2_chain_t *chain;
+       hammer2_cluster_t *cluster;
 
        if (test_block_zeros(bp->b_data, pblksize)) {
-               zero_write(bp, trans, ip, ipdata, parentp, lbase, errorp);
+               zero_write(bp, trans, ip, ipdata, cparent, lbase, errorp);
        } else {
-               chain = hammer2_assign_physical(trans, ip, parentp,
-                                               lbase, pblksize, errorp);
-               hammer2_write_bp(chain, bp, ioflag, pblksize, errorp);
-               if (chain)
-                       hammer2_chain_unlock(chain);
+               cluster = hammer2_assign_physical(trans, ip, cparent,
+                                                 lbase, pblksize, errorp);
+               hammer2_write_bp(cluster, bp, ioflag, pblksize, errorp);
+               if (cluster)
+                       hammer2_cluster_unlock(cluster);
        }
 }
 
@@ -1288,28 +1284,28 @@ test_block_zeros(const char *buf, size_t bytes)
 static
 void
 zero_write(struct buf *bp, hammer2_trans_t *trans, hammer2_inode_t *ip,
-       hammer2_inode_data_t *ipdata, hammer2_chain_t **parentp,
+       hammer2_inode_data_t *ipdata, hammer2_cluster_t *cparent,
        hammer2_key_t lbase, int *errorp __unused)
 {
-       hammer2_chain_t *parent;
-       hammer2_chain_t *chain;
+       hammer2_cluster_t *cluster;
+       hammer2_media_data_t *data;
        hammer2_key_t key_dummy;
-       int cache_index = -1;
+       int ddflag;
 
-       parent = hammer2_chain_lookup_init(*parentp, 0);
+       cparent = hammer2_cluster_lookup_init(cparent, 0);
+       cluster = hammer2_cluster_lookup(cparent, &key_dummy, lbase, lbase,
+                                    HAMMER2_LOOKUP_NODATA, &ddflag);
+       if (cluster) {
+               data = hammer2_cluster_data(cluster);
 
-       chain = hammer2_chain_lookup(&parent, &key_dummy, lbase, lbase,
-                                    &cache_index, HAMMER2_LOOKUP_NODATA);
-       if (chain) {
-               if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
-                       bzero(chain->data->ipdata.u.data,
-                             HAMMER2_EMBEDDED_BYTES);
+               if (ddflag) {
+                       bzero(data->ipdata.u.data, HAMMER2_EMBEDDED_BYTES);
                } else {
-                       hammer2_chain_delete(trans, chain, 0);
+                       hammer2_cluster_delete(trans, cluster, 0);
                }
-               hammer2_chain_unlock(chain);
+               hammer2_cluster_unlock(cluster);
        }
-       hammer2_chain_lookup_done(parent);
+       hammer2_cluster_lookup_done(cparent);
 }
 
 /*
@@ -1319,66 +1315,81 @@ zero_write(struct buf *bp, hammer2_trans_t *trans, hammer2_inode_t *ip,
  */
 static
 void
-hammer2_write_bp(hammer2_chain_t *chain, struct buf *bp, int ioflag,
+hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp, int ioflag,
                                int pblksize, int *errorp)
 {
+       hammer2_chain_t *chain;
        hammer2_io_t *dio;
        char *bdata;
        int error;
-       int temp_check = HAMMER2_DEC_CHECK(chain->bref.methods);
+       int i;
+       int temp_check;
 
-       KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);
+       error = 0;      /* XXX TODO below */
 
-       switch(chain->bref.type) {
-       case HAMMER2_BREF_TYPE_INODE:
-               KKASSERT(chain->data->ipdata.op_flags &
-                        HAMMER2_OPFLAG_DIRECTDATA);
-               KKASSERT(bp->b_loffset == 0);
-               bcopy(bp->b_data, chain->data->ipdata.u.data,
-                     HAMMER2_EMBEDDED_BYTES);
-               error = 0;
-               break;
-       case HAMMER2_BREF_TYPE_DATA:
-               error = hammer2_io_newnz(chain->hmp, chain->bref.data_off,
-                                        chain->bytes, &dio);
-               if (error) {
-                       hammer2_io_bqrelse(&dio);
-                       kprintf("hammer2: WRITE PATH: dbp bread error\n");
+       for (i = 0; i < cluster->nchains; ++i) {
+               chain = cluster->array[i];
+
+               temp_check = HAMMER2_DEC_CHECK(chain->bref.methods);
+
+               KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED);
+
+               switch(chain->bref.type) {
+               case HAMMER2_BREF_TYPE_INODE:
+                       KKASSERT(chain->data->ipdata.op_flags &
+                                HAMMER2_OPFLAG_DIRECTDATA);
+                       KKASSERT(bp->b_loffset == 0);
+                       bcopy(bp->b_data, chain->data->ipdata.u.data,
+                             HAMMER2_EMBEDDED_BYTES);
+                       error = 0;
                        break;
-               }
-               bdata = hammer2_io_data(dio, chain->bref.data_off);
+               case HAMMER2_BREF_TYPE_DATA:
+                       error = hammer2_io_newnz(chain->hmp,
+                                                chain->bref.data_off,
+                                                chain->bytes, &dio);
+                       if (error) {
+                               hammer2_io_bqrelse(&dio);
+                               kprintf("hammer2: WRITE PATH: "
+                                       "dbp bread error\n");
+                               break;
+                       }
+                       bdata = hammer2_io_data(dio, chain->bref.data_off);
 
-               chain->bref.methods = HAMMER2_ENC_COMP(HAMMER2_COMP_NONE) +
-                                     HAMMER2_ENC_CHECK(temp_check);
-               bcopy(bp->b_data, bdata, chain->bytes);
-               
-               /*
-                * Device buffer is now valid, chain is no
-                * longer in the initial state.
-                */
-               atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
+                       chain->bref.methods = HAMMER2_ENC_COMP(
+                                                       HAMMER2_COMP_NONE) +
+                                             HAMMER2_ENC_CHECK(temp_check);
+                       bcopy(bp->b_data, bdata, chain->bytes);
 
-               if (ioflag & IO_SYNC) {
                        /*
-                        * Synchronous I/O requested.
+                        * Device buffer is now valid, chain is no
+                        * longer in the initial state.
                         */
-                       hammer2_io_bwrite(&dio);
-               /*
-               } else if ((ioflag & IO_DIRECT) && loff + n == pblksize) {
-                       hammer2_io_bdwrite(&dio);
-               */
-               } else if (ioflag & IO_ASYNC) {
-                       hammer2_io_bawrite(&dio);
-               } else {
-                       hammer2_io_bdwrite(&dio);
+                       atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
+
+                       if (ioflag & IO_SYNC) {
+                               /*
+                                * Synchronous I/O requested.
+                                */
+                               hammer2_io_bwrite(&dio);
+                       /*
+                       } else if ((ioflag & IO_DIRECT) &&
+                                  loff + n == pblksize) {
+                               hammer2_io_bdwrite(&dio);
+                       */
+                       } else if (ioflag & IO_ASYNC) {
+                               hammer2_io_bawrite(&dio);
+                       } else {
+                               hammer2_io_bdwrite(&dio);
+                       }
+                       break;
+               default:
+                       panic("hammer2_write_bp: bad chain type %d\n",
+                             chain->bref.type);
+                       /* NOT REACHED */
+                       error = 0;
+                       break;
                }
-               break;
-       default:
-               panic("hammer2_write_bp: bad chain type %d\n",
-                     chain->bref.type);
-               /* NOT REACHED */
-               error = 0;
-               break;
+               KKASSERT(error == 0);   /* XXX TODO */
        }
        *errorp = error;
 }
@@ -1472,11 +1483,11 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags)
        }
 
        for (i = 0; i < pmp->cluster.nchains; ++i) {
-               hmp = pmp->cluster.chains[i]->hmp;
+               hmp = pmp->cluster.array[i]->hmp;
 
                hammer2_vfs_unmount_hmp1(mp, hmp);
 
-               rchain = pmp->cluster.chains[i];
+               rchain = pmp->cluster.array[i];
                if (rchain) {
                        atomic_clear_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED);
 #if REPORT_REFS_ERRORS
@@ -1487,7 +1498,7 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags)
                        KKASSERT(rchain->refs == 1);
 #endif
                        hammer2_chain_drop(rchain);
-                       pmp->cluster.chains[i] = NULL;
+                       pmp->cluster.array[i] = NULL;
                }
 
                hammer2_vfs_unmount_hmp2(mp, hmp);
@@ -1642,7 +1653,7 @@ int
 hammer2_vfs_root(struct mount *mp, struct vnode **vpp)
 {
        hammer2_pfsmount_t *pmp;
-       hammer2_chain_t *parent;
+       hammer2_cluster_t *cparent;
        int error;
        struct vnode *vp;
 
@@ -1651,9 +1662,9 @@ hammer2_vfs_root(struct mount *mp, struct vnode **vpp)
                *vpp = NULL;
                error = EINVAL;
        } else {
-               parent = hammer2_inode_lock_sh(pmp->iroot);
+               cparent = hammer2_inode_lock_sh(pmp->iroot);
                vp = hammer2_igetv(pmp->iroot, &error);
-               hammer2_inode_unlock_sh(pmp->iroot, parent);
+               hammer2_inode_unlock_sh(pmp->iroot, cparent);
                *vpp = vp;
                if (vp == NULL)
                        kprintf("vnodefail\n");
@@ -1676,7 +1687,7 @@ hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
 
        pmp = MPTOPMP(mp);
        KKASSERT(pmp->cluster.nchains >= 1);
-       hmp = pmp->cluster.chains[0]->hmp;      /* XXX */
+       hmp = pmp->cluster.focus->hmp;  /* XXX */
 
        mp->mnt_stat.f_files = pmp->inode_count;
        mp->mnt_stat.f_ffree = 0;
@@ -1697,7 +1708,7 @@ hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred)
 
        pmp = MPTOPMP(mp);
        KKASSERT(pmp->cluster.nchains >= 1);
-       hmp = pmp->cluster.chains[0]->hmp;      /* XXX */
+       hmp = pmp->cluster.focus->hmp;  /* XXX */
 
        mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE;
        mp->mnt_vstat.f_files = pmp->inode_count;
@@ -1953,7 +1964,7 @@ hammer2_vfs_sync(struct mount *mp, int waitfor)
 
        total_error = 0;
        for (i = 0; i < pmp->cluster.nchains; ++i) {
-               hmp = pmp->cluster.chains[i]->hmp;
+               hmp = pmp->cluster.array[i]->hmp;
 
                /*
                 * Media mounts have two 'roots', vchain for the topology
@@ -2252,11 +2263,11 @@ void
 hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp)
 {
        hammer2_inode_data_t *ipdata;
-       hammer2_chain_t *parent;
+       hammer2_cluster_t *cparent;
        hammer2_mount_t *hmp;
        size_t name_len;
 
-       hmp = pmp->cluster.chains[0]->hmp;      /* XXX */
+       hmp = pmp->cluster.focus->hmp;  /* XXX */
 
        /*
         * Closes old comm descriptor, kills threads, cleans up
@@ -2268,8 +2279,8 @@ hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp)
        /*
         * Setup LNK_CONN fields for autoinitiated state machine
         */
-       parent = hammer2_inode_lock_ex(pmp->iroot);
-       ipdata = &parent->data->ipdata;
+       cparent = hammer2_inode_lock_ex(pmp->iroot);
+       ipdata = &hammer2_cluster_data(cparent)->ipdata;
        pmp->iocom.auto_lnk_conn.pfs_clid = ipdata->pfs_clid;
        pmp->iocom.auto_lnk_conn.pfs_fsid = ipdata->pfs_fsid;
        pmp->iocom.auto_lnk_conn.pfs_type = ipdata->pfs_type;
@@ -2315,7 +2326,7 @@ hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp)
              pmp->iocom.auto_lnk_span.fs_label,
              name_len);
        pmp->iocom.auto_lnk_span.fs_label[name_len] = 0;
-       hammer2_inode_unlock_ex(pmp->iroot, parent);
+       hammer2_inode_unlock_ex(pmp->iroot, cparent);
 
        kdmsg_iocom_autoinitiate(&pmp->iocom, hammer2_autodmsg);
 }
@@ -2369,7 +2380,7 @@ static void
 hammer2_autodmsg(kdmsg_msg_t *msg)
 {
        hammer2_pfsmount_t *pmp = msg->iocom->handle;
-       hammer2_mount_t *hmp = pmp->cluster.chains[0]->hmp; /* XXX */
+       hammer2_mount_t *hmp = pmp->cluster.focus->hmp; /* XXX */
        int copyid;
 
        /*
@@ -2412,7 +2423,7 @@ hammer2_autodmsg(kdmsg_msg_t *msg)
 void
 hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index)
 {
-       hammer2_mount_t *hmp = pmp->cluster.chains[0]->hmp;     /* XXX */
+       hammer2_mount_t *hmp = pmp->cluster.focus->hmp; /* XXX */
        kdmsg_msg_t *msg;
 
        /* XXX interlock against connection state termination */
@@ -2476,6 +2487,100 @@ hammer2_lwinprog_wait(hammer2_pfsmount_t *pmp)
        }
 }
 
+/*
+ * Manage excessive memory resource use for chain and related
+ * structures.
+ */
+void
+hammer2_pfs_memory_wait(hammer2_pfsmount_t *pmp)
+{
+       long waiting;
+       long count;
+       long limit;
+#if 0
+       static int zzticks;
+#endif
+
+       /*
+        * Atomically check the condition and wait.  Also do an early
+        * speedup of the syncer to try to avoid hitting the wait.
+        */
+       for (;;) {
+               waiting = pmp->inmem_dirty_chains;
+               cpu_ccfence();
+               count = waiting & HAMMER2_DIRTYCHAIN_MASK;
+
+               limit = pmp->mp->mnt_nvnodelistsize / 10;
+               if (limit < hammer2_limit_dirty_chains)
+                       limit = hammer2_limit_dirty_chains;
+               if (limit < 1000)
+                       limit = 1000;
+
+#if 0
+               if ((int)(ticks - zzticks) > hz) {
+                       zzticks = ticks;
+                       kprintf("count %ld %ld\n", count, limit);
+               }
+#endif
+
+               /*
+                * Block if there are too many dirty chains present and wait
+                * for the flush to clean some out.
+                */
+               if (count > limit) {
+                       tsleep_interlock(&pmp->inmem_dirty_chains, 0);
+                       if (atomic_cmpset_long(&pmp->inmem_dirty_chains,
+                                              waiting,
+                                      waiting | HAMMER2_DIRTYCHAIN_WAITING)) {
+                               speedup_syncer(pmp->mp);
+                               tsleep(&pmp->inmem_dirty_chains, PINTERLOCKED,
+                                      "chnmem", hz);
+                       }
+                       continue;       /* loop on success or fail */
+               }
+
+               /*
+                * Try to start an early flush before we are forced to block.
+                */
+               if (count > limit * 7 / 10)
+                       speedup_syncer(pmp->mp);
+               break;
+       }
+}
+
+void
+hammer2_pfs_memory_inc(hammer2_pfsmount_t *pmp)
+{
+       if (pmp)
+               atomic_add_long(&pmp->inmem_dirty_chains, 1);
+}
+
+void
+hammer2_pfs_memory_wakeup(hammer2_pfsmount_t *pmp)
+{
+       long waiting;
+
+       if (pmp == NULL)
+               return;
+
+       for (;;) {
+               waiting = pmp->inmem_dirty_chains;
+               cpu_ccfence();
+               if (atomic_cmpset_long(&pmp->inmem_dirty_chains,
+                                      waiting,
+                                      (waiting - 1) &
+                                       ~HAMMER2_DIRTYCHAIN_WAITING)) {
+                       break;
+               }
+       }
+
+       if (waiting & HAMMER2_DIRTYCHAIN_WAITING)
+               wakeup(&pmp->inmem_dirty_chains);
+}
+
+/*
+ * Debugging
+ */
 void
 hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp, char pfx)
 {
index 361bc2a..68d97d3 100644 (file)
@@ -70,12 +70,6 @@ static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
                                int ioflag, int seqcount);
 static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize);
 static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize);
-static void hammer2_decompress_LZ4_callback(hammer2_io_t *dio,
-                               hammer2_chain_t *arg_c,
-                               void *arg_p, off_t arg_o);
-static void hammer2_decompress_ZLIB_callback(hammer2_io_t *dio,
-                               hammer2_chain_t *arg_c,
-                               void *arg_p, off_t arg_o);
 
 struct objcache *cache_buffer_read;
 struct objcache *cache_buffer_write;
@@ -85,72 +79,44 @@ struct objcache *cache_buffer_write;
  */
 static
 void
-hammer2_decompress_LZ4_callback(hammer2_io_t *dio, hammer2_chain_t *arg_c,
-                               void *arg_p, off_t arg_o)
+hammer2_decompress_LZ4_callback(const char *data, u_int bytes, struct bio *bio)
 {
-       struct buf *obp;
-       struct bio *obio = arg_p;
-       char *bdata;
-       int bytes = 1 << (int)(arg_o & HAMMER2_OFF_MASK_RADIX);
+       struct buf *bp;
+       char *compressed_buffer;
+       int compressed_size;
+       int result;
 
-       /*
-        * If BIO_DONE is already set the device buffer was already
-        * fully valid (B_CACHE).  If it is not set then I/O was issued
-        * and we have to run I/O completion as the last bio.
-        *
-        * Nobody is waiting for our device I/O to complete, we are
-        * responsible for bqrelse()ing it which means we also have to do
-        * the equivalent of biowait() and clear BIO_DONE (which breadcb()
-        * may have set).
-        *
-        * Any preexisting device buffer should match the requested size,
-        * but due to bigblock recycling and other factors there is some
-        * fragility there, so we assert that the device buffer covers
-        * the request.
-        */
-       obp = obio->bio_buf;
+       bp = bio->bio_buf;
 
-       if (dio->bp->b_flags & B_ERROR) {
-               obp->b_flags |= B_ERROR;
-               obp->b_error = dio->bp->b_error;
 #if 0
-       } else if (obio->bio_caller_info2.index &&
-                  obio->bio_caller_info1.uvalue32 !=
-                   crc32(bp->b_data, bp->b_bufsize)) {
-               obp->b_flags |= B_ERROR;
-               obp->b_error = EIO;
+       if bio->bio_caller_info2.index &&
+             bio->bio_caller_info1.uvalue32 !=
+             crc32(bp->b_data, bp->b_bufsize) --- return error
 #endif
-       } else {
-               char *compressed_buffer;
-               int *compressed_size;
-               int result;
-
-               KKASSERT(obp->b_bufsize <= HAMMER2_PBUFSIZE);
-               bdata = hammer2_io_data(dio, arg_o);
-               compressed_size = (int *)bdata;
-               compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
-               KKASSERT((unsigned int)*compressed_size <= HAMMER2_PBUFSIZE);
-               result = LZ4_decompress_safe(&bdata[sizeof(int)],
-                                            compressed_buffer,
-                                            *compressed_size,
-                                            obp->b_bufsize);
-               if (result < 0) {
-                       kprintf("READ PATH: Error during decompression."
-                               "bio %016jx/%d log %016jx/%d\n",
-                               (intmax_t)dio->pbase, dio->psize,
-                               (intmax_t)arg_o, bytes);
-                       /* make sure it isn't random garbage */
-                       bzero(compressed_buffer, obp->b_bufsize);
-               }
-               KKASSERT(result <= obp->b_bufsize);
-               bcopy(compressed_buffer, obp->b_data, obp->b_bufsize);
-               if (result < obp->b_bufsize)
-                       bzero(obp->b_data + result, obp->b_bufsize - result);
-               objcache_put(cache_buffer_read, compressed_buffer);
-               obp->b_resid = 0;
-               obp->b_flags |= B_AGE;
+
+       KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
+       compressed_size = *(const int *)data;
+       KKASSERT(compressed_size <= bytes - sizeof(int));
+
+       compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
+       result = LZ4_decompress_safe(__DECONST(char *, &data[sizeof(int)]),
+                                    compressed_buffer,
+                                    compressed_size,
+                                    bp->b_bufsize);
+       if (result < 0) {
+               kprintf("READ PATH: Error during decompression."
+                       "bio %016jx/%d\n",
+                       (intmax_t)bio->bio_offset, bytes);
+               /* make sure it isn't random garbage */
+               bzero(compressed_buffer, bp->b_bufsize);
        }
-       biodone(obio);
+       KKASSERT(result <= bp->b_bufsize);
+       bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
+       if (result < bp->b_bufsize)
+               bzero(bp->b_data + result, bp->b_bufsize - result);
+       objcache_put(cache_buffer_read, compressed_buffer);
+       bp->b_resid = 0;
+       bp->b_flags |= B_AGE;
 }
 
 /*
@@ -160,80 +126,47 @@ hammer2_decompress_LZ4_callback(hammer2_io_t *dio, hammer2_chain_t *arg_c,
  */
 static
 void
-hammer2_decompress_ZLIB_callback(hammer2_io_t *dio, hammer2_chain_t *arg_c,
-                                void *arg_p, off_t arg_o)
+hammer2_decompress_ZLIB_callback(const char *data, u_int bytes, struct bio *bio)
 {
-       struct buf *obp;
-       struct bio *obio = arg_p;
-       char *bdata;
-       int bytes = 1 << (int)(arg_o & HAMMER2_OFF_MASK_RADIX);
+       struct buf *bp;
+       char *compressed_buffer;
+       z_stream strm_decompress;
+       int result;
+       int ret;
 
-       /*
-        * If BIO_DONE is already set the device buffer was already
-        * fully valid (B_CACHE).  If it is not set then I/O was issued
-        * and we have to run I/O completion as the last bio.
-        *
-        * Nobody is waiting for our device I/O to complete, we are
-        * responsible for bqrelse()ing it which means we also have to do
-        * the equivalent of biowait() and clear BIO_DONE (which breadcb()
-        * may have set).
-        *
-        * Any preexisting device buffer should match the requested size,
-        * but due to bigblock recycling and other factors there is some
-        * fragility there, so we assert that the device buffer covers
-        * the request.
-        */
-       obp = obio->bio_buf;
+       bp = bio->bio_buf;
 
-       if (dio->bp->b_flags & B_ERROR) {
-               obp->b_flags |= B_ERROR;
-               obp->b_error = dio->bp->b_error;
-#if 0
-       } else if (obio->bio_caller_info2.index &&
-                  obio->bio_caller_info1.uvalue32 !=
-                   crc32(bp->b_data, bp->b_bufsize)) {
-               obp->b_flags |= B_ERROR;
-               obp->b_error = EIO;
-#endif
-       } else {
-               char *compressed_buffer;
-               z_stream strm_decompress;
-               int result;
-               int ret;
-
-               KKASSERT(obp->b_bufsize <= HAMMER2_PBUFSIZE);
-               strm_decompress.avail_in = 0;
-               strm_decompress.next_in = Z_NULL;
-               
-               ret = inflateInit(&strm_decompress);
-               
-               if (ret != Z_OK)
-                       kprintf("HAMMER2 ZLIB: Fatal error in inflateInit.\n");
-               
-               bdata = hammer2_io_data(dio, arg_o);
-               compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
-               strm_decompress.next_in = bdata;
-
-               /* XXX supply proper size, subset of device bp */
-               strm_decompress.avail_in = bytes;
-               strm_decompress.next_out = compressed_buffer;
-               strm_decompress.avail_out = obp->b_bufsize;
-               
-               ret = inflate(&strm_decompress, Z_FINISH);
-               if (ret != Z_STREAM_END) {
-                       kprintf("HAMMER2 ZLIB: Fatar error during decompression.\n");
-                       bzero(compressed_buffer, obp->b_bufsize);
-               }
-               bcopy(compressed_buffer, obp->b_data, obp->b_bufsize);
-               result = obp->b_bufsize - strm_decompress.avail_out;
-               if (result < obp->b_bufsize)
-                       bzero(obp->b_data + result, strm_decompress.avail_out);
-               objcache_put(cache_buffer_read, compressed_buffer);
-               obp->b_resid = 0;
-               obp->b_flags |= B_AGE;
-               ret = inflateEnd(&strm_decompress);
+       KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
+       strm_decompress.avail_in = 0;
+       strm_decompress.next_in = Z_NULL;
+
+       ret = inflateInit(&strm_decompress);
+
+       if (ret != Z_OK)
+               kprintf("HAMMER2 ZLIB: Fatal error in inflateInit.\n");
+
+       compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
+       strm_decompress.next_in = __DECONST(char *, data);
+
+       /* XXX supply proper size, subset of device bp */
+       strm_decompress.avail_in = bytes;
+       strm_decompress.next_out = compressed_buffer;
+       strm_decompress.avail_out = bp->b_bufsize;
+
+       ret = inflate(&strm_decompress, Z_FINISH);
+       if (ret != Z_STREAM_END) {
+               kprintf("HAMMER2 ZLIB: Fatar error during decompression.\n");
+               bzero(compressed_buffer, bp->b_bufsize);
        }
-       biodone(obio);
+       bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
+       result = bp->b_bufsize - strm_decompress.avail_out;
+       if (result < bp->b_bufsize)
+               bzero(bp->b_data + result, strm_decompress.avail_out);
+       objcache_put(cache_buffer_read, compressed_buffer);
+       ret = inflateEnd(&strm_decompress);
+
+       bp->b_resid = 0;
+       bp->b_flags |= B_AGE;
 }
 
 static __inline
@@ -252,7 +185,7 @@ int
 hammer2_vop_inactive(struct vop_inactive_args *ap)
 {
        hammer2_inode_t *ip;
-       hammer2_chain_t *parent;
+       hammer2_cluster_t *cparent;
        struct vnode *vp;
 
        vp = ap->a_vp;
@@ -271,17 +204,17 @@ hammer2_vop_inactive(struct vop_inactive_args *ap)
         * the strategy code.  Simply mark the inode modified so it gets
         * picked up by our normal flush.
         */
-       parent = hammer2_inode_lock_ex(ip);
-       KKASSERT(parent);
+       cparent = hammer2_inode_lock_ex(ip);
+       KKASSERT(cparent);
 
        /*
         * Check for deleted inodes and recycle immediately.
         */
-       if (parent->flags & HAMMER2_CHAIN_UNLINKED) {
-               hammer2_inode_unlock_ex(ip, parent);
+       if (hammer2_cluster_unlinked(cparent) & HAMMER2_CHAIN_UNLINKED) {
+               hammer2_inode_unlock_ex(ip, cparent);
                vrecycle(vp);
        } else {
-               hammer2_inode_unlock_ex(ip, parent);
+               hammer2_inode_unlock_ex(ip, cparent);
        }
        return (0);
 }
@@ -294,7 +227,7 @@ static
 int
 hammer2_vop_reclaim(struct vop_reclaim_args *ap)
 {
-       hammer2_chain_t *chain;
+       hammer2_cluster_t *cluster;
        hammer2_inode_t *ip;
        hammer2_pfsmount_t *pmp;
        struct vnode *vp;
@@ -308,7 +241,7 @@ hammer2_vop_reclaim(struct vop_reclaim_args *ap)
         * Inode must be locked for reclaim.
         */
        pmp = ip->pmp;
-       chain = hammer2_inode_lock_ex(ip);
+       cluster = hammer2_inode_lock_ex(ip);
 
        /*
         * The final close of a deleted file or directory marks it for
@@ -334,7 +267,7 @@ hammer2_vop_reclaim(struct vop_reclaim_args *ap)
         * the ip is left with a reference and placed on a linked list and
         * handled later on.
         */
-       if (chain->flags & HAMMER2_CHAIN_UNLINKED) {
+       if (hammer2_cluster_unlinked(cluster)) {
                hammer2_inode_unlink_t *ipul;
 
                ipul = kmalloc(sizeof(*ipul), pmp->minode, M_WAITOK | M_ZERO);
@@ -343,14 +276,14 @@ hammer2_vop_reclaim(struct vop_reclaim_args *ap)
                spin_lock(&pmp->unlinkq_spin);
                TAILQ_INSERT_TAIL(&pmp->unlinkq, ipul, entry);
                spin_unlock(&pmp->unlinkq_spin);
-               hammer2_inode_unlock_ex(ip, chain);     /* unlock */
+               hammer2_inode_unlock_ex(ip, cluster);   /* unlock */
                /* retain ref from vp for ipul */
        } else {
-               hammer2_inode_unlock_ex(ip, chain);     /* unlock */
+               hammer2_inode_unlock_ex(ip, cluster);   /* unlock */
                hammer2_inode_drop(ip);                 /* vp ref */
        }
-       /* chain no longer referenced */
-       /* chain = NULL; not needed */
+       /* cluster no longer referenced */
+       /* cluster = NULL; not needed */
 
        /*
         * XXX handle background sync when ip dirty, kernel will no longer
@@ -367,7 +300,7 @@ hammer2_vop_fsync(struct vop_fsync_args *ap)
 {
        hammer2_inode_t *ip;
        hammer2_trans_t trans;
-       hammer2_chain_t *chain;
+       hammer2_cluster_t *cluster;
        struct vnode *vp;
 
        vp = ap->a_vp;
@@ -389,21 +322,21 @@ hammer2_vop_fsync(struct vop_fsync_args *ap)
         * which call this function will eventually call chain_flush
         * on the volume root as a catch-all, which is far more optimal.
         */
-       chain = hammer2_inode_lock_ex(ip);
+       cluster = hammer2_inode_lock_ex(ip);
        atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED);
        vclrisdirty(vp);
        if (ip->flags & (HAMMER2_INODE_RESIZED|HAMMER2_INODE_MTIME))
-               hammer2_inode_fsync(&trans, ip, &chain);
+               hammer2_inode_fsync(&trans, ip, cluster);
 
 #if 0
        /*
         * XXX creates discontinuity w/modify_tid
         */
        if (ap->a_flags & VOP_FSYNC_SYSCALL) {
-               hammer2_flush(&trans, &chain);
+               hammer2_flush(&trans, cluster);
        }
 #endif
-       hammer2_inode_unlock_ex(ip, chain);
+       hammer2_inode_unlock_ex(ip, cluster);
        hammer2_trans_done(&trans);
 
        return (0);
@@ -415,17 +348,17 @@ hammer2_vop_access(struct vop_access_args *ap)
 {
        hammer2_inode_t *ip = VTOI(ap->a_vp);
        hammer2_inode_data_t *ipdata;
-       hammer2_chain_t *chain;
+       hammer2_cluster_t *cluster;
        uid_t uid;
        gid_t gid;
        int error;
 
-       chain = hammer2_inode_lock_sh(ip);
-       ipdata = &chain->data->ipdata;
+       cluster = hammer2_inode_lock_sh(ip);
+       ipdata = &hammer2_cluster_data(cluster)->ipdata;
        uid = hammer2_to_unix_xid(&ipdata->uid);
        gid = hammer2_to_unix_xid(&ipdata->gid);
        error = vop_helper_access(ap, uid, gid, ipdata->mode, ipdata->uflags);
-       hammer2_inode_unlock_sh(ip, chain);
+       hammer2_inode_unlock_sh(ip, cluster);
 
        return (error);
 }
@@ -435,7 +368,7 @@ int
 hammer2_vop_getattr(struct vop_getattr_args *ap)
 {
        hammer2_inode_data_t *ipdata;
-       hammer2_chain_t *chain;
+       hammer2_cluster_t *cluster;
        hammer2_pfsmount_t *pmp;
        hammer2_inode_t *ip;
        struct vnode *vp;
@@ -447,8 +380,9 @@ hammer2_vop_getattr(struct vop_getattr_args *ap)
        ip = VTOI(vp);
        pmp = ip->pmp;
 
-       chain = hammer2_inode_lock_sh(ip);
-       ipdata = &chain->data->ipdata;
+       cluster = hammer2_inode_lock_sh(ip);
+       ipdata = &hammer2_cluster_data(cluster)->ipdata;
+       KKASSERT(hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
 
        vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
        vap->va_fileid = ipdata->inum;
@@ -466,14 +400,14 @@ hammer2_vop_getattr(struct vop_getattr_args *ap)
        hammer2_time_to_timespec(ipdata->mtime, &vap->va_atime);
        vap->va_gen = 1;
        vap->va_bytes = vap->va_size;   /* XXX */
-       vap->va_type = hammer2_get_vtype(chain);
+       vap->va_type = hammer2_get_vtype(ipdata);
        vap->va_filerev = 0;
        vap->va_uid_uuid = ipdata->uid;
        vap->va_gid_uuid = ipdata->gid;
        vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
                          VA_FSID_UUID_VALID;
 
-       hammer2_inode_unlock_sh(ip, chain);
+       hammer2_inode_unlock_sh(ip, cluster);
 
        return (0);
 }
@@ -484,7 +418,7 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
 {
        hammer2_inode_data_t *ipdata;
        hammer2_inode_t *ip;
-       hammer2_chain_t *chain;
+       hammer2_cluster_t *cluster;
        hammer2_trans_t trans;
        struct vnode *vp;
        struct vattr *vap;
@@ -502,10 +436,10 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
        if (ip->pmp->ronly)
                return(EROFS);
 
-       hammer2_chain_memory_wait(ip->pmp);
+       hammer2_pfs_memory_wait(ip->pmp);
        hammer2_trans_init(&trans, ip->pmp, NULL, 0);
-       chain = hammer2_inode_lock_ex(ip);
-       ipdata = &chain->data->ipdata;
+       cluster = hammer2_inode_lock_ex(ip);
+       ipdata = &hammer2_cluster_data(cluster)->ipdata;
        error = 0;
 
        if (vap->va_flags != VNOVAL) {
@@ -517,8 +451,8 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
                                         ap->a_cred);
                if (error == 0) {
                        if (ipdata->uflags != flags) {
-                               ipdata = hammer2_chain_modify_ip(&trans, ip,
-                                                                &chain, 0);
+                               ipdata = hammer2_cluster_modify_ip(&trans, ip,
+                                                                cluster, 0);
                                ipdata->uflags = flags;
                                ipdata->ctime = ctime;
                                kflags |= NOTE_ATTRIB;
@@ -551,8 +485,8 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
                            bcmp(&uuid_gid, &ipdata->gid, sizeof(uuid_gid)) ||
                            ipdata->mode != cur_mode
                        ) {
-                               ipdata = hammer2_chain_modify_ip(&trans, ip,
-                                                                &chain, 0);
+                               ipdata = hammer2_cluster_modify_ip(&trans, ip,
+                                                                cluster, 0);
                                ipdata->uid = uuid_uid;
                                ipdata->gid = uuid_gid;
                                ipdata->mode = cur_mode;
@@ -570,14 +504,15 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
                case VREG:
                        if (vap->va_size == ip->size)
                                break;
-                       hammer2_inode_unlock_ex(ip, chain);
+                       hammer2_inode_unlock_ex(ip, cluster);
                        if (vap->va_size < ip->size) {
                                hammer2_truncate_file(ip, vap->va_size);
                        } else {
                                hammer2_extend_file(ip, vap->va_size);
                        }
-                       chain = hammer2_inode_lock_ex(ip);
-                       ipdata = &chain->data->ipdata; /* RELOAD */
+                       cluster = hammer2_inode_lock_ex(ip);
+                       /* RELOAD */
+                       ipdata = &hammer2_cluster_data(cluster)->ipdata;
                        domtime = 1;
                        break;
                default:
@@ -588,13 +523,13 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
 #if 0
        /* atime not supported */
        if (vap->va_atime.tv_sec != VNOVAL) {
-               ipdata = hammer2_chain_modify_ip(&trans, ip, &chain, 0);
+               ipdata = hammer2_cluster_modify_ip(&trans, ip, cluster, 0);
                ipdata->atime = hammer2_timespec_to_time(&vap->va_atime);
                kflags |= NOTE_ATTRIB;
        }
 #endif
        if (vap->va_mtime.tv_sec != VNOVAL) {
-               ipdata = hammer2_chain_modify_ip(&trans, ip, &chain, 0);
+               ipdata = hammer2_cluster_modify_ip(&trans, ip, cluster, 0);
                ipdata->mtime = hammer2_timespec_to_time(&vap->va_mtime);
                kflags |= NOTE_ATTRIB;
                domtime = 0;
@@ -607,7 +542,8 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
                error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
                                         cur_uid, cur_gid, &cur_mode);
                if (error == 0 && ipdata->mode != cur_mode) {
-                       ipdata = hammer2_chain_modify_ip(&trans, ip, &chain, 0);
+                       ipdata = hammer2_cluster_modify_ip(&trans, ip,
+                                                          cluster, 0);
                        ipdata->mode = cur_mode;
                        ipdata->ctime = ctime;
                        kflags |= NOTE_ATTRIB;
@@ -619,7 +555,7 @@ hammer2_vop_setattr(struct vop_setattr_args *ap)
         * to trim the related data chains, otherwise a later expansion can
         * cause havoc.
         */
-       hammer2_inode_fsync(&trans, ip, &chain);
+       hammer2_inode_fsync(&trans, ip, cluster);
 
        /*
         * Cleanup.  If domtime is set an additional inode modification
@@ -632,7 +568,7 @@ done:
                                           HAMMER2_INODE_MTIME);
                vsetisdirty(ip->vp);
        }
-       hammer2_inode_unlock_ex(ip, chain);
+       hammer2_inode_unlock_ex(ip, cluster);
        hammer2_trans_done(&trans);
        hammer2_knote(ip->vp, kflags);
 
@@ -646,9 +582,10 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
        hammer2_inode_data_t *ipdata;
        hammer2_inode_t *ip;
        hammer2_inode_t *xip;
-       hammer2_chain_t *parent;
-       hammer2_chain_t *chain;
-       hammer2_chain_t *xchain;
+       hammer2_cluster_t *cparent;
+       hammer2_cluster_t *cluster;
+       hammer2_cluster_t *xcluster;
+       hammer2_blockref_t bref;
        hammer2_tid_t inum;
        hammer2_key_t key_next;
        hammer2_key_t lkey;
@@ -656,10 +593,10 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
        off_t *cookies;
        off_t saveoff;
        int cookie_index;
-       int cache_index = -1;
        int ncookies;
        int error;
        int dtype;
+       int ddflag;
        int r;
 
        ip = VTOI(ap->a_vp);
@@ -680,8 +617,8 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
        }
        cookie_index = 0;
 
-       parent = hammer2_inode_lock_sh(ip);
-       ipdata = &parent->data->ipdata;
+       cparent = hammer2_inode_lock_sh(ip);
+       ipdata = &hammer2_cluster_data(cparent)->ipdata;
 
        /*
         * Handle artificial entries.  To ensure that only positive 64 bit
@@ -693,7 +630,7 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
         * allow '..' to cross the mount point into (e.g.) the super-root.
         */
        error = 0;
-       chain = (void *)(intptr_t)-1;   /* non-NULL for early goto done case */
+       cluster = (void *)(intptr_t)-1; /* non-NULL for early goto done case */
 
        if (saveoff == 0) {
                inum = ipdata->inum & HAMMER2_DIRHASH_USERMSK;
@@ -718,17 +655,18 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
                while (ip->pip != NULL && ip != ip->pmp->iroot) {
                        xip = ip->pip;
                        hammer2_inode_ref(xip);
-                       hammer2_inode_unlock_sh(ip, parent);
-                       xchain = hammer2_inode_lock_sh(xip);
-                       parent = hammer2_inode_lock_sh(ip);
+                       hammer2_inode_unlock_sh(ip, cparent);
+                       xcluster = hammer2_inode_lock_sh(xip);
+                       cparent = hammer2_inode_lock_sh(ip);
                        hammer2_inode_drop(xip);
+                       ipdata = &hammer2_cluster_data(cparent)->ipdata;
                        if (xip == ip->pip) {
-                               inum = xchain->data->ipdata.inum &
-                                      HAMMER2_DIRHASH_USERMSK;
-                               hammer2_inode_unlock_sh(xip, xchain);
+                               inum = hammer2_cluster_data(xcluster)->
+                                       ipdata.inum & HAMMER2_DIRHASH_USERMSK;
+                               hammer2_inode_unlock_sh(xip, xcluster);
                                break;
                        }
-                       hammer2_inode_unlock_sh(xip, xchain);
+                       hammer2_inode_unlock_sh(xip, xcluster);
                }
                r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, "..");
                if (r)
@@ -746,33 +684,37 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
                kprintf("readdir: lkey %016jx\n", lkey);
 
        /*
-        * parent is the inode chain, already locked for us.  Don't
+        * cparent is the inode cluster, already locked for us.  Don't
         * double lock shared locks as this will screw up upgrades.
         */
        if (error) {
                goto done;
        }
-       chain = hammer2_chain_lookup(&parent, &key_next, lkey, lkey,
-                                    &cache_index, HAMMER2_LOOKUP_SHARED);
-       if (chain == NULL) {
-               chain = hammer2_chain_lookup(&parent, &key_next,
+       cluster = hammer2_cluster_lookup(cparent, &key_next, lkey, lkey,
+                                    HAMMER2_LOOKUP_SHARED, &ddflag);
+       if (cluster == NULL) {
+               cluster = hammer2_cluster_lookup(cparent, &key_next,
                                             lkey, (hammer2_key_t)-1,
-                                            &cache_index,
-                                            HAMMER2_LOOKUP_SHARED);
+                                            HAMMER2_LOOKUP_SHARED, &ddflag);
        }
-       while (chain) {
+       if (cluster)
+               hammer2_cluster_bref(cluster, &bref);
+       while (cluster) {
                if (hammer2_debug & 0x0020)
                        kprintf("readdir: p=%p chain=%p %016jx (next %016jx)\n",
-                               parent, chain, chain->bref.key, key_next);
-               if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
-                       dtype = hammer2_get_dtype(chain);
-                       saveoff = chain->bref.key & HAMMER2_DIRHASH_USERMSK;
+                               cparent->focus, cluster->focus,
+                               bref.key, key_next);
+
+               if (bref.type == HAMMER2_BREF_TYPE_INODE) {
+                       ipdata = &hammer2_cluster_data(cluster)->ipdata;
+                       dtype = hammer2_get_dtype(ipdata);
+                       saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
                        r = vop_write_dirent(&error, uio,
-                                            chain->data->ipdata.inum &
+                                            ipdata->inum &
                                              HAMMER2_DIRHASH_USERMSK,
                                             dtype,
-                                            chain->data->ipdata.name_len,
-                                            chain->data->ipdata.filename);
+                                            ipdata->name_len,
+                                            ipdata->filename);
                        if (r)
                                break;
                        if (cookies)
@@ -780,33 +722,32 @@ hammer2_vop_readdir(struct vop_readdir_args *ap)
                        ++cookie_index;
                } else {
                        /* XXX chain error */
-                       kprintf("bad chain type readdir %d\n",
-                               chain->bref.type);
+                       kprintf("bad chain type readdir %d\n", bref.type);
                }
 
                /*
                 * Keys may not be returned in order so once we have a
-                * placemarker (chain) the scan must allow the full range
+                * placemarker (cluster) the scan must allow the full range
                 * or some entries will be missed.
                 */
-               chain = hammer2_chain_next(&parent, chain, &key_next,
-                                          key_next, (hammer2_key_t)-1,
-                                          &cache_index, HAMMER2_LOOKUP_SHARED);
-               if (chain) {
-                       saveoff = (chain->bref.key &
-                                  HAMMER2_DIRHASH_USERMSK) + 1;
+               cluster = hammer2_cluster_next(cparent, cluster, &key_next,
+                                              key_next, (hammer2_key_t)-1,
+                                              HAMMER2_LOOKUP_SHARED);
+               if (cluster) {
+                       hammer2_cluster_bref(cluster, &bref);
+                       saveoff = (bref.key & HAMMER2_DIRHASH_USERMSK) + 1;
                } else {
                        saveoff = (hammer2_key_t)-1;
                }
                if (cookie_index == ncookies)
                        break;
        }
-       if (chain)
-               hammer2_chain_unlock(chain);
+       if (cluster)
+               hammer2_cluster_unlock(cluster);
 done:
-       hammer2_inode_unlock_sh(ip, parent);
+       hammer2_inode_unlock_sh(ip, cparent);
        if (ap->a_eofflag)
-               *ap->a_eofflag = (chain == NULL);
+               *ap->a_eofflag = (cluster == NULL);
        if (hammer2_debug & 0x0020)
                kprintf("readdir: done at %016jx\n", saveoff);
        uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
@@ -1228,17 +1169,16 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap)
 {
        hammer2_inode_t *ip;
        hammer2_inode_t *dip;
-       hammer2_chain_t *parent;
-       hammer2_chain_t *chain;
-       hammer2_chain_t *ochain;
-       hammer2_trans_t trans;
+       hammer2_cluster_t *cparent;
+       hammer2_cluster_t *cluster;
+       hammer2_inode_data_t *ipdata;
        hammer2_key_t key_next;
        hammer2_key_t lhc;
        struct namecache *ncp;
        const uint8_t *name;
        size_t name_len;
        int error = 0;
-       int cache_index = -1;
+       int ddflag;
        struct vnode *vp;
 
        dip = VTOI(ap->a_dvp);
@@ -1250,46 +1190,46 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap)
        /*
         * Note: In DragonFly the kernel handles '.' and '..'.
         */
-       parent = hammer2_inode_lock_sh(dip);
-       chain = hammer2_chain_lookup(&parent, &key_next,
-                                    lhc, lhc + HAMMER2_DIRHASH_LOMASK,
-                                    &cache_index, HAMMER2_LOOKUP_SHARED);
-       while (chain) {
-               if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
-                   name_len == chain->data->ipdata.name_len &&
-                   bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
-                       break;
+       cparent = hammer2_inode_lock_sh(dip);
+       cluster = hammer2_cluster_lookup(cparent, &key_next,
+                                        lhc, lhc + HAMMER2_DIRHASH_LOMASK,
+                                        HAMMER2_LOOKUP_SHARED, &ddflag);
+       while (cluster) {
+               if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
+                       ipdata = &hammer2_cluster_data(cluster)->ipdata;
+                       if (ipdata->name_len == name_len &&
+                           bcmp(ipdata->filename, name, name_len) == 0) {
+                               break;
+                       }
                }
-               chain = hammer2_chain_next(&parent, chain, &key_next,
-                                          key_next,
-                                          lhc + HAMMER2_DIRHASH_LOMASK,
-                                          &cache_index, HAMMER2_LOOKUP_SHARED);
+               cluster = hammer2_cluster_next(cparent, cluster, &key_next,
+                                              key_next,
+                                              lhc + HAMMER2_DIRHASH_LOMASK,
+                                              HAMMER2_LOOKUP_SHARED);
        }
-       hammer2_inode_unlock_sh(dip, parent);
+       hammer2_inode_unlock_sh(dip, cparent);
 
        /*
-        * If the inode represents a forwarding entry for a hardlink we have
-        * to locate the actual inode.  The original ip is saved for possible
-        * deconsolidation.  (ip) will only be set to non-NULL when we have
-        * to locate the real file via a hardlink.  ip will be referenced but
-        * not locked in that situation.  chain is passed in locked and
-        * returned locked.
-        *
-        * XXX what kind of chain lock?
+        * nresolve needs to resolve hardlinks; the original cluster is not
+        * sufficient.
         */
-       ochain = NULL;
-       if (chain && chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
-               error = hammer2_hardlink_find(dip, &chain, &ochain);
-               if (error) {
-                       kprintf("hammer2: unable to find hardlink\n");
-                       if (chain) {
-                               hammer2_chain_unlock(chain);
-                               chain = NULL;
-                       }
-                       goto failed;
+       if (cluster) {
+               ip = hammer2_inode_get(dip->pmp, dip, cluster);
+               ipdata = &hammer2_cluster_data(cluster)->ipdata;
+               if (ipdata->type == HAMMER2_OBJTYPE_HARDLINK) {
+                       kprintf("nresolve: fixup hardlink\n");
+                       hammer2_inode_ref(ip);
+                       hammer2_inode_unlock_ex(ip, NULL);
+                       hammer2_cluster_unlock(cluster);
+                       cluster = hammer2_inode_lock_ex(ip);
+                       ipdata = &hammer2_cluster_data(cluster)->ipdata;
+                       kprintf("nresolve: fixup to type %02x\n", ipdata->type);
                }
+       } else {
+               ip = NULL;
        }
 
+#if 0
        /*
         * Deconsolidate any hardlink whos nlinks == 1.  Ignore errors.
         * If an error occurs chain and ip are left alone.
@@ -1305,6 +1245,7 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap)
                hammer2_hardlink_deconsolidate(&trans, dip, &chain, &ochain);
                hammer2_trans_done(&trans);
        }
+#endif
 
        /*
         * Acquire the related vnode
@@ -1322,8 +1263,7 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap)
         *          but chain was locked shared.  inode_unlock_ex()
         *          will handle it properly.
         */
-       if (chain) {
-               ip = hammer2_inode_get(dip->pmp, dip, chain);
+       if (cluster) {
                vp = hammer2_igetv(ip, &error);
                if (error == 0) {
                        vn_unlock(vp);
@@ -1331,7 +1271,7 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap)
                } else if (error == ENOENT) {
                        cache_setvp(ap->a_nch, NULL);
                }
-               hammer2_inode_unlock_ex(ip, chain);
+               hammer2_inode_unlock_ex(ip, cluster);
 
                /*
                 * The vp should not be released until after we've disposed
@@ -1344,12 +1284,9 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap)
                error = ENOENT;
                cache_setvp(ap->a_nch, NULL);
        }
-failed:
        KASSERT(error || ap->a_nch->ncp->nc_vp != NULL,
-               ("resolve error %d/%p chain %p ap %p\n",
-                error, ap->a_nch->ncp->nc_vp, chain, ap));
-       if (ochain)
-               hammer2_chain_drop(ochain);
+               ("resolve error %d/%p ap %p\n",
+                error, ap->a_nch->ncp->nc_vp, ap));
        return error;
 }
 
@@ -1359,7 +1296,7 @@ hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
 {
        hammer2_inode_t *dip;
        hammer2_inode_t *ip;
-       hammer2_chain_t *parent;
+       hammer2_cluster_t *cparent;
        int error;
 
        dip = VTOI(ap->a_dvp);
@@ -1368,9 +1305,9 @@ hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
                *ap->a_vpp = NULL;
                return ENOENT;
        }
-       parent = hammer2_inode_lock_ex(ip);
+       cparent = hammer2_inode_lock_ex(ip);
        *ap->a_vpp = hammer2_igetv(ip, &error);
-       hammer2_inode_unlock_ex(ip, parent);
+       hammer2_inode_unlock_ex(ip, cparent);
 
        return error;
 }
@@ -1382,7 +1319,7 @@ hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
        hammer2_inode_t *dip;
        hammer2_inode_t *nip;
        hammer2_trans_t trans;
-       hammer2_chain_t *chain;
+       hammer2_cluster_t *cluster;
        struct namecache *ncp;
        const uint8_t *name;
        size_t name_len;
@@ -1395,18 +1332,19 @@ hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;
+       cluster = NULL;
 
-       hammer2_chain_memory_wait(dip->pmp);
+       hammer2_pfs_memory_wait(dip->pmp);
        hammer2_trans_init(&trans, dip->pmp, NULL, HAMMER2_TRANS_NEWINODE);
        nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
-                                  name, name_len, &chain, &error);
-       chain->inode_reason = 1;
+                                  name, name_len, &cluster, &error);
+       cluster->focus->inode_reason = 1;
        if (error) {
                KKASSERT(nip == NULL);
                *ap->a_vpp = NULL;
        } else {
                *ap->a_vpp = hammer2_igetv(nip, &error);
-               hammer2_inode_unlock_ex(nip, chain);
+               hammer2_inode_unlock_ex(nip, cluster);
        }
        hammer2_trans_done(&trans);
 
@@ -1453,12 +1391,14 @@ int
 hammer2_vop_advlock(struct vop_advlock_args *ap)
 {
        hammer2_inode_t *ip = VTOI(ap->a_vp);
-       hammer2_chain_t *parent;
+       hammer2_inode_data_t *ipdata;
+       hammer2_cluster_t *cparent;
        hammer2_off_t size;
 
-       parent = hammer2_inode_lock_sh(ip);
-       size = parent->data->ipdata.size;
-       hammer2_inode_unlock_sh(ip, parent);
+       cparent = hammer2_inode_lock_sh(ip);
+       ipdata = &hammer2_cluster_data(cparent)->ipdata;
+       size = ipdata->size;
+       hammer2_inode_unlock_sh(ip, cparent);
        return (lf_advlock(ap, &ip->advlock, size));
 }
 
@@ -1483,10 +1423,10 @@ hammer2_vop_nlink(struct vop_nlink_args *ap)
        hammer2_inode_t *tdip;  /* target directory to create link in */
        hammer2_inode_t *cdip;  /* common parent directory */
        hammer2_inode_t *ip;    /* inode we are hardlinking to */
-       hammer2_chain_t *chain;
-       hammer2_chain_t *fdchain;
-       hammer2_chain_t *tdchain;
-       hammer2_chain_t *cdchain;
+       hammer2_cluster_t *cluster;
+       hammer2_cluster_t *fdcluster;
+       hammer2_cluster_t *tdcluster;
+       hammer2_cluster_t *cdcluster;
        hammer2_trans_t trans;
        struct namecache *ncp;
        const uint8_t *name;
@@ -1509,11 +1449,11 @@ hammer2_vop_nlink(struct vop_nlink_args *ap)
         *
         * Bump nlinks and potentially also create or move the hardlink
         * target in the parent directory common to (ip) and (tdip).  The
-        * consolidation code can modify ip->chain and ip->pip.  The
-        * returned chain is locked.
+        * consolidation code can modify ip->cluster and ip->pip.  The
+        * returned cluster is locked.
         */
        ip = VTOI(ap->a_vp);
-       hammer2_chain_memory_wait(ip->pmp);
+       hammer2_pfs_memory_wait(ip->pmp);
        hammer2_trans_init(&trans, ip->pmp, NULL, HAMMER2_TRANS_NEWINODE);
 
        /*
@@ -1522,39 +1462,33 @@ hammer2_vop_nlink(struct vop_nlink_args *ap)
         */
        fdip = ip->pip;
        cdip = hammer2_inode_common_parent(fdip, tdip);
-       cdchain = hammer2_inode_lock_ex(cdip);
-       fdchain = hammer2_inode_lock_ex(fdip);
-       tdchain = hammer2_inode_lock_ex(tdip);
-       chain = hammer2_inode_lock_ex(ip);
-       error = hammer2_hardlink_consolidate(&trans, ip, &chain,
-                                            cdip, &cdchain, 1);
+       cdcluster = hammer2_inode_lock_ex(cdip);
+       fdcluster = hammer2_inode_lock_ex(fdip);
+       tdcluster = hammer2_inode_lock_ex(tdip);
+       cluster = hammer2_inode_lock_ex(ip);
+       error = hammer2_hardlink_consolidate(&trans, ip, &cluster,
+                                            cdip, cdcluster, 1);
        if (error)
                goto done;
 
        /*
-        * Create a directory entry connected to the specified chain.
-        * The hardlink consolidation code has already adjusted ip->pip
-        * to the common parent directory containing the actual hardlink
-        *
-        * (which may be different from dip where we created our hardlink
-        * entry. ip->chain always represents the actual hardlink and not
-        * any of the pointers to the actual hardlink).
+        * Create a directory entry connected to the specified cluster.
         *
         * WARNING! chain can get moved by the connect (indirectly due to
         *          potential indirect block creation).
         */
-       error = hammer2_inode_connect(&trans, &chain, 1,
-                                     tdip, &tdchain,
+       error = hammer2_inode_connect(&trans, &cluster, 1,
+                                     tdip, tdcluster,
                                      name, name_len, 0);
        if (error == 0) {
                cache_setunresolved(ap->a_nch);
                cache_setvp(ap->a_nch, ap->a_vp);
        }
 done:
-       hammer2_inode_unlock_ex(ip, chain);
-       hammer2_inode_unlock_ex(tdip, tdchain);
-       hammer2_inode_unlock_ex(fdip, fdchain);
-       hammer2_inode_unlock_ex(cdip, cdchain);
+       hammer2_inode_unlock_ex(ip, cluster);
+       hammer2_inode_unlock_ex(tdip, tdcluster);
+       hammer2_inode_unlock_ex(fdip, fdcluster);
+       hammer2_inode_unlock_ex(cdip, cdcluster);
        hammer2_trans_done(&trans);
 
        return error;
@@ -1573,7 +1507,7 @@ hammer2_vop_ncreate(struct vop_ncreate_args *ap)
        hammer2_inode_t *dip;
        hammer2_inode_t *nip;
        hammer2_trans_t trans;
-       hammer2_chain_t *nchain;
+       hammer2_cluster_t *ncluster;
        struct namecache *ncp;
        const uint8_t *name;
        size_t name_len;
@@ -1586,18 +1520,19 @@ hammer2_vop_ncreate(struct vop_ncreate_args *ap)
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;
-       hammer2_chain_memory_wait(dip->pmp);
+       hammer2_pfs_memory_wait(dip->pmp);
        hammer2_trans_init(&trans, dip->pmp, NULL, HAMMER2_TRANS_NEWINODE);
+       ncluster = NULL;
 
        nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
-                                  name, name_len, &nchain, &error);
-       nchain->inode_reason = 2;
+                                  name, name_len, &ncluster, &error);
+       ncluster->focus->inode_reason = 2;
        if (error) {
                KKASSERT(nip == NULL);
                *ap->a_vpp = NULL;
        } else {
                *ap->a_vpp = hammer2_igetv(nip, &error);
-               hammer2_inode_unlock_ex(nip, nchain);
+               hammer2_inode_unlock_ex(nip, ncluster);
        }
        hammer2_trans_done(&trans);
 
@@ -1609,7 +1544,7 @@ hammer2_vop_ncreate(struct vop_ncreate_args *ap)
 }
 
 /*
- *
+ * Make a device node (typically a fifo)
  */
 static
 int
@@ -1618,7 +1553,7 @@ hammer2_vop_nmknod(struct vop_nmknod_args *ap)
        hammer2_inode_t *dip;
        hammer2_inode_t *nip;
        hammer2_trans_t trans;
-       hammer2_chain_t *nchain;
+       hammer2_cluster_t *ncluster;
        struct namecache *ncp;
        const uint8_t *name;
        size_t name_len;
@@ -1631,18 +1566,19 @@ hammer2_vop_nmknod(struct vop_nmknod_args *ap)
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;
-       hammer2_chain_memory_wait(dip->pmp);
+       hammer2_pfs_memory_wait(dip->pmp);
        hammer2_trans_init(&trans, dip->pmp, NULL, HAMMER2_TRANS_NEWINODE);
+       ncluster = NULL;
 
        nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
-                                  name, name_len, &nchain, &error);
-       nchain->inode_reason = 3;
+                                  name, name_len, &ncluster, &error);
+       ncluster->focus->inode_reason = 3;
        if (error) {
                KKASSERT(nip == NULL);
                *ap->a_vpp = NULL;
        } else {
                *ap->a_vpp = hammer2_igetv(nip, &error);
-               hammer2_inode_unlock_ex(nip, nchain);
+               hammer2_inode_unlock_ex(nip, ncluster);
        }
        hammer2_trans_done(&trans);
 
@@ -1662,7 +1598,7 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
 {
        hammer2_inode_t *dip;
        hammer2_inode_t *nip;
-       hammer2_chain_t *nparent;
+       hammer2_cluster_t *ncparent;
        hammer2_trans_t trans;
        struct namecache *ncp;
        const uint8_t *name;
@@ -1676,14 +1612,15 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;
-       hammer2_chain_memory_wait(dip->pmp);
+       hammer2_pfs_memory_wait(dip->pmp);
        hammer2_trans_init(&trans, dip->pmp, NULL, HAMMER2_TRANS_NEWINODE);
+       ncparent = NULL;
 
        ap->a_vap->va_type = VLNK;      /* enforce type */
 
        nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
-                                  name, name_len, &nparent, &error);
-       nparent->inode_reason = 4;
+                                  name, name_len, &ncparent, &error);
+       ncparent->focus->inode_reason = 4;
        if (error) {
                KKASSERT(nip == NULL);
                *ap->a_vpp = NULL;
@@ -1701,7 +1638,8 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
                struct iovec aiov;
                hammer2_inode_data_t *nipdata;
 
-               nipdata = &nip->chain->data->ipdata;
+               nipdata = &hammer2_cluster_data(ncparent)->ipdata;
+               /* nipdata = &nip->chain->data->ipdata;XXX */
                bytes = strlen(ap->a_target);
 
                if (bytes <= HAMMER2_EMBEDDED_BYTES) {
@@ -1710,9 +1648,11 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
                        bcopy(ap->a_target, nipdata->u.data, bytes);
                        nipdata->size = bytes;
                        nip->size = bytes;
-                       hammer2_inode_unlock_ex(nip, nparent);
+                       hammer2_inode_unlock_ex(nip, ncparent);
+                       /* nipdata = NULL; not needed */
                } else {
-                       hammer2_inode_unlock_ex(nip, nparent);
+                       hammer2_inode_unlock_ex(nip, ncparent);
+                       /* nipdata = NULL; not needed */
                        bzero(&auio, sizeof(auio));
                        bzero(&aiov, sizeof(aiov));
                        auio.uio_iov = &aiov;
@@ -1724,12 +1664,11 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
                        aiov.iov_base = ap->a_target;
                        aiov.iov_len = bytes;
                        error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
-                       nipdata = &nip->chain->data->ipdata; /* RELOAD */
                        /* XXX handle error */
                        error = 0;
                }
        } else {
-               hammer2_inode_unlock_ex(nip, nparent);
+               hammer2_inode_unlock_ex(nip, ncparent);
        }
        hammer2_trans_done(&trans);
 
@@ -1766,7 +1705,7 @@ hammer2_vop_nremove(struct vop_nremove_args *ap)
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;
 
-       hammer2_chain_memory_wait(dip->pmp);
+       hammer2_pfs_memory_wait(dip->pmp);
        hammer2_trans_init(&trans, dip->pmp, NULL, 0);
        error = hammer2_unlink_file(&trans, dip, name, name_len,
                                    0, NULL, ap->a_nch);
@@ -1798,7 +1737,7 @@ hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;
 
-       hammer2_chain_memory_wait(dip->pmp);
+       hammer2_pfs_memory_wait(dip->pmp);
        hammer2_trans_init(&trans, dip->pmp, NULL, 0);
        hammer2_run_unlinkq(&trans, dip->pmp);
        error = hammer2_unlink_file(&trans, dip, name, name_len,
@@ -1822,10 +1761,10 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
        hammer2_inode_t *fdip;
        hammer2_inode_t *tdip;
        hammer2_inode_t *ip;
-       hammer2_chain_t *chain;
-       hammer2_chain_t *fdchain;
-       hammer2_chain_t *tdchain;
-       hammer2_chain_t *cdchain;
+       hammer2_cluster_t *cluster;
+       hammer2_cluster_t *fdcluster;
+       hammer2_cluster_t *tdcluster;
+       hammer2_cluster_t *cdcluster;
        hammer2_trans_t trans;
        const uint8_t *fname;
        size_t fname_len;
@@ -1853,7 +1792,7 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
        tname = tncp->nc_name;
        tname_len = tncp->nc_nlen;
 
-       hammer2_chain_memory_wait(tdip->pmp);
+       hammer2_pfs_memory_wait(tdip->pmp);
        hammer2_trans_init(&trans, tdip->pmp, NULL, 0);
 
        /*
@@ -1861,7 +1800,7 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
         * ip represents the actual file and not the hardlink marker.
         */
        ip = VTOI(fncp->nc_vp);
-       chain = NULL;
+       cluster = NULL;
 
 
        /*
@@ -1880,14 +1819,14 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
         *          other pointers.
         */
        cdip = hammer2_inode_common_parent(ip->pip, tdip);
-       cdchain = hammer2_inode_lock_ex(cdip);
-       fdchain = hammer2_inode_lock_ex(fdip);
-       tdchain = hammer2_inode_lock_ex(tdip);
+       cdcluster = hammer2_inode_lock_ex(cdip);
+       fdcluster = hammer2_inode_lock_ex(fdip);
+       tdcluster = hammer2_inode_lock_ex(tdip);
 
        /*
         * Keep a tight grip on the inode so the temporary unlinking from
         * the source location prior to linking to the target location
-        * does not cause the chain to be destroyed.
+        * does not cause the cluster to be destroyed.
         *
         * NOTE: To avoid deadlocks we cannot lock (ip) while we are
         *       unlinking elements from their directories.  Locking
@@ -1910,18 +1849,18 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
         * to counter-act the unlink below.
         *
         * If ip represents a regular file the consolidation code essentially
-        * does nothing other than return the same locked chain that was
+        * does nothing other than return the same locked cluster that was
         * passed in.
         *
-        * The returned chain will be locked.
+        * The returned cluster will be locked.
         *
         * WARNING!  We do not currently have a local copy of ipdata but
         *           we do use one later remember that it must be reloaded
         *           on any modification to the inode, including connects.
         */
-       chain = hammer2_inode_lock_ex(ip);
-       error = hammer2_hardlink_consolidate(&trans, ip, &chain,
-                                            cdip, &cdchain, 1);
+       cluster = hammer2_inode_lock_ex(ip);
+       error = hammer2_hardlink_consolidate(&trans, ip, &cluster,
+                                            cdip, cdcluster, 1);
        if (error)
                goto done;
 
@@ -1935,8 +1874,8 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
         * so we don't want hammer2_unlink_file() to rename it to the hidden
         * open-but-unlinked directory.
         *
-        * The target chain may be marked DELETED but will not be destroyed
-        * since we retain our hold on ip and chain.
+        * The target cluster may be marked DELETED but will not be destroyed
+        * since we retain our hold on ip and cluster.
         */
        error = hammer2_unlink_file(&trans, fdip, fname, fname_len,
                                    -1, &hlink, NULL);
@@ -1945,33 +1884,33 @@ hammer2_vop_nrename(struct vop_nrename_args *ap)
                goto done;
 
        /*
-        * Reconnect ip to target directory using chain.  Chains cannot
-        * actually be moved, so this will duplicate the chain in the new
-        * spot and assign it to the ip, replacing the old chain.
+        * Reconnect ip to target directory using cluster.  Chains cannot
+        * actually be moved, so this will duplicate the cluster in the new
+        * spot and assign it to the ip, replacing the old cluster.
         *
         * WARNING: Because recursive locks are allowed and we unlinked the
-        *          file that we have a chain-in-hand for just above, the
-        *          chain might have been delete-duplicated.  We must refactor
-        *          the chain.
+        *          file that we have a cluster-in-hand for just above, the
+        *          cluster might have been delete-duplicated.  We must
+        *          refactor the cluster.
         *
         * WARNING: Chain locks can lock buffer cache buffers, to avoid
         *          deadlocks we want to unlock before issuing a cache_*()
         *          op (that might have to lock a vnode).
         */
-       hammer2_chain_refactor(&chain);
-       error = hammer2_inode_connect(&trans, &chain, hlink,
-                                     tdip, &tdchain,
+       hammer2_cluster_refactor(cluster);
+       error = hammer2_inode_connect(&trans, &cluster, hlink,
+                                     tdip, tdcluster,
                                      tname, tname_len, 0);
-       chain->inode_reason = 5;
+       cluster->focus->inode_reason = 5;
        if (error == 0) {
-               KKASSERT(chain != NULL);
-               hammer2_inode_repoint(ip, (hlink ? ip->pip : tdip), chain);
+               KKASSERT(cluster != NULL);
+               hammer2_inode_repoint(ip, (hlink ? ip->pip : tdip), cluster);
        }
 done:
-       hammer2_inode_unlock_ex(ip, chain);
-       hammer2_inode_unlock_ex(tdip, tdchain);
-       hammer2_inode_unlock_ex(fdip, fdchain);
-       hammer2_inode_unlock_ex(cdip, cdchain);
+       hammer2_inode_unlock_ex(ip, cluster);
+       hammer2_inode_unlock_ex(tdip, tdcluster);
+       hammer2_inode_unlock_ex(fdip, fdcluster);
+       hammer2_inode_unlock_ex(cdip, cdcluster);
        hammer2_inode_drop(ip);
        hammer2_trans_done(&trans);
 
@@ -1995,6 +1934,7 @@ done:
 static int hammer2_strategy_read(struct vop_strategy_args *ap);
 static int hammer2_strategy_write(struct vop_strategy_args *ap);
 static void hammer2_strategy_read_callback(hammer2_io_t *dio,
+                               hammer2_cluster_t *cluster,
                                hammer2_chain_t *chain,
                                void *arg_p, off_t arg_o);
 
@@ -2036,11 +1976,12 @@ hammer2_strategy_read(struct vop_strategy_args *ap)
        struct bio *bio;
        struct bio *nbio;
        hammer2_inode_t *ip;
-       hammer2_chain_t *parent;
-       hammer2_chain_t *chain;
+       hammer2_cluster_t *cparent;
+       hammer2_cluster_t *cluster;
        hammer2_key_t key_dummy;
        hammer2_key_t lbase;
-       int cache_index = -1;
+       int ddflag;
+       uint8_t btype;
 
        bio = ap->a_bio;
        bp = bio->bio_buf;
@@ -2048,72 +1989,41 @@ hammer2_strategy_read(struct vop_strategy_args *ap)
        nbio = push_bio(bio);
 
        lbase = bio->bio_offset;
-       chain = NULL;
        KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0);
 
-       parent = hammer2_inode_lock_sh(ip);
-       chain = hammer2_chain_lookup(&parent, &key_dummy,
-                                    lbase, lbase,
-                                    &cache_index,
-                                    HAMMER2_LOOKUP_NODATA |
-                                    HAMMER2_LOOKUP_SHARED);
+       cparent = hammer2_inode_lock_sh(ip);
+       cluster = hammer2_cluster_lookup(cparent, &key_dummy,
+                                      lbase, lbase,
+                                      HAMMER2_LOOKUP_NODATA |
+                                      HAMMER2_LOOKUP_SHARED,
+                                      &ddflag);
+       hammer2_inode_unlock_sh(ip, cparent);
 
-       if (chain == NULL) {
-               /*
-                * Data is zero-fill
-                */
+       /*
+        * Data is zero-fill if no cluster could be found
+        * (XXX or EIO on a cluster failure).
+        */
+       if (cluster == NULL) {
                bp->b_resid = 0;
                bp->b_error = 0;
                bzero(bp->b_data, bp->b_bcount);
                biodone(nbio);
-       } else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
-               /*
-                * Data is embedded in the inode (copy from inode).
-                */
-               hammer2_chain_load_async(chain,
-                                        hammer2_strategy_read_callback,
-                                        nbio, 0);
-       } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
-               /*
-                * Data is on-media, issue device I/O and copy.
-                *
-                * XXX direct-IO shortcut could go here XXX.
-                */
-               if (HAMMER2_DEC_COMP(chain->bref.methods) == HAMMER2_COMP_LZ4) {
-                       /*
-                        * Block compression is determined by bref.methods
-                        */
-                       hammer2_blockref_t *bref;
-                               
-                       bref = &chain->bref;
-                       hammer2_io_breadcb(chain->hmp, bref->data_off,
-                                          chain->bytes,
-                                          hammer2_decompress_LZ4_callback,
-                                          NULL, nbio, bref->data_off);
-                       /* XXX async read dev blk not protected by chain lk */
-                       hammer2_chain_unlock(chain);
-               } else if (HAMMER2_DEC_COMP(chain->bref.methods) ==
-                          HAMMER2_COMP_ZLIB) {
-                       hammer2_blockref_t *bref;
-                               
-                       bref = &chain->bref;
-                       hammer2_io_breadcb(chain->hmp, bref->data_off,
-                                          chain->bytes,
-                                          hammer2_decompress_ZLIB_callback,
-                                          NULL, nbio, bref->data_off);
-                       /* XXX async read dev blk not protected by chain lk */
-                       hammer2_chain_unlock(chain);
-               } else {
-                       hammer2_chain_load_async(chain,
-                                                hammer2_strategy_read_callback,
-                                                nbio, 0);
-               }
-       } else {
+               return(0);
+       }
+
+       /*
+        * Cluster elements must be type INODE or type DATA, but the
+        * compression mode (if any) for DATA chains can be different for
+        * each chain.  This will be handled by the callback.
+        */
+       btype = hammer2_cluster_type(cluster);
+       if (btype != HAMMER2_BREF_TYPE_INODE &&
+           btype != HAMMER2_BREF_TYPE_DATA) {
                panic("READ PATH: hammer2_strategy_read: unknown bref type");
-               chain = NULL;
        }
-       hammer2_inode_unlock_sh(ip, parent);
-       return (0);
+
+       hammer2_chain_load_async(cluster, hammer2_strategy_read_callback, nbio);
+       return(0);
 }
 
 /*
@@ -2121,17 +2031,45 @@ hammer2_strategy_read(struct vop_strategy_args *ap)
  */
 static
 void
-hammer2_strategy_read_callback(hammer2_io_t *dio, hammer2_chain_t *chain,
-                              void *arg_p, off_t arg_o __unused)
+hammer2_strategy_read_callback(hammer2_io_t *dio,
+                              hammer2_cluster_t *cluster,
+                              hammer2_chain_t *chain,
+                              void *arg_p, off_t arg_o)
 {
-       struct bio *nbio = arg_p;
-       struct buf *bp = nbio->bio_buf;
+       struct bio *bio = arg_p;
+       struct buf *bp = bio->bio_buf;
        char *data;
+       int i;
 
-       if (dio)
+       /*
+        * Extract the data.  On an I/O error retry the read on the next
+        * chain in the cluster; arg_o is the index of the chain just tried.
+        */
+       if (dio) {
+               if (dio->bp->b_flags & B_ERROR) {
+                       i = (int)arg_o + 1;
+                       if (i >= cluster->nchains) {
+                               bp->b_flags |= B_ERROR;
+                               bp->b_error = dio->bp->b_error;
+                               biodone(bio);
+                       } else {
+                               chain = cluster->array[i];
+                               kprintf("hammer2: IO CHAIN-%d %p\n", i, chain);
+                               hammer2_adjreadcounter(&chain->bref,
+                                                      chain->bytes);
+                               hammer2_io_breadcb(chain->hmp,
+                                                  chain->bref.data_off,
+                                                  chain->bytes,
+                                              hammer2_strategy_read_callback,
+                                                  cluster, chain,
+                                                  arg_p, (off_t)i);
+                       }
+                       return;
+               }
                data = hammer2_io_data(dio, chain->bref.data_off);
-       else
+       } else {
                data = (void *)chain->data;
+       }
 
        if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
                /*
@@ -2143,25 +2081,37 @@ hammer2_strategy_read_callback(hammer2_io_t *dio, hammer2_chain_t *chain,
                      bp->b_bcount - HAMMER2_EMBEDDED_BYTES);
                bp->b_resid = 0;
                bp->b_error = 0;
-               hammer2_chain_unlock(chain);
-               biodone(nbio);
        } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
                /*
                 * Data is on-media, issue device I/O and copy.
                 *
                 * XXX direct-IO shortcut could go here XXX.
                 */
-               KKASSERT(chain->bytes <= bp->b_bcount);
-               bcopy(data, bp->b_data, chain->bytes);
-               if (chain->bytes < bp->b_bcount) {
-                       bzero(bp->b_data + chain->bytes,
-                             bp->b_bcount - chain->bytes);
+               switch (HAMMER2_DEC_COMP(chain->bref.methods)) {
+               case HAMMER2_COMP_LZ4:
+                       hammer2_decompress_LZ4_callback(data, chain->bytes,
+                                                       bio);
+                       break;
+               case HAMMER2_COMP_ZLIB:
+                       hammer2_decompress_ZLIB_callback(data, chain->bytes,
+                                                        bio);
+                       break;
+               case HAMMER2_COMP_NONE:
+                       KKASSERT(chain->bytes <= bp->b_bcount);
+                       bcopy(data, bp->b_data, chain->bytes);
+                       if (chain->bytes < bp->b_bcount) {
+                               bzero(bp->b_data + chain->bytes,
+                                     bp->b_bcount - chain->bytes);
+                       }
+                       bp->b_flags |= B_NOTMETA;
+                       bp->b_resid = 0;
+                       bp->b_error = 0;
+                       hammer2_chain_unlock(chain);
+                       break;
+               default:
+                       panic("hammer2_strategy_read: "
+                             "unknown compression type");
                }
-               bp->b_flags |= B_NOTMETA;
-               bp->b_resid = 0;
-               bp->b_error = 0;
-               hammer2_chain_unlock(chain);
-               biodone(nbio);
        } else {
                /* bqrelse the dio to help stabilize the call to panic() */
                if (dio)
@@ -2170,6 +2120,8 @@ hammer2_strategy_read_callback(hammer2_io_t *dio, hammer2_chain_t *chain,
                /*hammer2_chain_unlock(chain);*/
                /*chain = NULL;*/
        }
+       hammer2_cluster_unlock(cluster);
+       biodone(bio);
 }
 
 static
@@ -2252,7 +2204,7 @@ hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp)
 {
        hammer2_inode_unlink_t *ipul;
        hammer2_inode_t *ip;
-       hammer2_chain_t *chain;
+       hammer2_cluster_t *cluster;
 
        if (TAILQ_EMPTY(&pmp->unlinkq))
                return;
@@ -2264,12 +2216,12 @@ hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp)
                ip = ipul->ip;
                kfree(ipul, pmp->minode);
 
-               chain = hammer2_inode_lock_ex(ip);
-               KKASSERT(chain->flags & HAMMER2_CHAIN_UNLINKED);
+               cluster = hammer2_inode_lock_ex(ip);
+               KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_UNLINKED);
                kprintf("hammer2: unlink on reclaim: %s\n",
-                       chain->data->ipdata.filename);
-               hammer2_chain_delete(trans, chain, 0);
-               hammer2_inode_unlock_ex(ip, chain);     /* inode lock */
+                       cluster->focus->data->ipdata.filename);
+               hammer2_cluster_delete(trans, cluster, 0);
+               hammer2_inode_unlock_ex(ip, cluster);   /* inode lock */
                hammer2_inode_drop(ip);                 /* ipul ref */
 
                spin_lock(&pmp->unlinkq_spin);