From 278ab2b27e913575840951f625eb2d8620aafa34 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Tue, 18 Mar 2014 09:35:21 -0700 Subject: [PATCH] hammer2 - Start adding internal cluster API Initial addition of the cluster API. H2 will be non-operational until this is stabilized. Adding the cluster API will require a few stages. This first stage is to add the API and make it work under degenerate (single-target) conditions. The hammer2_cluster structure collects and manages an array of up to 8 chains representing mirrors / cluster nodes / copies of the same point in the topology. * Add hammer2_cluster.c, and hammer2_cluster_t * Replace nearly all high-level (vnops, vfsops) chain calls with cluster calls. --- sys/vfs/hammer2/Makefile | 2 +- sys/vfs/hammer2/TODO | 9 + sys/vfs/hammer2/hammer2.h | 284 ++++++--- sys/vfs/hammer2/hammer2_chain.c | 244 ++------ sys/vfs/hammer2/hammer2_cluster.c | 784 ++++++++++++++++++++++++ sys/vfs/hammer2/hammer2_flush.c | 8 +- sys/vfs/hammer2/hammer2_freemap.c | 6 +- sys/vfs/hammer2/hammer2_inode.c | 965 ++++++++++++++++-------------- sys/vfs/hammer2/hammer2_io.c | 12 +- sys/vfs/hammer2/hammer2_ioctl.c | 166 ++--- sys/vfs/hammer2/hammer2_subr.c | 49 +- sys/vfs/hammer2/hammer2_vfsops.c | 513 +++++++++------- sys/vfs/hammer2/hammer2_vnops.c | 838 ++++++++++++-------------- 13 files changed, 2392 insertions(+), 1488 deletions(-) create mode 100644 sys/vfs/hammer2/hammer2_cluster.c diff --git a/sys/vfs/hammer2/Makefile b/sys/vfs/hammer2/Makefile index 6bc7265d71..402664aef3 100644 --- a/sys/vfs/hammer2/Makefile +++ b/sys/vfs/hammer2/Makefile @@ -6,7 +6,7 @@ CFLAGS+= -DINVARIANTS -DSMP KMOD= hammer2 SRCS= hammer2_vfsops.c hammer2_vnops.c hammer2_inode.c hammer2_ccms.c -SRCS+= hammer2_chain.c hammer2_flush.c hammer2_freemap.c +SRCS+= hammer2_chain.c hammer2_flush.c hammer2_freemap.c hammer2_cluster.c SRCS+= hammer2_ioctl.c hammer2_msgops.c hammer2_subr.c SRCS+= hammer2_lz4.c hammer2_io.c SRCS+= hammer2_zlib_adler32.c hammer2_zlib_deflate.c diff --git a/sys/vfs/hammer2/TODO b/sys/vfs/hammer2/TODO index 089dcb12b9..e597f601ca 100644 --- a/sys/vfs/hammer2/TODO +++ b/sys/vfs/hammer2/TODO @@ -1,4 +1,13 @@ +* transaction on cluster - multiple trans structures, subtrans + +* inode always contains target cluster/chain, not hardlink + +* cluster_modify_ip -> data returned mod to all chains +* and hammer2_cluster_data() -> same thing + +* chain refs in cluster, cluster refs + * check inode shared lock ... can end up in endless loop if following hardlink because ip->chain is not updated in the exclusive lock cycle when following hardlink. diff --git a/sys/vfs/hammer2/hammer2.h b/sys/vfs/hammer2/hammer2.h index ce40e67384..d6920e345f 100644 --- a/sys/vfs/hammer2/hammer2.h +++ b/sys/vfs/hammer2/hammer2.h @@ -34,8 +34,63 @@ */ /* + * HAMMER2 IN-MEMORY CACHE OF MEDIA STRUCTURES + * * This header file contains structures used internally by the HAMMER2 * implementation. See hammer2_disk.h for on-disk structures. + * + * There is an in-memory representation of all on-media data structure. + * Basically everything is represented by a hammer2_chain structure + * in-memory and other higher-level structures map to chains. + * + * A great deal of data is accessed simply via its buffer cache buffer, + * which is mapped for the duration of the chain's lock. However, because + * chains may represent blocks smaller than the 16KB minimum we impose + * on buffer cache buffers, we cannot hold related buffer cache buffers + * locked for smaller blocks. 
In these situations we kmalloc() a copy + * of the block. + * + * When modifications are made to a chain a new filesystem block must be + * allocated. Multiple modifications do not necessarily allocate new + * blocks. However, when a flush occurs a flush synchronization point + * is created and any new modifications made after this point will allocate + * a new block even if the chain is already in a modified state. + * + * The in-memory representation may remain cached (for example in order to + * placemark clustering locks) even after the related data has been + * detached. + * + * CORE SHARING + * + * In order to support concurrent flushes a flush synchronization point + * is created represented by a transaction id. Among other things, + * operations may move filesystem objects from one part of the topology + * to another (for example, if you rename a file or when indirect blocks + * are created or destroyed, and a few other things). When this occurs + * across a flush synchronization point the flusher needs to be able to + * recurse down BOTH the 'before' version of the topology and the 'after' + * version. + * + * To facilitate this modifications to chains do what is called a + * DELETE-DUPLICATE operation. Chains are not actually moved in-memory. + * Instead the chain we wish to move is deleted and a new chain is created + * at the target location in the topology. ANY SUBCHAINS PLACED UNDER THE + * CHAIN BEING MOVED HAVE TO EXIST IN BOTH PLACES. To make this work + * all sub-chains are managed by the hammer2_chain_core structure. This + * structure can be multi-homed, meaning that it can have more than one + * chain as its parent. When a chain is delete-duplicated the chain's core + * becomes shared under both the old and new chain. + * + * STALE CHAINS + * + * When a chain is delete-duplicated the old chain typically becomes stale. + * This is detected via the HAMMER2_CHAIN_DUPLICATED flag in chain->flags. + * To avoid executing live filesystem operations on stale chains, the inode + * locking code will follow stale chains via core->ownerq until it finds + * the live chain. The lock prevents ripups by other threads. Lookups + * must properly order locking operations to prevent other threads from + * racing the lookup operation and will also follow stale chains when + * required. */ #ifndef _VFS_HAMMER2_HAMMER2_H_ @@ -60,10 +115,12 @@ #include #include #include -#include #include #include #include +#include + +#include #include #include "hammer2_disk.h" @@ -72,6 +129,7 @@ #include "hammer2_ccms.h" struct hammer2_chain; +struct hammer2_cluster; struct hammer2_inode; struct hammer2_mount; struct hammer2_pfsmount; @@ -179,8 +237,10 @@ struct hammer2_io { off_t pbase; int psize; void (*callback)(struct hammer2_io *dio, + struct hammer2_cluster *cluster, struct hammer2_chain *chain, void *arg1, off_t arg2); + struct hammer2_cluster *arg_l; /* INPROG I/O only */ struct hammer2_chain *arg_c; /* INPROG I/O only */ void *arg_p; /* INPROG I/O only */ off_t arg_o; /* INPROG I/O only */ @@ -352,61 +412,41 @@ RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); #define HAMMER2_FREEMAP_DOREALFREE 3 /* - * HAMMER2 IN-MEMORY CACHE OF MEDIA STRUCTURES - * - * There is an in-memory representation of all on-media data structure. - * Basically everything is represented by a hammer2_chain structure - * in-memory and other higher-level structures map to chains. 
- * - * A great deal of data is accessed simply via its buffer cache buffer, - * which is mapped for the duration of the chain's lock. However, because - * chains may represent blocks smaller than the 16KB minimum we impose - * on buffer cache buffers, we cannot hold related buffer cache buffers - * locked for smaller blocks. In these situations we kmalloc() a copy - * of the block. + * HAMMER2 cluster - A set of chains representing the same entity. * - * When modifications are made to a chain a new filesystem block must be - * allocated. Multiple modifications do not necessarily allocate new - * blocks. However, when a flush occurs a flush synchronization point - * is created and any new modifications made after this point will allocate - * a new block even if the chain is already in a modified state. - * - * The in-memory representation may remain cached (for example in order to - * placemark clustering locks) even after the related data has been - * detached. + * The hammer2_pfsmount structure embeds a hammer2_cluster. All other + * hammer2_cluster use cases use temporary allocations. * - * CORE SHARING + * The cluster API mimics the chain API. Except as used in the pfsmount, + * the cluster structure is a temporary 'working copy' of a set of chains + * representing targets compatible with the operation. However, for + * performance reasons the cluster API does not necessarily issue concurrent + * requests to the underlying chain API for all compatible chains all the + * time. This may sometimes necessitate revisiting parent cluster nodes + * to 'flesh out' (validate more chains). * - * In order to support concurrent flushes a flush synchronization point - * is created represented by a transaction id. Among other things, - * operations may move filesystem objects from one part of the topology - * to another (for example, if you rename a file or when indirect blocks - * are created or destroyed, and a few other things). When this occurs - * across a flush synchronization point the flusher needs to be able to - * recurse down BOTH the 'before' version of the topology and the 'after' - * version. - * - * To facilitate this modifications to chains do what is called a - * DELETE-DUPLICATE operation. Chains are not actually moved in-memory. - * Instead the chain we wish to move is deleted and a new chain is created - * at the target location in the topology. ANY SUBCHAINS PLACED UNDER THE - * CHAIN BEING MOVED HAVE TO EXIST IN BOTH PLACES. To make this work - * all sub-chains are managed by the hammer2_chain_core structure. This - * structure can be multi-homed, meaning that it can have more than one - * chain as its parent. When a chain is delete-duplicated the chain's core - * becomes shared under both the old and new chain. - * - * STALE CHAINS - * - * When a chain is delete-duplicated the old chain typically becomes stale. - * This is detected via the HAMMER2_CHAIN_DUPLICATED flag in chain->flags. - * To avoid executing live filesystem operations on stale chains, the inode - * locking code will follow stale chains via core->ownerq until it finds - * the live chain. The lock prevents ripups by other threads. Lookups - * must properly order locking operations to prevent other threads from - * racing the lookup operation and will also follow stale chains when - * required. + * If an insufficient number of chains remain in a working copy, the operation + * may have to be downgraded, retried, or stall until the requisit number + * of chains are available. 
*/ +#define HAMMER2_MAXCLUSTER 8 + +struct hammer2_cluster { + int status; /* operational status */ + int refs; /* track for deallocation */ + struct hammer2_pfsmount *pmp; + uint32_t flags; + int nchains; + hammer2_chain_t *focus; /* current focus (or mod) */ + hammer2_chain_t *array[HAMMER2_MAXCLUSTER]; + int cache_index[HAMMER2_MAXCLUSTER]; +}; + +typedef struct hammer2_cluster hammer2_cluster_t; + +#define HAMMER2_CLUSTER_PFS 0x00000001 /* embedded in pfsmount */ +#define HAMMER2_CLUSTER_INODE 0x00000002 /* embedded in inode */ + RB_HEAD(hammer2_inode_tree, hammer2_inode); @@ -422,7 +462,7 @@ struct hammer2_inode { struct hammer2_pfsmount *pmp; /* PFS mount */ struct hammer2_inode *pip; /* parent inode */ struct vnode *vp; - hammer2_chain_t *chain; /* NOTE: rehomed on rename */ + hammer2_cluster_t cluster; struct lockf advlock; hammer2_tid_t inum; u_int flags; @@ -559,21 +599,6 @@ struct hammer2_mount { typedef struct hammer2_mount hammer2_mount_t; -/* - * HAMMER2 cluster - a device/root associated with a PFS. - * - * A PFS may have several hammer2_cluster's associated with it. - */ -#define HAMMER2_MAXCLUSTER 8 - -struct hammer2_cluster { - int nchains; - int status; - hammer2_chain_t *chains[HAMMER2_MAXCLUSTER]; -}; - -typedef struct hammer2_cluster hammer2_cluster_t; - /* * HAMMER2 PFS mount point structure (aka vp->v_mount->mnt_data). * This has a 1:1 correspondence to struct mount (note that the @@ -714,11 +739,10 @@ extern mtx_t thread_protect; #define hammer2_icrc32(buf, size) iscsi_crc32((buf), (size)) #define hammer2_icrc32c(buf, size, crc) iscsi_crc32_ext((buf), (size), (crc)) -hammer2_chain_t *hammer2_inode_lock_ex(hammer2_inode_t *ip); -hammer2_chain_t *hammer2_inode_lock_sh(hammer2_inode_t *ip); -void hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_chain_t *chain); -void hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_chain_t *chain); -void hammer2_chain_refactor(hammer2_chain_t **chainp); +hammer2_cluster_t *hammer2_inode_lock_ex(hammer2_inode_t *ip); +hammer2_cluster_t *hammer2_inode_lock_sh(hammer2_inode_t *ip); +void hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_cluster_t *chain); +void hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_cluster_t *chain); void hammer2_voldata_lock(hammer2_mount_t *hmp); void hammer2_voldata_unlock(hammer2_mount_t *hmp, int modify); ccms_state_t hammer2_inode_lock_temp_release(hammer2_inode_t *ip); @@ -730,8 +754,8 @@ void hammer2_mount_exlock(hammer2_mount_t *hmp); void hammer2_mount_shlock(hammer2_mount_t *hmp); void hammer2_mount_unlock(hammer2_mount_t *hmp); -int hammer2_get_dtype(hammer2_chain_t *chain); -int hammer2_get_vtype(hammer2_chain_t *chain); +int hammer2_get_dtype(hammer2_inode_data_t *ipdata); +int hammer2_get_vtype(hammer2_inode_data_t *ipdata); u_int8_t hammer2_get_obj_type(enum vtype vtype); void hammer2_time_to_timespec(u_int64_t xtime, struct timespec *ts); u_int64_t hammer2_timespec_to_time(struct timespec *ts); @@ -743,8 +767,10 @@ int hammer2_getradix(size_t bytes); int hammer2_calc_logical(hammer2_inode_t *ip, hammer2_off_t uoff, hammer2_key_t *lbasep, hammer2_key_t *leofp); -int hammer2_calc_physical(hammer2_inode_t *ip, hammer2_key_t lbase); +int hammer2_calc_physical(hammer2_inode_t *ip, hammer2_inode_data_t *ipdata, + hammer2_key_t lbase); void hammer2_update_time(uint64_t *timep); +void hammer2_adjreadcounter(hammer2_blockref_t *bref, size_t bytes); /* * hammer2_inode.c @@ -756,39 +782,38 @@ void hammer2_inode_unlock_nlinks(hammer2_inode_t *ip); hammer2_inode_t 
*hammer2_inode_lookup(hammer2_pfsmount_t *pmp, hammer2_tid_t inum); hammer2_inode_t *hammer2_inode_get(hammer2_pfsmount_t *pmp, - hammer2_inode_t *dip, hammer2_chain_t *chain); + hammer2_inode_t *dip, hammer2_cluster_t *cluster); void hammer2_inode_free(hammer2_inode_t *ip); void hammer2_inode_ref(hammer2_inode_t *ip); void hammer2_inode_drop(hammer2_inode_t *ip); void hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip, - hammer2_chain_t *chain); + hammer2_cluster_t *cluster); void hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp); hammer2_inode_t *hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip, struct vattr *vap, struct ucred *cred, const uint8_t *name, size_t name_len, - hammer2_chain_t **chainp, int *errorp); + hammer2_cluster_t **clusterp, int *errorp); int hammer2_inode_connect(hammer2_trans_t *trans, - hammer2_chain_t **chainp, int hlink, - hammer2_inode_t *dip, hammer2_chain_t **dchainp, + hammer2_cluster_t **clusterp, int hlink, + hammer2_inode_t *dip, hammer2_cluster_t *dcluster, const uint8_t *name, size_t name_len, hammer2_key_t key); hammer2_inode_t *hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip); void hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip, - hammer2_chain_t **parentp); + hammer2_cluster_t *cparent); int hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip, const uint8_t *name, size_t name_len, int isdir, int *hlinkp, struct nchandle *nch); int hammer2_hardlink_consolidate(hammer2_trans_t *trans, - hammer2_inode_t *ip, hammer2_chain_t **chainp, - hammer2_inode_t *cdip, hammer2_chain_t **cdchainp, + hammer2_inode_t *ip, hammer2_cluster_t **clusterp, + hammer2_inode_t *cdip, hammer2_cluster_t *cdcluster, int nlinks); int hammer2_hardlink_deconsolidate(hammer2_trans_t *trans, hammer2_inode_t *dip, hammer2_chain_t **chainp, hammer2_chain_t **ochainp); -int hammer2_hardlink_find(hammer2_inode_t *dip, - hammer2_chain_t **chainp, hammer2_chain_t **ochainp); +int hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_cluster_t *cluster); void hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp); /* @@ -804,17 +829,15 @@ void hammer2_chain_core_alloc(hammer2_trans_t *trans, hammer2_chain_t *nchain, void hammer2_chain_ref(hammer2_chain_t *chain); void hammer2_chain_drop(hammer2_chain_t *chain); int hammer2_chain_lock(hammer2_chain_t *chain, int how); -void hammer2_chain_load_async(hammer2_chain_t *chain, +void hammer2_chain_load_async(hammer2_cluster_t *cluster, void (*func)(hammer2_io_t *dio, + hammer2_cluster_t *cluster, hammer2_chain_t *chain, void *arg_p, off_t arg_o), - void *arg_p, off_t arg_o); + void *arg_p); void hammer2_chain_moved(hammer2_chain_t *chain); void hammer2_chain_modify(hammer2_trans_t *trans, hammer2_chain_t **chainp, int flags); -hammer2_inode_data_t *hammer2_chain_modify_ip(hammer2_trans_t *trans, - hammer2_inode_t *ip, hammer2_chain_t **chainp, - int flags); void hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip, hammer2_chain_t *parent, hammer2_chain_t **chainp, @@ -828,7 +851,7 @@ void hammer2_chain_lookup_done(hammer2_chain_t *parent); hammer2_chain_t *hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp, hammer2_key_t key_beg, hammer2_key_t key_end, - int *cache_indexp, int flags); + int *cache_indexp, int flags, int *ddflagp); hammer2_chain_t *hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, hammer2_key_t *key_nextp, @@ -856,13 +879,13 @@ void hammer2_chain_delete_duplicate(hammer2_trans_t 
*trans, void hammer2_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp); void hammer2_chain_commit(hammer2_trans_t *trans, hammer2_chain_t *chain); void hammer2_chain_setsubmod(hammer2_trans_t *trans, hammer2_chain_t *chain); - -void hammer2_chain_memory_wait(hammer2_pfsmount_t *pmp); -void hammer2_chain_memory_inc(hammer2_pfsmount_t *pmp); -void hammer2_chain_memory_wakeup(hammer2_pfsmount_t *pmp); void hammer2_chain_countbrefs(hammer2_chain_t *chain, hammer2_blockref_t *base, int count); +void hammer2_pfs_memory_wait(hammer2_pfsmount_t *pmp); +void hammer2_pfs_memory_inc(hammer2_pfsmount_t *pmp); +void hammer2_pfs_memory_wakeup(hammer2_pfsmount_t *pmp); + int hammer2_base_find(hammer2_chain_t *chain, hammer2_blockref_t *base, int count, int *cache_indexp, hammer2_key_t *key_nextp, @@ -874,6 +897,7 @@ void hammer2_base_delete(hammer2_trans_t *trans, hammer2_chain_t *chain, void hammer2_base_insert(hammer2_trans_t *trans, hammer2_chain_t *chain, hammer2_blockref_t *base, int count, int *cache_indexp, hammer2_chain_t *child); +void hammer2_chain_refactor(hammer2_chain_t **chainp); /* * hammer2_trans.c @@ -906,8 +930,10 @@ int hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize, hammer2_io_t **diop); void hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize, void (*callback)(hammer2_io_t *dio, + hammer2_cluster_t *arg_l, hammer2_chain_t *arg_c, void *arg_p, off_t arg_o), + hammer2_cluster_t *arg_l, hammer2_chain_t *arg_c, void *arg_p, off_t arg_o); void hammer2_io_bawrite(hammer2_io_t **diop); @@ -946,6 +972,72 @@ int hammer2_freemap_alloc(hammer2_trans_t *trans, hammer2_chain_t *chain, void hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_mount_t *hmp, hammer2_blockref_t *bref, int how); +/* + * hammer2_cluster.c + */ +u_int hammer2_cluster_bytes(hammer2_cluster_t *cluster); +uint8_t hammer2_cluster_type(hammer2_cluster_t *cluster); +hammer2_media_data_t *hammer2_cluster_data(hammer2_cluster_t *cluster); +hammer2_cluster_t *hammer2_cluster_from_chain(hammer2_chain_t *chain); +int hammer2_cluster_modified(hammer2_cluster_t *cluster); +int hammer2_cluster_unlinked(hammer2_cluster_t *cluster); +int hammer2_cluster_duplicated(hammer2_cluster_t *cluster); +void hammer2_cluster_set_chainflags(hammer2_cluster_t *cluster, uint32_t flags); +void hammer2_cluster_bref(hammer2_cluster_t *cluster, hammer2_blockref_t *bref); +void hammer2_cluster_setsubmod(hammer2_trans_t *trans, + hammer2_cluster_t *cluster); +hammer2_cluster_t *hammer2_cluster_alloc(hammer2_pfsmount_t *pmp, + hammer2_trans_t *trans, + hammer2_blockref_t *bref); +void hammer2_cluster_core_alloc(hammer2_trans_t *trans, + hammer2_cluster_t *ncluster, + hammer2_cluster_t *ocluster); +void hammer2_cluster_ref(hammer2_cluster_t *cluster); +void hammer2_cluster_drop(hammer2_cluster_t *cluster); +void hammer2_cluster_wait(hammer2_cluster_t *cluster); +int hammer2_cluster_lock(hammer2_cluster_t *cluster, int how); +void hammer2_cluster_replace(hammer2_cluster_t *dst, hammer2_cluster_t *src); +void hammer2_cluster_replace_locked(hammer2_cluster_t *dst, + hammer2_cluster_t *src); +hammer2_cluster_t *hammer2_cluster_copy(hammer2_cluster_t *ocluster, + int with_chains); +void hammer2_cluster_refactor(hammer2_cluster_t *cluster); +void hammer2_cluster_unlock(hammer2_cluster_t *cluster); +void hammer2_cluster_resize(hammer2_trans_t *trans, hammer2_inode_t *ip, + hammer2_cluster_t *cparent, hammer2_cluster_t *cluster, + int nradix, int flags); +hammer2_inode_data_t *hammer2_cluster_modify_ip(hammer2_trans_t *trans, + 
hammer2_inode_t *ip, hammer2_cluster_t *cluster, + int flags); +void hammer2_cluster_modify(hammer2_trans_t *trans, hammer2_cluster_t *cluster, + int flags); +hammer2_cluster_t *hammer2_cluster_lookup_init(hammer2_cluster_t *cparent, + int flags); +void hammer2_cluster_lookup_done(hammer2_cluster_t *cparent); +hammer2_cluster_t *hammer2_cluster_lookup(hammer2_cluster_t *cparent, + hammer2_key_t *key_nextp, + hammer2_key_t key_beg, hammer2_key_t key_end, + int flags, int *ddflagp); +hammer2_cluster_t *hammer2_cluster_next(hammer2_cluster_t *cparent, + hammer2_cluster_t *cluster, + hammer2_key_t *key_nextp, + hammer2_key_t key_beg, hammer2_key_t key_end, + int flags); +hammer2_cluster_t *hammer2_cluster_scan(hammer2_cluster_t *cparent, + hammer2_cluster_t *cluster, int flags); +int hammer2_cluster_create(hammer2_trans_t *trans, hammer2_cluster_t *cparent, + hammer2_cluster_t **clusterp, + hammer2_key_t key, int keybits, int type, size_t bytes); +void hammer2_cluster_duplicate(hammer2_trans_t *trans, + hammer2_cluster_t *cparent, hammer2_cluster_t *cluster, + hammer2_blockref_t *bref, + int snapshot, int duplicate_reason); +void hammer2_cluster_delete_duplicate(hammer2_trans_t *trans, + hammer2_cluster_t *cluster, int flags); +void hammer2_cluster_delete(hammer2_trans_t *trans, hammer2_cluster_t *cluster, + int flags); +int hammer2_cluster_snapshot(hammer2_trans_t *trans, + hammer2_cluster_t *ocluster, hammer2_ioc_pfs_t *pfs); #endif /* !_KERNEL */ #endif /* !_VFS_HAMMER2_HAMMER2_H_ */ diff --git a/sys/vfs/hammer2/hammer2_chain.c b/sys/vfs/hammer2/hammer2_chain.c index 0f232da52a..b031dd6312 100644 --- a/sys/vfs/hammer2/hammer2_chain.c +++ b/sys/vfs/hammer2/hammer2_chain.c @@ -86,7 +86,6 @@ static hammer2_chain_t *hammer2_chain_create_indirect( hammer2_trans_t *trans, hammer2_chain_t *parent, hammer2_key_t key, int keybits, int for_type, int *errorp); static void hammer2_chain_drop_data(hammer2_chain_t *chain, int lastdrop); -static void adjreadcounter(hammer2_blockref_t *bref, size_t bytes); static hammer2_chain_t *hammer2_combined_find( hammer2_chain_t *parent, hammer2_blockref_t *base, int count, @@ -896,7 +895,7 @@ hammer2_chain_lock(hammer2_chain_t *chain, int how) } else { error = hammer2_io_bread(hmp, bref->data_off, chain->bytes, &chain->dio); - adjreadcounter(&chain->bref, chain->bytes); + hammer2_adjreadcounter(&chain->bref, chain->bytes); } if (error) { @@ -965,19 +964,37 @@ hammer2_chain_lock(hammer2_chain_t *chain, int how) * of the chain first to handle certain cases. */ void -hammer2_chain_load_async(hammer2_chain_t *chain, +hammer2_chain_load_async(hammer2_cluster_t *cluster, void (*callback)(hammer2_io_t *dio, + hammer2_cluster_t *cluster, hammer2_chain_t *chain, void *arg_p, off_t arg_o), - void *arg_p, off_t arg_o) + void *arg_p) { + hammer2_chain_t *chain; hammer2_mount_t *hmp; struct hammer2_io *dio; hammer2_blockref_t *bref; int error; + int i; + + /* + * If no chain specified see if any chain data is available and use + * that, otherwise begin an I/O iteration using the first chain. 
+ */ + chain = NULL; + for (i = 0; i < cluster->nchains; ++i) { + chain = cluster->array[i]; + if (chain->data) + break; + } + if (i == cluster->nchains) { + chain = cluster->array[0]; + i = 0; + } if (chain->data) { - callback(NULL, chain, arg_p, arg_o); + callback(NULL, cluster, chain, arg_p, (off_t)i); return; } @@ -1005,16 +1022,16 @@ hammer2_chain_load_async(hammer2_chain_t *chain, chain->bytes == hammer2_devblksize(chain->bytes)) { error = hammer2_io_new(hmp, bref->data_off, chain->bytes, &dio); KKASSERT(error == 0); - callback(dio, chain, arg_p, arg_o); + callback(dio, cluster, chain, arg_p, (off_t)i); return; } /* * Otherwise issue a read */ - adjreadcounter(&chain->bref, chain->bytes); + hammer2_adjreadcounter(&chain->bref, chain->bytes); hammer2_io_breadcb(hmp, bref->data_off, chain->bytes, - callback, chain, arg_p, arg_o); + callback, cluster, chain, arg_p, (off_t)i); } /* @@ -1291,7 +1308,11 @@ hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip, *chainp = chain; } +#if 0 + /* + * REMOVED - see cluster code + * * Set a chain modified, making it read-write and duplicating it if necessary. * This function will assign a new physical block to the chain if necessary * @@ -1322,6 +1343,8 @@ hammer2_chain_modify_ip(hammer2_trans_t *trans, hammer2_inode_t *ip, return(&ip->chain->data->ipdata); } +#endif + void hammer2_chain_modify(hammer2_trans_t *trans, hammer2_chain_t **chainp, int flags) @@ -1387,7 +1410,7 @@ hammer2_chain_modify(hammer2_trans_t *trans, hammer2_chain_t **chainp, if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) { atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); hammer2_chain_ref(chain); - hammer2_chain_memory_inc(chain->pmp); + hammer2_pfs_memory_inc(chain->pmp); } if ((chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) == 0) { atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSH_CREATE); @@ -1494,7 +1517,7 @@ hammer2_chain_modify(hammer2_trans_t *trans, hammer2_chain_t **chainp, error = hammer2_io_bread(hmp, chain->bref.data_off, chain->bytes, &dio); } - adjreadcounter(&chain->bref, chain->bytes); + hammer2_adjreadcounter(&chain->bref, chain->bytes); KKASSERT(error == 0); bdata = hammer2_io_data(dio, chain->bref.data_off); @@ -1908,7 +1931,7 @@ hammer2_chain_getparent(hammer2_chain_t **parentp, int how) hammer2_chain_t * hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp, hammer2_key_t key_beg, hammer2_key_t key_end, - int *cache_indexp, int flags) + int *cache_indexp, int flags, int *ddflagp) { hammer2_mount_t *hmp; hammer2_chain_t *parent; @@ -1927,6 +1950,7 @@ hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp, int maxloops = 300000; int wasdup; + *ddflagp = 0; if (flags & HAMMER2_LOOKUP_ALWAYS) { how_maybe = how_always; how = HAMMER2_RESOLVE_ALWAYS; @@ -1984,6 +2008,7 @@ again: else hammer2_chain_lock(parent, how_always); *key_nextp = key_end + 1; + *ddflagp = 1; return (parent); } base = &parent->data->ipdata.u.blockset.blockref[0]; @@ -2192,6 +2217,7 @@ hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, { hammer2_chain_t *parent; int how_maybe; + int ddflag; /* * Calculate locking flags for upward recursion. 
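The hammer2_chain_load_async() conversion above now takes a cluster rather than a single chain: it prefers any chain whose data is already resolved (so the callback can run synchronously) and otherwise falls back to the cluster's first chain to begin the read, passing the chosen chain index through the old arg_o slot. Below is a minimal sketch of just that selection step, using simplified stand-in types (demo_chain / demo_cluster are illustrative, not the kernel structures):

#include <stddef.h>

#define DEMO_MAXCLUSTER 8		/* mirrors HAMMER2_MAXCLUSTER */

struct demo_chain {
	void *data;			/* non-NULL once media data is resolved */
};

struct demo_cluster {
	int nchains;			/* assumed >= 1, as in the kernel code */
	struct demo_chain *array[DEMO_MAXCLUSTER];
};

/*
 * Prefer a chain whose data is already in memory; otherwise fall back
 * to array[0], which the caller will use to start an I/O iteration.
 * Returns the index of the chosen chain and sets *chainp.
 */
static int
demo_pick_focus(struct demo_cluster *cluster, struct demo_chain **chainp)
{
	struct demo_chain *chain = NULL;
	int i;

	for (i = 0; i < cluster->nchains; ++i) {
		chain = cluster->array[i];
		if (chain->data)
			break;
	}
	if (i == cluster->nchains) {
		chain = cluster->array[0];
		i = 0;
	}
	*chainp = chain;
	return (i);
}

The same "focus" idea recurs throughout the cluster API: operations fan out across cluster->array[] but report data and status through a single representative chain.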
@@ -2245,7 +2271,7 @@ hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, */ return (hammer2_chain_lookup(parentp, key_nextp, key_beg, key_end, - cache_indexp, flags)); + cache_indexp, flags, &ddflag)); } /* @@ -3186,74 +3212,6 @@ hammer2_chain_delete_duplicate(hammer2_trans_t *trans, hammer2_chain_t **chainp, *chainp = nchain; } -/* - * Create a snapshot of the specified {parent, ochain} with the specified - * label. The originating hammer2_inode must be exclusively locked for - * safety. - * - * The ioctl code has already synced the filesystem. - */ -int -hammer2_chain_snapshot(hammer2_trans_t *trans, hammer2_chain_t **ochainp, - hammer2_ioc_pfs_t *pfs) -{ - hammer2_mount_t *hmp; - hammer2_chain_t *ochain = *ochainp; - hammer2_chain_t *nchain; - hammer2_inode_data_t *ipdata; - hammer2_inode_t *nip; - size_t name_len; - hammer2_key_t lhc; - struct vattr vat; - uuid_t opfs_clid; - int error; - - kprintf("snapshot %s ochain->refs %d ochain->flags %08x\n", - pfs->name, ochain->refs, ochain->flags); - - name_len = strlen(pfs->name); - lhc = hammer2_dirhash(pfs->name, name_len); - - hmp = ochain->hmp; - opfs_clid = ochain->data->ipdata.pfs_clid; - - *ochainp = ochain; - - /* - * Create the snapshot directory under the super-root - * - * Set PFS type, generate a unique filesystem id, and generate - * a cluster id. Use the same clid when snapshotting a PFS root, - * which theoretically allows the snapshot to be used as part of - * the same cluster (perhaps as a cache). - * - * Copy the (flushed) ochain's blockref array. Theoretically we - * could use chain_duplicate() but it becomes difficult to disentangle - * the shared core so for now just brute-force it. - */ - VATTR_NULL(&vat); - vat.va_type = VDIR; - vat.va_mode = 0755; - nchain = NULL; - nip = hammer2_inode_create(trans, hmp->sroot, &vat, proc0.p_ucred, - pfs->name, name_len, &nchain, &error); - - if (nip) { - ipdata = hammer2_chain_modify_ip(trans, nip, &nchain, 0); - ipdata->pfs_type = HAMMER2_PFSTYPE_SNAPSHOT; - kern_uuidgen(&ipdata->pfs_fsid, 1); - if (ochain->flags & HAMMER2_CHAIN_PFSROOT) - ipdata->pfs_clid = opfs_clid; - else - kern_uuidgen(&ipdata->pfs_clid, 1); - atomic_set_int(&nchain->flags, HAMMER2_CHAIN_PFSROOT); - ipdata->u.blockset = ochain->data->ipdata.u.blockset; - - hammer2_inode_unlock_ex(nip, nchain); - } - return (error); -} - /* * Create an indirect block that covers one or more of the elements in the * current parent. Either returns the existing parent with no locking or @@ -4485,119 +4443,27 @@ hammer2_chain_wait(hammer2_chain_t *chain) } /* - * Manage excessive memory resource use for chain and related - * structures. + * chain may have been moved around by the create. */ void -hammer2_chain_memory_wait(hammer2_pfsmount_t *pmp) -{ - long waiting; - long count; - long limit; -#if 0 - static int zzticks; -#endif - - /* - * Atomic check condition and wait. Also do an early speedup of - * the syncer to try to avoid hitting the wait. - */ - for (;;) { - waiting = pmp->inmem_dirty_chains; - cpu_ccfence(); - count = waiting & HAMMER2_DIRTYCHAIN_MASK; - - limit = pmp->mp->mnt_nvnodelistsize / 10; - if (limit < hammer2_limit_dirty_chains) - limit = hammer2_limit_dirty_chains; - if (limit < 1000) - limit = 1000; - -#if 0 - if ((int)(ticks - zzticks) > hz) { - zzticks = ticks; - kprintf("count %ld %ld\n", count, limit); - } -#endif - - /* - * Block if there are too many dirty chains present, wait - * for the flush to clean some out. 
- */ - if (count > limit) { - tsleep_interlock(&pmp->inmem_dirty_chains, 0); - if (atomic_cmpset_long(&pmp->inmem_dirty_chains, - waiting, - waiting | HAMMER2_DIRTYCHAIN_WAITING)) { - speedup_syncer(pmp->mp); - tsleep(&pmp->inmem_dirty_chains, PINTERLOCKED, - "chnmem", hz); - } - continue; /* loop on success or fail */ - } - - /* - * Try to start an early flush before we are forced to block. - */ - if (count > limit * 7 / 10) - speedup_syncer(pmp->mp); - break; - } -} - -void -hammer2_chain_memory_inc(hammer2_pfsmount_t *pmp) -{ - if (pmp) - atomic_add_long(&pmp->inmem_dirty_chains, 1); -} - -void -hammer2_chain_memory_wakeup(hammer2_pfsmount_t *pmp) +hammer2_chain_refactor(hammer2_chain_t **chainp) { - long waiting; - - if (pmp == NULL) - return; - - for (;;) { - waiting = pmp->inmem_dirty_chains; - cpu_ccfence(); - if (atomic_cmpset_long(&pmp->inmem_dirty_chains, - waiting, - (waiting - 1) & - ~HAMMER2_DIRTYCHAIN_WAITING)) { - break; - } - } - - if (waiting & HAMMER2_DIRTYCHAIN_WAITING) - wakeup(&pmp->inmem_dirty_chains); -} + hammer2_chain_t *chain = *chainp; + hammer2_chain_core_t *core; -static -void -adjreadcounter(hammer2_blockref_t *bref, size_t bytes) -{ - long *counterp; + core = chain->core; + while (chain->flags & HAMMER2_CHAIN_DUPLICATED) { + spin_lock(&core->cst.spin); + chain = TAILQ_NEXT(chain, core_entry); + while (chain->flags & HAMMER2_CHAIN_DUPLICATED) + chain = TAILQ_NEXT(chain, core_entry); + hammer2_chain_ref(chain); + spin_unlock(&core->cst.spin); + KKASSERT(chain->core == core); - switch(bref->type) { - case HAMMER2_BREF_TYPE_DATA: - counterp = &hammer2_iod_file_read; - break; - case HAMMER2_BREF_TYPE_INODE: - counterp = &hammer2_iod_meta_read; - break; - case HAMMER2_BREF_TYPE_INDIRECT: - counterp = &hammer2_iod_indr_read; - break; - case HAMMER2_BREF_TYPE_FREEMAP_NODE: - case HAMMER2_BREF_TYPE_FREEMAP_LEAF: - counterp = &hammer2_iod_fmap_read; - break; - default: - counterp = &hammer2_iod_volu_read; - break; + hammer2_chain_unlock(*chainp); + hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS | + HAMMER2_RESOLVE_NOREF); /* eat ref */ + *chainp = chain; } - *counterp += bytes; } diff --git a/sys/vfs/hammer2/hammer2_cluster.c b/sys/vfs/hammer2/hammer2_cluster.c new file mode 100644 index 0000000000..ad5a8fca24 --- /dev/null +++ b/sys/vfs/hammer2/hammer2_cluster.c @@ -0,0 +1,784 @@ +/* + * Copyright (c) 2013-2014 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * The cluster module collects multiple chains representing the same + * information into a single entity. It allows direct access to media + * data as long as it is not blockref array data. Meaning, basically, + * just inode and file data. + * + * This module also handles I/O dispatch, status rollup, and various + * mastership arrangements including quorum operations. It effectively + * presents one topology to the vnops layer. + * + * Many of the API calls mimic chain API calls but operate on clusters + * instead of chains. Please see hammer2_chain.c for more complete code + * documentation of the API functions. + */ +#include +#include +#include +#include +#include +#include + +#include "hammer2.h" + +u_int +hammer2_cluster_bytes(hammer2_cluster_t *cluster) +{ + return(cluster->focus->bytes); +} + +uint8_t +hammer2_cluster_type(hammer2_cluster_t *cluster) +{ + return(cluster->focus->bref.type); +} + +hammer2_media_data_t * +hammer2_cluster_data(hammer2_cluster_t *cluster) +{ + return(cluster->focus->data); +} + +int +hammer2_cluster_modified(hammer2_cluster_t *cluster) +{ + return((cluster->focus->flags & HAMMER2_CHAIN_MODIFIED) != 0); +} + +int +hammer2_cluster_unlinked(hammer2_cluster_t *cluster) +{ + return((cluster->focus->flags & HAMMER2_CHAIN_UNLINKED) != 0); +} + +void +hammer2_cluster_bref(hammer2_cluster_t *cluster, hammer2_blockref_t *bref) +{ + *bref = cluster->focus->bref; + bref->data_off = 0; /* should be opaque to caller */ +} + +void +hammer2_cluster_set_chainflags(hammer2_cluster_t *cluster, uint32_t flags) +{ + int i; + + for (i = 0; i < cluster->nchains; ++i) + atomic_set_int(&cluster->array[i]->flags, flags); +} + +void +hammer2_cluster_setsubmod(hammer2_trans_t *trans, hammer2_cluster_t *cluster) +{ + int i; + + for (i = 0; i < cluster->nchains; ++i) + hammer2_chain_setsubmod(trans, cluster->array[i]); +} + +/* + * Allocates a cluster and its underlying chain structures. The underlying + * chains will be locked. The cluster and underlying chains will have one + * ref. + */ +hammer2_cluster_t * +hammer2_cluster_alloc(hammer2_pfsmount_t *pmp, + hammer2_trans_t *trans, hammer2_blockref_t *bref) +{ + hammer2_cluster_t *cluster; + hammer2_chain_t *chain; + u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX); + int i; + + KKASSERT(pmp != NULL); + + /* + * Construct the appropriate system structure. + */ + switch(bref->type) { + case HAMMER2_BREF_TYPE_INODE: + case HAMMER2_BREF_TYPE_INDIRECT: + case HAMMER2_BREF_TYPE_FREEMAP_NODE: + case HAMMER2_BREF_TYPE_DATA: + case HAMMER2_BREF_TYPE_FREEMAP_LEAF: + /* + * Chain's are really only associated with the hmp but we + * maintain a pmp association for per-mount memory tracking + * purposes. The pmp can be NULL. 
+ */ + break; + case HAMMER2_BREF_TYPE_VOLUME: + case HAMMER2_BREF_TYPE_FREEMAP: + chain = NULL; + panic("hammer2_cluster_alloc volume type illegal for op"); + default: + chain = NULL; + panic("hammer2_cluster_alloc: unrecognized blockref type: %d", + bref->type); + } + + cluster = kmalloc(sizeof(*cluster), M_HAMMER2, M_WAITOK | M_ZERO); + cluster->refs = 1; + + for (i = 0; i < pmp->cluster.nchains; ++i) { + chain = hammer2_chain_alloc(pmp->cluster.array[i]->hmp, pmp, + trans, bref); + chain->pmp = pmp; + chain->hmp = pmp->cluster.array[i]->hmp; + chain->bref = *bref; + chain->bytes = bytes; + chain->refs = 1; + chain->flags = HAMMER2_CHAIN_ALLOCATED; + chain->delete_tid = HAMMER2_MAX_TID; + + /* + * Set modify_tid if a transaction is creating the inode. + * Enforce update_lo = 0 so nearby transactions do not think + * it has been flushed when it hasn't. + * + * NOTE: When loading a chain from backing store or creating a + * snapshot, trans will be NULL and the caller is + * responsible for setting these fields. + */ + if (trans) { + chain->modify_tid = trans->sync_tid; + chain->update_lo = 0; + } + cluster->array[i] = chain; + } + cluster->nchains = i; + cluster->pmp = pmp; + cluster->focus = cluster->array[0]; + + return (cluster); +} + +/* + * Associate an existing core with the chain or allocate a new core. + * + * The core is not locked. No additional refs on the chain are made. + * (trans) must not be NULL if (core) is not NULL. + * + * When chains are delete-duplicated during flushes we insert nchain on + * the ownerq after ochain instead of at the end in order to give the + * drop code visibility in the correct order, otherwise drops can be missed. + */ +void +hammer2_cluster_core_alloc(hammer2_trans_t *trans, + hammer2_cluster_t *ncluster, + hammer2_cluster_t *ocluster) +{ + int i; + + for (i = 0; i < ocluster->nchains; ++i) { + hammer2_chain_core_alloc(trans, + ncluster->array[i], + ocluster->array[i]); + } +} + +/* + * Add a reference to a cluster. + * + * We must also ref the underlying chains in order to allow ref/unlock + * sequences to later re-lock. + */ +void +hammer2_cluster_ref(hammer2_cluster_t *cluster) +{ + int i; + + atomic_add_int(&cluster->refs, 1); + for (i = 0; i < cluster->nchains; ++i) { + hammer2_chain_ref(cluster->array[i]); + } +} + +/* + * Drop the caller's reference to the cluster. When the ref count drops to + * zero this function frees the cluster and drops all underlying chains. + */ +void +hammer2_cluster_drop(hammer2_cluster_t *cluster) +{ + int i; + + for (i = 0; i < cluster->nchains; ++i) { + hammer2_chain_drop(cluster->array[i]); + if (cluster->refs == 1) + cluster->array[i] = NULL; + } + if (atomic_fetchadd_int(&cluster->refs, -1) != 1) { + KKASSERT(cluster->refs > 0); + return; + } + kfree(cluster, M_HAMMER2); +} + +void +hammer2_cluster_wait(hammer2_cluster_t *cluster) +{ + tsleep(cluster->focus, 0, "h2clcw", 1); +} + +/* + * Lock and ref a cluster. This adds a ref to the cluster and its chains + * and then locks them. + */ +int +hammer2_cluster_lock(hammer2_cluster_t *cluster, int how) +{ + int i; + int error; + + error = 0; + atomic_add_int(&cluster->refs, 1); + for (i = 0; i < cluster->nchains; ++i) { + error = hammer2_chain_lock(cluster->array[i], how); + if (error) { + while (--i >= 0) + hammer2_chain_unlock(cluster->array[i]); + atomic_add_int(&cluster->refs, -1); + break; + } + } + return error; +} + +/* + * Replace the contents of dst with src, adding a reference to src's chains. 
+ * dst is assumed to already have a ref and any chains present in dst are + * assumed to be locked and will be unlocked. + * + * If the chains in src are locked, only one of (src) or (dst) should be + * considered locked by the caller after return, not both. + */ +void +hammer2_cluster_replace(hammer2_cluster_t *dst, hammer2_cluster_t *src) +{ + int i; + + KKASSERT(dst->refs == 1); + + for (i = 0; i < src->nchains; ++i) { + hammer2_chain_ref(src->array[i]); + if (i < dst->nchains) + hammer2_chain_unlock(dst->array[i]); + dst->array[i] = src->array[i]; + } + while (i < dst->nchains) { + hammer2_chain_unlock(dst->array[i]); + dst->array[i] = NULL; + ++i; + } + dst->nchains = src->nchains; + dst->focus = src->focus; +} + +/* + * Replace the contents of the locked destination with the contents of the + * locked source. Destination must have one ref. + * + * Returns with the destination still with one ref and the copied chains + * with an additional lock (representing their state on the destination). + * The original chains associated with the destination are unlocked. + */ +void +hammer2_cluster_replace_locked(hammer2_cluster_t *dst, hammer2_cluster_t *src) +{ + int i; + + KKASSERT(dst->refs == 1); + + for (i = 0; i < src->nchains; ++i) { + hammer2_chain_lock(src->array[i], 0); + if (i < dst->nchains) + hammer2_chain_unlock(dst->array[i]); + dst->array[i] = src->array[i]; + } + while (i < dst->nchains) { + hammer2_chain_unlock(dst->array[i]); + dst->array[i] = NULL; + ++i; + } + dst->nchains = src->nchains; + dst->focus = src->focus; +} + +/* + * Copy a cluster, returned a ref'd cluster. All underlying chains + * are also ref'd, but not locked. + * + * If with_chains is 0 the returned cluster has a ref count of 1 but + * no chains will be assigned. + */ +hammer2_cluster_t * +hammer2_cluster_copy(hammer2_cluster_t *ocluster, int with_chains) +{ + hammer2_pfsmount_t *pmp = ocluster->pmp; + hammer2_cluster_t *ncluster; + int i; + + ncluster = kmalloc(sizeof(*ncluster), M_HAMMER2, M_WAITOK | M_ZERO); + ncluster->pmp = pmp; + ncluster->nchains = ocluster->nchains; + ncluster->focus = ocluster->focus; + if (with_chains) { + ncluster->refs = 1; + for (i = 0; i < ocluster->nchains; ++i) { + ncluster->array[i] = ocluster->array[i]; + hammer2_chain_ref(ncluster->array[i]); + } + } + return (ncluster); +} + +/* + * Unlock and deref a cluster. The cluster is destroyed if this is the + * last ref. + */ +void +hammer2_cluster_unlock(hammer2_cluster_t *cluster) +{ + int i; + + for (i = 0; i < cluster->nchains; ++i) + hammer2_chain_unlock(cluster->array[i]); + if (atomic_fetchadd_int(&cluster->refs, -1) == 1) { + for (i = 0; i < cluster->nchains; ++i) /* safety */ + cluster->array[i] = NULL; + kfree(cluster, M_HAMMER2); + return; + } + KKASSERT(cluster->refs > 0); +} + +/* + * Refactor the chains of a locked cluster + */ +void +hammer2_cluster_refactor(hammer2_cluster_t *cluster) +{ + int i; + + for (i = 0; i < cluster->nchains; ++i) { + hammer2_chain_refactor(&cluster->array[i]); + } + cluster->focus = cluster->array[0]; +} + +/* + * Resize the cluster's physical storage allocation in-place. This may + * replace the cluster's chains. 
+ */ +void +hammer2_cluster_resize(hammer2_trans_t *trans, hammer2_inode_t *ip, + hammer2_cluster_t *cparent, hammer2_cluster_t *cluster, + int nradix, int flags) +{ + int i; + + KKASSERT(cparent->pmp == cluster->pmp); /* can be NULL */ + KKASSERT(cparent->nchains == cluster->nchains); + + for (i = 0; i < cluster->nchains; ++i) { + hammer2_chain_resize(trans, ip, + cparent->array[i], &cluster->array[i], + nradix, flags); + } + cluster->focus = cluster->array[0]; +} + +/* + * Set an inode's cluster modified, marking the related chains RW and + * duplicating them if necessary. + * + * The passed-in chain is a localized copy of the chain previously acquired + * when the inode was locked (and possilby replaced in the mean time), and + * must also be updated. In fact, we update it first and then synchronize + * the inode's cluster cache. + */ +hammer2_inode_data_t * +hammer2_cluster_modify_ip(hammer2_trans_t *trans, hammer2_inode_t *ip, + hammer2_cluster_t *cluster, int flags) +{ + atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED); + hammer2_cluster_modify(trans, cluster, flags); + + hammer2_inode_repoint(ip, NULL, cluster); + if (ip->vp) + vsetisdirty(ip->vp); + return (&hammer2_cluster_data(cluster)->ipdata); +} + +/* + * Adjust the cluster's chains to allow modification. + */ +void +hammer2_cluster_modify(hammer2_trans_t *trans, hammer2_cluster_t *cluster, + int flags) +{ + int i; + + for (i = 0; i < cluster->nchains; ++i) + hammer2_chain_modify(trans, &cluster->array[i], flags); + cluster->focus = cluster->array[0]; +} + +/* + * Lookup initialization/completion API + */ +hammer2_cluster_t * +hammer2_cluster_lookup_init(hammer2_cluster_t *cparent, int flags) +{ + hammer2_cluster_t *cluster; + int i; + + cluster = kmalloc(sizeof(*cluster), M_HAMMER2, M_WAITOK | M_ZERO); + cluster->pmp = cparent->pmp; /* can be NULL */ + for (i = 0; i < cparent->nchains; ++i) + cluster->array[i] = cparent->array[i]; + cluster->nchains = cparent->nchains; + cluster->focus = cluster->array[0]; + + /* + * Independently lock (this will also give cluster 1 ref) + */ + if (flags & HAMMER2_LOOKUP_SHARED) { + hammer2_cluster_lock(cluster, HAMMER2_RESOLVE_ALWAYS | + HAMMER2_RESOLVE_SHARED); + } else { + hammer2_cluster_lock(cluster, HAMMER2_RESOLVE_ALWAYS); + } + return (cluster); +} + +void +hammer2_cluster_lookup_done(hammer2_cluster_t *cparent) +{ + if (cparent) + hammer2_cluster_unlock(cparent); +} + +/* + * Locate first match or overlap under parent, return a new cluster + */ +hammer2_cluster_t * +hammer2_cluster_lookup(hammer2_cluster_t *cparent, hammer2_key_t *key_nextp, + hammer2_key_t key_beg, hammer2_key_t key_end, + int flags, int *ddflagp) +{ + hammer2_pfsmount_t *pmp; + hammer2_cluster_t *cluster; + hammer2_chain_t *chain; + hammer2_key_t key_accum; + hammer2_key_t key_next; + int null_count; + int ddflag; + int i; + uint8_t bref_type; + u_int bytes; + + pmp = cparent->pmp; /* can be NULL */ + key_accum = *key_nextp; + null_count = 0; + bref_type = 0; + bytes = 0; + + cluster = kmalloc(sizeof(*cluster), M_HAMMER2, M_WAITOK | M_ZERO); + cluster->pmp = pmp; /* can be NULL */ + cluster->refs = 1; + *ddflagp = 0; + + for (i = 0; i < cparent->nchains; ++i) { + key_next = *key_nextp; + chain = hammer2_chain_lookup(&cparent->array[i], &key_next, + key_beg, key_end, + &cparent->cache_index[i], + flags, &ddflag); + cluster->array[i] = chain; + if (chain == NULL) { + ++null_count; + } else { + if (bref_type == 0) + bref_type = chain->bref.type; + KKASSERT(bref_type == chain->bref.type); + if (bytes == 0) + bytes = 
chain->bytes; + KKASSERT(bytes == chain->bytes); + } + if (key_accum > key_next) + key_accum = key_next; + KKASSERT(i == 0 || *ddflagp == ddflag); + *ddflagp = ddflag; + } + *key_nextp = key_accum; + cluster->nchains = i; + cluster->focus = cluster->array[0]; + + if (null_count == i) { + hammer2_cluster_drop(cluster); + cluster = NULL; + } + + return (cluster); +} + +/* + * Locate next match or overlap under parent, replace cluster + */ +hammer2_cluster_t * +hammer2_cluster_next(hammer2_cluster_t *cparent, hammer2_cluster_t *cluster, + hammer2_key_t *key_nextp, + hammer2_key_t key_beg, hammer2_key_t key_end, int flags) +{ + hammer2_chain_t *chain; + hammer2_key_t key_accum; + hammer2_key_t key_next; + int null_count; + int i; + + key_accum = *key_nextp; + null_count = 0; + + for (i = 0; i < cparent->nchains; ++i) { + key_next = *key_nextp; + chain = hammer2_chain_next(&cparent->array[i], + cluster->array[i], + &key_next, + key_beg, key_end, + &cparent->cache_index[i], flags); + cluster->array[i] = chain; + if (chain == NULL) + ++null_count; + if (key_accum > key_next) + key_accum = key_next; + } + + if (null_count == i) { + hammer2_cluster_drop(cluster); + cluster = NULL; + } else { + cluster->focus = cluster->array[0]; + } + return(cluster); +} + +/* + * The raw scan function is similar to lookup/next but does not seek to a key. + * Blockrefs are iterated via first_chain = (parent, NULL) and + * next_chain = (parent, chain). + * + * The passed-in parent must be locked and its data resolved. The returned + * chain will be locked. Pass chain == NULL to acquire the first sub-chain + * under parent and then iterate with the passed-in chain (which this + * function will unlock). + */ +hammer2_cluster_t * +hammer2_cluster_scan(hammer2_cluster_t *cparent, hammer2_cluster_t *cluster, + int flags) +{ + hammer2_chain_t *chain; + int null_count; + int i; + + null_count = 0; + + for (i = 0; i < cparent->nchains; ++i) { + chain = hammer2_chain_scan(cparent->array[i], + cluster->array[i], + &cparent->cache_index[i], flags); + cluster->array[i] = chain; + if (chain == NULL) + ++null_count; + } + + if (null_count == i) { + hammer2_cluster_drop(cluster); + cluster = NULL; + } + return(cluster); +} + +/* + * Create a new cluster using the specified key + */ +int +hammer2_cluster_create(hammer2_trans_t *trans, hammer2_cluster_t *cparent, + hammer2_cluster_t **clusterp, + hammer2_key_t key, int keybits, int type, size_t bytes) +{ + hammer2_cluster_t *cluster; + hammer2_chain_t *chain; + hammer2_pfsmount_t *pmp; + int error; + int i; + + pmp = trans->pmp; /* can be NULL */ + + if ((cluster = *clusterp) == NULL) { + cluster = kmalloc(sizeof(*cluster), M_HAMMER2, + M_WAITOK | M_ZERO); + cluster->pmp = pmp; /* can be NULL */ + cluster->refs = 1; + } + for (i = 0; i < cparent->nchains; ++i) { + chain = cluster->array[i]; + error = hammer2_chain_create(trans, &cparent->array[i], &chain, + key, keybits, type, bytes); + KKASSERT(error == 0); + cluster->array[i] = chain; + } + cluster->focus = cluster->array[0]; + *clusterp = cluster; + + return error; +} + +/* + * Duplicate a cluster under a new parent + */ +void +hammer2_cluster_duplicate(hammer2_trans_t *trans, hammer2_cluster_t *cparent, + hammer2_cluster_t *cluster, hammer2_blockref_t *bref, + int snapshot, int duplicate_reason) +{ + int i; + + for (i = 0; i < cluster->nchains; ++i) { + hammer2_chain_duplicate(trans, &cparent->array[i], + &cluster->array[i], bref, + snapshot, duplicate_reason); + } + cluster->focus = cluster->array[0]; +} + +/* + * 
Delete-duplicate a cluster in-place. + */ +void +hammer2_cluster_delete_duplicate(hammer2_trans_t *trans, + hammer2_cluster_t *cluster, int flags) +{ + int i; + + for (i = 0; i < cluster->nchains; ++i) { + hammer2_chain_delete_duplicate(trans, &cluster->array[i], + flags); + } + cluster->focus = cluster->array[0]; +} + +/* + * Mark a cluster deleted + */ +void +hammer2_cluster_delete(hammer2_trans_t *trans, hammer2_cluster_t *cluster, + int flags) +{ + int i; + + for (i = 0; i < cluster->nchains; ++i) { + hammer2_chain_delete(trans, cluster->array[i], flags); + } +} + +/* + * Create a snapshot of the specified {parent, ochain} with the specified + * label. The originating hammer2_inode must be exclusively locked for + * safety. + * + * The ioctl code has already synced the filesystem. + */ +int +hammer2_cluster_snapshot(hammer2_trans_t *trans, hammer2_cluster_t *ocluster, + hammer2_ioc_pfs_t *pfs) +{ + hammer2_mount_t *hmp; + hammer2_cluster_t *ncluster; + hammer2_inode_data_t *ipdata; + hammer2_inode_t *nip; + size_t name_len; + hammer2_key_t lhc; + struct vattr vat; + uuid_t opfs_clid; + int error; + + kprintf("snapshot %s\n", pfs->name); + + name_len = strlen(pfs->name); + lhc = hammer2_dirhash(pfs->name, name_len); + + ipdata = &hammer2_cluster_data(ocluster)->ipdata; + opfs_clid = ipdata->pfs_clid; + hmp = ocluster->focus->hmp; + + /* + * Create the snapshot directory under the super-root + * + * Set PFS type, generate a unique filesystem id, and generate + * a cluster id. Use the same clid when snapshotting a PFS root, + * which theoretically allows the snapshot to be used as part of + * the same cluster (perhaps as a cache). + * + * Copy the (flushed) blockref array. Theoretically we could use + * chain_duplicate() but it becomes difficult to disentangle + * the shared core so for now just brute-force it. 
+ */ + VATTR_NULL(&vat); + vat.va_type = VDIR; + vat.va_mode = 0755; + ncluster = NULL; + nip = hammer2_inode_create(trans, hmp->sroot, &vat, proc0.p_ucred, + pfs->name, name_len, &ncluster, &error); + + if (nip) { + ipdata = hammer2_cluster_modify_ip(trans, nip, ncluster, 0); + ipdata->pfs_type = HAMMER2_PFSTYPE_SNAPSHOT; + kern_uuidgen(&ipdata->pfs_fsid, 1); + if (ocluster->focus->flags & HAMMER2_CHAIN_PFSROOT) + ipdata->pfs_clid = opfs_clid; + else + kern_uuidgen(&ipdata->pfs_clid, 1); + hammer2_cluster_set_chainflags(ncluster, HAMMER2_CHAIN_PFSROOT); + + /* XXX hack blockset copy */ + ipdata->u.blockset = ocluster->focus->data->ipdata.u.blockset; + + hammer2_inode_unlock_ex(nip, ncluster); + } + return (error); +} diff --git a/sys/vfs/hammer2/hammer2_flush.c b/sys/vfs/hammer2/hammer2_flush.c index 887863bc98..f812b986ac 100644 --- a/sys/vfs/hammer2/hammer2_flush.c +++ b/sys/vfs/hammer2/hammer2_flush.c @@ -148,7 +148,7 @@ hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp, if (pmp) { trans->pmp = pmp; KKASSERT(hmp == NULL); - hmp = pmp->cluster.chains[0]->hmp; /* XXX */ + hmp = pmp->cluster.focus->hmp; /* XXX */ } else { trans->hmp_single = hmp; KKASSERT(hmp); @@ -265,7 +265,7 @@ hammer2_trans_done(hammer2_trans_t *trans) hammer2_trans_t *scan; if (trans->pmp) - hmp = trans->pmp->cluster.chains[0]->hmp; + hmp = trans->pmp->cluster.focus->hmp; else hmp = trans->hmp_single; @@ -624,7 +624,7 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t **chainp, if (chain->flags & HAMMER2_CHAIN_MODIFIED) { atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); - hammer2_chain_memory_wakeup(chain->pmp); + hammer2_pfs_memory_wakeup(chain->pmp); hammer2_chain_drop(chain); } #if 0 @@ -819,7 +819,7 @@ hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t **chainp, */ KKASSERT(chain->flags & HAMMER2_CHAIN_FLUSH_CREATE); atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); - hammer2_chain_memory_wakeup(chain->pmp); + hammer2_pfs_memory_wakeup(chain->pmp); if ((chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) || chain == &hmp->vchain || diff --git a/sys/vfs/hammer2/hammer2_freemap.c b/sys/vfs/hammer2/hammer2_freemap.c index 54d46b8758..b7f29a3f8e 100644 --- a/sys/vfs/hammer2/hammer2_freemap.c +++ b/sys/vfs/hammer2/hammer2_freemap.c @@ -367,6 +367,7 @@ hammer2_freemap_try_alloc(hammer2_trans_t *trans, hammer2_chain_t **parentp, uint16_t class; int error = 0; int cache_index = -1; + int ddflag; /* @@ -393,7 +394,7 @@ hammer2_freemap_try_alloc(hammer2_trans_t *trans, hammer2_chain_t **parentp, chain = hammer2_chain_lookup(parentp, &key_dummy, key, key + l1mask, &cache_index, HAMMER2_LOOKUP_ALWAYS | - HAMMER2_LOOKUP_MATCHIND); + HAMMER2_LOOKUP_MATCHIND, &ddflag); if (chain == NULL) { /* @@ -821,6 +822,7 @@ hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_mount_t *hmp, int modified = 0; int cache_index = -1; int error; + int ddflag; radix = (int)data_off & HAMMER2_OFF_MASK_RADIX; data_off &= ~HAMMER2_OFF_MASK_RADIX; @@ -856,7 +858,7 @@ hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_mount_t *hmp, chain = hammer2_chain_lookup(&parent, &key_dummy, key, key + l1mask, &cache_index, HAMMER2_LOOKUP_ALWAYS | - HAMMER2_LOOKUP_MATCHIND); + HAMMER2_LOOKUP_MATCHIND, &ddflag); /* * Stop early if we are trying to free something but no leaf exists. 
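Most of the new hammer2_cluster.c entry points follow one pattern: fan the operation out across cluster->array[], roll up the per-chain results, and re-point cluster->focus at array[0]. From the vnops layer the cluster API is meant to be used the same way the chain API was used before. The following is a hypothetical usage sketch (not part of the patch) of the lookup/next iteration over a directory inode; the key range and constants are illustrative, and it assumes the chain API's locking conventions carry over (the cluster passed to hammer2_cluster_next() is consumed by it):

#include "hammer2.h"

/*
 * Hypothetical example only: iterate the visible entries of a directory
 * using the new cluster API, mirroring the old chain lookup/next loop.
 */
static void
demo_scan_directory(hammer2_inode_t *dip)
{
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *cluster;
	hammer2_key_t key_next;
	int ddflag;

	/* shared-lock the inode; returns a locked cluster copy */
	cparent = hammer2_inode_lock_sh(dip);

	cluster = hammer2_cluster_lookup(cparent, &key_next,
					 HAMMER2_DIRHASH_VISIBLE,
					 (hammer2_key_t)-1,
					 HAMMER2_LOOKUP_SHARED, &ddflag);
	while (cluster) {
		/*
		 * hammer2_cluster_data() returns the focus chain's media
		 * data; consume (...)->ipdata here.
		 */
		cluster = hammer2_cluster_next(cparent, cluster, &key_next,
					       key_next, (hammer2_key_t)-1,
					       HAMMER2_LOOKUP_SHARED);
	}
	hammer2_inode_unlock_sh(dip, cparent);
}

Under the degenerate single-target conditions this first stage targets, nchains is 1 and each cluster call devolves to the equivalent chain call.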
diff --git a/sys/vfs/hammer2/hammer2_inode.c b/sys/vfs/hammer2/hammer2_inode.c index f6d4a30b34..c0fc670aee 100644 --- a/sys/vfs/hammer2/hammer2_inode.c +++ b/sys/vfs/hammer2/hammer2_inode.c @@ -44,7 +44,7 @@ #define INODE_DEBUG 0 static void hammer2_inode_move_to_hidden(hammer2_trans_t *trans, - hammer2_chain_t **chainp, + hammer2_cluster_t **clusterp, hammer2_tid_t inum); RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp, @@ -65,66 +65,78 @@ hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2) * * HAMMER2 offers shared locks and exclusive locks on inodes. * - * An inode's ip->chain pointer is resolved and stable while an inode is - * locked, and can be cleaned out at any time (become NULL) when an inode - * is not locked. + * The inode locking function locks the inode itself, resolves any stale + * chains in the inode's cluster, and allocates a fresh copy of the + * cluster with 1 ref and all the underlying chains locked. Duplication + * races are handled by this function. * - * This function handles duplication races and hardlink replacement races - * which can cause ip's cached chain to become stale. - * - * The underlying chain is also locked and returned. + * ip->cluster will be stable while the inode is locked. * * NOTE: We don't combine the inode/chain lock because putting away an * inode would otherwise confuse multiple lock holders of the inode. + * + * NOTE: Hardlinks are followed in the returned cluster but not in the + * inode's internal cluster (ip->cluster). */ -hammer2_chain_t * +hammer2_cluster_t * hammer2_inode_lock_ex(hammer2_inode_t *ip) { + hammer2_inode_data_t *ipdata; + hammer2_cluster_t *cluster; hammer2_chain_t *chain; hammer2_chain_t *ochain; hammer2_chain_core_t *core; int error; + int i; hammer2_inode_ref(ip); ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE); - - chain = ip->chain; - core = chain->core; - for (;;) { - if (chain->flags & HAMMER2_CHAIN_DUPLICATED) { - spin_lock(&core->cst.spin); - while (chain->flags & HAMMER2_CHAIN_DUPLICATED) - chain = TAILQ_NEXT(chain, core_entry); - hammer2_chain_ref(chain); - spin_unlock(&core->cst.spin); - hammer2_inode_repoint(ip, NULL, chain); - hammer2_chain_drop(chain); + cluster = hammer2_cluster_copy(&ip->cluster, 0); + + for (i = 0; i < cluster->nchains; ++i) { + chain = ip->cluster.array[i]; + core = chain->core; + for (;;) { + if (chain->flags & HAMMER2_CHAIN_DUPLICATED) { + spin_lock(&core->cst.spin); + while (chain->flags & HAMMER2_CHAIN_DUPLICATED) + chain = TAILQ_NEXT(chain, core_entry); + hammer2_chain_ref(chain); + spin_unlock(&core->cst.spin); + ochain = ip->cluster.array[i]; + ip->cluster.array[i] = chain; + hammer2_chain_drop(ochain); + } + hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS); + if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0) + break; + hammer2_chain_unlock(chain); } - hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS); - if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0) - break; - hammer2_chain_unlock(chain); + cluster->array[i] = chain; } - if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK && - (chain->flags & HAMMER2_CHAIN_DELETED) == 0) { - error = hammer2_hardlink_find(ip->pip, &chain, &ochain); - hammer2_chain_drop(ochain); - KKASSERT((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0); + cluster->focus = cluster->array[0]; + + /* + * Returned cluster must resolve hardlink pointers + */ + ipdata = &hammer2_cluster_data(cluster)->ipdata; + if (ipdata->type == HAMMER2_OBJTYPE_HARDLINK && + (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) { 
+ error = hammer2_hardlink_find(ip->pip, cluster); + KKASSERT((cluster->focus->flags & + HAMMER2_CHAIN_DUPLICATED) == 0); KKASSERT(error == 0); - /* XXX error handling */ } - return (chain); + cluster->focus = cluster->array[0]; + + return (cluster); } void -hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_chain_t *chain) +hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_cluster_t *cluster) { - /* - * XXX this will catch parent directories too which we don't - * really want. - */ - if (chain) - hammer2_chain_unlock(chain); + if (cluster) + hammer2_cluster_unlock(cluster); ccms_thread_unlock(&ip->topo_cst); hammer2_inode_drop(ip); } @@ -138,41 +150,71 @@ hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_chain_t *chain) * need to upgrade them. Only one count of a shared lock can be * upgraded. */ -hammer2_chain_t * +hammer2_cluster_t * hammer2_inode_lock_sh(hammer2_inode_t *ip) { + hammer2_inode_data_t *ipdata; + hammer2_cluster_t *cluster; + hammer2_chain_core_t *core; hammer2_chain_t *chain; + int error = 0; + int i; hammer2_inode_ref(ip); - for (;;) { - ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED); + cluster = hammer2_cluster_copy(&ip->cluster, 0); + ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED); - chain = ip->chain; - KKASSERT(chain != NULL); /* for now */ + for (i = 0; i < cluster->nchains; ++i) { + chain = ip->cluster.array[i]; + core = chain->core; + + if (chain->flags & HAMMER2_CHAIN_DUPLICATED) + goto cycle_excl; hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS | HAMMER2_RESOLVE_SHARED); + if (chain->flags & HAMMER2_CHAIN_DUPLICATED) { + hammer2_chain_unlock(chain); - /* - * Resolve duplication races, resolve hardlinks by giving - * up and cycling an exclusive lock. - */ - if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0 && - chain->data->ipdata.type != HAMMER2_OBJTYPE_HARDLINK) { - break; + /* + * Cycle exclusive inode lock and start the loop + * over again. 
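/*
 * Illustration (not part of the patch): the lock paths above walk each
 * slot of the inode's cluster and, when a chain has been delete-
 * duplicated, follow the core's list forward until the live chain is
 * reached.  Below is a minimal user-space model of that "skip stale
 * chains" walk.  The toy_chain type, the flag value and the next
 * pointer are invented stand-ins for illustration only; the real code
 * walks TAILQ_NEXT(chain, core_entry) under core->cst.spin and takes a
 * chain ref before dropping the spinlock.
 */
#include <stddef.h>

#define TOY_CHAIN_DUPLICATED	0x0001	/* stand-in flag value */

struct toy_chain {
	int			flags;
	struct toy_chain	*next;	/* models core_entry linkage */
};

static struct toy_chain *
toy_resolve_stale(struct toy_chain *chain)
{
	/* follow delete-duplicated chains forward to the live version */
	while (chain != NULL && (chain->flags & TOY_CHAIN_DUPLICATED))
		chain = chain->next;
	return (chain);
}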
+ */ +cycle_excl: + while (--i >= 0) { + chain = cluster->array[i]; + cluster->array[i] = NULL; + hammer2_chain_unlock(chain); + } + ccms_thread_unlock(&ip->topo_cst); + hammer2_inode_unlock_ex(ip, hammer2_inode_lock_ex(ip)); + ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED); + continue; /* restart at i=-1 -> i=0 on loop */ } - hammer2_chain_unlock(chain); - ccms_thread_unlock(&ip->topo_cst); - chain = hammer2_inode_lock_ex(ip); - hammer2_inode_unlock_ex(ip, chain); + cluster->array[i] = chain; } - return (chain); + cluster->focus = cluster->array[0]; + + /* + * Returned cluster must resolve hardlink pointers + */ + ipdata = &hammer2_cluster_data(cluster)->ipdata; + if (ipdata->type == HAMMER2_OBJTYPE_HARDLINK && + (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) { + error = hammer2_hardlink_find(ip->pip, cluster); + KKASSERT((cluster->focus->flags & + HAMMER2_CHAIN_DUPLICATED) == 0); + KKASSERT(error == 0); + } + cluster->focus = cluster->array[0]; + + return (cluster); } void -hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_chain_t *chain) +hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_cluster_t *cluster) { - if (chain) - hammer2_chain_unlock(chain); + if (cluster) + hammer2_cluster_unlock(cluster); ccms_thread_unlock(&ip->topo_cst); hammer2_inode_drop(ip); } @@ -328,7 +370,8 @@ hammer2_igetv(hammer2_inode_t *ip, int *errorp) pmp = ip->pmp; KKASSERT(pmp != NULL); *errorp = 0; - ipdata = &ip->chain->data->ipdata; + + ipdata = &hammer2_cluster_data(&ip->cluster)->ipdata; for (;;) { /* @@ -451,27 +494,29 @@ hammer2_igetv(hammer2_inode_t *ip, int *errorp) } /* - * The passed-in chain must be locked and the returned inode will also be - * locked. This routine typically locates or allocates the inode, assigns - * ip->chain (adding a ref to chain if necessary), and returns the inode. + * Returns the inode associated with the passed-in cluster, creating the + * inode if necessary and synchronizing it to the passed-in cluster otherwise. + * + * The passed-in chain must be locked and will remain locked on return. + * The returned inode will be locked and the caller may dispose of both + * via hammer2_inode_unlock_ex(). However, if the caller needs to resolve + * a hardlink it must ref/unlock/relock/drop the inode. * * The hammer2_inode structure regulates the interface between the high level * kernel VNOPS API and the filesystem backend (the chains). * - * WARNING! This routine sucks up the chain's lock (makes it part of the - * inode lock from the point of view of the inode lock API), - * so callers need to be careful. - * * WARNING! The mount code is allowed to pass dip == NULL for iroot and * is allowed to pass pmp == NULL and dip == NULL for sroot. */ hammer2_inode_t * hammer2_inode_get(hammer2_pfsmount_t *pmp, hammer2_inode_t *dip, - hammer2_chain_t *chain) + hammer2_cluster_t *cluster) { hammer2_inode_t *nip; + hammer2_inode_data_t *iptmp; + hammer2_inode_data_t *nipdata; - KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE); + KKASSERT(hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE); /* * Interlocked lookup/ref of the inode. 
This code is only needed @@ -480,22 +525,18 @@ hammer2_inode_get(hammer2_pfsmount_t *pmp, hammer2_inode_t *dip, */ again: for (;;) { - nip = hammer2_inode_lookup(pmp, chain->data->ipdata.inum); + iptmp = &hammer2_cluster_data(cluster)->ipdata; + nip = hammer2_inode_lookup(pmp, iptmp->inum); if (nip == NULL) break; + ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE); if ((nip->flags & HAMMER2_INODE_ONRBTREE) == 0) { /* race */ ccms_thread_unlock(&nip->topo_cst); hammer2_inode_drop(nip); continue; } - if (nip->chain != chain) - hammer2_inode_repoint(nip, NULL, chain); - - /* - * Consolidated nip/nip->chain is locked (chain locked - * by caller). - */ + hammer2_inode_repoint(nip, NULL, cluster); return nip; } @@ -505,16 +546,24 @@ again: if (pmp) { nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO); atomic_add_long(&pmp->inmem_inodes, 1); - hammer2_chain_memory_inc(pmp); - hammer2_chain_memory_wakeup(pmp); + hammer2_pfs_memory_inc(pmp); + hammer2_pfs_memory_wakeup(pmp); } else { nip = kmalloc(sizeof(*nip), M_HAMMER2, M_WAITOK | M_ZERO); nip->flags = HAMMER2_INODE_SROOT; } - nip->inum = chain->data->ipdata.inum; - nip->size = chain->data->ipdata.size; - nip->mtime = chain->data->ipdata.mtime; - hammer2_inode_repoint(nip, NULL, chain); + + /* + * Initialize nip's cluster + */ + nip->cluster.refs = 1; + nip->flags = HAMMER2_CLUSTER_INODE; + + nipdata = &hammer2_cluster_data(cluster)->ipdata; + nip->inum = nipdata->inum; + nip->size = nipdata->size; + nip->mtime = nipdata->mtime; + hammer2_inode_repoint(nip, NULL, cluster); nip->pip = dip; /* can be NULL */ if (dip) hammer2_inode_ref(dip); /* ref dip for nip->pip */ @@ -526,7 +575,7 @@ again: * hammer2_inode_lock_ex() call. */ nip->refs = 1; - ccms_cst_init(&nip->topo_cst, &nip->chain); + ccms_cst_init(&nip->topo_cst, &nip->cluster); ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE); /* combination of thread lock and chain lock == inode lock */ @@ -553,7 +602,7 @@ again: * Create a new inode in the specified directory using the vattr to * figure out the type of inode. * - * If no error occurs the new inode with its chain locked is returned in + * If no error occurs the new inode with its cluster locked is returned in * *nipp, otherwise an error is returned and *nipp is set to NULL. * * If vap and/or cred are NULL the related fields are not set and the @@ -566,12 +615,12 @@ hammer2_inode_t * hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip, struct vattr *vap, struct ucred *cred, const uint8_t *name, size_t name_len, - hammer2_chain_t **chainp, int *errorp) + hammer2_cluster_t **clusterp, int *errorp) { hammer2_inode_data_t *dipdata; hammer2_inode_data_t *nipdata; - hammer2_chain_t *chain; - hammer2_chain_t *parent; + hammer2_cluster_t *cluster; + hammer2_cluster_t *cparent; hammer2_inode_t *nip; hammer2_key_t key_dummy; hammer2_key_t lhc; @@ -581,7 +630,7 @@ hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip, uuid_t dip_gid; uint32_t dip_mode; uint8_t dip_algo; - int cache_index = -1; + int ddflag; lhc = hammer2_dirhash(name, name_len); *errorp = 0; @@ -594,8 +643,8 @@ hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip, * NOTE: hidden inodes do not have iterators. 
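/*
 * Illustration (not part of the patch): the create path below probes
 * directory keys starting at the name hash and increments the key while
 * collisions are found, giving up with ENOSPC when the iteration bits
 * run out or the hash is not a visible one.  This is a toy model of
 * that scan; toy_key_used(), the mask values and the return codes are
 * invented for illustration and do not correspond to the real on-media
 * encoding.
 */
#include <stdint.h>
#include <errno.h>

#define TOY_DIRHASH_VISIBLE	0x8000000000000000ULL	/* toy value */
#define TOY_DIRHASH_LOMASK	0x00000000000003FFULL	/* toy value */

static int
toy_key_used(const uint64_t *used, int nused, uint64_t key)
{
	int i;

	for (i = 0; i < nused; ++i) {
		if (used[i] == key)
			return (1);
	}
	return (0);
}

static int
toy_find_free_key(const uint64_t *used, int nused, uint64_t lhc,
		  uint64_t *keyp)
{
	while (toy_key_used(used, nused, lhc)) {
		if ((lhc & TOY_DIRHASH_VISIBLE) == 0)
			return (ENOSPC);
		if ((lhc & TOY_DIRHASH_LOMASK) == TOY_DIRHASH_LOMASK)
			return (ENOSPC);
		++lhc;
	}
	*keyp = lhc;
	return (0);
}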
*/ retry: - parent = hammer2_inode_lock_ex(dip); - dipdata = &dip->chain->data->ipdata; + cparent = hammer2_inode_lock_ex(dip); + dipdata = &hammer2_cluster_data(cparent)->ipdata; dip_uid = dipdata->uid; dip_gid = dipdata->gid; dip_mode = dipdata->mode; @@ -603,44 +652,46 @@ retry: error = 0; while (error == 0) { - chain = hammer2_chain_lookup(&parent, &key_dummy, - lhc, lhc, &cache_index, 0); - if (chain == NULL) + cluster = hammer2_cluster_lookup(cparent, &key_dummy, + lhc, lhc, 0, &ddflag); + if (cluster == NULL) break; if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0) error = ENOSPC; if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK) error = ENOSPC; - hammer2_chain_unlock(chain); - chain = NULL; + hammer2_cluster_unlock(cluster); + cluster = NULL; ++lhc; } if (error == 0) { - error = hammer2_chain_create(trans, &parent, &chain, + error = hammer2_cluster_create(trans, cparent, &cluster, lhc, 0, HAMMER2_BREF_TYPE_INODE, HAMMER2_INODE_BYTES); } #if INODE_DEBUG kprintf("CREATE INODE %*.*s chain=%p\n", - (int)name_len, (int)name_len, name, chain); + (int)name_len, (int)name_len, name, + (cluster ? cluster->focus : NULL)); #endif /* * Cleanup and handle retries. */ if (error == EAGAIN) { - hammer2_chain_ref(parent); - hammer2_inode_unlock_ex(dip, parent); - hammer2_chain_wait(parent); - hammer2_chain_drop(parent); + hammer2_cluster_ref(cparent); + hammer2_inode_unlock_ex(dip, cparent); + hammer2_cluster_wait(cparent); + hammer2_cluster_drop(cparent); goto retry; } - hammer2_inode_unlock_ex(dip, parent); + hammer2_inode_unlock_ex(dip, cparent); + cparent = NULL; if (error) { - KKASSERT(chain == NULL); + KKASSERT(cluster == NULL); *errorp = error; return (NULL); } @@ -656,9 +707,10 @@ retry: * * NOTE: nipdata will have chain's blockset data. */ - chain->data->ipdata.inum = trans->inode_tid; - nip = hammer2_inode_get(dip->pmp, dip, chain); - nipdata = &chain->data->ipdata; + nipdata = &hammer2_cluster_data(cluster)->ipdata; + nipdata->inum = trans->inode_tid; + nip = hammer2_inode_get(dip->pmp, dip, cluster); + nipdata = &hammer2_cluster_data(cluster)->ipdata; if (vap) { KKASSERT(trans->inodes_created == 0); @@ -730,37 +782,11 @@ retry: bcopy(name, nipdata->filename, name_len); nipdata->name_key = lhc; nipdata->name_len = name_len; - *chainp = chain; + *clusterp = cluster; return (nip); } -/* - * chain may have been moved around by the create. - */ -void -hammer2_chain_refactor(hammer2_chain_t **chainp) -{ - hammer2_chain_t *chain = *chainp; - hammer2_chain_core_t *core; - - core = chain->core; - while (chain->flags & HAMMER2_CHAIN_DUPLICATED) { - spin_lock(&core->cst.spin); - chain = TAILQ_NEXT(chain, core_entry); - while (chain->flags & HAMMER2_CHAIN_DUPLICATED) - chain = TAILQ_NEXT(chain, core_entry); - hammer2_chain_ref(chain); - spin_unlock(&core->cst.spin); - KKASSERT(chain->core == core); - - hammer2_chain_unlock(*chainp); - hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS | - HAMMER2_RESOLVE_NOREF); /* eat ref */ - *chainp = chain; - } -} - /* * Shift *chainp up to the specified directory, change the filename * to "0xINODENUMBER", and adjust the key. 
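/*
 * Illustration (not part of the patch): a hardlink target shifted into
 * a common parent directory is keyed by its inode number (an
 * "invisible" key, i.e. without the visible dirhash bit set) and is
 * given a debugging name derived from that number, as the shiftup code
 * below does with ksnprintf().  The sketch only shows the naming
 * convention; the type and buffer size are simplified stand-ins, not
 * the real hammer2_inode_data layout.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <inttypes.h>

struct toy_hltarget {
	uint64_t	key;		/* directory key == inode number */
	char		filename[64];
	size_t		name_len;
};

static void
toy_name_hardlink_target(struct toy_hltarget *t, uint64_t inum)
{
	t->key = inum;	/* invisible key: the visible dirhash bit stays clear */
	snprintf(t->filename, sizeof(t->filename), "0x%016" PRIx64, inum);
	t->name_len = strlen(t->filename);
}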
The chain becomes the @@ -770,20 +796,20 @@ hammer2_chain_refactor(hammer2_chain_t **chainp) */ static void -hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_chain_t **chainp, - hammer2_inode_t *dip, hammer2_chain_t **dchainp, +hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_cluster_t *cluster, + hammer2_inode_t *dip, hammer2_cluster_t *dcluster, int nlinks, int *errorp) { + hammer2_inode_data_t *iptmp; hammer2_inode_data_t *nipdata; - hammer2_chain_t *chain; - hammer2_chain_t *xchain; + hammer2_cluster_t *xcluster; hammer2_key_t key_dummy; hammer2_key_t lhc; hammer2_blockref_t bref; - int cache_index = -1; + int ddflag; - chain = *chainp; - lhc = chain->data->ipdata.inum; + iptmp = &hammer2_cluster_data(cluster)->ipdata; + lhc = iptmp->inum; KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0); /* @@ -794,17 +820,18 @@ hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_chain_t **chainp, * There should be no key collisions with invisible inode keys. * * WARNING! Must use inode_lock_ex() on dip to handle a stale - * dip->chain cache. + * dip->cluster cache. */ retry: *errorp = 0; - xchain = hammer2_chain_lookup(dchainp, &key_dummy, - lhc, lhc, &cache_index, 0); - if (xchain) { + xcluster = hammer2_cluster_lookup(dcluster, &key_dummy, + lhc, lhc, 0, &ddflag); + if (xcluster) { kprintf("X3 chain %p dip %p dchain %p dip->chain %p\n", - xchain, dip, *dchainp, dip->chain); - hammer2_chain_unlock(xchain); - xchain = NULL; + xcluster->focus, dip, dcluster->focus, + dip->cluster.focus); + hammer2_cluster_unlock(xcluster); + xcluster = NULL; *errorp = ENOSPC; #if 0 Debugger("X3"); @@ -815,18 +842,18 @@ retry: * Create entry in common parent directory using the seek position * calculated above. * - * We must refactor chain because it might have been shifted into - * an indirect chain by the create. + * We must refactor cluster because it might have been shifted into + * an indirect cluster by the create. */ if (*errorp == 0) { - KKASSERT(xchain == NULL); + KKASSERT(xcluster == NULL); #if 0 - *errorp = hammer2_chain_create(trans, dchainp, &xchain, + *errorp = hammer2_cluster_create(trans, dcluster, &xcluster, lhc, 0, HAMMER2_BREF_TYPE_INODE,/* n/a */ HAMMER2_INODE_BYTES); /* n/a */ #endif - /*XXX this somehow isn't working on chain XXX*/ + /*XXX this somehow isn't working on cluster XXX*/ /*KKASSERT(xxx)*/ } @@ -835,8 +862,8 @@ retry: */ if (*errorp == EAGAIN) { kprintf("R"); - hammer2_chain_wait(*dchainp); - hammer2_chain_drop(*dchainp); + hammer2_cluster_wait(dcluster); + hammer2_cluster_drop(dcluster); goto retry; } @@ -845,40 +872,38 @@ retry: */ if (*errorp) { panic("error2"); - KKASSERT(xchain == NULL); + KKASSERT(xcluster == NULL); return; } /* - * Use xchain as a placeholder for (lhc). Duplicate chain to the - * same target bref as xchain and then delete xchain. The duplication - * occurs after xchain in flush order even though xchain is deleted - * after the duplication. XXX + * Use xcluster as a placeholder for (lhc). Duplicate cluster to the + * same target bref as xcluster and then delete xcluster. The + * duplication occurs after xcluster in flush order even though + * xcluster is deleted after the duplication. XXX * * WARNING! Duplications (to a different parent) can cause indirect - * blocks to be inserted, refactor xchain. + * blocks to be inserted, refactor xcluster. 
*/ - bref = chain->bref; + hammer2_cluster_bref(cluster, &bref); bref.key = lhc; /* invisible dir entry key */ bref.keybits = 0; - hammer2_chain_duplicate(trans, dchainp, &chain, &bref, 0, 2); + hammer2_cluster_duplicate(trans, dcluster, cluster, &bref, 0, 2); /* - * chain is now 'live' again.. adjust the filename. + * cluster is now 'live' again.. adjust the filename. * * Directory entries are inodes but this is a hidden hardlink * target. The name isn't used but to ease debugging give it * a name after its inode number. */ - hammer2_chain_modify(trans, &chain, 0); - nipdata = &chain->data->ipdata; + hammer2_cluster_modify(trans, cluster, 0); + nipdata = &hammer2_cluster_data(cluster)->ipdata; ksnprintf(nipdata->filename, sizeof(nipdata->filename), "0x%016jx", (intmax_t)nipdata->inum); nipdata->name_len = strlen(nipdata->filename); nipdata->name_key = lhc; nipdata->nlinks += nlinks; - - *chainp = chain; } /* @@ -896,27 +921,28 @@ retry: */ int hammer2_inode_connect(hammer2_trans_t *trans, - hammer2_chain_t **chainp, int hlink, - hammer2_inode_t *dip, hammer2_chain_t **dchainp, + hammer2_cluster_t **clusterp, int hlink, + hammer2_inode_t *dip, hammer2_cluster_t *dcluster, const uint8_t *name, size_t name_len, hammer2_key_t lhc) { hammer2_inode_data_t *ipdata; - hammer2_chain_t *nchain; - hammer2_chain_t *ochain; + hammer2_cluster_t *ocluster; + hammer2_cluster_t *ncluster; hammer2_key_t key_dummy; - int cache_index = -1; + int ddflag; int error; /* - * Since ochain is either disconnected from the topology or represents - * a hardlink terminus which is always a parent of or equal to dip, - * we should be able to safely lock dip->chain for our setup. + * Since ocluster is either disconnected from the topology or + * represents a hardlink terminus which is always a parent of or + * equal to dip, we should be able to safely lock dip->chain for + * our setup. * * WARNING! Must use inode_lock_ex() on dip to handle a stale - * dip->chain cache. + * dip->cluster. */ - ochain = *chainp; + ocluster = *clusterp; /* * If name is non-NULL we calculate lhc, else we use the passed-in @@ -932,17 +958,17 @@ hammer2_inode_connect(hammer2_trans_t *trans, */ error = 0; while (error == 0) { - nchain = hammer2_chain_lookup(dchainp, &key_dummy, + ncluster = hammer2_cluster_lookup(dcluster, &key_dummy, lhc, lhc, - &cache_index, 0); - if (nchain == NULL) + 0, &ddflag); + if (ncluster == NULL) break; if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK) { error = ENOSPC; } - hammer2_chain_unlock(nchain); - nchain = NULL; + hammer2_cluster_unlock(ncluster); + ncluster = NULL; ++lhc; } } else { @@ -950,9 +976,10 @@ hammer2_inode_connect(hammer2_trans_t *trans, * Reconnect to specific key (used when moving * unlinked-but-open files into the hidden directory). */ - nchain = hammer2_chain_lookup(dchainp, &key_dummy, - lhc, lhc, &cache_index, 0); - KKASSERT(nchain == NULL); + ncluster = hammer2_cluster_lookup(dcluster, &key_dummy, + lhc, lhc, + 0, &ddflag); + KKASSERT(ncluster == NULL); } if (error == 0) { @@ -961,40 +988,42 @@ hammer2_inode_connect(hammer2_trans_t *trans, * Hardlink pointer needed, create totally fresh * directory entry. * - * We must refactor ochain because it might have - * been shifted into an indirect chain by the + * We must refactor ocluster because it might have + * been shifted into an indirect cluster by the * create. 
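/*
 * Illustration (not part of the patch): connecting an inode to a
 * directory either creates a small HARDLINK pointer entry recording the
 * target's inode number and type, or (the non-hardlink case) places the
 * inode record itself under the new name, as the code below does.  The
 * record layout here is a toy model invented for illustration; it is
 * not the hammer2_inode_data layout and no allocation, locking or media
 * I/O is modelled.
 */
#include <stdint.h>
#include <string.h>

enum toy_objtype { TOY_REGFILE = 1, TOY_DIRECTORY = 2, TOY_HARDLINK = 3 };

struct toy_dirent {
	enum toy_objtype type;
	enum toy_objtype target_type;	/* valid when type == TOY_HARDLINK */
	uint64_t	inum;
	uint64_t	nlinks;
	char		name[256];
};

static void
toy_connect(struct toy_dirent *ent, const struct toy_dirent *target,
	    const char *name, int hlink)
{
	if (hlink) {
		/* new pointer entry referencing the real target */
		ent->type = TOY_HARDLINK;
		ent->target_type = target->type;
		ent->inum = target->inum;
		ent->nlinks = 1;
	} else {
		/* the inode record itself is (re)named into place */
		*ent = *target;
	}
	strncpy(ent->name, name, sizeof(ent->name) - 1);
	ent->name[sizeof(ent->name) - 1] = '\0';
}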
*/ - KKASSERT(nchain == NULL); - error = hammer2_chain_create(trans, dchainp, &nchain, - lhc, 0, - HAMMER2_BREF_TYPE_INODE, - HAMMER2_INODE_BYTES); - hammer2_chain_refactor(&ochain); + KKASSERT(ncluster == NULL); + error = hammer2_cluster_create(trans, + dcluster, &ncluster, + lhc, 0, + HAMMER2_BREF_TYPE_INODE, + HAMMER2_INODE_BYTES); + hammer2_cluster_refactor(ocluster); } else { /* - * Reconnect the original chain and rename. Use - * chain_duplicate(). The caller will likely delete + * Reconnect the original cluster and rename. Use + * cluster_duplicate(). The caller will likely delete * or has already deleted the original chain in * this case. * - * NOTE: chain_duplicate() generates a new chain - * with CHAIN_DELETED cleared (ochain typically + * NOTE: cluster_duplicate() generates a new cluster + * with CHAIN_DELETED cleared (ocluster typically * has it set from the file unlink). * - * WARNING! Can cause held-over chains to require a + * WARNING! Can cause held-over clusters to require a * refactor. Fortunately we have none (our - * locked chains are passed into and + * locked clusters are passed into and * modified by the call). */ - nchain = ochain; - ochain = NULL; - hammer2_chain_duplicate(trans, NULL, &nchain, NULL, - 0, 3); - error = hammer2_chain_create(trans, dchainp, &nchain, - lhc, 0, - HAMMER2_BREF_TYPE_INODE, - HAMMER2_INODE_BYTES); + ncluster = ocluster; + ocluster = NULL; + hammer2_cluster_duplicate(trans, NULL, ncluster, NULL, + 0, 3); + error = hammer2_cluster_create(trans, + dcluster, &ncluster, + lhc, 0, + HAMMER2_BREF_TYPE_INODE, + HAMMER2_INODE_BYTES); } } @@ -1004,10 +1033,11 @@ hammer2_inode_connect(hammer2_trans_t *trans, KKASSERT(error != EAGAIN); /* - * nchain should be NULL on error, leave ochain (== *chainp) alone. + * ncluster should be NULL on error, leave ocluster + * (ocluster == *clusterp) alone. */ if (error) { - KKASSERT(nchain == NULL); + KKASSERT(ncluster == NULL); return (error); } @@ -1016,58 +1046,39 @@ hammer2_inode_connect(hammer2_trans_t *trans, * to update the inode. * * When creating an OBJTYPE_HARDLINK entry remember to unlock the - * chain, the caller will access the hardlink via the actual hardlink + * cluster, the caller will access the hardlink via the actual hardlink * target file and not the hardlink pointer entry, so we must still - * return ochain. + * return ocluster. */ if (hlink && hammer2_hardlink_enable >= 0) { /* * Create the HARDLINK pointer. oip represents the hardlink * target in this situation. * - * We will return ochain (the hardlink target). + * We will return ocluster (the hardlink target). */ - hammer2_chain_modify(trans, &nchain, 0); + hammer2_cluster_modify(trans, ncluster, 0); KKASSERT(name_len < HAMMER2_INODE_MAXNAME); - ipdata = &nchain->data->ipdata; + ipdata = &hammer2_cluster_data(ncluster)->ipdata; bcopy(name, ipdata->filename, name_len); ipdata->name_key = lhc; ipdata->name_len = name_len; - ipdata->target_type = ochain->data->ipdata.type; + ipdata->target_type = + hammer2_cluster_data(ocluster)->ipdata.type; ipdata->type = HAMMER2_OBJTYPE_HARDLINK; - ipdata->inum = ochain->data->ipdata.inum; + ipdata->inum = hammer2_cluster_data(ocluster)->ipdata.inum; ipdata->nlinks = 1; - hammer2_chain_unlock(nchain); - nchain = ochain; - ochain = NULL; - } else if (hlink && hammer2_hardlink_enable < 0) { - /* - * Create a snapshot (hardlink fake mode for debugging). - * (ochain already flushed above so we can just copy the - * bref XXX). - * - * Since this is a snapshot we return nchain in the fake - * hardlink case. 
- */ - hammer2_chain_modify(trans, &nchain, 0); - KKASSERT(name_len < HAMMER2_INODE_MAXNAME); - ipdata = &nchain->data->ipdata; - *ipdata = ochain->data->ipdata; - bcopy(name, ipdata->filename, name_len); - ipdata->name_key = lhc; - ipdata->name_len = name_len; - atomic_clear_int(&nchain->core->flags, - HAMMER2_CORE_COUNTEDBREFS); - kprintf("created fake hardlink %*.*s\n", - (int)name_len, (int)name_len, name); + hammer2_cluster_unlock(ncluster); + ncluster = ocluster; + ocluster = NULL; } else { /* - * nchain is a duplicate of ochain at the new location. + * ncluster is a duplicate of ocluster at the new location. * We must fixup the name stored in oip. The bref key * has already been set up. */ - hammer2_chain_modify(trans, &nchain, 0); - ipdata = &nchain->data->ipdata; + hammer2_cluster_modify(trans, ncluster, 0); + ipdata = &hammer2_cluster_data(ncluster)->ipdata; KKASSERT(name_len < HAMMER2_INODE_MAXNAME); bcopy(name, ipdata->filename, name_len); @@ -1077,13 +1088,13 @@ hammer2_inode_connect(hammer2_trans_t *trans, } /* - * We are replacing ochain with nchain, unlock ochain. In the - * case where ochain is left unchanged the code above sets - * nchain to ochain and ochain to NULL, resulting in a NOP here. + * We are replacing ocluster with ncluster, unlock ocluster. In the + * case where ocluster is left unchanged the code above sets + * ncluster to ocluster and ocluster to NULL, resulting in a NOP here. */ - if (ochain) - hammer2_chain_unlock(ochain); - *chainp = nchain; + if (ocluster) + hammer2_cluster_unlock(ocluster); + *clusterp = ncluster; return (0); } @@ -1097,20 +1108,36 @@ hammer2_inode_connect(hammer2_trans_t *trans, */ void hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip, - hammer2_chain_t *nchain) + hammer2_cluster_t *cluster) { hammer2_chain_t *ochain; + hammer2_chain_t *nchain; hammer2_inode_t *opip; + int i; - /* - * Repoint ip->chain if requested. - */ - ochain = ip->chain; - ip->chain = nchain; - if (nchain) - hammer2_chain_ref(nchain); - if (ochain) - hammer2_chain_drop(ochain); + for (i = 0; i < cluster->nchains; ++i) { + /* + * Get possible replacement chain, loop if nothing to do. + */ + nchain = cluster->array[i]; + if (i < ip->cluster.nchains) { + ochain = ip->cluster.array[i]; + if (ochain == nchain) + continue; + } else { + ochain = NULL; + } + + /* + * Make adjustment + */ + ip->cluster.array[i] = nchain; + if (nchain) + hammer2_chain_ref(nchain); + if (ochain) + hammer2_chain_drop(ochain); + } + ip->cluster.focus = ip->cluster.array[0]; /* * Repoint ip->pip if requested (non-NULL pip). 
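/*
 * Illustration (not part of the patch): repointing an inode's embedded
 * cluster replaces each chain slot with the corresponding chain from
 * the new cluster, adjusting reference counts and skipping slots that
 * already match, then re-selects the focus.  This is a toy user-space
 * model of the loop above; the fixed-size array, the bare integer ref
 * counts, the nchains handling and the focus selection are simplified
 * stand-ins, not the real hammer2 structures.
 */
#include <stddef.h>

#define TOY_MAXCHAINS	8

struct toy_rchain {
	int	refs;
};

struct toy_rcluster {
	int			nchains;
	struct toy_rchain	*focus;
	struct toy_rchain	*array[TOY_MAXCHAINS];
};

static void
toy_repoint(struct toy_rcluster *ipcl, const struct toy_rcluster *ncl)
{
	struct toy_rchain *ochain;
	struct toy_rchain *nchain;
	int i;

	for (i = 0; i < ncl->nchains; ++i) {
		nchain = ncl->array[i];
		ochain = (i < ipcl->nchains) ? ipcl->array[i] : NULL;
		if (ochain == nchain)
			continue;
		ipcl->array[i] = nchain;
		if (nchain)
			++nchain->refs;
		if (ochain)
			--ochain->refs;
	}
	if (ncl->nchains > ipcl->nchains)	/* simplification */
		ipcl->nchains = ncl->nchains;
	ipcl->focus = ipcl->array[0];
}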
@@ -1151,20 +1178,20 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip, int isdir, int *hlinkp, struct nchandle *nch) { hammer2_inode_data_t *ipdata; - hammer2_chain_t *parent; - hammer2_chain_t *ochain; - hammer2_chain_t *chain; - hammer2_chain_t *dparent; - hammer2_chain_t *dchain; + hammer2_cluster_t *cparent; + hammer2_cluster_t *ocluster; + hammer2_cluster_t *cluster; + hammer2_cluster_t *dparent; + hammer2_cluster_t *dcluster; hammer2_key_t key_dummy; hammer2_key_t key_next; hammer2_key_t lhc; int error; - int cache_index = -1; + int ddflag; uint8_t type; error = 0; - ochain = NULL; + ocluster = NULL; lhc = hammer2_dirhash(name, name_len); /* @@ -1172,20 +1199,22 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip, */ if (hlinkp) *hlinkp = 0; - parent = hammer2_inode_lock_ex(dip); - chain = hammer2_chain_lookup(&parent, &key_next, + cparent = hammer2_inode_lock_ex(dip); + cluster = hammer2_cluster_lookup(cparent, &key_next, lhc, lhc + HAMMER2_DIRHASH_LOMASK, - &cache_index, 0); - while (chain) { - if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && - name_len == chain->data->ipdata.name_len && - bcmp(name, chain->data->ipdata.filename, name_len) == 0) { - break; + 0, &ddflag); + while (cluster) { + if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) { + ipdata = &hammer2_cluster_data(cluster)->ipdata; + if (ipdata->name_len == name_len && + bcmp(ipdata->filename, name, name_len) == 0) { + break; + } } - chain = hammer2_chain_next(&parent, chain, &key_next, - key_next, - lhc + HAMMER2_DIRHASH_LOMASK, - &cache_index, 0); + cluster = hammer2_cluster_next(cparent, cluster, &key_next, + key_next, + lhc + HAMMER2_DIRHASH_LOMASK, + 0); } hammer2_inode_unlock_ex(dip, NULL); /* retain parent */ @@ -1193,14 +1222,16 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip, * Not found or wrong type (isdir < 0 disables the type check). * If a hardlink pointer, type checks use the hardlink target. */ - if (chain == NULL) { + if (cluster == NULL) { error = ENOENT; goto done; } - if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK) { + ipdata = &hammer2_cluster_data(cluster)->ipdata; + type = ipdata->type; + if (type == HAMMER2_OBJTYPE_HARDLINK) { if (hlinkp) *hlinkp = 1; - type = chain->data->ipdata.target_type; + type = ipdata->target_type; } if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) { @@ -1216,14 +1247,21 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip, * Hardlink must be resolved. We can't hold the parent locked * while we do this or we could deadlock. * - * On success chain will be adjusted to point at the hardlink target - * and ochain will point to the hardlink pointer in the original - * directory. Otherwise chain remains pointing to the original. + * On success cluster will be adjusted to point at the hardlink target + * and ocluster will point to the hardlink pointer in the original + * directory. Otherwise cluster remains pointing to the original. + * + * Lock ownership is transfered to cluster. ocluster is merely + * referenced. 
*/ - if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) { - hammer2_chain_unlock(parent); - parent = NULL; - error = hammer2_hardlink_find(dip, &chain, &ochain); + if (ipdata->type == HAMMER2_OBJTYPE_HARDLINK) { + hammer2_cluster_unlock(cparent); + cparent = NULL; + + ocluster = cluster; + cluster = hammer2_cluster_copy(ocluster, 1); + error = hammer2_hardlink_find(dip, cluster); + KKASSERT(error == 0); } /* @@ -1238,46 +1276,46 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip, * entries. */ if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) { - dparent = hammer2_chain_lookup_init(chain, 0); - dchain = hammer2_chain_lookup(&dparent, &key_dummy, - 0, (hammer2_key_t)-1, - &cache_index, - HAMMER2_LOOKUP_NODATA); - if (dchain) { - hammer2_chain_unlock(dchain); - hammer2_chain_lookup_done(dparent); + dparent = hammer2_cluster_lookup_init(cluster, 0); + dcluster = hammer2_cluster_lookup(dparent, &key_dummy, + 0, (hammer2_key_t)-1, + HAMMER2_LOOKUP_NODATA, + &ddflag); + if (dcluster) { + hammer2_cluster_unlock(dcluster); + hammer2_cluster_lookup_done(dparent); error = ENOTEMPTY; goto done; } - hammer2_chain_lookup_done(dparent); + hammer2_cluster_lookup_done(dparent); dparent = NULL; - /* dchain NULL */ + /* dcluster NULL */ } /* - * Ok, we can now unlink the chain. We always decrement nlinks even + * Ok, we can now unlink the cluster. We always decrement nlinks even * if the entry can be deleted in case someone has the file open and * does an fstat(). * - * The chain itself will no longer be in the on-media topology but + * The cluster itself will no longer be in the on-media topology but * can still be flushed to the media (e.g. if an open descriptor - * remains). When the last vnode/ip ref goes away the chain will + * remains). When the last vnode/ip ref goes away the cluster will * be marked unmodified, avoiding any further (now unnecesary) I/O. * - * A non-NULL ochain indicates a hardlink. + * A non-NULL ocluster indicates a hardlink. */ - if (ochain) { + if (ocluster) { /* * Delete the original hardlink pointer unconditionally. * (any open descriptors will migrate to the hardlink * target and have no affect on this operation). * - * NOTE: parent from above is NULL when ochain != NULL + * NOTE: parent from above is NULL when ocluster != NULL * so we can reuse it. */ - hammer2_chain_lock(ochain, HAMMER2_RESOLVE_ALWAYS); - hammer2_chain_delete(trans, ochain, 0); - hammer2_chain_unlock(ochain); + hammer2_cluster_lock(ocluster, HAMMER2_RESOLVE_ALWAYS); + hammer2_cluster_delete(trans, ocluster, 0); + hammer2_cluster_unlock(ocluster); } /* @@ -1295,14 +1333,15 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip, * passed as NULL in this situation. hammer2_inode_connect() * will bump nlinks. 
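/*
 * Illustration (not part of the patch): on unlink the target's link
 * count is decremented and, when it reaches zero, the inode is either
 * deleted outright or, if a descriptor still has it open, parked in the
 * hidden directory so it can continue to be flushed until the last ref
 * goes away.  The fields below are hypothetical stand-ins used only to
 * show that decision; they are not the hammer2 API.
 */
#include <stdint.h>

struct toy_unlink_target {
	uint64_t	nlinks;
	int		is_open;	/* cache_isopen() stand-in */
	int		moved_to_hidden;
	int		deleted;
};

static void
toy_unlink(struct toy_unlink_target *t)
{
	if (t->nlinks > 0)
		--t->nlinks;
	if (t->nlinks != 0)
		return;
	if (t->is_open)
		t->moved_to_hidden = 1;	/* keep flushable while open */
	else
		t->deleted = 1;
}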
*/ - KKASSERT(chain != NULL); - hammer2_chain_modify(trans, &chain, 0); - ipdata = &chain->data->ipdata; + KKASSERT(cluster != NULL); + hammer2_cluster_modify(trans, cluster, 0); + ipdata = &hammer2_cluster_data(cluster)->ipdata; --ipdata->nlinks; if ((int64_t)ipdata->nlinks < 0) /* XXX debugging */ ipdata->nlinks = 0; if (ipdata->nlinks == 0) { - if ((chain->flags & HAMMER2_CHAIN_PFSROOT) && chain->pmp) { + if ((cluster->focus->flags & HAMMER2_CHAIN_PFSROOT) && + cluster->pmp) { error = EINVAL; kprintf("hammer2: PFS \"%s\" cannot be deleted " "while still mounted\n", @@ -1311,21 +1350,22 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip, } if (nch && cache_isopen(nch)) { kprintf("WARNING: unlinking open file\n"); - atomic_set_int(&chain->flags, HAMMER2_CHAIN_UNLINKED); - hammer2_inode_move_to_hidden(trans, &chain, + hammer2_cluster_set_chainflags(cluster, + HAMMER2_CHAIN_UNLINKED); + hammer2_inode_move_to_hidden(trans, &cluster, ipdata->inum); } else { - hammer2_chain_delete(trans, chain, 0); + hammer2_cluster_delete(trans, cluster, 0); } } error = 0; done: - if (chain) - hammer2_chain_unlock(chain); - if (parent) - hammer2_chain_lookup_done(parent); - if (ochain) - hammer2_chain_drop(ochain); + if (cluster) + hammer2_cluster_unlock(cluster); + if (cparent) + hammer2_cluster_lookup_done(cparent); + if (ocluster) + hammer2_cluster_drop(ocluster); return error; } @@ -1337,13 +1377,13 @@ void hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp) { hammer2_trans_t trans; - hammer2_chain_t *parent; - hammer2_chain_t *chain; - hammer2_chain_t *scan; + hammer2_cluster_t *cparent; + hammer2_cluster_t *cluster; + hammer2_cluster_t *scan; hammer2_inode_data_t *ipdata; hammer2_key_t key_dummy; hammer2_key_t key_next; - int cache_index; + int ddflag; int error; int count; @@ -1356,13 +1396,13 @@ hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp) bzero(&key_dummy, sizeof(key_dummy)); hammer2_trans_init(&trans, pmp, NULL, 0); - parent = hammer2_inode_lock_ex(pmp->iroot); - chain = hammer2_chain_lookup(&parent, &key_dummy, - HAMMER2_INODE_HIDDENDIR, - HAMMER2_INODE_HIDDENDIR, - &cache_index, 0); - if (chain) { - pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, chain); + cparent = hammer2_inode_lock_ex(pmp->iroot); + cluster = hammer2_cluster_lookup(cparent, &key_dummy, + HAMMER2_INODE_HIDDENDIR, + HAMMER2_INODE_HIDDENDIR, + 0, &ddflag); + if (cluster) { + pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster); hammer2_inode_ref(pmp->ihidden); /* @@ -1370,23 +1410,22 @@ hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp) * any system crash. 
*/ count = 0; - scan = hammer2_chain_lookup(&chain, &key_next, - 0, HAMMER2_MAX_TID, - &cache_index, - HAMMER2_LOOKUP_NODATA); + scan = hammer2_cluster_lookup(cluster, &key_next, + 0, HAMMER2_MAX_TID, + HAMMER2_LOOKUP_NODATA, &ddflag); while (scan) { - if (scan->bref.type == HAMMER2_BREF_TYPE_INODE) { - hammer2_chain_delete(&trans, scan, 0); + if (hammer2_cluster_type(scan) == + HAMMER2_BREF_TYPE_INODE) { + hammer2_cluster_delete(&trans, scan, 0); ++count; } - scan = hammer2_chain_next(&chain, scan, &key_next, - 0, HAMMER2_MAX_TID, - &cache_index, - HAMMER2_LOOKUP_NODATA); + scan = hammer2_cluster_next(cluster, scan, &key_next, + 0, HAMMER2_MAX_TID, + HAMMER2_LOOKUP_NODATA); } - hammer2_inode_unlock_ex(pmp->ihidden, chain); - hammer2_inode_unlock_ex(pmp->iroot, parent); + hammer2_inode_unlock_ex(pmp->ihidden, cluster); + hammer2_inode_unlock_ex(pmp->iroot, cparent); hammer2_trans_done(&trans); kprintf("hammer2: PFS loaded hidden dir, " "removed %d dead entries\n", count); @@ -1396,21 +1435,21 @@ hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp) /* * Create the hidden directory */ - error = hammer2_chain_create(&trans, &parent, &chain, - HAMMER2_INODE_HIDDENDIR, 0, - HAMMER2_BREF_TYPE_INODE, - HAMMER2_INODE_BYTES); - hammer2_inode_unlock_ex(pmp->iroot, parent); - hammer2_chain_modify(&trans, &chain, 0); - ipdata = &chain->data->ipdata; + error = hammer2_cluster_create(&trans, cparent, &cluster, + HAMMER2_INODE_HIDDENDIR, 0, + HAMMER2_BREF_TYPE_INODE, + HAMMER2_INODE_BYTES); + hammer2_inode_unlock_ex(pmp->iroot, cparent); + hammer2_cluster_modify(&trans, cluster, 0); + ipdata = &hammer2_cluster_data(cluster)->ipdata; ipdata->type = HAMMER2_OBJTYPE_DIRECTORY; ipdata->inum = HAMMER2_INODE_HIDDENDIR; ipdata->nlinks = 1; kprintf("hammer2: PFS root missing hidden directory, creating\n"); - pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, chain); + pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster); hammer2_inode_ref(pmp->ihidden); - hammer2_inode_unlock_ex(pmp->ihidden, chain); + hammer2_inode_unlock_ex(pmp->ihidden, cluster); hammer2_trans_done(&trans); } @@ -1424,61 +1463,59 @@ hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp) */ static void -hammer2_inode_move_to_hidden(hammer2_trans_t *trans, hammer2_chain_t **chainp, - hammer2_tid_t inum) +hammer2_inode_move_to_hidden(hammer2_trans_t *trans, + hammer2_cluster_t **clusterp, hammer2_tid_t inum) { - hammer2_chain_t *chain; - hammer2_chain_t *dchain; + hammer2_cluster_t *dcluster; hammer2_pfsmount_t *pmp; int error; - chain = *chainp; - pmp = chain->pmp; + pmp = (*clusterp)->pmp; KKASSERT(pmp != NULL); KKASSERT(pmp->ihidden != NULL); - hammer2_chain_delete(trans, chain, 0); - dchain = hammer2_inode_lock_ex(pmp->ihidden); - error = hammer2_inode_connect(trans, chainp, 0, - pmp->ihidden, &dchain, + hammer2_cluster_delete(trans, *clusterp, 0); + dcluster = hammer2_inode_lock_ex(pmp->ihidden); + error = hammer2_inode_connect(trans, clusterp, 0, + pmp->ihidden, dcluster, NULL, 0, inum); - hammer2_inode_unlock_ex(pmp->ihidden, dchain); + hammer2_inode_unlock_ex(pmp->ihidden, dcluster); KKASSERT(error == 0); } /* - * Given an exclusively locked inode and chain we consolidate its chain + * Given an exclusively locked inode and cluster we consolidate its cluster * for hardlink creation, adding (nlinks) to the file's link count and * potentially relocating the inode to a directory common to ip->pip and tdip. * - * Replaces (*chainp) if consolidation occurred, unlocking the old chain - * and returning a new locked chain. 
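/*
 * Illustration (not part of the patch): hardlink consolidation first
 * handles the cheap cases -- no hardlink needed, hardlinks
 * administratively disabled, or the inode already being an invisible
 * hardlink target under the common parent so only the link count needs
 * adjusting -- before falling through to the delete-duplicate plus
 * shiftup path.  The decision function below is a toy mirror of those
 * early exits; the flag bit and action names are illustrative values,
 * not the real on-media encoding.
 */
#include <stdint.h>

#define TOY_KEY_VISIBLE		0x8000000000000000ULL	/* toy value */

enum toy_consolidate_action {
	TOY_NOTHING_TO_DO,
	TOY_BUMP_NLINKS_ONLY,
	TOY_CONVERT_TO_TARGET,		/* delete-duplicate + shiftup */
	TOY_REFUSED			/* maps to ENOTSUP */
};

static enum toy_consolidate_action
toy_consolidate(uint64_t name_key, int nlinks, int hardlinks_enabled,
		int parent_is_common_dir)
{
	if (nlinks == 0 && (name_key & TOY_KEY_VISIBLE))
		return (TOY_NOTHING_TO_DO);
	if (!hardlinks_enabled)
		return (TOY_REFUSED);
	if (parent_is_common_dir && (name_key & TOY_KEY_VISIBLE) == 0)
		return (TOY_BUMP_NLINKS_ONLY);
	return (TOY_CONVERT_TO_TARGET);
}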
+ * Replaces (*clusterp) if consolidation occurred, unlocking the old cluster + * and returning a new locked cluster. * - * NOTE! This function will also replace ip->chain. + * NOTE! This function will also replace ip->cluster. */ int hammer2_hardlink_consolidate(hammer2_trans_t *trans, - hammer2_inode_t *ip, hammer2_chain_t **chainp, - hammer2_inode_t *cdip, hammer2_chain_t **cdchainp, + hammer2_inode_t *ip, + hammer2_cluster_t **clusterp, + hammer2_inode_t *cdip, + hammer2_cluster_t *cdcluster, int nlinks) { hammer2_inode_data_t *ipdata; - hammer2_chain_t *chain; - hammer2_chain_t *nchain; + hammer2_cluster_t *cluster; + hammer2_cluster_t *ncluster; int error; - chain = *chainp; + cluster = *clusterp; + ipdata = &hammer2_cluster_data(cluster)->ipdata; if (nlinks == 0 && /* no hardlink needed */ - (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE)) { - return (0); - } - if (hammer2_hardlink_enable < 0) { /* fake hardlinks */ + (ipdata->name_key & HAMMER2_DIRHASH_VISIBLE)) { return (0); } if (hammer2_hardlink_enable == 0) { /* disallow hardlinks */ - hammer2_chain_unlock(chain); - *chainp = NULL; + hammer2_cluster_unlock(cluster); + *clusterp = NULL; return (ENOTSUP); } @@ -1487,11 +1524,13 @@ hammer2_hardlink_consolidate(hammer2_trans_t *trans, * this is already a hardlink target, all we need to do is adjust * the link count. */ + ipdata = &hammer2_cluster_data(cluster)->ipdata; if (cdip == ip->pip && - (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) { + (ipdata->name_key & HAMMER2_DIRHASH_VISIBLE) == 0) { if (nlinks) { - hammer2_chain_modify(trans, &chain, 0); - chain->data->ipdata.nlinks += nlinks; + hammer2_cluster_modify(trans, cluster, 0); + ipdata = &hammer2_cluster_data(cluster)->ipdata; + ipdata->nlinks += nlinks; } error = 0; goto done; @@ -1499,27 +1538,29 @@ hammer2_hardlink_consolidate(hammer2_trans_t *trans, /* - * chain is the real inode. If it's visible we have to convert it + * cluster is the real inode. If it's visible we have to convert it * to a hardlink pointer. If it is not visible then it is already * a hardlink target and only needs to be deleted. */ - KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0); - KKASSERT(chain->data->ipdata.type != HAMMER2_OBJTYPE_HARDLINK); - if (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE) { + KKASSERT((cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0); + ipdata = &hammer2_cluster_data(cluster)->ipdata; + KKASSERT(ipdata->type != HAMMER2_OBJTYPE_HARDLINK); + if (ipdata->name_key & HAMMER2_DIRHASH_VISIBLE) { /* - * We are going to duplicate chain later, causing its + * We are going to duplicate cluster later, causing its * media block to be shifted to the duplicate. Even though - * we are delete-duplicating nchain here it might decide not + * we are delete-duplicating ncluster here it might decide not * to reallocate the block. Set FORCECOW to force it to. 
*/ - nchain = chain; - hammer2_chain_lock(nchain, HAMMER2_RESOLVE_ALWAYS); - atomic_set_int(&nchain->flags, HAMMER2_CHAIN_FORCECOW); - hammer2_chain_delete_duplicate(trans, &nchain, - HAMMER2_DELDUP_RECORE); - KKASSERT((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0); - - ipdata = &nchain->data->ipdata; + ncluster = cluster; + hammer2_cluster_lock(ncluster, HAMMER2_RESOLVE_ALWAYS); + hammer2_cluster_set_chainflags(ncluster, + HAMMER2_CHAIN_FORCECOW); + hammer2_cluster_delete_duplicate(trans, ncluster, + HAMMER2_DELDUP_RECORE); + KKASSERT((ncluster->focus->flags & + HAMMER2_CHAIN_DUPLICATED) == 0); + ipdata = &hammer2_cluster_data(ncluster)->ipdata; ipdata->target_type = ipdata->type; ipdata->type = HAMMER2_OBJTYPE_HARDLINK; ipdata->uflags = 0; @@ -1550,36 +1591,37 @@ hammer2_hardlink_consolidate(hammer2_trans_t *trans, bzero(&ipdata->u, sizeof(ipdata->u)); /* XXX transaction ids */ } else { - hammer2_chain_delete(trans, chain, 0); - nchain = NULL; + hammer2_cluster_delete(trans, cluster, 0); + ncluster = NULL; } /* - * chain represents the hardlink target and is now flagged deleted. + * cluster represents the hardlink target and is now flagged deleted. * duplicate it to the parent directory and adjust nlinks. * - * WARNING! The shiftup() call can cause nchain to be moved into - * an indirect block, and our nchain will wind up pointing + * WARNING! The shiftup() call can cause ncluster to be moved into + * an indirect block, and our ncluster will wind up pointing * to the older/original version. */ - KKASSERT(chain->flags & HAMMER2_CHAIN_DELETED); - hammer2_hardlink_shiftup(trans, &chain, cdip, cdchainp, nlinks, &error); + KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_DELETED); + hammer2_hardlink_shiftup(trans, cluster, cdip, cdcluster, + nlinks, &error); if (error == 0) - hammer2_inode_repoint(ip, cdip, chain); + hammer2_inode_repoint(ip, cdip, cluster); /* - * Unlock the original chain last as the lock blocked races against + * Unlock the original cluster last as the lock blocked races against * the creation of the new hardlink target. */ - if (nchain) - hammer2_chain_unlock(nchain); + if (ncluster) + hammer2_cluster_unlock(ncluster); done: /* - * Cleanup, chain/nchain already dealt with. + * Cleanup, cluster/ncluster already dealt with. */ - *chainp = chain; + *clusterp = cluster; hammer2_inode_drop(cdip); return (error); @@ -1618,40 +1660,47 @@ hammer2_hardlink_deconsolidate(hammer2_trans_t *trans, * locked. */ int -hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp, - hammer2_chain_t **ochainp) +hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_cluster_t *cluster) { - hammer2_chain_t *chain = *chainp; - hammer2_chain_t *parent; + hammer2_inode_data_t *ipdata; + hammer2_cluster_t *cparent; + hammer2_cluster_t *rcluster; hammer2_inode_t *ip; hammer2_inode_t *pip; hammer2_key_t key_dummy; hammer2_key_t lhc; - int cache_index = -1; + int ddflag; pip = dip; hammer2_inode_ref(pip); /* for loop */ - hammer2_chain_ref(chain); /* for (*ochainp) */ - *ochainp = chain; /* - * Locate the hardlink. pip is referenced and not locked, - * ipp. - * - * chain is reused. + * Locate the hardlink. pip is referenced and not locked. */ - lhc = chain->data->ipdata.inum; - hammer2_chain_unlock(chain); - chain = NULL; + ipdata = &hammer2_cluster_data(cluster)->ipdata; + lhc = ipdata->inum; + + /* + * We don't need the cluster's chains, but we need to retain the + * cluster structure itself so we can load the hardlink search + * result into it. 
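/*
 * Illustration (not part of the patch): a hardlink pointer stores the
 * target's inode number, and the target itself lives in some parent
 * directory above the pointer, so resolution walks ip->pip upward and
 * looks the number up at each level until it is found, as the loop
 * below does.  The directory model here is a toy invented for
 * illustration (a linear key list per directory); it is not the real
 * lookup and no locking or refcounting is modelled.
 */
#include <stdint.h>
#include <stddef.h>

struct toy_dir {
	struct toy_dir	*parent;	/* models ip->pip */
	const uint64_t	*keys;		/* inode numbers present here */
	int		nkeys;
};

static const struct toy_dir *
toy_hardlink_find(const struct toy_dir *dip, uint64_t inum)
{
	const struct toy_dir *dir;
	int i;

	for (dir = dip; dir != NULL; dir = dir->parent) {
		for (i = 0; i < dir->nkeys; ++i) {
			if (dir->keys[i] == inum)
				return (dir);	/* dir holding the target */
		}
	}
	return (NULL);				/* maps to EIO in the patch */
}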
+ */ + KKASSERT(cluster->refs == 1); + atomic_add_int(&cluster->refs, 1); + hammer2_cluster_unlock(cluster); /* hack */ + cluster->nchains = 0; /* hack */ + + rcluster = NULL; while ((ip = pip) != NULL) { - parent = hammer2_inode_lock_ex(ip); + cparent = hammer2_inode_lock_ex(ip); hammer2_inode_drop(ip); /* loop */ - KKASSERT(parent->bref.type == HAMMER2_BREF_TYPE_INODE); - chain = hammer2_chain_lookup(&parent, &key_dummy, - lhc, lhc, &cache_index, 0); - hammer2_chain_lookup_done(parent); /* discard parent */ - if (chain) + KKASSERT(hammer2_cluster_type(cparent) == + HAMMER2_BREF_TYPE_INODE); + rcluster = hammer2_cluster_lookup(cparent, &key_dummy, + lhc, lhc, 0, &ddflag); + hammer2_cluster_lookup_done(cparent); /* discard parent */ + if (rcluster) break; pip = ip->pip; /* safe, ip held locked */ if (pip) @@ -1663,14 +1712,14 @@ hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp, * chain is locked, ip is locked. Unlock ip, return the locked * chain. *ipp is already set w/a ref count and not locked. * - * (parent is already unlocked). + * (cparent is already unlocked). */ if (ip) hammer2_inode_unlock_ex(ip, NULL); - *chainp = chain; - if (chain) { - KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE); - /* already locked */ + + if (rcluster) { + hammer2_cluster_replace(cluster, rcluster); + hammer2_cluster_drop(rcluster); return (0); } else { return (EIO); @@ -1732,24 +1781,24 @@ hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip) */ void hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip, - hammer2_chain_t **chainp) + hammer2_cluster_t *cparent) { hammer2_inode_data_t *ipdata; - hammer2_chain_t *parent; - hammer2_chain_t *chain; + hammer2_cluster_t *dparent; + hammer2_cluster_t *cluster; hammer2_key_t lbase; hammer2_key_t key_next; - int cache_index; + int ddflag; - ipdata = &ip->chain->data->ipdata; + ipdata = &hammer2_cluster_data(cparent)->ipdata; /* target file */ if (ip->flags & HAMMER2_INODE_MTIME) { - ipdata = hammer2_chain_modify_ip(trans, ip, chainp, 0); + ipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0); atomic_clear_int(&ip->flags, HAMMER2_INODE_MTIME); ipdata->mtime = ip->mtime; } if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size < ipdata->size) { - ipdata = hammer2_chain_modify_ip(trans, ip, chainp, 0); + ipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0); ipdata->size = ip->size; atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED); @@ -1759,31 +1808,35 @@ hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip, */ lbase = (ipdata->size + HAMMER2_PBUFMASK64) & ~HAMMER2_PBUFMASK64; - parent = hammer2_chain_lookup_init(ip->chain, 0); - chain = hammer2_chain_lookup(&parent, &key_next, - lbase, (hammer2_key_t)-1, - &cache_index, - HAMMER2_LOOKUP_NODATA); - while (chain) { + dparent = hammer2_cluster_lookup_init(&ip->cluster, 0); + cluster = hammer2_cluster_lookup(dparent, &key_next, + lbase, (hammer2_key_t)-1, + HAMMER2_LOOKUP_NODATA, + &ddflag); + while (cluster) { /* * Degenerate embedded case, nothing to loop on */ - if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { - hammer2_chain_unlock(chain); + switch (hammer2_cluster_type(cluster)) { + case HAMMER2_BREF_TYPE_INODE: + hammer2_cluster_unlock(cluster); + cluster = NULL; break; - } - if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) { - hammer2_chain_delete(trans, chain, 0); - } - chain = hammer2_chain_next(&parent, chain, &key_next, + case HAMMER2_BREF_TYPE_DATA: + hammer2_cluster_delete(trans, cluster, 0); + /* fall through */ + default: + 
cluster = hammer2_cluster_next(dparent, cluster, + &key_next, key_next, (hammer2_key_t)-1, - &cache_index, HAMMER2_LOOKUP_NODATA); + break; + } } - hammer2_chain_lookup_done(parent); + hammer2_cluster_lookup_done(dparent); } else if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size > ipdata->size) { - ipdata = hammer2_chain_modify_ip(trans, ip, chainp, 0); + ipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0); ipdata->size = ip->size; atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED); diff --git a/sys/vfs/hammer2/hammer2_io.c b/sys/vfs/hammer2/hammer2_io.c index 1b52ed5e2f..b257aec220 100644 --- a/sys/vfs/hammer2/hammer2_io.c +++ b/sys/vfs/hammer2/hammer2_io.c @@ -451,9 +451,12 @@ hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize, void hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize, - void (*callback)(hammer2_io_t *dio, hammer2_chain_t *arg_c, + void (*callback)(hammer2_io_t *dio, + hammer2_cluster_t *arg_l, + hammer2_chain_t *arg_c, void *arg_p, off_t arg_o), - hammer2_chain_t *arg_c, void *arg_p, off_t arg_o) + hammer2_cluster_t *arg_l, hammer2_chain_t *arg_c, + void *arg_p, off_t arg_o) { hammer2_io_t *dio; int owner; @@ -462,6 +465,7 @@ hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize, dio = hammer2_io_getblk(hmp, lbase, lsize, &owner); if (owner) { dio->callback = callback; + dio->arg_l = arg_l; dio->arg_c = arg_c; dio->arg_p = arg_p; dio->arg_o = arg_o; @@ -469,7 +473,7 @@ hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize, hammer2_io_callback, dio); } else { error = 0; - callback(dio, arg_c, arg_p, arg_o); + callback(dio, arg_l, arg_c, arg_p, arg_o); hammer2_io_bqrelse(&dio); } } @@ -491,7 +495,7 @@ hammer2_io_callback(struct bio *bio) * We still have the ref and DIO_GOOD is now set so nothing else * should mess with the callback fields until we release the dio. 
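/*
 * Illustration (not part of the patch): the buffered-read-with-callback
 * path either issues the read and parks the callback plus its arguments
 * in the I/O descriptor (to be fired on completion), or, when the
 * buffer is already resident, invokes the callback synchronously.  The
 * descriptor below is a toy model of that plumbing with the extra
 * "cluster" argument this patch threads through; the types and the
 * completion path are invented for illustration, not the real dio code.
 */
#include <stddef.h>

struct toy_dio;

typedef void (*toy_iocb_t)(struct toy_dio *dio, void *arg_l, void *arg_c,
			   void *arg_p, long long arg_o);

struct toy_dio {
	int		owner;		/* this caller started the I/O */
	toy_iocb_t	callback;
	void		*arg_l;		/* cluster argument (new in patch) */
	void		*arg_c;		/* chain argument */
	void		*arg_p;
	long long	arg_o;
};

static void
toy_breadcb(struct toy_dio *dio, toy_iocb_t cb,
	    void *arg_l, void *arg_c, void *arg_p, long long arg_o)
{
	if (dio->owner) {
		/* async path: remember everything, fire on completion */
		dio->callback = cb;
		dio->arg_l = arg_l;
		dio->arg_c = arg_c;
		dio->arg_p = arg_p;
		dio->arg_o = arg_o;
	} else {
		/* buffer already valid: call back immediately */
		cb(dio, arg_l, arg_c, arg_p, arg_o);
	}
}

static void
toy_io_complete(struct toy_dio *dio)
{
	dio->callback(dio, dio->arg_l, dio->arg_c, dio->arg_p, dio->arg_o);
}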
*/ - dio->callback(dio, dio->arg_c, dio->arg_p, dio->arg_o); + dio->callback(dio, dio->arg_l, dio->arg_c, dio->arg_p, dio->arg_o); hammer2_io_bqrelse(&dio); /* TODO: async load meta-data and assign chain->dio */ } diff --git a/sys/vfs/hammer2/hammer2_ioctl.c b/sys/vfs/hammer2/hammer2_ioctl.c index 05bedfcfb0..0a5c9bc71c 100644 --- a/sys/vfs/hammer2/hammer2_ioctl.c +++ b/sys/vfs/hammer2/hammer2_ioctl.c @@ -154,7 +154,7 @@ hammer2_ioctl(hammer2_inode_t *ip, u_long com, void *data, int fflag, static int hammer2_ioctl_version_get(hammer2_inode_t *ip, void *data) { - hammer2_mount_t *hmp = ip->pmp->cluster.chains[0]->hmp; + hammer2_mount_t *hmp = ip->pmp->cluster.focus->hmp; hammer2_ioc_version_t *version = data; version->version = hmp->voldata.version; @@ -183,7 +183,7 @@ hammer2_ioctl_recluster(hammer2_inode_t *ip, void *data) static int hammer2_ioctl_remote_scan(hammer2_inode_t *ip, void *data) { - hammer2_mount_t *hmp = ip->pmp->cluster.chains[0]->hmp; + hammer2_mount_t *hmp = ip->pmp->cluster.focus->hmp; hammer2_ioc_remote_t *remote = data; int copyid = remote->copyid; @@ -224,7 +224,7 @@ hammer2_ioctl_remote_add(hammer2_inode_t *ip, void *data) if (copyid >= HAMMER2_COPYID_COUNT) return (EINVAL); - hmp = pmp->cluster.chains[0]->hmp; /* XXX */ + hmp = pmp->cluster.focus->hmp; /* XXX */ hammer2_voldata_lock(hmp); if (copyid < 0) { for (copyid = 1; copyid < HAMMER2_COPYID_COUNT; ++copyid) { @@ -257,7 +257,7 @@ hammer2_ioctl_remote_del(hammer2_inode_t *ip, void *data) int copyid = remote->copyid; int error = 0; - hmp = pmp->cluster.chains[0]->hmp; /* XXX */ + hmp = pmp->cluster.focus->hmp; /* XXX */ if (copyid >= HAMMER2_COPYID_COUNT) return (EINVAL); remote->copy1.path[sizeof(remote->copy1.path) - 1] = 0; @@ -294,7 +294,7 @@ hammer2_ioctl_remote_rep(hammer2_inode_t *ip, void *data) hammer2_mount_t *hmp; int copyid = remote->copyid; - hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */ + hmp = ip->pmp->cluster.focus->hmp; /* XXX */ if (copyid < 0 || copyid >= HAMMER2_COPYID_COUNT) return (EINVAL); @@ -325,7 +325,7 @@ hammer2_ioctl_socket_set(hammer2_inode_t *ip, void *data) hammer2_mount_t *hmp; int copyid = remote->copyid; - hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */ + hmp = ip->pmp->cluster.focus->hmp; /* XXX */ if (copyid < 0 || copyid >= HAMMER2_COPYID_COUNT) return (EINVAL); @@ -352,49 +352,52 @@ hammer2_ioctl_pfs_get(hammer2_inode_t *ip, void *data) hammer2_inode_data_t *ipdata; hammer2_mount_t *hmp; hammer2_ioc_pfs_t *pfs; - hammer2_chain_t *parent; - hammer2_chain_t *chain; - hammer2_chain_t *rchain; + hammer2_cluster_t *cparent; + hammer2_cluster_t *rcluster; + hammer2_cluster_t *cluster; hammer2_key_t key_next; int error; - int cache_index = -1; + int ddflag; error = 0; - hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */ + hmp = ip->pmp->cluster.focus->hmp; /* XXX */ pfs = data; - parent = hammer2_inode_lock_ex(hmp->sroot); - rchain = hammer2_inode_lock_ex(ip->pmp->iroot); + cparent = hammer2_inode_lock_ex(hmp->sroot); + rcluster = hammer2_inode_lock_ex(ip->pmp->iroot); /* * Search for the first key or specific key. Remember that keys * can be returned in any order. 
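/*
 * Illustration (not part of the patch): the PFS ioctls iterate the
 * super-root one entry per call, returning the current PFS plus a
 * name_next cursor that the caller feeds back in to fetch the following
 * one, with (hammer2_key_t)-1 terminating the walk.  The toy below
 * models that cursor over a sorted key array; the real code walks
 * clusters under the super-root and, as noted above, the keys are not
 * required to come back in sorted order.
 */
#include <stdint.h>

#define TOY_KEY_END	((uint64_t)-1)

/*
 * keys[] is sorted ascending.  Return the index of the first entry with
 * key >= want (or -1 if none) and set *nextp to the key after it, or
 * TOY_KEY_END when the returned entry is the last one.
 */
static int
toy_pfs_get(const uint64_t *keys, int nkeys, uint64_t want, uint64_t *nextp)
{
	int i;

	for (i = 0; i < nkeys; ++i) {
		if (keys[i] >= want) {
			*nextp = (i + 1 < nkeys) ? keys[i + 1] : TOY_KEY_END;
			return (i);
		}
	}
	*nextp = TOY_KEY_END;
	return (-1);
}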
*/ if (pfs->name_key == 0) { - chain = hammer2_chain_lookup(&parent, &key_next, - 0, (hammer2_key_t)-1, - &cache_index, 0); + cluster = hammer2_cluster_lookup(cparent, &key_next, + 0, (hammer2_key_t)-1, + 0, &ddflag); } else if (pfs->name_key == (hammer2_key_t)-1) { - chain = hammer2_chain_lookup(&parent, &key_next, - rchain->data->ipdata.name_key, - rchain->data->ipdata.name_key, - &cache_index, 0); + ipdata = &hammer2_cluster_data(rcluster)->ipdata; + cluster = hammer2_cluster_lookup(cparent, &key_next, + ipdata->name_key, + ipdata->name_key, + 0, &ddflag); + ipdata = NULL; /* safety */ } else { - chain = hammer2_chain_lookup(&parent, &key_next, - pfs->name_key, pfs->name_key, - &cache_index, 0); + cluster = hammer2_cluster_lookup(cparent, &key_next, + pfs->name_key, pfs->name_key, + 0, &ddflag); } - hammer2_inode_unlock_ex(ip->pmp->iroot, rchain); + hammer2_inode_unlock_ex(ip->pmp->iroot, rcluster); - while (chain && chain->bref.type != HAMMER2_BREF_TYPE_INODE) { - chain = hammer2_chain_next(&parent, chain, &key_next, - key_next, (hammer2_key_t)-1, - &cache_index, 0); + while (cluster && + hammer2_cluster_type(cluster) != HAMMER2_BREF_TYPE_INODE) { + cluster = hammer2_cluster_next(cparent, cluster, &key_next, + key_next, (hammer2_key_t)-1, + 0); } - if (chain) { + if (cluster) { /* * Load the data being returned by the ioctl. */ - ipdata = &chain->data->ipdata; + ipdata = &hammer2_cluster_data(cluster)->ipdata; pfs->name_key = ipdata->name_key; pfs->pfs_type = ipdata->pfs_type; pfs->pfs_clid = ipdata->pfs_clid; @@ -408,13 +411,17 @@ hammer2_ioctl_pfs_get(hammer2_inode_t *ip, void *data) * Calculate the next field */ do { - chain = hammer2_chain_next(&parent, chain, &key_next, - 0, (hammer2_key_t)-1, - &cache_index, 0); - } while (chain && chain->bref.type != HAMMER2_BREF_TYPE_INODE); - if (chain) { - pfs->name_next = chain->data->ipdata.name_key; - hammer2_chain_unlock(chain); + cluster = hammer2_cluster_next(cparent, cluster, + &key_next, + 0, (hammer2_key_t)-1, + 0); + } while (cluster && + hammer2_cluster_type(cluster) != + HAMMER2_BREF_TYPE_INODE); + if (cluster) { + ipdata = &hammer2_cluster_data(cluster)->ipdata; + pfs->name_next = ipdata->name_key; + hammer2_cluster_unlock(cluster); } else { pfs->name_next = (hammer2_key_t)-1; } @@ -422,7 +429,7 @@ hammer2_ioctl_pfs_get(hammer2_inode_t *ip, void *data) pfs->name_next = (hammer2_key_t)-1; error = ENOENT; } - hammer2_inode_unlock_ex(hmp->sroot, parent); + hammer2_inode_unlock_ex(hmp->sroot, cparent); return (error); } @@ -436,54 +443,57 @@ hammer2_ioctl_pfs_lookup(hammer2_inode_t *ip, void *data) hammer2_inode_data_t *ipdata; hammer2_mount_t *hmp; hammer2_ioc_pfs_t *pfs; - hammer2_chain_t *parent; - hammer2_chain_t *chain; + hammer2_cluster_t *cparent; + hammer2_cluster_t *cluster; hammer2_key_t key_next; hammer2_key_t lhc; int error; - int cache_index = -1; + int ddflag; size_t len; error = 0; - hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */ + hmp = ip->pmp->cluster.focus->hmp; /* XXX */ pfs = data; - parent = hammer2_inode_lock_sh(hmp->sroot); + cparent = hammer2_inode_lock_sh(hmp->sroot); pfs->name[sizeof(pfs->name) - 1] = 0; len = strlen(pfs->name); lhc = hammer2_dirhash(pfs->name, len); - chain = hammer2_chain_lookup(&parent, &key_next, - lhc, lhc + HAMMER2_DIRHASH_LOMASK, - &cache_index, HAMMER2_LOOKUP_SHARED); - while (chain) { - if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && - len == chain->data->ipdata.name_len && - bcmp(pfs->name, chain->data->ipdata.filename, len) == 0) { - break; + cluster = 
hammer2_cluster_lookup(cparent, &key_next, + lhc, lhc + HAMMER2_DIRHASH_LOMASK, + HAMMER2_LOOKUP_SHARED, &ddflag); + while (cluster) { + if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) { + ipdata = &hammer2_cluster_data(cluster)->ipdata; + if (ipdata->name_len == len && + bcmp(ipdata->filename, pfs->name, len) == 0) { + break; + } + ipdata = NULL; /* safety */ } - chain = hammer2_chain_next(&parent, chain, &key_next, + cluster = hammer2_cluster_next(cparent, cluster, &key_next, key_next, lhc + HAMMER2_DIRHASH_LOMASK, - &cache_index, HAMMER2_LOOKUP_SHARED); + HAMMER2_LOOKUP_SHARED); } /* * Load the data being returned by the ioctl. */ - if (chain) { - ipdata = &chain->data->ipdata; + if (cluster) { + ipdata = &hammer2_cluster_data(cluster)->ipdata; pfs->name_key = ipdata->name_key; pfs->pfs_type = ipdata->pfs_type; pfs->pfs_clid = ipdata->pfs_clid; pfs->pfs_fsid = ipdata->pfs_fsid; ipdata = NULL; - hammer2_chain_unlock(chain); + hammer2_cluster_unlock(cluster); } else { error = ENOENT; } - hammer2_inode_unlock_sh(hmp->sroot, parent); + hammer2_inode_unlock_sh(hmp->sroot, cparent); return (error); } @@ -498,11 +508,11 @@ hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data) hammer2_mount_t *hmp; hammer2_ioc_pfs_t *pfs; hammer2_inode_t *nip; - hammer2_chain_t *nchain; + hammer2_cluster_t *ncluster; hammer2_trans_t trans; int error; - hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */ + hmp = ip->pmp->cluster.focus->hmp; /* XXX */ pfs = data; nip = NULL; @@ -513,9 +523,9 @@ hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data) hammer2_trans_init(&trans, ip->pmp, NULL, HAMMER2_TRANS_NEWINODE); nip = hammer2_inode_create(&trans, hmp->sroot, NULL, NULL, pfs->name, strlen(pfs->name), - &nchain, &error); + &ncluster, &error); if (error == 0) { - nipdata = hammer2_chain_modify_ip(&trans, nip, &nchain, + nipdata = hammer2_cluster_modify_ip(&trans, nip, ncluster, HAMMER2_MODIFY_ASSERTNOCOPY); nipdata->pfs_type = pfs->pfs_type; nipdata->pfs_clid = pfs->pfs_clid; @@ -527,9 +537,10 @@ hammer2_ioctl_pfs_create(hammer2_inode_t *ip, void *data) */ if (strcmp(pfs->name, "boot") == 0) nipdata->comp_algo = HAMMER2_COMP_AUTOZERO; - hammer2_inode_unlock_ex(nip, nchain); + hammer2_inode_unlock_ex(nip, ncluster); } hammer2_trans_done(&trans); + return (error); } @@ -544,7 +555,7 @@ hammer2_ioctl_pfs_delete(hammer2_inode_t *ip, void *data) hammer2_trans_t trans; int error; - hmp = ip->pmp->cluster.chains[0]->hmp; /* XXX */ + hmp = ip->pmp->cluster.focus->hmp; /* XXX */ hammer2_trans_init(&trans, ip->pmp, NULL, 0); error = hammer2_unlink_file(&trans, hmp->sroot, pfs->name, strlen(pfs->name), @@ -559,7 +570,7 @@ hammer2_ioctl_pfs_snapshot(hammer2_inode_t *ip, void *data) { hammer2_ioc_pfs_t *pfs = data; hammer2_trans_t trans; - hammer2_chain_t *parent; + hammer2_cluster_t *cparent; int error; if (pfs->name[0] == 0) @@ -570,9 +581,9 @@ hammer2_ioctl_pfs_snapshot(hammer2_inode_t *ip, void *data) hammer2_vfs_sync(ip->pmp->mp, MNT_WAIT); hammer2_trans_init(&trans, ip->pmp, NULL, HAMMER2_TRANS_NEWINODE); - parent = hammer2_inode_lock_ex(ip); - error = hammer2_chain_snapshot(&trans, &parent, pfs); - hammer2_inode_unlock_ex(ip, parent); + cparent = hammer2_inode_lock_ex(ip); + error = hammer2_cluster_snapshot(&trans, cparent, pfs); + hammer2_inode_unlock_ex(ip, cparent); hammer2_trans_done(&trans); return (error); @@ -585,12 +596,14 @@ static int hammer2_ioctl_inode_get(hammer2_inode_t *ip, void *data) { hammer2_ioc_inode_t *ino = data; - hammer2_chain_t *parent; + hammer2_inode_data_t *ipdata; + 
hammer2_cluster_t *cparent; - parent = hammer2_inode_lock_sh(ip); - ino->ip_data = ip->chain->data->ipdata; + cparent = hammer2_inode_lock_sh(ip); + ipdata = &hammer2_cluster_data(cparent)->ipdata; + ino->ip_data = *ipdata; ino->kdata = ip; - hammer2_inode_unlock_sh(ip, parent); + hammer2_inode_unlock_sh(ip, cparent); return (0); } @@ -604,15 +617,16 @@ hammer2_ioctl_inode_set(hammer2_inode_t *ip, void *data) { hammer2_inode_data_t *ipdata; hammer2_ioc_inode_t *ino = data; - hammer2_chain_t *chain; + hammer2_cluster_t *cparent; hammer2_trans_t trans; int error = 0; hammer2_trans_init(&trans, ip->pmp, NULL, 0); - chain = hammer2_inode_lock_ex(ip); + cparent = hammer2_inode_lock_ex(ip); + ipdata = &hammer2_cluster_data(cparent)->ipdata; - if (ino->ip_data.comp_algo != chain->data->ipdata.comp_algo) { - ipdata = hammer2_chain_modify_ip(&trans, ip, &chain, 0); + if (ino->ip_data.comp_algo != ipdata->comp_algo) { + ipdata = hammer2_cluster_modify_ip(&trans, ip, cparent, 0); ipdata->comp_algo = ino->ip_data.comp_algo; } ino->kdata = ip; @@ -625,7 +639,7 @@ hammer2_ioctl_inode_set(hammer2_inode_t *ip, void *data) if (ino->flags & HAMMER2IOC_INODE_FLAG_COPIES) { } hammer2_trans_done(&trans); - hammer2_inode_unlock_ex(ip, chain); + hammer2_inode_unlock_ex(ip, cparent); return (error); } diff --git a/sys/vfs/hammer2/hammer2_subr.c b/sys/vfs/hammer2/hammer2_subr.c index 5c934a45c7..0cb517a631 100644 --- a/sys/vfs/hammer2/hammer2_subr.c +++ b/sys/vfs/hammer2/hammer2_subr.c @@ -87,14 +87,12 @@ hammer2_voldata_unlock(hammer2_mount_t *hmp, int modify) * ip must be locked sh/ex. */ int -hammer2_get_dtype(hammer2_chain_t *chain) +hammer2_get_dtype(hammer2_inode_data_t *ipdata) { uint8_t type; - KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE); - - if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK) - type = chain->data->ipdata.target_type; + if ((type = ipdata->type) == HAMMER2_OBJTYPE_HARDLINK) + type = ipdata->target_type; switch(type) { case HAMMER2_OBJTYPE_UNKNOWN: @@ -127,11 +125,9 @@ hammer2_get_dtype(hammer2_chain_t *chain) * Return the directory entry type for an inode */ int -hammer2_get_vtype(hammer2_chain_t *chain) +hammer2_get_vtype(hammer2_inode_data_t *ipdata) { - KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE); - - switch(chain->data->ipdata.type) { + switch(ipdata->type) { case HAMMER2_OBJTYPE_UNKNOWN: return (VBAD); case HAMMER2_OBJTYPE_DIRECTORY: @@ -387,18 +383,19 @@ hammer2_calc_logical(hammer2_inode_t *ip, hammer2_off_t uoff, * Returns 0 if the requested base offset is beyond the file EOF. 
*/ int -hammer2_calc_physical(hammer2_inode_t *ip, hammer2_key_t lbase) +hammer2_calc_physical(hammer2_inode_t *ip, hammer2_inode_data_t *ipdata, + hammer2_key_t lbase) { int lblksize; int pblksize; int eofbytes; lblksize = hammer2_calc_logical(ip, lbase, NULL, NULL); - if (lbase + lblksize <= ip->chain->data->ipdata.size) + if (lbase + lblksize <= ipdata->size) return (lblksize); - if (lbase >= ip->chain->data->ipdata.size) + if (lbase >= ipdata->size) return (0); - eofbytes = (int)(ip->chain->data->ipdata.size - lbase); + eofbytes = (int)(ipdata->size - lbase); pblksize = lblksize; while (pblksize >= eofbytes && pblksize >= HAMMER2_MIN_ALLOC) pblksize >>= 1; @@ -415,3 +412,29 @@ hammer2_update_time(uint64_t *timep) getmicrotime(&tv); *timep = (unsigned long)tv.tv_sec * 1000000 + tv.tv_usec; } + +void +hammer2_adjreadcounter(hammer2_blockref_t *bref, size_t bytes) +{ + long *counterp; + + switch(bref->type) { + case HAMMER2_BREF_TYPE_DATA: + counterp = &hammer2_iod_file_read; + break; + case HAMMER2_BREF_TYPE_INODE: + counterp = &hammer2_iod_meta_read; + break; + case HAMMER2_BREF_TYPE_INDIRECT: + counterp = &hammer2_iod_indr_read; + break; + case HAMMER2_BREF_TYPE_FREEMAP_NODE: + case HAMMER2_BREF_TYPE_FREEMAP_LEAF: + counterp = &hammer2_iod_fmap_read; + break; + default: + counterp = &hammer2_iod_volu_read; + break; + } + *counterp += bytes; +} diff --git a/sys/vfs/hammer2/hammer2_vfsops.c b/sys/vfs/hammer2/hammer2_vfsops.c index 4edc56975d..219087316f 100644 --- a/sys/vfs/hammer2/hammer2_vfsops.c +++ b/sys/vfs/hammer2/hammer2_vfsops.c @@ -206,29 +206,29 @@ static void hammer2_vfs_unmount_hmp2(struct mount *mp, hammer2_mount_t *hmp); static void hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans, hammer2_inode_t *ip, hammer2_inode_data_t *ipdata, - hammer2_chain_t **parentp, + hammer2_cluster_t *cparent, hammer2_key_t lbase, int ioflag, int pblksize, int *errorp); static void hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans, hammer2_inode_t *ip, hammer2_inode_data_t *ipdata, - hammer2_chain_t **parentp, + hammer2_cluster_t *cparent, hammer2_key_t lbase, int ioflag, int pblksize, int *errorp, int comp_algo); static void hammer2_zero_check_and_write(struct buf *bp, hammer2_trans_t *trans, hammer2_inode_t *ip, hammer2_inode_data_t *ipdata, - hammer2_chain_t **parentp, + hammer2_cluster_t *cparent, hammer2_key_t lbase, int ioflag, int pblksize, int *errorp); static int test_block_zeros(const char *buf, size_t bytes); static void zero_write(struct buf *bp, hammer2_trans_t *trans, hammer2_inode_t *ip, hammer2_inode_data_t *ipdata, - hammer2_chain_t **parentp, + hammer2_cluster_t *cparent, hammer2_key_t lbase, int *errorp); -static void hammer2_write_bp(hammer2_chain_t *chain, struct buf *bp, +static void hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp, int ioflag, int pblksize, int *errorp); static int hammer2_rcvdmsg(kdmsg_msg_t *msg); @@ -343,8 +343,10 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, struct vnode *devvp; struct nlookupdata nd; hammer2_chain_t *parent; - hammer2_chain_t *schain; hammer2_chain_t *rchain; + hammer2_chain_t *schain; + hammer2_cluster_t *cluster; + hammer2_cluster_t *cparent; struct file *fp; char devstr[MNAMELEN]; size_t size; @@ -354,6 +356,7 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, int ronly = 1; int error; int cache_index; + int ddflag; int i; hmp = NULL; @@ -401,7 +404,7 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, /* HAMMER2 implements NFS export via 
mountctl */ pmp = MPTOPMP(mp); for (i = 0; i < pmp->cluster.nchains; ++i) { - hmp = pmp->cluster.chains[i]->hmp; + hmp = pmp->cluster.array[i]->hmp; devvp = hmp->devvp; error = hammer2_remount(hmp, mp, path, devvp, cred); @@ -556,7 +559,7 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, parent = hammer2_chain_lookup_init(&hmp->vchain, 0); schain = hammer2_chain_lookup(&parent, &key_dummy, HAMMER2_SROOT_KEY, HAMMER2_SROOT_KEY, - &cache_index, 0); + &cache_index, 0, &ddflag); hammer2_chain_lookup_done(parent); if (schain == NULL) { kprintf("hammer2_mount: invalid super-root\n"); @@ -571,9 +574,10 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, * NOTE: inode_get sucks up schain's lock. */ atomic_set_int(&schain->flags, HAMMER2_CHAIN_PFSROOT); - hmp->sroot = hammer2_inode_get(NULL, NULL, schain); + cluster = hammer2_cluster_from_chain(schain); + hmp->sroot = hammer2_inode_get(NULL, NULL, cluster); hammer2_inode_ref(hmp->sroot); - hammer2_inode_unlock_ex(hmp->sroot, schain); + hammer2_inode_unlock_ex(hmp->sroot, cluster); schain = NULL; /* leave hmp->sroot with one ref */ @@ -598,6 +602,7 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, RB_INIT(&pmp->inum_tree); TAILQ_INIT(&pmp->unlinkq); spin_init(&pmp->unlinkq_spin); + pmp->cluster.flags = HAMMER2_CLUSTER_PFS; kdmsg_iocom_init(&pmp->iocom, pmp, KDMSG_IOCOMF_AUTOCONN | @@ -634,67 +639,69 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, /* * Lookup mount point under the media-localized super-root. */ - parent = hammer2_inode_lock_ex(hmp->sroot); + cparent = hammer2_inode_lock_ex(hmp->sroot); lhc = hammer2_dirhash(label, strlen(label)); - rchain = hammer2_chain_lookup(&parent, &key_next, + cluster = hammer2_cluster_lookup(cparent, &key_next, lhc, lhc + HAMMER2_DIRHASH_LOMASK, - &cache_index, 0); - while (rchain) { - if (rchain->bref.type == HAMMER2_BREF_TYPE_INODE && - strcmp(label, rchain->data->ipdata.filename) == 0) { + 0, &ddflag); + while (cluster) { + if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE && + strcmp(label, + hammer2_cluster_data(cluster)->ipdata.filename) == 0) { break; } - rchain = hammer2_chain_next(&parent, rchain, &key_next, + cluster = hammer2_cluster_next(cparent, cluster, &key_next, key_next, - lhc + HAMMER2_DIRHASH_LOMASK, - &cache_index, 0); + lhc + HAMMER2_DIRHASH_LOMASK, 0); } - hammer2_inode_unlock_ex(hmp->sroot, parent); + hammer2_inode_unlock_ex(hmp->sroot, cparent); - if (rchain == NULL) { + if (cluster == NULL) { kprintf("hammer2_mount: PFS label not found\n"); hammer2_vfs_unmount_hmp1(mp, hmp); hammer2_vfs_unmount_hmp2(mp, hmp); hammer2_vfs_unmount(mp, MNT_FORCE); return EINVAL; } - if (rchain->flags & HAMMER2_CHAIN_MOUNTED) { - hammer2_chain_unlock(rchain); - kprintf("hammer2_mount: PFS label already mounted!\n"); - hammer2_vfs_unmount_hmp1(mp, hmp); - hammer2_vfs_unmount_hmp2(mp, hmp); - hammer2_vfs_unmount(mp, MNT_FORCE); - return EBUSY; - } + + for (i = 0; i < cluster->nchains; ++i) { + rchain = cluster->array[i]; + if (rchain->flags & HAMMER2_CHAIN_MOUNTED) { + kprintf("hammer2_mount: PFS label already mounted!\n"); + hammer2_cluster_unlock(cluster); + hammer2_vfs_unmount_hmp1(mp, hmp); + hammer2_vfs_unmount_hmp2(mp, hmp); + hammer2_vfs_unmount(mp, MNT_FORCE); + return EBUSY; + } #if 0 - if (rchain->flags & HAMMER2_CHAIN_RECYCLE) { - kprintf("hammer2_mount: PFS label currently recycling\n"); - hammer2_vfs_unmount_hmp1(mp, hmp); - hammer2_vfs_unmount_hmp2(mp, hmp); - hammer2_vfs_unmount(mp, MNT_FORCE); - return EBUSY; - } + if 
(rchain->flags & HAMMER2_CHAIN_RECYCLE) { + kprintf("hammer2_mount: PFS label is recycling\n"); + hammer2_cluster_unlock(cluster); + hammer2_vfs_unmount_hmp1(mp, hmp); + hammer2_vfs_unmount_hmp2(mp, hmp); + hammer2_vfs_unmount(mp, MNT_FORCE); + return EBUSY; + } #endif + } + /* * After this point hammer2_vfs_unmount() has visibility on hmp * and manual hmp1/hmp2 calls are not needed on fatal errors. */ - - atomic_set_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED); - - /* - * NOTE: *_get() integrates chain's lock into the inode lock. - */ - hammer2_chain_ref(rchain); /* for pmp->rchain */ - pmp->cluster.nchains = 1; - pmp->cluster.chains[0] = rchain; - pmp->iroot = hammer2_inode_get(pmp, NULL, rchain); + pmp->cluster = *cluster; + KKASSERT(pmp->cluster.refs == 1); + for (i = 0; i < cluster->nchains; ++i) { + rchain = cluster->array[i]; + KKASSERT(rchain->pmp == NULL); /* tracking pmp for rchain */ + rchain->pmp = pmp; + atomic_set_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED); + hammer2_chain_ref(rchain); /* ref for pmp->cluster */ + } + pmp->iroot = hammer2_inode_get(pmp, NULL, cluster); hammer2_inode_ref(pmp->iroot); /* ref for pmp->iroot */ - - KKASSERT(rchain->pmp == NULL); /* tracking pmp for rchain */ - rchain->pmp = pmp; - - hammer2_inode_unlock_ex(pmp->iroot, rchain); + hammer2_inode_unlock_ex(pmp->iroot, cluster); kprintf("iroot %p\n", pmp->iroot); @@ -761,8 +768,7 @@ hammer2_write_thread(void *arg) hammer2_trans_t trans; struct vnode *vp; hammer2_inode_t *ip; - hammer2_chain_t *parent; - hammer2_chain_t **parentp; + hammer2_cluster_t *cparent; hammer2_inode_data_t *ipdata; hammer2_key_t lbase; int lblksize; @@ -777,8 +783,7 @@ hammer2_write_thread(void *arg) mtxsleep(&pmp->wthread_bioq, &pmp->wthread_mtx, 0, "h2bioqw", 0); } - parent = NULL; - parentp = &parent; + cparent = NULL; hammer2_trans_init(&trans, pmp, NULL, HAMMER2_TRANS_BUFCACHE); @@ -817,21 +822,21 @@ hammer2_write_thread(void *arg) * inode's meta-data state, it doesn't try * to flush underlying buffers or chains. */ - parent = hammer2_inode_lock_ex(ip); + cparent = hammer2_inode_lock_ex(ip); if (ip->flags & (HAMMER2_INODE_RESIZED | HAMMER2_INODE_MTIME)) { - hammer2_inode_fsync(&trans, ip, parentp); + hammer2_inode_fsync(&trans, ip, cparent); } - ipdata = hammer2_chain_modify_ip(&trans, ip, - parentp, 0); + ipdata = hammer2_cluster_modify_ip(&trans, ip, + cparent, 0); lblksize = hammer2_calc_logical(ip, bio->bio_offset, &lbase, NULL); - pblksize = hammer2_calc_physical(ip, lbase); + pblksize = hammer2_calc_physical(ip, ipdata, lbase); hammer2_write_file_core(bp, &trans, ip, ipdata, - parentp, + cparent, lbase, IO_ASYNC, pblksize, &error); - hammer2_inode_unlock_ex(ip, parent); + hammer2_inode_unlock_ex(ip, cparent); if (error) { kprintf("hammer2: error in buffer write\n"); bp->b_flags |= B_ERROR; @@ -869,17 +874,15 @@ hammer2_bioq_sync(hammer2_pfsmount_t *pmp) * and assigning its physical block. */ static -hammer2_chain_t * +hammer2_cluster_t * hammer2_assign_physical(hammer2_trans_t *trans, - hammer2_inode_t *ip, hammer2_chain_t **parentp, + hammer2_inode_t *ip, hammer2_cluster_t *cparent, hammer2_key_t lbase, int pblksize, int *errorp) { - hammer2_chain_t *parent; - hammer2_chain_t *chain; - hammer2_off_t pbase; + hammer2_cluster_t *cluster; hammer2_key_t key_dummy; int pradix = hammer2_getradix(pblksize); - int cache_index = -1; + int ddflag; /* * Locate the chain associated with lbase, return a locked chain. 
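The hunks above and below all apply the same conversion: code that used to hold a single hammer2_chain_t now holds a hammer2_cluster_t, reading through a representative focus chain and iterating cluster->array[0..nchains-1] whenever every copy must be touched (as hammer2_write_bp() does further down). The fragment below is a minimal user-space sketch of that idiom, not the in-kernel API: the field names mirror the patch's nchains/focus/array members, but the stand-in types and the helper are hypothetical.

#include <stdio.h>

#define H2_MAXCLUSTER	8	/* up to 8 chains per cluster in this patch */

struct chain {			/* stand-in for hammer2_chain_t */
	int	id;
	int	modified;
};

struct cluster {		/* stand-in for hammer2_cluster_t */
	int		nchains;
	struct chain	*focus;	/* chain used to satisfy reads */
	struct chain	*array[H2_MAXCLUSTER];
};

/*
 * Modifications fan out across every backing chain, mirroring the loop
 * shape of hammer2_write_bp() in this patch; reads use only the focus.
 */
static void
cluster_mark_modified(struct cluster *cl)
{
	int i;

	for (i = 0; i < cl->nchains; ++i)
		cl->array[i]->modified = 1;
}

int
main(void)
{
	struct chain a = { 1, 0 };
	struct chain b = { 2, 0 };
	struct cluster cl = { 2, &a, { &a, &b } };

	cluster_mark_modified(&cl);
	printf("focus=%d modified=%d/%d\n", cl.focus->id, a.modified, b.modified);
	return (0);
}

In the mount hunks above this is the shape being adopted: hammer2_cluster_from_chain() wraps the single super-root chain, and pmp->cluster takes over the looked-up PFS cluster wholesale before each array entry is ref'd and flagged MOUNTED.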
@@ -890,34 +893,31 @@ hammer2_assign_physical(hammer2_trans_t *trans, *errorp = 0; KKASSERT(pblksize >= HAMMER2_MIN_ALLOC); retry: - parent = *parentp; - hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); /* extra lock */ - chain = hammer2_chain_lookup(&parent, &key_dummy, + hammer2_cluster_lock(cparent, HAMMER2_RESOLVE_ALWAYS); /* extra lock */ + cluster = hammer2_cluster_lookup(cparent, &key_dummy, lbase, lbase, - &cache_index, HAMMER2_LOOKUP_NODATA); + HAMMER2_LOOKUP_NODATA, &ddflag); - if (chain == NULL) { + if (cluster == NULL) { /* * We found a hole, create a new chain entry. * * NOTE: DATA chains are created without device backing * store (nor do we want any). */ - *errorp = hammer2_chain_create(trans, &parent, &chain, + *errorp = hammer2_cluster_create(trans, cparent, &cluster, lbase, HAMMER2_PBUFRADIX, HAMMER2_BREF_TYPE_DATA, pblksize); - if (chain == NULL) { - hammer2_chain_lookup_done(parent); - panic("hammer2_chain_create: par=%p error=%d\n", - parent, *errorp); + if (cluster == NULL) { + hammer2_cluster_lookup_done(cparent); + panic("hammer2_cluster_create: par=%p error=%d\n", + cparent->focus, *errorp); goto retry; } - - pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX; /*ip->delta_dcount += pblksize;*/ } else { - switch (chain->bref.type) { + switch (hammer2_cluster_type(cluster)) { case HAMMER2_BREF_TYPE_INODE: /* * The data is embedded in the inode. The @@ -925,43 +925,35 @@ retry: * modified and copying the data to the embedded * area. */ - pbase = NOOFFSET; break; case HAMMER2_BREF_TYPE_DATA: - if (chain->bytes != pblksize) { - hammer2_chain_resize(trans, ip, - parent, &chain, + if (hammer2_cluster_bytes(cluster) != pblksize) { + hammer2_cluster_resize(trans, ip, + cparent, cluster, pradix, HAMMER2_MODIFY_OPTDATA); } - hammer2_chain_modify(trans, &chain, + hammer2_cluster_modify(trans, cluster, HAMMER2_MODIFY_OPTDATA); - pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX; break; default: panic("hammer2_assign_physical: bad type"); /* NOT REACHED */ - pbase = NOOFFSET; break; } } /* * Cleanup. If chain wound up being the inode (i.e. DIRECTDATA), - * we might have to replace *parentp. + * we need to update cparent. The caller expects cparent to not + * become stale. */ - hammer2_chain_lookup_done(parent); - if (chain) { - if (*parentp != chain && - (*parentp)->core == chain->core) { - parent = *parentp; - *parentp = chain; /* eats lock */ - hammer2_chain_unlock(parent); - hammer2_chain_lock(chain, 0); /* need another */ - } - /* else chain already locked for return */ + hammer2_cluster_lookup_done(cparent); + if (cluster && ddflag) { + kprintf("replace parent XXX\n"); + hammer2_cluster_replace_locked(cparent, cluster); } - return (chain); + return (cluster); } /* @@ -973,11 +965,11 @@ static void hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans, hammer2_inode_t *ip, hammer2_inode_data_t *ipdata, - hammer2_chain_t **parentp, + hammer2_cluster_t *cparent, hammer2_key_t lbase, int ioflag, int pblksize, int *errorp) { - hammer2_chain_t *chain; + hammer2_cluster_t *cluster; switch(HAMMER2_DEC_COMP(ipdata->comp_algo)) { case HAMMER2_COMP_NONE: @@ -989,19 +981,19 @@ hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans, * This can return NOOFFSET for inode-embedded data. * The strategy code will take care of it in that case. 
*/ - chain = hammer2_assign_physical(trans, ip, parentp, + cluster = hammer2_assign_physical(trans, ip, cparent, lbase, pblksize, errorp); - hammer2_write_bp(chain, bp, ioflag, pblksize, errorp); - if (chain) - hammer2_chain_unlock(chain); + hammer2_write_bp(cluster, bp, ioflag, pblksize, errorp); + if (cluster) + hammer2_cluster_unlock(cluster); break; case HAMMER2_COMP_AUTOZERO: /* * Check for zero-fill only */ hammer2_zero_check_and_write(bp, trans, ip, - ipdata, parentp, lbase, + ipdata, cparent, lbase, ioflag, pblksize, errorp); break; case HAMMER2_COMP_LZ4: @@ -1011,17 +1003,15 @@ hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans, * Check for zero-fill and attempt compression. */ hammer2_compress_and_write(bp, trans, ip, - ipdata, parentp, + ipdata, cparent, lbase, ioflag, pblksize, errorp, ipdata->comp_algo); break; } - /* ipdata = &ip->chain->data->ipdata; reload (not needed here) */ } /* - * From hammer2_vnops.c * Generic function that will perform the compression in compression * write path. The compression algorithm is determined by the settings * obtained from inode. @@ -1030,17 +1020,19 @@ static void hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans, hammer2_inode_t *ip, hammer2_inode_data_t *ipdata, - hammer2_chain_t **parentp, + hammer2_cluster_t *cparent, hammer2_key_t lbase, int ioflag, int pblksize, int *errorp, int comp_algo) { + hammer2_cluster_t *cluster; hammer2_chain_t *chain; int comp_size; int comp_block_size; + int i; char *comp_buffer; if (test_block_zeros(bp->b_data, pblksize)) { - zero_write(bp, trans, ip, ipdata, parentp, lbase, errorp); + zero_write(bp, trans, ip, ipdata, cparent, lbase, errorp); return; } @@ -1140,21 +1132,24 @@ hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans, } } - chain = hammer2_assign_physical(trans, ip, parentp, - lbase, comp_block_size, - errorp); - ipdata = &ip->chain->data->ipdata; /* RELOAD */ + cluster = hammer2_assign_physical(trans, ip, cparent, + lbase, comp_block_size, + errorp); + ipdata = &hammer2_cluster_data(&ip->cluster)->ipdata; if (*errorp) { kprintf("WRITE PATH: An error occurred while " "assigning physical space.\n"); - KKASSERT(chain == NULL); - } else { - /* Get device offset */ + KKASSERT(cluster == NULL); + goto done; + } + + for (i = 0; i < cluster->nchains; ++i) { hammer2_io_t *dio; char *bdata; int temp_check; + chain = cluster->array[i]; KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED); switch(chain->bref.type) { @@ -1237,6 +1232,7 @@ hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans, hammer2_chain_unlock(chain); } +done: if (comp_buffer) objcache_put(cache_buffer_write, comp_buffer); } @@ -1249,19 +1245,19 @@ static void hammer2_zero_check_and_write(struct buf *bp, hammer2_trans_t *trans, hammer2_inode_t *ip, hammer2_inode_data_t *ipdata, - hammer2_chain_t **parentp, + hammer2_cluster_t *cparent, hammer2_key_t lbase, int ioflag, int pblksize, int *errorp) { - hammer2_chain_t *chain; + hammer2_cluster_t *cluster; if (test_block_zeros(bp->b_data, pblksize)) { - zero_write(bp, trans, ip, ipdata, parentp, lbase, errorp); + zero_write(bp, trans, ip, ipdata, cparent, lbase, errorp); } else { - chain = hammer2_assign_physical(trans, ip, parentp, - lbase, pblksize, errorp); - hammer2_write_bp(chain, bp, ioflag, pblksize, errorp); - if (chain) - hammer2_chain_unlock(chain); + cluster = hammer2_assign_physical(trans, ip, cparent, + lbase, pblksize, errorp); + hammer2_write_bp(cluster, bp, ioflag, pblksize, errorp); + if (cluster) + 
hammer2_cluster_unlock(cluster); } } @@ -1288,28 +1284,28 @@ test_block_zeros(const char *buf, size_t bytes) static void zero_write(struct buf *bp, hammer2_trans_t *trans, hammer2_inode_t *ip, - hammer2_inode_data_t *ipdata, hammer2_chain_t **parentp, + hammer2_inode_data_t *ipdata, hammer2_cluster_t *cparent, hammer2_key_t lbase, int *errorp __unused) { - hammer2_chain_t *parent; - hammer2_chain_t *chain; + hammer2_cluster_t *cluster; + hammer2_media_data_t *data; hammer2_key_t key_dummy; - int cache_index = -1; + int ddflag; - parent = hammer2_chain_lookup_init(*parentp, 0); + cparent = hammer2_cluster_lookup_init(cparent, 0); + cluster = hammer2_cluster_lookup(cparent, &key_dummy, lbase, lbase, + HAMMER2_LOOKUP_NODATA, &ddflag); + if (cluster) { + data = hammer2_cluster_data(cluster); - chain = hammer2_chain_lookup(&parent, &key_dummy, lbase, lbase, - &cache_index, HAMMER2_LOOKUP_NODATA); - if (chain) { - if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { - bzero(chain->data->ipdata.u.data, - HAMMER2_EMBEDDED_BYTES); + if (ddflag) { + bzero(data->ipdata.u.data, HAMMER2_EMBEDDED_BYTES); } else { - hammer2_chain_delete(trans, chain, 0); + hammer2_cluster_delete(trans, cluster, 0); } - hammer2_chain_unlock(chain); + hammer2_cluster_unlock(cluster); } - hammer2_chain_lookup_done(parent); + hammer2_cluster_lookup_done(cparent); } /* @@ -1319,66 +1315,81 @@ zero_write(struct buf *bp, hammer2_trans_t *trans, hammer2_inode_t *ip, */ static void -hammer2_write_bp(hammer2_chain_t *chain, struct buf *bp, int ioflag, +hammer2_write_bp(hammer2_cluster_t *cluster, struct buf *bp, int ioflag, int pblksize, int *errorp) { + hammer2_chain_t *chain; hammer2_io_t *dio; char *bdata; int error; - int temp_check = HAMMER2_DEC_CHECK(chain->bref.methods); + int i; + int temp_check; - KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED); + error = 0; /* XXX TODO below */ - switch(chain->bref.type) { - case HAMMER2_BREF_TYPE_INODE: - KKASSERT(chain->data->ipdata.op_flags & - HAMMER2_OPFLAG_DIRECTDATA); - KKASSERT(bp->b_loffset == 0); - bcopy(bp->b_data, chain->data->ipdata.u.data, - HAMMER2_EMBEDDED_BYTES); - error = 0; - break; - case HAMMER2_BREF_TYPE_DATA: - error = hammer2_io_newnz(chain->hmp, chain->bref.data_off, - chain->bytes, &dio); - if (error) { - hammer2_io_bqrelse(&dio); - kprintf("hammer2: WRITE PATH: dbp bread error\n"); + for (i = 0; i < cluster->nchains; ++i) { + chain = cluster->array[i]; + + temp_check = HAMMER2_DEC_CHECK(chain->bref.methods); + + KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED); + + switch(chain->bref.type) { + case HAMMER2_BREF_TYPE_INODE: + KKASSERT(chain->data->ipdata.op_flags & + HAMMER2_OPFLAG_DIRECTDATA); + KKASSERT(bp->b_loffset == 0); + bcopy(bp->b_data, chain->data->ipdata.u.data, + HAMMER2_EMBEDDED_BYTES); + error = 0; break; - } - bdata = hammer2_io_data(dio, chain->bref.data_off); + case HAMMER2_BREF_TYPE_DATA: + error = hammer2_io_newnz(chain->hmp, + chain->bref.data_off, + chain->bytes, &dio); + if (error) { + hammer2_io_bqrelse(&dio); + kprintf("hammer2: WRITE PATH: " + "dbp bread error\n"); + break; + } + bdata = hammer2_io_data(dio, chain->bref.data_off); - chain->bref.methods = HAMMER2_ENC_COMP(HAMMER2_COMP_NONE) + - HAMMER2_ENC_CHECK(temp_check); - bcopy(bp->b_data, bdata, chain->bytes); - - /* - * Device buffer is now valid, chain is no - * longer in the initial state. 
- */ - atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); + chain->bref.methods = HAMMER2_ENC_COMP( + HAMMER2_COMP_NONE) + + HAMMER2_ENC_CHECK(temp_check); + bcopy(bp->b_data, bdata, chain->bytes); - if (ioflag & IO_SYNC) { /* - * Synchronous I/O requested. + * Device buffer is now valid, chain is no + * longer in the initial state. */ - hammer2_io_bwrite(&dio); - /* - } else if ((ioflag & IO_DIRECT) && loff + n == pblksize) { - hammer2_io_bdwrite(&dio); - */ - } else if (ioflag & IO_ASYNC) { - hammer2_io_bawrite(&dio); - } else { - hammer2_io_bdwrite(&dio); + atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); + + if (ioflag & IO_SYNC) { + /* + * Synchronous I/O requested. + */ + hammer2_io_bwrite(&dio); + /* + } else if ((ioflag & IO_DIRECT) && + loff + n == pblksize) { + hammer2_io_bdwrite(&dio); + */ + } else if (ioflag & IO_ASYNC) { + hammer2_io_bawrite(&dio); + } else { + hammer2_io_bdwrite(&dio); + } + break; + default: + panic("hammer2_write_bp: bad chain type %d\n", + chain->bref.type); + /* NOT REACHED */ + error = 0; + break; } - break; - default: - panic("hammer2_write_bp: bad chain type %d\n", - chain->bref.type); - /* NOT REACHED */ - error = 0; - break; + KKASSERT(error == 0); /* XXX TODO */ } *errorp = error; } @@ -1472,11 +1483,11 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags) } for (i = 0; i < pmp->cluster.nchains; ++i) { - hmp = pmp->cluster.chains[i]->hmp; + hmp = pmp->cluster.array[i]->hmp; hammer2_vfs_unmount_hmp1(mp, hmp); - rchain = pmp->cluster.chains[i]; + rchain = pmp->cluster.array[i]; if (rchain) { atomic_clear_int(&rchain->flags, HAMMER2_CHAIN_MOUNTED); #if REPORT_REFS_ERRORS @@ -1487,7 +1498,7 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags) KKASSERT(rchain->refs == 1); #endif hammer2_chain_drop(rchain); - pmp->cluster.chains[i] = NULL; + pmp->cluster.array[i] = NULL; } hammer2_vfs_unmount_hmp2(mp, hmp); @@ -1642,7 +1653,7 @@ int hammer2_vfs_root(struct mount *mp, struct vnode **vpp) { hammer2_pfsmount_t *pmp; - hammer2_chain_t *parent; + hammer2_cluster_t *cparent; int error; struct vnode *vp; @@ -1651,9 +1662,9 @@ hammer2_vfs_root(struct mount *mp, struct vnode **vpp) *vpp = NULL; error = EINVAL; } else { - parent = hammer2_inode_lock_sh(pmp->iroot); + cparent = hammer2_inode_lock_sh(pmp->iroot); vp = hammer2_igetv(pmp->iroot, &error); - hammer2_inode_unlock_sh(pmp->iroot, parent); + hammer2_inode_unlock_sh(pmp->iroot, cparent); *vpp = vp; if (vp == NULL) kprintf("vnodefail\n"); @@ -1676,7 +1687,7 @@ hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) pmp = MPTOPMP(mp); KKASSERT(pmp->cluster.nchains >= 1); - hmp = pmp->cluster.chains[0]->hmp; /* XXX */ + hmp = pmp->cluster.focus->hmp; /* XXX */ mp->mnt_stat.f_files = pmp->inode_count; mp->mnt_stat.f_ffree = 0; @@ -1697,7 +1708,7 @@ hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred) pmp = MPTOPMP(mp); KKASSERT(pmp->cluster.nchains >= 1); - hmp = pmp->cluster.chains[0]->hmp; /* XXX */ + hmp = pmp->cluster.focus->hmp; /* XXX */ mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE; mp->mnt_vstat.f_files = pmp->inode_count; @@ -1953,7 +1964,7 @@ hammer2_vfs_sync(struct mount *mp, int waitfor) total_error = 0; for (i = 0; i < pmp->cluster.nchains; ++i) { - hmp = pmp->cluster.chains[i]->hmp; + hmp = pmp->cluster.array[i]->hmp; /* * Media mounts have two 'roots', vchain for the topology @@ -2252,11 +2263,11 @@ void hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp) { hammer2_inode_data_t *ipdata; - hammer2_chain_t *parent; + 
hammer2_cluster_t *cparent; hammer2_mount_t *hmp; size_t name_len; - hmp = pmp->cluster.chains[0]->hmp; /* XXX */ + hmp = pmp->cluster.focus->hmp; /* XXX */ /* * Closes old comm descriptor, kills threads, cleans up @@ -2268,8 +2279,8 @@ hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp) /* * Setup LNK_CONN fields for autoinitiated state machine */ - parent = hammer2_inode_lock_ex(pmp->iroot); - ipdata = &parent->data->ipdata; + cparent = hammer2_inode_lock_ex(pmp->iroot); + ipdata = &hammer2_cluster_data(cparent)->ipdata; pmp->iocom.auto_lnk_conn.pfs_clid = ipdata->pfs_clid; pmp->iocom.auto_lnk_conn.pfs_fsid = ipdata->pfs_fsid; pmp->iocom.auto_lnk_conn.pfs_type = ipdata->pfs_type; @@ -2315,7 +2326,7 @@ hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp) pmp->iocom.auto_lnk_span.fs_label, name_len); pmp->iocom.auto_lnk_span.fs_label[name_len] = 0; - hammer2_inode_unlock_ex(pmp->iroot, parent); + hammer2_inode_unlock_ex(pmp->iroot, cparent); kdmsg_iocom_autoinitiate(&pmp->iocom, hammer2_autodmsg); } @@ -2369,7 +2380,7 @@ static void hammer2_autodmsg(kdmsg_msg_t *msg) { hammer2_pfsmount_t *pmp = msg->iocom->handle; - hammer2_mount_t *hmp = pmp->cluster.chains[0]->hmp; /* XXX */ + hammer2_mount_t *hmp = pmp->cluster.focus->hmp; /* XXX */ int copyid; /* @@ -2412,7 +2423,7 @@ hammer2_autodmsg(kdmsg_msg_t *msg) void hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index) { - hammer2_mount_t *hmp = pmp->cluster.chains[0]->hmp; /* XXX */ + hammer2_mount_t *hmp = pmp->cluster.focus->hmp; /* XXX */ kdmsg_msg_t *msg; /* XXX interlock against connection state termination */ @@ -2476,6 +2487,100 @@ hammer2_lwinprog_wait(hammer2_pfsmount_t *pmp) } } +/* + * Manage excessive memory resource use for chain and related + * structures. + */ +void +hammer2_pfs_memory_wait(hammer2_pfsmount_t *pmp) +{ + long waiting; + long count; + long limit; +#if 0 + static int zzticks; +#endif + + /* + * Atomic check condition and wait. Also do an early speedup of + * the syncer to try to avoid hitting the wait. + */ + for (;;) { + waiting = pmp->inmem_dirty_chains; + cpu_ccfence(); + count = waiting & HAMMER2_DIRTYCHAIN_MASK; + + limit = pmp->mp->mnt_nvnodelistsize / 10; + if (limit < hammer2_limit_dirty_chains) + limit = hammer2_limit_dirty_chains; + if (limit < 1000) + limit = 1000; + +#if 0 + if ((int)(ticks - zzticks) > hz) { + zzticks = ticks; + kprintf("count %ld %ld\n", count, limit); + } +#endif + + /* + * Block if there are too many dirty chains present, wait + * for the flush to clean some out. + */ + if (count > limit) { + tsleep_interlock(&pmp->inmem_dirty_chains, 0); + if (atomic_cmpset_long(&pmp->inmem_dirty_chains, + waiting, + waiting | HAMMER2_DIRTYCHAIN_WAITING)) { + speedup_syncer(pmp->mp); + tsleep(&pmp->inmem_dirty_chains, PINTERLOCKED, + "chnmem", hz); + } + continue; /* loop on success or fail */ + } + + /* + * Try to start an early flush before we are forced to block. 
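+		 * Kicking the syncer once the dirty-chain count passes
+		 * roughly 70% of the limit gives the flusher a head start,
+		 * so the hard tsleep above is rarely reached.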
+ */ + if (count > limit * 7 / 10) + speedup_syncer(pmp->mp); + break; + } +} + +void +hammer2_pfs_memory_inc(hammer2_pfsmount_t *pmp) +{ + if (pmp) + atomic_add_long(&pmp->inmem_dirty_chains, 1); +} + +void +hammer2_pfs_memory_wakeup(hammer2_pfsmount_t *pmp) +{ + long waiting; + + if (pmp == NULL) + return; + + for (;;) { + waiting = pmp->inmem_dirty_chains; + cpu_ccfence(); + if (atomic_cmpset_long(&pmp->inmem_dirty_chains, + waiting, + (waiting - 1) & + ~HAMMER2_DIRTYCHAIN_WAITING)) { + break; + } + } + + if (waiting & HAMMER2_DIRTYCHAIN_WAITING) + wakeup(&pmp->inmem_dirty_chains); +} + +/* + * Debugging + */ void hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp, char pfx) { diff --git a/sys/vfs/hammer2/hammer2_vnops.c b/sys/vfs/hammer2/hammer2_vnops.c index 361bc2a9ab..68d97d3564 100644 --- a/sys/vfs/hammer2/hammer2_vnops.c +++ b/sys/vfs/hammer2/hammer2_vnops.c @@ -70,12 +70,6 @@ static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio, int ioflag, int seqcount); static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize); static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize); -static void hammer2_decompress_LZ4_callback(hammer2_io_t *dio, - hammer2_chain_t *arg_c, - void *arg_p, off_t arg_o); -static void hammer2_decompress_ZLIB_callback(hammer2_io_t *dio, - hammer2_chain_t *arg_c, - void *arg_p, off_t arg_o); struct objcache *cache_buffer_read; struct objcache *cache_buffer_write; @@ -85,72 +79,44 @@ struct objcache *cache_buffer_write; */ static void -hammer2_decompress_LZ4_callback(hammer2_io_t *dio, hammer2_chain_t *arg_c, - void *arg_p, off_t arg_o) +hammer2_decompress_LZ4_callback(const char *data, u_int bytes, struct bio *bio) { - struct buf *obp; - struct bio *obio = arg_p; - char *bdata; - int bytes = 1 << (int)(arg_o & HAMMER2_OFF_MASK_RADIX); + struct buf *bp; + char *compressed_buffer; + int compressed_size; + int result; - /* - * If BIO_DONE is already set the device buffer was already - * fully valid (B_CACHE). If it is not set then I/O was issued - * and we have to run I/O completion as the last bio. - * - * Nobody is waiting for our device I/O to complete, we are - * responsible for bqrelse()ing it which means we also have to do - * the equivalent of biowait() and clear BIO_DONE (which breadcb() - * may have set). - * - * Any preexisting device buffer should match the requested size, - * but due to bigblock recycling and other factors there is some - * fragility there, so we assert that the device buffer covers - * the request. - */ - obp = obio->bio_buf; + bp = bio->bio_buf; - if (dio->bp->b_flags & B_ERROR) { - obp->b_flags |= B_ERROR; - obp->b_error = dio->bp->b_error; #if 0 - } else if (obio->bio_caller_info2.index && - obio->bio_caller_info1.uvalue32 != - crc32(bp->b_data, bp->b_bufsize)) { - obp->b_flags |= B_ERROR; - obp->b_error = EIO; + if bio->bio_caller_info2.index && + bio->bio_caller_info1.uvalue32 != + crc32(bp->b_data, bp->b_bufsize) --- return error #endif - } else { - char *compressed_buffer; - int *compressed_size; - int result; - - KKASSERT(obp->b_bufsize <= HAMMER2_PBUFSIZE); - bdata = hammer2_io_data(dio, arg_o); - compressed_size = (int *)bdata; - compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT); - KKASSERT((unsigned int)*compressed_size <= HAMMER2_PBUFSIZE); - result = LZ4_decompress_safe(&bdata[sizeof(int)], - compressed_buffer, - *compressed_size, - obp->b_bufsize); - if (result < 0) { - kprintf("READ PATH: Error during decompression." 
- "bio %016jx/%d log %016jx/%d\n", - (intmax_t)dio->pbase, dio->psize, - (intmax_t)arg_o, bytes); - /* make sure it isn't random garbage */ - bzero(compressed_buffer, obp->b_bufsize); - } - KKASSERT(result <= obp->b_bufsize); - bcopy(compressed_buffer, obp->b_data, obp->b_bufsize); - if (result < obp->b_bufsize) - bzero(obp->b_data + result, obp->b_bufsize - result); - objcache_put(cache_buffer_read, compressed_buffer); - obp->b_resid = 0; - obp->b_flags |= B_AGE; + + KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE); + compressed_size = *(const int *)data; + KKASSERT(compressed_size <= bytes - sizeof(int)); + + compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT); + result = LZ4_decompress_safe(__DECONST(char *, &data[sizeof(int)]), + compressed_buffer, + compressed_size, + bp->b_bufsize); + if (result < 0) { + kprintf("READ PATH: Error during decompression." + "bio %016jx/%d\n", + (intmax_t)bio->bio_offset, bytes); + /* make sure it isn't random garbage */ + bzero(compressed_buffer, bp->b_bufsize); } - biodone(obio); + KKASSERT(result <= bp->b_bufsize); + bcopy(compressed_buffer, bp->b_data, bp->b_bufsize); + if (result < bp->b_bufsize) + bzero(bp->b_data + result, bp->b_bufsize - result); + objcache_put(cache_buffer_read, compressed_buffer); + bp->b_resid = 0; + bp->b_flags |= B_AGE; } /* @@ -160,80 +126,47 @@ hammer2_decompress_LZ4_callback(hammer2_io_t *dio, hammer2_chain_t *arg_c, */ static void -hammer2_decompress_ZLIB_callback(hammer2_io_t *dio, hammer2_chain_t *arg_c, - void *arg_p, off_t arg_o) +hammer2_decompress_ZLIB_callback(const char *data, u_int bytes, struct bio *bio) { - struct buf *obp; - struct bio *obio = arg_p; - char *bdata; - int bytes = 1 << (int)(arg_o & HAMMER2_OFF_MASK_RADIX); + struct buf *bp; + char *compressed_buffer; + z_stream strm_decompress; + int result; + int ret; - /* - * If BIO_DONE is already set the device buffer was already - * fully valid (B_CACHE). If it is not set then I/O was issued - * and we have to run I/O completion as the last bio. - * - * Nobody is waiting for our device I/O to complete, we are - * responsible for bqrelse()ing it which means we also have to do - * the equivalent of biowait() and clear BIO_DONE (which breadcb() - * may have set). - * - * Any preexisting device buffer should match the requested size, - * but due to bigblock recycling and other factors there is some - * fragility there, so we assert that the device buffer covers - * the request. 
- */ - obp = obio->bio_buf; + bp = bio->bio_buf; - if (dio->bp->b_flags & B_ERROR) { - obp->b_flags |= B_ERROR; - obp->b_error = dio->bp->b_error; -#if 0 - } else if (obio->bio_caller_info2.index && - obio->bio_caller_info1.uvalue32 != - crc32(bp->b_data, bp->b_bufsize)) { - obp->b_flags |= B_ERROR; - obp->b_error = EIO; -#endif - } else { - char *compressed_buffer; - z_stream strm_decompress; - int result; - int ret; - - KKASSERT(obp->b_bufsize <= HAMMER2_PBUFSIZE); - strm_decompress.avail_in = 0; - strm_decompress.next_in = Z_NULL; - - ret = inflateInit(&strm_decompress); - - if (ret != Z_OK) - kprintf("HAMMER2 ZLIB: Fatal error in inflateInit.\n"); - - bdata = hammer2_io_data(dio, arg_o); - compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT); - strm_decompress.next_in = bdata; - - /* XXX supply proper size, subset of device bp */ - strm_decompress.avail_in = bytes; - strm_decompress.next_out = compressed_buffer; - strm_decompress.avail_out = obp->b_bufsize; - - ret = inflate(&strm_decompress, Z_FINISH); - if (ret != Z_STREAM_END) { - kprintf("HAMMER2 ZLIB: Fatar error during decompression.\n"); - bzero(compressed_buffer, obp->b_bufsize); - } - bcopy(compressed_buffer, obp->b_data, obp->b_bufsize); - result = obp->b_bufsize - strm_decompress.avail_out; - if (result < obp->b_bufsize) - bzero(obp->b_data + result, strm_decompress.avail_out); - objcache_put(cache_buffer_read, compressed_buffer); - obp->b_resid = 0; - obp->b_flags |= B_AGE; - ret = inflateEnd(&strm_decompress); + KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE); + strm_decompress.avail_in = 0; + strm_decompress.next_in = Z_NULL; + + ret = inflateInit(&strm_decompress); + + if (ret != Z_OK) + kprintf("HAMMER2 ZLIB: Fatal error in inflateInit.\n"); + + compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT); + strm_decompress.next_in = __DECONST(char *, data); + + /* XXX supply proper size, subset of device bp */ + strm_decompress.avail_in = bytes; + strm_decompress.next_out = compressed_buffer; + strm_decompress.avail_out = bp->b_bufsize; + + ret = inflate(&strm_decompress, Z_FINISH); + if (ret != Z_STREAM_END) { + kprintf("HAMMER2 ZLIB: Fatar error during decompression.\n"); + bzero(compressed_buffer, bp->b_bufsize); } - biodone(obio); + bcopy(compressed_buffer, bp->b_data, bp->b_bufsize); + result = bp->b_bufsize - strm_decompress.avail_out; + if (result < bp->b_bufsize) + bzero(bp->b_data + result, strm_decompress.avail_out); + objcache_put(cache_buffer_read, compressed_buffer); + ret = inflateEnd(&strm_decompress); + + bp->b_resid = 0; + bp->b_flags |= B_AGE; } static __inline @@ -252,7 +185,7 @@ int hammer2_vop_inactive(struct vop_inactive_args *ap) { hammer2_inode_t *ip; - hammer2_chain_t *parent; + hammer2_cluster_t *cparent; struct vnode *vp; vp = ap->a_vp; @@ -271,17 +204,17 @@ hammer2_vop_inactive(struct vop_inactive_args *ap) * the strategy code. Simply mark the inode modified so it gets * picked up by our normal flush. */ - parent = hammer2_inode_lock_ex(ip); - KKASSERT(parent); + cparent = hammer2_inode_lock_ex(ip); + KKASSERT(cparent); /* * Check for deleted inodes and recycle immediately. 
*/ - if (parent->flags & HAMMER2_CHAIN_UNLINKED) { - hammer2_inode_unlock_ex(ip, parent); + if (hammer2_cluster_unlinked(cparent) & HAMMER2_CHAIN_UNLINKED) { + hammer2_inode_unlock_ex(ip, cparent); vrecycle(vp); } else { - hammer2_inode_unlock_ex(ip, parent); + hammer2_inode_unlock_ex(ip, cparent); } return (0); } @@ -294,7 +227,7 @@ static int hammer2_vop_reclaim(struct vop_reclaim_args *ap) { - hammer2_chain_t *chain; + hammer2_cluster_t *cluster; hammer2_inode_t *ip; hammer2_pfsmount_t *pmp; struct vnode *vp; @@ -308,7 +241,7 @@ hammer2_vop_reclaim(struct vop_reclaim_args *ap) * Inode must be locked for reclaim. */ pmp = ip->pmp; - chain = hammer2_inode_lock_ex(ip); + cluster = hammer2_inode_lock_ex(ip); /* * The final close of a deleted file or directory marks it for @@ -334,7 +267,7 @@ hammer2_vop_reclaim(struct vop_reclaim_args *ap) * the ip is left with a reference and placed on a linked list and * handled later on. */ - if (chain->flags & HAMMER2_CHAIN_UNLINKED) { + if (hammer2_cluster_unlinked(cluster)) { hammer2_inode_unlink_t *ipul; ipul = kmalloc(sizeof(*ipul), pmp->minode, M_WAITOK | M_ZERO); @@ -343,14 +276,14 @@ hammer2_vop_reclaim(struct vop_reclaim_args *ap) spin_lock(&pmp->unlinkq_spin); TAILQ_INSERT_TAIL(&pmp->unlinkq, ipul, entry); spin_unlock(&pmp->unlinkq_spin); - hammer2_inode_unlock_ex(ip, chain); /* unlock */ + hammer2_inode_unlock_ex(ip, cluster); /* unlock */ /* retain ref from vp for ipul */ } else { - hammer2_inode_unlock_ex(ip, chain); /* unlock */ + hammer2_inode_unlock_ex(ip, cluster); /* unlock */ hammer2_inode_drop(ip); /* vp ref */ } - /* chain no longer referenced */ - /* chain = NULL; not needed */ + /* cluster no longer referenced */ + /* cluster = NULL; not needed */ /* * XXX handle background sync when ip dirty, kernel will no longer @@ -367,7 +300,7 @@ hammer2_vop_fsync(struct vop_fsync_args *ap) { hammer2_inode_t *ip; hammer2_trans_t trans; - hammer2_chain_t *chain; + hammer2_cluster_t *cluster; struct vnode *vp; vp = ap->a_vp; @@ -389,21 +322,21 @@ hammer2_vop_fsync(struct vop_fsync_args *ap) * which call this function will eventually call chain_flush * on the volume root as a catch-all, which is far more optimal. 
*/ - chain = hammer2_inode_lock_ex(ip); + cluster = hammer2_inode_lock_ex(ip); atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED); vclrisdirty(vp); if (ip->flags & (HAMMER2_INODE_RESIZED|HAMMER2_INODE_MTIME)) - hammer2_inode_fsync(&trans, ip, &chain); + hammer2_inode_fsync(&trans, ip, cluster); #if 0 /* * XXX creates discontinuity w/modify_tid */ if (ap->a_flags & VOP_FSYNC_SYSCALL) { - hammer2_flush(&trans, &chain); + hammer2_flush(&trans, cluster); } #endif - hammer2_inode_unlock_ex(ip, chain); + hammer2_inode_unlock_ex(ip, cluster); hammer2_trans_done(&trans); return (0); @@ -415,17 +348,17 @@ hammer2_vop_access(struct vop_access_args *ap) { hammer2_inode_t *ip = VTOI(ap->a_vp); hammer2_inode_data_t *ipdata; - hammer2_chain_t *chain; + hammer2_cluster_t *cluster; uid_t uid; gid_t gid; int error; - chain = hammer2_inode_lock_sh(ip); - ipdata = &chain->data->ipdata; + cluster = hammer2_inode_lock_sh(ip); + ipdata = &hammer2_cluster_data(cluster)->ipdata; uid = hammer2_to_unix_xid(&ipdata->uid); gid = hammer2_to_unix_xid(&ipdata->gid); error = vop_helper_access(ap, uid, gid, ipdata->mode, ipdata->uflags); - hammer2_inode_unlock_sh(ip, chain); + hammer2_inode_unlock_sh(ip, cluster); return (error); } @@ -435,7 +368,7 @@ int hammer2_vop_getattr(struct vop_getattr_args *ap) { hammer2_inode_data_t *ipdata; - hammer2_chain_t *chain; + hammer2_cluster_t *cluster; hammer2_pfsmount_t *pmp; hammer2_inode_t *ip; struct vnode *vp; @@ -447,8 +380,9 @@ hammer2_vop_getattr(struct vop_getattr_args *ap) ip = VTOI(vp); pmp = ip->pmp; - chain = hammer2_inode_lock_sh(ip); - ipdata = &chain->data->ipdata; + cluster = hammer2_inode_lock_sh(ip); + ipdata = &hammer2_cluster_data(cluster)->ipdata; + KKASSERT(hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE); vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0]; vap->va_fileid = ipdata->inum; @@ -466,14 +400,14 @@ hammer2_vop_getattr(struct vop_getattr_args *ap) hammer2_time_to_timespec(ipdata->mtime, &vap->va_atime); vap->va_gen = 1; vap->va_bytes = vap->va_size; /* XXX */ - vap->va_type = hammer2_get_vtype(chain); + vap->va_type = hammer2_get_vtype(ipdata); vap->va_filerev = 0; vap->va_uid_uuid = ipdata->uid; vap->va_gid_uuid = ipdata->gid; vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID | VA_FSID_UUID_VALID; - hammer2_inode_unlock_sh(ip, chain); + hammer2_inode_unlock_sh(ip, cluster); return (0); } @@ -484,7 +418,7 @@ hammer2_vop_setattr(struct vop_setattr_args *ap) { hammer2_inode_data_t *ipdata; hammer2_inode_t *ip; - hammer2_chain_t *chain; + hammer2_cluster_t *cluster; hammer2_trans_t trans; struct vnode *vp; struct vattr *vap; @@ -502,10 +436,10 @@ hammer2_vop_setattr(struct vop_setattr_args *ap) if (ip->pmp->ronly) return(EROFS); - hammer2_chain_memory_wait(ip->pmp); + hammer2_pfs_memory_wait(ip->pmp); hammer2_trans_init(&trans, ip->pmp, NULL, 0); - chain = hammer2_inode_lock_ex(ip); - ipdata = &chain->data->ipdata; + cluster = hammer2_inode_lock_ex(ip); + ipdata = &hammer2_cluster_data(cluster)->ipdata; error = 0; if (vap->va_flags != VNOVAL) { @@ -517,8 +451,8 @@ hammer2_vop_setattr(struct vop_setattr_args *ap) ap->a_cred); if (error == 0) { if (ipdata->uflags != flags) { - ipdata = hammer2_chain_modify_ip(&trans, ip, - &chain, 0); + ipdata = hammer2_cluster_modify_ip(&trans, ip, + cluster, 0); ipdata->uflags = flags; ipdata->ctime = ctime; kflags |= NOTE_ATTRIB; @@ -551,8 +485,8 @@ hammer2_vop_setattr(struct vop_setattr_args *ap) bcmp(&uuid_gid, &ipdata->gid, sizeof(uuid_gid)) || ipdata->mode != cur_mode ) { - ipdata = 
hammer2_chain_modify_ip(&trans, ip, - &chain, 0); + ipdata = hammer2_cluster_modify_ip(&trans, ip, + cluster, 0); ipdata->uid = uuid_uid; ipdata->gid = uuid_gid; ipdata->mode = cur_mode; @@ -570,14 +504,15 @@ hammer2_vop_setattr(struct vop_setattr_args *ap) case VREG: if (vap->va_size == ip->size) break; - hammer2_inode_unlock_ex(ip, chain); + hammer2_inode_unlock_ex(ip, cluster); if (vap->va_size < ip->size) { hammer2_truncate_file(ip, vap->va_size); } else { hammer2_extend_file(ip, vap->va_size); } - chain = hammer2_inode_lock_ex(ip); - ipdata = &chain->data->ipdata; /* RELOAD */ + cluster = hammer2_inode_lock_ex(ip); + /* RELOAD */ + ipdata = &hammer2_cluster_data(cluster)->ipdata; domtime = 1; break; default: @@ -588,13 +523,13 @@ hammer2_vop_setattr(struct vop_setattr_args *ap) #if 0 /* atime not supported */ if (vap->va_atime.tv_sec != VNOVAL) { - ipdata = hammer2_chain_modify_ip(&trans, ip, &chain, 0); + ipdata = hammer2_cluster_modify_ip(&trans, ip, cluster, 0); ipdata->atime = hammer2_timespec_to_time(&vap->va_atime); kflags |= NOTE_ATTRIB; } #endif if (vap->va_mtime.tv_sec != VNOVAL) { - ipdata = hammer2_chain_modify_ip(&trans, ip, &chain, 0); + ipdata = hammer2_cluster_modify_ip(&trans, ip, cluster, 0); ipdata->mtime = hammer2_timespec_to_time(&vap->va_mtime); kflags |= NOTE_ATTRIB; domtime = 0; @@ -607,7 +542,8 @@ hammer2_vop_setattr(struct vop_setattr_args *ap) error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred, cur_uid, cur_gid, &cur_mode); if (error == 0 && ipdata->mode != cur_mode) { - ipdata = hammer2_chain_modify_ip(&trans, ip, &chain, 0); + ipdata = hammer2_cluster_modify_ip(&trans, ip, + cluster, 0); ipdata->mode = cur_mode; ipdata->ctime = ctime; kflags |= NOTE_ATTRIB; @@ -619,7 +555,7 @@ hammer2_vop_setattr(struct vop_setattr_args *ap) * to trim the related data chains, otherwise a later expansion can * cause havoc. */ - hammer2_inode_fsync(&trans, ip, &chain); + hammer2_inode_fsync(&trans, ip, cluster); /* * Cleanup. If domtime is set an additional inode modification @@ -632,7 +568,7 @@ done: HAMMER2_INODE_MTIME); vsetisdirty(ip->vp); } - hammer2_inode_unlock_ex(ip, chain); + hammer2_inode_unlock_ex(ip, cluster); hammer2_trans_done(&trans); hammer2_knote(ip->vp, kflags); @@ -646,9 +582,10 @@ hammer2_vop_readdir(struct vop_readdir_args *ap) hammer2_inode_data_t *ipdata; hammer2_inode_t *ip; hammer2_inode_t *xip; - hammer2_chain_t *parent; - hammer2_chain_t *chain; - hammer2_chain_t *xchain; + hammer2_cluster_t *cparent; + hammer2_cluster_t *cluster; + hammer2_cluster_t *xcluster; + hammer2_blockref_t bref; hammer2_tid_t inum; hammer2_key_t key_next; hammer2_key_t lkey; @@ -656,10 +593,10 @@ hammer2_vop_readdir(struct vop_readdir_args *ap) off_t *cookies; off_t saveoff; int cookie_index; - int cache_index = -1; int ncookies; int error; int dtype; + int ddflag; int r; ip = VTOI(ap->a_vp); @@ -680,8 +617,8 @@ hammer2_vop_readdir(struct vop_readdir_args *ap) } cookie_index = 0; - parent = hammer2_inode_lock_sh(ip); - ipdata = &parent->data->ipdata; + cparent = hammer2_inode_lock_sh(ip); + ipdata = &hammer2_cluster_data(cparent)->ipdata; /* * Handle artificial entries. To ensure that only positive 64 bit @@ -693,7 +630,7 @@ hammer2_vop_readdir(struct vop_readdir_args *ap) * allow '..' to cross the mount point into (e.g.) the super-root. 
*/ error = 0; - chain = (void *)(intptr_t)-1; /* non-NULL for early goto done case */ + cluster = (void *)(intptr_t)-1; /* non-NULL for early goto done case */ if (saveoff == 0) { inum = ipdata->inum & HAMMER2_DIRHASH_USERMSK; @@ -718,17 +655,18 @@ hammer2_vop_readdir(struct vop_readdir_args *ap) while (ip->pip != NULL && ip != ip->pmp->iroot) { xip = ip->pip; hammer2_inode_ref(xip); - hammer2_inode_unlock_sh(ip, parent); - xchain = hammer2_inode_lock_sh(xip); - parent = hammer2_inode_lock_sh(ip); + hammer2_inode_unlock_sh(ip, cparent); + xcluster = hammer2_inode_lock_sh(xip); + cparent = hammer2_inode_lock_sh(ip); hammer2_inode_drop(xip); + ipdata = &hammer2_cluster_data(cparent)->ipdata; if (xip == ip->pip) { - inum = xchain->data->ipdata.inum & - HAMMER2_DIRHASH_USERMSK; - hammer2_inode_unlock_sh(xip, xchain); + inum = hammer2_cluster_data(xcluster)-> + ipdata.inum & HAMMER2_DIRHASH_USERMSK; + hammer2_inode_unlock_sh(xip, xcluster); break; } - hammer2_inode_unlock_sh(xip, xchain); + hammer2_inode_unlock_sh(xip, xcluster); } r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, ".."); if (r) @@ -746,33 +684,37 @@ hammer2_vop_readdir(struct vop_readdir_args *ap) kprintf("readdir: lkey %016jx\n", lkey); /* - * parent is the inode chain, already locked for us. Don't + * parent is the inode cluster, already locked for us. Don't * double lock shared locks as this will screw up upgrades. */ if (error) { goto done; } - chain = hammer2_chain_lookup(&parent, &key_next, lkey, lkey, - &cache_index, HAMMER2_LOOKUP_SHARED); - if (chain == NULL) { - chain = hammer2_chain_lookup(&parent, &key_next, + cluster = hammer2_cluster_lookup(cparent, &key_next, lkey, lkey, + HAMMER2_LOOKUP_SHARED, &ddflag); + if (cluster == NULL) { + cluster = hammer2_cluster_lookup(cparent, &key_next, lkey, (hammer2_key_t)-1, - &cache_index, - HAMMER2_LOOKUP_SHARED); + HAMMER2_LOOKUP_SHARED, &ddflag); } - while (chain) { + if (cluster) + hammer2_cluster_bref(cluster, &bref); + while (cluster) { if (hammer2_debug & 0x0020) kprintf("readdir: p=%p chain=%p %016jx (next %016jx)\n", - parent, chain, chain->bref.key, key_next); - if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { - dtype = hammer2_get_dtype(chain); - saveoff = chain->bref.key & HAMMER2_DIRHASH_USERMSK; + cparent->focus, cluster->focus, + bref.key, key_next); + + if (bref.type == HAMMER2_BREF_TYPE_INODE) { + ipdata = &hammer2_cluster_data(cluster)->ipdata; + dtype = hammer2_get_dtype(ipdata); + saveoff = bref.key & HAMMER2_DIRHASH_USERMSK; r = vop_write_dirent(&error, uio, - chain->data->ipdata.inum & + ipdata->inum & HAMMER2_DIRHASH_USERMSK, dtype, - chain->data->ipdata.name_len, - chain->data->ipdata.filename); + ipdata->name_len, + ipdata->filename); if (r) break; if (cookies) @@ -780,33 +722,32 @@ hammer2_vop_readdir(struct vop_readdir_args *ap) ++cookie_index; } else { /* XXX chain error */ - kprintf("bad chain type readdir %d\n", - chain->bref.type); + kprintf("bad chain type readdir %d\n", bref.type); } /* * Keys may not be returned in order so once we have a - * placemarker (chain) the scan must allow the full range + * placemarker (cluster) the scan must allow the full range * or some entries will be missed. 
*/ - chain = hammer2_chain_next(&parent, chain, &key_next, - key_next, (hammer2_key_t)-1, - &cache_index, HAMMER2_LOOKUP_SHARED); - if (chain) { - saveoff = (chain->bref.key & - HAMMER2_DIRHASH_USERMSK) + 1; + cluster = hammer2_cluster_next(cparent, cluster, &key_next, + key_next, (hammer2_key_t)-1, + HAMMER2_LOOKUP_SHARED); + if (cluster) { + hammer2_cluster_bref(cluster, &bref); + saveoff = (bref.key & HAMMER2_DIRHASH_USERMSK) + 1; } else { saveoff = (hammer2_key_t)-1; } if (cookie_index == ncookies) break; } - if (chain) - hammer2_chain_unlock(chain); + if (cluster) + hammer2_cluster_unlock(cluster); done: - hammer2_inode_unlock_sh(ip, parent); + hammer2_inode_unlock_sh(ip, cparent); if (ap->a_eofflag) - *ap->a_eofflag = (chain == NULL); + *ap->a_eofflag = (cluster == NULL); if (hammer2_debug & 0x0020) kprintf("readdir: done at %016jx\n", saveoff); uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE; @@ -1228,17 +1169,16 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap) { hammer2_inode_t *ip; hammer2_inode_t *dip; - hammer2_chain_t *parent; - hammer2_chain_t *chain; - hammer2_chain_t *ochain; - hammer2_trans_t trans; + hammer2_cluster_t *cparent; + hammer2_cluster_t *cluster; + hammer2_inode_data_t *ipdata; hammer2_key_t key_next; hammer2_key_t lhc; struct namecache *ncp; const uint8_t *name; size_t name_len; int error = 0; - int cache_index = -1; + int ddflag; struct vnode *vp; dip = VTOI(ap->a_dvp); @@ -1250,46 +1190,46 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap) /* * Note: In DragonFly the kernel handles '.' and '..'. */ - parent = hammer2_inode_lock_sh(dip); - chain = hammer2_chain_lookup(&parent, &key_next, - lhc, lhc + HAMMER2_DIRHASH_LOMASK, - &cache_index, HAMMER2_LOOKUP_SHARED); - while (chain) { - if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && - name_len == chain->data->ipdata.name_len && - bcmp(name, chain->data->ipdata.filename, name_len) == 0) { - break; + cparent = hammer2_inode_lock_sh(dip); + cluster = hammer2_cluster_lookup(cparent, &key_next, + lhc, lhc + HAMMER2_DIRHASH_LOMASK, + HAMMER2_LOOKUP_SHARED, &ddflag); + while (cluster) { + if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) { + ipdata = &hammer2_cluster_data(cluster)->ipdata; + if (ipdata->name_len == name_len && + bcmp(ipdata->filename, name, name_len) == 0) { + break; + } } - chain = hammer2_chain_next(&parent, chain, &key_next, - key_next, - lhc + HAMMER2_DIRHASH_LOMASK, - &cache_index, HAMMER2_LOOKUP_SHARED); + cluster = hammer2_cluster_next(cparent, cluster, &key_next, + key_next, + lhc + HAMMER2_DIRHASH_LOMASK, + HAMMER2_LOOKUP_SHARED); } - hammer2_inode_unlock_sh(dip, parent); + hammer2_inode_unlock_sh(dip, cparent); /* - * If the inode represents a forwarding entry for a hardlink we have - * to locate the actual inode. The original ip is saved for possible - * deconsolidation. (ip) will only be set to non-NULL when we have - * to locate the real file via a hardlink. ip will be referenced but - * not locked in that situation. chain is passed in locked and - * returned locked. - * - * XXX what kind of chain lock? + * nresolve needs to resolve hardlinks, the original cluster is not + * sufficient. 
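+	 * The fixup below takes an inode ref, drops the lookup cluster, and
+	 * re-locks the inode exclusively so that ip/cluster reference the
+	 * actual hardlink target rather than the forwarding entry.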
*/ - ochain = NULL; - if (chain && chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) { - error = hammer2_hardlink_find(dip, &chain, &ochain); - if (error) { - kprintf("hammer2: unable to find hardlink\n"); - if (chain) { - hammer2_chain_unlock(chain); - chain = NULL; - } - goto failed; + if (cluster) { + ip = hammer2_inode_get(dip->pmp, dip, cluster); + ipdata = &hammer2_cluster_data(cluster)->ipdata; + if (ipdata->type == HAMMER2_OBJTYPE_HARDLINK) { + kprintf("nresolve: fixup hardlink\n"); + hammer2_inode_ref(ip); + hammer2_inode_unlock_ex(ip, NULL); + hammer2_cluster_unlock(cluster); + cluster = hammer2_inode_lock_ex(ip); + ipdata = &hammer2_cluster_data(cluster)->ipdata; + kprintf("nresolve: fixup to type %02x\n", ipdata->type); } + } else { + ip = NULL; } +#if 0 /* * Deconsolidate any hardlink whos nlinks == 1. Ignore errors. * If an error occurs chain and ip are left alone. @@ -1305,6 +1245,7 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap) hammer2_hardlink_deconsolidate(&trans, dip, &chain, &ochain); hammer2_trans_done(&trans); } +#endif /* * Acquire the related vnode @@ -1322,8 +1263,7 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap) * but chain was locked shared. inode_unlock_ex() * will handle it properly. */ - if (chain) { - ip = hammer2_inode_get(dip->pmp, dip, chain); + if (cluster) { vp = hammer2_igetv(ip, &error); if (error == 0) { vn_unlock(vp); @@ -1331,7 +1271,7 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap) } else if (error == ENOENT) { cache_setvp(ap->a_nch, NULL); } - hammer2_inode_unlock_ex(ip, chain); + hammer2_inode_unlock_ex(ip, cluster); /* * The vp should not be released until after we've disposed @@ -1344,12 +1284,9 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap) error = ENOENT; cache_setvp(ap->a_nch, NULL); } -failed: KASSERT(error || ap->a_nch->ncp->nc_vp != NULL, - ("resolve error %d/%p chain %p ap %p\n", - error, ap->a_nch->ncp->nc_vp, chain, ap)); - if (ochain) - hammer2_chain_drop(ochain); + ("resolve error %d/%p ap %p\n", + error, ap->a_nch->ncp->nc_vp, ap)); return error; } @@ -1359,7 +1296,7 @@ hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap) { hammer2_inode_t *dip; hammer2_inode_t *ip; - hammer2_chain_t *parent; + hammer2_cluster_t *cparent; int error; dip = VTOI(ap->a_dvp); @@ -1368,9 +1305,9 @@ hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap) *ap->a_vpp = NULL; return ENOENT; } - parent = hammer2_inode_lock_ex(ip); + cparent = hammer2_inode_lock_ex(ip); *ap->a_vpp = hammer2_igetv(ip, &error); - hammer2_inode_unlock_ex(ip, parent); + hammer2_inode_unlock_ex(ip, cparent); return error; } @@ -1382,7 +1319,7 @@ hammer2_vop_nmkdir(struct vop_nmkdir_args *ap) hammer2_inode_t *dip; hammer2_inode_t *nip; hammer2_trans_t trans; - hammer2_chain_t *chain; + hammer2_cluster_t *cluster; struct namecache *ncp; const uint8_t *name; size_t name_len; @@ -1395,18 +1332,19 @@ hammer2_vop_nmkdir(struct vop_nmkdir_args *ap) ncp = ap->a_nch->ncp; name = ncp->nc_name; name_len = ncp->nc_nlen; + cluster = NULL; - hammer2_chain_memory_wait(dip->pmp); + hammer2_pfs_memory_wait(dip->pmp); hammer2_trans_init(&trans, dip->pmp, NULL, HAMMER2_TRANS_NEWINODE); nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, - name, name_len, &chain, &error); - chain->inode_reason = 1; + name, name_len, &cluster, &error); + cluster->focus->inode_reason = 1; if (error) { KKASSERT(nip == NULL); *ap->a_vpp = NULL; } else { *ap->a_vpp = hammer2_igetv(nip, &error); - hammer2_inode_unlock_ex(nip, chain); + hammer2_inode_unlock_ex(nip, 
cluster); } hammer2_trans_done(&trans); @@ -1453,12 +1391,14 @@ int hammer2_vop_advlock(struct vop_advlock_args *ap) { hammer2_inode_t *ip = VTOI(ap->a_vp); - hammer2_chain_t *parent; + hammer2_inode_data_t *ipdata; + hammer2_cluster_t *cparent; hammer2_off_t size; - parent = hammer2_inode_lock_sh(ip); - size = parent->data->ipdata.size; - hammer2_inode_unlock_sh(ip, parent); + cparent = hammer2_inode_lock_sh(ip); + ipdata = &hammer2_cluster_data(cparent)->ipdata; + size = ipdata->size; + hammer2_inode_unlock_sh(ip, cparent); return (lf_advlock(ap, &ip->advlock, size)); } @@ -1483,10 +1423,10 @@ hammer2_vop_nlink(struct vop_nlink_args *ap) hammer2_inode_t *tdip; /* target directory to create link in */ hammer2_inode_t *cdip; /* common parent directory */ hammer2_inode_t *ip; /* inode we are hardlinking to */ - hammer2_chain_t *chain; - hammer2_chain_t *fdchain; - hammer2_chain_t *tdchain; - hammer2_chain_t *cdchain; + hammer2_cluster_t *cluster; + hammer2_cluster_t *fdcluster; + hammer2_cluster_t *tdcluster; + hammer2_cluster_t *cdcluster; hammer2_trans_t trans; struct namecache *ncp; const uint8_t *name; @@ -1509,11 +1449,11 @@ hammer2_vop_nlink(struct vop_nlink_args *ap) * * Bump nlinks and potentially also create or move the hardlink * target in the parent directory common to (ip) and (tdip). The - * consolidation code can modify ip->chain and ip->pip. The - * returned chain is locked. + * consolidation code can modify ip->cluster and ip->pip. The + * returned cluster is locked. */ ip = VTOI(ap->a_vp); - hammer2_chain_memory_wait(ip->pmp); + hammer2_pfs_memory_wait(ip->pmp); hammer2_trans_init(&trans, ip->pmp, NULL, HAMMER2_TRANS_NEWINODE); /* @@ -1522,39 +1462,33 @@ hammer2_vop_nlink(struct vop_nlink_args *ap) */ fdip = ip->pip; cdip = hammer2_inode_common_parent(fdip, tdip); - cdchain = hammer2_inode_lock_ex(cdip); - fdchain = hammer2_inode_lock_ex(fdip); - tdchain = hammer2_inode_lock_ex(tdip); - chain = hammer2_inode_lock_ex(ip); - error = hammer2_hardlink_consolidate(&trans, ip, &chain, - cdip, &cdchain, 1); + cdcluster = hammer2_inode_lock_ex(cdip); + fdcluster = hammer2_inode_lock_ex(fdip); + tdcluster = hammer2_inode_lock_ex(tdip); + cluster = hammer2_inode_lock_ex(ip); + error = hammer2_hardlink_consolidate(&trans, ip, &cluster, + cdip, cdcluster, 1); if (error) goto done; /* - * Create a directory entry connected to the specified chain. - * The hardlink consolidation code has already adjusted ip->pip - * to the common parent directory containing the actual hardlink - * - * (which may be different from dip where we created our hardlink - * entry. ip->chain always represents the actual hardlink and not - * any of the pointers to the actual hardlink). + * Create a directory entry connected to the specified cluster. * * WARNING! chain can get moved by the connect (indirectly due to * potential indirect block creation). 
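 *
 *	    Any ipdata pointer derived from the cluster must therefore
 *	    be considered stale after the connect and reloaded before
 *	    it is used again.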
*/ - error = hammer2_inode_connect(&trans, &chain, 1, - tdip, &tdchain, + error = hammer2_inode_connect(&trans, &cluster, 1, + tdip, tdcluster, name, name_len, 0); if (error == 0) { cache_setunresolved(ap->a_nch); cache_setvp(ap->a_nch, ap->a_vp); } done: - hammer2_inode_unlock_ex(ip, chain); - hammer2_inode_unlock_ex(tdip, tdchain); - hammer2_inode_unlock_ex(fdip, fdchain); - hammer2_inode_unlock_ex(cdip, cdchain); + hammer2_inode_unlock_ex(ip, cluster); + hammer2_inode_unlock_ex(tdip, tdcluster); + hammer2_inode_unlock_ex(fdip, fdcluster); + hammer2_inode_unlock_ex(cdip, cdcluster); hammer2_trans_done(&trans); return error; @@ -1573,7 +1507,7 @@ hammer2_vop_ncreate(struct vop_ncreate_args *ap) hammer2_inode_t *dip; hammer2_inode_t *nip; hammer2_trans_t trans; - hammer2_chain_t *nchain; + hammer2_cluster_t *ncluster; struct namecache *ncp; const uint8_t *name; size_t name_len; @@ -1586,18 +1520,19 @@ hammer2_vop_ncreate(struct vop_ncreate_args *ap) ncp = ap->a_nch->ncp; name = ncp->nc_name; name_len = ncp->nc_nlen; - hammer2_chain_memory_wait(dip->pmp); + hammer2_pfs_memory_wait(dip->pmp); hammer2_trans_init(&trans, dip->pmp, NULL, HAMMER2_TRANS_NEWINODE); + ncluster = NULL; nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, - name, name_len, &nchain, &error); - nchain->inode_reason = 2; + name, name_len, &ncluster, &error); + ncluster->focus->inode_reason = 2; if (error) { KKASSERT(nip == NULL); *ap->a_vpp = NULL; } else { *ap->a_vpp = hammer2_igetv(nip, &error); - hammer2_inode_unlock_ex(nip, nchain); + hammer2_inode_unlock_ex(nip, ncluster); } hammer2_trans_done(&trans); @@ -1609,7 +1544,7 @@ hammer2_vop_ncreate(struct vop_ncreate_args *ap) } /* - * + * Make a device node (typically a fifo) */ static int @@ -1618,7 +1553,7 @@ hammer2_vop_nmknod(struct vop_nmknod_args *ap) hammer2_inode_t *dip; hammer2_inode_t *nip; hammer2_trans_t trans; - hammer2_chain_t *nchain; + hammer2_cluster_t *ncluster; struct namecache *ncp; const uint8_t *name; size_t name_len; @@ -1631,18 +1566,19 @@ hammer2_vop_nmknod(struct vop_nmknod_args *ap) ncp = ap->a_nch->ncp; name = ncp->nc_name; name_len = ncp->nc_nlen; - hammer2_chain_memory_wait(dip->pmp); + hammer2_pfs_memory_wait(dip->pmp); hammer2_trans_init(&trans, dip->pmp, NULL, HAMMER2_TRANS_NEWINODE); + ncluster = NULL; nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, - name, name_len, &nchain, &error); - nchain->inode_reason = 3; + name, name_len, &ncluster, &error); + ncluster->focus->inode_reason = 3; if (error) { KKASSERT(nip == NULL); *ap->a_vpp = NULL; } else { *ap->a_vpp = hammer2_igetv(nip, &error); - hammer2_inode_unlock_ex(nip, nchain); + hammer2_inode_unlock_ex(nip, ncluster); } hammer2_trans_done(&trans); @@ -1662,7 +1598,7 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap) { hammer2_inode_t *dip; hammer2_inode_t *nip; - hammer2_chain_t *nparent; + hammer2_cluster_t *ncparent; hammer2_trans_t trans; struct namecache *ncp; const uint8_t *name; @@ -1676,14 +1612,15 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap) ncp = ap->a_nch->ncp; name = ncp->nc_name; name_len = ncp->nc_nlen; - hammer2_chain_memory_wait(dip->pmp); + hammer2_pfs_memory_wait(dip->pmp); hammer2_trans_init(&trans, dip->pmp, NULL, HAMMER2_TRANS_NEWINODE); + ncparent = NULL; ap->a_vap->va_type = VLNK; /* enforce type */ nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred, - name, name_len, &nparent, &error); - nparent->inode_reason = 4; + name, name_len, &ncparent, &error); + ncparent->focus->inode_reason = 4; if (error) { KKASSERT(nip 
== NULL); *ap->a_vpp = NULL; @@ -1701,7 +1638,8 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap) struct iovec aiov; hammer2_inode_data_t *nipdata; - nipdata = &nip->chain->data->ipdata; + nipdata = &hammer2_cluster_data(ncparent)->ipdata; + /* nipdata = &nip->chain->data->ipdata;XXX */ bytes = strlen(ap->a_target); if (bytes <= HAMMER2_EMBEDDED_BYTES) { @@ -1710,9 +1648,11 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap) bcopy(ap->a_target, nipdata->u.data, bytes); nipdata->size = bytes; nip->size = bytes; - hammer2_inode_unlock_ex(nip, nparent); + hammer2_inode_unlock_ex(nip, ncparent); + /* nipdata = NULL; not needed */ } else { - hammer2_inode_unlock_ex(nip, nparent); + hammer2_inode_unlock_ex(nip, ncparent); + /* nipdata = NULL; not needed */ bzero(&auio, sizeof(auio)); bzero(&aiov, sizeof(aiov)); auio.uio_iov = &aiov; @@ -1724,12 +1664,11 @@ hammer2_vop_nsymlink(struct vop_nsymlink_args *ap) aiov.iov_base = ap->a_target; aiov.iov_len = bytes; error = hammer2_write_file(nip, &auio, IO_APPEND, 0); - nipdata = &nip->chain->data->ipdata; /* RELOAD */ /* XXX handle error */ error = 0; } } else { - hammer2_inode_unlock_ex(nip, nparent); + hammer2_inode_unlock_ex(nip, ncparent); } hammer2_trans_done(&trans); @@ -1766,7 +1705,7 @@ hammer2_vop_nremove(struct vop_nremove_args *ap) name = ncp->nc_name; name_len = ncp->nc_nlen; - hammer2_chain_memory_wait(dip->pmp); + hammer2_pfs_memory_wait(dip->pmp); hammer2_trans_init(&trans, dip->pmp, NULL, 0); error = hammer2_unlink_file(&trans, dip, name, name_len, 0, NULL, ap->a_nch); @@ -1798,7 +1737,7 @@ hammer2_vop_nrmdir(struct vop_nrmdir_args *ap) name = ncp->nc_name; name_len = ncp->nc_nlen; - hammer2_chain_memory_wait(dip->pmp); + hammer2_pfs_memory_wait(dip->pmp); hammer2_trans_init(&trans, dip->pmp, NULL, 0); hammer2_run_unlinkq(&trans, dip->pmp); error = hammer2_unlink_file(&trans, dip, name, name_len, @@ -1822,10 +1761,10 @@ hammer2_vop_nrename(struct vop_nrename_args *ap) hammer2_inode_t *fdip; hammer2_inode_t *tdip; hammer2_inode_t *ip; - hammer2_chain_t *chain; - hammer2_chain_t *fdchain; - hammer2_chain_t *tdchain; - hammer2_chain_t *cdchain; + hammer2_cluster_t *cluster; + hammer2_cluster_t *fdcluster; + hammer2_cluster_t *tdcluster; + hammer2_cluster_t *cdcluster; hammer2_trans_t trans; const uint8_t *fname; size_t fname_len; @@ -1853,7 +1792,7 @@ hammer2_vop_nrename(struct vop_nrename_args *ap) tname = tncp->nc_name; tname_len = tncp->nc_nlen; - hammer2_chain_memory_wait(tdip->pmp); + hammer2_pfs_memory_wait(tdip->pmp); hammer2_trans_init(&trans, tdip->pmp, NULL, 0); /* @@ -1861,7 +1800,7 @@ hammer2_vop_nrename(struct vop_nrename_args *ap) * ip represents the actual file and not the hardlink marker. */ ip = VTOI(fncp->nc_vp); - chain = NULL; + cluster = NULL; /* @@ -1880,14 +1819,14 @@ hammer2_vop_nrename(struct vop_nrename_args *ap) * other pointers. */ cdip = hammer2_inode_common_parent(ip->pip, tdip); - cdchain = hammer2_inode_lock_ex(cdip); - fdchain = hammer2_inode_lock_ex(fdip); - tdchain = hammer2_inode_lock_ex(tdip); + cdcluster = hammer2_inode_lock_ex(cdip); + fdcluster = hammer2_inode_lock_ex(fdip); + tdcluster = hammer2_inode_lock_ex(tdip); /* * Keep a tight grip on the inode so the temporary unlinking from * the source location prior to linking to the target location - * does not cause the chain to be destroyed. + * does not cause the cluster to be destroyed. * * NOTE: To avoid deadlocks we cannot lock (ip) while we are * unlinking elements from their directories. 
Locking @@ -1910,18 +1849,18 @@ hammer2_vop_nrename(struct vop_nrename_args *ap) * to counter-act the unlink below. * * If ip represents a regular file the consolidation code essentially - * does nothing other than return the same locked chain that was + * does nothing other than return the same locked cluster that was * passed in. * - * The returned chain will be locked. + * The returned cluster will be locked. * * WARNING! We do not currently have a local copy of ipdata but * we do use one later remember that it must be reloaded * on any modification to the inode, including connects. */ - chain = hammer2_inode_lock_ex(ip); - error = hammer2_hardlink_consolidate(&trans, ip, &chain, - cdip, &cdchain, 1); + cluster = hammer2_inode_lock_ex(ip); + error = hammer2_hardlink_consolidate(&trans, ip, &cluster, + cdip, cdcluster, 1); if (error) goto done; @@ -1935,8 +1874,8 @@ hammer2_vop_nrename(struct vop_nrename_args *ap) * so we don't want hammer2_unlink_file() to rename it to the hidden * open-but-unlinked directory. * - * The target chain may be marked DELETED but will not be destroyed - * since we retain our hold on ip and chain. + * The target cluster may be marked DELETED but will not be destroyed + * since we retain our hold on ip and cluster. */ error = hammer2_unlink_file(&trans, fdip, fname, fname_len, -1, &hlink, NULL); @@ -1945,33 +1884,33 @@ hammer2_vop_nrename(struct vop_nrename_args *ap) goto done; /* - * Reconnect ip to target directory using chain. Chains cannot - * actually be moved, so this will duplicate the chain in the new - * spot and assign it to the ip, replacing the old chain. + * Reconnect ip to target directory using cluster. Chains cannot + * actually be moved, so this will duplicate the cluster in the new + * spot and assign it to the ip, replacing the old cluster. * * WARNING: Because recursive locks are allowed and we unlinked the - * file that we have a chain-in-hand for just above, the - * chain might have been delete-duplicated. We must refactor - * the chain. + * file that we have a cluster-in-hand for just above, the + * cluster might have been delete-duplicated. We must + * refactor the cluster. * * WARNING: Chain locks can lock buffer cache buffers, to avoid * deadlocks we want to unlock before issuing a cache_*() * op (that might have to lock a vnode). */ - hammer2_chain_refactor(&chain); - error = hammer2_inode_connect(&trans, &chain, hlink, - tdip, &tdchain, + hammer2_cluster_refactor(cluster); + error = hammer2_inode_connect(&trans, &cluster, hlink, + tdip, tdcluster, tname, tname_len, 0); - chain->inode_reason = 5; + cluster->focus->inode_reason = 5; if (error == 0) { - KKASSERT(chain != NULL); - hammer2_inode_repoint(ip, (hlink ? ip->pip : tdip), chain); + KKASSERT(cluster != NULL); + hammer2_inode_repoint(ip, (hlink ? 
ip->pip : tdip), cluster); } done: - hammer2_inode_unlock_ex(ip, chain); - hammer2_inode_unlock_ex(tdip, tdchain); - hammer2_inode_unlock_ex(fdip, fdchain); - hammer2_inode_unlock_ex(cdip, cdchain); + hammer2_inode_unlock_ex(ip, cluster); + hammer2_inode_unlock_ex(tdip, tdcluster); + hammer2_inode_unlock_ex(fdip, fdcluster); + hammer2_inode_unlock_ex(cdip, cdcluster); hammer2_inode_drop(ip); hammer2_trans_done(&trans); @@ -1995,6 +1934,7 @@ done: static int hammer2_strategy_read(struct vop_strategy_args *ap); static int hammer2_strategy_write(struct vop_strategy_args *ap); static void hammer2_strategy_read_callback(hammer2_io_t *dio, + hammer2_cluster_t *cluster, hammer2_chain_t *chain, void *arg_p, off_t arg_o); @@ -2036,11 +1976,12 @@ hammer2_strategy_read(struct vop_strategy_args *ap) struct bio *bio; struct bio *nbio; hammer2_inode_t *ip; - hammer2_chain_t *parent; - hammer2_chain_t *chain; + hammer2_cluster_t *cparent; + hammer2_cluster_t *cluster; hammer2_key_t key_dummy; hammer2_key_t lbase; - int cache_index = -1; + int ddflag; + uint8_t btype; bio = ap->a_bio; bp = bio->bio_buf; @@ -2048,72 +1989,41 @@ hammer2_strategy_read(struct vop_strategy_args *ap) nbio = push_bio(bio); lbase = bio->bio_offset; - chain = NULL; KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0); - parent = hammer2_inode_lock_sh(ip); - chain = hammer2_chain_lookup(&parent, &key_dummy, - lbase, lbase, - &cache_index, - HAMMER2_LOOKUP_NODATA | - HAMMER2_LOOKUP_SHARED); + cparent = hammer2_inode_lock_sh(ip); + cluster = hammer2_cluster_lookup(cparent, &key_dummy, + lbase, lbase, + HAMMER2_LOOKUP_NODATA | + HAMMER2_LOOKUP_SHARED, + &ddflag); + hammer2_inode_unlock_sh(ip, cparent); - if (chain == NULL) { - /* - * Data is zero-fill - */ + /* + * Data is zero-fill if no cluster could be found + * (XXX or EIO on a cluster failure). + */ + if (cluster == NULL) { bp->b_resid = 0; bp->b_error = 0; bzero(bp->b_data, bp->b_bcount); biodone(nbio); - } else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { - /* - * Data is embedded in the inode (copy from inode). - */ - hammer2_chain_load_async(chain, - hammer2_strategy_read_callback, - nbio, 0); - } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) { - /* - * Data is on-media, issue device I/O and copy. - * - * XXX direct-IO shortcut could go here XXX. - */ - if (HAMMER2_DEC_COMP(chain->bref.methods) == HAMMER2_COMP_LZ4) { - /* - * Block compression is determined by bref.methods - */ - hammer2_blockref_t *bref; - - bref = &chain->bref; - hammer2_io_breadcb(chain->hmp, bref->data_off, - chain->bytes, - hammer2_decompress_LZ4_callback, - NULL, nbio, bref->data_off); - /* XXX async read dev blk not protected by chain lk */ - hammer2_chain_unlock(chain); - } else if (HAMMER2_DEC_COMP(chain->bref.methods) == - HAMMER2_COMP_ZLIB) { - hammer2_blockref_t *bref; - - bref = &chain->bref; - hammer2_io_breadcb(chain->hmp, bref->data_off, - chain->bytes, - hammer2_decompress_ZLIB_callback, - NULL, nbio, bref->data_off); - /* XXX async read dev blk not protected by chain lk */ - hammer2_chain_unlock(chain); - } else { - hammer2_chain_load_async(chain, - hammer2_strategy_read_callback, - nbio, 0); - } - } else { + return(0); + } + + /* + * Cluster elements must be type INODE or type DATA, but the + * compression mode (or not) for DATA chains can be different for + * each chain. This will be handled by the callback. 
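+ *
+ * hammer2_chain_load_async() is handed the whole cluster; on a media
+ * read error the callback below retries the request against the next
+ * cluster element, passing the element index through arg_o.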
+ */ + btype = hammer2_cluster_type(cluster); + if (btype != HAMMER2_BREF_TYPE_INODE && + btype != HAMMER2_BREF_TYPE_DATA) { panic("READ PATH: hammer2_strategy_read: unknown bref type"); - chain = NULL; } - hammer2_inode_unlock_sh(ip, parent); - return (0); + + hammer2_chain_load_async(cluster, hammer2_strategy_read_callback, nbio); + return(0); } /* @@ -2121,17 +2031,45 @@ hammer2_strategy_read(struct vop_strategy_args *ap) */ static void -hammer2_strategy_read_callback(hammer2_io_t *dio, hammer2_chain_t *chain, - void *arg_p, off_t arg_o __unused) +hammer2_strategy_read_callback(hammer2_io_t *dio, + hammer2_cluster_t *cluster, + hammer2_chain_t *chain, + void *arg_p, off_t arg_o) { - struct bio *nbio = arg_p; - struct buf *bp = nbio->bio_buf; + struct bio *bio = arg_p; + struct buf *bp = bio->bio_buf; char *data; + int i; - if (dio) + /* + * Extract data and handle iteration on I/O failure. arg_o is the + * cluster index for iteration. + */ + if (dio) { + if (dio->bp->b_flags & B_ERROR) { + i = (int)arg_o + 1; + if (i >= cluster->nchains) { + bp->b_flags |= B_ERROR; + bp->b_error = dio->bp->b_error; + biodone(bio); + } else { + chain = cluster->array[i]; + kprintf("hammer2: IO CHAIN-%d %p\n", i, chain); + hammer2_adjreadcounter(&chain->bref, + chain->bytes); + hammer2_io_breadcb(chain->hmp, + chain->bref.data_off, + chain->bytes, + hammer2_strategy_read_callback, + cluster, chain, + arg_p, (off_t)i); + } + return; + } data = hammer2_io_data(dio, chain->bref.data_off); - else + } else { data = (void *)chain->data; + } if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { /* @@ -2143,25 +2081,37 @@ hammer2_strategy_read_callback(hammer2_io_t *dio, hammer2_chain_t *chain, bp->b_bcount - HAMMER2_EMBEDDED_BYTES); bp->b_resid = 0; bp->b_error = 0; - hammer2_chain_unlock(chain); - biodone(nbio); } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) { /* * Data is on-media, issue device I/O and copy. * * XXX direct-IO shortcut could go here XXX. 
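 *
 * The copy-out below dispatches on HAMMER2_DEC_COMP(bref.methods)
 * per chain: LZ4 and ZLIB blocks are handed to the corresponding
 * decompress callbacks, uncompressed blocks are copied and
 * zero-padded out to the logical buffer size.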
*/ - KKASSERT(chain->bytes <= bp->b_bcount); - bcopy(data, bp->b_data, chain->bytes); - if (chain->bytes < bp->b_bcount) { - bzero(bp->b_data + chain->bytes, - bp->b_bcount - chain->bytes); + switch (HAMMER2_DEC_COMP(chain->bref.methods)) { + case HAMMER2_COMP_LZ4: + hammer2_decompress_LZ4_callback(data, chain->bytes, + bio); + break; + case HAMMER2_COMP_ZLIB: + hammer2_decompress_ZLIB_callback(data, chain->bytes, + bio); + break; + case HAMMER2_COMP_NONE: + KKASSERT(chain->bytes <= bp->b_bcount); + bcopy(data, bp->b_data, chain->bytes); + if (chain->bytes < bp->b_bcount) { + bzero(bp->b_data + chain->bytes, + bp->b_bcount - chain->bytes); + } + bp->b_flags |= B_NOTMETA; + bp->b_resid = 0; + bp->b_error = 0; + hammer2_chain_unlock(chain); + break; + default: + panic("hammer2_strategy_read: " + "unknown compression type"); } - bp->b_flags |= B_NOTMETA; - bp->b_resid = 0; - bp->b_error = 0; - hammer2_chain_unlock(chain); - biodone(nbio); } else { /* bqrelse the dio to help stabilize the call to panic() */ if (dio) @@ -2170,6 +2120,8 @@ hammer2_strategy_read_callback(hammer2_io_t *dio, hammer2_chain_t *chain, /*hammer2_chain_unlock(chain);*/ /*chain = NULL;*/ } + hammer2_cluster_unlock(cluster); + biodone(bio); } static @@ -2252,7 +2204,7 @@ hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp) { hammer2_inode_unlink_t *ipul; hammer2_inode_t *ip; - hammer2_chain_t *chain; + hammer2_cluster_t *cluster; if (TAILQ_EMPTY(&pmp->unlinkq)) return; @@ -2264,12 +2216,12 @@ hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp) ip = ipul->ip; kfree(ipul, pmp->minode); - chain = hammer2_inode_lock_ex(ip); - KKASSERT(chain->flags & HAMMER2_CHAIN_UNLINKED); + cluster = hammer2_inode_lock_ex(ip); + KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_UNLINKED); kprintf("hammer2: unlink on reclaim: %s\n", - chain->data->ipdata.filename); - hammer2_chain_delete(trans, chain, 0); - hammer2_inode_unlock_ex(ip, chain); /* inode lock */ + cluster->focus->data->ipdata.filename); + hammer2_cluster_delete(trans, cluster, 0); + hammer2_inode_unlock_ex(ip, cluster); /* inode lock */ hammer2_inode_drop(ip); /* ipul ref */ spin_lock(&pmp->unlinkq_spin); -- 2.41.0
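
For reference, the degenerate (single-target) lookup pattern the converted
vnops (readdir, nresolve) follow now looks roughly like the sketch below.
This is illustrative only: example_dirscan() is a hypothetical helper in the
hammer2_vnops.c context, it assumes the single-target cluster behavior of
this stage, and cookie handling, hardlink fixup and error paths are omitted.

/*
 * Hypothetical helper (not part of this patch): shared-lock cluster
 * lookup/iterate/unlock pattern over a directory hash range.
 */
static void
example_dirscan(hammer2_inode_t *dip, hammer2_key_t lhc)
{
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *cluster;
	hammer2_inode_data_t *ipdata;
	hammer2_key_t key_next;
	int ddflag;

	cparent = hammer2_inode_lock_sh(dip);
	cluster = hammer2_cluster_lookup(cparent, &key_next,
					 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
					 HAMMER2_LOOKUP_SHARED, &ddflag);
	while (cluster) {
		if (hammer2_cluster_type(cluster) ==
		    HAMMER2_BREF_TYPE_INODE) {
			/* ipdata remains valid while the cluster is locked */
			ipdata = &hammer2_cluster_data(cluster)->ipdata;
			kprintf("scan: %*.*s\n",
				(int)ipdata->name_len,
				(int)ipdata->name_len,
				ipdata->filename);
			break;
		}
		cluster = hammer2_cluster_next(cparent, cluster, &key_next,
					       key_next,
					       lhc + HAMMER2_DIRHASH_LOMASK,
					       HAMMER2_LOOKUP_SHARED);
	}
	if (cluster)
		hammer2_cluster_unlock(cluster);
	hammer2_inode_unlock_sh(dip, cparent);
}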
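The transaction-wrapped create path shared by nmkdir, ncreate, nmknod and
nsymlink reduces to roughly the following; again a hypothetical sketch
(example_create() is not part of the patch) with namecache updates and
per-vop details left out.

/*
 * Hypothetical sketch of the create path: memory wait, transaction,
 * inode+cluster creation, vnode acquisition, unlock, transaction done.
 */
static int
example_create(hammer2_inode_t *dip, struct vattr *vap, struct ucred *cred,
	       const uint8_t *name, size_t name_len, struct vnode **vpp)
{
	hammer2_trans_t trans;
	hammer2_cluster_t *cluster = NULL;
	hammer2_inode_t *nip;
	int error = 0;

	hammer2_pfs_memory_wait(dip->pmp);
	hammer2_trans_init(&trans, dip->pmp, NULL, HAMMER2_TRANS_NEWINODE);
	nip = hammer2_inode_create(&trans, dip, vap, cred,
				   name, name_len, &cluster, &error);
	if (error) {
		KKASSERT(nip == NULL);
		*vpp = NULL;
	} else {
		*vpp = hammer2_igetv(nip, &error);
		hammer2_inode_unlock_ex(nip, cluster);
	}
	hammer2_trans_done(&trans);
	return (error);
}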