From b8ba9690ca56cd9ac33ff1a5185ddd164169d9ca Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sun, 29 Mar 2015 19:40:17 -0700 Subject: [PATCH] hammer2 - Cleanup hammer2_cluster API * Track whether a cluster structure is locked or not. For the upcoming cluster locking code, cluster structures cannot be recursively locked (though you can always make a copy and lock that). * Move the ddflag (direct-data-in-inode) flag into the cluster structure and remove it from a number of API calls. * Add fields and flags in preparation for more formalized locking and tracking of clusters. * Refactor code to remove hammer2_cluster_replace() and hammer2_cluster_replace_locked(). This involves changing hammer2_hardlink_find() and a few other things. * Refactor the cluster focus code. cluster->focus is only set and valid when a cluster is locked. Do not set it as a side effect of other operations. * Refactor chain validation in hammer2_cluster_lookup() and add it to hammer2_cluster_next(). * Add the concept of a read-only cluster lock which will allow upcoming cluster locking code to reduce the number of chains that actually get locked or undergo I/O. * In the strategy file-writing code, call hammer2_cluster_modify_ip() only when the inode itself is being modified by the write() due to being in OPFLAG_DIRECTDATA mode (when 512 bytes of data is embedded in the inode). 
--- sys/vfs/hammer2/hammer2.h | 29 ++++-- sys/vfs/hammer2/hammer2_bulkscan.c | 4 +- sys/vfs/hammer2/hammer2_chain.c | 24 +---- sys/vfs/hammer2/hammer2_cluster.c | 144 +++++++++++++++++++++++------ sys/vfs/hammer2/hammer2_freemap.c | 7 +- sys/vfs/hammer2/hammer2_inode.c | 135 +++++++++++++++------------ sys/vfs/hammer2/hammer2_iocom.c | 3 +- sys/vfs/hammer2/hammer2_ioctl.c | 10 +- sys/vfs/hammer2/hammer2_syncthr.c | 16 +++- sys/vfs/hammer2/hammer2_vfsops.c | 86 +++++++++-------- sys/vfs/hammer2/hammer2_vnops.c | 22 ++--- 11 files changed, 293 insertions(+), 187 deletions(-) diff --git a/sys/vfs/hammer2/hammer2.h b/sys/vfs/hammer2/hammer2.h index 539a857883..3684d1cdc4 100644 --- a/sys/vfs/hammer2/hammer2.h +++ b/sys/vfs/hammer2/hammer2.h @@ -416,6 +416,11 @@ RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); /* * Flags passed to hammer2_chain_lock() + * + * NOTE: RDONLY is set to optimize cluster operations when *no* modifications + * will be made to either the cluster being locked or any underlying + * cluster. It allows the cluster to lock and access data for a subset + * of available nodes instead of all available nodes. 
*/ #define HAMMER2_RESOLVE_NEVER 1 #define HAMMER2_RESOLVE_MAYBE 2 @@ -424,6 +429,7 @@ RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); #define HAMMER2_RESOLVE_SHARED 0x10 /* request shared lock */ #define HAMMER2_RESOLVE_NOREF 0x20 /* already ref'd on lock */ +#define HAMMER2_RESOLVE_RDONLY 0x40 /* higher level op flag */ /* * Flags passed to hammer2_chain_delete() @@ -498,14 +504,17 @@ struct hammer2_cluster_item { hammer2_chain_t *chain; struct hammer2_cluster *cluster; /* link back to cluster */ int cache_index; - int unused01; + uint32_t flags; }; typedef struct hammer2_cluster_item hammer2_cluster_item_t; +#define HAMMER2_CLUSTER_ITEM_LOCKED 0x0001 /* valid lock */ +#define HAMMER2_CLUSTER_ITEM_DATA + struct hammer2_cluster { - int unused01; int refs; /* track for deallocation */ + int ddflag; struct hammer2_pfs *pmp; uint32_t flags; int nchains; @@ -549,6 +558,7 @@ typedef struct hammer2_cluster hammer2_cluster_t; */ #define HAMMER2_CLUSTER_INODE 0x00000001 /* embedded in inode */ #define HAMMER2_CLUSTER_NOSYNC 0x00000002 /* not in sync (cumulative) */ +#define HAMMER2_CLUSTER_LOCKED 0x00000004 /* cluster lks not recursive */ #define HAMMER2_CLUSTER_WRHARD 0x00000100 /* hard-mount can write */ #define HAMMER2_CLUSTER_RDHARD 0x00000200 /* hard-mount can read */ #define HAMMER2_CLUSTER_WRSOFT 0x00000400 /* soft-mount can write */ @@ -566,6 +576,12 @@ typedef struct hammer2_cluster hammer2_cluster_t; #define HAMMER2_CLUSTER_WROK ( HAMMER2_CLUSTER_WRHARD | \ HAMMER2_CLUSTER_WRSOFT) +#define HAMMER2_CLUSTER_ZFLAGS ( HAMMER2_CLUSTER_WRHARD | \ + HAMMER2_CLUSTER_RDHARD | \ + HAMMER2_CLUSTER_WRSOFT | \ + HAMMER2_CLUSTER_RDSOFT | \ + HAMMER2_CLUSTER_MSYNCED | \ + HAMMER2_CLUSTER_SSYNCED) RB_HEAD(hammer2_inode_tree, hammer2_inode); @@ -1038,7 +1054,7 @@ int hammer2_hardlink_consolidate(hammer2_trans_t *trans, int hammer2_hardlink_deconsolidate(hammer2_trans_t *trans, hammer2_inode_t *dip, hammer2_chain_t **chainp, hammer2_chain_t **ochainp); 
int hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_cluster_t **cparentp, - hammer2_cluster_t *cluster); + hammer2_cluster_t **clusterp); int hammer2_parent_find(hammer2_cluster_t **cparentp, hammer2_cluster_t *cluster); void hammer2_inode_install_hidden(hammer2_pfs_t *pmp); @@ -1083,7 +1099,7 @@ void hammer2_chain_lookup_done(hammer2_chain_t *parent); hammer2_chain_t *hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp, hammer2_key_t key_beg, hammer2_key_t key_end, - int *cache_indexp, int flags, int *ddflagp); + int *cache_indexp, int flags); hammer2_chain_t *hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, hammer2_key_t *key_nextp, @@ -1224,9 +1240,6 @@ void hammer2_cluster_ref(hammer2_cluster_t *cluster); void hammer2_cluster_drop(hammer2_cluster_t *cluster); void hammer2_cluster_wait(hammer2_cluster_t *cluster); int hammer2_cluster_lock(hammer2_cluster_t *cluster, int how); -void hammer2_cluster_replace(hammer2_cluster_t *dst, hammer2_cluster_t *src); -void hammer2_cluster_replace_locked(hammer2_cluster_t *dst, - hammer2_cluster_t *src); hammer2_cluster_t *hammer2_cluster_copy(hammer2_cluster_t *ocluster); void hammer2_cluster_unlock(hammer2_cluster_t *cluster); void hammer2_cluster_resize(hammer2_trans_t *trans, hammer2_inode_t *ip, @@ -1244,7 +1257,7 @@ void hammer2_cluster_lookup_done(hammer2_cluster_t *cparent); hammer2_cluster_t *hammer2_cluster_lookup(hammer2_cluster_t *cparent, hammer2_key_t *key_nextp, hammer2_key_t key_beg, hammer2_key_t key_end, - int flags, int *ddflagp); + int flags); hammer2_cluster_t *hammer2_cluster_next(hammer2_cluster_t *cparent, hammer2_cluster_t *cluster, hammer2_key_t *key_nextp, diff --git a/sys/vfs/hammer2/hammer2_bulkscan.c b/sys/vfs/hammer2/hammer2_bulkscan.c index ed7694b31e..f4eeb6044b 100644 --- a/sys/vfs/hammer2/hammer2_bulkscan.c +++ b/sys/vfs/hammer2/hammer2_bulkscan.c @@ -460,7 +460,6 @@ h2_bulkfree_sync(hammer2_bulkfree_info_t *cbinfo) hammer2_chain_t *live_chain; 
int cache_index = -1; int bmapindex; - int ddflag; kprintf("hammer2_bulkfree - range %016jx-%016jx\n", (intmax_t)cbinfo->sbase, @@ -496,8 +495,7 @@ h2_bulkfree_sync(hammer2_bulkfree_info_t *cbinfo) key, key + HAMMER2_FREEMAP_LEVEL1_MASK, &cache_index, - HAMMER2_LOOKUP_ALWAYS, - &ddflag); + HAMMER2_LOOKUP_ALWAYS); if (live_chain) kprintf("live_chain %016jx\n", (intmax_t)key); diff --git a/sys/vfs/hammer2/hammer2_chain.c b/sys/vfs/hammer2/hammer2_chain.c index 88dc23b4ab..d5833520f3 100644 --- a/sys/vfs/hammer2/hammer2_chain.c +++ b/sys/vfs/hammer2/hammer2_chain.c @@ -1182,23 +1182,6 @@ skip2: */ if (chain->parent) hammer2_chain_setflush(trans, chain->parent); - -#if 0 - /* - * Adjust the freemap bitmap to indicate that the related blocks - * MIGHT be freeable. Bulkfree must still determine that the blocks - * are actually freeable. - * - * We no longer do this in the normal filesystem operations path - * as it interferes with the bulkfree algorithm. - */ - if (obref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE && - obref.type != HAMMER2_BREF_TYPE_FREEMAP_LEAF && - (obref.data_off & ~HAMMER2_OFF_MASK_RADIX)) { - hammer2_freemap_adjust(trans, hmp, - &obref, HAMMER2_FREEMAP_DOMAYFREE); - } -#endif } /* @@ -1520,7 +1503,7 @@ hammer2_chain_getparent(hammer2_chain_t **parentp, int how) hammer2_chain_t * hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp, hammer2_key_t key_beg, hammer2_key_t key_end, - int *cache_indexp, int flags, int *ddflagp) + int *cache_indexp, int flags) { hammer2_dev_t *hmp; hammer2_chain_t *parent; @@ -1537,7 +1520,6 @@ hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp, int generation; int maxloops = 300000; - *ddflagp = 0; if (flags & HAMMER2_LOOKUP_ALWAYS) { how_maybe = how_always; how = HAMMER2_RESOLVE_ALWAYS; @@ -1595,7 +1577,6 @@ again: else hammer2_chain_lock(parent, how_always); *key_nextp = key_end + 1; - *ddflagp = 1; return (parent); } base = &parent->data->ipdata.u.blockset.blockref[0]; @@ -1799,7 
+1780,6 @@ hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, { hammer2_chain_t *parent; int how_maybe; - int ddflag; /* * Calculate locking flags for upward recursion. @@ -1853,7 +1833,7 @@ hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, */ return (hammer2_chain_lookup(parentp, key_nextp, key_beg, key_end, - cache_indexp, flags, &ddflag)); + cache_indexp, flags)); } /* diff --git a/sys/vfs/hammer2/hammer2_cluster.c b/sys/vfs/hammer2/hammer2_cluster.c index 808fdc7240..ffc1838eca 100644 --- a/sys/vfs/hammer2/hammer2_cluster.c +++ b/sys/vfs/hammer2/hammer2_cluster.c @@ -257,10 +257,12 @@ hammer2_cluster_from_chain(hammer2_chain_t *chain) cluster->focus = chain; cluster->pmp = chain->pmp; cluster->refs = 1; + cluster->flags = HAMMER2_CLUSTER_LOCKED; return cluster; } +#if 0 /* * Allocates a cluster and its underlying chain structures. The underlying * chains will be locked. The cluster and underlying chains will have one @@ -310,6 +312,7 @@ hammer2_cluster_alloc(hammer2_pfs_t *pmp, cluster = kmalloc(sizeof(*cluster), M_HAMMER2, M_WAITOK | M_ZERO); cluster->refs = 1; + cluster->flags = HAMMER2_CLUSTER_LOCKED; rcluster = &pmp->iroot->cluster; for (i = 0; i < rcluster->nchains; ++i) { @@ -320,7 +323,7 @@ hammer2_cluster_alloc(hammer2_pfs_t *pmp, chain->bref = *bref; chain->bytes = bytes; chain->refs = 1; - chain->flags = HAMMER2_CHAIN_ALLOCATED; + chain->flags |= HAMMER2_CHAIN_ALLOCATED; #endif /* @@ -336,6 +339,7 @@ hammer2_cluster_alloc(hammer2_pfs_t *pmp, return (cluster); } +#endif /* * Add a reference to a cluster. @@ -398,6 +402,17 @@ hammer2_cluster_wait(hammer2_cluster_t *cluster) * and then locks them. * * The act of locking a cluster sets its focus if not already set. + * + * The chains making up the cluster may be narrowed down based on quorum + * acceptability, and if RESOLVE_RDONLY is specified the chains can be + * narrowed down to a single chain as long as the entire subtopology is known + * to be intact. 
So, for example, we can narrow a read-only op to a single + * fast SLAVE but if we focus a CACHE chain we must still retain at least + * a SLAVE to ensure that the subtopology can be accessed. + * + * RESOLVE_RDONLY operations are effectively as-of so the quorum does not need + * to be maintained once the topology is validated as-of the top level of + * the operation. */ int hammer2_cluster_lock(hammer2_cluster_t *cluster, int how) @@ -407,6 +422,14 @@ hammer2_cluster_lock(hammer2_cluster_t *cluster, int how) int i; int error; + /* cannot be on inode-embedded cluster template, must be on copy */ + KKASSERT((cluster->flags & HAMMER2_CLUSTER_INODE) == 0); + if (cluster->flags & HAMMER2_CLUSTER_LOCKED) { + kprintf("hammer2_cluster_lock: cluster %p already locked!\n", + cluster); + } + atomic_set_int(&cluster->flags, HAMMER2_CLUSTER_LOCKED); + if ((how & HAMMER2_RESOLVE_NOREF) == 0) atomic_add_int(&cluster->refs, 1); @@ -431,8 +454,11 @@ hammer2_cluster_lock(hammer2_cluster_t *cluster, int how) return error; } +#if 0 /* - * Replace the contents of dst with src, adding a reference to src's chains. + * Replace the contents of dst with src, adding a reference to src's chains + * but not adding any additional locks. + * * dst is assumed to already have a ref and any chains present in dst are * assumed to be locked and will be unlocked. * @@ -475,11 +501,16 @@ hammer2_cluster_replace(hammer2_cluster_t *dst, hammer2_cluster_t *src) /* * Replace the contents of the locked destination with the contents of the - * locked source. Destination must have one ref. + * locked source. The destination must have one ref. * * Returns with the destination still with one ref and the copied chains * with an additional lock (representing their state on the destination). * The original chains associated with the destination are unlocked. + * + * From the point of view of the caller, both src and dst are locked on + * call and remain locked on return. 
+ * + * XXX adjust flag state */ void hammer2_cluster_replace_locked(hammer2_cluster_t *dst, hammer2_cluster_t *src) @@ -500,8 +531,6 @@ hammer2_cluster_replace_locked(hammer2_cluster_t *dst, hammer2_cluster_t *src) hammer2_chain_unlock(tmp); } dst->array[i].chain = chain; - if (dst->focus == NULL) - dst->focus = chain; } } while (i < dst->nchains) { @@ -513,7 +542,10 @@ hammer2_cluster_replace_locked(hammer2_cluster_t *dst, hammer2_cluster_t *src) ++i; } dst->nchains = src->nchains; + dst->flags = src->flags; + dst->focus = src->focus; } +#endif /* * Copy a cluster, returned a ref'd cluster. All underlying chains @@ -533,6 +565,7 @@ hammer2_cluster_copy(hammer2_cluster_t *ocluster) ncluster->pmp = pmp; ncluster->nchains = ocluster->nchains; ncluster->refs = 1; + ncluster->flags = 0; /* cluster not locked */ for (i = 0; i < ocluster->nchains; ++i) { chain = ocluster->array[i].chain; @@ -553,7 +586,14 @@ hammer2_cluster_unlock(hammer2_cluster_t *cluster) hammer2_chain_t *chain; int i; + if ((cluster->flags & HAMMER2_CLUSTER_LOCKED) == 0) { + kprintf("hammer2_cluster_unlock: cluster %p not locked\n", + cluster); + } + /* KKASSERT(cluster->flags & HAMMER2_CLUSTER_LOCKED); */ KKASSERT(cluster->refs > 0); + atomic_clear_int(&cluster->flags, HAMMER2_CLUSTER_LOCKED); + for (i = 0; i < cluster->nchains; ++i) { chain = cluster->array[i].chain; if (chain) { @@ -711,6 +751,7 @@ hammer2_cluster_lookup_init(hammer2_cluster_t *cparent, int flags) cluster = kmalloc(sizeof(*cluster), M_HAMMER2, M_WAITOK | M_ZERO); cluster->pmp = cparent->pmp; /* can be NULL */ + cluster->flags = 0; /* cluster not locked (yet) */ /* cluster->focus = NULL; already null */ for (i = 0; i < cparent->nchains; ++i) { @@ -744,8 +785,7 @@ hammer2_cluster_lookup_done(hammer2_cluster_t *cparent) */ hammer2_cluster_t * hammer2_cluster_lookup(hammer2_cluster_t *cparent, hammer2_key_t *key_nextp, - hammer2_key_t key_beg, hammer2_key_t key_end, - int flags, int *ddflagp) + hammer2_key_t key_beg, hammer2_key_t 
key_end, int flags) { hammer2_pfs_t *pmp; hammer2_cluster_t *cluster; @@ -753,9 +793,8 @@ hammer2_cluster_lookup(hammer2_cluster_t *cparent, hammer2_key_t *key_nextp, hammer2_key_t key_accum; hammer2_key_t key_next; hammer2_key_t bref_key; - int bref_keybits; int null_count; - int ddflag; + int bref_keybits; int i; uint8_t bref_type; u_int bytes; @@ -772,8 +811,8 @@ hammer2_cluster_lookup(hammer2_cluster_t *cparent, hammer2_key_t *key_nextp, cluster->pmp = pmp; /* can be NULL */ cluster->refs = 1; /* cluster->focus = NULL; already null */ - cparent->focus = NULL; - *ddflagp = 0; + if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0) + cluster->flags |= HAMMER2_CLUSTER_LOCKED; for (i = 0; i < cparent->nchains; ++i) { key_next = *key_nextp; @@ -785,26 +824,37 @@ hammer2_cluster_lookup(hammer2_cluster_t *cparent, hammer2_key_t *key_nextp, &key_next, key_beg, key_end, &cparent->array[i].cache_index, - flags, &ddflag); - if (cparent->focus == NULL) - cparent->focus = cparent->array[i].chain; + flags); cluster->array[i].chain = chain; if (chain == NULL) { ++null_count; } else { + int ddflag = (chain->bref.type == + HAMMER2_BREF_TYPE_INODE); + + /* + * Set default focus. + */ if (cluster->focus == NULL) { bref_type = chain->bref.type; bref_key = chain->bref.key; bref_keybits = chain->bref.keybits; bytes = chain->bytes; - *ddflagp = ddflag; + cluster->ddflag = ddflag; cluster->focus = chain; } + + /* + * Override default focus to follow the parent. 
+ */ + if (cparent->focus == cparent->array[i].chain) + cluster->focus = chain; + KKASSERT(bref_type == chain->bref.type); KKASSERT(bref_key == chain->bref.key); KKASSERT(bref_keybits == chain->bref.keybits); KKASSERT(bytes == chain->bytes); - KKASSERT(*ddflagp == ddflag); + KKASSERT(cluster->ddflag == ddflag); } if (key_accum > key_next) key_accum = key_next; @@ -831,20 +881,28 @@ hammer2_cluster_next(hammer2_cluster_t *cparent, hammer2_cluster_t *cluster, hammer2_chain_t *chain; hammer2_key_t key_accum; hammer2_key_t key_next; + hammer2_key_t bref_key; int null_count; + int bref_keybits; int i; + uint8_t bref_type; + u_int bytes; key_accum = *key_nextp; null_count = 0; cluster->focus = NULL; cparent->focus = NULL; + bref_type = 0; + bref_key = 0; + bref_keybits = 0; + bytes = 0; + cluster->ddflag = 0; + for (i = 0; i < cparent->nchains; ++i) { key_next = *key_nextp; chain = cluster->array[i].chain; if (chain == NULL) { - if (cparent->focus == NULL) - cparent->focus = cparent->array[i].chain; ++null_count; continue; } @@ -860,17 +918,37 @@ hammer2_cluster_next(hammer2_cluster_t *cparent, hammer2_cluster_t *cluster, &key_next, key_beg, key_end, &cparent->array[i].cache_index, flags); - if (cparent->focus == NULL) - cparent->focus = cparent->array[i].chain; cluster->array[i].chain = chain; if (chain == NULL) { ++null_count; - } else if (cluster->focus == NULL) { - cluster->focus = chain; + } else { + int ddflag = (chain->bref.type == + HAMMER2_BREF_TYPE_INODE); + if (cluster->focus == NULL) { + bref_type = chain->bref.type; + bref_key = chain->bref.key; + bref_keybits = chain->bref.keybits; + bytes = chain->bytes; + cluster->ddflag = ddflag; + cluster->focus = chain; + } + + /* + * Override default focus to follow the parent. 
+ */ + if (cparent->focus == cparent->array[i].chain) + cluster->focus = chain; + + KKASSERT(bref_type == chain->bref.type); + KKASSERT(bref_key == chain->bref.key); + KKASSERT(bref_keybits == chain->bref.keybits); + KKASSERT(bytes == chain->bytes); + KKASSERT(cluster->ddflag == ddflag); } if (key_accum > key_next) key_accum = key_next; } + cluster->nchains = i; if (null_count == i) { hammer2_cluster_drop(cluster); @@ -955,9 +1033,9 @@ hammer2_cluster_create(hammer2_trans_t *trans, hammer2_cluster_t *cparent, M_WAITOK | M_ZERO); cluster->pmp = pmp; /* can be NULL */ cluster->refs = 1; + cluster->flags = HAMMER2_CLUSTER_LOCKED; } cluster->focus = NULL; - cparent->focus = NULL; /* * NOTE: cluster->array[] entries can initially be NULL. If @@ -966,8 +1044,6 @@ hammer2_cluster_create(hammer2_trans_t *trans, hammer2_cluster_t *cparent, */ for (i = 0; i < cparent->nchains; ++i) { if (*clusterp && cluster->array[i].chain == NULL) { - if (cparent->focus == NULL) - cparent->focus = cparent->array[i].chain; continue; } error = hammer2_chain_create(trans, &cparent->array[i].chain, @@ -975,10 +1051,10 @@ hammer2_cluster_create(hammer2_trans_t *trans, hammer2_cluster_t *cparent, key, keybits, type, bytes, flags); KKASSERT(error == 0); - if (cparent->focus == NULL) - cparent->focus = cparent->array[i].chain; if (cluster->focus == NULL) cluster->focus = cluster->array[i].chain; + if (cparent->focus == cparent->array[i].chain) + cluster->focus = cluster->array[i].chain; } cluster->nchains = i; *clusterp = cluster; @@ -1186,6 +1262,7 @@ hammer2_cluster_parent(hammer2_cluster_t *cluster) int i; cparent = hammer2_cluster_copy(cluster); + for (i = 0; i < cparent->nchains; ++i) { hammer2_chain_t *chain; hammer2_chain_t *rchain; @@ -1212,6 +1289,8 @@ hammer2_cluster_parent(hammer2_cluster_t *cluster) cparent->array[i].chain = rchain; hammer2_chain_drop(chain); } + cparent->flags |= HAMMER2_CLUSTER_LOCKED; + return cparent; } @@ -1244,9 +1323,10 @@ hammer2_cluster_wdata(hammer2_cluster_t 
*cluster) } /* - * Load async into independent buffer - used to load logical buffers from - * underlying device data. The callback is made for the first validated - * data found, or NULL if no valid data is available. + * Load cluster data asynchronously with callback. + * + * The callback is made for the first validated data found, or NULL + * if no valid data is available. * * NOTE! The cluster structure is either unique or serialized (e.g. embedded * in the inode with an exclusive lock held), the chain structure may be @@ -1307,6 +1387,10 @@ hammer2_cluster_load_async(hammer2_cluster_t *cluster, * * The minimum physical IO size may be larger than the variable * block size. + * + * XXX TODO - handle HAMMER2_CHAIN_INITIAL for case where chain->bytes + * matches hammer2_devblksize()? Or does the freemap's + * pre-zeroing handle the case for us? */ bref = &chain->bref; hmp = chain->hmp; diff --git a/sys/vfs/hammer2/hammer2_freemap.c b/sys/vfs/hammer2/hammer2_freemap.c index 18da75767b..69f3c4183f 100644 --- a/sys/vfs/hammer2/hammer2_freemap.c +++ b/sys/vfs/hammer2/hammer2_freemap.c @@ -305,8 +305,6 @@ hammer2_freemap_try_alloc(hammer2_trans_t *trans, hammer2_chain_t **parentp, uint16_t class; int error = 0; int cache_index = -1; - int ddflag; - /* * Calculate the number of bytes being allocated, the number @@ -332,7 +330,7 @@ hammer2_freemap_try_alloc(hammer2_trans_t *trans, hammer2_chain_t **parentp, chain = hammer2_chain_lookup(parentp, &key_dummy, key, key + l1mask, &cache_index, HAMMER2_LOOKUP_ALWAYS | - HAMMER2_LOOKUP_MATCHIND, &ddflag); + HAMMER2_LOOKUP_MATCHIND); if (chain == NULL) { /* @@ -818,7 +816,6 @@ hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_dev_t *hmp, int modified = 0; int cache_index = -1; int error; - int ddflag; KKASSERT(how == HAMMER2_FREEMAP_DORECOVER); @@ -852,7 +849,7 @@ hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_dev_t *hmp, chain = hammer2_chain_lookup(&parent, &key_dummy, key, key + l1mask, &cache_index, 
HAMMER2_LOOKUP_ALWAYS | - HAMMER2_LOOKUP_MATCHIND, &ddflag); + HAMMER2_LOOKUP_MATCHIND); /* * Stop early if we are trying to free something but no leaf exists. diff --git a/sys/vfs/hammer2/hammer2_inode.c b/sys/vfs/hammer2/hammer2_inode.c index fc143a42ab..df577accbe 100644 --- a/sys/vfs/hammer2/hammer2_inode.c +++ b/sys/vfs/hammer2/hammer2_inode.c @@ -85,6 +85,12 @@ hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2) * * NOTE: Caller must not passed HAMMER2_RESOLVE_NOREF because we use it * internally and refs confusion will ensue. + * + * NOTE: If caller passes HAMMER2_RESOLVE_RDONLY the exclusive locking code + * will feel free to reduce the chain set in the cluster as an + * optimization. It will still be validated against the quorum if + * appropriate, but the optimization might be able to reduce data + * accesses to one node. */ hammer2_cluster_t * hammer2_inode_lock_ex(hammer2_inode_t *ip) @@ -109,7 +115,9 @@ hammer2_inode_lock_nex(hammer2_inode_t *ip, int how) * * The copy will not have a focus until it is locked. * - * We save the focused chain in our embedded ip->cluster for now XXX. + * Exclusive inode locks set the template focus chain in (ip) + * as a hint. Cluster locks can ALWAYS replace the focus in the + * working copy if the hint does not work out, so beware. 
*/ cluster = hammer2_cluster_copy(&ip->cluster); hammer2_cluster_lock(cluster, how | HAMMER2_RESOLVE_NOREF); @@ -122,13 +130,13 @@ hammer2_inode_lock_nex(hammer2_inode_t *ip, int how) const hammer2_inode_data_t *ripdata; ripdata = &hammer2_cluster_rdata(cluster)->ipdata; KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK); - /* +#if 0 if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK && (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) { - error = hammer2_hardlink_find(ip->pip, NULL, cluster); + error = hammer2_hardlink_find(ip->pip, NULL, &cluster); KKASSERT(error == 0); } - */ +#endif } return (cluster); } @@ -145,6 +153,10 @@ hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_cluster_t *cluster) /* * Standard shared inode lock always resolves the inode meta-data. * + * This type of inode lock may be used only when the overall operation is + * non-modifying. It will also optimize cluster accesses for non-modifying + * operations. + * * NOTE: We don't combine the inode/chain lock because putting away an * inode would otherwise confuse multiple lock holders of the inode. * @@ -168,11 +180,16 @@ hammer2_inode_lock_sh(hammer2_inode_t *ip) * a second ref to either when we lock it. * * The copy will not have a focus until it is locked. + * + * Chains available in the cluster may be reduced once a quorum is + * acquired, and can be reduced further as an optimization due to + * RDONLY being set. */ cluster = hammer2_cluster_copy(&ip->cluster); hammer2_cluster_lock(cluster, HAMMER2_RESOLVE_ALWAYS | HAMMER2_RESOLVE_SHARED | - HAMMER2_RESOLVE_NOREF); + HAMMER2_RESOLVE_NOREF | + HAMMER2_RESOLVE_RDONLY); /* do not update ip->cluster.focus on a shared inode lock! 
*/ /*ip->cluster.focus = cluster->focus;*/ @@ -181,13 +198,13 @@ hammer2_inode_lock_sh(hammer2_inode_t *ip) */ ripdata = &hammer2_cluster_rdata(cluster)->ipdata; KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK); - /* +#if 0 if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK && (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) { - error = hammer2_hardlink_find(ip->pip, NULL, cluster); + error = hammer2_hardlink_find(ip->pip, NULL, &cluster); KKASSERT(error == 0); } - */ +#endif return (cluster); } @@ -507,13 +524,15 @@ hammer2_igetv(hammer2_inode_t *ip, hammer2_cluster_t *cparent, int *errorp) * Returns the inode associated with the passed-in cluster, creating the * inode if necessary and synchronizing it to the passed-in cluster otherwise. * - * The passed-in chain must be locked and will remain locked on return. + * The passed-in cluster must be locked and will remain locked on return. * The returned inode will be locked and the caller may dispose of both * via hammer2_inode_unlock_ex(). However, if the caller needs to resolve * a hardlink it must ref/unlock/relock/drop the inode. * * The hammer2_inode structure regulates the interface between the high level * kernel VNOPS API and the filesystem backend (the chains). + * + * On return the inode is locked with the supplied cluster. 
*/ hammer2_inode_t * hammer2_inode_get(hammer2_pfs_t *pmp, hammer2_inode_t *dip, @@ -554,6 +573,7 @@ again: continue; } hammer2_inode_repoint(nip, NULL, cluster); + return nip; } @@ -575,7 +595,6 @@ again: nip->cluster.pmp = pmp; nip->cluster.flags |= HAMMER2_CLUSTER_INODE; if (cluster) { - hammer2_cluster_replace(&nip->cluster, cluster); nipdata = &hammer2_cluster_rdata(cluster)->ipdata; nip->inum = nipdata->inum; nip->size = nipdata->size; @@ -657,7 +676,6 @@ hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip, uint32_t dip_mode; uint8_t dip_comp_algo; uint8_t dip_check_algo; - int ddflag; lhc = hammer2_dirhash(name, name_len); *errorp = 0; @@ -681,7 +699,7 @@ retry: error = 0; while (error == 0) { cluster = hammer2_cluster_lookup(cparent, &key_dummy, - lhc, lhc, 0, &ddflag); + lhc, lhc, 0); if (cluster == NULL) break; if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0) @@ -840,7 +858,6 @@ hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_cluster_t *cluster, hammer2_key_t key_dummy; hammer2_key_t lhc; hammer2_blockref_t bref; - int ddflag; iptmp = &hammer2_cluster_rdata(cluster)->ipdata; lhc = iptmp->inum; @@ -858,7 +875,7 @@ hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_cluster_t *cluster, */ *errorp = 0; xcluster = hammer2_cluster_lookup(dcluster, &key_dummy, - lhc, lhc, 0, &ddflag); + lhc, lhc, 0); if (xcluster) { kprintf("X3 chain %p dip %p dchain %p dip->chain %p\n", xcluster->focus, dip, dcluster->focus, @@ -935,7 +952,6 @@ hammer2_inode_connect(hammer2_trans_t *trans, hammer2_cluster_t *ocluster; hammer2_cluster_t *ncluster; hammer2_key_t key_dummy; - int ddflag; int error; /* @@ -963,8 +979,7 @@ hammer2_inode_connect(hammer2_trans_t *trans, error = 0; while (error == 0) { ncluster = hammer2_cluster_lookup(dcluster, &key_dummy, - lhc, lhc, - 0, &ddflag); + lhc, lhc, 0); if (ncluster == NULL) break; if ((lhc & HAMMER2_DIRHASH_LOMASK) == @@ -981,8 +996,7 @@ hammer2_inode_connect(hammer2_trans_t *trans, * unlinked-but-open files into the 
hidden directory). */ ncluster = hammer2_cluster_lookup(dcluster, &key_dummy, - lhc, lhc, - 0, &ddflag); + lhc, lhc, 0); KKASSERT(ncluster == NULL); } @@ -1109,7 +1123,7 @@ hammer2_inode_connect(hammer2_trans_t *trans, * Repoint ip->cluster's chains to cluster's chains and fixup the default * focus. * - * Caller must hold the inode exclusively locked and cluster, if not NULL, + * Caller must hold the inode and cluster exclusive locked, if not NULL, * must also be locked. * * Cluster may be NULL to clean out any chains in ip->cluster. @@ -1130,16 +1144,12 @@ hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip, * NOTE: nchain and/or ochain can be NULL due to gaps * in the cluster arrays. */ - ip->cluster.focus = NULL; for (i = 0; cluster && i < cluster->nchains; ++i) { nchain = cluster->array[i].chain; if (i < ip->cluster.nchains) { ochain = ip->cluster.array[i].chain; - if (ochain == nchain) { - if (ip->cluster.focus == NULL) - ip->cluster.focus = nchain; + if (ochain == nchain) continue; - } } else { ochain = NULL; } @@ -1148,8 +1158,6 @@ hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip, * Make adjustments */ ip->cluster.array[i].chain = nchain; - if (ip->cluster.focus == NULL) - ip->cluster.focus = nchain; if (nchain) hammer2_chain_ref(nchain); if (ochain) @@ -1167,7 +1175,20 @@ hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip, } ++i; } - ip->cluster.nchains = cluster ? cluster->nchains : 0; + + /* + * Fixup fields. Note that the inode-embedded cluster is never + * directly locked. + */ + if (cluster) { + ip->cluster.nchains = cluster->nchains; + ip->cluster.focus = cluster->focus; + ip->cluster.flags = cluster->flags & ~HAMMER2_CLUSTER_LOCKED; + } else { + ip->cluster.nchains = 0; + ip->cluster.focus = NULL; + ip->cluster.flags &= ~HAMMER2_CLUSTER_ZFLAGS; + } /* * Repoint ip->pip if requested (non-NULL pip). 
@@ -1223,7 +1244,6 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip, hammer2_key_t key_next; hammer2_key_t lhc; int error; - int ddflag; int hlink; uint8_t type; @@ -1239,8 +1259,7 @@ again: */ cparent = hammer2_inode_lock_ex(dip); cluster = hammer2_cluster_lookup(cparent, &key_next, - lhc, lhc + HAMMER2_DIRHASH_LOMASK, - 0, &ddflag); + lhc, lhc + HAMMER2_DIRHASH_LOMASK, 0); while (cluster) { if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) { ripdata = &hammer2_cluster_rdata(cluster)->ipdata; @@ -1297,7 +1316,7 @@ again: hammer2_cluster_unlock(cparent); cparent = NULL; /* safety */ ripdata = NULL; /* safety (associated w/cparent) */ - error = hammer2_hardlink_find(dip, &hparent, hcluster); + error = hammer2_hardlink_find(dip, &hparent, &hcluster); /* * If we couldn't find the hardlink target then some @@ -1332,8 +1351,7 @@ again: dparent = hammer2_cluster_lookup_init(cluster, 0); dcluster = hammer2_cluster_lookup(dparent, &key_dummy, 0, (hammer2_key_t)-1, - HAMMER2_LOOKUP_NODATA, - &ddflag); + HAMMER2_LOOKUP_NODATA); if (dcluster) { hammer2_cluster_unlock(dcluster); hammer2_cluster_lookup_done(dparent); @@ -1461,7 +1479,6 @@ hammer2_inode_install_hidden(hammer2_pfs_t *pmp) hammer2_inode_data_t *wipdata; hammer2_key_t key_dummy; hammer2_key_t key_next; - int ddflag; int error; int count; int dip_check_algo; @@ -1495,7 +1512,7 @@ hammer2_inode_install_hidden(hammer2_pfs_t *pmp) cluster = hammer2_cluster_lookup(cparent, &key_dummy, HAMMER2_INODE_HIDDENDIR, HAMMER2_INODE_HIDDENDIR, - 0, &ddflag); + 0); if (cluster) { pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster); hammer2_inode_ref(pmp->ihidden); @@ -1509,8 +1526,7 @@ hammer2_inode_install_hidden(hammer2_pfs_t *pmp) */ count = 0; scan = hammer2_cluster_lookup(cluster, &key_next, - 0, HAMMER2_TID_MAX, - 0, &ddflag); + 0, HAMMER2_TID_MAX, 0); while (scan) { if (hammer2_cluster_type(scan) == HAMMER2_BREF_TYPE_INODE) { @@ -1776,47 +1792,47 @@ 
hammer2_hardlink_deconsolidate(hammer2_trans_t *trans, /* * The caller presents a locked cluster with an obj_type of - * HAMMER2_OBJTYPE_HARDLINK. This routine will locate and replace the - * cluster with the target hardlink, also locked. + * HAMMER2_OBJTYPE_HARDLINK in (*clusterp). This routine will locate + * the inode and replace (*clusterp) with a new locked cluster containing + * the target hardlink, also locked. The original cluster will be + * unlocked and released. * * If cparentp is not NULL a locked cluster representing the hardlink's * parent is also returned. * - * If we are unable to locate the hardlink target EIO is returned and - * (*cparentp) is set to NULL. The passed-in cluster still needs to be - * unlocked by the caller but will be degenerate... not have any chains. + * If we are unable to locate the hardlink target EIO is returned, + * (*cparentp) is set to NULL, the original passed-in (*clusterp) + * will be unlocked and released and (*clusterp) will be set to NULL + * as well. */ int hammer2_hardlink_find(hammer2_inode_t *dip, - hammer2_cluster_t **cparentp, hammer2_cluster_t *cluster) + hammer2_cluster_t **cparentp, + hammer2_cluster_t **clusterp) { const hammer2_inode_data_t *ipdata; + hammer2_cluster_t *cluster; hammer2_cluster_t *cparent; hammer2_cluster_t *rcluster; hammer2_inode_t *ip; hammer2_inode_t *pip; hammer2_key_t key_dummy; hammer2_key_t lhc; - int ddflag; + cluster = *clusterp; pip = dip; hammer2_inode_ref(pip); /* for loop */ /* * Locate the hardlink. pip is referenced and not locked. + * Unlock and release (*clusterp) after extracting the needed + * data. */ ipdata = &hammer2_cluster_rdata(cluster)->ipdata; lhc = ipdata->inum; - - /* - * We don't need the cluster's chains, but we need to retain the - * cluster structure itself so we can load the hardlink search - * result into it. 
- */ - KKASSERT(cluster->refs == 1); - atomic_add_int(&cluster->refs, 1); - hammer2_cluster_unlock(cluster); /* hack */ - cluster->nchains = 0; /* hack */ + ipdata = NULL; /* safety */ + hammer2_cluster_unlock(cluster); + *clusterp = NULL; /* safety */ rcluster = NULL; cparent = NULL; @@ -1827,7 +1843,7 @@ hammer2_hardlink_find(hammer2_inode_t *dip, KKASSERT(hammer2_cluster_type(cparent) == HAMMER2_BREF_TYPE_INODE); rcluster = hammer2_cluster_lookup(cparent, &key_dummy, - lhc, lhc, 0, &ddflag); + lhc, lhc, 0); if (rcluster) break; hammer2_cluster_lookup_done(cparent); /* discard parent */ @@ -1844,9 +1860,8 @@ hammer2_hardlink_find(hammer2_inode_t *dip, * * (cparent is already unlocked). */ + *clusterp = rcluster; if (rcluster) { - hammer2_cluster_replace(cluster, rcluster); - hammer2_cluster_drop(rcluster); if (cparentp) { *cparentp = cparent; hammer2_inode_unlock_ex(ip, NULL); @@ -1929,7 +1944,6 @@ hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip, hammer2_key_t lbase; hammer2_key_t key_next; int dosync = 0; - int ddflag; ripdata = &hammer2_cluster_rdata(cparent)->ipdata; /* target file */ @@ -1956,8 +1970,7 @@ hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip, dparent = hammer2_cluster_lookup_init(&ip->cluster, 0); cluster = hammer2_cluster_lookup(dparent, &key_next, lbase, (hammer2_key_t)-1, - HAMMER2_LOOKUP_NODATA, - &ddflag); + HAMMER2_LOOKUP_NODATA); while (cluster) { /* * Degenerate embedded case, nothing to loop on diff --git a/sys/vfs/hammer2/hammer2_iocom.c b/sys/vfs/hammer2/hammer2_iocom.c index 85e7ddfffa..17dc0b084f 100644 --- a/sys/vfs/hammer2/hammer2_iocom.c +++ b/sys/vfs/hammer2/hammer2_iocom.c @@ -290,7 +290,6 @@ hammer2_update_spans(hammer2_dev_t *hmp, kdmsg_state_t *state) hammer2_key_t key_next; kdmsg_msg_t *rmsg; size_t name_len; - int ddflag; /* * Lookup mount point under the media-localized super-root. 
@@ -303,7 +302,7 @@ hammer2_update_spans(hammer2_dev_t *hmp, kdmsg_state_t *state) cluster = hammer2_cluster_lookup(cparent, &key_next, HAMMER2_KEY_MIN, HAMMER2_KEY_MAX, - 0, &ddflag); + 0); while (cluster) { if (hammer2_cluster_type(cluster) != HAMMER2_BREF_TYPE_INODE) continue; diff --git a/sys/vfs/hammer2/hammer2_ioctl.c b/sys/vfs/hammer2/hammer2_ioctl.c index 4b7db2daca..1a8217a0e3 100644 --- a/sys/vfs/hammer2/hammer2_ioctl.c +++ b/sys/vfs/hammer2/hammer2_ioctl.c @@ -376,7 +376,6 @@ hammer2_ioctl_pfs_get(hammer2_inode_t *ip, void *data) hammer2_cluster_t *cluster; hammer2_key_t key_next; int error; - int ddflag; error = 0; hmp = ip->pmp->iroot->cluster.focus->hmp; /* XXX */ @@ -391,18 +390,18 @@ hammer2_ioctl_pfs_get(hammer2_inode_t *ip, void *data) if (pfs->name_key == 0) { cluster = hammer2_cluster_lookup(cparent, &key_next, 0, (hammer2_key_t)-1, - 0, &ddflag); + 0); } else if (pfs->name_key == (hammer2_key_t)-1) { ripdata = &hammer2_cluster_rdata(rcluster)->ipdata; cluster = hammer2_cluster_lookup(cparent, &key_next, ripdata->name_key, ripdata->name_key, - 0, &ddflag); + 0); ripdata = NULL; /* safety */ } else { cluster = hammer2_cluster_lookup(cparent, &key_next, pfs->name_key, pfs->name_key, - 0, &ddflag); + 0); } hammer2_inode_unlock_ex(ip->pmp->iroot, rcluster); @@ -467,7 +466,6 @@ hammer2_ioctl_pfs_lookup(hammer2_inode_t *ip, void *data) hammer2_key_t key_next; hammer2_key_t lhc; int error; - int ddflag; size_t len; error = 0; @@ -481,7 +479,7 @@ hammer2_ioctl_pfs_lookup(hammer2_inode_t *ip, void *data) cluster = hammer2_cluster_lookup(cparent, &key_next, lhc, lhc + HAMMER2_DIRHASH_LOMASK, - HAMMER2_LOOKUP_SHARED, &ddflag); + HAMMER2_LOOKUP_SHARED); while (cluster) { if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) { ripdata = &hammer2_cluster_rdata(cluster)->ipdata; diff --git a/sys/vfs/hammer2/hammer2_syncthr.c b/sys/vfs/hammer2/hammer2_syncthr.c index f91a59b45e..d7032b2ccb 100644 --- a/sys/vfs/hammer2/hammer2_syncthr.c +++ 
b/sys/vfs/hammer2/hammer2_syncthr.c @@ -72,6 +72,11 @@ hammer2_syncthr_delete(hammer2_syncthr_t *thr) lockuninit(&thr->lk); } +/* + * Asynchronous remaster request. Ask the synchronization thread to + * start over soon (as if it were frozen and unfrozen, but without waiting). + * The thread always recalculates mastership relationships when restarting. + */ void hammer2_syncthr_remaster(hammer2_syncthr_t *thr) { @@ -109,7 +114,10 @@ hammer2_syncthr_unfreeze(hammer2_syncthr_t *thr) } /* - * Primary management thread + * Primary management thread. + * + * On the SPMP - handles bulkfree and dedup operations + * On a PFS - handles remastering and synchronization */ void hammer2_syncthr_primary(void *arg) @@ -133,6 +141,12 @@ hammer2_syncthr_primary(void *arg) lksleep(&thr->flags, &thr->lk, 0, "h2idle", 0); continue; } + + /* reset state on REMASTER request */ + if (thr->flags & HAMMER2_SYNCTHR_REMASTER) { + atomic_clear_int(&thr->flags, HAMMER2_SYNCTHR_REMASTER); + /* reset state */ + } lksleep(&thr->flags, &thr->lk, 0, "h2idle", 0); } thr->td = NULL; diff --git a/sys/vfs/hammer2/hammer2_vfsops.c b/sys/vfs/hammer2/hammer2_vfsops.c index b36b7fe635..8f13f1da65 100644 --- a/sys/vfs/hammer2/hammer2_vfsops.c +++ b/sys/vfs/hammer2/hammer2_vfsops.c @@ -493,13 +493,11 @@ hammer2_pfsfree_scan(hammer2_dev_t *hmp) { hammer2_pfs_t *pmp; hammer2_cluster_t *cluster; - hammer2_cluster_t *cparent; hammer2_chain_t *rchain; int didfreeze; int i; again: - cparent = NULL; TAILQ_FOREACH(pmp, &hammer2_pfslist, mntentry) { if (pmp->iroot == NULL) continue; @@ -508,7 +506,6 @@ again: hmp, pmp); hmp->spmp = NULL; } - cluster = &pmp->iroot->cluster; /* * Determine if this PFS is affected. If it is we must @@ -518,6 +515,7 @@ again: * in-progress will be aborted and it will have to start * over again when unfrozen, or exit if told to exit. 
*/ + cluster = &pmp->iroot->cluster; for (i = 0; i < cluster->nchains; ++i) { rchain = cluster->array[i].chain; if (rchain == NULL || rchain->hmp != hmp) @@ -526,7 +524,8 @@ again: } if (i != cluster->nchains) { hammer2_syncthr_freeze(&pmp->primary_thr); - cparent = hammer2_inode_lock_ex(pmp->iroot); + + cluster = hammer2_inode_lock_ex(pmp->iroot); /* * Remove the chain from matching elements of the PFS. @@ -537,17 +536,20 @@ again: continue; cluster->array[i].chain = NULL; - hammer2_chain_drop(rchain); - cluster->focus = NULL; + hammer2_chain_unlock(rchain); + if (cluster->focus == rchain) + cluster->focus = NULL; } - hammer2_inode_unlock_ex(pmp->iroot, cparent); - didfreeze = 1; + hammer2_inode_repoint(pmp->iroot, NULL, cluster); + hammer2_inode_unlock_ex(pmp->iroot, cluster); + didfreeze = 1; /* remaster, unfreeze down below */ } else { didfreeze = 0; } /* * Cleanup trailing chains. Do not reorder chains (for now). + * XXX might remove more than we intended. */ while (i > 0) { if (cluster->array[i - 1].chain) @@ -626,7 +628,6 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, int ronly = 1; int error; int cache_index; - int ddflag; int i; hmp = NULL; @@ -847,7 +848,7 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, parent = hammer2_chain_lookup_init(&hmp->vchain, 0); schain = hammer2_chain_lookup(&parent, &key_dummy, HAMMER2_SROOT_KEY, HAMMER2_SROOT_KEY, - &cache_index, 0, &ddflag); + &cache_index, 0); hammer2_chain_lookup_done(parent); if (schain == NULL) { kprintf("hammer2_mount: invalid super-root\n"); @@ -870,6 +871,8 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, * Replace the dummy spmp->iroot with a real one. It's * easier to just do a wholesale replacement than to try * to update the chain and fixup the iroot fields. + * + * The returned inode is locked with the supplied cluster. 
*/ cluster = hammer2_cluster_from_chain(schain); hammer2_inode_drop(spmp->iroot); @@ -914,7 +917,7 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, lhc = hammer2_dirhash(label, strlen(label)); cluster = hammer2_cluster_lookup(cparent, &key_next, lhc, lhc + HAMMER2_DIRHASH_LOMASK, - 0, &ddflag); + 0); while (cluster) { if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE && strcmp(label, @@ -1046,7 +1049,6 @@ hammer2_update_pmps(hammer2_dev_t *hmp) hammer2_pfs_t *spmp; hammer2_pfs_t *pmp; hammer2_key_t key_next; - int ddflag; /* * Lookup mount point under the media-localized super-root. @@ -1059,7 +1061,7 @@ hammer2_update_pmps(hammer2_dev_t *hmp) cluster = hammer2_cluster_lookup(cparent, &key_next, HAMMER2_KEY_MIN, HAMMER2_KEY_MAX, - 0, &ddflag); + 0); while (cluster) { if (hammer2_cluster_type(cluster) != HAMMER2_BREF_TYPE_INODE) continue; @@ -1091,7 +1093,7 @@ hammer2_write_thread(void *arg) struct vnode *vp; hammer2_inode_t *ip; hammer2_cluster_t *cparent; - hammer2_inode_data_t *wipdata; + const hammer2_inode_data_t *ripdata; hammer2_key_t lbase; int lblksize; int pblksize; @@ -1143,22 +1145,24 @@ hammer2_write_thread(void *arg) * NOTE: The inode_fsync() call only flushes the * inode's meta-data state, it doesn't try * to flush underlying buffers or chains. + * + * NOTE: hammer2_write_file_core() may indirectly + * modify and modsync the inode. 
*/ cparent = hammer2_inode_lock_ex(ip); if (ip->flags & (HAMMER2_INODE_RESIZED | HAMMER2_INODE_MTIME)) { hammer2_inode_fsync(&trans, ip, cparent); } - wipdata = hammer2_cluster_modify_ip(&trans, ip, - cparent, 0); + ripdata = &hammer2_cluster_rdata(cparent)->ipdata; lblksize = hammer2_calc_logical(ip, bio->bio_offset, &lbase, NULL); - pblksize = hammer2_calc_physical(ip, wipdata, lbase); - hammer2_write_file_core(bp, &trans, ip, wipdata, + pblksize = hammer2_calc_physical(ip, ripdata, lbase); + hammer2_write_file_core(bp, &trans, ip, ripdata, cparent, lbase, IO_ASYNC, pblksize, &error); - hammer2_cluster_modsync(cparent); + /* ripdata can be invalid after call */ hammer2_inode_unlock_ex(ip, cparent); if (error) { kprintf("hammer2: error in buffer write\n"); @@ -1195,6 +1199,8 @@ hammer2_bioq_sync(hammer2_pfs_t *pmp) /* * Return a chain suitable for I/O, creating the chain if necessary * and assigning its physical block. + * + * cparent can wind up being anything. */ static hammer2_cluster_t * @@ -1206,7 +1212,6 @@ hammer2_assign_physical(hammer2_trans_t *trans, hammer2_cluster_t *dparent; hammer2_key_t key_dummy; int pradix = hammer2_getradix(pblksize); - int ddflag; /* * Locate the chain associated with lbase, return a locked chain. 
@@ -1220,7 +1225,7 @@ retry: dparent = hammer2_cluster_lookup_init(cparent, 0); cluster = hammer2_cluster_lookup(dparent, &key_dummy, lbase, lbase, - HAMMER2_LOOKUP_NODATA, &ddflag); + HAMMER2_LOOKUP_NODATA); if (cluster == NULL) { /* @@ -1282,8 +1287,6 @@ retry: */ hammer2_cluster_lookup_done(dparent); /* dparent = NULL; safety */ - if (cluster && ddflag) - hammer2_cluster_replace_locked(cparent, cluster); return (cluster); } @@ -1321,6 +1324,7 @@ hammer2_write_file_core(struct buf *bp, hammer2_trans_t *trans, errorp); hammer2_write_bp(cluster, bp, ioflag, pblksize, errorp, ripdata->check_algo); + /* ripdata can become invalid */ if (cluster) hammer2_cluster_unlock(cluster); break; @@ -1482,21 +1486,27 @@ hammer2_compress_and_write(struct buf *bp, hammer2_trans_t *trans, goto done; } - for (i = 0; i < cluster->nchains; ++i) { + if (cluster->ddflag) { hammer2_inode_data_t *wipdata; + + wipdata = hammer2_cluster_modify_ip(trans, ip, cluster, 0); + KKASSERT(wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA); + KKASSERT(bp->b_loffset == 0); + bcopy(bp->b_data, wipdata->u.data, HAMMER2_EMBEDDED_BYTES); + hammer2_cluster_modsync(cluster); + } else + for (i = 0; i < cluster->nchains; ++i) { hammer2_io_t *dio; char *bdata; + /* XXX hackx */ + chain = cluster->array[i].chain; /* XXX */ KKASSERT(chain->flags & HAMMER2_CHAIN_MODIFIED); switch(chain->bref.type) { case HAMMER2_BREF_TYPE_INODE: - wipdata = &hammer2_chain_wdata(chain)->ipdata; - KKASSERT(wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA); - KKASSERT(bp->b_loffset == 0); - bcopy(bp->b_data, wipdata->u.data, - HAMMER2_EMBEDDED_BYTES); + panic("hammer2_write_bp: unexpected inode\n"); break; case HAMMER2_BREF_TYPE_DATA: /* @@ -1598,11 +1608,13 @@ hammer2_zero_check_and_write(struct buf *bp, hammer2_trans_t *trans, if (test_block_zeros(bp->b_data, pblksize)) { zero_write(bp, trans, ip, ripdata, cparent, lbase, errorp); + /* ripdata can become invalid */ } else { cluster = hammer2_assign_physical(trans, ip, cparent, lbase, 
pblksize, errorp); hammer2_write_bp(cluster, bp, ioflag, pblksize, errorp, check_algo); + /* ripdata can become invalid */ if (cluster) hammer2_cluster_unlock(cluster); } @@ -1636,20 +1648,20 @@ zero_write(struct buf *bp, hammer2_trans_t *trans, hammer2_key_t lbase, int *errorp __unused) { hammer2_cluster_t *cluster; - hammer2_media_data_t *data; hammer2_key_t key_dummy; - int ddflag; cparent = hammer2_cluster_lookup_init(cparent, 0); cluster = hammer2_cluster_lookup(cparent, &key_dummy, lbase, lbase, - HAMMER2_LOOKUP_NODATA, &ddflag); + HAMMER2_LOOKUP_NODATA); if (cluster) { - data = hammer2_cluster_wdata(cluster); + if (cluster->ddflag) { + hammer2_inode_data_t *wipdata; - if (ddflag) { - KKASSERT(cluster->focus->flags & - HAMMER2_CHAIN_MODIFIED); - bzero(data->ipdata.u.data, HAMMER2_EMBEDDED_BYTES); + wipdata = hammer2_cluster_modify_ip(trans, ip, + cluster, 0); + KKASSERT(wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA); + KKASSERT(bp->b_loffset == 0); + bzero(wipdata->u.data, HAMMER2_EMBEDDED_BYTES); hammer2_cluster_modsync(cluster); } else { hammer2_cluster_delete(trans, cparent, cluster, diff --git a/sys/vfs/hammer2/hammer2_vnops.c b/sys/vfs/hammer2/hammer2_vnops.c index e3fe502d18..991c7fc1e2 100644 --- a/sys/vfs/hammer2/hammer2_vnops.c +++ b/sys/vfs/hammer2/hammer2_vnops.c @@ -206,7 +206,8 @@ hammer2_vop_inactive(struct vop_inactive_args *ap) * the strategy code. Simply mark the inode modified so it gets * picked up by our normal flush. */ - cluster = hammer2_inode_lock_nex(ip, HAMMER2_RESOLVE_NEVER); + cluster = hammer2_inode_lock_nex(ip, HAMMER2_RESOLVE_NEVER | + HAMMER2_RESOLVE_RDONLY); KKASSERT(cluster); /* @@ -255,7 +256,8 @@ hammer2_vop_reclaim(struct vop_reclaim_args *ap) * Inode must be locked for reclaim. 
*/ pmp = ip->pmp; - cluster = hammer2_inode_lock_nex(ip, HAMMER2_RESOLVE_NEVER); + cluster = hammer2_inode_lock_nex(ip, HAMMER2_RESOLVE_NEVER | + HAMMER2_RESOLVE_RDONLY); /* * The final close of a deleted file or directory marks it for @@ -639,7 +641,6 @@ hammer2_vop_readdir(struct vop_readdir_args *ap) int ncookies; int error; int dtype; - int ddflag; int r; LOCKSTART; @@ -735,11 +736,11 @@ hammer2_vop_readdir(struct vop_readdir_args *ap) goto done; } cluster = hammer2_cluster_lookup(cparent, &key_next, lkey, lkey, - HAMMER2_LOOKUP_SHARED, &ddflag); + HAMMER2_LOOKUP_SHARED); if (cluster == NULL) { cluster = hammer2_cluster_lookup(cparent, &key_next, lkey, (hammer2_key_t)-1, - HAMMER2_LOOKUP_SHARED, &ddflag); + HAMMER2_LOOKUP_SHARED); } if (cluster) hammer2_cluster_bref(cluster, &bref); @@ -1243,7 +1244,6 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap) const uint8_t *name; size_t name_len; int error = 0; - int ddflag; struct vnode *vp; LOCKSTART; @@ -1259,7 +1259,7 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap) cparent = hammer2_inode_lock_sh(dip); cluster = hammer2_cluster_lookup(cparent, &key_next, lhc, lhc + HAMMER2_DIRHASH_LOMASK, - HAMMER2_LOOKUP_SHARED, &ddflag); + HAMMER2_LOOKUP_SHARED); while (cluster) { if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) { ripdata = &hammer2_cluster_rdata(cluster)->ipdata; @@ -1282,12 +1282,12 @@ hammer2_vop_nresolve(struct vop_nresolve_args *ap) ripdata = &hammer2_cluster_rdata(cluster)->ipdata; if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) { hammer2_tid_t inum = ripdata->inum; - error = hammer2_hardlink_find(dip, NULL, cluster); + error = hammer2_hardlink_find(dip, NULL, &cluster); if (error) { kprintf("hammer2: unable to find hardlink " "0x%016jx\n", inum); - hammer2_cluster_unlock(cluster); LOCKSTOP; + return error; } } @@ -2112,7 +2112,6 @@ hammer2_strategy_read(struct vop_strategy_args *ap) hammer2_cluster_t *cluster; hammer2_key_t key_dummy; hammer2_key_t lbase; - int ddflag; uint8_t btype; 
bio = ap->a_bio; @@ -2130,8 +2129,7 @@ hammer2_strategy_read(struct vop_strategy_args *ap) cluster = hammer2_cluster_lookup(cparent, &key_dummy, lbase, lbase, HAMMER2_LOOKUP_NODATA | - HAMMER2_LOOKUP_SHARED, - &ddflag); + HAMMER2_LOOKUP_SHARED); hammer2_inode_unlock_sh(ip, cparent); /* -- 2.41.0