From 46558838767cd04bc2d6886fc0bc326e4470dec3 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Wed, 6 Jun 2012 22:43:14 -0700
Subject: [PATCH] hammer2 - Initial CCMS locking tie-in

This is a necessary precursor step to being able to integrate the cache
state grants with our chain locks.

Basically we are replacing the hammer2 chain lockmgr lock
(hammer2_chain->lk) with a CCMS cst structure (hammer2_chain->cst).
This structure will become the attribute CST for hammer2 inodes.

The topological CST is built into the hammer2_inode.  Data-space CSTs
will initially be the hammer2_chain->cst for indirect blocks, though we
will probably also need one or more in hammer2_inode to handle generic
cases.
---
 sys/vfs/hammer2/hammer2.h        |   14 +-
 sys/vfs/hammer2/hammer2_ccms.c   | 1231 ++++--------------------------
 sys/vfs/hammer2/hammer2_ccms.h   |  357 +++------
 sys/vfs/hammer2/hammer2_chain.c  |  104 +--
 sys/vfs/hammer2/hammer2_subr.c   |   10 +-
 sys/vfs/hammer2/hammer2_vfsops.c |    2 +-
 6 files changed, 334 insertions(+), 1384 deletions(-)

diff --git a/sys/vfs/hammer2/hammer2.h b/sys/vfs/hammer2/hammer2.h
index fadb555ad2..f099fcf43f 100644
--- a/sys/vfs/hammer2/hammer2.h
+++ b/sys/vfs/hammer2/hammer2.h
@@ -54,6 +54,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -99,6 +100,7 @@ struct hammer2_pfsmount;
 SPLAY_HEAD(hammer2_chain_splay, hammer2_chain);

 struct hammer2_chain {
+	ccms_cst_t	cst;			/* attr or data cst */
 	struct hammer2_blockref	bref;
 	struct hammer2_blockref	bref_flush;	/* synchronized w/MOVED bit */
 	struct hammer2_chain	*parent;	/* return chain to root */
@@ -115,7 +117,6 @@ struct hammer2_chain {
 	struct buf	*bp;		/* buffer cache (ro) */
 	hammer2_media_data_t *data;	/* modified copy of data (rw) */
 	u_int	bytes;			/* physical size of data */
-	struct lock	lk;		/* lockmgr lock */
 	int	index;			/* index in parent */
 	u_int	refs;
 	u_int	busy;			/* soft-busy */
@@ -221,13 +222,16 @@ SPLAY_PROTOTYPE(hammer2_chain_splay, hammer2_chain, snode, hammer2_chain_cmp);

 /*
  * A hammer2 inode.
+ *
+ * NOTE: The inode's attribute CST which is also used to lock the inode
+ *	 is embedded in the chain (chain.cst) and aliased w/ attr_cst.
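+ *
+ *	 Illustrative sketch (not part of this change): because attr_cst
+ *	 is simply #defined to chain.cst below, locking the inode's
+ *	 attributes reduces to a CST thread-lock on the embedded chain:
+ *
+ *		ccms_thread_lock(&ip->attr_cst, CCMS_STATE_EXCLUSIVE);
+ *		... modify ip->ip_data under the attribute CST ...
+ *		ccms_thread_unlock(&ip->attr_cst);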
*/ struct hammer2_inode { struct hammer2_mount *hmp; /* Global mount */ struct hammer2_pfsmount *pmp; /* PFS mount */ struct hammer2_inode *pip; /* parent inode */ struct vnode *vp; - ccms_inode_t *cino; /* cluster cache state */ + ccms_cst_t topo_cst; /* directory topology cst */ hammer2_chain_t chain; struct hammer2_inode_data ip_data; struct lockf advlock; @@ -238,6 +242,12 @@ struct hammer2_inode { typedef struct hammer2_inode hammer2_inode_t; +#if defined(_KERNEL) + +#define attr_cst chain.cst + +#endif + /* * A hammer2 indirect block */ diff --git a/sys/vfs/hammer2/hammer2_ccms.c b/sys/vfs/hammer2/hammer2_ccms.c index 70b3631bf5..4d3c8b4211 100644 --- a/sys/vfs/hammer2/hammer2_ccms.c +++ b/sys/vfs/hammer2/hammer2_ccms.c @@ -48,74 +48,7 @@ #include "hammer2_ccms.h" -struct ccms_lock_scan_info { - ccms_inode_t *cino; - ccms_lock_t *lock; - ccms_cst_t *coll_cst; - int rstate_upgrade_needed; -}; - -static int ccms_cst_cmp(ccms_cst_t *b1, ccms_cst_t *b2); -static int ccms_lock_scan_cmp(ccms_cst_t *b1, void *arg); - -static int ccms_lock_get_match(ccms_cst_t *cst, void *arg); -static int ccms_lock_undo_match(ccms_cst_t *cst, void *arg); -static int ccms_lock_redo_match(ccms_cst_t *cst, void *arg); -static int ccms_lock_upgrade_match(ccms_cst_t *cst, void *arg); -static int ccms_lock_put_match(ccms_cst_t *cst, void *arg); - -static void ccms_lstate_get(ccms_cst_t *cst, ccms_state_t state); -static void ccms_lstate_put(ccms_cst_t *cst); -static void ccms_rstate_get(ccms_cst_t *cst, ccms_state_t state); -static void ccms_rstate_put(ccms_cst_t *cst); - -struct ccms_rb_tree; -RB_GENERATE3(ccms_rb_tree, ccms_cst, rbnode, ccms_cst_cmp, - ccms_off_t, beg_offset, end_offset); -static MALLOC_DEFINE(M_CCMS, "CCMS", "Cache Coherency Management System"); - -static int ccms_debug = 0; - -/* - * These helpers are called to manage the CST cache so we can avoid - * unnecessary kmalloc()'s and kfree()'s in hot paths. - * - * ccms_free_pass1() must be called with the spinlock held. - * ccms_free_pass2() must be called with the spinlock not held. - */ -static __inline -ccms_cst_t * -ccms_free_pass1(ccms_inode_t *cino, int keep) -{ - ccms_cst_t *cst; - ccms_cst_t **cstp; - - cstp = &cino->free_cache; - while ((cst = *cstp) != NULL && keep) { - cstp = &cst->free_next; - --keep; - } - *cstp = NULL; - return (cst); -} - -static __inline -void -ccms_free_pass2(ccms_cst_t *next) -{ - ccms_cst_t *cst; - ccms_domain_t *dom; - - while ((cst = next) != NULL) { - next = cst->free_next; - cst->free_next = NULL; - - dom = cst->cino->domain; - atomic_add_int(&dom->cst_count, -1); - - kfree(cst, dom->mcst); - } -} +int ccms_debug = 0; /* * Initialize a new CCMS dataspace. Create a new RB tree with a single @@ -126,1098 +59,272 @@ void ccms_domain_init(ccms_domain_t *dom) { bzero(dom, sizeof(*dom)); - kmalloc_create(&dom->mcst, "CCMS-cst"); + /*kmalloc_create(&dom->mcst, "CCMS-cst");*/ /*dom->root.domain = dom;*/ } void ccms_domain_uninit(ccms_domain_t *dom) { - kmalloc_destroy(&dom->mcst); + /*kmalloc_destroy(&dom->mcst);*/ } -#if 0 -/* - * Initialize a ccms_inode for use. The inode will be initialized but - * is not yet connected to the rest of the topology. However, it can - * still be used stand-alone if desired without being connected to the - * topology. 
- */ void -ccms_inode_init(ccms_domain_t *dom, ccms_inode_t *cino, void *handle) +ccms_cst_init(ccms_cst_t *cst, void *handle) { - ccms_cst_t *cst; - - bzero(cino, sizeof(*cino)); - - spin_init(&cino->spin); - RB_INIT(&cino->tree); - cino->domain = dom; - cino->handle = handle; - /* cino->attr_cst.cino = cino; no rbtree association */ - cino->attr_cst.lstate = CCMS_STATE_INVALID; - cino->attr_cst.rstate = CCMS_STATE_INVALID; - - /* - * The dataspace must be initialized w/cache-state set to INVALID - * for the entire range. - */ - cst = kmalloc(sizeof(*cst), dom->mcst, M_WAITOK | M_ZERO); - cst->cino = cino; - cst->flags = CCMS_CST_DYNAMIC; - cst->beg_offset = 0; - cst->end_offset = 0xFFFFFFFFFFFFFFFFLLU; - cst->lstate = CCMS_STATE_INVALID; - cst->rstate = CCMS_STATE_INVALID; - RB_INSERT(ccms_rb_tree, &cino->tree, cst); - atomic_add_int(&dom->cst_count, 1); + bzero(cst, sizeof(*cst)); + cst->handle = handle; } -/* - * Associate the topology CST with a CCMS inode. The topology CST must - * be held locked (typically SHARED) by the caller. The caller is responsible - * for interlocking a unique ccms_inode to prevent SMP races. - */ void -ccms_inode_associate(ccms_inode_t *cino, ccms_cst_t *topo_cst) +ccms_cst_uninit(ccms_cst_t *cst) { - KKASSERT(topo_cst->tag.cino == NULL); - - spin_lock(&cino->spin); - topo_cst->tag.cino = cino; - topo_cst->flags |= CCMS_CST_INODE; - - cino->topo_cst = topo_cst; - cino->parent = topo_cst->cino; - cino->flags |= CCMS_INODE_INSERTED; - spin_unlock(&cino->spin); -} - -#if 0 - -int -ccms_lock_get(ccms_inode_t *cino, ccms_lock_t *lock) - - spin_lock(&cpar->spin); - spin_lock(&cino->spin); - - KKASSERT((cino->flags & CCMS_INODE_INSERTED) == 0); - cino->topo_cst.beg_offset = key; - cino->topo_cst.end_offset = key; - - if (RB_INSERT(ccms_rb_tree, &cpar->tree, &cino->topo_cst)) { - spin_unlock(&cino->spin); - spin_unlock(&cpar->spin); - panic("ccms_inode_insert: duplicate entry"); + KKASSERT(cst->count == 0); + if (cst->state != CCMS_STATE_INVALID) { + /* XXX */ } - cino->parent = cpar; - cino->flags |= CCMS_INODE_INSERTED; - spin_unlock(&cino->spin); - spin_unlock(&cpar->spin); + cst->handle = NULL; } -#endif - +#if 0 /* - * Delete an inode from the topology. The inode can remain in active use - * after the deletion (e.g. when unlinking a file which still has open - * descriptors) but it's topo_cst is removed from its parent. + * Acquire an operational CCMS lock on multiple CSTs. * - * If the caller is destroying the ccms_inode the caller must call - * ccms_inode_uninit() to invalidate the cache state (which can block). + * This code is in the critical path and highly streamlined. */ void -ccms_inode_disassociate(ccms_inode_t *cino) +ccms_lock_get(ccms_lock_t *lock) { - ccms_inode_t *cpar; - ccms_cst_t *topo_cst; - int flags; - - /* - * Interlock with the DELETING flag. - */ - spin_lock(&cino->spin); - flags = cino->flags; - cino->flags |= CCMS_INODE_DELETING; - spin_unlock(&cino->spin); - - if (flags & CCMS_INODE_DELETING) - return; - if ((flags & CCMS_INODE_INSERTED) == 0) - return; - - /* - * - */ - topo_cst = cino->topo_cst; + ccms_inode_t *cino = lock->cino; -ccms_lock_put(ccms_inode_t *cino, ccms_lock_t *lock) +again: + lock->flags &= ~CCMS_LOCK_FAILED; /* - * We have the interlock, we are the only ones who can delete - * the inode now. + * Acquire all local locks first, then resolve them against the + * remote cache state. Order is important here. 
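+	 *
+	 * Hypothetical caller-side sketch (this whole path is still
+	 * inside #if 0, so the exact setup is subject to change): a
+	 * read-style operation wanting shared attribute and data state
+	 * would fill in the request fields roughly as follows before
+	 * calling in here:
+	 *
+	 *	lock->req_t = 0;
+	 *	lock->req_a = CCMS_STATE_SHARED;
+	 *	lock->req_d = CCMS_STATE_SHARED;
+	 *	ccms_lock_get(lock);
+	 *	...
+	 *	ccms_lock_put(lock);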
*/ - cpar = cino->parent; - spin_lock(&cpar->spin); - spin_lock(&cino->spin); - KKASSERT(cpar == cino->parent); - - cino->flags &= ~CCMS_INODE_INSERTED; - RB_REMOVE(ccms_rb_tree, &cpar->tree, &cino->topo_cst); - - spin_unlock(&cino->spin); - spin_unlock(&cpar->spin); -} - -/* - * The caller has removed the inode from the topology and is now trying - * to destroy the structure. This routine flushes the cache state and - * can block on third-party interactions. - * - * NOTE: Caller must have already destroyed any recursive inode state. - */ -void -ccms_inode_uninit(ccms_inode_t *cino) -{ - ccms_cst_t *scan; - - KKASSERT((cino->flags & CCMS_INODE_INSERTED) == 0); - spin_lock(&cino->spin); - - while ((scan = RB_ROOT(&cino->tree)) != NULL) { - KKASSERT(scan->flags & CCMS_CST_DYNAMIC); - KKASSERT((scan->flags & CCMS_CST_DELETING) == 0); - RB_REMOVE(ccms_rb_tree, &cino->tree, scan); - scan->flags |= CCMS_CST_DELETING; - scan->flags &= ~CCMS_CST_INSERTED; - spin_unlock(&cino->spin); - - /* - * Inval can be called without the inode spinlock because - * we own the DELETING flag. - */ - ccms_lstate_put(scan); - ccms_rstate_put(scan); - atomic_add_int(&cino->domain->cst_count, -1); - - kfree(scan, cino->domain->mcst); - spin_lock(&cino->spin); + if (lock->req_t) { + KKASSERT(lock->req_d <= lock->req_t); + KKASSERT(lock->req_a <= lock->req_t); + ccms_thread_lock(&cino->topo_cst, lock->req_t); } - KKASSERT((cino->attr_cst.flags & CCMS_CST_DELETING) == 0); - cino->attr_cst.flags |= CCMS_CST_DELETING; - KKASSERT((cino->topo_cst.flags & CCMS_CST_DELETING) == 0); - cino->topo_cst.flags |= CCMS_CST_DELETING; - spin_unlock(&cino->spin); - - /* - * Inval can be called without the inode spinlock because - * we own the DELETING flag. Similarly we can clear cino->domain - * and cino->handle because we own the DELETING flag on the cino. - */ - ccms_lstate_put(&cino->attr_cst); - ccms_rstate_put(&cino->attr_cst); - ccms_lstate_put(&cino->topo_cst); - ccms_rstate_put(&cino->topo_cst); + if (lock->req_a) + ccms_thread_lock(&cino->attr_cst, lock->req_a); + if (lock->req_d) + ccms_thread_lock(&cino->data_cst[0], lock->req_d); /* - * Clean out the ccms_inode free CST cache + * Once the local locks are established the CST grant state cannot + * be pulled out from under us. However, it is entirely possible + * to deadlock on it so when CST grant state cannot be obtained + * trivially we have to unwind our local locks, then get the state, + * and then loop. */ - spin_lock(&cino->spin); - scan = ccms_free_pass1(cino, 0); - spin_unlock(&cino->spin); - ccms_free_pass2(scan); - - cino->domain = NULL; - cino->handle = NULL; -} - -#endif - -/* - * This is the core CCMS lock acquisition code and is typically called - * by program-specific wrappers which initialize the lock structure. - * - * Three cache coherent domains can be obtained, the topological 't' - * domain, the attribute 'a' domain, and a range in the data 'd' domain. - * - * A topological CCMS lock covers the entire attribute and data domain - * plus recursively covers the entire directory sub-tree, so if a topo - * lock is requested the other 'a' and 'd' locks currently assert if - * specified in the same request. - * - * You can get both an 'a' and a 'd' lock at the same time and, in - * particular, a VFS can use the 'a' lock to also lock the related - * VFS inode structure if it desires to. HAMMER2 utilizes this feature. 
- * - * Topo locks are typically needed for rename operations and topo CST - * cache state on the backend can be used to limit the number of dynamic - * CST allocations backing the live CCMS locks. - */ -int -ccms_lock_get(ccms_inode_t *cino, ccms_lock_t *lock) -{ - struct ccms_lock_scan_info info; - ccms_cst_t *cst; - int use_redo = 0; - ccms_state_t highest_state; - - /* - * Live local locks prevent remotes from downgrading the rstate, - * so we have to acquire a local lock before testing rstate. If - * - * The local lock must be released if a remote upgrade is required - * to avoid a deadlock, and we retry in that situation. - */ -again: - if (lock->tstate) { - KKASSERT(lock->astate == 0 && lock->dstate == 0); - lock->icst = &cino->topo_cst; - ccms_lstate_get(lock->icst, lock->tstate); - - if (cino->topo_cst.rstate < lock->tstate) { - ccms_lstate_put(&cino->topo_cst); - ccms_rstate_get(&cino->topo_cst, lock->tstate); - goto again; - } - } else { - /* - * The topo rstate must be at least ALLOWED for us to be - * able to acquire any other cache state. If the topo - * rstate is already higher than that then we may have - * to upgrade it further to cover the lstate's we are - * requesting. - */ - highest_state = CCMS_STATE_ALLOWED; - if (cino->topo_cst.rstate > highest_state) { - if (highest_state < lock->astate) - highest_state = lock->astate; - if (highest_state < lock->dstate) - highest_state = lock->dstate; - } - if (cino->topo_cst.rstate < highest_state) - ccms_rstate_get(&cino->topo_cst, highest_state); - /* no need to retry */ + if (lock->req_t > cino->topo_cst.state) { + ccms_rstate_get(lock, &cino->topo_cst, lock->req_t); + } else if (cino->topo_cst.state == CCMS_STATE_INVALID) { + ccms_rstate_get(lock, &cino->topo_cst, CCMS_STATE_ALLOWED); + } else if (cino->topo_cst.state == CCMS_STATE_SHARED && + (lock->req_d > CCMS_STATE_SHARED || + lock->req_a > CCMS_STATE_SHARED)) { + ccms_rstate_get(lock, &cino->topo_cst, CCMS_STATE_ALLOWED); } - if (lock->astate) { - lock->icst = &cino->attr_cst; - ccms_lstate_get(lock->icst, lock->astate); - - if (cino->attr_cst.rstate < lock->astate) { - ccms_lstate_put(&cino->attr_cst); - if (lock->tstate) - ccms_lstate_put(&cino->topo_cst); - ccms_rstate_get(&cino->attr_cst, lock->astate); - goto again; - } - } - - /* - * The data-lock is a range-lock and requires a bit more code. - * The CST space is partitioned so the precise range is covered. - * - * Multiple CST's may be involved and dcst points to the left hand - * edge. - */ - if (lock->dstate) { - info.lock = lock; - info.cino = cino; - info.coll_cst = NULL; + /* else the rstate is compatible */ - spin_lock(&cino->spin); - - /* - * Make sure cino has enough free CSTs to cover the operation, - * so we can hold the spinlock through the scan later on. - */ - while (cino->free_cache == NULL || - cino->free_cache->free_next == NULL) { - spin_unlock(&cino->spin); - cst = kmalloc(sizeof(*cst), cino->domain->mcst, - M_WAITOK | M_ZERO); - atomic_add_int(&cino->domain->cst_count, 1); - spin_lock(&cino->spin); - cst->free_next = cino->free_cache; - cino->free_cache = cst; - } - - /* - * The partitioning code runs with the spinlock held. If - * we've already partitioned due to having to do an rstate - * upgrade we run a redo instead of a get. 
- */ - info.rstate_upgrade_needed = 0; - if (use_redo == 0) { - RB_SCAN(ccms_rb_tree, &cino->tree, ccms_lock_scan_cmp, - ccms_lock_get_match, &info); - } else { - RB_SCAN(ccms_rb_tree, &cino->tree, ccms_lock_scan_cmp, - ccms_lock_redo_match, &info); - } - - /* - * If a collision occured, undo the fragments we were able - * to obtain, block, and try again. - */ - while (info.coll_cst != NULL) { - RB_SCAN(ccms_rb_tree, &cino->tree, ccms_lock_scan_cmp, - ccms_lock_undo_match, &info); - info.coll_cst->blocked = 1; - info.coll_cst = NULL; - ssleep(info.coll_cst, &cino->spin, 0, "ccmsget", hz); - info.rstate_upgrade_needed = 0; - RB_SCAN(ccms_rb_tree, &cino->tree, ccms_lock_scan_cmp, - ccms_lock_redo_match, &info); - } - - /* - * If the rstate needs to be upgraded we have to undo the - * local locks (but we retain the partitioning). - * - * Set use_redo to indicate that the partioning was retained - * (i.e. lrefs and rrefs remain intact). - */ - if (info.rstate_upgrade_needed) { - RB_SCAN(ccms_rb_tree, &cino->tree, ccms_lock_scan_cmp, - ccms_lock_undo_match, &info); - spin_unlock(&cino->spin); - if (lock->astate) - ccms_lstate_put(&cino->attr_cst); - if (lock->tstate) - ccms_lstate_put(&cino->topo_cst); - spin_lock(&cino->spin); - RB_SCAN(ccms_rb_tree, &cino->tree, ccms_lock_scan_cmp, - ccms_lock_upgrade_match, &info); - spin_unlock(&cino->spin); - use_redo = 1; - goto again; - } + if (lock->req_a > cino->attr_cst.state) + ccms_rstate_get(lock, &cino->attr_cst, lock->req_a); - /* - * Cleanup free CSTs beyond the 2 we wish to retain. - */ - cst = ccms_free_pass1(cino, 2); - spin_unlock(&cino->spin); - ccms_free_pass2(cst); - } + if (lock->req_d > cino->data_cst[0].state) + ccms_rstate_get(lock, &cino->data_cst[0], lock->req_d); /* - * Ok, everything is in good shape EXCEPT we might not have - * sufficient topo_cst.rstate. It could have gotten ripped - * out from under us. Once we have the local locks it can - * no longer be downgraded so a check here suffices. + * If the ccms_rstate_get() code deadlocks (or even if it just + * blocks), it will release all local locks and set the FAILED + * bit. The routine will still acquire the requested remote grants + * before returning but since the local locks are lost at that + * point the remote grants are no longer protected and we have to + * retry. */ - highest_state = CCMS_STATE_ALLOWED; - if (highest_state < lock->tstate) - highest_state = lock->tstate; - if (highest_state < lock->astate) - highest_state = lock->astate; - if (highest_state < lock->dstate) - highest_state = lock->dstate; - - if (cino->topo_cst.rstate < highest_state) { - if (lock->dstate) { - spin_lock(&cino->spin); - RB_SCAN(ccms_rb_tree, &cino->tree, ccms_lock_scan_cmp, - ccms_lock_put_match, &info); - spin_unlock(&cino->spin); - } - if (lock->astate) - ccms_lstate_put(&cino->attr_cst); - if (lock->tstate) - ccms_lstate_put(&cino->topo_cst); - ccms_rstate_get(&cino->topo_cst, highest_state); - use_redo = 0; + if (lock->flags & CCMS_LOCK_FAILED) { goto again; } - return(0); } /* - * Obtain a CCMS lock, initialize the lock structure based on the uio. - * - * Both the attribute AND a ranged-data lock is acquired. + * Release a previously acquired CCMS lock. 
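+ *
+ * The CST thread-locks are released in the reverse order of their
+ * acquisition in ccms_lock_get() above: data, then attr, then topo.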
*/ -int -ccms_lock_get_uio(ccms_inode_t *cino, ccms_lock_t *lock, struct uio *uio) +void +ccms_lock_put(ccms_lock_t *lock) { - ccms_state_t dstate; - ccms_off_t eoff; - - if (uio->uio_rw == UIO_READ) - dstate = CCMS_STATE_SHARED; - else - dstate = CCMS_STATE_MODIFIED; + ccms_inode_t *cino = lock->cino; - /* - * Calculate the ending offset (byte inclusive), make sure a seek - * overflow does not blow us up. - */ - eoff = uio->uio_offset + uio->uio_resid - 1; - if (eoff < uio->uio_offset) - eoff = 0x7FFFFFFFFFFFFFFFLL; - lock->beg_offset = uio->uio_offset; - lock->end_offset = eoff; - lock->tstate = 0; - lock->astate = dstate; - lock->dstate = dstate; - return (ccms_lock_get(cino, lock)); + if (lock->req_d) { + ccms_thread_unlock(&cino->data_cst[0]); + } + if (lock->req_a) { + ccms_thread_unlock(&cino->attr_cst); + } + if (lock->req_t) { + ccms_thread_unlock(&cino->topo_cst); + } } -/* - * Obtain a CCMS lock. Only the attribute lock is acquired. - */ -int -ccms_lock_get_attr(ccms_inode_t *cino, ccms_lock_t *lock, ccms_state_t astate) -{ - lock->tstate = 0; - lock->astate = astate; - lock->dstate = 0; - return (ccms_lock_get(cino, lock)); -} +#endif + +/************************************************************************ + * CST SUPPORT FUNCTIONS * + ************************************************************************/ /* - * Helper routine. - * - * NOTE: called with spinlock held. + * Acquire local cache state & lock. If the current thread already holds + * the lock exclusively we bump the exclusive count, even if the thread is + * trying to get a shared lock. */ -static -int -ccms_lock_get_match(ccms_cst_t *cst, void *arg) +void +ccms_thread_lock(ccms_cst_t *cst, ccms_state_t state) { - struct ccms_lock_scan_info *info = arg; - ccms_lock_t *lock = info->lock; - ccms_cst_t *ncst; - - /* - * If the lock's left edge is within the CST we must split the CST - * into two pieces [cst][ncst]. lrefs must be bumped on the CST - * containing the left edge. - * - * NOTE! cst->beg_offset may not be modified. This allows us to - * avoid having to manipulate the cst's position in the tree. - */ - if (lock->beg_offset > cst->beg_offset) { - ncst = info->cino->free_cache; - info->cino->free_cache = ncst->free_next; - ncst->free_next = NULL; - KKASSERT(ncst != NULL); - - *ncst = *cst; - cst->end_offset = lock->beg_offset - 1; - cst->rrefs = 0; - ncst->beg_offset = lock->beg_offset; - ncst->lrefs = 1; - RB_INSERT(ccms_rb_tree, &info->cino->tree, ncst); - - /* - * ncst becomes our 'matching' cst. - */ - cst = ncst; - } else if (lock->beg_offset == cst->beg_offset) { - ++cst->lrefs; - } - - /* - * If the lock's right edge is within the CST we must split the CST - * into two pieces [cst][ncst]. rrefs must be bumped on the CST - * containing the right edge. - * - * NOTE! cst->beg_offset may not be modified. This allows us to - * avoid having to manipulate the cst's position in the tree. - */ - if (lock->end_offset < cst->end_offset) { - ncst = info->cino->free_cache; - info->cino->free_cache = ncst->free_next; - ncst->free_next = NULL; - KKASSERT(ncst != NULL); - - *ncst = *cst; - cst->end_offset = lock->end_offset; - cst->rrefs = 1; - ncst->beg_offset = lock->end_offset + 1; - ncst->lrefs = 0; - RB_INSERT(ccms_rb_tree, &info->cino->tree, ncst); - /* cst remains our 'matching' cst */ - } else if (lock->end_offset == cst->end_offset) { - ++cst->rrefs; + if (cst->count < 0 && cst->td == curthread) { + --cst->count; + return; } - /* - * The lock covers the CST, so increment the CST's coverage count. 
- * Then attempt to obtain the shared/exclusive lock. The coverage - * count is maintained until the put operation. - */ - ++cst->xrefs; - if (cst->lstate < lock->dstate) - cst->lstate = lock->dstate; - - /* - * If we have already collided we make no more modifications - * to cst->count, but we must continue the scan to properly - * partition the cst. - */ - if (info->coll_cst) - return(0); - - switch(lock->dstate) { - case CCMS_STATE_INVALID: - break; - case CCMS_STATE_ALLOWED: - case CCMS_STATE_SHARED: - case CCMS_STATE_SLAVE: - if (cst->count < 0) { - info->coll_cst = cst; - } else { - ++cst->count; - if (ccms_debug >= 9) { - kprintf("CST SHARE %d %lld-%lld\n", - cst->count, - (long long)cst->beg_offset, - (long long)cst->end_offset); - } - } - break; - case CCMS_STATE_MASTER: - case CCMS_STATE_EXCLUSIVE: - if (cst->count != 0) { - info->coll_cst = cst; - } else { - --cst->count; - if (ccms_debug >= 9) { - kprintf("CST EXCLS %d %lld-%lld\n", - cst->count, - (long long)cst->beg_offset, - (long long)cst->end_offset); - } + spin_lock(&cst->spin); + if (state == CCMS_STATE_SHARED) { + while (cst->count < 0) { + cst->blocked = 1; + ssleep(cst, &cst->spin, 0, "ccmslck", hz); } - break; - case CCMS_STATE_MODIFIED: - if (cst->count != 0) { - info->coll_cst = cst; - } else { - --cst->count; - if (cst->lstate <= CCMS_STATE_EXCLUSIVE) - cst->lstate = CCMS_STATE_MODIFIED; - if (ccms_debug >= 9) { - kprintf("CST MODXL %d %lld-%lld\n", - cst->count, - (long long)cst->beg_offset, - (long long)cst->end_offset); - } + ++cst->count; + } else if (state == CCMS_STATE_EXCLUSIVE) { + while (cst->count != 0) { + cst->blocked = 1; + ssleep(cst, &cst->spin, 0, "ccmslck", hz); } - break; - default: - panic("ccms_lock_get_match: bad state %d\n", lock->dstate); - break; + cst->count = -1; + cst->td = curthread; + } else { + spin_unlock(&cst->spin); + panic("ccms_thread_lock: bad state %d\n", state); } - return(0); + spin_unlock(&cst->spin); } /* - * Undo a partially resolved ccms_ltype rangelock. This is atomic with - * the scan/redo code so there should not be any blocked locks when - * transitioning to 0. lrefs and rrefs are not touched in order to - * retain the partitioning. - * - * If coll_cst is non-NULL we stop when we hit this element as locks on - * no further elements were obtained. This element might not represent - * a left or right edge but coll_cst can only be non-NULL if the spinlock - * was held throughout the get/redo and the undo. - * - * NOTE: called with spinlock held. + * Same as ccms_thread_lock() but acquires the lock non-blocking. Returns + * 0 on success, EBUSY on failure. */ -static int -ccms_lock_undo_match(ccms_cst_t *cst, void *arg) +ccms_thread_lock_nonblock(ccms_cst_t *cst, ccms_state_t state) { - struct ccms_lock_scan_info *info = arg; - ccms_lock_t *lock = info->lock; - - if (cst == info->coll_cst) - return(-1); - - switch (lock->dstate) { - case CCMS_STATE_INVALID: - break; - case CCMS_STATE_ALLOWED: - case CCMS_STATE_SHARED: - case CCMS_STATE_SLAVE: - KKASSERT(cst->count > 0); + if (cst->count < 0 && cst->td == curthread) { --cst->count; - KKASSERT(cst->count || cst->blocked == 0); - break; - case CCMS_STATE_MASTER: - case CCMS_STATE_EXCLUSIVE: - case CCMS_STATE_MODIFIED: - KKASSERT(cst->count < 0); - ++cst->count; - KKASSERT(cst->count || cst->blocked == 0); - break; - default: - panic("ccms_lock_undo_match: bad state %d\n", lock->dstate); - break; + return(0); } - return(0); -} - -/* - * Redo the local lock request for a range which has already been - * partitioned. 
- * - * NOTE: called with spinlock held. - */ -static -int -ccms_lock_redo_match(ccms_cst_t *cst, void *arg) -{ - struct ccms_lock_scan_info *info = arg; - ccms_lock_t *lock = info->lock; - KKASSERT(info->coll_cst == NULL); - - switch(lock->dstate) { - case CCMS_STATE_INVALID: - break; - case CCMS_STATE_ALLOWED: - case CCMS_STATE_SHARED: - case CCMS_STATE_SLAVE: + spin_lock(&cst->spin); + if (state == CCMS_STATE_SHARED) { if (cst->count < 0) { - info->coll_cst = cst; - } else { - if (ccms_debug >= 9) { - kprintf("CST SHARE %d %lld-%lld\n", - cst->count, - (long long)cst->beg_offset, - (long long)cst->end_offset); - } - ++cst->count; + spin_unlock(&cst->spin); + return (EBUSY); } - break; - case CCMS_STATE_MASTER: - case CCMS_STATE_EXCLUSIVE: - if (cst->count != 0) { - info->coll_cst = cst; - } else { - --cst->count; - if (ccms_debug >= 9) { - kprintf("CST EXCLS %d %lld-%lld\n", - cst->count, - (long long)cst->beg_offset, - (long long)cst->end_offset); - } - } - break; - case CCMS_STATE_MODIFIED: + ++cst->count; + } else if (state == CCMS_STATE_EXCLUSIVE) { if (cst->count != 0) { - info->coll_cst = cst; - } else { - --cst->count; - if (ccms_debug >= 9) { - kprintf("CST MODXL %d %lld-%lld\n", - cst->count, - (long long)cst->beg_offset, - (long long)cst->end_offset); - } + spin_unlock(&cst->spin); + return (EBUSY); } - break; - default: - panic("ccms_lock_redo_match: bad state %d\n", lock->dstate); - break; - } - - if (info->coll_cst) - return(-1); /* stop the scan */ - return(0); /* continue the scan */ -} - -/* - * Upgrade the rstate for the matching range. - * - * NOTE: Called with spinlock held. - */ -static -int -ccms_lock_upgrade_match(ccms_cst_t *cst, void *arg) -{ - struct ccms_lock_scan_info *info = arg; - ccms_lock_t *lock = info->lock; - - /* - * ccms_rstate_get() can block so we must release the spinlock. - * To prevent the cst from getting ripped out on us we temporarily - * bump both lrefs and rrefs. - */ - if (cst->rstate < lock->dstate) { - ++cst->lrefs; - ++cst->rrefs; - spin_unlock(&info->cino->spin); - ccms_rstate_get(cst, lock->dstate); - spin_lock(&info->cino->spin); - --cst->lrefs; - --cst->rrefs; - } - return(0); -} - -/* - * Release a previously acquired CCMS lock. - */ -int -ccms_lock_put(ccms_inode_t *cino, ccms_lock_t *lock) -{ - struct ccms_lock_scan_info info; - ccms_cst_t *scan; - - if (lock->tstate) { - ccms_lstate_put(lock->icst); - lock->tstate = 0; - lock->icst = NULL; - } else if (lock->astate) { - ccms_lstate_put(lock->icst); - lock->astate = 0; - lock->icst = NULL; - } - - if (lock->dstate) { - info.lock = lock; - info.cino = cino; - spin_lock(&cino->spin); - RB_SCAN(ccms_rb_tree, &cino->tree, ccms_lock_scan_cmp, - ccms_lock_put_match, &info); - scan = ccms_free_pass1(cino, 2); - spin_unlock(&cino->spin); - ccms_free_pass2(scan); - lock->dstate = 0; - lock->dcst = NULL; + cst->count = -1; + cst->td = curthread; + } else { + spin_unlock(&cst->spin); + panic("ccms_thread_lock_nonblock: bad state %d\n", state); } - + spin_unlock(&cst->spin); return(0); } /* - * Release a local lock. The related CST's lstate is set to INVALID once - * the coverage drops to 0 and adjacent compatible entries will be - * recombined. - * - * NOTE: called with spinlock held. + * Release a local thread lock */ -static -int -ccms_lock_put_match(ccms_cst_t *cst, void *arg) +void +ccms_thread_unlock(ccms_cst_t *cst) { - struct ccms_lock_scan_info *info = arg; - ccms_lock_t *lock = info->lock; - ccms_cst_t *ocst; - - /* - * Undo the local shared/exclusive rangelock. 
- */ - switch(lock->dstate) { - case CCMS_STATE_INVALID: - break; - case CCMS_STATE_ALLOWED: - case CCMS_STATE_SHARED: - case CCMS_STATE_SLAVE: - KKASSERT(cst->count > 0); - --cst->count; - if (ccms_debug >= 9) { - kprintf("CST UNSHR %d %lld-%lld (%d)\n", cst->count, - (long long)cst->beg_offset, - (long long)cst->end_offset, - cst->blocked); + if (cst->count < 0) { + if (cst->count < -1) { + ++cst->count; + return; } - if (cst->blocked && cst->count == 0) { + spin_lock(&cst->spin); + KKASSERT(cst->count == -1); + cst->count = 0; + cst->td = NULL; + if (cst->blocked) { cst->blocked = 0; + spin_unlock(&cst->spin); wakeup(cst); + return; } - break; - case CCMS_STATE_MASTER: - case CCMS_STATE_EXCLUSIVE: - case CCMS_STATE_MODIFIED: - KKASSERT(cst->count < 0); - ++cst->count; - if (ccms_debug >= 9) { - kprintf("CST UNEXC %d %lld-%lld (%d)\n", cst->count, - (long long)cst->beg_offset, - (long long)cst->end_offset, - cst->blocked); - } - if (cst->blocked && cst->count == 0) { + spin_unlock(&cst->spin); + } else if (cst->count > 0) { + spin_lock(&cst->spin); + if (--cst->count == 0 && cst->blocked) { cst->blocked = 0; + spin_unlock(&cst->spin); wakeup(cst); + return; } - break; - default: - panic("ccms_lock_put_match: bad state %d\n", lock->dstate); - break; - } - - /* - * Decrement the lock coverage count on the CST. Decrement the left - * and right edge counts as appropriate. - * - * When lrefs or rrefs drops to zero we check the adjacent entry to - * determine whether a merge is possible. If the appropriate refs - * field (rrefs for the entry to our left, lrefs for the entry to - * our right) is 0, then all covering locks must cover both entries - * and the xrefs field must match. We can then merge the entries - * if they have compatible cache states. - * - * However, because we are cleaning up the shared/exclusive count - * at the same time, the count field may be temporarily out of - * sync, so require that the count field also match before doing - * a merge. - * - * When merging an element which is being blocked on, the blocking - * thread(s) will be woken up. - * - * If the dataspace has too many CSTs we may be able to merge the - * entries even if their cache states are not the same, by dropping - * both to a compatible (lower) cache state and performing the - * appropriate management operations. 
XXX - */ - if (--cst->xrefs == 0) - cst->lstate = CCMS_STATE_INVALID; - - if (lock->beg_offset == cst->beg_offset && --cst->lrefs == 0) { - if ((ocst = RB_PREV(ccms_rb_tree, - &info->cino->tree, cst)) != NULL && - ocst->rrefs == 0 && - ocst->lstate == cst->lstate && - ocst->rstate == cst->rstate && - ocst->count == cst->count - ) { - KKASSERT(ocst->xrefs == cst->xrefs); - KKASSERT(ocst->end_offset + 1 == cst->beg_offset); - RB_REMOVE(ccms_rb_tree, &info->cino->tree, ocst); - cst->beg_offset = ocst->beg_offset; - cst->lrefs = ocst->lrefs; - if (ccms_debug >= 9) { - kprintf("MERGELEFT %p %lld-%lld (%d)\n", - ocst, - (long long)cst->beg_offset, - (long long)cst->end_offset, - cst->blocked); - } - if (ocst->blocked) { - ocst->blocked = 0; - wakeup(ocst); - } - ocst->free_next = info->cino->free_cache; - info->cino->free_cache = ocst; - } - } - if (lock->end_offset == cst->end_offset && --cst->rrefs == 0) { - if ((ocst = RB_NEXT(ccms_rb_tree, - &info->cino->tree, cst)) != NULL && - ocst->lrefs == 0 && - ocst->lstate == cst->lstate && - ocst->rstate == cst->rstate && - ocst->count == cst->count - ) { - KKASSERT(ocst->xrefs == cst->xrefs); - KKASSERT(cst->end_offset + 1 == ocst->beg_offset); - RB_REMOVE(ccms_rb_tree, &info->cino->tree, ocst); - cst->end_offset = ocst->end_offset; - cst->rrefs = ocst->rrefs; - if (ccms_debug >= 9) { - kprintf("MERGERIGHT %p %lld-%lld\n", - ocst, - (long long)cst->beg_offset, - (long long)cst->end_offset); - } - ocst->free_next = info->cino->free_cache; - info->cino->free_cache = ocst; - } + spin_unlock(&cst->spin); + } else { + panic("ccms_thread_unlock: bad zero count\n"); } - return(0); } /* - * RB tree compare function for insertions and deletions. This function - * compares two CSTs. - */ -static int -ccms_cst_cmp(ccms_cst_t *b1, ccms_cst_t *b2) -{ - if (b1->end_offset < b2->beg_offset) - return(-1); - if (b1->beg_offset > b2->end_offset) - return(1); - return(0); -} - -/* - * RB tree scanning compare function. This function compares the CST - * from the tree against the supplied ccms_lock and returns the CST's - * placement relative to the lock. - */ -static int -ccms_lock_scan_cmp(ccms_cst_t *cst, void *arg) -{ - struct ccms_lock_scan_info *info = arg; - ccms_lock_t *lock = info->lock; - - if (cst->end_offset < lock->beg_offset) - return(-1); - if (cst->beg_offset > lock->end_offset) - return(1); - return(0); -} - -/************************************************************************ - * STANDALONE LSTATE AND RSTATE SUPPORT FUNCTIONS * - ************************************************************************ + * Release a local thread lock with special handling of the last lock + * reference. + * + * On the last lock reference the lock, if shared, will be upgraded to + * an exclusive lock and we return 0 without unlocking it. * - * These functions are used to perform work on the attr_cst and topo_cst - * embedded in a ccms_inode, and to issue remote state operations. These - * functions are called without the ccms_inode spinlock held. + * If more than one reference remains we drop the reference and return + * non-zero. 
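+ *
+ * Usage sketch (this mirrors hammer2_chain_unlock() below and is shown
+ * here for illustration only):
+ *
+ *	if (ccms_thread_unlock_zero(&chain->cst)) {
+ *		... a reference was dropped, others remain ...
+ *	} else {
+ *		... last reference, lock is now exclusive; perform the
+ *		    final disposition and then release it ...
+ *		ccms_thread_unlock(&chain->cst);
+ *	}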
*/ - -static -void -ccms_lstate_get(ccms_cst_t *cst, ccms_state_t state) -{ - int blocked; - - spin_lock(&cst->cino->spin); - ++cst->xrefs; - - for (;;) { - blocked = 0; - - switch(state) { - case CCMS_STATE_INVALID: - break; - case CCMS_STATE_ALLOWED: - case CCMS_STATE_SHARED: - case CCMS_STATE_SLAVE: - if (cst->count < 0) { - blocked = 1; - } else { - ++cst->count; - if (ccms_debug >= 9) { - kprintf("CST SHARE %d %lld-%lld\n", - cst->count, - (long long)cst->beg_offset, - (long long)cst->end_offset); - } - } - break; - case CCMS_STATE_MASTER: - case CCMS_STATE_EXCLUSIVE: - if (cst->count != 0) { - blocked = 1; - } else { - --cst->count; - if (ccms_debug >= 9) { - kprintf("CST EXCLS %d %lld-%lld\n", - cst->count, - (long long)cst->beg_offset, - (long long)cst->end_offset); - } - } - break; - case CCMS_STATE_MODIFIED: - if (cst->count != 0) { - blocked = 1; - } else { - --cst->count; - if (cst->lstate <= CCMS_STATE_EXCLUSIVE) - cst->lstate = CCMS_STATE_MODIFIED; - if (ccms_debug >= 9) { - kprintf("CST MODXL %d %lld-%lld\n", - cst->count, - (long long)cst->beg_offset, - (long long)cst->end_offset); - } - } - break; - default: - panic("ccms_lock_get_match: bad state %d\n", state); - break; - } - if (blocked == 0) - break; - ssleep(cst, &cst->cino->spin, 0, "ccmslget", hz); - } - if (cst->lstate < state) - cst->lstate = state; - spin_unlock(&cst->cino->spin); -} - -static -void -ccms_lstate_put(ccms_cst_t *cst) +int +ccms_thread_unlock_zero(ccms_cst_t *cst) { - spin_lock(&cst->cino->spin); - - switch(cst->lstate) { - case CCMS_STATE_INVALID: - break; - case CCMS_STATE_ALLOWED: - case CCMS_STATE_SHARED: - case CCMS_STATE_SLAVE: - KKASSERT(cst->count > 0); - --cst->count; - if (ccms_debug >= 9) { - kprintf("CST UNSHR %d %lld-%lld (%d)\n", cst->count, - (long long)cst->beg_offset, - (long long)cst->end_offset, - cst->blocked); - } - if (cst->blocked && cst->count == 0) { - cst->blocked = 0; - wakeup(cst); - } - break; - case CCMS_STATE_MASTER: - case CCMS_STATE_EXCLUSIVE: - case CCMS_STATE_MODIFIED: - KKASSERT(cst->count < 0); + if (cst->count < 0) { + if (cst->count == -1) + return(0); ++cst->count; - if (ccms_debug >= 9) { - kprintf("CST UNEXC %d %lld-%lld (%d)\n", cst->count, - (long long)cst->beg_offset, - (long long)cst->end_offset, - cst->blocked); - } - if (cst->blocked && cst->count == 0) { - cst->blocked = 0; - wakeup(cst); + } else { + KKASSERT(cst->count > 0); + spin_lock(&cst->spin); + if (cst->count == 1) { + cst->count = -1; + cst->td = curthread; + spin_unlock(&cst->spin); + return(0); } - break; - default: - panic("ccms_lock_put_match: bad state %d\n", cst->lstate); - break; + --cst->count; + spin_unlock(&cst->spin); } - - if (--cst->xrefs == 0) - cst->lstate = CCMS_STATE_INVALID; - spin_unlock(&cst->cino->spin); + return(1); } +#if 0 /* - * XXX third-party interaction & granularity + * Acquire remote grant state. This routine can be used to upgrade or + * downgrade the state. If it blocks it will release any local locks + * acquired via (lock) but then it will continue getting the requested + * remote grant. 
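+ *
+ * (The body below is currently a placeholder which simply assigns the
+ * requested state; the actual third-party / cluster interaction has
+ * not been implemented yet.)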
*/ static void -ccms_rstate_get(ccms_cst_t *cst, ccms_state_t state) +ccms_rstate_get(ccms_lock_t *lock, ccms_cst_t *cst, ccms_state_t state) { - spin_lock(&cst->cino->spin); - if (cst->rstate < state) - cst->rstate = state; - spin_unlock(&cst->cino->spin); + /* XXX */ + cst->state = state; } -/* - * XXX third-party interaction & granularity - */ -static -void -ccms_rstate_put(ccms_cst_t *cst) -{ - spin_lock(&cst->cino->spin); - cst->rstate = CCMS_STATE_INVALID; - spin_unlock(&cst->cino->spin); -} +#endif diff --git a/sys/vfs/hammer2/hammer2_ccms.h b/sys/vfs/hammer2/hammer2_ccms.h index c677d3643d..510f13a78b 100644 --- a/sys/vfs/hammer2/hammer2_ccms.h +++ b/sys/vfs/hammer2/hammer2_ccms.h @@ -31,74 +31,39 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ + /* - * This module is HAMMER2-independent. - * - * CCMS - Cache Coherency Management System. These structures are used - * to manage cache coherency and locking for an object. - * - * ccms_inode - * - * Cache coherency is tied into a kernel or VFS structure, creating a - * directory/file topology and a keyspace on an inode-by-inode basis - * via the (ccms_inode) structure. - * - * Each CCMS inode contains a RB-Tree holding ccms_cst (CST) elements - * for its file range or directory key range, plus two independent embedded - * ccms_cst structures representing the inode attributes and the entire - * recursive sub-tree. - * - * The CST representing the entire sub-tree is inclusive of that inode's - * attribute state and data/key range state AND inclusive of the entire - * filesystem topology under that point, recursively. - * - * Two ccms_cst's are embedded in each cached inode via the ccms_inode - * structure to represent attribute and recursive topological cache state. + * CCMS - Cache Coherency Management System. * - * ccms_cst + * This subsystem can be tied into a VFS in order to supply persistent + * cache management state for cluster or for remote cache-coherent operations. * - * The (ccms_cst) structure, called the CST, represents specific, persistent - * cache state. This structure is allocated and freed on the fly as needed - * (except for the two embedded in the ccms_inode). + * Local and cluster/remote cache state is maintained in a cache-coherent + * fashion as well as integrated into the VFS's inode locking subsystem + * (as a means of avoiding deadlocks). * - * The persistence ties into network/cluster operations via the 'rstate' - * field. When cluster-maintained state is present then certain operations - * on the CST's local state (including when a vnode is reclaimed) will - * block while third-party synchronization occurs. + * To operate properly the VFS must maintain a complete directory topology + * leading to any given vnode/inode either open or cached by the system. + * The vnode/namecache subsystem does not have to implement this but the + * VFS (aka HAMMER2) does. * - * The number of dynamically allocated CSTs is strictly limited, forcing - * a degree of aggregation when the limit is reached. + * The filesystem embeds CCMS_CST structures in its internal inode + * representatino as needed and implements callback to allow CCMS to + * do topological recursions. * - * ccms_lock + * -- * - * The (ccms_lock) structure represents a live local lock for the duration of - * any given filesystem operation. A single ccms_lock can cover both - * attribute state AND a byte-range/key-range. + * The CCMS_CST structures represent granted cache and local locking states. 
+ * Grants can be recursively inherited, minimizing protocol overhead in + * situations where there are no conflicts of interest. * - * This lock represents the exact lock being requested but the CST structure - * it points to can be a more general representation which covers the lock. - * The minimum granularity for the cst pointer in the ccms_lock will be to - * the ccms_inode's embedded topo_cst. + * -- * - * Theoretically a single CST at the root can cover the entire filesystem, - * but this creates a great deal of SMP interaction. - * - * Management - * - * Because cache state is persistent the CCMS module may desire to limit the - * total number of CSTs under management. It does this by aggregating cache - * state which in turn may require callbacks to invalidate third-party - * (cluster-related) cache state. - * - * CCMS operations related to locks can stall on third-party state - * transitions. Because third-party state can also change independently - * due to foreign interactions (often with a userland program), no filesystem - * lock can be held while manipulating CST states. For this reason, - * HAMMER2 (or any VFS using CCMS) must provide roll-up functions to acquire - * CCMS lock state up-front prior to locking the VFS inode structure. - * - * vnode locks which are under the control of the filesystem can be more - * problematic and may require additional care. + * CCMS supports active front-end 'locks' on data objects utilizing the + * ccms_inode, key, and desired cache state. It can grant the lock based + * on inherited CST state and prevents downgrading of the CST by other + * parties or threads while the lock is held. The CST's arranged + * lock within the embedded CCMS_INODE and ref-counts the related CST. */ #ifndef _SYS_CCMS_H_ @@ -116,92 +81,73 @@ #ifndef _SYS_SPINLOCK_H_ #include #endif -#ifndef _SYS_TREE_H_ -#include -#endif -typedef uint64_t ccms_off_t; +typedef uint64_t ccms_key_t; +typedef uint64_t ccms_tid_t; typedef uint8_t ccms_state_t; +typedef uint8_t ccms_type_t; -/* - * CCMS uses a red-black tree to organize CSTs. - */ -RB_HEAD(ccms_rb_tree, ccms_cst); -RB_PROTOTYPE3(ccms_rb_tree, ccms_cst, rbnode, ccms_cst_cmp, ccms_off_t); - -struct ccms_inode; struct ccms_cst; struct ccms_lock; /* - * CCMS cache states + * CCMS_STATE_T - CCMS cache states. * - * CCMS uses an extended MESI caching model. There are two extension states, - * MASTER and SLAVE, which represents dirty data which has not been - * synchronized to backing store but which nevertheless is being shared - * between distinct caches. These states are designed to allow data - * to be shared between nodes in a cluster without having to wait for it - * to be synchronized with its backing store. + * INVALID - Cache state is unknown and must be acquired. * - * Each CST has lstate and rstate. lstate is the local cache state and rstate - * is the remotely-granted state. Changes to the lstate require a compatible - * rstate. If the rstate is not compatible a third-party transaction is - * required to obtain the proper rstate. + * ALLOWED - Cache state allows any recursive state to be acquired. * - * INVALID - Cache state is unknown and must be acquired. + * SHARED - Cache state allows shared access. If this is a topo_cst + * only INVALID or SHARED recursive states are allowed. * - * ALLOWED - (topo_cst.rstate only). This is a granted state which - * allows cache state transactions underneath the current - * node (data, attribute, and recursively), but is not a proper - * grant for topo_cst itself. 
Someone specifically trying to - * acquire topo_cst still needs to do a third party transaction - * to get the cache into the proper state. + * EXCLUSIVE - Cache state allows exclusive access. If this is a + * topo_cst then INVALID, SHARED, or EXCLUSIVE recursive + * state is allowed. * - * SHARED - Indicates that the information is clean, shared, read-only. + * CCMS Implements an extended MESI model. The extensions are implemented + * as CCMS_TYPE_T flags. + */ +#define CCMS_STATE_INVALID 0 /* unknown cache state */ +#define CCMS_STATE_ALLOWED 1 /* allow subsystem (topo only) */ +#define CCMS_STATE_SHARED 2 /* clean, shared, read-only */ +#define CCMS_STATE_EXCLUSIVE 3 /* clean, exclusive, read-only */ + +/* + * CCMS_TYPE_T FLAGS * - * SLAVE - Indicates that the information is clean, shared, read-only. - * Indicates that local backing store is out of date but the - * in-memory cache is valid, meaning that we can only obtain - * the data from the MASTER (somewhere in the cluster), and - * that we may not be allowed to sync it to local backing - * store yet e.g. due to the quorum protocol not having - * completed. + * INHERITED - Indicates the state field was inherited and was not directly + * granted by the cluster controller. * - * MASTER - Indicates that the information is dirty, but readonly - * because other nodes in the cluster are in a SLAVE state. - * This state is typically transitional and occurs while - * a quorum operation is in progress, allowing slaves to - * access the data without stalling. + * MODIFIED - This is a type-field flag associated with an EXCLUSIVE cache + * state * - * EXCLUSIVE - Indicates that the information is clean, read-only, and - * that nobody else can access the data while we are in this - * state. A local node can upgrade both rstate and lstate - * from EXCLUSIVE to MODIFIED without having to perform a - * third-party transaction. + * MASTER - This is a type-field flag associated with an EXCLUSIVE+MODIFIED + * cache state which indicates that slaves might be present + * which are caching our unsynchronized state. * - * MODIFIED - Indicates that the information is dirty, read-write, and - * that nobody else can access the data while we are in this - * state. + * SLAVE - This is a type-field flag associated with the SHARED cache + * state which indicates that the data present in our memory + * caches is being mastered elsewhere and has not been + * synchronized (meaning no quorum protocol has been run to + * sync the data yet). Thus only the version of the data in + * our memory and its originator is valid. * - * It is important to note that remote cache-state grants can be more - * general than what was requested, plus they can be persistent. So, - * for example, a remote can grant EXCLUSIVE access even if you just - * requested SHARED, which saves you from having to do another network - * transaction if you later need EXCLUSIVE. + * QSLAVE - This indicates that the slaved data is also present in the + * memory caches of a quorum of master nodes. 
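+ *
+ * For example (illustrative only), a node holding unsynchronized dirty
+ * data which other nodes still have cached would carry
+ * state == CCMS_STATE_EXCLUSIVE with
+ * type == (CCMS_TYPE_MODIFIED | CCMS_TYPE_MASTER), while each of the
+ * other nodes would carry state == CCMS_STATE_SHARED with
+ * type == CCMS_TYPE_SLAVE.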
*/ - -#define CCMS_STATE_INVALID 0 /* unknown cache state */ -#define CCMS_STATE_ALLOWED 1 /* allow subsystem (topo only) */ -#define CCMS_STATE_SHARED 2 /* clean, shared, read-only */ -#define CCMS_STATE_SLAVE 3 /* live only, shared, read-only */ -#define CCMS_STATE_MASTER 4 /* dirty, shared, read-only */ -#define CCMS_STATE_EXCLUSIVE 5 /* clean, exclusive, read-only */ -#define CCMS_STATE_MODIFIED 6 /* dirty, exclusive, read-write */ +#define CCMS_TYPE_INHERITED 0x01 +#define CCMS_TYPE_MODIFIED 0x02 +#define CCMS_TYPE_MASTER 0x04 +#define CCMS_TYPE_SLAVE 0x08 +#define CCMS_TYPE_QSALVE 0x10 +#define CCMS_TYPE_RECURSIVE 0x80 /* - * A CCMS locking element - represents a high level locking request, - * such as used by read, write, and attribute operations. Initialize - * the ccms_lock structure and call ccms_lock_get(). + * CCMS_LOCK - High level active lock + * + * This represents a high level locking request, such as used by + * read, write, and attribute operations. Initialize the ccms_lock + * structure and call ccms_lock_get(). * * When a CCMS lock is established the cache state of the underlying elements * is adjusted to meet the requirements of the lock. The cache state @@ -211,126 +157,62 @@ struct ccms_lock; * * CCMS data locks imply a shared CCMS inode lock. A CCMS topology lock does * not imply a data or inode lock but topology locks can have far-reaching - * effects and block on numerous CST state. + * effects such as block ccms_locks on multiple inodes. */ struct ccms_lock { - ccms_state_t tstate; - ccms_state_t astate; - ccms_state_t dstate; - ccms_off_t beg_offset; /* applies to dstate */ - ccms_off_t end_offset; /* applies to dstate */ - struct ccms_cst *icst; /* points to topo_cst or attr_cst */ - struct ccms_cst *dcst; /* points to left edge in rbtree */ -#ifdef CCMS_DEBUG TAILQ_ENTRY(ccms_lock) entry; -#endif + ccms_state_t req_t; + ccms_state_t req_a; + ccms_state_t req_d; + uint8_t flags; + struct ccms_cst *topo_cst; + struct ccms_cst *attr_cst; + struct ccms_cst *data_cst; + ccms_key_t key_beg; /* applies to dstate */ + ccms_key_t key_end; /* applies to dstate */ }; -#ifdef CCMS_DEBUG - -TAILQ_HEAD(ccms_lock_head, ccms_lock); - -#endif +#define CCMS_LOCK_FAILED 0x01 /* - * CCMS cache state tree element (CST) - represents the actual cache - * management state for a data space. The cache state tree is a - * non-overlaping red-black tree containing ranged ccms_cst structures - * which reflect the resolved state for all current high level locking - * requests. For example, two overlapping ccms_lock requests for shared - * access would typically be represented by three non-overlapping ccms_cst - * items in the CST. The CST item representing the overlapped portion of - * the ccms_lock requests would have ref count of 2 while the other CST - * items would have a ref count of 1. - * - * [lock request #01] - * [lock request #02] - * [--cst--][--cst--][--cst--] + * CCMS_CST - Low level locking state, persistent cache state * - * CSTs are partitioned so their edges line up to all current and pending - * ccms_lock requests. CSTs are re-merged whenever possible. A freshly - * initialized database typically has a single CST representing the default - * cache state for the host. - * - * A CST keeps track of local cache state (lstate) AND remote cache state - * (rstate). - * - * Any arbitrary data range within a dataspace can be locked shared or - * exclusive. Obtaining a lock has the side effect of potentially modifying - * the cache state. 
A positive sharecount in a CST indicates that a - * shared access lock is being held. A negative sharecount indicates an - * exclusive access lock is being held on the range. A MODIFYING lock - * type is just an exclusive lock but one which effects the cache state - * differently. - * - * The end offset is byte-inclusive, allowing the entire 64 bit data space + * Offset ranges are byte-inclusive, allowing the entire 64 bit data space * to be represented without overflowing the edge case. For example, a * 64 byte area might be represented as (0,63). The offsets are UNSIGNED * entities. - */ -struct ccms_cst { - RB_ENTRY(ccms_cst) rbnode; /* stored in a red-black tree */ - struct ccms_cst *free_next; /* free cache linked list */ - struct ccms_inode *cino; /* related ccms_inode */ - ccms_off_t beg_offset; /* range (inclusive) */ - ccms_off_t end_offset; /* range (inclusive) */ - ccms_state_t lstate; /* local cache state */ - ccms_state_t rstate; /* cache state granted by protocol */ - - int32_t flags; - int32_t count; /* shared/exclusive count */ - int32_t blocked; /* indicates a blocked lock request */ - int32_t xrefs; /* lock overlap references */ - int32_t lrefs; /* left edge refs */ - int32_t rrefs; /* right edge refs */ -#ifdef CCMS_DEBUG - struct ccms_lock_head list; -#endif -}; - -#define CCMS_CST_DYNAMIC 0x00000001 -#define CCMS_CST_DELETING 0x00000002 -#define CCMS_CST_INSERTED 0x00000004 -#define CCMS_CST_INHERITED 0x00000008 /* rstate inherited from par */ - -/* - * A CCMS inode is typically embedded in a VFS file or directory object. - * - * The subdirectory topology is accessible downward by indexing topo_cst's - * from the children in the parent's cst_tree. * - * attr_cst is independent of data-range CSTs. However, adjustments to - * the topo_cst can have far-reaching effects to attr_cst, the CSTs in - * the tree, recursively both downward and upward. + * count - negative value indicates active exclusive lock, positive value + * indicates active shared lock. */ -struct ccms_inode { - struct spinlock spin; - struct ccms_inode *parent; - struct ccms_rb_tree tree; - struct ccms_cst attr_cst; - struct ccms_cst topo_cst; - struct ccms_cst *free_cache; /* cst free cache */ - struct ccms_domain *domain; - void *handle; /* VFS opaque */ - int32_t flags; +struct ccms_cst { + struct spinlock spin; /* thread spinlock */ + void *handle; /* opaque VFS handle */ + ccms_state_t state; /* granted or inherited state */ + ccms_type_t type; /* CST type and flags */ + uint8_t unused02; + uint8_t unused03; + + ccms_tid_t path_id; /* rendezvous inode id */ + ccms_tid_t tid; /* [meta]data versioning id */ + ccms_key_t key_beg; /* key range (inclusive) */ + ccms_key_t key_end; /* key range (inclusive) */ + + int32_t count; /* active shared/exclusive count */ + int32_t blocked; /* wakeup blocked on release */ + thread_t td; /* if excl lock (count < 0) */ }; -#define CCMS_INODE_INSERTED 0x0001 -#define CCMS_INODE_DELETING 0x0002 - /* * Domain management, contains a pseudo-root for the CCMS topology. 
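+ *
+ * cst_count tracks the number of dynamically allocated CSTs and
+ * cst_limit caps it, forcing cache state to be aggregated when the
+ * limit is reached.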
*/ struct ccms_domain { - struct malloc_type *mcst; /* malloc space for cst's */ - struct ccms_inode root; /* dummy protocol root */ int cst_count; /* dynamic cst count */ int cst_limit; /* dynamic cst limit */ }; typedef struct ccms_lock ccms_lock_t; typedef struct ccms_cst ccms_cst_t; -typedef struct ccms_inode ccms_inode_t; typedef struct ccms_domain ccms_domain_t; /* @@ -338,33 +220,18 @@ typedef struct ccms_domain ccms_domain_t; */ #ifdef _KERNEL -/* - * Helper inline to initialize primarily a dstate lock which shortcuts - * the more common locking operations. A dstate is specified and an - * astate is implied. tstate locks cannot be acquired with this inline. - */ -static __inline -void -ccms_lock_init(ccms_lock_t *lock, ccms_state_t dstate, - ccms_off_t beg_offset, ccms_off_t end_offset) -{ - lock->beg_offset = beg_offset; - lock->end_offset = end_offset; - lock->tstate = 0; - lock->astate = 0; - lock->dstate = dstate; -} - void ccms_domain_init(ccms_domain_t *dom); -void ccms_inode_init(ccms_domain_t *dom, ccms_inode_t *cino, void *handle); -void ccms_inode_insert(ccms_inode_t *cpar, ccms_inode_t *cino); -void ccms_inode_delete(ccms_inode_t *cino); -void ccms_inode_uninit(ccms_inode_t *cino); +void ccms_domain_uninit(ccms_domain_t *dom); +void ccms_cst_init(ccms_cst_t *cst, void *handle); +void ccms_cst_uninit(ccms_cst_t *cst); + +void ccms_thread_lock(ccms_cst_t *cst, ccms_state_t state); +int ccms_thread_lock_nonblock(ccms_cst_t *cst, ccms_state_t state); +void ccms_thread_unlock(ccms_cst_t *cst); +int ccms_thread_unlock_zero(ccms_cst_t *cst); -int ccms_lock_get(ccms_inode_t *cino, ccms_lock_t *lock); -int ccms_lock_get_uio(ccms_inode_t *cino, ccms_lock_t *lock, struct uio *uio); -int ccms_lock_get_attr(ccms_inode_t *cino, ccms_lock_t *lock, ccms_state_t st); -int ccms_lock_put(ccms_inode_t *cino, ccms_lock_t *lock); +void ccms_lock_get(ccms_lock_t *lock); +void ccms_lock_put(ccms_lock_t *lock); #endif diff --git a/sys/vfs/hammer2/hammer2_chain.c b/sys/vfs/hammer2/hammer2_chain.c index 5921a7865d..7a193a12dc 100644 --- a/sys/vfs/hammer2/hammer2_chain.c +++ b/sys/vfs/hammer2/hammer2_chain.c @@ -111,20 +111,17 @@ hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_blockref_t *bref) ip = kmalloc(sizeof(*ip), hmp->minode, M_WAITOK | M_ZERO); chain = &ip->chain; chain->u.ip = ip; - lockinit(&chain->lk, "inode", 0, LK_CANRECURSE); ip->hmp = hmp; break; case HAMMER2_BREF_TYPE_INDIRECT: np = kmalloc(sizeof(*np), hmp->mchain, M_WAITOK | M_ZERO); chain = &np->chain; chain->u.np = np; - lockinit(&chain->lk, "iblk", 0, LK_CANRECURSE); break; case HAMMER2_BREF_TYPE_DATA: dp = kmalloc(sizeof(*dp), hmp->mchain, M_WAITOK | M_ZERO); chain = &dp->chain; chain->u.dp = dp; - lockinit(&chain->lk, "dblk", 0, LK_CANRECURSE); break; case HAMMER2_BREF_TYPE_VOLUME: chain = NULL; @@ -146,7 +143,8 @@ hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_blockref_t *bref) chain->index = -1; /* not yet assigned */ chain->refs = 1; chain->bytes = bytes; - lockmgr(&chain->lk, LK_EXCLUSIVE); + ccms_cst_init(&chain->cst, chain); + ccms_thread_lock(&chain->cst, CCMS_STATE_EXCLUSIVE); return (chain); } @@ -217,8 +215,10 @@ hammer2_chain_drop(hammer2_mount_t *hmp, hammer2_chain_t *chain) if (refs == 1) { KKASSERT(chain != &hmp->vchain); parent = chain->parent; - if (parent) - lockmgr(&parent->lk, LK_EXCLUSIVE); + if (parent) { + ccms_thread_lock(&parent->cst, + CCMS_STATE_EXCLUSIVE); + } if (atomic_cmpset_int(&chain->refs, 1, 0)) { /* * Succeeded, recurse and drop parent. 
@@ -239,13 +239,6 @@ hammer2_chain_drop(hammer2_mount_t *hmp, hammer2_chain_t *chain) * Delete interlock */ if (!(chain->flags & HAMMER2_CHAIN_DELETED)) { - /* - * Disconnect the CCMS inode if this - * was an inode. - */ - if (ip && ip->cino) - ccms_inode_delete(ip->cino); - /* * Disconnect the chain and clear * pip if it was an inode. @@ -260,22 +253,20 @@ hammer2_chain_drop(hammer2_mount_t *hmp, hammer2_chain_t *chain) } /* - * Destroy the disconnected ccms_inode if - * applicable. + * When cleaning out a hammer2_inode we must + * also clean out the related ccms_inode. */ - if (ip && ip->cino) { - ccms_inode_destroy(ip->cino); - ip->cino = NULL; - } + if (ip) + ccms_cst_uninit(&ip->topo_cst); chain->parent = NULL; if (parent) - lockmgr(&parent->lk, LK_RELEASE); + ccms_thread_unlock(&parent->cst); hammer2_chain_free(hmp, chain); chain = parent; /* recurse on parent */ } else { if (parent) - lockmgr(&parent->lk, LK_RELEASE); + ccms_thread_unlock(&parent->cst); /* retry the same chain */ } } else { @@ -345,7 +336,7 @@ hammer2_chain_lock(hammer2_mount_t *hmp, hammer2_chain_t *chain, int how) */ KKASSERT(chain->refs > 0); atomic_add_int(&chain->refs, 1); - lockmgr(&chain->lk, LK_EXCLUSIVE); + ccms_thread_lock(&chain->cst, CCMS_STATE_EXCLUSIVE); /* * If we already have a valid data pointer no further action is @@ -485,14 +476,15 @@ hammer2_chain_unlock(hammer2_mount_t *hmp, hammer2_chain_t *chain) long *counterp; /* - * Undo a recursive lock + * Release the CST lock but with a special 1->0 transition case. * - * XXX shared locks not handled properly + * Returns non-zero if lock references remain. When zero is + * returned the last lock reference is retained and any shared + * lock is upgraded to an exclusive lock for final disposition. */ - if (lockcountnb(&chain->lk) > 1) { + if (ccms_thread_unlock_zero(&chain->cst)) { KKASSERT(chain->refs > 1); atomic_add_int(&chain->refs, -1); - lockmgr(&chain->lk, LK_RELEASE); return; } @@ -506,7 +498,7 @@ hammer2_chain_unlock(hammer2_mount_t *hmp, hammer2_chain_t *chain) */ if (chain->bp == NULL) { atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DIRTYBP); - lockmgr(&chain->lk, LK_RELEASE); + ccms_thread_unlock(&chain->cst); hammer2_chain_drop(hmp, chain); return; } @@ -590,7 +582,7 @@ hammer2_chain_unlock(hammer2_mount_t *hmp, hammer2_chain_t *chain) } } chain->bp = NULL; - lockmgr(&chain->lk, LK_RELEASE); + ccms_thread_unlock(&chain->cst); hammer2_chain_drop(hmp, chain); } @@ -947,9 +939,9 @@ hammer2_chain_get(hammer2_mount_t *hmp, hammer2_chain_t *parent, int index, int flags) { hammer2_blockref_t *bref; + hammer2_inode_t *ip; hammer2_chain_t *chain; hammer2_chain_t dummy; - ccms_cst_t *cst; int how; /* @@ -961,11 +953,6 @@ hammer2_chain_get(hammer2_mount_t *hmp, hammer2_chain_t *parent, else how = HAMMER2_RESOLVE_MAYBE; - /* - * Resolve cache state XXX - */ - cst = NULL; - /* * First see if we have a (possibly modified) chain element cached * for this (parent, index). Acquire the data if necessary. @@ -1043,20 +1030,18 @@ hammer2_chain_get(hammer2_mount_t *hmp, hammer2_chain_t *parent, * Additional linkage for inodes. Reuse the parent pointer to * find the parent directory. * - * The CCMS for the pfs-root is initialized from the mount code, - * this chain_get, or chain_create, when the pmp is assigned and - * non-NULL. No CCMS is initialized here for the super-root and - * the CCMS for the PFS root is initialized in the mount code. + * The ccms_inode is initialized from its parent directory. The + * chain of ccms_inode's is seeded by the mount code. 
*/ if (bref->type == HAMMER2_BREF_TYPE_INODE) { + ip = chain->u.ip; while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT) parent = parent->parent; if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { - chain->u.ip->pip = parent->u.ip; - chain->u.ip->pmp = parent->u.ip->pmp; - chain->u.ip->depth = parent->u.ip->depth + 1; - if (cst) - chain->u.ip->cino = cst->tag.cino; + ip->pip = parent->u.ip; + ip->pmp = parent->u.ip->pmp; + ip->depth = parent->u.ip->depth + 1; + ccms_cst_init(&ip->topo_cst, &ip->chain); } } @@ -1071,7 +1056,7 @@ hammer2_chain_get(hammer2_mount_t *hmp, hammer2_chain_t *parent, hammer2_chain_lock(hmp, chain, how); /* recusive lock */ hammer2_chain_drop(hmp, chain); /* excess ref */ } - lockmgr(&chain->lk, LK_RELEASE); /* from alloc */ + ccms_thread_unlock(&chain->cst); /* from alloc */ return (chain); } @@ -1463,12 +1448,6 @@ hammer2_chain_create(hammer2_mount_t *hmp, hammer2_chain_t *parent, int allocated = 0; int count; int i; - ccms_cst_t *cst; - - /* - * Resolve cache state - */ - cst = NULL; if (chain == NULL) { /* @@ -1627,10 +1606,8 @@ again: * Cumulative adjustments are inherited on [re]attach and will * propagate up the tree on the next flush. * - * The CCMS for the pfs-root is initialized from the mount code, - * this chain_get, or chain_create, when the pmp is assigned and - * non-NULL. No CCMS is initialized here for the super-root and - * the CCMS for the PFS root is initialized in the mount code. + * The ccms_inode is initialized from its parent directory. The + * chain of ccms_inode's is seeded by the mount code. */ if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { hammer2_chain_t *scan = parent; @@ -1645,9 +1622,7 @@ again: ip->pip->delta_icount += ip->ip_data.inode_count; ip->pip->delta_dcount += ip->ip_data.data_count; ++ip->pip->delta_icount; - - if (cst) - ip->cino = cst->tag.cino; + ccms_cst_init(&ip->topo_cst, &ip->chain); } } @@ -2146,11 +2121,6 @@ hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t *parent, * Cumulative adjustments must be propagated to the parent inode * when deleting and synchronized to ip. * - * The CCMS is deleted when pip is NULL'd out, here and also in - * chain_drop(). The CCMS is uninitialized when the pmp is NULL'd - * out (if it was non-NULL). This is interlocked by the - * HAMMER2_CHAIN_DELETED flag to prevent reentrancy. - * * NOTE: We do not propagate ip->delta_*count to the parent because * these represent adjustments that have not yet been * propagated upward, so we don't need to remove them from @@ -2161,8 +2131,6 @@ hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t *parent, if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { ip = chain->u.ip; if (ip->pip) { - ccms_inode_delete(ip->cino); - ip->pip->delta_icount -= ip->ip_data.inode_count; ip->pip->delta_dcount -= ip->ip_data.data_count; ip->ip_data.inode_count += ip->delta_icount; @@ -2801,11 +2769,10 @@ hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain, } /* - * We are locking backwards so allow the lock to fail + * We are locking backwards so allow the lock to fail. 
*/ - if (lockmgr(&parent->lk, LK_EXCLUSIVE | LK_NOWAIT) != 0) { + if (ccms_thread_lock_nonblock(&parent->cst, CCMS_STATE_EXCLUSIVE)) return; - } /* * We are updating brefs but we have to call chain_modify() @@ -2871,7 +2838,6 @@ hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain, sizeof(chain->bref)) != 0) { panic("hammer2: unflagged bref update(2)"); } - - lockmgr(&parent->lk, LK_RELEASE); /* release manual lockmgr op */ + ccms_thread_unlock(&parent->cst); /* release manual op */ hammer2_chain_unlock(hmp, parent); } diff --git a/sys/vfs/hammer2/hammer2_subr.c b/sys/vfs/hammer2/hammer2_subr.c index c356bb59d0..af88da35d2 100644 --- a/sys/vfs/hammer2/hammer2_subr.c +++ b/sys/vfs/hammer2/hammer2_subr.c @@ -77,13 +77,13 @@ void hammer2_inode_lock_sh(hammer2_inode_t *ip) { KKASSERT(ip->chain.refs > 0); - lockmgr(&ip->chain.lk, LK_SHARED); + ccms_thread_lock(&ip->chain.cst, CCMS_STATE_SHARED); } void hammer2_inode_unlock_sh(hammer2_inode_t *ip) { - lockmgr(&ip->chain.lk, LK_RELEASE); + ccms_thread_unlock(&ip->chain.cst); } /* @@ -113,19 +113,19 @@ hammer2_inode_unbusy(hammer2_inode_t *ip) void hammer2_mount_exlock(hammer2_mount_t *hmp) { - lockmgr(&hmp->vchain.lk, LK_EXCLUSIVE); + ccms_thread_lock(&hmp->vchain.cst, CCMS_STATE_EXCLUSIVE); } void hammer2_mount_shlock(hammer2_mount_t *hmp) { - lockmgr(&hmp->vchain.lk, LK_SHARED); + ccms_thread_lock(&hmp->vchain.cst, CCMS_STATE_SHARED); } void hammer2_mount_unlock(hammer2_mount_t *hmp) { - lockmgr(&hmp->vchain.lk, LK_RELEASE); + ccms_thread_unlock(&hmp->vchain.cst); } void diff --git a/sys/vfs/hammer2/hammer2_vfsops.c b/sys/vfs/hammer2/hammer2_vfsops.c index bc17dbe6d0..3d6147fa4d 100644 --- a/sys/vfs/hammer2/hammer2_vfsops.c +++ b/sys/vfs/hammer2/hammer2_vfsops.c @@ -371,8 +371,8 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, hmp->vchain.bref.type = HAMMER2_BREF_TYPE_VOLUME; hmp->vchain.bref.data_off = 0 | HAMMER2_PBUFRADIX; hmp->vchain.bref_flush = hmp->vchain.bref; + ccms_cst_init(&hmp->vchain.cst, NULL); /* hmp->vchain.u.xxx is left NULL */ - lockinit(&hmp->vchain.lk, "volume", 0, LK_CANRECURSE); lockinit(&hmp->alloclk, "h2alloc", 0, 0); lockinit(&hmp->voldatalk, "voldata", 0, LK_CANRECURSE); -- 2.41.0
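(Editorial illustration, not part of the patch.) The count convention documented in the new struct ccms_cst -- a positive count for shared holders, a negative count for an exclusive holder -- can be modelled stand-alone. The toy below is a userland pthreads analogue for illustration only; it omits recursion, upgrades, and the blocked/td bookkeeping the real CST keeps:

#include <pthread.h>

struct toy_cst {
        pthread_mutex_t mtx;
        pthread_cond_t  cv;
        int             count;  /* >0 shared holders, <0 exclusive, 0 idle */
};

#define TOY_CST_INITIALIZER \
        { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0 }

static void
toy_lock(struct toy_cst *cst, int exclusive)
{
        pthread_mutex_lock(&cst->mtx);
        if (exclusive) {
                while (cst->count != 0)
                        pthread_cond_wait(&cst->cv, &cst->mtx);
                cst->count = -1;                /* single exclusive holder */
        } else {
                while (cst->count < 0)
                        pthread_cond_wait(&cst->cv, &cst->mtx);
                ++cst->count;                   /* one more shared holder */
        }
        pthread_mutex_unlock(&cst->mtx);
}

static void
toy_unlock(struct toy_cst *cst)
{
        pthread_mutex_lock(&cst->mtx);
        if (cst->count < 0)
                cst->count = 0;                 /* drop the exclusive hold */
        else
                --cst->count;                   /* drop one shared hold */
        if (cst->count == 0)
                pthread_cond_broadcast(&cst->cv);
        pthread_mutex_unlock(&cst->mtx);
}

The real ccms_thread_unlock_zero() adds the special 1->0 case described in the hammer2_chain_unlock() hunk: when the caller holds the last reference it is retained, and a shared hold is upgraded to exclusive so the final disposition of the chain can proceed safely.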
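(Editorial note, not part of the patch.) In the hammer2_chain_flush() hunk the parent is locked after the child, the reverse of the normal top-down order, so the non-blocking variant is used and the flush simply bails out on contention. The non-zero-on-failure return of ccms_thread_lock_nonblock() is inferred from that call site; the snippet below only restates the pattern:

/* Sketch: reverse-order (child -> parent) locking during a flush */
if (ccms_thread_lock_nonblock(&parent->cst, CCMS_STATE_EXCLUSIVE)) {
        /*
         * A top-down locker may already hold the parent; blocking here
         * could deadlock, so give up and let a later flush retry.
         */
        return;
}
/* ... propagate bref updates into the parent ... */
ccms_thread_unlock(&parent->cst);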