SYSCTL_INT(_debug, OID_AUTO, ncposlimit, CTLFLAG_RW, &ncposlimit, 0,
"Number of cache entries allocated");
+static int ncp_shared_lock_disable = 1;
+SYSCTL_INT(_debug, OID_AUTO, ncp_shared_lock_disable, CTLFLAG_RW,
+ &ncp_shared_lock_disable, 0, "Disable shared namecache locks");
+
SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode),
"sizeof(struct vnode)");
SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache),
* vnode association state changes by other threads, and prevents the
* namecache entry from being resolved or unresolved by other threads.
*
- * The lock owner has full authority to associate/disassociate vnodes
- * and resolve/unresolve the locked ncp.
+ * An exclusive lock owner has full authority to associate/disassociate
+ * vnodes and resolve/unresolve the locked ncp.
+ *
+ * A shared lock owner only has authority to acquire the underlying vnode,
+ * if any.
*
- * The primary lock field is nc_exlocks. nc_locktd is set after the
+ * The primary lock field is nc_lockstatus. nc_locktd is set after the
* fact (when locking) or cleared prior to unlocking.
*
* WARNING! Holding a locked ncp will prevent a vnode from being destroyed
* way the refs counter is handled). Or, alternatively, make an
* unconditional call to cache_validate() or cache_resolve()
* after cache_lock() returns.
- *
- * MPSAFE
*/
static
void
td = curthread;
for (;;) {
- count = ncp->nc_exlocks;
+ count = ncp->nc_lockstatus;
+ cpu_ccfence();
- if (count == 0) {
- if (atomic_cmpset_int(&ncp->nc_exlocks, 0, 1)) {
+ if ((count & ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ)) == 0) {
+ if (atomic_cmpset_int(&ncp->nc_lockstatus,
+ count, count + 1)) {
/*
* The vp associated with a locked ncp must
* be held to prevent it from being recycled.
*/
ncp->nc_locktd = td;
if (ncp->nc_vp)
- vhold(ncp->nc_vp); /* MPSAFE */
+ vhold(ncp->nc_vp);
break;
}
/* cmpset failed */
continue;
}
if (ncp->nc_locktd == td) {
- if (atomic_cmpset_int(&ncp->nc_exlocks, count,
- count + 1)) {
+ KKASSERT((count & NC_SHLOCK_FLAG) == 0);
+ if (atomic_cmpset_int(&ncp->nc_lockstatus,
+ count, count + 1)) {
break;
}
/* cmpset failed */
continue;
}
- tsleep_interlock(ncp, 0);
- if (atomic_cmpset_int(&ncp->nc_exlocks, count,
+ tsleep_interlock(&ncp->nc_locktd, 0);
+ if (atomic_cmpset_int(&ncp->nc_lockstatus, count,
count | NC_EXLOCK_REQ) == 0) {
/* cmpset failed */
continue;
}
- error = tsleep(ncp, PINTERLOCKED, "clock", nclockwarn);
+ error = tsleep(&ncp->nc_locktd, PINTERLOCKED,
+ "clock", nclockwarn);
if (error == EWOULDBLOCK) {
if (didwarn == 0) {
didwarn = ticks;
- kprintf("[diagnostic] cache_lock: blocked "
- "on %p",
- ncp);
+ kprintf("[diagnostic] cache_lock: "
+ "blocked on %p %08x",
+ ncp, count);
kprintf(" \"%*.*s\"\n",
ncp->nc_nlen, ncp->nc_nlen,
ncp->nc_name);
}
}
+ /* loop */
}
if (didwarn) {
kprintf("[diagnostic] cache_lock: unblocked %*.*s after "
}
/*
+ * Acquire a shared lock on the ncp, sleeping until it can be obtained.
+ * The caller must already have a ref on the ncp (nc_refs != 0 is
+ * asserted).
+ *
+ * The shared lock works similarly to the exclusive lock except
+ * nc_locktd is left NULL and we need an interlock (VHOLD) to
+ * prevent vhold() races, since the moment our cmpset_int succeeds
+ * another cpu can come in and get its own shared lock.
+ *
+ * A critical section is needed to prevent interruption during the
+ * VHOLD interlock.
+ *
+ * Each pass through the loop samples nc_lockstatus and handles one of
+ * three cases:
+ *
+ *	- Nothing is set other than (possibly) NC_SHLOCK_REQ: become the
+ *	  first shared holder by bumping the count and setting
+ *	  NC_SHLOCK_FLAG, vhold() nc_vp (if any) while NC_SHLOCK_VHOLD is
+ *	  set, then clear NC_SHLOCK_VHOLD.
+ *
+ *	- NC_SHLOCK_FLAG is set and NC_EXLOCK_REQ is not: bump the count
+ *	  and spin until NC_SHLOCK_VHOLD is observed clear.
+ *
+ *	- Anything else: set NC_SHLOCK_REQ and tsleep() on the ncp address
+ *	  with an nclockwarn timeout.  The first time the sleep returns
+ *	  EWOULDBLOCK a "blocked" diagnostic is printed, and a matching
+ *	  "unblocked" diagnostic is printed once the lock is obtained.
+ */
+static
+void
+_cache_lock_shared(struct namecache *ncp)
+{
+ int didwarn;
+ int error;
+ u_int count;
+
+ KKASSERT(ncp->nc_refs != 0);
+ didwarn = 0;
+
+ for (;;) {
+ count = ncp->nc_lockstatus;
+ cpu_ccfence();
+
+ if ((count & ~NC_SHLOCK_REQ) == 0) {
+ crit_enter();
+ if (atomic_cmpset_int(&ncp->nc_lockstatus,
+ count,
+ (count + 1) | NC_SHLOCK_FLAG |
+ NC_SHLOCK_VHOLD)) {
+ /*
+ * The vp associated with a locked ncp must
+ * be held to prevent it from being recycled.
+ *
+ * WARNING! If VRECLAIMED is set the vnode
+ * could already be in the middle of a recycle.
+ * Callers must use cache_vref() or
+ * cache_vget() on the locked ncp to
+ * validate the vp or set the cache entry
+ * to unresolved.
+ *
+ * NOTE! vhold() is allowed if we hold a
+ * lock on the ncp (which we do).
+ */
+ if (ncp->nc_vp)
+ vhold(ncp->nc_vp);
+ atomic_clear_int(&ncp->nc_lockstatus,
+ NC_SHLOCK_VHOLD);
+ crit_exit();
+ break;
+ }
+ /* cmpset failed, leave the critical section and resample */
+ crit_exit();
+ continue;
+ }
+
+ /*
+ * If already held shared we can just bump the count, but
+ * only allow this if nobody is trying to get the lock
+ * exclusively.
+ *
+ * VHOLD is a bit of a hack. Even though we successfully
+ * added another shared ref, the cpu that got the first
+ * shared ref might not yet have held the vnode.
+ *
+ * The assertion checks that the holder count (everything
+ * outside the REQ and SHLOCK flag bits) is non-zero.
+ */
+ if ((count & (NC_EXLOCK_REQ|NC_SHLOCK_FLAG)) ==
+ NC_SHLOCK_FLAG) {
+ KKASSERT((count & ~(NC_EXLOCK_REQ |
+ NC_SHLOCK_REQ |
+ NC_SHLOCK_FLAG)) > 0);
+ if (atomic_cmpset_int(&ncp->nc_lockstatus,
+ count, count + 1)) {
+ while (ncp->nc_lockstatus & NC_SHLOCK_VHOLD)
+ cpu_pause();
+ break;
+ }
+ continue;
+ }
+ tsleep_interlock(ncp, 0);
+ if (atomic_cmpset_int(&ncp->nc_lockstatus, count,
+ count | NC_SHLOCK_REQ) == 0) {
+ /* cmpset failed */
+ continue;
+ }
+ error = tsleep(ncp, PINTERLOCKED, "clocksh", nclockwarn);
+ if (error == EWOULDBLOCK) {
+ if (didwarn == 0) {
+ didwarn = ticks;
+ kprintf("[diagnostic] cache_lock_shared: "
+ "blocked on %p %08x",
+ ncp, count);
+ kprintf(" \"%*.*s\"\n",
+ ncp->nc_nlen, ncp->nc_nlen,
+ ncp->nc_name);
+ }
+ }
+ /* loop and re-evaluate the lock state */
+ }
+ if (didwarn) {
+ kprintf("[diagnostic] cache_lock_shared: "
+ "unblocked %*.*s after %d secs\n",
+ ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name,
+ (int)(ticks - didwarn) / hz);
+ }
+}
+
+/*
* NOTE: nc_refs may be zero if the ncp is interlocked by circumstance,
* such as the case where one of its children is locked.
- *
- * MPSAFE
*/
static
int
td = curthread;
for (;;) {
- count = ncp->nc_exlocks;
+ count = ncp->nc_lockstatus;
- if (count == 0) {
- if (atomic_cmpset_int(&ncp->nc_exlocks, 0, 1)) {
+ if ((count & ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ)) == 0) {
+ if (atomic_cmpset_int(&ncp->nc_lockstatus,
+ count, count + 1)) {
/*
* The vp associated with a locked ncp must
* be held to prevent it from being recycled.
*/
ncp->nc_locktd = td;
if (ncp->nc_vp)
- vhold(ncp->nc_vp); /* MPSAFE */
+ vhold(ncp->nc_vp);
break;
}
/* cmpset failed */
continue;
}
if (ncp->nc_locktd == td) {
- if (atomic_cmpset_int(&ncp->nc_exlocks, count,
- count + 1)) {
+ if (atomic_cmpset_int(&ncp->nc_lockstatus,
+ count, count + 1)) {
break;
}
/* cmpset failed */
}
/*
+ * Non-blocking version of the shared lock.  Returns 0 with a shared
+ * lock held, or EWOULDBLOCK (without sleeping and without setting
+ * NC_SHLOCK_REQ) when the lock status is neither free nor held shared
+ * with no exclusive request pending.
+ *
+ * The shared lock works similarly to the exclusive lock except
+ * nc_locktd is left NULL and we need an interlock (VHOLD) to
+ * prevent vhold() races, since the moment our cmpset_int succeeds
+ * another cpu can come in and get its own shared lock.
+ *
+ * A critical section is needed to prevent interruption during the
+ * VHOLD interlock.
+ *
+ * NOTE: A failed cmpset is retried; only the "cannot be shared right
+ *	 now" state produces EWOULDBLOCK.
+ */
+static
+int
+_cache_lock_shared_nonblock(struct namecache *ncp)
+{
+ u_int count;
+
+ for (;;) {
+ count = ncp->nc_lockstatus;
+
+ if ((count & ~NC_SHLOCK_REQ) == 0) {
+ crit_enter();
+ if (atomic_cmpset_int(&ncp->nc_lockstatus,
+ count,
+ (count + 1) | NC_SHLOCK_FLAG |
+ NC_SHLOCK_VHOLD)) {
+ /*
+ * The vp associated with a locked ncp must
+ * be held to prevent it from being recycled.
+ *
+ * WARNING! If VRECLAIMED is set the vnode
+ * could already be in the middle of a recycle.
+ * Callers must use cache_vref() or
+ * cache_vget() on the locked ncp to
+ * validate the vp or set the cache entry
+ * to unresolved.
+ *
+ * NOTE! vhold() is allowed if we hold a
+ * lock on the ncp (which we do).
+ */
+ if (ncp->nc_vp)
+ vhold(ncp->nc_vp);
+ atomic_clear_int(&ncp->nc_lockstatus,
+ NC_SHLOCK_VHOLD);
+ crit_exit();
+ break;
+ }
+ /* cmpset failed, leave the critical section and resample */
+ crit_exit();
+ continue;
+ }
+
+ /*
+ * If already held shared we can just bump the count, but
+ * only allow this if nobody is trying to get the lock
+ * exclusively.
+ *
+ * VHOLD is a bit of a hack. Even though we successfully
+ * added another shared ref, the cpu that got the first
+ * shared ref might not yet have held the vnode.
+ *
+ * The assertion checks that the holder count (everything
+ * outside the REQ and SHLOCK flag bits) is non-zero.
+ */
+ if ((count & (NC_EXLOCK_REQ|NC_SHLOCK_FLAG)) ==
+ NC_SHLOCK_FLAG) {
+ KKASSERT((count & ~(NC_EXLOCK_REQ |
+ NC_SHLOCK_REQ |
+ NC_SHLOCK_FLAG)) > 0);
+ if (atomic_cmpset_int(&ncp->nc_lockstatus,
+ count, count + 1)) {
+ while (ncp->nc_lockstatus & NC_SHLOCK_VHOLD)
+ cpu_pause();
+ break;
+ }
+ continue;
+ }
+ return(EWOULDBLOCK);
+ }
+ return(0);
+}
+
+/*
* Helper function
*
* NOTE: nc_refs can be 0 (degenerate case during _cache_drop).
*
- * nc_locktd must be NULLed out prior to nc_exlocks getting cleared.
- *
- * MPSAFE
+ * nc_locktd must be NULLed out prior to nc_lockstatus getting cleared.
*/
static
void
{
thread_t td __debugvar = curthread;
u_int count;
+ u_int ncount;
+ struct vnode *dropvp;
KKASSERT(ncp->nc_refs >= 0);
- KKASSERT(ncp->nc_exlocks > 0);
- KKASSERT(ncp->nc_locktd == td);
+ KKASSERT((ncp->nc_lockstatus & ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ)) > 0);
+ KKASSERT((ncp->nc_lockstatus & NC_SHLOCK_FLAG) || ncp->nc_locktd == td);
+
+ count = ncp->nc_lockstatus;
+ cpu_ccfence();
- count = ncp->nc_exlocks;
- if ((count & ~NC_EXLOCK_REQ) == 1) {
+ /*
+ * Clear nc_locktd prior to the atomic op (excl lock only)
+ */
+ if ((count & ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ)) == 1)
ncp->nc_locktd = NULL;
- if (ncp->nc_vp)
- vdrop(ncp->nc_vp);
- }
+ dropvp = NULL;
+
for (;;) {
- if ((count & ~NC_EXLOCK_REQ) == 1) {
- if (atomic_cmpset_int(&ncp->nc_exlocks, count, 0)) {
+ if ((count &
+ ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ|NC_SHLOCK_FLAG)) == 1) {
+ dropvp = ncp->nc_vp;
+ if (count & NC_EXLOCK_REQ)
+ ncount = count & NC_SHLOCK_REQ; /* cnt->0 */
+ else
+ ncount = 0;
+
+ if (atomic_cmpset_int(&ncp->nc_lockstatus,
+ count, ncount)) {
if (count & NC_EXLOCK_REQ)
+ wakeup(&ncp->nc_locktd);
+ else if (count & NC_SHLOCK_REQ)
wakeup(ncp);
break;
}
+ dropvp = NULL;
} else {
- if (atomic_cmpset_int(&ncp->nc_exlocks, count,
- count - 1)) {
+ KKASSERT((count & NC_SHLOCK_VHOLD) == 0);
+ KKASSERT((count & ~(NC_EXLOCK_REQ |
+ NC_SHLOCK_REQ |
+ NC_SHLOCK_FLAG)) > 1);
+ if (atomic_cmpset_int(&ncp->nc_lockstatus,
+ count, count - 1)) {
break;
}
}
- count = ncp->nc_exlocks;
+ count = ncp->nc_lockstatus;
+ cpu_ccfence();
}
+
+ /*
+ * Don't actually drop the vp until we successfully clean out
+ * the lock, otherwise we may race another shared lock.
+ */
+ if (dropvp)
+ vdrop(dropvp);
}
+/*
+ * Report the lock state of the ncp as seen by the current thread:
+ *
+ *	LK_EXCLUSIVE	nc_locktd is the current thread
+ *	LK_SHARED	NC_SHLOCK_FLAG is set in nc_lockstatus
+ *	-1		neither of the above
+ *
+ * The exclusive test is made first.
+ */
+static
+int
+_cache_lockstatus(struct namecache *ncp)
+{
+	int status;
+
+	if (ncp->nc_locktd == curthread)
+		status = LK_EXCLUSIVE;
+	else if (ncp->nc_lockstatus & NC_SHLOCK_FLAG)
+		status = LK_SHARED;
+	else
+		status = -1;
+	return(status);
+}
/*
* cache_hold() and cache_drop() prevent the premature deletion of a
*
* This is a rare case where callers are allowed to hold a spinlock,
* so we can't ourselves.
- *
- * MPSAFE
*/
static __inline
struct namecache *
*
* NOTE: cache_zap() may return a non-NULL referenced parent which must
* be dropped in a loop.
- *
- * MPSAFE
*/
static __inline
void
*
* NOTE: The hash table spinlock is likely held during this call, we
* can't do anything fancy.
- *
- * MPSAFE
*/
static void
_cache_link_parent(struct namecache *ncp, struct namecache *par,
*
* ncp must be locked. This routine will acquire a temporary lock on
* the parent as wlel as the appropriate hash chain.
- *
- * MPSAFE
*/
static void
_cache_unlink_parent(struct namecache *ncp)
/*
* Allocate a new namecache structure. Most of the code does not require
* zero-termination of the string but it makes vop_compat_ncreate() easier.
- *
- * MPSAFE
*/
static struct namecache *
cache_alloc(int nlen)
/*
* Can only be called for the case where the ncp has never been
* associated with anything (so no spinlocks are needed).
- *
- * MPSAFE
*/
static void
_cache_free(struct namecache *ncp)
{
- KKASSERT(ncp->nc_refs == 1 && ncp->nc_exlocks == 1);
+ KKASSERT(ncp->nc_refs == 1 && ncp->nc_lockstatus == 1);
if (ncp->nc_name)
kfree(ncp->nc_name, M_VFSCACHE);
kfree(ncp, M_VFSCACHE);
}
/*
- * MPSAFE
+ * [re]initialize a nchandle.
*/
void
cache_zero(struct nchandle *nch)
atomic_add_int(&nch->mount->mnt_refs, 1);
}
-/*
- * MPSAFE
- */
void
cache_drop(struct nchandle *nch)
{
nch->mount = NULL;
}
-/*
- * MPSAFE
- */
+/*
+ * Return the lock status of the namecache entry referenced by the
+ * nchandle, exactly as reported by _cache_lockstatus().
+ */
+int
+cache_lockstatus(struct nchandle *nch)
+{
+	struct namecache *ncp = nch->ncp;
+
+	return(_cache_lockstatus(ncp));
+}
+
void
cache_lock(struct nchandle *nch)
{
_cache_lock(nch->ncp);
}
+/*
+ * Lock the ncp referenced by nch, using a shared lock when possible.
+ *
+ * An exclusive lock is acquired outright if shared namecache locks are
+ * disabled, if the caller passes a non-zero excl, or if the ncp is
+ * unresolved.  Otherwise a shared lock is acquired first; if the ncp
+ * then turns out to be unresolved, or its vnode has VRECLAIMED set,
+ * the shared lock is released and an exclusive lock is acquired in
+ * its place.
+ */
+void
+cache_lock_maybe_shared(struct nchandle *nch, int excl)
+{
+	struct namecache *ncp = nch->ncp;
+
+	if (ncp_shared_lock_disable || excl ||
+	    (ncp->nc_flag & NCF_UNRESOLVED)) {
+		_cache_lock(ncp);
+		return;
+	}
+
+	_cache_lock_shared(ncp);
+	if ((ncp->nc_flag & NCF_UNRESOLVED) ||
+	    (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED))) {
+		/* a shared lock is not sufficient, trade it in */
+		_cache_unlock(ncp);
+		_cache_lock(ncp);
+	}
+}
+
/*
* Relock nch1 given an unlocked nch1 and a locked nch2. The caller
* is responsible for checking both for validity on return as they
}
}
-/*
- * MPSAFE
- */
int
cache_lock_nonblock(struct nchandle *nch)
{
return(_cache_lock_nonblock(nch->ncp));
}
-
-/*
- * MPSAFE
- */
void
cache_unlock(struct nchandle *nch)
{
*
* We want cache_get() to return a definitively usable vnode or a
* definitively unresolved ncp.
- *
- * MPSAFE
*/
static
struct namecache *
}
/*
+ * Reference and lock the ncp, preferring a shared lock.
+ *
+ * A shared lock is only retained if the ncp is resolved and its vnode
+ * (when there is one) does not have VRECLAIMED set.  In every other
+ * case, including excl being non-zero or shared namecache locks being
+ * disabled, the ncp is obtained through _cache_get() instead.
+ *
+ * Returns the ncp handed back by _cache_get() on the exclusive paths,
+ * or the passed ncp on the shared path.
+ */
+static
+struct namecache *
+_cache_get_maybe_shared(struct namecache *ncp, int excl)
+{
+	if (ncp_shared_lock_disable || excl ||
+	    (ncp->nc_flag & NCF_UNRESOLVED)) {
+		return(_cache_get(ncp));
+	}
+
+	_cache_hold(ncp);
+	_cache_lock_shared(ncp);
+	if ((ncp->nc_flag & NCF_UNRESOLVED) ||
+	    (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED))) {
+		/*
+		 * Shared is not good enough.  Unlock, go through
+		 * _cache_get(), then drop once to balance the
+		 * _cache_hold() above (presumably _cache_get()
+		 * acquires its own ref).
+		 */
+		_cache_unlock(ncp);
+		ncp = _cache_get(ncp);
+		_cache_drop(ncp);
+	}
+	return(ncp);
+}
+
+/*
* This is a special form of _cache_lock() which only succeeds if
* it can get a pristine, non-recursive lock. The caller must have
* already ref'd the ncp.
*
* We want _cache_lock_special() (on success) to return a definitively
* usable vnode or a definitively unresolved ncp.
- *
- * MPSAFE
*/
static int
_cache_lock_special(struct namecache *ncp)
{
if (_cache_lock_nonblock(ncp) == 0) {
- if ((ncp->nc_exlocks & ~NC_EXLOCK_REQ) == 1) {
+ if ((ncp->nc_lockstatus &
+ ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ)) == 1) {
if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED))
_cache_setunresolved(ncp);
return(0);
return(EWOULDBLOCK);
}
+/*
+ * Shared-lock counterpart of _cache_lock_special().  Attempts a
+ * non-blocking shared lock and only keeps it if, after masking off the
+ * REQ bits, nc_lockstatus is exactly (NC_SHLOCK_FLAG | 1) and the
+ * vnode is either absent or does not have VRECLAIMED set.
+ *
+ * Returns 0 with the shared lock held, or EWOULDBLOCK with the ncp
+ * unlocked.
+ */
+static int
+_cache_lock_shared_special(struct namecache *ncp)
+{
+	if (_cache_lock_shared_nonblock(ncp) != 0)
+		return(EWOULDBLOCK);
+
+	if ((ncp->nc_lockstatus &
+	     ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ)) == (NC_SHLOCK_FLAG | 1) &&
+	    (ncp->nc_vp == NULL ||
+	     (ncp->nc_vp->v_flag & VRECLAIMED) == 0)) {
+		return(0);
+	}
+
+	/* got the lock but it is not usable, give it back */
+	_cache_unlock(ncp);
+	return(EWOULDBLOCK);
+}
+
/*
* NOTE: The same nchandle can be passed for both arguments.
- *
- * MPSAFE
*/
void
cache_get(struct nchandle *nch, struct nchandle *target)
atomic_add_int(&target->mount->mnt_refs, 1);
}
+/*
+ * Variant of cache_get() that goes through _cache_get_maybe_shared(),
+ * passing excl along.  The source nch must already be referenced
+ * (nc_refs > 0 is asserted).  The target receives the source's mount
+ * and the resulting ncp, and the mount's mnt_refs is bumped by one.
+ */
+void
+cache_get_maybe_shared(struct nchandle *nch, struct nchandle *target, int excl)
+{
+	struct mount *mp;
+
+	KKASSERT(nch->ncp->nc_refs > 0);
+	mp = nch->mount;
+	target->mount = mp;
+	target->ncp = _cache_get_maybe_shared(nch->ncp, excl);
+	atomic_add_int(&mp->mnt_refs, 1);
+}
+
/*
- * MPSAFE
+ *
*/
static __inline
void
}
/*
- * MPSAFE
+ *
*/
void
cache_put(struct nchandle *nch)
* vnode is NULL, a negative cache entry is created.
*
* The ncp should be locked on entry and will remain locked on return.
- *
- * MPSAFE
*/
static
void
_cache_setvp(struct mount *mp, struct namecache *ncp, struct vnode *vp)
{
KKASSERT(ncp->nc_flag & NCF_UNRESOLVED);
+ KKASSERT(_cache_lockstatus(ncp) == LK_EXCLUSIVE);
if (vp != NULL) {
/*
ncp->nc_vp = vp;
TAILQ_INSERT_HEAD(&vp->v_namecache, ncp, nc_vnode);
spin_unlock(&vp->v_spin);
- if (ncp->nc_exlocks)
+ if (ncp->nc_lockstatus & ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ))
vhold(vp);
/*
}
/*
- * MPSAFE
+ *
*/
void
cache_setvp(struct nchandle *nch, struct vnode *vp)
}
/*
- * MPSAFE
+ *
*/
void
cache_settimeout(struct nchandle *nch, int nticks)
* from its namecache and can cause the OLDAPI and NEWAPI to get out of
* sync.
*
- * MPSAFE
*/
static
void
*/
if (!TAILQ_EMPTY(&ncp->nc_list))
vdrop(vp);
- if (ncp->nc_exlocks)
+ if (ncp->nc_lockstatus & ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ))
vdrop(vp);
} else {
spin_lock(&ncspin);
* set a resolved cache element to unresolved if it has timed out
* or if it is a negative cache hit and the mount point namecache_gen
* has changed.
- *
- * MPSAFE
*/
-static __inline void
-_cache_auto_unresolve(struct mount *mp, struct namecache *ncp)
+static __inline int
+_cache_auto_unresolve_test(struct mount *mp, struct namecache *ncp)
{
/*
- * Already in an unresolved state, nothing to do.
- */
- if (ncp->nc_flag & NCF_UNRESOLVED)
- return;
-
- /*
* Try to zap entries that have timed out. We have
* to be careful here because locked leafs may depend
* on the vnode remaining intact in a parent, so only
*/
if (ncp->nc_timeout && (int)(ncp->nc_timeout - ticks) < 0 &&
TAILQ_EMPTY(&ncp->nc_list)) {
- _cache_setunresolved(ncp);
- return;
+ return 1;
}
/*
* the mount's namecache generation being bumped, zap it.
*/
if (ncp->nc_vp == NULL && VFS_NCPGEN_TEST(mp, ncp)) {
- _cache_setunresolved(ncp);
- return;
+ return 1;
+ }
+
+ /*
+ * Otherwise we are good
+ */
+ return 0;
+}
+
+/*
+ * Set a resolved ncp to unresolved if _cache_auto_unresolve_test()
+ * says it should be.
+ */
+static __inline void
+_cache_auto_unresolve(struct mount *mp, struct namecache *ncp)
+{
+	/*
+	 * Already in an unresolved state, nothing to do.
+	 */
+	if (ncp->nc_flag & NCF_UNRESOLVED)
+		return;
+
+	if (_cache_auto_unresolve_test(mp, ncp))
+		_cache_setunresolved(ncp);
}
/*
- * MPSAFE
+ *
*/
void
cache_setunresolved(struct nchandle *nch)
* looking for matches. This flag tells the lookup code when it must
* check for a mount linkage and also prevents the directories in question
* from being deleted or renamed.
- *
- * MPSAFE
*/
static
int
}
/*
- * MPSAFE
+ *
*/
void
cache_clrmountpt(struct nchandle *nch)
* node using a depth-first algorithm in order to allow multiple deep
* recursions to chain through each other, then we restart the invalidation
* from scratch.
- *
- * MPSAFE
*/
struct cinvtrack {
struct namecache *nextkid;
int rcnt = 0;
- KKASSERT(ncp->nc_exlocks);
+ KKASSERT(_cache_lockstatus(ncp) == LK_EXCLUSIVE);
_cache_setunresolved(ncp);
if (flags & CINV_DESTROY)
*
* In addition, the v_namecache list itself must be locked via
* the vnode's spinlock.
- *
- * MPSAFE
*/
int
cache_inval_vp(struct vnode *vp, int flags)
*
* Return 0 on success, non-zero if not all namecache records could be
* disassociated from the vnode (for various reasons).
- *
- * MPSAFE
*/
int
cache_inval_vp_nonblock(struct vnode *vp)
* Because there may be references to the source ncp we cannot copy its
* contents to the target. Instead the source ncp is relinked as the target
* and the target ncp is removed from the namecache topology.
- *
- * MPSAFE
*/
void
cache_rename(struct nchandle *fnch, struct nchandle *tnch)
* can safely acquire the vnode. In fact, we MUST NOT release the ncp
* lock when acquiring the vp lock or we might cause a deadlock.
*
- * MPSAFE
+ * NOTE: The passed-in ncp must be locked exclusively if it is initially
+ * unresolved. If a reclaim race occurs the passed-in ncp will be
+ * relocked exclusively before being re-resolved.
*/
int
cache_vget(struct nchandle *nch, struct ucred *cred,
int error;
ncp = nch->ncp;
- KKASSERT(ncp->nc_locktd == curthread);
again:
vp = NULL;
if (ncp->nc_flag & NCF_UNRESOLVED)
kprintf("Warning: vnode reclaim race detected "
"in cache_vget on %p (%s)\n",
vp, ncp->nc_name);
+ _cache_unlock(ncp);
+ _cache_lock(ncp);
_cache_setunresolved(ncp);
goto again;
}
return(error);
}
+/*
+ * Similar to cache_vget() but only acquires a ref on the vnode.
+ *
+ * NOTE: The passed-in ncp must be locked exclusively if it is initially
+ * unresolved. If a reclaim race occurs the passed-in ncp will be
+ * relocked exclusively before being re-resolved.
+ */
int
cache_vref(struct nchandle *nch, struct ucred *cred, struct vnode **vpp)
{
int error;
ncp = nch->ncp;
- KKASSERT(ncp->nc_locktd == curthread);
again:
vp = NULL;
if (ncp->nc_flag & NCF_UNRESOLVED)
kprintf("Warning: vnode reclaim race detected "
"in cache_vget on %p (%s)\n",
vp, ncp->nc_name);
+ _cache_unlock(ncp);
+ _cache_lock(ncp);
_cache_setunresolved(ncp);
goto again;
}
* so use vhold()/vdrop() while holding the lock to prevent dvp from
* getting destroyed.
*
- * MPSAFE - Note vhold() is allowed when dvp has 0 refs if we hold a
- * lock on the ncp in question..
+ * NOTE: vhold() is allowed when dvp has 0 refs if we hold a
+ * lock on the ncp in question.
*/
static struct vnode *
cache_dvpref(struct namecache *ncp)
}
/*
+ * Attempt to lookup a namecache entry and return with a shared namecache
+ * lock.
+ *
+ * Only an entry already present in the hash table can be returned; this
+ * function allocates nothing and resolves nothing.  EWOULDBLOCK is
+ * returned, with res_nch left untouched, when:
+ *
+ *	- shared namecache locks are disabled or excl is non-zero,
+ *	- no matching, non-DESTROYED entry is found on the hash chain,
+ *	- _cache_lock_shared_special() fails on the matching entry, or
+ *	- the entry, once locked, is unresolved, destroyed, or
+ *	  _cache_auto_unresolve_test() says it needs to be unresolved.
+ *
+ * On success res_nch is filled in with the mount and the held (via
+ * _cache_hold()), shared-locked ncp, the mount's mnt_refs is bumped,
+ * and the entry's nc_error is returned.
+ */
+int
+cache_nlookup_maybe_shared(struct nchandle *par_nch, struct nlcomponent *nlc,
+ int excl, struct nchandle *res_nch)
+{
+ struct namecache *ncp;
+ struct nchash_head *nchpp;
+ struct mount *mp;
+ u_int32_t hash;
+ globaldata_t gd;
+
+ /*
+ * If exclusive requested or shared namecache locks are disabled,
+ * return failure.
+ */
+ if (ncp_shared_lock_disable || excl)
+ return(EWOULDBLOCK);
+
+ numcalls++;
+ gd = mycpu;
+ mp = par_nch->mount;
+
+ /*
+ * This is a good time to call it, no ncp's are locked by
+ * the caller or us.
+ */
+ cache_hysteresis();
+
+ /*
+ * Try to locate an existing entry. The hash covers the name
+ * followed by the parent ncp pointer.
+ */
+ hash = fnv_32_buf(nlc->nlc_nameptr, nlc->nlc_namelen, FNV1_32_INIT);
+ hash = fnv_32_buf(&par_nch->ncp, sizeof(par_nch->ncp), hash);
+ nchpp = NCHHASH(hash);
+
+ spin_lock(&nchpp->spin);
+
+ LIST_FOREACH(ncp, &nchpp->list, nc_hash) {
+ numchecks++;
+
+ /*
+ * Break out if we find a matching entry. Note that
+ * UNRESOLVED entries may match, but DESTROYED entries
+ * do not.
+ *
+ * The ncp is held before the chain spinlock is released
+ * and the lock attempt is made without the spinlock.
+ * Only the first match is tried; if it cannot be used
+ * the spinlock is reacquired and the scan is abandoned.
+ */
+ if (ncp->nc_parent == par_nch->ncp &&
+ ncp->nc_nlen == nlc->nlc_namelen &&
+ bcmp(ncp->nc_name, nlc->nlc_nameptr, ncp->nc_nlen) == 0 &&
+ (ncp->nc_flag & NCF_DESTROYED) == 0
+ ) {
+ _cache_hold(ncp);
+ spin_unlock(&nchpp->spin);
+ if (_cache_lock_shared_special(ncp) == 0) {
+ if ((ncp->nc_flag & NCF_UNRESOLVED) == 0 &&
+ (ncp->nc_flag & NCF_DESTROYED) == 0 &&
+ _cache_auto_unresolve_test(mp, ncp) == 0) {
+ goto found;
+ }
+ _cache_unlock(ncp);
+ }
+ _cache_drop(ncp);
+ spin_lock(&nchpp->spin);
+ break;
+ }
+ }
+
+ /*
+ * Failure. The hash chain spinlock is held on every path
+ * that reaches this point.
+ */
+ spin_unlock(&nchpp->spin);
+ return(EWOULDBLOCK);
+
+ /*
+ * Success. The hash chain spinlock has already been released.
+ *
+ * Note that nc_error might be non-zero (e.g ENOENT).
+ */
+found:
+ res_nch->mount = mp;
+ res_nch->ncp = ncp;
+ ++gd->gd_nchstats->ncs_goodhits;
+ atomic_add_int(&res_nch->mount->mnt_refs, 1);
+
+ KKASSERT(ncp->nc_error != EWOULDBLOCK);
+ return(ncp->nc_error);
+}
+
+/*
* This is a non-blocking verison of cache_nlookup() used by
* nfs_readdirplusrpc_uio(). It can fail for any reason and
* will return nch.ncp == NULL in that case.
* Note that successful resolution does not necessarily return an error
* code of 0. If the ncp resolves to a negative cache hit then ENOENT
* will be returned.
- *
- * MPSAFE
*/
int
cache_resolve(struct nchandle *nch, struct ucred *cred)
ncp = nch->ncp;
mp = nch->mount;
+ KKASSERT(_cache_lockstatus(ncp) == LK_EXCLUSIVE);
restart:
/*
* If the ncp is already resolved we have nothing to do. However,
/*
* Clean out negative cache entries when too many have accumulated.
- *
- * MPSAFE
*/
static void
_cache_cleanneg(int count)
/*
* Clean out positive cache entries when too many have accumulated.
- *
- * MPSAFE
*/
static void
_cache_cleanpos(int count)
*
* Such entries can also be removed via cache_inval_vp(), such
* as when unmounting.
- *
- * MPSAFE
*/
static void
_cache_cleandefered(void)
* have to check again.
*/
while ((nch.ncp = ncp->nc_parent) != NULL) {
- _cache_lock(ncp);
+ if (ncp_shared_lock_disable)
+ _cache_lock(ncp);
+ else
+ _cache_lock_shared(ncp);
if (nch.ncp != ncp->nc_parent) {
_cache_unlock(ncp);
continue;
* If NLC_REFDVP is set nd->nl_dvp will be set to the directory vnode
* of the returned entry. The vnode will be referenced, but not locked,
* and will be released by nlookup_done() along with everything else.
+ *
+ * NOTE: As an optimization we attempt to obtain a shared namecache lock
+ * on any intermediate elements. On success, the returned element
+ * is ALWAYS locked exclusively.
*/
+/*
+ * Return non-zero if the string at ptr consists solely of zero or
+ * more '/' characters, i.e. nothing but slashes precede the
+ * terminating NUL.
+ */
+static
+int
+islastelement(const char *ptr)
+{
+	const char *scan;
+
+	for (scan = ptr; *scan == '/'; ++scan)
+		;
+	return (*scan == 0);
+}
+
int
nlookup(struct nlookupdata *nd)
{
struct vnode *hvp; /* hold to prevent recyclement */
int wasdotordotdot;
char *ptr;
- char *xptr;
int error;
int len;
int dflags;
*/
if ((nd->nl_flags & NLC_NCPISLOCKED) == 0) {
nd->nl_flags |= NLC_NCPISLOCKED;
- cache_lock(&nd->nl_nch);
+ cache_lock_maybe_shared(&nd->nl_nch, islastelement(ptr));
}
/*
do {
++ptr;
} while (*ptr == '/');
- cache_get(&nd->nl_rootnch, &nch);
- cache_put(&nd->nl_nch);
+ cache_unlock(&nd->nl_nch);
+ cache_get_maybe_shared(&nd->nl_rootnch, &nch,
+ islastelement(ptr));
+ cache_drop(&nd->nl_nch);
nd->nl_nch = nch; /* remains locked */
/*
}
/*
- * Check directory search permissions.
+ * Check directory search permissions (nd->nl_nch is locked & refd)
*/
dflags = 0;
error = naccess(&nd->nl_nch, NLC_EXEC, nd->nl_cred, &dflags);
* since our dflags will be for some sub-directory instead of the
* parent dir.
*
- * This subsection returns a locked, refd 'nch' unless it errors out.
+ * This subsection returns a locked, refd 'nch' unless it errors out,
+ * and an unlocked but still ref'd nd->nl_nch.
+ *
* The namecache topology is not allowed to be disconnected, so
* encountering a NULL parent will generate EINVAL. This typically
* occurs when a directory is removed out from under a process.
+ *
+ * WARNING! The unlocking of nd->nl_nch is sensitive code.
*/
+ KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
+
if (nlc.nlc_namelen == 1 && nlc.nlc_nameptr[0] == '.') {
- cache_get(&nd->nl_nch, &nch);
+ cache_unlock(&nd->nl_nch);
+ nd->nl_flags &= ~NLC_NCPISLOCKED;
+ cache_get_maybe_shared(&nd->nl_nch, &nch, islastelement(ptr));
wasdotordotdot = 1;
} else if (nlc.nlc_namelen == 2 &&
nlc.nlc_nameptr[0] == '.' && nlc.nlc_nameptr[1] == '.') {
/*
* ".." at the root returns the root
*/
- cache_get(&nd->nl_nch, &nch);
+ cache_unlock(&nd->nl_nch);
+ nd->nl_flags &= ~NLC_NCPISLOCKED;
+ cache_get_maybe_shared(&nd->nl_nch, &nch, islastelement(ptr));
} else {
/*
* Locate the parent ncp. If we are at the root of a
nctmp.ncp = nctmp.ncp->nc_parent;
KKASSERT(nctmp.ncp != NULL);
cache_hold(&nctmp);
- cache_get(&nctmp, &nch);
+ cache_unlock(&nd->nl_nch);
+ nd->nl_flags &= ~NLC_NCPISLOCKED;
+ cache_get_maybe_shared(&nctmp, &nch, islastelement(ptr));
cache_drop(&nctmp); /* NOTE: zero's nctmp */
}
wasdotordotdot = 2;
vhold(hvp);
cache_unlock(&nd->nl_nch);
nd->nl_flags &= ~NLC_NCPISLOCKED;
- nch = cache_nlookup(&nd->nl_nch, &nlc);
- if (nch.ncp->nc_flag & NCF_UNRESOLVED)
- hit = 0;
- while ((error = cache_resolve(&nch, nd->nl_cred)) == EAGAIN ||
- (nch.ncp->nc_flag & NCF_DESTROYED)) {
- kprintf("[diagnostic] nlookup: relookup %*.*s\n",
- nch.ncp->nc_nlen, nch.ncp->nc_nlen, nch.ncp->nc_name);
- cache_put(&nch);
- nch = cache_nlookup(&nd->nl_nch, &nlc);
+ error = cache_nlookup_maybe_shared(&nd->nl_nch, &nlc,
+ islastelement(ptr), &nch);
+ if (error == EWOULDBLOCK) {
+ nch = cache_nlookup(&nd->nl_nch, &nlc);
+ if (nch.ncp->nc_flag & NCF_UNRESOLVED)
+ hit = 0;
+ for (;;) {
+ error = cache_resolve(&nch, nd->nl_cred);
+ if (error != EAGAIN &&
+ (nch.ncp->nc_flag & NCF_DESTROYED) == 0) {
+ break;
+ }
+ kprintf("[diagnostic] nlookup: relookup %*.*s\n",
+ nch.ncp->nc_nlen, nch.ncp->nc_nlen,
+ nch.ncp->nc_name);
+ cache_put(&nch);
+ nch = cache_nlookup(&nd->nl_nch, &nlc);
+ }
}
if (hvp)
vdrop(hvp);
if ((par.ncp = nch.ncp->nc_parent) != NULL) {
par.mount = nch.mount;
cache_hold(&par);
- cache_lock(&par);
+ cache_lock_maybe_shared(&par, islastelement(ptr));
error = naccess(&par, 0, nd->nl_cred, &dflags);
cache_put(&par);
}
}
- if (nd->nl_flags & NLC_NCPISLOCKED) {
- cache_unlock(&nd->nl_nch);
- nd->nl_flags &= ~NLC_NCPISLOCKED;
- }
/*
* [end of subsection]
* nl_nch must be unlocked or we could chain lock to the root
* if a resolve gets stuck (e.g. in NFS).
*/
+ KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0);
/*
* Resolve the namespace if necessary. The ncp returned by
* for a create/rename/delete. The standard requires this and pax
* pretty stupidly depends on it.
*/
- for (xptr = ptr; *xptr == '/'; ++xptr)
- ;
- if (*xptr == 0) {
+ if (islastelement(ptr)) {
if (error == ENOENT &&
(nd->nl_flags & (NLC_CREATE | NLC_RENAME_DST))
) {
}
}
}
- cache_get(&mp->mnt_ncmountpt, &nch);
+ cache_get_maybe_shared(&mp->mnt_ncmountpt, &nch,
+ islastelement(ptr));
if (nch.ncp->nc_flag & NCF_UNRESOLVED) {
if (vfs_do_busy == 0) {
}
if (hit)
- ++gd->gd_nchstats->ncs_longhits;
+ ++gd->gd_nchstats->ncs_longhits;
else
- ++gd->gd_nchstats->ncs_longmiss;
+ ++gd->gd_nchstats->ncs_longmiss;
+
+ if (nd->nl_flags & NLC_NCPISLOCKED)
+ KKASSERT(cache_lockstatus(&nd->nl_nch) == LK_EXCLUSIVE);
/*
* NOTE: If NLC_CREATE was set the ncp may represent a negative hit
* The directory sticky bit is tested for NLC_DELETE and NLC_RENAME_DST,
* the latter is only tested if the target exists.
*
- * The passed ncp must be referenced and locked.
+ * The passed ncp must be referenced and locked. If it is already resolved
+ * it may be locked shared but otherwise should be locked exclusively.
*/
static int
naccess(struct nchandle *nch, int nflags, struct ucred *cred, int *nflagsp)
int error;
int cflags;
- ASSERT_NCH_LOCKED(nch);
+ KKASSERT(cache_lockstatus(nch) > 0);
+
ncp = nch->ncp;
if (ncp->nc_flag & NCF_UNRESOLVED) {
cache_resolve(nch, cred);
} else if (error == 0 || error == ENOENT) {
par.mount = nch->mount;
cache_hold(&par);
- cache_lock(&par);
+ cache_lock_maybe_shared(&par, 0);
error = naccess(&par, NLC_WRITE, cred, NULL);
cache_put(&par);
}
cflags |= NCF_UF_PCACHE;
}
}
- ncp->nc_flag &= ~(NCF_SF_NOCACHE | NCF_UF_CACHE |
- NCF_SF_PNOCACHE | NCF_UF_PCACHE);
- ncp->nc_flag |= cflags;
+
+ /*
+ * XXX we're not supposed to update nc_flag when holding
+ * a shared lock.
+ */
+ atomic_clear_short(&ncp->nc_flag,
+ (NCF_SF_NOCACHE | NCF_UF_CACHE |
+ NCF_SF_PNOCACHE | NCF_UF_PCACHE) & ~cflags);
+ atomic_set_short(&ncp->nc_flag, cflags);
/*
* Process general access.