(VREAD|VEXEC) >> 3 |
(VREAD|VEXEC) >> 6;
vp->v_type = VDIR;
- vp->v_flag |= VROOT;
+ vsetflags(vp, VROOT);
break;
case Pself: /* /proc/self = lr--r--r-- */
}
/* prevent more writers */
- vp->v_flag |= VTEXT;
+ vsetflags(vp, VTEXT);
/*
* Check if file_offset page aligned. Currently we cannot handle
imgp->proc->p_sysent = &aout_sysvec;
/* Indicate that this file should not be modified */
- imgp->vp->v_flag |= VTEXT;
+ vsetflags(imgp->vp, VTEXT);
return (0);
}
* its VTEXT flag, too.
*/
if (error == 0)
- imgp->vp->v_flag |= VTEXT;
+ vsetflags(imgp->vp, VTEXT);
vn_unlock(imgp->vp);
if (error)
goto fail;
if (p->p_textvp)
vrele(p->p_textvp);
p->p_textvp = (struct vnode *)fp->f_data;
- p->p_textvp->v_flag |= VCKPT;
+ vsetflags(p->p_textvp, VCKPT);
vref(p->p_textvp);
}
done:
static struct iosched_data ioscpu[SMP_MAXCPU];
+/*
+ * MPSAFE
+ */
static int
badjiosched(thread_t td, size_t bytes)
{
/*
* Caller intends to write (bytes)
+ *
+ * MPSAFE
*/
void
bwillwrite(int bytes)
/*
* Caller intends to read (bytes)
+ *
+ * MPSAFE
*/
void
bwillread(int bytes)
/*
* Call intends to do an inode-modifying operation of some sort.
+ *
+ * MPSAFE
*/
void
bwillinode(int n)
* then before.
*/
error = lf_setlock(lock, owner, type, flags, start, end);
- ap->a_vp->v_flag |= VMAYHAVELOCKS;
+ vsetflags(ap->a_vp, VMAYHAVELOCKS);
break;
case F_UNLCK:
error = lf_setlock(lock, owner, type, flags, start, end);
if (TAILQ_EMPTY(&lock->lf_range) &&
TAILQ_EMPTY(&lock->lf_blocked)) {
- ap->a_vp->v_flag &= ~VMAYHAVELOCKS;
+ vclrflags(ap->a_vp, VMAYHAVELOCKS);
}
break;
/*
- * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved.
+ * Copyright (c) 2003,2004,2009 The DragonFly Project. All rights reserved.
*
* This code is derived from software contributed to The DragonFly Project
* by Matthew Dillon <dillon@backplane.com>
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
- * $FreeBSD: src/sys/kern/vfs_cache.c,v 1.42.2.6 2001/10/05 20:07:03 dillon Exp $
- * $DragonFly: src/sys/kern/vfs_cache.c,v 1.91 2008/06/14 05:34:06 dillon Exp $
*/
#include <sys/param.h>
/*
* Random lookups in the cache are accomplished with a hash table using
- * a hash key of (nc_src_vp, name).
+ * a hash key of (nc_src_vp, name). Each hash chain has its own spin lock.
+ *
+ * Negative entries may exist and correspond to resolved namecache
+ * structures where nc_vp is NULL. In a negative entry, NCF_WHITEOUT
+ * will be set if the entry corresponds to a whited-out directory entry
+ * (versus simply not finding the entry at all). ncneglist is locked
+ * with a global spinlock (ncspin).
+ *
+ * MPSAFE RULES:
+ *
+ * (1) A ncp must be referenced before it can be locked.
+ *
+ * (2) A ncp must be locked in order to modify it.
+ *
+ * (3) ncp locks are always ordered child -> parent. That may seem
+ * backwards but forward scans use the hash table and thus can hold
+ * the parent unlocked when traversing downward.
*
- * Negative entries may exist and correspond to structures where nc_vp
- * is NULL. In a negative entry, NCF_WHITEOUT will be set if the entry
- * corresponds to a whited-out directory entry (verses simply not finding the
- * entry at all).
+ * This allows insert/rename/delete/dot-dot and other operations
+ * to use ncp->nc_parent links.
*
- * Upon reaching the last segment of a path, if the reference is for DELETE,
- * or NOCACHE is set (rewrite), and the name is located in the cache, it
- * will be dropped.
+ * This also prevents a locked up e.g. NFS node from creating a
+ * chain reaction all the way back to the root vnode / namecache.
+ *
+ * (4) parent linkages require both the parent and child to be locked.
*/
/*
static struct nchash_head *nchashtbl;
static struct namecache_list ncneglist;
static struct spinlock ncspin;
-struct lwkt_token vfs_token;
/*
* ncvp_debug - debug cache_fromvp(). This is used by the NFS server
SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");
+int cache_mpsafe;
+SYSCTL_INT(_vfs, OID_AUTO, cache_mpsafe, CTLFLAG_RW, &cache_mpsafe, 0, "");
+
static int cache_resolve_mp(struct mount *mp);
static struct vnode *cache_dvpref(struct namecache *ncp);
-static void _cache_rehash(struct namecache *ncp);
static void _cache_lock(struct namecache *ncp);
static void _cache_setunresolved(struct namecache *ncp);
* The lock owner has full authority to associate/disassociate vnodes
* and resolve/unresolve the locked ncp.
*
+ * The primary lock field is nc_exlocks. nc_locktd is set after the
+ * fact (when locking) or cleared prior to unlocking.
+ *
* WARNING! Holding a locked ncp will prevent a vnode from being destroyed
* or recycled, but it does NOT help you if the vnode had already
* initiated a recyclement. If this is important, use cache_get()
* way the refs counter is handled). Or, alternatively, make an
* unconditional call to cache_validate() or cache_resolve()
* after cache_lock() returns.
+ *
+ * MPSAFE
*/
static
void
_cache_lock(struct namecache *ncp)
{
thread_t td;
- thread_t xtd;
int didwarn;
int error;
+ u_int count;
KKASSERT(ncp->nc_refs != 0);
didwarn = 0;
td = curthread;
for (;;) {
- xtd = ncp->nc_locktd;
-
- if (xtd == td) {
- ++ncp->nc_exlocks;
- break;
- }
- if (xtd == NULL) {
- if (atomic_cmpset_ptr(&ncp->nc_locktd, NULL, td)) {
- KKASSERT(ncp->nc_exlocks == 0);
- ncp->nc_exlocks = 1;
+ count = ncp->nc_exlocks;
+ if (count == 0) {
+ if (atomic_cmpset_int(&ncp->nc_exlocks, 0, 1)) {
/*
* The vp associated with a locked ncp must
* be held to prevent it from being recycled.
* cache_vget() on the locked ncp to
* validate the vp or set the cache entry
* to unresolved.
+ *
+ * NOTE! vhold() is allowed if we hold a
+ * lock on the ncp (which we do).
*/
+ ncp->nc_locktd = td;
if (ncp->nc_vp)
vhold(ncp->nc_vp); /* MPSAFE */
break;
}
+ /* cmpset failed */
+ continue;
+ }
+ if (ncp->nc_locktd == td) {
+ if (atomic_cmpset_int(&ncp->nc_exlocks, count,
+ count + 1)) {
+ break;
+ }
+ /* cmpset failed */
continue;
}
-
- /*
- * Memory interlock (XXX)
- */
- ncp->nc_lockreq = 1;
tsleep_interlock(ncp, 0);
- cpu_mfence();
- if (xtd != ncp->nc_locktd)
+ if (atomic_cmpset_int(&ncp->nc_exlocks, count,
+ count | NC_EXLOCK_REQ) == 0) {
+ /* cmpset failed */
continue;
+ }
error = tsleep(ncp, PINTERLOCKED, "clock", nclockwarn);
if (error == EWOULDBLOCK) {
- if (didwarn)
- continue;
- didwarn = 1;
- kprintf("[diagnostic] cache_lock: blocked on %p", ncp);
- kprintf(" \"%*.*s\"\n",
- ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name);
+ if (didwarn == 0) {
+ didwarn = ticks;
+ kprintf("[diagnostic] cache_lock: blocked "
+ "on %p",
+ ncp);
+ kprintf(" \"%*.*s\"\n",
+ ncp->nc_nlen, ncp->nc_nlen,
+ ncp->nc_name);
+ }
}
}
-
- if (didwarn == 1) {
- kprintf("[diagnostic] cache_lock: unblocked %*.*s\n",
- ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name);
+ if (didwarn) {
+ kprintf("[diagnostic] cache_lock: unblocked %*.*s after "
+ "%d secs\n",
+ ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name,
+ (int)(ticks - didwarn) / hz);
}
}
+/*
+ * MPSAFE
+ */
static
int
_cache_lock_nonblock(struct namecache *ncp)
{
thread_t td;
- thread_t xtd;
+ u_int count;
KKASSERT(ncp->nc_refs != 0);
td = curthread;
for (;;) {
- xtd = ncp->nc_locktd;
-
- if (xtd == td) {
- ++ncp->nc_exlocks;
- break;
- }
- if (xtd == NULL) {
- if (atomic_cmpset_ptr(&ncp->nc_locktd, NULL, td)) {
- KKASSERT(ncp->nc_exlocks == 0);
- ncp->nc_exlocks = 1;
+ count = ncp->nc_exlocks;
+ if (count == 0) {
+ if (atomic_cmpset_int(&ncp->nc_exlocks, 0, 1)) {
/*
* The vp associated with a locked ncp must
* be held to prevent it from being recycled.
* cache_vget() on the locked ncp to
* validate the vp or set the cache entry
* to unresolved.
+ *
+ * NOTE! vhold() is allowed if we hold a
+ * lock on the ncp (which we do).
*/
+ ncp->nc_locktd = td;
if (ncp->nc_vp)
vhold(ncp->nc_vp); /* MPSAFE */
break;
}
+ /* cmpset failed */
+ continue;
+ }
+ if (ncp->nc_locktd == td) {
+ if (atomic_cmpset_int(&ncp->nc_exlocks, count,
+ count + 1)) {
+ break;
+ }
+ /* cmpset failed */
continue;
}
return(EWOULDBLOCK);
* Helper function
*
* NOTE: nc_refs can be 0 (degenerate case during _cache_drop).
+ *
+ * NOTE: nc_locktd must be NULLed out prior to nc_exlocks getting cleared.
+ *
+ * MPSAFE
*/
static
void
_cache_unlock(struct namecache *ncp)
{
thread_t td __debugvar = curthread;
+ u_int count;
KKASSERT(ncp->nc_refs >= 0);
KKASSERT(ncp->nc_exlocks > 0);
KKASSERT(ncp->nc_locktd == td);
- if (--ncp->nc_exlocks == 0) {
+ count = ncp->nc_exlocks;
+ if ((count & ~NC_EXLOCK_REQ) == 1) {
+ ncp->nc_locktd = NULL;
if (ncp->nc_vp)
vdrop(ncp->nc_vp);
- ncp->nc_locktd = NULL;
- cpu_mfence();
- if (ncp->nc_lockreq) {
- ncp->nc_lockreq = 0;
- wakeup(ncp);
+ }
+ for (;;) {
+ if ((count & ~NC_EXLOCK_REQ) == 1) {
+ if (atomic_cmpset_int(&ncp->nc_exlocks, count, 0)) {
+ if (count & NC_EXLOCK_REQ)
+ wakeup(ncp);
+ break;
+ }
+ } else {
+ if (atomic_cmpset_int(&ncp->nc_exlocks, count,
+ count - 1)) {
+ break;
+ }
}
+ count = ncp->nc_exlocks;
}
}
*
* NOTE: cache_zap() may return a non-NULL referenced parent which must
* be dropped in a loop.
+ *
+ * MPSAFE
*/
static __inline
void
if (atomic_cmpset_int(&ncp->nc_refs, refs, refs - 1))
break;
}
+ cpu_pause();
}
}
/*
- * Link a new namecache entry to its parent. Be careful to avoid races
- * if vhold() blocks in the future.
+ * Link a new namecache entry to its parent and to the hash table. Be
+ * careful to avoid races if vhold() blocks in the future.
+ *
+ * Both ncp and par must be referenced and locked.
+ *
+ * NOTE: The hash table spinlock is likely held during this call, we
+ * can't do anything fancy.
*
- * MPSAFE - ncp must be locked and vfs_token must be held.
+ * MPSAFE
*/
static void
-_cache_link_parent(struct namecache *ncp, struct namecache *par)
+_cache_link_parent(struct namecache *ncp, struct namecache *par,
+ struct nchash_head *nchpp)
{
KKASSERT(ncp->nc_parent == NULL);
ncp->nc_parent = par;
+ ncp->nc_head = nchpp;
+ LIST_INSERT_HEAD(&nchpp->list, ncp, nc_hash);
+
if (TAILQ_EMPTY(&par->nc_list)) {
TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry);
/*
* be held to prevent it from being recycled.
*/
if (par->nc_vp)
- vhold(par->nc_vp); /* MPSAFE */
+ vhold(par->nc_vp);
} else {
TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry);
}
}
/*
- * Remove the parent association from a namecache structure. If this is
- * the last child of the parent the cache_drop(par) will attempt to
- * recursively zap the parent.
+ * Remove the parent and hash associations from a namecache structure.
+ * If this is the last child of the parent the cache_drop(par) will
+ * attempt to recursively zap the parent.
+ *
+ * ncp must be locked. This routine will acquire a temporary lock on
+ * the parent as well as the appropriate hash chain.
*
- * MPSAFE - ncp must be locked and vfs_token must be held.
+ * MPSAFE
*/
static void
_cache_unlink_parent(struct namecache *ncp)
struct vnode *dropvp;
if ((par = ncp->nc_parent) != NULL) {
- ncp->nc_parent = NULL;
+ KKASSERT(ncp->nc_parent == par);
_cache_hold(par);
+ _cache_lock(par);
+ spin_lock_wr(&ncp->nc_head->spin);
+ LIST_REMOVE(ncp, nc_hash);
TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
dropvp = NULL;
if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
dropvp = par->nc_vp;
+ spin_unlock_wr(&ncp->nc_head->spin);
+ ncp->nc_parent = NULL;
+ ncp->nc_head = NULL;
+ _cache_unlock(par);
_cache_drop(par);
/*
/*
* Allocate a new namecache structure. Most of the code does not require
* zero-termination of the string but it makes vop_compat_ncreate() easier.
+ *
+ * MPSAFE
*/
static struct namecache *
cache_alloc(int nlen)
/*
* Can only be called for the case where the ncp has never been
* associated with anything (so no spinlocks are needed).
+ *
+ * MPSAFE
*/
static void
_cache_free(struct namecache *ncp)
kfree(ncp, M_VFSCACHE);
}
+/*
+ * MPSAFE
+ */
void
cache_zero(struct nchandle *nch)
{
/*
* Ref and deref a namecache structure.
*
- * Warning: caller may hold an unrelated read spinlock, which means we can't
- * use read spinlocks here.
+ * The caller must specify a stable ncp pointer, typically meaning the
+ * ncp is already referenced but this can also occur indirectly through
+ * e.g. holding a lock on a direct child.
+ *
+ * WARNING: Caller may hold an unrelated read spinlock, which means we can't
+ * use read spinlocks here.
*
* MPSAFE if nch is
*/
atomic_add_int(&nch->mount->mnt_refs, 1);
}
+/*
+ * MPSAFE
+ */
void
cache_drop(struct nchandle *nch)
{
nch->mount = NULL;
}
+/*
+ * MPSAFE
+ */
void
cache_lock(struct nchandle *nch)
{
_cache_lock(nch->ncp);
}
+/*
+ * Relock nch1 given an unlocked nch1 and a locked nch2. The caller
+ * is responsible for checking both for validity on return as they
+ * may have become invalid.
+ *
+ * We have to deal with potential deadlocks here, just ping pong
+ * the lock until we get it (we will always block somewhere when
+ * looping so this is not cpu-intensive).
+ *
+ * which = 0 nch1 not locked, nch2 is locked
+ * which = 1 nch1 is locked, nch2 is not locked
+ */
+void
+cache_relock(struct nchandle *nch1, struct ucred *cred1,
+ struct nchandle *nch2, struct ucred *cred2)
+{
+ int which;
+
+ which = 0;
+
+ for (;;) {
+ if (which == 0) {
+ if (cache_lock_nonblock(nch1) == 0) {
+ cache_resolve(nch1, cred1);
+ break;
+ }
+ cache_unlock(nch2);
+ cache_lock(nch1);
+ cache_resolve(nch1, cred1);
+ which = 1;
+ } else {
+ if (cache_lock_nonblock(nch2) == 0) {
+ cache_resolve(nch2, cred2);
+ break;
+ }
+ cache_unlock(nch1);
+ cache_lock(nch2);
+ cache_resolve(nch2, cred2);
+ which = 0;
+ }
+ }
+}
+
+/*
+ * MPSAFE
+ */
int
cache_lock_nonblock(struct nchandle *nch)
{
}
+/*
+ * MPSAFE
+ */
void
cache_unlock(struct nchandle *nch)
{
*
* We want cache_get() to return a definitively usable vnode or a
* definitively unresolved ncp.
+ *
+ * MPSAFE
*/
static
struct namecache *
}
/*
- * This is a special form of _cache_get() which only succeeds if
+ * This is a special form of _cache_lock() which only succeeds if
* it can get a pristine, non-recursive lock. The caller must have
* already ref'd the ncp.
*
* On success the ncp will be locked, on failure it will not. The
* ref count does not change either way.
*
- * We want _cache_get_nonblock() (on success) to return a definitively
+ * We want _cache_lock_special() (on success) to return a definitively
* usable vnode or a definitively unresolved ncp.
+ *
+ * MPSAFE
*/
static int
-_cache_get_nonblock(struct namecache *ncp)
+_cache_lock_special(struct namecache *ncp)
{
if (_cache_lock_nonblock(ncp) == 0) {
- if (ncp->nc_exlocks == 1) {
+ if ((ncp->nc_exlocks & ~NC_EXLOCK_REQ) == 1) {
if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED))
_cache_setunresolved(ncp);
return(0);
/*
* NOTE: The same nchandle can be passed for both arguments.
+ *
+ * MPSAFE
*/
void
cache_get(struct nchandle *nch, struct nchandle *target)
atomic_add_int(&target->mount->mnt_refs, 1);
}
-#if 0
-int
-cache_get_nonblock(struct nchandle *nch)
-{
- int error;
-
- if ((error = _cache_get_nonblock(nch->ncp)) == 0)
- atomic_add_int(&nch->mount->mnt_refs, 1);
- return (error);
-}
-#endif
-
+/*
+ * MPSAFE
+ */
static __inline
void
_cache_put(struct namecache *ncp)
_cache_drop(ncp);
}
+/*
+ * MPSAFE
+ */
void
cache_put(struct nchandle *nch)
{
* vnode is NULL, a negative cache entry is created.
*
* The ncp should be locked on entry and will remain locked on return.
+ *
+ * MPSAFE
*/
static
void
_cache_setvp(struct mount *mp, struct namecache *ncp, struct vnode *vp)
{
KKASSERT(ncp->nc_flag & NCF_UNRESOLVED);
+
if (vp != NULL) {
/*
* Any vp associated with an ncp which has children must
*/
ncp->nc_vp = NULL;
spin_lock_wr(&ncspin);
- lwkt_token_init(&vfs_token);
TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
++numneg;
spin_unlock_wr(&ncspin);
ncp->nc_flag &= ~NCF_UNRESOLVED;
}
+/*
+ * MPSAFE
+ */
void
cache_setvp(struct nchandle *nch, struct vnode *vp)
{
_cache_setvp(nch->mount, nch->ncp, vp);
}
+/*
+ * MPSAFE
+ */
void
cache_settimeout(struct nchandle *nch, int nticks)
{
* avoid complex namespace operations. This disconnects a directory vnode
* from its namecache and can cause the OLDAPI and NEWAPI to get out of
* sync.
+ *
+ * MPSAFE
*/
static
void
* set a resolved cache element to unresolved if it has timed out
* or if it is a negative cache hit and the mount point namecache_gen
* has changed.
+ *
+ * MPSAFE
*/
static __inline void
_cache_auto_unresolve(struct mount *mp, struct namecache *ncp)
}
}
+/*
+ * MPSAFE
+ */
void
cache_setunresolved(struct nchandle *nch)
{
* looking for matches. This flag tells the lookup code when it must
* check for a mount linkage and also prevents the directories in question
* from being deleted or renamed.
+ *
+ * MPSAFE
*/
static
int
return(0);
}
+/*
+ * MPSAFE
+ */
void
cache_clrmountpt(struct nchandle *nch)
{
* Invalidate portions of the namecache topology given a starting entry.
* The passed ncp is set to an unresolved state and:
*
- * The passed ncp must be locked.
+ * The passed ncp must be referenced and locked. The routine may unlock
+ * and relock ncp several times, and will recheck the children and loop
+ * to catch races. When done the passed ncp will be returned with the
+ * reference and lock intact.
*
* CINV_DESTROY - Set a flag in the passed ncp entry indicating
* that the physical underlying nodes have been
* cleaning out any unreferenced nodes in the topology
* from the leaves up as the recursion backs out.
*
- * Note that the topology for any referenced nodes remains intact.
+ * Note that the topology for any referenced nodes remains intact, but
+ * the nodes will be marked as having been destroyed and will be set
+ * to an unresolved state.
*
* It is possible for cache_inval() to race a cache_resolve(), meaning that
* the namecache entry may not actually be invalidated on return if it was
* node using a depth-first algorithm in order to allow multiple deep
* recursions to chain through each other, then we restart the invalidation
* from scratch.
+ *
+ * MPSAFE
*/
struct cinvtrack {
return(_cache_inval(nch->ncp, flags));
}
+/*
+ * Helper for _cache_inval(). The passed ncp is refd and locked and
+ * remains that way on return, but may be unlocked/relocked multiple
+ * times by the routine.
+ */
static int
_cache_inval_internal(struct namecache *ncp, int flags, struct cinvtrack *track)
{
struct namecache *kid;
struct namecache *nextkid;
- lwkt_tokref nlock;
int rcnt = 0;
KKASSERT(ncp->nc_exlocks);
_cache_setunresolved(ncp);
- lwkt_gettoken(&nlock, &vfs_token);
if (flags & CINV_DESTROY)
ncp->nc_flag |= NCF_DESTROYED;
if ((flags & CINV_CHILDREN) &&
--track->depth;
_cache_lock(ncp);
}
- lwkt_reltoken(&nlock);
/*
* Someone could have gotten in there while ncp was unlocked,
*
* In addition, the v_namecache list itself must be locked via
* the vnode's spinlock.
+ *
+ * MPSAFE
*/
int
cache_inval_vp(struct vnode *vp, int flags)
_cache_inval(ncp, flags);
_cache_put(ncp); /* also releases reference */
ncp = next;
+ spin_lock_wr(&vp->v_spinlock);
if (ncp && ncp->nc_vp != vp) {
+ spin_unlock_wr(&vp->v_spinlock);
kprintf("Warning: cache_inval_vp: race-B detected on "
"%s\n", ncp->nc_name);
_cache_drop(ncp);
goto restart;
}
- spin_lock_wr(&vp->v_spinlock);
}
spin_unlock_wr(&vp->v_spinlock);
return(TAILQ_FIRST(&vp->v_namecache) != NULL);
*
* Return 0 on success, non-zero if not all namecache records could be
* disassociated from the vnode (for various reasons).
+ *
+ * MPSAFE
*/
int
cache_inval_vp_nonblock(struct vnode *vp)
_cache_drop(ncp);
if (next)
_cache_drop(next);
- break;
+ goto done;
}
if (ncp->nc_vp != vp) {
kprintf("Warning: cache_inval_vp: race-A detected on "
_cache_put(ncp);
if (next)
_cache_drop(next);
- break;
+ goto done;
}
_cache_inval(ncp, 0);
_cache_put(ncp); /* also releases reference */
ncp = next;
+ spin_lock_wr(&vp->v_spinlock);
if (ncp && ncp->nc_vp != vp) {
+ spin_unlock_wr(&vp->v_spinlock);
kprintf("Warning: cache_inval_vp: race-B detected on "
"%s\n", ncp->nc_name);
_cache_drop(ncp);
- break;
+ goto done;
}
- spin_lock_wr(&vp->v_spinlock);
}
spin_unlock_wr(&vp->v_spinlock);
+done:
return(TAILQ_FIRST(&vp->v_namecache) != NULL);
}
* Because there may be references to the source ncp we cannot copy its
* contents to the target. Instead the source ncp is relinked as the target
* and the target ncp is removed from the namecache topology.
+ *
+ * MPSAFE
*/
void
cache_rename(struct nchandle *fnch, struct nchandle *tnch)
{
struct namecache *fncp = fnch->ncp;
struct namecache *tncp = tnch->ncp;
+ struct namecache *tncp_par;
+ struct nchash_head *nchpp;
+ u_int32_t hash;
char *oname;
- lwkt_tokref nlock;
- lwkt_gettoken(&nlock, &vfs_token);
- _cache_setunresolved(tncp);
+ /*
+ * Rename fncp (unlink)
+ */
_cache_unlink_parent(fncp);
- _cache_link_parent(fncp, tncp->nc_parent);
- _cache_unlink_parent(tncp);
oname = fncp->nc_name;
fncp->nc_name = tncp->nc_name;
fncp->nc_nlen = tncp->nc_nlen;
+ tncp_par = tncp->nc_parent;
+ _cache_hold(tncp_par);
+ _cache_lock(tncp_par);
+
+ /*
+ * Rename fncp (relink)
+ */
+ hash = fnv_32_buf(fncp->nc_name, fncp->nc_nlen, FNV1_32_INIT);
+ hash = fnv_32_buf(&tncp_par, sizeof(tncp_par), hash);
+ nchpp = NCHHASH(hash);
+
+ spin_lock_wr(&nchpp->spin);
+ _cache_link_parent(fncp, tncp_par, nchpp);
+ spin_unlock_wr(&nchpp->spin);
+
+ _cache_put(tncp_par);
+
+ /*
+ * Get rid of the overwritten tncp (unlink)
+ */
+ _cache_setunresolved(tncp);
+ _cache_unlink_parent(tncp);
tncp->nc_name = NULL;
tncp->nc_nlen = 0;
- if (fncp->nc_head)
- _cache_rehash(fncp);
- if (tncp->nc_head)
- _cache_rehash(tncp);
- lwkt_reltoken(&nlock);
if (oname)
kfree(oname, M_VFSCACHE);
/*
* vget the vnode associated with the namecache entry. Resolve the namecache
- * entry if necessary and deal with namecache/vp races. The passed ncp must
- * be referenced and may be locked. The ncp's ref/locking state is not
- * effected by this call.
+ * entry if necessary. The passed ncp must be referenced and locked.
*
* lk_type may be LK_SHARED, LK_EXCLUSIVE. A ref'd, possibly locked
* (depending on the passed lk_type) will be returned in *vpp with an error
* cache hit and there is no vnode to retrieve, but other errors can occur
* too.
*
- * The main race we have to deal with are namecache zaps. The ncp itself
- * will not disappear since it is referenced, and it turns out that the
- * validity of the vp pointer can be checked simply by rechecking the
- * contents of ncp->nc_vp.
+ * The vget() can race a reclaim. If this occurs we re-resolve the
+ * namecache entry.
+ *
+ * There are numerous places in the kernel where vget() is called on a
+ * vnode while one or more of its namecache entries is locked. Releasing
+ * a vnode never deadlocks against locked namecache entries (the vnode
+ * will not get recycled while referenced ncp's exist). This means we
+ * can safely acquire the vnode. In fact, we MUST NOT release the ncp
+ * lock when acquiring the vp lock or we might cause a deadlock.
+ *
+ * MPSAFE
*/
int
cache_vget(struct nchandle *nch, struct ucred *cred,
int error;
ncp = nch->ncp;
+ KKASSERT(ncp->nc_locktd == curthread);
again:
vp = NULL;
- if (ncp->nc_flag & NCF_UNRESOLVED) {
- _cache_lock(ncp);
+ if (ncp->nc_flag & NCF_UNRESOLVED)
error = cache_resolve(nch, cred);
- _cache_unlock(ncp);
- } else {
+ else
error = 0;
- }
+
if (error == 0 && (vp = ncp->nc_vp) != NULL) {
- /*
- * Accessing the vnode from the namecache is a bit
- * dangerous. Because there are no refs on the vnode, it
- * could be in the middle of a reclaim.
- */
- if (vp->v_flag & VRECLAIMED) {
- kprintf("Warning: vnode reclaim race detected in cache_vget on %p (%s)\n", vp, ncp->nc_name);
- _cache_lock(ncp);
- _cache_setunresolved(ncp);
- _cache_unlock(ncp);
- goto again;
- }
error = vget(vp, lk_type);
if (error) {
- if (vp != ncp->nc_vp)
+ /*
+ * VRECLAIM race
+ */
+ if (error == ENOENT) {
+ kprintf("Warning: vnode reclaim race detected "
+ "in cache_vget on %p (%s)\n",
+ vp, ncp->nc_name);
+ _cache_setunresolved(ncp);
goto again;
+ }
+
+ /*
+ * Not a reclaim race, some other error.
+ */
+ KKASSERT(ncp->nc_vp == vp);
vp = NULL;
- } else if (vp != ncp->nc_vp) {
- vput(vp);
- goto again;
- } else if (vp->v_flag & VRECLAIMED) {
- panic("vget succeeded on a VRECLAIMED node! vp %p", vp);
+ } else {
+ KKASSERT(ncp->nc_vp == vp);
+ KKASSERT((vp->v_flag & VRECLAIMED) == 0);
}
}
if (error == 0 && vp == NULL)
int error;
ncp = nch->ncp;
-
+ KKASSERT(ncp->nc_locktd == curthread);
again:
vp = NULL;
- if (ncp->nc_flag & NCF_UNRESOLVED) {
- _cache_lock(ncp);
+ if (ncp->nc_flag & NCF_UNRESOLVED)
error = cache_resolve(nch, cred);
- _cache_unlock(ncp);
- } else {
+ else
error = 0;
- }
+
if (error == 0 && (vp = ncp->nc_vp) != NULL) {
- /*
- * Since we did not obtain any locks, a cache zap
- * race can occur here if the vnode is in the middle
- * of being reclaimed and has not yet been able to
- * clean out its cache node. If that case occurs,
- * we must lock and unresolve the cache, then loop
- * to retry.
- */
- if ((error = vget(vp, LK_SHARED)) != 0) {
+ error = vget(vp, LK_SHARED);
+ if (error) {
+ /*
+ * VRECLAIM race
+ */
if (error == ENOENT) {
- kprintf("Warning: vnode reclaim race detected on cache_vref %p (%s)\n", vp, ncp->nc_name);
- _cache_lock(ncp);
+ kprintf("Warning: vnode reclaim race detected "
+ "in cache_vget on %p (%s)\n",
+ vp, ncp->nc_name);
_cache_setunresolved(ncp);
- _cache_unlock(ncp);
goto again;
}
- /* fatal error */
+
+ /*
+ * Not a reclaim race, some other error.
+ */
+ KKASSERT(ncp->nc_vp == vp);
+ vp = NULL;
} else {
+ KKASSERT(ncp->nc_vp == vp);
+ KKASSERT((vp->v_flag & VRECLAIMED) == 0);
/* caller does not want a lock */
vn_unlock(vp);
}
* We have to leave par unlocked when vget()ing dvp to avoid a deadlock,
* so use vhold()/vdrop() while holding the lock to prevent dvp from
* getting destroyed.
+ *
+ * MPSAFE - Note vhold() is allowed when dvp has 0 refs if we hold a
+ * lock on the ncp in question.
*/
static struct vnode *
cache_dvpref(struct namecache *ncp)
dvp = NULL;
if ((par = ncp->nc_parent) != NULL) {
_cache_hold(par);
- if (_cache_lock_nonblock(par) == 0) {
- if ((par->nc_flag & NCF_UNRESOLVED) == 0) {
- if ((dvp = par->nc_vp) != NULL)
- vhold(dvp);
- }
- _cache_unlock(par);
- if (dvp) {
- if (vget(dvp, LK_SHARED) == 0) {
- vn_unlock(dvp);
- vdrop(dvp);
- /* return refd, unlocked dvp */
- } else {
- vdrop(dvp);
- dvp = NULL;
- }
+ _cache_lock(par);
+ if ((par->nc_flag & NCF_UNRESOLVED) == 0) {
+ if ((dvp = par->nc_vp) != NULL)
+ vhold(dvp);
+ }
+ _cache_unlock(par);
+ if (dvp) {
+ if (vget(dvp, LK_SHARED) == 0) {
+ vn_unlock(dvp);
+ vdrop(dvp);
+ /* return refd, unlocked dvp */
+ } else {
+ vdrop(dvp);
+ dvp = NULL;
}
}
_cache_drop(par);
vat.va_blocksize = 0;
if ((error = VOP_GETATTR(dvp, &vat)) != 0)
return (error);
- if ((error = cache_vref(nch, cred, &pvp)) != 0)
+ cache_lock(nch);
+ error = cache_vref(nch, cred, &pvp);
+ cache_unlock(nch);
+ if (error)
return (error);
if (ncvp_debug) {
kprintf("inefficient_scan: directory iosize %ld "
cache_zap(struct namecache *ncp)
{
struct namecache *par;
- struct spinlock *hspin;
struct vnode *dropvp;
- lwkt_tokref nlock;
int refs;
/*
/*
* Acquire locks
*/
- lwkt_gettoken(&nlock, &vfs_token);
- hspin = NULL;
- if (ncp->nc_head) {
- hspin = &ncp->nc_head->spin;
- spin_lock_wr(hspin);
+ if ((par = ncp->nc_parent) != NULL) {
+ _cache_hold(par);
+ _cache_lock(par);
+ spin_lock_wr(&ncp->nc_head->spin);
}
/*
if (refs == 1 && TAILQ_EMPTY(&ncp->nc_list))
break;
if (atomic_cmpset_int(&ncp->nc_refs, refs, refs - 1)) {
- if (hspin)
- spin_unlock_wr(hspin);
- lwkt_reltoken(&nlock);
+ if (par) {
+ spin_unlock_wr(&ncp->nc_head->spin);
+ _cache_put(par);
+ }
_cache_unlock(ncp);
return(NULL);
}
+ cpu_pause();
}
/*
* drop a ref on the parent's vp if the parent's list becomes
* empty.
*/
- if (ncp->nc_head) {
- LIST_REMOVE(ncp, nc_hash);
- ncp->nc_head = NULL;
- }
dropvp = NULL;
- if ((par = ncp->nc_parent) != NULL) {
- par = _cache_hold(par);
- TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
- ncp->nc_parent = NULL;
+ if (par) {
+ struct nchash_head *nchpp = ncp->nc_head;
+ KKASSERT(nchpp != NULL);
+ LIST_REMOVE(ncp, nc_hash);
+ TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
dropvp = par->nc_vp;
+ ncp->nc_head = NULL;
+ ncp->nc_parent = NULL;
+ spin_unlock_wr(&nchpp->spin);
+ _cache_unlock(par);
+ } else {
+ KKASSERT(ncp->nc_head == NULL);
}
/*
* ncp should not have picked up any refs. Physically
* destroy the ncp.
*/
- if (hspin)
- spin_unlock_wr(hspin);
- lwkt_reltoken(&nlock);
KKASSERT(ncp->nc_refs == 1);
atomic_add_int(&numunres, -1);
/* _cache_unlock(ncp) not required */
/*
* NEW NAMECACHE LOOKUP API
*
- * Lookup an entry in the cache. A locked, referenced, non-NULL
- * entry is *always* returned, even if the supplied component is illegal.
+ * Lookup an entry in the namecache. The passed par_nch must be referenced
+ * and unlocked. A referenced and locked nchandle with a non-NULL nch.ncp
+ * is ALWAYS returned, even if the supplied component is illegal.
+ *
* The resulting namecache entry should be returned to the system with
- * cache_put() or _cache_unlock() + cache_drop().
+ * cache_put() or cache_unlock() + cache_drop().
*
* namecache locks are recursive but care must be taken to avoid lock order
- * reversals.
+ * reversals (hence why the passed par_nch must be unlocked). Locks are
+ * ordered child -> parent, i.e. for parent traversals, not child traversals.
*
* Nobody else will be able to manipulate the associated namespace (e.g.
* create, delete, rename, rename-target) until the caller unlocks the
struct mount *mp;
u_int32_t hash;
globaldata_t gd;
- lwkt_tokref nlock;
+ int par_locked;
numcalls++;
gd = mycpu;
mp = par_nch->mount;
+ par_locked = 0;
+
+ /*
+ * This is a good time to call it, no ncp's are locked by
+ * the caller or us.
+ */
+ _cache_hysteresis();
/*
* Try to locate an existing entry
) {
_cache_hold(ncp);
spin_unlock_wr(&nchpp->spin);
- if (_cache_get_nonblock(ncp) == 0) {
+ if (par_locked) {
+ _cache_unlock(par_nch->ncp);
+ par_locked = 0;
+ }
+ if (_cache_lock_special(ncp) == 0) {
_cache_auto_unresolve(mp, ncp);
if (new_ncp)
_cache_free(new_ncp);
goto restart;
}
}
- spin_unlock_wr(&nchpp->spin);
/*
* We failed to locate an entry, create a new entry and add it to
- * the cache. We have to relookup after possibly blocking in
- * malloc.
+ * the cache. The parent ncp must also be locked so we
+ * can link into it.
+ *
+ * We have to relookup after possibly blocking in kmalloc or
+ * when locking par_nch.
+ *
+ * NOTE: nlc_namelen can be 0 and nlc_nameptr NULL as a special
+ * mount case, in which case nc_name will be NULL.
*/
if (new_ncp == NULL) {
+ spin_unlock_wr(&nchpp->spin);
new_ncp = cache_alloc(nlc->nlc_namelen);
+ if (nlc->nlc_namelen) {
+ bcopy(nlc->nlc_nameptr, new_ncp->nc_name,
+ nlc->nlc_namelen);
+ new_ncp->nc_name[nlc->nlc_namelen] = 0;
+ }
+ goto restart;
+ }
+ if (par_locked == 0) {
+ spin_unlock_wr(&nchpp->spin);
+ _cache_lock(par_nch->ncp);
+ par_locked = 1;
goto restart;
}
-
- ncp = new_ncp;
/*
- * Initialize as a new UNRESOLVED entry, lock (non-blocking),
- * and link to the parent. The mount point is usually inherited
- * from the parent unless this is a special case such as a mount
- * point where nlc_namelen is 0. If nlc_namelen is 0 nc_name will
- * be NULL.
+ * WARNING! We still hold the spinlock. We have to set the hash
+ * table entry atomically.
*/
- if (nlc->nlc_namelen) {
- bcopy(nlc->nlc_nameptr, ncp->nc_name, nlc->nlc_namelen);
- ncp->nc_name[nlc->nlc_namelen] = 0;
- }
- nchpp = NCHHASH(hash); /* compiler optimization */
- spin_lock_wr(&nchpp->spin);
- LIST_INSERT_HEAD(&nchpp->list, ncp, nc_hash);
- ncp->nc_head = nchpp;
+ ncp = new_ncp;
+ _cache_link_parent(ncp, par_nch->ncp, nchpp);
spin_unlock_wr(&nchpp->spin);
- lwkt_gettoken(&nlock, &vfs_token);
- _cache_link_parent(ncp, par_nch->ncp);
- lwkt_reltoken(&nlock);
+ _cache_unlock(par_nch->ncp);
+ /* par_locked = 0 - not used */
found:
/*
* stats and namecache size management
++gd->gd_nchstats->ncs_goodhits;
else
++gd->gd_nchstats->ncs_neghits;
- _cache_hysteresis();
nch.mount = mp;
nch.ncp = ncp;
atomic_add_int(&nch.mount->mnt_refs, 1);
* Note that successful resolution does not necessarily return an error
* code of 0. If the ncp resolves to a negative cache hit then ENOENT
* will be returned.
+ *
+ * MPSAFE
*/
int
cache_resolve(struct nchandle *nch, struct ucred *cred)
{
+ struct namecache *par_tmp;
struct namecache *par;
struct namecache *ncp;
struct nchandle nctmp;
*/
if (ncp->nc_parent->nc_flag & NCF_DESTROYED)
return(ENOENT);
-
par = ncp->nc_parent;
- while (par->nc_parent && par->nc_parent->nc_vp == NULL)
- par = par->nc_parent;
+ _cache_hold(par);
+ _cache_lock(par);
+ while ((par_tmp = par->nc_parent) != NULL &&
+ par_tmp->nc_vp == NULL) {
+ _cache_hold(par_tmp);
+ _cache_lock(par_tmp);
+ _cache_put(par);
+ par = par_tmp;
+ }
if (par->nc_parent == NULL) {
kprintf("EXDEV case 2 %*.*s\n",
par->nc_nlen, par->nc_nlen, par->nc_name);
+ _cache_put(par);
return (EXDEV);
}
kprintf("[diagnostic] cache_resolve: had to recurse on %*.*s\n",
* be one of its parents. We resolve it anyway, the loop
* will handle any moves.
*/
- _cache_get(par);
+ _cache_get(par); /* additional hold/lock */
+ _cache_put(par); /* from earlier hold/lock */
if (par == nch->mount->mnt_ncmountpt.ncp) {
cache_resolve_mp(nch->mount);
} else if ((dvp = cache_dvpref(par)) == NULL) {
TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
_cache_hold(ncp);
spin_unlock_wr(&ncspin);
- if (_cache_get_nonblock(ncp) == 0) {
+ if (_cache_lock_special(ncp) == 0) {
ncp = cache_zap(ncp);
if (ncp)
_cache_drop(ncp);
}
/*
- * Rehash a ncp. Rehashing is typically required if the name changes (should
- * not generally occur) or the parent link changes. This function will
- * unhash the ncp if the ncp is no longer hashable.
- */
-static void
-_cache_rehash(struct namecache *ncp)
-{
- struct nchash_head *nchpp;
- u_int32_t hash;
-
- if ((nchpp = ncp->nc_head) != NULL) {
- spin_lock_wr(&nchpp->spin);
- LIST_REMOVE(ncp, nc_hash);
- ncp->nc_head = NULL;
- spin_unlock_wr(&nchpp->spin);
- }
- if (ncp->nc_nlen && ncp->nc_parent) {
- hash = fnv_32_buf(ncp->nc_name, ncp->nc_nlen, FNV1_32_INIT);
- hash = fnv_32_buf(&ncp->nc_parent,
- sizeof(ncp->nc_parent), hash);
- nchpp = NCHHASH(hash);
- spin_lock_wr(&nchpp->spin);
- LIST_INSERT_HEAD(&nchpp->list, ncp, nc_hash);
- ncp->nc_head = nchpp;
- spin_unlock_wr(&nchpp->spin);
- }
-}
-
-/*
* Name cache initialization, from vfsinit() when we are booting
*/
void
int i, slash_prefixed;
struct filedesc *fdp;
struct nchandle nch;
+ struct namecache *ncp;
numcwdcalls++;
bp = buf;
slash_prefixed = 0;
nch = fdp->fd_ncdir;
- while (nch.ncp && (nch.ncp != fdp->fd_nrdir.ncp ||
+ ncp = nch.ncp;
+ if (ncp)
+ _cache_hold(ncp);
+
+ while (ncp && (ncp != fdp->fd_nrdir.ncp ||
nch.mount != fdp->fd_nrdir.mount)
) {
/*
* of the current mount we have to skip to the mount point
* in the underlying filesystem.
*/
- if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) {
+ if (ncp == nch.mount->mnt_ncmountpt.ncp) {
nch = nch.mount->mnt_ncmounton;
+ _cache_drop(ncp);
+ ncp = nch.ncp;
+ if (ncp)
+ _cache_hold(ncp);
continue;
}
/*
* Prepend the path segment
*/
- for (i = nch.ncp->nc_nlen - 1; i >= 0; i--) {
+ for (i = ncp->nc_nlen - 1; i >= 0; i--) {
if (bp == buf) {
numcwdfail4++;
*error = ERANGE;
- return(NULL);
+ bp = NULL;
+ goto done;
}
- *--bp = nch.ncp->nc_name[i];
+ *--bp = ncp->nc_name[i];
}
if (bp == buf) {
numcwdfail4++;
*error = ERANGE;
- return(NULL);
+ bp = NULL;
+ goto done;
}
*--bp = '/';
slash_prefixed = 1;
* Go up a directory. This isn't a mount point so we don't
* have to check again.
*/
- nch.ncp = nch.ncp->nc_parent;
+ while ((nch.ncp = ncp->nc_parent) != NULL) {
+ _cache_lock(ncp);
+ if (nch.ncp != ncp->nc_parent) {
+ _cache_unlock(ncp);
+ continue;
+ }
+ _cache_hold(nch.ncp);
+ _cache_unlock(ncp);
+ break;
+ }
+ _cache_drop(ncp);
+ ncp = nch.ncp;
}
- if (nch.ncp == NULL) {
+ if (ncp == NULL) {
numcwdfail2++;
*error = ENOENT;
- return(NULL);
+ bp = NULL;
+ goto done;
}
if (!slash_prefixed) {
if (bp == buf) {
numcwdfail4++;
*error = ERANGE;
- return(NULL);
+ bp = NULL;
+ goto done;
}
*--bp = '/';
}
numcwdfound++;
*error = 0;
+done:
+ if (ncp)
+ _cache_drop(ncp);
return (bp);
}
/*
* Thus begins the fullpath magic.
+ *
+ * The passed nchp is referenced but not locked.
*/
-
#undef STATNODE
#define STATNODE(name) \
static u_int name; \
STATNODE(numfullpathfound);
int
-cache_fullpath(struct proc *p, struct nchandle *nchp, char **retbuf, char **freebuf)
+cache_fullpath(struct proc *p, struct nchandle *nchp,
+ char **retbuf, char **freebuf)
{
struct nchandle fd_nrdir;
struct nchandle nch;
struct namecache *ncp;
- lwkt_tokref nlock;
struct mount *mp;
char *bp, *buf;
int slash_prefixed;
int i;
atomic_add_int(&numfullpathcalls, -1);
- lwkt_gettoken(&nlock, &vfs_token);
*retbuf = NULL;
*freebuf = NULL;
else
fd_nrdir = rootnch;
slash_prefixed = 0;
- cache_copy(nchp, &nch);
+ nch = *nchp;
ncp = nch.ncp;
+ if (ncp)
+ _cache_hold(ncp);
mp = nch.mount;
while (ncp && (ncp != fd_nrdir.ncp || mp != fd_nrdir.mount)) {
* of the current mount we have to skip to the mount point.
*/
if (ncp == mp->mnt_ncmountpt.ncp) {
- cache_drop(&nch);
- cache_copy(&mp->mnt_ncmounton, &nch);
+ nch = mp->mnt_ncmounton;
+ _cache_drop(ncp);
ncp = nch.ncp;
+ if (ncp)
+ _cache_hold(ncp);
mp = nch.mount;
continue;
}
/*
* Prepend the path segment
*/
- for (i = nch.ncp->nc_nlen - 1; i >= 0; i--) {
+ for (i = ncp->nc_nlen - 1; i >= 0; i--) {
if (bp == buf) {
numfullpathfail4++;
kfree(buf, M_TEMP);
error = ENOMEM;
goto done;
}
- *--bp = nch.ncp->nc_name[i];
+ *--bp = ncp->nc_name[i];
}
if (bp == buf) {
numfullpathfail4++;
* Go up a directory. This isn't a mount point so we don't
* have to check again.
*
- * We need the ncp's spinlock to safely access nc_parent.
+ * We can only safely access nc_parent with ncp held locked.
*/
- if ((nch.ncp = ncp->nc_parent) != NULL)
+ while ((nch.ncp = ncp->nc_parent) != NULL) {
+ _cache_lock(ncp);
+ if (nch.ncp != ncp->nc_parent) {
+ _cache_unlock(ncp);
+ continue;
+ }
_cache_hold(nch.ncp);
+ _cache_unlock(ncp);
+ break;
+ }
_cache_drop(ncp);
ncp = nch.ncp;
}
- if (nch.ncp == NULL) {
+ if (ncp == NULL) {
numfullpathfail2++;
kfree(buf, M_TEMP);
error = ENOENT;
*freebuf = buf;
error = 0;
done:
- cache_drop(&nch);
- lwkt_reltoken(&nlock);
+ if (ncp)
+ _cache_drop(ncp);
return(error);
}
}
vfsp = vfsconf_find_by_name("devfs");
- vp->v_flag |= VMOUNT;
+ vsetflags(vp, VMOUNT);
/*
* Allocate and initialize the filesystem.
nch.ncp->nc_flag |= NCF_ISMOUNTPT;
/* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */
- vp->v_flag &= ~VMOUNT;
+ vclrflags(vp, VMOUNT);
mountlist_insert(mp, MNTINS_LAST);
vn_unlock(vp);
//checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
- vp->v_flag &= ~VMOUNT;
+ vclrflags(vp, VMOUNT);
mp->mnt_vfc->vfc_refcount--;
vfs_unbusy(mp);
kfree(mp, M_MOUNT);
*/
static TAILQ_HEAD(freelst, vnode) vnode_free_list;
static struct vnode vnode_free_mid;
+static struct spinlock vfs_spin = SPINLOCK_INITIALIZER(vfs_spin);
int freevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD,
{
TAILQ_INIT(&vnode_free_list);
TAILQ_INSERT_HEAD(&vnode_free_list, &vnode_free_mid, v_freelist);
+ spin_init(&vfs_spin);
}
/*
- * Inline helper functions. vbusy() and vfree() must be called while in a
- * critical section.
+ * Misc functions
+ */
+static __inline
+void
+_vsetflags(struct vnode *vp, int flags)
+{
+ atomic_set_int(&vp->v_flag, flags);
+}
+
+static __inline
+void
+_vclrflags(struct vnode *vp, int flags)
+{
+ atomic_clear_int(&vp->v_flag, flags);
+}
+
+void
+vsetflags(struct vnode *vp, int flags)
+{
+ _vsetflags(vp, flags);
+}
+
+void
+vclrflags(struct vnode *vp, int flags)
+{
+ _vclrflags(vp, flags);
+}
+
+/*
+ * Inline helper functions. vbusy() and vfree() must be called while
+ * vp->v_spinlock is held.
+ *
+ * WARNING! This function is typically called with v_spinlock held.
*
- * Warning: must be callable if the caller holds a read spinlock to something
- * else, meaning we can't use read spinlocks here.
+ * MPSAFE
*/
static __inline
void
if ((ulong)vp == trackvnode)
kprintf("__vbusy %p %08x\n", vp, vp->v_flag);
#endif
+ spin_lock_wr(&vfs_spin);
TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
freevnodes--;
- vp->v_flag &= ~VFREE;
+ _vclrflags(vp, VFREE);
+ spin_unlock_wr(&vfs_spin);
}
+/*
+ * WARNING! This function is typically called with v_spinlock held.
+ *
+ * MPSAFE
+ */
static __inline
void
__vfree(struct vnode *vp)
print_backtrace();
}
#endif
+ spin_lock_wr(&vfs_spin);
if (vp->v_flag & VRECLAIMED)
TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
else if (vp->v_flag & (VAGE0 | VAGE1))
else
TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
freevnodes++;
- vp->v_flag |= VFREE;
+ _vsetflags(vp, VFREE);
+ spin_unlock_wr(&vfs_spin);
}
+/*
+ * WARNING! This function is typically called with v_spinlock held.
+ *
+ * MPSAFE
+ */
static __inline
void
__vfreetail(struct vnode *vp)
if ((ulong)vp == trackvnode)
kprintf("__vfreetail %p %08x\n", vp, vp->v_flag);
#endif
+ spin_lock_wr(&vfs_spin);
TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
freevnodes++;
- vp->v_flag |= VFREE;
+ _vsetflags(vp, VFREE);
+ spin_unlock_wr(&vfs_spin);
}
/*
*
* This routine is only valid if the vnode is already either VFREE or
* VCACHED, or if it can become VFREE or VCACHED via vnode_terminate().
+ *
+ * WARNING! This function is typically called with v_spinlock held.
+ *
+ * MPSAFE
*/
static __inline boolean_t
vshouldfree(struct vnode *vp)
/*
* Add a ref to an active vnode. This function should never be called
* with an inactive vnode (use vget() instead).
+ *
+ * MPSAFE
*/
void
vref(struct vnode *vp)
*
* vdrop needs to check for a VCACHE->VFREE transition to catch cases
* where a vnode is held past its reclamation.
+ *
+ * MPSAFE
*/
void
vdrop(struct vnode *vp)
{
KKASSERT(vp->v_sysref.refcnt != 0 && vp->v_auxrefs > 0);
+ spin_lock_wr(&vp->v_spinlock);
atomic_subtract_int(&vp->v_auxrefs, 1);
if ((vp->v_flag & VCACHED) && vshouldfree(vp)) {
- vp->v_flag &= ~VCACHED;
+ _vclrflags(vp, VCACHED);
__vfree(vp);
}
+ spin_unlock_wr(&vp->v_spinlock);
}
/*
* v_auxrefs, we must interlock auxiliary references against termination
* via the VX lock mechanism. It is possible for a vnode to be reactivated
* while we were blocked on the lock.
+ *
+ * MPSAFE
*/
void
vnode_terminate(struct vnode *vp)
* or dirty pages in its cached VM object still present.
*/
if ((vp->v_flag & VINACTIVE) == 0) {
- vp->v_flag |= VINACTIVE;
+ _vsetflags(vp, VINACTIVE);
if (vp->v_mount)
VOP_INACTIVE(vp);
}
+ spin_lock_wr(&vp->v_spinlock);
KKASSERT((vp->v_flag & (VFREE|VCACHED)) == 0);
if (vshouldfree(vp))
__vfree(vp);
else
- vp->v_flag |= VCACHED; /* inactive but not yet free */
+ _vsetflags(vp, VCACHED); /* inactive but not yet free*/
+ spin_unlock_wr(&vp->v_spinlock);
vx_unlock(vp);
} else {
/*
* Physical vnode constructor / destructor. These are only executed on
* the backend of the objcache. They are NOT executed on every vnode
* allocation or deallocation.
+ *
+ * MPSAFE
*/
boolean_t
vnode_ctor(void *obj, void *private, int ocflags)
return(TRUE);
}
+/*
+ * MPSAFE
+ */
void
vnode_dtor(void *obj, void *private)
{
* These functions lock vnodes for reclamation and deactivation related
* activities. The caller must already be holding some sort of reference
* on the vnode.
+ *
+ * MPSAFE
*/
-
void
vx_lock(struct vnode *vp)
{
* These functions are MANDATORY for any code chain accessing a vnode
* whos activation state is not known.
*
+ * vget() can be called with LK_NOWAIT and will return EBUSY if the
+ * lock cannot be immediately acquired.
+ *
* vget()/vput() are used when reactivation is desired.
*
* vx_get() and vx_put() are used when reactivation is not desired.
* transitions and refs during termination are allowed here so
* call sysref directly.
*/
-
sysref_get(&vp->v_sysref);
if ((error = vn_lock(vp, flags)) != 0) {
/*
* sysref that was earmarking those cases and preventing
* the vnode from being destroyed. Our sysref is still held.
*/
+ spin_lock_wr(&vp->v_spinlock);
if (vp->v_flag & VFREE) {
__vbusy(vp);
+ spin_unlock_wr(&vp->v_spinlock);
sysref_put(&vp->v_sysref);
sysref_activate(&vp->v_sysref);
} else if (vp->v_flag & VCACHED) {
- vp->v_flag &= ~VCACHED;
+ _vclrflags(vp, VCACHED);
+ spin_unlock_wr(&vp->v_spinlock);
sysref_put(&vp->v_sysref);
sysref_activate(&vp->v_sysref);
} else {
- KKASSERT(sysref_isactive(&vp->v_sysref));
+ spin_unlock_wr(&vp->v_spinlock);
+ if (sysref_isinactive(&vp->v_sysref)) {
+ sysref_activate(&vp->v_sysref);
+ kprintf("Warning vp %p reactivation race\n",
+ vp);
+ }
}
- vp->v_flag &= ~VINACTIVE;
+ _vclrflags(vp, VINACTIVE);
error = 0;
}
return(error);
}
+/*
+ * MPSAFE
+ */
void
vput(struct vnode *vp)
{
/*
* XXX The vx_*() locks should use auxrefs, not the main reference counter.
+ *
+ * MPSAFE
*/
void
vx_get(struct vnode *vp)
lockmgr(&vp->v_lock, LK_EXCLUSIVE);
}
+/*
+ * MPSAFE
+ */
int
vx_get_nonblock(struct vnode *vp)
{
*
* vx_put needs to check for a VCACHE->VFREE transition to catch the
* case where e.g. vnlru issues a vgone*().
+ *
+ * MPSAFE
*/
void
vx_put(struct vnode *vp)
{
+ spin_lock_wr(&vp->v_spinlock);
if ((vp->v_flag & VCACHED) && vshouldfree(vp)) {
- vp->v_flag &= ~VCACHED;
+ _vclrflags(vp, VCACHED);
__vfree(vp);
}
+ spin_unlock_wr(&vp->v_spinlock);
lockmgr(&vp->v_lock, LK_RELEASE);
sysref_put(&vp->v_sysref);
}
/*
- * Misc functions
- */
-
-void
-vsetflags(struct vnode *vp, int flags)
-{
- crit_enter();
- vp->v_flag |= flags;
- crit_exit();
-}
-
-void
-vclrflags(struct vnode *vp, int flags)
-{
- crit_enter();
- vp->v_flag &= ~flags;
- crit_exit();
-}
-
-/*
* Try to reuse a vnode from the free list. NOTE: The returned vnode
* is not completely initialized.
+ *
+ * MPSAFE
*/
static
struct vnode *
*
* XXX NOT MP SAFE
*/
+ spin_lock_wr(&vfs_spin);
vp = TAILQ_FIRST(&vnode_free_list);
if (vp == &vnode_free_mid)
vp = TAILQ_NEXT(vp, v_freelist);
TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
TAILQ_INSERT_TAIL(&vnode_free_list,
vp, v_freelist);
+ spin_unlock_wr(&vfs_spin);
continue;
}
+ spin_unlock_wr(&vfs_spin);
#ifdef TRACKVNODE
if ((ulong)vp == trackvnode)
kprintf("allocfreevnode %p %08x\n", vp, vp->v_flag);
* All new vnodes set the VAGE flags. An open() of the vnode will
* decrement the (2-bit) flags. Vnodes which are opened several times
* are thus retained in the cache over vnodes which are merely stat()d.
+ *
+ * MPSAFE
*/
struct vnode *
allocvnode(int lktimeout, int lkflags)
return (vp);
}
+/*
+ * MPSAFE
+ */
int
freesomevnodes(int n)
{
/*
* Move a vnode from one mount queue to another.
+ *
+ * MPSAFE
*/
void
insmntque(struct vnode *vp, struct mount *mp)
nd->nl_flags &= ~NLC_NCPISLOCKED;
cache_unlock(&nd->nl_nch);
}
- cache_drop(&nd->nl_nch);
+ cache_drop(&nd->nl_nch); /* NULL's out the nch */
}
if (nd->nl_rootnch.ncp)
cache_drop(&nd->nl_rootnch);
void
nlookup_zero(struct nlookupdata *nd)
{
- bzero(nd, sizeof(struct nlookupdata));
+ bzero(nd, sizeof(struct nlookupdata));
}
/*
bzero(&nlc, sizeof(nlc));
/*
- * Setup for the loop. The current working namecache element must
- * be in a refd + unlocked state. This typically the case on entry except
- * when stringing nlookup()'s along in a chain, since nlookup() always
- * returns nl_nch in a locked state.
+ * Setup for the loop. The current working namecache element is
+ * always at least referenced. We lock it as required, but always
+ * return a locked, resolved namecache entry.
*/
nd->nl_loopcnt = 0;
- if (nd->nl_flags & NLC_NCPISLOCKED) {
- nd->nl_flags &= ~NLC_NCPISLOCKED;
- cache_unlock(&nd->nl_nch);
- }
- if (nd->nl_dvp ) {
+ if (nd->nl_dvp) {
vrele(nd->nl_dvp);
nd->nl_dvp = NULL;
}
*/
for (;;) {
/*
+ * Make sure nl_nch is locked so we can access the vnode, resolution
+ * state, etc.
+ */
+ if ((nd->nl_flags & NLC_NCPISLOCKED) == 0) {
+ nd->nl_flags |= NLC_NCPISLOCKED;
+ cache_lock(&nd->nl_nch);
+ }
+
+ /*
* Check if the root directory should replace the current
* directory. This is done at the start of a translation
* or after a symbolic link has been found. In other cases
do {
++ptr;
} while (*ptr == '/');
- cache_copy(&nd->nl_rootnch, &nch);
- cache_drop(&nd->nl_nch);
- nd->nl_nch = nch;
+ cache_get(&nd->nl_rootnch, &nch);
+ cache_put(&nd->nl_nch);
+ nd->nl_nch = nch; /* remains locked */
/*
* Fast-track termination. There is no parent directory of
* e.g. 'rmdir /' is not allowed.
*/
if (*ptr == 0) {
- if (nd->nl_flags & NLC_REFDVP) {
+ if (nd->nl_flags & NLC_REFDVP)
error = EPERM;
- } else {
- cache_lock(&nd->nl_nch);
- nd->nl_flags |= NLC_NCPISLOCKED;
+ else
error = 0;
- }
break;
}
continue;
nctmp = nctmp.mount->mnt_ncmounton;
nctmp.ncp = nctmp.ncp->nc_parent;
KKASSERT(nctmp.ncp != NULL);
- cache_copy(&nctmp, &nch); /* XXX hack */
- cache_get(&nch, &nch);
+ cache_hold(&nctmp);
+ cache_get(&nctmp, &nch);
cache_drop(&nctmp); /* NOTE: zero's nctmp */
}
wasdotordotdot = 2;
} else {
+ /*
+ * Must unlock nl_nch when traversing down the path.
+ */
+ cache_unlock(&nd->nl_nch);
+ nd->nl_flags &= ~NLC_NCPISLOCKED;
nch = cache_nlookup(&nd->nl_nch, &nlc);
while ((error = cache_resolve(&nch, nd->nl_cred)) == EAGAIN) {
kprintf("[diagnostic] nlookup: relookup %*.*s\n",
if ((par.ncp = nch.ncp->nc_parent) != NULL) {
par.mount = nch.mount;
cache_hold(&par);
- dflags = 0;
+ cache_lock(&par);
error = naccess(&par, 0, nd->nl_cred, &dflags);
- cache_drop(&par);
+ cache_put(&par);
}
}
+ if (nd->nl_flags & NLC_NCPISLOCKED) {
+ cache_unlock(&nd->nl_nch);
+ nd->nl_flags &= ~NLC_NCPISLOCKED;
+ }
/*
- * [end of subsection] ncp is locked and ref'd. nd->nl_nch is ref'd
+ * [end of subsection]
+ *
+ * nch is locked and referenced.
+ * nd->nl_nch is unlocked and referenced.
+ *
+ * nl_nch must be unlocked or we could chain lock to the root
+ * if a resolve gets stuck (e.g. in NFS).
*/
/*
if (*ptr && (nch.ncp->nc_flag & NCF_ISDIR)) {
cache_drop(&nd->nl_nch);
cache_unlock(&nch);
+ KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0);
nd->nl_nch = nch;
continue;
}
* If NLC_REFDVP is set acquire a referenced parent dvp.
*/
if (nd->nl_flags & NLC_REFDVP) {
+ cache_lock(&nd->nl_nch);
error = cache_vref(&nd->nl_nch, nd->nl_cred, &nd->nl_dvp);
+ cache_unlock(&nd->nl_nch);
if (error) {
kprintf("NLC_REFDVP: Cannot ref dvp of %p\n", nch.ncp);
cache_put(&nch);
* The directory sticky bit is tested for NLC_DELETE and NLC_RENAME_DST,
* the latter is only tested if the target exists.
*
- * The passed ncp may or may not be locked. The caller should use a
- * locked ncp on leaf lookups, especially for NLC_CREATE, NLC_RENAME_DST,
- * NLC_DELETE, and NLC_EXCL checks.
+ * The passed ncp must be referenced and locked.
*/
int
naccess(struct nchandle *nch, int nflags, struct ucred *cred, int *nflagsp)
int error;
int sticky;
+ ASSERT_NCH_LOCKED(nch);
if (nch->ncp->nc_flag & NCF_UNRESOLVED) {
- cache_lock(nch);
cache_resolve(nch, cred);
- cache_unlock(nch);
}
error = nch->ncp->nc_error;
/*
* Directory permissions checks. Silently ignore ENOENT if these
* tests pass. It isn't an error.
+ *
+ * We have to lock nch.ncp to safely resolve nch.ncp->nc_parent
*/
if (nflags & (NLC_CREATE | NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) {
if (((nflags & NLC_CREATE) && nch->ncp->nc_vp == NULL) ||
((nflags & NLC_RENAME_SRC) && nch->ncp->nc_vp != NULL) ||
(nflags & NLC_RENAME_DST)
) {
- lwkt_tokref nlock;
struct nchandle par;
- lwkt_gettoken(&nlock, &vfs_token);
if ((par.ncp = nch->ncp->nc_parent) == NULL) {
if (error != EAGAIN)
error = EINVAL;
} else if (error == 0 || error == ENOENT) {
par.mount = nch->mount;
- cache_hold(&par);
sticky = 0;
+ cache_hold(&par);
+ cache_lock(&par);
error = naccess(&par, NLC_WRITE, cred, NULL);
- cache_drop(&par);
+ cache_put(&par);
}
- lwkt_reltoken(&nlock);
}
}
#include <sys/buf2.h>
#include <sys/thread2.h>
#include <sys/sysref2.h>
+#include <sys/mplock2.h>
static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
bp->b_flags &= ~B_HASHED;
}
if ((vp->v_flag & VONWORKLST) && RB_EMPTY(&vp->v_rbdirty_tree)) {
- vp->v_flag &= ~VONWORKLST;
+ vclrflags(vp, VONWORKLST);
LIST_REMOVE(vp, v_synclist);
}
bp->b_vp = NULL;
}
if ((vp->v_flag & VONWORKLST) &&
RB_EMPTY(&vp->v_rbdirty_tree)) {
- vp->v_flag &= ~VONWORKLST;
+ vclrflags(vp, VONWORKLST);
LIST_REMOVE(vp, v_synclist);
}
}
*/
if (vp->v_flag & VRECLAIMED)
return;
- vp->v_flag |= VRECLAIMED;
+ vsetflags(vp, VRECLAIMED);
/*
* Scrap the vfs cache
*
* This can occur if a file with a link count of 0 needs to be
* truncated.
+ *
+ * If the vnode is already dead don't try to deactivate it.
*/
if ((vp->v_flag & VINACTIVE) == 0) {
- vp->v_flag |= VINACTIVE;
- VOP_INACTIVE(vp);
+ vsetflags(vp, VINACTIVE);
+ if (vp->v_mount)
+ VOP_INACTIVE(vp);
vinvalbuf(vp, V_SAVE, 0, 0);
}
} else {
vm_pager_deallocate(object);
}
- vp->v_flag &= ~VOBJBUF;
+ vclrflags(vp, VOBJBUF);
}
KKASSERT((vp->v_flag & VOBJBUF) == 0);
/*
- * Reclaim the vnode.
+ * Reclaim the vnode if not already dead.
*/
- if (VOP_RECLAIM(vp))
+ if (vp->v_mount && VOP_RECLAIM(vp))
panic("vclean: cannot reclaim");
/*
* as inactive or reclaimed.
*/
if (active && (flags & DOCLOSE)) {
- vp->v_flag &= ~(VINACTIVE|VRECLAIMED);
+ vclrflags(vp, VINACTIVE | VRECLAIMED);
}
}
* Instead, it happens automatically when the caller releases the VX lock
* (assuming there aren't any other references).
*/
-
void
vgone_vxlocked(struct vnode *vp)
{
*/
KKASSERT(vp->v_lock.lk_exclusivecount == 1);
+ get_mplock();
+
/*
* Clean out the filesystem specific data and set the VRECLAIMED
* bit. Also deactivate the vnode if necessary.
* Set us to VBAD
*/
vp->v_type = VBAD;
+ rel_mplock();
}
/*
}
}
KASSERT(vp->v_object != NULL, ("vinitvmio: NULL object"));
- vp->v_flag |= VOBJBUF;
+ vsetflags(vp, VOBJBUF);
return (error);
}
slot = (syncer_delayno + delay) & syncer_mask;
LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
- vp->v_flag |= VONWORKLST;
+ vsetflags(vp, VONWORKLST);
lwkt_reltoken(&ilock);
}
KKASSERT(vp->v_mount->mnt_syncer != vp);
if (vp->v_flag & VONWORKLST) {
LIST_REMOVE(vp, v_synclist);
- vp->v_flag &= ~VONWORKLST;
+ vclrflags(vp, VONWORKLST);
}
lwkt_reltoken(&ilock);
error = EBUSY;
goto done;
}
- vp->v_flag |= VMOUNT;
+ vsetflags(vp, VMOUNT);
mp->mnt_flag |=
uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
vn_unlock(vp);
error = EBUSY;
goto done;
}
- vp->v_flag |= VMOUNT;
+ vsetflags(vp, VMOUNT);
/*
* Allocate and initialize the filesystem.
mp->mnt_kern_flag = flag2;
}
vfs_unbusy(mp);
- vp->v_flag &= ~VMOUNT;
+ vclrflags(vp, VMOUNT);
vrele(vp);
cache_drop(&nch);
goto done;
nch.ncp->nc_flag |= NCF_ISMOUNTPT;
/* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */
- vp->v_flag &= ~VMOUNT;
+ vclrflags(vp, VMOUNT);
mountlist_insert(mp, MNTINS_LAST);
vn_unlock(vp);
checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
- vp->v_flag &= ~VMOUNT;
+ vclrflags(vp, VMOUNT);
mp->mnt_vfc->vfc_refcount--;
vfs_unbusy(mp);
kfree(mp, M_MOUNT);
int
sys_open(struct open_args *uap)
{
+ CACHE_MPLOCK_DECLARE;
struct nlookupdata nd;
int error;
- get_mplock();
+ CACHE_GETMPLOCK1();
error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
if (error == 0) {
error = kern_open(&nd, uap->flags,
uap->mode, &uap->sysmsg_result);
}
nlookup_done(&nd);
- rel_mplock();
+ CACHE_RELMPLOCK();
return (error);
}
int
sys_openat(struct openat_args *uap)
{
+ CACHE_MPLOCK_DECLARE;
struct nlookupdata nd;
int error;
struct file *fp;
- get_mplock();
+ CACHE_GETMPLOCK1();
error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
if (error == 0) {
error = kern_open(&nd, uap->flags, uap->mode,
&uap->sysmsg_result);
}
nlookup_done_at(&nd, fp);
- rel_mplock();
+ CACHE_RELMPLOCK();
return (error);
}
}
+/*
+ * MPSAFE
+ */
int
kern_stat(struct nlookupdata *nd, struct stat *st)
{
*
* Get file status; this version follows links.
*
- * MPALMOSTSAFE
+ * MPSAFE
*/
int
sys_stat(struct stat_args *uap)
{
+ CACHE_MPLOCK_DECLARE;
struct nlookupdata nd;
struct stat st;
int error;
- get_mplock();
+ CACHE_GETMPLOCK1();
error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
if (error == 0) {
error = kern_stat(&nd, &st);
error = copyout(&st, uap->ub, sizeof(*uap->ub));
}
nlookup_done(&nd);
- rel_mplock();
+ CACHE_RELMPLOCK();
return (error);
}
int
sys_lstat(struct lstat_args *uap)
{
+ CACHE_MPLOCK_DECLARE;
struct nlookupdata nd;
struct stat st;
int error;
- get_mplock();
+ CACHE_GETMPLOCK1();
error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
if (error == 0) {
error = kern_stat(&nd, &st);
error = copyout(&st, uap->ub, sizeof(*uap->ub));
}
nlookup_done(&nd);
- rel_mplock();
+ CACHE_RELMPLOCK();
return (error);
}
int
sys_fstatat(struct fstatat_args *uap)
{
+ CACHE_MPLOCK_DECLARE;
struct nlookupdata nd;
struct stat st;
int error;
flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
- get_mplock();
+ CACHE_GETMPLOCK1();
error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
UIO_USERSPACE, flags);
if (error == 0) {
error = copyout(&st, uap->sb, sizeof(*uap->sb));
}
nlookup_done_at(&nd, fp);
- rel_mplock();
+ CACHE_RELMPLOCK();
return (error);
}
}
/*
- * relock the source ncp. NOTE AFTER RELOCKING: the source ncp
- * may have become invalid while it was unlocked, nc_vp and nc_mount
- * could be NULL.
+ * Relock the source ncp. cache_relock() will deal with any
+ * deadlocks against the already-locked tond and will also
+ * make sure both are resolved.
+ *
+ * NOTE AFTER RELOCKING: The source or target ncp may have become
+ * invalid while they were unlocked, nc_vp and nc_mount could
+ * be NULL.
*/
- if (cache_lock_nonblock(&fromnd->nl_nch) == 0) {
- cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
- } else if (fromnd->nl_nch.ncp > tond->nl_nch.ncp) {
- cache_lock(&fromnd->nl_nch);
- cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
- } else {
- cache_unlock(&tond->nl_nch);
- cache_lock(&fromnd->nl_nch);
- cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
- cache_lock(&tond->nl_nch);
- cache_resolve(&tond->nl_nch, tond->nl_cred);
- }
+ cache_relock(&fromnd->nl_nch, fromnd->nl_cred,
+ &tond->nl_nch, tond->nl_cred);
fromnd->nl_flags |= NLC_NCPISLOCKED;
/*
* You cannot rename a source into itself or a subdirectory of itself.
* We check this by travsersing the target directory upwards looking
* for a match against the source.
+ *
+ * XXX MPSAFE
*/
if (error == 0) {
for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
#include <sys/thread2.h>
#include <sys/mplock2.h>
-#define VFS_MPLOCK_DECLARE struct lwkt_tokref xlock; int xlock_mpsafe
-
-#define VFS_MPLOCK(mp) VFS_MPLOCK_FLAG(mp, MNTK_MPSAFE)
-
-#define VFS_MPLOCK_FLAG(mp, flag) \
- do { \
- if (mp->mnt_kern_flag & flag) { \
- xlock_mpsafe = 1; \
- } else { \
- get_mplock(); /* TEMPORARY */ \
- lwkt_gettoken(&xlock, &mp->mnt_token); \
- xlock_mpsafe = 0; \
- } \
- } while (0)
-
-#define VFS_MPUNLOCK(mp) \
- do { \
- if (xlock_mpsafe == 0) { \
- lwkt_reltoken(&xlock); \
- rel_mplock(); /* TEMPORARY */ \
- } \
- } while(0)
-
/*
* MPSAFE
*/
VFS_MPLOCK_DECLARE;
int error;
- VFS_MPLOCK(mp);
+ VFS_MPLOCK1(mp);
error = (mp->mnt_op->vfs_mount)(mp, path, data, cred);
VFS_MPUNLOCK(mp);
return (error);
VFS_MPLOCK_DECLARE;
int error;
- VFS_MPLOCK(mp);
+ VFS_MPLOCK1(mp);
error = (mp->mnt_op->vfs_start)(mp, flags);
VFS_MPUNLOCK(mp);
return (error);
VFS_MPLOCK_DECLARE;
int error;
- VFS_MPLOCK(mp);
+ VFS_MPLOCK1(mp);
error = (mp->mnt_op->vfs_unmount)(mp, mntflags);
VFS_MPUNLOCK(mp);
return (error);
VFS_MPLOCK_DECLARE;
int error;
- VFS_MPLOCK(mp);
+ VFS_MPLOCK1(mp);
error = (mp->mnt_op->vfs_root)(mp, vpp);
VFS_MPUNLOCK(mp);
return (error);
VFS_MPLOCK_DECLARE;
int error;
- VFS_MPLOCK(mp);
+ VFS_MPLOCK1(mp);
error = (mp->mnt_op->vfs_quotactl)(mp, cmds, uid, arg, cred);
VFS_MPUNLOCK(mp);
return (error);
VFS_MPLOCK_DECLARE;
int error;
- VFS_MPLOCK(mp);
+ VFS_MPLOCK1(mp);
error = (mp->mnt_op->vfs_statfs)(mp, sbp, cred);
VFS_MPUNLOCK(mp);
return (error);
VFS_MPLOCK_DECLARE;
int error;
- VFS_MPLOCK(mp);
+ VFS_MPLOCK1(mp);
error = (mp->mnt_op->vfs_statvfs)(mp, sbp, cred);
VFS_MPUNLOCK(mp);
return (error);
VFS_MPLOCK_DECLARE;
int error;
- VFS_MPLOCK(mp);
+ VFS_MPLOCK1(mp);
error = (mp->mnt_op->vfs_sync)(mp, waitfor);
VFS_MPUNLOCK(mp);
return (error);
VFS_MPLOCK_DECLARE;
int error;
- VFS_MPLOCK(mp);
+ VFS_MPLOCK1(mp);
error = (mp->mnt_op->vfs_vget)(mp, dvp, ino, vpp);
VFS_MPUNLOCK(mp);
return (error);
VFS_MPLOCK_DECLARE;
int error;
- VFS_MPLOCK(mp);
+ VFS_MPLOCK1(mp);
error = (mp->mnt_op->vfs_fhtovp)(mp, rootvp, fhp, vpp);
VFS_MPUNLOCK(mp);
return (error);
VFS_MPLOCK_DECLARE;
int error;
- VFS_MPLOCK(mp);
+ VFS_MPLOCK1(mp);
error = (mp->mnt_op->vfs_checkexp)(mp, nam, extflagsp, credanonp);
VFS_MPUNLOCK(mp);
return (error);
VFS_MPLOCK_DECLARE;
int error;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
error = (vp->v_mount->mnt_op->vfs_vptofh)(vp, fhp);
VFS_MPUNLOCK(vp->v_mount);
return (error);
VFS_MPLOCK_DECLARE;
int error;
- VFS_MPLOCK(mp);
+ VFS_MPLOCK1(mp);
error = (mp->mnt_op->vfs_extattrctl)(mp, cmd, attrname, arg, cred);
VFS_MPUNLOCK(mp);
return (error);
/*
* Vnode close call
+ *
+ * MPSAFE
*/
int
vn_close(struct vnode *vp, int flags)
return (error);
}
+/*
+ * Sequential heuristic.
+ *
+ * MPSAFE (f_seqcount and f_nextoff are allowed to race)
+ */
static __inline
int
sequential_heuristic(struct uio *uio, struct file *fp)
if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
uio->uio_offset == fp->f_nextoff) {
int tmpseq = fp->f_seqcount;
- /*
- * XXX we assume that the filesystem block size is
- * the default. Not true, but still gives us a pretty
- * good indicator of how sequential the read operations
- * are.
- */
+
tmpseq += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
if (tmpseq > IO_SEQMAX)
tmpseq = IO_SEQMAX;
* These routines serve the dual purpose of serializing access to the
* f_offset field (at least on i386) and guaranteeing operational integrity
* when multiple read()ers and write()ers are present on the same fp.
+ *
+ * MPSAFE
*/
static __inline off_t
vn_get_fpf_offset(struct file *fp)
return(fp->f_offset);
}
+/*
+ * MPSAFE
+ */
static __inline void
vn_set_fpf_offset(struct file *fp, off_t offset)
{
}
}
+/*
+ * MPSAFE
+ */
static __inline off_t
vn_poll_fpf_offset(struct file *fp)
{
/*
* Package up an I/O request on a vnode into a uio and do it.
+ *
+ * MPSAFE
*/
int
vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, int len,
* check bwillwrite() before calling vn_rdwr(). We also call uio_yield()
* to give other processes a chance to lock the vnode (either other processes
* core'ing the same binary, or unrelated processes scanning the directory).
+ *
+ * MPSAFE
*/
int
vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, caddr_t base, int len,
}
}
error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
- ioflg, cred, aresid);
+ ioflg, cred, aresid);
len -= chunk; /* aresid calc already includes length */
if (error)
break;
}
/*
- * MPSAFE - acquires mplock
- *
* File pointers can no longer get ripped up by revoke so
* we don't need to lock access to the vp.
*
* f_offset updates are not guaranteed against multiple readers
+ *
+ * MPSAFE
*/
static int
vn_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
}
/*
- * MPSAFE - acquires mplock
+ * MPSAFE
*/
static int
vn_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
int error;
off_t size;
- get_mplock();
-
switch (vp->v_type) {
case VREG:
case VDIR:
break;
}
+ get_mplock();
sess = p->p_session;
/* Do nothing if reassigning same control tty */
if (sess->s_ttyvp == vp) {
error = 0;
+ rel_mplock();
break;
}
sess->s_ttyvp = vp;
if (ovp)
vrele(ovp);
+ rel_mplock();
}
break;
}
- rel_mplock();
return (error);
}
/*
- * MPALMOSTSAFE - acquires mplock
+ * MPSAFE
*/
static int
vn_poll(struct file *fp, int events, struct ucred *cred)
{
int error;
- get_mplock();
error = VOP_POLL(((struct vnode *)fp->f_data), events, cred);
- rel_mplock();
return (error);
}
return (error);
}
+/*
+ * MPSAFE
+ */
void
vn_unlock(struct vnode *vp)
{
lockmgr(&vp->v_lock, LK_RELEASE);
}
+/*
+ * MPSAFE
+ */
int
vn_islocked(struct vnode *vp)
{
}
/*
- * MPALMOSTSAFE - acquires mplock
+ * MPSAFE
*/
static int
vn_closefile(struct file *fp)
{
int error;
- get_mplock();
fp->f_ops = &badfileops;
error = vn_close(((struct vnode *)fp->f_data), fp->f_flag);
- rel_mplock();
return (error);
}
/*
- * MPALMOSTSAFE - acquires mplock
+ * Attach a knote to the vnode backing a file pointer by passing the
+ * request straight to VOP_KQFILTER() on fp->f_data.
+ *
+ * MPSAFE - the MP lock is no longer acquired around the VOP call here.
+ * NOTE(review): the vop_kqfilter dispatch wrapper brackets the operation
+ * with VFS_MPLOCK1()/VFS_MPUNLOCK(); presumably that supplies the locking
+ * for mounts not flagged MNTK_MPSAFE - confirm VOP_KQFILTER() routes
+ * through it.
*/
static int
vn_kqfilter(struct file *fp, struct knote *kn)
{
int error;
- get_mplock();
error = VOP_KQFILTER(((struct vnode *)fp->f_data), kn);
- rel_mplock();
return (error);
}
#define DO_OPS(ops, error, ap, vop_field) \
error = ops->vop_field(ap);
-#define VFS_MPLOCK_DECLARE struct lwkt_tokref xlock; int xlock_mpsafe
-
-#define VFS_MPLOCK(mp) VFS_MPLOCK_FLAG(mp, MNTK_MPSAFE)
-
-#define VFS_MPLOCK_FLAG(mp, flag) \
- do { \
- if (mp->mnt_kern_flag & flag) { \
- xlock_mpsafe = 1; \
- } else { \
- get_mplock(); /* TEMPORARY */ \
- lwkt_gettoken(&xlock, &mp->mnt_token); \
- xlock_mpsafe = 0; \
- } \
- } while(0)
-
-#define VFS_MPUNLOCK(mp) \
- do { \
- if (xlock_mpsafe == 0) { \
- lwkt_reltoken(&xlock); \
- rel_mplock(); /* TEMPORARY */ \
- } \
- } while(0)
-
/************************************************************************
* PRIMARY HIGH LEVEL VNODE OPERATIONS CALLS *
************************************************************************
ap.a_dvp = dvp;
ap.a_vpp = vpp;
ap.a_cnp = cnp;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_old_lookup);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
ap.a_cnp = cnp;
ap.a_vap = vap;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_old_create);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
ap.a_cnp = cnp;
ap.a_flags = flags;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_old_whiteout);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
ap.a_cnp = cnp;
ap.a_vap = vap;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_old_mknod);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
* Decrement 3-2-1-0. Does not decrement beyond 0
*/
if (vp->v_flag & VAGE0) {
- vp->v_flag &= ~VAGE0;
+ vclrflags(vp, VAGE0);
} else if (vp->v_flag & VAGE1) {
- vp->v_flag &= ~VAGE1;
- vp->v_flag |= VAGE0;
+ vclrflags(vp, VAGE1);
+ vsetflags(vp, VAGE0);
}
ap.a_head.a_desc = &vop_open_desc;
ap.a_mode = mode;
ap.a_cred = cred;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_open);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_vp = vp;
ap.a_fflag = fflag;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_close);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_flags = flags;
ap.a_cred = cred;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_access);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_vap = vap;
ap.a_cred = cred;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_setattr);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_cred = cred;
ap.a_sysmsg = msg;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_ioctl);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_events = events;
ap.a_cred = cred;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_poll);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_vp = vp;
ap.a_kn = kn;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_kqfilter);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_fflags = fflags;
ap.a_cred = cred;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_mmap);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_waitfor = waitfor;
ap.a_flags = flags;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_fsync);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_vp = vp;
ap.a_cnp = cnp;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_old_remove);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
ap.a_vp = vp;
ap.a_cnp = cnp;
- VFS_MPLOCK(tdvp->v_mount);
+ VFS_MPLOCK1(tdvp->v_mount);
DO_OPS(ops, error, &ap, vop_old_link);
VFS_MPUNLOCK(tdvp->v_mount);
return(error);
ap.a_tvp = tvp;
ap.a_tcnp = tcnp;
- VFS_MPLOCK(tdvp->v_mount);
+ VFS_MPLOCK1(tdvp->v_mount);
DO_OPS(ops, error, &ap, vop_old_rename);
VFS_MPUNLOCK(tdvp->v_mount);
return(error);
ap.a_cnp = cnp;
ap.a_vap = vap;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_old_mkdir);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
ap.a_vp = vp;
ap.a_cnp = cnp;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_old_rmdir);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
ap.a_vap = vap;
ap.a_target = target;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_old_symlink);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
ap.a_ncookies = ncookies;
ap.a_cookies = cookies;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_readdir);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_uio = uio;
ap.a_cred = cred;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_readlink);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_head.a_ops = ops;
ap.a_vp = vp;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK_FLAG(vp->v_mount, MNTK_IN_MPSAFE);
DO_OPS(ops, error, &ap, vop_inactive);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_head.a_ops = ops;
ap.a_vp = vp;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_reclaim);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_runb = runb;
ap.a_cmd = cmd;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_bmap);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_vp = vp;
ap.a_bio = bio;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_strategy);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_head.a_ops = ops;
ap.a_vp = vp;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_print);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_name = name;
ap.a_retval = retval;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_pathconf);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_fl = fl;
ap.a_flags = flags;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_advlock);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_flags = flags;
ap.a_bpp = bpp;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_balloc);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_vp = vp;
ap.a_buflist = buflist;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_reallocblks);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_reqpage = reqpage;
ap.a_offset = offset;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_getpages);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_rtvals = rtvals;
ap.a_offset = offset;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_putpages);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_offset = offset;
ap.a_length = length;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_freeblks);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_aclp = aclp;
ap.a_cred = cred;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_getacl);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_aclp = aclp;
ap.a_cred = cred;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_setacl);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_aclp = aclp;
ap.a_cred = cred;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_aclcheck);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_uio = uio;
ap.a_cred = cred;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_getextattr);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_uio = uio;
ap.a_cred = cred;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_setextattr);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_buflen = buflen;
ap.a_res = res;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_mountctl);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_vp = vp;
ap.a_cred = cred;
- VFS_MPLOCK(vp->v_mount);
+ VFS_MPLOCK1(vp->v_mount);
DO_OPS(ops, error, &ap, vop_markatime);
VFS_MPUNLOCK(vp->v_mount);
return(error);
ap.a_dvp = dvp;
ap.a_cred = cred;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_nresolve);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
ap.a_cred = cred;
ap.a_fakename = fakename;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_nlookupdotdot);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
ap.a_cred = cred;
ap.a_vap = vap;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_ncreate);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
ap.a_cred = cred;
ap.a_vap = vap;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_nmkdir);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
ap.a_cred = cred;
ap.a_vap = vap;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_nmknod);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
ap.a_vp = vp;
ap.a_cred = cred;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_nlink);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
ap.a_vap = vap;
ap.a_target = target;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_nsymlink);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
ap.a_cred = cred;
ap.a_flags = flags;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_nwhiteout);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
ap.a_dvp = dvp;
ap.a_cred = cred;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_nremove);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
ap.a_dvp = dvp;
ap.a_cred = cred;
- VFS_MPLOCK(dvp->v_mount);
+ VFS_MPLOCK1(dvp->v_mount);
DO_OPS(ops, error, &ap, vop_nrmdir);
VFS_MPUNLOCK(dvp->v_mount);
return(error);
ap.a_tdvp = tdvp;
ap.a_cred = cred;
- VFS_MPLOCK(fdvp->v_mount);
+ VFS_MPLOCK1(fdvp->v_mount);
DO_OPS(ops, error, &ap, vop_nrename);
VFS_MPUNLOCK(fdvp->v_mount);
return(error);
*/
vm_page_busy(m);
pmap_inval_add(info, pmap, -1);
+ KKASSERT(pmap->pm_pdir[m->pindex]);
pmap->pm_pdir[m->pindex] = 0;
KKASSERT(pmap->pm_stats.resident_count > 0);
/*
* Remove the page table page from the processes address space.
*/
- pde[p->pindex] = 0;
KKASSERT(pmap->pm_stats.resident_count > 0);
+ KKASSERT(pde[p->pindex]);
+ pde[p->pindex] = 0;
--pmap->pm_stats.resident_count;
if (p->hold_count) {
pmap_inval_add(info, pmap, va);
oldpte = loadandclear(ptq);
+ KKASSERT(oldpte);
if (oldpte & PG_W)
pmap->pm_stats.wired_count -= 1;
/*
npv = TAILQ_NEXT(pv, pv_plist);
continue;
}
+ KKASSERT(*pte);
tpte = loadandclear(pte);
m = PHYS_TO_VM_PAGE(tpte);
*/
#define MNTK_UNMOUNTF 0x00000001 /* forced unmount in progress */
#define MNTK_MPSAFE 0x00010000 /* call vops without mnt_token lock */
-#define MNTK_RD_MPSAFE 0x00020000 /* reads do not require mnt_token */
-#define MNTK_WR_MPSAFE 0x00040000 /* writes do not require mnt_token */
-#define MNTK_GA_MPSAFE 0x00080000 /* getattrs do not require mnt_token */
+#define MNTK_RD_MPSAFE 0x00020000 /* vop_read is MPSAFE */
+#define MNTK_WR_MPSAFE 0x00040000 /* vop_write is MPSAFE */
+#define MNTK_GA_MPSAFE 0x00080000 /* vop_getattr is MPSAFE */
+#define MNTK_IN_MPSAFE 0x00100000 /* vop_inactive is MPSAFE */
#define MNTK_NCALIASED 0x00800000 /* namecached aliased */
#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */
#define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */
as next argument */
/*
+ * VFS MPLOCK helpers.
+ *
+ * VFS_MPLOCK_DECLARE		Declares the xlock token reference and the
+ *				xlock_mpsafe state used by the macros below.
+ *
+ * VFS_MPLOCK1(mp)		VFS_MPLOCK_FLAG() keyed on MNTK_MPSAFE.
+ *
+ * VFS_MPLOCK2(mp)		Acquires the MP lock and mp->mnt_token if
+ *				xlock_mpsafe is still set (i.e. nothing was
+ *				acquired earlier), then clears xlock_mpsafe.
+ *
+ * VFS_MPLOCK_FLAG(mp, flag)	If (flag) is set in mp->mnt_kern_flag nothing
+ *				is acquired and xlock_mpsafe is set to 1.
+ *				Otherwise the MP lock and mp->mnt_token are
+ *				acquired and xlock_mpsafe is set to 0.
+ *
+ * VFS_MPUNLOCK(mp)		Releases the token and the MP lock when
+ *				xlock_mpsafe is 0.
+ *
+ * The (mp) and (flag) arguments are parenthesized in the expansions so
+ * that arbitrary expressions may be passed safely.
+ */
+#define VFS_MPLOCK_DECLARE struct lwkt_tokref xlock; int xlock_mpsafe
+
+#define VFS_MPLOCK1(mp) VFS_MPLOCK_FLAG(mp, MNTK_MPSAFE)
+
+#define VFS_MPLOCK2(mp) \
+ do { \
+ if (xlock_mpsafe) { \
+ get_mplock(); /* TEMPORARY */ \
+ lwkt_gettoken(&xlock, &(mp)->mnt_token); \
+ xlock_mpsafe = 0; \
+ } \
+ } while(0)
+
+#define VFS_MPLOCK_FLAG(mp, flag) \
+ do { \
+ if ((mp)->mnt_kern_flag & (flag)) { \
+ xlock_mpsafe = 1; \
+ } else { \
+ get_mplock(); /* TEMPORARY */ \
+ lwkt_gettoken(&xlock, &(mp)->mnt_token); \
+ xlock_mpsafe = 0; \
+ } \
+ } while(0)
+
+#define VFS_MPUNLOCK(mp) \
+ do { \
+ if (xlock_mpsafe == 0) { \
+ lwkt_reltoken(&xlock); \
+ rel_mplock(); /* TEMPORARY */ \
+ } \
+ } while(0)
+
+/*
* Flags for various system call interfaces.
*
* waitfor flags to vfs_sync() and getfsstat()
* vnodes cached by the system will reference one or more associated namecache
* structures.
*
- * The namecache is disjoint, there may not always be a path to the system
- * root through nc_parent links. If a namecache entry has no parent, that
- * entry will not be hashed and can only be 'found' via '.' or '..'.
+ * The DragonFly namecache maintains elements from active nodes to the root
+ * in all but the NFS server case and the removed file/directory case.
+ * NFS servers use fhtovp() and may have to regenerate the topology to
+ * the leaf on the fly.
*
* Because the namecache structure maintains the path through mount points,
* null, and union mounts, and other VFS overlays, several namecache
* confusion, but only the one representing the physical directory is passed
* into lower layer VOP calls.
*
+ * ncp locking is done using atomic ops on nc_exlocks, including a request
+ * flag for waiters. nc_locktd is set after locking or cleared before
+ * the last unlock. ncp locks are reentrant.
+ *
* Many new API VOP operations do not pass vnodes. In these cases the
* operations vector is typically obtained via nc_mount->mnt_vn_use_ops.
*/
int nc_refs; /* ref count prevents deletion */
u_short nc_flag;
u_char nc_nlen; /* The length of the name, 255 max */
- u_char nc_lockreq;
+ u_char nc_unused;
char *nc_name; /* Separately allocated seg name */
int nc_error;
int nc_timeout; /* compared against ticks, or 0 */
- int nc_exlocks; /* namespace locking */
+ u_int nc_exlocks; /* namespace locking */
struct thread *nc_locktd; /* namespace locking */
long nc_namecache_gen; /* cmp against mnt_namecache_gen */
};
struct mount *mount; /* mount pt (possible overlay) */
};
+/*
+ * Assert that the namecache entry referenced by (nch) has its nc_locktd
+ * set to the current thread.  (nch) is parenthesized so that expressions
+ * such as &nd->nl_nch may be passed.
+ */
+#define ASSERT_NCH_LOCKED(nch) KKASSERT((nch)->ncp->nc_locktd == curthread)
+
/*
* Flags in namecache.nc_flag (u_char)
*/
#define NCF_DESTROYED 0x0400 /* name association is considered destroyed */
#define NCF_UNUSED800 0x0800
+#define NC_EXLOCK_REQ 0x80000000 /* ex_lock state */
+
/*
* cache_inval[_vp]() flags
*/
#define CINV_UNUSED02 0x0002
#define CINV_CHILDREN 0x0004 /* recursively set children to unresolved */
+/*
+ * MP lock helper for namecache.
+ *
+ * CACHE_MPLOCK_DECLARE	Declares the have_mplock state variable that the
+ *			macros below read and update; it must be in scope
+ *			wherever they are used.
+ *
+ * CACHE_GETMPLOCK1()	Conditionally gets the MP lock if cache_mpsafe
+ *			is not set, otherwise does not.
+ *
+ * CACHE_GETMPLOCK2()	Unconditionally gets the MP lock if it is not already
+ *			held (e.g. from GETMPLOCK1).
+ *
+ * CACHE_RELMPLOCK()	Releases the MP lock if it was previously acquired
+ *			by GETMPLOCK1 or GETMPLOCK2.
+ */
+#define CACHE_MPLOCK_DECLARE int have_mplock
+
+#define CACHE_GETMPLOCK1() \
+ do { \
+ if (cache_mpsafe) { \
+ have_mplock = 0; \
+ } else { \
+ get_mplock(); \
+ have_mplock = 1; \
+ } \
+ } while (0)
+
+#define CACHE_GETMPLOCK2() \
+ do { \
+ if (have_mplock == 0) { \
+ have_mplock = 1; \
+ get_mplock(); \
+ } \
+ } while(0)
+
+#define CACHE_RELMPLOCK() \
+ do { \
+ if (have_mplock) { \
+ have_mplock = 0; \
+ rel_mplock(); \
+ } \
+ } while(0)
+
#ifdef _KERNEL
-extern struct lwkt_token vfs_token;
+extern int cache_mpsafe;
struct componentname;
struct nlcomponent;
struct mount;
void cache_lock(struct nchandle *nch);
+void cache_relock(struct nchandle *nch1, struct ucred *cred1,
+ struct nchandle *nch2, struct ucred *cred2);
int cache_lock_nonblock(struct nchandle *nch);
void cache_unlock(struct nchandle *nch);
void cache_setvp(struct nchandle *nch, struct vnode *vp);
switch (node->node_type) {
case Proot:
- vp->v_flag |= VROOT;
+ vsetflags(vp, VROOT);
+ /* fall through */
case Pdir:
vp->v_type = VDIR;
break;
dev->si_iosize_max = DFLTPHYS;
if (dev_dflags(dev) & D_TTY)
- vp->v_flag |= VISTTY;
+ vsetflags(vp, VISTTY);
vn_unlock(vp);
error = dev_dopen(dev, ap->a_mode, S_IFCHR, ap->a_cred);
MALLOC(fmp, struct fdescmount *, sizeof(struct fdescmount),
M_FDESCMNT, M_WAITOK); /* XXX */
rvp->v_type = VDIR;
- rvp->v_flag |= VROOT;
+ vsetflags(rvp, VROOT);
fmp->f_root = rvp;
/* XXX -- don't mark as local to work around fts() problems */
/*mp->mnt_flag |= MNT_LOCAL;*/
}
}
}
- vp->v_flag |= VNOTSEEKABLE;
+ vsetflags(vp, VNOTSEEKABLE);
error = vop_stdopen(ap);
lwkt_reltoken(&vlock);
return (error);
ext2_quotaoff(mp, type);
ump->um_qflags[type] |= QTF_OPENING;
mp->mnt_flag |= MNT_QUOTA;
- vp->v_flag |= VSYSTEM;
+ vsetflags(vp, VSYSTEM);
*vpp = vp;
/* XXX release duplicate vp if *vpp == vp? */
/*
vmntvnodescan(mp, VMSC_GETVP, NULL, ext2_quotaoff_scan, &scaninfo);
}
ext2_dqflush(qvp);
- qvp->v_flag &= ~VSYSTEM;
+ vclrflags(qvp, VSYSTEM);
error = vn_close(qvp, FREAD|FWRITE);
ump->um_quotas[type] = NULLVP;
crfree(ump->um_cred[type]);
#include <sys/buf2.h>
#include <sys/signal2.h>
+#include <sys/mplock2.h>
#include "hammer_disk.h"
#include "hammer_mount.h"
#include "hammer_ioctl.h"
#include "hammer.h"
#include <vm/vm_extern.h>
-#include <sys/buf.h>
-#include <sys/buf2.h>
static int hammer_unload_inode(struct hammer_inode *ip);
static void hammer_free_inode(hammer_inode_t ip);
* it cached.
*
* This is called from the frontend.
+ *
+ * MPALMOSTSAFE
*/
int
hammer_vop_inactive(struct vop_inactive_args *ap)
* otherwise namespace calls such as chmod will unnecessarily generate
* multiple inode updates.
*/
- hammer_inode_unloadable_check(ip, 0);
if (ip->ino_data.nlinks == 0) {
+ get_mplock();
+ hammer_inode_unloadable_check(ip, 0);
if (ip->flags & HAMMER_INODE_MODMASK)
hammer_flush_inode(ip, 0);
vrecycle(ap->a_vp);
+ rel_mplock();
}
return(0);
}
if (ip->obj_id == HAMMER_OBJID_ROOT &&
ip->obj_asof == hmp->asof) {
if (ip->obj_localization == 0)
- vp->v_flag |= VROOT;
+ vsetflags(vp, VROOT);
else
- vp->v_flag |= VPFSROOT;
+ vsetflags(vp, VPFSROOT);
}
vp->v_data = (void *)ip;
* on return, so even if we do not specify it we no longer get
* the BGL regardlless of how we are flagged.
*/
- mp->mnt_kern_flag |= MNTK_RD_MPSAFE | MNTK_GA_MPSAFE;
+ mp->mnt_kern_flag |= MNTK_RD_MPSAFE | MNTK_GA_MPSAFE |
+ MNTK_IN_MPSAFE;
/*
* note: f_iosize is used by vnode_pager_haspage() when constructing
vp->v_data = hp;
if (ino == (ino_t)hpmp->hpm_su.su_rootfno)
- vp->v_flag |= VROOT;
+ vsetflags(vp, VROOT);
lwkt_token_init(&hp->h_interlock);
}
if (ip->iso_extent == imp->root_extent)
- vp->v_flag |= VROOT;
+ vsetflags(vp, VROOT);
/*
* Return the locked and refd vp
* exists), and then use the time and date from that entry
* as the time and date for the root denode.
*/
- nvp->v_flag |= VROOT; /* should be further down XXX */
+ vsetflags(nvp, VROOT); /* should be further down XXX */
ldep->de_Attributes = ATTR_DIRECTORY;
ldep->de_LowerCase = 0;
if (nd->nl_nch.ncp->nc_parent) {
nch = nd->nl_nch;
nch.ncp = nch.ncp->nc_parent;
+ cache_hold(&nch);
+ cache_lock(&nch);
error = cache_vget(&nch, nd->nl_cred,
LK_EXCLUSIVE, dvpp);
+ cache_put(&nch);
} else {
error = ENXIO;
}
* Since the swap file is not the root dir of a file system,
* hack it to a regular file.
*/
- vp->v_flag &= ~VROOT;
+ vclrflags(vp, VROOT);
vref(vp);
nfs_setvtype(vp, VREG);
swaponvp(td, vp, nd->swap_nblks);
}
if (vp->v_type == VNON)
nfs_setvtype(vp, VDIR);
- vp->v_flag |= VROOT;
+ vsetflags(vp, VROOT);
if (error)
vput(vp);
else
pi[i], &(ntmp->ntm_sysvn[pi[i]]));
if(error)
goto out1;
- ntmp->ntm_sysvn[pi[i]]->v_flag |= VSYSTEM;
+ vsetflags(ntmp->ntm_sysvn[pi[i]], VSYSTEM);
vref(ntmp->ntm_sysvn[pi[i]]);
vput(ntmp->ntm_sysvn[pi[i]]);
}
vp->v_type = f_type;
if (ino == NTFS_ROOTINO)
- vp->v_flag |= VROOT;
+ vsetflags(vp, VROOT);
/*
* Normal files use the buffer cache
error = nwfs_nget(mp, nmp->n_rootent, &fattr, NULL, &vp);
if (error)
return (error);
- vp->v_flag |= VROOT;
+ vsetflags(vp, VROOT);
np = VTONW(vp);
if (nmp->m.root_path[0] == 0)
np->n_flag |= NVOLUME;
rvp->v_data = pn;
rvp->v_type = VDIR;
- rvp->v_flag |= VROOT;
+ vsetflags(rvp, VROOT);
VTOPORTAL(rvp)->pt_arg = 0;
VTOPORTAL(rvp)->pt_size = 0;
VTOPORTAL(rvp)->pt_fileid = PORTAL_ROOTFILEID;
error = smbfs_nget(mp, NULL, "TheRooT", 7, &fattr, &vp);
if (error)
return error;
- vp->v_flag |= VROOT;
+ vsetflags(vp, VROOT);
np = VTOSMB(vp);
smp->sm_root = np;
*vpp = vp;
return(error);
vp = *vpp;
- vp->v_flag |= VROOT;
+ vsetflags(vp, VROOT);
udfmp->root_vp = vp;
return(0);
ufs_quotaoff(mp, type);
ump->um_qflags[type] |= QTF_OPENING;
mp->mnt_flag |= MNT_QUOTA;
- vp->v_flag |= VSYSTEM;
+ vsetflags(vp, VSYSTEM);
*vpp = vp;
/* XXX release duplicate vp if *vpp == vp? */
/*
vmntvnodescan(mp, VMSC_GETVP, NULL, ufs_quotaoff_scan, &scaninfo);
}
ufs_dqflush(qvp);
- qvp->v_flag &= ~VSYSTEM;
+ vclrflags(qvp, VSYSTEM);
error = vn_close(qvp, FREAD|FWRITE);
ump->um_quotas[type] = NULLVP;
crfree(ump->um_cred[type]);
}
if (ip->i_number == ROOTINO)
- vp->v_flag |= VROOT;
+ vsetflags(vp, VROOT);
/*
* Initialize modrev times
*/
MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
M_TEMP, M_WAITOK);
- (*vpp)->v_flag |= vflag;
+ vsetflags(*vpp, vflag);
if (uppervp)
(*vpp)->v_type = uppervp->v_type;
else
object->ref_count--;
if (object->ref_count == 0)
- vp->v_flag &= ~VTEXT;
+ vclrflags(vp, VTEXT);
vrele(vp);
}
* can happen with NFS vnodes since the nfsnode isn't locked.
*/
while (vp->v_flag & VOLOCK) {
- vp->v_flag |= VOWANT;
+ vsetflags(vp, VOWANT);
tsleep(vp, 0, "vnpobj", 0);
}
- vp->v_flag |= VOLOCK;
+ vsetflags(vp, VOLOCK);
/*
* If the object is being terminated, wait for it to
}
vref(vp);
- vp->v_flag &= ~VOLOCK;
+ vclrflags(vp, VOLOCK);
if (vp->v_flag & VOWANT) {
- vp->v_flag &= ~VOWANT;
+ vclrflags(vp, VOWANT);
wakeup(vp);
}
return (object);
object->type = OBJT_DEAD;
vp->v_object = NULL;
vp->v_filesize = NOOFFSET;
- vp->v_flag &= ~(VTEXT | VOBJBUF);
+ vclrflags(vp, VTEXT | VOBJBUF);
}
/*