kernel - swapcache - Fix snocache and cache flags propagation, fix PG_NOTMETA
[dragonfly.git] / sys / kern / vfs_cache.c
1/*
2 * Copyright (c) 2003,2004,2009 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * Copyright (c) 1989, 1993, 1995
35 * The Regents of the University of California. All rights reserved.
36 *
37 * This code is derived from software contributed to Berkeley by
38 * Poul-Henning Kamp of the FreeBSD Project.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. All advertising materials mentioning features or use of this software
49 * must display the following acknowledgement:
50 * This product includes software developed by the University of
51 * California, Berkeley and its contributors.
52 * 4. Neither the name of the University nor the names of its contributors
53 * may be used to endorse or promote products derived from this software
54 * without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 * SUCH DAMAGE.
67 */
68
69#include <sys/param.h>
70#include <sys/systm.h>
71#include <sys/kernel.h>
72#include <sys/sysctl.h>
73#include <sys/mount.h>
74#include <sys/vnode.h>
75#include <sys/malloc.h>
76#include <sys/sysproto.h>
77#include <sys/spinlock.h>
78#include <sys/proc.h>
79#include <sys/namei.h>
80#include <sys/nlookup.h>
81#include <sys/filedesc.h>
82#include <sys/fnv_hash.h>
83#include <sys/globaldata.h>
84#include <sys/kern_syscall.h>
85#include <sys/dirent.h>
86#include <ddb/ddb.h>
87
88#include <sys/sysref2.h>
89#include <sys/spinlock2.h>
90#include <sys/mplock2.h>
91
92#define MAX_RECURSION_DEPTH 64
93
94/*
95 * Random lookups in the cache are accomplished with a hash table using
96 * a hash key of (nc_src_vp, name). Each hash chain has its own spin lock.
97 *
98 * Negative entries may exist and correspond to resolved namecache
99 * structures where nc_vp is NULL. In a negative entry, NCF_WHITEOUT
100 * will be set if the entry corresponds to a whited-out directory entry
101 * (versus simply not finding the entry at all). ncneglist is locked
102 * with a global spinlock (ncspin).
103 *
104 * MPSAFE RULES:
105 *
106 * (1) A ncp must be referenced before it can be locked.
107 *
108 * (2) A ncp must be locked in order to modify it.
109 *
110 * (3) ncp locks are always ordered child -> parent. That may seem
111 * backwards but forward scans use the hash table and thus can hold
112 * the parent unlocked when traversing downward.
113 *
114 * This allows insert/rename/delete/dot-dot and other operations
115 * to use ncp->nc_parent links.
116 *
117 * This also prevents a locked-up node (e.g. an NFS node) from creating a
118 * chain reaction all the way back to the root vnode / namecache.
119 *
120 * (4) parent linkages require both the parent and child to be locked.
121 */
122
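/*
 * Illustrative sketch (not compiled): acquiring a child and then its
 * parent per rule (3) above.  The helper name is hypothetical; the
 * real pattern appears in _cache_unlink_parent() below.
 */
#if 0
static void
example_lock_child_then_parent(struct namecache *child)
{
	struct namecache *par;

	_cache_hold(child);		/* rule (1): ref before lock */
	_cache_lock(child);		/* rule (3): child locked first */
	if ((par = child->nc_parent) != NULL) {
		_cache_hold(par);
		_cache_lock(par);	/* then the parent */
		/* rule (4): safe to modify parent linkages here */
		_cache_put(par);
	}
	_cache_put(child);
}
#endif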
123/*
124 * Structures associated with name caching.
125 */
126#define NCHHASH(hash) (&nchashtbl[(hash) & nchash])
127#define MINNEG 1024
128
129MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
130
131LIST_HEAD(nchash_list, namecache);
132
133struct nchash_head {
134 struct nchash_list list;
135 struct spinlock spin;
136};
137
138static struct nchash_head *nchashtbl;
139static struct namecache_list ncneglist;
140static struct spinlock ncspin;
141
142/*
143 * ncvp_debug - debug cache_fromvp(). This is used by the NFS server
144 * to create the namecache infrastructure leading to a dangling vnode.
145 *
146 * 0 Only errors are reported
147 * 1 Successes are reported
148 * 2 Successes + the whole directory scan is reported
149 * 3	Force the directory scan code to run as if the parent vnode did not
150 * have a namecache record, even if it does have one.
151 */
152static int ncvp_debug;
153SYSCTL_INT(_debug, OID_AUTO, ncvp_debug, CTLFLAG_RW, &ncvp_debug, 0, "");
154
155static u_long nchash; /* size of hash table */
156SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");
157
158static int ncnegfactor = 16; /* ratio of negative entries */
159SYSCTL_INT(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");
160
161static int nclockwarn; /* warn on locked entries in ticks */
162SYSCTL_INT(_debug, OID_AUTO, nclockwarn, CTLFLAG_RW, &nclockwarn, 0, "");
163
164static int numneg;		/* number of negative cache entries */
165SYSCTL_INT(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");
166
167static int numdefered;	/* number of deferred zap operations */
168SYSCTL_INT(_debug, OID_AUTO, numdefered, CTLFLAG_RD, &numdefered, 0, "");
169
170static int numcache; /* number of cache entries allocated */
171SYSCTL_INT(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
172
173SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
174SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");
175
176int cache_mpsafe;
177SYSCTL_INT(_vfs, OID_AUTO, cache_mpsafe, CTLFLAG_RW, &cache_mpsafe, 0, "");
178
179static int cache_resolve_mp(struct mount *mp);
180static struct vnode *cache_dvpref(struct namecache *ncp);
181static void _cache_lock(struct namecache *ncp);
182static void _cache_setunresolved(struct namecache *ncp);
183static void _cache_cleanneg(int count);
184static void _cache_cleandefered(void);
185
186/*
187 * The new name cache statistics
188 */
189SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
190#define STATNODE(mode, name, var) \
191 SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
192STATNODE(CTLFLAG_RD, numneg, &numneg);
193STATNODE(CTLFLAG_RD, numcache, &numcache);
194static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
195static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
196static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
197static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
198static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
199static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
200static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
201static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
202static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
203static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);
204
205struct nchstats nchstats[SMP_MAXCPU];
206/*
207 * Export VFS cache effectiveness statistics to user-land.
208 *
209 * The statistics are left for aggregation to user-land so
210 * neat things can be achieved, like observing per-CPU cache
211 * distribution.
212 */
213static int
214sysctl_nchstats(SYSCTL_HANDLER_ARGS)
215{
216 struct globaldata *gd;
217 int i, error;
218
219 error = 0;
220 for (i = 0; i < ncpus; ++i) {
221 gd = globaldata_find(i);
222 if ((error = SYSCTL_OUT(req, (void *)&(*gd->gd_nchstats),
223 sizeof(struct nchstats))))
224 break;
225 }
226
227 return (error);
228}
229SYSCTL_PROC(_vfs_cache, OID_AUTO, nchstats, CTLTYPE_OPAQUE|CTLFLAG_RD,
230 0, 0, sysctl_nchstats, "S,nchstats", "VFS cache effectiveness statistics");
231
232static struct namecache *cache_zap(struct namecache *ncp, int nonblock);
233
234/*
235 * Namespace locking. The caller must already hold a reference to the
236 * namecache structure in order to lock/unlock it. This function prevents
237 * the namespace from being created or destroyed by accessors other than
238 * the lock holder.
239 *
240 * Note that holding a locked namecache structure prevents other threads
241 * from making namespace changes (e.g. deleting or creating), prevents
242 * vnode association state changes by other threads, and prevents the
243 * namecache entry from being resolved or unresolved by other threads.
244 *
245 * The lock owner has full authority to associate/disassociate vnodes
246 * and resolve/unresolve the locked ncp.
247 *
248 * The primary lock field is nc_exlocks. nc_locktd is set after the
249 * fact (when locking) or cleared prior to unlocking.
250 *
251 * WARNING! Holding a locked ncp will prevent a vnode from being destroyed
252 * or recycled, but it does NOT help you if the vnode had already
253 * initiated a recyclement. If this is important, use cache_get()
254 *	    rather than cache_lock() (and deal with the differences in the
255 * way the refs counter is handled). Or, alternatively, make an
256 * unconditional call to cache_validate() or cache_resolve()
257 * after cache_lock() returns.
258 *
259 * MPSAFE
260 */
261static
262void
263_cache_lock(struct namecache *ncp)
264{
265 thread_t td;
266 int didwarn;
267 int error;
268 u_int count;
269
270 KKASSERT(ncp->nc_refs != 0);
271 didwarn = 0;
272 td = curthread;
273
274 for (;;) {
275 count = ncp->nc_exlocks;
276
277 if (count == 0) {
278 if (atomic_cmpset_int(&ncp->nc_exlocks, 0, 1)) {
279 /*
280 * The vp associated with a locked ncp must
281 * be held to prevent it from being recycled.
282 *
283 * WARNING! If VRECLAIMED is set the vnode
284 * could already be in the middle of a recycle.
285 * Callers must use cache_vref() or
286 * cache_vget() on the locked ncp to
287 * validate the vp or set the cache entry
288 * to unresolved.
289 *
290 * NOTE! vhold() is allowed if we hold a
291 * lock on the ncp (which we do).
292 */
293 ncp->nc_locktd = td;
294 if (ncp->nc_vp)
295 vhold(ncp->nc_vp); /* MPSAFE */
296 break;
297 }
298 /* cmpset failed */
299 continue;
300 }
301 if (ncp->nc_locktd == td) {
302 if (atomic_cmpset_int(&ncp->nc_exlocks, count,
303 count + 1)) {
304 break;
305 }
306 /* cmpset failed */
307 continue;
308 }
309 tsleep_interlock(ncp, 0);
310 if (atomic_cmpset_int(&ncp->nc_exlocks, count,
311 count | NC_EXLOCK_REQ) == 0) {
312 /* cmpset failed */
313 continue;
314 }
315 error = tsleep(ncp, PINTERLOCKED, "clock", nclockwarn);
316 if (error == EWOULDBLOCK) {
317 if (didwarn == 0) {
318 didwarn = ticks;
319 kprintf("[diagnostic] cache_lock: blocked "
320 "on %p",
321 ncp);
322 kprintf(" \"%*.*s\"\n",
323 ncp->nc_nlen, ncp->nc_nlen,
324 ncp->nc_name);
325 }
326 }
327 }
328 if (didwarn) {
329 kprintf("[diagnostic] cache_lock: unblocked %*.*s after "
330 "%d secs\n",
331 ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name,
332 (int)(ticks - didwarn) / hz);
333 }
334}
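/*
 * Illustrative sketch (not compiled): the core of the lockless acquire
 * loop used by _cache_lock() above.  A 0->1 cmpset transition acquires
 * the lock, the owner can recursively increment the count, and waiters
 * publish NC_EXLOCK_REQ under a tsleep interlock so _cache_unlock()
 * knows to wakeup().  Fragment only; warning/timeout handling omitted.
 */
#if 0
	for (;;) {
		count = ncp->nc_exlocks;
		if (count == 0) {
			if (atomic_cmpset_int(&ncp->nc_exlocks, 0, 1))
				break;			/* acquired */
			continue;			/* lost the race */
		}
		if (ncp->nc_locktd == curthread) {
			if (atomic_cmpset_int(&ncp->nc_exlocks, count,
					      count + 1))
				break;			/* recursive acquire */
			continue;
		}
		tsleep_interlock(ncp, 0);
		if (atomic_cmpset_int(&ncp->nc_exlocks, count,
				      count | NC_EXLOCK_REQ)) {
			tsleep(ncp, PINTERLOCKED, "clock", 0);
		}
	}
#endif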
335
336/*
337 * NOTE: nc_refs may be zero if the ncp is interlocked by circumstance,
338 * such as the case where one of its children is locked.
339 *
340 * MPSAFE
341 */
342static
343int
344_cache_lock_nonblock(struct namecache *ncp)
345{
346 thread_t td;
347 u_int count;
348
349 td = curthread;
350
351 for (;;) {
352 count = ncp->nc_exlocks;
353
354 if (count == 0) {
355 if (atomic_cmpset_int(&ncp->nc_exlocks, 0, 1)) {
356 /*
357 * The vp associated with a locked ncp must
358 * be held to prevent it from being recycled.
359 *
360 * WARNING! If VRECLAIMED is set the vnode
361 * could already be in the middle of a recycle.
362 * Callers must use cache_vref() or
363 * cache_vget() on the locked ncp to
364 * validate the vp or set the cache entry
365 * to unresolved.
366 *
367 * NOTE! vhold() is allowed if we hold a
368 * lock on the ncp (which we do).
369 */
370 ncp->nc_locktd = td;
371 if (ncp->nc_vp)
372 vhold(ncp->nc_vp); /* MPSAFE */
373 break;
374 }
375 /* cmpset failed */
376 continue;
377 }
378 if (ncp->nc_locktd == td) {
379 if (atomic_cmpset_int(&ncp->nc_exlocks, count,
380 count + 1)) {
381 break;
382 }
383 /* cmpset failed */
384 continue;
385 }
386 return(EWOULDBLOCK);
387 }
388 return(0);
389}
390
391/*
392 * Helper function
393 *
394 * NOTE: nc_refs can be 0 (degenerate case during _cache_drop).
395 *
396 * nc_locktd must be NULLed out prior to nc_exlocks getting cleared.
397 *
398 * MPSAFE
399 */
400static
401void
402_cache_unlock(struct namecache *ncp)
403{
404 thread_t td __debugvar = curthread;
405 u_int count;
406
407 KKASSERT(ncp->nc_refs >= 0);
408 KKASSERT(ncp->nc_exlocks > 0);
409 KKASSERT(ncp->nc_locktd == td);
410
411 count = ncp->nc_exlocks;
412 if ((count & ~NC_EXLOCK_REQ) == 1) {
413 ncp->nc_locktd = NULL;
414 if (ncp->nc_vp)
415 vdrop(ncp->nc_vp);
416 }
417 for (;;) {
418 if ((count & ~NC_EXLOCK_REQ) == 1) {
419 if (atomic_cmpset_int(&ncp->nc_exlocks, count, 0)) {
420 if (count & NC_EXLOCK_REQ)
421 wakeup(ncp);
422 break;
423 }
424 } else {
425 if (atomic_cmpset_int(&ncp->nc_exlocks, count,
426 count - 1)) {
427 break;
428 }
429 }
430 count = ncp->nc_exlocks;
431 }
432}
433
434
435/*
436 * cache_hold() and cache_drop() prevent the premature deletion of a
437 * namecache entry but do not prevent operations (such as zapping) on
438 * that namecache entry.
439 *
440 * This routine may only be called from outside this source module if
441 * nc_refs is already at least 1.
442 *
443 * This is a rare case where callers are allowed to hold a spinlock,
444 * so we cannot use one ourselves.
445 *
446 * MPSAFE
447 */
448static __inline
449struct namecache *
450_cache_hold(struct namecache *ncp)
451{
452 atomic_add_int(&ncp->nc_refs, 1);
453 return(ncp);
454}
455
456/*
457 * Drop a cache entry, taking care to deal with races.
458 *
459 * For potential 1->0 transitions we must hold the ncp lock to safely
460 * test its flags. An unresolved entry with no children must be zapped
461 * to avoid leaks.
462 *
463 * The call to cache_zap() itself will handle all remaining races and
464 * will decrement the ncp's refs regardless. If we are resolved or
465 * have children nc_refs can safely be dropped to 0 without having to
466 * zap the entry.
467 *
468 * NOTE: cache_zap() will re-check nc_refs and nc_list in a MPSAFE fashion.
469 *
470 * NOTE: cache_zap() may return a non-NULL referenced parent which must
471 * be dropped in a loop.
472 *
473 * MPSAFE
474 */
475static __inline
476void
477_cache_drop(struct namecache *ncp)
478{
479 int refs;
480
481 while (ncp) {
482 KKASSERT(ncp->nc_refs > 0);
483 refs = ncp->nc_refs;
484
485 if (refs == 1) {
486 if (_cache_lock_nonblock(ncp) == 0) {
487 ncp->nc_flag &= ~NCF_DEFEREDZAP;
488 if ((ncp->nc_flag & NCF_UNRESOLVED) &&
489 TAILQ_EMPTY(&ncp->nc_list)) {
490 ncp = cache_zap(ncp, 1);
491 continue;
492 }
493 if (atomic_cmpset_int(&ncp->nc_refs, 1, 0)) {
494 _cache_unlock(ncp);
495 break;
496 }
497 _cache_unlock(ncp);
498 }
499 } else {
500 if (atomic_cmpset_int(&ncp->nc_refs, refs, refs - 1))
501 break;
502 }
503 cpu_pause();
504 }
505}
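/*
 * Illustrative sketch (not compiled): the expected ref/lock pairing
 * from a consumer's point of view.  The helper name is hypothetical.
 * _cache_drop() above handles the potential 1->0 transition, looping
 * on any parent cache_zap() hands back rather than recursing.
 */
#if 0
static void
example_ref_lock_lifecycle(struct namecache *ncp)
{
	_cache_hold(ncp);		/* ref first */
	_cache_lock(ncp);		/* then lock */
	/* ... test flags, resolve, or modify the entry ... */
	_cache_unlock(ncp);
	_cache_drop(ncp);		/* may zap on the last ref */
}
#endif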
506
507/*
508 * Link a new namecache entry to its parent and to the hash table. Be
509 * careful to avoid races if vhold() blocks in the future.
510 *
511 * Both ncp and par must be referenced and locked.
512 *
513 * NOTE: The hash table spinlock is likely held during this call, we
514 * can't do anything fancy.
515 *
516 * MPSAFE
517 */
518static void
519_cache_link_parent(struct namecache *ncp, struct namecache *par,
520 struct nchash_head *nchpp)
521{
522 KKASSERT(ncp->nc_parent == NULL);
523 ncp->nc_parent = par;
524 ncp->nc_head = nchpp;
525
526 /*
527 * Set inheritance flags. Note that the parent flags may be
528 * stale due to getattr potentially not having been run yet
529 * (it gets run during nlookup()'s).
530 */
531 ncp->nc_flag &= ~(NCF_SF_PNOCACHE | NCF_UF_PCACHE);
532 if (par->nc_flag & (NCF_SF_NOCACHE | NCF_SF_PNOCACHE))
533 ncp->nc_flag |= NCF_SF_PNOCACHE;
534 if (par->nc_flag & (NCF_UF_CACHE | NCF_UF_PCACHE))
535 ncp->nc_flag |= NCF_UF_PCACHE;
536
537 LIST_INSERT_HEAD(&nchpp->list, ncp, nc_hash);
538
539 if (TAILQ_EMPTY(&par->nc_list)) {
540 TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry);
541 /*
542 * Any vp associated with an ncp which has children must
543 * be held to prevent it from being recycled.
544 */
545 if (par->nc_vp)
546 vhold(par->nc_vp);
547 } else {
548 TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry);
549 }
550}
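/*
 * Illustrative sketch (not compiled): the swapcache flag inheritance
 * performed by _cache_link_parent() above.  Assuming the directory was
 * marked with the nocache chflags (setting NCF_SF_NOCACHE on its ncp),
 * each child linked underneath inherits NCF_SF_PNOCACHE, so the whole
 * subtree is excluded as entries are created.  The path is hypothetical.
 */
#if 0
	/* userland: chflags("/build", SF_NOCACHE); */

	/* kernel, when linking a new child under that directory: */
	if (par->nc_flag & (NCF_SF_NOCACHE | NCF_SF_PNOCACHE))
		ncp->nc_flag |= NCF_SF_PNOCACHE;	/* inherited */
#endif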
551
552/*
553 * Remove the parent and hash associations from a namecache structure.
554 * If this is the last child of the parent the cache_drop(par) will
555 * attempt to recursively zap the parent.
556 *
557 * ncp must be locked. This routine will acquire a temporary lock on
558 * the parent as well as the appropriate hash chain.
559 *
560 * MPSAFE
561 */
562static void
563_cache_unlink_parent(struct namecache *ncp)
564{
565 struct namecache *par;
566 struct vnode *dropvp;
567
568 if ((par = ncp->nc_parent) != NULL) {
569 KKASSERT(ncp->nc_parent == par);
570 _cache_hold(par);
571 _cache_lock(par);
572 spin_lock_wr(&ncp->nc_head->spin);
573 LIST_REMOVE(ncp, nc_hash);
574 TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
575 dropvp = NULL;
576 if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
577 dropvp = par->nc_vp;
578 spin_unlock_wr(&ncp->nc_head->spin);
579 ncp->nc_parent = NULL;
580 ncp->nc_head = NULL;
581 _cache_unlock(par);
582 _cache_drop(par);
583
584 /*
585 * We can only safely vdrop with no spinlocks held.
586 */
587 if (dropvp)
588 vdrop(dropvp);
589 }
590}
591
592/*
593 * Allocate a new namecache structure. Most of the code does not require
594 * zero-termination of the string but it makes vop_compat_ncreate() easier.
595 *
596 * MPSAFE
597 */
598static struct namecache *
599cache_alloc(int nlen)
600{
601 struct namecache *ncp;
602
603 ncp = kmalloc(sizeof(*ncp), M_VFSCACHE, M_WAITOK|M_ZERO);
604 if (nlen)
605 ncp->nc_name = kmalloc(nlen + 1, M_VFSCACHE, M_WAITOK);
606 ncp->nc_nlen = nlen;
607 ncp->nc_flag = NCF_UNRESOLVED;
608 ncp->nc_error = ENOTCONN; /* needs to be resolved */
609 ncp->nc_refs = 1;
610
611 TAILQ_INIT(&ncp->nc_list);
612 _cache_lock(ncp);
613 return(ncp);
614}
615
616/*
617 * Can only be called for the case where the ncp has never been
618 * associated with anything (so no spinlocks are needed).
619 *
620 * MPSAFE
621 */
622static void
623_cache_free(struct namecache *ncp)
624{
625 KKASSERT(ncp->nc_refs == 1 && ncp->nc_exlocks == 1);
626 if (ncp->nc_name)
627 kfree(ncp->nc_name, M_VFSCACHE);
628 kfree(ncp, M_VFSCACHE);
629}
630
631/*
632 * MPSAFE
633 */
634void
635cache_zero(struct nchandle *nch)
636{
637 nch->ncp = NULL;
638 nch->mount = NULL;
639}
640
641/*
642 * Ref and deref a namecache structure.
643 *
644 * The caller must specify a stable ncp pointer, typically meaning the
645 * ncp is already referenced but this can also occur indirectly through
646 * e.g. holding a lock on a direct child.
647 *
648 * WARNING: Caller may hold an unrelated read spinlock, which means we can't
649 * use read spinlocks here.
650 *
651 * MPSAFE if nch is
652 */
653struct nchandle *
654cache_hold(struct nchandle *nch)
655{
656 _cache_hold(nch->ncp);
657 atomic_add_int(&nch->mount->mnt_refs, 1);
658 return(nch);
659}
660
661/*
662 * Create a copy of a namecache handle for an already-referenced
663 * entry.
664 *
665 * MPSAFE if nch is
666 */
667void
668cache_copy(struct nchandle *nch, struct nchandle *target)
669{
670 *target = *nch;
671 if (target->ncp)
672 _cache_hold(target->ncp);
673 atomic_add_int(&nch->mount->mnt_refs, 1);
674}
675
676/*
677 * MPSAFE if nch is
678 */
679void
680cache_changemount(struct nchandle *nch, struct mount *mp)
681{
682 atomic_add_int(&nch->mount->mnt_refs, -1);
683 nch->mount = mp;
684 atomic_add_int(&nch->mount->mnt_refs, 1);
685}
686
687/*
688 * MPSAFE
689 */
690void
691cache_drop(struct nchandle *nch)
692{
693 atomic_add_int(&nch->mount->mnt_refs, -1);
694 _cache_drop(nch->ncp);
695 nch->ncp = NULL;
696 nch->mount = NULL;
697}
698
699/*
700 * MPSAFE
701 */
702void
703cache_lock(struct nchandle *nch)
704{
705 _cache_lock(nch->ncp);
706}
707
708/*
709 * Relock nch1 given an unlocked nch1 and a locked nch2. The caller
710 * is responsible for checking both for validity on return as they
711 * may have become invalid.
712 *
713 * We have to deal with potential deadlocks here, just ping pong
714 * the lock until we get it (we will always block somewhere when
715 * looping so this is not cpu-intensive).
716 *
717 * which = 0 nch1 not locked, nch2 is locked
718 * which = 1 nch1 is locked, nch2 is not locked
719 */
720void
721cache_relock(struct nchandle *nch1, struct ucred *cred1,
722 struct nchandle *nch2, struct ucred *cred2)
723{
724 int which;
725
726 which = 0;
727
728 for (;;) {
729 if (which == 0) {
730 if (cache_lock_nonblock(nch1) == 0) {
731 cache_resolve(nch1, cred1);
732 break;
733 }
734 cache_unlock(nch2);
735 cache_lock(nch1);
736 cache_resolve(nch1, cred1);
737 which = 1;
738 } else {
739 if (cache_lock_nonblock(nch2) == 0) {
740 cache_resolve(nch2, cred2);
741 break;
742 }
743 cache_unlock(nch1);
744 cache_lock(nch2);
745 cache_resolve(nch2, cred2);
746 which = 0;
747 }
748 }
749}
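/*
 * Illustrative sketch (not compiled): the generic ping-pong shape of
 * cache_relock() above, with hypothetical lock/try_lock/unlock helpers.
 * Try the contested lock non-blocking; on failure release what we
 * hold, block on the contested lock, and retry from the other side.
 */
#if 0
	for (;;) {
		if (try_lock(a) == 0)
			break;		/* now hold both a and b */
		unlock(b);
		lock(a);		/* block on the contested lock */
		if (try_lock(b) == 0)
			break;
		unlock(a);
		lock(b);
	}
#endif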
750
751/*
752 * MPSAFE
753 */
754int
755cache_lock_nonblock(struct nchandle *nch)
756{
757 return(_cache_lock_nonblock(nch->ncp));
758}
759
760
761/*
762 * MPSAFE
763 */
764void
765cache_unlock(struct nchandle *nch)
766{
767 _cache_unlock(nch->ncp);
768}
769
770/*
771 * ref-and-lock, unlock-and-deref functions.
772 *
773 * This function is primarily used by nlookup. Even though cache_lock
774 * holds the vnode, it is possible that the vnode may have already
775 * initiated a recyclement.
776 *
777 * We want cache_get() to return a definitively usable vnode or a
778 * definitively unresolved ncp.
779 *
780 * MPSAFE
781 */
782static
783struct namecache *
784_cache_get(struct namecache *ncp)
785{
786 _cache_hold(ncp);
787 _cache_lock(ncp);
788 if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED))
789 _cache_setunresolved(ncp);
790 return(ncp);
791}
792
793/*
794 * This is a special form of _cache_lock() which only succeeds if
795 * it can get a pristine, non-recursive lock. The caller must have
796 * already ref'd the ncp.
797 *
798 * On success the ncp will be locked, on failure it will not. The
799 * ref count does not change either way.
800 *
801 * We want _cache_lock_special() (on success) to return a definitively
802 * usable vnode or a definitively unresolved ncp.
803 *
804 * MPSAFE
805 */
806static int
807_cache_lock_special(struct namecache *ncp)
808{
809 if (_cache_lock_nonblock(ncp) == 0) {
810 if ((ncp->nc_exlocks & ~NC_EXLOCK_REQ) == 1) {
811 if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED))
812 _cache_setunresolved(ncp);
813 return(0);
814 }
815 _cache_unlock(ncp);
816 }
817 return(EWOULDBLOCK);
818}
819
820
821/*
822 * NOTE: The same nchandle can be passed for both arguments.
823 *
824 * MPSAFE
825 */
826void
827cache_get(struct nchandle *nch, struct nchandle *target)
828{
829 KKASSERT(nch->ncp->nc_refs > 0);
830 target->mount = nch->mount;
831 target->ncp = _cache_get(nch->ncp);
832 atomic_add_int(&target->mount->mnt_refs, 1);
833}
834
835/*
836 * MPSAFE
837 */
838static __inline
839void
840_cache_put(struct namecache *ncp)
841{
842 _cache_unlock(ncp);
843 _cache_drop(ncp);
844}
845
846/*
847 * MPSAFE
848 */
849void
850cache_put(struct nchandle *nch)
851{
852 atomic_add_int(&nch->mount->mnt_refs, -1);
853 _cache_put(nch->ncp);
854 nch->ncp = NULL;
855 nch->mount = NULL;
856}
857
858/*
859 * Resolve an unresolved ncp by associating a vnode with it. If the
860 * vnode is NULL, a negative cache entry is created.
861 *
862 * The ncp should be locked on entry and will remain locked on return.
863 *
864 * MPSAFE
865 */
866static
867void
868_cache_setvp(struct mount *mp, struct namecache *ncp, struct vnode *vp)
869{
870 KKASSERT(ncp->nc_flag & NCF_UNRESOLVED);
871
872 if (vp != NULL) {
873 /*
874 * Any vp associated with an ncp which has children must
875 * be held. Any vp associated with a locked ncp must be held.
876 */
877 if (!TAILQ_EMPTY(&ncp->nc_list))
878 vhold(vp);
879 spin_lock_wr(&vp->v_spinlock);
880 ncp->nc_vp = vp;
881 TAILQ_INSERT_HEAD(&vp->v_namecache, ncp, nc_vnode);
882 spin_unlock_wr(&vp->v_spinlock);
883 if (ncp->nc_exlocks)
884 vhold(vp);
885
886 /*
887 * Set auxiliary flags
888 */
889 switch(vp->v_type) {
890 case VDIR:
891 ncp->nc_flag |= NCF_ISDIR;
892 break;
893 case VLNK:
894 ncp->nc_flag |= NCF_ISSYMLINK;
895 /* XXX cache the contents of the symlink */
896 break;
897 default:
898 break;
899 }
900 atomic_add_int(&numcache, 1);
901 ncp->nc_error = 0;
902 } else {
903 /*
904 * When creating a negative cache hit we set the
905 * namecache_gen. A later resolve will clean out the
906 * negative cache hit if the mount point's namecache_gen
907 * has changed. Used by devfs, could also be used by
908 * other remote FSs.
909 */
910 ncp->nc_vp = NULL;
911 spin_lock_wr(&ncspin);
912 TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
913 ++numneg;
914 spin_unlock_wr(&ncspin);
915 ncp->nc_error = ENOENT;
916 if (mp)
917 ncp->nc_namecache_gen = mp->mnt_namecache_gen;
918 }
919 ncp->nc_flag &= ~(NCF_UNRESOLVED | NCF_DEFEREDZAP);
920}
921
922/*
923 * MPSAFE
924 */
925void
926cache_setvp(struct nchandle *nch, struct vnode *vp)
927{
928 _cache_setvp(nch->mount, nch->ncp, vp);
929}
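/*
 * Illustrative sketch (not compiled): what a filesystem's resolve code
 * might do with a locked, unresolved nch.  Variable names are
 * hypothetical.  Passing a NULL vp records a negative entry (nc_error
 * becomes ENOENT); a real vp records a positive entry.
 */
#if 0
	if (lookup_error == ENOENT)
		cache_setvp(nch, NULL);		/* negative cache entry */
	else if (lookup_error == 0)
		cache_setvp(nch, vp);		/* positive cache entry */
#endif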
930
931/*
932 * MPSAFE
933 */
934void
935cache_settimeout(struct nchandle *nch, int nticks)
936{
937 struct namecache *ncp = nch->ncp;
938
939 if ((ncp->nc_timeout = ticks + nticks) == 0)
940 ncp->nc_timeout = 1;
941}
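/*
 * Illustrative sketch (not compiled): bounding the lifetime of a
 * freshly resolved entry, NFS-style.  nticks is in hardclock ticks, so
 * seconds are scaled by hz; the timeout knob shown is hypothetical.
 */
#if 0
	cache_setvp(nch, vp);
	cache_settimeout(nch, ac_timeout_seconds * hz);
#endif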
942
943/*
944 * Disassociate the vnode or negative-cache association and mark a
945 * namecache entry as unresolved again. Note that the ncp is still
946 * left in the hash table and still linked to its parent.
947 *
948 * The ncp should be locked and refd on entry and will remain locked and refd
949 * on return.
950 *
951 * This routine is normally never called on a directory containing children.
952 * However, NFS often does just that in its rename() code as a cop-out to
953 * avoid complex namespace operations. This disconnects a directory vnode
954 * from its namecache and can cause the OLDAPI and NEWAPI to get out of
955 * sync.
956 *
957 * MPSAFE
958 */
959static
960void
961_cache_setunresolved(struct namecache *ncp)
962{
963 struct vnode *vp;
964
965 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
966 ncp->nc_flag |= NCF_UNRESOLVED;
967 ncp->nc_timeout = 0;
968 ncp->nc_error = ENOTCONN;
969 if ((vp = ncp->nc_vp) != NULL) {
970 atomic_add_int(&numcache, -1);
971 spin_lock_wr(&vp->v_spinlock);
972 ncp->nc_vp = NULL;
973 TAILQ_REMOVE(&vp->v_namecache, ncp, nc_vnode);
974 spin_unlock_wr(&vp->v_spinlock);
975
976 /*
977 * Any vp associated with an ncp with children is
978 * held by that ncp. Any vp associated with a locked
979 * ncp is held by that ncp. These conditions must be
980 * undone when the vp is cleared out from the ncp.
981 */
982 if (!TAILQ_EMPTY(&ncp->nc_list))
983 vdrop(vp);
984 if (ncp->nc_exlocks)
985 vdrop(vp);
986 } else {
987 spin_lock_wr(&ncspin);
988 TAILQ_REMOVE(&ncneglist, ncp, nc_vnode);
989 --numneg;
990 spin_unlock_wr(&ncspin);
991 }
992 ncp->nc_flag &= ~(NCF_WHITEOUT|NCF_ISDIR|NCF_ISSYMLINK);
993 }
994}
995
996/*
997 * The cache_nresolve() code calls this function to automatically
998 * set a resolved cache element to unresolved if it has timed out
999 * or if it is a negative cache hit and the mount point namecache_gen
1000 * has changed.
1001 *
1002 * MPSAFE
1003 */
1004static __inline void
1005_cache_auto_unresolve(struct mount *mp, struct namecache *ncp)
1006{
1007 /*
1008 * Already in an unresolved state, nothing to do.
1009 */
1010 if (ncp->nc_flag & NCF_UNRESOLVED)
1011 return;
1012
1013 /*
1014 * Try to zap entries that have timed out. We have
1015 * to be careful here because locked leaves may depend
1016 * on the vnode remaining intact in a parent, so only
1017 * do this under very specific conditions.
1018 */
1019 if (ncp->nc_timeout && (int)(ncp->nc_timeout - ticks) < 0 &&
1020 TAILQ_EMPTY(&ncp->nc_list)) {
1021 _cache_setunresolved(ncp);
1022 return;
1023 }
1024
1025 /*
1026 * If a resolved negative cache hit is invalid due to
1027 * the mount's namecache generation being bumped, zap it.
1028 */
1029 if (ncp->nc_vp == NULL &&
1030 ncp->nc_namecache_gen != mp->mnt_namecache_gen) {
1031 _cache_setunresolved(ncp);
1032 return;
1033 }
1034}
1035
1036/*
1037 * MPSAFE
1038 */
1039void
1040cache_setunresolved(struct nchandle *nch)
1041{
1042 _cache_setunresolved(nch->ncp);
1043}
1044
1045/*
1046 * Determine if we can clear NCF_ISMOUNTPT by scanning the mountlist
1047 * looking for matches. This flag tells the lookup code when it must
1048 * check for a mount linkage and also prevents the directories in question
1049 * from being deleted or renamed.
1050 *
1051 * MPSAFE
1052 */
1053static
1054int
1055cache_clrmountpt_callback(struct mount *mp, void *data)
1056{
1057 struct nchandle *nch = data;
1058
1059 if (mp->mnt_ncmounton.ncp == nch->ncp)
1060 return(1);
1061 if (mp->mnt_ncmountpt.ncp == nch->ncp)
1062 return(1);
1063 return(0);
1064}
1065
1066/*
1067 * MPSAFE
1068 */
1069void
1070cache_clrmountpt(struct nchandle *nch)
1071{
1072 int count;
1073
1074 count = mountlist_scan(cache_clrmountpt_callback, nch,
1075 MNTSCAN_FORWARD|MNTSCAN_NOBUSY);
1076 if (count == 0)
1077 nch->ncp->nc_flag &= ~NCF_ISMOUNTPT;
1078}
1079
1080/*
1081 * Invalidate portions of the namecache topology given a starting entry.
1082 * The passed ncp is set to an unresolved state.
1083 *
1084 * The passed ncp must be referenced and locked. The routine may unlock
1085 * and relock ncp several times, and will recheck the children and loop
1086 * to catch races. When done the passed ncp will be returned with the
1087 * reference and lock intact.
1088 *
1089 * CINV_DESTROY - Set a flag in the passed ncp entry indicating
1090 * that the physical underlying nodes have been
1091 * destroyed... as in deleted. For example, when
1092 * a directory is removed. This will cause record
1093 * lookups on the name to no longer be able to find
1094 * the record and tells the resolver to return failure
1095 *			  rather than trying to resolve through the parent.
1096 *
1097 * The topology itself, including ncp->nc_name,
1098 * remains intact.
1099 *
1100 * This only applies to the passed ncp, if CINV_CHILDREN
1101 * is specified the children are not flagged.
1102 *
1103 * CINV_CHILDREN - Set all children (recursively) to an unresolved
1104 * state as well.
1105 *
1106 * Note that this will also have the side effect of
1107 * cleaning out any unreferenced nodes in the topology
1108 * from the leaves up as the recursion backs out.
1109 *
1110 * Note that the topology for any referenced nodes remains intact, but
1111 * the nodes will be marked as having been destroyed and will be set
1112 * to an unresolved state.
1113 *
1114 * It is possible for cache_inval() to race a cache_resolve(), meaning that
1115 * the namecache entry may not actually be invalidated on return if it was
1116 * revalidated while recursing down into its children. This code guarantees
1117 * that the node(s) will go through an invalidation cycle, but does not
1118 * guarantee that they will remain in an invalidated state.
1119 *
1120 * Returns non-zero if a revalidation was detected during the invalidation
1121 * recursion, zero otherwise. Note that since only the original ncp is
1122 * locked the revalidation ultimately can only indicate that the original ncp
1123 * *MIGHT* have been reresolved.
1124 *
1125 * DEEP RECURSION HANDLING - If a recursive invalidation recurses deeply we
1126 * have to avoid blowing out the kernel stack. We do this by saving the
1127 * deep namecache node and aborting the recursion, then re-recursing at that
1128 * node using a depth-first algorithm in order to allow multiple deep
1129 * recursions to chain through each other, then we restart the invalidation
1130 * from scratch.
1131 *
1132 * MPSAFE
1133 */
1134
1135struct cinvtrack {
1136 struct namecache *resume_ncp;
1137 int depth;
1138};
1139
1140static int _cache_inval_internal(struct namecache *, int, struct cinvtrack *);
1141
1142static
1143int
1144_cache_inval(struct namecache *ncp, int flags)
1145{
1146 struct cinvtrack track;
1147 struct namecache *ncp2;
1148 int r;
1149
1150 track.depth = 0;
1151 track.resume_ncp = NULL;
1152
1153 for (;;) {
1154 r = _cache_inval_internal(ncp, flags, &track);
1155 if (track.resume_ncp == NULL)
1156 break;
1157 kprintf("Warning: deep namecache recursion at %s\n",
1158 ncp->nc_name);
1159 _cache_unlock(ncp);
1160 while ((ncp2 = track.resume_ncp) != NULL) {
1161 track.resume_ncp = NULL;
1162 _cache_lock(ncp2);
1163 _cache_inval_internal(ncp2, flags & ~CINV_DESTROY,
1164 &track);
1165 _cache_put(ncp2);
1166 }
1167 _cache_lock(ncp);
1168 }
1169 return(r);
1170}
1171
1172int
1173cache_inval(struct nchandle *nch, int flags)
1174{
1175 return(_cache_inval(nch->ncp, flags));
1176}
1177
1178/*
1179 * Helper for _cache_inval(). The passed ncp is refd and locked and
1180 * remains that way on return, but may be unlocked/relocked multiple
1181 * times by the routine.
1182 */
1183static int
1184_cache_inval_internal(struct namecache *ncp, int flags, struct cinvtrack *track)
1185{
1186 struct namecache *kid;
1187 struct namecache *nextkid;
1188 int rcnt = 0;
1189
1190 KKASSERT(ncp->nc_exlocks);
1191
1192 _cache_setunresolved(ncp);
1193 if (flags & CINV_DESTROY)
1194 ncp->nc_flag |= NCF_DESTROYED;
1195 if ((flags & CINV_CHILDREN) &&
1196 (kid = TAILQ_FIRST(&ncp->nc_list)) != NULL
1197 ) {
1198 _cache_hold(kid);
1199 if (++track->depth > MAX_RECURSION_DEPTH) {
1200 track->resume_ncp = ncp;
1201 _cache_hold(ncp);
1202 ++rcnt;
1203 }
1204 _cache_unlock(ncp);
1205 while (kid) {
1206 if (track->resume_ncp) {
1207 _cache_drop(kid);
1208 break;
1209 }
1210 if ((nextkid = TAILQ_NEXT(kid, nc_entry)) != NULL)
1211 _cache_hold(nextkid);
1212 if ((kid->nc_flag & NCF_UNRESOLVED) == 0 ||
1213 TAILQ_FIRST(&kid->nc_list)
1214 ) {
1215 _cache_lock(kid);
1216 rcnt += _cache_inval_internal(kid, flags & ~CINV_DESTROY, track);
1217 _cache_unlock(kid);
1218 }
1219 _cache_drop(kid);
1220 kid = nextkid;
1221 }
1222 --track->depth;
1223 _cache_lock(ncp);
1224 }
1225
1226 /*
1227 * Someone could have gotten in there while ncp was unlocked,
1228 * retry if so.
1229 */
1230 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0)
1231 ++rcnt;
1232 return (rcnt);
1233}
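/*
 * Illustrative sketch (not compiled): the save-and-resume shape used
 * by _cache_inval() / _cache_inval_internal() above to bound recursion
 * depth.  walk() is a hypothetical stand-in: it aborts past
 * MAX_RECURSION_DEPTH, stashing the deep node in the tracker, and the
 * caller re-recurses from that node before restarting from scratch.
 */
#if 0
	track.depth = 0;
	track.resume_ncp = NULL;
	for (;;) {
		walk(ncp, &track);
		if (track.resume_ncp == NULL)
			break;			/* completed, no abort */
		deep = track.resume_ncp;	/* resume at the deep node */
		track.resume_ncp = NULL;
		walk(deep, &track);
	}
#endif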
1234
1235/*
1236 * Invalidate a vnode's namecache associations. To avoid races against
1237 * the resolver we do not invalidate a node which we previously invalidated
1238 * but which was then re-resolved while we were in the invalidation loop.
1239 *
1240 * Returns non-zero if any namecache entries remain after the invalidation
1241 * loop completed.
1242 *
1243 * NOTE: Unlike the namecache topology which guarantees that ncp's will not
1244 * be ripped out of the topology while held, the vnode's v_namecache
1245 * list has no such restriction. NCP's can be ripped out of the list
1246 * at virtually any time if not locked, even if held.
1247 *
1248 * In addition, the v_namecache list itself must be locked via
1249 * the vnode's spinlock.
1250 *
1251 * MPSAFE
1252 */
1253int
1254cache_inval_vp(struct vnode *vp, int flags)
1255{
1256 struct namecache *ncp;
1257 struct namecache *next;
1258
1259restart:
1260 spin_lock_wr(&vp->v_spinlock);
1261 ncp = TAILQ_FIRST(&vp->v_namecache);
1262 if (ncp)
1263 _cache_hold(ncp);
1264 while (ncp) {
1265 /* loop entered with ncp held and vp spin-locked */
1266 if ((next = TAILQ_NEXT(ncp, nc_vnode)) != NULL)
1267 _cache_hold(next);
1268 spin_unlock_wr(&vp->v_spinlock);
1269 _cache_lock(ncp);
1270 if (ncp->nc_vp != vp) {
1271 kprintf("Warning: cache_inval_vp: race-A detected on "
1272 "%s\n", ncp->nc_name);
1273 _cache_put(ncp);
1274 if (next)
1275 _cache_drop(next);
1276 goto restart;
1277 }
1278 _cache_inval(ncp, flags);
1279 _cache_put(ncp); /* also releases reference */
1280 ncp = next;
1281 spin_lock_wr(&vp->v_spinlock);
1282 if (ncp && ncp->nc_vp != vp) {
1283 spin_unlock_wr(&vp->v_spinlock);
1284 kprintf("Warning: cache_inval_vp: race-B detected on "
1285 "%s\n", ncp->nc_name);
1286 _cache_drop(ncp);
1287 goto restart;
1288 }
1289 }
1290 spin_unlock_wr(&vp->v_spinlock);
1291 return(TAILQ_FIRST(&vp->v_namecache) != NULL);
1292}
1293
1294/*
1295 * This routine is used instead of the normal cache_inval_vp() when we
1296 * are trying to recycle otherwise good vnodes.
1297 *
1298 * Return 0 on success, non-zero if not all namecache records could be
1299 * disassociated from the vnode (for various reasons).
1300 *
1301 * MPSAFE
1302 */
1303int
1304cache_inval_vp_nonblock(struct vnode *vp)
1305{
1306 struct namecache *ncp;
1307 struct namecache *next;
1308
1309 spin_lock_wr(&vp->v_spinlock);
1310 ncp = TAILQ_FIRST(&vp->v_namecache);
1311 if (ncp)
1312 _cache_hold(ncp);
1313 while (ncp) {
1314 /* loop entered with ncp held */
1315 if ((next = TAILQ_NEXT(ncp, nc_vnode)) != NULL)
1316 _cache_hold(next);
1317 spin_unlock_wr(&vp->v_spinlock);
1318 if (_cache_lock_nonblock(ncp)) {
1319 _cache_drop(ncp);
1320 if (next)
1321 _cache_drop(next);
1322 goto done;
1323 }
1324 if (ncp->nc_vp != vp) {
1325 kprintf("Warning: cache_inval_vp: race-A detected on "
1326 "%s\n", ncp->nc_name);
1327 _cache_put(ncp);
1328 if (next)
1329 _cache_drop(next);
1330 goto done;
1331 }
1332 _cache_inval(ncp, 0);
1333 _cache_put(ncp); /* also releases reference */
1334 ncp = next;
1335 spin_lock_wr(&vp->v_spinlock);
1336 if (ncp && ncp->nc_vp != vp) {
1337 spin_unlock_wr(&vp->v_spinlock);
1338 kprintf("Warning: cache_inval_vp: race-B detected on "
1339 "%s\n", ncp->nc_name);
1340 _cache_drop(ncp);
1341 goto done;
1342 }
1343 }
1344 spin_unlock_wr(&vp->v_spinlock);
1345done:
1346 return(TAILQ_FIRST(&vp->v_namecache) != NULL);
1347}
1348
1349/*
1350 * The source ncp has been renamed to the target ncp. Both fncp and tncp
1351 * must be locked. The target ncp is destroyed (as a normal rename-over
1352 * would destroy the target file or directory).
1353 *
1354 * Because there may be references to the source ncp we cannot copy its
1355 * contents to the target. Instead the source ncp is relinked as the target
1356 * and the target ncp is removed from the namecache topology.
1357 *
1358 * MPSAFE
1359 */
1360void
1361cache_rename(struct nchandle *fnch, struct nchandle *tnch)
1362{
1363 struct namecache *fncp = fnch->ncp;
1364 struct namecache *tncp = tnch->ncp;
1365 struct namecache *tncp_par;
1366 struct nchash_head *nchpp;
1367 u_int32_t hash;
1368 char *oname;
1369
1370 /*
1371 * Rename fncp (unlink)
1372 */
1373 _cache_unlink_parent(fncp);
1374 oname = fncp->nc_name;
1375 fncp->nc_name = tncp->nc_name;
1376 fncp->nc_nlen = tncp->nc_nlen;
1377 tncp_par = tncp->nc_parent;
1378 _cache_hold(tncp_par);
1379 _cache_lock(tncp_par);
1380
1381 /*
1382 * Rename fncp (relink)
1383 */
1384 hash = fnv_32_buf(fncp->nc_name, fncp->nc_nlen, FNV1_32_INIT);
1385 hash = fnv_32_buf(&tncp_par, sizeof(tncp_par), hash);
1386 nchpp = NCHHASH(hash);
1387
1388 spin_lock_wr(&nchpp->spin);
1389 _cache_link_parent(fncp, tncp_par, nchpp);
1390 spin_unlock_wr(&nchpp->spin);
1391
1392 _cache_put(tncp_par);
1393
1394 /*
1395 * Get rid of the overwritten tncp (unlink)
1396 */
1397 _cache_setunresolved(tncp);
1398 _cache_unlink_parent(tncp);
1399 tncp->nc_name = NULL;
1400 tncp->nc_nlen = 0;
1401
1402 if (oname)
1403 kfree(oname, M_VFSCACHE);
1404}
1405
1406/*
1407 * vget the vnode associated with the namecache entry. Resolve the namecache
1408 * entry if necessary. The passed ncp must be referenced and locked.
1409 *
1410 * lk_type may be LK_SHARED or LK_EXCLUSIVE. A ref'd, possibly locked
1411 * vnode (depending on the passed lk_type) will be returned in *vpp with an error
1412 * of 0, or NULL will be returned in *vpp with a non-0 error code. The
1413 * most typical error is ENOENT, meaning that the ncp represents a negative
1414 * cache hit and there is no vnode to retrieve, but other errors can occur
1415 * too.
1416 *
1417 * The vget() can race a reclaim. If this occurs we re-resolve the
1418 * namecache entry.
1419 *
1420 * There are numerous places in the kernel where vget() is called on a
1421 * vnode while one or more of its namecache entries is locked. Releasing
1422 * a vnode never deadlocks against locked namecache entries (the vnode
1423 * will not get recycled while referenced ncp's exist). This means we
1424 * can safely acquire the vnode. In fact, we MUST NOT release the ncp
1425 * lock when acquiring the vp lock or we might cause a deadlock.
1426 *
1427 * MPSAFE
1428 */
1429int
1430cache_vget(struct nchandle *nch, struct ucred *cred,
1431 int lk_type, struct vnode **vpp)
1432{
1433 struct namecache *ncp;
1434 struct vnode *vp;
1435 int error;
1436
1437 ncp = nch->ncp;
1438 KKASSERT(ncp->nc_locktd == curthread);
1439again:
1440 vp = NULL;
1441 if (ncp->nc_flag & NCF_UNRESOLVED)
1442 error = cache_resolve(nch, cred);
1443 else
1444 error = 0;
1445
1446 if (error == 0 && (vp = ncp->nc_vp) != NULL) {
1447 error = vget(vp, lk_type);
1448 if (error) {
1449 /*
1450 * VRECLAIM race
1451 */
1452 if (error == ENOENT) {
1453 kprintf("Warning: vnode reclaim race detected "
1454 "in cache_vget on %p (%s)\n",
1455 vp, ncp->nc_name);
1456 _cache_setunresolved(ncp);
1457 goto again;
1458 }
1459
1460 /*
1461 * Not a reclaim race, some other error.
1462 */
1463 KKASSERT(ncp->nc_vp == vp);
1464 vp = NULL;
1465 } else {
1466 KKASSERT(ncp->nc_vp == vp);
1467 KKASSERT((vp->v_flag & VRECLAIMED) == 0);
1468 }
1469 }
1470 if (error == 0 && vp == NULL)
1471 error = ENOENT;
1472 *vpp = vp;
1473 return(error);
1474}
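/*
 * Illustrative sketch (not compiled): typical caller-side use of
 * cache_vget().  The helper name is hypothetical.  The nch must be
 * referenced and locked across the call; ENOENT covers both negative
 * hits and unresolvable entries.
 */
#if 0
static int
example_nch_to_vp(struct nchandle *nch, struct ucred *cred)
{
	struct nchandle tmp;
	struct vnode *vp;
	int error;

	cache_get(nch, &tmp);		/* ref + lock a working copy */
	error = cache_vget(&tmp, cred, LK_SHARED, &vp);
	if (error == 0) {
		/* vp is referenced and shared-locked here */
		vput(vp);
	}
	cache_put(&tmp);
	return (error);
}
#endif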
1475
1476int
1477cache_vref(struct nchandle *nch, struct ucred *cred, struct vnode **vpp)
1478{
1479 struct namecache *ncp;
1480 struct vnode *vp;
1481 int error;
1482
1483 ncp = nch->ncp;
1484 KKASSERT(ncp->nc_locktd == curthread);
1485again:
1486 vp = NULL;
1487 if (ncp->nc_flag & NCF_UNRESOLVED)
1488 error = cache_resolve(nch, cred);
1489 else
1490 error = 0;
1491
1492 if (error == 0 && (vp = ncp->nc_vp) != NULL) {
1493 error = vget(vp, LK_SHARED);
1494 if (error) {
1495 /*
1496 * VRECLAIM race
1497 */
1498 if (error == ENOENT) {
1499 kprintf("Warning: vnode reclaim race detected "
1500 "in cache_vget on %p (%s)\n",
1501 vp, ncp->nc_name);
1502 _cache_setunresolved(ncp);
1503 goto again;
1504 }
1505
1506 /*
1507 * Not a reclaim race, some other error.
1508 */
1509 KKASSERT(ncp->nc_vp == vp);
1510 vp = NULL;
1511 } else {
1512 KKASSERT(ncp->nc_vp == vp);
1513 KKASSERT((vp->v_flag & VRECLAIMED) == 0);
1514 /* caller does not want a lock */
1515 vn_unlock(vp);
1516 }
1517 }
1518 if (error == 0 && vp == NULL)
1519 error = ENOENT;
1520 *vpp = vp;
1521 return(error);
1522}
1523
1524/*
1525 * Return a referenced vnode representing the parent directory of
1526 * ncp.
1527 *
1528 * Because the caller has locked the ncp it should not be possible for
1529 * the parent ncp to go away. However, the parent can unresolve its
1530 * dvp at any time so we must be able to acquire a lock on the parent
1531 * to safely access nc_vp.
1532 *
1533 * We have to leave par unlocked when vget()ing dvp to avoid a deadlock,
1534 * so use vhold()/vdrop() while holding the lock to prevent dvp from
1535 * getting destroyed.
1536 *
1537 * MPSAFE - Note vhold() is allowed when dvp has 0 refs if we hold a
1538 *	    lock on the ncp in question.
1539 */
1540static struct vnode *
1541cache_dvpref(struct namecache *ncp)
1542{
1543 struct namecache *par;
1544 struct vnode *dvp;
1545
1546 dvp = NULL;
1547 if ((par = ncp->nc_parent) != NULL) {
1548 _cache_hold(par);
1549 _cache_lock(par);
1550 if ((par->nc_flag & NCF_UNRESOLVED) == 0) {
1551 if ((dvp = par->nc_vp) != NULL)
1552 vhold(dvp);
1553 }
1554 _cache_unlock(par);
1555 if (dvp) {
1556 if (vget(dvp, LK_SHARED) == 0) {
1557 vn_unlock(dvp);
1558 vdrop(dvp);
1559 /* return refd, unlocked dvp */
1560 } else {
1561 vdrop(dvp);
1562 dvp = NULL;
1563 }
1564 }
1565 _cache_drop(par);
1566 }
1567 return(dvp);
1568}
1569
1570/*
1571 * Convert a directory vnode to a namecache record without any other
1572 * knowledge of the topology. This ONLY works with directory vnodes and
1573 * is ONLY used by the NFS server. dvp must be refd but unlocked, and the
1574 * returned ncp (if not NULL) will be held and unlocked.
1575 *
1576 * If 'makeit' is 0 and dvp has no existing namecache record, NULL is returned.
1577 * If 'makeit' is 1 we attempt to track-down and create the namecache topology
1578 * for dvp. This will fail only if the directory has been deleted out from
1579 * under the caller.
1580 *
1581 * Callers must always check for a NULL return no matter the value of 'makeit'.
1582 *
1583 * To avoid underflowing the kernel stack each recursive call increments
1584 * the makeit variable.
1585 */
1586
1587static int cache_inefficient_scan(struct nchandle *nch, struct ucred *cred,
1588 struct vnode *dvp, char *fakename);
1589static int cache_fromdvp_try(struct vnode *dvp, struct ucred *cred,
1590 struct vnode **saved_dvp);
1591
1592int
1593cache_fromdvp(struct vnode *dvp, struct ucred *cred, int makeit,
1594 struct nchandle *nch)
1595{
1596 struct vnode *saved_dvp;
1597 struct vnode *pvp;
1598 char *fakename;
1599 int error;
1600
1601 nch->ncp = NULL;
1602 nch->mount = dvp->v_mount;
1603 saved_dvp = NULL;
1604 fakename = NULL;
1605
1606 /*
1607 * Handle the makeit == 0 degenerate case
1608 */
1609 if (makeit == 0) {
1610 spin_lock_wr(&dvp->v_spinlock);
1611 nch->ncp = TAILQ_FIRST(&dvp->v_namecache);
1612 if (nch->ncp)
1613 cache_hold(nch);
1614 spin_unlock_wr(&dvp->v_spinlock);
1615 }
1616
1617 /*
1618 * Loop until resolution, inside code will break out on error.
1619 */
1620 while (makeit) {
1621 /*
1622 * Break out if we successfully acquire a working ncp.
1623 */
1624 spin_lock_wr(&dvp->v_spinlock);
1625 nch->ncp = TAILQ_FIRST(&dvp->v_namecache);
1626 if (nch->ncp) {
1627 cache_hold(nch);
1628 spin_unlock_wr(&dvp->v_spinlock);
1629 break;
1630 }
1631 spin_unlock_wr(&dvp->v_spinlock);
1632
1633 /*
1634 * If dvp is the root of its filesystem it should already
1635 * have a namecache pointer associated with it as a side
1636 * effect of the mount, but it may have been disassociated.
1637 */
1638 if (dvp->v_flag & VROOT) {
1639 nch->ncp = _cache_get(nch->mount->mnt_ncmountpt.ncp);
1640 error = cache_resolve_mp(nch->mount);
1641 _cache_put(nch->ncp);
1642 if (ncvp_debug) {
1643 kprintf("cache_fromdvp: resolve root of mount %p error %d",
1644 dvp->v_mount, error);
1645 }
1646 if (error) {
1647 if (ncvp_debug)
1648 kprintf(" failed\n");
1649 nch->ncp = NULL;
1650 break;
1651 }
1652 if (ncvp_debug)
1653 kprintf(" succeeded\n");
1654 continue;
1655 }
1656
1657 /*
1658 * If we have recursed too deeply, resort to an O(n^2)
1659 * algorithm to resolve the namecache topology. The
1660 * resolved pvp is left referenced in saved_dvp to
1661 * prevent the tree from being destroyed while we loop.
1662 */
1663 if (makeit > 20) {
1664 error = cache_fromdvp_try(dvp, cred, &saved_dvp);
1665 if (error) {
1666 kprintf("lookupdotdot(longpath) failed %d "
1667 "dvp %p\n", error, dvp);
1668 nch->ncp = NULL;
1669 break;
1670 }
1671 continue;
1672 }
1673
1674 /*
1675 * Get the parent directory and resolve its ncp.
1676 */
1677 if (fakename) {
1678 kfree(fakename, M_TEMP);
1679 fakename = NULL;
1680 }
1681 error = vop_nlookupdotdot(*dvp->v_ops, dvp, &pvp, cred,
1682 &fakename);
1683 if (error) {
1684 kprintf("lookupdotdot failed %d dvp %p\n", error, dvp);
1685 break;
1686 }
1687 vn_unlock(pvp);
1688
1689 /*
1690 * Reuse makeit as a recursion depth counter. On success
1691 * nch will be fully referenced.
1692 */
1693 cache_fromdvp(pvp, cred, makeit + 1, nch);
1694 vrele(pvp);
1695 if (nch->ncp == NULL)
1696 break;
1697
1698 /*
1699 * Do an inefficient scan of pvp (embodied by ncp) to look
1700 * for dvp. This will create a namecache record for dvp on
1701 * success. We loop up to recheck on success.
1702 *
1703 * ncp and dvp are both held but not locked.
1704 */
1705 error = cache_inefficient_scan(nch, cred, dvp, fakename);
1706 if (error) {
1707 kprintf("cache_fromdvp: scan %p (%s) failed on dvp=%p\n",
1708 pvp, nch->ncp->nc_name, dvp);
1709 cache_drop(nch);
1710 /* nch was NULLed out, reload mount */
1711 nch->mount = dvp->v_mount;
1712 break;
1713 }
1714 if (ncvp_debug) {
1715 kprintf("cache_fromdvp: scan %p (%s) succeeded\n",
1716 pvp, nch->ncp->nc_name);
1717 }
1718 cache_drop(nch);
1719 /* nch was NULLed out, reload mount */
1720 nch->mount = dvp->v_mount;
1721 }
1722
1723 /*
1724 * If nch->ncp is non-NULL it will have been held already.
1725 */
1726 if (fakename)
1727 kfree(fakename, M_TEMP);
1728 if (saved_dvp)
1729 vrele(saved_dvp);
1730 if (nch->ncp)
1731 return (0);
1732 return (EINVAL);
1733}
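/*
 * Illustrative sketch (not compiled): how a file-handle-based server
 * such as NFS might call cache_fromdvp(), per the comments above.  The
 * helper name is hypothetical.  makeit doubles as the recursion depth
 * counter, so outside callers pass 0 or 1.
 */
#if 0
static int
example_dvp_to_nch(struct vnode *dvp, struct ucred *cred,
		   struct nchandle *nch)
{
	int error;

	error = cache_fromdvp(dvp, cred, 1, nch);
	if (error == 0) {
		/* nch->ncp is held but not locked */
		cache_drop(nch);
	}
	return (error);
}
#endif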
1734
1735/*
1736 * Go up the chain of parent directories until we find something
1737 * we can resolve into the namecache. This is very inefficient.
1738 */
1739static
1740int
1741cache_fromdvp_try(struct vnode *dvp, struct ucred *cred,
1742 struct vnode **saved_dvp)
1743{
1744 struct nchandle nch;
1745 struct vnode *pvp;
1746 int error;
1747 static time_t last_fromdvp_report;
1748 char *fakename;
1749
1750 /*
1751 * Loop getting the parent directory vnode until we get something we
1752 * can resolve in the namecache.
1753 */
1754 vref(dvp);
1755 nch.mount = dvp->v_mount;
1756 nch.ncp = NULL;
1757 fakename = NULL;
1758
1759 for (;;) {
1760 if (fakename) {
1761 kfree(fakename, M_TEMP);
1762 fakename = NULL;
1763 }
1764 error = vop_nlookupdotdot(*dvp->v_ops, dvp, &pvp, cred,
1765 &fakename);
1766 if (error) {
1767 vrele(dvp);
1768 break;
1769 }
1770 vn_unlock(pvp);
1771 spin_lock_wr(&pvp->v_spinlock);
1772 if ((nch.ncp = TAILQ_FIRST(&pvp->v_namecache)) != NULL) {
1773 _cache_hold(nch.ncp);
1774 spin_unlock_wr(&pvp->v_spinlock);
1775 vrele(pvp);
1776 break;
1777 }
1778 spin_unlock_wr(&pvp->v_spinlock);
1779 if (pvp->v_flag & VROOT) {
1780 nch.ncp = _cache_get(pvp->v_mount->mnt_ncmountpt.ncp);
1781 error = cache_resolve_mp(nch.mount);
1782 _cache_unlock(nch.ncp);
1783 vrele(pvp);
1784 if (error) {
1785 _cache_drop(nch.ncp);
1786 nch.ncp = NULL;
1787 vrele(dvp);
1788 }
1789 break;
1790 }
1791 vrele(dvp);
1792 dvp = pvp;
1793 }
1794 if (error == 0) {
1795 if (last_fromdvp_report != time_second) {
1796 last_fromdvp_report = time_second;
1797 kprintf("Warning: extremely inefficient path "
1798 "resolution on %s\n",
1799 nch.ncp->nc_name);
1800 }
1801 error = cache_inefficient_scan(&nch, cred, dvp, fakename);
1802
1803 /*
1804 * Hopefully dvp now has a namecache record associated with
1805 * it. Leave it referenced to prevent the kernel from
1806 * recycling the vnode. Otherwise extremely long directory
1807 * paths could result in endless recycling.
1808 */
1809 if (*saved_dvp)
1810 vrele(*saved_dvp);
1811 *saved_dvp = dvp;
1812 _cache_drop(nch.ncp);
1813 }
1814 if (fakename)
1815 kfree(fakename, M_TEMP);
1816 return (error);
1817}
1818
1819/*
1820 * Do an inefficient scan of the directory represented by ncp looking for
1821 * the directory vnode dvp. ncp must be held but not locked on entry and
1822 * will be held on return. dvp must be refd but not locked on entry and
1823 * will remain refd on return.
1824 *
1825 * Why do this at all? Well, due to its stateless nature the NFS server
1826 * converts file handles directly to vnodes without necessarily going through
1827 * the namecache ops that would otherwise create the namecache topology
1828 * leading to the vnode. We could either (1) Change the namecache algorithms
1829 * to allow disconnected namecache records that are re-merged opportunistically,
1830 * or (2) Make the NFS server backtrack and scan to recover a connected
1831 * namecache topology in order to then be able to issue new API lookups.
1832 *
1833 * It turns out that (1) is a huge mess. It takes a nice clean set of
1834 * namecache algorithms and introduces a lot of complication in every subsystem
1835 * that calls into the namecache to deal with the re-merge case, especially
1836 * since we are using the namecache to placehold negative lookups and the
1837 * vnode might not be immediately assigned. (2) is certainly far less
1838 * efficient than (1), but since we are only talking about directories here
1839 * (which are likely to remain cached), the case does not actually run all
1840 * that often and has the supreme advantage of not polluting the namecache
1841 * algorithms.
1842 *
1843 * If a fakename is supplied just construct a namecache entry using the
1844 * fake name.
1845 */
1846static int
1847cache_inefficient_scan(struct nchandle *nch, struct ucred *cred,
1848 struct vnode *dvp, char *fakename)
1849{
1850 struct nlcomponent nlc;
1851 struct nchandle rncp;
1852 struct dirent *den;
1853 struct vnode *pvp;
1854 struct vattr vat;
1855 struct iovec iov;
1856 struct uio uio;
1857 int blksize;
1858 int eofflag;
1859 int bytes;
1860 char *rbuf;
1861 int error;
1862
1863 vat.va_blocksize = 0;
1864 if ((error = VOP_GETATTR(dvp, &vat)) != 0)
1865 return (error);
1866 cache_lock(nch);
1867 error = cache_vref(nch, cred, &pvp);
1868 cache_unlock(nch);
1869 if (error)
1870 return (error);
1871 if (ncvp_debug) {
1872 kprintf("inefficient_scan: directory iosize %ld "
1873 "vattr fileid = %lld\n",
1874 vat.va_blocksize,
1875 (long long)vat.va_fileid);
1876 }
1877
1878 /*
1879 * Use the supplied fakename if not NULL. Fake names are typically
1880 * not in the actual filesystem hierarchy. This is used by HAMMER
1881 * to glue @@timestamp recursions together.
1882 */
1883 if (fakename) {
1884 nlc.nlc_nameptr = fakename;
1885 nlc.nlc_namelen = strlen(fakename);
1886 rncp = cache_nlookup(nch, &nlc);
1887 goto done;
1888 }
1889
1890 if ((blksize = vat.va_blocksize) == 0)
1891 blksize = DEV_BSIZE;
1892 rbuf = kmalloc(blksize, M_TEMP, M_WAITOK);
1893 rncp.ncp = NULL;
1894
1895 eofflag = 0;
1896 uio.uio_offset = 0;
1897again:
1898 iov.iov_base = rbuf;
1899 iov.iov_len = blksize;
1900 uio.uio_iov = &iov;
1901 uio.uio_iovcnt = 1;
1902 uio.uio_resid = blksize;
1903 uio.uio_segflg = UIO_SYSSPACE;
1904 uio.uio_rw = UIO_READ;
1905 uio.uio_td = curthread;
1906
1907 if (ncvp_debug >= 2)
1908 kprintf("cache_inefficient_scan: readdir @ %08x\n", (int)uio.uio_offset);
1909 error = VOP_READDIR(pvp, &uio, cred, &eofflag, NULL, NULL);
1910 if (error == 0) {
1911 den = (struct dirent *)rbuf;
1912 bytes = blksize - uio.uio_resid;
1913
1914 while (bytes > 0) {
1915 if (ncvp_debug >= 2) {
1916 kprintf("cache_inefficient_scan: %*.*s\n",
1917 den->d_namlen, den->d_namlen,
1918 den->d_name);
1919 }
1920 if (den->d_type != DT_WHT &&
1921 den->d_ino == vat.va_fileid) {
1922 if (ncvp_debug) {
1923 kprintf("cache_inefficient_scan: "
1924 "MATCHED inode %lld path %s/%*.*s\n",
1925 (long long)vat.va_fileid,
1926 nch->ncp->nc_name,
1927 den->d_namlen, den->d_namlen,
1928 den->d_name);
1929 }
1930 nlc.nlc_nameptr = den->d_name;
1931 nlc.nlc_namelen = den->d_namlen;
1932 rncp = cache_nlookup(nch, &nlc);
1933 KKASSERT(rncp.ncp != NULL);
1934 break;
1935 }
1936 bytes -= _DIRENT_DIRSIZ(den);
1937 den = _DIRENT_NEXT(den);
1938 }
1939 if (rncp.ncp == NULL && eofflag == 0 && uio.uio_resid != blksize)
1940 goto again;
1941 }
1942 kfree(rbuf, M_TEMP);
1943done:
1944 vrele(pvp);
1945 if (rncp.ncp) {
1946 if (rncp.ncp->nc_flag & NCF_UNRESOLVED) {
1947 _cache_setvp(rncp.mount, rncp.ncp, dvp);
1948 if (ncvp_debug >= 2) {
1949 kprintf("cache_inefficient_scan: setvp %s/%s = %p\n",
1950 nch->ncp->nc_name, rncp.ncp->nc_name, dvp);
1951 }
1952 } else {
1953 if (ncvp_debug >= 2) {
1954 kprintf("cache_inefficient_scan: setvp %s/%s already set %p/%p\n",
1955 nch->ncp->nc_name, rncp.ncp->nc_name, dvp,
1956 rncp.ncp->nc_vp);
1957 }
1958 }
1959 if (rncp.ncp->nc_vp == NULL)
1960 error = rncp.ncp->nc_error;
1961 /*
1962 * Release rncp after a successful nlookup. rncp was fully
1963 * referenced.
1964 */
1965 cache_put(&rncp);
1966 } else {
1967 kprintf("cache_inefficient_scan: dvp %p NOT FOUND in %s\n",
1968 dvp, nch->ncp->nc_name);
1969 error = ENOENT;
1970 }
1971 return (error);
1972}
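
/*
 * Illustrative sketch, not part of the original file: the dirent walk
 * used by cache_inefficient_scan() above.  A buffer partially filled
 * by VOP_READDIR() is consumed record by record via _DIRENT_DIRSIZ()
 * and _DIRENT_NEXT() until the valid byte count is exhausted.  The
 * name example_count_dirents is hypothetical.
 */
#if 0
static int
example_count_dirents(char *rbuf, int bytes)
{
	struct dirent *den = (struct dirent *)rbuf;
	int count = 0;

	while (bytes > 0) {
		if (den->d_type != DT_WHT)	/* skip whiteouts */
			++count;
		bytes -= _DIRENT_DIRSIZ(den);	/* physical record size */
		den = _DIRENT_NEXT(den);	/* advance to next record */
	}
	return (count);
}
#endif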
1973
1974/*
1975 * Zap a namecache entry. The ncp is unconditionally set to an unresolved
1976 * state, which disassociates it from its vnode or ncneglist.
1977 *
1978 * Then, if there are no additional references to the ncp and no children,
1979 * the ncp is removed from the topology and destroyed.
1980 *
1981 * References and/or children may exist if the ncp is in the middle of the
1982 * topology, preventing the ncp from being destroyed.
1983 *
1984 * This function must be called with the ncp held and locked and will unlock
1985 * and drop it during zapping.
1986 *
1987 * If nonblock is non-zero and the parent ncp cannot be locked we give up.
1988 * This case can occur in the cache_drop() path.
1989 *
1990 * This function may return a held (but NOT locked) parent node which the
1991 * caller must drop. We do this so _cache_drop() can loop, to avoid
1992 * blowing out the kernel stack.
1993 *
1994 * WARNING! For MPSAFE operation this routine must acquire up to three
1995 * spin locks to be able to safely test nc_refs. Lock order is
1996 * very important.
1997 *
1998 * hash spinlock if on hash list
1999 * parent spinlock if child of parent
2000 * (the ncp is unresolved so there is no vnode association)
2001 */
2002static struct namecache *
2003cache_zap(struct namecache *ncp, int nonblock)
2004{
2005 struct namecache *par;
2006 struct vnode *dropvp;
2007 int refs;
2008
2009 /*
2010 * Disassociate the vnode or negative cache ref and set NCF_UNRESOLVED.
2011 */
2012 _cache_setunresolved(ncp);
2013
2014 /*
2015 * Try to scrap the entry and possibly tail-recurse on its parent.
2016 * We only scrap unref'd (other than our ref) unresolved entries,
2017 * we do not scrap 'live' entries.
2018 *
2019 * Note that once the spinlocks are acquired, if nc_refs == 1 no
2020 * other references are possible. If it isn't 1, however, we have
2021 * to decrement the count while being sure to avoid a 1->0 transition.
2022 */
2023 KKASSERT(ncp->nc_flag & NCF_UNRESOLVED);
2024 KKASSERT(ncp->nc_refs > 0);
2025
2026 /*
2027 * Acquire locks. Note that the parent can't go away while we hold
2028 * a child locked.
2029 */
2030 if ((par = ncp->nc_parent) != NULL) {
2031 if (nonblock) {
2032 for (;;) {
2033 if (_cache_lock_nonblock(par) == 0)
2034 break;
2035 refs = ncp->nc_refs;
2036 ncp->nc_flag |= NCF_DEFEREDZAP;
2037 ++numdefered; /* MP race ok */
2038 if (atomic_cmpset_int(&ncp->nc_refs,
2039 refs, refs - 1)) {
2040 _cache_unlock(ncp);
2041 return(NULL);
2042 }
2043 cpu_pause();
2044 }
2045 _cache_hold(par);
2046 } else {
2047 _cache_hold(par);
2048 _cache_lock(par);
2049 }
2050 spin_lock_wr(&ncp->nc_head->spin);
2051 }
2052
2053 /*
2054 * If someone other than us has a ref or we have children
2055 * we cannot zap the entry. The 1->0 transition and any
2056 * further list operation is protected by the spinlocks
2057 * we have acquired but other transitions are not.
2058 */
2059 for (;;) {
2060 refs = ncp->nc_refs;
2061 if (refs == 1 && TAILQ_EMPTY(&ncp->nc_list))
2062 break;
2063 if (atomic_cmpset_int(&ncp->nc_refs, refs, refs - 1)) {
2064 if (par) {
2065 spin_unlock_wr(&ncp->nc_head->spin);
2066 _cache_put(par);
2067 }
2068 _cache_unlock(ncp);
2069 return(NULL);
2070 }
2071 cpu_pause();
2072 }
2073
2074 /*
2075 * We are the only ref and with the spinlocks held no further
2076 * refs can be acquired by others.
2077 *
2078 * Remove us from the hash list and parent list. We have to
2079 * drop a ref on the parent's vp if the parent's list becomes
2080 * empty.
2081 */
2082 dropvp = NULL;
2083 if (par) {
2084 struct nchash_head *nchpp = ncp->nc_head;
2085
2086 KKASSERT(nchpp != NULL);
2087 LIST_REMOVE(ncp, nc_hash);
2088 TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
2089 if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
2090 dropvp = par->nc_vp;
2091 ncp->nc_head = NULL;
2092 ncp->nc_parent = NULL;
2093 spin_unlock_wr(&nchpp->spin);
2094 _cache_unlock(par);
2095 } else {
2096 KKASSERT(ncp->nc_head == NULL);
2097 }
2098
2099 /*
2100 * ncp should not have picked up any refs. Physically
2101 * destroy the ncp.
2102 */
2103 KKASSERT(ncp->nc_refs == 1);
2104 /* _cache_unlock(ncp) not required */
2105 ncp->nc_refs = -1; /* safety */
2106 if (ncp->nc_name)
2107 kfree(ncp->nc_name, M_VFSCACHE);
2108 kfree(ncp, M_VFSCACHE);
2109
2110 /*
2111 * Delayed drop (we had to release our spinlocks)
2112 *
2113 * The refed parent (if not NULL) must be dropped. The
2114 * caller is responsible for looping.
2115 */
2116 if (dropvp)
2117 vdrop(dropvp);
2118 return(par);
2119}
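
/*
 * Illustrative sketch, not part of the original file: the
 * compare-and-set loop cache_zap() uses to drop a reference without
 * ever performing the 1->0 transition itself.  If the count would
 * reach zero the caller must fall back to the fully locked path.
 * The name example_drop_ref_not_last is hypothetical.
 */
#if 0
static int
example_drop_ref_not_last(struct namecache *ncp)
{
	int refs;

	for (;;) {
		refs = ncp->nc_refs;
		if (refs == 1)
			return (0);	/* would be the last ref, punt */
		if (atomic_cmpset_int(&ncp->nc_refs, refs, refs - 1))
			return (1);	/* dropped one ref atomically */
		cpu_pause();		/* lost the race, retry */
	}
}
#endif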
2120
2121/*
2122 * Clean up dangling negative cache and deferred-drop entries in the
2123 * namecache.
2124 */
2125static enum { CHI_LOW, CHI_HIGH } cache_hysteresis_state = CHI_LOW;
2126
2127void
2128cache_hysteresis(void)
2129{
2130 /*
2131 * Don't cache too many negative hits. We use hysteresis to reduce
2132 * the impact on the critical path.
2133 */
2134 switch(cache_hysteresis_state) {
2135 case CHI_LOW:
2136 if (numneg > MINNEG && numneg * ncnegfactor > numcache) {
2137 _cache_cleanneg(10);
2138 cache_hysteresis_state = CHI_HIGH;
2139 }
2140 break;
2141 case CHI_HIGH:
2142 if (numneg > MINNEG * 9 / 10 &&
2143 numneg * ncnegfactor * 9 / 10 > numcache
2144 ) {
2145 _cache_cleanneg(10);
2146 } else {
2147 cache_hysteresis_state = CHI_LOW;
2148 }
2149 break;
2150 }
2151
2152 /*
2153	 * Clean out dangling deferred-zap ncps which could not
2154	 * be cleanly dropped if too many build up.  Note
2155	 * that numdefered is not an exact number as such ncps
2156	 * can be reused and the counter is not handled in an
2157	 * MP-safe manner by design.
2158 */
2159 if (numdefered * ncnegfactor > numcache) {
2160 _cache_cleandefered();
2161 }
2162}
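
/*
 * Illustrative sketch, not part of the original file: the
 * two-threshold pattern cache_hysteresis() implements.  Cleaning
 * starts at the high water mark but continues down to 90% of it so
 * the state does not flip-flop on every allocation near the limit.
 * example_state, example_usage, example_limit and example_clean()
 * are hypothetical.
 */
#if 0
static int example_state = CHI_LOW;	/* reuses the enum above */
static long example_usage;		/* current resource count */
static long example_limit;		/* high water mark */
static void example_clean(void);	/* hypothetical cleaner */

static void
example_hysteresis(void)
{
	switch(example_state) {
	case CHI_LOW:
		if (example_usage > example_limit) {
			example_clean();
			example_state = CHI_HIGH;
		}
		break;
	case CHI_HIGH:
		if (example_usage > example_limit * 9 / 10)
			example_clean();	/* keep cleaning to 90% */
		else
			example_state = CHI_LOW;	/* re-arm */
		break;
	}
}
#endif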
2163
2164/*
2165 * NEW NAMECACHE LOOKUP API
2166 *
2167 * Lookup an entry in the namecache. The passed par_nch must be referenced
2168 * and unlocked. A referenced and locked nchandle with a non-NULL nch.ncp
2169 * is ALWAYS returned, even if the supplied component is illegal.
2170 *
2171 * The resulting namecache entry should be returned to the system with
2172 * cache_put() or cache_unlock() + cache_drop().
2173 *
2174 * namecache locks are recursive but care must be taken to avoid lock order
2175 * reversals (hence the requirement that the passed par_nch be unlocked).
2176 * Lock ordering is defined for parent traversals, not for child traversals.
2177 *
2178 * Nobody else will be able to manipulate the associated namespace (e.g.
2179 * create, delete, rename, rename-target) until the caller unlocks the
2180 * entry.
2181 *
2182 * The returned entry will be in one of three states: positive hit (non-null
2183 * vnode), negative hit (null vnode), or unresolved (NCF_UNRESOLVED is set).
2184 * Unresolved entries must be resolved through the filesystem to associate the
2185 * vnode and/or determine whether a positive or negative hit has occurred.
2186 *
2187 * It is not necessary to lock a directory in order to lock namespace under
2188 * that directory. In fact, it is explicitly not allowed to do that. A
2189 * directory is typically only locked when being created, renamed, or
2190 * destroyed.
2191 *
2192 * The directory (par) may be unresolved, in which case any returned child
2193 * will likely also be marked unresolved. Likely but not guaranteed. Since
2194 * the filesystem lookup requires a resolved directory vnode the caller is
2195 * responsible for resolving the namecache chain top-down. This API
2196 * specifically allows whole chains to be created in an unresolved state.
2197 */
2198struct nchandle
2199cache_nlookup(struct nchandle *par_nch, struct nlcomponent *nlc)
2200{
2201 struct nchandle nch;
2202 struct namecache *ncp;
2203 struct namecache *new_ncp;
2204 struct nchash_head *nchpp;
2205 struct mount *mp;
2206 u_int32_t hash;
2207 globaldata_t gd;
2208 int par_locked;
2209
2210 numcalls++;
2211 gd = mycpu;
2212 mp = par_nch->mount;
2213 par_locked = 0;
2214
2215 /*
2216	 * This is a good time to call cache_hysteresis() since no ncp's
2217	 * are locked by the caller or by us.
2218 */
2219 cache_hysteresis();
2220
2221 /*
2222 * Try to locate an existing entry
2223 */
2224 hash = fnv_32_buf(nlc->nlc_nameptr, nlc->nlc_namelen, FNV1_32_INIT);
2225 hash = fnv_32_buf(&par_nch->ncp, sizeof(par_nch->ncp), hash);
2226 new_ncp = NULL;
2227 nchpp = NCHHASH(hash);
2228restart:
2229 spin_lock_wr(&nchpp->spin);
2230 LIST_FOREACH(ncp, &nchpp->list, nc_hash) {
2231 numchecks++;
2232
2233 /*
2234 * Break out if we find a matching entry. Note that
2235 * UNRESOLVED entries may match, but DESTROYED entries
2236 * do not.
2237 */
2238 if (ncp->nc_parent == par_nch->ncp &&
2239 ncp->nc_nlen == nlc->nlc_namelen &&
2240 bcmp(ncp->nc_name, nlc->nlc_nameptr, ncp->nc_nlen) == 0 &&
2241 (ncp->nc_flag & NCF_DESTROYED) == 0
2242 ) {
2243 _cache_hold(ncp);
2244 spin_unlock_wr(&nchpp->spin);
2245 if (par_locked) {
2246 _cache_unlock(par_nch->ncp);
2247 par_locked = 0;
2248 }
2249 if (_cache_lock_special(ncp) == 0) {
2250 _cache_auto_unresolve(mp, ncp);
2251 if (new_ncp)
2252 _cache_free(new_ncp);
2253 goto found;
2254 }
2255 _cache_get(ncp);
2256 _cache_put(ncp);
2257 _cache_drop(ncp);
2258 goto restart;
2259 }
2260 }
2261
2262 /*
2263 * We failed to locate an entry, create a new entry and add it to
2264 * the cache. The parent ncp must also be locked so we
2265 * can link into it.
2266 *
2267 * We have to relookup after possibly blocking in kmalloc or
2268 * when locking par_nch.
2269 *
2270 * NOTE: nlc_namelen can be 0 and nlc_nameptr NULL as a special
2271 * mount case, in which case nc_name will be NULL.
2272 */
2273 if (new_ncp == NULL) {
2274 spin_unlock_wr(&nchpp->spin);
2275 new_ncp = cache_alloc(nlc->nlc_namelen);
2276 if (nlc->nlc_namelen) {
2277 bcopy(nlc->nlc_nameptr, new_ncp->nc_name,
2278 nlc->nlc_namelen);
2279 new_ncp->nc_name[nlc->nlc_namelen] = 0;
2280 }
2281 goto restart;
2282 }
2283 if (par_locked == 0) {
2284 spin_unlock_wr(&nchpp->spin);
2285 _cache_lock(par_nch->ncp);
2286 par_locked = 1;
2287 goto restart;
2288 }
2289
2290 /*
2291 * WARNING! We still hold the spinlock. We have to set the hash
2292 * table entry atomically.
2293 */
2294 ncp = new_ncp;
2295 _cache_link_parent(ncp, par_nch->ncp, nchpp);
2296 spin_unlock_wr(&nchpp->spin);
2297 _cache_unlock(par_nch->ncp);
2298 /* par_locked = 0 - not used */
2299found:
2300 /*
2301 * stats and namecache size management
2302 */
2303 if (ncp->nc_flag & NCF_UNRESOLVED)
2304 ++gd->gd_nchstats->ncs_miss;
2305 else if (ncp->nc_vp)
2306 ++gd->gd_nchstats->ncs_goodhits;
2307 else
2308 ++gd->gd_nchstats->ncs_neghits;
2309 nch.mount = mp;
2310 nch.ncp = ncp;
2311 atomic_add_int(&nch.mount->mnt_refs, 1);
2312 return(nch);
2313}
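
/*
 * Illustrative sketch, not part of the original file: typical
 * consumption of cache_nlookup().  The parent nchandle is referenced
 * but unlocked on entry; the returned entry is locked and referenced
 * and must be returned with cache_put() (or cache_unlock() +
 * cache_drop()).  The name example_lookup_one and the component
 * string are hypothetical.
 */
#if 0
static int
example_lookup_one(struct nchandle *par_nch, struct ucred *cred)
{
	struct nlcomponent nlc;
	struct nchandle nch;
	int error;

	nlc.nlc_nameptr = "example";		/* hypothetical component */
	nlc.nlc_namelen = 7;
	nch = cache_nlookup(par_nch, &nlc);	/* nch.ncp is never NULL */
	if (nch.ncp->nc_flag & NCF_UNRESOLVED)
		error = cache_resolve(&nch, cred); /* ENOENT on neg hit */
	else
		error = nch.ncp->nc_error;
	cache_put(&nch);			/* unlock + drop */
	return (error);
}
#endif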
2314
2315/*
2316 * This is a non-blocking version of cache_nlookup() used by
2317 * nfs_readdirplusrpc_uio(). It can fail for any reason and
2318 * will return nch.ncp == NULL in that case.
2319 */
2320struct nchandle
2321cache_nlookup_nonblock(struct nchandle *par_nch, struct nlcomponent *nlc)
2322{
2323 struct nchandle nch;
2324 struct namecache *ncp;
2325 struct namecache *new_ncp;
2326 struct nchash_head *nchpp;
2327 struct mount *mp;
2328 u_int32_t hash;
2329 globaldata_t gd;
2330 int par_locked;
2331
2332 numcalls++;
2333 gd = mycpu;
2334 mp = par_nch->mount;
2335 par_locked = 0;
2336
2337 /*
2338 * Try to locate an existing entry
2339 */
2340 hash = fnv_32_buf(nlc->nlc_nameptr, nlc->nlc_namelen, FNV1_32_INIT);
2341 hash = fnv_32_buf(&par_nch->ncp, sizeof(par_nch->ncp), hash);
2342 new_ncp = NULL;
2343 nchpp = NCHHASH(hash);
2344restart:
2345 spin_lock_wr(&nchpp->spin);
2346 LIST_FOREACH(ncp, &nchpp->list, nc_hash) {
2347 numchecks++;
2348
2349 /*
2350 * Break out if we find a matching entry. Note that
2351 * UNRESOLVED entries may match, but DESTROYED entries
2352 * do not.
2353 */
2354 if (ncp->nc_parent == par_nch->ncp &&
2355 ncp->nc_nlen == nlc->nlc_namelen &&
2356 bcmp(ncp->nc_name, nlc->nlc_nameptr, ncp->nc_nlen) == 0 &&
2357 (ncp->nc_flag & NCF_DESTROYED) == 0
2358 ) {
2359 _cache_hold(ncp);
2360 spin_unlock_wr(&nchpp->spin);
2361 if (par_locked) {
2362 _cache_unlock(par_nch->ncp);
2363 par_locked = 0;
2364 }
2365 if (_cache_lock_special(ncp) == 0) {
2366 _cache_auto_unresolve(mp, ncp);
2367 if (new_ncp) {
2368 _cache_free(new_ncp);
2369 new_ncp = NULL;
2370 }
2371 goto found;
2372 }
2373 _cache_drop(ncp);
2374 goto failed;
2375 }
2376 }
2377
2378 /*
2379 * We failed to locate an entry, create a new entry and add it to
2380 * the cache. The parent ncp must also be locked so we
2381 * can link into it.
2382 *
2383 * We have to relookup after possibly blocking in kmalloc or
2384 * when locking par_nch.
2385 *
2386 * NOTE: nlc_namelen can be 0 and nlc_nameptr NULL as a special
2387 * mount case, in which case nc_name will be NULL.
2388 */
2389 if (new_ncp == NULL) {
2390 spin_unlock_wr(&nchpp->spin);
2391 new_ncp = cache_alloc(nlc->nlc_namelen);
2392 if (nlc->nlc_namelen) {
2393 bcopy(nlc->nlc_nameptr, new_ncp->nc_name,
2394 nlc->nlc_namelen);
2395 new_ncp->nc_name[nlc->nlc_namelen] = 0;
2396 }
2397 goto restart;
2398 }
2399 if (par_locked == 0) {
2400 spin_unlock_wr(&nchpp->spin);
2401 if (_cache_lock_nonblock(par_nch->ncp) == 0) {
2402 par_locked = 1;
2403 goto restart;
2404 }
2405 goto failed;
2406 }
2407
2408 /*
2409 * WARNING! We still hold the spinlock. We have to set the hash
2410 * table entry atomically.
2411 */
2412 ncp = new_ncp;
2413 _cache_link_parent(ncp, par_nch->ncp, nchpp);
2414 spin_unlock_wr(&nchpp->spin);
2415 _cache_unlock(par_nch->ncp);
2416 /* par_locked = 0 - not used */
2417found:
2418 /*
2419 * stats and namecache size management
2420 */
2421 if (ncp->nc_flag & NCF_UNRESOLVED)
2422 ++gd->gd_nchstats->ncs_miss;
2423 else if (ncp->nc_vp)
2424 ++gd->gd_nchstats->ncs_goodhits;
2425 else
2426 ++gd->gd_nchstats->ncs_neghits;
2427 nch.mount = mp;
2428 nch.ncp = ncp;
2429 atomic_add_int(&nch.mount->mnt_refs, 1);
2430 return(nch);
2431failed:
2432 if (new_ncp) {
2433 _cache_free(new_ncp);
2434 new_ncp = NULL;
2435 }
2436 nch.mount = NULL;
2437 nch.ncp = NULL;
2438 return(nch);
2439}
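
/*
 * Illustrative sketch, not part of the original file: unlike
 * cache_nlookup(), the non-blocking version can fail, so callers
 * such as nfs_readdirplusrpc_uio() must check nch.ncp before use.
 * The name example_try_lookup is hypothetical.
 */
#if 0
static int
example_try_lookup(struct nchandle *par_nch, struct nlcomponent *nlc)
{
	struct nchandle nch;

	nch = cache_nlookup_nonblock(par_nch, nlc);
	if (nch.ncp == NULL)
		return (EWOULDBLOCK);	/* could not lock, give up */
	/* ... use the locked, referenced entry here ... */
	cache_put(&nch);
	return (0);
}
#endif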
2440
2441/*
2442 * The namecache entry is marked as being used as a mount point.
2443 * Locate the mount if it is visible to the caller.
2444 */
2445struct findmount_info {
2446 struct mount *result;
2447 struct mount *nch_mount;
2448 struct namecache *nch_ncp;
2449};
2450
2451static
2452int
2453cache_findmount_callback(struct mount *mp, void *data)
2454{
2455 struct findmount_info *info = data;
2456
2457 /*
2458 * Check the mount's mounted-on point against the passed nch.
2459 */
2460 if (mp->mnt_ncmounton.mount == info->nch_mount &&
2461 mp->mnt_ncmounton.ncp == info->nch_ncp
2462 ) {
2463 info->result = mp;
2464 return(-1);
2465 }
2466 return(0);
2467}
2468
2469struct mount *
2470cache_findmount(struct nchandle *nch)
2471{
2472 struct findmount_info info;
2473
2474 info.result = NULL;
2475 info.nch_mount = nch->mount;
2476 info.nch_ncp = nch->ncp;
2477 mountlist_scan(cache_findmount_callback, &info,
2478 MNTSCAN_FORWARD|MNTSCAN_NOBUSY);
2479 return(info.result);
2480}
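
/*
 * Illustrative sketch, not part of the original file, assuming the
 * nchandle-based cache_hold()/cache_drop() API: crossing into a
 * mounted filesystem during a downward traversal.  If an ncp has a
 * mount on it, cache_findmount() maps it to that mount, whose
 * mnt_ncmountpt root is then entered.  The name example_cross_mount
 * is hypothetical.
 */
#if 0
static void
example_cross_mount(struct nchandle *nch)
{
	struct nchandle tmp;
	struct mount *mp;

	while ((mp = cache_findmount(nch)) != NULL) {
		tmp = mp->mnt_ncmountpt;	/* root of the mounted fs */
		cache_hold(&tmp);
		cache_drop(nch);
		*nch = tmp;			/* continue below the mount */
	}
}
#endif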
2481
2482/*
2483 * Resolve an unresolved namecache entry, generally by looking it up.
2484 * The passed ncp must be locked and refd.
2485 *
2486 * Theoretically since a vnode cannot be recycled while held, and since
2487 * the nc_parent chain holds its vnode as long as children exist, the
2488 * direct parent of the cache entry we are trying to resolve should
2489 * have a valid vnode. If not then generate an error that we can
2490 * determine is related to a resolver bug.
2491 *
2492 * However, if a vnode was in the middle of a recyclement when the NCP
2493 * got locked, ncp->nc_vp might point to a vnode that is about to become
2494 * invalid. cache_resolve() handles this case by unresolving the entry
2495 * and then re-resolving it.
2496 *
2497 * Note that successful resolution does not necessarily return an error
2498 * code of 0. If the ncp resolves to a negative cache hit then ENOENT
2499 * will be returned.
2500 *
2501 * MPSAFE
2502 */
2503int
2504cache_resolve(struct nchandle *nch, struct ucred *cred)
2505{
2506 struct namecache *par_tmp;
2507 struct namecache *par;
2508 struct namecache *ncp;
2509 struct nchandle nctmp;
2510 struct mount *mp;
2511 struct vnode *dvp;
2512 int error;
2513
2514 ncp = nch->ncp;
2515 mp = nch->mount;
2516restart:
2517 /*
2518 * If the ncp is already resolved we have nothing to do. However,
2519 * we do want to guarantee that a usable vnode is returned when
2520 * a vnode is present, so make sure it hasn't been reclaimed.
2521 */
2522 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
2523 if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED))
2524 _cache_setunresolved(ncp);
2525 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0)
2526 return (ncp->nc_error);
2527 }
2528
2529 /*
2530 * Mount points need special handling because the parent does not
2531 * belong to the same filesystem as the ncp.
2532 */
2533 if (ncp == mp->mnt_ncmountpt.ncp)
2534 return (cache_resolve_mp(mp));
2535
2536 /*
2537 * We expect an unbroken chain of ncps to at least the mount point,
2538 * and even all the way to root (but this code doesn't have to go
2539 * past the mount point).
2540 */
2541 if (ncp->nc_parent == NULL) {
2542 kprintf("EXDEV case 1 %p %*.*s\n", ncp,
2543 ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name);
2544 ncp->nc_error = EXDEV;
2545 return(ncp->nc_error);
2546 }
2547
2548 /*
2549 * The vp's of the parent directories in the chain are held via vhold()
2550 * due to the existence of the child, and should not disappear.
2551 * However, there are cases where they can disappear:
2552 *
2553 * - due to filesystem I/O errors.
2554 *	- due to NFS being stupid about tracking the namespace and
2555 *	  destroying the namespace for entire directories quite often.
2556 * - due to forced unmounts.
2557 * - due to an rmdir (parent will be marked DESTROYED)
2558 *
2559 * When this occurs we have to track the chain backwards and resolve
2560 * it, looping until the resolver catches up to the current node. We
2561 * could recurse here but we might run ourselves out of kernel stack
2562 * so we do it in a more painful manner. This situation really should
2563 * not occur all that often and, when it does, should not have to go
2564 * back too many nodes to resolve the ncp.
2565 */
2566 while ((dvp = cache_dvpref(ncp)) == NULL) {
2567 /*
2568 * This case can occur if a process is CD'd into a
2569 * directory which is then rmdir'd. If the parent is marked
2570 * destroyed there is no point trying to resolve it.
2571 */
2572 if (ncp->nc_parent->nc_flag & NCF_DESTROYED)
2573 return(ENOENT);
2574 par = ncp->nc_parent;
2575 _cache_hold(par);
2576 _cache_lock(par);
2577 while ((par_tmp = par->nc_parent) != NULL &&
2578 par_tmp->nc_vp == NULL) {
2579 _cache_hold(par_tmp);
2580 _cache_lock(par_tmp);
2581 _cache_put(par);
2582 par = par_tmp;
2583 }
2584 if (par->nc_parent == NULL) {
2585 kprintf("EXDEV case 2 %*.*s\n",
2586 par->nc_nlen, par->nc_nlen, par->nc_name);
2587 _cache_put(par);
2588 return (EXDEV);
2589 }
2590 kprintf("[diagnostic] cache_resolve: had to recurse on %*.*s\n",
2591 par->nc_nlen, par->nc_nlen, par->nc_name);
2592 /*
2593 * The parent is not set in stone, ref and lock it to prevent
2594 * it from disappearing. Also note that due to renames it
2595 * is possible for our ncp to move and for par to no longer
2596 * be one of its parents. We resolve it anyway, the loop
2597 * will handle any moves.
2598 */
2599 _cache_get(par); /* additional hold/lock */
2600 _cache_put(par); /* from earlier hold/lock */
2601 if (par == nch->mount->mnt_ncmountpt.ncp) {
2602 cache_resolve_mp(nch->mount);
2603 } else if ((dvp = cache_dvpref(par)) == NULL) {
2604 kprintf("[diagnostic] cache_resolve: raced on %*.*s\n", par->nc_nlen, par->nc_nlen, par->nc_name);
2605 _cache_put(par);
2606 continue;
2607 } else {
2608 if (par->nc_flag & NCF_UNRESOLVED) {
2609 nctmp.mount = mp;
2610 nctmp.ncp = par;
2611 par->nc_error = VOP_NRESOLVE(&nctmp, dvp, cred);
2612 }
2613 vrele(dvp);
2614 }
2615 if ((error = par->nc_error) != 0) {
2616 if (par->nc_error != EAGAIN) {
2617 kprintf("EXDEV case 3 %*.*s error %d\n",
2618 par->nc_nlen, par->nc_nlen, par->nc_name,
2619 par->nc_error);
2620 _cache_put(par);
2621 return(error);
2622 }
2623 kprintf("[diagnostic] cache_resolve: EAGAIN par %p %*.*s\n",
2624 par, par->nc_nlen, par->nc_nlen, par->nc_name);
2625 }
2626 _cache_put(par);
2627 /* loop */
2628 }
2629
2630 /*
2631 * Call VOP_NRESOLVE() to get the vp, then scan for any disconnected
2632 * ncp's and reattach them. If this occurs the original ncp is marked
2633 * EAGAIN to force a relookup.
2634 *
2635 * NOTE: in order to call VOP_NRESOLVE(), the parent of the passed
2636 * ncp must already be resolved.
2637 */
2638 if (dvp) {
2639 nctmp.mount = mp;
2640 nctmp.ncp = ncp;
2641 ncp->nc_error = VOP_NRESOLVE(&nctmp, dvp, cred);
2642 vrele(dvp);
2643 } else {
2644 ncp->nc_error = EPERM;
2645 }
2646 if (ncp->nc_error == EAGAIN) {
2647 kprintf("[diagnostic] cache_resolve: EAGAIN ncp %p %*.*s\n",
2648 ncp, ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name);
2649 goto restart;
2650 }
2651 return(ncp->nc_error);
2652}
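
/*
 * Illustrative sketch, not part of the original file: interpreting
 * cache_resolve()'s return value.  ENOENT with a resolved ncp is a
 * negative cache hit, i.e. cached nonexistence, not a failure of the
 * resolve itself.  The name example_resolved_kind is hypothetical.
 */
#if 0
static const char *
example_resolved_kind(struct nchandle *nch, struct ucred *cred)
{
	int error;

	error = cache_resolve(nch, cred);	/* nch locked and ref'd */
	if (error == 0 && nch->ncp->nc_vp != NULL)
		return ("positive hit");
	if (error == ENOENT && (nch->ncp->nc_flag & NCF_UNRESOLVED) == 0)
		return ("negative hit");	/* cached nonexistence */
	return ("resolve error");
}
#endif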
2653
2654/*
2655 * Resolve the ncp associated with a mount point. Such ncp's almost always
2656 * remain resolved and this routine is rarely called. NFS MPs tend to force
2657 * re-resolution more often due to NFS's mack-truck-smash-the-namecache
2658 * method of tracking namespace changes.
2659 *
2660 * The semantics for this call is that the passed ncp must be locked on
2661 * entry and will be locked on return. However, if we actually have to
2662 * resolve the mount point we temporarily unlock the entry in order to
2663 * avoid race-to-root deadlocks due to e.g. dead NFS mounts. Because of
2664 * the unlock we have to recheck the flags after we relock.
2665 */
2666static int
2667cache_resolve_mp(struct mount *mp)
2668{
2669 struct namecache *ncp = mp->mnt_ncmountpt.ncp;
2670 struct vnode *vp;
2671 int error;
2672
2673 KKASSERT(mp != NULL);
2674
2675 /*
2676 * If the ncp is already resolved we have nothing to do. However,
2677 * we do want to guarantee that a usable vnode is returned when
2678 * a vnode is present, so make sure it hasn't been reclaimed.
2679 */
2680 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
2681 if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED))
2682 _cache_setunresolved(ncp);
2683 }
2684
2685 if (ncp->nc_flag & NCF_UNRESOLVED) {
2686 _cache_unlock(ncp);
2687 while (vfs_busy(mp, 0))
2688 ;
2689 error = VFS_ROOT(mp, &vp);
2690 _cache_lock(ncp);
2691
2692 /*
2693 * recheck the ncp state after relocking.
2694 */
2695 if (ncp->nc_flag & NCF_UNRESOLVED) {
2696 ncp->nc_error = error;
2697 if (error == 0) {
2698 _cache_setvp(mp, ncp, vp);
2699 vput(vp);
2700 } else {
2701 kprintf("[diagnostic] cache_resolve_mp: failed"
2702 " to resolve mount %p err=%d ncp=%p\n",
2703 mp, error, ncp);
2704 _cache_setvp(mp, ncp, NULL);
2705 }
2706 } else if (error == 0) {
2707 vput(vp);
2708 }
2709 vfs_unbusy(mp);
2710 }
2711 return(ncp->nc_error);
2712}
2713
2714/*
2715 * Clean out negative cache entries when too many have accumulated.
2716 *
2717 * MPSAFE
2718 */
2719static void
2720_cache_cleanneg(int count)
2721{
2722 struct namecache *ncp;
2723
2724 /*
2725 * Automode from the vnlru proc - clean out 10% of the negative cache
2726 * entries.
2727 */
2728 if (count == 0)
2729 count = numneg / 10 + 1;
2730
2731 /*
2732 * Attempt to clean out the specified number of negative cache
2733 * entries.
2734 */
2735 while (count) {
2736 spin_lock_wr(&ncspin);
2737 ncp = TAILQ_FIRST(&ncneglist);
2738 if (ncp == NULL) {
2739 spin_unlock_wr(&ncspin);
2740 break;
2741 }
2742 TAILQ_REMOVE(&ncneglist, ncp, nc_vnode);
2743 TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
2744 _cache_hold(ncp);
2745 spin_unlock_wr(&ncspin);
2746 if (_cache_lock_special(ncp) == 0) {
2747 ncp = cache_zap(ncp, 0);
2748 if (ncp)
2749 _cache_drop(ncp);
2750 } else {
2751 _cache_drop(ncp);
2752 }
2753 --count;
2754 }
2755}
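
/*
 * Illustrative sketch, not part of the original file: the
 * rotate-before-drop pattern used by _cache_cleanneg().  The victim
 * is moved to the list tail while the spinlock is still held, so if
 * zapping fails the next pass does not keep retrying the same entry.
 * The name example_rotate_head is hypothetical.
 */
#if 0
static struct namecache *
example_rotate_head(void)
{
	struct namecache *ncp;

	spin_lock_wr(&ncspin);
	ncp = TAILQ_FIRST(&ncneglist);
	if (ncp) {
		TAILQ_REMOVE(&ncneglist, ncp, nc_vnode);
		TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
		_cache_hold(ncp);	/* ref before losing the spinlock */
	}
	spin_unlock_wr(&ncspin);
	return (ncp);
}
#endif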
2756
2757/*
2758 * This is a kitchen sink function to clean out ncps which we
2759 * tried to zap from cache_drop() but failed because we were
2760 * unable to acquire the parent lock.
2761 *
2762 * Such entries can also be removed via cache_inval_vp(), such
2763 * as when unmounting.
2764 *
2765 * MPSAFE
2766 */
2767static void
2768_cache_cleandefered(void)
2769{
2770 struct nchash_head *nchpp;
2771 struct namecache *ncp;
2772 struct namecache dummy;
2773 int i;
2774
2775 numdefered = 0;
2776 bzero(&dummy, sizeof(dummy));
2777 dummy.nc_flag = NCF_DESTROYED;
2778
2779 for (i = 0; i <= nchash; ++i) {
2780 nchpp = &nchashtbl[i];
2781
2782 spin_lock_wr(&nchpp->spin);
2783 LIST_INSERT_HEAD(&nchpp->list, &dummy, nc_hash);
2784 ncp = &dummy;
2785 while ((ncp = LIST_NEXT(ncp, nc_hash)) != NULL) {
2786 if ((ncp->nc_flag & NCF_DEFEREDZAP) == 0)
2787 continue;
2788 LIST_REMOVE(&dummy, nc_hash);
2789 LIST_INSERT_AFTER(ncp, &dummy, nc_hash);
2790 _cache_hold(ncp);
2791 spin_unlock_wr(&nchpp->spin);
2792 if (_cache_lock_nonblock(ncp) == 0) {
2793 ncp->nc_flag &= ~NCF_DEFEREDZAP;
2794 _cache_unlock(ncp);
2795 }
2796 _cache_drop(ncp);
2797 spin_lock_wr(&nchpp->spin);
2798 ncp = &dummy;
2799 }
2800 LIST_REMOVE(&dummy, nc_hash);
2801 spin_unlock_wr(&nchpp->spin);
2802 }
2803}
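
/*
 * Illustrative sketch, not part of the original file: the placeholder
 * technique used by _cache_cleandefered().  A dummy entry marked
 * NCF_DESTROYED (so normal lookups ignore it) is threaded into the
 * hash chain to remember the scan position while the spinlock is
 * dropped to work on a real entry.  The name example_scan_chain is
 * hypothetical.
 */
#if 0
static void
example_scan_chain(struct nchash_head *nchpp, struct namecache *dummy)
{
	struct namecache *ncp;

	spin_lock_wr(&nchpp->spin);
	LIST_INSERT_HEAD(&nchpp->list, dummy, nc_hash);
	ncp = dummy;
	while ((ncp = LIST_NEXT(ncp, nc_hash)) != NULL) {
		/* move the placeholder past the entry being worked on */
		LIST_REMOVE(dummy, nc_hash);
		LIST_INSERT_AFTER(ncp, dummy, nc_hash);
		_cache_hold(ncp);
		spin_unlock_wr(&nchpp->spin);
		/* ... blocking work on ncp goes here ... */
		_cache_drop(ncp);
		spin_lock_wr(&nchpp->spin);
		ncp = dummy;		/* resume from the placeholder */
	}
	LIST_REMOVE(dummy, nc_hash);
	spin_unlock_wr(&nchpp->spin);
}
#endif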
2804
2805/*
2806 * Name cache initialization, from vfsinit() when we are booting
2807 */
2808void
2809nchinit(void)
2810{
2811 int i;
2812 globaldata_t gd;
2813
2814 /* initialise per-cpu namecache effectiveness statistics. */
2815 for (i = 0; i < ncpus; ++i) {
2816 gd = globaldata_find(i);
2817 gd->gd_nchstats = &nchstats[i];
2818 }
2819 TAILQ_INIT(&ncneglist);
2820 spin_init(&ncspin);
2821 nchashtbl = hashinit_ext(desiredvnodes*2, sizeof(struct nchash_head),
2822 M_VFSCACHE, &nchash);
2823 for (i = 0; i <= (int)nchash; ++i) {
2824 LIST_INIT(&nchashtbl[i].list);
2825 spin_init(&nchashtbl[i].spin);
2826 }
2827 nclockwarn = 5 * hz;
2828}
2829
2830/*
2831 * Called from start_init() to bootstrap the root filesystem. Returns
2832 * a referenced, unlocked namecache record.
2833 */
2834void
2835cache_allocroot(struct nchandle *nch, struct mount *mp, struct vnode *vp)
2836{
2837 nch->ncp = cache_alloc(0);
2838 nch->mount = mp;
2839 atomic_add_int(&mp->mnt_refs, 1);
2840 if (vp)
2841 _cache_setvp(nch->mount, nch->ncp, vp);
2842}
2843
2844/*
2845 * vfs_cache_setroot()
2846 *
2847 * Create an association between the root of our namecache and
2848 * the root vnode. This routine may be called several times during
2849 * booting.
2850 *
2851 * If the caller intends to save the returned namecache pointer somewhere
2852 * it must cache_hold() it.
2853 */
2854void
2855vfs_cache_setroot(struct vnode *nvp, struct nchandle *nch)
2856{
2857 struct vnode *ovp;
2858 struct nchandle onch;
2859
2860 ovp = rootvnode;
2861 onch = rootnch;
2862 rootvnode = nvp;
2863 if (nch)
2864 rootnch = *nch;
2865 else
2866 cache_zero(&rootnch);
2867 if (ovp)
2868 vrele(ovp);
2869 if (onch.ncp)
2870 cache_drop(&onch);
2871}
2872
2873/*
2874 * XXX OLD API COMPAT FUNCTION. This really messes up the new namecache
2875 * topology and is being removed as quickly as possible. The new VOP_N*()
2876 * API calls are required to make specific adjustments using the supplied
2877 * ncp pointers rather than just bogusly purging random vnodes.
2878 *
2879 * Invalidate all namecache entries to a particular vnode as well as
2880 * any direct children of that vnode in the namecache. This is a
2881 * 'catch all' purge used by filesystems that do not know any better.
2882 *
2883 * Note that the linkage between the vnode and its namecache entries will
2884 * be removed, but the namecache entries themselves might stay put due to
2885 * active references from elsewhere in the system or due to the existence of
2886 * the children. The namecache topology is left intact even if we do not
2887 * know what the vnode association is. Such entries will be marked
2888 * NCF_UNRESOLVED.
2889 */
2890void
2891cache_purge(struct vnode *vp)
2892{
2893 cache_inval_vp(vp, CINV_DESTROY | CINV_CHILDREN);
2894}
2895
2896/*
2897 * Flush all entries referencing a particular filesystem.
2898 *
2899 * Since we need to check it anyway, we will flush all the invalid
2900 * entries at the same time.
2901 */
2902#if 0
2903
2904void
2905cache_purgevfs(struct mount *mp)
2906{
2907 struct nchash_head *nchpp;
2908 struct namecache *ncp, *nnp;
2909
2910 /*
2911 * Scan hash tables for applicable entries.
2912 */
2913 for (nchpp = &nchashtbl[nchash]; nchpp >= nchashtbl; nchpp--) {
2914		spin_lock_wr(&nchpp->spin);	/* XXX */
2915 ncp = LIST_FIRST(&nchpp->list);
2916 if (ncp)
2917 _cache_hold(ncp);
2918 while (ncp) {
2919 nnp = LIST_NEXT(ncp, nc_hash);
2920 if (nnp)
2921 _cache_hold(nnp);
2922 if (ncp->nc_mount == mp) {
2923 _cache_lock(ncp);
2924 ncp = cache_zap(ncp, 0);
2925 if (ncp)
2926 _cache_drop(ncp);
2927 } else {
2928 _cache_drop(ncp);
2929 }
2930 ncp = nnp;
2931 }
2932		spin_unlock_wr(&nchpp->spin);	/* XXX */
2933 }
2934}
2935
2936#endif
2937
2938static int disablecwd;
2939SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, "");
2940
2941static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
2942static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
2943static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
2944static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
2945static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
2946static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);
2947
2948/*
2949 * MPALMOSTSAFE
2950 */
2951int
2952sys___getcwd(struct __getcwd_args *uap)
2953{
2954 int buflen;
2955 int error;
2956 char *buf;
2957 char *bp;
2958
2959 if (disablecwd)
2960 return (ENODEV);
2961
2962 buflen = uap->buflen;
2963 if (buflen == 0)
2964 return (EINVAL);
2965 if (buflen > MAXPATHLEN)
2966 buflen = MAXPATHLEN;
2967
2968 buf = kmalloc(buflen, M_TEMP, M_WAITOK);
2969 get_mplock();
2970 bp = kern_getcwd(buf, buflen, &error);
2971 rel_mplock();
2972 if (error == 0)
2973 error = copyout(bp, uap->buf, strlen(bp) + 1);
2974 kfree(buf, M_TEMP);
2975 return (error);
2976}
2977
2978char *
2979kern_getcwd(char *buf, size_t buflen, int *error)
2980{
2981 struct proc *p = curproc;
2982 char *bp;
2983 int i, slash_prefixed;
2984 struct filedesc *fdp;
2985 struct nchandle nch;
2986 struct namecache *ncp;
2987
2988 numcwdcalls++;
2989 bp = buf;
2990 bp += buflen - 1;
2991 *bp = '\0';
2992 fdp = p->p_fd;
2993 slash_prefixed = 0;
2994
2995 nch = fdp->fd_ncdir;
2996 ncp = nch.ncp;
2997 if (ncp)
2998 _cache_hold(ncp);
2999
3000 while (ncp && (ncp != fdp->fd_nrdir.ncp ||
3001 nch.mount != fdp->fd_nrdir.mount)
3002 ) {
3003 /*
3004 * While traversing upwards if we encounter the root
3005 * of the current mount we have to skip to the mount point
3006 * in the underlying filesystem.
3007 */
3008 if (ncp == nch.mount->mnt_ncmountpt.ncp) {
3009 nch = nch.mount->mnt_ncmounton;
3010 _cache_drop(ncp);
3011 ncp = nch.ncp;
3012 if (ncp)
3013 _cache_hold(ncp);
3014 continue;
3015 }
3016
3017 /*
3018 * Prepend the path segment
3019 */
3020 for (i = ncp->nc_nlen - 1; i >= 0; i--) {
3021 if (bp == buf) {
3022 numcwdfail4++;
3023 *error = ERANGE;
3024 bp = NULL;
3025 goto done;
3026 }
3027 *--bp = ncp->nc_name[i];
3028 }
3029 if (bp == buf) {
3030 numcwdfail4++;
3031 *error = ERANGE;
3032 bp = NULL;
3033 goto done;
3034 }
3035 *--bp = '/';
3036 slash_prefixed = 1;
3037
3038 /*
3039 * Go up a directory. This isn't a mount point so we don't
3040 * have to check again.
3041 */
3042 while ((nch.ncp = ncp->nc_parent) != NULL) {
3043 _cache_lock(ncp);
3044 if (nch.ncp != ncp->nc_parent) {
3045 _cache_unlock(ncp);
3046 continue;
3047 }
3048 _cache_hold(nch.ncp);
3049 _cache_unlock(ncp);
3050 break;
3051 }
3052 _cache_drop(ncp);
3053 ncp = nch.ncp;
3054 }
3055 if (ncp == NULL) {
3056 numcwdfail2++;
3057 *error = ENOENT;
3058 bp = NULL;
3059 goto done;
3060 }
3061 if (!slash_prefixed) {
3062 if (bp == buf) {
3063 numcwdfail4++;
3064 *error = ERANGE;
3065 bp = NULL;
3066 goto done;
3067 }
3068 *--bp = '/';
3069 }
3070 numcwdfound++;
3071 *error = 0;
3072done:
3073 if (ncp)
3074 _cache_drop(ncp);
3075 return (bp);
3076}
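
/*
 * Illustrative sketch, not part of the original file: the
 * build-backwards pattern used by kern_getcwd() above and
 * cache_fullpath() below.  Components are copied in front of a cursor
 * that starts at the end of the buffer, so no memmove is needed even
 * though the total length is unknown up front.  The name
 * example_prepend is hypothetical.
 */
#if 0
static char *
example_prepend(char *buf, char *bp, const char *name, int len)
{
	int i;

	for (i = len - 1; i >= 0; i--) {
		if (bp == buf)
			return (NULL);	/* out of space -> ERANGE */
		*--bp = name[i];
	}
	if (bp == buf)
		return (NULL);
	*--bp = '/';
	return (bp);			/* new start of the string */
}
#endif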
3077
3078/*
3079 * Thus begins the fullpath magic.
3080 *
3081 * The passed nchp is referenced but not locked.
3082 */
3083#undef STATNODE
3084#define STATNODE(name) \
3085 static u_int name; \
3086 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")
3087
3088static int disablefullpath;
3089SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW,
3090 &disablefullpath, 0, "");
3091
3092STATNODE(numfullpathcalls);
3093STATNODE(numfullpathfail1);
3094STATNODE(numfullpathfail2);
3095STATNODE(numfullpathfail3);
3096STATNODE(numfullpathfail4);
3097STATNODE(numfullpathfound);
3098
3099int
3100cache_fullpath(struct proc *p, struct nchandle *nchp,
3101 char **retbuf, char **freebuf)
3102{
3103 struct nchandle fd_nrdir;
3104 struct nchandle nch;
3105 struct namecache *ncp;
3106 struct mount *mp;
3107 char *bp, *buf;
3108 int slash_prefixed;
3109 int error = 0;
3110 int i;
3111
3112 atomic_add_int(&numfullpathcalls, -1);
3113
3114 *retbuf = NULL;
3115 *freebuf = NULL;
3116
3117 buf = kmalloc(MAXPATHLEN, M_TEMP, M_WAITOK);
3118 bp = buf + MAXPATHLEN - 1;
3119 *bp = '\0';
3120 if (p != NULL)
3121 fd_nrdir = p->p_fd->fd_nrdir;
3122 else
3123 fd_nrdir = rootnch;
3124 slash_prefixed = 0;
3125 nch = *nchp;
3126 ncp = nch.ncp;
3127 if (ncp)
3128 _cache_hold(ncp);
3129 mp = nch.mount;
3130
3131 while (ncp && (ncp != fd_nrdir.ncp || mp != fd_nrdir.mount)) {
3132 /*
3133 * While traversing upwards if we encounter the root
3134 * of the current mount we have to skip to the mount point.
3135 */
3136 if (ncp == mp->mnt_ncmountpt.ncp) {
3137 nch = mp->mnt_ncmounton;
3138 _cache_drop(ncp);
3139 ncp = nch.ncp;
3140 if (ncp)
3141 _cache_hold(ncp);
3142 mp = nch.mount;
3143 continue;
3144 }
3145
3146 /*
3147 * Prepend the path segment
3148 */
3149 for (i = ncp->nc_nlen - 1; i >= 0; i--) {
3150 if (bp == buf) {
3151 numfullpathfail4++;
3152 kfree(buf, M_TEMP);
3153 error = ENOMEM;
3154 goto done;
3155 }
3156 *--bp = ncp->nc_name[i];
3157 }
3158 if (bp == buf) {
3159 numfullpathfail4++;
3160 kfree(buf, M_TEMP);
3161 error = ENOMEM;
3162 goto done;
3163 }
3164 *--bp = '/';
3165 slash_prefixed = 1;
3166
3167 /*
3168 * Go up a directory. This isn't a mount point so we don't
3169 * have to check again.
3170 *
3171 * We can only safely access nc_parent with ncp held locked.
3172 */
3173 while ((nch.ncp = ncp->nc_parent) != NULL) {
3174 _cache_lock(ncp);
3175 if (nch.ncp != ncp->nc_parent) {
3176 _cache_unlock(ncp);
3177 continue;
3178 }
3179 _cache_hold(nch.ncp);
3180 _cache_unlock(ncp);
3181 break;
3182 }
3183 _cache_drop(ncp);
3184 ncp = nch.ncp;
3185 }
3186 if (ncp == NULL) {
3187 numfullpathfail2++;
3188 kfree(buf, M_TEMP);
3189 error = ENOENT;
3190 goto done;
3191 }
3192
3193 if (!slash_prefixed) {
3194 if (bp == buf) {
3195 numfullpathfail4++;
3196 kfree(buf, M_TEMP);
3197 error = ENOMEM;
3198 goto done;
3199 }
3200 *--bp = '/';
3201 }
3202 numfullpathfound++;
3203 *retbuf = bp;
3204 *freebuf = buf;
3205 error = 0;
3206done:
3207 if (ncp)
3208 _cache_drop(ncp);
3209 return(error);
3210}
3211
3212int
3213vn_fullpath(struct proc *p, struct vnode *vn, char **retbuf, char **freebuf)
3214{
3215 struct namecache *ncp;
3216 struct nchandle nch;
3217 int error;
3218
3219 atomic_add_int(&numfullpathcalls, 1);
3220 if (disablefullpath)
3221 return (ENODEV);
3222
3223 if (p == NULL)
3224 return (EINVAL);
3225
3226 /* vn is NULL, client wants us to use p->p_textvp */
3227 if (vn == NULL) {
3228 if ((vn = p->p_textvp) == NULL)
3229 return (EINVAL);
3230 }
3231 spin_lock_wr(&vn->v_spinlock);
3232 TAILQ_FOREACH(ncp, &vn->v_namecache, nc_vnode) {
3233 if (ncp->nc_nlen)
3234 break;
3235 }
3236 if (ncp == NULL) {
3237 spin_unlock_wr(&vn->v_spinlock);
3238 return (EINVAL);
3239 }
3240 _cache_hold(ncp);
3241 spin_unlock_wr(&vn->v_spinlock);
3242
3243 atomic_add_int(&numfullpathcalls, -1);
3244	nch.ncp = ncp;
3245 nch.mount = vn->v_mount;
3246 error = cache_fullpath(p, &nch, retbuf, freebuf);
3247 _cache_drop(ncp);
3248 return (error);
3249}
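
/*
 * Illustrative sketch, not part of the original file: the
 * retbuf/freebuf contract of cache_fullpath() and vn_fullpath().
 * retbuf points into the middle of the allocation, so the caller
 * must free freebuf, never retbuf.  The name example_print_path is
 * hypothetical.
 */
#if 0
static void
example_print_path(struct proc *p, struct vnode *vp)
{
	char *retbuf;
	char *freebuf;

	if (vn_fullpath(p, vp, &retbuf, &freebuf) == 0) {
		kprintf("path = %s\n", retbuf);
		kfree(freebuf, M_TEMP);	/* free allocation, not retbuf */
	}
}
#endif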