gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 1989, 1993
	3	* The Regents of the University of California. All rights reserved.
	4	* (c) UNIX System Laboratories, Inc.
	5	* All or some portions of this file are derived from material licensed
	6	* to the University of California by American Telephone and Telegraph
	7	* Co. or Unix System Laboratories, Inc. and are reproduced herein with
	8	* the permission of UNIX System Laboratories, Inc.
	9	*
	10	* Redistribution and use in source and binary forms, with or without
	11	* modification, are permitted provided that the following conditions
	12	* are met:
	13	* 1. Redistributions of source code must retain the above copyright
	14	* notice, this list of conditions and the following disclaimer.
	15	* 2. Redistributions in binary form must reproduce the above copyright
	16	* notice, this list of conditions and the following disclaimer in the
	17	* documentation and/or other materials provided with the distribution.
	18	* 3. All advertising materials mentioning features or use of this software
	19	* must display the following acknowledgement:
	20	* This product includes software developed by the University of
	21	* California, Berkeley and its contributors.
	22	* 4. Neither the name of the University nor the names of its contributors
	23	* may be used to endorse or promote products derived from this software
	24	* without specific prior written permission.
	25	*
	26	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	27	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	28	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	29	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	30	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	31	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	32	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	33	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	34	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	35	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	36	* SUCH DAMAGE.
	37	*
	38	* @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
	39	* $FreeBSD: src/sys/kern/vfs_subr.c,v 1.249.2.30 2003/04/04 20:35:57 tegge Exp $
	40	* $DragonFly: src/sys/kern/vfs_subr.c,v 1.15 2003/07/26 19:42:11 rob Exp $
	41	*/
	42
	43	/*
	44	* External virtual filesystem routines
	45	*/
	46	#include "opt_ddb.h"
	47
	48	#include <sys/param.h>
	49	#include <sys/systm.h>
	50	#include <sys/buf.h>
	51	#include <sys/conf.h>
	52	#include <sys/dirent.h>
	53	#include <sys/domain.h>
	54	#include <sys/eventhandler.h>
	55	#include <sys/fcntl.h>
	56	#include <sys/kernel.h>
	57	#include <sys/kthread.h>
	58	#include <sys/malloc.h>
	59	#include <sys/mbuf.h>
	60	#include <sys/mount.h>
	61	#include <sys/proc.h>
	62	#include <sys/namei.h>
	63	#include <sys/reboot.h>
	64	#include <sys/socket.h>
	65	#include <sys/stat.h>
	66	#include <sys/sysctl.h>
	67	#include <sys/syslog.h>
	68	#include <sys/vmmeter.h>
	69	#include <sys/vnode.h>
	70
	71	#include <machine/limits.h>
	72
	73	#include <vm/vm.h>
	74	#include <vm/vm_object.h>
	75	#include <vm/vm_extern.h>
	76	#include <vm/pmap.h>
	77	#include <vm/vm_map.h>
	78	#include <vm/vm_page.h>
	79	#include <vm/vm_pager.h>
	80	#include <vm/vnode_pager.h>
	81	#include <vm/vm_zone.h>
	82
	83	#include <sys/buf2.h>
	84	#include <sys/thread2.h>
	85
	86	static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
	87
	88	static void insmntque __P((struct vnode vp, struct mount mp));
	89	static void vclean __P((struct vnode vp, int flags, struct thread td));
	90	static unsigned long numvnodes;
	91	static void vlruvp(struct vnode *vp);
	92	SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
	93
	94	enum vtype iftovt_tab[16] = {
	95	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	96	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
	97	};
	98	int vttoif_tab[9] = {
	99	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	100	S_IFSOCK, S_IFIFO, S_IFMT,
	101	};
	102
	103	static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
	104
	105	static u_long wantfreevnodes = 25;
	106	SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
	107	static u_long freevnodes = 0;
	108	SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
	109
	110	static int reassignbufcalls;
	111	SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, "");
	112	static int reassignbufloops;
	113	SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, "");
	114	static int reassignbufsortgood;
	115	SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, "");
	116	static int reassignbufsortbad;
	117	SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, "");
	118	static int reassignbufmethod = 1;
	119	SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, "");
	120	static int nameileafonly = 0;
	121	SYSCTL_INT(_vfs, OID_AUTO, nameileafonly, CTLFLAG_RW, &nameileafonly, 0, "");
	122
	123	#ifdef ENABLE_VFS_IOOPT
	124	int vfs_ioopt = 0;
	125	SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
	126	#endif
	127
	128	struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist); /* mounted fs */
	129	struct lwkt_token mountlist_token;
	130	struct lwkt_token mntvnode_token;
	131	int nfs_mount_type = -1;
	132	static struct lwkt_token mntid_token;
	133	static struct lwkt_token vnode_free_list_token;
	134	static struct lwkt_token spechash_token;
	135	struct nfs_public nfs_pub; /* publicly exported FS */
	136	static vm_zone_t vnode_zone;
	137
	138	/*
	139	* The workitem queue.
	140	*/
	141	#define SYNCER_MAXDELAY 32
	142	static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */
	143	time_t syncdelay = 30; /* max time to delay syncing data */
	144	time_t filedelay = 30; /* time to delay syncing files */
	145	SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "");
	146	time_t dirdelay = 29; /* time to delay syncing directories */
	147	SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "");
	148	time_t metadelay = 28; /* time to delay syncing metadata */
	149	SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "");
	150	static int rushjob; /* number of slots to run ASAP */
	151	static int stat_rush_requests; /* number of times I/O speeded up */
	152	SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "");
	153
	154	static int syncer_delayno = 0;
	155	static long syncer_mask;
	156	LIST_HEAD(synclist, vnode);
	157	static struct synclist *syncer_workitem_pending;
	158
	159	int desiredvnodes;
	160	SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
	161	&desiredvnodes, 0, "Maximum number of vnodes");
	162	static int minvnodes;
	163	SYSCTL_INT(_kern, OID_AUTO, minvnodes, CTLFLAG_RW,
	164	&minvnodes, 0, "Minimum number of vnodes");
	165	static int vnlru_nowhere = 0;
	166	SYSCTL_INT(_debug, OID_AUTO, vnlru_nowhere, CTLFLAG_RW, &vnlru_nowhere, 0,
	167	"Number of times the vnlru process ran without success");
	168
	169	static void vfs_free_addrlist __P((struct netexport *nep));
	170	static int vfs_free_netcred __P((struct radix_node rn, void w));
	171	static int vfs_hang_addrlist __P((struct mount mp, struct netexport nep,
	172	struct export_args *argp));
	173
	174	/*
	175	* Initialize the vnode management data structures.
	176	*/
	177	void
	178	vntblinit()
	179	{
	180
	181	desiredvnodes = maxproc + vmstats.v_page_count / 4;
	182	minvnodes = desiredvnodes / 4;
	183	lwkt_inittoken(&mntvnode_token);
	184	lwkt_inittoken(&mntid_token);
	185	lwkt_inittoken(&spechash_token);
	186	TAILQ_INIT(&vnode_free_list);
	187	lwkt_inittoken(&vnode_free_list_token);
	188	vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
	189	/*
	190	* Initialize the filesystem syncer.
	191	*/
	192	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
	193	&syncer_mask);
	194	syncer_maxdelay = syncer_mask + 1;
	195	}
	196
	197	/*
	198	* Mark a mount point as busy. Used to synchronize access and to delay
	199	* unmounting. Interlock is not released on failure.
	200	*/
	201	int
	202	vfs_busy(struct mount mp, int flags, struct lwkt_token interlkp,
	203	struct thread *td)
	204	{
	205	int lkflags;
	206
	207	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
	208	if (flags & LK_NOWAIT)
	209	return (ENOENT);
	210	mp->mnt_kern_flag \|= MNTK_MWAIT;
	211	if (interlkp) {
	212	lwkt_reltoken(interlkp);
	213	}
	214	/*
	215	* Since all busy locks are shared except the exclusive
	216	* lock granted when unmounting, the only place that a
	217	* wakeup needs to be done is at the release of the
	218	* exclusive lock at the end of dounmount.
	219	*/
	220	tsleep((caddr_t)mp, 0, "vfs_busy", 0);
	221	if (interlkp) {
	222	lwkt_gettoken(interlkp);
	223	}
	224	return (ENOENT);
	225	}
	226	lkflags = LK_SHARED \| LK_NOPAUSE;
	227	if (interlkp)
	228	lkflags \|= LK_INTERLOCK;
	229	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, td))
	230	panic("vfs_busy: unexpected lock failure");
	231	return (0);
	232	}
	233
	234	/*
	235	* Free a busy filesystem.
	236	*/
	237	void
	238	vfs_unbusy(struct mount mp, struct thread td)
	239	{
	240	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td);
	241	}
	242
	243	/*
	244	* Lookup a filesystem type, and if found allocate and initialize
	245	* a mount structure for it.
	246	*
	247	* Devname is usually updated by mount(8) after booting.
	248	*/
	249	int
	250	vfs_rootmountalloc(char fstypename, char devname, struct mount **mpp)
	251	{
	252	struct thread td = curthread; / XXX */
	253	struct vfsconf *vfsp;
	254	struct mount *mp;
	255
	256	if (fstypename == NULL)
	257	return (ENODEV);
	258	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
	259	if (!strcmp(vfsp->vfc_name, fstypename))
	260	break;
	261	if (vfsp == NULL)
	262	return (ENODEV);
	263	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	264	bzero((char *)mp, (u_long)sizeof(struct mount));
	265	lockinit(&mp->mnt_lock, 0, "vfslock", VLKTIMEOUT, LK_NOPAUSE);
	266	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
	267	TAILQ_INIT(&mp->mnt_nvnodelist);
	268	TAILQ_INIT(&mp->mnt_reservedvnlist);
	269	mp->mnt_nvnodelistsize = 0;
	270	mp->mnt_vfc = vfsp;
	271	mp->mnt_op = vfsp->vfc_vfsops;
	272	mp->mnt_flag = MNT_RDONLY;
	273	mp->mnt_vnodecovered = NULLVP;
	274	vfsp->vfc_refcount++;
	275	mp->mnt_iosize_max = DFLTPHYS;
	276	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	277	mp->mnt_flag \|= vfsp->vfc_flags & MNT_VISFLAGMASK;
	278	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	279	mp->mnt_stat.f_mntonname[0] = '/';
	280	mp->mnt_stat.f_mntonname[1] = 0;
	281	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	282	*mpp = mp;
	283	return (0);
	284	}
	285
	/*
	 * Find an appropriate filesystem to use for the root.  A preselected
	 * lite2_mountroot routine is used if one is set; otherwise every known
	 * filesystem that has a mountroot routine is tried in turn until one
	 * succeeds.  Returns 0 on success, ENODEV if every candidate failed.
	 *
	 * Currently compiled out.
	 */
	#ifdef notdef /* XXX JH */
	int
	lite2_vfs_mountroot()
	{
		extern int (*lite2_mountroot) __P((void));
		struct vfsconf *vfsp;
		int error;

		if (lite2_mountroot != NULL)
			return ((*lite2_mountroot)());

		for (vfsp = vfsconf; vfsp != NULL; vfsp = vfsp->vfc_next) {
			if (vfsp->vfc_mountroot != NULL) {
				error = (*vfsp->vfc_mountroot)();
				if (error == 0)
					return (0);
				printf("%s_mountroot failed: %d\n",
				    vfsp->vfc_name, error);
			}
		}
		return (ENODEV);
	}
	#endif
	312
	313	/*
	314	* Lookup a mount point by filesystem identifier.
	315	*/
	316	struct mount *
	317	vfs_getvfs(fsid)
	318	fsid_t *fsid;
	319	{
	320	struct mount *mp;
	321
	322	lwkt_gettoken(&mountlist_token);
	323	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
	324	if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
	325	mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
	326	lwkt_reltoken(&mountlist_token);
	327	return (mp);
	328	}
	329	}
	330	lwkt_reltoken(&mountlist_token);
	331	return ((struct mount *) 0);
	332	}
	333
	334	/*
	335	* Get a new unique fsid. Try to make its val[0] unique, since this value
	336	* will be used to create fake device numbers for stat(). Also try (but
	337	* not so hard) make its val[0] unique mod 2^16, since some emulators only
	338	* support 16-bit device numbers. We end up with unique val[0]'s for the
	339	* first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls.
	340	*
	341	* Keep in mind that several mounts may be running in parallel. Starting
	342	* the search one past where the previous search terminated is both a
	343	* micro-optimization and a defense against returning the same fsid to
	344	* different mounts.
	345	*/
	346	void
	347	vfs_getnewfsid(mp)
	348	struct mount *mp;
	349	{
	350	static u_int16_t mntid_base;
	351	fsid_t tfsid;
	352	int mtype;
	353
	354	lwkt_gettoken(&mntid_token);
	355	mtype = mp->mnt_vfc->vfc_typenum;
	356	tfsid.val[1] = mtype;
	357	mtype = (mtype & 0xFF) << 24;
	358	for (;;) {
	359	tfsid.val[0] = makeudev(255,
	360	mtype \| ((mntid_base & 0xFF00) << 8) \| (mntid_base & 0xFF));
	361	mntid_base++;
	362	if (vfs_getvfs(&tfsid) == NULL)
	363	break;
	364	}
	365	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	366	mp->mnt_stat.f_fsid.val[1] = tfsid.val[1];
	367	lwkt_reltoken(&mntid_token);
	368	}
	369
	370	/*
	371	* Knob to control the precision of file timestamps:
	372	*
	373	* 0 = seconds only; nanoseconds zeroed.
	374	* 1 = seconds and nanoseconds, accurate within 1/HZ.
	375	* 2 = seconds and nanoseconds, truncated to microseconds.
	376	* >=3 = seconds and nanoseconds, maximum precision.
	377	*/
	378	enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };
	379
	380	static int timestamp_precision = TSP_SEC;
	381	SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW,
	382	&timestamp_precision, 0, "");
	383
	384	/*
	385	* Get a current timestamp.
	386	*/
	387	void
	388	vfs_timestamp(tsp)
	389	struct timespec *tsp;
	390	{
	391	struct timeval tv;
	392
	393	switch (timestamp_precision) {
	394	case TSP_SEC:
	395	tsp->tv_sec = time_second;
	396	tsp->tv_nsec = 0;
	397	break;
	398	case TSP_HZ:
	399	getnanotime(tsp);
	400	break;
	401	case TSP_USEC:
	402	microtime(&tv);
	403	TIMEVAL_TO_TIMESPEC(&tv, tsp);
	404	break;
	405	case TSP_NSEC:
	406	default:
	407	nanotime(tsp);
	408	break;
	409	}
	410	}
	411
	412	/*
	413	* Set vnode attributes to VNOVAL
	414	*/
	415	void
	416	vattr_null(vap)
	417	struct vattr *vap;
	418	{
	419
	420	vap->va_type = VNON;
	421	vap->va_size = VNOVAL;
	422	vap->va_bytes = VNOVAL;
	423	vap->va_mode = VNOVAL;
	424	vap->va_nlink = VNOVAL;
	425	vap->va_uid = VNOVAL;
	426	vap->va_gid = VNOVAL;
	427	vap->va_fsid = VNOVAL;
	428	vap->va_fileid = VNOVAL;
	429	vap->va_blocksize = VNOVAL;
	430	vap->va_rdev = VNOVAL;
	431	vap->va_atime.tv_sec = VNOVAL;
	432	vap->va_atime.tv_nsec = VNOVAL;
	433	vap->va_mtime.tv_sec = VNOVAL;
	434	vap->va_mtime.tv_nsec = VNOVAL;
	435	vap->va_ctime.tv_sec = VNOVAL;
	436	vap->va_ctime.tv_nsec = VNOVAL;
	437	vap->va_flags = VNOVAL;
	438	vap->va_gen = VNOVAL;
	439	vap->va_vaflags = 0;
	440	}
	441
	442	/*
	443	* This routine is called when we have too many vnodes. It attempts
	444	* to free <count> vnodes and will potentially free vnodes that still
	445	* have VM backing store (VM backing store is typically the cause
	446	* of a vnode blowout so we want to do this). Therefore, this operation
	447	* is not considered cheap.
	448	*
	449	* A number of conditions may prevent a vnode from being reclaimed.
	450	* the buffer cache may have references on the vnode, a directory
	451	* vnode may still have references due to the namei cache representing
	452	* underlying files, or the vnode may be in active use. It is not
	453	* desireable to reuse such vnodes. These conditions may cause the
	454	* number of vnodes to reach some minimum value regardless of what
	455	* you set kern.maxvnodes to. Do not set kern.maxvnodes too low.
	456	*/
	457	static int
	458	vlrureclaim(struct mount *mp)
	459	{
	460	struct vnode *vp;
	461	int done;
	462	int trigger;
	463	int usevnodes;
	464	int count;
	465	int gen;
	466
	467	/*
	468	* Calculate the trigger point, don't allow user
	469	* screwups to blow us up. This prevents us from
	470	* recycling vnodes with lots of resident pages. We
	471	* aren't trying to free memory, we are trying to
	472	* free vnodes.
	473	*/
	474	usevnodes = desiredvnodes;
	475	if (usevnodes <= 0)
	476	usevnodes = 1;
	477	trigger = vmstats.v_page_count * 2 / usevnodes;
	478
	479	done = 0;
	480	gen = lwkt_gettoken(&mntvnode_token);
	481	count = mp->mnt_nvnodelistsize / 10 + 1;
	482	while (count && (vp = TAILQ_FIRST(&mp->mnt_nvnodelist)) != NULL) {
	483	TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
	484	TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
	485
	486	if (vp->v_type != VNON &&
	487	vp->v_type != VBAD &&
	488	VMIGHTFREE(vp) && /* critical path opt */
	489	(vp->v_object == NULL \|\| vp->v_object->resident_page_count < trigger)
	490	) {
	491	lwkt_gettoken(&vp->v_interlock);
	492	if (lwkt_gentoken(&mntvnode_token, &gen) == 0) {
	493	if (VMIGHTFREE(vp)) {
	494	vgonel(vp, curthread);
	495	done++;
	496	} else {
	497	lwkt_reltoken(&vp->v_interlock);
	498	}
	499	} else {
	500	lwkt_reltoken(&vp->v_interlock);
	501	}
	502	}
	503	--count;
	504	}
	505	lwkt_reltoken(&mntvnode_token);
	506	return done;
	507	}
	508
	509	/*
	510	* Attempt to recycle vnodes in a context that is always safe to block.
	511	* Calling vlrurecycle() from the bowels of file system code has some
	512	* interesting deadlock problems.
	513	*/
	514	static struct thread *vnlruthread;
	515	static int vnlruproc_sig;
	516
	517	static void
	518	vnlru_proc(void)
	519	{
	520	struct mount mp, nmp;
	521	int s;
	522	int done;
	523	struct thread *td = curthread;
	524
	525	EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, td,
	526	SHUTDOWN_PRI_FIRST);
	527
	528	s = splbio();
	529	for (;;) {
	530	kproc_suspend_loop();
	531	if (numvnodes - freevnodes <= desiredvnodes * 9 / 10) {
	532	vnlruproc_sig = 0;
	533	wakeup(&vnlruproc_sig);
	534	tsleep(td, 0, "vlruwt", hz);
	535	continue;
	536	}
	537	done = 0;
	538	lwkt_gettoken(&mountlist_token);
	539	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
	540	if (vfs_busy(mp, LK_NOWAIT, &mountlist_token, td)) {
	541	nmp = TAILQ_NEXT(mp, mnt_list);
	542	continue;
	543	}
	544	done += vlrureclaim(mp);
	545	lwkt_gettoken(&mountlist_token);
	546	nmp = TAILQ_NEXT(mp, mnt_list);
	547	vfs_unbusy(mp, td);
	548	}
	549	lwkt_reltoken(&mountlist_token);
	550	if (done == 0) {
	551	vnlru_nowhere++;
	552	tsleep(td, 0, "vlrup", hz * 3);
	553	}
	554	}
	555	splx(s);
	556	}
	557
	558	static struct kproc_desc vnlru_kp = {
	559	"vnlru",
	560	vnlru_proc,
	561	&vnlruthread
	562	};
	563	SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &vnlru_kp)
	564
	565	/*
	566	* Routines having to do with the management of the vnode table.
	567	*/
	568	extern vop_t **dead_vnodeop_p;
	569
	570	/*
	571	* Return the next vnode from the free list.
	572	*/
	573	int
	574	getnewvnode(tag, mp, vops, vpp)
	575	enum vtagtype tag;
	576	struct mount *mp;
	577	vop_t **vops;
	578	struct vnode **vpp;
	579	{
	580	int s;
	581	int gen;
	582	int vgen;
	583	struct thread td = curthread; / XXX */
	584	struct vnode *vp = NULL;
	585	vm_object_t object;
	586
	587	s = splbio();
	588
	589	/*
	590	* Try to reuse vnodes if we hit the max. This situation only
	591	* occurs in certain large-memory (2G+) situations. We cannot
	592	* attempt to directly reclaim vnodes due to nasty recursion
	593	* problems.
	594	*/
	595	while (numvnodes - freevnodes > desiredvnodes) {
	596	if (vnlruproc_sig == 0) {
	597	vnlruproc_sig = 1; /* avoid unnecessary wakeups */
	598	wakeup(vnlruthread);
	599	}
	600	tsleep(&vnlruproc_sig, 0, "vlruwk", hz);
	601	}
	602
	603
	604	/*
	605	* Attempt to reuse a vnode already on the free list, allocating
	606	* a new vnode if we can't find one or if we have not reached a
	607	* good minimum for good LRU performance.
	608	*/
	609	gen = lwkt_gettoken(&vnode_free_list_token);
	610	if (freevnodes >= wantfreevnodes && numvnodes >= minvnodes) {
	611	int count;
	612
	613	for (count = 0; count < freevnodes; count++) {
	614	vp = TAILQ_FIRST(&vnode_free_list);
	615	if (vp == NULL \|\| vp->v_usecount)
	616	panic("getnewvnode: free vnode isn't");
	617
	618	/*
	619	* Get the vnode's interlock, then re-obtain
	620	* vnode_free_list_token in case we lost it. If we
	621	* did lose it while getting the vnode interlock,
	622	* even if we got it back again, then retry.
	623	*/
	624	vgen = lwkt_gettoken(&vp->v_interlock);
	625	if (lwkt_gentoken(&vnode_free_list_token, &gen) != 0) {
	626	--count;
	627	lwkt_reltoken(&vp->v_interlock);
	628	vp = NULL;
	629	continue;
	630	}
	631
	632	/*
	633	* Whew! We have both tokens. Since we didn't lose
	634	* the free list VFREE had better still be set. But
	635	* we aren't out of the woods yet. We have to get
	636	* the object (may block). If the vnode is not
	637	* suitable then move it to the end of the list
	638	* if we can. If we can't move it to the end of the
	639	* list retry again.
	640	*/
	641	if ((VOP_GETVOBJECT(vp, &object) == 0 &&
	642	(object->resident_page_count \|\| object->ref_count))
	643	) {
	644	if (lwkt_gentoken(&vp->v_interlock, &vgen) == 0 &&
	645	lwkt_gentoken(&vnode_free_list_token, &gen) == 0
	646	) {
	647	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	648	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	649	} else {
	650	--count;
	651	}
	652	lwkt_reltoken(&vp->v_interlock);
	653	vp = NULL;
	654	continue;
	655	}
	656
	657	/*
	658	* Still not out of the woods. VOBJECT might have
	659	* blocked, if we did not retain our tokens we have
	660	* to retry.
	661	*/
	662	if (lwkt_gentoken(&vp->v_interlock, &vgen) != 0 \|\|
	663	lwkt_gentoken(&vnode_free_list_token, &gen) != 0) {
	664	--count;
	665	vp = NULL;
	666	continue;
	667	}
	668	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	669	KKASSERT(vp->v_flag & VFREE);
	670
	671	if (LIST_FIRST(&vp->v_cache_src)) {
	672	/*
	673	* note: nameileafonly sysctl is temporary,
	674	* for debugging only, and will eventually be
	675	* removed.
	676	*/
	677	if (nameileafonly > 0) {
	678	/*
	679	* Do not reuse namei-cached directory
	680	* vnodes that have cached
	681	* subdirectories.
	682	*/
	683	if (cache_leaf_test(vp) < 0) {
	684	lwkt_reltoken(&vp->v_interlock);
	685	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	686	vp = NULL;
	687	continue;
	688	}
	689	} else if (nameileafonly < 0 \|\|
	690	vmiodirenable == 0) {
	691	/*
	692	* Do not reuse namei-cached directory
	693	* vnodes if nameileafonly is -1 or
	694	* if VMIO backing for directories is
	695	* turned off (otherwise we reuse them
	696	* too quickly).
	697	*/
	698	lwkt_reltoken(&vp->v_interlock);
	699	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	700	vp = NULL;
	701	continue;
	702	}
	703	}
	704	break;
	705	}
	706	}
	707
	708	if (vp) {
	709	vp->v_flag \|= VDOOMED;
	710	vp->v_flag &= ~VFREE;
	711	freevnodes--;
	712	lwkt_reltoken(&vnode_free_list_token);
	713	cache_purge(vp); /* YYY may block */
	714	vp->v_lease = NULL;
	715	if (vp->v_type != VBAD) {
	716	vgonel(vp, td);
	717	} else {
	718	lwkt_reltoken(&vp->v_interlock);
	719	}
	720
	721	#ifdef INVARIANTS
	722	{
	723	int s;
	724
	725	if (vp->v_data)
	726	panic("cleaned vnode isn't");
	727	s = splbio();
	728	if (vp->v_numoutput)
	729	panic("Clean vnode has pending I/O's");
	730	splx(s);
	731	}
	732	#endif
	733	vp->v_flag = 0;
	734	vp->v_lastw = 0;
	735	vp->v_lasta = 0;
	736	vp->v_cstart = 0;
	737	vp->v_clen = 0;
	738	vp->v_socket = 0;
	739	vp->v_writecount = 0; /* XXX */
	740	} else {
	741	lwkt_reltoken(&vnode_free_list_token);
	742	vp = (struct vnode *) zalloc(vnode_zone);
	743	bzero((char ) vp, sizeof vp);
	744	lwkt_inittoken(&vp->v_interlock);
	745	vp->v_dd = vp;
	746	cache_purge(vp);
	747	LIST_INIT(&vp->v_cache_src);
	748	TAILQ_INIT(&vp->v_cache_dst);
	749	numvnodes++;
	750	}
	751
	752	TAILQ_INIT(&vp->v_cleanblkhd);
	753	TAILQ_INIT(&vp->v_dirtyblkhd);
	754	vp->v_type = VNON;
	755	vp->v_tag = tag;
	756	vp->v_op = vops;
	757	insmntque(vp, mp);
	758	*vpp = vp;
	759	vp->v_usecount = 1;
	760	vp->v_data = 0;
	761	splx(s);
	762
	763	vfs_object_create(vp, td);
	764	return (0);
	765	}
	766
	767	/*
	768	* Move a vnode from one mount queue to another.
	769	*/
	770	static void
	771	insmntque(vp, mp)
	772	struct vnode *vp;
	773	struct mount *mp;
	774	{
	775
	776	lwkt_gettoken(&mntvnode_token);
	777	/*
	778	* Delete from old mount point vnode list, if on one.
	779	*/
	780	if (vp->v_mount != NULL) {
	781	KASSERT(vp->v_mount->mnt_nvnodelistsize > 0,
	782	("bad mount point vnode list size"));
	783	TAILQ_REMOVE(&vp->v_mount->mnt_nvnodelist, vp, v_nmntvnodes);
	784	vp->v_mount->mnt_nvnodelistsize--;
	785	}
	786	/*
	787	* Insert into list of vnodes for the new mount point, if available.
	788	*/
	789	if ((vp->v_mount = mp) == NULL) {
	790	lwkt_reltoken(&mntvnode_token);
	791	return;
	792	}
	793	TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
	794	mp->mnt_nvnodelistsize++;
	795	lwkt_reltoken(&mntvnode_token);
	796	}
	797
	798	/*
	799	* Update outstanding I/O count and do wakeup if requested.
	800	*/
	801	void
	802	vwakeup(bp)
	803	struct buf *bp;
	804	{
	805	struct vnode *vp;
	806
	807	bp->b_flags &= ~B_WRITEINPROG;
	808	if ((vp = bp->b_vp)) {
	809	vp->v_numoutput--;
	810	if (vp->v_numoutput < 0)
	811	panic("vwakeup: neg numoutput");
	812	if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
	813	vp->v_flag &= ~VBWAIT;
	814	wakeup((caddr_t) &vp->v_numoutput);
	815	}
	816	}
	817	}
	818
	819	/*
	820	* Flush out and invalidate all buffers associated with a vnode.
	821	* Called with the underlying object locked.
	822	*/
	823	int
	824	vinvalbuf(struct vnode vp, int flags, struct thread td,
	825	int slpflag, int slptimeo)
	826	{
	827	struct buf *bp;
	828	struct buf nbp, blist;
	829	int s, error;
	830	vm_object_t object;
	831
	832	if (flags & V_SAVE) {
	833	s = splbio();
	834	while (vp->v_numoutput) {
	835	vp->v_flag \|= VBWAIT;
	836	error = tsleep((caddr_t)&vp->v_numoutput,
	837	slpflag, "vinvlbuf", slptimeo);
	838	if (error) {
	839	splx(s);
	840	return (error);
	841	}
	842	}
	843	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
	844	splx(s);
	845	if ((error = VOP_FSYNC(vp, MNT_WAIT, td)) != 0)
	846	return (error);
	847	s = splbio();
	848	if (vp->v_numoutput > 0 \|\|
	849	!TAILQ_EMPTY(&vp->v_dirtyblkhd))
	850	panic("vinvalbuf: dirty bufs");
	851	}
	852	splx(s);
	853	}
	854	s = splbio();
	855	for (;;) {
	856	blist = TAILQ_FIRST(&vp->v_cleanblkhd);
	857	if (!blist)
	858	blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
	859	if (!blist)
	860	break;
	861
	862	for (bp = blist; bp; bp = nbp) {
	863	nbp = TAILQ_NEXT(bp, b_vnbufs);
	864	if (BUF_LOCK(bp, LK_EXCLUSIVE \| LK_NOWAIT)) {
	865	error = BUF_TIMELOCK(bp,
	866	LK_EXCLUSIVE \| LK_SLEEPFAIL,
	867	"vinvalbuf", slpflag, slptimeo);
	868	if (error == ENOLCK)
	869	break;
	870	splx(s);
	871	return (error);
	872	}
	873	/*
	874	* XXX Since there are no node locks for NFS, I
	875	* believe there is a slight chance that a delayed
	876	* write will occur while sleeping just above, so
	877	* check for it. Note that vfs_bio_awrite expects
	878	* buffers to reside on a queue, while VOP_BWRITE and
	879	* brelse do not.
	880	*/
	881	if (((bp->b_flags & (B_DELWRI \| B_INVAL)) == B_DELWRI) &&
	882	(flags & V_SAVE)) {
	883
	884	if (bp->b_vp == vp) {
	885	if (bp->b_flags & B_CLUSTEROK) {
	886	BUF_UNLOCK(bp);
	887	vfs_bio_awrite(bp);
	888	} else {
	889	bremfree(bp);
	890	bp->b_flags \|= B_ASYNC;
	891	VOP_BWRITE(bp->b_vp, bp);
	892	}
	893	} else {
	894	bremfree(bp);
	895	(void) VOP_BWRITE(bp->b_vp, bp);
	896	}
	897	break;
	898	}
	899	bremfree(bp);
	900	bp->b_flags \|= (B_INVAL \| B_NOCACHE \| B_RELBUF);
	901	bp->b_flags &= ~B_ASYNC;
	902	brelse(bp);
	903	}
	904	}
	905
	906	/*
	907	* Wait for I/O to complete. XXX needs cleaning up. The vnode can
	908	* have write I/O in-progress but if there is a VM object then the
	909	* VM object can also have read-I/O in-progress.
	910	*/
	911	do {
	912	while (vp->v_numoutput > 0) {
	913	vp->v_flag \|= VBWAIT;
	914	tsleep(&vp->v_numoutput, 0, "vnvlbv", 0);
	915	}
	916	if (VOP_GETVOBJECT(vp, &object) == 0) {
	917	while (object->paging_in_progress)
	918	vm_object_pip_sleep(object, "vnvlbx");
	919	}
	920	} while (vp->v_numoutput > 0);
	921
	922	splx(s);
	923
	924	/*
	925	* Destroy the copy in the VM cache, too.
	926	*/
	927	lwkt_gettoken(&vp->v_interlock);
	928	if (VOP_GETVOBJECT(vp, &object) == 0) {
	929	vm_object_page_remove(object, 0, 0,
	930	(flags & V_SAVE) ? TRUE : FALSE);
	931	}
	932	lwkt_reltoken(&vp->v_interlock);
	933
	934	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) \|\| !TAILQ_EMPTY(&vp->v_cleanblkhd))
	935	panic("vinvalbuf: flush failed");
	936	return (0);
	937	}
	938
	939	/*
	940	* Truncate a file's buffer and pages to a specified length. This
	941	* is in lieu of the old vinvalbuf mechanism, which performed unneeded
	942	* sync activity.
	943	*/
	944	int
	945	vtruncbuf(struct vnode vp, struct thread td, off_t length, int blksize)
	946	{
	947	struct buf *bp;
	948	struct buf *nbp;
	949	int s, anyfreed;
	950	int trunclbn;
	951
	952	/*
	953	* Round up to the next lbn.
	954	*/
	955	trunclbn = (length + blksize - 1) / blksize;
	956
	957	s = splbio();
	958	restart:
	959	anyfreed = 1;
	960	for (;anyfreed;) {
	961	anyfreed = 0;
	962	for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
	963	nbp = TAILQ_NEXT(bp, b_vnbufs);
	964	if (bp->b_lblkno >= trunclbn) {
	965	if (BUF_LOCK(bp, LK_EXCLUSIVE \| LK_NOWAIT)) {
	966	BUF_LOCK(bp, LK_EXCLUSIVE\|LK_SLEEPFAIL);
	967	goto restart;
	968	} else {
	969	bremfree(bp);
	970	bp->b_flags \|= (B_INVAL \| B_RELBUF);
	971	bp->b_flags &= ~B_ASYNC;
	972	brelse(bp);
	973	anyfreed = 1;
	974	}
	975	if (nbp &&
	976	(((nbp->b_xflags & BX_VNCLEAN) == 0) \|\|
	977	(nbp->b_vp != vp) \|\|
	978	(nbp->b_flags & B_DELWRI))) {
	979	goto restart;
	980	}
	981	}
	982	}
	983
	984	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
	985	nbp = TAILQ_NEXT(bp, b_vnbufs);
	986	if (bp->b_lblkno >= trunclbn) {
	987	if (BUF_LOCK(bp, LK_EXCLUSIVE \| LK_NOWAIT)) {
	988	BUF_LOCK(bp, LK_EXCLUSIVE\|LK_SLEEPFAIL);
	989	goto restart;
	990	} else {
	991	bremfree(bp);
	992	bp->b_flags \|= (B_INVAL \| B_RELBUF);
	993	bp->b_flags &= ~B_ASYNC;
	994	brelse(bp);
	995	anyfreed = 1;
	996	}
	997	if (nbp &&
	998	(((nbp->b_xflags & BX_VNDIRTY) == 0) \|\|
	999	(nbp->b_vp != vp) \|\|
	1000	(nbp->b_flags & B_DELWRI) == 0)) {
	1001	goto restart;
	1002	}
	1003	}
	1004	}
	1005	}
	1006
	1007	if (length > 0) {
	1008	restartsync:
	1009	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
	1010	nbp = TAILQ_NEXT(bp, b_vnbufs);
	1011	if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
	1012	if (BUF_LOCK(bp, LK_EXCLUSIVE \| LK_NOWAIT)) {
	1013	BUF_LOCK(bp, LK_EXCLUSIVE\|LK_SLEEPFAIL);
	1014	goto restart;
	1015	} else {
	1016	bremfree(bp);
	1017	if (bp->b_vp == vp) {
	1018	bp->b_flags \|= B_ASYNC;
	1019	} else {
	1020	bp->b_flags &= ~B_ASYNC;
	1021	}
	1022	VOP_BWRITE(bp->b_vp, bp);
	1023	}
	1024	goto restartsync;
	1025	}
	1026
	1027	}
	1028	}
	1029
	1030	while (vp->v_numoutput > 0) {
	1031	vp->v_flag \|= VBWAIT;
	1032	tsleep(&vp->v_numoutput, 0, "vbtrunc", 0);
	1033	}
	1034
	1035	splx(s);
	1036
	1037	vnode_pager_setsize(vp, length);
	1038
	1039	return (0);
	1040	}
	1041
	1042	/*
	1043	* Associate a buffer with a vnode.
	1044	*/
	1045	void
	1046	bgetvp(vp, bp)
	1047	struct vnode *vp;
	1048	struct buf *bp;
	1049	{
	1050	int s;
	1051
	1052	KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));
	1053
	1054	vhold(vp);
	1055	bp->b_vp = vp;
	1056	bp->b_dev = vn_todev(vp);
	1057	/*
	1058	* Insert onto list for new vnode.
	1059	*/
	1060	s = splbio();
	1061	bp->b_xflags \|= BX_VNCLEAN;
	1062	bp->b_xflags &= ~BX_VNDIRTY;
	1063	TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
	1064	splx(s);
	1065	}
	1066
	1067	/*
	1068	* Disassociate a buffer from a vnode.
	1069	*/
	1070	void
	1071	brelvp(bp)
	1072	struct buf *bp;
	1073	{
	1074	struct vnode *vp;
	1075	struct buflists *listheadp;
	1076	int s;
	1077
	1078	KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));
	1079
	1080	/*
	1081	* Delete from old vnode list, if on one.
	1082	*/
	1083	vp = bp->b_vp;
	1084	s = splbio();
	1085	if (bp->b_xflags & (BX_VNDIRTY \| BX_VNCLEAN)) {
	1086	if (bp->b_xflags & BX_VNDIRTY)
	1087	listheadp = &vp->v_dirtyblkhd;
	1088	else
	1089	listheadp = &vp->v_cleanblkhd;
	1090	TAILQ_REMOVE(listheadp, bp, b_vnbufs);
	1091	bp->b_xflags &= ~(BX_VNDIRTY \| BX_VNCLEAN);
	1092	}
	1093	if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
	1094	vp->v_flag &= ~VONWORKLST;
	1095	LIST_REMOVE(vp, v_synclist);
	1096	}
	1097	splx(s);
	1098	bp->b_vp = (struct vnode *) 0;
	1099	vdrop(vp);
	1100	}
	1101
	1102	/*
	1103	* The workitem queue.
	1104	*
	1105	* It is useful to delay writes of file data and filesystem metadata
	1106	* for tens of seconds so that quickly created and deleted files need
	1107	* not waste disk bandwidth being created and removed. To realize this,
	1108	* we append vnodes to a "workitem" queue. When running with a soft
	1109	* updates implementation, most pending metadata dependencies should
	1110	* not wait for more than a few seconds. Thus, buffers of mounted block
	1111	* devices are delayed only about half the time that file data is delayed.
	1112	* Similarly, directory updates are more critical, so are only delayed
	1113	* about a third the time that file data is delayed. Thus, there are
	1114	* SYNCER_MAXDELAY queues that are processed round-robin at a rate of
	1115	* one each second (driven off the filesystem syncer process). The
	1116	* syncer_delayno variable indicates the next queue that is to be processed.
	1117	* Items that need to be processed soon are placed in this queue:
	1118	*
	1119	* syncer_workitem_pending[syncer_delayno]
	1120	*
	1121	* A delay of fifteen seconds is done by placing the request fifteen
	1122	* entries later in the queue:
	1123	*
	1124	* syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
	1125	*
	1126	*/
	1127
	1128	/*
	1129	* Add an item to the syncer work queue.
	1130	*/
	1131	static void
	1132	vn_syncer_add_to_worklist(struct vnode *vp, int delay)
	1133	{
	1134	int s, slot;
	1135
	1136	s = splbio();
	1137
	1138	if (vp->v_flag & VONWORKLST) {
	1139	LIST_REMOVE(vp, v_synclist);
	1140	}
	1141
	1142	if (delay > syncer_maxdelay - 2)
	1143	delay = syncer_maxdelay - 2;
	1144	slot = (syncer_delayno + delay) & syncer_mask;
	1145
	1146	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
	1147	vp->v_flag \|= VONWORKLST;
	1148	splx(s);
	1149	}
	1150
	1151	struct thread *updatethread;
	1152	static void sched_sync __P((void));
	1153	static struct kproc_desc up_kp = {
	1154	"syncer",
	1155	sched_sync,
	1156	&updatethread
	1157	};
	1158	SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
	1159
	1160	/*
	1161	* System filesystem synchronizer daemon.
	1162	*/
	1163	void
	1164	sched_sync(void)
	1165	{
	1166	struct synclist *slp;
	1167	struct vnode *vp;
	1168	long starttime;
	1169	int s;
	1170	struct thread *td = curthread;
	1171
	1172	EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, td,
	1173	SHUTDOWN_PRI_LAST);
	1174
	1175	for (;;) {
	1176	kproc_suspend_loop();
	1177
	1178	starttime = time_second;
	1179
	1180	/*
	1181	* Push files whose dirty time has expired. Be careful
	1182	* of interrupt race on slp queue.
	1183	*/
	1184	s = splbio();
	1185	slp = &syncer_workitem_pending[syncer_delayno];
	1186	syncer_delayno += 1;
	1187	if (syncer_delayno == syncer_maxdelay)
	1188	syncer_delayno = 0;
	1189	splx(s);
	1190
	1191	while ((vp = LIST_FIRST(slp)) != NULL) {
	1192	if (VOP_ISLOCKED(vp, NULL) == 0) {
	1193	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY, td);
	1194	(void) VOP_FSYNC(vp, MNT_LAZY, td);
	1195	VOP_UNLOCK(vp, 0, td);
	1196	}
	1197	s = splbio();
	1198	if (LIST_FIRST(slp) == vp) {
	1199	/*
	1200	* Note: v_tag VT_VFS vps can remain on the
	1201	* worklist too with no dirty blocks, but
	1202	* since sync_fsync() moves it to a different
	1203	* slot we are safe.
	1204	*/
	1205	if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
	1206	!vn_isdisk(vp, NULL))
	1207	panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag);
	1208	/*
	1209	* Put us back on the worklist. The worklist
	1210	* routine will remove us from our current
	1211	* position and then add us back in at a later
	1212	* position.
	1213	*/
	1214	vn_syncer_add_to_worklist(vp, syncdelay);
	1215	}
	1216	splx(s);
	1217	}
	1218
	1219	/*
	1220	* Do soft update processing.
	1221	*/
	1222	if (bioops.io_sync)
	1223	(*bioops.io_sync)(NULL);
	1224
	1225	/*
	1226	* The variable rushjob allows the kernel to speed up the
	1227	* processing of the filesystem syncer process. A rushjob
	1228	* value of N tells the filesystem syncer to process the next
	1229	* N seconds worth of work on its queue ASAP. Currently rushjob
	1230	* is used by the soft update code to speed up the filesystem
	1231	* syncer process when the incore state is getting so far
	1232	* ahead of the disk that the kernel memory pool is being
	1233	* threatened with exhaustion.
	1234	*/
	1235	if (rushjob > 0) {
	1236	rushjob -= 1;
	1237	continue;
	1238	}
	1239	/*
	1240	* If it has taken us less than a second to process the
	1241	* current work, then wait. Otherwise start right over
	1242	* again. We can still lose time if any single round
	1243	* takes more than two seconds, but it does not really
	1244	* matter as we are just trying to generally pace the
	1245	* filesystem activity.
	1246	*/
	1247	if (time_second == starttime)
	1248	tsleep(&lbolt, 0, "syncer", 0);
	1249	}
	1250	}
	1251
	1252	/*
	1253	* Request the syncer daemon to speed up its work.
	1254	* We never push it to speed up more than half of its
	1255	* normal turn time, otherwise it could take over the cpu.
	1256	*
	1257	* YYY wchan field protected by the BGL.
	1258	*/
	1259	int
	1260	speedup_syncer()
	1261	{
	1262	crit_enter();
	1263	if (updatethread->td_wchan == &lbolt) { /* YYY */
	1264	unsleep(updatethread);
	1265	lwkt_schedule(updatethread);
	1266	}
	1267	crit_exit();
	1268	if (rushjob < syncdelay / 2) {
	1269	rushjob += 1;
	1270	stat_rush_requests += 1;
	1271	return (1);
	1272	}
	1273	return(0);
	1274	}
	1275
	1276	/*
	1277	* Associate a p-buffer with a vnode.
	1278	*
	1279	* Also sets B_PAGING flag to indicate that vnode is not fully associated
	1280	* with the buffer. i.e. the bp has not been linked into the vnode or
	1281	* ref-counted.
	1282	*/
	1283	void
	1284	pbgetvp(vp, bp)
	1285	struct vnode *vp;
	1286	struct buf *bp;
	1287	{
	1288
	1289	KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));
	1290
	1291	bp->b_vp = vp;
	1292	bp->b_flags \|= B_PAGING;
	1293	bp->b_dev = vn_todev(vp);
	1294	}
	1295
	1296	/*
	1297	* Disassociate a p-buffer from a vnode.
	1298	*/
	1299	void
	1300	pbrelvp(bp)
	1301	struct buf *bp;
	1302	{
	1303
	1304	KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));
	1305
	1306	/* XXX REMOVE ME */
	1307	if (TAILQ_NEXT(bp, b_vnbufs) != NULL) {
	1308	panic(
	1309	"relpbuf(): b_vp was probably reassignbuf()d %p %x",
	1310	bp,
	1311	(int)bp->b_flags
	1312	);
	1313	}
	1314	bp->b_vp = (struct vnode *) 0;
	1315	bp->b_flags &= ~B_PAGING;
	1316	}
	1317
	1318	void
	1319	pbreassignbuf(bp, newvp)
	1320	struct buf *bp;
	1321	struct vnode *newvp;
	1322	{
	1323	if ((bp->b_flags & B_PAGING) == 0) {
	1324	panic(
	1325	"pbreassignbuf() on non phys bp %p",
	1326	bp
	1327	);
	1328	}
	1329	bp->b_vp = newvp;
	1330	}
	1331
	1332	/*
	1333	* Reassign a buffer from one vnode to another.
	1334	* Used to assign file specific control information
	1335	* (indirect blocks) to the vnode to which they belong.
	1336	*/
	1337	void
	1338	reassignbuf(bp, newvp)
	1339	struct buf *bp;
	1340	struct vnode *newvp;
	1341	{
	1342	struct buflists *listheadp;
	1343	int delay;
	1344	int s;
	1345
	1346	if (newvp == NULL) {
	1347	printf("reassignbuf: NULL");
	1348	return;
	1349	}
	1350	++reassignbufcalls;
	1351
	1352	/*
	1353	* B_PAGING flagged buffers cannot be reassigned because their vp
	1354	* is not fully linked in.
	1355	*/
	1356	if (bp->b_flags & B_PAGING)
	1357	panic("cannot reassign paging buffer");
	1358
	1359	s = splbio();
	1360	/*
	1361	* Delete from old vnode list, if on one.
	1362	*/
	1363	if (bp->b_xflags & (BX_VNDIRTY \| BX_VNCLEAN)) {
	1364	if (bp->b_xflags & BX_VNDIRTY)
	1365	listheadp = &bp->b_vp->v_dirtyblkhd;
	1366	else
	1367	listheadp = &bp->b_vp->v_cleanblkhd;
	1368	TAILQ_REMOVE(listheadp, bp, b_vnbufs);
	1369	bp->b_xflags &= ~(BX_VNDIRTY \| BX_VNCLEAN);
	1370	if (bp->b_vp != newvp) {
	1371	vdrop(bp->b_vp);
	1372	bp->b_vp = NULL; /* for clarification */
	1373	}
	1374	}
	1375	/*
	1376	* If dirty, put on list of dirty buffers; otherwise insert onto list
	1377	* of clean buffers.
	1378	*/
	1379	if (bp->b_flags & B_DELWRI) {
	1380	struct buf *tbp;
	1381
	1382	listheadp = &newvp->v_dirtyblkhd;
	1383	if ((newvp->v_flag & VONWORKLST) == 0) {
	1384	switch (newvp->v_type) {
	1385	case VDIR:
	1386	delay = dirdelay;
	1387	break;
	1388	case VCHR:
	1389	case VBLK:
	1390	if (newvp->v_specmountpoint != NULL) {
	1391	delay = metadelay;
	1392	break;
	1393	}
	1394	/* fall through */
	1395	default:
	1396	delay = filedelay;
	1397	}
	1398	vn_syncer_add_to_worklist(newvp, delay);
	1399	}
	1400	bp->b_xflags \|= BX_VNDIRTY;
	1401	tbp = TAILQ_FIRST(listheadp);
	1402	if (tbp == NULL \|\|
	1403	bp->b_lblkno == 0 \|\|
	1404	(bp->b_lblkno > 0 && tbp->b_lblkno < 0) \|\|
	1405	(bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
	1406	TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
	1407	++reassignbufsortgood;
	1408	} else if (bp->b_lblkno < 0) {
	1409	TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs);
	1410	++reassignbufsortgood;
	1411	} else if (reassignbufmethod == 1) {
	1412	/*
	1413	* New sorting algorithm, only handle sequential case,
	1414	* otherwise append to end (but before metadata)
	1415	*/
	1416	if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
	1417	(tbp->b_xflags & BX_VNDIRTY)) {
	1418	/*
	1419	* Found the best place to insert the buffer
	1420	*/
	1421	TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
	1422	++reassignbufsortgood;
	1423	} else {
	1424	/*
	1425	* Missed, append to end, but before meta-data.
	1426	* We know that the head buffer in the list is
	1427	* not meta-data due to prior conditionals.
	1428	*
	1429	* Indirect effects: NFS second stage write
	1430	* tends to wind up here, giving maximum
	1431	* distance between the unstable write and the
	1432	* commit rpc.
	1433	*/
	1434	tbp = TAILQ_LAST(listheadp, buflists);
	1435	while (tbp && tbp->b_lblkno < 0)
	1436	tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
	1437	TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
	1438	++reassignbufsortbad;
	1439	}
	1440	} else {
	1441	/*
	1442	* Old sorting algorithm, scan queue and insert
	1443	*/
	1444	struct buf *ttbp;
	1445	while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) &&
	1446	(ttbp->b_lblkno < bp->b_lblkno)) {
	1447	++reassignbufloops;
	1448	tbp = ttbp;
	1449	}
	1450	TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
	1451	}
	1452	} else {
	1453	bp->b_xflags \|= BX_VNCLEAN;
	1454	TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
	1455	if ((newvp->v_flag & VONWORKLST) &&
	1456	TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
	1457	newvp->v_flag &= ~VONWORKLST;
	1458	LIST_REMOVE(newvp, v_synclist);
	1459	}
	1460	}
	1461	if (bp->b_vp != newvp) {
	1462	bp->b_vp = newvp;
	1463	vhold(bp->b_vp);
	1464	}
	1465	splx(s);
	1466	}
	1467
	1468	/*
	1469	* Create a vnode for a block device.
	1470	* Used for mounting the root file system.
	1471	*/
	1472	int
	1473	bdevvp(dev, vpp)
	1474	dev_t dev;
	1475	struct vnode **vpp;
	1476	{
	1477	struct vnode *vp;
	1478	struct vnode *nvp;
	1479	int error;
	1480
	1481	if (dev == NODEV) {
	1482	*vpp = NULLVP;
	1483	return (ENXIO);
	1484	}
	1485	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	1486	if (error) {
	1487	*vpp = NULLVP;
	1488	return (error);
	1489	}
	1490	vp = nvp;
	1491	vp->v_type = VBLK;
	1492	addalias(vp, dev);
	1493	*vpp = vp;
	1494	return (0);
	1495	}
	1496
	1497	/*
	1498	* Add vnode to the alias list hung off the dev_t.
	1499	*
	1500	* The reason for this gunk is that multiple vnodes can reference
	1501	* the same physical device, so checking vp->v_usecount to see
	1502	* how many users there are is inadequate; the v_usecount for
	1503	* the vnodes need to be accumulated. vcount() does that.
	1504	*/
	1505	void
	1506	addaliasu(nvp, nvp_rdev)
	1507	struct vnode *nvp;
	1508	udev_t nvp_rdev;
	1509	{
	1510
	1511	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
	1512	panic("addaliasu on non-special vnode");
	1513	addalias(nvp, udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0));
	1514	}
	1515
	1516	void
	1517	addalias(nvp, dev)
	1518	struct vnode *nvp;
	1519	dev_t dev;
	1520	{
	1521
	1522	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
	1523	panic("addalias on non-special vnode");
	1524
	1525	nvp->v_rdev = dev;
	1526	lwkt_gettoken(&spechash_token);
	1527	SLIST_INSERT_HEAD(&dev->si_hlist, nvp, v_specnext);
	1528	lwkt_reltoken(&spechash_token);
	1529	}
	1530
	1531	/*
	1532	* Grab a particular vnode from the free list, increment its
	1533	* reference count and lock it. The vnode lock bit is set if the
	1534	* vnode is being eliminated in vgone. The process is awakened
	1535	* when the transition is completed, and an error returned to
	1536	* indicate that the vnode is no longer usable (possibly having
	1537	* been changed to a new file system type).
	1538	*/
	1539	int
	1540	vget(vp, flags, td)
	1541	struct vnode *vp;
	1542	int flags;
	1543	struct thread *td;
	1544	{
	1545	int error;
	1546
	1547	/*
	1548	* If the vnode is in the process of being cleaned out for
	1549	* another use, we wait for the cleaning to finish and then
	1550	* return failure. Cleaning is determined by checking that
	1551	* the VXLOCK flag is set.
	1552	*/
	1553	if ((flags & LK_INTERLOCK) == 0) {
	1554	lwkt_gettoken(&vp->v_interlock);
	1555	}
	1556	if (vp->v_flag & VXLOCK) {
	1557	if (vp->v_vxproc == curproc) {
	1558	#if 0
	1559	/* this can now occur in normal operation */
	1560	log(LOG_INFO, "VXLOCK interlock avoided\n");
	1561	#endif
	1562	} else {
	1563	vp->v_flag \|= VXWANT;
	1564	lwkt_reltoken(&vp->v_interlock);
	1565	tsleep((caddr_t)vp, 0, "vget", 0);
	1566	return (ENOENT);
	1567	}
	1568	}
	1569
	1570	vp->v_usecount++;
	1571
	1572	if (VSHOULDBUSY(vp))
	1573	vbusy(vp);
	1574	if (flags & LK_TYPE_MASK) {
	1575	if ((error = vn_lock(vp, flags \| LK_INTERLOCK, td)) != 0) {
	1576	/*
	1577	* must expand vrele here because we do not want
	1578	* to call VOP_INACTIVE if the reference count
	1579	* drops back to zero since it was never really
	1580	* active. We must remove it from the free list
	1581	* before sleeping so that multiple processes do
	1582	* not try to recycle it.
	1583	*/
	1584	lwkt_gettoken(&vp->v_interlock);
	1585	vp->v_usecount--;
	1586	if (VSHOULDFREE(vp))
	1587	vfree(vp);
	1588	else
	1589	vlruvp(vp);
	1590	lwkt_reltoken(&vp->v_interlock);
	1591	}
	1592	return (error);
	1593	}
	1594	lwkt_reltoken(&vp->v_interlock);
	1595	return (0);
	1596	}
	1597
	1598	void
	1599	vref(struct vnode *vp)
	1600	{
	1601	lwkt_gettoken(&vp->v_interlock);
	1602	vp->v_usecount++;
	1603	lwkt_reltoken(&vp->v_interlock);
	1604	}
	1605
	1606	/*
	1607	* Vnode put/release.
	1608	* If count drops to zero, call inactive routine and return to freelist.
	1609	*/
	1610	void
	1611	vrele(struct vnode *vp)
	1612	{
	1613	struct thread td = curthread; / XXX */
	1614
	1615	KASSERT(vp != NULL, ("vrele: null vp"));
	1616
	1617	lwkt_gettoken(&vp->v_interlock);
	1618
	1619	if (vp->v_usecount > 1) {
	1620
	1621	vp->v_usecount--;
	1622	lwkt_reltoken(&vp->v_interlock);
	1623
	1624	return;
	1625	}
	1626
	1627	if (vp->v_usecount == 1) {
	1628	vp->v_usecount--;
	1629	/*
	1630	* We must call VOP_INACTIVE with the node locked.
	1631	* If we are doing a vpu, the node is already locked,
	1632	* but, in the case of vrele, we must explicitly lock
	1633	* the vnode before calling VOP_INACTIVE
	1634	*/
	1635
	1636	if (vn_lock(vp, LK_EXCLUSIVE \| LK_INTERLOCK, td) == 0)
	1637	VOP_INACTIVE(vp, td);
	1638	if (VSHOULDFREE(vp))
	1639	vfree(vp);
	1640	else
	1641	vlruvp(vp);
	1642	} else {
	1643	#ifdef DIAGNOSTIC
	1644	vprint("vrele: negative ref count", vp);
	1645	lwkt_reltoken(&vp->v_interlock);
	1646	#endif
	1647	panic("vrele: negative ref cnt");
	1648	}
	1649	}
	1650
	1651	void
	1652	vput(struct vnode *vp)
	1653	{
	1654	struct thread td = curthread; / XXX */
	1655
	1656	KASSERT(vp != NULL, ("vput: null vp"));
	1657
	1658	lwkt_gettoken(&vp->v_interlock);
	1659
	1660	if (vp->v_usecount > 1) {
	1661	vp->v_usecount--;
	1662	VOP_UNLOCK(vp, LK_INTERLOCK, td);
	1663	return;
	1664	}
	1665
	1666	if (vp->v_usecount == 1) {
	1667	vp->v_usecount--;
	1668	/*
	1669	* We must call VOP_INACTIVE with the node locked.
	1670	* If we are doing a vpu, the node is already locked,
	1671	* so we just need to release the vnode mutex.
	1672	*/
	1673	lwkt_reltoken(&vp->v_interlock);
	1674	VOP_INACTIVE(vp, td);
	1675	if (VSHOULDFREE(vp))
	1676	vfree(vp);
	1677	else
	1678	vlruvp(vp);
	1679	} else {
	1680	#ifdef DIAGNOSTIC
	1681	vprint("vput: negative ref count", vp);
	1682	#endif
	1683	panic("vput: negative ref cnt");
	1684	}
	1685	}
	1686
	1687	/*
	1688	* Somebody doesn't want the vnode recycled.
	1689	*/
	1690	void
	1691	vhold(vp)
	1692	struct vnode *vp;
	1693	{
	1694	int s;
	1695
	1696	s = splbio();
	1697	vp->v_holdcnt++;
	1698	if (VSHOULDBUSY(vp))
	1699	vbusy(vp);
	1700	splx(s);
	1701	}
	1702
	1703	/*
	1704	* One less who cares about this vnode.
	1705	*/
	1706	void
	1707	vdrop(vp)
	1708	struct vnode *vp;
	1709	{
	1710	int s;
	1711
	1712	s = splbio();
	1713	if (vp->v_holdcnt <= 0)
	1714	panic("vdrop: holdcnt");
	1715	vp->v_holdcnt--;
	1716	if (VSHOULDFREE(vp))
	1717	vfree(vp);
	1718	splx(s);
	1719	}
	1720
	1721	/*
	1722	* Remove any vnodes in the vnode table belonging to mount point mp.
	1723	*
	1724	* If FORCECLOSE is not specified, there should not be any active ones,
	1725	* return error if any are found (nb: this is a user error, not a
	1726	* system error). If FORCECLOSE is specified, detach any active vnodes
	1727	* that are found.
	1728	*
	1729	* If WRITECLOSE is set, only flush out regular file vnodes open for
	1730	* writing.
	1731	*
	1732	* SKIPSYSTEM causes any vnodes marked VSYSTEM to be skipped.
	1733	*
	1734	* `rootrefs' specifies the base reference count for the root vnode
	1735	* of this filesystem. The root vnode is considered busy if its
	1736	* v_usecount exceeds this value. On a successful return, vflush()
	1737	* will call vrele() on the root vnode exactly rootrefs times.
	1738	* If the SKIPSYSTEM or WRITECLOSE flags are specified, rootrefs must
	1739	* be zero.
	1740	*/
	1741	#ifdef DIAGNOSTIC
	1742	static int busyprt = 0; /* print out busy vnodes */
	1743	SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
	1744	#endif
	1745
	1746	int
	1747	vflush(mp, rootrefs, flags)
	1748	struct mount *mp;
	1749	int rootrefs;
	1750	int flags;
	1751	{
	1752	struct thread td = curthread; / XXX */
	1753	struct vnode vp, nvp, *rootvp = NULL;
	1754	struct vattr vattr;
	1755	int busy = 0, error;
	1756
	1757	if (rootrefs > 0) {
	1758	KASSERT((flags & (SKIPSYSTEM \| WRITECLOSE)) == 0,
	1759	("vflush: bad args"));
	1760	/*
	1761	* Get the filesystem root vnode. We can vput() it
	1762	* immediately, since with rootrefs > 0, it won't go away.
	1763	*/
	1764	if ((error = VFS_ROOT(mp, &rootvp)) != 0)
	1765	return (error);
	1766	vput(rootvp);
	1767	}
	1768	lwkt_gettoken(&mntvnode_token);
	1769	loop:
	1770	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp; vp = nvp) {
	1771	/*
	1772	* Make sure this vnode wasn't reclaimed in getnewvnode().
	1773	* Start over if it has (it won't be on the list anymore).
	1774	*/
	1775	if (vp->v_mount != mp)
	1776	goto loop;
	1777	nvp = TAILQ_NEXT(vp, v_nmntvnodes);
	1778
	1779	lwkt_gettoken(&vp->v_interlock);
	1780	/*
	1781	* Skip over a vnodes marked VSYSTEM.
	1782	*/
	1783	if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
	1784	lwkt_reltoken(&vp->v_interlock);
	1785	continue;
	1786	}
	1787	/*
	1788	* If WRITECLOSE is set, flush out unlinked but still open
	1789	* files (even if open only for reading) and regular file
	1790	* vnodes open for writing.
	1791	*/
	1792	if ((flags & WRITECLOSE) &&
	1793	(vp->v_type == VNON \|\|
	1794	(VOP_GETATTR(vp, &vattr, td) == 0 &&
	1795	vattr.va_nlink > 0)) &&
	1796	(vp->v_writecount == 0 \|\| vp->v_type != VREG)) {
	1797	lwkt_reltoken(&vp->v_interlock);
	1798	continue;
	1799	}
	1800
	1801	/*
	1802	* With v_usecount == 0, all we need to do is clear out the
	1803	* vnode data structures and we are done.
	1804	*/
	1805	if (vp->v_usecount == 0) {
	1806	lwkt_reltoken(&mntvnode_token);
	1807	vgonel(vp, td);
	1808	lwkt_gettoken(&mntvnode_token);
	1809	continue;
	1810	}
	1811
	1812	/*
	1813	* If FORCECLOSE is set, forcibly close the vnode. For block
	1814	* or character devices, revert to an anonymous device. For
	1815	* all other files, just kill them.
	1816	*/
	1817	if (flags & FORCECLOSE) {
	1818	lwkt_reltoken(&mntvnode_token);
	1819	if (vp->v_type != VBLK && vp->v_type != VCHR) {
	1820	vgonel(vp, td);
	1821	} else {
	1822	vclean(vp, 0, td);
	1823	vp->v_op = spec_vnodeop_p;
	1824	insmntque(vp, (struct mount *) 0);
	1825	}
	1826	lwkt_gettoken(&mntvnode_token);
	1827	continue;
	1828	}
	1829	#ifdef DIAGNOSTIC
	1830	if (busyprt)
	1831	vprint("vflush: busy vnode", vp);
	1832	#endif
	1833	lwkt_reltoken(&vp->v_interlock);
	1834	busy++;
	1835	}
	1836	lwkt_reltoken(&mntvnode_token);
	1837	if (rootrefs > 0 && (flags & FORCECLOSE) == 0) {
	1838	/*
	1839	* If just the root vnode is busy, and if its refcount
	1840	* is equal to `rootrefs', then go ahead and kill it.
	1841	*/
	1842	lwkt_gettoken(&rootvp->v_interlock);
	1843	KASSERT(busy > 0, ("vflush: not busy"));
	1844	KASSERT(rootvp->v_usecount >= rootrefs, ("vflush: rootrefs"));
	1845	if (busy == 1 && rootvp->v_usecount == rootrefs) {
	1846	vgonel(rootvp, td);
	1847	busy = 0;
	1848	} else
	1849	lwkt_reltoken(&rootvp->v_interlock);
	1850	}
	1851	if (busy)
	1852	return (EBUSY);
	1853	for (; rootrefs > 0; rootrefs--)
	1854	vrele(rootvp);
	1855	return (0);
	1856	}
	1857
	1858	/*
	1859	* We do not want to recycle the vnode too quickly.
	1860	*
	1861	* XXX we can't move vp's around the nvnodelist without really screwing
	1862	* up the efficiency of filesystem SYNC and friends. This code is
	1863	* disabled until we fix the syncing code's scanning algorithm.
	1864	*/
	1865	static void
	1866	vlruvp(struct vnode *vp)
	1867	{
	1868	#if 0
	1869	struct mount *mp;
	1870
	1871	if ((mp = vp->v_mount) != NULL) {
	1872	lwkt_gettoken(&mntvnode_token);
	1873	TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
	1874	TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
	1875	lwkt_reltoken(&mntvnode_token);
	1876	}
	1877	#endif
	1878	}
	1879
	1880	/*
	1881	* Disassociate the underlying file system from a vnode.
	1882	*/
	1883	static void
	1884	vclean(struct vnode vp, int flags, struct thread td)
	1885	{
	1886	int active;
	1887
	1888	/*
	1889	* Check to see if the vnode is in use. If so we have to reference it
	1890	* before we clean it out so that its count cannot fall to zero and
	1891	* generate a race against ourselves to recycle it.
	1892	*/
	1893	if ((active = vp->v_usecount))
	1894	vp->v_usecount++;
	1895
	1896	/*
	1897	* Prevent the vnode from being recycled or brought into use while we
	1898	* clean it out.
	1899	*/
	1900	if (vp->v_flag & VXLOCK)
	1901	panic("vclean: deadlock");
	1902	vp->v_flag \|= VXLOCK;
	1903	vp->v_vxproc = curproc;
	1904	/*
	1905	* Even if the count is zero, the VOP_INACTIVE routine may still
	1906	* have the object locked while it cleans it out. The VOP_LOCK
	1907	* ensures that the VOP_INACTIVE routine is done with its work.
	1908	* For active vnodes, it ensures that no other activity can
	1909	* occur while the underlying object is being cleaned out.
	1910	*/
	1911	VOP_LOCK(vp, LK_DRAIN \| LK_INTERLOCK, td);
	1912
	1913	/*
	1914	* Clean out any buffers associated with the vnode.
	1915	*/
	1916	vinvalbuf(vp, V_SAVE, td, 0, 0);
	1917
	1918	VOP_DESTROYVOBJECT(vp);
	1919
	1920	/*
	1921	* If purging an active vnode, it must be closed and
	1922	* deactivated before being reclaimed. Note that the
	1923	* VOP_INACTIVE will unlock the vnode.
	1924	*/
	1925	if (active) {
	1926	if (flags & DOCLOSE)
	1927	VOP_CLOSE(vp, FNONBLOCK, td);
	1928	VOP_INACTIVE(vp, td);
	1929	} else {
	1930	/*
	1931	* Any other processes trying to obtain this lock must first
	1932	* wait for VXLOCK to clear, then call the new lock operation.
	1933	*/
	1934	VOP_UNLOCK(vp, 0, td);
	1935	}
	1936	/*
	1937	* Reclaim the vnode.
	1938	*/
	1939	if (VOP_RECLAIM(vp, td))
	1940	panic("vclean: cannot reclaim");
	1941
	1942	if (active) {
	1943	/*
	1944	* Inline copy of vrele() since VOP_INACTIVE
	1945	* has already been called.
	1946	*/
	1947	lwkt_gettoken(&vp->v_interlock);
	1948	if (--vp->v_usecount <= 0) {
	1949	#ifdef DIAGNOSTIC
	1950	if (vp->v_usecount < 0 \|\| vp->v_writecount != 0) {
	1951	vprint("vclean: bad ref count", vp);
	1952	panic("vclean: ref cnt");
	1953	}
	1954	#endif
	1955	vfree(vp);
	1956	}
	1957	lwkt_reltoken(&vp->v_interlock);
	1958	}
	1959
	1960	cache_purge(vp);
	1961	vp->v_vnlock = NULL;
	1962
	1963	if (VSHOULDFREE(vp))
	1964	vfree(vp);
	1965
	1966	/*
	1967	* Done with purge, notify sleepers of the grim news.
	1968	*/
	1969	vp->v_op = dead_vnodeop_p;
	1970	vn_pollgone(vp);
	1971	vp->v_tag = VT_NON;
	1972	vp->v_flag &= ~VXLOCK;
	1973	vp->v_vxproc = NULL;
	1974	if (vp->v_flag & VXWANT) {
	1975	vp->v_flag &= ~VXWANT;
	1976	wakeup((caddr_t) vp);
	1977	}
	1978	}
	1979
	1980	/*
	1981	* Eliminate all activity associated with the requested vnode
	1982	* and with all vnodes aliased to the requested vnode.
	1983	*/
	1984	int
	1985	vop_revoke(ap)
	1986	struct vop_revoke_args /* {
	1987	struct vnode *a_vp;
	1988	int a_flags;
	1989	} / ap;
	1990	{
	1991	struct vnode vp, vq;
	1992	dev_t dev;
	1993
	1994	KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));
	1995
	1996	vp = ap->a_vp;
	1997	/*
	1998	* If a vgone (or vclean) is already in progress,
	1999	* wait until it is done and return.
	2000	*/
	2001	if (vp->v_flag & VXLOCK) {
	2002	vp->v_flag \|= VXWANT;
	2003	lwkt_reltoken(&vp->v_interlock);
	2004	tsleep((caddr_t)vp, 0, "vop_revokeall", 0);
	2005	return (0);
	2006	}
	2007	dev = vp->v_rdev;
	2008	for (;;) {
	2009	lwkt_gettoken(&spechash_token);
	2010	vq = SLIST_FIRST(&dev->si_hlist);
	2011	lwkt_reltoken(&spechash_token);
	2012	if (!vq)
	2013	break;
	2014	vgone(vq);
	2015	}
	2016	return (0);
	2017	}
	2018
	2019	/*
	2020	* Recycle an unused vnode to the front of the free list.
	2021	* Release the passed interlock if the vnode will be recycled.
	2022	*/
	2023	int
	2024	vrecycle(struct vnode vp, struct lwkt_token inter_lkp, struct thread *td)
	2025	{
	2026	lwkt_gettoken(&vp->v_interlock);
	2027	if (vp->v_usecount == 0) {
	2028	if (inter_lkp) {
	2029	lwkt_reltoken(inter_lkp);
	2030	}
	2031	vgonel(vp, td);
	2032	return (1);
	2033	}
	2034	lwkt_reltoken(&vp->v_interlock);
	2035	return (0);
	2036	}
	2037
	2038	/*
	2039	* Eliminate all activity associated with a vnode
	2040	* in preparation for reuse.
	2041	*/
	2042	void
	2043	vgone(struct vnode *vp)
	2044	{
	2045	struct thread td = curthread; / XXX */
	2046
	2047	lwkt_gettoken(&vp->v_interlock);
	2048	vgonel(vp, td);
	2049	}
	2050
	2051	/*
	2052	* vgone, with the vp interlock held.
	2053	*/
	2054	void
	2055	vgonel(struct vnode vp, struct thread td)
	2056	{
	2057	int s;
	2058
	2059	/*
	2060	* If a vgone (or vclean) is already in progress,
	2061	* wait until it is done and return.
	2062	*/
	2063	if (vp->v_flag & VXLOCK) {
	2064	vp->v_flag \|= VXWANT;
	2065	lwkt_reltoken(&vp->v_interlock);
	2066	tsleep((caddr_t)vp, 0, "vgone", 0);
	2067	return;
	2068	}
	2069
	2070	/*
	2071	* Clean out the filesystem specific data.
	2072	*/
	2073	vclean(vp, DOCLOSE, td);
	2074	lwkt_gettoken(&vp->v_interlock);
	2075
	2076	/*
	2077	* Delete from old mount point vnode list, if on one.
	2078	*/
	2079	if (vp->v_mount != NULL)
	2080	insmntque(vp, (struct mount *)0);
	2081	/*
	2082	* If special device, remove it from special device alias list
	2083	* if it is on one.
	2084	*/
	2085	if ((vp->v_type == VBLK \|\| vp->v_type == VCHR) && vp->v_rdev != NULL) {
	2086	lwkt_gettoken(&spechash_token);
	2087	SLIST_REMOVE(&vp->v_hashchain, vp, vnode, v_specnext);
	2088	freedev(vp->v_rdev);
	2089	lwkt_reltoken(&spechash_token);
	2090	vp->v_rdev = NULL;
	2091	}
	2092
	2093	/*
	2094	* If it is on the freelist and not already at the head,
	2095	* move it to the head of the list. The test of the
	2096	* VDOOMED flag and the reference count of zero is because
	2097	* it will be removed from the free list by getnewvnode,
	2098	* but will not have its reference count incremented until
	2099	* after calling vgone. If the reference count were
	2100	* incremented first, vgone would (incorrectly) try to
	2101	* close the previous instance of the underlying object.
	2102	*/
	2103	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
	2104	s = splbio();
	2105	lwkt_gettoken(&vnode_free_list_token);
	2106	if (vp->v_flag & VFREE)
	2107	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	2108	else
	2109	freevnodes++;
	2110	vp->v_flag \|= VFREE;
	2111	TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	2112	lwkt_reltoken(&vnode_free_list_token);
	2113	splx(s);
	2114	}
	2115
	2116	vp->v_type = VBAD;
	2117	lwkt_reltoken(&vp->v_interlock);
	2118	}
	2119
	2120	/*
	2121	* Lookup a vnode by device number.
	2122	*/
	2123	int
	2124	vfinddev(dev, type, vpp)
	2125	dev_t dev;
	2126	enum vtype type;
	2127	struct vnode **vpp;
	2128	{
	2129	struct vnode *vp;
	2130
	2131	lwkt_gettoken(&spechash_token);
	2132	SLIST_FOREACH(vp, &dev->si_hlist, v_specnext) {
	2133	if (type == vp->v_type) {
	2134	*vpp = vp;
	2135	lwkt_reltoken(&spechash_token);
	2136	return (1);
	2137	}
	2138	}
	2139	lwkt_reltoken(&spechash_token);
	2140	return (0);
	2141	}
	2142
	2143	/*
	2144	* Calculate the total number of references to a special device.
	2145	*/
	2146	int
	2147	vcount(vp)
	2148	struct vnode *vp;
	2149	{
	2150	struct vnode *vq;
	2151	int count;
	2152
	2153	count = 0;
	2154	lwkt_gettoken(&spechash_token);
	2155	SLIST_FOREACH(vq, &vp->v_hashchain, v_specnext)
	2156	count += vq->v_usecount;
	2157	lwkt_reltoken(&spechash_token);
	2158	return (count);
	2159	}
	2160
	2161	/*
	2162	* Same as above, but using the dev_t as argument
	2163	*/
	2164
	2165	int
	2166	count_dev(dev)
	2167	dev_t dev;
	2168	{
	2169	struct vnode *vp;
	2170
	2171	vp = SLIST_FIRST(&dev->si_hlist);
	2172	if (vp == NULL)
	2173	return (0);
	2174	return(vcount(vp));
	2175	}
	2176
	2177	/*
	2178	* Print out a description of a vnode.
	2179	*/
	2180	static char *typename[] =
	2181	{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
	2182
	2183	void
	2184	vprint(label, vp)
	2185	char *label;
	2186	struct vnode *vp;
	2187	{
	2188	char buf[96];
	2189
	2190	if (label != NULL)
	2191	printf("%s: %p: ", label, (void *)vp);
	2192	else
	2193	printf("%p: ", (void *)vp);
	2194	printf("type %s, usecount %d, writecount %d, refcount %d,",
	2195	typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	2196	vp->v_holdcnt);
	2197	buf[0] = '\0';
	2198	if (vp->v_flag & VROOT)
	2199	strcat(buf, "\|VROOT");
	2200	if (vp->v_flag & VTEXT)
	2201	strcat(buf, "\|VTEXT");
	2202	if (vp->v_flag & VSYSTEM)
	2203	strcat(buf, "\|VSYSTEM");
	2204	if (vp->v_flag & VXLOCK)
	2205	strcat(buf, "\|VXLOCK");
	2206	if (vp->v_flag & VXWANT)
	2207	strcat(buf, "\|VXWANT");
	2208	if (vp->v_flag & VBWAIT)
	2209	strcat(buf, "\|VBWAIT");
	2210	if (vp->v_flag & VDOOMED)
	2211	strcat(buf, "\|VDOOMED");
	2212	if (vp->v_flag & VFREE)
	2213	strcat(buf, "\|VFREE");
	2214	if (vp->v_flag & VOBJBUF)
	2215	strcat(buf, "\|VOBJBUF");
	2216	if (buf[0] != '\0')
	2217	printf(" flags (%s)", &buf[1]);
	2218	if (vp->v_data == NULL) {
	2219	printf("\n");
	2220	} else {
	2221	printf("\n\t");
	2222	VOP_PRINT(vp);
	2223	}
	2224	}
	2225
	#ifdef DDB
	#include <ddb/ddb.h>
	/*
	 * List all of the locked vnodes in the system.
	 * Called when debugging the kernel.
	 *
	 * Walks the mount list; mounts that cannot be busied without waiting
	 * are skipped.  Every vnode on a busied mount for which VOP_ISLOCKED()
	 * is true is printed with vprint().
	 */
	DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
	{
		struct thread *td = curthread;	/* XXX */
		struct mount *mp, *nmp;
		struct vnode *vp;

		printf("Locked vnodes\n");
		lwkt_gettoken(&mountlist_token);
		for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
			if (vfs_busy(mp, LK_NOWAIT, &mountlist_token, td)) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
			TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
				if (VOP_ISLOCKED(vp, NULL))
					vprint((char *)0, vp);
			}
			/* Re-take the list token before stepping to the next mount. */
			lwkt_gettoken(&mountlist_token);
			nmp = TAILQ_NEXT(mp, mnt_list);
			vfs_unbusy(mp, td);
		}
		lwkt_reltoken(&mountlist_token);
	}
	#endif
	2256
	2257	/*
	2258	* Top level filesystem related information gathering.
	2259	*/
	2260	static int sysctl_ovfs_conf __P((SYSCTL_HANDLER_ARGS));
	2261
	2262	static int
	2263	vfs_sysctl(SYSCTL_HANDLER_ARGS)
	2264	{
	2265	int name = (int )arg1 - 1; /* XXX */
	2266	u_int namelen = arg2 + 1; /* XXX */
	2267	struct vfsconf *vfsp;
	2268
	2269	#if 1 \|\| defined(COMPAT_PRELITE2)
	2270	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
	2271	if (namelen == 1)
	2272	return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
	2273	#endif
	2274
	2275	#ifdef notyet
	2276	/* all sysctl names at this level are at least name and field */
	2277	if (namelen < 2)
	2278	return (ENOTDIR); /* overloaded */
	2279	if (name[0] != VFS_GENERIC) {
	2280	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
	2281	if (vfsp->vfc_typenum == name[0])
	2282	break;
	2283	if (vfsp == NULL)
	2284	return (EOPNOTSUPP);
	2285	return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
	2286	oldp, oldlenp, newp, newlen, p));
	2287	}
	2288	#endif
	2289	switch (name[1]) {
	2290	case VFS_MAXTYPENUM:
	2291	if (namelen != 2)
	2292	return (ENOTDIR);
	2293	return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
	2294	case VFS_CONF:
	2295	if (namelen != 3)
	2296	return (ENOTDIR); /* overloaded */
	2297	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
	2298	if (vfsp->vfc_typenum == name[2])
	2299	break;
	2300	if (vfsp == NULL)
	2301	return (EOPNOTSUPP);
	2302	return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
	2303	}
	2304	return (EOPNOTSUPP);
	2305	}
	2306
	2307	SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
	2308	"Generic filesystem");
	2309
	2310	#if 1 \|\| defined(COMPAT_PRELITE2)
	2311
	2312	static int
	2313	sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS)
	2314	{
	2315	int error;
	2316	struct vfsconf *vfsp;
	2317	struct ovfsconf ovfs;
	2318
	2319	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
	2320	ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */
	2321	strcpy(ovfs.vfc_name, vfsp->vfc_name);
	2322	ovfs.vfc_index = vfsp->vfc_typenum;
	2323	ovfs.vfc_refcount = vfsp->vfc_refcount;
	2324	ovfs.vfc_flags = vfsp->vfc_flags;
	2325	error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
	2326	if (error)
	2327	return error;
	2328	}
	2329	return 0;
	2330	}
	2331
	2332	#endif /* 1 \|\| COMPAT_PRELITE2 */
	2333
	#if 0
	#define KINFO_VNODESLOP	10
	/*
	 * Dump vnode list (via sysctl).
	 * Copyout address of vnode followed by vnode.
	 *
	 * This handler is compiled out.  With no old buffer supplied it only
	 * reports a size estimate of numvnodes + KINFO_VNODESLOP records.
	 */
	/* ARGSUSED */
	static int
	sysctl_vnode(SYSCTL_HANDLER_ARGS)
	{
		struct proc *p = curproc;	/* XXX */
		struct mount *mp, *nmp;
		struct vnode *nvp, *vp;
		int error;

	#define VPTRSZ	sizeof (struct vnode *)
	#define VNODESZ	sizeof (struct vnode)

		req->lock = 0;
		if (!req->oldptr) /* Make an estimate */
			return (SYSCTL_OUT(req, 0,
				(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

		lwkt_gettoken(&mountlist_token);
		for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
			if (vfs_busy(mp, LK_NOWAIT, &mountlist_token, p)) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
	again:
			lwkt_gettoken(&mntvnode_token);
			for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
			     vp != NULL;
			     vp = nvp) {
				/*
				 * Check that the vp is still associated with
				 * this filesystem.  RACE: could have been
				 * recycled onto the same filesystem.
				 */
				if (vp->v_mount != mp) {
					lwkt_reltoken(&mntvnode_token);
					goto again;
				}
				nvp = TAILQ_NEXT(vp, v_nmntvnodes);
				lwkt_reltoken(&mntvnode_token);
				/*
				 * NOTE(review): this early return leaves mp busied
				 * (no vfs_unbusy()) - fix before re-enabling.
				 */
				if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
				    (error = SYSCTL_OUT(req, vp, VNODESZ)))
					return (error);
				lwkt_gettoken(&mntvnode_token);
			}
			lwkt_reltoken(&mntvnode_token);
			lwkt_gettoken(&mountlist_token);
			nmp = TAILQ_NEXT(mp, mnt_list);
			vfs_unbusy(mp, p);
		}
		lwkt_reltoken(&mountlist_token);

		return (0);
	}
	#endif

	/*
	 * XXX
	 * Exporting the vnode list on large systems causes them to crash.
	 * Exporting the vnode list on medium systems causes sysctl to coredump.
	 */
	#if 0
	SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
		0, 0, sysctl_vnode, "S,vnode", "");
	#endif
	2404
	2405	/*
	2406	* Check to see if a filesystem is mounted on a block device.
	2407	*/
	2408	int
	2409	vfs_mountedon(vp)
	2410	struct vnode *vp;
	2411	{
	2412
	2413	if (vp->v_specmountpoint != NULL)
	2414	return (EBUSY);
	2415	return (0);
	2416	}
	2417
	2418	/*
	2419	* Unmount all filesystems. The list is traversed in reverse order
	2420	* of mounting to avoid dependencies.
	2421	*/
	2422	void
	2423	vfs_unmountall()
	2424	{
	2425	struct mount *mp;
	2426	struct thread *td = curthread;
	2427	int error;
	2428
	2429	if (td->td_proc == NULL)
	2430	td = initproc->p_thread; /* XXX XXX use proc0 instead? */
	2431
	2432	/*
	2433	* Since this only runs when rebooting, it is not interlocked.
	2434	*/
	2435	while(!TAILQ_EMPTY(&mountlist)) {
	2436	mp = TAILQ_LAST(&mountlist, mntlist);
	2437	error = dounmount(mp, MNT_FORCE, td);
	2438	if (error) {
	2439	TAILQ_REMOVE(&mountlist, mp, mnt_list);
	2440	printf("unmount of %s failed (",
	2441	mp->mnt_stat.f_mntonname);
	2442	if (error == EBUSY)
	2443	printf("BUSY)\n");
	2444	else
	2445	printf("%d)\n", error);
	2446	} else {
	2447	/* The unmount has removed mp from the mountlist */
	2448	}
	2449	}
	2450	}
	2451
	2452	/*
	2453	* Build hash lists of net addresses and hang them off the mount point.
	2454	* Called by ufs_mount() to set up the lists of export addresses.
	2455	*/
	2456	static int
	2457	vfs_hang_addrlist(mp, nep, argp)
	2458	struct mount *mp;
	2459	struct netexport *nep;
	2460	struct export_args *argp;
	2461	{
	2462	struct netcred *np;
	2463	struct radix_node_head *rnh;
	2464	int i;
	2465	struct radix_node *rn;
	2466	struct sockaddr saddr, smask = 0;
	2467	struct domain *dom;
	2468	int error;
	2469
	2470	if (argp->ex_addrlen == 0) {
	2471	if (mp->mnt_flag & MNT_DEFEXPORTED)
	2472	return (EPERM);
	2473	np = &nep->ne_defexported;
	2474	np->netc_exflags = argp->ex_flags;
	2475	np->netc_anon = argp->ex_anon;
	2476	np->netc_anon.cr_ref = 1;
	2477	mp->mnt_flag \|= MNT_DEFEXPORTED;
	2478	return (0);
	2479	}
	2480
	2481	if (argp->ex_addrlen < 0 \|\| argp->ex_addrlen > MLEN)
	2482	return (EINVAL);
	2483	if (argp->ex_masklen < 0 \|\| argp->ex_masklen > MLEN)
	2484	return (EINVAL);
	2485
	2486	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	2487	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
	2488	bzero((caddr_t) np, i);
	2489	saddr = (struct sockaddr *) (np + 1);
	2490	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
	2491	goto out;
	2492	if (saddr->sa_len > argp->ex_addrlen)
	2493	saddr->sa_len = argp->ex_addrlen;
	2494	if (argp->ex_masklen) {
	2495	smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
	2496	error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
	2497	if (error)
	2498	goto out;
	2499	if (smask->sa_len > argp->ex_masklen)
	2500	smask->sa_len = argp->ex_masklen;
	2501	}
	2502	i = saddr->sa_family;
	2503	if ((rnh = nep->ne_rtable[i]) == 0) {
	2504	/*
	2505	* Seems silly to initialize every AF when most are not used,
	2506	* do so on demand here
	2507	*/
	2508	for (dom = domains; dom; dom = dom->dom_next)
	2509	if (dom->dom_family == i && dom->dom_rtattach) {
	2510	dom->dom_rtattach((void **) &nep->ne_rtable[i],
	2511	dom->dom_rtoffset);
	2512	break;
	2513	}
	2514	if ((rnh = nep->ne_rtable[i]) == 0) {
	2515	error = ENOBUFS;
	2516	goto out;
	2517	}
	2518	}
	2519	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
	2520	np->netc_rnodes);
	2521	if (rn == 0 \|\| np != (struct netcred ) rn) { / already exists */
	2522	error = EPERM;
	2523	goto out;
	2524	}
	2525	np->netc_exflags = argp->ex_flags;
	2526	np->netc_anon = argp->ex_anon;
	2527	np->netc_anon.cr_ref = 1;
	2528	return (0);
	2529	out:
	2530	free(np, M_NETADDR);
	2531	return (error);
	2532	}
	2533
	2534	/* ARGSUSED */
	2535	static int
	2536	vfs_free_netcred(rn, w)
	2537	struct radix_node *rn;
	2538	void *w;
	2539	{
	2540	struct radix_node_head rnh = (struct radix_node_head ) w;
	2541
	2542	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
	2543	free((caddr_t) rn, M_NETADDR);
	2544	return (0);
	2545	}
	2546
	2547	/*
	2548	* Free the net address hash lists that are hanging off the mount points.
	2549	*/
	2550	static void
	2551	vfs_free_addrlist(nep)
	2552	struct netexport *nep;
	2553	{
	2554	int i;
	2555	struct radix_node_head *rnh;
	2556
	2557	for (i = 0; i <= AF_MAX; i++)
	2558	if ((rnh = nep->ne_rtable[i])) {
	2559	(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
	2560	(caddr_t) rnh);
	2561	free((caddr_t) rnh, M_RTABLE);
	2562	nep->ne_rtable[i] = 0;
	2563	}
	2564	}
	2565
	2566	int
	2567	vfs_export(mp, nep, argp)
	2568	struct mount *mp;
	2569	struct netexport *nep;
	2570	struct export_args *argp;
	2571	{
	2572	int error;
	2573
	2574	if (argp->ex_flags & MNT_DELEXPORT) {
	2575	if (mp->mnt_flag & MNT_EXPUBLIC) {
	2576	vfs_setpublicfs(NULL, NULL, NULL);
	2577	mp->mnt_flag &= ~MNT_EXPUBLIC;
	2578	}
	2579	vfs_free_addrlist(nep);
	2580	mp->mnt_flag &= ~(MNT_EXPORTED \| MNT_DEFEXPORTED);
	2581	}
	2582	if (argp->ex_flags & MNT_EXPORTED) {
	2583	if (argp->ex_flags & MNT_EXPUBLIC) {
	2584	if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
	2585	return (error);
	2586	mp->mnt_flag \|= MNT_EXPUBLIC;
	2587	}
	2588	if ((error = vfs_hang_addrlist(mp, nep, argp)))
	2589	return (error);
	2590	mp->mnt_flag \|= MNT_EXPORTED;
	2591	}
	2592	return (0);
	2593	}
	2594
	2595
	2596	/*
	2597	* Set the publicly exported filesystem (WebNFS). Currently, only
	2598	* one public filesystem is possible in the spec (RFC 2054 and 2055)
	2599	*/
	2600	int
	2601	vfs_setpublicfs(mp, nep, argp)
	2602	struct mount *mp;
	2603	struct netexport *nep;
	2604	struct export_args *argp;
	2605	{
	2606	int error;
	2607	struct vnode *rvp;
	2608	char *cp;
	2609
	2610	/*
	2611	* mp == NULL -> invalidate the current info, the FS is
	2612	* no longer exported. May be called from either vfs_export
	2613	* or unmount, so check if it hasn't already been done.
	2614	*/
	2615	if (mp == NULL) {
	2616	if (nfs_pub.np_valid) {
	2617	nfs_pub.np_valid = 0;
	2618	if (nfs_pub.np_index != NULL) {
	2619	FREE(nfs_pub.np_index, M_TEMP);
	2620	nfs_pub.np_index = NULL;
	2621	}
	2622	}
	2623	return (0);
	2624	}
	2625
	2626	/*
	2627	* Only one allowed at a time.
	2628	*/
	2629	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
	2630	return (EBUSY);
	2631
	2632	/*
	2633	* Get real filehandle for root of exported FS.
	2634	*/
	2635	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
	2636	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
	2637
	2638	if ((error = VFS_ROOT(mp, &rvp)))
	2639	return (error);
	2640
	2641	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
	2642	return (error);
	2643
	2644	vput(rvp);
	2645
	2646	/*
	2647	* If an indexfile was specified, pull it in.
	2648	*/
	2649	if (argp->ex_indexfile != NULL) {
	2650	MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
	2651	M_WAITOK);
	2652	error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
	2653	MAXNAMLEN, (size_t *)0);
	2654	if (!error) {
	2655	/*
	2656	* Check for illegal filenames.
	2657	*/
	2658	for (cp = nfs_pub.np_index; *cp; cp++) {
	2659	if (*cp == '/') {
	2660	error = EINVAL;
	2661	break;
	2662	}
	2663	}
	2664	}
	2665	if (error) {
	2666	FREE(nfs_pub.np_index, M_TEMP);
	2667	return (error);
	2668	}
	2669	}
	2670
	2671	nfs_pub.np_mount = mp;
	2672	nfs_pub.np_valid = 1;
	2673	return (0);
	2674	}
	2675
	2676	struct netcred *
	2677	vfs_export_lookup(mp, nep, nam)
	2678	struct mount *mp;
	2679	struct netexport *nep;
	2680	struct sockaddr *nam;
	2681	{
	2682	struct netcred *np;
	2683	struct radix_node_head *rnh;
	2684	struct sockaddr *saddr;
	2685
	2686	np = NULL;
	2687	if (mp->mnt_flag & MNT_EXPORTED) {
	2688	/*
	2689	* Lookup in the export list first.
	2690	*/
	2691	if (nam != NULL) {
	2692	saddr = nam;
	2693	rnh = nep->ne_rtable[saddr->sa_family];
	2694	if (rnh != NULL) {
	2695	np = (struct netcred *)
	2696	(*rnh->rnh_matchaddr)((caddr_t)saddr,
	2697	rnh);
	2698	if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
	2699	np = NULL;
	2700	}
	2701	}
	2702	/*
	2703	* If no address match, use the default if it exists.
	2704	*/
	2705	if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
	2706	np = &nep->ne_defexported;
	2707	}
	2708	return (np);
	2709	}
	2710
	2711	/*
	2712	* perform msync on all vnodes under a mount point
	2713	* the mount point must be locked.
	2714	*/
	2715	void
	2716	vfs_msync(struct mount *mp, int flags)
	2717	{
	2718	struct thread td = curthread; / XXX */
	2719	struct vnode vp, nvp;
	2720	struct vm_object *obj;
	2721	int tries;
	2722
	2723	tries = 5;
	2724	lwkt_gettoken(&mntvnode_token);
	2725	loop:
	2726	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
	2727	if (vp->v_mount != mp) {
	2728	if (--tries > 0)
	2729	goto loop;
	2730	break;
	2731	}
	2732	nvp = TAILQ_NEXT(vp, v_nmntvnodes);
	2733
	2734	if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? */
	2735	continue;
	2736
	2737	/*
	2738	* There could be hundreds of thousands of vnodes, we cannot
	2739	* afford to do anything heavy-weight until we have a fairly
	2740	* good indication that there is something to do.
	2741	*/
	2742	if ((vp->v_flag & VOBJDIRTY) &&
	2743	(flags == MNT_WAIT \|\| VOP_ISLOCKED(vp, NULL) == 0)) {
	2744	lwkt_reltoken(&mntvnode_token);
	2745	if (!vget(vp,
	2746	LK_EXCLUSIVE \| LK_RETRY \| LK_NOOBJ, td)) {
	2747	if (VOP_GETVOBJECT(vp, &obj) == 0) {
	2748	vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC);
	2749	}
	2750	vput(vp);
	2751	}
	2752	lwkt_gettoken(&mntvnode_token);
	2753	if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp) {
	2754	if (--tries > 0)
	2755	goto loop;
	2756	break;
	2757	}
	2758	}
	2759	}
	2760	lwkt_reltoken(&mntvnode_token);
	2761	}
	2762
	/*
	 * Create the VM object needed for VMIO and mmap support.  This
	 * is done for all VREG files in the system.  Some filesystems might
	 * afford the additional metadata buffering capability of the
	 * VMIO code by making the device node be VMIO mode also.
	 *
	 * vp must be locked when vfs_object_create is called.
	 *
	 * This is a thin wrapper that returns the result of
	 * VOP_CREATEVOBJECT().
	 */
	int
	vfs_object_create(struct vnode *vp, struct thread *td)
	{
		return (VOP_CREATEVOBJECT(vp, td));
	}
	2776
	2777	void
	2778	vfree(vp)
	2779	struct vnode *vp;
	2780	{
	2781	int s;
	2782
	2783	s = splbio();
	2784	lwkt_gettoken(&vnode_free_list_token);
	2785	KASSERT((vp->v_flag & VFREE) == 0, ("vnode already free"));
	2786	if (vp->v_flag & VAGE) {
	2787	TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	2788	} else {
	2789	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	2790	}
	2791	freevnodes++;
	2792	lwkt_reltoken(&vnode_free_list_token);
	2793	vp->v_flag &= ~VAGE;
	2794	vp->v_flag \|= VFREE;
	2795	splx(s);
	2796	}
	2797
	2798	void
	2799	vbusy(vp)
	2800	struct vnode *vp;
	2801	{
	2802	int s;
	2803
	2804	s = splbio();
	2805	lwkt_gettoken(&vnode_free_list_token);
	2806	KASSERT((vp->v_flag & VFREE) != 0, ("vnode not free"));
	2807	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	2808	freevnodes--;
	2809	lwkt_reltoken(&vnode_free_list_token);
	2810	vp->v_flag &= ~(VFREE\|VAGE);
	2811	splx(s);
	2812	}
	2813
	2814	/*
	2815	* Record a process's interest in events which might happen to
	2816	* a vnode. Because poll uses the historic select-style interface
	2817	* internally, this routine serves as both the ``check for any
	2818	* pending events'' and the ``record my interest in future events''
	2819	* functions. (These are done together, while the lock is held,
	2820	* to avoid race conditions.)
	2821	*/
	2822	int
	2823	vn_pollrecord(struct vnode vp, struct thread td, int events)
	2824	{
	2825	lwkt_gettoken(&vp->v_pollinfo.vpi_token);
	2826	if (vp->v_pollinfo.vpi_revents & events) {
	2827	/*
	2828	* This leaves events we are not interested
	2829	* in available for the other process which
	2830	* which presumably had requested them
	2831	* (otherwise they would never have been
	2832	* recorded).
	2833	*/
	2834	events &= vp->v_pollinfo.vpi_revents;
	2835	vp->v_pollinfo.vpi_revents &= ~events;
	2836
	2837	lwkt_reltoken(&vp->v_pollinfo.vpi_token);
	2838	return events;
	2839	}
	2840	vp->v_pollinfo.vpi_events \|= events;
	2841	selrecord(td, &vp->v_pollinfo.vpi_selinfo);
	2842	lwkt_reltoken(&vp->v_pollinfo.vpi_token);
	2843	return 0;
	2844	}
	2845
	2846	/*
	2847	* Note the occurrence of an event. If the VN_POLLEVENT macro is used,
	2848	* it is possible for us to miss an event due to race conditions, but
	2849	* that condition is expected to be rare, so for the moment it is the
	2850	* preferred interface.
	2851	*/
	2852	void
	2853	vn_pollevent(vp, events)
	2854	struct vnode *vp;
	2855	short events;
	2856	{
	2857	lwkt_gettoken(&vp->v_pollinfo.vpi_token);
	2858	if (vp->v_pollinfo.vpi_events & events) {
	2859	/*
	2860	* We clear vpi_events so that we don't
	2861	* call selwakeup() twice if two events are
	2862	* posted before the polling process(es) is
	2863	* awakened. This also ensures that we take at
	2864	* most one selwakeup() if the polling process
	2865	* is no longer interested. However, it does
	2866	* mean that only one event can be noticed at
	2867	* a time. (Perhaps we should only clear those
	2868	* event bits which we note?) XXX
	2869	*/
	2870	vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */
	2871	vp->v_pollinfo.vpi_revents \|= events;
	2872	selwakeup(&vp->v_pollinfo.vpi_selinfo);
	2873	}
	2874	lwkt_reltoken(&vp->v_pollinfo.vpi_token);
	2875	}
	2876
	2877	/*
	2878	* Wake up anyone polling on vp because it is being revoked.
	2879	* This depends on dead_poll() returning POLLHUP for correct
	2880	* behavior.
	2881	*/
	2882	void
	2883	vn_pollgone(vp)
	2884	struct vnode *vp;
	2885	{
	2886	lwkt_gettoken(&vp->v_pollinfo.vpi_token);
	2887	if (vp->v_pollinfo.vpi_events) {
	2888	vp->v_pollinfo.vpi_events = 0;
	2889	selwakeup(&vp->v_pollinfo.vpi_selinfo);
	2890	}
	2891	lwkt_reltoken(&vp->v_pollinfo.vpi_token);
	2892	}
	2893
	2894
	2895
	2896	/*
	2897	* Routine to create and manage a filesystem syncer vnode.
	2898	*/
	2899	#define sync_close ((int () __P((struct vop_close_args )))nullop)
	2900	static int sync_fsync __P((struct vop_fsync_args *));
	2901	static int sync_inactive __P((struct vop_inactive_args *));
	2902	static int sync_reclaim __P((struct vop_reclaim_args *));
	2903	#define sync_lock ((int () __P((struct vop_lock_args )))vop_nolock)
	2904	#define sync_unlock ((int () __P((struct vop_unlock_args )))vop_nounlock)
	2905	static int sync_print __P((struct vop_print_args *));
	2906	#define sync_islocked ((int() __P((struct vop_islocked_args )))vop_noislocked)
	2907
	2908	static vop_t **sync_vnodeop_p;
	2909	static struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
	2910	{ &vop_default_desc, (vop_t *) vop_eopnotsupp },
	2911	{ &vop_close_desc, (vop_t ) sync_close }, / close */
	2912	{ &vop_fsync_desc, (vop_t ) sync_fsync }, / fsync */
	2913	{ &vop_inactive_desc, (vop_t ) sync_inactive }, / inactive */
	2914	{ &vop_reclaim_desc, (vop_t ) sync_reclaim }, / reclaim */
	2915	{ &vop_lock_desc, (vop_t ) sync_lock }, / lock */
	2916	{ &vop_unlock_desc, (vop_t ) sync_unlock }, / unlock */
	2917	{ &vop_print_desc, (vop_t ) sync_print }, / print */
	2918	{ &vop_islocked_desc, (vop_t ) sync_islocked }, / islocked */
	2919	{ NULL, NULL }
	2920	};
	2921	static struct vnodeopv_desc sync_vnodeop_opv_desc =
	2922	{ &sync_vnodeop_p, sync_vnodeop_entries };
	2923
	2924	VNODEOP_SET(sync_vnodeop_opv_desc);
	2925
	2926	/*
	2927	* Create a new filesystem syncer vnode for the specified mount point.
	2928	*/
	2929	int
	2930	vfs_allocate_syncvnode(mp)
	2931	struct mount *mp;
	2932	{
	2933	struct vnode *vp;
	2934	static long start, incr, next;
	2935	int error;
	2936
	2937	/* Allocate a new vnode */
	2938	if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
	2939	mp->mnt_syncer = NULL;
	2940	return (error);
	2941	}
	2942	vp->v_type = VNON;
	2943	/*
	2944	* Place the vnode onto the syncer worklist. We attempt to
	2945	* scatter them about on the list so that they will go off
	2946	* at evenly distributed times even if all the filesystems
	2947	* are mounted at once.
	2948	*/
	2949	next += incr;
	2950	if (next == 0 \|\| next > syncer_maxdelay) {
	2951	start /= 2;
	2952	incr /= 2;
	2953	if (start == 0) {
	2954	start = syncer_maxdelay / 2;
	2955	incr = syncer_maxdelay;
	2956	}
	2957	next = start;
	2958	}
	2959	vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
	2960	mp->mnt_syncer = vp;
	2961	return (0);
	2962	}
	2963
	2964	/*
	2965	* Do a lazy sync of the filesystem.
	2966	*/
	2967	static int
	2968	sync_fsync(ap)
	2969	struct vop_fsync_args /* {
	2970	struct vnode *a_vp;
	2971	struct ucred *a_cred;
	2972	int a_waitfor;
	2973	struct thread *a_td;
	2974	} / ap;
	2975	{
	2976	struct vnode *syncvp = ap->a_vp;
	2977	struct mount *mp = syncvp->v_mount;
	2978	struct thread *td = ap->a_td;
	2979	int asyncflag;
	2980
	2981	/*
	2982	* We only need to do something if this is a lazy evaluation.
	2983	*/
	2984	if (ap->a_waitfor != MNT_LAZY)
	2985	return (0);
	2986
	2987	/*
	2988	* Move ourselves to the back of the sync list.
	2989	*/
	2990	vn_syncer_add_to_worklist(syncvp, syncdelay);
	2991
	2992	/*
	2993	* Walk the list of vnodes pushing all that are dirty and
	2994	* not already on the sync list.
	2995	*/
	2996	lwkt_gettoken(&mountlist_token);
	2997	if (vfs_busy(mp, LK_EXCLUSIVE \| LK_NOWAIT, &mountlist_token, td) != 0) {
	2998	lwkt_reltoken(&mountlist_token);
	2999	return (0);
	3000	}
	3001	asyncflag = mp->mnt_flag & MNT_ASYNC;
	3002	mp->mnt_flag &= ~MNT_ASYNC;
	3003	vfs_msync(mp, MNT_NOWAIT);
	3004	VFS_SYNC(mp, MNT_LAZY, td);
	3005	if (asyncflag)
	3006	mp->mnt_flag \|= MNT_ASYNC;
	3007	vfs_unbusy(mp, td);
	3008	return (0);
	3009	}
	3010
	3011	/*
	3012	* The syncer vnode is no referenced.
	3013	*/
	3014	static int
	3015	sync_inactive(ap)
	3016	struct vop_inactive_args /* {
	3017	struct vnode *a_vp;
	3018	struct proc *a_p;
	3019	} / ap;
	3020	{
	3021
	3022	vgone(ap->a_vp);
	3023	return (0);
	3024	}
	3025
	3026	/*
	3027	* The syncer vnode is no longer needed and is being decommissioned.
	3028	*
	3029	* Modifications to the worklist must be protected at splbio().
	3030	*/
	3031	static int
	3032	sync_reclaim(ap)
	3033	struct vop_reclaim_args /* {
	3034	struct vnode *a_vp;
	3035	} / ap;
	3036	{
	3037	struct vnode *vp = ap->a_vp;
	3038	int s;
	3039
	3040	s = splbio();
	3041	vp->v_mount->mnt_syncer = NULL;
	3042	if (vp->v_flag & VONWORKLST) {
	3043	LIST_REMOVE(vp, v_synclist);
	3044	vp->v_flag &= ~VONWORKLST;
	3045	}
	3046	splx(s);
	3047
	3048	return (0);
	3049	}
	3050
	3051	/*
	3052	* Print out a syncer vnode.
	3053	*/
	3054	static int
	3055	sync_print(ap)
	3056	struct vop_print_args /* {
	3057	struct vnode *a_vp;
	3058	} / ap;
	3059	{
	3060	struct vnode *vp = ap->a_vp;
	3061
	3062	printf("syncer vnode");
	3063	if (vp->v_vnlock != NULL)
	3064	lockmgr_printinfo(vp->v_vnlock);
	3065	printf("\n");
	3066	return (0);
	3067	}
	3068
	3069	/*
	3070	* extract the dev_t from a VBLK or VCHR
	3071	*/
	3072	dev_t
	3073	vn_todev(vp)
	3074	struct vnode *vp;
	3075	{
	3076	if (vp->v_type != VBLK && vp->v_type != VCHR)
	3077	return (NODEV);
	3078	return (vp->v_rdev);
	3079	}
	3080
	3081	/*
	3082	* Check if vnode represents a disk device
	3083	*/
	3084	int
	3085	vn_isdisk(vp, errp)
	3086	struct vnode *vp;
	3087	int *errp;
	3088	{
	3089	if (vp->v_type != VBLK && vp->v_type != VCHR) {
	3090	if (errp != NULL)
	3091	*errp = ENOTBLK;
	3092	return (0);
	3093	}
	3094	if (vp->v_rdev == NULL) {
	3095	if (errp != NULL)
	3096	*errp = ENXIO;
	3097	return (0);
	3098	}
	3099	if (!dev_dport(vp->v_rdev)) {
	3100	if (errp != NULL)
	3101	*errp = ENXIO;
	3102	return (0);
	3103	}
	3104	if (!(dev_dflags(vp->v_rdev) & D_DISK)) {
	3105	if (errp != NULL)
	3106	*errp = ENOTBLK;
	3107	return (0);
	3108	}
	3109	if (errp != NULL)
	3110	*errp = 0;
	3111	return (1);
	3112	}
	3113
	3114	void
	3115	NDFREE(ndp, flags)
	3116	struct nameidata *ndp;
	3117	const uint flags;
	3118	{
	3119	if (!(flags & NDF_NO_FREE_PNBUF) &&
	3120	(ndp->ni_cnd.cn_flags & HASBUF)) {
	3121	zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
	3122	ndp->ni_cnd.cn_flags &= ~HASBUF;
	3123	}
	3124	if (!(flags & NDF_NO_DVP_UNLOCK) &&
	3125	(ndp->ni_cnd.cn_flags & LOCKPARENT) &&
	3126	ndp->ni_dvp != ndp->ni_vp)
	3127	VOP_UNLOCK(ndp->ni_dvp, 0, ndp->ni_cnd.cn_td);
	3128	if (!(flags & NDF_NO_DVP_RELE) &&
	3129	(ndp->ni_cnd.cn_flags & (LOCKPARENT\|WANTPARENT))) {
	3130	vrele(ndp->ni_dvp);
	3131	ndp->ni_dvp = NULL;
	3132	}
	3133	if (!(flags & NDF_NO_VP_UNLOCK) &&
	3134	(ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp)
	3135	VOP_UNLOCK(ndp->ni_vp, 0, ndp->ni_cnd.cn_td);
	3136	if (!(flags & NDF_NO_VP_RELE) &&
	3137	ndp->ni_vp) {
	3138	vrele(ndp->ni_vp);
	3139	ndp->ni_vp = NULL;
	3140	}
	3141	if (!(flags & NDF_NO_STARTDIR_RELE) &&
	3142	(ndp->ni_cnd.cn_flags & SAVESTART)) {
	3143	vrele(ndp->ni_startdir);
	3144	ndp->ni_startdir = NULL;
	3145	}
	3146	}