gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 1989, 1993
	3	* The Regents of the University of California. All rights reserved.
	4	* (c) UNIX System Laboratories, Inc.
	5	* All or some portions of this file are derived from material licensed
	6	* to the University of California by American Telephone and Telegraph
	7	* Co. or Unix System Laboratories, Inc. and are reproduced herein with
	8	* the permission of UNIX System Laboratories, Inc.
	9	*
	10	* Redistribution and use in source and binary forms, with or without
	11	* modification, are permitted provided that the following conditions
	12	* are met:
	13	* 1. Redistributions of source code must retain the above copyright
	14	* notice, this list of conditions and the following disclaimer.
	15	* 2. Redistributions in binary form must reproduce the above copyright
	16	* notice, this list of conditions and the following disclaimer in the
	17	* documentation and/or other materials provided with the distribution.
	18	* 3. All advertising materials mentioning features or use of this software
	19	* must display the following acknowledgement:
	20	* This product includes software developed by the University of
	21	* California, Berkeley and its contributors.
	22	* 4. Neither the name of the University nor the names of its contributors
	23	* may be used to endorse or promote products derived from this software
	24	* without specific prior written permission.
	25	*
	26	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	27	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	28	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	29	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	30	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	31	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	32	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	33	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	34	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	35	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	36	* SUCH DAMAGE.
	37	*
	38	* @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
	39	* $FreeBSD: src/sys/kern/vfs_subr.c,v 1.249.2.30 2003/04/04 20:35:57 tegge Exp $
	40	* $DragonFly: src/sys/kern/vfs_subr.c,v 1.63 2005/08/27 20:23:05 joerg Exp $
	41	*/
	42
	43	/*
	44	* External virtual filesystem routines
	45	*/
	46	#include "opt_ddb.h"
	47
	48	#include <sys/param.h>
	49	#include <sys/systm.h>
	50	#include <sys/buf.h>
	51	#include <sys/conf.h>
	52	#include <sys/dirent.h>
	53	#include <sys/domain.h>
	54	#include <sys/eventhandler.h>
	55	#include <sys/fcntl.h>
	56	#include <sys/kernel.h>
	57	#include <sys/kthread.h>
	58	#include <sys/malloc.h>
	59	#include <sys/mbuf.h>
	60	#include <sys/mount.h>
	61	#include <sys/proc.h>
	62	#include <sys/reboot.h>
	63	#include <sys/socket.h>
	64	#include <sys/stat.h>
	65	#include <sys/sysctl.h>
	66	#include <sys/syslog.h>
	67	#include <sys/unistd.h>
	68	#include <sys/vmmeter.h>
	69	#include <sys/vnode.h>
	70
	71	#include <machine/limits.h>
	72
	73	#include <vm/vm.h>
	74	#include <vm/vm_object.h>
	75	#include <vm/vm_extern.h>
	76	#include <vm/vm_kern.h>
	77	#include <vm/pmap.h>
	78	#include <vm/vm_map.h>
	79	#include <vm/vm_page.h>
	80	#include <vm/vm_pager.h>
	81	#include <vm/vnode_pager.h>
	82	#include <vm/vm_zone.h>
	83
	84	#include <sys/buf2.h>
	85	#include <sys/thread2.h>
	86
	87	static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
	88
	89	int numvnodes;
	90	SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
	91	int vfs_fastdev = 1;
	92	SYSCTL_INT(_vfs, OID_AUTO, fastdev, CTLFLAG_RW, &vfs_fastdev, 0, "");
	93
	94	enum vtype iftovt_tab[16] = {
	95	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	96	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
	97	};
	98	int vttoif_tab[9] = {
	99	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	100	S_IFSOCK, S_IFIFO, S_IFMT,
	101	};
	102
	103	static int reassignbufcalls;
	104	SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW,
	105	&reassignbufcalls, 0, "");
	106	static int reassignbufloops;
	107	SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW,
	108	&reassignbufloops, 0, "");
	109	static int reassignbufsortgood;
	110	SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW,
	111	&reassignbufsortgood, 0, "");
	112	static int reassignbufsortbad;
	113	SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW,
	114	&reassignbufsortbad, 0, "");
	115	static int reassignbufmethod = 1;
	116	SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW,
	117	&reassignbufmethod, 0, "");
	118
	119	int nfs_mount_type = -1;
	120	static struct lwkt_token spechash_token;
	121	struct nfs_public nfs_pub; /* publicly exported FS */
	122
	123	int desiredvnodes;
	124	SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
	125	&desiredvnodes, 0, "Maximum number of vnodes");
	126
	127	static void vfs_free_addrlist (struct netexport *nep);
	128	static int vfs_free_netcred (struct radix_node rn, void w);
	129	static int vfs_hang_addrlist (struct mount mp, struct netexport nep,
	130	struct export_args *argp);
	131
	132	extern int dev_ref_debug;
	133	extern struct vnodeopv_entry_desc spec_vnodeop_entries[];
	134
	135	/*
	136	* Red black tree functions
	137	*/
	138	static int rb_buf_compare(struct buf b1, struct buf b2);
	139	RB_GENERATE(buf_rb_tree, buf, b_rbnode, rb_buf_compare);
	140
	141	static int
	142	rb_buf_compare(struct buf b1, struct buf b2)
	143	{
	144	if (b1->b_lblkno < b2->b_lblkno)
	145	return(-1);
	146	if (b1->b_lblkno > b2->b_lblkno)
	147	return(1);
	148	return(0);
	149	}
	150
	151	/*
	152	* Return 0 if the vnode is already on the free list or cannot be placed
	153	* on the free list. Return 1 if the vnode can be placed on the free list.
	154	*/
	155	static __inline int
	156	vshouldfree(struct vnode *vp, int usecount)
	157	{
	158	if (vp->v_flag & VFREE)
	159	return (0); /* already free */
	160	if (vp->v_holdcnt != 0 \|\| vp->v_usecount != usecount)
	161	return (0); /* other holderse */
	162	if (vp->v_object &&
	163	(vp->v_object->ref_count \|\| vp->v_object->resident_page_count)) {
	164	return (0);
	165	}
	166	return (1);
	167	}
	168
	169	/*
	170	* Initialize the vnode management data structures.
	171	*
	172	* Called from vfsinit()
	173	*/
	174	void
	175	vfs_subr_init(void)
	176	{
	177	/*
	178	* Desired vnodes is a result of the physical page count
	179	* and the size of kernel's heap. It scales in proportion
	180	* to the amount of available physical memory. This can
	181	* cause trouble on 64-bit and large memory platforms.
	182	*/
	183	/* desiredvnodes = maxproc + vmstats.v_page_count / 4; */
	184	desiredvnodes =
	185	min(maxproc + vmstats.v_page_count /4,
	186	2 * (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) /
	187	(5 * (sizeof(struct vm_object) + sizeof(struct vnode))));
	188
	189	lwkt_token_init(&spechash_token);
	190	}
	191
	192	/*
	193	* Knob to control the precision of file timestamps:
	194	*
	195	* 0 = seconds only; nanoseconds zeroed.
	196	* 1 = seconds and nanoseconds, accurate within 1/HZ.
	197	* 2 = seconds and nanoseconds, truncated to microseconds.
	198	* >=3 = seconds and nanoseconds, maximum precision.
	199	*/
	200	enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };
	201
	202	static int timestamp_precision = TSP_SEC;
	203	SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW,
	204	&timestamp_precision, 0, "");
	205
	206	/*
	207	* Get a current timestamp.
	208	*/
	209	void
	210	vfs_timestamp(struct timespec *tsp)
	211	{
	212	struct timeval tv;
	213
	214	switch (timestamp_precision) {
	215	case TSP_SEC:
	216	tsp->tv_sec = time_second;
	217	tsp->tv_nsec = 0;
	218	break;
	219	case TSP_HZ:
	220	getnanotime(tsp);
	221	break;
	222	case TSP_USEC:
	223	microtime(&tv);
	224	TIMEVAL_TO_TIMESPEC(&tv, tsp);
	225	break;
	226	case TSP_NSEC:
	227	default:
	228	nanotime(tsp);
	229	break;
	230	}
	231	}
	232
	233	/*
	234	* Set vnode attributes to VNOVAL
	235	*/
	236	void
	237	vattr_null(struct vattr *vap)
	238	{
	239	vap->va_type = VNON;
	240	vap->va_size = VNOVAL;
	241	vap->va_bytes = VNOVAL;
	242	vap->va_mode = VNOVAL;
	243	vap->va_nlink = VNOVAL;
	244	vap->va_uid = VNOVAL;
	245	vap->va_gid = VNOVAL;
	246	vap->va_fsid = VNOVAL;
	247	vap->va_fileid = VNOVAL;
	248	vap->va_blocksize = VNOVAL;
	249	vap->va_rdev = VNOVAL;
	250	vap->va_atime.tv_sec = VNOVAL;
	251	vap->va_atime.tv_nsec = VNOVAL;
	252	vap->va_mtime.tv_sec = VNOVAL;
	253	vap->va_mtime.tv_nsec = VNOVAL;
	254	vap->va_ctime.tv_sec = VNOVAL;
	255	vap->va_ctime.tv_nsec = VNOVAL;
	256	vap->va_flags = VNOVAL;
	257	vap->va_gen = VNOVAL;
	258	vap->va_vaflags = 0;
	259	}
	260
	261	/*
	262	* Update outstanding I/O count and do wakeup if requested.
	263	*/
	264	void
	265	vwakeup(struct buf *bp)
	266	{
	267	struct vnode *vp;
	268
	269	if ((vp = bp->b_vp)) {
	270	vp->v_numoutput--;
	271	if (vp->v_numoutput < 0)
	272	panic("vwakeup: neg numoutput");
	273	if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
	274	vp->v_flag &= ~VBWAIT;
	275	wakeup((caddr_t) &vp->v_numoutput);
	276	}
	277	}
	278	}
	279
	280	/*
	281	* Flush out and invalidate all buffers associated with a vnode.
	282	*
	283	* vp must be locked.
	284	*/
	285	static int vinvalbuf_bp(struct buf bp, void data);
	286
	287	struct vinvalbuf_bp_info {
	288	struct vnode *vp;
	289	int slptimeo;
	290	int slpflag;
	291	int flags;
	292	};
	293
	294	int
	295	vinvalbuf(struct vnode vp, int flags, struct thread td,
	296	int slpflag, int slptimeo)
	297	{
	298	struct vinvalbuf_bp_info info;
	299	int error;
	300	vm_object_t object;
	301
	302	/*
	303	* If we are being asked to save, call fsync to ensure that the inode
	304	* is updated.
	305	*/
	306	if (flags & V_SAVE) {
	307	crit_enter();
	308	while (vp->v_numoutput) {
	309	vp->v_flag \|= VBWAIT;
	310	error = tsleep((caddr_t)&vp->v_numoutput,
	311	slpflag, "vinvlbuf", slptimeo);
	312	if (error) {
	313	crit_exit();
	314	return (error);
	315	}
	316	}
	317	if (!RB_EMPTY(&vp->v_rbdirty_tree)) {
	318	crit_exit();
	319	if ((error = VOP_FSYNC(vp, MNT_WAIT, td)) != 0)
	320	return (error);
	321	crit_enter();
	322	if (vp->v_numoutput > 0 \|\|
	323	!RB_EMPTY(&vp->v_rbdirty_tree))
	324	panic("vinvalbuf: dirty bufs");
	325	}
	326	crit_exit();
	327	}
	328	crit_enter();
	329	info.slptimeo = slptimeo;
	330	info.slpflag = slpflag;
	331	info.flags = flags;
	332	info.vp = vp;
	333
	334	/*
	335	* Flush the buffer cache until nothing is left.
	336	*/
	337	while (!RB_EMPTY(&vp->v_rbclean_tree) \|\|
	338	!RB_EMPTY(&vp->v_rbdirty_tree)) {
	339	error = RB_SCAN(buf_rb_tree, &vp->v_rbclean_tree, NULL,
	340	vinvalbuf_bp, &info);
	341	if (error == 0) {
	342	error = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL,
	343	vinvalbuf_bp, &info);
	344	}
	345	}
	346
	347	/*
	348	* Wait for I/O to complete. XXX needs cleaning up. The vnode can
	349	* have write I/O in-progress but if there is a VM object then the
	350	* VM object can also have read-I/O in-progress.
	351	*/
	352	do {
	353	while (vp->v_numoutput > 0) {
	354	vp->v_flag \|= VBWAIT;
	355	tsleep(&vp->v_numoutput, 0, "vnvlbv", 0);
	356	}
	357	if (VOP_GETVOBJECT(vp, &object) == 0) {
	358	while (object->paging_in_progress)
	359	vm_object_pip_sleep(object, "vnvlbx");
	360	}
	361	} while (vp->v_numoutput > 0);
	362
	363	crit_exit();
	364
	365	/*
	366	* Destroy the copy in the VM cache, too.
	367	*/
	368	if (VOP_GETVOBJECT(vp, &object) == 0) {
	369	vm_object_page_remove(object, 0, 0,
	370	(flags & V_SAVE) ? TRUE : FALSE);
	371	}
	372
	373	if (!RB_EMPTY(&vp->v_rbdirty_tree) \|\| !RB_EMPTY(&vp->v_rbclean_tree))
	374	panic("vinvalbuf: flush failed");
	375	return (0);
	376	}
	377
	378	static int
	379	vinvalbuf_bp(struct buf bp, void data)
	380	{
	381	struct vinvalbuf_bp_info *info = data;
	382	int error;
	383
	384	if (BUF_LOCK(bp, LK_EXCLUSIVE \| LK_NOWAIT)) {
	385	error = BUF_TIMELOCK(bp,
	386	LK_EXCLUSIVE \| LK_SLEEPFAIL,
	387	"vinvalbuf", info->slpflag, info->slptimeo);
	388	if (error == 0) {
	389	BUF_UNLOCK(bp);
	390	error = ENOLCK;
	391	}
	392	if (error == ENOLCK)
	393	return(0);
	394	return (-error);
	395	}
	396	/*
	397	* XXX Since there are no node locks for NFS, I
	398	* believe there is a slight chance that a delayed
	399	* write will occur while sleeping just above, so
	400	* check for it. Note that vfs_bio_awrite expects
	401	* buffers to reside on a queue, while VOP_BWRITE and
	402	* brelse do not.
	403	*/
	404	if (((bp->b_flags & (B_DELWRI \| B_INVAL)) == B_DELWRI) &&
	405	(info->flags & V_SAVE)) {
	406	if (bp->b_vp == info->vp) {
	407	if (bp->b_flags & B_CLUSTEROK) {
	408	BUF_UNLOCK(bp);
	409	vfs_bio_awrite(bp);
	410	} else {
	411	bremfree(bp);
	412	bp->b_flags \|= B_ASYNC;
	413	VOP_BWRITE(bp->b_vp, bp);
	414	}
	415	} else {
	416	bremfree(bp);
	417	VOP_BWRITE(bp->b_vp, bp);
	418	}
	419	} else {
	420	bremfree(bp);
	421	bp->b_flags \|= (B_INVAL \| B_NOCACHE \| B_RELBUF);
	422	bp->b_flags &= ~B_ASYNC;
	423	brelse(bp);
	424	}
	425	return(0);
	426	}
	427
	428	/*
	429	* Truncate a file's buffer and pages to a specified length. This
	430	* is in lieu of the old vinvalbuf mechanism, which performed unneeded
	431	* sync activity.
	432	*
	433	* The vnode must be locked.
	434	*/
	435	static int vtruncbuf_bp_trunc_cmp(struct buf bp, void data);
	436	static int vtruncbuf_bp_trunc(struct buf bp, void data);
	437	static int vtruncbuf_bp_metasync_cmp(struct buf bp, void data);
	438	static int vtruncbuf_bp_metasync(struct buf bp, void data);
	439
	440	int
	441	vtruncbuf(struct vnode vp, struct thread td, off_t length, int blksize)
	442	{
	443	daddr_t trunclbn;
	444	int count;
	445
	446	/*
	447	* Round up to the next lbn, then destroy the buffers in question.
	448	* Since we are only removing some of the buffers we must rely on the
	449	* scan count to determine whether a loop is necessary.
	450	*/
	451	trunclbn = (length + blksize - 1) / blksize;
	452
	453	crit_enter();
	454	do {
	455	count = RB_SCAN(buf_rb_tree, &vp->v_rbclean_tree,
	456	vtruncbuf_bp_trunc_cmp,
	457	vtruncbuf_bp_trunc, &trunclbn);
	458	count += RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree,
	459	vtruncbuf_bp_trunc_cmp,
	460	vtruncbuf_bp_trunc, &trunclbn);
	461	} while(count);
	462
	463	/*
	464	* For safety, fsync any remaining metadata if the file is not being
	465	* truncated to 0. Since the metadata does not represent the entire
	466	* dirty list we have to rely on the hit count to ensure that we get
	467	* all of it.
	468	*/
	469	if (length > 0) {
	470	do {
	471	count = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree,
	472	vtruncbuf_bp_metasync_cmp,
	473	vtruncbuf_bp_metasync, vp);
	474	} while (count);
	475	}
	476
	477	/*
	478	* Wait for any in-progress I/O to complete before returning (why?)
	479	*/
	480	while (vp->v_numoutput > 0) {
	481	vp->v_flag \|= VBWAIT;
	482	tsleep(&vp->v_numoutput, 0, "vbtrunc", 0);
	483	}
	484
	485	crit_exit();
	486
	487	vnode_pager_setsize(vp, length);
	488
	489	return (0);
	490	}
	491
	492	/*
	493	* The callback buffer is beyond the new file EOF and must be destroyed.
	494	* Note that the compare function must conform to the RB_SCAN's requirements.
	495	*/
	496	static
	497	int
	498	vtruncbuf_bp_trunc_cmp(struct buf bp, void data)
	499	{
	500	if (bp->b_lblkno >= (daddr_t )data)
	501	return(0);
	502	return(-1);
	503	}
	504
	505	static
	506	int
	507	vtruncbuf_bp_trunc(struct buf bp, void data)
	508	{
	509	/*
	510	* Do not try to use a buffer we cannot immediately lock, but sleep
	511	* anyway to prevent a livelock. The code will loop until all buffers
	512	* can be acted upon.
	513	*/
	514	if (BUF_LOCK(bp, LK_EXCLUSIVE \| LK_NOWAIT)) {
	515	if (BUF_LOCK(bp, LK_EXCLUSIVE\|LK_SLEEPFAIL) == 0)
	516	BUF_UNLOCK(bp);
	517	} else {
	518	bremfree(bp);
	519	bp->b_flags \|= (B_INVAL \| B_RELBUF);
	520	bp->b_flags &= ~B_ASYNC;
	521	brelse(bp);
	522	}
	523	return(1);
	524	}
	525
	526	/*
	527	* Fsync all meta-data after truncating a file to be non-zero. Only metadata
	528	* blocks (with a negative lblkno) are scanned.
	529	* Note that the compare function must conform to the RB_SCAN's requirements.
	530	*/
	531	static int
	532	vtruncbuf_bp_metasync_cmp(struct buf bp, void data)
	533	{
	534	if (bp->b_lblkno < 0)
	535	return(0);
	536	return(1);
	537	}
	538
	539	static int
	540	vtruncbuf_bp_metasync(struct buf bp, void data)
	541	{
	542	struct vnode *vp = data;
	543
	544	if (bp->b_flags & B_DELWRI) {
	545	/*
	546	* Do not try to use a buffer we cannot immediately lock,
	547	* but sleep anyway to prevent a livelock. The code will
	548	* loop until all buffers can be acted upon.
	549	*/
	550	if (BUF_LOCK(bp, LK_EXCLUSIVE \| LK_NOWAIT)) {
	551	if (BUF_LOCK(bp, LK_EXCLUSIVE\|LK_SLEEPFAIL) == 0)
	552	BUF_UNLOCK(bp);
	553	} else {
	554	bremfree(bp);
	555	if (bp->b_vp == vp) {
	556	bp->b_flags \|= B_ASYNC;
	557	} else {
	558	bp->b_flags &= ~B_ASYNC;
	559	}
	560	VOP_BWRITE(bp->b_vp, bp);
	561	}
	562	return(1);
	563	} else {
	564	return(0);
	565	}
	566	}
	567
	568	/*
	569	* vfsync - implements a multipass fsync on a file which understands
	570	* dependancies and meta-data. The passed vnode must be locked. The
	571	* waitfor argument may be MNT_WAIT or MNT_NOWAIT, or MNT_LAZY.
	572	*
	573	* When fsyncing data asynchronously just do one consolidated pass starting
	574	* with the most negative block number. This may not get all the data due
	575	* to dependancies.
	576	*
	577	* When fsyncing data synchronously do a data pass, then a metadata pass,
	578	* then do additional data+metadata passes to try to get all the data out.
	579	*/
	580	static int vfsync_wait_output(struct vnode *vp,
	581	int (waitoutput)(struct vnode , struct thread *));
	582	static int vfsync_data_only_cmp(struct buf bp, void data);
	583	static int vfsync_meta_only_cmp(struct buf bp, void data);
	584	static int vfsync_lazy_range_cmp(struct buf bp, void data);
	585	static int vfsync_bp(struct buf bp, void data);
	586
	587	struct vfsync_info {
	588	struct vnode *vp;
	589	int synchronous;
	590	int syncdeps;
	591	int lazycount;
	592	int lazylimit;
	593	daddr_t lbn;
	594	int (checkdef)(struct buf );
	595	};
	596
	597	int
	598	vfsync(struct vnode *vp, int waitfor, int passes, daddr_t lbn,
	599	int (checkdef)(struct buf ),
	600	int (waitoutput)(struct vnode , struct thread *))
	601	{
	602	struct vfsync_info info;
	603	int error;
	604
	605	bzero(&info, sizeof(info));
	606	info.vp = vp;
	607	info.lbn = lbn;
	608	if ((info.checkdef = checkdef) == NULL)
	609	info.syncdeps = 1;
	610
	611	crit_enter();
	612
	613	switch(waitfor) {
	614	case MNT_LAZY:
	615	/*
	616	* Lazy (filesystem syncer typ) Asynchronous plus limit the
	617	* number of data (not meta) pages we try to flush to 1MB.
	618	* A non-zero return means that lazy limit was reached.
	619	*/
	620	info.lazylimit = 1024 * 1024;
	621	info.syncdeps = 1;
	622	error = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree,
	623	vfsync_lazy_range_cmp, vfsync_bp, &info);
	624	RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree,
	625	vfsync_meta_only_cmp, vfsync_bp, &info);
	626	if (error == 0)
	627	vp->v_lazyw = 0;
	628	else if (!RB_EMPTY(&vp->v_rbdirty_tree))
	629	vn_syncer_add_to_worklist(vp, 1);
	630	error = 0;
	631	break;
	632	case MNT_NOWAIT:
	633	/*
	634	* Asynchronous. Do a data-only pass and a meta-only pass.
	635	*/
	636	info.syncdeps = 1;
	637	RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, vfsync_data_only_cmp,
	638	vfsync_bp, &info);
	639	RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, vfsync_meta_only_cmp,
	640	vfsync_bp, &info);
	641	error = 0;
	642	break;
	643	default:
	644	/*
	645	* Synchronous. Do a data-only pass, then a meta-data+data
	646	* pass, then additional integrated passes to try to get
	647	* all the dependancies flushed.
	648	*/
	649	RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, vfsync_data_only_cmp,
	650	vfsync_bp, &info);
	651	error = vfsync_wait_output(vp, waitoutput);
	652	if (error == 0) {
	653	RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL,
	654	vfsync_bp, &info);
	655	error = vfsync_wait_output(vp, waitoutput);
	656	}
	657	while (error == 0 && passes > 0 &&
	658	!RB_EMPTY(&vp->v_rbdirty_tree)) {
	659	if (--passes == 0) {
	660	info.synchronous = 1;
	661	info.syncdeps = 1;
	662	}
	663	error = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL,
	664	vfsync_bp, &info);
	665	if (error < 0)
	666	error = -error;
	667	info.syncdeps = 1;
	668	if (error == 0)
	669	error = vfsync_wait_output(vp, waitoutput);
	670	}
	671	break;
	672	}
	673	crit_exit();
	674	return(error);
	675	}
	676
	677	static int
	678	vfsync_wait_output(struct vnode vp, int (waitoutput)(struct vnode , struct thread ))
	679	{
	680	int error = 0;
	681
	682	while (vp->v_numoutput) {
	683	vp->v_flag \|= VBWAIT;
	684	tsleep(&vp->v_numoutput, 0, "fsfsn", 0);
	685	}
	686	if (waitoutput)
	687	error = waitoutput(vp, curthread);
	688	return(error);
	689	}
	690
	691	static int
	692	vfsync_data_only_cmp(struct buf bp, void data)
	693	{
	694	if (bp->b_lblkno < 0)
	695	return(-1);
	696	return(0);
	697	}
	698
	699	static int
	700	vfsync_meta_only_cmp(struct buf bp, void data)
	701	{
	702	if (bp->b_lblkno < 0)
	703	return(0);
	704	return(1);
	705	}
	706
	707	static int
	708	vfsync_lazy_range_cmp(struct buf bp, void data)
	709	{
	710	struct vfsync_info *info = data;
	711	if (bp->b_lblkno < info->vp->v_lazyw)
	712	return(-1);
	713	return(0);
	714	}
	715
	716	static int
	717	vfsync_bp(struct buf bp, void data)
	718	{
	719	struct vfsync_info *info = data;
	720	struct vnode *vp = info->vp;
	721	int error;
	722
	723	/*
	724	* if syncdeps is not set we do not try to write buffers which have
	725	* dependancies.
	726	*/
	727	if (!info->synchronous && info->syncdeps == 0 && info->checkdef(bp))
	728	return(0);
	729
	730	/*
	731	* Ignore buffers that we cannot immediately lock. XXX
	732	*/
	733	if (BUF_LOCK(bp, LK_EXCLUSIVE \| LK_NOWAIT))
	734	return(0);
	735	if ((bp->b_flags & B_DELWRI) == 0)
	736	panic("vfsync_bp: buffer not dirty");
	737	if (vp != bp->b_vp)
	738	panic("vfsync_bp: buffer vp mismatch");
	739
	740	/*
	741	* B_NEEDCOMMIT (primarily used by NFS) is a state where the buffer
	742	* has been written but an additional handshake with the device
	743	* is required before we can dispose of the buffer. We have no idea
	744	* how to do this so we have to skip these buffers.
	745	*/
	746	if (bp->b_flags & B_NEEDCOMMIT) {
	747	BUF_UNLOCK(bp);
	748	return(0);
	749	}
	750
	751	/*
	752	* (LEGACY FROM UFS, REMOVE WHEN POSSIBLE) - invalidate any dirty
	753	* buffers beyond the file EOF.
	754	*/
	755	if (info->lbn != (daddr_t)-1 && vp->v_type == VREG &&
	756	bp->b_lblkno >= info->lbn) {
	757	bremfree(bp);
	758	bp->b_flags \|= B_INVAL \| B_NOCACHE;
	759	crit_exit();
	760	brelse(bp);
	761	crit_enter();
	762	}
	763
	764	if (info->synchronous) {
	765	/*
	766	* Synchronous flushing. An error may be returned.
	767	*/
	768	bremfree(bp);
	769	crit_exit();
	770	error = bwrite(bp);
	771	crit_enter();
	772	} else {
	773	/*
	774	* Asynchronous flushing. A negative return value simply
	775	* stops the scan and is not considered an error. We use
	776	* this to support limited MNT_LAZY flushes.
	777	*/
	778	vp->v_lazyw = bp->b_lblkno;
	779	if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) {
	780	BUF_UNLOCK(bp);
	781	info->lazycount += vfs_bio_awrite(bp);
	782	} else {
	783	info->lazycount += bp->b_bufsize;
	784	bremfree(bp);
	785	crit_exit();
	786	bawrite(bp);
	787	crit_enter();
	788	}
	789	if (info->lazylimit && info->lazycount >= info->lazylimit)
	790	error = 1;
	791	else
	792	error = 0;
	793	}
	794	return(-error);
	795	}
	796
	797	/*
	798	* Associate a buffer with a vnode.
	799	*/
	800	void
	801	bgetvp(struct vnode vp, struct buf bp)
	802	{
	803	KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));
	804
	805	vhold(vp);
	806	bp->b_vp = vp;
	807	bp->b_dev = vn_todev(vp);
	808	/*
	809	* Insert onto list for new vnode.
	810	*/
	811	crit_enter();
	812	bp->b_xflags \|= BX_VNCLEAN;
	813	bp->b_xflags &= ~BX_VNDIRTY;
	814	if (buf_rb_tree_RB_INSERT(&vp->v_rbclean_tree, bp))
	815	panic("reassignbuf: dup lblk vp %p bp %p", vp, bp);
	816	crit_exit();
	817	}
	818
	819	/*
	820	* Disassociate a buffer from a vnode.
	821	*/
	822	void
	823	brelvp(struct buf *bp)
	824	{
	825	struct vnode *vp;
	826
	827	KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));
	828
	829	/*
	830	* Delete from old vnode list, if on one.
	831	*/
	832	vp = bp->b_vp;
	833	crit_enter();
	834	if (bp->b_xflags & (BX_VNDIRTY \| BX_VNCLEAN)) {
	835	if (bp->b_xflags & BX_VNDIRTY)
	836	buf_rb_tree_RB_REMOVE(&vp->v_rbdirty_tree, bp);
	837	else
	838	buf_rb_tree_RB_REMOVE(&vp->v_rbclean_tree, bp);
	839	bp->b_xflags &= ~(BX_VNDIRTY \| BX_VNCLEAN);
	840	}
	841	if ((vp->v_flag & VONWORKLST) && RB_EMPTY(&vp->v_rbdirty_tree)) {
	842	vp->v_flag &= ~VONWORKLST;
	843	LIST_REMOVE(vp, v_synclist);
	844	}
	845	crit_exit();
	846	bp->b_vp = NULL;
	847	vdrop(vp);
	848	}
	849
	850	/*
	851	* Associate a p-buffer with a vnode.
	852	*
	853	* Also sets B_PAGING flag to indicate that vnode is not fully associated
	854	* with the buffer. i.e. the bp has not been linked into the vnode or
	855	* ref-counted.
	856	*/
	857	void
	858	pbgetvp(struct vnode vp, struct buf bp)
	859	{
	860	KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));
	861
	862	bp->b_vp = vp;
	863	bp->b_flags \|= B_PAGING;
	864	bp->b_dev = vn_todev(vp);
	865	}
	866
	867	/*
	868	* Disassociate a p-buffer from a vnode.
	869	*/
	870	void
	871	pbrelvp(struct buf *bp)
	872	{
	873	KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));
	874
	875	bp->b_vp = NULL;
	876	bp->b_flags &= ~B_PAGING;
	877	}
	878
	879	void
	880	pbreassignbuf(struct buf bp, struct vnode newvp)
	881	{
	882	if ((bp->b_flags & B_PAGING) == 0) {
	883	panic(
	884	"pbreassignbuf() on non phys bp %p",
	885	bp
	886	);
	887	}
	888	bp->b_vp = newvp;
	889	}
	890
	891	/*
	892	* Reassign a buffer from one vnode to another.
	893	* Used to assign file specific control information
	894	* (indirect blocks) to the vnode to which they belong.
	895	*/
	896	void
	897	reassignbuf(struct buf bp, struct vnode newvp)
	898	{
	899	int delay;
	900
	901	if (newvp == NULL) {
	902	printf("reassignbuf: NULL");
	903	return;
	904	}
	905	++reassignbufcalls;
	906
	907	/*
	908	* B_PAGING flagged buffers cannot be reassigned because their vp
	909	* is not fully linked in.
	910	*/
	911	if (bp->b_flags & B_PAGING)
	912	panic("cannot reassign paging buffer");
	913
	914	crit_enter();
	915	/*
	916	* Delete from old vnode list, if on one.
	917	*/
	918	if (bp->b_xflags & (BX_VNDIRTY \| BX_VNCLEAN)) {
	919	if (bp->b_xflags & BX_VNDIRTY)
	920	buf_rb_tree_RB_REMOVE(&bp->b_vp->v_rbdirty_tree, bp);
	921	else
	922	buf_rb_tree_RB_REMOVE(&bp->b_vp->v_rbclean_tree, bp);
	923	bp->b_xflags &= ~(BX_VNDIRTY \| BX_VNCLEAN);
	924	if (bp->b_vp != newvp) {
	925	vdrop(bp->b_vp);
	926	bp->b_vp = NULL; /* for clarification */
	927	}
	928	}
	929	/*
	930	* If dirty, put on list of dirty buffers; otherwise insert onto list
	931	* of clean buffers.
	932	*/
	933	if (bp->b_flags & B_DELWRI) {
	934	if ((newvp->v_flag & VONWORKLST) == 0) {
	935	switch (newvp->v_type) {
	936	case VDIR:
	937	delay = dirdelay;
	938	break;
	939	case VCHR:
	940	case VBLK:
	941	if (newvp->v_rdev &&
	942	newvp->v_rdev->si_mountpoint != NULL) {
	943	delay = metadelay;
	944	break;
	945	}
	946	/* fall through */
	947	default:
	948	delay = filedelay;
	949	}
	950	vn_syncer_add_to_worklist(newvp, delay);
	951	}
	952	bp->b_xflags \|= BX_VNDIRTY;
	953	if (buf_rb_tree_RB_INSERT(&newvp->v_rbdirty_tree, bp))
	954	panic("reassignbuf: dup lblk vp %p bp %p", newvp, bp);
	955	} else {
	956	bp->b_xflags \|= BX_VNCLEAN;
	957	if (buf_rb_tree_RB_INSERT(&newvp->v_rbclean_tree, bp))
	958	panic("reassignbuf: dup lblk vp %p bp %p", newvp, bp);
	959	if ((newvp->v_flag & VONWORKLST) &&
	960	RB_EMPTY(&newvp->v_rbdirty_tree)) {
	961	newvp->v_flag &= ~VONWORKLST;
	962	LIST_REMOVE(newvp, v_synclist);
	963	}
	964	}
	965	if (bp->b_vp != newvp) {
	966	bp->b_vp = newvp;
	967	vhold(bp->b_vp);
	968	}
	969	crit_exit();
	970	}
	971
	972	/*
	973	* Create a vnode for a block device.
	974	* Used for mounting the root file system.
	975	*/
	976	int
	977	bdevvp(dev_t dev, struct vnode **vpp)
	978	{
	979	struct vnode *vp;
	980	struct vnode *nvp;
	981	int error;
	982
	983	if (dev == NODEV) {
	984	*vpp = NULLVP;
	985	return (ENXIO);
	986	}
	987	error = getspecialvnode(VT_NON, NULL, &spec_vnode_vops, &nvp, 0, 0);
	988	if (error) {
	989	*vpp = NULLVP;
	990	return (error);
	991	}
	992	vp = nvp;
	993	vp->v_type = VCHR;
	994	vp->v_udev = dev->si_udev;
	995	vx_unlock(vp);
	996	*vpp = vp;
	997	return (0);
	998	}
	999
	1000	int
	1001	v_associate_rdev(struct vnode *vp, dev_t dev)
	1002	{
	1003	lwkt_tokref ilock;
	1004
	1005	if (dev == NULL \|\| dev == NODEV)
	1006	return(ENXIO);
	1007	if (dev_is_good(dev) == 0)
	1008	return(ENXIO);
	1009	KKASSERT(vp->v_rdev == NULL);
	1010	if (dev_ref_debug)
	1011	printf("Z1");
	1012	vp->v_rdev = reference_dev(dev);
	1013	lwkt_gettoken(&ilock, &spechash_token);
	1014	SLIST_INSERT_HEAD(&dev->si_hlist, vp, v_specnext);
	1015	lwkt_reltoken(&ilock);
	1016	return(0);
	1017	}
	1018
	1019	void
	1020	v_release_rdev(struct vnode *vp)
	1021	{
	1022	lwkt_tokref ilock;
	1023	dev_t dev;
	1024
	1025	if ((dev = vp->v_rdev) != NULL) {
	1026	lwkt_gettoken(&ilock, &spechash_token);
	1027	SLIST_REMOVE(&dev->si_hlist, vp, vnode, v_specnext);
	1028	if (dev_ref_debug && vp->v_opencount != 0) {
	1029	printf("releasing rdev with non-0 "
	1030	"v_opencount(%d) (revoked?)\n",
	1031	vp->v_opencount);
	1032	}
	1033	vp->v_rdev = NULL;
	1034	vp->v_opencount = 0;
	1035	release_dev(dev);
	1036	lwkt_reltoken(&ilock);
	1037	}
	1038	}
	1039
	1040	/*
	1041	* Add a vnode to the alias list hung off the dev_t. We only associate
	1042	* the device number with the vnode. The actual device is not associated
	1043	* until the vnode is opened (usually in spec_open()), and will be
	1044	* disassociated on last close.
	1045	*/
	1046	void
	1047	addaliasu(struct vnode *nvp, udev_t nvp_udev)
	1048	{
	1049	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
	1050	panic("addaliasu on non-special vnode");
	1051	nvp->v_udev = nvp_udev;
	1052	}
	1053
	1054	/*
	1055	* Disassociate a vnode from its underlying filesystem.
	1056	*
	1057	* The vnode must be VX locked and refd
	1058	*
	1059	* If there are v_usecount references to the vnode other then ours we have
	1060	* to VOP_CLOSE the vnode before we can deactivate and reclaim it.
	1061	*/
	1062	void
	1063	vclean(struct vnode vp, int flags, struct thread td)
	1064	{
	1065	int active;
	1066
	1067	/*
	1068	* If the vnode has already been reclaimed we have nothing to do.
	1069	*/
	1070	if (vp->v_flag & VRECLAIMED)
	1071	return;
	1072	vp->v_flag \|= VRECLAIMED;
	1073
	1074	/*
	1075	* Scrap the vfs cache
	1076	*/
	1077	while (cache_inval_vp(vp, 0) != 0) {
	1078	printf("Warning: vnode %p clean/cache_resolution race detected\n", vp);
	1079	tsleep(vp, 0, "vclninv", 2);
	1080	}
	1081
	1082	/*
	1083	* Check to see if the vnode is in use. If so we have to reference it
	1084	* before we clean it out so that its count cannot fall to zero and
	1085	* generate a race against ourselves to recycle it.
	1086	*/
	1087	active = (vp->v_usecount > 1);
	1088
	1089	/*
	1090	* Clean out any buffers associated with the vnode and destroy its
	1091	* object, if it has one.
	1092	*/
	1093	vinvalbuf(vp, V_SAVE, td, 0, 0);
	1094	VOP_DESTROYVOBJECT(vp);
	1095
	1096	/*
	1097	* If purging an active vnode, it must be closed and
	1098	* deactivated before being reclaimed. XXX
	1099	*
	1100	* Note that neither of these routines unlocks the vnode.
	1101	*/
	1102	if (active) {
	1103	if (flags & DOCLOSE)
	1104	VOP_CLOSE(vp, FNONBLOCK, td);
	1105	}
	1106
	1107	/*
	1108	* If the vnode has not be deactivated, deactivated it.
	1109	*/
	1110	if ((vp->v_flag & VINACTIVE) == 0) {
	1111	vp->v_flag \|= VINACTIVE;
	1112	VOP_INACTIVE(vp, td);
	1113	}
	1114
	1115	/*
	1116	* Reclaim the vnode.
	1117	*/
	1118	if (VOP_RECLAIM(vp, td))
	1119	panic("vclean: cannot reclaim");
	1120
	1121	/*
	1122	* Done with purge, notify sleepers of the grim news.
	1123	*/
	1124	vp->v_ops = &dead_vnode_vops;
	1125	vn_pollgone(vp);
	1126	vp->v_tag = VT_NON;
	1127	}
	1128
	1129	/*
	1130	* Eliminate all activity associated with the requested vnode
	1131	* and with all vnodes aliased to the requested vnode.
	1132	*
	1133	* The vnode must be referenced and vx_lock()'d
	1134	*
	1135	* revoke { struct vnode *a_vp, int a_flags }
	1136	*/
	1137	int
	1138	vop_stdrevoke(struct vop_revoke_args *ap)
	1139	{
	1140	struct vnode vp, vq;
	1141	lwkt_tokref ilock;
	1142	dev_t dev;
	1143
	1144	KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));
	1145
	1146	vp = ap->a_vp;
	1147
	1148	/*
	1149	* If the vnode is already dead don't try to revoke it
	1150	*/
	1151	if (vp->v_flag & VRECLAIMED)
	1152	return (0);
	1153
	1154	/*
	1155	* If the vnode has a device association, scrap all vnodes associated
	1156	* with the device. Don't let the device disappear on us while we
	1157	* are scrapping the vnodes.
	1158	*
	1159	* The passed vp will probably show up in the list, do not VX lock
	1160	* it twice!
	1161	*/
	1162	if (vp->v_type != VCHR && vp->v_type != VBLK)
	1163	return(0);
	1164	if ((dev = vp->v_rdev) == NULL) {
	1165	if ((dev = udev2dev(vp->v_udev, vp->v_type == VBLK)) == NODEV)
	1166	return(0);
	1167	}
	1168	reference_dev(dev);
	1169	lwkt_gettoken(&ilock, &spechash_token);
	1170	while ((vq = SLIST_FIRST(&dev->si_hlist)) != NULL) {
	1171	if (vp == vq \|\| vx_get(vq) == 0) {
	1172	if (vq == SLIST_FIRST(&dev->si_hlist))
	1173	vgone(vq);
	1174	if (vp != vq)
	1175	vx_put(vq);
	1176	}
	1177	}
	1178	lwkt_reltoken(&ilock);
	1179	release_dev(dev);
	1180	return (0);
	1181	}
	1182
	1183	/*
	1184	* Recycle an unused vnode to the front of the free list.
	1185	*
	1186	* Returns 1 if we were successfully able to recycle the vnode,
	1187	* 0 otherwise.
	1188	*/
	1189	int
	1190	vrecycle(struct vnode vp, struct thread td)
	1191	{
	1192	if (vp->v_usecount == 1) {
	1193	vgone(vp);
	1194	return (1);
	1195	}
	1196	return (0);
	1197	}
	1198
	1199	/*
	1200	* Eliminate all activity associated with a vnode in preparation for reuse.
	1201	*
	1202	* The vnode must be VX locked and refd and will remain VX locked and refd
	1203	* on return. This routine may be called with the vnode in any state, as
	1204	* long as it is VX locked. The vnode will be cleaned out and marked
	1205	* VRECLAIMED but will not actually be reused until all existing refs and
	1206	* holds go away.
	1207	*
	1208	* NOTE: This routine may be called on a vnode which has not yet been
	1209	* already been deactivated (VOP_INACTIVE), or on a vnode which has
	1210	* already been reclaimed.
	1211	*
	1212	* This routine is not responsible for placing us back on the freelist.
	1213	* Instead, it happens automatically when the caller releases the VX lock
	1214	* (assuming there aren't any other references).
	1215	*/
	1216	void
	1217	vgone(struct vnode *vp)
	1218	{
	1219	/*
	1220	* assert that the VX lock is held. This is an absolute requirement
	1221	* now for vgone() to be called.
	1222	*/
	1223	KKASSERT(vp->v_lock.lk_exclusivecount == 1);
	1224
	1225	/*
	1226	* Clean out the filesystem specific data and set the VRECLAIMED
	1227	* bit. Also deactivate the vnode if necessary.
	1228	*/
	1229	vclean(vp, DOCLOSE, curthread);
	1230
	1231	/*
	1232	* Delete from old mount point vnode list, if on one.
	1233	*/
	1234	if (vp->v_mount != NULL)
	1235	insmntque(vp, NULL);
	1236
	1237	/*
	1238	* If special device, remove it from special device alias list
	1239	* if it is on one. This should normally only occur if a vnode is
	1240	* being revoked as the device should otherwise have been released
	1241	* naturally.
	1242	*/
	1243	if ((vp->v_type == VBLK \|\| vp->v_type == VCHR) && vp->v_rdev != NULL) {
	1244	v_release_rdev(vp);
	1245	}
	1246
	1247	/*
	1248	* Set us to VBAD
	1249	*/
	1250	vp->v_type = VBAD;
	1251	}
	1252
	1253	/*
	1254	* Lookup a vnode by device number.
	1255	*/
	1256	int
	1257	vfinddev(dev_t dev, enum vtype type, struct vnode **vpp)
	1258	{
	1259	lwkt_tokref ilock;
	1260	struct vnode *vp;
	1261
	1262	lwkt_gettoken(&ilock, &spechash_token);
	1263	SLIST_FOREACH(vp, &dev->si_hlist, v_specnext) {
	1264	if (type == vp->v_type) {
	1265	*vpp = vp;
	1266	lwkt_reltoken(&ilock);
	1267	return (1);
	1268	}
	1269	}
	1270	lwkt_reltoken(&ilock);
	1271	return (0);
	1272	}
	1273
	1274	/*
	1275	* Calculate the total number of references to a special device. This
	1276	* routine may only be called for VBLK and VCHR vnodes since v_rdev is
	1277	* an overloaded field. Since udev2dev can now return NODEV, we have
	1278	* to check for a NULL v_rdev.
	1279	*/
	1280	int
	1281	count_dev(dev_t dev)
	1282	{
	1283	lwkt_tokref ilock;
	1284	struct vnode *vp;
	1285	int count = 0;
	1286
	1287	if (SLIST_FIRST(&dev->si_hlist)) {
	1288	lwkt_gettoken(&ilock, &spechash_token);
	1289	SLIST_FOREACH(vp, &dev->si_hlist, v_specnext) {
	1290	count += vp->v_usecount;
	1291	}
	1292	lwkt_reltoken(&ilock);
	1293	}
	1294	return(count);
	1295	}
	1296
	1297	int
	1298	count_udev(udev_t udev)
	1299	{
	1300	dev_t dev;
	1301
	1302	if ((dev = udev2dev(udev, 0)) == NODEV)
	1303	return(0);
	1304	return(count_dev(dev));
	1305	}
	1306
	1307	int
	1308	vcount(struct vnode *vp)
	1309	{
	1310	if (vp->v_rdev == NULL)
	1311	return(0);
	1312	return(count_dev(vp->v_rdev));
	1313	}
	1314
	1315	/*
	1316	* Print out a description of a vnode.
	1317	*/
	1318	static char *typename[] =
	1319	{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
	1320
	1321	void
	1322	vprint(char label, struct vnode vp)
	1323	{
	1324	char buf[96];
	1325
	1326	if (label != NULL)
	1327	printf("%s: %p: ", label, (void *)vp);
	1328	else
	1329	printf("%p: ", (void *)vp);
	1330	printf("type %s, usecount %d, writecount %d, refcount %d,",
	1331	typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	1332	vp->v_holdcnt);
	1333	buf[0] = '\0';
	1334	if (vp->v_flag & VROOT)
	1335	strcat(buf, "\|VROOT");
	1336	if (vp->v_flag & VTEXT)
	1337	strcat(buf, "\|VTEXT");
	1338	if (vp->v_flag & VSYSTEM)
	1339	strcat(buf, "\|VSYSTEM");
	1340	if (vp->v_flag & VBWAIT)
	1341	strcat(buf, "\|VBWAIT");
	1342	if (vp->v_flag & VFREE)
	1343	strcat(buf, "\|VFREE");
	1344	if (vp->v_flag & VOBJBUF)
	1345	strcat(buf, "\|VOBJBUF");
	1346	if (buf[0] != '\0')
	1347	printf(" flags (%s)", &buf[1]);
	1348	if (vp->v_data == NULL) {
	1349	printf("\n");
	1350	} else {
	1351	printf("\n\t");
	1352	VOP_PRINT(vp);
	1353	}
	1354	}
	1355
	#ifdef DDB
	#include <ddb/ddb.h>

	static int db_show_locked_vnodes(struct mount *mp, void *data);

	/*
	 * List all of the locked vnodes in the system.
	 * Called when debugging the kernel.
	 */
	DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
	{
		printf("Locked vnodes\n");
		mountlist_scan(db_show_locked_vnodes, NULL,
		    MNTSCAN_FORWARD|MNTSCAN_NOBUSY);
	}

	/*
	 * mountlist_scan() callback: vprint() every vnode on the mount's vnode
	 * list for which VOP_ISLOCKED() reports a lock.  Always returns 0.
	 */
	static int
	db_show_locked_vnodes(struct mount *mp, void *data __unused)
	{
		struct vnode *vp;

		TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
			if (VOP_ISLOCKED(vp, NULL))
				vprint((char *)0, vp);
		}
		return (0);
	}
	#endif
	1384
	1385	/*
	1386	* Top level filesystem related information gathering.
	1387	*/
	1388	static int sysctl_ovfs_conf (SYSCTL_HANDLER_ARGS);
	1389
	1390	static int
	1391	vfs_sysctl(SYSCTL_HANDLER_ARGS)
	1392	{
	1393	int name = (int )arg1 - 1; /* XXX */
	1394	u_int namelen = arg2 + 1; /* XXX */
	1395	struct vfsconf *vfsp;
	1396
	1397	#if 1 \|\| defined(COMPAT_PRELITE2)
	1398	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
	1399	if (namelen == 1)
	1400	return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
	1401	#endif
	1402
	1403	#ifdef notyet
	1404	/* all sysctl names at this level are at least name and field */
	1405	if (namelen < 2)
	1406	return (ENOTDIR); /* overloaded */
	1407	if (name[0] != VFS_GENERIC) {
	1408	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
	1409	if (vfsp->vfc_typenum == name[0])
	1410	break;
	1411	if (vfsp == NULL)
	1412	return (EOPNOTSUPP);
	1413	return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
	1414	oldp, oldlenp, newp, newlen, p));
	1415	}
	1416	#endif
	1417	switch (name[1]) {
	1418	case VFS_MAXTYPENUM:
	1419	if (namelen != 2)
	1420	return (ENOTDIR);
	1421	return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
	1422	case VFS_CONF:
	1423	if (namelen != 3)
	1424	return (ENOTDIR); /* overloaded */
	1425	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
	1426	if (vfsp->vfc_typenum == name[2])
	1427	break;
	1428	if (vfsp == NULL)
	1429	return (EOPNOTSUPP);
	1430	return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
	1431	}
	1432	return (EOPNOTSUPP);
	1433	}
	1434
	1435	SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
	1436	"Generic filesystem");
	1437
	1438	#if 1 \|\| defined(COMPAT_PRELITE2)
	1439
	1440	static int
	1441	sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS)
	1442	{
	1443	int error;
	1444	struct vfsconf *vfsp;
	1445	struct ovfsconf ovfs;
	1446
	1447	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
	1448	bzero(&ovfs, sizeof(ovfs));
	1449	ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */
	1450	strcpy(ovfs.vfc_name, vfsp->vfc_name);
	1451	ovfs.vfc_index = vfsp->vfc_typenum;
	1452	ovfs.vfc_refcount = vfsp->vfc_refcount;
	1453	ovfs.vfc_flags = vfsp->vfc_flags;
	1454	error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
	1455	if (error)
	1456	return error;
	1457	}
	1458	return 0;
	1459	}
	1460
	1461	#endif /* 1 \|\| COMPAT_PRELITE2 */
	1462
	1463	/*
	1464	* Check to see if a filesystem is mounted on a block device.
	1465	*/
	1466	int
	1467	vfs_mountedon(struct vnode *vp)
	1468	{
	1469	dev_t dev;
	1470
	1471	if ((dev = vp->v_rdev) == NULL)
	1472	dev = udev2dev(vp->v_udev, (vp->v_type == VBLK));
	1473	if (dev != NODEV && dev->si_mountpoint)
	1474	return (EBUSY);
	1475	return (0);
	1476	}
	1477
	1478	/*
	1479	* Unmount all filesystems. The list is traversed in reverse order
	1480	* of mounting to avoid dependencies.
	1481	*/
	1482
	1483	static int vfs_umountall_callback(struct mount mp, void data);
	1484
	1485	void
	1486	vfs_unmountall(void)
	1487	{
	1488	struct thread *td = curthread;
	1489	int count;
	1490
	1491	if (td->td_proc == NULL)
	1492	td = initproc->p_thread; /* XXX XXX use proc0 instead? */
	1493
	1494	do {
	1495	count = mountlist_scan(vfs_umountall_callback,
	1496	&td, MNTSCAN_REVERSE\|MNTSCAN_NOBUSY);
	1497	} while (count);
	1498	}
	1499
	1500	static
	1501	int
	1502	vfs_umountall_callback(struct mount mp, void data)
	1503	{
	1504	struct thread td = (struct thread **)data;
	1505	int error;
	1506
	1507	error = dounmount(mp, MNT_FORCE, td);
	1508	if (error) {
	1509	mountlist_remove(mp);
	1510	printf("unmount of filesystem mounted from %s failed (",
	1511	mp->mnt_stat.f_mntfromname);
	1512	if (error == EBUSY)
	1513	printf("BUSY)\n");
	1514	else
	1515	printf("%d)\n", error);
	1516	}
	1517	return(1);
	1518	}
	1519
	1520	/*
	1521	* Build hash lists of net addresses and hang them off the mount point.
	1522	* Called by ufs_mount() to set up the lists of export addresses.
	1523	*/
	1524	static int
	1525	vfs_hang_addrlist(struct mount mp, struct netexport nep,
	1526	struct export_args *argp)
	1527	{
	1528	struct netcred *np;
	1529	struct radix_node_head *rnh;
	1530	int i;
	1531	struct radix_node *rn;
	1532	struct sockaddr saddr, smask = 0;
	1533	struct domain *dom;
	1534	int error;
	1535
	1536	if (argp->ex_addrlen == 0) {
	1537	if (mp->mnt_flag & MNT_DEFEXPORTED)
	1538	return (EPERM);
	1539	np = &nep->ne_defexported;
	1540	np->netc_exflags = argp->ex_flags;
	1541	np->netc_anon = argp->ex_anon;
	1542	np->netc_anon.cr_ref = 1;
	1543	mp->mnt_flag \|= MNT_DEFEXPORTED;
	1544	return (0);
	1545	}
	1546
	1547	if (argp->ex_addrlen < 0 \|\| argp->ex_addrlen > MLEN)
	1548	return (EINVAL);
	1549	if (argp->ex_masklen < 0 \|\| argp->ex_masklen > MLEN)
	1550	return (EINVAL);
	1551
	1552	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	1553	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
	1554	bzero((caddr_t) np, i);
	1555	saddr = (struct sockaddr *) (np + 1);
	1556	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
	1557	goto out;
	1558	if (saddr->sa_len > argp->ex_addrlen)
	1559	saddr->sa_len = argp->ex_addrlen;
	1560	if (argp->ex_masklen) {
	1561	smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
	1562	error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
	1563	if (error)
	1564	goto out;
	1565	if (smask->sa_len > argp->ex_masklen)
	1566	smask->sa_len = argp->ex_masklen;
	1567	}
	1568	i = saddr->sa_family;
	1569	if ((rnh = nep->ne_rtable[i]) == 0) {
	1570	/*
	1571	* Seems silly to initialize every AF when most are not used,
	1572	* do so on demand here
	1573	*/
	1574	SLIST_FOREACH(dom, &domains, dom_next)
	1575	if (dom->dom_family == i && dom->dom_rtattach) {
	1576	dom->dom_rtattach((void **) &nep->ne_rtable[i],
	1577	dom->dom_rtoffset);
	1578	break;
	1579	}
	1580	if ((rnh = nep->ne_rtable[i]) == 0) {
	1581	error = ENOBUFS;
	1582	goto out;
	1583	}
	1584	}
	1585	rn = (rnh->rnh_addaddr) ((char ) saddr, (char *) smask, rnh,
	1586	np->netc_rnodes);
	1587	if (rn == 0 \|\| np != (struct netcred ) rn) { / already exists */
	1588	error = EPERM;
	1589	goto out;
	1590	}
	1591	np->netc_exflags = argp->ex_flags;
	1592	np->netc_anon = argp->ex_anon;
	1593	np->netc_anon.cr_ref = 1;
	1594	return (0);
	1595	out:
	1596	free(np, M_NETADDR);
	1597	return (error);
	1598	}
	1599
	1600	/* ARGSUSED */
	1601	static int
	1602	vfs_free_netcred(struct radix_node rn, void w)
	1603	{
	1604	struct radix_node_head rnh = (struct radix_node_head ) w;
	1605
	1606	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
	1607	free((caddr_t) rn, M_NETADDR);
	1608	return (0);
	1609	}
	1610
	1611	/*
	1612	* Free the net address hash lists that are hanging off the mount points.
	1613	*/
	1614	static void
	1615	vfs_free_addrlist(struct netexport *nep)
	1616	{
	1617	int i;
	1618	struct radix_node_head *rnh;
	1619
	1620	for (i = 0; i <= AF_MAX; i++)
	1621	if ((rnh = nep->ne_rtable[i])) {
	1622	(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
	1623	(caddr_t) rnh);
	1624	free((caddr_t) rnh, M_RTABLE);
	1625	nep->ne_rtable[i] = 0;
	1626	}
	1627	}
	1628
	1629	int
	1630	vfs_export(struct mount mp, struct netexport nep, struct export_args *argp)
	1631	{
	1632	int error;
	1633
	1634	if (argp->ex_flags & MNT_DELEXPORT) {
	1635	if (mp->mnt_flag & MNT_EXPUBLIC) {
	1636	vfs_setpublicfs(NULL, NULL, NULL);
	1637	mp->mnt_flag &= ~MNT_EXPUBLIC;
	1638	}
	1639	vfs_free_addrlist(nep);
	1640	mp->mnt_flag &= ~(MNT_EXPORTED \| MNT_DEFEXPORTED);
	1641	}
	1642	if (argp->ex_flags & MNT_EXPORTED) {
	1643	if (argp->ex_flags & MNT_EXPUBLIC) {
	1644	if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
	1645	return (error);
	1646	mp->mnt_flag \|= MNT_EXPUBLIC;
	1647	}
	1648	if ((error = vfs_hang_addrlist(mp, nep, argp)))
	1649	return (error);
	1650	mp->mnt_flag \|= MNT_EXPORTED;
	1651	}
	1652	return (0);
	1653	}
	1654
	1655
	1656	/*
	1657	* Set the publicly exported filesystem (WebNFS). Currently, only
	1658	* one public filesystem is possible in the spec (RFC 2054 and 2055)
	1659	*/
	1660	int
	1661	vfs_setpublicfs(struct mount mp, struct netexport nep,
	1662	struct export_args *argp)
	1663	{
	1664	int error;
	1665	struct vnode *rvp;
	1666	char *cp;
	1667
	1668	/*
	1669	* mp == NULL -> invalidate the current info, the FS is
	1670	* no longer exported. May be called from either vfs_export
	1671	* or unmount, so check if it hasn't already been done.
	1672	*/
	1673	if (mp == NULL) {
	1674	if (nfs_pub.np_valid) {
	1675	nfs_pub.np_valid = 0;
	1676	if (nfs_pub.np_index != NULL) {
	1677	FREE(nfs_pub.np_index, M_TEMP);
	1678	nfs_pub.np_index = NULL;
	1679	}
	1680	}
	1681	return (0);
	1682	}
	1683
	1684	/*
	1685	* Only one allowed at a time.
	1686	*/
	1687	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
	1688	return (EBUSY);
	1689
	1690	/*
	1691	* Get real filehandle for root of exported FS.
	1692	*/
	1693	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
	1694	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
	1695
	1696	if ((error = VFS_ROOT(mp, &rvp)))
	1697	return (error);
	1698
	1699	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
	1700	return (error);
	1701
	1702	vput(rvp);
	1703
	1704	/*
	1705	* If an indexfile was specified, pull it in.
	1706	*/
	1707	if (argp->ex_indexfile != NULL) {
	1708	int namelen;
	1709
	1710	error = vn_get_namelen(rvp, &namelen);
	1711	if (error)
	1712	return (error);
	1713	MALLOC(nfs_pub.np_index, char *, namelen, M_TEMP,
	1714	M_WAITOK);
	1715	error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
	1716	namelen, (size_t *)0);
	1717	if (!error) {
	1718	/*
	1719	* Check for illegal filenames.
	1720	*/
	1721	for (cp = nfs_pub.np_index; *cp; cp++) {
	1722	if (*cp == '/') {
	1723	error = EINVAL;
	1724	break;
	1725	}
	1726	}
	1727	}
	1728	if (error) {
	1729	FREE(nfs_pub.np_index, M_TEMP);
	1730	return (error);
	1731	}
	1732	}
	1733
	1734	nfs_pub.np_mount = mp;
	1735	nfs_pub.np_valid = 1;
	1736	return (0);
	1737	}
	1738
	1739	struct netcred *
	1740	vfs_export_lookup(struct mount mp, struct netexport nep,
	1741	struct sockaddr *nam)
	1742	{
	1743	struct netcred *np;
	1744	struct radix_node_head *rnh;
	1745	struct sockaddr *saddr;
	1746
	1747	np = NULL;
	1748	if (mp->mnt_flag & MNT_EXPORTED) {
	1749	/*
	1750	* Lookup in the export list first.
	1751	*/
	1752	if (nam != NULL) {
	1753	saddr = nam;
	1754	rnh = nep->ne_rtable[saddr->sa_family];
	1755	if (rnh != NULL) {
	1756	np = (struct netcred *)
	1757	(rnh->rnh_matchaddr)((char )saddr,
	1758	rnh);
	1759	if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
	1760	np = NULL;
	1761	}
	1762	}
	1763	/*
	1764	* If no address match, use the default if it exists.
	1765	*/
	1766	if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
	1767	np = &nep->ne_defexported;
	1768	}
	1769	return (np);
	1770	}
	1771
	1772	/*
	1773	* perform msync on all vnodes under a mount point. The mount point must
	1774	* be locked. This code is also responsible for lazy-freeing unreferenced
	1775	* vnodes whos VM objects no longer contain pages.
	1776	*
	1777	* NOTE: MNT_WAIT still skips vnodes in the VXLOCK state.
	1778	*/
	1779	static int vfs_msync_scan1(struct mount mp, struct vnode vp, void *data);
	1780	static int vfs_msync_scan2(struct mount mp, struct vnode vp, void *data);
	1781
	1782	void
	1783	vfs_msync(struct mount *mp, int flags)
	1784	{
	1785	vmntvnodescan(mp, VMSC_REFVP, vfs_msync_scan1, vfs_msync_scan2,
	1786	(void *)flags);
	1787	}
	1788
	1789	/*
	1790	* scan1 is a fast pre-check. There could be hundreds of thousands of
	1791	* vnodes, we cannot afford to do anything heavy weight until we have a
	1792	* fairly good indication that there is work to do.
	1793	*/
	1794	static
	1795	int
	1796	vfs_msync_scan1(struct mount mp, struct vnode vp, void *data)
	1797	{
	1798	int flags = (int)data;
	1799
	1800	if ((vp->v_flag & VRECLAIMED) == 0) {
	1801	if (vshouldfree(vp, 0))
	1802	return(0); /* call scan2 */
	1803	if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
	1804	(vp->v_flag & VOBJDIRTY) &&
	1805	(flags == MNT_WAIT \|\| VOP_ISLOCKED(vp, NULL) == 0)) {
	1806	return(0); /* call scan2 */
	1807	}
	1808	}
	1809
	1810	/*
	1811	* do not call scan2, continue the loop
	1812	*/
	1813	return(-1);
	1814	}
	1815
	1816	static
	1817	int
	1818	vfs_msync_scan2(struct mount mp, struct vnode vp, void *data)
	1819	{
	1820	vm_object_t obj;
	1821	int flags = (int)data;
	1822
	1823	if (vp->v_flag & VRECLAIMED)
	1824	return(0);
	1825
	1826	if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
	1827	(vp->v_flag & VOBJDIRTY) &&
	1828	(flags == MNT_WAIT \|\| VOP_ISLOCKED(vp, NULL) == 0)) {
	1829	if (VOP_GETVOBJECT(vp, &obj) == 0) {
	1830	vm_object_page_clean(obj, 0, 0,
	1831	flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC);
	1832	}
	1833	}
	1834	return(0);
	1835	}
	1836
	/*
	 * Create the VM object needed for VMIO and mmap support.  This
	 * is done for all VREG files in the system.  Some filesystems might
	 * afford the additional metadata buffering capability of the
	 * VMIO code by making the device node be VMIO mode also.
	 *
	 * vp must be locked when vfs_object_create is called.
	 *
	 * Thin wrapper: returns whatever VOP_CREATEVOBJECT() returns.
	 */
	int
	vfs_object_create(struct vnode *vp, struct thread *td)
	{
		return (VOP_CREATEVOBJECT(vp, td));
	}
	1850
	1851	/*
	1852	* Record a process's interest in events which might happen to
	1853	* a vnode. Because poll uses the historic select-style interface
	1854	* internally, this routine serves as both the ``check for any
	1855	* pending events'' and the ``record my interest in future events''
	1856	* functions. (These are done together, while the lock is held,
	1857	* to avoid race conditions.)
	1858	*/
	1859	int
	1860	vn_pollrecord(struct vnode vp, struct thread td, int events)
	1861	{
	1862	lwkt_tokref ilock;
	1863
	1864	lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
	1865	if (vp->v_pollinfo.vpi_revents & events) {
	1866	/*
	1867	* This leaves events we are not interested
	1868	* in available for the other process which
	1869	* which presumably had requested them
	1870	* (otherwise they would never have been
	1871	* recorded).
	1872	*/
	1873	events &= vp->v_pollinfo.vpi_revents;
	1874	vp->v_pollinfo.vpi_revents &= ~events;
	1875
	1876	lwkt_reltoken(&ilock);
	1877	return events;
	1878	}
	1879	vp->v_pollinfo.vpi_events \|= events;
	1880	selrecord(td, &vp->v_pollinfo.vpi_selinfo);
	1881	lwkt_reltoken(&ilock);
	1882	return 0;
	1883	}
	1884
	1885	/*
	1886	* Note the occurrence of an event. If the VN_POLLEVENT macro is used,
	1887	* it is possible for us to miss an event due to race conditions, but
	1888	* that condition is expected to be rare, so for the moment it is the
	1889	* preferred interface.
	1890	*/
	1891	void
	1892	vn_pollevent(struct vnode *vp, int events)
	1893	{
	1894	lwkt_tokref ilock;
	1895
	1896	lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
	1897	if (vp->v_pollinfo.vpi_events & events) {
	1898	/*
	1899	* We clear vpi_events so that we don't
	1900	* call selwakeup() twice if two events are
	1901	* posted before the polling process(es) is
	1902	* awakened. This also ensures that we take at
	1903	* most one selwakeup() if the polling process
	1904	* is no longer interested. However, it does
	1905	* mean that only one event can be noticed at
	1906	* a time. (Perhaps we should only clear those
	1907	* event bits which we note?) XXX
	1908	*/
	1909	vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */
	1910	vp->v_pollinfo.vpi_revents \|= events;
	1911	selwakeup(&vp->v_pollinfo.vpi_selinfo);
	1912	}
	1913	lwkt_reltoken(&ilock);
	1914	}
	1915
	1916	/*
	1917	* Wake up anyone polling on vp because it is being revoked.
	1918	* This depends on dead_poll() returning POLLHUP for correct
	1919	* behavior.
	1920	*/
	1921	void
	1922	vn_pollgone(struct vnode *vp)
	1923	{
	1924	lwkt_tokref ilock;
	1925
	1926	lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
	1927	if (vp->v_pollinfo.vpi_events) {
	1928	vp->v_pollinfo.vpi_events = 0;
	1929	selwakeup(&vp->v_pollinfo.vpi_selinfo);
	1930	}
	1931	lwkt_reltoken(&ilock);
	1932	}
	1933
	1934	/*
	1935	* extract the dev_t from a VBLK or VCHR. The vnode must have been opened
	1936	* (or v_rdev might be NULL).
	1937	*/
	1938	dev_t
	1939	vn_todev(struct vnode *vp)
	1940	{
	1941	if (vp->v_type != VBLK && vp->v_type != VCHR)
	1942	return (NODEV);
	1943	KKASSERT(vp->v_rdev != NULL);
	1944	return (vp->v_rdev);
	1945	}
	1946
	1947	/*
	1948	* Check if vnode represents a disk device. The vnode does not need to be
	1949	* opened.
	1950	*/
	1951	int
	1952	vn_isdisk(struct vnode vp, int errp)
	1953	{
	1954	dev_t dev;
	1955
	1956	if (vp->v_type != VBLK && vp->v_type != VCHR) {
	1957	if (errp != NULL)
	1958	*errp = ENOTBLK;
	1959	return (0);
	1960	}
	1961
	1962	if ((dev = vp->v_rdev) == NULL)
	1963	dev = udev2dev(vp->v_udev, (vp->v_type == VBLK));
	1964	if (dev == NULL \|\| dev == NODEV) {
	1965	if (errp != NULL)
	1966	*errp = ENXIO;
	1967	return (0);
	1968	}
	1969	if (dev_is_good(dev) == 0) {
	1970	if (errp != NULL)
	1971	*errp = ENXIO;
	1972	return (0);
	1973	}
	1974	if ((dev_dflags(dev) & D_DISK) == 0) {
	1975	if (errp != NULL)
	1976	*errp = ENOTBLK;
	1977	return (0);
	1978	}
	1979	if (errp != NULL)
	1980	*errp = 0;
	1981	return (1);
	1982	}
	1983
	#ifdef DEBUG_VFS_LOCKS

	/*
	 * Panic, prefixing the message with str, if vp belongs to a locking
	 * filesystem and VOP_ISLOCKED() reports no lock on it.  A NULL vp is
	 * ignored.
	 */
	void
	assert_vop_locked(struct vnode *vp, const char *str)
	{
		if (vp && IS_LOCKING_VFS(vp) && !VOP_ISLOCKED(vp, NULL)) {
			panic("%s: %p is not locked shared but should be", str, vp);
		}
	}

	/*
	 * Panic, prefixing the message with str, if vp belongs to a locking
	 * filesystem and the current thread holds it exclusively.  A NULL vp
	 * is ignored.
	 */
	void
	assert_vop_unlocked(struct vnode *vp, const char *str)
	{
		if (vp && IS_LOCKING_VFS(vp)) {
			if (VOP_ISLOCKED(vp, curthread) == LK_EXCLUSIVE) {
				panic("%s: %p is locked but should not be", str, vp);
			}
		}
	}

	#endif
	2005
	2006	int
	2007	vn_get_namelen(struct vnode vp, int namelen)
	2008	{
	2009	int error, retval[2];
	2010
	2011	error = VOP_PATHCONF(vp, _PC_NAME_MAX, retval);
	2012	if (error)
	2013	return (error);
	2014	namelen = retval;
	2015	return (0);
	2016	}
	2017
	2018	int
	2019	vop_write_dirent(int error, struct uio uio, ino_t d_ino, uint8_t d_type,
	2020	uint16_t d_namlen, const char *d_name)
	2021	{
	2022	struct dirent *dp;
	2023	size_t len;
	2024
	2025	len = _DIRENT_RECLEN(d_namlen);
	2026	if (len > uio->uio_resid)
	2027	return(1);
	2028
	2029	dp = malloc(len, M_TEMP, M_WAITOK \| M_ZERO);
	2030
	2031	dp->d_ino = d_ino;
	2032	dp->d_namlen = d_namlen;
	2033	dp->d_type = d_type;
	2034	bcopy(d_name, dp->d_name, d_namlen);
	2035
	2036	*error = uiomove((caddr_t)dp, len, uio);
	2037
	2038	free(dp, M_TEMP);
	2039
	2040	return(0);
	2041	}