gitweb.dragonflybsd.org Git - dragonfly.git/blame

Commit	Line	Data
984263bc MD	1	/*
	2	* Copyright (c) 1982, 1986, 1989, 1993
	3	* The Regents of the University of California. All rights reserved.
	4	* (c) UNIX System Laboratories, Inc.
	5	* All or some portions of this file are derived from material licensed
	6	* to the University of California by American Telephone and Telegraph
	7	* Co. or Unix System Laboratories, Inc. and are reproduced herein with
	8	* the permission of UNIX System Laboratories, Inc.
	9	*
	10	* Redistribution and use in source and binary forms, with or without
	11	* modification, are permitted provided that the following conditions
	12	* are met:
	13	* 1. Redistributions of source code must retain the above copyright
	14	* notice, this list of conditions and the following disclaimer.
	15	* 2. Redistributions in binary form must reproduce the above copyright
	16	* notice, this list of conditions and the following disclaimer in the
	17	* documentation and/or other materials provided with the distribution.
dc71b7ab	18	* 3. Neither the name of the University nor the names of its contributors
984263bc MD	19	* may be used to endorse or promote products derived from this software
	20	* without specific prior written permission.
	21	*
	22	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	23	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	24	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	25	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	26	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	27	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	28	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	29	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	30	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	31	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	32	* SUCH DAMAGE.
	33	*
	34	* @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
	35	* $FreeBSD: src/sys/kern/vfs_vnops.c,v 1.87.2.13 2002/12/29 18:19:53 dillon Exp $
	36	*/
	37
	38	#include <sys/param.h>
	39	#include <sys/systm.h>
13dd34d8	40	#include <sys/uio.h>
984263bc MD	41	#include <sys/fcntl.h>
	42	#include <sys/file.h>
	43	#include <sys/stat.h>
	44	#include <sys/proc.h>
2b3f93ea	45	#include <sys/caps.h>
984263bc	46	#include <sys/mount.h>
fad57d0e	47	#include <sys/nlookup.h>
984263bc MD	48	#include <sys/vnode.h>
	49	#include <sys/buf.h>
	50	#include <sys/filio.h>
	51	#include <sys/ttycom.h>
	52	#include <sys/conf.h>
c0885fab	53	#include <sys/sysctl.h>
984263bc MD	54	#include <sys/syslog.h>
984263bc MD	55
684a93c4	56	#include <sys/mplock2.h>
c0885fab	57
87de5057 MD	58	static int vn_closefile (struct file *fp);
87de5057 MD	59	static int vn_ioctl (struct file *fp, u_long com, caddr_t data,
87baaf0c	60	struct ucred cred, struct sysmsg msg);
0a80a445	61	static int vn_read (struct file fp, struct uio uio,
87de5057	62	struct ucred *cred, int flags);
402ed7e1	63	static int vn_kqfilter (struct file fp, struct knote kn);
87de5057	64	static int vn_statfile (struct file fp, struct stat sb, struct ucred *cred);
0a80a445	65	static int vn_write (struct file fp, struct uio uio,
87de5057	66	struct ucred *cred, int flags);
984263bc	67
fad57d0e	68	struct fileops vnode_fileops = {
b2d248cb MD	69	.fo_read = vn_read,
	70	.fo_write = vn_write,
	71	.fo_ioctl = vn_ioctl,
b2d248cb MD	72	.fo_kqfilter = vn_kqfilter,
	73	.fo_stat = vn_statfile,
	74	.fo_close = vn_closefile,
	75	.fo_shutdown = nofo_shutdown
984263bc MD	76	};
	77
	78	/*
fad57d0e MD	79	* Common code for vnode open operations. Check permissions, and call
	80	* the VOP_OPEN or VOP_NCREATE routine.
	81	*
	82	* The caller is responsible for setting up nd with nlookup_init() and
	83	* for cleaning it up with nlookup_done(), whether we return an error
	84	* or not.
	85	*
	86	* On success nd->nl_open_vp will hold a referenced and, if requested,
	87	* locked vnode. A locked vnode is requested via NLC_LOCKVP. If fp
	88	* is non-NULL the vnode will be installed in the file pointer.
	89	*
12cdc371 MD	90	* NOTE: If the caller wishes the namecache entry to be operated with
	91	* a shared lock it must use NLC_SHAREDLOCK. If NLC_LOCKVP is set
	92	* then the vnode lock will also be shared.
	93	*
fad57d0e	94	* NOTE: The vnode is referenced just once on return whether or not it
12cdc371	95	* is also installed in the file pointer.
984263bc MD	96	*/
984263bc MD	97	int
5bd45597	98	vn_open(struct nlookupdata nd, struct file *fpp, int fmode, int cmode)
984263bc	99	{
5bd45597	100	struct file fp = fpp ? fpp : NULL;
1fd87d54	101	struct vnode *vp;
fad57d0e	102	struct ucred *cred = nd->nl_cred;
984263bc MD	103	struct vattr vat;
984263bc MD	104	struct vattr *vap = &vat;
3a907475	105	int error;
221494c8	106	int vpexcl;
e9b56058	107	u_int flags;
18cd8808 FT	108	uint64_t osize;
18cd8808 FT	109	struct mount *mp;
984263bc	110
d7c75c7a MD	111	/*
	112	* Certain combinations are illegal
	113	*/
	114	if ((fmode & (FWRITE \| O_TRUNC)) == O_TRUNC)
	115	return(EACCES);
	116
fad57d0e MD	117	/*
fad57d0e MD	118	* Lookup the path and create or obtain the vnode. After a
28623bf9	119	* successful lookup a locked nd->nl_nch will be returned.
fad57d0e MD	120	*
	121	* The result of this section should be a locked vnode.
	122	*
	123	* XXX with only a little work we should be able to avoid locking
	124	* the vnode if FWRITE, O_CREAT, and O_TRUNC are not set.
	125	*/
3a907475 MD	126	nd->nl_flags \|= NLC_OPEN;
	127	if (fmode & O_APPEND)
	128	nd->nl_flags \|= NLC_APPEND;
	129	if (fmode & O_TRUNC)
	130	nd->nl_flags \|= NLC_TRUNCATE;
	131	if (fmode & FREAD)
	132	nd->nl_flags \|= NLC_READ;
	133	if (fmode & FWRITE)
	134	nd->nl_flags \|= NLC_WRITE;
d7c75c7a MD	135	if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
d7c75c7a MD	136	nd->nl_flags \|= NLC_FOLLOW;
3a907475	137
984263bc	138	if (fmode & O_CREAT) {
fad57d0e MD	139	/*
	140	* CONDITIONAL CREATE FILE CASE
	141	*
	142	* Setting NLC_CREATE causes a negative hit to store
	143	* the negative hit ncp and not return an error. Then
0a80a445	144	* nc_error or nc_vp may be checked to see if the ncp
fad57d0e MD	145	* represents a negative hit. NLC_CREATE also requires
	146	* write permission on the governing directory or EPERM
	147	* is returned.
aaf02314	148	*
9f86c598 DF	149	* If the file exists but is missing write permission,
	150	* nlookup() returns EACCES. This has to be handled specially
	151	* when combined with O_EXCL.
fad57d0e	152	*/
fad57d0e	153	nd->nl_flags \|= NLC_CREATE;
5312fa43	154	nd->nl_flags \|= NLC_REFDVP;
c4df9635	155	bwillinode(1);
fad57d0e	156	error = nlookup(nd);
9f86c598	157	if (error == EACCES && nd->nl_nch.ncp->nc_vp != NULL &&
038b596b MD	158	(fmode & O_EXCL) && !nd->nl_dir_error)
038b596b MD	159	{
9f86c598	160	error = EEXIST;
aaf02314 MD	161	}
	162
	163	/*
	164	* If no error and nd->nl_dvp is NULL, the nlookup represents
	165	* a mount-point or cross-mount situation. e.g.
	166	* open("/var/cache", O_CREAT), where /var/cache is a
	167	* mount point or a null-mount point.
	168	*/
	169	if (error == 0 && nd->nl_dvp == NULL)
	170	error = EINVAL;
806dcf9a MD	171	} else {
	172	/*
	173	* NORMAL OPEN FILE CASE
	174	*/
	175	error = nlookup(nd);
	176	}
fad57d0e	177
806dcf9a MD	178	if (error)
806dcf9a MD	179	return (error);
fad57d0e	180
806dcf9a MD	181	/*
	182	* split case to allow us to re-resolve and retry the ncp in case
	183	* we get ESTALE.
d0e99d5d MD	184	*
d0e99d5d MD	185	* (error is 0 on entry / retry)
806dcf9a MD	186	*/
806dcf9a MD	187	again:
d0e99d5d MD	188	/*
	189	* Checks for (likely) filesystem-modifying cases and allows
	190	* the filesystem to stall the front-end.
	191	*/
	192	if ((fmode & (FWRITE \| O_TRUNC)) \|\|
	193	((fmode & O_CREAT) && nd->nl_nch.ncp->nc_vp == NULL)) {
	194	error = ncp_writechk(&nd->nl_nch);
	195	if (error)
	196	return error;
	197	}
	198
221494c8	199	vpexcl = 1;
806dcf9a	200	if (fmode & O_CREAT) {
28623bf9	201	if (nd->nl_nch.ncp->nc_vp == NULL) {
984263bc MD	202	VATTR_NULL(vap);
	203	vap->va_type = VREG;
	204	vap->va_mode = cmode;
5812c3cc	205	vap->va_fuseflags = fmode; /* FUSE */
984263bc MD	206	if (fmode & O_EXCL)
984263bc MD	207	vap->va_vaflags \|= VA_EXCLUSIVE;
5312fa43	208	error = VOP_NCREATE(&nd->nl_nch, nd->nl_dvp, &vp,
dff430ab	209	nd->nl_cred, vap);
fad57d0e	210	if (error)
984263bc	211	return (error);
984263bc	212	fmode &= ~O_TRUNC;
fad57d0e	213	/* locked vnode is returned */
984263bc	214	} else {
984263bc MD	215	if (fmode & O_EXCL) {
984263bc MD	216	error = EEXIST;
fad57d0e	217	} else {
0a80a445	218	error = cache_vget(&nd->nl_nch, cred,
fad57d0e	219	LK_EXCLUSIVE, &vp);
984263bc	220	}
fad57d0e MD	221	if (error)
fad57d0e MD	222	return (error);
984263bc MD	223	fmode &= ~O_CREAT;
	224	}
	225	} else {
221494c8 MD	226	/*
	227	* In most other cases a shared lock on the vnode is
	228	* sufficient. However, the O_RDWR case needs an
	229	* exclusive lock if the vnode is executable. The
	230	* NLC_EXCLLOCK_IFEXEC and NCF_NOTX flags help resolve
	231	* this.
	232	*
	233	* NOTE: If NCF_NOTX is not set, we do not know the
	234	* state of the 'x' bits and have to get
	235	* an exclusive lock for the EXCLLOCK_IFEXEC case.
	236	*/
	237	if ((nd->nl_flags & NLC_SHAREDLOCK) &&
	238	((nd->nl_flags & NLC_EXCLLOCK_IFEXEC) == 0 \|\|
	239	nd->nl_nch.ncp->nc_flag & NCF_NOTX)) {
12cdc371	240	error = cache_vget(&nd->nl_nch, cred, LK_SHARED, &vp);
221494c8	241	vpexcl = 0;
12cdc371 MD	242	} else {
	243	error = cache_vget(&nd->nl_nch, cred,
	244	LK_EXCLUSIVE, &vp);
	245	}
984263bc MD	246	if (error)
984263bc MD	247	return (error);
984263bc	248	}
fad57d0e MD	249
fad57d0e MD	250	/*
806dcf9a	251	* We have a locked vnode and ncp now. Note that the ncp will
28623bf9	252	* be cleaned up by the caller if nd->nl_nch is left intact.
fad57d0e	253	*/
984263bc MD	254	if (vp->v_type == VLNK) {
	255	error = EMLINK;
	256	goto bad;
	257	}
	258	if (vp->v_type == VSOCK) {
	259	error = EOPNOTSUPP;
	260	goto bad;
	261	}
28d748b9 AH	262	if (vp->v_type != VDIR && (fmode & O_DIRECTORY)) {
	263	error = ENOTDIR;
	264	goto bad;
	265	}
984263bc	266	if ((fmode & O_CREAT) == 0) {
984263bc MD	267	if (fmode & (FWRITE \| O_TRUNC)) {
	268	if (vp->v_type == VDIR) {
	269	error = EISDIR;
	270	goto bad;
	271	}
d0e99d5d MD	272
	273	/*
	274	* Additional checks on vnode (does not substitute
	275	* for ncp_writechk()).
	276	*/
	277	error = vn_writechk(vp);
806dcf9a MD	278	if (error) {
	279	/*
	280	* Special stale handling, re-resolve the
	281	* vnode.
	282	*/
	283	if (error == ESTALE) {
417b1086 MD	284	u_int dummy_gen = 0;
417b1086 MD	285
806dcf9a MD	286	vput(vp);
806dcf9a MD	287	vp = NULL;
221494c8	288	if (vpexcl == 0) {
12cdc371 MD	289	cache_unlock(&nd->nl_nch);
	290	cache_lock(&nd->nl_nch);
	291	}
28623bf9	292	cache_setunresolved(&nd->nl_nch);
12cdc371	293	error = cache_resolve(&nd->nl_nch,
417b1086	294	&dummy_gen,
12cdc371	295	cred);
806dcf9a MD	296	if (error == 0)
	297	goto again;
	298	}
984263bc	299	goto bad;
806dcf9a	300	}
984263bc MD	301	}
	302	}
	303	if (fmode & O_TRUNC) {
a11aaa81	304	vn_unlock(vp); /* XXX */
ca466bae	305	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY); /* XXX */
18cd8808	306	osize = vp->v_filesize;
984263bc MD	307	VATTR_NULL(vap);
984263bc MD	308	vap->va_size = 0;
2dfa19fa	309	error = VOP_SETATTR_FP(vp, vap, cred, fp);
984263bc MD	310	if (error)
984263bc MD	311	goto bad;
18cd8808 FT	312	error = VOP_GETATTR(vp, vap);
	313	if (error)
	314	goto bad;
	315	mp = vq_vptomp(vp);
	316	VFS_ACCOUNT(mp, vap->va_uid, vap->va_gid, -osize);
984263bc	317	}
fad57d0e	318
e9b56058 MD	319	/*
	320	* Set or clear VSWAPCACHE on the vp based on nd->nl_nch.ncp->nc_flag.
	321	* These particular bits are tracked all the way from the root.
	322	*
	323	* NOTE: Might not work properly on NFS servers due to the
	324	* disconnected namecache.
	325	*/
	326	flags = nd->nl_nch.ncp->nc_flag;
	327	if ((flags & (NCF_UF_CACHE \| NCF_UF_PCACHE)) &&
	328	(flags & (NCF_SF_NOCACHE \| NCF_SF_PNOCACHE)) == 0) {
	329	vsetflags(vp, VSWAPCACHE);
	330	} else {
	331	vclrflags(vp, VSWAPCACHE);
	332	}
	333
fad57d0e MD	334	/*
fad57d0e MD	335	* Setup the fp so VOP_OPEN can override it. No descriptor has been
0a80a445	336	* associated with the fp yet so we own it clean.
72310cfb	337	*
28623bf9	338	* f_nchandle inherits nl_nch. This used to be necessary only for
72310cfb MD	339	* directories but now we do it unconditionally so f*() ops
	340	* such as fchmod() can access the actual namespace that was
	341	* used to open the file.
fad57d0e MD	342	*/
fad57d0e MD	343	if (fp) {
3a907475 MD	344	if (nd->nl_flags & NLC_APPENDONLY)
3a907475 MD	345	fmode \|= FAPPENDONLY;
28623bf9 MD	346	fp->f_nchandle = nd->nl_nch;
	347	cache_zero(&nd->nl_nch);
	348	cache_unlock(&fp->f_nchandle);
fad57d0e MD	349	}
	350
	351	/*
28623bf9	352	* Get rid of nl_nch. vn_open does not return it (it returns the
5bd45597 MD	353	* vnode or the file pointer).
	354	*
	355	* NOTE: We can't leave nl_nch locked through the VOP_OPEN anyway
	356	* since the VOP_OPEN may block, e.g. on /dev/ttyd0
	357	*
	358	* NOTE: The VOP_OPEN() can replace the *fpp we supply with its own
	359	* (it will fdrop/fhold), and can also set the *fpp up however
	360	* it wants, not necessarily using DTYPE_VNODE.
fad57d0e	361	*/
28623bf9 MD	362	if (nd->nl_nch.ncp)
28623bf9 MD	363	cache_put(&nd->nl_nch);
fad57d0e	364
5bd45597 MD	365	error = VOP_OPEN(vp, fmode, cred, fpp);
	366	fp = fpp ? *fpp : NULL;
	367
fad57d0e MD	368	if (error) {
	369	/*
	370	* setting f_ops to &badfileops will prevent the descriptor
	371	* code from trying to close and release the vnode, since
	372	* the open failed we do not want to call close.
	373	*/
675eb4c0 MD	374	if (fp) {
	375	fp->f_data = NULL;
	376	fp->f_ops = &badfileops;
	377	}
984263bc	378	goto bad;
fad57d0e	379	}
fad57d0e	380
7540ab49	381	#if 0
984263bc	382	/*
7540ab49	383	* Assert that VREG files have been setup for vmio.
984263bc	384	*/
7540ab49 MD	385	KASSERT(vp->v_type != VREG \|\| vp->v_object != NULL,
	386	("vn_open: regular file was not VMIO enabled!"));
	387	#endif
984263bc	388
fad57d0e MD	389	/*
fad57d0e MD	390	* Return the vnode. XXX needs some cleaning up. The vnode is
8ddc6004	391	* only returned in the fp == NULL case.
5bd45597 MD	392	*
5bd45597 MD	393	* NOTE: vnode stored in fp may be different
fad57d0e MD	394	*/
	395	if (fp == NULL) {
	396	nd->nl_open_vp = vp;
	397	nd->nl_vp_fmode = fmode;
	398	if ((nd->nl_flags & NLC_LOCKVP) == 0)
a11aaa81	399	vn_unlock(vp);
fad57d0e	400	} else {
8ddc6004	401	vput(vp);
fad57d0e	402	}
984263bc MD	403	return (0);
984263bc MD	404	bad:
bb5c9c00 MD	405	if (vp)
bb5c9c00 MD	406	vput(vp);
984263bc MD	407	return (error);
	408	}
	409
a8873631 MD	410	int
	411	vn_opendisk(const char devname, int fmode, struct vnode *vpp)
	412	{
	413	struct vnode *vp;
	414	int error;
	415
	416	if (strncmp(devname, "/dev/", 5) == 0)
	417	devname += 5;
	418	if ((vp = getsynthvnode(devname)) == NULL) {
	419	error = ENODEV;
	420	} else {
	421	error = VOP_OPEN(vp, fmode, proc0.p_ucred, NULL);
	422	vn_unlock(vp);
	423	if (error) {
	424	vrele(vp);
	425	vp = NULL;
	426	}
	427	}
	428	*vpp = vp;
	429	return (error);
	430	}
	431
984263bc	432	/*
d0e99d5d MD	433	* Checks for special conditions on the vnode which might prevent writing
	434	* after the vnode has (likely) been locked. The vnode might or might not
	435	* be locked as of this call, but will be at least referenced.
	436	*
	437	* Also re-checks the mount RDONLY flag that ncp_writechk() checked prior
	438	* to the vnode being locked.
984263bc MD	439	*/
984263bc MD	440	int
d0e99d5d	441	vn_writechk(struct vnode *vp)
984263bc	442	{
984263bc MD	443	/*
	444	* If there's shared text associated with
	445	* the vnode, try to free it up once. If
	446	* we fail, we can't allow writing.
	447	*/
	448	if (vp->v_flag & VTEXT)
	449	return (ETXTBSY);
d0e99d5d MD	450	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY))
	451	return (EROFS);
	452	return 0;
984263bc MD	453	}
984263bc MD	454
468bb1f9	455	/*
0a80a445	456	* Check whether the underlying mount is read-only. The mount point
468bb1f9 MD	457	* referenced by the namecache may be different from the mount point
	458	* used by the underlying vnode in the case of NULLFS, so a separate
	459	* check is needed.
d0e99d5d MD	460	*
d0e99d5d MD	461	* Must be called PRIOR to any vnodes being locked.
468bb1f9	462	*/
468bb1f9	463	int
28623bf9	464	ncp_writechk(struct nchandle *nch)
468bb1f9	465	{
1c222faf MD	466	struct mount *mp;
	467
	468	if ((mp = nch->mount) != NULL) {
	469	if (mp->mnt_flag & MNT_RDONLY)
	470	return (EROFS);
	471	if (mp->mnt_op->vfs_modifying != vfs_stdmodifying)
	472	VFS_MODIFYING(mp);
	473	}
468bb1f9 MD	474	return(0);
	475	}
	476
984263bc MD	477	/*
984263bc MD	478	* Vnode close call
2247fe02 MD	479	*
2247fe02 MD	480	* MPSAFE
984263bc MD	481	*/
984263bc MD	482	int
3596743e	483	vn_close(struct vnode vp, int flags, struct file fp)
984263bc MD	484	{
	485	int error;
	486
b458d1ab	487	error = vn_lock(vp, LK_SHARED \| LK_RETRY \| LK_FAILRECLAIM);
4698dfb3	488	if (error == 0) {
3596743e	489	error = VOP_CLOSE(vp, flags, fp);
a11aaa81	490	vn_unlock(vp);
5fd012e0	491	}
984263bc MD	492	vrele(vp);
	493	return (error);
	494	}
	495
2247fe02 MD	496	/*
	497	* Sequential heuristic.
	498	*
	499	* MPSAFE (f_seqcount and f_nextoff are allowed to race)
	500	*/
984263bc MD	501	static __inline
	502	int
	503	sequential_heuristic(struct uio uio, struct file fp)
	504	{
	505	/*
	506	* Sequential heuristic - detect sequential operation
c0885fab MD	507	*
c0885fab MD	508	* NOTE: SMP: We allow f_seqcount updates to race.
984263bc MD	509	*/
	510	if ((uio->uio_offset == 0 && fp->f_seqcount > 0) \|\|
	511	uio->uio_offset == fp->f_nextoff) {
	512	int tmpseq = fp->f_seqcount;
2247fe02	513
4f048b1c	514	tmpseq += howmany(uio->uio_resid, MAXBSIZE);
984263bc MD	515	if (tmpseq > IO_SEQMAX)
	516	tmpseq = IO_SEQMAX;
	517	fp->f_seqcount = tmpseq;
	518	return(fp->f_seqcount << IO_SEQSHIFT);
	519	}
	520
	521	/*
	522	* Not sequential, quick draw-down of seqcount
c0885fab MD	523	*
c0885fab MD	524	* NOTE: SMP: We allow f_seqcount updates to race.
984263bc MD	525	*/
	526	if (fp->f_seqcount > 1)
	527	fp->f_seqcount = 1;
	528	else
	529	fp->f_seqcount = 0;
	530	return(0);
	531	}
	532
c0885fab MD	533	/*
	534	* get - lock and return the f_offset field.
	535	* set - set and unlock the f_offset field.
	536	*
	537	* These routines serve the dual purpose of serializing access to the
0a80a445	538	* f_offset field (at least on x86) and guaranteeing operational integrity
c0885fab	539	* when multiple read()ers and write()ers are present on the same fp.
2247fe02 MD	540	*
2247fe02 MD	541	* MPSAFE
c0885fab MD	542	*/
	543	static __inline off_t
	544	vn_get_fpf_offset(struct file *fp)
	545	{
	546	u_int flags;
	547	u_int nflags;
	548
	549	/*
	550	* Shortcut critical path.
	551	*/
	552	flags = fp->f_flag & ~FOFFSETLOCK;
	553	if (atomic_cmpset_int(&fp->f_flag, flags, flags \| FOFFSETLOCK))
	554	return(fp->f_offset);
	555
	556	/*
	557	* The hard way
	558	*/
	559	for (;;) {
	560	flags = fp->f_flag;
	561	if (flags & FOFFSETLOCK) {
	562	nflags = flags \| FOFFSETWAKE;
ae8e83e6	563	tsleep_interlock(&fp->f_flag, 0);
c0885fab	564	if (atomic_cmpset_int(&fp->f_flag, flags, nflags))
d9345d3a	565	tsleep(&fp->f_flag, PINTERLOCKED, "fpoff", 0);
c0885fab MD	566	} else {
	567	nflags = flags \| FOFFSETLOCK;
	568	if (atomic_cmpset_int(&fp->f_flag, flags, nflags))
	569	break;
	570	}
	571	}
	572	return(fp->f_offset);
	573	}
	574
2247fe02 MD	575	/*
	576	* MPSAFE
	577	*/
c0885fab MD	578	static __inline void
	579	vn_set_fpf_offset(struct file *fp, off_t offset)
	580	{
	581	u_int flags;
	582	u_int nflags;
	583
	584	/*
	585	* We hold the lock so we can set the offset without interference.
	586	*/
	587	fp->f_offset = offset;
	588
	589	/*
	590	* Normal release is already a reasonably critical path.
	591	*/
	592	for (;;) {
	593	flags = fp->f_flag;
	594	nflags = flags & ~(FOFFSETLOCK \| FOFFSETWAKE);
	595	if (atomic_cmpset_int(&fp->f_flag, flags, nflags)) {
	596	if (flags & FOFFSETWAKE)
	597	wakeup(&fp->f_flag);
	598	break;
	599	}
	600	}
	601	}
	602
2247fe02 MD	603	/*
	604	* MPSAFE
	605	*/
c0885fab MD	606	static __inline off_t
	607	vn_poll_fpf_offset(struct file *fp)
	608	{
1918fc5c	609	#if defined(__x86_64__)
c0885fab MD	610	return(fp->f_offset);
	611	#else
	612	off_t off = vn_get_fpf_offset(fp);
	613	vn_set_fpf_offset(fp, off);
	614	return(off);
	615	#endif
	616	}
	617
984263bc MD	618	/*
984263bc MD	619	* Package up an I/O request on a vnode into a uio and do it.
2247fe02 MD	620	*
2247fe02 MD	621	* MPSAFE
984263bc MD	622	*/
984263bc MD	623	int
87de5057	624	vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, int len,
0a80a445	625	off_t offset, enum uio_seg segflg, int ioflg,
87de5057	626	struct ucred cred, int aresid)
984263bc MD	627	{
	628	struct uio auio;
	629	struct iovec aiov;
	630	int error;
	631
	632	if ((ioflg & IO_NODELOCKED) == 0)
ca466bae	633	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
984263bc MD	634	auio.uio_iov = &aiov;
	635	auio.uio_iovcnt = 1;
	636	aiov.iov_base = base;
	637	aiov.iov_len = len;
	638	auio.uio_resid = len;
	639	auio.uio_offset = offset;
	640	auio.uio_segflg = segflg;
	641	auio.uio_rw = rw;
87de5057	642	auio.uio_td = curthread;
984263bc MD	643	if (rw == UIO_READ) {
	644	error = VOP_READ(vp, &auio, ioflg, cred);
	645	} else {
	646	error = VOP_WRITE(vp, &auio, ioflg, cred);
	647	}
	648	if (aresid)
	649	*aresid = auio.uio_resid;
	650	else
	651	if (auio.uio_resid && error == 0)
	652	error = EIO;
	653	if ((ioflg & IO_NODELOCKED) == 0)
a11aaa81	654	vn_unlock(vp);
984263bc MD	655	return (error);
	656	}
	657
	658	/*
	659	* Package up an I/O request on a vnode into a uio and do it. The I/O
	660	* request is split up into smaller chunks and we try to avoid saturating
0a80a445	661	* the buffer cache while potentially holding a vnode locked, so we
f9235b6d	662	* check bwillwrite() before calling vn_rdwr(). We also call lwkt_user_yield()
984263bc MD	663	* to give other processes a chance to lock the vnode (either other processes
984263bc MD	664	* core'ing the same binary, or unrelated processes scanning the directory).
2247fe02 MD	665	*
2247fe02 MD	666	* MPSAFE
984263bc MD	667	*/
984263bc MD	668	int
87de5057 MD	669	vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, caddr_t base, int len,
	670	off_t offset, enum uio_seg segflg, int ioflg,
	671	struct ucred cred, int aresid)
984263bc MD	672	{
	673	int error = 0;
	674
	675	do {
9a0222ac	676	int chunk;
984263bc	677
9a0222ac DR	678	/*
	679	* Force `offset' to a multiple of MAXBSIZE except possibly
	680	* for the first chunk, so that filesystems only need to
	681	* write full blocks except possibly for the first and last
	682	* chunks.
	683	*/
	684	chunk = MAXBSIZE - (uoff_t)offset % MAXBSIZE;
	685
	686	if (chunk > len)
	687	chunk = len;
d84f6fa1	688	if (vp->v_type == VREG && (ioflg & IO_RECURSE) == 0) {
c4df9635 MD	689	switch(rw) {
	690	case UIO_READ:
	691	bwillread(chunk);
	692	break;
	693	case UIO_WRITE:
	694	bwillwrite(chunk);
	695	break;
	696	}
	697	}
984263bc	698	error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
2247fe02	699	ioflg, cred, aresid);
984263bc MD	700	len -= chunk; /* aresid calc already includes length */
	701	if (error)
	702	break;
	703	offset += chunk;
	704	base += chunk;
f9235b6d	705	lwkt_user_yield();
984263bc MD	706	} while (len);
	707	if (aresid)
	708	*aresid += len;
	709	return (error);
	710	}
	711
	712	/*
c0885fab MD	713	* File pointers can no longer get ripped up by revoke so
	714	* we don't need to lock access to the vp.
	715	*
	716	* f_offset updates are not guaranteed against multiple readers
984263bc MD	717	*/
984263bc MD	718	static int
87de5057	719	vn_read(struct file fp, struct uio uio, struct ucred *cred, int flags)
984263bc MD	720	{
	721	struct vnode *vp;
	722	int error, ioflag;
	723
87de5057 MD	724	KASSERT(uio->uio_td == curthread,
87de5057 MD	725	("uio_td %p is not td %p", uio->uio_td, curthread));
984263bc	726	vp = (struct vnode *)fp->f_data;
9ba76b73	727
984263bc	728	ioflag = 0;
05dd1c0b	729	if (flags & O_FBLOCKING) {
9ba76b73 MD	730	/* ioflag &= ~IO_NDELAY; */
	731	} else if (flags & O_FNONBLOCKING) {
	732	ioflag \|= IO_NDELAY;
	733	} else if (fp->f_flag & FNONBLOCK) {
984263bc	734	ioflag \|= IO_NDELAY;
9ba76b73	735	}
c72df65d	736	if (fp->f_flag & O_DIRECT) {
984263bc	737	ioflag \|= IO_DIRECT;
9ba76b73	738	}
c0885fab MD	739	if ((flags & O_FOFFSET) == 0 && (vp->v_flag & VNOTSEEKABLE) == 0)
c0885fab MD	740	uio->uio_offset = vn_get_fpf_offset(fp);
ab6f251b	741	vn_lock(vp, LK_SHARED \| LK_RETRY);
984263bc MD	742	ioflag \|= sequential_heuristic(uio, fp);
984263bc MD	743
2dfa19fa	744	error = VOP_READ_FP(vp, uio, ioflag, cred, fp);
984263bc	745	fp->f_nextoff = uio->uio_offset;
a11aaa81	746	vn_unlock(vp);
c0885fab MD	747	if ((flags & O_FOFFSET) == 0 && (vp->v_flag & VNOTSEEKABLE) == 0)
c0885fab MD	748	vn_set_fpf_offset(fp, uio->uio_offset);
984263bc MD	749	return (error);
	750	}
	751
	752	/*
2247fe02	753	* MPSAFE
984263bc MD	754	*/
984263bc MD	755	static int
87de5057	756	vn_write(struct file fp, struct uio uio, struct ucred *cred, int flags)
984263bc MD	757	{
	758	struct vnode *vp;
	759	int error, ioflag;
	760
87de5057	761	KASSERT(uio->uio_td == curthread,
f4d08668	762	("uio_td %p is not p %p", uio->uio_td, curthread));
984263bc	763	vp = (struct vnode *)fp->f_data;
9ba76b73	764
984263bc	765	ioflag = IO_UNIT;
9ba76b73 MD	766	if (vp->v_type == VREG &&
9ba76b73 MD	767	((fp->f_flag & O_APPEND) \|\| (flags & O_FAPPEND))) {
984263bc	768	ioflag \|= IO_APPEND;
9ba76b73 MD	769	}
	770
	771	if (flags & O_FBLOCKING) {
	772	/* ioflag &= ~IO_NDELAY; */
	773	} else if (flags & O_FNONBLOCKING) {
984263bc	774	ioflag \|= IO_NDELAY;
9ba76b73 MD	775	} else if (fp->f_flag & FNONBLOCK) {
	776	ioflag \|= IO_NDELAY;
	777	}
c72df65d	778	if (fp->f_flag & O_DIRECT) {
984263bc	779	ioflag \|= IO_DIRECT;
9ba76b73 MD	780	}
	781	if (flags & O_FASYNCWRITE) {
	782	/* ioflag &= ~IO_SYNC; */
	783	} else if (flags & O_FSYNCWRITE) {
	784	ioflag \|= IO_SYNC;
	785	} else if (fp->f_flag & O_FSYNC) {
	786	ioflag \|= IO_SYNC;
	787	}
	788
	789	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))
984263bc	790	ioflag \|= IO_SYNC;
9ba76b73	791	if ((flags & O_FOFFSET) == 0)
c0885fab	792	uio->uio_offset = vn_get_fpf_offset(fp);
1c222faf MD	793	if (vp->v_mount)
1c222faf MD	794	VFS_MODIFYING(vp->v_mount);
c0885fab	795	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
984263bc	796	ioflag \|= sequential_heuristic(uio, fp);
2dfa19fa	797	error = VOP_WRITE_FP(vp, uio, ioflag, cred, fp);
984263bc	798	fp->f_nextoff = uio->uio_offset;
a11aaa81	799	vn_unlock(vp);
c0885fab MD	800	if ((flags & O_FOFFSET) == 0)
c0885fab MD	801	vn_set_fpf_offset(fp, uio->uio_offset);
984263bc MD	802	return (error);
	803	}
	804
	805	/*
2ad080fe	806	* MPSAFE
984263bc MD	807	*/
984263bc MD	808	static int
87de5057	809	vn_statfile(struct file fp, struct stat sb, struct ucred *cred)
984263bc	810	{
d9b2033e MD	811	struct vnode *vp;
d9b2033e MD	812	int error;
984263bc	813
d9b2033e MD	814	vp = (struct vnode *)fp->f_data;
d9b2033e MD	815	error = vn_stat(vp, sb, cred);
d9b2033e	816	return (error);
984263bc MD	817	}
984263bc MD	818
2ad080fe	819	/*
aac0aabd	820	* MPSAFE
2ad080fe	821	*/
984263bc	822	int
87de5057	823	vn_stat(struct vnode vp, struct stat sb, struct ucred *cred)
984263bc MD	824	{
984263bc MD	825	struct vattr vattr;
dadab5e9	826	struct vattr *vap;
984263bc MD	827	int error;
984263bc MD	828	u_short mode;
b13267a5	829	cdev_t dev;
984263bc	830
c065b635 MD	831	/*
	832	* vp already has a ref and is validated, can call unlocked.
	833	*/
984263bc	834	vap = &vattr;
aac0aabd	835	error = VOP_GETATTR(vp, vap);
984263bc MD	836	if (error)
	837	return (error);
	838
	839	/*
	840	* Zero the spare stat fields
	841	*/
	842	sb->st_lspare = 0;
d98152a8	843	sb->st_qspare2 = 0;
984263bc MD	844
	845	/*
	846	* Copy from vattr table
	847	*/
	848	if (vap->va_fsid != VNOVAL)
	849	sb->st_dev = vap->va_fsid;
	850	else
	851	sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
	852	sb->st_ino = vap->va_fileid;
	853	mode = vap->va_mode;
	854	switch (vap->va_type) {
	855	case VREG:
	856	mode \|= S_IFREG;
	857	break;
50626622 MD	858	case VDATABASE:
	859	mode \|= S_IFDB;
	860	break;
984263bc MD	861	case VDIR:
	862	mode \|= S_IFDIR;
	863	break;
	864	case VBLK:
	865	mode \|= S_IFBLK;
	866	break;
	867	case VCHR:
	868	mode \|= S_IFCHR;
	869	break;
	870	case VLNK:
	871	mode \|= S_IFLNK;
	872	/* This is a cosmetic change, symlinks do not have a mode. */
	873	if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW)
	874	sb->st_mode &= ~ACCESSPERMS; /* 0000 */
	875	else
	876	sb->st_mode \|= ACCESSPERMS; /* 0777 */
	877	break;
	878	case VSOCK:
	879	mode \|= S_IFSOCK;
	880	break;
	881	case VFIFO:
	882	mode \|= S_IFIFO;
	883	break;
	884	default:
	885	return (EBADF);
4698dfb3	886	}
984263bc	887	sb->st_mode = mode;
50626622 MD	888	if (vap->va_nlink > (nlink_t)-1)
	889	sb->st_nlink = (nlink_t)-1;
	890	else
	891	sb->st_nlink = vap->va_nlink;
984263bc MD	892	sb->st_uid = vap->va_uid;
984263bc MD	893	sb->st_gid = vap->va_gid;
2ac7d105	894	sb->st_rdev = devid_from_dev(vp->v_rdev);
984263bc MD	895	sb->st_size = vap->va_size;
	896	sb->st_atimespec = vap->va_atime;
	897	sb->st_mtimespec = vap->va_mtime;
	898	sb->st_ctimespec = vap->va_ctime;
	899
d8869c1b MD	900	/*
	901	* A VCHR and VBLK device may track the last access and last modified
	902	* time independantly of the filesystem. This is particularly true
	903	* because device read and write calls may bypass the filesystem.
	904	*/
	905	if (vp->v_type == VCHR \|\| vp->v_type == VBLK) {
4698dfb3 MN	906	dev = vp->v_rdev;
4698dfb3 MN	907	if (dev != NULL) {
d8869c1b	908	if (dev->si_lastread) {
cec73927	909	sb->st_atimespec.tv_sec = time_second +
1e45dd8c MD	910	(dev->si_lastread -
1e45dd8c MD	911	time_uptime);
d8869c1b MD	912	sb->st_atimespec.tv_nsec = 0;
	913	}
	914	if (dev->si_lastwrite) {
1e45dd8c MD	915	sb->st_mtimespec.tv_sec = time_second +
	916	(dev->si_lastwrite -
	917	time_uptime);
	918	sb->st_mtimespec.tv_nsec = 0;
d8869c1b MD	919	}
	920	}
	921	}
	922
984263bc	923	/*
0a80a445	924	* According to www.opengroup.org, the meaning of st_blksize is
0a80a445	925	* "a filesystem-specific preferred I/O block size for this
984263bc MD	926	* object. In some filesystem types, this may vary from file
	927	* to file"
	928	* Default to PAGE_SIZE after much discussion.
	929	*/
	930
	931	if (vap->va_type == VREG) {
	932	sb->st_blksize = vap->va_blocksize;
	933	} else if (vn_isdisk(vp, NULL)) {
e4c9c0c8 MD	934	/*
	935	* XXX this is broken. If the device is not yet open (aka
	936	* stat() call, aka v_rdev == NULL), how are we supposed
	937	* to get a valid block size out of it?
	938	*/
4698dfb3	939	dev = vp->v_rdev;
cd29885a	940
e4c9c0c8 MD	941	sb->st_blksize = dev->si_bsize_best;
	942	if (sb->st_blksize < dev->si_bsize_phys)
	943	sb->st_blksize = dev->si_bsize_phys;
984263bc MD	944	if (sb->st_blksize < BLKDEV_IOSIZE)
	945	sb->st_blksize = BLKDEV_IOSIZE;
	946	} else {
	947	sb->st_blksize = PAGE_SIZE;
	948	}
0a80a445	949
984263bc	950	sb->st_flags = vap->va_flags;
f00b5e4e	951
2b3f93ea	952	error = caps_priv_check(cred, SYSCAP_NOVFS_GENERATION);
f00b5e4e	953	if (error)
984263bc MD	954	sb->st_gen = 0;
984263bc MD	955	else
50626622	956	sb->st_gen = (u_int32_t)vap->va_gen;
984263bc	957
984263bc	958	sb->st_blocks = vap->va_bytes / S_BLKSIZE;
34c6728e MD	959
	960	/*
	961	* This is for ABI compatibility <= 5.7 (for ABI change made in
	962	* 5.7 master).
	963	*/
	964	sb->__old_st_blksize = sb->st_blksize;
	965
984263bc MD	966	return (0);
	967	}
	968
	969	/*
d9b2033e	970	* MPALMOSTSAFE - acquires mplock
984263bc MD	971	*/
984263bc MD	972	static int
87baaf0c MD	973	vn_ioctl(struct file fp, u_long com, caddr_t data, struct ucred ucred,
87baaf0c MD	974	struct sysmsg *msg)
984263bc	975	{
dadab5e9	976	struct vnode vp = ((struct vnode )fp->f_data);
1fbb5fc0	977	struct vnode *ovp;
984263bc MD	978	struct vattr vattr;
984263bc MD	979	int error;
c0885fab	980	off_t size;
984263bc	981
dadab5e9	982	switch (vp->v_type) {
984263bc MD	983	case VREG:
	984	case VDIR:
	985	if (com == FIONREAD) {
4698dfb3 MN	986	error = VOP_GETATTR(vp, &vattr);
4698dfb3 MN	987	if (error)
d9b2033e	988	break;
c0885fab MD	989	size = vattr.va_size;
	990	if ((vp->v_flag & VNOTSEEKABLE) == 0)
	991	size -= vn_poll_fpf_offset(fp);
	992	if (size > 0x7FFFFFFF)
	993	size = 0x7FFFFFFF;
	994	(int )data = size;
d9b2033e MD	995	error = 0;
	996	break;
	997	}
9ba76b73	998	if (com == FIOASYNC) { /* XXX */
d9b2033e MD	999	error = 0; /* XXX */
d9b2033e MD	1000	break;
984263bc	1001	}
984263bc	1002	/* fall into ... */
984263bc MD	1003	default:
	1004	#if 0
	1005	return (ENOTTY);
	1006	#endif
	1007	case VFIFO:
	1008	case VCHR:
	1009	case VBLK:
	1010	if (com == FIODTYPE) {
d9b2033e MD	1011	if (vp->v_type != VCHR && vp->v_type != VBLK) {
	1012	error = ENOTTY;
	1013	break;
	1014	}
335dda38	1015	(int )data = dev_dflags(vp->v_rdev) & D_TYPEMASK;
d9b2033e MD	1016	error = 0;
d9b2033e MD	1017	break;
984263bc	1018	}
87baaf0c	1019	error = VOP_IOCTL(vp, com, data, fp->f_flag, ucred, msg);
984263bc	1020	if (error == 0 && com == TIOCSCTTY) {
87de5057 MD	1021	struct proc *p = curthread->td_proc;
	1022	struct session *sess;
	1023
d9b2033e MD	1024	if (p == NULL) {
	1025	error = ENOTTY;
	1026	break;
	1027	}
984263bc	1028
2247fe02	1029	get_mplock();
87de5057	1030	sess = p->p_session;
984263bc	1031	/* Do nothing if reassigning same control tty */
d9b2033e MD	1032	if (sess->s_ttyvp == vp) {
d9b2033e MD	1033	error = 0;
2247fe02	1034	rel_mplock();
d9b2033e MD	1035	break;
d9b2033e MD	1036	}
984263bc MD	1037
984263bc MD	1038	/* Get rid of reference to old control tty */
1fbb5fc0	1039	ovp = sess->s_ttyvp;
597aea93	1040	vref(vp);
1fbb5fc0 MD	1041	sess->s_ttyvp = vp;
	1042	if (ovp)
	1043	vrele(ovp);
2247fe02	1044	rel_mplock();
984263bc	1045	}
d9b2033e	1046	break;
984263bc	1047	}
d9b2033e	1048	return (error);
984263bc MD	1049	}
984263bc MD	1050
984263bc	1051	/*
b458d1ab MD	1052	* Obtain the requested vnode lock
	1053	*
	1054	* LK_RETRY Automatically retry on timeout
	1055	* LK_FAILRECLAIM Fail if the vnode is being reclaimed
	1056	*
	1057	* Failures will occur if the vnode is undergoing recyclement, but not
	1058	* all callers expect that the function will fail so the caller must pass
	1059	* LK_FAILOK if it wants to process an error code.
	1060	*
	1061	* Errors can occur for other reasons if you pass in other LK_ flags,
	1062	* regardless of whether you pass in LK_FAILRECLAIM
984263bc MD	1063	*/
984263bc MD	1064	int
ca466bae	1065	vn_lock(struct vnode *vp, int flags)
984263bc MD	1066	{
984263bc MD	1067	int error;
0a80a445	1068
984263bc	1069	do {
a11aaa81	1070	error = lockmgr(&vp->v_lock, flags);
5fd012e0 MD	1071	if (error == 0)
5fd012e0 MD	1072	break;
984263bc	1073	} while (flags & LK_RETRY);
5fd012e0 MD	1074
	1075	/*
	1076	* Because we (had better!) have a ref on the vnode, once it
	1077	* goes to VRECLAIMED state it will not be recycled until all
	1078	* refs go away. So we can just check the flag.
	1079	*/
	1080	if (error == 0 && (vp->v_flag & VRECLAIMED)) {
b458d1ab MD	1081	if (flags & LK_FAILRECLAIM) {
	1082	lockmgr(&vp->v_lock, LK_RELEASE);
	1083	error = ENOENT;
	1084	}
5fd012e0	1085	}
984263bc MD	1086	return (error);
	1087	}
	1088
fc36a10b MD	1089	int
	1090	vn_relock(struct vnode *vp, int flags)
	1091	{
	1092	int error;
	1093
	1094	do {
	1095	error = lockmgr(&vp->v_lock, flags);
	1096	if (error == 0)
	1097	break;
	1098	} while (flags & LK_RETRY);
	1099
	1100	return error;
	1101	}
	1102
ead16d5b MD	1103	#ifdef DEBUG_VN_UNLOCK
	1104
	1105	void
	1106	debug_vn_unlock(struct vnode vp, const char filename, int line)
	1107	{
	1108	kprintf("vn_unlock from %s:%d\n", filename, line);
	1109	lockmgr(&vp->v_lock, LK_RELEASE);
	1110	}
	1111
	1112	#else
	1113
a11aaa81 MD	1114	void
	1115	vn_unlock(struct vnode *vp)
	1116	{
	1117	lockmgr(&vp->v_lock, LK_RELEASE);
	1118	}
	1119
ead16d5b MD	1120	#endif
ead16d5b MD	1121
2247fe02 MD	1122	/*
	1123	* MPSAFE
	1124	*/
a11aaa81 MD	1125	int
	1126	vn_islocked(struct vnode *vp)
	1127	{
	1128	return (lockstatus(&vp->v_lock, curthread));
	1129	}
	1130
94f2e6f2 MD	1131	/*
	1132	* Return the lock status of a vnode and unlock the vnode
	1133	* if we owned the lock. This is not a boolean, if the
	1134	* caller cares what the lock status is the caller must
	1135	* check the various possible values.
	1136	*
	1137	* This only unlocks exclusive locks held by the caller,
	1138	* it will NOT unlock shared locks (there is no way to
	1139	* tell who the shared lock belongs to).
	1140	*
	1141	* MPSAFE
	1142	*/
	1143	int
	1144	vn_islocked_unlock(struct vnode *vp)
	1145	{
	1146	int vpls;
	1147
	1148	vpls = lockstatus(&vp->v_lock, curthread);
	1149	if (vpls == LK_EXCLUSIVE)
	1150	lockmgr(&vp->v_lock, LK_RELEASE);
	1151	return(vpls);
	1152	}
	1153
	1154	/*
	1155	* Restore a vnode lock that we previously released via
	1156	* vn_islocked_unlock(). This is a NOP if we did not
	1157	* own the original lock.
	1158	*
	1159	* MPSAFE
	1160	*/
	1161	void
	1162	vn_islocked_relock(struct vnode *vp, int vpls)
	1163	{
	1164	int error;
	1165
	1166	if (vpls == LK_EXCLUSIVE)
	1167	error = lockmgr(&vp->v_lock, vpls);
	1168	}
	1169
984263bc	1170	/*
2247fe02	1171	* MPSAFE
984263bc MD	1172	*/
984263bc MD	1173	static int
87de5057	1174	vn_closefile(struct file *fp)
984263bc	1175	{
d9b2033e	1176	int error;
984263bc MD	1177
984263bc MD	1178	fp->f_ops = &badfileops;
3596743e	1179	error = vn_close(((struct vnode *)fp->f_data), fp->f_flag, fp);
4698dfb3	1180	return (error);
984263bc MD	1181	}
984263bc MD	1182
d9b2033e	1183	/*
2247fe02	1184	* MPSAFE
d9b2033e	1185	*/
984263bc MD	1186	static int
	1187	vn_kqfilter(struct file fp, struct knote kn)
	1188	{
d9b2033e	1189	int error;
984263bc	1190
d9b2033e	1191	error = VOP_KQFILTER(((struct vnode *)fp->f_data), kn);
d9b2033e	1192	return (error);
984263bc	1193	}