gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2005 The DragonFly Project. All rights reserved.
	3	*
	4	* This code is derived from software contributed to The DragonFly Project
	5	* by Jeffrey Hsu.
	6	*
	7	* Redistribution and use in source and binary forms, with or without
	8	* modification, are permitted provided that the following conditions
	9	* are met:
	10	*
	11	* 1. Redistributions of source code must retain the above copyright
	12	* notice, this list of conditions and the following disclaimer.
	13	* 2. Redistributions in binary form must reproduce the above copyright
	14	* notice, this list of conditions and the following disclaimer in
	15	* the documentation and/or other materials provided with the
	16	* distribution.
	17	* 3. Neither the name of The DragonFly Project nor the names of its
	18	* contributors may be used to endorse or promote products derived
	19	* from this software without specific, prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	22	* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	23	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
	24	* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
	25	* COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
	26	* INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
	27	* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	28	* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
	29	* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
	30	* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
	31	* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	32	* SUCH DAMAGE.
	33	*
	34	*
	35	* Copyright (c) 1982, 1986, 1989, 1991, 1993
	36	* The Regents of the University of California. All rights reserved.
	37	* (c) UNIX System Laboratories, Inc.
	38	* All or some portions of this file are derived from material licensed
	39	* to the University of California by American Telephone and Telegraph
	40	* Co. or Unix System Laboratories, Inc. and are reproduced herein with
	41	* the permission of UNIX System Laboratories, Inc.
	42	*
	43	* Redistribution and use in source and binary forms, with or without
	44	* modification, are permitted provided that the following conditions
	45	* are met:
	46	* 1. Redistributions of source code must retain the above copyright
	47	* notice, this list of conditions and the following disclaimer.
	48	* 2. Redistributions in binary form must reproduce the above copyright
	49	* notice, this list of conditions and the following disclaimer in the
	50	* documentation and/or other materials provided with the distribution.
	51	* 3. All advertising materials mentioning features or use of this software
	52	* must display the following acknowledgement:
	53	* This product includes software developed by the University of
	54	* California, Berkeley and its contributors.
	55	* 4. Neither the name of the University nor the names of its contributors
	56	* may be used to endorse or promote products derived from this software
	57	* without specific prior written permission.
	58	*
	59	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	60	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	61	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	62	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	63	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	64	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	65	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	66	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	67	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	68	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	69	* SUCH DAMAGE.
	70	*
	71	* @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94
	72	* $FreeBSD: src/sys/kern/kern_descrip.c,v 1.81.2.19 2004/02/28 00:43:31 tegge Exp $
	73	* $DragonFly: src/sys/kern/kern_descrip.c,v 1.79 2008/08/31 13:18:28 aggelos Exp $
	74	*/
	75
	76	#include "opt_compat.h"
	77	#include <sys/param.h>
	78	#include <sys/systm.h>
	79	#include <sys/malloc.h>
	80	#include <sys/sysproto.h>
	81	#include <sys/conf.h>
	82	#include <sys/device.h>
	83	#include <sys/filedesc.h>
	84	#include <sys/kernel.h>
	85	#include <sys/sysctl.h>
	86	#include <sys/vnode.h>
	87	#include <sys/proc.h>
	88	#include <sys/nlookup.h>
	89	#include <sys/file.h>
	90	#include <sys/stat.h>
	91	#include <sys/filio.h>
	92	#include <sys/fcntl.h>
	93	#include <sys/unistd.h>
	94	#include <sys/resourcevar.h>
	95	#include <sys/event.h>
	96	#include <sys/kern_syscall.h>
	97	#include <sys/kcore.h>
	98	#include <sys/kinfo.h>
	99	#include <sys/un.h>
	100
	101	#include <vm/vm.h>
	102	#include <vm/vm_extern.h>
	103
	104	#include <sys/thread2.h>
	105	#include <sys/file2.h>
	106	#include <sys/spinlock2.h>
	107
	108	static void fsetfd_locked(struct filedesc fdp, struct file fp, int fd);
	109	static void fdreserve_locked (struct filedesc *fdp, int fd0, int incr);
	110	static struct file funsetfd_locked (struct filedesc fdp, int fd);
	111	static int checkfpclosed(struct filedesc fdp, int fd, struct file fp);
	112	static void ffree(struct file *fp);
	113
	114	static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
	115	static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "file desc to leader",
	116	"file desc to leader structures");
	117	MALLOC_DEFINE(M_FILE, "file", "Open file structure");
	118	static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
	119
	120	static d_open_t fdopen;
	121	#define NUMFDESC 64
	122
	123	#define CDEV_MAJOR 22
	124	static struct dev_ops fildesc_ops = {
	125	{ "FD", CDEV_MAJOR, 0 },
	126	.d_open = fdopen,
	127	};
	128
	129	static int badfo_readwrite (struct file fp, struct uio uio,
	130	struct ucred *cred, int flags);
	131	static int badfo_ioctl (struct file *fp, u_long com, caddr_t data,
	132	struct ucred *cred);
	133	static int badfo_poll (struct file fp, int events, struct ucred cred);
	134	static int badfo_kqfilter (struct file fp, struct knote kn);
	135	static int badfo_stat (struct file fp, struct stat sb, struct ucred *cred);
	136	static int badfo_close (struct file *fp);
	137	static int badfo_shutdown (struct file *fp, int how);
	138
	139	/*
	140	* Descriptor management.
	141	*/
	142	static struct filelist filehead = LIST_HEAD_INITIALIZER(&filehead);
	143	static struct spinlock filehead_spin = SPINLOCK_INITIALIZER(&filehead_spin);
	144	static int nfiles; /* actual number of open files */
	145	extern int cmask;
	146
	147	/*
	148	* Fixup fd_freefile and fd_lastfile after a descriptor has been cleared.
	149	*
	150	* MPSAFE - must be called with fdp->fd_spin exclusively held
	151	*/
	152	static __inline
	153	void
	154	fdfixup_locked(struct filedesc *fdp, int fd)
	155	{
	156	if (fd < fdp->fd_freefile) {
	157	fdp->fd_freefile = fd;
	158	}
	159	while (fdp->fd_lastfile >= 0 &&
	160	fdp->fd_files[fdp->fd_lastfile].fp == NULL &&
	161	fdp->fd_files[fdp->fd_lastfile].reserved == 0
	162	) {
	163	--fdp->fd_lastfile;
	164	}
	165	}
	166
	167	/*
	168	* System calls on descriptors.
	169	*
	170	* MPSAFE
	171	*/
	172	int
	173	sys_getdtablesize(struct getdtablesize_args *uap)
	174	{
	175	struct proc *p = curproc;
	176	struct plimit *limit = p->p_limit;
	177
	178	spin_lock_rd(&limit->p_spin);
	179	uap->sysmsg_result =
	180	min((int)limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
	181	spin_unlock_rd(&limit->p_spin);
	182	return (0);
	183	}
	184
	185	/*
	186	* Duplicate a file descriptor to a particular value.
	187	*
	188	* note: keep in mind that a potential race condition exists when closing
	189	* descriptors from a shared descriptor table (via rfork).
	190	*
	191	* MPSAFE
	192	*/
	193	int
	194	sys_dup2(struct dup2_args *uap)
	195	{
	196	int error;
	197
	198	error = kern_dup(DUP_FIXED, uap->from, uap->to, uap->sysmsg_fds);
	199
	200	return (error);
	201	}
	202
	203	/*
	204	* Duplicate a file descriptor.
	205	*
	206	* MPSAFE
	207	*/
	208	int
	209	sys_dup(struct dup_args *uap)
	210	{
	211	int error;
	212
	213	error = kern_dup(DUP_VARIABLE, uap->fd, 0, uap->sysmsg_fds);
	214
	215	return (error);
	216	}
	217
	218	/*
	219	* MPALMOSTSAFE - acquires mplock for fp operations
	220	*/
	221	int
	222	kern_fcntl(int fd, int cmd, union fcntl_dat dat, struct ucred cred)
	223	{
	224	struct thread *td = curthread;
	225	struct proc *p = td->td_proc;
	226	struct file *fp;
	227	struct vnode *vp;
	228	u_int newmin;
	229	u_int oflags;
	230	u_int nflags;
	231	int tmp, error, flg = F_POSIX;
	232
	233	KKASSERT(p);
	234
	235	/*
	236	* Operations on file descriptors that do not require a file pointer.
	237	*/
	238	switch (cmd) {
	239	case F_GETFD:
	240	error = fgetfdflags(p->p_fd, fd, &tmp);
	241	if (error == 0)
	242	dat->fc_cloexec = (tmp & UF_EXCLOSE) ? FD_CLOEXEC : 0;
	243	return (error);
	244
	245	case F_SETFD:
	246	if (dat->fc_cloexec & FD_CLOEXEC)
	247	error = fsetfdflags(p->p_fd, fd, UF_EXCLOSE);
	248	else
	249	error = fclrfdflags(p->p_fd, fd, UF_EXCLOSE);
	250	return (error);
	251	case F_DUPFD:
	252	newmin = dat->fc_fd;
	253	error = kern_dup(DUP_VARIABLE, fd, newmin, &dat->fc_fd);
	254	return (error);
	255	default:
	256	break;
	257	}
	258
	259	/*
	260	* Operations on file pointers
	261	*/
	262	if ((fp = holdfp(p->p_fd, fd, -1)) == NULL)
	263	return (EBADF);
	264
	265	get_mplock();
	266	switch (cmd) {
	267	case F_GETFL:
	268	dat->fc_flags = OFLAGS(fp->f_flag);
	269	error = 0;
	270	break;
	271
	272	case F_SETFL:
	273	oflags = fp->f_flag;
	274	nflags = FFLAGS(dat->fc_flags & ~O_ACCMODE) & FCNTLFLAGS;
	275	nflags \|= oflags & ~FCNTLFLAGS;
	276
	277	error = 0;
	278	if (((nflags ^ oflags) & O_APPEND) && (oflags & FAPPENDONLY))
	279	error = EINVAL;
	280	if (error == 0 && ((nflags ^ oflags) & FASYNC)) {
	281	tmp = fp->f_flag & FASYNC;
	282	error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, cred);
	283	}
	284	if (error == 0)
	285	fp->f_flag = nflags;
	286	break;
	287
	288	case F_GETOWN:
	289	error = fo_ioctl(fp, FIOGETOWN, (caddr_t)&dat->fc_owner, cred);
	290	break;
	291
	292	case F_SETOWN:
	293	error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&dat->fc_owner, cred);
	294	break;
	295
	296	case F_SETLKW:
	297	flg \|= F_WAIT;
	298	/* Fall into F_SETLK */
	299
	300	case F_SETLK:
	301	if (fp->f_type != DTYPE_VNODE) {
	302	error = EBADF;
	303	break;
	304	}
	305	vp = (struct vnode *)fp->f_data;
	306
	307	/*
	308	* copyin/lockop may block
	309	*/
	310	if (dat->fc_flock.l_whence == SEEK_CUR)
	311	dat->fc_flock.l_start += fp->f_offset;
	312
	313	switch (dat->fc_flock.l_type) {
	314	case F_RDLCK:
	315	if ((fp->f_flag & FREAD) == 0) {
	316	error = EBADF;
	317	break;
	318	}
	319	p->p_leader->p_flag \|= P_ADVLOCK;
	320	error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
	321	&dat->fc_flock, flg);
	322	break;
	323	case F_WRLCK:
	324	if ((fp->f_flag & FWRITE) == 0) {
	325	error = EBADF;
	326	break;
	327	}
	328	p->p_leader->p_flag \|= P_ADVLOCK;
	329	error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
	330	&dat->fc_flock, flg);
	331	break;
	332	case F_UNLCK:
	333	error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
	334	&dat->fc_flock, F_POSIX);
	335	break;
	336	default:
	337	error = EINVAL;
	338	break;
	339	}
	340
	341	/*
	342	* It is possible to race a close() on the descriptor while
	343	* we were blocked getting the lock. If this occurs the
	344	* close might not have caught the lock.
	345	*/
	346	if (checkfpclosed(p->p_fd, fd, fp)) {
	347	dat->fc_flock.l_whence = SEEK_SET;
	348	dat->fc_flock.l_start = 0;
	349	dat->fc_flock.l_len = 0;
	350	dat->fc_flock.l_type = F_UNLCK;
	351	(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
	352	F_UNLCK, &dat->fc_flock, F_POSIX);
	353	}
	354	break;
	355
	356	case F_GETLK:
	357	if (fp->f_type != DTYPE_VNODE) {
	358	error = EBADF;
	359	break;
	360	}
	361	vp = (struct vnode *)fp->f_data;
	362	/*
	363	* copyin/lockop may block
	364	*/
	365	if (dat->fc_flock.l_type != F_RDLCK &&
	366	dat->fc_flock.l_type != F_WRLCK &&
	367	dat->fc_flock.l_type != F_UNLCK) {
	368	error = EINVAL;
	369	break;
	370	}
	371	if (dat->fc_flock.l_whence == SEEK_CUR)
	372	dat->fc_flock.l_start += fp->f_offset;
	373	error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
	374	&dat->fc_flock, F_POSIX);
	375	break;
	376	default:
	377	error = EINVAL;
	378	break;
	379	}
	380	rel_mplock();
	381
	382	fdrop(fp);
	383	return (error);
	384	}
	385
	386	/*
	387	* The file control system call.
	388	*
	389	* MPSAFE
	390	*/
	391	int
	392	sys_fcntl(struct fcntl_args *uap)
	393	{
	394	union fcntl_dat dat;
	395	int error;
	396
	397	switch (uap->cmd) {
	398	case F_DUPFD:
	399	dat.fc_fd = uap->arg;
	400	break;
	401	case F_SETFD:
	402	dat.fc_cloexec = uap->arg;
	403	break;
	404	case F_SETFL:
	405	dat.fc_flags = uap->arg;
	406	break;
	407	case F_SETOWN:
	408	dat.fc_owner = uap->arg;
	409	break;
	410	case F_SETLKW:
	411	case F_SETLK:
	412	case F_GETLK:
	413	error = copyin((caddr_t)uap->arg, &dat.fc_flock,
	414	sizeof(struct flock));
	415	if (error)
	416	return (error);
	417	break;
	418	}
	419
	420	error = kern_fcntl(uap->fd, uap->cmd, &dat, curproc->p_ucred);
	421
	422	if (error == 0) {
	423	switch (uap->cmd) {
	424	case F_DUPFD:
	425	uap->sysmsg_result = dat.fc_fd;
	426	break;
	427	case F_GETFD:
	428	uap->sysmsg_result = dat.fc_cloexec;
	429	break;
	430	case F_GETFL:
	431	uap->sysmsg_result = dat.fc_flags;
	432	break;
	433	case F_GETOWN:
	434	uap->sysmsg_result = dat.fc_owner;
	435	case F_GETLK:
	436	error = copyout(&dat.fc_flock, (caddr_t)uap->arg,
	437	sizeof(struct flock));
	438	break;
	439	}
	440	}
	441
	442	return (error);
	443	}
	444
	445	/*
	446	* Common code for dup, dup2, and fcntl(F_DUPFD).
	447	*
	448	* The type flag can be either DUP_FIXED or DUP_VARIABLE. DUP_FIXED tells
	449	* kern_dup() to destructively dup over an existing file descriptor if new
	450	* is already open. DUP_VARIABLE tells kern_dup() to find the lowest
	451	* unused file descriptor that is greater than or equal to new.
	452	*
	453	* MPSAFE
	454	*/
	455	int
	456	kern_dup(enum dup_type type, int old, int new, int *res)
	457	{
	458	struct thread *td = curthread;
	459	struct proc *p = td->td_proc;
	460	struct filedesc *fdp = p->p_fd;
	461	struct file *fp;
	462	struct file *delfp;
	463	int oldflags;
	464	int holdleaders;
	465	int error, newfd;
	466
	467	/*
	468	* Verify that we have a valid descriptor to dup from and
	469	* possibly to dup to.
	470	*/
	471	retry:
	472	spin_lock_wr(&fdp->fd_spin);
	473	if (new < 0 \|\| new > p->p_rlimit[RLIMIT_NOFILE].rlim_cur \|\|
	474	new >= maxfilesperproc) {
	475	spin_unlock_wr(&fdp->fd_spin);
	476	return (EINVAL);
	477	}
	478	if ((unsigned)old >= fdp->fd_nfiles \|\| fdp->fd_files[old].fp == NULL) {
	479	spin_unlock_wr(&fdp->fd_spin);
	480	return (EBADF);
	481	}
	482	if (type == DUP_FIXED && old == new) {
	483	*res = new;
	484	spin_unlock_wr(&fdp->fd_spin);
	485	return (0);
	486	}
	487	fp = fdp->fd_files[old].fp;
	488	oldflags = fdp->fd_files[old].fileflags;
	489	fhold(fp); /* MPSAFE - can be called with a spinlock held */
	490
	491	/*
	492	* Allocate a new descriptor if DUP_VARIABLE, or expand the table
	493	* if the requested descriptor is beyond the current table size.
	494	*
	495	* This can block. Retry if the source descriptor no longer matches
	496	* or if our expectation in the expansion case races.
	497	*
	498	* If we are not expanding or allocating a new decriptor, then reset
	499	* the target descriptor to a reserved state so we have a uniform
	500	* setup for the next code block.
	501	*/
	502	if (type == DUP_VARIABLE \|\| new >= fdp->fd_nfiles) {
	503	spin_unlock_wr(&fdp->fd_spin);
	504	error = fdalloc(p, new, &newfd);
	505	spin_lock_wr(&fdp->fd_spin);
	506	if (error) {
	507	spin_unlock_wr(&fdp->fd_spin);
	508	fdrop(fp);
	509	return (error);
	510	}
	511	/*
	512	* Check for ripout
	513	*/
	514	if (old >= fdp->fd_nfiles \|\| fdp->fd_files[old].fp != fp) {
	515	fsetfd_locked(fdp, NULL, newfd);
	516	spin_unlock_wr(&fdp->fd_spin);
	517	fdrop(fp);
	518	goto retry;
	519	}
	520	/*
	521	* Check for expansion race
	522	*/
	523	if (type != DUP_VARIABLE && new != newfd) {
	524	fsetfd_locked(fdp, NULL, newfd);
	525	spin_unlock_wr(&fdp->fd_spin);
	526	fdrop(fp);
	527	goto retry;
	528	}
	529	/*
	530	* Check for ripout, newfd reused old (this case probably
	531	* can't occur).
	532	*/
	533	if (old == newfd) {
	534	fsetfd_locked(fdp, NULL, newfd);
	535	spin_unlock_wr(&fdp->fd_spin);
	536	fdrop(fp);
	537	goto retry;
	538	}
	539	new = newfd;
	540	delfp = NULL;
	541	} else {
	542	if (fdp->fd_files[new].reserved) {
	543	spin_unlock_wr(&fdp->fd_spin);
	544	fdrop(fp);
	545	kprintf("Warning: dup(): target descriptor %d is reserved, waiting for it to be resolved\n", new);
	546	tsleep(fdp, 0, "fdres", hz);
	547	goto retry;
	548	}
	549
	550	/*
	551	* If the target descriptor was never allocated we have
	552	* to allocate it. If it was we have to clean out the
	553	* old descriptor. delfp inherits the ref from the
	554	* descriptor table.
	555	*/
	556	delfp = fdp->fd_files[new].fp;
	557	fdp->fd_files[new].fp = NULL;
	558	fdp->fd_files[new].reserved = 1;
	559	if (delfp == NULL) {
	560	fdreserve_locked(fdp, new, 1);
	561	if (new > fdp->fd_lastfile)
	562	fdp->fd_lastfile = new;
	563	}
	564
	565	}
	566
	567	/*
	568	* NOTE: still holding an exclusive spinlock
	569	*/
	570
	571	/*
	572	* If a descriptor is being overwritten we may hve to tell
	573	* fdfree() to sleep to ensure that all relevant process
	574	* leaders can be traversed in closef().
	575	*/
	576	if (delfp != NULL && p->p_fdtol != NULL) {
	577	fdp->fd_holdleaderscount++;
	578	holdleaders = 1;
	579	} else {
	580	holdleaders = 0;
	581	}
	582	KASSERT(delfp == NULL \|\| type == DUP_FIXED,
	583	("dup() picked an open file"));
	584
	585	/*
	586	* Duplicate the source descriptor, update lastfile. If the new
	587	* descriptor was not allocated and we aren't replacing an existing
	588	* descriptor we have to mark the descriptor as being in use.
	589	*
	590	* The fd_files[] array inherits fp's hold reference.
	591	*/
	592	fsetfd_locked(fdp, fp, new);
	593	fdp->fd_files[new].fileflags = oldflags & ~UF_EXCLOSE;
	594	spin_unlock_wr(&fdp->fd_spin);
	595	fdrop(fp);
	596	*res = new;
	597
	598	/*
	599	* If we dup'd over a valid file, we now own the reference to it
	600	* and must dispose of it using closef() semantics (as if a
	601	* close() were performed on it).
	602	*/
	603	if (delfp) {
	604	closef(delfp, p);
	605	if (holdleaders) {
	606	spin_lock_wr(&fdp->fd_spin);
	607	fdp->fd_holdleaderscount--;
	608	if (fdp->fd_holdleaderscount == 0 &&
	609	fdp->fd_holdleaderswakeup != 0) {
	610	fdp->fd_holdleaderswakeup = 0;
	611	spin_unlock_wr(&fdp->fd_spin);
	612	wakeup(&fdp->fd_holdleaderscount);
	613	} else {
	614	spin_unlock_wr(&fdp->fd_spin);
	615	}
	616	}
	617	}
	618	return (0);
	619	}
	620
	621	/*
	622	* If sigio is on the list associated with a process or process group,
	623	* disable signalling from the device, remove sigio from the list and
	624	* free sigio.
	625	*/
	626	void
	627	funsetown(struct sigio *sigio)
	628	{
	629	if (sigio == NULL)
	630	return;
	631	crit_enter();
	632	*(sigio->sio_myref) = NULL;
	633	crit_exit();
	634	if (sigio->sio_pgid < 0) {
	635	SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
	636	sigio, sio_pgsigio);
	637	} else /* if ((sigiop)->sio_pgid > 0) / {
	638	SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
	639	sigio, sio_pgsigio);
	640	}
	641	crfree(sigio->sio_ucred);
	642	kfree(sigio, M_SIGIO);
	643	}
	644
	645	/* Free a list of sigio structures. */
	646	void
	647	funsetownlst(struct sigiolst *sigiolst)
	648	{
	649	struct sigio *sigio;
	650
	651	while ((sigio = SLIST_FIRST(sigiolst)) != NULL)
	652	funsetown(sigio);
	653	}
	654
	655	/*
	656	* This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
	657	*
	658	* After permission checking, add a sigio structure to the sigio list for
	659	* the process or process group.
	660	*/
	661	int
	662	fsetown(pid_t pgid, struct sigio **sigiop)
	663	{
	664	struct proc *proc;
	665	struct pgrp *pgrp;
	666	struct sigio *sigio;
	667
	668	if (pgid == 0) {
	669	funsetown(*sigiop);
	670	return (0);
	671	}
	672	if (pgid > 0) {
	673	proc = pfind(pgid);
	674	if (proc == NULL)
	675	return (ESRCH);
	676
	677	/*
	678	* Policy - Don't allow a process to FSETOWN a process
	679	* in another session.
	680	*
	681	* Remove this test to allow maximum flexibility or
	682	* restrict FSETOWN to the current process or process
	683	* group for maximum safety.
	684	*/
	685	if (proc->p_session != curproc->p_session)
	686	return (EPERM);
	687
	688	pgrp = NULL;
	689	} else /* if (pgid < 0) */ {
	690	pgrp = pgfind(-pgid);
	691	if (pgrp == NULL)
	692	return (ESRCH);
	693
	694	/*
	695	* Policy - Don't allow a process to FSETOWN a process
	696	* in another session.
	697	*
	698	* Remove this test to allow maximum flexibility or
	699	* restrict FSETOWN to the current process or process
	700	* group for maximum safety.
	701	*/
	702	if (pgrp->pg_session != curproc->p_session)
	703	return (EPERM);
	704
	705	proc = NULL;
	706	}
	707	funsetown(*sigiop);
	708	sigio = kmalloc(sizeof(struct sigio), M_SIGIO, M_WAITOK);
	709	if (pgid > 0) {
	710	SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
	711	sigio->sio_proc = proc;
	712	} else {
	713	SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
	714	sigio->sio_pgrp = pgrp;
	715	}
	716	sigio->sio_pgid = pgid;
	717	sigio->sio_ucred = crhold(curproc->p_ucred);
	718	/* It would be convenient if p_ruid was in ucred. */
	719	sigio->sio_ruid = curproc->p_ucred->cr_ruid;
	720	sigio->sio_myref = sigiop;
	721	crit_enter();
	722	*sigiop = sigio;
	723	crit_exit();
	724	return (0);
	725	}
	726
	727	/*
	728	* This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
	729	*/
	730	pid_t
	731	fgetown(struct sigio *sigio)
	732	{
	733	return (sigio != NULL ? sigio->sio_pgid : 0);
	734	}
	735
	736	/*
	737	* Close many file descriptors.
	738	*
	739	* MPSAFE
	740	*/
	741	int
	742	sys_closefrom(struct closefrom_args *uap)
	743	{
	744	return(kern_closefrom(uap->fd));
	745	}
	746
	747	/*
	748	* Close all file descriptors greater then or equal to fd
	749	*
	750	* MPSAFE
	751	*/
	752	int
	753	kern_closefrom(int fd)
	754	{
	755	struct thread *td = curthread;
	756	struct proc *p = td->td_proc;
	757	struct filedesc *fdp;
	758
	759	KKASSERT(p);
	760	fdp = p->p_fd;
	761
	762	if (fd < 0)
	763	return (EINVAL);
	764
	765	/*
	766	* NOTE: This function will skip unassociated descriptors and
	767	* reserved descriptors that have not yet been assigned.
	768	* fd_lastfile can change as a side effect of kern_close().
	769	*/
	770	spin_lock_wr(&fdp->fd_spin);
	771	while (fd <= fdp->fd_lastfile) {
	772	if (fdp->fd_files[fd].fp != NULL) {
	773	spin_unlock_wr(&fdp->fd_spin);
	774	/* ok if this races another close */
	775	if (kern_close(fd) == EINTR)
	776	return (EINTR);
	777	spin_lock_wr(&fdp->fd_spin);
	778	}
	779	++fd;
	780	}
	781	spin_unlock_wr(&fdp->fd_spin);
	782	return (0);
	783	}
	784
	785	/*
	786	* Close a file descriptor.
	787	*
	788	* MPSAFE
	789	*/
	790	int
	791	sys_close(struct close_args *uap)
	792	{
	793	return(kern_close(uap->fd));
	794	}
	795
	796	/*
	797	* MPALMOSTSAFE - acquires mplock around knote_fdclose() calls
	798	*/
	799	int
	800	kern_close(int fd)
	801	{
	802	struct thread *td = curthread;
	803	struct proc *p = td->td_proc;
	804	struct filedesc *fdp;
	805	struct file *fp;
	806	int error;
	807	int holdleaders;
	808
	809	KKASSERT(p);
	810	fdp = p->p_fd;
	811
	812	spin_lock_wr(&fdp->fd_spin);
	813	if ((fp = funsetfd_locked(fdp, fd)) == NULL) {
	814	spin_unlock_wr(&fdp->fd_spin);
	815	return (EBADF);
	816	}
	817	holdleaders = 0;
	818	if (p->p_fdtol != NULL) {
	819	/*
	820	* Ask fdfree() to sleep to ensure that all relevant
	821	* process leaders can be traversed in closef().
	822	*/
	823	fdp->fd_holdleaderscount++;
	824	holdleaders = 1;
	825	}
	826
	827	/*
	828	* we now hold the fp reference that used to be owned by the descriptor
	829	* array.
	830	*/
	831	spin_unlock_wr(&fdp->fd_spin);
	832	if (fd < fdp->fd_knlistsize) {
	833	get_mplock();
	834	if (fd < fdp->fd_knlistsize)
	835	knote_fdclose(p, fd);
	836	rel_mplock();
	837	}
	838	error = closef(fp, p);
	839	if (holdleaders) {
	840	spin_lock_wr(&fdp->fd_spin);
	841	fdp->fd_holdleaderscount--;
	842	if (fdp->fd_holdleaderscount == 0 &&
	843	fdp->fd_holdleaderswakeup != 0) {
	844	fdp->fd_holdleaderswakeup = 0;
	845	spin_unlock_wr(&fdp->fd_spin);
	846	wakeup(&fdp->fd_holdleaderscount);
	847	} else {
	848	spin_unlock_wr(&fdp->fd_spin);
	849	}
	850	}
	851	return (error);
	852	}
	853
	854	/*
	855	* shutdown_args(int fd, int how)
	856	*/
	857	int
	858	kern_shutdown(int fd, int how)
	859	{
	860	struct thread *td = curthread;
	861	struct proc *p = td->td_proc;
	862	struct file *fp;
	863	int error;
	864
	865	KKASSERT(p);
	866
	867	if ((fp = holdfp(p->p_fd, fd, -1)) == NULL)
	868	return (EBADF);
	869	error = fo_shutdown(fp, how);
	870	fdrop(fp);
	871
	872	return (error);
	873	}
	874
	875	int
	876	sys_shutdown(struct shutdown_args *uap)
	877	{
	878	int error;
	879
	880	error = kern_shutdown(uap->s, uap->how);
	881
	882	return (error);
	883	}
	884
	885	int
	886	kern_fstat(int fd, struct stat *ub)
	887	{
	888	struct thread *td = curthread;
	889	struct proc *p = td->td_proc;
	890	struct file *fp;
	891	int error;
	892
	893	KKASSERT(p);
	894
	895	if ((fp = holdfp(p->p_fd, fd, -1)) == NULL)
	896	return (EBADF);
	897	error = fo_stat(fp, ub, p->p_ucred);
	898	fdrop(fp);
	899
	900	return (error);
	901	}
	902
	903	/*
	904	* Return status information about a file descriptor.
	905	*/
	906	int
	907	sys_fstat(struct fstat_args *uap)
	908	{
	909	struct stat st;
	910	int error;
	911
	912	error = kern_fstat(uap->fd, &st);
	913
	914	if (error == 0)
	915	error = copyout(&st, uap->sb, sizeof(st));
	916	return (error);
	917	}
	918
	919	/*
	920	* Return pathconf information about a file descriptor.
	921	*/
	922	/* ARGSUSED */
	923	int
	924	sys_fpathconf(struct fpathconf_args *uap)
	925	{
	926	struct thread *td = curthread;
	927	struct proc *p = td->td_proc;
	928	struct file *fp;
	929	struct vnode *vp;
	930	int error = 0;
	931
	932	KKASSERT(p);
	933
	934	if ((fp = holdfp(p->p_fd, uap->fd, -1)) == NULL)
	935	return (EBADF);
	936
	937	switch (fp->f_type) {
	938	case DTYPE_PIPE:
	939	case DTYPE_SOCKET:
	940	if (uap->name != _PC_PIPE_BUF) {
	941	error = EINVAL;
	942	} else {
	943	uap->sysmsg_result = PIPE_BUF;
	944	error = 0;
	945	}
	946	break;
	947	case DTYPE_FIFO:
	948	case DTYPE_VNODE:
	949	vp = (struct vnode *)fp->f_data;
	950	error = VOP_PATHCONF(vp, uap->name, uap->sysmsg_fds);
	951	break;
	952	default:
	953	error = EOPNOTSUPP;
	954	break;
	955	}
	956	fdrop(fp);
	957	return(error);
	958	}
	959
	960	static int fdexpand;
	961	SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
	962
	963	/*
	964	* Grow the file table so it can hold through descriptor (want).
	965	*
	966	* The fdp's spinlock must be held exclusively on entry and may be held
	967	* exclusively on return. The spinlock may be cycled by the routine.
	968	*
	969	* MPSAFE
	970	*/
	971	static void
	972	fdgrow_locked(struct filedesc *fdp, int want)
	973	{
	974	struct fdnode *newfiles;
	975	struct fdnode *oldfiles;
	976	int nf, extra;
	977
	978	nf = fdp->fd_nfiles;
	979	do {
	980	/* nf has to be of the form 2^n - 1 */
	981	nf = 2 * nf + 1;
	982	} while (nf <= want);
	983
	984	spin_unlock_wr(&fdp->fd_spin);
	985	newfiles = kmalloc(nf * sizeof(struct fdnode), M_FILEDESC, M_WAITOK);
	986	spin_lock_wr(&fdp->fd_spin);
	987
	988	/*
	989	* We could have raced another extend while we were not holding
	990	* the spinlock.
	991	*/
	992	if (fdp->fd_nfiles >= nf) {
	993	spin_unlock_wr(&fdp->fd_spin);
	994	kfree(newfiles, M_FILEDESC);
	995	spin_lock_wr(&fdp->fd_spin);
	996	return;
	997	}
	998	/*
	999	* Copy the existing ofile and ofileflags arrays
	1000	* and zero the new portion of each array.
	1001	*/
	1002	extra = nf - fdp->fd_nfiles;
	1003	bcopy(fdp->fd_files, newfiles, fdp->fd_nfiles * sizeof(struct fdnode));
	1004	bzero(&newfiles[fdp->fd_nfiles], extra * sizeof(struct fdnode));
	1005
	1006	oldfiles = fdp->fd_files;
	1007	fdp->fd_files = newfiles;
	1008	fdp->fd_nfiles = nf;
	1009
	1010	if (oldfiles != fdp->fd_builtin_files) {
	1011	spin_unlock_wr(&fdp->fd_spin);
	1012	kfree(oldfiles, M_FILEDESC);
	1013	spin_lock_wr(&fdp->fd_spin);
	1014	}
	1015	fdexpand++;
	1016	}
	1017
	1018	/*
	1019	* Number of nodes in right subtree, including the root.
	1020	*/
	1021	static __inline int
	1022	right_subtree_size(int n)
	1023	{
	1024	return (n ^ (n \| (n + 1)));
	1025	}
	1026
	1027	/*
	1028	* Bigger ancestor.
	1029	*/
	1030	static __inline int
	1031	right_ancestor(int n)
	1032	{
	1033	return (n \| (n + 1));
	1034	}
	1035
	1036	/*
	1037	* Smaller ancestor.
	1038	*/
	1039	static __inline int
	1040	left_ancestor(int n)
	1041	{
	1042	return ((n & (n + 1)) - 1);
	1043	}
	1044
	1045	/*
	1046	* Traverse the in-place binary tree buttom-up adjusting the allocation
	1047	* count so scans can determine where free descriptors are located.
	1048	*
	1049	* MPSAFE - caller must be holding an exclusive spinlock on fdp
	1050	*/
	1051	static
	1052	void
	1053	fdreserve_locked(struct filedesc *fdp, int fd, int incr)
	1054	{
	1055	while (fd >= 0) {
	1056	fdp->fd_files[fd].allocated += incr;
	1057	KKASSERT(fdp->fd_files[fd].allocated >= 0);
	1058	fd = left_ancestor(fd);
	1059	}
	1060	}
	1061
	1062	/*
	1063	* Reserve a file descriptor for the process. If no error occurs, the
	1064	* caller MUST at some point call fsetfd() or assign a file pointer
	1065	* or dispose of the reservation.
	1066	*
	1067	* MPSAFE
	1068	*/
	1069	int
	1070	fdalloc(struct proc p, int want, int result)
	1071	{
	1072	struct filedesc *fdp = p->p_fd;
	1073	int fd, rsize, rsum, node, lim;
	1074
	1075	spin_lock_rd(&p->p_limit->p_spin);
	1076	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
	1077	spin_unlock_rd(&p->p_limit->p_spin);
	1078	if (want >= lim)
	1079	return (EMFILE);
	1080	spin_lock_wr(&fdp->fd_spin);
	1081	if (want >= fdp->fd_nfiles)
	1082	fdgrow_locked(fdp, want);
	1083
	1084	/*
	1085	* Search for a free descriptor starting at the higher
	1086	* of want or fd_freefile. If that fails, consider
	1087	* expanding the ofile array.
	1088	*
	1089	* NOTE! the 'allocated' field is a cumulative recursive allocation
	1090	* count. If we happen to see a value of 0 then we can shortcut
	1091	* our search. Otherwise we run through through the tree going
	1092	* down branches we know have free descriptor(s) until we hit a
	1093	* leaf node. The leaf node will be free but will not necessarily
	1094	* have an allocated field of 0.
	1095	*/
	1096	retry:
	1097	/* move up the tree looking for a subtree with a free node */
	1098	for (fd = max(want, fdp->fd_freefile); fd < min(fdp->fd_nfiles, lim);
	1099	fd = right_ancestor(fd)) {
	1100	if (fdp->fd_files[fd].allocated == 0)
	1101	goto found;
	1102
	1103	rsize = right_subtree_size(fd);
	1104	if (fdp->fd_files[fd].allocated == rsize)
	1105	continue; /* right subtree full */
	1106
	1107	/*
	1108	* Free fd is in the right subtree of the tree rooted at fd.
	1109	* Call that subtree R. Look for the smallest (leftmost)
	1110	* subtree of R with an unallocated fd: continue moving
	1111	* down the left branch until encountering a full left
	1112	* subtree, then move to the right.
	1113	*/
	1114	for (rsum = 0, rsize /= 2; rsize > 0; rsize /= 2) {
	1115	node = fd + rsize;
	1116	rsum += fdp->fd_files[node].allocated;
	1117	if (fdp->fd_files[fd].allocated == rsum + rsize) {
	1118	fd = node; /* move to the right */
	1119	if (fdp->fd_files[node].allocated == 0)
	1120	goto found;
	1121	rsum = 0;
	1122	}
	1123	}
	1124	goto found;
	1125	}
	1126
	1127	/*
	1128	* No space in current array. Expand?
	1129	*/
	1130	if (fdp->fd_nfiles >= lim) {
	1131	spin_unlock_wr(&fdp->fd_spin);
	1132	return (EMFILE);
	1133	}
	1134	fdgrow_locked(fdp, want);
	1135	goto retry;
	1136
	1137	found:
	1138	KKASSERT(fd < fdp->fd_nfiles);
	1139	if (fd > fdp->fd_lastfile)
	1140	fdp->fd_lastfile = fd;
	1141	if (want <= fdp->fd_freefile)
	1142	fdp->fd_freefile = fd;
	1143	*result = fd;
	1144	KKASSERT(fdp->fd_files[fd].fp == NULL);
	1145	KKASSERT(fdp->fd_files[fd].reserved == 0);
	1146	fdp->fd_files[fd].fileflags = 0;
	1147	fdp->fd_files[fd].reserved = 1;
	1148	fdreserve_locked(fdp, fd, 1);
	1149	spin_unlock_wr(&fdp->fd_spin);
	1150	return (0);
	1151	}
	1152
	1153	/*
	1154	* Check to see whether n user file descriptors
	1155	* are available to the process p.
	1156	*
	1157	* MPSAFE
	1158	*/
	1159	int
	1160	fdavail(struct proc *p, int n)
	1161	{
	1162	struct filedesc *fdp = p->p_fd;
	1163	struct fdnode *fdnode;
	1164	int i, lim, last;
	1165
	1166	spin_lock_rd(&p->p_limit->p_spin);
	1167	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
	1168	spin_unlock_rd(&p->p_limit->p_spin);
	1169
	1170	spin_lock_rd(&fdp->fd_spin);
	1171	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) {
	1172	spin_unlock_rd(&fdp->fd_spin);
	1173	return (1);
	1174	}
	1175	last = min(fdp->fd_nfiles, lim);
	1176	fdnode = &fdp->fd_files[fdp->fd_freefile];
	1177	for (i = last - fdp->fd_freefile; --i >= 0; ++fdnode) {
	1178	if (fdnode->fp == NULL && --n <= 0) {
	1179	spin_unlock_rd(&fdp->fd_spin);
	1180	return (1);
	1181	}
	1182	}
	1183	spin_unlock_rd(&fdp->fd_spin);
	1184	return (0);
	1185	}
	1186
	1187	/*
	1188	* Revoke open descriptors referencing (f_data, f_type)
	1189	*
	1190	* Any revoke executed within a prison is only able to
	1191	* revoke descriptors for processes within that prison.
	1192	*
	1193	* Returns 0 on success or an error code.
	1194	*/
	/*
	 * Scan state shared between fdrevoke() and its two callbacks.
	 */
	struct fdrevoke_info {
		void		*data;		/* compared against fp->f_data */
		short		type;		/* compared against fp->f_type */
		short		unused;
		int		count;		/* sum of f_count of flagged fps */
		int		intransit;	/* flagged fps with f_msgcount != 0 */
		struct ucred	*cred;		/* caller cred, used for prison checks */
		struct file	*nfp;		/* fp installed in place of revoked ones */
	};
	1204
	/*
	 * Scan callbacks used by fdrevoke().  Both receive a pointer to the
	 * struct fdrevoke_info as their opaque second argument.
	 */
	static int fdrevoke_check_callback(struct file *fp, void *vinfo);
	static int fdrevoke_proc_callback(struct proc *p, void *vinfo);
	1207
	1208	int
	1209	fdrevoke(void f_data, short f_type, struct ucred cred)
	1210	{
	1211	struct fdrevoke_info info;
	1212	int error;
	1213
	1214	bzero(&info, sizeof(info));
	1215	info.data = f_data;
	1216	info.type = f_type;
	1217	info.cred = cred;
	1218	error = falloc(NULL, &info.nfp, NULL);
	1219	if (error)
	1220	return (error);
	1221
	1222	/*
	1223	* Scan the file pointer table once. dups do not dup file pointers,
	1224	* only descriptors, so there is no leak. Set FREVOKED on the fps
	1225	* being revoked.
	1226	*/
	1227	allfiles_scan_exclusive(fdrevoke_check_callback, &info);
	1228
	1229	/*
	1230	* If any fps were marked track down the related descriptors
	1231	* and close them. Any dup()s at this point will notice
	1232	* the FREVOKED already set in the fp and do the right thing.
	1233	*
	1234	* Any fps with non-zero msgcounts (aka sent over a unix-domain
	1235	* socket) bumped the intransit counter and will require a
	1236	* scan. Races against fps leaving the socket are closed by
	1237	* the socket code checking for FREVOKED.
	1238	*/
	1239	if (info.count)
	1240	allproc_scan(fdrevoke_proc_callback, &info);
	1241	if (info.intransit)
	1242	unp_revoke_gc(info.nfp);
	1243	fdrop(info.nfp);
	1244	return(0);
	1245	}
	1246
	1247	/*
	1248	* Locate matching file pointers directly.
	1249	*/
	1250	static int
	1251	fdrevoke_check_callback(struct file fp, void vinfo)
	1252	{
	1253	struct fdrevoke_info *info = vinfo;
	1254
	1255	/*
	1256	* File pointers already flagged for revokation are skipped.
	1257	*/
	1258	if (fp->f_flag & FREVOKED)
	1259	return(0);
	1260
	1261	/*
	1262	* If revoking from a prison file pointers created outside of
	1263	* that prison, or file pointers without creds, cannot be revoked.
	1264	*/
	1265	if (info->cred->cr_prison &&
	1266	(fp->f_cred == NULL \|\|
	1267	info->cred->cr_prison != fp->f_cred->cr_prison)) {
	1268	return(0);
	1269	}
	1270
	1271	/*
	1272	* If the file pointer matches then mark it for revocation. The
	1273	* flag is currently only used by unp_revoke_gc().
	1274	*
	1275	* info->count is a heuristic and can race in a SMP environment.
	1276	*/
	1277	if (info->data == fp->f_data && info->type == fp->f_type) {
	1278	atomic_set_int(&fp->f_flag, FREVOKED);
	1279	info->count += fp->f_count;
	1280	if (fp->f_msgcount)
	1281	++info->intransit;
	1282	}
	1283	return(0);
	1284	}
	1285
	1286	/*
	1287	* Locate matching file pointers via process descriptor tables.
	1288	*/
	1289	static int
	1290	fdrevoke_proc_callback(struct proc p, void vinfo)
	1291	{
	1292	struct fdrevoke_info *info = vinfo;
	1293	struct filedesc *fdp;
	1294	struct file *fp;
	1295	int n;
	1296
	1297	if (p->p_stat == SIDL \|\| p->p_stat == SZOMB)
	1298	return(0);
	1299	if (info->cred->cr_prison &&
	1300	info->cred->cr_prison != p->p_ucred->cr_prison) {
	1301	return(0);
	1302	}
	1303
	1304	/*
	1305	* If the controlling terminal of the process matches the
	1306	* vnode being revoked we clear the controlling terminal.
	1307	*
	1308	* The normal spec_close() may not catch this because it
	1309	* uses curproc instead of p.
	1310	*/
	1311	if (p->p_session && info->type == DTYPE_VNODE &&
	1312	info->data == p->p_session->s_ttyvp) {
	1313	p->p_session->s_ttyvp = NULL;
	1314	vrele(info->data);
	1315	}
	1316
	1317	/*
	1318	* Locate and close any matching file descriptors.
	1319	*/
	1320	if ((fdp = p->p_fd) == NULL)
	1321	return(0);
	1322	spin_lock_wr(&fdp->fd_spin);
	1323	for (n = 0; n < fdp->fd_nfiles; ++n) {
	1324	if ((fp = fdp->fd_files[n].fp) == NULL)
	1325	continue;
	1326	if (fp->f_flag & FREVOKED) {
	1327	fhold(info->nfp);
	1328	fdp->fd_files[n].fp = info->nfp;
	1329	spin_unlock_wr(&fdp->fd_spin);
	1330	closef(fp, p);
	1331	spin_lock_wr(&fdp->fd_spin);
	1332	--info->count;
	1333	}
	1334	}
	1335	spin_unlock_wr(&fdp->fd_spin);
	1336	return(0);
	1337	}
	1338
	1339	/*
	1340	* falloc:
	1341	* Create a new open file structure and reserve a file decriptor
	1342	* for the process that refers to it.
	1343	*
	1344	* Root creds are checked using p, or assumed if p is NULL. If
	1345	* resultfd is non-NULL then p must also be non-NULL. No file
	1346	* descriptor is reserved if resultfd is NULL.
	1347	*
	1348	* A file pointer with a refcount of 1 is returned. Note that the
	1349	* file pointer is NOT associated with the descriptor. If falloc
	1350	* returns success, fsetfd() MUST be called to either associate the
	1351	* file pointer or clear the reservation.
	1352	*
	1353	* MPSAFE
	1354	*/
	1355	int
	1356	falloc(struct proc p, struct file resultfp, int resultfd)
	1357	{
	1358	static struct timeval lastfail;
	1359	static int curfail;
	1360	struct file *fp;
	1361	int error;
	1362
	1363	fp = NULL;
	1364
	1365	/*
	1366	* Handle filetable full issues and root overfill.
	1367	*/
	1368	if (nfiles >= maxfiles - maxfilesrootres &&
	1369	((p && p->p_ucred->cr_ruid != 0) \|\| nfiles >= maxfiles)) {
	1370	if (ppsratecheck(&lastfail, &curfail, 1)) {
	1371	kprintf("kern.maxfiles limit exceeded by uid %d, please see tuning(7).\n",
	1372	(p ? p->p_ucred->cr_ruid : -1));
	1373	}
	1374	error = ENFILE;
	1375	goto done;
	1376	}
	1377
	1378	/*
	1379	* Allocate a new file descriptor.
	1380	*/
	1381	fp = kmalloc(sizeof(struct file), M_FILE, M_WAITOK \| M_ZERO);
	1382	spin_init(&fp->f_spin);
	1383	fp->f_count = 1;
	1384	fp->f_ops = &badfileops;
	1385	fp->f_seqcount = 1;
	1386	if (p)
	1387	fp->f_cred = crhold(p->p_ucred);
	1388	else
	1389	fp->f_cred = crhold(proc0.p_ucred);
	1390	spin_lock_wr(&filehead_spin);
	1391	nfiles++;
	1392	LIST_INSERT_HEAD(&filehead, fp, f_list);
	1393	spin_unlock_wr(&filehead_spin);
	1394	if (resultfd) {
	1395	if ((error = fdalloc(p, 0, resultfd)) != 0) {
	1396	fdrop(fp);
	1397	fp = NULL;
	1398	}
	1399	} else {
	1400	error = 0;
	1401	}
	1402	done:
	1403	*resultfp = fp;
	1404	return (error);
	1405	}
	1406
	1407	/*
	1408	* MPSAFE
	1409	*/
	1410	static
	1411	int
	1412	checkfpclosed(struct filedesc fdp, int fd, struct file fp)
	1413	{
	1414	int error;
	1415
	1416	spin_lock_rd(&fdp->fd_spin);
	1417	if ((unsigned) fd >= fdp->fd_nfiles \|\| fp != fdp->fd_files[fd].fp)
	1418	error = EBADF;
	1419	else
	1420	error = 0;
	1421	spin_unlock_rd(&fdp->fd_spin);
	1422	return (error);
	1423	}
	1424
	1425	/*
	1426	* Associate a file pointer with a previously reserved file descriptor.
	1427	* This function always succeeds.
	1428	*
	1429	* If fp is NULL, the file descriptor is returned to the pool.
	1430	*/
	1431
	1432	/*
	1433	* MPSAFE (exclusive spinlock must be held on call)
	1434	*/
	1435	static void
	1436	fsetfd_locked(struct filedesc fdp, struct file fp, int fd)
	1437	{
	1438	KKASSERT((unsigned)fd < fdp->fd_nfiles);
	1439	KKASSERT(fdp->fd_files[fd].reserved != 0);
	1440	if (fp) {
	1441	fhold(fp);
	1442	fdp->fd_files[fd].fp = fp;
	1443	fdp->fd_files[fd].reserved = 0;
	1444	if (fp->f_type == DTYPE_KQUEUE) {
	1445	if (fdp->fd_knlistsize < 0)
	1446	fdp->fd_knlistsize = 0;
	1447	}
	1448	} else {
	1449	fdp->fd_files[fd].reserved = 0;
	1450	fdreserve_locked(fdp, fd, -1);
	1451	fdfixup_locked(fdp, fd);
	1452	}
	1453	}
	1454
	1455	/*
	1456	* MPSAFE
	1457	*/
	1458	void
	1459	fsetfd(struct proc p, struct file fp, int fd)
	1460	{
	1461	struct filedesc *fdp = p->p_fd;
	1462
	1463	spin_lock_wr(&fdp->fd_spin);
	1464	fsetfd_locked(fdp, fp, fd);
	1465	spin_unlock_wr(&fdp->fd_spin);
	1466	}
	1467
	1468	/*
	1469	* MPSAFE (exclusive spinlock must be held on call)
	1470	*/
	1471	static
	1472	struct file *
	1473	funsetfd_locked(struct filedesc *fdp, int fd)
	1474	{
	1475	struct file *fp;
	1476
	1477	if ((unsigned)fd >= fdp->fd_nfiles)
	1478	return (NULL);
	1479	if ((fp = fdp->fd_files[fd].fp) == NULL)
	1480	return (NULL);
	1481	fdp->fd_files[fd].fp = NULL;
	1482	fdp->fd_files[fd].fileflags = 0;
	1483
	1484	fdreserve_locked(fdp, fd, -1);
	1485	fdfixup_locked(fdp, fd);
	1486	return(fp);
	1487	}
	1488
	1489	/*
	1490	* MPSAFE
	1491	*/
	1492	int
	1493	fgetfdflags(struct filedesc fdp, int fd, int flagsp)
	1494	{
	1495	int error;
	1496
	1497	spin_lock_rd(&fdp->fd_spin);
	1498	if (((u_int)fd) >= fdp->fd_nfiles) {
	1499	error = EBADF;
	1500	} else if (fdp->fd_files[fd].fp == NULL) {
	1501	error = EBADF;
	1502	} else {
	1503	*flagsp = fdp->fd_files[fd].fileflags;
	1504	error = 0;
	1505	}
	1506	spin_unlock_rd(&fdp->fd_spin);
	1507	return (error);
	1508	}
	1509
	1510	/*
	1511	* MPSAFE
	1512	*/
	1513	int
	1514	fsetfdflags(struct filedesc *fdp, int fd, int add_flags)
	1515	{
	1516	int error;
	1517
	1518	spin_lock_wr(&fdp->fd_spin);
	1519	if (((u_int)fd) >= fdp->fd_nfiles) {
	1520	error = EBADF;
	1521	} else if (fdp->fd_files[fd].fp == NULL) {
	1522	error = EBADF;
	1523	} else {
	1524	fdp->fd_files[fd].fileflags \|= add_flags;
	1525	error = 0;
	1526	}
	1527	spin_unlock_wr(&fdp->fd_spin);
	1528	return (error);
	1529	}
	1530
	1531	/*
	1532	* MPSAFE
	1533	*/
	1534	int
	1535	fclrfdflags(struct filedesc *fdp, int fd, int rem_flags)
	1536	{
	1537	int error;
	1538
	1539	spin_lock_wr(&fdp->fd_spin);
	1540	if (((u_int)fd) >= fdp->fd_nfiles) {
	1541	error = EBADF;
	1542	} else if (fdp->fd_files[fd].fp == NULL) {
	1543	error = EBADF;
	1544	} else {
	1545	fdp->fd_files[fd].fileflags &= ~rem_flags;
	1546	error = 0;
	1547	}
	1548	spin_unlock_wr(&fdp->fd_spin);
	1549	return (error);
	1550	}
	1551
	1552	void
	1553	fsetcred(struct file fp, struct ucred cr)
	1554	{
	1555	crhold(cr);
	1556	crfree(fp->f_cred);
	1557	fp->f_cred = cr;
	1558	}
	1559
	1560	/*
	1561	* Free a file descriptor.
	1562	*/
	1563	static
	1564	void
	1565	ffree(struct file *fp)
	1566	{
	1567	KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
	1568	spin_lock_wr(&filehead_spin);
	1569	LIST_REMOVE(fp, f_list);
	1570	nfiles--;
	1571	spin_unlock_wr(&filehead_spin);
	1572	crfree(fp->f_cred);
	1573	if (fp->f_nchandle.ncp)
	1574	cache_drop(&fp->f_nchandle);
	1575	kfree(fp, M_FILE);
	1576	}
	1577
	1578	/*
	1579	* called from init_main, initialize filedesc0 for proc0.
	1580	*/
	1581	void
	1582	fdinit_bootstrap(struct proc p0, struct filedesc fdp0, int cmask)
	1583	{
	1584	p0->p_fd = fdp0;
	1585	p0->p_fdtol = NULL;
	1586	fdp0->fd_refcnt = 1;
	1587	fdp0->fd_cmask = cmask;
	1588	fdp0->fd_files = fdp0->fd_builtin_files;
	1589	fdp0->fd_nfiles = NDFILE;
	1590	fdp0->fd_lastfile = -1;
	1591	spin_init(&fdp0->fd_spin);
	1592	}
	1593
	1594	/*
	1595	* Build a new filedesc structure.
	1596	*
	1597	* NOT MPSAFE (vref)
	1598	*/
	1599	struct filedesc *
	1600	fdinit(struct proc *p)
	1601	{
	1602	struct filedesc *newfdp;
	1603	struct filedesc *fdp = p->p_fd;
	1604
	1605	newfdp = kmalloc(sizeof(struct filedesc), M_FILEDESC, M_WAITOK\|M_ZERO);
	1606	spin_lock_rd(&fdp->fd_spin);
	1607	if (fdp->fd_cdir) {
	1608	newfdp->fd_cdir = fdp->fd_cdir;
	1609	vref(newfdp->fd_cdir);
	1610	cache_copy(&fdp->fd_ncdir, &newfdp->fd_ncdir);
	1611	}
	1612
	1613	/*
	1614	* rdir may not be set in e.g. proc0 or anything vm_fork'd off of
	1615	* proc0, but should unconditionally exist in other processes.
	1616	*/
	1617	if (fdp->fd_rdir) {
	1618	newfdp->fd_rdir = fdp->fd_rdir;
	1619	vref(newfdp->fd_rdir);
	1620	cache_copy(&fdp->fd_nrdir, &newfdp->fd_nrdir);
	1621	}
	1622	if (fdp->fd_jdir) {
	1623	newfdp->fd_jdir = fdp->fd_jdir;
	1624	vref(newfdp->fd_jdir);
	1625	cache_copy(&fdp->fd_njdir, &newfdp->fd_njdir);
	1626	}
	1627	spin_unlock_rd(&fdp->fd_spin);
	1628
	1629	/* Create the file descriptor table. */
	1630	newfdp->fd_refcnt = 1;
	1631	newfdp->fd_cmask = cmask;
	1632	newfdp->fd_files = newfdp->fd_builtin_files;
	1633	newfdp->fd_nfiles = NDFILE;
	1634	newfdp->fd_knlistsize = -1;
	1635	newfdp->fd_lastfile = -1;
	1636	spin_init(&newfdp->fd_spin);
	1637
	1638	return (newfdp);
	1639	}
	1640
	1641	/*
	1642	* Share a filedesc structure.
	1643	*
	1644	* MPSAFE
	1645	*/
	1646	struct filedesc *
	1647	fdshare(struct proc *p)
	1648	{
	1649	struct filedesc *fdp;
	1650
	1651	fdp = p->p_fd;
	1652	spin_lock_wr(&fdp->fd_spin);
	1653	fdp->fd_refcnt++;
	1654	spin_unlock_wr(&fdp->fd_spin);
	1655	return (fdp);
	1656	}
	1657
	1658	/*
	1659	* Copy a filedesc structure.
	1660	*
	1661	* MPSAFE
	1662	*/
	1663	struct filedesc *
	1664	fdcopy(struct proc *p)
	1665	{
	1666	struct filedesc *fdp = p->p_fd;
	1667	struct filedesc *newfdp;
	1668	struct fdnode *fdnode;
	1669	int i;
	1670	int ni;
	1671
	1672	/*
	1673	* Certain daemons might not have file descriptors.
	1674	*/
	1675	if (fdp == NULL)
	1676	return (NULL);
	1677
	1678	/*
	1679	* Allocate the new filedesc and fd_files[] array. This can race
	1680	* with operations by other threads on the fdp so we have to be
	1681	* careful.
	1682	*/
	1683	newfdp = kmalloc(sizeof(struct filedesc), M_FILEDESC, M_WAITOK \| M_ZERO);
	1684	again:
	1685	spin_lock_rd(&fdp->fd_spin);
	1686	if (fdp->fd_lastfile < NDFILE) {
	1687	newfdp->fd_files = newfdp->fd_builtin_files;
	1688	i = NDFILE;
	1689	} else {
	1690	/*
	1691	* We have to allocate (N^2-1) entries for our in-place
	1692	* binary tree. Allow the table to shrink.
	1693	*/
	1694	i = fdp->fd_nfiles;
	1695	ni = (i - 1) / 2;
	1696	while (ni > fdp->fd_lastfile && ni > NDFILE) {
	1697	i = ni;
	1698	ni = (i - 1) / 2;
	1699	}
	1700	spin_unlock_rd(&fdp->fd_spin);
	1701	newfdp->fd_files = kmalloc(i * sizeof(struct fdnode),
	1702	M_FILEDESC, M_WAITOK \| M_ZERO);
	1703
	1704	/*
	1705	* Check for race, retry
	1706	*/
	1707	spin_lock_rd(&fdp->fd_spin);
	1708	if (i <= fdp->fd_lastfile) {
	1709	spin_unlock_rd(&fdp->fd_spin);
	1710	kfree(newfdp->fd_files, M_FILEDESC);
	1711	goto again;
	1712	}
	1713	}
	1714
	1715	/*
	1716	* Dup the remaining fields. vref() and cache_hold() can be
	1717	* safely called while holding the read spinlock on fdp.
	1718	*
	1719	* The read spinlock on fdp is still being held.
	1720	*
	1721	* NOTE: vref and cache_hold calls for the case where the vnode
	1722	* or cache entry already has at least one ref may be called
	1723	* while holding spin locks.
	1724	*/
	1725	if ((newfdp->fd_cdir = fdp->fd_cdir) != NULL) {
	1726	vref(newfdp->fd_cdir);
	1727	cache_copy(&fdp->fd_ncdir, &newfdp->fd_ncdir);
	1728	}
	1729	/*
	1730	* We must check for fd_rdir here, at least for now because
	1731	* the init process is created before we have access to the
	1732	* rootvode to take a reference to it.
	1733	*/
	1734	if ((newfdp->fd_rdir = fdp->fd_rdir) != NULL) {
	1735	vref(newfdp->fd_rdir);
	1736	cache_copy(&fdp->fd_nrdir, &newfdp->fd_nrdir);
	1737	}
	1738	if ((newfdp->fd_jdir = fdp->fd_jdir) != NULL) {
	1739	vref(newfdp->fd_jdir);
	1740	cache_copy(&fdp->fd_njdir, &newfdp->fd_njdir);
	1741	}
	1742	newfdp->fd_refcnt = 1;
	1743	newfdp->fd_nfiles = i;
	1744	newfdp->fd_lastfile = fdp->fd_lastfile;
	1745	newfdp->fd_freefile = fdp->fd_freefile;
	1746	newfdp->fd_cmask = fdp->fd_cmask;
	1747	newfdp->fd_knlist = NULL;
	1748	newfdp->fd_knlistsize = -1;
	1749	newfdp->fd_knhash = NULL;
	1750	newfdp->fd_knhashmask = 0;
	1751	spin_init(&newfdp->fd_spin);
	1752
	1753	/*
	1754	* Copy the descriptor table through (i). This also copies the
	1755	* allocation state. Then go through and ref the file pointers
	1756	* and clean up any KQ descriptors.
	1757	*
	1758	* kq descriptors cannot be copied. Since we haven't ref'd the
	1759	* copied files yet we can ignore the return value from funsetfd().
	1760	*
	1761	* The read spinlock on fdp is still being held.
	1762	*/
	1763	bcopy(fdp->fd_files, newfdp->fd_files, i * sizeof(struct fdnode));
	1764	for (i = 0 ; i < newfdp->fd_nfiles; ++i) {
	1765	fdnode = &newfdp->fd_files[i];
	1766	if (fdnode->reserved) {
	1767	fdreserve_locked(newfdp, i, -1);
	1768	fdnode->reserved = 0;
	1769	fdfixup_locked(newfdp, i);
	1770	} else if (fdnode->fp) {
	1771	if (fdnode->fp->f_type == DTYPE_KQUEUE) {
	1772	(void)funsetfd_locked(newfdp, i);
	1773	} else {
	1774	fhold(fdnode->fp);
	1775	}
	1776	}
	1777	}
	1778	spin_unlock_rd(&fdp->fd_spin);
	1779	return (newfdp);
	1780	}
	1781
	1782	/*
	1783	* Release a filedesc structure.
	1784	*
	1785	* NOT MPSAFE (MPSAFE for refs > 1, but the final cleanup code is not MPSAFE)
	1786	*/
	1787	void
	1788	fdfree(struct proc *p)
	1789	{
	1790	struct filedesc *fdp = p->p_fd;
	1791	struct fdnode *fdnode;
	1792	int i;
	1793	struct filedesc_to_leader *fdtol;
	1794	struct file *fp;
	1795	struct vnode *vp;
	1796	struct flock lf;
	1797
	1798	/* Certain daemons might not have file descriptors. */
	1799	if (fdp == NULL)
	1800	return;
	1801
	1802	/*
	1803	* Severe messing around to follow
	1804	*/
	1805	spin_lock_wr(&fdp->fd_spin);
	1806
	1807	/* Check for special need to clear POSIX style locks */
	1808	fdtol = p->p_fdtol;
	1809	if (fdtol != NULL) {
	1810	KASSERT(fdtol->fdl_refcount > 0,
	1811	("filedesc_to_refcount botch: fdl_refcount=%d",
	1812	fdtol->fdl_refcount));
	1813	if (fdtol->fdl_refcount == 1 &&
	1814	(p->p_leader->p_flag & P_ADVLOCK) != 0) {
	1815	for (i = 0; i <= fdp->fd_lastfile; ++i) {
	1816	fdnode = &fdp->fd_files[i];
	1817	if (fdnode->fp == NULL \|\|
	1818	fdnode->fp->f_type != DTYPE_VNODE) {
	1819	continue;
	1820	}
	1821	fp = fdnode->fp;
	1822	fhold(fp);
	1823	spin_unlock_wr(&fdp->fd_spin);
	1824
	1825	lf.l_whence = SEEK_SET;
	1826	lf.l_start = 0;
	1827	lf.l_len = 0;
	1828	lf.l_type = F_UNLCK;
	1829	vp = (struct vnode *)fp->f_data;
	1830	(void) VOP_ADVLOCK(vp,
	1831	(caddr_t)p->p_leader,
	1832	F_UNLCK,
	1833	&lf,
	1834	F_POSIX);
	1835	fdrop(fp);
	1836	spin_lock_wr(&fdp->fd_spin);
	1837	}
	1838	}
	1839	retry:
	1840	if (fdtol->fdl_refcount == 1) {
	1841	if (fdp->fd_holdleaderscount > 0 &&
	1842	(p->p_leader->p_flag & P_ADVLOCK) != 0) {
	1843	/*
	1844	* close() or do_dup() has cleared a reference
	1845	* in a shared file descriptor table.
	1846	*/
	1847	fdp->fd_holdleaderswakeup = 1;
	1848	msleep(&fdp->fd_holdleaderscount,
	1849	&fdp->fd_spin, 0, "fdlhold", 0);
	1850	goto retry;
	1851	}
	1852	if (fdtol->fdl_holdcount > 0) {
	1853	/*
	1854	* Ensure that fdtol->fdl_leader
	1855	* remains valid in closef().
	1856	*/
	1857	fdtol->fdl_wakeup = 1;
	1858	msleep(fdtol, &fdp->fd_spin, 0, "fdlhold", 0);
	1859	goto retry;
	1860	}
	1861	}
	1862	fdtol->fdl_refcount--;
	1863	if (fdtol->fdl_refcount == 0 &&
	1864	fdtol->fdl_holdcount == 0) {
	1865	fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
	1866	fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
	1867	} else {
	1868	fdtol = NULL;
	1869	}
	1870	p->p_fdtol = NULL;
	1871	if (fdtol != NULL) {
	1872	spin_unlock_wr(&fdp->fd_spin);
	1873	kfree(fdtol, M_FILEDESC_TO_LEADER);
	1874	spin_lock_wr(&fdp->fd_spin);
	1875	}
	1876	}
	1877	if (--fdp->fd_refcnt > 0) {
	1878	spin_unlock_wr(&fdp->fd_spin);
	1879	return;
	1880	}
	1881	spin_unlock_wr(&fdp->fd_spin);
	1882
	1883	/*
	1884	* we are the last reference to the structure, we can
	1885	* safely assume it will not change out from under us.
	1886	*/
	1887	for (i = 0; i <= fdp->fd_lastfile; ++i) {
	1888	if (fdp->fd_files[i].fp)
	1889	closef(fdp->fd_files[i].fp, p);
	1890	}
	1891	if (fdp->fd_files != fdp->fd_builtin_files)
	1892	kfree(fdp->fd_files, M_FILEDESC);
	1893	if (fdp->fd_cdir) {
	1894	cache_drop(&fdp->fd_ncdir);
	1895	vrele(fdp->fd_cdir);
	1896	}
	1897	if (fdp->fd_rdir) {
	1898	cache_drop(&fdp->fd_nrdir);
	1899	vrele(fdp->fd_rdir);
	1900	}
	1901	if (fdp->fd_jdir) {
	1902	cache_drop(&fdp->fd_njdir);
	1903	vrele(fdp->fd_jdir);
	1904	}
	1905	if (fdp->fd_knlist)
	1906	kfree(fdp->fd_knlist, M_KQUEUE);
	1907	if (fdp->fd_knhash)
	1908	kfree(fdp->fd_knhash, M_KQUEUE);
	1909	kfree(fdp, M_FILEDESC);
	1910	}
	1911
	1912	/*
	1913	* Retrieve and reference the file pointer associated with a descriptor.
	1914	*
	1915	* MPSAFE
	1916	*/
	1917	struct file *
	1918	holdfp(struct filedesc *fdp, int fd, int flag)
	1919	{
	1920	struct file* fp;
	1921
	1922	spin_lock_rd(&fdp->fd_spin);
	1923	if (((u_int)fd) >= fdp->fd_nfiles) {
	1924	fp = NULL;
	1925	goto done;
	1926	}
	1927	if ((fp = fdp->fd_files[fd].fp) == NULL)
	1928	goto done;
	1929	if ((fp->f_flag & flag) == 0 && flag != -1) {
	1930	fp = NULL;
	1931	goto done;
	1932	}
	1933	fhold(fp);
	1934	done:
	1935	spin_unlock_rd(&fdp->fd_spin);
	1936	return (fp);
	1937	}
	1938
	1939	/*
	1940	* holdsock() - load the struct file pointer associated
	1941	* with a socket into *fpp. If an error occurs, non-zero
	1942	* will be returned and *fpp will be set to NULL.
	1943	*
	1944	* MPSAFE
	1945	*/
	1946	int
	1947	holdsock(struct filedesc fdp, int fd, struct file *fpp)
	1948	{
	1949	struct file *fp;
	1950	int error;
	1951
	1952	spin_lock_rd(&fdp->fd_spin);
	1953	if ((unsigned)fd >= fdp->fd_nfiles) {
	1954	error = EBADF;
	1955	fp = NULL;
	1956	goto done;
	1957	}
	1958	if ((fp = fdp->fd_files[fd].fp) == NULL) {
	1959	error = EBADF;
	1960	goto done;
	1961	}
	1962	if (fp->f_type != DTYPE_SOCKET) {
	1963	error = ENOTSOCK;
	1964	goto done;
	1965	}
	1966	fhold(fp);
	1967	error = 0;
	1968	done:
	1969	spin_unlock_rd(&fdp->fd_spin);
	1970	*fpp = fp;
	1971	return (error);
	1972	}
	1973
	1974	/*
	1975	* Convert a user file descriptor to a held file pointer.
	1976	*
	1977	* MPSAFE
	1978	*/
	1979	int
	1980	holdvnode(struct filedesc fdp, int fd, struct file *fpp)
	1981	{
	1982	struct file *fp;
	1983	int error;
	1984
	1985	spin_lock_rd(&fdp->fd_spin);
	1986	if ((unsigned)fd >= fdp->fd_nfiles) {
	1987	error = EBADF;
	1988	fp = NULL;
	1989	goto done;
	1990	}
	1991	if ((fp = fdp->fd_files[fd].fp) == NULL) {
	1992	error = EBADF;
	1993	goto done;
	1994	}
	1995	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
	1996	fp = NULL;
	1997	error = EINVAL;
	1998	goto done;
	1999	}
	2000	fhold(fp);
	2001	error = 0;
	2002	done:
	2003	spin_unlock_rd(&fdp->fd_spin);
	2004	*fpp = fp;
	2005	return (error);
	2006	}
	2007
	2008	/*
	2009	* For setugid programs, we don't want to people to use that setugidness
	2010	* to generate error messages which write to a file which otherwise would
	2011	* otherwise be off-limits to the process.
	2012	*
	2013	* This is a gross hack to plug the hole. A better solution would involve
	2014	* a special vop or other form of generalized access control mechanism. We
	2015	* go ahead and just reject all procfs file systems accesses as dangerous.
	2016	*
	2017	* Since setugidsafety calls this only for fd 0, 1 and 2, this check is
	2018	* sufficient. We also don't for check setugidness since we know we are.
	2019	*/
	2020	static int
	2021	is_unsafe(struct file *fp)
	2022	{
	2023	if (fp->f_type == DTYPE_VNODE &&
	2024	((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
	2025	return (1);
	2026	return (0);
	2027	}
	2028
	2029	/*
	2030	* Make this setguid thing safe, if at all possible.
	2031	*
	2032	* NOT MPSAFE - scans fdp without spinlocks, calls knote_fdclose()
	2033	*/
	2034	void
	2035	setugidsafety(struct proc *p)
	2036	{
	2037	struct filedesc *fdp = p->p_fd;
	2038	int i;
	2039
	2040	/* Certain daemons might not have file descriptors. */
	2041	if (fdp == NULL)
	2042	return;
	2043
	2044	/*
	2045	* note: fdp->fd_files may be reallocated out from under us while
	2046	* we are blocked in a close. Be careful!
	2047	*/
	2048	for (i = 0; i <= fdp->fd_lastfile; i++) {
	2049	if (i > 2)
	2050	break;
	2051	if (fdp->fd_files[i].fp && is_unsafe(fdp->fd_files[i].fp)) {
	2052	struct file *fp;
	2053
	2054	if (i < fdp->fd_knlistsize)
	2055	knote_fdclose(p, i);
	2056	/*
	2057	* NULL-out descriptor prior to close to avoid
	2058	* a race while close blocks.
	2059	*/
	2060	if ((fp = funsetfd_locked(fdp, i)) != NULL)
	2061	closef(fp, p);
	2062	}
	2063	}
	2064	}
	2065
	2066	/*
	2067	* Close any files on exec?
	2068	*
	2069	* NOT MPSAFE - scans fdp without spinlocks, calls knote_fdclose()
	2070	*/
	2071	void
	2072	fdcloseexec(struct proc *p)
	2073	{
	2074	struct filedesc *fdp = p->p_fd;
	2075	int i;
	2076
	2077	/* Certain daemons might not have file descriptors. */
	2078	if (fdp == NULL)
	2079	return;
	2080
	2081	/*
	2082	* We cannot cache fd_files since operations may block and rip
	2083	* them out from under us.
	2084	*/
	2085	for (i = 0; i <= fdp->fd_lastfile; i++) {
	2086	if (fdp->fd_files[i].fp != NULL &&
	2087	(fdp->fd_files[i].fileflags & UF_EXCLOSE)) {
	2088	struct file *fp;
	2089
	2090	if (i < fdp->fd_knlistsize)
	2091	knote_fdclose(p, i);
	2092	/*
	2093	* NULL-out descriptor prior to close to avoid
	2094	* a race while close blocks.
	2095	*/
	2096	if ((fp = funsetfd_locked(fdp, i)) != NULL)
	2097	closef(fp, p);
	2098	}
	2099	}
	2100	}
	2101
	2102	/*
	2103	* It is unsafe for set[ug]id processes to be started with file
	2104	* descriptors 0..2 closed, as these descriptors are given implicit
	2105	* significance in the Standard C library. fdcheckstd() will create a
	2106	* descriptor referencing /dev/null for each of stdin, stdout, and
	2107	* stderr that is not already open.
	2108	*
	2109	* NOT MPSAFE - calls falloc, vn_open, etc
	2110	*/
	2111	int
	2112	fdcheckstd(struct proc *p)
	2113	{
	2114	struct nlookupdata nd;
	2115	struct filedesc *fdp;
	2116	struct file *fp;
	2117	register_t retval;
	2118	int i, error, flags, devnull;
	2119
	2120	fdp = p->p_fd;
	2121	if (fdp == NULL)
	2122	return (0);
	2123	devnull = -1;
	2124	error = 0;
	2125	for (i = 0; i < 3; i++) {
	2126	if (fdp->fd_files[i].fp != NULL)
	2127	continue;
	2128	if (devnull < 0) {
	2129	if ((error = falloc(p, &fp, &devnull)) != 0)
	2130	break;
	2131
	2132	error = nlookup_init(&nd, "/dev/null", UIO_SYSSPACE,
	2133	NLC_FOLLOW\|NLC_LOCKVP);
	2134	flags = FREAD \| FWRITE;
	2135	if (error == 0)
	2136	error = vn_open(&nd, fp, flags, 0);
	2137	if (error == 0)
	2138	fsetfd(p, fp, devnull);
	2139	else
	2140	fsetfd(p, NULL, devnull);
	2141	fdrop(fp);
	2142	nlookup_done(&nd);
	2143	if (error)
	2144	break;
	2145	KKASSERT(i == devnull);
	2146	} else {
	2147	error = kern_dup(DUP_FIXED, devnull, i, &retval);
	2148	if (error != 0)
	2149	break;
	2150	}
	2151	}
	2152	return (error);
	2153	}
	2154
	2155	/*
	2156	* Internal form of close.
	2157	* Decrement reference count on file structure.
	2158	* Note: td and/or p may be NULL when closing a file
	2159	* that was being passed in a message.
	2160	*
	2161	* MPALMOSTSAFE - acquires mplock for VOP operations
	2162	*/
	2163	int
	2164	closef(struct file fp, struct proc p)
	2165	{
	2166	struct vnode *vp;
	2167	struct flock lf;
	2168	struct filedesc_to_leader *fdtol;
	2169
	2170	if (fp == NULL)
	2171	return (0);
	2172
	2173	/*
	2174	* POSIX record locking dictates that any close releases ALL
	2175	* locks owned by this process. This is handled by setting
	2176	* a flag in the unlock to free ONLY locks obeying POSIX
	2177	* semantics, and not to free BSD-style file locks.
	2178	* If the descriptor was in a message, POSIX-style locks
	2179	* aren't passed with the descriptor.
	2180	*/
	2181	if (p != NULL && fp->f_type == DTYPE_VNODE &&
	2182	(((struct vnode *)fp->f_data)->v_flag & VMAYHAVELOCKS)
	2183	) {
	2184	get_mplock();
	2185	if ((p->p_leader->p_flag & P_ADVLOCK) != 0) {
	2186	lf.l_whence = SEEK_SET;
	2187	lf.l_start = 0;
	2188	lf.l_len = 0;
	2189	lf.l_type = F_UNLCK;
	2190	vp = (struct vnode *)fp->f_data;
	2191	(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
	2192	&lf, F_POSIX);
	2193	}
	2194	fdtol = p->p_fdtol;
	2195	if (fdtol != NULL) {
	2196	/*
	2197	* Handle special case where file descriptor table
	2198	* is shared between multiple process leaders.
	2199	*/
	2200	for (fdtol = fdtol->fdl_next;
	2201	fdtol != p->p_fdtol;
	2202	fdtol = fdtol->fdl_next) {
	2203	if ((fdtol->fdl_leader->p_flag &
	2204	P_ADVLOCK) == 0)
	2205	continue;
	2206	fdtol->fdl_holdcount++;
	2207	lf.l_whence = SEEK_SET;
	2208	lf.l_start = 0;
	2209	lf.l_len = 0;
	2210	lf.l_type = F_UNLCK;
	2211	vp = (struct vnode *)fp->f_data;
	2212	(void) VOP_ADVLOCK(vp,
	2213	(caddr_t)fdtol->fdl_leader,
	2214	F_UNLCK, &lf, F_POSIX);
	2215	fdtol->fdl_holdcount--;
	2216	if (fdtol->fdl_holdcount == 0 &&
	2217	fdtol->fdl_wakeup != 0) {
	2218	fdtol->fdl_wakeup = 0;
	2219	wakeup(fdtol);
	2220	}
	2221	}
	2222	}
	2223	rel_mplock();
	2224	}
	2225	return (fdrop(fp));
	2226	}
	2227
	2228	/*
	2229	* MPSAFE
	2230	*
	2231	* fhold() can only be called if f_count is already at least 1 (i.e. the
	2232	* caller of fhold() already has a reference to the file pointer in some
	2233	* manner or other).
	2234	*
	2235	* f_count is not spin-locked. Instead, atomic ops are used for
	2236	* incrementing, decrementing, and handling the 1->0 transition.
	2237	*/
	2238	void
	2239	fhold(struct file *fp)
	2240	{
	2241	atomic_add_int(&fp->f_count, 1);
	2242	}
	2243
	2244	/*
	2245	* fdrop() - drop a reference to a descriptor
	2246	*
	2247	* MPALMOSTSAFE - acquires mplock for final close sequence
	2248	*/
	2249	int
	2250	fdrop(struct file *fp)
	2251	{
	2252	struct flock lf;
	2253	struct vnode *vp;
	2254	int error;
	2255
	2256	/*
	2257	* A combined fetch and subtract is needed to properly detect
	2258	* 1->0 transitions, otherwise two cpus dropping from a ref
	2259	* count of 2 might both try to run the 1->0 code.
	2260	*/
	2261	if (atomic_fetchadd_int(&fp->f_count, -1) > 1)
	2262	return (0);
	2263
	2264	get_mplock();
	2265
	2266	/*
	2267	* The last reference has gone away, we own the fp structure free
	2268	* and clear.
	2269	*/
	2270	if (fp->f_count < 0)
	2271	panic("fdrop: count < 0");
	2272	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE &&
	2273	(((struct vnode *)fp->f_data)->v_flag & VMAYHAVELOCKS)
	2274	) {
	2275	lf.l_whence = SEEK_SET;
	2276	lf.l_start = 0;
	2277	lf.l_len = 0;
	2278	lf.l_type = F_UNLCK;
	2279	vp = (struct vnode *)fp->f_data;
	2280	(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, 0);
	2281	}
	2282	if (fp->f_ops != &badfileops)
	2283	error = fo_close(fp);
	2284	else
	2285	error = 0;
	2286	ffree(fp);
	2287	rel_mplock();
	2288	return (error);
	2289	}
	2290
	2291	/*
	2292	* Apply an advisory lock on a file descriptor.
	2293	*
	2294	* Just attempt to get a record lock of the requested type on
	2295	* the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
	2296	*/
	2297	int
	2298	sys_flock(struct flock_args *uap)
	2299	{
	2300	struct proc *p = curproc;
	2301	struct file *fp;
	2302	struct vnode *vp;
	2303	struct flock lf;
	2304	int error;
	2305
	2306	if ((fp = holdfp(p->p_fd, uap->fd, -1)) == NULL)
	2307	return (EBADF);
	2308	if (fp->f_type != DTYPE_VNODE) {
	2309	error = EOPNOTSUPP;
	2310	goto done;
	2311	}
	2312	vp = (struct vnode *)fp->f_data;
	2313	lf.l_whence = SEEK_SET;
	2314	lf.l_start = 0;
	2315	lf.l_len = 0;
	2316	if (uap->how & LOCK_UN) {
	2317	lf.l_type = F_UNLCK;
	2318	fp->f_flag &= ~FHASLOCK;
	2319	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, 0);
	2320	goto done;
	2321	}
	2322	if (uap->how & LOCK_EX)
	2323	lf.l_type = F_WRLCK;
	2324	else if (uap->how & LOCK_SH)
	2325	lf.l_type = F_RDLCK;
	2326	else {
	2327	error = EBADF;
	2328	goto done;
	2329	}
	2330	fp->f_flag \|= FHASLOCK;
	2331	if (uap->how & LOCK_NB)
	2332	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, 0);
	2333	else
	2334	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_WAIT);
	2335	done:
	2336	fdrop(fp);
	2337	return (error);
	2338	}
	2339
	2340	/*
	2341	* File Descriptor pseudo-device driver (/dev/fd/).
	2342	*
	2343	* Opening minor device N dup()s the file (if any) connected to file
	2344	* descriptor N belonging to the calling process. Note that this driver
	2345	* consists of only the ``open()'' routine, because all subsequent
	2346	* references to this file will be direct to the other driver.
	2347	*/
	2348	/* ARGSUSED */
	2349	static int
	2350	fdopen(struct dev_open_args *ap)
	2351	{
	2352	thread_t td = curthread;
	2353
	2354	KKASSERT(td->td_lwp != NULL);
	2355
	2356	/*
	2357	* XXX Kludge: set curlwp->lwp_dupfd to contain the value of the
	2358	* the file descriptor being sought for duplication. The error
	2359	* return ensures that the vnode for this device will be released
	2360	* by vn_open. Open will detect this special error and take the
	2361	* actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
	2362	* will simply report the error.
	2363	*/
	2364	td->td_lwp->lwp_dupfd = minor(ap->a_head.a_dev);
	2365	return (ENODEV);
	2366	}
	2367
	2368	/*
	2369	* The caller has reserved the file descriptor dfd for us. On success we
	2370	* must fsetfd() it. On failure the caller will clean it up.
	2371	*
	2372	* NOT MPSAFE - isn't getting spinlocks, possibly other things
	2373	*/
	2374	int
	2375	dupfdopen(struct proc *p, int dfd, int sfd, int mode, int error)
	2376	{
	2377	struct filedesc *fdp = p->p_fd;
	2378	struct file *wfp;
	2379	struct file *xfp;
	2380	int werror;
	2381
	2382	if ((wfp = holdfp(fdp, sfd, -1)) == NULL)
	2383	return (EBADF);
	2384
	2385	/*
	2386	* Close a revoke/dup race. Duping a descriptor marked as revoked
	2387	* will dup a dummy descriptor instead of the real one.
	2388	*/
	2389	if (wfp->f_flag & FREVOKED) {
	2390	kprintf("Warning: attempt to dup() a revoked descriptor\n");
	2391	fdrop(wfp);
	2392	wfp = NULL;
	2393	werror = falloc(NULL, &wfp, NULL);
	2394	if (werror)
	2395	return (werror);
	2396	}
	2397
	2398	/*
	2399	* There are two cases of interest here.
	2400	*
	2401	* For ENODEV simply dup sfd to file descriptor dfd and return.
	2402	*
	2403	* For ENXIO steal away the file structure from sfd and store it
	2404	* dfd. sfd is effectively closed by this operation.
	2405	*
	2406	* Any other error code is just returned.
	2407	*/
	2408	switch (error) {
	2409	case ENODEV:
	2410	/*
	2411	* Check that the mode the file is being opened for is a
	2412	* subset of the mode of the existing descriptor.
	2413	*/
	2414	if (((mode & (FREAD\|FWRITE)) \| wfp->f_flag) != wfp->f_flag) {
	2415	error = EACCES;
	2416	break;
	2417	}
	2418	fdp->fd_files[dfd].fileflags = fdp->fd_files[sfd].fileflags;
	2419	fsetfd(p, wfp, dfd);
	2420	error = 0;
	2421	break;
	2422	case ENXIO:
	2423	/*
	2424	* Steal away the file pointer from dfd, and stuff it into indx.
	2425	*/
	2426	fdp->fd_files[dfd].fileflags = fdp->fd_files[sfd].fileflags;
	2427	fsetfd(p, wfp, dfd);
	2428	if ((xfp = funsetfd_locked(fdp, sfd)) != NULL)
	2429	fdrop(xfp);
	2430	error = 0;
	2431	break;
	2432	default:
	2433	break;
	2434	}
	2435	fdrop(wfp);
	2436	return (error);
	2437	}
	2438
	2439	/*
	2440	* NOT MPSAFE - I think these refer to a common file descriptor table
	2441	* and we need to spinlock that to link fdtol in.
	2442	*/
	2443	struct filedesc_to_leader *
	2444	filedesc_to_leader_alloc(struct filedesc_to_leader *old,
	2445	struct proc *leader)
	2446	{
	2447	struct filedesc_to_leader *fdtol;
	2448
	2449	fdtol = kmalloc(sizeof(struct filedesc_to_leader),
	2450	M_FILEDESC_TO_LEADER, M_WAITOK);
	2451	fdtol->fdl_refcount = 1;
	2452	fdtol->fdl_holdcount = 0;
	2453	fdtol->fdl_wakeup = 0;
	2454	fdtol->fdl_leader = leader;
	2455	if (old != NULL) {
	2456	fdtol->fdl_next = old->fdl_next;
	2457	fdtol->fdl_prev = old;
	2458	old->fdl_next = fdtol;
	2459	fdtol->fdl_next->fdl_prev = fdtol;
	2460	} else {
	2461	fdtol->fdl_next = fdtol;
	2462	fdtol->fdl_prev = fdtol;
	2463	}
	2464	return fdtol;
	2465	}
	2466
	2467	/*
	2468	* Scan all file pointers in the system. The callback is made with
	2469	* the master list spinlock held exclusively.
	2470	*
	2471	* MPSAFE
	2472	*/
	2473	void
	2474	allfiles_scan_exclusive(int (callback)(struct file , void ), void data)
	2475	{
	2476	struct file *fp;
	2477	int res;
	2478
	2479	spin_lock_wr(&filehead_spin);
	2480	LIST_FOREACH(fp, &filehead, f_list) {
	2481	res = callback(fp, data);
	2482	if (res < 0)
	2483	break;
	2484	}
	2485	spin_unlock_wr(&filehead_spin);
	2486	}
	2487
	2488	/*
	2489	* Get file structures.
	2490	*
	2491	* NOT MPSAFE - process list scan, SYSCTL_OUT (probably not mpsafe)
	2492	*/
	2493
	2494	struct sysctl_kern_file_info {
	2495	int count;
	2496	int error;
	2497	struct sysctl_req *req;
	2498	};
	2499
	2500	static int sysctl_kern_file_callback(struct proc p, void data);
	2501
	2502	static int
	2503	sysctl_kern_file(SYSCTL_HANDLER_ARGS)
	2504	{
	2505	struct sysctl_kern_file_info info;
	2506
	2507	/*
	2508	* Note: because the number of file descriptors is calculated
	2509	* in different ways for sizing vs returning the data,
	2510	* there is information leakage from the first loop. However,
	2511	* it is of a similar order of magnitude to the leakage from
	2512	* global system statistics such as kern.openfiles.
	2513	*
	2514	* When just doing a count, note that we cannot just count
	2515	* the elements and add f_count via the filehead list because
	2516	* threaded processes share their descriptor table and f_count might
	2517	* still be '1' in that case.
	2518	*
	2519	* Since the SYSCTL op can block, we must hold the process to
	2520	* prevent it being ripped out from under us either in the
	2521	* file descriptor loop or in the greater LIST_FOREACH. The
	2522	* process may be in varying states of disrepair. If the process
	2523	* is in SZOMB we may have caught it just as it is being removed
	2524	* from the allproc list, we must skip it in that case to maintain
	2525	* an unbroken chain through the allproc list.
	2526	*/
	2527	info.count = 0;
	2528	info.error = 0;
	2529	info.req = req;
	2530	allproc_scan(sysctl_kern_file_callback, &info);
	2531
	2532	/*
	2533	* When just calculating the size, overestimate a bit to try to
	2534	* prevent system activity from causing the buffer-fill call
	2535	* to fail later on.
	2536	*/
	2537	if (req->oldptr == NULL) {
	2538	info.count = (info.count + 16) + (info.count / 10);
	2539	info.error = SYSCTL_OUT(req, NULL,
	2540	info.count * sizeof(struct kinfo_file));
	2541	}
	2542	return (info.error);
	2543	}
	2544
	2545	static int
	2546	sysctl_kern_file_callback(struct proc p, void data)
	2547	{
	2548	struct sysctl_kern_file_info *info = data;
	2549	struct kinfo_file kf;
	2550	struct filedesc *fdp;
	2551	struct file *fp;
	2552	uid_t uid;
	2553	int n;
	2554
	2555	if (p->p_stat == SIDL \|\| p->p_stat == SZOMB)
	2556	return(0);
	2557	if (!PRISON_CHECK(info->req->td->td_proc->p_ucred, p->p_ucred) != 0)
	2558	return(0);
	2559	if ((fdp = p->p_fd) == NULL)
	2560	return(0);
	2561	spin_lock_rd(&fdp->fd_spin);
	2562	for (n = 0; n < fdp->fd_nfiles; ++n) {
	2563	if ((fp = fdp->fd_files[n].fp) == NULL)
	2564	continue;
	2565	if (info->req->oldptr == NULL) {
	2566	++info->count;
	2567	} else {
	2568	uid = p->p_ucred ? p->p_ucred->cr_uid : -1;
	2569	kcore_make_file(&kf, fp, p->p_pid, uid, n);
	2570	spin_unlock_rd(&fdp->fd_spin);
	2571	info->error = SYSCTL_OUT(info->req, &kf, sizeof(kf));
	2572	spin_lock_rd(&fdp->fd_spin);
	2573	if (info->error)
	2574	break;
	2575	}
	2576	}
	2577	spin_unlock_rd(&fdp->fd_spin);
	2578	if (info->error)
	2579	return(-1);
	2580	return(0);
	2581	}
	2582
	2583	SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE\|CTLFLAG_RD,
	2584	0, 0, sysctl_kern_file, "S,file", "Entire file table");
	2585
	2586	SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
	2587	&maxfilesperproc, 0, "Maximum files allowed open per process");
	2588
	2589	SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
	2590	&maxfiles, 0, "Maximum number of files");
	2591
	2592	SYSCTL_INT(_kern, OID_AUTO, maxfilesrootres, CTLFLAG_RW,
	2593	&maxfilesrootres, 0, "Descriptors reserved for root use");
	2594
	2595	SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
	2596	&nfiles, 0, "System-wide number of open files");
	2597
	2598	static void
	2599	fildesc_drvinit(void *unused)
	2600	{
	2601	int fd;
	2602
	2603	dev_ops_add(&fildesc_ops, 0, 0);
	2604	for (fd = 0; fd < NUMFDESC; fd++) {
	2605	make_dev(&fildesc_ops, fd,
	2606	UID_BIN, GID_BIN, 0666, "fd/%d", fd);
	2607	}
	2608	make_dev(&fildesc_ops, 0, UID_ROOT, GID_WHEEL, 0666, "stdin");
	2609	make_dev(&fildesc_ops, 1, UID_ROOT, GID_WHEEL, 0666, "stdout");
	2610	make_dev(&fildesc_ops, 2, UID_ROOT, GID_WHEEL, 0666, "stderr");
	2611	}
	2612
	2613	/*
	2614	* MPSAFE
	2615	*/
	2616	struct fileops badfileops = {
	2617	.fo_read = badfo_readwrite,
	2618	.fo_write = badfo_readwrite,
	2619	.fo_ioctl = badfo_ioctl,
	2620	.fo_poll = badfo_poll,
	2621	.fo_kqfilter = badfo_kqfilter,
	2622	.fo_stat = badfo_stat,
	2623	.fo_close = badfo_close,
	2624	.fo_shutdown = badfo_shutdown
	2625	};
	2626
	2627	/*
	2628	* MPSAFE
	2629	*/
	2630	static int
	2631	badfo_readwrite(
	2632	struct file *fp,
	2633	struct uio *uio,
	2634	struct ucred *cred,
	2635	int flags
	2636	) {
	2637	return (EBADF);
	2638	}
	2639
	2640	/*
	2641	* MPSAFE
	2642	*/
	2643	static int
	2644	badfo_ioctl(struct file fp, u_long com, caddr_t data, struct ucred cred)
	2645	{
	2646	return (EBADF);
	2647	}
	2648
	2649	/*
	2650	* MPSAFE
	2651	*/
	2652	static int
	2653	badfo_poll(struct file fp, int events, struct ucred cred)
	2654	{
	2655	return (0);
	2656	}
	2657
	2658	/*
	2659	* MPSAFE
	2660	*/
	2661	static int
	2662	badfo_kqfilter(struct file fp, struct knote kn)
	2663	{
	2664	return (0);
	2665	}
	2666
	2667	static int
	2668	badfo_stat(struct file fp, struct stat sb, struct ucred *cred)
	2669	{
	2670	return (EBADF);
	2671	}
	2672
	2673	/*
	2674	* MPSAFE
	2675	*/
	2676	static int
	2677	badfo_close(struct file *fp)
	2678	{
	2679	return (EBADF);
	2680	}
	2681
	2682	/*
	2683	* MPSAFE
	2684	*/
	2685	static int
	2686	badfo_shutdown(struct file *fp, int how)
	2687	{
	2688	return (EBADF);
	2689	}
	2690
	2691	/*
	2692	* MPSAFE
	2693	*/
	2694	int
	2695	nofo_shutdown(struct file *fp, int how)
	2696	{
	2697	return (EOPNOTSUPP);
	2698	}
	2699
	2700	SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
	2701	fildesc_drvinit,NULL)