2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94
39 * $FreeBSD: src/sys/kern/kern_descrip.c,v 1.81.2.17 2003/06/06 20:21:32 tegge Exp $
40 * $DragonFly: src/sys/kern/kern_descrip.c,v 1.2 2003/06/17 04:28:41 dillon Exp $
43 #include "opt_compat.h"
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/malloc.h>
47 #include <sys/sysproto.h>
49 #include <sys/filedesc.h>
50 #include <sys/kernel.h>
51 #include <sys/sysctl.h>
52 #include <sys/vnode.h>
54 #include <sys/namei.h>
57 #include <sys/filio.h>
58 #include <sys/fcntl.h>
59 #include <sys/unistd.h>
60 #include <sys/resourcevar.h>
61 #include <sys/event.h>
64 #include <vm/vm_extern.h>
66 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
67 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "file desc to leader",
68 "file desc to leader structures");
69 MALLOC_DEFINE(M_FILE, "file", "Open file structure");
70 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
72 static d_open_t fdopen;
76 static struct cdevsw fildesc_cdevsw = {
84 /* strategy */ nostrategy,
93 static int do_dup __P((struct filedesc *fdp, int old, int new, register_t *retval, struct proc *p));
94 static int badfo_readwrite __P((struct file *fp, struct uio *uio,
95 struct ucred *cred, int flags, struct proc *p));
96 static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data,
98 static int badfo_poll __P((struct file *fp, int events,
99 struct ucred *cred, struct proc *p));
100 static int badfo_kqfilter __P((struct file *fp, struct knote *kn));
101 static int badfo_stat __P((struct file *fp, struct stat *sb, struct proc *p));
102 static int badfo_close __P((struct file *fp, struct proc *p));
105 * Descriptor management.
107 struct filelist filehead; /* head of list of open files */
108 int nfiles; /* actual number of open files */
112 * System calls on descriptors.
114 #ifndef _SYS_SYSPROTO_H_
115 struct getdtablesize_args {
121 getdtablesize(p, uap)
123 struct getdtablesize_args *uap;
127 min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
132 * Duplicate a file descriptor to a particular value.
134 * note: keep in mind that a potential race condition exists when closing
135 * descriptors from a shared descriptor table (via rfork).
137 #ifndef _SYS_SYSPROTO_H_
147 struct dup2_args *uap;
149 register struct filedesc *fdp = p->p_fd;
150 register u_int old = uap->from, new = uap->to;
154 if (old >= fdp->fd_nfiles ||
155 fdp->fd_ofiles[old] == NULL ||
156 new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
157 new >= maxfilesperproc) {
161 p->p_retval[0] = new;
164 if (new >= fdp->fd_nfiles) {
165 if ((error = fdalloc(p, new, &i)))
168 * fdalloc() may block, retest everything.
172 return (do_dup(fdp, (int)old, (int)new, p->p_retval, p));
176 * Duplicate a file descriptor.
178 #ifndef _SYS_SYSPROTO_H_
187 struct dup_args *uap;
189 register struct filedesc *fdp;
195 if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
197 if ((error = fdalloc(p, 0, &new)))
199 return (do_dup(fdp, (int)old, new, p->p_retval, p));
203 * The file control system call.
205 #ifndef _SYS_SYSPROTO_H_
216 register struct fcntl_args *uap;
218 register struct filedesc *fdp = p->p_fd;
219 register struct file *fp;
222 int i, tmp, error, flg = F_POSIX;
226 if ((unsigned)uap->fd >= fdp->fd_nfiles ||
227 (fp = fdp->fd_ofiles[uap->fd]) == NULL)
229 pop = &fdp->fd_ofileflags[uap->fd];
234 if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
235 newmin >= maxfilesperproc)
237 if ((error = fdalloc(p, newmin, &i)))
239 return (do_dup(fdp, uap->fd, i, p->p_retval, p));
242 p->p_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
246 *pop = (*pop &~ UF_EXCLOSE) |
247 (uap->arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
251 p->p_retval[0] = OFLAGS(fp->f_flag);
256 fp->f_flag &= ~FCNTLFLAGS;
257 fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
258 tmp = fp->f_flag & FNONBLOCK;
259 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
264 tmp = fp->f_flag & FASYNC;
265 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
270 fp->f_flag &= ~FNONBLOCK;
272 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
278 error = fo_ioctl(fp, FIOGETOWN, (caddr_t)p->p_retval, p);
284 error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, p);
290 /* Fall into F_SETLK */
293 if (fp->f_type != DTYPE_VNODE)
295 vp = (struct vnode *)fp->f_data;
298 * copyin/lockop may block
301 /* Copy in the lock structure */
302 error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
308 if (fl.l_whence == SEEK_CUR)
309 fl.l_start += fp->f_offset;
313 if ((fp->f_flag & FREAD) == 0) {
317 p->p_leader->p_flag |= P_ADVLOCK;
318 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
322 if ((fp->f_flag & FWRITE) == 0) {
326 p->p_leader->p_flag |= P_ADVLOCK;
327 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
331 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
338 /* Check for race with close */
339 if ((unsigned) uap->fd >= fdp->fd_nfiles ||
340 fp != fdp->fd_ofiles[uap->fd]) {
341 fl.l_whence = SEEK_SET;
345 (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
346 F_UNLCK, &fl, F_POSIX);
352 if (fp->f_type != DTYPE_VNODE)
354 vp = (struct vnode *)fp->f_data;
356 * copyin/lockop may block
359 /* Copy in the lock structure */
360 error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
366 if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
367 fl.l_type != F_UNLCK) {
371 if (fl.l_whence == SEEK_CUR)
372 fl.l_start += fp->f_offset;
373 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
377 error = copyout((caddr_t)&fl,
378 (caddr_t)(intptr_t)uap->arg, sizeof(fl));
388 * Common code for dup, dup2, and fcntl(F_DUPFD).
391 do_dup(fdp, old, new, retval, p)
392 register struct filedesc *fdp;
393 register int old, new;
402 * Save info on the descriptor being overwritten. We have
403 * to do the unmap now, but we cannot close it without
404 * introducing an ownership race for the slot.
406 delfp = fdp->fd_ofiles[new];
407 if (delfp != NULL && p->p_fdtol != NULL) {
409 * Ask fdfree() to sleep to ensure that all relevant
410 * process leaders can be traversed in closef().
412 fdp->fd_holdleaderscount++;
417 if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
418 (void) munmapfd(p, new);
422 * Duplicate the source descriptor, update lastfile
424 fp = fdp->fd_ofiles[old];
425 fdp->fd_ofiles[new] = fp;
426 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
428 if (new > fdp->fd_lastfile)
429 fdp->fd_lastfile = new;
433 * If we dup'd over a valid file, we now own the reference to it
434 * and must dispose of it using closef() semantics (as if a
435 * close() were performed on it).
438 (void) closef(delfp, p);
440 fdp->fd_holdleaderscount--;
441 if (fdp->fd_holdleaderscount == 0 &&
442 fdp->fd_holdleaderswakeup != 0) {
443 fdp->fd_holdleaderswakeup = 0;
444 wakeup(&fdp->fd_holdleaderscount);
452 * If sigio is on the list associated with a process or process group,
453 * disable signalling from the device, remove sigio from the list and
465 *(sigio->sio_myref) = NULL;
467 if (sigio->sio_pgid < 0) {
468 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
470 } else /* if ((*sigiop)->sio_pgid > 0) */ {
471 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
474 crfree(sigio->sio_ucred);
475 FREE(sigio, M_SIGIO);
478 /* Free a list of sigio structures. */
480 funsetownlst(sigiolst)
481 struct sigiolst *sigiolst;
485 while ((sigio = SLIST_FIRST(sigiolst)) != NULL)
490 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
492 * After permission checking, add a sigio structure to the sigio list for
493 * the process or process group.
496 fsetown(pgid, sigiop)
498 struct sigio **sigiop;
515 * Policy - Don't allow a process to FSETOWN a process
516 * in another session.
518 * Remove this test to allow maximum flexibility or
519 * restrict FSETOWN to the current process or process
520 * group for maximum safety.
522 if (proc->p_session != curproc->p_session)
526 } else /* if (pgid < 0) */ {
527 pgrp = pgfind(-pgid);
532 * Policy - Don't allow a process to FSETOWN a process
533 * in another session.
535 * Remove this test to allow maximum flexibility or
536 * restrict FSETOWN to the current process or process
537 * group for maximum safety.
539 if (pgrp->pg_session != curproc->p_session)
545 MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
547 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
548 sigio->sio_proc = proc;
550 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
551 sigio->sio_pgrp = pgrp;
553 sigio->sio_pgid = pgid;
554 crhold(curproc->p_ucred);
555 sigio->sio_ucred = curproc->p_ucred;
556 /* It would be convenient if p_ruid was in ucred. */
557 sigio->sio_ruid = curproc->p_cred->p_ruid;
558 sigio->sio_myref = sigiop;
566 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
572 return (sigio != NULL ? sigio->sio_pgid : 0);
576 * Close a file descriptor.
578 #ifndef _SYS_SYSPROTO_H_
587 struct close_args *uap;
589 register struct filedesc *fdp = p->p_fd;
590 register struct file *fp;
591 register int fd = uap->fd;
595 if ((unsigned)fd >= fdp->fd_nfiles ||
596 (fp = fdp->fd_ofiles[fd]) == NULL)
599 if (fdp->fd_ofileflags[fd] & UF_MAPPED)
600 (void) munmapfd(p, fd);
602 fdp->fd_ofiles[fd] = NULL;
603 fdp->fd_ofileflags[fd] = 0;
605 if (p->p_fdtol != NULL) {
607 * Ask fdfree() to sleep to ensure that all relevant
608 * process leaders can be traversed in closef().
610 fdp->fd_holdleaderscount++;
615 * we now hold the fp reference that used to be owned by the descriptor
618 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
620 if (fd < fdp->fd_freefile)
621 fdp->fd_freefile = fd;
622 if (fd < fdp->fd_knlistsize)
623 knote_fdclose(p, fd);
624 error = closef(fp, p);
626 fdp->fd_holdleaderscount--;
627 if (fdp->fd_holdleaderscount == 0 &&
628 fdp->fd_holdleaderswakeup != 0) {
629 fdp->fd_holdleaderswakeup = 0;
630 wakeup(&fdp->fd_holdleaderscount);
636 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
638 * Return status information about a file descriptor.
640 #ifndef _SYS_SYSPROTO_H_
650 register struct ofstat_args *uap;
652 register struct filedesc *fdp = p->p_fd;
653 register struct file *fp;
658 if ((unsigned)uap->fd >= fdp->fd_nfiles ||
659 (fp = fdp->fd_ofiles[uap->fd]) == NULL)
662 error = fo_stat(fp, &ub, p);
665 error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
670 #endif /* COMPAT_43 || COMPAT_SUNOS */
673 * Return status information about a file descriptor.
675 #ifndef _SYS_SYSPROTO_H_
685 register struct fstat_args *uap;
687 register struct filedesc *fdp = p->p_fd;
688 register struct file *fp;
692 if ((unsigned)uap->fd >= fdp->fd_nfiles ||
693 (fp = fdp->fd_ofiles[uap->fd]) == NULL)
696 error = fo_stat(fp, &ub, p);
698 error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
704 * Return status information about a file descriptor.
706 #ifndef _SYS_SYSPROTO_H_
716 register struct nfstat_args *uap;
718 register struct filedesc *fdp = p->p_fd;
719 register struct file *fp;
724 if ((unsigned)uap->fd >= fdp->fd_nfiles ||
725 (fp = fdp->fd_ofiles[uap->fd]) == NULL)
728 error = fo_stat(fp, &ub, p);
731 error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
738 * Return pathconf information about a file descriptor.
740 #ifndef _SYS_SYSPROTO_H_
741 struct fpathconf_args {
750 register struct fpathconf_args *uap;
752 struct filedesc *fdp = p->p_fd;
757 if ((unsigned)uap->fd >= fdp->fd_nfiles ||
758 (fp = fdp->fd_ofiles[uap->fd]) == NULL)
763 switch (fp->f_type) {
766 if (uap->name != _PC_PIPE_BUF) {
769 p->p_retval[0] = PIPE_BUF;
775 vp = (struct vnode *)fp->f_data;
776 error = VOP_PATHCONF(vp, uap->name, p->p_retval);
787 * Allocate a file descriptor for the process.
790 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
793 fdalloc(p, want, result)
798 register struct filedesc *fdp = p->p_fd;
800 int lim, last, nfiles;
801 struct file **newofile;
805 * Search for a free descriptor starting at the higher
806 * of want or fd_freefile. If that fails, consider
807 * expanding the ofile array.
809 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
811 last = min(fdp->fd_nfiles, lim);
812 if ((i = want) < fdp->fd_freefile)
813 i = fdp->fd_freefile;
814 for (; i < last; i++) {
815 if (fdp->fd_ofiles[i] == NULL) {
816 fdp->fd_ofileflags[i] = 0;
817 if (i > fdp->fd_lastfile)
818 fdp->fd_lastfile = i;
819 if (want <= fdp->fd_freefile)
820 fdp->fd_freefile = i;
827 * No space in current array. Expand?
829 if (fdp->fd_nfiles >= lim)
831 if (fdp->fd_nfiles < NDEXTENT)
834 nfiles = 2 * fdp->fd_nfiles;
835 MALLOC(newofile, struct file **, nfiles * OFILESIZE,
836 M_FILEDESC, M_WAITOK);
839 * deal with file-table extend race that might have occurred
840 * when malloc was blocked.
842 if (fdp->fd_nfiles >= nfiles) {
843 FREE(newofile, M_FILEDESC);
846 newofileflags = (char *) &newofile[nfiles];
848 * Copy the existing ofile and ofileflags arrays
849 * and zero the new portion of each array.
851 bcopy(fdp->fd_ofiles, newofile,
852 (i = sizeof(struct file *) * fdp->fd_nfiles));
853 bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
854 bcopy(fdp->fd_ofileflags, newofileflags,
855 (i = sizeof(char) * fdp->fd_nfiles));
856 bzero(newofileflags + i, nfiles * sizeof(char) - i);
857 if (fdp->fd_nfiles > NDFILE)
858 FREE(fdp->fd_ofiles, M_FILEDESC);
859 fdp->fd_ofiles = newofile;
860 fdp->fd_ofileflags = newofileflags;
861 fdp->fd_nfiles = nfiles;
868 * Check to see whether n user file descriptors
869 * are available to the process p.
876 register struct filedesc *fdp = p->p_fd;
877 register struct file **fpp;
878 register int i, lim, last;
880 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
881 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
884 last = min(fdp->fd_nfiles, lim);
885 fpp = &fdp->fd_ofiles[fdp->fd_freefile];
886 for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
887 if (*fpp == NULL && --n <= 0)
894 * Create a new open file structure and allocate
895 * a file descriptor for the process that refers to it.
898 falloc(p, resultfp, resultfd)
899 register struct proc *p;
900 struct file **resultfp;
903 register struct file *fp, *fq;
906 if (nfiles >= maxfiles) {
911 * Allocate a new file descriptor.
912 * If the process has file descriptor zero open, add to the list
913 * of open files at that point, otherwise put it at the front of
914 * the list of open files.
917 MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
918 bzero(fp, sizeof(struct file));
921 * wait until after malloc (which may have blocked) returns before
922 * allocating the slot, else a race might have shrunk it if we had
923 * allocated it before the malloc.
925 if ((error = fdalloc(p, 0, &i))) {
931 fp->f_cred = p->p_ucred;
932 fp->f_ops = &badfileops;
935 if ((fq = p->p_fd->fd_ofiles[0])) {
936 LIST_INSERT_AFTER(fq, fp, f_list);
938 LIST_INSERT_HEAD(&filehead, fp, f_list);
940 p->p_fd->fd_ofiles[i] = fp;
949 * Free a file descriptor.
953 register struct file *fp;
955 KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
956 LIST_REMOVE(fp, f_list);
963 * Build a new filedesc structure.
969 register struct filedesc0 *newfdp;
970 register struct filedesc *fdp = p->p_fd;
972 MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
973 M_FILEDESC, M_WAITOK);
974 bzero(newfdp, sizeof(struct filedesc0));
975 newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
976 if (newfdp->fd_fd.fd_cdir)
977 VREF(newfdp->fd_fd.fd_cdir);
978 newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
979 if (newfdp->fd_fd.fd_rdir)
980 VREF(newfdp->fd_fd.fd_rdir);
981 newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
982 if (newfdp->fd_fd.fd_jdir)
983 VREF(newfdp->fd_fd.fd_jdir);
985 /* Create the file descriptor table. */
986 newfdp->fd_fd.fd_refcnt = 1;
987 newfdp->fd_fd.fd_cmask = cmask;
988 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
989 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
990 newfdp->fd_fd.fd_nfiles = NDFILE;
991 newfdp->fd_fd.fd_knlistsize = -1;
993 return (&newfdp->fd_fd);
997 * Share a filedesc structure.
1003 p->p_fd->fd_refcnt++;
1008 * Copy a filedesc structure.
1014 register struct filedesc *newfdp, *fdp = p->p_fd;
1015 register struct file **fpp;
1018 /* Certain daemons might not have file descriptors. */
1022 MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1023 M_FILEDESC, M_WAITOK);
1024 bcopy(fdp, newfdp, sizeof(struct filedesc));
1025 if (newfdp->fd_cdir)
1026 VREF(newfdp->fd_cdir);
1027 if (newfdp->fd_rdir)
1028 VREF(newfdp->fd_rdir);
1029 if (newfdp->fd_jdir)
1030 VREF(newfdp->fd_jdir);
1031 newfdp->fd_refcnt = 1;
1034 * If the number of open files fits in the internal arrays
1035 * of the open file structure, use them, otherwise allocate
1036 * additional memory for the number of descriptors currently
1039 if (newfdp->fd_lastfile < NDFILE) {
1040 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1041 newfdp->fd_ofileflags =
1042 ((struct filedesc0 *) newfdp)->fd_dfileflags;
1046 * Compute the smallest multiple of NDEXTENT needed
1047 * for the file descriptors currently in use,
1048 * allowing the table to shrink.
1050 i = newfdp->fd_nfiles;
1051 while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1053 MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1054 M_FILEDESC, M_WAITOK);
1055 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1057 newfdp->fd_nfiles = i;
1058 bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1059 bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1062 * kq descriptors cannot be copied.
1064 if (newfdp->fd_knlistsize != -1) {
1065 fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
1066 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
1067 if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
1069 if (i < newfdp->fd_freefile)
1070 newfdp->fd_freefile = i;
1072 if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
1073 newfdp->fd_lastfile--;
1075 newfdp->fd_knlist = NULL;
1076 newfdp->fd_knlistsize = -1;
1077 newfdp->fd_knhash = NULL;
1078 newfdp->fd_knhashmask = 0;
1081 fpp = newfdp->fd_ofiles;
1082 for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1090 * Release a filedesc structure.
1096 register struct filedesc *fdp = p->p_fd;
1099 struct filedesc_to_leader *fdtol;
1104 /* Certain daemons might not have file descriptors. */
1108 /* Check for special need to clear POSIX style locks */
1110 if (fdtol != NULL) {
1111 KASSERT(fdtol->fdl_refcount > 0,
1112 ("filedesc_to_refcount botch: fdl_refcount=%d",
1113 fdtol->fdl_refcount));
1114 if (fdtol->fdl_refcount == 1 &&
1115 (p->p_leader->p_flag & P_ADVLOCK) != 0) {
1117 fpp = fdp->fd_ofiles;
1118 for (i = 0, fpp = fdp->fd_ofiles;
1119 i < fdp->fd_lastfile;
1122 (*fpp)->f_type != DTYPE_VNODE)
1126 lf.l_whence = SEEK_SET;
1129 lf.l_type = F_UNLCK;
1130 vp = (struct vnode *)fp->f_data;
1131 (void) VOP_ADVLOCK(vp,
1132 (caddr_t)p->p_leader,
1137 fpp = fdp->fd_ofiles + i;
1141 if (fdtol->fdl_refcount == 1) {
1142 if (fdp->fd_holdleaderscount > 0 &&
1143 (p->p_leader->p_flag & P_ADVLOCK) != 0) {
1145 * close() or do_dup() has cleared a reference
1146 * in a shared file descriptor table.
1148 fdp->fd_holdleaderswakeup = 1;
1149 tsleep(&fdp->fd_holdleaderscount,
1150 PLOCK, "fdlhold", 0);
1153 if (fdtol->fdl_holdcount > 0) {
1155 * Ensure that fdtol->fdl_leader
1156 * remains valid in closef().
1158 fdtol->fdl_wakeup = 1;
1159 tsleep(fdtol, PLOCK, "fdlhold", 0);
1163 fdtol->fdl_refcount--;
1164 if (fdtol->fdl_refcount == 0 &&
1165 fdtol->fdl_holdcount == 0) {
1166 fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
1167 fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
1172 FREE(fdtol, M_FILEDESC_TO_LEADER);
1174 if (--fdp->fd_refcnt > 0)
1177 * we are the last reference to the structure, we can
1178 * safely assume it will not change out from under us.
1180 fpp = fdp->fd_ofiles;
1181 for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1183 (void) closef(*fpp, p);
1185 if (fdp->fd_nfiles > NDFILE)
1186 FREE(fdp->fd_ofiles, M_FILEDESC);
1188 vrele(fdp->fd_cdir);
1190 vrele(fdp->fd_rdir);
1192 vrele(fdp->fd_jdir);
1194 FREE(fdp->fd_knlist, M_KQUEUE);
1196 FREE(fdp->fd_knhash, M_KQUEUE);
1197 FREE(fdp, M_FILEDESC);
1201 * For setugid programs, we don't want people to use that setugidness
1202 * to generate error messages which write to a file which would
1203 * otherwise be off-limits to the process.
1205 * This is a gross hack to plug the hole. A better solution would involve
1206 * a special vop or other form of generalized access control mechanism. We
1207 * go ahead and just reject all procfs file systems accesses as dangerous.
1209 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1210 * sufficient. We also don't check for setugidness since we know we are.
1213 is_unsafe(struct file *fp)
1215 if (fp->f_type == DTYPE_VNODE &&
1216 ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1222 * Make this setugid thing safe, if at all possible.
1228 struct filedesc *fdp = p->p_fd;
1231 /* Certain daemons might not have file descriptors. */
1236 * note: fdp->fd_ofiles may be reallocated out from under us while
1237 * we are blocked in a close. Be careful!
1239 for (i = 0; i <= fdp->fd_lastfile; i++) {
1242 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1246 if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1247 (void) munmapfd(p, i);
1249 if (i < fdp->fd_knlistsize)
1250 knote_fdclose(p, i);
1252 * NULL-out descriptor prior to close to avoid
1253 * a race while close blocks.
1255 fp = fdp->fd_ofiles[i];
1256 fdp->fd_ofiles[i] = NULL;
1257 fdp->fd_ofileflags[i] = 0;
1258 if (i < fdp->fd_freefile)
1259 fdp->fd_freefile = i;
1260 (void) closef(fp, p);
1263 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1268 * Close any files on exec?
1274 struct filedesc *fdp = p->p_fd;
1277 /* Certain daemons might not have file descriptors. */
1282 * We cannot cache fd_ofiles or fd_ofileflags since operations
1283 * may block and rip them out from under us.
1285 for (i = 0; i <= fdp->fd_lastfile; i++) {
1286 if (fdp->fd_ofiles[i] != NULL &&
1287 (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1291 if (fdp->fd_ofileflags[i] & UF_MAPPED)
1292 (void) munmapfd(p, i);
1294 if (i < fdp->fd_knlistsize)
1295 knote_fdclose(p, i);
1297 * NULL-out descriptor prior to close to avoid
1298 * a race while close blocks.
1300 fp = fdp->fd_ofiles[i];
1301 fdp->fd_ofiles[i] = NULL;
1302 fdp->fd_ofileflags[i] = 0;
1303 if (i < fdp->fd_freefile)
1304 fdp->fd_freefile = i;
1305 (void) closef(fp, p);
1308 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1313 * It is unsafe for set[ug]id processes to be started with file
1314 * descriptors 0..2 closed, as these descriptors are given implicit
1315 * significance in the Standard C library. fdcheckstd() will create a
1316 * descriptor referencing /dev/null for each of stdin, stdout, and
1317 * stderr that is not already open.
1323 struct nameidata nd;
1324 struct filedesc *fdp;
1327 int fd, i, error, flags, devnull;
1334 for (i = 0; i < 3; i++) {
1335 if (fdp->fd_ofiles[i] != NULL)
1338 error = falloc(p, &fp, &fd);
1341 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null",
1343 flags = FREAD | FWRITE;
1344 error = vn_open(&nd, flags, 0);
1346 fdp->fd_ofiles[i] = NULL;
1350 NDFREE(&nd, NDF_ONLY_PNBUF);
1351 fp->f_data = (caddr_t)nd.ni_vp;
1354 fp->f_type = DTYPE_VNODE;
1355 VOP_UNLOCK(nd.ni_vp, 0, p);
1358 error = fdalloc(p, 0, &fd);
1361 error = do_dup(fdp, devnull, fd, &retval, p);
1370 * Internal form of close.
1371 * Decrement reference count on file structure.
1372 * Note: p may be NULL when closing a file
1373 * that was being passed in a message.
1377 register struct file *fp;
1378 register struct proc *p;
1382 struct filedesc_to_leader *fdtol;
1387 * POSIX record locking dictates that any close releases ALL
1388 * locks owned by this process. This is handled by setting
1389 * a flag in the unlock to free ONLY locks obeying POSIX
1390 * semantics, and not to free BSD-style file locks.
1391 * If the descriptor was in a message, POSIX-style locks
1392 * aren't passed with the descriptor.
1395 fp->f_type == DTYPE_VNODE) {
1396 if ((p->p_leader->p_flag & P_ADVLOCK) != 0) {
1397 lf.l_whence = SEEK_SET;
1400 lf.l_type = F_UNLCK;
1401 vp = (struct vnode *)fp->f_data;
1402 (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
1406 if (fdtol != NULL) {
1408 * Handle special case where file descriptor table
1409 * is shared between multiple process leaders.
1411 for (fdtol = fdtol->fdl_next;
1412 fdtol != p->p_fdtol;
1413 fdtol = fdtol->fdl_next) {
1414 if ((fdtol->fdl_leader->p_flag &
1417 fdtol->fdl_holdcount++;
1418 lf.l_whence = SEEK_SET;
1421 lf.l_type = F_UNLCK;
1422 vp = (struct vnode *)fp->f_data;
1423 (void) VOP_ADVLOCK(vp,
1424 (caddr_t)p->p_leader,
1425 F_UNLCK, &lf, F_POSIX);
1426 fdtol->fdl_holdcount--;
1427 if (fdtol->fdl_holdcount == 0 &&
1428 fdtol->fdl_wakeup != 0) {
1429 fdtol->fdl_wakeup = 0;
1435 return (fdrop(fp, p));
1447 if (--fp->f_count > 0)
1449 if (fp->f_count < 0)
1450 panic("fdrop: count < 0");
1451 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1452 lf.l_whence = SEEK_SET;
1455 lf.l_type = F_UNLCK;
1456 vp = (struct vnode *)fp->f_data;
1457 (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1459 if (fp->f_ops != &badfileops)
1460 error = fo_close(fp, p);
1468 * Apply an advisory lock on a file descriptor.
1470 * Just attempt to get a record lock of the requested type on
1471 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1473 #ifndef _SYS_SYSPROTO_H_
1483 register struct flock_args *uap;
1485 register struct filedesc *fdp = p->p_fd;
1486 register struct file *fp;
1490 if ((unsigned)uap->fd >= fdp->fd_nfiles ||
1491 (fp = fdp->fd_ofiles[uap->fd]) == NULL)
1493 if (fp->f_type != DTYPE_VNODE)
1494 return (EOPNOTSUPP);
1495 vp = (struct vnode *)fp->f_data;
1496 lf.l_whence = SEEK_SET;
1499 if (uap->how & LOCK_UN) {
1500 lf.l_type = F_UNLCK;
1501 fp->f_flag &= ~FHASLOCK;
1502 return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
1504 if (uap->how & LOCK_EX)
1505 lf.l_type = F_WRLCK;
1506 else if (uap->how & LOCK_SH)
1507 lf.l_type = F_RDLCK;
1510 fp->f_flag |= FHASLOCK;
1511 if (uap->how & LOCK_NB)
1512 return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
1513 return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
1517 * File Descriptor pseudo-device driver (/dev/fd/).
1519 * Opening minor device N dup()s the file (if any) connected to file
1520 * descriptor N belonging to the calling process. Note that this driver
1521 * consists of only the ``open()'' routine, because all subsequent
1522 * references to this file will be direct to the other driver.
1526 fdopen(dev, mode, type, p)
1533 * XXX Kludge: set curproc->p_dupfd to contain the value of the
1534 * file descriptor being sought for duplication. The error
1535 * return ensures that the vnode for this device will be released
1536 * by vn_open. Open will detect this special error and take the
1537 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1538 * will simply report the error.
1540 p->p_dupfd = minor(dev);
1545 * Duplicate the specified descriptor to a free descriptor.
1548 dupfdopen(p, fdp, indx, dfd, mode, error)
1550 struct filedesc *fdp;
1555 register struct file *wfp;
1559 * If the to-be-dup'd fd number is greater than the allowed number
1560 * of file descriptors, or the fd to be dup'd has already been
1561 * closed, then reject.
1563 if ((u_int)dfd >= fdp->fd_nfiles ||
1564 (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1569 * There are two cases of interest here.
1571 * For ENODEV simply dup (dfd) to file descriptor
1572 * (indx) and return.
1574 * For ENXIO steal away the file structure from (dfd) and
1575 * store it in (indx). (dfd) is effectively closed by
1578 * Any other error code is just returned.
1583 * Check that the mode the file is being opened for is a
1584 * subset of the mode of the existing descriptor.
1586 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1588 fp = fdp->fd_ofiles[indx];
1590 if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1591 (void) munmapfd(p, indx);
1593 fdp->fd_ofiles[indx] = wfp;
1594 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1596 if (indx > fdp->fd_lastfile)
1597 fdp->fd_lastfile = indx;
1599 * we now own the reference to fp that the ofiles[] array
1600 * used to own. Release it.
1608 * Steal away the file pointer from dfd, and stuff it into indx.
1610 fp = fdp->fd_ofiles[indx];
1612 if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1613 (void) munmapfd(p, indx);
1615 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1616 fdp->fd_ofiles[dfd] = NULL;
1617 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1618 fdp->fd_ofileflags[dfd] = 0;
1621 * we now own the reference to fp that the ofiles[] array
1622 * used to own. Release it.
1627 * Complete the clean up of the filedesc structure by
1628 * recomputing the various hints.
1630 if (indx > fdp->fd_lastfile) {
1631 fdp->fd_lastfile = indx;
1633 while (fdp->fd_lastfile > 0 &&
1634 fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
1637 if (dfd < fdp->fd_freefile)
1638 fdp->fd_freefile = dfd;
1649 struct filedesc_to_leader *
1650 filedesc_to_leader_alloc(struct filedesc_to_leader *old,
1651 struct proc *leader)
1653 struct filedesc_to_leader *fdtol;
1655 MALLOC(fdtol, struct filedesc_to_leader *,
1656 sizeof(struct filedesc_to_leader),
1657 M_FILEDESC_TO_LEADER,
1659 fdtol->fdl_refcount = 1;
1660 fdtol->fdl_holdcount = 0;
1661 fdtol->fdl_wakeup = 0;
1662 fdtol->fdl_leader = leader;
1664 fdtol->fdl_next = old->fdl_next;
1665 fdtol->fdl_prev = old;
1666 old->fdl_next = fdtol;
1667 fdtol->fdl_next->fdl_prev = fdtol;
1669 fdtol->fdl_next = fdtol;
1670 fdtol->fdl_prev = fdtol;
1676 * Get file structures.
1679 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
1686 * overestimate by 10 files
1688 return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1689 (nfiles + 10) * sizeof(struct file)));
1692 error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1697 * followed by an array of file structures
1699 LIST_FOREACH(fp, &filehead, f_list) {
1700 error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1707 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1708 0, 0, sysctl_kern_file, "S,file", "Entire file table");
1710 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
1711 &maxfilesperproc, 0, "Maximum files allowed open per process");
1713 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
1714 &maxfiles, 0, "Maximum number of files");
1716 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
1717 &nfiles, 0, "System-wide number of open files");
1720 fildesc_drvinit(void *unused)
1724 for (fd = 0; fd < NUMFDESC; fd++)
1725 make_dev(&fildesc_cdevsw, fd,
1726 UID_BIN, GID_BIN, 0666, "fd/%d", fd);
1727 make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "stdin");
1728 make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "stdout");
1729 make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "stderr");
1732 struct fileops badfileops = {
1743 badfo_readwrite(fp, uio, cred, flags, p)
1755 badfo_ioctl(fp, com, data, p)
1766 badfo_poll(fp, events, cred, p)
1777 badfo_kqfilter(fp, kn)
1786 badfo_stat(fp, sb, p)
1804 SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1805 fildesc_drvinit,NULL)