Register keyword removal
[dragonfly.git] / sys / kern / sys_generic.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94
39 * $FreeBSD: src/sys/kern/sys_generic.c,v 1.55.2.10 2001/03/17 10:39:32 peter Exp $
1fd87d54 40 * $DragonFly: src/sys/kern/sys_generic.c,v 1.9 2003/07/26 19:42:11 rob Exp $
984263bc
MD
41 */
42
43#include "opt_ktrace.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/sysproto.h>
48#include <sys/filedesc.h>
49#include <sys/filio.h>
50#include <sys/fcntl.h>
51#include <sys/file.h>
52#include <sys/proc.h>
53#include <sys/signalvar.h>
54#include <sys/socketvar.h>
55#include <sys/uio.h>
56#include <sys/kernel.h>
57#include <sys/malloc.h>
58#include <sys/poll.h>
59#include <sys/resourcevar.h>
60#include <sys/sysctl.h>
61#include <sys/sysent.h>
62#include <sys/buf.h>
63#ifdef KTRACE
64#include <sys/ktrace.h>
65#endif
66#include <vm/vm.h>
67#include <vm/vm_page.h>
dadab5e9 68#include <sys/file2.h>
984263bc
MD
69
70#include <machine/limits.h>
71
72static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
73static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
74MALLOC_DEFINE(M_IOV, "iov", "large iov's");
75
90b9818c
MD
76static int pollscan __P((struct proc *, struct pollfd *, u_int, int *));
77static int selscan __P((struct proc *, fd_mask **, fd_mask **,
78 int, int *));
41c20dac 79static int dofileread __P((struct file *, int, void *,
90b9818c 80 size_t, off_t, int, int *));
41c20dac 81static int dofilewrite __P((struct file *, int,
90b9818c 82 const void *, size_t, off_t, int, int *));
984263bc
MD
83
84struct file*
85holdfp(fdp, fd, flag)
86 struct filedesc* fdp;
87 int fd, flag;
88{
89 struct file* fp;
90
91 if (((u_int)fd) >= fdp->fd_nfiles ||
92 (fp = fdp->fd_ofiles[fd]) == NULL ||
93 (fp->f_flag & flag) == 0) {
94 return (NULL);
95 }
96 fhold(fp);
97 return (fp);
98}
99
100/*
101 * Read system call.
102 */
984263bc 103int
41c20dac 104read(struct read_args *uap)
984263bc 105{
dadab5e9
MD
106 struct thread *td = curthread;
107 struct proc *p = td->td_proc;
41c20dac 108 struct file *fp;
984263bc
MD
109 int error;
110
dadab5e9 111 KKASSERT(p);
984263bc
MD
112 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
113 return (EBADF);
90b9818c
MD
114 error = dofileread(fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0,
115 &uap->lmsg.u.ms_result);
dadab5e9 116 fdrop(fp, td);
984263bc
MD
117 return(error);
118}
119
120/*
121 * Pread system call
122 */
984263bc 123int
41c20dac 124pread(struct pread_args *uap)
984263bc 125{
dadab5e9
MD
126 struct thread *td = curthread;
127 struct proc *p = td->td_proc;
41c20dac 128 struct file *fp;
984263bc
MD
129 int error;
130
dadab5e9 131 KKASSERT(p);
984263bc
MD
132 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
133 return (EBADF);
134 if (fp->f_type != DTYPE_VNODE) {
135 error = ESPIPE;
136 } else {
41c20dac 137 error = dofileread(fp, uap->fd, uap->buf, uap->nbyte,
90b9818c 138 uap->offset, FOF_OFFSET, &uap->lmsg.u.ms_result);
984263bc 139 }
dadab5e9 140 fdrop(fp, td);
984263bc
MD
141 return(error);
142}
143
144/*
145 * Code common for read and pread
146 */
147int
90b9818c 148dofileread(fp, fd, buf, nbyte, offset, flags, res)
984263bc
MD
149 struct file *fp;
150 int fd, flags;
151 void *buf;
152 size_t nbyte;
153 off_t offset;
90b9818c 154 int *res;
984263bc 155{
dadab5e9
MD
156 struct thread *td = curthread;
157 struct proc *p = td->td_proc;
984263bc
MD
158 struct uio auio;
159 struct iovec aiov;
160 long cnt, error = 0;
161#ifdef KTRACE
162 struct iovec ktriov;
163 struct uio ktruio;
164 int didktr = 0;
165#endif
166
167 aiov.iov_base = (caddr_t)buf;
168 aiov.iov_len = nbyte;
169 auio.uio_iov = &aiov;
170 auio.uio_iovcnt = 1;
171 auio.uio_offset = offset;
172 if (nbyte > INT_MAX)
173 return (EINVAL);
174 auio.uio_resid = nbyte;
175 auio.uio_rw = UIO_READ;
176 auio.uio_segflg = UIO_USERSPACE;
dadab5e9 177 auio.uio_td = td;
984263bc
MD
178#ifdef KTRACE
179 /*
180 * if tracing, save a copy of iovec
181 */
dadab5e9 182 if (KTRPOINT(td, KTR_GENIO)) {
984263bc
MD
183 ktriov = aiov;
184 ktruio = auio;
185 didktr = 1;
186 }
187#endif
188 cnt = nbyte;
189
dadab5e9 190 if ((error = fo_read(fp, &auio, fp->f_cred, flags, td))) {
984263bc
MD
191 if (auio.uio_resid != cnt && (error == ERESTART ||
192 error == EINTR || error == EWOULDBLOCK))
193 error = 0;
194 }
195 cnt -= auio.uio_resid;
196#ifdef KTRACE
197 if (didktr && error == 0) {
198 ktruio.uio_iov = &ktriov;
199 ktruio.uio_resid = cnt;
200 ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error);
201 }
202#endif
90b9818c 203 *res = cnt;
984263bc
MD
204 return (error);
205}
206
207/*
208 * Scatter read system call.
209 */
984263bc 210int
41c20dac 211readv(struct readv_args *uap)
984263bc 212{
dadab5e9
MD
213 struct thread *td = curthread;
214 struct proc *p = td->td_proc;
41c20dac
MD
215 struct file *fp;
216 struct filedesc *fdp = p->p_fd;
984263bc 217 struct uio auio;
41c20dac 218 struct iovec *iov;
984263bc
MD
219 struct iovec *needfree;
220 struct iovec aiov[UIO_SMALLIOV];
221 long i, cnt, error = 0;
222 u_int iovlen;
223#ifdef KTRACE
224 struct iovec *ktriov = NULL;
225 struct uio ktruio;
226#endif
227
228 if ((fp = holdfp(fdp, uap->fd, FREAD)) == NULL)
229 return (EBADF);
230 /* note: can't use iovlen until iovcnt is validated */
231 iovlen = uap->iovcnt * sizeof (struct iovec);
232 if (uap->iovcnt > UIO_SMALLIOV) {
233 if (uap->iovcnt > UIO_MAXIOV)
234 return (EINVAL);
235 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
236 needfree = iov;
237 } else {
238 iov = aiov;
239 needfree = NULL;
240 }
241 auio.uio_iov = iov;
242 auio.uio_iovcnt = uap->iovcnt;
243 auio.uio_rw = UIO_READ;
244 auio.uio_segflg = UIO_USERSPACE;
dadab5e9 245 auio.uio_td = td;
984263bc
MD
246 auio.uio_offset = -1;
247 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
248 goto done;
249 auio.uio_resid = 0;
250 for (i = 0; i < uap->iovcnt; i++) {
251 if (iov->iov_len > INT_MAX - auio.uio_resid) {
252 error = EINVAL;
253 goto done;
254 }
255 auio.uio_resid += iov->iov_len;
256 iov++;
257 }
258#ifdef KTRACE
259 /*
260 * if tracing, save a copy of iovec
261 */
dadab5e9 262 if (KTRPOINT(td, KTR_GENIO)) {
984263bc
MD
263 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
264 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
265 ktruio = auio;
266 }
267#endif
268 cnt = auio.uio_resid;
dadab5e9 269 if ((error = fo_read(fp, &auio, fp->f_cred, 0, td))) {
984263bc
MD
270 if (auio.uio_resid != cnt && (error == ERESTART ||
271 error == EINTR || error == EWOULDBLOCK))
272 error = 0;
273 }
274 cnt -= auio.uio_resid;
275#ifdef KTRACE
276 if (ktriov != NULL) {
277 if (error == 0) {
278 ktruio.uio_iov = ktriov;
279 ktruio.uio_resid = cnt;
280 ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktruio,
281 error);
282 }
283 FREE(ktriov, M_TEMP);
284 }
285#endif
90b9818c 286 uap->lmsg.u.ms_result = cnt;
984263bc 287done:
dadab5e9 288 fdrop(fp, td);
984263bc
MD
289 if (needfree)
290 FREE(needfree, M_IOV);
291 return (error);
292}
293
294/*
295 * Write system call
296 */
984263bc 297int
41c20dac 298write(struct write_args *uap)
984263bc 299{
dadab5e9
MD
300 struct thread *td = curthread;
301 struct proc *p = td->td_proc;
41c20dac 302 struct file *fp;
984263bc
MD
303 int error;
304
dadab5e9
MD
305 KKASSERT(p);
306
984263bc
MD
307 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
308 return (EBADF);
90b9818c
MD
309 error = dofilewrite(fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0,
310 &uap->lmsg.u.ms_result);
dadab5e9 311 fdrop(fp, td);
984263bc
MD
312 return(error);
313}
314
315/*
316 * Pwrite system call
317 */
984263bc 318int
41c20dac 319pwrite(struct pwrite_args *uap)
984263bc 320{
dadab5e9
MD
321 struct thread *td = curthread;
322 struct proc *p = td->td_proc;
41c20dac 323 struct file *fp;
984263bc
MD
324 int error;
325
dadab5e9 326 KKASSERT(p);
984263bc
MD
327 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
328 return (EBADF);
329 if (fp->f_type != DTYPE_VNODE) {
330 error = ESPIPE;
331 } else {
41c20dac 332 error = dofilewrite(fp, uap->fd, uap->buf, uap->nbyte,
90b9818c 333 uap->offset, FOF_OFFSET, &uap->lmsg.u.ms_result);
984263bc 334 }
dadab5e9 335 fdrop(fp, td);
984263bc
MD
336 return(error);
337}
338
339static int
41c20dac
MD
340dofilewrite(
341 struct file *fp,
342 int fd,
343 const void *buf,
344 size_t nbyte,
345 off_t offset,
90b9818c
MD
346 int flags,
347 int *res
41c20dac 348) {
dadab5e9
MD
349 struct thread *td = curthread;
350 struct proc *p = td->td_proc;
984263bc
MD
351 struct uio auio;
352 struct iovec aiov;
353 long cnt, error = 0;
354#ifdef KTRACE
355 struct iovec ktriov;
356 struct uio ktruio;
357 int didktr = 0;
358#endif
359
360 aiov.iov_base = (void *)(uintptr_t)buf;
361 aiov.iov_len = nbyte;
362 auio.uio_iov = &aiov;
363 auio.uio_iovcnt = 1;
364 auio.uio_offset = offset;
365 if (nbyte > INT_MAX)
366 return (EINVAL);
367 auio.uio_resid = nbyte;
368 auio.uio_rw = UIO_WRITE;
369 auio.uio_segflg = UIO_USERSPACE;
dadab5e9 370 auio.uio_td = td;
984263bc
MD
371#ifdef KTRACE
372 /*
373 * if tracing, save a copy of iovec and uio
374 */
dadab5e9 375 if (KTRPOINT(td, KTR_GENIO)) {
984263bc
MD
376 ktriov = aiov;
377 ktruio = auio;
378 didktr = 1;
379 }
380#endif
381 cnt = nbyte;
382 if (fp->f_type == DTYPE_VNODE)
383 bwillwrite();
dadab5e9 384 if ((error = fo_write(fp, &auio, fp->f_cred, flags, td))) {
984263bc
MD
385 if (auio.uio_resid != cnt && (error == ERESTART ||
386 error == EINTR || error == EWOULDBLOCK))
387 error = 0;
388 if (error == EPIPE)
389 psignal(p, SIGPIPE);
390 }
391 cnt -= auio.uio_resid;
392#ifdef KTRACE
393 if (didktr && error == 0) {
394 ktruio.uio_iov = &ktriov;
395 ktruio.uio_resid = cnt;
396 ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error);
397 }
398#endif
90b9818c 399 *res = cnt;
984263bc
MD
400 return (error);
401}
402
403/*
404 * Gather write system call
405 */
984263bc 406int
41c20dac 407writev(struct writev_args *uap)
984263bc 408{
dadab5e9
MD
409 struct thread *td = curthread;
410 struct proc *p = td->td_proc;
41c20dac 411 struct file *fp;
dadab5e9 412 struct filedesc *fdp;
984263bc 413 struct uio auio;
41c20dac 414 struct iovec *iov;
984263bc
MD
415 struct iovec *needfree;
416 struct iovec aiov[UIO_SMALLIOV];
417 long i, cnt, error = 0;
418 u_int iovlen;
419#ifdef KTRACE
420 struct iovec *ktriov = NULL;
421 struct uio ktruio;
422#endif
423
dadab5e9
MD
424 KKASSERT(p);
425 fdp = p->p_fd;
426
984263bc
MD
427 if ((fp = holdfp(fdp, uap->fd, FWRITE)) == NULL)
428 return (EBADF);
429 /* note: can't use iovlen until iovcnt is validated */
430 iovlen = uap->iovcnt * sizeof (struct iovec);
431 if (uap->iovcnt > UIO_SMALLIOV) {
432 if (uap->iovcnt > UIO_MAXIOV) {
433 needfree = NULL;
434 error = EINVAL;
435 goto done;
436 }
437 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
438 needfree = iov;
439 } else {
440 iov = aiov;
441 needfree = NULL;
442 }
443 auio.uio_iov = iov;
444 auio.uio_iovcnt = uap->iovcnt;
445 auio.uio_rw = UIO_WRITE;
446 auio.uio_segflg = UIO_USERSPACE;
dadab5e9 447 auio.uio_td = td;
984263bc
MD
448 auio.uio_offset = -1;
449 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
450 goto done;
451 auio.uio_resid = 0;
452 for (i = 0; i < uap->iovcnt; i++) {
453 if (iov->iov_len > INT_MAX - auio.uio_resid) {
454 error = EINVAL;
455 goto done;
456 }
457 auio.uio_resid += iov->iov_len;
458 iov++;
459 }
460#ifdef KTRACE
461 /*
462 * if tracing, save a copy of iovec and uio
463 */
dadab5e9 464 if (KTRPOINT(td, KTR_GENIO)) {
984263bc
MD
465 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
466 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
467 ktruio = auio;
468 }
469#endif
470 cnt = auio.uio_resid;
471 if (fp->f_type == DTYPE_VNODE)
472 bwillwrite();
dadab5e9 473 if ((error = fo_write(fp, &auio, fp->f_cred, 0, td))) {
984263bc
MD
474 if (auio.uio_resid != cnt && (error == ERESTART ||
475 error == EINTR || error == EWOULDBLOCK))
476 error = 0;
477 if (error == EPIPE)
478 psignal(p, SIGPIPE);
479 }
480 cnt -= auio.uio_resid;
481#ifdef KTRACE
482 if (ktriov != NULL) {
483 if (error == 0) {
484 ktruio.uio_iov = ktriov;
485 ktruio.uio_resid = cnt;
486 ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, &ktruio,
487 error);
488 }
489 FREE(ktriov, M_TEMP);
490 }
491#endif
90b9818c 492 uap->lmsg.u.ms_result = cnt;
984263bc 493done:
dadab5e9 494 fdrop(fp, td);
984263bc
MD
495 if (needfree)
496 FREE(needfree, M_IOV);
497 return (error);
498}
499
500/*
501 * Ioctl system call
502 */
984263bc
MD
503/* ARGSUSED */
504int
41c20dac 505ioctl(struct ioctl_args *uap)
984263bc 506{
dadab5e9
MD
507 struct thread *td = curthread;
508 struct proc *p = td->td_proc;
41c20dac
MD
509 struct file *fp;
510 struct filedesc *fdp;
511 u_long com;
984263bc 512 int error;
1fd87d54 513 u_int size;
984263bc
MD
514 caddr_t data, memp;
515 int tmp;
516#define STK_PARAMS 128
517 union {
518 char stkbuf[STK_PARAMS];
519 long align;
520 } ubuf;
521
dadab5e9 522 KKASSERT(p);
984263bc
MD
523 fdp = p->p_fd;
524 if ((u_int)uap->fd >= fdp->fd_nfiles ||
525 (fp = fdp->fd_ofiles[uap->fd]) == NULL)
526 return (EBADF);
527
528 if ((fp->f_flag & (FREAD | FWRITE)) == 0)
529 return (EBADF);
530
531 switch (com = uap->com) {
532 case FIONCLEX:
533 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
534 return (0);
535 case FIOCLEX:
536 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
537 return (0);
538 }
539
540 /*
541 * Interpret high order word to find amount of data to be
542 * copied to/from the user's address space.
543 */
544 size = IOCPARM_LEN(com);
545 if (size > IOCPARM_MAX)
546 return (ENOTTY);
547
548 fhold(fp);
549
550 memp = NULL;
551 if (size > sizeof (ubuf.stkbuf)) {
552 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
553 data = memp;
554 } else {
555 data = ubuf.stkbuf;
556 }
557 if (com&IOC_IN) {
558 if (size) {
559 error = copyin(uap->data, data, (u_int)size);
560 if (error) {
561 if (memp)
562 free(memp, M_IOCTLOPS);
dadab5e9 563 fdrop(fp, td);
984263bc
MD
564 return (error);
565 }
566 } else {
567 *(caddr_t *)data = uap->data;
568 }
569 } else if ((com&IOC_OUT) && size) {
570 /*
571 * Zero the buffer so the user always
572 * gets back something deterministic.
573 */
574 bzero(data, size);
575 } else if (com&IOC_VOID) {
576 *(caddr_t *)data = uap->data;
577 }
578
579 switch (com) {
580
581 case FIONBIO:
582 if ((tmp = *(int *)data))
583 fp->f_flag |= FNONBLOCK;
584 else
585 fp->f_flag &= ~FNONBLOCK;
dadab5e9 586 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
984263bc
MD
587 break;
588
589 case FIOASYNC:
590 if ((tmp = *(int *)data))
591 fp->f_flag |= FASYNC;
592 else
593 fp->f_flag &= ~FASYNC;
dadab5e9 594 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td);
984263bc
MD
595 break;
596
597 default:
dadab5e9 598 error = fo_ioctl(fp, com, data, td);
984263bc
MD
599 /*
600 * Copy any data to user, size was
601 * already set and checked above.
602 */
603 if (error == 0 && (com&IOC_OUT) && size)
604 error = copyout(data, uap->data, (u_int)size);
605 break;
606 }
607 if (memp)
608 free(memp, M_IOCTLOPS);
dadab5e9 609 fdrop(fp, td);
984263bc
MD
610 return (error);
611}
612
613static int nselcoll; /* Select collisions since boot */
614int selwait;
615SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
616
617/*
618 * Select system call.
619 */
984263bc 620int
41c20dac 621select(struct select_args *uap)
984263bc 622{
41c20dac
MD
623 struct proc *p = curproc;
624
984263bc
MD
625 /*
626 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
627 * infds with the new FD_SETSIZE of 1024, and more than enough for
628 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
629 * of 256.
630 */
631 fd_mask s_selbits[howmany(2048, NFDBITS)];
632 fd_mask *ibits[3], *obits[3], *selbits, *sbp;
633 struct timeval atv, rtv, ttv;
634 int s, ncoll, error, timo;
635 u_int nbufbytes, ncpbytes, nfdbits;
636
637 if (uap->nd < 0)
638 return (EINVAL);
639 if (uap->nd > p->p_fd->fd_nfiles)
640 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */
641
642 /*
643 * Allocate just enough bits for the non-null fd_sets. Use the
644 * preallocated auto buffer if possible.
645 */
646 nfdbits = roundup(uap->nd, NFDBITS);
647 ncpbytes = nfdbits / NBBY;
648 nbufbytes = 0;
649 if (uap->in != NULL)
650 nbufbytes += 2 * ncpbytes;
651 if (uap->ou != NULL)
652 nbufbytes += 2 * ncpbytes;
653 if (uap->ex != NULL)
654 nbufbytes += 2 * ncpbytes;
655 if (nbufbytes <= sizeof s_selbits)
656 selbits = &s_selbits[0];
657 else
658 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
659
660 /*
661 * Assign pointers into the bit buffers and fetch the input bits.
662 * Put the output buffers together so that they can be bzeroed
663 * together.
664 */
665 sbp = selbits;
666#define getbits(name, x) \
667 do { \
668 if (uap->name == NULL) \
669 ibits[x] = NULL; \
670 else { \
671 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \
672 obits[x] = sbp; \
673 sbp += ncpbytes / sizeof *sbp; \
674 error = copyin(uap->name, ibits[x], ncpbytes); \
675 if (error != 0) \
676 goto done; \
677 } \
678 } while (0)
679 getbits(in, 0);
680 getbits(ou, 1);
681 getbits(ex, 2);
682#undef getbits
683 if (nbufbytes != 0)
684 bzero(selbits, nbufbytes / 2);
685
686 if (uap->tv) {
687 error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
688 sizeof (atv));
689 if (error)
690 goto done;
691 if (itimerfix(&atv)) {
692 error = EINVAL;
693 goto done;
694 }
695 getmicrouptime(&rtv);
696 timevaladd(&atv, &rtv);
697 } else {
698 atv.tv_sec = 0;
699 atv.tv_usec = 0;
700 }
701 timo = 0;
702retry:
703 ncoll = nselcoll;
704 p->p_flag |= P_SELECT;
90b9818c
MD
705 error = selscan(p, ibits, obits, uap->nd, &uap->lmsg.u.ms_result);
706 if (error || uap->lmsg.u.ms_result)
984263bc
MD
707 goto done;
708 if (atv.tv_sec || atv.tv_usec) {
709 getmicrouptime(&rtv);
710 if (timevalcmp(&rtv, &atv, >=))
711 goto done;
712 ttv = atv;
713 timevalsub(&ttv, &rtv);
714 timo = ttv.tv_sec > 24 * 60 * 60 ?
715 24 * 60 * 60 * hz : tvtohz(&ttv);
716 }
717 s = splhigh();
718 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
719 splx(s);
720 goto retry;
721 }
722 p->p_flag &= ~P_SELECT;
723
377d4740 724 error = tsleep((caddr_t)&selwait, PCATCH, "select", timo);
984263bc
MD
725
726 splx(s);
727 if (error == 0)
728 goto retry;
729done:
730 p->p_flag &= ~P_SELECT;
731 /* select is not restarted after signals... */
732 if (error == ERESTART)
733 error = EINTR;
734 if (error == EWOULDBLOCK)
735 error = 0;
736#define putbits(name, x) \
737 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \
738 error = error2;
739 if (error == 0) {
740 int error2;
741
742 putbits(in, 0);
743 putbits(ou, 1);
744 putbits(ex, 2);
745#undef putbits
746 }
747 if (selbits != &s_selbits[0])
748 free(selbits, M_SELECT);
749 return (error);
750}
751
752static int
90b9818c 753selscan(struct proc *p, fd_mask **ibits, fd_mask **obits, int nfd, int *res)
984263bc 754{
dadab5e9 755 struct thread *td = p->p_thread;
984263bc
MD
756 struct filedesc *fdp = p->p_fd;
757 int msk, i, fd;
758 fd_mask bits;
759 struct file *fp;
760 int n = 0;
761 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */
762 static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
763
764 for (msk = 0; msk < 3; msk++) {
765 if (ibits[msk] == NULL)
766 continue;
767 for (i = 0; i < nfd; i += NFDBITS) {
768 bits = ibits[msk][i/NFDBITS];
769 /* ffs(int mask) not portable, fd_mask is long */
770 for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
771 if (!(bits & 1))
772 continue;
773 fp = fdp->fd_ofiles[fd];
774 if (fp == NULL)
775 return (EBADF);
dadab5e9 776 if (fo_poll(fp, flag[msk], fp->f_cred, td)) {
984263bc
MD
777 obits[msk][(fd)/NFDBITS] |=
778 ((fd_mask)1 << ((fd) % NFDBITS));
779 n++;
780 }
781 }
782 }
783 }
90b9818c 784 *res = n;
984263bc
MD
785 return (0);
786}
787
788/*
789 * Poll system call.
790 */
984263bc 791int
41c20dac 792poll(struct poll_args *uap)
984263bc
MD
793{
794 caddr_t bits;
795 char smallbits[32 * sizeof(struct pollfd)];
796 struct timeval atv, rtv, ttv;
797 int s, ncoll, error = 0, timo;
798 u_int nfds;
799 size_t ni;
41c20dac 800 struct proc *p = curproc;
984263bc
MD
801
802 nfds = SCARG(uap, nfds);
803 /*
804 * This is kinda bogus. We have fd limits, but that is not
805 * really related to the size of the pollfd array. Make sure
806 * we let the process use at least FD_SETSIZE entries and at
807 * least enough for the current limits. We want to be reasonably
808 * safe, but not overly restrictive.
809 */
810 if (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && nfds > FD_SETSIZE)
811 return (EINVAL);
812 ni = nfds * sizeof(struct pollfd);
813 if (ni > sizeof(smallbits))
814 bits = malloc(ni, M_TEMP, M_WAITOK);
815 else
816 bits = smallbits;
817 error = copyin(SCARG(uap, fds), bits, ni);
818 if (error)
819 goto done;
820 if (SCARG(uap, timeout) != INFTIM) {
821 atv.tv_sec = SCARG(uap, timeout) / 1000;
822 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
823 if (itimerfix(&atv)) {
824 error = EINVAL;
825 goto done;
826 }
827 getmicrouptime(&rtv);
828 timevaladd(&atv, &rtv);
829 } else {
830 atv.tv_sec = 0;
831 atv.tv_usec = 0;
832 }
833 timo = 0;
834retry:
835 ncoll = nselcoll;
836 p->p_flag |= P_SELECT;
90b9818c
MD
837 error = pollscan(p, (struct pollfd *)bits, nfds, &uap->lmsg.u.ms_result);
838 if (error || uap->lmsg.u.ms_result)
984263bc
MD
839 goto done;
840 if (atv.tv_sec || atv.tv_usec) {
841 getmicrouptime(&rtv);
842 if (timevalcmp(&rtv, &atv, >=))
843 goto done;
844 ttv = atv;
845 timevalsub(&ttv, &rtv);
846 timo = ttv.tv_sec > 24 * 60 * 60 ?
847 24 * 60 * 60 * hz : tvtohz(&ttv);
848 }
849 s = splhigh();
850 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
851 splx(s);
852 goto retry;
853 }
854 p->p_flag &= ~P_SELECT;
377d4740 855 error = tsleep((caddr_t)&selwait, PCATCH, "poll", timo);
984263bc
MD
856 splx(s);
857 if (error == 0)
858 goto retry;
859done:
860 p->p_flag &= ~P_SELECT;
861 /* poll is not restarted after signals... */
862 if (error == ERESTART)
863 error = EINTR;
864 if (error == EWOULDBLOCK)
865 error = 0;
866 if (error == 0) {
867 error = copyout(bits, SCARG(uap, fds), ni);
868 if (error)
869 goto out;
870 }
871out:
872 if (ni > sizeof(smallbits))
873 free(bits, M_TEMP);
874 return (error);
875}
876
877static int
90b9818c 878pollscan(struct proc *p, struct pollfd *fds, u_int nfd, int *res)
984263bc 879{
dadab5e9
MD
880 struct thread *td = p->p_thread;
881 struct filedesc *fdp = p->p_fd;
984263bc
MD
882 int i;
883 struct file *fp;
884 int n = 0;
885
886 for (i = 0; i < nfd; i++, fds++) {
887 if (fds->fd >= fdp->fd_nfiles) {
888 fds->revents = POLLNVAL;
889 n++;
890 } else if (fds->fd < 0) {
891 fds->revents = 0;
892 } else {
893 fp = fdp->fd_ofiles[fds->fd];
894 if (fp == NULL) {
895 fds->revents = POLLNVAL;
896 n++;
897 } else {
898 /*
899 * Note: backend also returns POLLHUP and
900 * POLLERR if appropriate.
901 */
902 fds->revents = fo_poll(fp, fds->events,
dadab5e9 903 fp->f_cred, td);
984263bc
MD
904 if (fds->revents != 0)
905 n++;
906 }
907 }
908 }
90b9818c 909 *res = n;
984263bc
MD
910 return (0);
911}
912
/*
 * OpenBSD poll system call.
 * XXX this isn't quite a true representation.. OpenBSD uses select ops.
 *
 * The argument structure is reinterpreted as a poll_args and handed
 * straight to poll().
 */
int
openbsd_poll(struct openbsd_poll_args *uap)
{
	struct poll_args *pargs = (struct poll_args *)uap;

	return (poll(pargs));
}
922
/*
 * Poll routine that always reports readiness: returns whichever of the
 * normal read/write events (POLLIN, POLLOUT, POLLRDNORM, POLLWRNORM) were
 * requested in events.  dev and td are not looked at.
 */
/*ARGSUSED*/
int
seltrue(dev_t dev, int events, struct thread *td)
{
	const int always_ready = POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM;

	return (events & always_ready);
}
929
930/*
41c20dac
MD
931 * Record a select request. A global wait must be used since a process/thread
932 * might go away after recording its request.
984263bc
MD
933 */
934void
41c20dac 935selrecord(struct thread *selector, struct selinfo *sip)
984263bc
MD
936{
937 struct proc *p;
938 pid_t mypid;
939
41c20dac
MD
940 if ((p = selector->td_proc) == NULL)
941 panic("selrecord: thread needs a process");
942
943 mypid = p->p_pid;
984263bc
MD
944 if (sip->si_pid == mypid)
945 return;
946 if (sip->si_pid && (p = pfind(sip->si_pid)) &&
41c20dac 947 p->p_wchan == (caddr_t)&selwait) {
984263bc 948 sip->si_flags |= SI_COLL;
41c20dac 949 } else {
984263bc 950 sip->si_pid = mypid;
41c20dac 951 }
984263bc
MD
952}
953
954/*
955 * Do a wakeup when a selectable event occurs.
956 */
957void
41c20dac 958selwakeup(struct selinfo *sip)
984263bc 959{
41c20dac 960 struct proc *p;
984263bc
MD
961 int s;
962
963 if (sip->si_pid == 0)
964 return;
965 if (sip->si_flags & SI_COLL) {
966 nselcoll++;
967 sip->si_flags &= ~SI_COLL;
41c20dac 968 wakeup((caddr_t)&selwait); /* YYY fixable */
984263bc
MD
969 }
970 p = pfind(sip->si_pid);
971 sip->si_pid = 0;
972 if (p != NULL) {
973 s = splhigh();
974 if (p->p_wchan == (caddr_t)&selwait) {
975 if (p->p_stat == SSLEEP)
976 setrunnable(p);
977 else
0cfcada1 978 unsleep(p->p_thread);
984263bc
MD
979 } else if (p->p_flag & P_SELECT)
980 p->p_flag &= ~P_SELECT;
981 splx(s);
982 }
983}
41c20dac 984