Merge from vendor branch OPENSSH:
[dragonfly.git] / sys / kern / sys_generic.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94
39 * $FreeBSD: src/sys/kern/sys_generic.c,v 1.55.2.10 2001/03/17 10:39:32 peter Exp $
b13267a5 40 * $DragonFly: src/sys/kern/sys_generic.c,v 1.37 2006/09/10 01:26:39 dillon Exp $
984263bc
MD
41 */
42
43#include "opt_ktrace.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/sysproto.h>
48#include <sys/filedesc.h>
49#include <sys/filio.h>
50#include <sys/fcntl.h>
51#include <sys/file.h>
52#include <sys/proc.h>
53#include <sys/signalvar.h>
54#include <sys/socketvar.h>
55#include <sys/uio.h>
56#include <sys/kernel.h>
ba023347 57#include <sys/kern_syscall.h>
984263bc 58#include <sys/malloc.h>
a0c5fc96 59#include <sys/mapped_ioctl.h>
984263bc 60#include <sys/poll.h>
a0c5fc96 61#include <sys/queue.h>
984263bc
MD
62#include <sys/resourcevar.h>
63#include <sys/sysctl.h>
64#include <sys/sysent.h>
65#include <sys/buf.h>
66#ifdef KTRACE
67#include <sys/ktrace.h>
68#endif
69#include <vm/vm.h>
70#include <vm/vm_page.h>
dadab5e9 71#include <sys/file2.h>
984263bc
MD
72
73#include <machine/limits.h>
74
75static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
a0c5fc96 76static MALLOC_DEFINE(M_IOCTLMAP, "ioctlmap", "mapped ioctl handler buffer");
984263bc
MD
77static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
78MALLOC_DEFINE(M_IOV, "iov", "large iov's");
79
402ed7e1
RG
80static int pollscan (struct proc *, struct pollfd *, u_int, int *);
81static int selscan (struct proc *, fd_mask **, fd_mask **,
82 int, int *);
7f83ed38
MD
83static int dofileread(int, struct file *, struct uio *, int, int *);
84static int dofilewrite(int, struct file *, struct uio *, int, int *);
85
984263bc
MD
86/*
87 * Read system call.
f832287e
MD
88 *
89 * MPSAFE
984263bc 90 */
984263bc 91int
753fd850 92sys_read(struct read_args *uap)
984263bc 93{
dadab5e9 94 struct thread *td = curthread;
ba023347
DRJ
95 struct uio auio;
96 struct iovec aiov;
984263bc
MD
97 int error;
98
ba023347
DRJ
99 aiov.iov_base = uap->buf;
100 aiov.iov_len = uap->nbyte;
101 auio.uio_iov = &aiov;
102 auio.uio_iovcnt = 1;
103 auio.uio_offset = -1;
104 auio.uio_resid = uap->nbyte;
105 auio.uio_rw = UIO_READ;
106 auio.uio_segflg = UIO_USERSPACE;
107 auio.uio_td = td;
108
ef5c76d7
MD
109 if (auio.uio_resid < 0)
110 error = EINVAL;
111 else
112 error = kern_preadv(uap->fd, &auio, 0, &uap->sysmsg_result);
984263bc
MD
113 return(error);
114}
115
116/*
7f83ed38 117 * Positioned (Pread) read system call
f832287e
MD
118 *
119 * MPSAFE
984263bc 120 */
984263bc 121int
9ba76b73 122sys___pread(struct __pread_args *uap)
984263bc 123{
dadab5e9 124 struct thread *td = curthread;
ba023347
DRJ
125 struct uio auio;
126 struct iovec aiov;
984263bc 127 int error;
9ba76b73 128 int flags;
984263bc 129
ba023347
DRJ
130 aiov.iov_base = uap->buf;
131 aiov.iov_len = uap->nbyte;
132 auio.uio_iov = &aiov;
133 auio.uio_iovcnt = 1;
134 auio.uio_offset = uap->offset;
135 auio.uio_resid = uap->nbyte;
136 auio.uio_rw = UIO_READ;
137 auio.uio_segflg = UIO_USERSPACE;
138 auio.uio_td = td;
139
9ba76b73
MD
140 flags = uap->flags & O_FMASK;
141 if (uap->offset != (off_t)-1)
142 flags |= O_FOFFSET;
143
ef5c76d7
MD
144 if (auio.uio_resid < 0)
145 error = EINVAL;
146 else
9ba76b73 147 error = kern_preadv(uap->fd, &auio, flags, &uap->sysmsg_result);
984263bc
MD
148 return(error);
149}
150
7f83ed38
MD
151/*
152 * Scatter read system call.
f832287e
MD
153 *
154 * MPSAFE
7f83ed38 155 */
984263bc 156int
753fd850 157sys_readv(struct readv_args *uap)
984263bc 158{
dadab5e9 159 struct thread *td = curthread;
984263bc 160 struct uio auio;
ba023347
DRJ
161 struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
162 int error;
984263bc 163
ba023347 164 error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt,
ef5c76d7 165 &auio.uio_resid);
ba023347
DRJ
166 if (error)
167 return (error);
168 auio.uio_iov = iov;
169 auio.uio_iovcnt = uap->iovcnt;
170 auio.uio_offset = -1;
984263bc
MD
171 auio.uio_rw = UIO_READ;
172 auio.uio_segflg = UIO_USERSPACE;
dadab5e9 173 auio.uio_td = td;
984263bc 174
7f83ed38 175 error = kern_preadv(uap->fd, &auio, 0, &uap->sysmsg_result);
ba023347
DRJ
176
177 iovec_free(&iov, aiov);
984263bc
MD
178 return (error);
179}
180
7f83ed38
MD
181
182/*
183 * Scatter positioned read system call.
f832287e
MD
184 *
185 * MPSAFE
7f83ed38
MD
186 */
187int
9ba76b73 188sys___preadv(struct __preadv_args *uap)
7f83ed38
MD
189{
190 struct thread *td = curthread;
191 struct uio auio;
192 struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
193 int error;
9ba76b73 194 int flags;
7f83ed38
MD
195
196 error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt,
ef5c76d7 197 &auio.uio_resid);
7f83ed38
MD
198 if (error)
199 return (error);
200 auio.uio_iov = iov;
201 auio.uio_iovcnt = uap->iovcnt;
202 auio.uio_offset = uap->offset;
203 auio.uio_rw = UIO_READ;
204 auio.uio_segflg = UIO_USERSPACE;
205 auio.uio_td = td;
206
9ba76b73
MD
207 flags = uap->flags & O_FMASK;
208 if (uap->offset != (off_t)-1)
209 flags |= O_FOFFSET;
210
211 error = kern_preadv(uap->fd, &auio, flags, &uap->sysmsg_result);
7f83ed38
MD
212
213 iovec_free(&iov, aiov);
214 return(error);
215}
216
f832287e
MD
217/*
218 * MPSAFE
219 */
984263bc 220int
7f83ed38 221kern_preadv(int fd, struct uio *auio, int flags, int *res)
984263bc 222{
dadab5e9
MD
223 struct thread *td = curthread;
224 struct proc *p = td->td_proc;
41c20dac 225 struct file *fp;
7f83ed38 226 int error;
984263bc 227
ba023347
DRJ
228 KKASSERT(p);
229
228b401d 230 fp = holdfp(p->p_fd, fd, FREAD);
ba023347 231 if (fp == NULL)
984263bc 232 return (EBADF);
9ba76b73 233 if (flags & O_FOFFSET && fp->f_type != DTYPE_VNODE) {
ba023347 234 error = ESPIPE;
7f83ed38 235 } else if (auio->uio_resid < 0) {
ba023347 236 error = EINVAL;
7f83ed38
MD
237 } else {
238 error = dofileread(fd, fp, auio, flags, res);
984263bc 239 }
9f87144f 240 fdrop(fp);
7f83ed38
MD
241 return(error);
242}
243
244/*
245 * Common code for readv and preadv that reads data in
246 * from a file using the passed in uio, offset, and flags.
f832287e
MD
247 *
248 * MPALMOSTSAFE - ktrace needs help
7f83ed38
MD
249 */
250static int
251dofileread(int fd, struct file *fp, struct uio *auio, int flags, int *res)
252{
253 struct thread *td = curthread;
254 struct proc *p = td->td_proc;
7f83ed38 255 int error;
ef5c76d7 256 int len;
7f83ed38
MD
257#ifdef KTRACE
258 struct iovec *ktriov = NULL;
259 struct uio ktruio;
260#endif
261
984263bc
MD
262#ifdef KTRACE
263 /*
264 * if tracing, save a copy of iovec
265 */
dadab5e9 266 if (KTRPOINT(td, KTR_GENIO)) {
ba023347
DRJ
267 int iovlen = auio->uio_iovcnt * sizeof(struct iovec);
268
984263bc 269 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
ba023347
DRJ
270 bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen);
271 ktruio = *auio;
984263bc
MD
272 }
273#endif
ba023347 274 len = auio->uio_resid;
87de5057 275 error = fo_read(fp, auio, fp->f_cred, flags);
ba023347
DRJ
276 if (error) {
277 if (auio->uio_resid != len && (error == ERESTART ||
984263bc
MD
278 error == EINTR || error == EWOULDBLOCK))
279 error = 0;
280 }
984263bc
MD
281#ifdef KTRACE
282 if (ktriov != NULL) {
283 if (error == 0) {
284 ktruio.uio_iov = ktriov;
ba023347 285 ktruio.uio_resid = len - auio->uio_resid;
f832287e 286 get_mplock();
a9b80e23 287 ktrgenio(p, fd, UIO_READ, &ktruio, error);
f832287e 288 rel_mplock();
984263bc
MD
289 }
290 FREE(ktriov, M_TEMP);
291 }
292#endif
ba023347
DRJ
293 if (error == 0)
294 *res = len - auio->uio_resid;
7f83ed38
MD
295
296 return(error);
984263bc
MD
297}
298
299/*
300 * Write system call
f832287e
MD
301 *
302 * MPSAFE
984263bc 303 */
984263bc 304int
753fd850 305sys_write(struct write_args *uap)
984263bc 306{
dadab5e9 307 struct thread *td = curthread;
ba023347
DRJ
308 struct uio auio;
309 struct iovec aiov;
984263bc
MD
310 int error;
311
ba023347
DRJ
312 aiov.iov_base = (void *)(uintptr_t)uap->buf;
313 aiov.iov_len = uap->nbyte;
314 auio.uio_iov = &aiov;
315 auio.uio_iovcnt = 1;
316 auio.uio_offset = -1;
317 auio.uio_resid = uap->nbyte;
318 auio.uio_rw = UIO_WRITE;
319 auio.uio_segflg = UIO_USERSPACE;
320 auio.uio_td = td;
321
ef5c76d7
MD
322 if (auio.uio_resid < 0)
323 error = EINVAL;
324 else
325 error = kern_pwritev(uap->fd, &auio, 0, &uap->sysmsg_result);
dadab5e9 326
984263bc
MD
327 return(error);
328}
329
330/*
331 * Pwrite system call
f832287e
MD
332 *
333 * MPSAFE
984263bc 334 */
984263bc 335int
9ba76b73 336sys___pwrite(struct __pwrite_args *uap)
984263bc 337{
dadab5e9 338 struct thread *td = curthread;
ba023347
DRJ
339 struct uio auio;
340 struct iovec aiov;
984263bc 341 int error;
9ba76b73 342 int flags;
984263bc 343
ba023347
DRJ
344 aiov.iov_base = (void *)(uintptr_t)uap->buf;
345 aiov.iov_len = uap->nbyte;
346 auio.uio_iov = &aiov;
347 auio.uio_iovcnt = 1;
348 auio.uio_offset = uap->offset;
349 auio.uio_resid = uap->nbyte;
350 auio.uio_rw = UIO_WRITE;
351 auio.uio_segflg = UIO_USERSPACE;
352 auio.uio_td = td;
353
9ba76b73
MD
354 flags = uap->flags & O_FMASK;
355 if (uap->offset != (off_t)-1)
356 flags |= O_FOFFSET;
357
ef5c76d7
MD
358 if (auio.uio_resid < 0)
359 error = EINVAL;
360 else
9ba76b73 361 error = kern_pwritev(uap->fd, &auio, flags, &uap->sysmsg_result);
ba023347 362
984263bc
MD
363 return(error);
364}
365
f832287e
MD
366/*
367 * MPSAFE
368 */
ba023347 369int
753fd850 370sys_writev(struct writev_args *uap)
ba023347 371{
dadab5e9 372 struct thread *td = curthread;
984263bc 373 struct uio auio;
ba023347
DRJ
374 struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
375 int error;
984263bc 376
ba023347 377 error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt,
ef5c76d7 378 &auio.uio_resid);
ba023347
DRJ
379 if (error)
380 return (error);
381 auio.uio_iov = iov;
382 auio.uio_iovcnt = uap->iovcnt;
383 auio.uio_offset = -1;
984263bc
MD
384 auio.uio_rw = UIO_WRITE;
385 auio.uio_segflg = UIO_USERSPACE;
dadab5e9 386 auio.uio_td = td;
ba023347 387
7f83ed38 388 error = kern_pwritev(uap->fd, &auio, 0, &uap->sysmsg_result);
ba023347
DRJ
389
390 iovec_free(&iov, aiov);
984263bc
MD
391 return (error);
392}
393
7f83ed38 394
984263bc 395/*
7f83ed38 396 * Gather positioned write system call
f832287e
MD
397 *
398 * MPSAFE
984263bc 399 */
984263bc 400int
9ba76b73 401sys___pwritev(struct __pwritev_args *uap)
7f83ed38
MD
402{
403 struct thread *td = curthread;
404 struct uio auio;
405 struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
406 int error;
9ba76b73 407 int flags;
7f83ed38
MD
408
409 error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt,
ef5c76d7 410 &auio.uio_resid);
7f83ed38
MD
411 if (error)
412 return (error);
413 auio.uio_iov = iov;
414 auio.uio_iovcnt = uap->iovcnt;
415 auio.uio_offset = uap->offset;
416 auio.uio_rw = UIO_WRITE;
417 auio.uio_segflg = UIO_USERSPACE;
418 auio.uio_td = td;
419
9ba76b73
MD
420 flags = uap->flags & O_FMASK;
421 if (uap->offset != (off_t)-1)
422 flags |= O_FOFFSET;
423
424 error = kern_pwritev(uap->fd, &auio, flags, &uap->sysmsg_result);
7f83ed38
MD
425
426 iovec_free(&iov, aiov);
427 return(error);
428}
429
f832287e
MD
430/*
431 * MPSAFE
432 */
7f83ed38
MD
433int
434kern_pwritev(int fd, struct uio *auio, int flags, int *res)
984263bc 435{
dadab5e9
MD
436 struct thread *td = curthread;
437 struct proc *p = td->td_proc;
41c20dac 438 struct file *fp;
7f83ed38 439 int error;
984263bc 440
dadab5e9 441 KKASSERT(p);
dadab5e9 442
228b401d 443 fp = holdfp(p->p_fd, fd, FWRITE);
ba023347 444 if (fp == NULL)
984263bc 445 return (EBADF);
9ba76b73 446 else if ((flags & O_FOFFSET) && fp->f_type != DTYPE_VNODE) {
ba023347 447 error = ESPIPE;
7f83ed38
MD
448 } else {
449 error = dofilewrite(fd, fp, auio, flags, res);
984263bc 450 }
7f83ed38 451
9f87144f 452 fdrop(fp);
7f83ed38
MD
453 return (error);
454}
455
456/*
457 * Common code for writev and pwritev that writes data to
458 * a file using the passed in uio, offset, and flags.
f832287e
MD
459 *
460 * MPALMOSTSAFE - ktrace needs help
7f83ed38
MD
461 */
462static int
463dofilewrite(int fd, struct file *fp, struct uio *auio, int flags, int *res)
464{
465 struct thread *td = curthread;
466 struct proc *p = td->td_proc;
7f83ed38 467 int error;
ef5c76d7 468 int len;
7f83ed38
MD
469#ifdef KTRACE
470 struct iovec *ktriov = NULL;
471 struct uio ktruio;
472#endif
473
984263bc
MD
474#ifdef KTRACE
475 /*
476 * if tracing, save a copy of iovec and uio
477 */
dadab5e9 478 if (KTRPOINT(td, KTR_GENIO)) {
ba023347
DRJ
479 int iovlen = auio->uio_iovcnt * sizeof(struct iovec);
480
984263bc 481 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
ba023347
DRJ
482 bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen);
483 ktruio = *auio;
984263bc
MD
484 }
485#endif
ba023347 486 len = auio->uio_resid;
984263bc
MD
487 if (fp->f_type == DTYPE_VNODE)
488 bwillwrite();
87de5057 489 error = fo_write(fp, auio, fp->f_cred, flags);
ba023347
DRJ
490 if (error) {
491 if (auio->uio_resid != len && (error == ERESTART ||
984263bc
MD
492 error == EINTR || error == EWOULDBLOCK))
493 error = 0;
7f83ed38 494 /* Socket layer is responsible for issuing SIGPIPE. */
f832287e
MD
495 if (error == EPIPE) {
496 get_mplock();
84204577 497 ksignal(p, SIGPIPE);
f832287e
MD
498 rel_mplock();
499 }
984263bc 500 }
984263bc
MD
501#ifdef KTRACE
502 if (ktriov != NULL) {
503 if (error == 0) {
504 ktruio.uio_iov = ktriov;
ba023347 505 ktruio.uio_resid = len - auio->uio_resid;
f832287e 506 get_mplock();
a9b80e23 507 ktrgenio(p, fd, UIO_WRITE, &ktruio, error);
f832287e 508 rel_mplock();
984263bc
MD
509 }
510 FREE(ktriov, M_TEMP);
511 }
512#endif
ba023347
DRJ
513 if (error == 0)
514 *res = len - auio->uio_resid;
7f83ed38
MD
515
516 return(error);
984263bc
MD
517}
518
519/*
520 * Ioctl system call
521 */
984263bc
MD
522/* ARGSUSED */
523int
753fd850 524sys_ioctl(struct ioctl_args *uap)
a0c5fc96
JS
525{
526 return(mapped_ioctl(uap->fd, uap->com, uap->data, NULL));
527}
528
529struct ioctl_map_entry {
530 const char *subsys;
531 struct ioctl_map_range *cmd_ranges;
532 LIST_ENTRY(ioctl_map_entry) entries;
533};
534
25b5b94d
SS
535/*
536 * The true heart of all ioctl syscall handlers (native, emulation).
537 * If map != NULL, it will be searched for a matching entry for com,
538 * and appropriate conversions/conversion functions will be utilized.
539 */
a0c5fc96
JS
540int
541mapped_ioctl(int fd, u_long com, caddr_t uspc_data, struct ioctl_map *map)
984263bc 542{
dadab5e9
MD
543 struct thread *td = curthread;
544 struct proc *p = td->td_proc;
87de5057 545 struct ucred *cred;
41c20dac 546 struct file *fp;
a0c5fc96 547 struct ioctl_map_range *iomc = NULL;
984263bc 548 int error;
1fd87d54 549 u_int size;
a0c5fc96 550 u_long ocom = com;
984263bc
MD
551 caddr_t data, memp;
552 int tmp;
553#define STK_PARAMS 128
554 union {
555 char stkbuf[STK_PARAMS];
556 long align;
557 } ubuf;
558
dadab5e9 559 KKASSERT(p);
87de5057 560 cred = p->p_ucred;
984263bc 561
228b401d
MD
562 fp = holdfp(p->p_fd, fd, FREAD|FWRITE);
563 if (fp == NULL)
a0c5fc96
JS
564 return(EBADF);
565
566 if (map != NULL) { /* obey translation map */
567 u_long maskcmd;
568 struct ioctl_map_entry *e;
569
570 maskcmd = com & map->mask;
571
572 LIST_FOREACH(e, &map->mapping, entries) {
573 for (iomc = e->cmd_ranges; iomc->start != 0 ||
25b5b94d
SS
574 iomc->maptocmd != 0 || iomc->wrapfunc != NULL ||
575 iomc->mapfunc != NULL;
a0c5fc96
JS
576 iomc++) {
577 if (maskcmd >= iomc->start &&
578 maskcmd <= iomc->end)
579 break;
580 }
581
582 /* Did we find a match? */
583 if (iomc->start != 0 || iomc->maptocmd != 0 ||
25b5b94d 584 iomc->wrapfunc != NULL || iomc->mapfunc != NULL)
a0c5fc96
JS
585 break;
586 }
587
588 if (iomc == NULL ||
589 (iomc->start == 0 && iomc->maptocmd == 0
25b5b94d 590 && iomc->wrapfunc == NULL && iomc->mapfunc == NULL)) {
a0c5fc96
JS
591 printf("%s: 'ioctl' fd=%d, cmd=0x%lx ('%c',%d) not implemented\n",
592 map->sys, fd, maskcmd,
593 (int)((maskcmd >> 8) & 0xff),
594 (int)(maskcmd & 0xff));
228b401d
MD
595 error = EINVAL;
596 goto done;
a0c5fc96 597 }
984263bc 598
25b5b94d
SS
599 /*
600 * If it's a non-range one to one mapping, maptocmd should be
601 * correct. If it's a ranged one to one mapping, we pass the
602 * original value of com, and for a range mapped to a different
603 * range, we always need a mapping function to translate the
604 * ioctl to our native ioctl. Ex. 6500-65ff <-> 9500-95ff
605 */
606 if (iomc->start == iomc->end && iomc->maptocmd == iomc->maptoend) {
607 com = iomc->maptocmd;
608 } else if (iomc->start == iomc->maptocmd && iomc->end == iomc->maptoend) {
609 if (iomc->mapfunc != NULL)
610 com = iomc->mapfunc(iomc->start, iomc->end,
611 iomc->start, iomc->end,
612 com, com);
613 } else {
614 if (iomc->mapfunc != NULL) {
615 com = iomc->mapfunc(iomc->start, iomc->end,
616 iomc->maptocmd, iomc->maptoend,
617 com, ocom);
618 } else {
619 printf("%s: Invalid mapping for fd=%d, cmd=%#lx ('%c',%d)\n",
620 map->sys, fd, maskcmd,
621 (int)((maskcmd >> 8) & 0xff),
622 (int)(maskcmd & 0xff));
228b401d
MD
623 error = EINVAL;
624 goto done;
25b5b94d
SS
625 }
626 }
a0c5fc96
JS
627 }
628
629 switch (com) {
984263bc 630 case FIONCLEX:
228b401d
MD
631 error = fclrfdflags(p->p_fd, fd, UF_EXCLOSE);
632 goto done;
984263bc 633 case FIOCLEX:
228b401d
MD
634 error = fsetfdflags(p->p_fd, fd, UF_EXCLOSE);
635 goto done;
984263bc
MD
636 }
637
638 /*
639 * Interpret high order word to find amount of data to be
640 * copied to/from the user's address space.
641 */
642 size = IOCPARM_LEN(com);
228b401d
MD
643 if (size > IOCPARM_MAX) {
644 error = ENOTTY;
645 goto done;
646 }
984263bc
MD
647
648 memp = NULL;
649 if (size > sizeof (ubuf.stkbuf)) {
efda3bd0 650 memp = kmalloc(size, M_IOCTLOPS, M_WAITOK);
984263bc
MD
651 data = memp;
652 } else {
653 data = ubuf.stkbuf;
654 }
a0c5fc96
JS
655 if ((com & IOC_IN) != 0) {
656 if (size != 0) {
657 error = copyin(uspc_data, data, (u_int)size);
984263bc 658 if (error) {
a0c5fc96 659 if (memp != NULL)
efda3bd0 660 kfree(memp, M_IOCTLOPS);
228b401d 661 goto done;
984263bc
MD
662 }
663 } else {
a0c5fc96 664 *(caddr_t *)data = uspc_data;
984263bc 665 }
a0c5fc96 666 } else if ((com & IOC_OUT) != 0 && size) {
984263bc
MD
667 /*
668 * Zero the buffer so the user always
669 * gets back something deterministic.
670 */
671 bzero(data, size);
a0c5fc96
JS
672 } else if ((com & IOC_VOID) != 0) {
673 *(caddr_t *)data = uspc_data;
984263bc
MD
674 }
675
676 switch (com) {
984263bc
MD
677 case FIONBIO:
678 if ((tmp = *(int *)data))
679 fp->f_flag |= FNONBLOCK;
680 else
681 fp->f_flag &= ~FNONBLOCK;
9ba76b73 682 error = 0;
984263bc
MD
683 break;
684
685 case FIOASYNC:
686 if ((tmp = *(int *)data))
687 fp->f_flag |= FASYNC;
688 else
689 fp->f_flag &= ~FASYNC;
87de5057 690 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, cred);
984263bc
MD
691 break;
692
693 default:
a0c5fc96
JS
694 /*
695 * If there is a override function,
696 * call it instead of directly routing the call
697 */
25b5b94d 698 if (map != NULL && iomc->wrapfunc != NULL)
87de5057 699 error = iomc->wrapfunc(fp, com, ocom, data, cred);
a0c5fc96 700 else
87de5057 701 error = fo_ioctl(fp, com, data, cred);
984263bc
MD
702 /*
703 * Copy any data to user, size was
704 * already set and checked above.
705 */
a0c5fc96
JS
706 if (error == 0 && (com & IOC_OUT) != 0 && size != 0)
707 error = copyout(data, uspc_data, (u_int)size);
984263bc
MD
708 break;
709 }
a0c5fc96 710 if (memp != NULL)
efda3bd0 711 kfree(memp, M_IOCTLOPS);
228b401d 712done:
9f87144f 713 fdrop(fp);
a0c5fc96
JS
714 return(error);
715}
716
717int
718mapped_ioctl_register_handler(struct ioctl_map_handler *he)
719{
720 struct ioctl_map_entry *ne;
721
722 KKASSERT(he != NULL && he->map != NULL && he->cmd_ranges != NULL &&
723 he->subsys != NULL && *he->subsys != '\0');
724
efda3bd0 725 ne = kmalloc(sizeof(struct ioctl_map_entry), M_IOCTLMAP, M_WAITOK);
a0c5fc96
JS
726
727 ne->subsys = he->subsys;
728 ne->cmd_ranges = he->cmd_ranges;
729
730 LIST_INSERT_HEAD(&he->map->mapping, ne, entries);
731
732 return(0);
733}
734
735int
736mapped_ioctl_unregister_handler(struct ioctl_map_handler *he)
737{
738 struct ioctl_map_entry *ne;
739
740 KKASSERT(he != NULL && he->map != NULL && he->cmd_ranges != NULL);
741
742 LIST_FOREACH(ne, &he->map->mapping, entries) {
743 if (ne->cmd_ranges != he->cmd_ranges)
744 continue;
745 LIST_REMOVE(ne, entries);
efda3bd0 746 kfree(ne, M_IOCTLMAP);
a0c5fc96
JS
747 return(0);
748 }
749 return(EINVAL);
984263bc
MD
750}
751
752static int nselcoll; /* Select collisions since boot */
753int selwait;
754SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
755
756/*
757 * Select system call.
758 */
984263bc 759int
753fd850 760sys_select(struct select_args *uap)
984263bc 761{
41c20dac
MD
762 struct proc *p = curproc;
763
984263bc
MD
764 /*
765 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
766 * infds with the new FD_SETSIZE of 1024, and more than enough for
767 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
768 * of 256.
769 */
770 fd_mask s_selbits[howmany(2048, NFDBITS)];
771 fd_mask *ibits[3], *obits[3], *selbits, *sbp;
772 struct timeval atv, rtv, ttv;
e43a034f 773 int ncoll, error, timo;
984263bc
MD
774 u_int nbufbytes, ncpbytes, nfdbits;
775
776 if (uap->nd < 0)
777 return (EINVAL);
778 if (uap->nd > p->p_fd->fd_nfiles)
779 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */
780
781 /*
782 * Allocate just enough bits for the non-null fd_sets. Use the
783 * preallocated auto buffer if possible.
784 */
785 nfdbits = roundup(uap->nd, NFDBITS);
786 ncpbytes = nfdbits / NBBY;
787 nbufbytes = 0;
788 if (uap->in != NULL)
789 nbufbytes += 2 * ncpbytes;
790 if (uap->ou != NULL)
791 nbufbytes += 2 * ncpbytes;
792 if (uap->ex != NULL)
793 nbufbytes += 2 * ncpbytes;
794 if (nbufbytes <= sizeof s_selbits)
795 selbits = &s_selbits[0];
796 else
efda3bd0 797 selbits = kmalloc(nbufbytes, M_SELECT, M_WAITOK);
984263bc
MD
798
799 /*
800 * Assign pointers into the bit buffers and fetch the input bits.
801 * Put the output buffers together so that they can be bzeroed
802 * together.
803 */
804 sbp = selbits;
805#define getbits(name, x) \
806 do { \
807 if (uap->name == NULL) \
808 ibits[x] = NULL; \
809 else { \
810 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \
811 obits[x] = sbp; \
812 sbp += ncpbytes / sizeof *sbp; \
813 error = copyin(uap->name, ibits[x], ncpbytes); \
814 if (error != 0) \
815 goto done; \
816 } \
817 } while (0)
818 getbits(in, 0);
819 getbits(ou, 1);
820 getbits(ex, 2);
821#undef getbits
822 if (nbufbytes != 0)
823 bzero(selbits, nbufbytes / 2);
824
825 if (uap->tv) {
826 error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
827 sizeof (atv));
828 if (error)
829 goto done;
830 if (itimerfix(&atv)) {
831 error = EINVAL;
832 goto done;
833 }
834 getmicrouptime(&rtv);
835 timevaladd(&atv, &rtv);
836 } else {
837 atv.tv_sec = 0;
838 atv.tv_usec = 0;
839 }
840 timo = 0;
841retry:
842 ncoll = nselcoll;
843 p->p_flag |= P_SELECT;
c7114eea
MD
844 error = selscan(p, ibits, obits, uap->nd, &uap->sysmsg_result);
845 if (error || uap->sysmsg_result)
984263bc
MD
846 goto done;
847 if (atv.tv_sec || atv.tv_usec) {
848 getmicrouptime(&rtv);
849 if (timevalcmp(&rtv, &atv, >=))
850 goto done;
851 ttv = atv;
852 timevalsub(&ttv, &rtv);
853 timo = ttv.tv_sec > 24 * 60 * 60 ?
a94976ad 854 24 * 60 * 60 * hz : tvtohz_high(&ttv);
984263bc 855 }
e43a034f 856 crit_enter();
984263bc 857 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
e43a034f 858 crit_exit();
984263bc
MD
859 goto retry;
860 }
861 p->p_flag &= ~P_SELECT;
862
377d4740 863 error = tsleep((caddr_t)&selwait, PCATCH, "select", timo);
984263bc 864
e43a034f 865 crit_exit();
984263bc
MD
866 if (error == 0)
867 goto retry;
868done:
869 p->p_flag &= ~P_SELECT;
870 /* select is not restarted after signals... */
871 if (error == ERESTART)
872 error = EINTR;
873 if (error == EWOULDBLOCK)
874 error = 0;
875#define putbits(name, x) \
876 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \
877 error = error2;
878 if (error == 0) {
879 int error2;
880
881 putbits(in, 0);
882 putbits(ou, 1);
883 putbits(ex, 2);
884#undef putbits
885 }
886 if (selbits != &s_selbits[0])
efda3bd0 887 kfree(selbits, M_SELECT);
984263bc
MD
888 return (error);
889}
890
891static int
90b9818c 892selscan(struct proc *p, fd_mask **ibits, fd_mask **obits, int nfd, int *res)
984263bc 893{
984263bc
MD
894 int msk, i, fd;
895 fd_mask bits;
896 struct file *fp;
897 int n = 0;
898 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */
899 static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
900
901 for (msk = 0; msk < 3; msk++) {
902 if (ibits[msk] == NULL)
903 continue;
904 for (i = 0; i < nfd; i += NFDBITS) {
905 bits = ibits[msk][i/NFDBITS];
906 /* ffs(int mask) not portable, fd_mask is long */
907 for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
908 if (!(bits & 1))
909 continue;
228b401d 910 fp = holdfp(p->p_fd, fd, -1);
984263bc
MD
911 if (fp == NULL)
912 return (EBADF);
87de5057 913 if (fo_poll(fp, flag[msk], fp->f_cred)) {
984263bc
MD
914 obits[msk][(fd)/NFDBITS] |=
915 ((fd_mask)1 << ((fd) % NFDBITS));
916 n++;
917 }
228b401d 918 fdrop(fp);
984263bc
MD
919 }
920 }
921 }
90b9818c 922 *res = n;
984263bc
MD
923 return (0);
924}
925
926/*
927 * Poll system call.
928 */
984263bc 929int
753fd850 930sys_poll(struct poll_args *uap)
984263bc 931{
b525b7ed
DR
932 struct pollfd *bits;
933 struct pollfd smallbits[32];
984263bc 934 struct timeval atv, rtv, ttv;
e43a034f 935 int ncoll, error = 0, timo;
984263bc
MD
936 u_int nfds;
937 size_t ni;
41c20dac 938 struct proc *p = curproc;
984263bc 939
ab2eb4eb 940 nfds = uap->nfds;
984263bc
MD
941 /*
942 * This is kinda bogus. We have fd limits, but that is not
943 * really related to the size of the pollfd array. Make sure
944 * we let the process use at least FD_SETSIZE entries and at
945 * least enough for the current limits. We want to be reasonably
946 * safe, but not overly restrictive.
947 */
948 if (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && nfds > FD_SETSIZE)
949 return (EINVAL);
950 ni = nfds * sizeof(struct pollfd);
951 if (ni > sizeof(smallbits))
efda3bd0 952 bits = kmalloc(ni, M_TEMP, M_WAITOK);
984263bc
MD
953 else
954 bits = smallbits;
ab2eb4eb 955 error = copyin(uap->fds, bits, ni);
984263bc
MD
956 if (error)
957 goto done;
ab2eb4eb
DR
958 if (uap->timeout != INFTIM) {
959 atv.tv_sec = uap->timeout / 1000;
960 atv.tv_usec = (uap->timeout % 1000) * 1000;
984263bc
MD
961 if (itimerfix(&atv)) {
962 error = EINVAL;
963 goto done;
964 }
965 getmicrouptime(&rtv);
966 timevaladd(&atv, &rtv);
967 } else {
968 atv.tv_sec = 0;
969 atv.tv_usec = 0;
970 }
971 timo = 0;
972retry:
973 ncoll = nselcoll;
974 p->p_flag |= P_SELECT;
b525b7ed 975 error = pollscan(p, bits, nfds, &uap->sysmsg_result);
c7114eea 976 if (error || uap->sysmsg_result)
984263bc
MD
977 goto done;
978 if (atv.tv_sec || atv.tv_usec) {
979 getmicrouptime(&rtv);
980 if (timevalcmp(&rtv, &atv, >=))
981 goto done;
982 ttv = atv;
983 timevalsub(&ttv, &rtv);
984 timo = ttv.tv_sec > 24 * 60 * 60 ?
a94976ad 985 24 * 60 * 60 * hz : tvtohz_high(&ttv);
984263bc 986 }
e43a034f 987 crit_enter();
984263bc 988 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
e43a034f 989 crit_exit();
984263bc
MD
990 goto retry;
991 }
992 p->p_flag &= ~P_SELECT;
377d4740 993 error = tsleep((caddr_t)&selwait, PCATCH, "poll", timo);
e43a034f 994 crit_exit();
984263bc
MD
995 if (error == 0)
996 goto retry;
997done:
998 p->p_flag &= ~P_SELECT;
999 /* poll is not restarted after signals... */
1000 if (error == ERESTART)
1001 error = EINTR;
1002 if (error == EWOULDBLOCK)
1003 error = 0;
1004 if (error == 0) {
ab2eb4eb 1005 error = copyout(bits, uap->fds, ni);
984263bc
MD
1006 if (error)
1007 goto out;
1008 }
1009out:
1010 if (ni > sizeof(smallbits))
efda3bd0 1011 kfree(bits, M_TEMP);
984263bc
MD
1012 return (error);
1013}
1014
1015static int
90b9818c 1016pollscan(struct proc *p, struct pollfd *fds, u_int nfd, int *res)
984263bc 1017{
984263bc
MD
1018 int i;
1019 struct file *fp;
1020 int n = 0;
1021
1022 for (i = 0; i < nfd; i++, fds++) {
228b401d 1023 if (fds->fd >= p->p_fd->fd_nfiles) {
984263bc
MD
1024 fds->revents = POLLNVAL;
1025 n++;
1026 } else if (fds->fd < 0) {
1027 fds->revents = 0;
1028 } else {
228b401d 1029 fp = holdfp(p->p_fd, fds->fd, -1);
984263bc
MD
1030 if (fp == NULL) {
1031 fds->revents = POLLNVAL;
1032 n++;
1033 } else {
1034 /*
1035 * Note: backend also returns POLLHUP and
1036 * POLLERR if appropriate.
1037 */
1038 fds->revents = fo_poll(fp, fds->events,
87de5057 1039 fp->f_cred);
984263bc
MD
1040 if (fds->revents != 0)
1041 n++;
228b401d 1042 fdrop(fp);
984263bc
MD
1043 }
1044 }
1045 }
90b9818c 1046 *res = n;
984263bc
MD
1047 return (0);
1048}
1049
/*
 * OpenBSD poll system call.
 * XXX this isn't quite a true representation.. OpenBSD uses select ops.
 *
 * The argument structure is simply reinterpreted as a poll_args and
 * passed on to sys_poll().
 */
int
sys_openbsd_poll(struct openbsd_poll_args *uap)
{
	struct poll_args *pargs = (struct poll_args *)uap;

	return (sys_poll(pargs));
}
1059
1060/*ARGSUSED*/
1061int
b13267a5 1062seltrue(cdev_t dev, int events)
984263bc 1063{
984263bc
MD
1064 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1065}
1066
1067/*
41c20dac
MD
1068 * Record a select request. A global wait must be used since a process/thread
1069 * might go away after recording its request.
984263bc
MD
1070 */
1071void
41c20dac 1072selrecord(struct thread *selector, struct selinfo *sip)
984263bc
MD
1073{
1074 struct proc *p;
1075 pid_t mypid;
1076
41c20dac
MD
1077 if ((p = selector->td_proc) == NULL)
1078 panic("selrecord: thread needs a process");
1079
1080 mypid = p->p_pid;
984263bc
MD
1081 if (sip->si_pid == mypid)
1082 return;
1083 if (sip->si_pid && (p = pfind(sip->si_pid)) &&
41c20dac 1084 p->p_wchan == (caddr_t)&selwait) {
984263bc 1085 sip->si_flags |= SI_COLL;
41c20dac 1086 } else {
984263bc 1087 sip->si_pid = mypid;
41c20dac 1088 }
984263bc
MD
1089}
1090
1091/*
1092 * Do a wakeup when a selectable event occurs.
1093 */
1094void
41c20dac 1095selwakeup(struct selinfo *sip)
984263bc 1096{
41c20dac 1097 struct proc *p;
984263bc
MD
1098
1099 if (sip->si_pid == 0)
1100 return;
1101 if (sip->si_flags & SI_COLL) {
1102 nselcoll++;
1103 sip->si_flags &= ~SI_COLL;
41c20dac 1104 wakeup((caddr_t)&selwait); /* YYY fixable */
984263bc
MD
1105 }
1106 p = pfind(sip->si_pid);
1107 sip->si_pid = 0;
1108 if (p != NULL) {
e43a034f 1109 crit_enter();
984263bc 1110 if (p->p_wchan == (caddr_t)&selwait) {
344ad853
MD
1111 /*
1112 * Flag the process to break the tsleep when
1113 * setrunnable is called, but only call setrunnable
1114 * here if the process is not in a stopped state.
1115 */
1116 p->p_flag |= P_BREAKTSLEEP;
1117 if ((p->p_flag & P_STOPPED) == 0)
984263bc 1118 setrunnable(p);
344ad853 1119 } else if (p->p_flag & P_SELECT) {
984263bc 1120 p->p_flag &= ~P_SELECT;
344ad853 1121 }
e43a034f 1122 crit_exit();
984263bc
MD
1123 }
1124}
41c20dac 1125