Commit | Line | Data |
---|---|---|
984263bc MD |
1 | /* |
2 | * Copyright (c) 1982, 1986, 1989, 1993 | |
3 | * The Regents of the University of California. All rights reserved. | |
4 | * (c) UNIX System Laboratories, Inc. | |
5 | * All or some portions of this file are derived from material licensed | |
6 | * to the University of California by American Telephone and Telegraph | |
7 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
8 | * the permission of UNIX System Laboratories, Inc. | |
9 | * | |
10 | * Redistribution and use in source and binary forms, with or without | |
11 | * modification, are permitted provided that the following conditions | |
12 | * are met: | |
13 | * 1. Redistributions of source code must retain the above copyright | |
14 | * notice, this list of conditions and the following disclaimer. | |
15 | * 2. Redistributions in binary form must reproduce the above copyright | |
16 | * notice, this list of conditions and the following disclaimer in the | |
17 | * documentation and/or other materials provided with the distribution. | |
dc71b7ab | 18 | * 3. Neither the name of the University nor the names of its contributors |
984263bc MD |
19 | * may be used to endorse or promote products derived from this software |
20 | * without specific prior written permission. | |
21 | * | |
22 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
23 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
24 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
25 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
27 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
28 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
29 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
30 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
31 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
32 | * SUCH DAMAGE. | |
33 | * | |
34 | * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 | |
35 | * $FreeBSD: src/sys/kern/sys_generic.c,v 1.55.2.10 2001/03/17 10:39:32 peter Exp $ | |
36 | */ | |
37 | ||
38 | #include "opt_ktrace.h" | |
39 | ||
40 | #include <sys/param.h> | |
41 | #include <sys/systm.h> | |
80d831e1 | 42 | #include <sys/sysmsg.h> |
e5857bf7 | 43 | #include <sys/event.h> |
984263bc MD |
44 | #include <sys/filedesc.h> |
45 | #include <sys/filio.h> | |
46 | #include <sys/fcntl.h> | |
47 | #include <sys/file.h> | |
48 | #include <sys/proc.h> | |
49 | #include <sys/signalvar.h> | |
50 | #include <sys/socketvar.h> | |
e58ce663 | 51 | #include <sys/malloc.h> |
984263bc MD |
52 | #include <sys/uio.h> |
53 | #include <sys/kernel.h> | |
ba023347 | 54 | #include <sys/kern_syscall.h> |
a0c5fc96 | 55 | #include <sys/mapped_ioctl.h> |
984263bc | 56 | #include <sys/poll.h> |
a0c5fc96 | 57 | #include <sys/queue.h> |
984263bc | 58 | #include <sys/resourcevar.h> |
8b5c39bb | 59 | #include <sys/socketops.h> |
984263bc MD |
60 | #include <sys/sysctl.h> |
61 | #include <sys/sysent.h> | |
62 | #include <sys/buf.h> | |
63 | #ifdef KTRACE | |
64 | #include <sys/ktrace.h> | |
65 | #endif | |
66 | #include <vm/vm.h> | |
67 | #include <vm/vm_page.h> | |
684a93c4 | 68 | |
dadab5e9 | 69 | #include <sys/file2.h> |
ac62ea3c | 70 | #include <sys/spinlock2.h> |
984263bc MD |
71 | |
72 | #include <machine/limits.h> | |
73 | ||
74 | static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); | |
a0c5fc96 | 75 | static MALLOC_DEFINE(M_IOCTLMAP, "ioctlmap", "mapped ioctl handler buffer"); |
984263bc MD |
76 | static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); |
77 | MALLOC_DEFINE(M_IOV, "iov", "large iov's"); | |
78 | ||
e5857bf7 | 79 | typedef struct kfd_set { |
8acdf1cf | 80 | fd_mask fds_bits[2]; |
e5857bf7 SG |
81 | } kfd_set; |
82 | ||
/*
 * Phases stepped through by select_copyin(): one per descriptor set,
 * followed by a terminal state.
 */
enum select_copyin_states {
	COPYIN_READ,
	COPYIN_WRITE,
	COPYIN_EXCEPT,
	COPYIN_DONE
};
85 | ||
86 | struct select_kevent_copyin_args { | |
87 | kfd_set *read_set; | |
88 | kfd_set *write_set; | |
89 | kfd_set *except_set; | |
90 | int active_set; /* One of select_copyin_states */ | |
91 | struct lwp *lwp; /* Pointer to our lwp */ | |
92 | int num_fds; /* Number of file descriptors (syscall arg) */ | |
93 | int proc_fds; /* Processed fd's (wraps) */ | |
94 | int error; /* Returned to userland */ | |
95 | }; | |
96 | ||
7fbfbe29 SG |
/*
 * Per-call state for the poll() kevent copyin path.
 * NOTE(review): the field meanings below are inferred from the parallel
 * select_kevent_copyin_args structure -- confirm against dopoll().
 */
struct poll_kevent_copyin_args {
	struct lwp	*lwp;	/* presumably the calling lwp */
	struct pollfd	*fds;	/* pollfd array being processed */
	int		nfds;	/* presumably the number of entries in fds */
	int		pfds;	/* presumably the count processed so far */
	int		error;	/* presumably the error for userland */
};
104 | ||
a3c18566 | 105 | static struct lwkt_token mioctl_token = LWKT_TOKEN_INITIALIZER(mioctl_token); |
3c499555 | 106 | |
ae7cb1b5 | 107 | static int doselect(int nd, fd_set *in, fd_set *ou, fd_set *ex, |
8acdf1cf | 108 | struct timespec *ts, int *res); |
7fbfbe29 | 109 | static int dopoll(int nfds, struct pollfd *fds, struct timespec *ts, |
6d2444c4 | 110 | int *res, int flags); |
e54488bb MD |
111 | static int dofileread(int, struct file *, struct uio *, int, size_t *); |
112 | static int dofilewrite(int, struct file *, struct uio *, int, size_t *); | |
7f83ed38 | 113 | |
984263bc MD |
114 | /* |
115 | * Read system call. | |
f832287e MD |
116 | * |
117 | * MPSAFE | |
984263bc | 118 | */ |
984263bc | 119 | int |
80d831e1 | 120 | sys_read(struct sysmsg *sysmsg, const struct read_args *uap) |
984263bc | 121 | { |
dadab5e9 | 122 | struct thread *td = curthread; |
ba023347 DRJ |
123 | struct uio auio; |
124 | struct iovec aiov; | |
984263bc MD |
125 | int error; |
126 | ||
e54488bb MD |
127 | if ((ssize_t)uap->nbyte < 0) |
128 | error = EINVAL; | |
129 | ||
ba023347 DRJ |
130 | aiov.iov_base = uap->buf; |
131 | aiov.iov_len = uap->nbyte; | |
132 | auio.uio_iov = &aiov; | |
133 | auio.uio_iovcnt = 1; | |
134 | auio.uio_offset = -1; | |
135 | auio.uio_resid = uap->nbyte; | |
136 | auio.uio_rw = UIO_READ; | |
137 | auio.uio_segflg = UIO_USERSPACE; | |
138 | auio.uio_td = td; | |
139 | ||
80d831e1 | 140 | error = kern_preadv(uap->fd, &auio, 0, &sysmsg->sysmsg_szresult); |
984263bc MD |
141 | return(error); |
142 | } | |
143 | ||
144 | /* | |
7f83ed38 | 145 | * Positioned (Pread) read system call |
f832287e MD |
146 | * |
147 | * MPSAFE | |
984263bc | 148 | */ |
984263bc | 149 | int |
80d831e1 | 150 | sys_extpread(struct sysmsg *sysmsg, const struct extpread_args *uap) |
984263bc | 151 | { |
dadab5e9 | 152 | struct thread *td = curthread; |
ba023347 DRJ |
153 | struct uio auio; |
154 | struct iovec aiov; | |
984263bc | 155 | int error; |
9ba76b73 | 156 | int flags; |
984263bc | 157 | |
e54488bb MD |
158 | if ((ssize_t)uap->nbyte < 0) |
159 | return(EINVAL); | |
160 | ||
ba023347 DRJ |
161 | aiov.iov_base = uap->buf; |
162 | aiov.iov_len = uap->nbyte; | |
163 | auio.uio_iov = &aiov; | |
164 | auio.uio_iovcnt = 1; | |
165 | auio.uio_offset = uap->offset; | |
166 | auio.uio_resid = uap->nbyte; | |
167 | auio.uio_rw = UIO_READ; | |
168 | auio.uio_segflg = UIO_USERSPACE; | |
169 | auio.uio_td = td; | |
170 | ||
9ba76b73 MD |
171 | flags = uap->flags & O_FMASK; |
172 | if (uap->offset != (off_t)-1) | |
173 | flags |= O_FOFFSET; | |
174 | ||
80d831e1 | 175 | error = kern_preadv(uap->fd, &auio, flags, &sysmsg->sysmsg_szresult); |
984263bc MD |
176 | return(error); |
177 | } | |
178 | ||
7f83ed38 MD |
179 | /* |
180 | * Scatter read system call. | |
f832287e MD |
181 | * |
182 | * MPSAFE | |
7f83ed38 | 183 | */ |
984263bc | 184 | int |
80d831e1 | 185 | sys_readv(struct sysmsg *sysmsg, const struct readv_args *uap) |
984263bc | 186 | { |
dadab5e9 | 187 | struct thread *td = curthread; |
984263bc | 188 | struct uio auio; |
ba023347 DRJ |
189 | struct iovec aiov[UIO_SMALLIOV], *iov = NULL; |
190 | int error; | |
984263bc | 191 | |
ba023347 | 192 | error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt, |
ef5c76d7 | 193 | &auio.uio_resid); |
ba023347 DRJ |
194 | if (error) |
195 | return (error); | |
196 | auio.uio_iov = iov; | |
197 | auio.uio_iovcnt = uap->iovcnt; | |
198 | auio.uio_offset = -1; | |
984263bc MD |
199 | auio.uio_rw = UIO_READ; |
200 | auio.uio_segflg = UIO_USERSPACE; | |
dadab5e9 | 201 | auio.uio_td = td; |
984263bc | 202 | |
80d831e1 | 203 | error = kern_preadv(uap->fd, &auio, 0, &sysmsg->sysmsg_szresult); |
ba023347 DRJ |
204 | |
205 | iovec_free(&iov, aiov); | |
984263bc MD |
206 | return (error); |
207 | } | |
208 | ||
7f83ed38 MD |
209 | |
210 | /* | |
211 | * Scatter positioned read system call. | |
f832287e MD |
212 | * |
213 | * MPSAFE | |
7f83ed38 MD |
214 | */ |
215 | int | |
80d831e1 | 216 | sys_extpreadv(struct sysmsg *sysmsg, const struct extpreadv_args *uap) |
7f83ed38 MD |
217 | { |
218 | struct thread *td = curthread; | |
219 | struct uio auio; | |
220 | struct iovec aiov[UIO_SMALLIOV], *iov = NULL; | |
221 | int error; | |
9ba76b73 | 222 | int flags; |
7f83ed38 MD |
223 | |
224 | error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt, | |
ef5c76d7 | 225 | &auio.uio_resid); |
7f83ed38 MD |
226 | if (error) |
227 | return (error); | |
228 | auio.uio_iov = iov; | |
229 | auio.uio_iovcnt = uap->iovcnt; | |
230 | auio.uio_offset = uap->offset; | |
231 | auio.uio_rw = UIO_READ; | |
232 | auio.uio_segflg = UIO_USERSPACE; | |
233 | auio.uio_td = td; | |
234 | ||
9ba76b73 MD |
235 | flags = uap->flags & O_FMASK; |
236 | if (uap->offset != (off_t)-1) | |
237 | flags |= O_FOFFSET; | |
238 | ||
80d831e1 | 239 | error = kern_preadv(uap->fd, &auio, flags, &sysmsg->sysmsg_szresult); |
7f83ed38 MD |
240 | |
241 | iovec_free(&iov, aiov); | |
242 | return(error); | |
243 | } | |
244 | ||
f832287e MD |
245 | /* |
246 | * MPSAFE | |
247 | */ | |
984263bc | 248 | int |
e54488bb | 249 | kern_preadv(int fd, struct uio *auio, int flags, size_t *res) |
984263bc | 250 | { |
dadab5e9 | 251 | struct thread *td = curthread; |
41c20dac | 252 | struct file *fp; |
7f83ed38 | 253 | int error; |
984263bc | 254 | |
35949930 | 255 | fp = holdfp(td, fd, FREAD); |
ba023347 | 256 | if (fp == NULL) |
984263bc | 257 | return (EBADF); |
9ba76b73 | 258 | if (flags & O_FOFFSET && fp->f_type != DTYPE_VNODE) { |
ba023347 | 259 | error = ESPIPE; |
7f83ed38 MD |
260 | } else { |
261 | error = dofileread(fd, fp, auio, flags, res); | |
984263bc | 262 | } |
35949930 MD |
263 | dropfp(td, fd, fp); |
264 | ||
7f83ed38 MD |
265 | return(error); |
266 | } | |
267 | ||
268 | /* | |
269 | * Common code for readv and preadv that reads data in | |
270 | * from a file using the passed in uio, offset, and flags. | |
f832287e MD |
271 | * |
272 | * MPALMOSTSAFE - ktrace needs help | |
7f83ed38 MD |
273 | */ |
274 | static int | |
e54488bb | 275 | dofileread(int fd, struct file *fp, struct uio *auio, int flags, size_t *res) |
7f83ed38 | 276 | { |
7f83ed38 | 277 | int error; |
e54488bb | 278 | size_t len; |
7f83ed38 | 279 | #ifdef KTRACE |
fc9ae81d | 280 | struct thread *td = curthread; |
7f83ed38 MD |
281 | struct iovec *ktriov = NULL; |
282 | struct uio ktruio; | |
283 | #endif | |
284 | ||
984263bc MD |
285 | #ifdef KTRACE |
286 | /* | |
287 | * if tracing, save a copy of iovec | |
288 | */ | |
dadab5e9 | 289 | if (KTRPOINT(td, KTR_GENIO)) { |
ba023347 DRJ |
290 | int iovlen = auio->uio_iovcnt * sizeof(struct iovec); |
291 | ||
884717e1 | 292 | ktriov = kmalloc(iovlen, M_TEMP, M_WAITOK); |
ba023347 DRJ |
293 | bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen); |
294 | ktruio = *auio; | |
984263bc MD |
295 | } |
296 | #endif | |
ba023347 | 297 | len = auio->uio_resid; |
87de5057 | 298 | error = fo_read(fp, auio, fp->f_cred, flags); |
ba023347 DRJ |
299 | if (error) { |
300 | if (auio->uio_resid != len && (error == ERESTART || | |
984263bc MD |
301 | error == EINTR || error == EWOULDBLOCK)) |
302 | error = 0; | |
303 | } | |
984263bc MD |
304 | #ifdef KTRACE |
305 | if (ktriov != NULL) { | |
306 | if (error == 0) { | |
307 | ktruio.uio_iov = ktriov; | |
ba023347 | 308 | ktruio.uio_resid = len - auio->uio_resid; |
9fb04d14 | 309 | ktrgenio(td->td_lwp, fd, UIO_READ, &ktruio, error); |
984263bc | 310 | } |
884717e1 | 311 | kfree(ktriov, M_TEMP); |
984263bc MD |
312 | } |
313 | #endif | |
ba023347 DRJ |
314 | if (error == 0) |
315 | *res = len - auio->uio_resid; | |
7f83ed38 MD |
316 | |
317 | return(error); | |
984263bc MD |
318 | } |
319 | ||
320 | /* | |
321 | * Write system call | |
f832287e MD |
322 | * |
323 | * MPSAFE | |
984263bc | 324 | */ |
984263bc | 325 | int |
80d831e1 | 326 | sys_write(struct sysmsg *sysmsg, const struct write_args *uap) |
984263bc | 327 | { |
dadab5e9 | 328 | struct thread *td = curthread; |
ba023347 DRJ |
329 | struct uio auio; |
330 | struct iovec aiov; | |
984263bc MD |
331 | int error; |
332 | ||
e54488bb MD |
333 | if ((ssize_t)uap->nbyte < 0) |
334 | error = EINVAL; | |
335 | ||
ba023347 DRJ |
336 | aiov.iov_base = (void *)(uintptr_t)uap->buf; |
337 | aiov.iov_len = uap->nbyte; | |
338 | auio.uio_iov = &aiov; | |
339 | auio.uio_iovcnt = 1; | |
340 | auio.uio_offset = -1; | |
341 | auio.uio_resid = uap->nbyte; | |
342 | auio.uio_rw = UIO_WRITE; | |
343 | auio.uio_segflg = UIO_USERSPACE; | |
344 | auio.uio_td = td; | |
345 | ||
80d831e1 | 346 | error = kern_pwritev(uap->fd, &auio, 0, &sysmsg->sysmsg_szresult); |
dadab5e9 | 347 | |
984263bc MD |
348 | return(error); |
349 | } | |
350 | ||
351 | /* | |
352 | * Pwrite system call | |
f832287e MD |
353 | * |
354 | * MPSAFE | |
984263bc | 355 | */ |
984263bc | 356 | int |
80d831e1 | 357 | sys_extpwrite(struct sysmsg *sysmsg, const struct extpwrite_args *uap) |
984263bc | 358 | { |
dadab5e9 | 359 | struct thread *td = curthread; |
ba023347 DRJ |
360 | struct uio auio; |
361 | struct iovec aiov; | |
984263bc | 362 | int error; |
9ba76b73 | 363 | int flags; |
984263bc | 364 | |
e54488bb MD |
365 | if ((ssize_t)uap->nbyte < 0) |
366 | error = EINVAL; | |
367 | ||
ba023347 DRJ |
368 | aiov.iov_base = (void *)(uintptr_t)uap->buf; |
369 | aiov.iov_len = uap->nbyte; | |
370 | auio.uio_iov = &aiov; | |
371 | auio.uio_iovcnt = 1; | |
372 | auio.uio_offset = uap->offset; | |
373 | auio.uio_resid = uap->nbyte; | |
374 | auio.uio_rw = UIO_WRITE; | |
375 | auio.uio_segflg = UIO_USERSPACE; | |
376 | auio.uio_td = td; | |
377 | ||
9ba76b73 MD |
378 | flags = uap->flags & O_FMASK; |
379 | if (uap->offset != (off_t)-1) | |
380 | flags |= O_FOFFSET; | |
80d831e1 | 381 | error = kern_pwritev(uap->fd, &auio, flags, &sysmsg->sysmsg_szresult); |
984263bc MD |
382 | return(error); |
383 | } | |
384 | ||
f832287e MD |
385 | /* |
386 | * MPSAFE | |
387 | */ | |
ba023347 | 388 | int |
80d831e1 | 389 | sys_writev(struct sysmsg *sysmsg, const struct writev_args *uap) |
ba023347 | 390 | { |
dadab5e9 | 391 | struct thread *td = curthread; |
984263bc | 392 | struct uio auio; |
ba023347 DRJ |
393 | struct iovec aiov[UIO_SMALLIOV], *iov = NULL; |
394 | int error; | |
984263bc | 395 | |
ba023347 | 396 | error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt, |
ef5c76d7 | 397 | &auio.uio_resid); |
ba023347 DRJ |
398 | if (error) |
399 | return (error); | |
400 | auio.uio_iov = iov; | |
401 | auio.uio_iovcnt = uap->iovcnt; | |
402 | auio.uio_offset = -1; | |
984263bc MD |
403 | auio.uio_rw = UIO_WRITE; |
404 | auio.uio_segflg = UIO_USERSPACE; | |
dadab5e9 | 405 | auio.uio_td = td; |
ba023347 | 406 | |
80d831e1 | 407 | error = kern_pwritev(uap->fd, &auio, 0, &sysmsg->sysmsg_szresult); |
ba023347 DRJ |
408 | |
409 | iovec_free(&iov, aiov); | |
984263bc MD |
410 | return (error); |
411 | } | |
412 | ||
7f83ed38 | 413 | |
984263bc | 414 | /* |
7f83ed38 | 415 | * Gather positioned write system call |
f832287e MD |
416 | * |
417 | * MPSAFE | |
984263bc | 418 | */ |
984263bc | 419 | int |
80d831e1 | 420 | sys_extpwritev(struct sysmsg *sysmsg, const struct extpwritev_args *uap) |
7f83ed38 MD |
421 | { |
422 | struct thread *td = curthread; | |
423 | struct uio auio; | |
424 | struct iovec aiov[UIO_SMALLIOV], *iov = NULL; | |
425 | int error; | |
9ba76b73 | 426 | int flags; |
7f83ed38 MD |
427 | |
428 | error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt, | |
ef5c76d7 | 429 | &auio.uio_resid); |
7f83ed38 MD |
430 | if (error) |
431 | return (error); | |
432 | auio.uio_iov = iov; | |
433 | auio.uio_iovcnt = uap->iovcnt; | |
434 | auio.uio_offset = uap->offset; | |
435 | auio.uio_rw = UIO_WRITE; | |
436 | auio.uio_segflg = UIO_USERSPACE; | |
437 | auio.uio_td = td; | |
438 | ||
9ba76b73 MD |
439 | flags = uap->flags & O_FMASK; |
440 | if (uap->offset != (off_t)-1) | |
441 | flags |= O_FOFFSET; | |
442 | ||
80d831e1 | 443 | error = kern_pwritev(uap->fd, &auio, flags, &sysmsg->sysmsg_szresult); |
7f83ed38 MD |
444 | |
445 | iovec_free(&iov, aiov); | |
446 | return(error); | |
447 | } | |
448 | ||
f832287e MD |
449 | /* |
450 | * MPSAFE | |
451 | */ | |
7f83ed38 | 452 | int |
e54488bb | 453 | kern_pwritev(int fd, struct uio *auio, int flags, size_t *res) |
984263bc | 454 | { |
dadab5e9 | 455 | struct thread *td = curthread; |
41c20dac | 456 | struct file *fp; |
7f83ed38 | 457 | int error; |
984263bc | 458 | |
35949930 | 459 | fp = holdfp(td, fd, FWRITE); |
ba023347 | 460 | if (fp == NULL) |
984263bc | 461 | return (EBADF); |
9ba76b73 | 462 | else if ((flags & O_FOFFSET) && fp->f_type != DTYPE_VNODE) { |
ba023347 | 463 | error = ESPIPE; |
7f83ed38 MD |
464 | } else { |
465 | error = dofilewrite(fd, fp, auio, flags, res); | |
984263bc | 466 | } |
35949930 MD |
467 | dropfp(td, fd, fp); |
468 | ||
469 | return(error); | |
7f83ed38 MD |
470 | } |
471 | ||
472 | /* | |
473 | * Common code for writev and pwritev that writes data to | |
474 | * a file using the passed in uio, offset, and flags. | |
f832287e MD |
475 | * |
476 | * MPALMOSTSAFE - ktrace needs help | |
7f83ed38 MD |
477 | */ |
478 | static int | |
e54488bb | 479 | dofilewrite(int fd, struct file *fp, struct uio *auio, int flags, size_t *res) |
7f83ed38 MD |
480 | { |
481 | struct thread *td = curthread; | |
7278a846 | 482 | struct lwp *lp = td->td_lwp; |
7f83ed38 | 483 | int error; |
e54488bb | 484 | size_t len; |
7f83ed38 MD |
485 | #ifdef KTRACE |
486 | struct iovec *ktriov = NULL; | |
487 | struct uio ktruio; | |
488 | #endif | |
489 | ||
984263bc MD |
490 | #ifdef KTRACE |
491 | /* | |
492 | * if tracing, save a copy of iovec and uio | |
493 | */ | |
dadab5e9 | 494 | if (KTRPOINT(td, KTR_GENIO)) { |
ba023347 DRJ |
495 | int iovlen = auio->uio_iovcnt * sizeof(struct iovec); |
496 | ||
884717e1 | 497 | ktriov = kmalloc(iovlen, M_TEMP, M_WAITOK); |
ba023347 DRJ |
498 | bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen); |
499 | ktruio = *auio; | |
984263bc MD |
500 | } |
501 | #endif | |
ba023347 | 502 | len = auio->uio_resid; |
87de5057 | 503 | error = fo_write(fp, auio, fp->f_cred, flags); |
ba023347 DRJ |
504 | if (error) { |
505 | if (auio->uio_resid != len && (error == ERESTART || | |
984263bc MD |
506 | error == EINTR || error == EWOULDBLOCK)) |
507 | error = 0; | |
7f83ed38 | 508 | /* Socket layer is responsible for issuing SIGPIPE. */ |
89233cfd | 509 | if (error == EPIPE && fp->f_type != DTYPE_SOCKET) |
9fb04d14 | 510 | lwpsignal(lp->lwp_proc, lp, SIGPIPE); |
984263bc | 511 | } |
984263bc MD |
512 | #ifdef KTRACE |
513 | if (ktriov != NULL) { | |
514 | if (error == 0) { | |
515 | ktruio.uio_iov = ktriov; | |
ba023347 | 516 | ktruio.uio_resid = len - auio->uio_resid; |
9fb04d14 | 517 | ktrgenio(lp, fd, UIO_WRITE, &ktruio, error); |
984263bc | 518 | } |
884717e1 | 519 | kfree(ktriov, M_TEMP); |
984263bc MD |
520 | } |
521 | #endif | |
ba023347 DRJ |
522 | if (error == 0) |
523 | *res = len - auio->uio_resid; | |
7f83ed38 MD |
524 | |
525 | return(error); | |
984263bc MD |
526 | } |
527 | ||
528 | /* | |
529 | * Ioctl system call | |
3919ced0 | 530 | * |
3c499555 | 531 | * MPSAFE |
984263bc | 532 | */ |
984263bc | 533 | int |
80d831e1 | 534 | sys_ioctl(struct sysmsg *sysmsg, const struct ioctl_args *uap) |
a0c5fc96 | 535 | { |
3919ced0 MD |
536 | int error; |
537 | ||
80d831e1 | 538 | error = mapped_ioctl(uap->fd, uap->com, uap->data, NULL, sysmsg); |
3919ced0 | 539 | return (error); |
a0c5fc96 JS |
540 | } |
541 | ||
542 | struct ioctl_map_entry { | |
543 | const char *subsys; | |
544 | struct ioctl_map_range *cmd_ranges; | |
545 | LIST_ENTRY(ioctl_map_entry) entries; | |
546 | }; | |
547 | ||
25b5b94d SS |
548 | /* |
549 | * The true heart of all ioctl syscall handlers (native, emulation). | |
550 | * If map != NULL, it will be searched for a matching entry for com, | |
551 | * and appropriate conversions/conversion functions will be utilized. | |
3c499555 MD |
552 | * |
553 | * MPSAFE | |
25b5b94d | 554 | */ |
a0c5fc96 | 555 | int |
87baaf0c MD |
556 | mapped_ioctl(int fd, u_long com, caddr_t uspc_data, struct ioctl_map *map, |
557 | struct sysmsg *msg) | |
984263bc | 558 | { |
dadab5e9 MD |
559 | struct thread *td = curthread; |
560 | struct proc *p = td->td_proc; | |
87de5057 | 561 | struct ucred *cred; |
41c20dac | 562 | struct file *fp; |
a0c5fc96 | 563 | struct ioctl_map_range *iomc = NULL; |
984263bc | 564 | int error; |
1fd87d54 | 565 | u_int size; |
a0c5fc96 | 566 | u_long ocom = com; |
984263bc MD |
567 | caddr_t data, memp; |
568 | int tmp; | |
569 | #define STK_PARAMS 128 | |
570 | union { | |
571 | char stkbuf[STK_PARAMS]; | |
572 | long align; | |
573 | } ubuf; | |
574 | ||
dadab5e9 | 575 | KKASSERT(p); |
9910d07b | 576 | cred = td->td_ucred; |
9dd5bb28 | 577 | memp = NULL; |
984263bc | 578 | |
35949930 | 579 | fp = holdfp(td, fd, FREAD|FWRITE); |
228b401d | 580 | if (fp == NULL) |
a0c5fc96 JS |
581 | return(EBADF); |
582 | ||
583 | if (map != NULL) { /* obey translation map */ | |
584 | u_long maskcmd; | |
585 | struct ioctl_map_entry *e; | |
586 | ||
587 | maskcmd = com & map->mask; | |
588 | ||
12586b82 | 589 | lwkt_gettoken(&mioctl_token); |
a0c5fc96 JS |
590 | LIST_FOREACH(e, &map->mapping, entries) { |
591 | for (iomc = e->cmd_ranges; iomc->start != 0 || | |
25b5b94d SS |
592 | iomc->maptocmd != 0 || iomc->wrapfunc != NULL || |
593 | iomc->mapfunc != NULL; | |
a0c5fc96 JS |
594 | iomc++) { |
595 | if (maskcmd >= iomc->start && | |
596 | maskcmd <= iomc->end) | |
597 | break; | |
598 | } | |
599 | ||
600 | /* Did we find a match? */ | |
601 | if (iomc->start != 0 || iomc->maptocmd != 0 || | |
25b5b94d | 602 | iomc->wrapfunc != NULL || iomc->mapfunc != NULL) |
a0c5fc96 JS |
603 | break; |
604 | } | |
12586b82 | 605 | lwkt_reltoken(&mioctl_token); |
a0c5fc96 JS |
606 | |
607 | if (iomc == NULL || | |
608 | (iomc->start == 0 && iomc->maptocmd == 0 | |
25b5b94d | 609 | && iomc->wrapfunc == NULL && iomc->mapfunc == NULL)) { |
6ea70f76 | 610 | kprintf("%s: 'ioctl' fd=%d, cmd=0x%lx ('%c',%d) not implemented\n", |
a0c5fc96 JS |
611 | map->sys, fd, maskcmd, |
612 | (int)((maskcmd >> 8) & 0xff), | |
613 | (int)(maskcmd & 0xff)); | |
228b401d MD |
614 | error = EINVAL; |
615 | goto done; | |
a0c5fc96 | 616 | } |
984263bc | 617 | |
25b5b94d SS |
618 | /* |
619 | * If it's a non-range one to one mapping, maptocmd should be | |
620 | * correct. If it's a ranged one to one mapping, we pass the | |
621 | * original value of com, and for a range mapped to a different | |
622 | * range, we always need a mapping function to translate the | |
623 | * ioctl to our native ioctl. Ex. 6500-65ff <-> 9500-95ff | |
624 | */ | |
625 | if (iomc->start == iomc->end && iomc->maptocmd == iomc->maptoend) { | |
626 | com = iomc->maptocmd; | |
627 | } else if (iomc->start == iomc->maptocmd && iomc->end == iomc->maptoend) { | |
628 | if (iomc->mapfunc != NULL) | |
629 | com = iomc->mapfunc(iomc->start, iomc->end, | |
630 | iomc->start, iomc->end, | |
631 | com, com); | |
632 | } else { | |
633 | if (iomc->mapfunc != NULL) { | |
634 | com = iomc->mapfunc(iomc->start, iomc->end, | |
635 | iomc->maptocmd, iomc->maptoend, | |
636 | com, ocom); | |
637 | } else { | |
6ea70f76 | 638 | kprintf("%s: Invalid mapping for fd=%d, cmd=%#lx ('%c',%d)\n", |
25b5b94d SS |
639 | map->sys, fd, maskcmd, |
640 | (int)((maskcmd >> 8) & 0xff), | |
641 | (int)(maskcmd & 0xff)); | |
228b401d MD |
642 | error = EINVAL; |
643 | goto done; | |
25b5b94d SS |
644 | } |
645 | } | |
a0c5fc96 JS |
646 | } |
647 | ||
648 | switch (com) { | |
984263bc | 649 | case FIONCLEX: |
228b401d MD |
650 | error = fclrfdflags(p->p_fd, fd, UF_EXCLOSE); |
651 | goto done; | |
984263bc | 652 | case FIOCLEX: |
228b401d MD |
653 | error = fsetfdflags(p->p_fd, fd, UF_EXCLOSE); |
654 | goto done; | |
984263bc MD |
655 | } |
656 | ||
657 | /* | |
658 | * Interpret high order word to find amount of data to be | |
659 | * copied to/from the user's address space. | |
660 | */ | |
661 | size = IOCPARM_LEN(com); | |
228b401d MD |
662 | if (size > IOCPARM_MAX) { |
663 | error = ENOTTY; | |
664 | goto done; | |
665 | } | |
984263bc | 666 | |
13886075 | 667 | if ((com & IOC_VOID) == 0 && size > sizeof(ubuf.stkbuf)) { |
efda3bd0 | 668 | memp = kmalloc(size, M_IOCTLOPS, M_WAITOK); |
984263bc MD |
669 | data = memp; |
670 | } else { | |
d0d4a734 | 671 | memp = NULL; |
984263bc MD |
672 | data = ubuf.stkbuf; |
673 | } | |
13886075 MD |
674 | if (com & IOC_VOID) { |
675 | *(caddr_t *)data = uspc_data; | |
676 | } else if (com & IOC_IN) { | |
a0c5fc96 | 677 | if (size != 0) { |
e54488bb | 678 | error = copyin(uspc_data, data, (size_t)size); |
13886075 | 679 | if (error) |
228b401d | 680 | goto done; |
984263bc | 681 | } else { |
a0c5fc96 | 682 | *(caddr_t *)data = uspc_data; |
984263bc | 683 | } |
a0c5fc96 | 684 | } else if ((com & IOC_OUT) != 0 && size) { |
984263bc MD |
685 | /* |
686 | * Zero the buffer so the user always | |
687 | * gets back something deterministic. | |
688 | */ | |
e54488bb | 689 | bzero(data, (size_t)size); |
984263bc MD |
690 | } |
691 | ||
692 | switch (com) { | |
984263bc MD |
693 | case FIONBIO: |
694 | if ((tmp = *(int *)data)) | |
3c499555 | 695 | atomic_set_int(&fp->f_flag, FNONBLOCK); |
984263bc | 696 | else |
3c499555 | 697 | atomic_clear_int(&fp->f_flag, FNONBLOCK); |
9ba76b73 | 698 | error = 0; |
984263bc MD |
699 | break; |
700 | ||
701 | case FIOASYNC: | |
702 | if ((tmp = *(int *)data)) | |
3c499555 | 703 | atomic_set_int(&fp->f_flag, FASYNC); |
984263bc | 704 | else |
3c499555 | 705 | atomic_clear_int(&fp->f_flag, FASYNC); |
87baaf0c | 706 | error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, cred, msg); |
984263bc MD |
707 | break; |
708 | ||
709 | default: | |
a0c5fc96 JS |
710 | /* |
711 | * If there is a override function, | |
712 | * call it instead of directly routing the call | |
713 | */ | |
25b5b94d | 714 | if (map != NULL && iomc->wrapfunc != NULL) |
87de5057 | 715 | error = iomc->wrapfunc(fp, com, ocom, data, cred); |
a0c5fc96 | 716 | else |
87baaf0c | 717 | error = fo_ioctl(fp, com, data, cred, msg); |
984263bc MD |
718 | /* |
719 | * Copy any data to user, size was | |
720 | * already set and checked above. | |
721 | */ | |
a0c5fc96 | 722 | if (error == 0 && (com & IOC_OUT) != 0 && size != 0) |
e54488bb | 723 | error = copyout(data, uspc_data, (size_t)size); |
984263bc MD |
724 | break; |
725 | } | |
13886075 | 726 | done: |
a0c5fc96 | 727 | if (memp != NULL) |
efda3bd0 | 728 | kfree(memp, M_IOCTLOPS); |
35949930 MD |
729 | dropfp(td, fd, fp); |
730 | ||
a0c5fc96 JS |
731 | return(error); |
732 | } | |
733 | ||
3c499555 MD |
734 | /* |
735 | * MPSAFE | |
736 | */ | |
a0c5fc96 JS |
737 | int |
738 | mapped_ioctl_register_handler(struct ioctl_map_handler *he) | |
739 | { | |
740 | struct ioctl_map_entry *ne; | |
741 | ||
742 | KKASSERT(he != NULL && he->map != NULL && he->cmd_ranges != NULL && | |
743 | he->subsys != NULL && *he->subsys != '\0'); | |
744 | ||
3c499555 MD |
745 | ne = kmalloc(sizeof(struct ioctl_map_entry), M_IOCTLMAP, |
746 | M_WAITOK | M_ZERO); | |
a0c5fc96 JS |
747 | |
748 | ne->subsys = he->subsys; | |
749 | ne->cmd_ranges = he->cmd_ranges; | |
750 | ||
12586b82 | 751 | lwkt_gettoken(&mioctl_token); |
a0c5fc96 | 752 | LIST_INSERT_HEAD(&he->map->mapping, ne, entries); |
12586b82 | 753 | lwkt_reltoken(&mioctl_token); |
a0c5fc96 JS |
754 | |
755 | return(0); | |
756 | } | |
757 | ||
3c499555 MD |
758 | /* |
759 | * MPSAFE | |
760 | */ | |
a0c5fc96 JS |
761 | int |
762 | mapped_ioctl_unregister_handler(struct ioctl_map_handler *he) | |
763 | { | |
764 | struct ioctl_map_entry *ne; | |
3c499555 | 765 | int error = EINVAL; |
a0c5fc96 JS |
766 | |
767 | KKASSERT(he != NULL && he->map != NULL && he->cmd_ranges != NULL); | |
768 | ||
12586b82 | 769 | lwkt_gettoken(&mioctl_token); |
a0c5fc96 | 770 | LIST_FOREACH(ne, &he->map->mapping, entries) { |
3c499555 MD |
771 | if (ne->cmd_ranges == he->cmd_ranges) { |
772 | LIST_REMOVE(ne, entries); | |
773 | kfree(ne, M_IOCTLMAP); | |
774 | error = 0; | |
775 | break; | |
776 | } | |
a0c5fc96 | 777 | } |
12586b82 | 778 | lwkt_reltoken(&mioctl_token); |
3c499555 | 779 | return(error); |
984263bc MD |
780 | } |
781 | ||
fe24d605 MD |
782 | static int nseldebug; |
783 | SYSCTL_INT(_kern, OID_AUTO, nseldebug, CTLFLAG_RW, &nseldebug, 0, ""); | |
984263bc MD |
784 | |
785 | /* | |
786 | * Select system call. | |
3919ced0 | 787 | * |
e5857bf7 | 788 | * MPSAFE |
984263bc | 789 | */ |
984263bc | 790 | int |
80d831e1 | 791 | sys_select(struct sysmsg *sysmsg, const struct select_args *uap) |
ae7cb1b5 NT |
792 | { |
793 | struct timeval ktv; | |
e5857bf7 | 794 | struct timespec *ktsp, kts; |
ae7cb1b5 NT |
795 | int error; |
796 | ||
797 | /* | |
798 | * Get timeout if any. | |
799 | */ | |
800 | if (uap->tv != NULL) { | |
801 | error = copyin(uap->tv, &ktv, sizeof (ktv)); | |
802 | if (error) | |
803 | return (error); | |
e5857bf7 SG |
804 | TIMEVAL_TO_TIMESPEC(&ktv, &kts); |
805 | ktsp = &kts; | |
ae7cb1b5 | 806 | } else { |
e5857bf7 | 807 | ktsp = NULL; |
ae7cb1b5 NT |
808 | } |
809 | ||
810 | /* | |
811 | * Do real work. | |
812 | */ | |
e5857bf7 | 813 | error = doselect(uap->nd, uap->in, uap->ou, uap->ex, ktsp, |
80d831e1 | 814 | &sysmsg->sysmsg_result); |
ae7cb1b5 NT |
815 | |
816 | return (error); | |
817 | } | |
818 | ||
819 | ||
820 | /* | |
821 | * Pselect system call. | |
822 | */ | |
823 | int | |
80d831e1 | 824 | sys_pselect(struct sysmsg *sysmsg, const struct pselect_args *uap) |
ae7cb1b5 NT |
825 | { |
826 | struct thread *td = curthread; | |
827 | struct lwp *lp = td->td_lwp; | |
e5857bf7 | 828 | struct timespec *ktsp, kts; |
ae7cb1b5 NT |
829 | sigset_t sigmask; |
830 | int error; | |
831 | ||
832 | /* | |
e5857bf7 | 833 | * Get timeout if any. |
ae7cb1b5 NT |
834 | */ |
835 | if (uap->ts != NULL) { | |
836 | error = copyin(uap->ts, &kts, sizeof (kts)); | |
837 | if (error) | |
838 | return (error); | |
e5857bf7 | 839 | ktsp = &kts; |
ae7cb1b5 | 840 | } else { |
e5857bf7 | 841 | ktsp = NULL; |
ae7cb1b5 NT |
842 | } |
843 | ||
844 | /* | |
845 | * Install temporary signal mask if any provided. | |
846 | */ | |
847 | if (uap->sigmask != NULL) { | |
848 | error = copyin(uap->sigmask, &sigmask, sizeof(sigmask)); | |
849 | if (error) | |
850 | return (error); | |
73241316 | 851 | lwkt_gettoken(&lp->lwp_proc->p_token); |
ae7cb1b5 NT |
852 | lp->lwp_oldsigmask = lp->lwp_sigmask; |
853 | SIG_CANTMASK(sigmask); | |
854 | lp->lwp_sigmask = sigmask; | |
73241316 | 855 | lwkt_reltoken(&lp->lwp_proc->p_token); |
ae7cb1b5 NT |
856 | } |
857 | ||
858 | /* | |
859 | * Do real job. | |
860 | */ | |
e5857bf7 | 861 | error = doselect(uap->nd, uap->in, uap->ou, uap->ex, ktsp, |
80d831e1 | 862 | &sysmsg->sysmsg_result); |
ae7cb1b5 NT |
863 | |
864 | if (uap->sigmask != NULL) { | |
73241316 | 865 | lwkt_gettoken(&lp->lwp_proc->p_token); |
ae7cb1b5 NT |
866 | /* doselect() responsible for turning ERESTART into EINTR */ |
867 | KKASSERT(error != ERESTART); | |
868 | if (error == EINTR) { | |
869 | /* | |
870 | * We can't restore the previous signal mask now | |
871 | * because it could block the signal that interrupted | |
872 | * us. So make a note to restore it after executing | |
873 | * the handler. | |
874 | */ | |
4643740a | 875 | lp->lwp_flags |= LWP_OLDMASK; |
ae7cb1b5 NT |
876 | } else { |
877 | /* | |
878 | * No handler to run. Restore previous mask immediately. | |
879 | */ | |
880 | lp->lwp_sigmask = lp->lwp_oldsigmask; | |
881 | } | |
73241316 | 882 | lwkt_reltoken(&lp->lwp_proc->p_token); |
ae7cb1b5 NT |
883 | } |
884 | ||
885 | return (error); | |
886 | } | |
887 | ||
e5857bf7 | 888 | static int |
8acdf1cf | 889 | select_copyin(void *arg, struct kevent *kevp, int maxevents, int *events) |
e5857bf7 SG |
890 | { |
891 | struct select_kevent_copyin_args *skap = NULL; | |
8acdf1cf | 892 | struct kevent *kev; |
e5857bf7 SG |
893 | int fd; |
894 | kfd_set *fdp = NULL; | |
895 | short filter = 0; | |
896 | u_int fflags = 0; | |
897 | ||
898 | skap = (struct select_kevent_copyin_args *)arg; | |
899 | ||
8acdf1cf MD |
900 | if (*events == maxevents) |
901 | return (0); | |
902 | ||
e5857bf7 SG |
903 | while (skap->active_set < COPYIN_DONE) { |
904 | switch (skap->active_set) { | |
905 | case COPYIN_READ: | |
8acdf1cf MD |
906 | /* |
907 | * Register descriptors for the read filter | |
908 | */ | |
e5857bf7 SG |
909 | fdp = skap->read_set; |
910 | filter = EVFILT_READ; | |
57b24f4e | 911 | fflags = NOTE_OLDAPI; |
8acdf1cf MD |
912 | if (fdp) |
913 | break; | |
914 | ++skap->active_set; | |
915 | skap->proc_fds = 0; | |
916 | /* fall through */ | |
e5857bf7 | 917 | case COPYIN_WRITE: |
8acdf1cf MD |
918 | /* |
919 | * Register descriptors for the write filter | |
920 | */ | |
e5857bf7 SG |
921 | fdp = skap->write_set; |
922 | filter = EVFILT_WRITE; | |
57b24f4e | 923 | fflags = NOTE_OLDAPI; |
8acdf1cf MD |
924 | if (fdp) |
925 | break; | |
926 | ++skap->active_set; | |
927 | skap->proc_fds = 0; | |
928 | /* fall through */ | |
e5857bf7 | 929 | case COPYIN_EXCEPT: |
8acdf1cf MD |
930 | /* |
931 | * Register descriptors for the exception filter | |
932 | */ | |
e5857bf7 | 933 | fdp = skap->except_set; |
73c344d3 | 934 | filter = EVFILT_EXCEPT; |
57b24f4e | 935 | fflags = NOTE_OLDAPI | NOTE_OOB; |
8acdf1cf MD |
936 | if (fdp) |
937 | break; | |
938 | ++skap->active_set; | |
939 | skap->proc_fds = 0; | |
940 | /* fall through */ | |
941 | case COPYIN_DONE: | |
942 | /* | |
943 | * Nothing left to register | |
944 | */ | |
945 | return(0); | |
946 | /* NOT REACHED */ | |
e5857bf7 SG |
947 | } |
948 | ||
8acdf1cf MD |
949 | while (skap->proc_fds < skap->num_fds) { |
950 | fd = skap->proc_fds; | |
e5857bf7 | 951 | if (FD_ISSET(fd, fdp)) { |
8acdf1cf MD |
952 | kev = &kevp[*events]; |
953 | EV_SET(kev, fd, filter, | |
954 | EV_ADD|EV_ENABLE, | |
955 | fflags, 0, | |
b4372719 MD |
956 | (void *)(uintptr_t) |
957 | skap->lwp->lwp_kqueue_serial); | |
e5857bf7 | 958 | FD_CLR(fd, fdp); |
8acdf1cf | 959 | ++*events; |
22690e50 | 960 | |
fd399d96 SZ |
961 | if (nseldebug) { |
962 | kprintf("select fd %d filter %d " | |
963 | "serial %ju\n", fd, filter, | |
964 | (uintmax_t) | |
965 | skap->lwp->lwp_kqueue_serial); | |
966 | } | |
e5857bf7 | 967 | } |
8acdf1cf MD |
968 | ++skap->proc_fds; |
969 | if (*events == maxevents) | |
e5857bf7 SG |
970 | return (0); |
971 | } | |
e5857bf7 SG |
972 | skap->active_set++; |
973 | skap->proc_fds = 0; | |
974 | } | |
975 | ||
976 | return (0); | |
977 | } | |
978 | ||
979 | static int | |
980 | select_copyout(void *arg, struct kevent *kevp, int count, int *res) | |
981 | { | |
982 | struct select_kevent_copyin_args *skap; | |
983 | struct kevent kev; | |
d6299163 MD |
984 | int i; |
985 | int n; | |
e5857bf7 SG |
986 | |
987 | skap = (struct select_kevent_copyin_args *)arg; | |
988 | ||
e5857bf7 | 989 | for (i = 0; i < count; ++i) { |
22690e50 SG |
990 | /* |
991 | * Filter out and delete spurious events | |
992 | */ | |
fd399d96 | 993 | if ((uint64_t)(uintptr_t)kevp[i].udata != |
e6bc4d0d MD |
994 | skap->lwp->lwp_kqueue_serial) |
995 | { | |
996 | panic("select_copyout: unexpected udata"); | |
95121ed2 | 997 | deregister: |
ce497544 SZ |
998 | kev = kevp[i]; |
999 | kev.flags = EV_DISABLE|EV_DELETE; | |
d6299163 | 1000 | n = 1; |
0d47c594 | 1001 | kqueue_register(&skap->lwp->lwp_kqueue, &kev, &n, 0); |
fd399d96 SZ |
1002 | if (nseldebug) { |
1003 | kprintf("select fd %ju mismatched serial %ju\n", | |
1004 | (uintmax_t)kevp[i].ident, | |
1005 | (uintmax_t)skap->lwp->lwp_kqueue_serial); | |
1006 | } | |
22690e50 SG |
1007 | continue; |
1008 | } | |
1009 | ||
1010 | /* | |
1011 | * Handle errors | |
1012 | */ | |
1013 | if (kevp[i].flags & EV_ERROR) { | |
23a031e3 SZ |
1014 | int error = kevp[i].data; |
1015 | ||
1016 | switch (error) { | |
22690e50 SG |
1017 | case EBADF: |
1018 | /* | |
1019 | * A bad file descriptor is considered a | |
1020 | * fatal error for select, bail out. | |
1021 | */ | |
23a031e3 SZ |
1022 | skap->error = error; |
1023 | *res = -1; | |
1024 | return error; | |
1025 | ||
22690e50 SG |
1026 | default: |
1027 | /* | |
1028 | * Select silently swallows any unknown errors | |
1029 | * for descriptors in the read or write sets. | |
4fbb9324 SG |
1030 | * |
1031 | * ALWAYS filter out EOPNOTSUPP errors from | |
1032 | * filters (at least until all filters support | |
1033 | * EVFILT_EXCEPT) | |
59737d5e MP |
1034 | * |
1035 | * We also filter out ENODEV since dev_dkqfilter | |
1036 | * returns ENODEV if EOPNOTSUPP is returned in an | |
1037 | * inner call. | |
1038 | * | |
1039 | * XXX: fix this | |
22690e50 SG |
1040 | */ |
1041 | if (kevp[i].filter != EVFILT_READ && | |
4fbb9324 | 1042 | kevp[i].filter != EVFILT_WRITE && |
59737d5e MP |
1043 | error != EOPNOTSUPP && |
1044 | error != ENODEV) { | |
23a031e3 SZ |
1045 | skap->error = error; |
1046 | *res = -1; | |
1047 | return error; | |
22690e50 SG |
1048 | } |
1049 | break; | |
1050 | } | |
95121ed2 MD |
1051 | |
1052 | /* | |
1053 | * We must deregister any unsupported select events | |
1054 | * to avoid a live-lock. | |
1055 | */ | |
1056 | if (nseldebug) { | |
23a031e3 | 1057 | kprintf("select fd %ju filter %d error %d\n", |
c9a1a153 | 1058 | (uintmax_t)kevp[i].ident, |
23a031e3 | 1059 | kevp[i].filter, error); |
95121ed2 MD |
1060 | } |
1061 | goto deregister; | |
e5857bf7 | 1062 | } |
203bf8e2 SZ |
1063 | |
1064 | switch (kevp[i].filter) { | |
1065 | case EVFILT_READ: | |
1066 | FD_SET(kevp[i].ident, skap->read_set); | |
1067 | break; | |
1068 | case EVFILT_WRITE: | |
1069 | FD_SET(kevp[i].ident, skap->write_set); | |
1070 | break; | |
1071 | case EVFILT_EXCEPT: | |
1072 | FD_SET(kevp[i].ident, skap->except_set); | |
1073 | break; | |
1074 | } | |
e5857bf7 | 1075 | |
8acdf1cf | 1076 | ++*res; |
e5857bf7 SG |
1077 | } |
1078 | ||
1079 | return (0); | |
1080 | } | |
1081 | ||
8acdf1cf MD |
1082 | /* |
1083 | * Copy select bits in from userland. Allocate kernel memory if the | |
1084 | * set is large. | |
1085 | */ | |
1086 | static int | |
203bf8e2 | 1087 | getbits(int bytes, fd_set *in_set, kfd_set **out_set, kfd_set *tmp_set) |
8acdf1cf | 1088 | { |
203bf8e2 | 1089 | int error; |
8acdf1cf MD |
1090 | |
1091 | if (in_set) { | |
203bf8e2 SZ |
1092 | if (bytes < sizeof(*tmp_set)) |
1093 | *out_set = tmp_set; | |
1094 | else | |
1095 | *out_set = kmalloc(bytes, M_SELECT, M_WAITOK); | |
1096 | error = copyin(in_set, *out_set, bytes); | |
1097 | } else { | |
1098 | *out_set = NULL; | |
1099 | error = 0; | |
8acdf1cf MD |
1100 | } |
1101 | return (error); | |
1102 | } | |
1103 | ||
1104 | /* | |
1105 | * Copy returned select bits back out to userland. | |
1106 | */ | |
1107 | static int | |
1108 | putbits(int bytes, kfd_set *in_set, fd_set *out_set) | |
1109 | { | |
1110 | int error; | |
1111 | ||
1112 | if (in_set) { | |
1113 | error = copyout(in_set, out_set, bytes); | |
1114 | } else { | |
1115 | error = 0; | |
1116 | } | |
1117 | return (error); | |
1118 | } | |
1119 | ||
4979e190 MD |
1120 | static int |
1121 | dotimeout_only(struct timespec *ts) | |
1122 | { | |
1123 | return(nanosleep1(ts, NULL)); | |
1124 | } | |
1125 | ||
ae7cb1b5 NT |
1126 | /* |
1127 | * Common code for sys_select() and sys_pselect(). | |
1128 | * | |
e5857bf7 | 1129 | * in, out and ex are userland pointers. ts must point to validated |
ae7cb1b5 NT |
1130 | * kernel-side timeout value or NULL for infinite timeout. res must |
1131 | * point to syscall return value. | |
1132 | */ | |
1133 | static int | |
e5857bf7 | 1134 | doselect(int nd, fd_set *read, fd_set *write, fd_set *except, |
8acdf1cf | 1135 | struct timespec *ts, int *res) |
984263bc | 1136 | { |
41c20dac | 1137 | struct proc *p = curproc; |
e5857bf7 SG |
1138 | struct select_kevent_copyin_args *kap, ka; |
1139 | int bytes, error; | |
203bf8e2 SZ |
1140 | kfd_set read_tmp; |
1141 | kfd_set write_tmp; | |
1142 | kfd_set except_tmp; | |
984263bc | 1143 | |
8acdf1cf | 1144 | *res = 0; |
ae7cb1b5 | 1145 | if (nd < 0) |
984263bc | 1146 | return (EINVAL); |
0c134065 | 1147 | if (nd == 0 && ts) |
4979e190 MD |
1148 | return (dotimeout_only(ts)); |
1149 | ||
679058fb MD |
1150 | if (nd > p->p_fd->fd_nfiles) /* limit kmalloc */ |
1151 | nd = p->p_fd->fd_nfiles; | |
984263bc | 1152 | |
e5857bf7 SG |
1153 | kap = &ka; |
1154 | kap->lwp = curthread->td_lwp; | |
1155 | kap->num_fds = nd; | |
1156 | kap->proc_fds = 0; | |
1157 | kap->error = 0; | |
8acdf1cf | 1158 | kap->active_set = COPYIN_READ; |
e6f31a83 | 1159 | |
8acdf1cf MD |
1160 | /* |
1161 | * Calculate bytes based on the number of __fd_mask[] array entries | |
1162 | * multiplied by the size of __fd_mask. | |
1163 | */ | |
1164 | bytes = howmany(nd, __NFDBITS) * sizeof(__fd_mask); | |
984263bc | 1165 | |
d0d4a734 MD |
1166 | /* kap->read_set = NULL; not needed */ |
1167 | kap->write_set = NULL; | |
1168 | kap->except_set = NULL; | |
1169 | ||
203bf8e2 SZ |
1170 | error = getbits(bytes, read, &kap->read_set, &read_tmp); |
1171 | if (error == 0) | |
1172 | error = getbits(bytes, write, &kap->write_set, &write_tmp); | |
1173 | if (error == 0) | |
1174 | error = getbits(bytes, except, &kap->except_set, &except_tmp); | |
e5857bf7 | 1175 | if (error) |
8acdf1cf MD |
1176 | goto done; |
1177 | ||
1178 | /* | |
1179 | * NOTE: Make sure the max events passed to kern_kevent() is | |
1180 | * effectively unlimited. (nd * 3) accomplishes this. | |
1181 | * | |
1182 | * (*res) continues to increment as returned events are | |
1183 | * loaded in. | |
1184 | */ | |
679058fb | 1185 | error = kern_kevent(&kap->lwp->lwp_kqueue, 0x7FFFFFFF, res, kap, |
e6bc4d0d MD |
1186 | select_copyin, select_copyout, ts, |
1187 | KEVENT_AUTO_STALE); | |
8acdf1cf MD |
1188 | if (error == 0) |
1189 | error = putbits(bytes, kap->read_set, read); | |
1190 | if (error == 0) | |
1191 | error = putbits(bytes, kap->write_set, write); | |
1192 | if (error == 0) | |
1193 | error = putbits(bytes, kap->except_set, except); | |
984263bc | 1194 | |
8acdf1cf | 1195 | /* |
22690e50 SG |
1196 | * An error from an individual event that should be passed |
1197 | * back to userland (EBADF) | |
8acdf1cf MD |
1198 | */ |
1199 | if (kap->error) | |
1200 | error = kap->error; | |
e5857bf7 | 1201 | |
8acdf1cf MD |
1202 | /* |
1203 | * Clean up. | |
1204 | */ | |
1205 | done: | |
203bf8e2 | 1206 | if (kap->read_set && kap->read_set != &read_tmp) |
8acdf1cf | 1207 | kfree(kap->read_set, M_SELECT); |
203bf8e2 | 1208 | if (kap->write_set && kap->write_set != &write_tmp) |
8acdf1cf | 1209 | kfree(kap->write_set, M_SELECT); |
203bf8e2 | 1210 | if (kap->except_set && kap->except_set != &except_tmp) |
8acdf1cf MD |
1211 | kfree(kap->except_set, M_SELECT); |
1212 | ||
22690e50 | 1213 | kap->lwp->lwp_kqueue_serial += kap->num_fds; |
73c344d3 | 1214 | |
e5857bf7 | 1215 | return (error); |
984263bc MD |
1216 | } |
1217 | ||
1218 | /* | |
1219 | * Poll system call. | |
3919ced0 | 1220 | * |
7fbfbe29 | 1221 | * MPSAFE |
984263bc | 1222 | */ |
984263bc | 1223 | int |
80d831e1 | 1224 | sys_poll(struct sysmsg *sysmsg, const struct poll_args *uap) |
984263bc | 1225 | { |
7fbfbe29 SG |
1226 | struct timespec ts, *tsp; |
1227 | int error; | |
984263bc | 1228 | |
ab2eb4eb | 1229 | if (uap->timeout != INFTIM) { |
0d20b837 MD |
1230 | if (uap->timeout < 0) |
1231 | return (EINVAL); | |
7fbfbe29 SG |
1232 | ts.tv_sec = uap->timeout / 1000; |
1233 | ts.tv_nsec = (uap->timeout % 1000) * 1000 * 1000; | |
1234 | tsp = &ts; | |
984263bc | 1235 | } else { |
7fbfbe29 | 1236 | tsp = NULL; |
984263bc | 1237 | } |
e6f31a83 | 1238 | |
80d831e1 | 1239 | error = dopoll(uap->nfds, uap->fds, tsp, &sysmsg->sysmsg_result, 0); |
6d2444c4 IV |
1240 | |
1241 | return (error); | |
1242 | } | |
1243 | ||
1244 | /* | |
1245 | * Ppoll system call. | |
1246 | * | |
1247 | * MPSAFE | |
1248 | */ | |
1249 | int | |
80d831e1 | 1250 | sys_ppoll(struct sysmsg *sysmsg, const struct ppoll_args *uap) |
6d2444c4 IV |
1251 | { |
1252 | struct thread *td = curthread; | |
1253 | struct lwp *lp = td->td_lwp; | |
1254 | struct timespec *ktsp, kts; | |
1255 | sigset_t sigmask; | |
1256 | int error; | |
1257 | ||
1258 | /* | |
1259 | * Get timeout if any. | |
1260 | */ | |
1261 | if (uap->ts != NULL) { | |
1262 | error = copyin(uap->ts, &kts, sizeof (kts)); | |
1263 | if (error) | |
1264 | return (error); | |
1265 | ktsp = &kts; | |
1266 | } else { | |
1267 | ktsp = NULL; | |
1268 | } | |
1269 | ||
1270 | /* | |
1271 | * Install temporary signal mask if any provided. | |
1272 | */ | |
1273 | if (uap->sigmask != NULL) { | |
1274 | error = copyin(uap->sigmask, &sigmask, sizeof(sigmask)); | |
1275 | if (error) | |
1276 | return (error); | |
1277 | lwkt_gettoken(&lp->lwp_proc->p_token); | |
1278 | lp->lwp_oldsigmask = lp->lwp_sigmask; | |
1279 | SIG_CANTMASK(sigmask); | |
1280 | lp->lwp_sigmask = sigmask; | |
1281 | lwkt_reltoken(&lp->lwp_proc->p_token); | |
1282 | } | |
1283 | ||
80d831e1 | 1284 | error = dopoll(uap->nfds, uap->fds, ktsp, &sysmsg->sysmsg_result, |
6d2444c4 IV |
1285 | ktsp != NULL ? KEVENT_TIMEOUT_PRECISE : 0); |
1286 | ||
1287 | if (uap->sigmask != NULL) { | |
1288 | lwkt_gettoken(&lp->lwp_proc->p_token); | |
1289 | /* dopoll() responsible for turning ERESTART into EINTR */ | |
1290 | KKASSERT(error != ERESTART); | |
1291 | if (error == EINTR) { | |
1292 | /* | |
1293 | * We can't restore the previous signal mask now | |
1294 | * because it could block the signal that interrupted | |
1295 | * us. So make a note to restore it after executing | |
1296 | * the handler. | |
1297 | */ | |
1298 | lp->lwp_flags |= LWP_OLDMASK; | |
1299 | } else { | |
1300 | /* | |
1301 | * No handler to run. Restore previous mask immediately. | |
1302 | */ | |
1303 | lp->lwp_sigmask = lp->lwp_oldsigmask; | |
1304 | } | |
1305 | lwkt_reltoken(&lp->lwp_proc->p_token); | |
1306 | } | |
7fbfbe29 | 1307 | |
984263bc MD |
1308 | return (error); |
1309 | } | |
1310 | ||
1311 | static int | |
7fbfbe29 | 1312 | poll_copyin(void *arg, struct kevent *kevp, int maxevents, int *events) |
984263bc | 1313 | { |
7fbfbe29 SG |
1314 | struct poll_kevent_copyin_args *pkap; |
1315 | struct pollfd *pfd; | |
1316 | struct kevent *kev; | |
1317 | int kev_count; | |
1318 | ||
1319 | pkap = (struct poll_kevent_copyin_args *)arg; | |
1320 | ||
1321 | while (pkap->pfds < pkap->nfds) { | |
1322 | pfd = &pkap->fds[pkap->pfds]; | |
1323 | ||
1324 | /* Clear return events */ | |
1325 | pfd->revents = 0; | |
1326 | ||
ffdd7c7c SG |
1327 | /* Do not check if fd is equal to -1 */ |
1328 | if (pfd->fd == -1) { | |
1329 | ++pkap->pfds; | |
1330 | continue; | |
1331 | } | |
1332 | ||
0d47c594 MD |
1333 | /* |
1334 | * NOTE: pfd->events == 0 implies POLLHUP in BSDs. Used | |
1335 | * by at least sshd and X11 udev support. | |
1336 | */ | |
7fbfbe29 | 1337 | kev_count = 0; |
0d47c594 MD |
1338 | if (pfd->events == 0) |
1339 | kev_count++; | |
8eaaa203 | 1340 | if (pfd->events & (POLLIN | POLLHUP | POLLRDNORM)) |
7fbfbe29 SG |
1341 | kev_count++; |
1342 | if (pfd->events & (POLLOUT | POLLWRNORM)) | |
1343 | kev_count++; | |
1344 | if (pfd->events & (POLLPRI | POLLRDBAND)) | |
1345 | kev_count++; | |
1346 | ||
1347 | if (*events + kev_count > maxevents) | |
1348 | return (0); | |
1349 | ||
fe24d605 MD |
1350 | /* |
1351 | * NOTE: A combined serial number and poll array index is | |
0d47c594 MD |
1352 | * stored in kev->udata. |
1353 | * | |
1354 | * NOTE: Events will be registered with KEVENT_UNIQUE_NOTES | |
1355 | * set, using kev->data for the uniqifier. kev->data | |
1356 | * is an implied in the actual registration. | |
fe24d605 | 1357 | */ |
7fbfbe29 | 1358 | kev = &kevp[*events]; |
0d47c594 MD |
1359 | |
1360 | /* | |
1361 | * Implied POLLHUP | |
1362 | */ | |
1363 | if (pfd->events == 0) { | |
1364 | int notes = NOTE_OLDAPI | NOTE_HUPONLY; | |
1365 | ||
1366 | EV_SET(kev++, pfd->fd, EVFILT_READ, EV_ADD|EV_ENABLE, | |
1367 | notes, pkap->pfds, (void *)(uintptr_t) | |
1368 | (pkap->lwp->lwp_kqueue_serial + pkap->pfds)); | |
1369 | } | |
1370 | ||
1371 | /* | |
1372 | * Nominal read events | |
1373 | */ | |
8eaaa203 | 1374 | if (pfd->events & (POLLIN | POLLHUP | POLLRDNORM)) { |
6df899ee MD |
1375 | int notes = NOTE_OLDAPI; |
1376 | if ((pfd->events & (POLLIN | POLLRDNORM)) == 0) | |
1377 | notes |= NOTE_HUPONLY; | |
1378 | ||
7fbfbe29 | 1379 | EV_SET(kev++, pfd->fd, EVFILT_READ, EV_ADD|EV_ENABLE, |
0d47c594 | 1380 | notes, pkap->pfds, (void *)(uintptr_t) |
b4372719 | 1381 | (pkap->lwp->lwp_kqueue_serial + pkap->pfds)); |
fe24d605 | 1382 | } |
0d47c594 MD |
1383 | |
1384 | /* | |
1385 | * Nominal write events | |
1386 | */ | |
fe24d605 | 1387 | if (pfd->events & (POLLOUT | POLLWRNORM)) { |
7fbfbe29 | 1388 | EV_SET(kev++, pfd->fd, EVFILT_WRITE, EV_ADD|EV_ENABLE, |
0d47c594 | 1389 | NOTE_OLDAPI, pkap->pfds, (void *)(uintptr_t) |
b4372719 | 1390 | (pkap->lwp->lwp_kqueue_serial + pkap->pfds)); |
fe24d605 | 1391 | } |
0d47c594 MD |
1392 | |
1393 | /* | |
1394 | * Nominal exceptional events | |
1395 | */ | |
fe24d605 | 1396 | if (pfd->events & (POLLPRI | POLLRDBAND)) { |
7fbfbe29 | 1397 | EV_SET(kev++, pfd->fd, EVFILT_EXCEPT, EV_ADD|EV_ENABLE, |
0d47c594 | 1398 | NOTE_OLDAPI | NOTE_OOB, pkap->pfds, |
b4372719 MD |
1399 | (void *)(uintptr_t) |
1400 | (pkap->lwp->lwp_kqueue_serial + pkap->pfds)); | |
fe24d605 MD |
1401 | } |
1402 | ||
1403 | if (nseldebug) { | |
fd399d96 SZ |
1404 | kprintf("poll index %d/%d fd %d events %08x " |
1405 | "serial %ju\n", pkap->pfds, pkap->nfds-1, | |
1406 | pfd->fd, pfd->events, | |
1407 | (uintmax_t)pkap->lwp->lwp_kqueue_serial); | |
fe24d605 | 1408 | } |
7fbfbe29 SG |
1409 | |
1410 | ++pkap->pfds; | |
1411 | (*events) += kev_count; | |
1412 | } | |
1413 | ||
1414 | return (0); | |
1415 | } | |
1416 | ||
/*
 * Event-consumer callback that dopoll() hands to kern_kevent().
 *
 * Maps each of the count returned kevents back to its pollfd entry (the
 * index is recovered from udata by subtracting the lwp's kqueue serial)
 * and translates the kevent into revents bits.  *res is incremented once
 * per pollfd that ends up with a non-zero revents.  Events that produce no
 * revents bits are deleted from the lwp's kqueue via the deregister label.
 *
 * Always returns 0.
 */
static int
poll_copyout(void *arg, struct kevent *kevp, int count, int *res)
{
	struct poll_kevent_copyin_args *pkap;
	struct pollfd *pfd;
	struct kevent kev;
	int count_res;
	int i;
	int n;
	uint64_t pi;

	pkap = (struct poll_kevent_copyin_args *)arg;

	for (i = 0; i < count; ++i) {
		/*
		 * Extract the poll array index and delete spurious events.
		 * We can easily tell if the serial number is incorrect
		 * by checking whether the extracted index is out of range.
		 */
		pi = (uint64_t)(uintptr_t)kevp[i].udata -
		     pkap->lwp->lwp_kqueue_serial;
		if (pi >= pkap->nfds) {
			panic("poll_copyout: unexpected udata");
			/*
			 * The code below is only reached through the
			 * gotos further down in this loop body.
			 */
deregister:
			kev = kevp[i];
			kev.flags = EV_DISABLE|EV_DELETE;
			kev.data = pi;	/* uniquifier */
			n = 1;
			kqueue_register(&pkap->lwp->lwp_kqueue, &kev, &n,
					KEVENT_UNIQUE_NOTES);
			if (nseldebug) {
				kprintf("poll index %ju out of range against "
					"serial %ju\n", (uintmax_t)pi,
					(uintmax_t)pkap->lwp->lwp_kqueue_serial);
			}
			continue;
		}

		/*
		 * Locate the pollfd and process events
		 */
		pfd = &pkap->fds[pi];
		if (kevp[i].ident == pfd->fd) {
			/*
			 * A single descriptor may generate an error against
			 * more than one filter, make sure to set the
			 * appropriate flags but do not increment (*res)
			 * more than once.
			 */
			count_res = (pfd->revents == 0);
			if (kevp[i].flags & EV_ERROR) {
				switch(kevp[i].data) {
				case EBADF:
				/*
				 * NOTE(review): POLLNVAL is a poll flag, not
				 * an errno; presumably some filter reports
				 * it in kev.data -- confirm.
				 */
				case POLLNVAL:
					/* Bad file descriptor */
					if (count_res)
						++*res;
					pfd->revents |= POLLNVAL;
					break;
				default:
					/*
					 * Poll silently swallows any unknown
					 * errors except in the case of POLLPRI
					 * (OOB/urgent data).
					 *
					 * ALWAYS filter out EOPNOTSUPP errors
					 * from filters, common applications
					 * set POLLPRI|POLLRDBAND and most
					 * filters do not support EVFILT_EXCEPT.
					 *
					 * We also filter out ENODEV since
					 * dev_dkqfilter returns ENODEV if
					 * EOPNOTSUPP is returned in an
					 * inner call.
					 *
					 * XXX: fix this
					 */
					if (kevp[i].filter != EVFILT_READ &&
					    kevp[i].filter != EVFILT_WRITE &&
					    kevp[i].data != EOPNOTSUPP &&
					    kevp[i].data != ENODEV) {
						if (count_res)
							++*res;
						pfd->revents |= POLLERR;
					}
					break;
				}
				if (pfd->revents == 0 && nseldebug) {
					kprintf("poll index EV_ERROR %ju fd %d "
						"filter %d error %jd\n",
						(uintmax_t)pi, pfd->fd,
						kevp[i].filter,
						(intmax_t)kevp[i].data);
				}

				/*
				 * Silently deregister any unhandled EV_ERROR
				 * condition (usually EOPNOTSUPP).
				 */
				if (pfd->revents == 0)
					goto deregister;
				continue;
			}

			switch (kevp[i].filter) {
			case EVFILT_READ:
				/*
				 * NODATA on the read side can indicate a
				 * half-closed situation and not necessarily
				 * a disconnect, so depend on the user
				 * issuing a read() and getting 0 bytes back.
				 *
				 * If EV_HUP is set the peer completely
				 * disconnected and we can set POLLHUP.
				 * Linux can return POLLHUP even if read
				 * data has not been drained, so we should
				 * too.
				 */
				/* if (kevp[i].flags & EV_NODATA) */ {
					if (kevp[i].flags & EV_HUP)
						pfd->revents |= POLLHUP;
				}
				/* EOF with non-zero fflags is reported as POLLERR */
				if ((kevp[i].flags & EV_EOF) &&
				    kevp[i].fflags != 0)
					pfd->revents |= POLLERR;
				/* Only report the read bits the caller asked for */
				if (pfd->events & POLLIN)
					pfd->revents |= POLLIN;
				if (pfd->events & POLLRDNORM)
					pfd->revents |= POLLRDNORM;
				break;
			case EVFILT_WRITE:
				/*
				 * As per the OpenGroup POLLHUP is mutually
				 * exclusive with the writability flags.  I
				 * consider this a bit broken but...
				 *
				 * In this case a disconnect is implied even
				 * for a half-closed (write side) situation.
				 */
				if (kevp[i].flags & EV_EOF) {
					pfd->revents |= POLLHUP;
					if (kevp[i].fflags != 0)
						pfd->revents |= POLLERR;
				} else {
					if (pfd->events & POLLOUT)
						pfd->revents |= POLLOUT;
					if (pfd->events & POLLWRNORM)
						pfd->revents |= POLLWRNORM;
				}
				break;
			case EVFILT_EXCEPT:
				/*
				 * EV_NODATA should never be tagged for this
				 * filter.
				 */
				if (pfd->events & POLLPRI)
					pfd->revents |= POLLPRI;
				if (pfd->events & POLLRDBAND)
					pfd->revents |= POLLRDBAND;
				break;
			}

			if (nseldebug) {
				kprintf("poll index %ju/%d fd %d "
					"revents %08x\n", (uintmax_t)pi, pkap->nfds,
					pfd->fd, pfd->revents);
			}

			/*
			 * Count this pollfd only if it had no revents before
			 * this event and has some now.
			 */
			if (count_res && pfd->revents)
				++*res;
		}

		/*
		 * We must deregister any kqueue poll event that does not
		 * set poll return bits to prevent a live-lock.
		 *
		 * NOTE(review): unlike the other diagnostics in this
		 * function, this kprintf is not gated by nseldebug --
		 * confirm that is intentional.
		 */
		if (pfd->revents == 0) {
			kprintf("poll index %ju no-action %ju/%d "
				"events=%08x kevpfilt=%d/%08x\n",
				(uintmax_t)pi, (uintmax_t)kevp[i].ident,
				pfd->fd, pfd->events,
				kevp[i].filter, kevp[i].flags);
			goto deregister;
		}
	}

	return (0);
}
1605 | ||
7fbfbe29 | 1606 | static int |
6d2444c4 | 1607 | dopoll(int nfds, struct pollfd *fds, struct timespec *ts, int *res, int flags) |
7fbfbe29 | 1608 | { |
7fbfbe29 SG |
1609 | struct poll_kevent_copyin_args ka; |
1610 | struct pollfd sfds[64]; | |
679058fb | 1611 | int bytes; |
7fbfbe29 SG |
1612 | int error; |
1613 | ||
0d47c594 | 1614 | flags |= KEVENT_AUTO_STALE | KEVENT_UNIQUE_NOTES; |
e6bc4d0d | 1615 | |
7fbfbe29 SG |
1616 | *res = 0; |
1617 | if (nfds < 0) | |
1618 | return (EINVAL); | |
679058fb | 1619 | |
0c134065 | 1620 | if (nfds == 0 && ts) |
4979e190 MD |
1621 | return (dotimeout_only(ts)); |
1622 | ||
679058fb MD |
1623 | /* |
1624 | * This is a bit arbitrary but we need to limit internal kmallocs. | |
1625 | */ | |
1626 | if (nfds > maxfilesperproc * 2) | |
1627 | nfds = maxfilesperproc * 2; | |
1628 | bytes = sizeof(struct pollfd) * nfds; | |
7fbfbe29 SG |
1629 | |
1630 | ka.lwp = curthread->td_lwp; | |
1631 | ka.nfds = nfds; | |
1632 | ka.pfds = 0; | |
1633 | ka.error = 0; | |
1634 | ||
1635 | if (ka.nfds < 64) | |
1636 | ka.fds = sfds; | |
1637 | else | |
1638 | ka.fds = kmalloc(bytes, M_SELECT, M_WAITOK); | |
1639 | ||
679058fb | 1640 | error = copyin(fds, ka.fds, bytes); |
0d47c594 | 1641 | |
7fbfbe29 | 1642 | if (error == 0) |
3c2a46a7 | 1643 | error = kern_kevent(&ka.lwp->lwp_kqueue, 0x7FFFFFFF, res, &ka, |
6d2444c4 | 1644 | poll_copyin, poll_copyout, ts, flags); |
7fbfbe29 SG |
1645 | |
1646 | if (error == 0) | |
679058fb | 1647 | error = copyout(ka.fds, fds, bytes); |
7fbfbe29 SG |
1648 | |
1649 | if (ka.fds != sfds) | |
1650 | kfree(ka.fds, M_SELECT); | |
1651 | ||
fe24d605 MD |
1652 | ka.lwp->lwp_kqueue_serial += nfds; |
1653 | ||
7fbfbe29 SG |
1654 | return (error); |
1655 | } | |
1656 | ||
8b5c39bb SG |
/*
 * Event-producer callback used by socket_wait().  socket_wait() registers
 * its single event itself, so there is nothing to add here.
 */
static int
socket_wait_copyin(void *arg, struct kevent *kevp, int maxevents, int *events)
{
	(void)arg;
	(void)kevp;
	(void)maxevents;
	(void)events;

	return (0);
}
1662 | ||
/*
 * Event-consumer callback used by socket_wait().  The event contents are
 * ignored; each invocation simply adds one to *res.
 */
static int
socket_wait_copyout(void *arg, struct kevent *kevp, int count, int *res)
{
	(void)arg;
	(void)kevp;
	(void)count;

	*res += 1;
	return (0);
}
1669 | ||
1670 | extern struct fileops socketops; | |
6cef7136 MD |
1671 | |
1672 | /* | |
1673 | * NOTE: Callers of socket_wait() must already have a reference on the | |
1674 | * socket. | |
1675 | */ | |
8b5c39bb SG |
1676 | int |
1677 | socket_wait(struct socket *so, struct timespec *ts, int *res) | |
1678 | { | |
ac62ea3c | 1679 | struct thread *td = curthread; |
8b5c39bb | 1680 | struct file *fp; |
8b5c39bb SG |
1681 | struct kqueue kq; |
1682 | struct kevent kev; | |
ac62ea3c | 1683 | int error, fd; |
d6299163 | 1684 | int n; |
8b5c39bb | 1685 | |
ac62ea3c | 1686 | if ((error = falloc(td->td_lwp, &fp, &fd)) != 0) |
8b5c39bb SG |
1687 | return (error); |
1688 | ||
ac62ea3c SG |
1689 | fp->f_type = DTYPE_SOCKET; |
1690 | fp->f_flag = FREAD | FWRITE; | |
8b5c39bb SG |
1691 | fp->f_ops = &socketops; |
1692 | fp->f_data = so; | |
ac62ea3c | 1693 | fsetfd(td->td_lwp->lwp_proc->p_fd, fp, fd); |
eba68494 | 1694 | fsetfdflags(td->td_proc->p_fd, fd, UF_EXCLOSE); |
8b5c39bb | 1695 | |
eba68494 | 1696 | bzero(&kq, sizeof(kq)); |
ac62ea3c SG |
1697 | kqueue_init(&kq, td->td_lwp->lwp_proc->p_fd); |
1698 | EV_SET(&kev, fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, NULL); | |
d6299163 | 1699 | n = 1; |
0d47c594 | 1700 | if ((error = kqueue_register(&kq, &kev, &n, 0)) != 0) { |
8b5c39bb SG |
1701 | fdrop(fp); |
1702 | return (error); | |
1703 | } | |
1704 | ||
1705 | error = kern_kevent(&kq, 1, res, NULL, socket_wait_copyin, | |
6d2444c4 | 1706 | socket_wait_copyout, ts, 0); |
ac62ea3c | 1707 | |
eba68494 | 1708 | EV_SET(&kev, fd, EVFILT_READ, EV_DELETE|EV_DISABLE, 0, 0, NULL); |
d6299163 | 1709 | n = 1; |
0d47c594 | 1710 | kqueue_register(&kq, &kev, &n, 0); |
ac62ea3c | 1711 | fp->f_ops = &badfileops; |
8b5c39bb SG |
1712 | fdrop(fp); |
1713 | ||
1714 | return (error); | |
1715 | } | |
1716 | ||
984263bc MD |
/*
 * OpenBSD poll system call.
 *
 * Implemented by casting the argument structure to struct poll_args and
 * forwarding to sys_poll().
 * XXX this isn't quite a true representation.. OpenBSD uses select ops.
 *
 * MPSAFE
 */
int
sys_openbsd_poll(struct sysmsg *sysmsg, const struct openbsd_poll_args *uap)
{
	const struct poll_args *poll_uap = (const struct poll_args *)uap;

	return (sys_poll(sysmsg, poll_uap));
}
1728 | ||
1729 | /*ARGSUSED*/ | |
1730 | int | |
b13267a5 | 1731 | seltrue(cdev_t dev, int events) |
984263bc | 1732 | { |
984263bc MD |
1733 | return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); |
1734 | } |