Initial import from FreeBSD RELENG_4:
dragonfly.git: sys/kern/kern_event.c

/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.2.2.9 2003/05/08 07:47:16 kbyanc Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/select.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/uio.h>

#include <vm/vm_zone.h>

MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

static int	kqueue_scan(struct file *fp, int maxevents,
		    struct kevent *ulistp, const struct timespec *timeout,
		    struct proc *p);
static int	kqueue_read(struct file *fp, struct uio *uio,
		    struct ucred *cred, int flags, struct proc *p);
static int	kqueue_write(struct file *fp, struct uio *uio,
		    struct ucred *cred, int flags, struct proc *p);
static int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
		    struct proc *p);
static int	kqueue_poll(struct file *fp, int events, struct ucred *cred,
		    struct proc *p);
static int	kqueue_kqfilter(struct file *fp, struct knote *kn);
static int	kqueue_stat(struct file *fp, struct stat *st, struct proc *p);
static int	kqueue_close(struct file *fp, struct proc *p);
static void	kqueue_wakeup(struct kqueue *kq);

static struct fileops kqueueops = {
	kqueue_read,
	kqueue_write,
	kqueue_ioctl,
	kqueue_poll,
	kqueue_kqfilter,
	kqueue_stat,
	kqueue_close
};

static void	knote_attach(struct knote *kn, struct filedesc *fdp);
static void	knote_drop(struct knote *kn, struct proc *p);
static void	knote_enqueue(struct knote *kn);
static void	knote_dequeue(struct knote *kn);
static void	knote_init(void);
static struct knote *knote_alloc(void);
static void	knote_free(struct knote *kn);

static void	filt_kqdetach(struct knote *kn);
static int	filt_kqueue(struct knote *kn, long hint);
static int	filt_procattach(struct knote *kn);
static void	filt_procdetach(struct knote *kn);
static int	filt_proc(struct knote *kn, long hint);
static int	filt_fileattach(struct knote *kn);
static void	filt_timerexpire(void *knx);
static int	filt_timerattach(struct knote *kn);
static void	filt_timerdetach(struct knote *kn);
static int	filt_timer(struct knote *kn, long hint);

static struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };
static struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };
static struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };
static struct filterops timer_filtops =
	{ 0, filt_timerattach, filt_timerdetach, filt_timer };

static vm_zone_t knote_zone;
static int	kq_ncallouts = 0;
static int	kq_calloutmax = (4 * 1024);
SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
    &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");

#define KNOTE_ACTIVATE(kn) do {						\
	kn->kn_status |= KN_ACTIVE;					\
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)		\
		knote_enqueue(kn);					\
} while(0)

#define	KN_HASHSIZE		64		/* XXX should be tunable */
#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

extern struct filterops aio_filtops;
extern struct filterops sig_filtops;

/*
 * Table for all system-defined filters.  The EVFILT_* constants are
 * negative, and ~filter converts them to a zero-based index here.
 */
static struct filterops *sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	&aio_filtops,			/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
};

static int
filt_fileattach(struct knote *kn)
{

	return (fo_kqfilter(kn->kn_fp, kn));
}

/*ARGSUSED*/
static int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (1);

	kn->kn_fop = &kqread_filtops;
	SLIST_INSERT_HEAD(&kq->kq_sel.si_note, kn, kn_selnext);
	return (0);
}

static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	SLIST_REMOVE(&kq->kq_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}

static int
filt_procattach(struct knote *kn)
{
	struct proc *p;
	int immediate;

	immediate = 0;
	p = pfind(kn->kn_id);
	if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
		p = zpfind(kn->kn_id);
		immediate = 1;
	}
	if (p == NULL)
		return (ESRCH);
	if (! PRISON_CHECK(curproc, p))
		return (EACCES);

	kn->kn_ptr.p_proc = p;
	kn->kn_flags |= EV_CLEAR;		/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	/* XXX lock the proc here while adding to the list? */
	SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);

	/*
	 * Immediately activate any exit notes if the target process is a
	 * zombie.  This is necessary to handle the case where the target
	 * process, e.g. a child, dies before the kevent is registered.
	 */
	if (immediate && filt_proc(kn, NOTE_EXIT))
		KNOTE_ACTIVATE(kn);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  When that process
 * exits, the knote is marked as DETACHED and also flagged ONESHOT, so
 * it will be deleted when read out.  This routine is called as part of
 * that deletion, so it must check the DETACHED state to avoid
 * performing a detach against a process that no longer exists.
 */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p = kn->kn_ptr.p_proc;

	if (kn->kn_status & KN_DETACHED)
		return;

	/* XXX locking?  this might modify another process. */
	SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
}

static int
filt_proc(struct knote *kn, long hint)
{
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished.
	 */
	if (event == NOTE_EXIT) {
		kn->kn_status |= KN_DETACHED;
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		error = kqueue_register(kn->kn_kq, &kev, NULL);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}
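
/*
 * Usage sketch (illustrative, not part of the original source; "pid"
 * is an assumed process id):  to follow a process and everything it
 * forks, userland registers
 *
 *	EV_SET(&kev, pid, EVFILT_PROC, EV_ADD,
 *	    NOTE_EXIT | NOTE_FORK | NOTE_TRACK, 0, NULL);
 *
 * On fork, the child is reported as a NOTE_CHILD event whose data
 * field holds the parent's pid (the EV_FLAG1 path above); if the
 * automatic attach of the child's knote fails, NOTE_TRACKERR is set
 * on the parent's knote instead.
 */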

static void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	kn->kn_data++;
	KNOTE_ACTIVATE(kn);

	if ((kn->kn_flags & EV_ONESHOT) == 0) {
		tv.tv_sec = kn->kn_sdata / 1000;
		tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
		tticks = tvtohz(&tv);
		calloutp = (struct callout *)kn->kn_hook;
		callout_reset(calloutp, tticks, filt_timerexpire, kn);
	}
}

/*
 * The data field contains the amount of time to sleep, in milliseconds.
 */
static int
filt_timerattach(struct knote *kn)
{
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	if (kq_ncallouts >= kq_calloutmax)
		return (ENOMEM);
	kq_ncallouts++;

	tv.tv_sec = kn->kn_sdata / 1000;
	tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
	tticks = tvtohz(&tv);

	kn->kn_flags |= EV_CLEAR;		/* automatically set */
	MALLOC(calloutp, struct callout *, sizeof(*calloutp),
	    M_KQUEUE, M_WAITOK);
	callout_init(calloutp);
	callout_reset(calloutp, tticks, filt_timerexpire, kn);
	kn->kn_hook = (caddr_t)calloutp;

	return (0);
}

static void
filt_timerdetach(struct knote *kn)
{
	struct callout *calloutp;

	calloutp = (struct callout *)kn->kn_hook;
	callout_stop(calloutp);
	FREE(calloutp, M_KQUEUE);
	kq_ncallouts--;
}

static int
filt_timer(struct knote *kn, long hint)
{

	return (kn->kn_data != 0);
}
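
/*
 * Usage sketch (illustrative, not part of the original source): the
 * kn_sdata used above comes from the kevent data field, so a periodic
 * 500 millisecond timer is registered from userland as
 *
 *	EV_SET(&kev, 1, EVFILT_TIMER, EV_ADD, 0, 500, NULL);
 *
 * Because EV_CLEAR is forced on, each scan returns the number of
 * expirations (kn_data) since the last kevent() call and then resets
 * the count; adding EV_ONESHOT instead delivers a single expiration.
 */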

int
kqueue(struct proc *p, struct kqueue_args *uap)
{
	struct filedesc *fdp = p->p_fd;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	error = falloc(p, &fp, &fd);
	if (error)
		return (error);
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	kq = malloc(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO);
	TAILQ_INIT(&kq->kq_head);
	fp->f_data = (caddr_t)kq;
	p->p_retval[0] = fd;
	if (fdp->fd_knlistsize < 0)
		fdp->fd_knlistsize = 0;		/* this process has a kq */
	kq->kq_fdp = fdp;
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct kevent_args {
	int	fd;
	const struct kevent *changelist;
	int	nchanges;
	struct	kevent *eventlist;
	int	nevents;
	const struct timespec *timeout;
};
#endif
int
kevent(struct proc *p, struct kevent_args *uap)
{
	struct filedesc *fdp = p->p_fd;
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp = NULL;
	struct timespec ts;
	int i, n, nerrors, error;

	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
	    (fp->f_type != DTYPE_KQUEUE))
		return (EBADF);

	fhold(fp);

	if (uap->timeout != NULL) {
		error = copyin(uap->timeout, &ts, sizeof(ts));
		if (error)
			goto done;
		uap->timeout = &ts;
	}

	kq = (struct kqueue *)fp->f_data;
	nerrors = 0;

	while (uap->nchanges > 0) {
		n = uap->nchanges > KQ_NEVENTS ? KQ_NEVENTS : uap->nchanges;
		error = copyin(uap->changelist, kq->kq_kev,
		    n * sizeof(struct kevent));
		if (error)
			goto done;
		for (i = 0; i < n; i++) {
			kevp = &kq->kq_kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, p);
			if (error) {
				if (uap->nevents != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					(void) copyout((caddr_t)kevp,
					    (caddr_t)uap->eventlist,
					    sizeof(*kevp));
					uap->eventlist++;
					uap->nevents--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		uap->nchanges -= n;
		uap->changelist += n;
	}
	if (nerrors) {
		p->p_retval[0] = nerrors;
		error = 0;
		goto done;
	}

	error = kqueue_scan(fp, uap->nevents, uap->eventlist, uap->timeout, p);
done:
	if (fp != NULL)
		fdrop(fp, p);
	return (error);
}
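
/*
 * Userland usage sketch (illustrative, not part of the original
 * source; "fd" is an assumed descriptor, and <sys/types.h>,
 * <sys/event.h>, and <sys/time.h> are assumed included):
 *
 *	int kq = kqueue();
 *	struct kevent change, event;
 *
 *	EV_SET(&change, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &change, 1, NULL, 0, NULL);	(register only)
 *	kevent(kq, NULL, 0, &event, 1, NULL);	(block for one event)
 *
 * Changes are consumed in KQ_NEVENTS-sized batches above; a failed
 * registration is reported back through the eventlist with EV_ERROR
 * set and the errno in the data field, rather than aborting the call.
 */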

int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p)
{
	struct filedesc *fdp = kq->kq_fdp;
	struct filterops *fops;
	struct file *fp = NULL;
	struct knote *kn = NULL;
	int s, error = 0;

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	} else {
		/*
		 * XXX
		 * filter attach routine is responsible for ensuring that
		 * the identifier can be attached to it.
		 */
		printf("unknown filter: %d\n", kev->filter);
		return (EINVAL);
	}

	if (fops->f_isfd) {
		/* validate descriptor */
		if ((u_int)kev->ident >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[kev->ident]) == NULL)
			return (EBADF);
		fhold(fp);

		if (kev->ident < fdp->fd_knlistsize) {
			SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
				if (kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	} else {
		if (fdp->fd_knhashmask != 0) {
			struct klist *list;

			list = &fdp->fd_knhash[
			    KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link)
				if (kev->ident == kn->kn_id &&
				    kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	}

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match.
	 */
	if (kev->flags & EV_ADD) {

		if (kn == NULL) {
			kn = knote_alloc();
			if (kn == NULL) {
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			knote_attach(kn, fdp);
			if ((error = fops->f_attach(kn)) != 0) {
				knote_drop(kn, p);
				goto done;
			}
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filters that have already been triggered.
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		s = splhigh();
		if (kn->kn_fop->f_event(kn, 0))
			KNOTE_ACTIVATE(kn);
		splx(s);

	} else if (kev->flags & EV_DELETE) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, p);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		s = splhigh();
		kn->kn_status |= KN_DISABLED;
		splx(s);
	}

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		s = splhigh();
		kn->kn_status &= ~KN_DISABLED;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
		splx(s);
	}

done:
	if (fp != NULL)
		fdrop(fp, p);
	return (error);
}
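
/*
 * Usage sketch (illustrative, not part of the original source;
 * "newudata" is an assumed pointer):  re-issuing EV_ADD for an
 * existing knote only updates kn_sfflags, kn_sdata, and udata, so a
 * filter can be retuned without losing already-triggered state:
 *
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, newudata);
 *	EV_SET(&kev, fd, EVFILT_READ, EV_DISABLE, 0, 0, NULL);
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ENABLE, 0, 0, NULL);
 *
 * EV_DISABLE leaves the knote attached but keeps it from being
 * returned by kqueue_scan; EV_ENABLE re-queues it if it is still
 * marked active.
 */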

static int
kqueue_scan(struct file *fp, int maxevents, struct kevent *ulistp,
	const struct timespec *tsp, struct proc *p)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;
	struct kevent *kevp;
	struct timeval atv, rtv, ttv;
	struct knote *kn, marker;
	int s, count, timeout, nkev = 0, error = 0;

	count = maxevents;
	if (count == 0)
		goto done;

	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&atv, tsp);
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
			timeout = -1;
		else
			timeout = atv.tv_sec > 24 * 60 * 60 ?
			    24 * 60 * 60 * hz : tvtohz(&atv);
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
		timeout = 0;
	}
	goto start;

retry:
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		if (timevalcmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timevalsub(&ttv, &rtv);
		timeout = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}

start:
	kevp = kq->kq_kev;
	s = splhigh();
	if (kq->kq_count == 0) {
		if (timeout < 0) {
			error = EWOULDBLOCK;
		} else {
			kq->kq_state |= KQ_SLEEP;
			error = tsleep(kq, PSOCK | PCATCH, "kqread", timeout);
		}
		splx(s);
		if (error == 0)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		else if (error == EWOULDBLOCK)
			error = 0;
		goto done;
	}

	/* a local marker knote bounds this pass over the queue */
	TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe);
	while (count) {
		kn = TAILQ_FIRST(&kq->kq_head);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		if (kn == &marker) {
			splx(s);
			if (count == maxevents)
				goto retry;
			goto done;
		}
		if (kn->kn_status & KN_DISABLED) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			continue;
		}
		if ((kn->kn_flags & EV_ONESHOT) == 0 &&
		    kn->kn_fop->f_event(kn, 0) == 0) {
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
			continue;
		}
		*kevp = kn->kn_kevent;
		kevp++;
		nkev++;
		if (kn->kn_flags & EV_ONESHOT) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			splx(s);
			kn->kn_fop->f_detach(kn);
			knote_drop(kn, p);
			s = splhigh();
		} else if (kn->kn_flags & EV_CLEAR) {
			kn->kn_data = 0;
			kn->kn_fflags = 0;
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
		} else {
			TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
		}
		count--;
		if (nkev == KQ_NEVENTS) {
			splx(s);
			error = copyout((caddr_t)&kq->kq_kev, (caddr_t)ulistp,
			    sizeof(struct kevent) * nkev);
			ulistp += nkev;
			nkev = 0;
			kevp = kq->kq_kev;
			s = splhigh();
			if (error)
				break;
		}
	}
	TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe);
	splx(s);
done:
	if (nkev != 0)
		error = copyout((caddr_t)&kq->kq_kev, (caddr_t)ulistp,
		    sizeof(struct kevent) * nkev);
	p->p_retval[0] = maxevents - count;
	return (error);
}

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 */
/*ARGSUSED*/
static int
kqueue_read(struct file *fp, struct uio *uio, struct ucred *cred,
	int flags, struct proc *p)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_write(struct file *fp, struct uio *uio, struct ucred *cred,
	int flags, struct proc *p)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p)
{
	return (ENOTTY);
}

/*ARGSUSED*/
static int
kqueue_poll(struct file *fp, int events, struct ucred *cred, struct proc *p)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;
	int revents = 0;
	int s = splnet();

	if (events & (POLLIN | POLLRDNORM)) {
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(p, &kq->kq_sel);
			kq->kq_state |= KQ_SEL;
		}
	}
	splx(s);
	return (revents);
}

/*ARGSUSED*/
static int
kqueue_stat(struct file *fp, struct stat *st, struct proc *p)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;

	bzero((void *)st, sizeof(*st));
	st->st_size = kq->kq_count;
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;
	return (0);
}

/*ARGSUSED*/
static int
kqueue_close(struct file *fp, struct proc *p)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;
	struct filedesc *fdp = p->p_fd;
	struct knote **knp, *kn, *kn0;
	int i;

	for (i = 0; i < fdp->fd_knlistsize; i++) {
		knp = &SLIST_FIRST(&fdp->fd_knlist[i]);
		kn = *knp;
		while (kn != NULL) {
			kn0 = SLIST_NEXT(kn, kn_link);
			if (kq == kn->kn_kq) {
				kn->kn_fop->f_detach(kn);
				fdrop(kn->kn_fp, p);
				knote_free(kn);
				*knp = kn0;
			} else {
				knp = &SLIST_NEXT(kn, kn_link);
			}
			kn = kn0;
		}
	}
	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
			knp = &SLIST_FIRST(&fdp->fd_knhash[i]);
			kn = *knp;
			while (kn != NULL) {
				kn0 = SLIST_NEXT(kn, kn_link);
				if (kq == kn->kn_kq) {
					kn->kn_fop->f_detach(kn);
					/* XXX non-fd release of kn->kn_ptr */
					knote_free(kn);
					*knp = kn0;
				} else {
					knp = &SLIST_NEXT(kn, kn_link);
				}
				kn = kn0;
			}
		}
	}
	free(kq, M_KQUEUE);
	fp->f_data = NULL;

	return (0);
}

static void
kqueue_wakeup(struct kqueue *kq)
{

	if (kq->kq_state & KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if (kq->kq_state & KQ_SEL) {
		kq->kq_state &= ~KQ_SEL;
		selwakeup(&kq->kq_sel);
	}
	KNOTE(&kq->kq_sel.si_note, 0);
}

/*
 * walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext)
		if (kn->kn_fop->f_event(kn, hint))
			KNOTE_ACTIVATE(kn);
}
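
/*
 * Kernel-side usage sketch (illustrative, not part of the original
 * source; the softc and field names are assumptions):  event sources
 * keep a klist, typically the si_note embedded in their struct
 * selinfo, and post to it via the KNOTE() macro, which only calls
 * knote() when the list is non-empty:
 *
 *	struct mydev_softc {
 *		struct selinfo sc_rsel;
 *	};
 *
 *	... then, when data arrives:
 *	KNOTE(&sc->sc_rsel.si_note, 0);
 *
 * kqueue_wakeup() above uses the same pattern to let one kqueue be
 * monitored by another.
 */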

/*
 * remove all knotes from a specified klist
 */
void
knote_remove(struct proc *p, struct klist *list)
{
	struct knote *kn;

	while ((kn = SLIST_FIRST(list)) != NULL) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, p);
	}
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct proc *p, int fd)
{
	struct filedesc *fdp = p->p_fd;
	struct klist *list = &fdp->fd_knlist[fd];

	knote_remove(p, list);
}

static void
knote_attach(struct knote *kn, struct filedesc *fdp)
{
	struct klist *list;
	int size;

	if (! kn->kn_fop->f_isfd) {
		if (fdp->fd_knhashmask == 0)
			fdp->fd_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
			    &fdp->fd_knhashmask);
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
		goto done;
	}

	if (fdp->fd_knlistsize <= kn->kn_id) {
		size = fdp->fd_knlistsize;
		while (size <= kn->kn_id)
			size += KQEXTENT;
		MALLOC(list, struct klist *,
		    size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
		bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list,
		    fdp->fd_knlistsize * sizeof(struct klist *));
		bzero((caddr_t)list +
		    fdp->fd_knlistsize * sizeof(struct klist *),
		    (size - fdp->fd_knlistsize) * sizeof(struct klist *));
		if (fdp->fd_knlist != NULL)
			FREE(fdp->fd_knlist, M_KQUEUE);
		fdp->fd_knlistsize = size;
		fdp->fd_knlist = list;
	}
	list = &fdp->fd_knlist[kn->kn_id];
done:
	SLIST_INSERT_HEAD(list, kn, kn_link);
	kn->kn_status = 0;
}

/*
 * should be called at spl == 0, since we don't want to hold spl
 * while calling fdrop and free.
 */
static void
knote_drop(struct knote *kn, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct klist *list;

	if (kn->kn_fop->f_isfd)
		list = &fdp->fd_knlist[kn->kn_id];
	else
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];

	SLIST_REMOVE(list, kn, knote, kn_link);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_fop->f_isfd)
		fdrop(kn->kn_fp, p);
	knote_free(kn);
}

static void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	int s = splhigh();

	KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));

	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	kq->kq_count++;
	splx(s);
	kqueue_wakeup(kq);
}

static void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	int s = splhigh();

	KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));

	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	splx(s);
}

static void
knote_init(void)
{
	knote_zone = zinit("KNOTE", sizeof(struct knote), 0, 0, 1);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)

static struct knote *
knote_alloc(void)
{
	return ((struct knote *)zalloc(knote_zone));
}

static void
knote_free(struct knote *kn)
{
	zfree(knote_zone, kn);
}