984263bc
MD
1/*-
2 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
6aa81998 26 * $FreeBSD: src/sys/kern/kern_event.c,v 1.2.2.10 2004/04/04 07:03:14 cperciva Exp $
08f2f1bb 27 * $DragonFly: src/sys/kern/kern_event.c,v 1.33 2007/02/03 17:05:57 corecode Exp $
984263bc
MD
28 */
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/proc.h>
34#include <sys/malloc.h>
35#include <sys/unistd.h>
36#include <sys/file.h>
3b564f1f 37#include <sys/lock.h>
984263bc 38#include <sys/fcntl.h>
984263bc
MD
39#include <sys/queue.h>
40#include <sys/event.h>
41#include <sys/eventvar.h>
984263bc
MD
42#include <sys/protosw.h>
43#include <sys/socket.h>
44#include <sys/socketvar.h>
45#include <sys/stat.h>
46#include <sys/sysctl.h>
47#include <sys/sysproto.h>
5b22f1a7 48#include <sys/thread.h>
984263bc 49#include <sys/uio.h>
a73855e8
MD
50#include <sys/signalvar.h>
51#include <sys/filio.h>
e5857bf7 52#include <sys/ktr.h>
684a93c4
MD
53
54#include <sys/thread2.h>
dadab5e9 55#include <sys/file2.h>
684a93c4 56#include <sys/mplock2.h>
984263bc
MD
57
58#include <vm/vm_zone.h>
59
5b22f1a7
SG
60/*
61 * Global token for kqueue subsystem
62 */
12586b82 63struct lwkt_token kq_token = LWKT_TOKEN_UP_INITIALIZER(kq_token);
5b22f1a7 64
984263bc
MD
65MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
66
e5857bf7
SG
67struct kevent_copyin_args {
68 struct kevent_args *ka;
69 int pchanges;
70};
71
5bad2bc2 72static int kqueue_sleep(struct kqueue *kq, struct timespec *tsp);
a591f597 73static int kqueue_scan(struct kqueue *kq, struct kevent *kevp, int count,
5bad2bc2 74 struct knote *marker);
984263bc 75static int kqueue_read(struct file *fp, struct uio *uio,
87de5057 76 struct ucred *cred, int flags);
984263bc 77static int kqueue_write(struct file *fp, struct uio *uio,
87de5057 78 struct ucred *cred, int flags);
984263bc 79static int kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
87baaf0c 80 struct ucred *cred, struct sysmsg *msg);
984263bc 81static int kqueue_kqfilter(struct file *fp, struct knote *kn);
87de5057
MD
82static int kqueue_stat(struct file *fp, struct stat *st,
83 struct ucred *cred);
84static int kqueue_close(struct file *fp);
5b22f1a7 85static void kqueue_wakeup(struct kqueue *kq);
4c91dbc9 86static int filter_attach(struct knote *kn);
4c91dbc9 87static int filter_event(struct knote *kn, long hint);
984263bc 88
d9b2033e
MD
89/*
90 * MPSAFE
91 */
984263bc 92static struct fileops kqueueops = {
b2d248cb
MD
93 .fo_read = kqueue_read,
94 .fo_write = kqueue_write,
95 .fo_ioctl = kqueue_ioctl,
b2d248cb
MD
96 .fo_kqfilter = kqueue_kqfilter,
97 .fo_stat = kqueue_stat,
98 .fo_close = kqueue_close,
99 .fo_shutdown = nofo_shutdown
984263bc
MD
100};
101
ccafe911
MD
102static void knote_attach(struct knote *kn);
103static void knote_drop(struct knote *kn);
cf9f4e88 104static void knote_detach_and_drop(struct knote *kn);
984263bc
MD
105static void knote_enqueue(struct knote *kn);
106static void knote_dequeue(struct knote *kn);
107static void knote_init(void);
108static struct knote *knote_alloc(void);
109static void knote_free(struct knote *kn);
110
111static void filt_kqdetach(struct knote *kn);
112static int filt_kqueue(struct knote *kn, long hint);
113static int filt_procattach(struct knote *kn);
114static void filt_procdetach(struct knote *kn);
115static int filt_proc(struct knote *kn, long hint);
116static int filt_fileattach(struct knote *kn);
117static void filt_timerexpire(void *knx);
118static int filt_timerattach(struct knote *kn);
119static void filt_timerdetach(struct knote *kn);
120static int filt_timer(struct knote *kn, long hint);
121
122static struct filterops file_filtops =
4c91dbc9 123 { FILTEROP_ISFD, filt_fileattach, NULL, NULL };
984263bc 124static struct filterops kqread_filtops =
4c91dbc9 125 { FILTEROP_ISFD, NULL, filt_kqdetach, filt_kqueue };
984263bc
MD
126static struct filterops proc_filtops =
127 { 0, filt_procattach, filt_procdetach, filt_proc };
128static struct filterops timer_filtops =
129 { 0, filt_timerattach, filt_timerdetach, filt_timer };
130
131static vm_zone_t knote_zone;
132static int kq_ncallouts = 0;
133static int kq_calloutmax = (4 * 1024);
134SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
135 &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");
34e191bd
MD
136static int kq_checkloop = 1000000;
137SYSCTL_INT(_kern, OID_AUTO, kq_checkloop, CTLFLAG_RW,
138 &kq_checkloop, 0, "Maximum number of kevent collection loop iterations");
984263bc
MD
139
140#define KNOTE_ACTIVATE(kn) do { \
141 kn->kn_status |= KN_ACTIVE; \
142 if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) \
143 knote_enqueue(kn); \
144} while(0)
145
146#define KN_HASHSIZE 64 /* XXX should be tunable */
147#define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask))
148
149extern struct filterops aio_filtops;
150extern struct filterops sig_filtops;
151
152/*
153 * Table for all system-defined filters.
154 */
155static struct filterops *sysfilt_ops[] = {
156 &file_filtops, /* EVFILT_READ */
157 &file_filtops, /* EVFILT_WRITE */
158 &aio_filtops, /* EVFILT_AIO */
159 &file_filtops, /* EVFILT_VNODE */
160 &proc_filtops, /* EVFILT_PROC */
161 &sig_filtops, /* EVFILT_SIGNAL */
162 &timer_filtops, /* EVFILT_TIMER */
73c344d3 163 &file_filtops, /* EVFILT_EXCEPT */
984263bc
MD
164};
165
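/*
 * Worked example of the lookup (a sketch; the exact EVFILT_* values are
 * defined in <sys/event.h>): system filters are small negative numbers,
 * so kqueue_register() indexes this table with the one's complement of
 * the filter value.
 *
 *	EVFILT_READ  == -1  ->  ~(-1) == 0  ->  file_filtops
 *	EVFILT_WRITE == -2  ->  ~(-2) == 1  ->  file_filtops
 *	EVFILT_TIMER == -7  ->  ~(-7) == 6  ->  timer_filtops
 */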
166static int
167filt_fileattach(struct knote *kn)
168{
984263bc
MD
169 return (fo_kqfilter(kn->kn_fp, kn));
170}
171
d9b2033e 172/*
5b22f1a7 173 * MPSAFE
d9b2033e 174 */
984263bc
MD
175static int
176kqueue_kqfilter(struct file *fp, struct knote *kn)
177{
178 struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
179
5b22f1a7 180 if (kn->kn_filter != EVFILT_READ)
b287d649 181 return (EOPNOTSUPP);
984263bc
MD
182
183 kn->kn_fop = &kqread_filtops;
5b22f1a7 184 knote_insert(&kq->kq_kqinfo.ki_note, kn);
984263bc
MD
185 return (0);
186}
187
188static void
189filt_kqdetach(struct knote *kn)
190{
191 struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
192
5b22f1a7 193 knote_remove(&kq->kq_kqinfo.ki_note, kn);
984263bc
MD
194}
195
196/*ARGSUSED*/
197static int
198filt_kqueue(struct knote *kn, long hint)
199{
200 struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
201
202 kn->kn_data = kq->kq_count;
203 return (kn->kn_data > 0);
204}
205
206static int
207filt_procattach(struct knote *kn)
208{
209 struct proc *p;
210 int immediate;
211
212 immediate = 0;
6f9db615 213 lwkt_gettoken(&proc_token);
984263bc
MD
214 p = pfind(kn->kn_id);
215 if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
216 p = zpfind(kn->kn_id);
217 immediate = 1;
218 }
6f9db615
MD
219 if (p == NULL) {
220 lwkt_reltoken(&proc_token);
984263bc 221 return (ESRCH);
6f9db615
MD
222 }
223 if (!PRISON_CHECK(curthread->td_ucred, p->p_ucred)) {
224 lwkt_reltoken(&proc_token);
984263bc 225 return (EACCES);
6f9db615 226 }
984263bc
MD
227
228 kn->kn_ptr.p_proc = p;
229 kn->kn_flags |= EV_CLEAR; /* automatically set */
230
231 /*
232 * internal flag indicating registration done by kernel
233 */
234 if (kn->kn_flags & EV_FLAG1) {
235 kn->kn_data = kn->kn_sdata; /* ppid */
236 kn->kn_fflags = NOTE_CHILD;
237 kn->kn_flags &= ~EV_FLAG1;
238 }
239
5b22f1a7 240 knote_insert(&p->p_klist, kn);
984263bc
MD
241
242 /*
243 * Immediately activate any exit notes if the target process is a
244 * zombie. This is necessary to handle the case where the target
fe24d605 245 * process, e.g. a child, dies before the kevent is registered.
984263bc
MD
246 */
247 if (immediate && filt_proc(kn, NOTE_EXIT))
248 KNOTE_ACTIVATE(kn);
6f9db615 249 lwkt_reltoken(&proc_token);
984263bc
MD
250
251 return (0);
252}
253
254/*
255 * The knote may be attached to a different process, which may exit,
256 * leaving nothing for the knote to be attached to. So when the process
257 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
258 * it will be deleted when read out. However, as part of the knote deletion,
259 * this routine is called, so a check is needed to avoid actually performing
260 * a detach, because the original process does not exist any more.
261 */
262static void
263filt_procdetach(struct knote *kn)
264{
8fb57988 265 struct proc *p;
984263bc
MD
266
267 if (kn->kn_status & KN_DETACHED)
268 return;
5b22f1a7 269 /* XXX locking? take proc_token here? */
8fb57988 270 p = kn->kn_ptr.p_proc;
5b22f1a7 271 knote_remove(&p->p_klist, kn);
984263bc
MD
272}
273
274static int
275filt_proc(struct knote *kn, long hint)
276{
277 u_int event;
278
279 /*
280 * mask off extra data
281 */
282 event = (u_int)hint & NOTE_PCTRLMASK;
283
284 /*
285 * if the user is interested in this event, record it.
286 */
287 if (kn->kn_sfflags & event)
288 kn->kn_fflags |= event;
289
290 /*
8fb57988
MD
291 * Process is gone, so flag the event as finished. Detach the
292 * knote from the process now because the process structure
293 * will be gone later on.
984263bc
MD
294 */
295 if (event == NOTE_EXIT) {
8fb57988
MD
296 struct proc *p = kn->kn_ptr.p_proc;
297 if ((kn->kn_status & KN_DETACHED) == 0) {
5b22f1a7 298 knote_remove(&p->p_klist, kn);
8fb57988
MD
299 kn->kn_status |= KN_DETACHED;
300 kn->kn_data = p->p_xstat;
301 kn->kn_ptr.p_proc = NULL;
302 }
984263bc
MD
303 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
304 return (1);
305 }
306
307 /*
308 * process forked, and user wants to track the new process,
309 * so attach a new knote to it, and immediately report an
310 * event with the parent's pid.
311 */
312 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
313 struct kevent kev;
314 int error;
315
316 /*
317 * register knote with new process.
318 */
319 kev.ident = hint & NOTE_PDATAMASK; /* pid */
320 kev.filter = kn->kn_filter;
321 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
322 kev.fflags = kn->kn_sfflags;
323 kev.data = kn->kn_id; /* parent */
324 kev.udata = kn->kn_kevent.udata; /* preserve udata */
ccafe911 325 error = kqueue_register(kn->kn_kq, &kev);
984263bc
MD
326 if (error)
327 kn->kn_fflags |= NOTE_TRACKERR;
328 }
329
330 return (kn->kn_fflags != 0);
331}
332
333static void
334filt_timerexpire(void *knx)
335{
336 struct knote *kn = knx;
337 struct callout *calloutp;
338 struct timeval tv;
339 int tticks;
340
341 kn->kn_data++;
342 KNOTE_ACTIVATE(kn);
343
344 if ((kn->kn_flags & EV_ONESHOT) == 0) {
345 tv.tv_sec = kn->kn_sdata / 1000;
346 tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
a94976ad 347 tticks = tvtohz_high(&tv);
984263bc
MD
348 calloutp = (struct callout *)kn->kn_hook;
349 callout_reset(calloutp, tticks, filt_timerexpire, kn);
350 }
351}
352
353/*
354 * data contains amount of time to sleep, in milliseconds
355 */
356static int
357filt_timerattach(struct knote *kn)
358{
359 struct callout *calloutp;
360 struct timeval tv;
361 int tticks;
362
363 if (kq_ncallouts >= kq_calloutmax)
364 return (ENOMEM);
365 kq_ncallouts++;
366
367 tv.tv_sec = kn->kn_sdata / 1000;
368 tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
a94976ad 369 tticks = tvtohz_high(&tv);
984263bc
MD
370
371 kn->kn_flags |= EV_CLEAR; /* automatically set */
372 MALLOC(calloutp, struct callout *, sizeof(*calloutp),
373 M_KQUEUE, M_WAITOK);
374 callout_init(calloutp);
984263bc 375 kn->kn_hook = (caddr_t)calloutp;
6aa81998 376 callout_reset(calloutp, tticks, filt_timerexpire, kn);
984263bc
MD
377
378 return (0);
379}
380
381static void
382filt_timerdetach(struct knote *kn)
383{
384 struct callout *calloutp;
385
386 calloutp = (struct callout *)kn->kn_hook;
387 callout_stop(calloutp);
388 FREE(calloutp, M_KQUEUE);
389 kq_ncallouts--;
390}
391
392static int
393filt_timer(struct knote *kn, long hint)
394{
395
396 return (kn->kn_data != 0);
397}
398
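/*
 * Illustrative userland sketch (not part of this file): arming a periodic
 * 500ms EVFILT_TIMER through the standard <sys/event.h> API.  The "1" is
 * an arbitrary timer identifier chosen by the caller.
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, 1, EVFILT_TIMER, EV_ADD | EV_ENABLE, 0, 500, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *
 * filt_timerattach() converts kev.data (milliseconds) to ticks and arms a
 * callout; each expiration bumps kn_data and re-arms unless EV_ONESHOT.
 */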
3919ced0 399/*
ccafe911
MD
400 * Initialize a kqueue.
401 *
402 * NOTE: The lwp/proc code initializes a kqueue for select/poll ops.
403 *
404 * MPSAFE
405 */
406void
407kqueue_init(struct kqueue *kq, struct filedesc *fdp)
408{
409 TAILQ_INIT(&kq->kq_knpend);
410 TAILQ_INIT(&kq->kq_knlist);
ac62ea3c 411 kq->kq_count = 0;
ccafe911 412 kq->kq_fdp = fdp;
5b22f1a7 413 SLIST_INIT(&kq->kq_kqinfo.ki_note);
ccafe911
MD
414}
415
416/*
417 * Terminate a kqueue. Freeing the actual kq itself is left up to the
418 * caller (it might be embedded in a lwp so we don't do it here).
419 */
420void
421kqueue_terminate(struct kqueue *kq)
422{
423 struct knote *kn;
ccafe911 424
853fe8da 425 lwkt_gettoken(&kq_token);
cf9f4e88
MD
426 while ((kn = TAILQ_FIRST(&kq->kq_knlist)) != NULL)
427 knote_detach_and_drop(kn);
ccafe911
MD
428
429 if (kq->kq_knhash) {
430 kfree(kq->kq_knhash, M_KQUEUE);
431 kq->kq_knhash = NULL;
432 kq->kq_knhashmask = 0;
433 }
853fe8da 434 lwkt_reltoken(&kq_token);
ccafe911
MD
435}
436
437/*
3919ced0
MD
438 * MPSAFE
439 */
984263bc 440int
753fd850 441sys_kqueue(struct kqueue_args *uap)
984263bc 442{
f3a2d8c4 443 struct thread *td = curthread;
984263bc
MD
444 struct kqueue *kq;
445 struct file *fp;
446 int fd, error;
447
f3a2d8c4 448 error = falloc(td->td_lwp, &fp, &fd);
984263bc
MD
449 if (error)
450 return (error);
451 fp->f_flag = FREAD | FWRITE;
452 fp->f_type = DTYPE_KQUEUE;
453 fp->f_ops = &kqueueops;
d9b2033e 454
efda3bd0 455 kq = kmalloc(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO);
ccafe911 456 kqueue_init(kq, td->td_proc->p_fd);
fbb4eeab 457 fp->f_data = kq;
d9b2033e 458
f3a2d8c4 459 fsetfd(kq->kq_fdp, fp, fd);
c7114eea 460 uap->sysmsg_result = fd;
9f87144f 461 fdrop(fp);
984263bc
MD
462 return (error);
463}
464
3919ced0 465/*
8ba5f7ef
AH
466 * Copy 'count' items into the destination list pointed to by uap->eventlist.
467 */
468static int
e5857bf7 469kevent_copyout(void *arg, struct kevent *kevp, int count, int *res)
8ba5f7ef 470{
e5857bf7 471 struct kevent_copyin_args *kap;
8ba5f7ef
AH
472 int error;
473
e5857bf7
SG
474 kap = (struct kevent_copyin_args *)arg;
475
8acdf1cf 476 error = copyout(kevp, kap->ka->eventlist, count * sizeof(*kevp));
e5857bf7
SG
477 if (error == 0) {
478 kap->ka->eventlist += count;
479 *res += count;
480 } else {
481 *res = -1;
482 }
8ba5f7ef 483
8ba5f7ef
AH
484 return (error);
485}
486
487/*
e5857bf7
SG
488 * Copy at most 'max' items from the list pointed to by kap->changelist,
489 * return number of items in 'events'.
8ba5f7ef
AH
490 */
491static int
e5857bf7 492kevent_copyin(void *arg, struct kevent *kevp, int max, int *events)
8ba5f7ef 493{
e5857bf7
SG
494 struct kevent_copyin_args *kap;
495 int error, count;
8ba5f7ef 496
e5857bf7
SG
497 kap = (struct kevent_copyin_args *)arg;
498
499 count = min(kap->ka->nchanges - kap->pchanges, max);
500 error = copyin(kap->ka->changelist, kevp, count * sizeof *kevp);
501 if (error == 0) {
502 kap->ka->changelist += count;
503 kap->pchanges += count;
504 *events = count;
505 }
8ba5f7ef 506
8ba5f7ef
AH
507 return (error);
508}
509
510/*
5b22f1a7 511 * MPSAFE
3919ced0 512 */
984263bc 513int
e5857bf7 514kern_kevent(struct kqueue *kq, int nevents, int *res, void *uap,
8acdf1cf
MD
515 k_copyin_fn kevent_copyinfn, k_copyout_fn kevent_copyoutfn,
516 struct timespec *tsp_in)
984263bc 517{
984263bc 518 struct kevent *kevp;
a591f597 519 struct timespec *tsp;
e5857bf7 520 int i, n, total, error, nerrors = 0;
fe24d605 521 int lres;
34e191bd 522 int limit = kq_checkloop;
10f6680a 523 struct kevent kev[KQ_NEVENTS];
5bad2bc2 524 struct knote marker;
10f6680a 525
8ba5f7ef 526 tsp = tsp_in;
e5857bf7 527 *res = 0;
984263bc 528
5b22f1a7 529 lwkt_gettoken(&kq_token);
e5857bf7
SG
530 for ( ;; ) {
531 n = 0;
532 error = kevent_copyinfn(uap, kev, KQ_NEVENTS, &n);
984263bc
MD
533 if (error)
534 goto done;
e5857bf7
SG
535 if (n == 0)
536 break;
984263bc 537 for (i = 0; i < n; i++) {
10f6680a 538 kevp = &kev[i];
984263bc 539 kevp->flags &= ~EV_SYSFLAGS;
ccafe911 540 error = kqueue_register(kq, kevp);
fe24d605
MD
541
542 /*
543 * If a registration returns an error we
544 * immediately post the error. The kevent()
545 * call itself will fail with the error if
546 * no space is available for posting.
547 *
548 * Such errors normally bypass the timeout/blocking
549 * code. However, if the copyoutfn function refuses
550 * to post the error (see sys_poll()), then we
551 * ignore it too.
552 */
984263bc 553 if (error) {
3c2a46a7
SG
554 kevp->flags = EV_ERROR;
555 kevp->data = error;
556 lres = *res;
557 kevent_copyoutfn(uap, kevp, 1, res);
558 if (lres != *res) {
559 nevents--;
560 nerrors++;
984263bc
MD
561 }
562 }
563 }
984263bc
MD
564 }
565 if (nerrors) {
984263bc
MD
566 error = 0;
567 goto done;
568 }
569
a591f597
MD
570 /*
571 * Acquire/wait for events - setup timeout
572 */
573 if (tsp != NULL) {
574 struct timespec ats;
575
576 if (tsp->tv_sec || tsp->tv_nsec) {
577 nanouptime(&ats);
578 timespecadd(tsp, &ats); /* tsp = target time */
579 }
580 }
581
582 /*
583 * Loop as required.
584 *
5bad2bc2
SG
585 * Collect as many events as we can. Sleeping on successive
586 * loops is disabled if copyoutfn has incremented (*res).
8acdf1cf 587 *
679058fb 588 * The loop stops if an error occurs, all events have been
5bad2bc2
SG
589 * scanned (the marker has been reached), or fewer than the
590 * maximum number of events are found.
679058fb
MD
591 *
592 * The copyoutfn function does not have to increment (*res) in
593 * order for the loop to continue.
594 *
595 * NOTE: doselect() usually passes 0x7FFFFFFF for nevents.
a591f597
MD
596 */
597 total = 0;
598 error = 0;
5bad2bc2 599 marker.kn_filter = EVFILT_MARKER;
5bad2bc2 600 TAILQ_INSERT_TAIL(&kq->kq_knpend, &marker, kn_tqe);
8ba5f7ef 601 while ((n = nevents - total) > 0) {
a591f597
MD
602 if (n > KQ_NEVENTS)
603 n = KQ_NEVENTS;
5bad2bc2 604
62405ecc
MD
605 /*
606 * If no events are pending sleep until timeout (if any)
607 * or an event occurs.
608 *
609 * After the sleep completes the marker is moved to the
610 * end of the list, making any received events available
611 * to our scan.
612 */
5bad2bc2
SG
613 if (kq->kq_count == 0 && *res == 0) {
614 error = kqueue_sleep(kq, tsp);
5bad2bc2
SG
615 if (error)
616 break;
5b22f1a7 617
5bad2bc2
SG
618 TAILQ_REMOVE(&kq->kq_knpend, &marker, kn_tqe);
619 TAILQ_INSERT_TAIL(&kq->kq_knpend, &marker, kn_tqe);
5bad2bc2
SG
620 }
621
62405ecc
MD
622 /*
623 * Process all received events
21ae0f4c 624 * Account for all non-spurious events in our total
62405ecc 625 */
5bad2bc2
SG
626 i = kqueue_scan(kq, kev, n, &marker);
627 if (i) {
21ae0f4c 628 lres = *res;
5bad2bc2 629 error = kevent_copyoutfn(uap, kev, i, res);
21ae0f4c 630 total += *res - lres;
5bad2bc2
SG
631 if (error)
632 break;
633 }
34e191bd
MD
634 if (limit && --limit == 0)
635 panic("kqueue: checkloop failed i=%d", i);
679058fb
MD
636
637 /*
638 * Normally when fewer events are returned than requested
639 * we can stop. However, if only spurious events were
640 * collected the copyout will not bump (*res) and we have
641 * to continue.
642 */
643 if (i < n && *res)
a591f597 644 break;
62405ecc
MD
645
646 /*
647 * Deal with an edge case where spurious events can cause
648 * a loop to occur without moving the marker. This can
649 * prevent kqueue_scan() from picking up new events which
650 * race us. We must be sure to move the marker for this
651 * case.
652 *
653 * NOTE: We do not want to move the marker if events
654 * were scanned because normal kqueue operations
655 * may reactivate events. Moving the marker in
656 * that case could result in duplicates for the
657 * same event.
658 */
659 if (i == 0) {
62405ecc
MD
660 TAILQ_REMOVE(&kq->kq_knpend, &marker, kn_tqe);
661 TAILQ_INSERT_TAIL(&kq->kq_knpend, &marker, kn_tqe);
62405ecc 662 }
a591f597 663 }
5bad2bc2 664 TAILQ_REMOVE(&kq->kq_knpend, &marker, kn_tqe);
e5857bf7 665
5bad2bc2 666 /* Timeouts do not return EWOULDBLOCK. */
679058fb
MD
667 if (error == EWOULDBLOCK)
668 error = 0;
5bad2bc2
SG
669
670done:
5b22f1a7 671 lwkt_reltoken(&kq_token);
984263bc
MD
672 return (error);
673}
674
8ba5f7ef
AH
675/*
676 * MPALMOSTSAFE
677 */
678int
679sys_kevent(struct kevent_args *uap)
680{
e5857bf7
SG
681 struct thread *td = curthread;
682 struct proc *p = td->td_proc;
8ba5f7ef 683 struct timespec ts, *tsp;
e5857bf7
SG
684 struct kqueue *kq;
685 struct file *fp = NULL;
686 struct kevent_copyin_args *kap, ka;
8ba5f7ef
AH
687 int error;
688
689 if (uap->timeout) {
690 error = copyin(uap->timeout, &ts, sizeof(ts));
691 if (error)
692 return (error);
693 tsp = &ts;
694 } else {
695 tsp = NULL;
696 }
697
e5857bf7
SG
698 fp = holdfp(p->p_fd, uap->fd, -1);
699 if (fp == NULL)
700 return (EBADF);
701 if (fp->f_type != DTYPE_KQUEUE) {
702 fdrop(fp);
703 return (EBADF);
704 }
705
706 kq = (struct kqueue *)fp->f_data;
707
708 kap = &ka;
709 kap->ka = uap;
710 kap->pchanges = 0;
711
712 error = kern_kevent(kq, uap->nevents, &uap->sysmsg_result, kap,
8acdf1cf 713 kevent_copyin, kevent_copyout, tsp);
e5857bf7
SG
714
715 fdrop(fp);
8ba5f7ef
AH
716
717 return (error);
718}
719
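/*
 * Illustrative userland sketch (not part of this file) of the calling
 * sequence serviced by sys_kqueue()/sys_kevent() above; "fd" is assumed
 * to be some readable descriptor.
 *
 *	struct kevent change, event;
 *	struct timespec ts = { 5, 0 };
 *	int kq, n;
 *
 *	kq = kqueue();
 *	EV_SET(&change, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	n = kevent(kq, &change, 1, &event, 1, &ts);
 *
 * Internally the changelist is pulled in KQ_NEVENTS-sized chunks by
 * kevent_copyin(), each change is passed to kqueue_register(), and any
 * collected events are pushed back out through kevent_copyout().
 */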
984263bc 720int
ccafe911 721kqueue_register(struct kqueue *kq, struct kevent *kev)
984263bc
MD
722{
723 struct filedesc *fdp = kq->kq_fdp;
724 struct filterops *fops;
725 struct file *fp = NULL;
726 struct knote *kn = NULL;
e43a034f 727 int error = 0;
984263bc
MD
728
729 if (kev->filter < 0) {
730 if (kev->filter + EVFILT_SYSCOUNT < 0)
731 return (EINVAL);
732 fops = sysfilt_ops[~kev->filter]; /* to 0-base index */
733 } else {
734 /*
735 * XXX
736 * filter attach routine is responsible for ensuring that
737 * the identifier can be attached to it.
738 */
6ea70f76 739 kprintf("unknown filter: %d\n", kev->filter);
984263bc
MD
740 return (EINVAL);
741 }
742
853fe8da 743 lwkt_gettoken(&kq_token);
4c91dbc9 744 if (fops->f_flags & FILTEROP_ISFD) {
984263bc 745 /* validate descriptor */
228b401d 746 fp = holdfp(fdp, kev->ident, -1);
853fe8da
MD
747 if (fp == NULL) {
748 lwkt_reltoken(&kq_token);
984263bc 749 return (EBADF);
853fe8da 750 }
984263bc 751
ccafe911
MD
752 SLIST_FOREACH(kn, &fp->f_klist, kn_link) {
753 if (kn->kn_kq == kq &&
754 kn->kn_filter == kev->filter &&
755 kn->kn_id == kev->ident) {
756 break;
757 }
984263bc
MD
758 }
759 } else {
ccafe911 760 if (kq->kq_knhashmask) {
984263bc
MD
761 struct klist *list;
762
ccafe911
MD
763 list = &kq->kq_knhash[
764 KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
765 SLIST_FOREACH(kn, list, kn_link) {
766 if (kn->kn_id == kev->ident &&
767 kn->kn_filter == kev->filter)
984263bc 768 break;
ccafe911 769 }
984263bc
MD
770 }
771 }
772
773 if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
774 error = ENOENT;
775 goto done;
776 }
777
778 /*
779 * kn now contains the matching knote, or NULL if no match
780 */
781 if (kev->flags & EV_ADD) {
984263bc
MD
782 if (kn == NULL) {
783 kn = knote_alloc();
784 if (kn == NULL) {
785 error = ENOMEM;
786 goto done;
787 }
788 kn->kn_fp = fp;
789 kn->kn_kq = kq;
790 kn->kn_fop = fops;
791
792 /*
793 * apply reference count to knote structure, and
794 * do not release it at the end of this routine.
795 */
796 fp = NULL;
797
798 kn->kn_sfflags = kev->fflags;
799 kn->kn_sdata = kev->data;
800 kev->fflags = 0;
801 kev->data = 0;
802 kn->kn_kevent = *kev;
803
cf9f4e88
MD
804 /*
805 * Interlock against creation/deletion races due
806 * to f_attach() blocking. knote_attach() will set
807 * KN_CREATING.
808 */
ccafe911 809 knote_attach(kn);
4c91dbc9 810 if ((error = filter_attach(kn)) != 0) {
cf9f4e88 811 kn->kn_status |= KN_DELETING;
ccafe911 812 knote_drop(kn);
984263bc
MD
813 goto done;
814 }
cf9f4e88
MD
815 kn->kn_status &= ~KN_CREATING;
816
817 /*
818 * Interlock against close races which remove our
819 * knotes. We do not want to end up with a knote
820 * on a closed descriptor.
821 */
822 if ((fops->f_flags & FILTEROP_ISFD) &&
823 (error = checkfdclosed(fdp, kev->ident, kn->kn_fp)) != 0) {
824 knote_detach_and_drop(kn);
825 goto done;
826 }
984263bc
MD
827 } else {
828 /*
829 * The user may change some filter values after the
830 * initial EV_ADD, but doing so will not reset any
831 * filters that have already been triggered.
832 */
833 kn->kn_sfflags = kev->fflags;
834 kn->kn_sdata = kev->data;
835 kn->kn_kevent.udata = kev->udata;
836 }
837
4c91dbc9 838 if (filter_event(kn, 0))
984263bc 839 KNOTE_ACTIVATE(kn);
984263bc 840 } else if (kev->flags & EV_DELETE) {
cf9f4e88 841 knote_detach_and_drop(kn);
984263bc
MD
842 goto done;
843 }
844
845 if ((kev->flags & EV_DISABLE) &&
846 ((kn->kn_status & KN_DISABLED) == 0)) {
984263bc 847 kn->kn_status |= KN_DISABLED;
984263bc
MD
848 }
849
850 if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
984263bc
MD
851 kn->kn_status &= ~KN_DISABLED;
852 if ((kn->kn_status & KN_ACTIVE) &&
853 ((kn->kn_status & KN_QUEUED) == 0))
854 knote_enqueue(kn);
984263bc
MD
855 }
856
857done:
853fe8da 858 lwkt_reltoken(&kq_token);
984263bc 859 if (fp != NULL)
9f87144f 860 fdrop(fp);
984263bc
MD
861 return (error);
862}
863
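/*
 * Sketch (not part of this file) of how the EV_* control flags handled
 * above map onto userland requests for a single (filter, ident) pair:
 *
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, udata);	 -- create knote
 *	EV_SET(&kev, fd, EVFILT_READ, EV_DISABLE, 0, 0, udata); -- keep, stop delivery
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ENABLE, 0, 0, udata);	 -- deliver again
 *	EV_SET(&kev, fd, EVFILT_READ, EV_DELETE, 0, 0, udata);	 -- detach and drop
 *
 * A second EV_ADD for an existing (kq, filter, ident) tuple does not
 * create a new knote; it only updates kn_sfflags, kn_sdata and udata.
 */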
a591f597 864/*
5bad2bc2 865 * Block as necessary until the target time is reached.
a591f597
MD
866 * If tsp is NULL we block indefinitely. If tsp->tv_sec/tv_nsec are both
867 * 0 we do not block at all.
868 */
984263bc 869static int
5bad2bc2 870kqueue_sleep(struct kqueue *kq, struct timespec *tsp)
984263bc 871{
5bad2bc2 872 int error = 0;
984263bc 873
5bad2bc2
SG
874 if (tsp == NULL) {
875 kq->kq_state |= KQ_SLEEP;
876 error = tsleep(kq, PCATCH, "kqread", 0);
877 } else if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) {
878 error = EWOULDBLOCK;
879 } else {
880 struct timespec ats;
881 struct timespec atx = *tsp;
882 int timeout;
a591f597 883
5bad2bc2
SG
884 nanouptime(&ats);
885 timespecsub(&atx, &ats);
886 if (atx.tv_sec < 0) {
887 error = EWOULDBLOCK;
888 } else {
889 timeout = atx.tv_sec > 24 * 60 * 60 ?
890 24 * 60 * 60 * hz : tstohz_high(&atx);
891 kq->kq_state |= KQ_SLEEP;
892 error = tsleep(kq, PCATCH, "kqread", timeout);
984263bc 893 }
984263bc 894 }
5bad2bc2
SG
895
896 /* don't restart after signals... */
897 if (error == ERESTART)
898 return (EINTR);
899
900 return (error);
901}
902
903/*
904 * Scan the kqueue, return the number of active events placed in kevp up
905 * to count.
906 *
907 * Continuous mode events may get recycled, do not continue scanning past
908 * marker unless no events have been collected.
909 */
910static int
911kqueue_scan(struct kqueue *kq, struct kevent *kevp, int count,
912 struct knote *marker)
913{
6e32cd64 914 struct knote *kn, local_marker;
5bad2bc2
SG
915 int total;
916
917 total = 0;
6e32cd64 918 local_marker.kn_filter = EVFILT_MARKER;
984263bc 919
a591f597 920 /*
5bad2bc2 921 * Collect events.
a591f597 922 */
6e32cd64 923 TAILQ_INSERT_HEAD(&kq->kq_knpend, &local_marker, kn_tqe);
984263bc 924 while (count) {
6e32cd64
SG
925 kn = TAILQ_NEXT(&local_marker, kn_tqe);
926 if (kn->kn_filter == EVFILT_MARKER) {
927 /* Marker reached, we are done */
928 if (kn == marker)
929 break;
930
931 /* Move local marker past some other thread's marker */
932 kn = TAILQ_NEXT(kn, kn_tqe);
933 TAILQ_REMOVE(&kq->kq_knpend, &local_marker, kn_tqe);
934 TAILQ_INSERT_BEFORE(kn, &local_marker, kn_tqe);
935 continue;
936 }
5bad2bc2 937
853fe8da
MD
938 /*
939 * Remove the event for processing.
940 *
941 * WARNING! We must leave KN_QUEUED set to prevent the
942 * event from being KNOTE()d again while we
943 * potentially block in the filter function.
4fe3185e
MD
944 *
945 * This protects the knote from everything except
946 * getting dropped.
947 *
948 * WARNING! KN_PROCESSING is meant to handle any cases
949 * that leaving KN_QUEUED set does not.
853fe8da 950 */
a74548c7 951 TAILQ_REMOVE(&kq->kq_knpend, kn, kn_tqe);
4c91dbc9 952 kq->kq_count--;
4fe3185e 953 kn->kn_status |= KN_PROCESSING;
cf9f4e88
MD
954
955 /*
956 * Even though close/dup2 will clean out pending knotes, this
957 * code is MPSAFE and it is possible to race a close in between
958 * the removal of its descriptor and the clearing out of the
959 * knote(s).
960 *
961 * In this case we must ensure that the knote is not queued
962 * to knpend or we risk an infinite kernel loop calling
963 * kqueue_scan(), because the select/poll code will not be able to
964 * delete the event.
965 */
966 if ((kn->kn_fop->f_flags & FILTEROP_ISFD) &&
967 checkfdclosed(kq->kq_fdp, kn->kn_kevent.ident, kn->kn_fp)) {
4fe3185e
MD
968 kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE |
969 KN_PROCESSING);
984263bc
MD
970 continue;
971 }
cf9f4e88
MD
972
973 /*
974 * If disabled we ensure the event is not queued but leave
975 * its active bit set. On re-enablement the event may be
976 * immediately triggered.
977 */
853fe8da 978 if (kn->kn_status & KN_DISABLED) {
4fe3185e 979 kn->kn_status &= ~(KN_QUEUED | KN_PROCESSING);
cf9f4e88 980 continue;
853fe8da 981 }
cf9f4e88
MD
982
983 /*
984 * If not running in one-shot mode and the event is no
985 * longer present we ensure it is removed from the queue and
986 * ignore it.
987 */
984263bc 988 if ((kn->kn_flags & EV_ONESHOT) == 0 &&
4c91dbc9 989 filter_event(kn, 0) == 0) {
4fe3185e
MD
990 kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE |
991 KN_PROCESSING);
984263bc
MD
992 continue;
993 }
cf9f4e88 994
a591f597
MD
995 *kevp++ = kn->kn_kevent;
996 ++total;
997 --count;
998
999 /*
1000 * Post-event action on the note
1001 */
984263bc 1002 if (kn->kn_flags & EV_ONESHOT) {
4fe3185e 1003 kn->kn_status &= ~(KN_QUEUED | KN_PROCESSING);
cf9f4e88 1004 knote_detach_and_drop(kn);
984263bc
MD
1005 } else if (kn->kn_flags & EV_CLEAR) {
1006 kn->kn_data = 0;
1007 kn->kn_fflags = 0;
4fe3185e
MD
1008 kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE |
1009 KN_PROCESSING);
984263bc 1010 } else {
ccafe911 1011 TAILQ_INSERT_TAIL(&kq->kq_knpend, kn, kn_tqe);
4c91dbc9 1012 kq->kq_count++;
4fe3185e 1013 kn->kn_status &= ~KN_PROCESSING;
984263bc 1014 }
984263bc 1015 }
6e32cd64 1016 TAILQ_REMOVE(&kq->kq_knpend, &local_marker, kn_tqe);
5bad2bc2 1017
a591f597 1018 return (total);
984263bc
MD
1019}
1020
1021/*
1022 * XXX
1023 * This could be expanded to call kqueue_scan, if desired.
d9b2033e
MD
1024 *
1025 * MPSAFE
984263bc 1026 */
984263bc 1027static int
87de5057 1028kqueue_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
984263bc
MD
1029{
1030 return (ENXIO);
1031}
1032
d9b2033e
MD
1033/*
1034 * MPSAFE
1035 */
984263bc 1036static int
87de5057 1037kqueue_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
984263bc
MD
1038{
1039 return (ENXIO);
1040}
1041
d9b2033e 1042/*
a74548c7 1043 * MPALMOSTSAFE
d9b2033e 1044 */
984263bc 1045static int
87baaf0c
MD
1046kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
1047 struct ucred *cred, struct sysmsg *msg)
984263bc 1048{
a73855e8
MD
1049 struct kqueue *kq;
1050 int error;
1051
5b22f1a7 1052 lwkt_gettoken(&kq_token);
a73855e8
MD
1053 kq = (struct kqueue *)fp->f_data;
1054
1055 switch(com) {
1056 case FIOASYNC:
1057 if (*(int *)data)
1058 kq->kq_state |= KQ_ASYNC;
1059 else
1060 kq->kq_state &= ~KQ_ASYNC;
1061 error = 0;
1062 break;
1063 case FIOSETOWN:
1064 error = fsetown(*(int *)data, &kq->kq_sigio);
1065 break;
1066 default:
1067 error = ENOTTY;
1068 break;
1069 }
5b22f1a7 1070 lwkt_reltoken(&kq_token);
a73855e8 1071 return (error);
984263bc
MD
1072}
1073
d9b2033e 1074/*
d9b2033e
MD
1075 * MPSAFE
1076 */
984263bc 1077static int
87de5057 1078kqueue_stat(struct file *fp, struct stat *st, struct ucred *cred)
984263bc
MD
1079{
1080 struct kqueue *kq = (struct kqueue *)fp->f_data;
1081
1082 bzero((void *)st, sizeof(*st));
1083 st->st_size = kq->kq_count;
1084 st->st_blksize = sizeof(struct kevent);
1085 st->st_mode = S_IFIFO;
1086 return (0);
1087}
1088
d9b2033e 1089/*
5b22f1a7 1090 * MPSAFE
d9b2033e 1091 */
984263bc 1092static int
87de5057 1093kqueue_close(struct file *fp)
984263bc
MD
1094{
1095 struct kqueue *kq = (struct kqueue *)fp->f_data;
984263bc 1096
ccafe911
MD
1097 kqueue_terminate(kq);
1098
984263bc 1099 fp->f_data = NULL;
a73855e8 1100 funsetown(kq->kq_sigio);
984263bc 1101
efda3bd0 1102 kfree(kq, M_KQUEUE);
984263bc
MD
1103 return (0);
1104}
1105
5b22f1a7 1106static void
984263bc
MD
1107kqueue_wakeup(struct kqueue *kq)
1108{
984263bc
MD
1109 if (kq->kq_state & KQ_SLEEP) {
1110 kq->kq_state &= ~KQ_SLEEP;
1111 wakeup(kq);
1112 }
5b22f1a7 1113 KNOTE(&kq->kq_kqinfo.ki_note, 0);
984263bc
MD
1114}
1115
1116/*
4c91dbc9
SG
1117 * Calls filterops f_attach function, acquiring mplock if filter is not
1118 * marked as FILTEROP_MPSAFE.
1119 */
1120static int
1121filter_attach(struct knote *kn)
1122{
1123 int ret;
1124
1125 if (!(kn->kn_fop->f_flags & FILTEROP_MPSAFE)) {
1126 get_mplock();
1127 ret = kn->kn_fop->f_attach(kn);
1128 rel_mplock();
1129 } else {
1130 ret = kn->kn_fop->f_attach(kn);
1131 }
1132
1133 return (ret);
1134}
1135
1136/*
cf9f4e88
MD
1137 * Detach the knote and drop it, destroying the knote.
1138 *
4c91dbc9
SG
1139 * Calls filterops f_detach function, acquiring mplock if filter is not
1140 * marked as FILTEROP_MPSAFE.
cf9f4e88
MD
1141 *
1142 * This can race due to the MP lock and/or locks acquired by f_detach,
1143 * so we interlock with KN_DELETING. It is also possible to race
1144 * a create for the same reason if userland tries to delete the knote
1145 * before the create is complete.
4c91dbc9
SG
1146 */
1147static void
cf9f4e88 1148knote_detach_and_drop(struct knote *kn)
4c91dbc9 1149{
cf9f4e88
MD
1150 if (kn->kn_status & (KN_CREATING | KN_DELETING))
1151 return;
1152 kn->kn_status |= KN_DELETING;
1153
1154 if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
4c91dbc9 1155 kn->kn_fop->f_detach(kn);
4c91dbc9 1156 } else {
cf9f4e88 1157 get_mplock();
4c91dbc9 1158 kn->kn_fop->f_detach(kn);
cf9f4e88 1159 rel_mplock();
4c91dbc9 1160 }
cf9f4e88 1161 knote_drop(kn);
4c91dbc9
SG
1162}
1163
1164/*
1165 * Calls filterops f_event function, acquiring mplock if filter is not
1166 * marked as FILTEROP_MPSAFE.
cf9f4e88
MD
1167 *
1168 * If the knote is in the middle of being created or deleted we cannot
1169 * safely call the filter op.
4c91dbc9
SG
1170 */
1171static int
1172filter_event(struct knote *kn, long hint)
1173{
1174 int ret;
1175
cf9f4e88
MD
1176 if (kn->kn_status & (KN_CREATING | KN_DELETING))
1177 return(0);
1178
4c91dbc9
SG
1179 if (!(kn->kn_fop->f_flags & FILTEROP_MPSAFE)) {
1180 get_mplock();
1181 ret = kn->kn_fop->f_event(kn, hint);
1182 rel_mplock();
1183 } else {
1184 ret = kn->kn_fop->f_event(kn, hint);
1185 }
1186
1187 return (ret);
1188}
1189
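/*
 * Sketch of a hypothetical filter declaration (not part of this file):
 * a filter that does its own locking sets FILTEROP_MPSAFE so the
 * wrappers above call it without the MP lock.  Filters lacking the
 * flag, e.g. those defined near the top of this file, are still
 * bracketed by get_mplock()/rel_mplock().
 *
 *	static struct filterops example_filtops =
 *	    { FILTEROP_ISFD | FILTEROP_MPSAFE,
 *	      filt_exampleattach, filt_exampledetach, filt_example };
 */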
1190/*
984263bc
MD
1191 * walk down a list of knotes, activating them if their event has triggered.
1192 */
1193void
1194knote(struct klist *list, long hint)
1195{
1196 struct knote *kn;
1197
5b22f1a7 1198 lwkt_gettoken(&kq_token);
cf9f4e88 1199 SLIST_FOREACH(kn, list, kn_next) {
4c91dbc9 1200 if (filter_event(kn, hint))
984263bc 1201 KNOTE_ACTIVATE(kn);
cf9f4e88 1202 }
5b22f1a7
SG
1203 lwkt_reltoken(&kq_token);
1204}
1205
1206/*
1207 * insert knote at head of klist
1208 *
1209 * Requires: kq_token
1210 */
1211void
1212knote_insert(struct klist *klist, struct knote *kn)
1213{
853fe8da 1214 lwkt_gettoken(&kq_token);
5b22f1a7 1215 SLIST_INSERT_HEAD(klist, kn, kn_next);
853fe8da 1216 lwkt_reltoken(&kq_token);
5b22f1a7
SG
1217}
1218
1219/*
1220 * remove knote from a klist
1221 *
1222 * Requires: kq_token
1223 */
1224void
1225knote_remove(struct klist *klist, struct knote *kn)
1226{
853fe8da 1227 lwkt_gettoken(&kq_token);
5b22f1a7 1228 SLIST_REMOVE(klist, kn, knote, kn_next);
853fe8da 1229 lwkt_reltoken(&kq_token);
984263bc
MD
1230}
1231
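/*
 * Sketch of typical klist usage by a subsystem (hypothetical driver code,
 * not part of this file): the kqfilter entry point hangs the knote on the
 * object's klist with knote_insert(), the filter's detach routine calls
 * knote_remove(), and the notification path calls KNOTE(), which walks
 * the list via knote() above.
 *
 *	example_kqfilter(...)
 *	{
 *		kn->kn_fop = &example_filtops;
 *		knote_insert(&sc->kqinfo.ki_note, kn);
 *		return (0);
 *	}
 *
 *	... on incoming data ...
 *	KNOTE(&sc->kqinfo.ki_note, 0);
 */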
1232/*
1233 * remove all knotes from a specified klist
1234 */
1235void
5b22f1a7 1236knote_empty(struct klist *list)
984263bc
MD
1237{
1238 struct knote *kn;
1239
5b22f1a7 1240 lwkt_gettoken(&kq_token);
cf9f4e88
MD
1241 while ((kn = SLIST_FIRST(list)) != NULL)
1242 knote_detach_and_drop(kn);
5b22f1a7 1243 lwkt_reltoken(&kq_token);
984263bc
MD
1244}
1245
1246/*
1247 * remove all knotes referencing a specified fd
1248 */
1249void
ccafe911 1250knote_fdclose(struct file *fp, struct filedesc *fdp, int fd)
984263bc 1251{
ccafe911 1252 struct knote *kn;
984263bc 1253
5b22f1a7 1254 lwkt_gettoken(&kq_token);
ccafe911
MD
1255restart:
1256 SLIST_FOREACH(kn, &fp->f_klist, kn_link) {
1257 if (kn->kn_kq->kq_fdp == fdp && kn->kn_id == fd) {
cf9f4e88 1258 knote_detach_and_drop(kn);
ccafe911
MD
1259 goto restart;
1260 }
1261 }
5b22f1a7 1262 lwkt_reltoken(&kq_token);
984263bc
MD
1263}
1264
1265static void
ccafe911 1266knote_attach(struct knote *kn)
984263bc
MD
1267{
1268 struct klist *list;
ccafe911 1269 struct kqueue *kq = kn->kn_kq;
984263bc 1270
4c91dbc9 1271 if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
ccafe911
MD
1272 KKASSERT(kn->kn_fp);
1273 list = &kn->kn_fp->f_klist;
1274 } else {
1275 if (kq->kq_knhashmask == 0)
1276 kq->kq_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
1277 &kq->kq_knhashmask);
1278 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
984263bc 1279 }
984263bc 1280 SLIST_INSERT_HEAD(list, kn, kn_link);
ccafe911 1281 TAILQ_INSERT_HEAD(&kq->kq_knlist, kn, kn_kqlink);
cf9f4e88 1282 kn->kn_status = KN_CREATING;
984263bc
MD
1283}
1284
984263bc 1285static void
ccafe911 1286knote_drop(struct knote *kn)
984263bc 1287{
ccafe911 1288 struct kqueue *kq;
984263bc
MD
1289 struct klist *list;
1290
ccafe911
MD
1291 kq = kn->kn_kq;
1292
4c91dbc9 1293 if (kn->kn_fop->f_flags & FILTEROP_ISFD)
ccafe911 1294 list = &kn->kn_fp->f_klist;
984263bc 1295 else
ccafe911 1296 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
984263bc
MD
1297
1298 SLIST_REMOVE(list, kn, knote, kn_link);
ccafe911 1299 TAILQ_REMOVE(&kq->kq_knlist, kn, kn_kqlink);
984263bc
MD
1300 if (kn->kn_status & KN_QUEUED)
1301 knote_dequeue(kn);
34e191bd 1302 if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
9f87144f 1303 fdrop(kn->kn_fp);
34e191bd
MD
1304 kn->kn_fp = NULL;
1305 }
984263bc
MD
1306 knote_free(kn);
1307}
1308
984263bc
MD
1309static void
1310knote_enqueue(struct knote *kn)
1311{
1312 struct kqueue *kq = kn->kn_kq;
984263bc 1313
45610071 1314 KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));
984263bc 1315
ccafe911 1316 TAILQ_INSERT_TAIL(&kq->kq_knpend, kn, kn_tqe);
984263bc 1317 kn->kn_status |= KN_QUEUED;
a73855e8
MD
1318 ++kq->kq_count;
1319
1320 /*
1321 * Send SIGIO on request (typically set up as a mailbox signal)
1322 */
1323 if (kq->kq_sigio && (kq->kq_state & KQ_ASYNC) && kq->kq_count == 1)
1324 pgsigio(kq->kq_sigio, SIGIO, 0);
5b22f1a7 1325
984263bc
MD
1326 kqueue_wakeup(kq);
1327}
1328
1329static void
1330knote_dequeue(struct knote *kn)
1331{
1332 struct kqueue *kq = kn->kn_kq;
984263bc
MD
1333
1334 KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));
4fe3185e 1335 KKASSERT((kn->kn_status & KN_PROCESSING) == 0);
984263bc 1336
ccafe911 1337 TAILQ_REMOVE(&kq->kq_knpend, kn, kn_tqe);
984263bc
MD
1338 kn->kn_status &= ~KN_QUEUED;
1339 kq->kq_count--;
984263bc
MD
1340}
1341
1342static void
1343knote_init(void)
1344{
1345 knote_zone = zinit("KNOTE", sizeof(struct knote), 0, 0, 1);
1346}
1347SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
1348
1349static struct knote *
1350knote_alloc(void)
1351{
1352 return ((struct knote *)zalloc(knote_zone));
1353}
1354
1355static void
1356knote_free(struct knote *kn)
1357{
1358 zfree(knote_zone, kn);
1359}