2 * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (c) 1995-1999 by Internet Software Consortium
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
15 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 /* eventlib.c - implement glue for the eventlib
19 * vix 09sep95 [initial]
22 #if !defined(LINT) && !defined(CODECENTER)
23 static const char rcsid[] = "$Id: eventlib.c,v 1.2.2.1.4.6 2006/03/10 00:17:21 marka Exp $";
26 #include "port_before.h"
27 #include "fd_setsize.h"
29 #include <sys/types.h>
42 #include <isc/eventlib.h>
43 #include <isc/assertions.h>
44 #include "eventlib_p.h"
46 #include "port_after.h"
51 #define pselect Pselect
56 #if defined(NEED_PSELECT) || defined(USE_POLL)
57 static int pselect(int, void *, void *, void *,
67 evCreate(evContext *opaqueCtx) {
70 /* Make sure the memory heap is initialized. */
/*
 * evCreate: build a new event context and publish it through
 * opaqueCtx->opaque.  Only part of the function is visible in this
 * view; the notes below cover the visible statements only.
 *
 * The check below fires only when meminit() reports failure with an
 * errno other than EEXIST.
 */
71 if (meminit(0, 0) < 0 && errno != EEXIST)
85 INIT_LIST(ctx->accepts);
/*
 * Emulated masks: the three Last masks are initialised with a final
 * argument of 1, the three Next masks and nonblockBefore with 0.
 * NOTE(review): presumably that argument is the result flag tested by
 * __fd_eventfield() -- confirm against emulMaskInit.
 */
92 emulMaskInit(ctx, rdLast, EV_READ, 1);
93 emulMaskInit(ctx, rdNext, EV_READ, 0);
94 emulMaskInit(ctx, wrLast, EV_WRITE, 1);
95 emulMaskInit(ctx, wrNext, EV_WRITE, 0);
96 emulMaskInit(ctx, exLast, EV_EXCEPT, 1);
97 emulMaskInit(ctx, exNext, EV_EXCEPT, 0);
98 emulMaskInit(ctx, nonblockBefore, EV_WASNONBLOCKING, 0);
/* Clear the three Next sets and nonblockBefore. */
101 FD_ZERO(&ctx->rdNext);
102 FD_ZERO(&ctx->wrNext);
103 FD_ZERO(&ctx->exNext);
104 FD_ZERO(&ctx->nonblockBefore);
107 ctx->fdCount = 0; /* Invalidate {rd,wr,ex}Last. */
/*
 * NOTE(review): highestFD is assigned twice below with different
 * limits; presumably the two assignments sit in alternate
 * preprocessor branches whose directives are not visible here.
 */
109 ctx->highestFD = FD_SETSIZE - 1;
110 memset(ctx->fdTable, 0, sizeof ctx->fdTable);
112 ctx->highestFD = INT_MAX / sizeof(struct pollfd);
115 #ifdef EVENTLIB_TIME_CHECKS
116 ctx->lastFdCount = 0;
125 ctx->lastEventTime = evNowTime();
126 #ifdef EVENTLIB_TIME_CHECKS
127 ctx->lastSelectTime = ctx->lastEventTime;
/* Create the timer storage; a NULL result is tested right away. */
129 ctx->timers = evCreateTimers(ctx);
130 if (ctx->timers == NULL)
/* No wait lists yet, and the completed-wait queue starts out empty. */
134 ctx->waitLists = NULL;
135 ctx->waitDone.first = ctx->waitDone.last = NULL;
136 ctx->waitDone.prev = ctx->waitDone.next = NULL;
/* Publish the new context through the opaque handle. */
138 opaqueCtx->opaque = ctx;
/*
 * evSetDebug: configure debug tracing for a context.  The visible code
 * stores the output stream in ctx->output.
 * NOTE(review): level is presumably stored as ctx->debug (evPrintf
 * compares ctx->debug against a level), but that assignment is not
 * visible in this view.
 */
143 evSetDebug(evContext opaqueCtx, int level, FILE *output) {
144 evContext_p *ctx = opaqueCtx.opaque;
147 ctx->output = output;
/*
 * evDestroy: tear down an event context.  Visible steps, in order:
 * cancel pending connections, streams and selected files, destroy the
 * timers, then walk the wait lists and the completed-wait queue.
 */
151 evDestroy(evContext opaqueCtx) {
152 evContext_p *ctx = opaqueCtx.opaque;
/*
 * revs is one shared iteration budget: every loop condition below
 * decrements it, so the teardown loops together run at most that many
 * times.  NOTE(review): presumably a guard against lists that never
 * drain -- confirm.
 */
153 int revs = 424242; /* Doug Adams. */
154 evWaitList *this_wl, *next_wl;
155 evWait *this_wait, *next_wait;
/* Keep cancelling the head connection while the list is non-empty. */
158 while (revs-- > 0 && ctx->conns != NULL) {
161 id.opaque = ctx->conns;
162 (void) evCancelConn(opaqueCtx, id);
/* Same pattern for streams ... */
167 while (revs-- > 0 && ctx->streams != NULL) {
170 id.opaque = ctx->streams;
171 (void) evCancelRW(opaqueCtx, id);
/* ... and for selected files. */
175 while (revs-- > 0 && ctx->files != NULL) {
178 id.opaque = ctx->files;
179 (void) evDeselectFD(opaqueCtx, id);
184 evDestroyTimers(ctx);
/*
 * In each list walk below the successor pointer is read at the top of
 * the loop body and used as the loop step.
 */
187 for (this_wl = ctx->waitLists;
188 revs-- > 0 && this_wl != NULL;
190 next_wl = this_wl->next;
191 for (this_wait = this_wl->first;
192 revs-- > 0 && this_wait != NULL;
193 this_wait = next_wait) {
194 next_wait = this_wait->next;
199 for (this_wait = ctx->waitDone.first;
200 revs-- > 0 && this_wait != NULL;
201 this_wait = next_wait) {
202 next_wait = this_wait->next;
/*
 * evGetNext: select the next event to deliver and return it through
 * opaqueEv.  Sources are tried in the order visible below: finished
 * accepts, finished stream IO, finished waits, and then timers and
 * file descriptors, which may require a pselect() call.
 * options is expected to carry exactly one of EV_POLL or EV_WAIT.
 * Only part of the function is visible in this view.
 */
211 evGetNext(evContext opaqueCtx, evEvent *opaqueEv, int options) {
212 evContext_p *ctx = opaqueCtx.opaque;
213 struct timespec nextTime;
216 int x, pselect_errno, timerPast;
217 #ifdef EVENTLIB_TIME_CHECKS
218 struct timespec interval;
221 /* Ensure that exactly one of EV_POLL or EV_WAIT was specified. */
222 x = ((options & EV_POLL) != 0) + ((options & EV_WAIT) != 0);
226 /* Get the time of day. We'll do this again after select() blocks. */
227 ctx->lastEventTime = evNowTime();
230 /* Finished accept()'s do not require a select(). */
231 if (!EMPTY(ctx->accepts)) {
234 new->u.accept.this = HEAD(ctx->accepts);
235 UNLINK(ctx->accepts, HEAD(ctx->accepts), link);
236 opaqueEv->opaque = new;
240 /* Stream IO does not require a select(). */
241 if (ctx->strDone != NULL) {
244 new->u.stream.this = ctx->strDone;
245 ctx->strDone = ctx->strDone->nextDone;
246 if (ctx->strDone == NULL)
248 opaqueEv->opaque = new;
252 /* Waits do not require a select(). */
253 if (ctx->waitDone.first != NULL) {
256 new->u.wait.this = ctx->waitDone.first;
257 ctx->waitDone.first = ctx->waitDone.first->next;
258 if (ctx->waitDone.first == NULL)
259 ctx->waitDone.last = NULL;
260 opaqueEv->opaque = new;
264 /* Get the status and content of the next timer. */
265 if ((nextTimer = heap_element(ctx->timers, 1)) != NULL) {
266 nextTime = nextTimer->due;
/* timerPast: the due time is not later than the time sampled above. */
267 timerPast = (evCmpTime(nextTime, ctx->lastEventTime) <= 0);
269 timerPast = 0; /* Make gcc happy. */
/*
 * The block below, which performs the pselect() call, is entered only
 * when fdCount is zero.
 */
271 evPrintf(ctx, 9, "evGetNext: fdCount %d\n", ctx->fdCount);
272 if (ctx->fdCount == 0) {
273 static const struct timespec NoTime = {0, 0L};
274 enum { JustPoll, Block, Timer } m;
275 struct timespec t, *tp;
277 /* Are there any events at all? */
278 if ((options & EV_WAIT) != 0 && !nextTimer && ctx->fdMax == -1)
281 /* Figure out what select()'s timeout parameter should be. */
282 if ((options & EV_POLL) != 0) {
286 } else if (nextTimer == NULL) {
290 } else if (timerPast) {
296 /* ``t'' filled in later. */
299 #ifdef EVENTLIB_TIME_CHECKS
300 if (ctx->debug > 0) {
301 interval = evSubTime(ctx->lastEventTime,
302 ctx->lastSelectTime);
303 if (interval.tv_sec > 0 || interval.tv_nsec > 0)
305 "time between pselect() %u.%09u count %d\n",
306 interval.tv_sec, interval.tv_nsec,
312 /* XXX need to copy only the bits we are using. */
313 ctx->rdLast = ctx->rdNext;
314 ctx->wrLast = ctx->wrNext;
315 ctx->exLast = ctx->exNext;
318 * The pollfd structure uses separate fields for
319 * the input and output events (corresponding to
320 * the ??Next and ??Last fd sets), so there's no
321 * need to copy one to the other.
323 #endif /* USE_POLL */
326 t = evSubTime(nextTime, ctx->lastEventTime);
329 /* XXX should predict system's earliness and adjust. */
330 x = pselect(ctx->fdMax+1,
331 &ctx->rdLast, &ctx->wrLast, &ctx->exLast,
/* Capture errno now; the EINTR and EBADF checks below use this copy. */
333 pselect_errno = errno;
336 evPrintf(ctx, 4, "select() returns %d (err: %s)\n",
337 x, (x == -1) ? strerror(errno) : "none");
339 evPrintf(ctx, 4, "poll() returns %d (err: %s)\n",
340 x, (x == -1) ? strerror(errno) : "none");
341 #endif /* USE_POLL */
342 /* Anything but a poll can change the time. */
344 ctx->lastEventTime = evNowTime();
346 /* Select() likes to finish about 10ms early. */
347 } while (x == 0 && m == Timer &&
348 evCmpTime(ctx->lastEventTime, nextTime) < 0);
349 #ifdef EVENTLIB_TIME_CHECKS
350 ctx->lastSelectTime = ctx->lastEventTime;
353 if (pselect_errno == EINTR) {
354 if ((options & EV_NULL) != 0)
359 opaqueEv->opaque = new;
/*
 * On EBADF, probe each fd that is set in any of the Next masks with
 * fstat() and log offenders at debug level 1 (part of the fstat test
 * is not visible in this view).
 */
362 if (pselect_errno == EBADF) {
363 for (x = 0; x <= ctx->fdMax; x++) {
366 if (FD_ISSET(x, &ctx->rdNext) == 0 &&
367 FD_ISSET(x, &ctx->wrNext) == 0 &&
368 FD_ISSET(x, &ctx->exNext) == 0)
370 if (fstat(x, &sb) == -1 &&
372 evPrintf(ctx, 1, "EBADF: %d\n",
377 EV_ERR(pselect_errno);
379 if (x == 0 && (nextTimer == NULL || !timerPast) &&
383 #ifdef EVENTLIB_TIME_CHECKS
384 ctx->lastFdCount = x;
/* By this point there must be a timer or at least one pending fd result. */
387 INSIST(nextTimer || ctx->fdCount);
389 /* Timers go first since we'd like them to be accurate. */
390 if (nextTimer && !timerPast) {
391 /* Has anything happened since we blocked? */
392 timerPast = (evCmpTime(nextTime, ctx->lastEventTime) <= 0);
394 if (nextTimer && timerPast) {
397 new->u.timer.this = nextTimer;
398 opaqueEv->opaque = new;
402 /* No timers, so there should be a ready file descriptor. */
/*
 * Scan the registered files using the ctx->fdNext cursor.
 * NOTE(review): the cursor appears to be reset to ctx->files when it
 * is NULL; the surrounding lines are not all visible.
 */
404 while (ctx->fdCount > 0) {
408 if (ctx->fdNext == NULL) {
411 * Hitting the end twice means that the last
412 * select() found some FD's which have since
415 * On some systems, the count returned by
416 * selects is the total number of bits in
417 * all masks that are set, and on others it's
418 * the number of fd's that have some bit set,
419 * and on others, it's just broken. We
420 * always assume that it's the number of
421 * bits set in all masks, because that's what
422 * the man page says it should do, and
423 * the worst that can happen is we do an
429 ctx->fdNext = ctx->files;
432 ctx->fdNext = fid->next;
/*
 * Gather the result bits for this fd, then keep only the events this
 * file registered for.
 */
436 if (FD_ISSET(fd, &ctx->rdLast))
437 eventmask |= EV_READ;
438 if (FD_ISSET(fd, &ctx->wrLast))
439 eventmask |= EV_WRITE;
440 if (FD_ISSET(fd, &ctx->exLast))
441 eventmask |= EV_EXCEPT;
442 eventmask &= fid->eventmask;
443 if (eventmask != 0) {
/* Clear each bit that is being reported from its result mask. */
444 if ((eventmask & EV_READ) != 0) {
445 FD_CLR(fd, &ctx->rdLast);
448 if ((eventmask & EV_WRITE) != 0) {
449 FD_CLR(fd, &ctx->wrLast);
452 if ((eventmask & EV_EXCEPT) != 0) {
453 FD_CLR(fd, &ctx->exLast);
458 new->u.file.this = fid;
459 new->u.file.eventmask = eventmask;
460 opaqueEv->opaque = new;
464 if (ctx->fdCount < 0) {
466 * select()'s count is off on a number of systems, and
467 * can result in fdCount < 0.
469 evPrintf(ctx, 4, "fdCount < 0 (%d)\n", ctx->fdCount);
473 /* We get here if the caller deselect()'s an FD. Gag me with a goto. */
/*
 * evDispatch: run the handler attached to an event and then pass the
 * event to evDrop().  The visible cases call the handler of an accept,
 * file, stream, timer or wait record with arguments taken from that
 * record.  With EVENTLIB_TIME_CHECKS defined and ctx->debug above
 * zero, the time spent here is measured and compared against 50
 * milliseconds.
 */
478 evDispatch(evContext opaqueCtx, evEvent opaqueEv) {
479 evContext_p *ctx = opaqueCtx.opaque;
480 evEvent_p *ev = opaqueEv.opaque;
481 #ifdef EVENTLIB_TIME_CHECKS
483 struct timespec start_time;
484 struct timespec interval;
487 #ifdef EVENTLIB_TIME_CHECKS
489 start_time = evNowTime();
494 evAccept *this = ev->u.accept.this;
497 "Dispatch.Accept: fd %d -> %d, func %p, uap %p\n",
498 this->conn->fd, this->fd,
499 this->conn->func, this->conn->uap);
/* Set errno from the value saved in the accept record before the call. */
500 errno = this->ioErrno;
501 (this->conn->func)(opaqueCtx, this->conn->uap, this->fd,
502 &this->la, this->lalen,
503 &this->ra, this->ralen);
504 #ifdef EVENTLIB_TIME_CHECKS
505 func = this->conn->func;
510 evFile *this = ev->u.file.this;
511 int eventmask = ev->u.file.eventmask;
514 "Dispatch.File: fd %d, mask 0x%x, func %p, uap %p\n",
515 this->fd, this->eventmask, this->func, this->uap);
516 (this->func)(opaqueCtx, this->uap, this->fd, eventmask);
517 #ifdef EVENTLIB_TIME_CHECKS
523 evStream *this = ev->u.stream.this;
526 "Dispatch.Stream: fd %d, func %p, uap %p\n",
527 this->fd, this->func, this->uap);
/* Set errno from the value saved in the stream record before the call. */
528 errno = this->ioErrno;
529 (this->func)(opaqueCtx, this->uap, this->fd, this->ioDone);
530 #ifdef EVENTLIB_TIME_CHECKS
536 evTimer *this = ev->u.timer.this;
538 evPrintf(ctx, 5, "Dispatch.Timer: func %p, uap %p\n",
539 this->func, this->uap);
540 (this->func)(opaqueCtx, this->uap, this->due, this->inter);
541 #ifdef EVENTLIB_TIME_CHECKS
547 evWait *this = ev->u.wait.this;
550 "Dispatch.Wait: tag %p, func %p, uap %p\n",
551 this->tag, this->func, this->uap);
552 (this->func)(opaqueCtx, this->uap, this->tag);
553 #ifdef EVENTLIB_TIME_CHECKS
560 #ifdef EVENTLIB_TIME_CHECKS
569 #ifdef EVENTLIB_TIME_CHECKS
570 if (ctx->debug > 0) {
571 interval = evSubTime(evNowTime(), start_time);
573 * Complain if it took longer than 50 milliseconds.
575 * We call getuid() to make an easy to find mark in a kernel
578 if (interval.tv_sec > 0 || interval.tv_nsec > 50000000)
580 "dispatch interval %u.%09u uid %d type %d func %p\n",
581 interval.tv_sec, interval.tv_nsec,
582 getuid(), ev->type, func);
/* Hand the event to evDrop() for release. */
586 evDrop(opaqueCtx, opaqueEv);
/*
 * evDrop: release what is attached to an event.  Visible handling:
 * accept and wait records are passed to FREE, a stream is cancelled
 * through evCancelRW(), and a timer that is still in the heap is
 * either cleared or reset depending on its interval.
 */
591 evDrop(evContext opaqueCtx, evEvent opaqueEv) {
592 evContext_p *ctx = opaqueCtx.opaque;
593 evEvent_p *ev = opaqueEv.opaque;
597 FREE(ev->u.accept.this);
607 id.opaque = ev->u.stream.this;
608 (void) evCancelRW(opaqueCtx, id);
612 evTimer *this = ev->u.timer.this;
615 /* Check to see whether the user func cleared the timer. */
616 if (heap_element(ctx->timers, this->index) != this) {
617 evPrintf(ctx, 5, "Dispatch.Timer: timer rm'd?\n");
621 * Timer is still there. Delete it if it has expired,
622 * otherwise set it according to its next interval.
624 if (this->inter.tv_sec == (time_t)0 &&
625 this->inter.tv_nsec == 0L) {
/* Zero interval: the timer is cleared rather than reset. */
626 opaque.opaque = this;
627 (void) evClearTimer(opaqueCtx, opaque);
/* Non-zero interval: reset the timer through evResetTimer(). */
629 opaque.opaque = this;
630 (void) evResetTimer(opaqueCtx, opaque, this->func,
632 evAddTime((this->mode & EV_TMR_RATE) ?
641 FREE(ev->u.wait.this);
/*
 * evMainLoop: repeatedly fetch an event with evGetNext() in EV_WAIT
 * mode and dispatch it.  The loop continues while evGetNext() returns
 * zero; a negative result from evDispatch() is tested inside the loop
 * (what happens then is not visible in this view).
 */
656 evMainLoop(evContext opaqueCtx) {
660 while ((x = evGetNext(opaqueCtx, &event, EV_WAIT)) == 0)
661 if ((x = evDispatch(opaqueCtx, event)) < 0)
/* evHighestFD: return the highestFD value stored in the context. */
667 evHighestFD(evContext opaqueCtx) {
668 evContext_p *ctx = opaqueCtx.opaque;
670 return (ctx->highestFD);
/*
 * evPrintf: debug trace helper.  Writes the printf-style message to
 * ctx->output, but only when an output stream is set and ctx->debug
 * is at least level.
 */
674 evPrintf(const evContext_p *ctx, int level, const char *fmt, ...) {
678 if (ctx->output != NULL && ctx->debug >= level) {
679 vfprintf(ctx->output, fmt, ap);
/*
 * evSetOption: set a library option by name.  The only option visible
 * here is monotime, compiled in when CLOCK_MONOTONIC is defined: a
 * value of 0 or 1 is stored in __evOptMonoTime.  A non-NULL opaqueCtx
 * is tested for that option; the outcome of that test, and the return
 * values, are not visible in this view.
 */
686 evSetOption(evContext *opaqueCtx, const char *option, int value) {
687 /* evContext_p *ctx = opaqueCtx->opaque; */
691 #ifndef CLOCK_MONOTONIC
695 #ifdef CLOCK_MONOTONIC
696 if (strcmp(option, "monotime") == 0) {
697 if (opaqueCtx != NULL)
699 if (value == 0 || value == 1) {
700 __evOptMonoTime = value;
/*
 * evGetOption: read a library option by name.  The only option visible
 * here is monotime, compiled in when CLOCK_MONOTONIC is defined: the
 * current __evOptMonoTime is written to *value.  A non-NULL opaqueCtx
 * is tested for that option; the outcome of that test, and the return
 * values, are not visible in this view.
 */
713 evGetOption(evContext *opaqueCtx, const char *option, int *value) {
714 /* evContext_p *ctx = opaqueCtx->opaque; */
717 #ifndef CLOCK_MONOTONIC
722 #ifdef CLOCK_MONOTONIC
723 if (strcmp(option, "monotime") == 0) {
724 if (opaqueCtx != NULL)
726 *value = __evOptMonoTime;
734 #if defined(NEED_PSELECT) || defined(USE_POLL)
/*
 * pselect: local stand-in for pselect(), built when NEED_PSELECT or
 * USE_POLL is defined.  Visible behaviour: the timespec timeout is
 * converted to a timeval (and to milliseconds for poll), the signal
 * mask passed in is installed before the wait and the previous mask is
 * restored afterwards, the wait itself is done with select() or
 * poll(), and the timeval is converted back into *tsp at the end.
 * NOTE(review): the mask change and the wait are separate calls here,
 * so they do not form one atomic step.
 * NOTE(review): which statements belong to the USE_POLL branch cannot
 * be fully determined because some preprocessor lines are not visible.
 */
735 /* XXX needs to move to the porting library. */
737 pselect(int nfds, void *rfds, void *wfds, void *efds,
738 struct timespec *tsp,
739 const sigset_t *sigmask)
741 struct timeval tv, *tvp;
745 int polltimeout = INFTIM;
751 #endif /* USE_POLL */
/* Convert the timeout; the poll timeout is expressed in milliseconds. */
755 tv = evTimeVal(*tsp);
757 polltimeout = 1000 * tv.tv_sec + tv.tv_usec / 1000;
758 #endif /* USE_POLL */
/* Install the requested mask; the previous one is kept in sigs. */
762 sigprocmask(SIG_SETMASK, sigmask, &sigs);
764 n = select(nfds, rfds, wfds, efds, tvp);
767 * rfds, wfds, and efds should all be from the same evContext_p,
768 * so any of them will do. If they're all NULL, the caller is
769 * presumably calling us to block.
772 ctx = ((__evEmulMask *)rfds)->ctx;
773 else if (wfds != NULL)
774 ctx = ((__evEmulMask *)wfds)->ctx;
775 else if (efds != NULL)
776 ctx = ((__evEmulMask *)efds)->ctx;
779 if (ctx != NULL && ctx->fdMax != -1) {
780 fds = &(ctx->pollfds[ctx->firstfd]);
781 pnfds = ctx->fdMax - ctx->firstfd + 1;
786 n = poll(fds, pnfds, polltimeout);
791 for (e = 0, i = ctx->firstfd; i <= ctx->fdMax; i++) {
792 if (ctx->pollfds[i].fd < 0)
794 if (FD_ISSET(i, &ctx->rdLast))
796 if (FD_ISSET(i, &ctx->wrLast))
798 if (FD_ISSET(i, &ctx->exLast))
803 #endif /* USE_POLL */
805 sigprocmask(SIG_SETMASK, &sigs, NULL);
807 *tsp = evTimeSpec(tv);
/*
 * evPollfdRealloc: grow ctx->pollfds and ctx->fdTable so that index fd
 * fits.  The new capacity is a multiple of pollfd_chunk_size (values
 * below 20 are raised to 20).  Newly added pollfd slots get fd = -1
 * and events = 0, and ctx->maxnfds is updated to the new capacity.
 * The return statements are not visible in this view.
 */
814 evPollfdRealloc(evContext_p *ctx, int pollfd_chunk_size, int fd) {
817 void *pollfds, *fdTable;
/* fd already below the current capacity (the branch body is not visible). */
819 if (fd < ctx->maxnfds)
822 /* Don't allow ridiculously small values for pollfd_chunk_size */
823 if (pollfd_chunk_size < 20)
824 pollfd_chunk_size = 20;
/* For non-negative fd: the smallest chunk multiple strictly above fd. */
826 maxnfds = (1 + (fd/pollfd_chunk_size)) * pollfd_chunk_size;
/*
 * Both realloc results are first received in local temporaries.
 * NOTE(review): any guards around the two assignments into ctx are not
 * visible in this view.
 */
828 pollfds = realloc(ctx->pollfds, maxnfds * sizeof(*ctx->pollfds));
830 ctx->pollfds = pollfds;
831 fdTable = realloc(ctx->fdTable, maxnfds * sizeof(*ctx->fdTable));
833 ctx->fdTable = fdTable;
835 if (pollfds == NULL || fdTable == NULL) {
/*
 * NOTE(review): the cast binds to maxnfds only, so the argument has
 * the type of the multiplication, which need not be long -- check it
 * against the %ld conversion.
 */
836 evPrintf(ctx, 2, "pollfd() realloc (%ld) failed\n",
837 (long)maxnfds*sizeof(struct pollfd));
/* Initialise only the slots beyond the old capacity. */
841 for (i = ctx->maxnfds; i < maxnfds; i++) {
842 ctx->pollfds[i].fd = -1;
843 ctx->pollfds[i].events = 0;
847 ctx->maxnfds = maxnfds;
852 /* Find the appropriate 'events' or 'revents' field in the pollfds array */
854 __fd_eventfield(int fd, __evEmulMask *maskp) {
856 evContext_p *ctx = (evContext_p *)maskp->ctx;
/*
 * Masks without the result flag, and the EV_WASNONBLOCKING mask
 * whatever its flag, map to the events field of slot fd; all other
 * masks map to the revents field.
 */
858 if (!maskp->result || maskp->type == EV_WASNONBLOCKING)
859 return (&(ctx->pollfds[fd].events));
861 return (&(ctx->pollfds[fd].revents));
864 /* Translate to poll(2) event */
866 __poll_event(__evEmulMask *maskp) {
868 switch ((maskp)->type) {
/*
 * NOTE(review): most case labels and their return values are not
 * visible in this view; the return below presumably serves the
 * EV_EXCEPT case -- confirm against the full source.
 */
874 return (POLLRDBAND | POLLPRI | POLLWRBAND);
875 case EV_WASNONBLOCKING:
883 * Clear the events corresponding to the specified mask. If this leaves
884 * the events mask empty (apart from the POLLHUP bit), set the fd field
885 * to -1 so that poll(2) will ignore this fd.
888 __fd_clr(int fd, __evEmulMask *maskp) {
890 evContext_p *ctx = maskp->ctx;
/* Remove the poll bits of this mask from the matching field of slot fd. */
892 *__fd_eventfield(fd, maskp) &= ~__poll_event(maskp);
/* Nothing but POLLHUP left in events: retire the slot. */
893 if ((ctx->pollfds[fd].events & ~POLLHUP) == 0) {
894 ctx->pollfds[fd].fd = -1;
/*
 * When the retired fd was an end of the firstfd..fdMax range, the
 * loops below test for unused slots at that end (their bodies are not
 * visible in this view).
 */
895 if (fd == ctx->fdMax)
896 while (ctx->fdMax > ctx->firstfd &&
897 ctx->pollfds[ctx->fdMax].fd < 0)
899 if (fd == ctx->firstfd)
900 while (ctx->firstfd <= ctx->fdMax &&
901 ctx->pollfds[ctx->firstfd].fd < 0)
904 * Do we have a empty set of descriptors?
906 if (ctx->firstfd > ctx->fdMax) {
914 * Set the events bit(s) corresponding to the specified mask. If the events
915 * field has any other bits than POLLHUP set, also set the fd field so that
916 * poll(2) will watch this fd.
919 __fd_set(int fd, __evEmulMask *maskp) {
921 evContext_p *ctx = maskp->ctx;
/* Add the poll bits of this mask to the matching field of slot fd. */
923 *__fd_eventfield(fd, maskp) |= __poll_event(maskp);
/* The slot has events other than POLLHUP, so store the fd in it. */
924 if ((ctx->pollfds[fd].events & ~POLLHUP) != 0) {
925 ctx->pollfds[fd].fd = fd;
/*
 * Range check: fd lies below firstfd, or fdMax is -1 (the update that
 * follows is not visible in this view).
 */
926 if (fd < ctx->firstfd || ctx->fdMax == -1)
932 #endif /* USE_POLL */