2 * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (c) 1995-1999 by Internet Software Consortium
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
15 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 /* eventlib.c - implement glue for the eventlib
19 * vix 09sep95 [initial]
22 #if !defined(LINT) && !defined(CODECENTER)
23 static const char rcsid[] = "$Id: eventlib.c,v 1.10 2006/03/09 23:57:56 marka Exp $";
26 #include "port_before.h"
27 #include "fd_setsize.h"
29 #include <sys/types.h>
42 #include <isc/eventlib.h>
43 #include <isc/assertions.h>
44 #include "eventlib_p.h"
46 #include "port_after.h"
51 #define pselect Pselect
56 #if defined(NEED_PSELECT) || defined(USE_POLL)
57 static int pselect(int, void *, void *, void *,
/*
 * evCreate -- build a new event context and hand it back to the caller
 * through opaqueCtx->opaque.
 *
 * Steps visible in this block:
 *  - meminit(0, 0) is called first; a negative result is singled out
 *    only when errno is something other than EEXIST.
 *  - The accept list and the rd/wr/ex and nonblockBefore descriptor
 *    masks are initialised.  The emulMaskInit() calls are closed by an
 *    #endif marked USE_POLL, so they appear to belong to the poll(2)
 *    build.
 *  - fdCount is zeroed and highestFD is set to either FD_SETSIZE - 1 or
 *    INT_MAX / sizeof(struct pollfd).
 *  - lastEventTime is seeded from evNowTime(); with EVENTLIB_TIME_CHECKS
 *    lastFdCount and lastSelectTime are initialised as well.
 *  - The timer set comes from evCreateTimers(); a NULL result is checked.
 *  - waitLists and the waitDone list start out empty.
 *
 * NOTE(review): the return statements and the failure paths are not
 * visible in this view; confirm the return convention against the full
 * source.
 */
67 evCreate(evContext *opaqueCtx) {
70 /* Make sure the memory heap is initialized. */
71 if (meminit(0, 0) < 0 && errno != EEXIST)
85 INIT_LIST(ctx->accepts);
93 emulMaskInit(ctx, rdLast, EV_READ, 1);
94 emulMaskInit(ctx, rdNext, EV_READ, 0);
95 emulMaskInit(ctx, wrLast, EV_WRITE, 1);
96 emulMaskInit(ctx, wrNext, EV_WRITE, 0);
97 emulMaskInit(ctx, exLast, EV_EXCEPT, 1);
98 emulMaskInit(ctx, exNext, EV_EXCEPT, 0);
99 emulMaskInit(ctx, nonblockBefore, EV_WASNONBLOCKING, 0);
100 #endif /* USE_POLL */
101 FD_ZERO(&ctx->rdNext);
102 FD_ZERO(&ctx->wrNext);
103 FD_ZERO(&ctx->exNext);
104 FD_ZERO(&ctx->nonblockBefore);
107 ctx->fdCount = 0; /*%< Invalidate {rd,wr,ex}Last. */
109 ctx->highestFD = FD_SETSIZE - 1;
110 memset(ctx->fdTable, 0, sizeof ctx->fdTable);
112 ctx->highestFD = INT_MAX / sizeof(struct pollfd);
114 #endif /* USE_POLL */
115 #ifdef EVENTLIB_TIME_CHECKS
116 ctx->lastFdCount = 0;
125 ctx->lastEventTime = evNowTime();
126 #ifdef EVENTLIB_TIME_CHECKS
127 ctx->lastSelectTime = ctx->lastEventTime;
129 ctx->timers = evCreateTimers(ctx);
130 if (ctx->timers == NULL)
134 ctx->waitLists = NULL;
135 ctx->waitDone.first = ctx->waitDone.last = NULL;
136 ctx->waitDone.prev = ctx->waitDone.next = NULL;
138 opaqueCtx->opaque = ctx;
/*
 * evSetDebug -- attach a debug output stream to the context.
 *
 * output is stored in ctx->output, the stream evPrintf() writes to.
 *
 * NOTE(review): level is presumably stored in ctx->debug (the field
 * evPrintf() compares against); that assignment is not visible in this
 * view.
 */
143 evSetDebug(evContext opaqueCtx, int level, FILE *output) {
144 evContext_p *ctx = opaqueCtx.opaque;
147 ctx->output = output;
/*
 * evDestroy -- tear down an event context.
 *
 * Visible order of work:
 *  1. cancel every pending connection (evCancelConn on ctx->conns),
 *  2. cancel every stream (evCancelRW on ctx->streams),
 *  3. deselect every file (evDeselectFD on ctx->files),
 *  4. destroy the timers (evDestroyTimers),
 *  5. walk each wait list and then the waitDone list, saving the next
 *     pointer before the current node is processed.
 *
 * revs is a single iteration budget (424242) that every loop test
 * decrements, so each loop also ends once the budget is used up.
 *
 * NOTE(review): the statements that release the wait nodes and the
 * context itself, and the return value, are not visible in this view.
 */
151 evDestroy(evContext opaqueCtx) {
152 evContext_p *ctx = opaqueCtx.opaque;
153 int revs = 424242; /*%< Doug Adams. */
154 evWaitList *this_wl, *next_wl;
155 evWait *this_wait, *next_wait;
158 while (revs-- > 0 && ctx->conns != NULL) {
161 id.opaque = ctx->conns;
162 (void) evCancelConn(opaqueCtx, id);
167 while (revs-- > 0 && ctx->streams != NULL) {
170 id.opaque = ctx->streams;
171 (void) evCancelRW(opaqueCtx, id);
175 while (revs-- > 0 && ctx->files != NULL) {
178 id.opaque = ctx->files;
179 (void) evDeselectFD(opaqueCtx, id);
184 evDestroyTimers(ctx);
187 for (this_wl = ctx->waitLists;
188 revs-- > 0 && this_wl != NULL;
190 next_wl = this_wl->next;
191 for (this_wait = this_wl->first;
192 revs-- > 0 && this_wait != NULL;
193 this_wait = next_wait) {
194 next_wait = this_wait->next;
199 for (this_wait = ctx->waitDone.first;
200 revs-- > 0 && this_wait != NULL;
201 this_wait = next_wait) {
202 next_wait = this_wait->next;
/*
 * evGetNext -- obtain the next event for this context and store it in
 * opaqueEv->opaque.
 *
 * options is checked for EV_POLL and EV_WAIT (x counts how many of the
 * two are set); EV_NULL is consulted when pselect() was interrupted
 * (EINTR).
 *
 * Sources are consulted in this order:
 *  1. finished accepts (ctx->accepts),
 *  2. finished stream I/O (ctx->strDone),
 *  3. completed waits (ctx->waitDone),
 *  4. when fdCount is zero, a pselect() call whose timeout depends on
 *     EV_POLL, on whether a timer exists and on whether that timer is
 *     already due; the call is repeated while it returns 0 in Timer
 *     mode and the clock has not yet reached the timer,
 *  5. the earliest timer, once it is due,
 *  6. ready descriptors, found by walking ctx->files from ctx->fdNext
 *     and testing each fd against the rdLast/wrLast/exLast masks; the
 *     bits that are reported are cleared from those masks.
 *
 * errno is saved in pselect_errno right after pselect().  On EBADF the
 * descriptors 0..fdMax that are set in any of the Next masks are probed
 * with fstat() and the offender is logged at debug level 1.
 *
 * NOTE(review): the EVENTLIB_TIME_CHECKS message prints
 * interval.tv_sec and interval.tv_nsec (members of struct timespec)
 * with %u; confirm the conversion specifiers match those types.
 * NOTE(review): return statements and several branch bodies are not
 * visible in this view.
 */
211 evGetNext(evContext opaqueCtx, evEvent *opaqueEv, int options) {
212 evContext_p *ctx = opaqueCtx.opaque;
213 struct timespec nextTime;
216 int x, pselect_errno, timerPast;
217 #ifdef EVENTLIB_TIME_CHECKS
218 struct timespec interval;
221 /* Ensure that exactly one of EV_POLL or EV_WAIT was specified. */
222 x = ((options & EV_POLL) != 0) + ((options & EV_WAIT) != 0);
226 /* Get the time of day. We'll do this again after select() blocks. */
227 ctx->lastEventTime = evNowTime();
230 /* Finished accept()'s do not require a select(). */
231 if (!EMPTY(ctx->accepts)) {
234 new->u.accept.this = HEAD(ctx->accepts);
235 UNLINK(ctx->accepts, HEAD(ctx->accepts), link);
236 opaqueEv->opaque = new;
240 /* Stream IO does not require a select(). */
241 if (ctx->strDone != NULL) {
244 new->u.stream.this = ctx->strDone;
245 ctx->strDone = ctx->strDone->nextDone;
246 if (ctx->strDone == NULL)
248 opaqueEv->opaque = new;
252 /* Waits do not require a select(). */
253 if (ctx->waitDone.first != NULL) {
256 new->u.wait.this = ctx->waitDone.first;
257 ctx->waitDone.first = ctx->waitDone.first->next;
258 if (ctx->waitDone.first == NULL)
259 ctx->waitDone.last = NULL;
260 opaqueEv->opaque = new;
264 /* Get the status and content of the next timer. */
265 if ((nextTimer = heap_element(ctx->timers, 1)) != NULL) {
266 nextTime = nextTimer->due;
267 timerPast = (evCmpTime(nextTime, ctx->lastEventTime) <= 0);
269 timerPast = 0; /*%< Make gcc happy. */
270 evPrintf(ctx, 9, "evGetNext: fdCount %d\n", ctx->fdCount);
271 if (ctx->fdCount == 0) {
272 static const struct timespec NoTime = {0, 0L};
273 enum { JustPoll, Block, Timer } m;
274 struct timespec t, *tp;
276 /* Are there any events at all? */
277 if ((options & EV_WAIT) != 0 && !nextTimer && ctx->fdMax == -1)
280 /* Figure out what select()'s timeout parameter should be. */
281 if ((options & EV_POLL) != 0) {
285 } else if (nextTimer == NULL) {
289 } else if (timerPast) {
295 /* ``t'' filled in later. */
298 #ifdef EVENTLIB_TIME_CHECKS
299 if (ctx->debug > 0) {
300 interval = evSubTime(ctx->lastEventTime,
301 ctx->lastSelectTime);
302 if (interval.tv_sec > 0 || interval.tv_nsec > 0)
304 "time between pselect() %u.%09u count %d\n",
305 interval.tv_sec, interval.tv_nsec,
311 /* XXX need to copy only the bits we are using. */
312 ctx->rdLast = ctx->rdNext;
313 ctx->wrLast = ctx->wrNext;
314 ctx->exLast = ctx->exNext;
317 * The pollfd structure uses separate fields for
318 * the input and output events (corresponding to
319 * the ??Next and ??Last fd sets), so there's no
320 * need to copy one to the other.
322 #endif /* USE_POLL */
325 t = evSubTime(nextTime, ctx->lastEventTime);
328 /* XXX should predict system's earliness and adjust. */
329 x = pselect(ctx->fdMax+1,
330 &ctx->rdLast, &ctx->wrLast, &ctx->exLast,
332 pselect_errno = errno;
335 evPrintf(ctx, 4, "select() returns %d (err: %s)\n",
336 x, (x == -1) ? strerror(errno) : "none");
338 evPrintf(ctx, 4, "poll() returns %d (err: %s)\n",
339 x, (x == -1) ? strerror(errno) : "none");
340 #endif /* USE_POLL */
341 /* Anything but a poll can change the time. */
343 ctx->lastEventTime = evNowTime();
345 /* Select() likes to finish about 10ms early. */
346 } while (x == 0 && m == Timer &&
347 evCmpTime(ctx->lastEventTime, nextTime) < 0);
348 #ifdef EVENTLIB_TIME_CHECKS
349 ctx->lastSelectTime = ctx->lastEventTime;
352 if (pselect_errno == EINTR) {
353 if ((options & EV_NULL) != 0)
358 opaqueEv->opaque = new;
361 if (pselect_errno == EBADF) {
362 for (x = 0; x <= ctx->fdMax; x++) {
365 if (FD_ISSET(x, &ctx->rdNext) == 0 &&
366 FD_ISSET(x, &ctx->wrNext) == 0 &&
367 FD_ISSET(x, &ctx->exNext) == 0)
369 if (fstat(x, &sb) == -1 &&
371 evPrintf(ctx, 1, "EBADF: %d\n",
376 EV_ERR(pselect_errno);
378 if (x == 0 && (nextTimer == NULL || !timerPast) &&
382 #ifdef EVENTLIB_TIME_CHECKS
383 ctx->lastFdCount = x;
386 INSIST(nextTimer || ctx->fdCount);
388 /* Timers go first since we'd like them to be accurate. */
389 if (nextTimer && !timerPast) {
390 /* Has anything happened since we blocked? */
391 timerPast = (evCmpTime(nextTime, ctx->lastEventTime) <= 0);
393 if (nextTimer && timerPast) {
396 new->u.timer.this = nextTimer;
397 opaqueEv->opaque = new;
401 /* No timers, so there should be a ready file descriptor. */
403 while (ctx->fdCount > 0) {
407 if (ctx->fdNext == NULL) {
410 * Hitting the end twice means that the last
411 * select() found some FD's which have since
414 * On some systems, the count returned by
415 * selects is the total number of bits in
416 * all masks that are set, and on others it's
417 * the number of fd's that have some bit set,
418 * and on others, it's just broken. We
419 * always assume that it's the number of
420 * bits set in all masks, because that's what
421 * the man page says it should do, and
422 * the worst that can happen is we do an
428 ctx->fdNext = ctx->files;
431 ctx->fdNext = fid->next;
435 if (FD_ISSET(fd, &ctx->rdLast))
436 eventmask |= EV_READ;
437 if (FD_ISSET(fd, &ctx->wrLast))
438 eventmask |= EV_WRITE;
439 if (FD_ISSET(fd, &ctx->exLast))
440 eventmask |= EV_EXCEPT;
441 eventmask &= fid->eventmask;
442 if (eventmask != 0) {
443 if ((eventmask & EV_READ) != 0) {
444 FD_CLR(fd, &ctx->rdLast);
447 if ((eventmask & EV_WRITE) != 0) {
448 FD_CLR(fd, &ctx->wrLast);
451 if ((eventmask & EV_EXCEPT) != 0) {
452 FD_CLR(fd, &ctx->exLast);
457 new->u.file.this = fid;
458 new->u.file.eventmask = eventmask;
459 opaqueEv->opaque = new;
463 if (ctx->fdCount < 0) {
465 * select()'s count is off on a number of systems, and
466 * can result in fdCount < 0.
468 evPrintf(ctx, 4, "fdCount < 0 (%d)\n", ctx->fdCount);
472 /* We get here if the caller deselect()'s an FD. Gag me with a goto. */
/*
 * evDispatch -- invoke the user callback attached to an event, then
 * release the event with evDrop().
 *
 * Callback arguments by event kind, as visible here:
 *  - accept: (opaqueCtx, uap, accepted fd, &la, lalen, &ra, ralen),
 *    with errno preloaded from this->ioErrno;
 *  - file:   (opaqueCtx, uap, fd, eventmask);
 *  - stream: (opaqueCtx, uap, fd, ioDone), with errno preloaded from
 *    this->ioErrno;
 *  - timer:  (opaqueCtx, uap, due, inter);
 *  - wait:   (opaqueCtx, uap, tag).
 * Each call is preceded by a debug trace naming the callback and uap.
 *
 * With EVENTLIB_TIME_CHECKS and ctx->debug > 0 the time spent since
 * start_time is computed and reported when it exceeds 50 ms.
 *
 * NOTE(review): that report prints interval.tv_sec and
 * interval.tv_nsec (members of struct timespec) with %u; confirm the
 * conversion specifiers match those types.
 * NOTE(review): the statement that selects on the event type and the
 * return value are not visible in this view.
 */
477 evDispatch(evContext opaqueCtx, evEvent opaqueEv) {
478 evContext_p *ctx = opaqueCtx.opaque;
479 evEvent_p *ev = opaqueEv.opaque;
480 #ifdef EVENTLIB_TIME_CHECKS
482 struct timespec start_time;
483 struct timespec interval;
486 #ifdef EVENTLIB_TIME_CHECKS
488 start_time = evNowTime();
493 evAccept *this = ev->u.accept.this;
496 "Dispatch.Accept: fd %d -> %d, func %p, uap %p\n",
497 this->conn->fd, this->fd,
498 this->conn->func, this->conn->uap);
499 errno = this->ioErrno;
500 (this->conn->func)(opaqueCtx, this->conn->uap, this->fd,
501 &this->la, this->lalen,
502 &this->ra, this->ralen);
503 #ifdef EVENTLIB_TIME_CHECKS
504 func = this->conn->func;
509 evFile *this = ev->u.file.this;
510 int eventmask = ev->u.file.eventmask;
513 "Dispatch.File: fd %d, mask 0x%x, func %p, uap %p\n",
514 this->fd, this->eventmask, this->func, this->uap);
515 (this->func)(opaqueCtx, this->uap, this->fd, eventmask);
516 #ifdef EVENTLIB_TIME_CHECKS
522 evStream *this = ev->u.stream.this;
525 "Dispatch.Stream: fd %d, func %p, uap %p\n",
526 this->fd, this->func, this->uap);
527 errno = this->ioErrno;
528 (this->func)(opaqueCtx, this->uap, this->fd, this->ioDone);
529 #ifdef EVENTLIB_TIME_CHECKS
535 evTimer *this = ev->u.timer.this;
537 evPrintf(ctx, 5, "Dispatch.Timer: func %p, uap %p\n",
538 this->func, this->uap);
539 (this->func)(opaqueCtx, this->uap, this->due, this->inter);
540 #ifdef EVENTLIB_TIME_CHECKS
546 evWait *this = ev->u.wait.this;
549 "Dispatch.Wait: tag %p, func %p, uap %p\n",
550 this->tag, this->func, this->uap);
551 (this->func)(opaqueCtx, this->uap, this->tag);
552 #ifdef EVENTLIB_TIME_CHECKS
559 #ifdef EVENTLIB_TIME_CHECKS
568 #ifdef EVENTLIB_TIME_CHECKS
569 if (ctx->debug > 0) {
570 interval = evSubTime(evNowTime(), start_time);
572 * Complain if it took longer than 50 milliseconds.
574 * We call getuid() to make an easy to find mark in a kernel
577 if (interval.tv_sec > 0 || interval.tv_nsec > 50000000)
579 "dispatch interval %u.%09u uid %d type %d func %p\n",
580 interval.tv_sec, interval.tv_nsec,
581 getuid(), ev->type, func);
585 evDrop(opaqueCtx, opaqueEv);
/*
 * evDrop -- release an event (evDispatch() calls this once the user
 * callback has run).
 *
 * Cleanup visible per event kind:
 *  - accept and wait events: the underlying record is released with
 *    FREE();
 *  - stream events: the stream is cancelled through evCancelRW();
 *  - timer events: if the heap slot at this->index no longer holds this
 *    timer, a debug message notes that the timer seems to have been
 *    removed; otherwise a timer whose interval is zero is cleared with
 *    evClearTimer() and any other timer is rescheduled through
 *    evResetTimer().
 *
 * NOTE(review): the statement that selects on the event type, the
 * remaining evResetTimer() arguments and the release of the event
 * itself are not visible in this view.
 */
590 evDrop(evContext opaqueCtx, evEvent opaqueEv) {
591 evContext_p *ctx = opaqueCtx.opaque;
592 evEvent_p *ev = opaqueEv.opaque;
596 FREE(ev->u.accept.this);
606 id.opaque = ev->u.stream.this;
607 (void) evCancelRW(opaqueCtx, id);
611 evTimer *this = ev->u.timer.this;
614 /* Check to see whether the user func cleared the timer. */
615 if (heap_element(ctx->timers, this->index) != this) {
616 evPrintf(ctx, 5, "Dispatch.Timer: timer rm'd?\n");
620 * Timer is still there. Delete it if it has expired,
621 * otherwise set it according to its next interval.
623 if (this->inter.tv_sec == (time_t)0 &&
624 this->inter.tv_nsec == 0L) {
625 opaque.opaque = this;
626 (void) evClearTimer(opaqueCtx, opaque);
628 opaque.opaque = this;
629 (void) evResetTimer(opaqueCtx, opaque, this->func,
631 evAddTime((this->mode & EV_TMR_RATE) ?
640 FREE(ev->u.wait.this);
/*
 * evMainLoop -- fetch events with evGetNext(..., EV_WAIT) and hand each
 * one to evDispatch() for as long as evGetNext() returns 0.
 *
 * A negative result from evDispatch() is tested inside the loop.
 *
 * NOTE(review): the statement guarded by that test and the return value
 * are not visible in this view.
 */
655 evMainLoop(evContext opaqueCtx) {
659 while ((x = evGetNext(opaqueCtx, &event, EV_WAIT)) == 0)
660 if ((x = evDispatch(opaqueCtx, event)) < 0)
/*
 * evHighestFD -- return the highestFD value stored in the context.
 */
666 evHighestFD(evContext opaqueCtx) {
667 evContext_p *ctx = opaqueCtx.opaque;
669 return (ctx->highestFD);
/*
 * evPrintf -- printf-style debug output for a context.
 *
 * The message is written with vfprintf() only when ctx->output is set
 * and ctx->debug is at least level.
 *
 * NOTE(review): the va_list setup and teardown around the vfprintf()
 * call are not visible in this view.
 */
673 evPrintf(const evContext_p *ctx, int level, const char *fmt, ...) {
677 if (ctx->output != NULL && ctx->debug >= level) {
678 vfprintf(ctx->output, fmt, ap);
/*
 * evSetOption -- set a named library option.
 *
 * The only option visible here is monotime, handled when
 * CLOCK_MONOTONIC is defined: the values 0 and 1 are stored in
 * __evOptMonoTime.  opaqueCtx is tested against NULL for this option.
 *
 * NOTE(review): the outcome of that NULL test, the handling of other
 * values or option names, the branch used when CLOCK_MONOTONIC is not
 * defined, and the return codes are not visible in this view.
 */
685 evSetOption(evContext *opaqueCtx, const char *option, int value) {
686 /* evContext_p *ctx = opaqueCtx->opaque; */
690 #ifndef CLOCK_MONOTONIC
694 #ifdef CLOCK_MONOTONIC
695 if (strcmp(option, "monotime") == 0) {
696 if (opaqueCtx != NULL)
698 if (value == 0 || value == 1) {
699 __evOptMonoTime = value;
/*
 * evGetOption -- read a named library option into *value.
 *
 * The only option visible here is monotime, handled when
 * CLOCK_MONOTONIC is defined: *value receives __evOptMonoTime.
 * opaqueCtx is tested against NULL for this option.
 *
 * NOTE(review): the outcome of that NULL test, the handling of other
 * option names, the branch used when CLOCK_MONOTONIC is not defined,
 * and the return codes are not visible in this view.
 */
712 evGetOption(evContext *opaqueCtx, const char *option, int *value) {
713 /* evContext_p *ctx = opaqueCtx->opaque; */
716 #ifndef CLOCK_MONOTONIC
721 #ifdef CLOCK_MONOTONIC
722 if (strcmp(option, "monotime") == 0) {
723 if (opaqueCtx != NULL)
725 *value = __evOptMonoTime;
733 #if defined(NEED_PSELECT) || defined(USE_POLL)
734 /* XXX needs to move to the porting library. */
/*
 * pselect -- local stand-in for pselect(2), compiled when NEED_PSELECT
 * or USE_POLL is defined.
 *
 * Visible behaviour:
 *  - *tsp is converted to a struct timeval with evTimeVal(); in the
 *    USE_POLL build it is further reduced to a millisecond count in
 *    polltimeout, which starts out as INFTIM.
 *  - sigmask is installed with sigprocmask(SIG_SETMASK) before the wait
 *    and the previous mask (sigs) is restored afterwards.
 *  - The wait itself is select() or, in the poll build, poll() over
 *    ctx->pollfds[firstfd..fdMax]; ctx is taken from whichever of
 *    rfds/wfds/efds is non-NULL.
 *  - After poll(), descriptors firstfd..fdMax are examined; the
 *    pollfds[i].fd < 0 test presumably skips unused slots before the
 *    rdLast/wrLast/exLast masks are checked.
 *  - *tsp is overwritten with evTimeSpec(tv).
 *
 * NOTE(review): installing the mask and waiting are separate calls, so
 * this does not give the atomicity POSIX describes for pselect().
 * NOTE(review): 1000 * tv.tv_sec + tv.tv_usec / 1000 is stored in an
 * int; confirm large timeouts cannot overflow it.
 * NOTE(review): guards around the sigprocmask() calls and the *tsp
 * accesses, and the return value, are not visible in this view.
 */
736 pselect(int nfds, void *rfds, void *wfds, void *efds,
737 struct timespec *tsp,
738 const sigset_t *sigmask)
740 struct timeval tv, *tvp;
744 int polltimeout = INFTIM;
750 #endif /* USE_POLL */
754 tv = evTimeVal(*tsp);
756 polltimeout = 1000 * tv.tv_sec + tv.tv_usec / 1000;
757 #endif /* USE_POLL */
761 sigprocmask(SIG_SETMASK, sigmask, &sigs);
763 n = select(nfds, rfds, wfds, efds, tvp);
766 * rfds, wfds, and efds should all be from the same evContext_p,
767 * so any of them will do. If they're all NULL, the caller is
768 * presumably calling us to block.
771 ctx = ((__evEmulMask *)rfds)->ctx;
772 else if (wfds != NULL)
773 ctx = ((__evEmulMask *)wfds)->ctx;
774 else if (efds != NULL)
775 ctx = ((__evEmulMask *)efds)->ctx;
778 if (ctx != NULL && ctx->fdMax != -1) {
779 fds = &(ctx->pollfds[ctx->firstfd]);
780 pnfds = ctx->fdMax - ctx->firstfd + 1;
785 n = poll(fds, pnfds, polltimeout);
790 for (e = 0, i = ctx->firstfd; i <= ctx->fdMax; i++) {
791 if (ctx->pollfds[i].fd < 0)
793 if (FD_ISSET(i, &ctx->rdLast))
795 if (FD_ISSET(i, &ctx->wrLast))
797 if (FD_ISSET(i, &ctx->exLast))
802 #endif /* USE_POLL */
804 sigprocmask(SIG_SETMASK, &sigs, NULL);
806 *tsp = evTimeSpec(tv);
/*
 * evPollfdRealloc -- grow ctx->pollfds and ctx->fdTable so that index
 * fd can be used.
 *
 * The fd < ctx->maxnfds test comes first; its guarded statement is not
 * visible, presumably an early return because no growth is needed.
 * Otherwise the new element count is the smallest multiple of
 * pollfd_chunk_size that is greater than fd (chunk sizes below 20 are
 * raised to 20).  Both arrays go through realloc(); the results are
 * held in temporaries and a failure of either call is reported through
 * evPrintf() at debug level 2.  New pollfds slots from the old
 * maxnfds up to the new one are marked unused (fd = -1, events = 0)
 * and ctx->maxnfds is then updated.
 *
 * NOTE(review): in the failure message (long)maxnfds*sizeof(struct
 * pollfd) casts only maxnfds, so the product normally has type size_t
 * while the format uses %ld; confirm and adjust in the full source.
 * NOTE(review): the guards on the ctx->pollfds / ctx->fdTable
 * assignments, any initialisation of the new fdTable slots, and the
 * return values are not visible in this view.
 */
813 evPollfdRealloc(evContext_p *ctx, int pollfd_chunk_size, int fd) {
816 void *pollfds, *fdTable;
818 if (fd < ctx->maxnfds)
821 /* Don't allow ridiculously small values for pollfd_chunk_size */
822 if (pollfd_chunk_size < 20)
823 pollfd_chunk_size = 20;
825 maxnfds = (1 + (fd/pollfd_chunk_size)) * pollfd_chunk_size;
827 pollfds = realloc(ctx->pollfds, maxnfds * sizeof(*ctx->pollfds));
829 ctx->pollfds = pollfds;
830 fdTable = realloc(ctx->fdTable, maxnfds * sizeof(*ctx->fdTable));
832 ctx->fdTable = fdTable;
834 if (pollfds == NULL || fdTable == NULL) {
835 evPrintf(ctx, 2, "pollfd() realloc (%ld) failed\n",
836 (long)maxnfds*sizeof(struct pollfd));
840 for (i = ctx->maxnfds; i < maxnfds; i++) {
841 ctx->pollfds[i].fd = -1;
842 ctx->pollfds[i].events = 0;
846 ctx->maxnfds = maxnfds;
851 /* Find the appropriate 'events' or 'revents' field in the pollfds array */
/*
 * __fd_eventfield -- pick the pollfd member that backs a mask for fd.
 *
 * Yields the address of ctx->pollfds[fd].events when the mask is not a
 * result mask (maskp->result is zero) or its type is EV_WASNONBLOCKING,
 * and the address of ctx->pollfds[fd].revents otherwise.  ctx comes
 * from maskp->ctx.
 */
853 __fd_eventfield(int fd, __evEmulMask *maskp) {
855 evContext_p *ctx = (evContext_p *)maskp->ctx;
857 if (!maskp->result || maskp->type == EV_WASNONBLOCKING)
858 return (&(ctx->pollfds[fd].events));
860 return (&(ctx->pollfds[fd].revents));
863 /* Translate to poll(2) event */
/*
 * __poll_event -- map a mask type (maskp->type) to poll(2) event bits.
 *
 * NOTE(review): only one return (POLLRDBAND | POLLPRI | POLLWRBAND) and
 * the EV_WASNONBLOCKING case label are visible in this view; the other
 * cases, and which case owns that return, are not shown.
 */
865 __poll_event(__evEmulMask *maskp) {
867 switch ((maskp)->type) {
873 return (POLLRDBAND | POLLPRI | POLLWRBAND);
874 case EV_WASNONBLOCKING:
882 * Clear the events corresponding to the specified mask. If this leaves
883 * the events mask empty (apart from the POLLHUP bit), set the fd field
884 * to -1 so that poll(2) will ignore this fd.
/*
 * __fd_clr -- clear this mask's poll bits for fd.
 *
 * The bits from __poll_event() are removed from the member chosen by
 * __fd_eventfield().  When events then holds nothing besides POLLHUP,
 * the slot is retired (pollfds[fd].fd = -1).  If fd was the current
 * fdMax or firstfd, a loop walks over retired slots at that end of the
 * range, and firstfd > fdMax is finally tested to detect an empty set.
 *
 * NOTE(review): the loop bodies and the statements run for the empty
 * set are not visible in this view.
 */
887 __fd_clr(int fd, __evEmulMask *maskp) {
889 evContext_p *ctx = maskp->ctx;
891 *__fd_eventfield(fd, maskp) &= ~__poll_event(maskp);
892 if ((ctx->pollfds[fd].events & ~POLLHUP) == 0) {
893 ctx->pollfds[fd].fd = -1;
894 if (fd == ctx->fdMax)
895 while (ctx->fdMax > ctx->firstfd &&
896 ctx->pollfds[ctx->fdMax].fd < 0)
898 if (fd == ctx->firstfd)
899 while (ctx->firstfd <= ctx->fdMax &&
900 ctx->pollfds[ctx->firstfd].fd < 0)
903 * Do we have a empty set of descriptors?
905 if (ctx->firstfd > ctx->fdMax) {
913 * Set the events bit(s) corresponding to the specified mask. If the events
914 * field has any other bits than POLLHUP set, also set the fd field so that
915 * poll(2) will watch this fd.
/*
 * __fd_set -- set this mask's poll bits for fd.
 *
 * The bits from __poll_event() are ORed into the member chosen by
 * __fd_eventfield().  When events then holds any bit besides POLLHUP,
 * pollfds[fd].fd is set to fd so the slot is active, and
 * fd < firstfd or fdMax == -1 is tested.
 *
 * NOTE(review): the statements that update firstfd and fdMax after
 * that test are not visible in this view.
 */
918 __fd_set(int fd, __evEmulMask *maskp) {
920 evContext_p *ctx = maskp->ctx;
922 *__fd_eventfield(fd, maskp) |= __poll_event(maskp);
923 if ((ctx->pollfds[fd].events & ~POLLHUP) != 0) {
924 ctx->pollfds[fd].fd = fd;
925 if (fd < ctx->firstfd || ctx->fdMax == -1)
931 #endif /* USE_POLL */