2 * Copyright (C) 2004, 2006 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: dispatch.c,v 1.101.2.6.2.13.6.4 2007/06/27 04:19:50 marka Exp $ */
23 #include <sys/types.h>
26 #include <isc/entropy.h>
28 #include <isc/mutex.h>
29 #include <isc/print.h>
30 #include <isc/random.h>
31 #include <isc/string.h>
37 #include <dns/dispatch.h>
38 #include <dns/events.h>
40 #include <dns/message.h>
41 #include <dns/portlist.h>
42 #include <dns/tcpmsg.h>
43 #include <dns/types.h>
45 typedef ISC_LIST(dns_dispentry_t) dns_displist_t;
47 typedef struct dns_qid {
49 unsigned int qid_nbuckets; /* hash table size */
50 unsigned int qid_increment; /* id increment on collision */
52 dns_displist_t *qid_table; /* the table itself */
55 /* ARC4 Random generator state */
56 typedef struct arc4ctx {
63 struct dns_dispatchmgr {
68 dns_portlist_t *portlist;
70 /* Locked by "lock". */
73 ISC_LIST(dns_dispatch_t) list;
75 /* Locked by arc4_lock. */
76 isc_mutex_t arc4_lock;
77 arc4ctx_t arc4ctx; /*%< ARC4 context for QID */
79 /* locked by buffer lock */
81 isc_mutex_t buffer_lock;
82 unsigned int buffers; /* allocated buffers */
83 unsigned int buffersize; /* size of each buffer */
84 unsigned int maxbuffers; /* max buffers */
86 /* Locked internally. */
87 isc_mutex_t pool_lock;
88 isc_mempool_t *epool; /* memory pool for events */
89 isc_mempool_t *rpool; /* memory pool for replies */
90 isc_mempool_t *dpool; /* dispatch allocations */
91 isc_mempool_t *bpool; /* memory pool for buffers */
93 isc_entropy_t *entropy; /* entropy source */
96 #define MGR_SHUTTINGDOWN 0x00000001U
97 #define MGR_IS_SHUTTINGDOWN(l) (((l)->state & MGR_SHUTTINGDOWN) != 0)
99 #define IS_PRIVATE(d) (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
101 struct dns_dispentry {
103 dns_dispatch_t *disp;
109 isc_taskaction_t action;
111 isc_boolean_t item_out;
112 ISC_LIST(dns_dispatchevent_t) items;
113 ISC_LINK(dns_dispentry_t) link;
116 #define INVALID_BUCKET (0xffffdead)
118 struct dns_dispatch {
120 unsigned int magic; /* magic */
121 dns_dispatchmgr_t *mgr; /* dispatch manager */
122 isc_task_t *task; /* internal task */
123 isc_socket_t *socket; /* isc socket attached to */
124 isc_sockaddr_t local; /* local address */
125 in_port_t localport; /* local UDP port */
126 unsigned int maxrequests; /* max requests */
127 isc_event_t *ctlevent;
129 /* Locked by mgr->lock. */
130 ISC_LINK(dns_dispatch_t) link;
132 /* Locked by "lock". */
133 isc_mutex_t lock; /* locks all below */
134 isc_sockettype_t socktype;
135 unsigned int attributes;
136 unsigned int refcount; /* number of users */
137 dns_dispatchevent_t *failsafe_ev; /* failsafe cancel event */
138 unsigned int shutting_down : 1,
142 recv_pending : 1; /* is a recv() pending? */
143 isc_result_t shutdown_why;
144 unsigned int requests; /* how many requests we have */
145 unsigned int tcpbuffers; /* allocated buffers */
146 dns_tcpmsg_t tcpmsg; /* for tcp streams */
150 #define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ')
151 #define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC)
153 #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p')
154 #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC)
156 #define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p')
157 #define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC)
159 #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r')
160 #define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
/*
 * Select the query-ID table to use for 'disp': the dispatch's own table
 * when its socket type is TCP, otherwise the table held by its manager.
 *
 * The entire expansion is wrapped in parentheses so that operators placed
 * next to the macro at a use site cannot be absorbed into the conditional
 * expression.
 */
#define DNS_QID(disp) (((disp)->socktype == isc_sockettype_tcp) ? \
		       (disp)->qid : (disp)->mgr->qid)
167 static dns_dispentry_t *bucket_search(dns_qid_t *, isc_sockaddr_t *,
168 dns_messageid_t, in_port_t, unsigned int);
169 static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
170 static void destroy_disp(isc_task_t *task, isc_event_t *event);
171 static void udp_recv(isc_task_t *, isc_event_t *);
172 static void tcp_recv(isc_task_t *, isc_event_t *);
173 static void startrecv(dns_dispatch_t *);
174 static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
176 static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
177 static void *allocate_udp_buffer(dns_dispatch_t *disp);
178 static inline void free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
179 static inline dns_dispatchevent_t *allocate_event(dns_dispatch_t *disp);
180 static void do_cancel(dns_dispatch_t *disp);
181 static dns_dispentry_t *linear_first(dns_qid_t *disp);
182 static dns_dispentry_t *linear_next(dns_qid_t *disp,
183 dns_dispentry_t *resp);
184 static void dispatch_free(dns_dispatch_t **dispp);
185 static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
186 isc_socketmgr_t *sockmgr,
187 isc_taskmgr_t *taskmgr,
188 isc_sockaddr_t *localaddr,
189 unsigned int maxrequests,
190 unsigned int attributes,
191 dns_dispatch_t **dispp);
192 static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
193 static void destroy_mgr(dns_dispatchmgr_t **mgrp);
194 static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
195 unsigned int increment, dns_qid_t **qidp);
196 static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
198 #define LVL(x) ISC_LOG_DEBUG(x)
201 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
202 ISC_FORMAT_PRINTF(3, 4);
205 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
209 if (! isc_log_wouldlog(dns_lctx, level))
213 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
216 isc_log_write(dns_lctx,
217 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
218 level, "dispatchmgr %p: %s", mgr, msgbuf);
222 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
223 ISC_FORMAT_PRINTF(3, 4);
226 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
230 if (! isc_log_wouldlog(dns_lctx, level))
234 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
237 isc_log_write(dns_lctx,
238 DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
239 level, "dispatch %p: %s", disp, msgbuf);
243 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
244 int level, const char *fmt, ...)
245 ISC_FORMAT_PRINTF(4, 5);
248 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
249 int level, const char *fmt, ...)
255 if (! isc_log_wouldlog(dns_lctx, level))
259 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
262 if (VALID_RESPONSE(resp)) {
263 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
264 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
265 DNS_LOGMODULE_DISPATCH, level,
266 "dispatch %p response %p %s: %s", disp, resp,
269 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
270 DNS_LOGMODULE_DISPATCH, level,
271 "dispatch %p req/resp %p: %s", disp, resp,
277 * ARC4 random number generator obtained from OpenBSD
280 dispatch_arc4init(arc4ctx_t *actx) {
282 for (n = 0; n < 256; n++)
290 dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
295 for (n = 0; n < 256; n++) {
296 actx->i = (actx->i + 1);
297 si = actx->s[actx->i];
298 actx->j = (actx->j + si + dat[n % datlen]);
299 actx->s[actx->i] = actx->s[actx->j];
300 actx->s[actx->j] = si;
305 static inline isc_uint8_t
306 dispatch_arc4get8(arc4ctx_t *actx) {
309 actx->i = (actx->i + 1);
310 si = actx->s[actx->i];
311 actx->j = (actx->j + si);
312 sj = actx->s[actx->j];
313 actx->s[actx->i] = sj;
314 actx->s[actx->j] = si;
316 return (actx->s[(si + sj) & 0xff]);
319 static inline isc_uint16_t
320 dispatch_arc4get16(arc4ctx_t *actx) {
323 val = dispatch_arc4get8(actx) << 8;
324 val |= dispatch_arc4get8(actx);
330 dispatch_arc4stir(dns_dispatchmgr_t *mgr) {
333 unsigned char rnd[128];
334 isc_uint32_t rnd32[32];
338 if (mgr->entropy != NULL) {
340 * We accept any quality of random data to avoid blocking.
342 result = isc_entropy_getdata(mgr->entropy, rnd.rnd,
343 sizeof(rnd), NULL, 0);
344 RUNTIME_CHECK(result == ISC_R_SUCCESS);
346 for (i = 0; i < 32; i++)
347 isc_random_get(&rnd.rnd32[i]);
349 dispatch_arc4addrandom(&mgr->arc4ctx, rnd.rnd, sizeof(rnd.rnd));
352 * Discard early keystream, as per recommendations in:
353 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
355 for (i = 0; i < 256; i++)
356 (void)dispatch_arc4get8(&mgr->arc4ctx);
359 * Derived from OpenBSD's implementation. The rationale is not clear,
360 * but should be conservative enough in safety, and reasonably large
363 mgr->arc4ctx.count = 1600000;
367 dispatch_arc4random(dns_dispatchmgr_t *mgr) {
370 LOCK(&mgr->arc4_lock);
371 mgr->arc4ctx.count -= sizeof(isc_uint16_t);
372 if (mgr->arc4ctx.count <= 0)
373 dispatch_arc4stir(mgr);
374 result = dispatch_arc4get16(&mgr->arc4ctx);
375 UNLOCK(&mgr->arc4_lock);
380 dispatch_arc4uniformrandom(dns_dispatchmgr_t *mgr, isc_uint16_t upper_bound) {
382 /* The caller must hold the manager lock. */
388 * Ensure the range of random numbers [min, 0xffff] be a multiple of
389 * upper_bound and contain at least a half of the 16 bit range.
392 if (upper_bound > 0x8000)
393 min = 1 + ~upper_bound; /* 0x8000 - upper_bound */
395 min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
398 * This could theoretically loop forever but each retry has
399 * p > 0.5 (worst case, usually far better) of selecting a
400 * number inside the range we need, so it should rarely need
404 r = dispatch_arc4random(mgr);
409 return (r % upper_bound);
413 * Return a hash of the destination and message id.
416 dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
421 ret = isc_sockaddr_hash(dest, ISC_TRUE);
422 ret ^= (id << 16) | port;
423 ret %= qid->qid_nbuckets;
425 INSIST(ret < qid->qid_nbuckets);
431 * Find the first entry in 'qid'. Returns NULL if there are no entries.
433 static dns_dispentry_t *
434 linear_first(dns_qid_t *qid) {
435 dns_dispentry_t *ret;
440 while (bucket < qid->qid_nbuckets) {
441 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
451 * Find the next entry after 'resp' in 'qid'. Return NULL if there are
454 static dns_dispentry_t *
455 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
456 dns_dispentry_t *ret;
459 ret = ISC_LIST_NEXT(resp, link);
463 bucket = resp->bucket;
465 while (bucket < qid->qid_nbuckets) {
466 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
476 * The dispatch must be locked.
479 destroy_disp_ok(dns_dispatch_t *disp)
481 if (disp->refcount != 0)
484 if (disp->recv_pending != 0)
487 if (disp->shutting_down == 0)
495 * Called when refcount reaches 0 (and safe to destroy).
497 * The dispatcher must not be locked.
498 * The manager must be locked.
501 destroy_disp(isc_task_t *task, isc_event_t *event) {
502 dns_dispatch_t *disp;
503 dns_dispatchmgr_t *mgr;
504 isc_boolean_t killmgr;
506 INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
510 disp = event->ev_arg;
514 ISC_LIST_UNLINK(mgr->list, disp, link);
516 dispatch_log(disp, LVL(90),
517 "shutting down; detaching from sock %p, task %p",
518 disp->socket, disp->task);
520 isc_socket_detach(&disp->socket);
521 isc_task_detach(&disp->task);
522 isc_event_free(&event);
524 dispatch_free(&disp);
526 killmgr = destroy_mgr_ok(mgr);
534 * Find an entry for query ID 'id' and socket address 'dest' in 'qid'.
535 * Return NULL if no such entry exists.
537 static dns_dispentry_t *
538 bucket_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
539 in_port_t port, unsigned int bucket)
541 dns_dispentry_t *res;
543 REQUIRE(bucket < qid->qid_nbuckets);
545 res = ISC_LIST_HEAD(qid->qid_table[bucket]);
547 while (res != NULL) {
548 if ((res->id == id) && isc_sockaddr_equal(dest, &res->host) &&
552 res = ISC_LIST_NEXT(res, link);
559 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
560 INSIST(buf != NULL && len != 0);
563 switch (disp->socktype) {
564 case isc_sockettype_tcp:
565 INSIST(disp->tcpbuffers > 0);
567 isc_mem_put(disp->mgr->mctx, buf, len);
569 case isc_sockettype_udp:
570 LOCK(&disp->mgr->buffer_lock);
571 INSIST(disp->mgr->buffers > 0);
572 INSIST(len == disp->mgr->buffersize);
573 disp->mgr->buffers--;
574 isc_mempool_put(disp->mgr->bpool, buf);
575 UNLOCK(&disp->mgr->buffer_lock);
584 allocate_udp_buffer(dns_dispatch_t *disp) {
587 LOCK(&disp->mgr->buffer_lock);
588 temp = isc_mempool_get(disp->mgr->bpool);
591 disp->mgr->buffers++;
592 UNLOCK(&disp->mgr->buffer_lock);
598 free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
599 if (disp->failsafe_ev == ev) {
600 INSIST(disp->shutdown_out == 1);
601 disp->shutdown_out = 0;
606 isc_mempool_put(disp->mgr->epool, ev);
609 static inline dns_dispatchevent_t *
610 allocate_event(dns_dispatch_t *disp) {
611 dns_dispatchevent_t *ev;
613 ev = isc_mempool_get(disp->mgr->epool);
616 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
617 NULL, NULL, NULL, NULL, NULL);
625 * If I/O result == CANCELED or error, free the buffer.
627 * If query, free the buffer, restart.
630 * Allocate event, fill in details.
631 * If cannot allocate, free buffer, restart.
632 * find target. If not found, free buffer, restart.
633 * if event queue is not empty, queue. else, send.
637 udp_recv(isc_task_t *task, isc_event_t *ev_in) {
638 isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
639 dns_dispatch_t *disp = ev_in->ev_arg;
644 dns_dispentry_t *resp;
645 dns_dispatchevent_t *rev;
647 isc_boolean_t killit;
648 isc_boolean_t queue_response;
649 dns_dispatchmgr_t *mgr;
651 isc_netaddr_t netaddr;
661 dispatch_log(disp, LVL(90),
662 "got packet: requests %d, buffers %d, recvs %d",
663 disp->requests, disp->mgr->buffers, disp->recv_pending);
665 if (ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
667 * Unless the receive event was imported from a listening
668 * interface, in which case the event type is
669 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
671 INSIST(disp->recv_pending != 0);
672 disp->recv_pending = 0;
675 if (disp->shutting_down) {
677 * This dispatcher is shutting down.
679 free_buffer(disp, ev->region.base, ev->region.length);
681 isc_event_free(&ev_in);
684 killit = destroy_disp_ok(disp);
687 isc_task_send(disp->task, &disp->ctlevent);
692 if (ev->result != ISC_R_SUCCESS) {
693 free_buffer(disp, ev->region.base, ev->region.length);
695 if (ev->result != ISC_R_CANCELED)
696 dispatch_log(disp, ISC_LOG_ERROR,
697 "odd socket result in udp_recv(): %s",
698 isc_result_totext(ev->result));
701 isc_event_free(&ev_in);
706 * If this is from a blackholed address, drop it.
708 isc_netaddr_fromsockaddr(&netaddr, &ev->address);
709 if (disp->mgr->blackhole != NULL &&
710 dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
711 NULL, &match, NULL) == ISC_R_SUCCESS &&
714 if (isc_log_wouldlog(dns_lctx, LVL(10))) {
715 char netaddrstr[ISC_NETADDR_FORMATSIZE];
716 isc_netaddr_format(&netaddr, netaddrstr,
718 dispatch_log(disp, LVL(10),
719 "blackholed packet from %s",
722 free_buffer(disp, ev->region.base, ev->region.length);
727 * Peek into the buffer to see what we can see.
729 isc_buffer_init(&source, ev->region.base, ev->region.length);
730 isc_buffer_add(&source, ev->n);
731 dres = dns_message_peekheader(&source, &id, &flags);
732 if (dres != ISC_R_SUCCESS) {
733 free_buffer(disp, ev->region.base, ev->region.length);
734 dispatch_log(disp, LVL(10), "got garbage packet");
738 dispatch_log(disp, LVL(92),
739 "got valid DNS message header, /QR %c, id %u",
740 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
743 * Look at flags. If query, drop it. If response,
744 * look to see where it goes.
746 queue_response = ISC_FALSE;
747 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
749 free_buffer(disp, ev->region.base, ev->region.length);
754 bucket = dns_hash(qid, &ev->address, id, disp->localport);
756 resp = bucket_search(qid, &ev->address, id, disp->localport, bucket);
757 dispatch_log(disp, LVL(90),
758 "search for response in bucket %d: %s",
759 bucket, (resp == NULL ? "not found" : "found"));
762 free_buffer(disp, ev->region.base, ev->region.length);
767 * Now that we have the original dispatch the query was sent
768 * from check that the address and port the response was
769 * sent to make sense.
771 if (disp != resp->disp) {
776 * Check that the socket types and ports match.
778 if (disp->socktype != resp->disp->socktype ||
779 isc_sockaddr_getport(&disp->local) !=
780 isc_sockaddr_getport(&resp->disp->local)) {
781 free_buffer(disp, ev->region.base, ev->region.length);
786 * If both dispatches are bound to an address then fail as
787 * the addresses can't be equal (enforced by the IP stack).
789 * Note under Linux a packet can be sent out via IPv4 socket
790 * and the response be received via a IPv6 socket.
792 * Requests sent out via IPv6 should always come back in
795 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
796 isc_sockaddr_pf(&disp->local) != PF_INET6) {
797 free_buffer(disp, ev->region.base, ev->region.length);
800 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
801 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
802 if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
803 !isc_sockaddr_eqaddr(&a2, &disp->local)) {
804 free_buffer(disp, ev->region.base, ev->region.length);
809 queue_response = resp->item_out;
810 rev = allocate_event(resp->disp);
812 free_buffer(disp, ev->region.base, ev->region.length);
817 * At this point, rev contains the event we want to fill in, and
818 * resp contains the information on the place to send it to.
819 * Send the event off.
821 isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
822 isc_buffer_add(&rev->buffer, ev->n);
823 rev->result = ISC_R_SUCCESS;
825 rev->addr = ev->address;
826 rev->pktinfo = ev->pktinfo;
827 rev->attributes = ev->attributes;
828 if (queue_response) {
829 ISC_LIST_APPEND(resp->items, rev, ev_link);
831 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
833 resp->action, resp->arg, resp, NULL, NULL);
834 request_log(disp, resp, LVL(90),
835 "[a] Sent event %p buffer %p len %d to task %p",
836 rev, rev->buffer.base, rev->buffer.length,
838 resp->item_out = ISC_TRUE;
839 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
845 * Restart recv() to get the next packet.
852 isc_event_free(&ev_in);
858 * If I/O result == CANCELED, EOF, or error, notify everyone as the
859 * various queues drain.
864 * Allocate event, fill in details.
865 * If cannot allocate, restart.
866 * find target. If not found, restart.
867 * if event queue is not empty, queue. else, send.
871 tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
872 dns_dispatch_t *disp = ev_in->ev_arg;
873 dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
877 dns_dispentry_t *resp;
878 dns_dispatchevent_t *rev;
880 isc_boolean_t killit;
881 isc_boolean_t queue_response;
884 char buf[ISC_SOCKADDR_FORMATSIZE];
888 REQUIRE(VALID_DISPATCH(disp));
892 dispatch_log(disp, LVL(90),
893 "got TCP packet: requests %d, buffers %d, recvs %d",
894 disp->requests, disp->tcpbuffers, disp->recv_pending);
898 INSIST(disp->recv_pending != 0);
899 disp->recv_pending = 0;
901 if (disp->refcount == 0) {
903 * This dispatcher is shutting down. Force cancelation.
905 tcpmsg->result = ISC_R_CANCELED;
908 if (tcpmsg->result != ISC_R_SUCCESS) {
909 switch (tcpmsg->result) {
914 dispatch_log(disp, LVL(90), "shutting down on EOF");
918 case ISC_R_CONNECTIONRESET:
919 level = ISC_LOG_INFO;
923 level = ISC_LOG_ERROR;
925 isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
926 dispatch_log(disp, level, "shutting down due to TCP "
927 "receive error: %s: %s", buf,
928 isc_result_totext(tcpmsg->result));
934 * The event is statically allocated in the tcpmsg
935 * structure, and destroy_disp() frees the tcpmsg, so we must
936 * free the event *before* calling destroy_disp().
938 isc_event_free(&ev_in);
940 disp->shutting_down = 1;
941 disp->shutdown_why = tcpmsg->result;
944 * If the recv() was canceled pass the word on.
946 killit = destroy_disp_ok(disp);
949 isc_task_send(disp->task, &disp->ctlevent);
953 dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
955 tcpmsg->buffer.length, tcpmsg->buffer.base);
958 * Peek into the buffer to see what we can see.
960 dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
961 if (dres != ISC_R_SUCCESS) {
962 dispatch_log(disp, LVL(10), "got garbage packet");
966 dispatch_log(disp, LVL(92),
967 "got valid DNS message header, /QR %c, id %u",
968 ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
971 * Allocate an event to send to the query or response client, and
972 * allocate a new buffer for our use.
976 * Look at flags. If query, drop it. If response,
977 * look to see where it goes.
979 queue_response = ISC_FALSE;
980 if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
990 bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
992 resp = bucket_search(qid, &tcpmsg->address, id, disp->localport,
994 dispatch_log(disp, LVL(90),
995 "search for response in bucket %d: %s",
996 bucket, (resp == NULL ? "not found" : "found"));
1000 queue_response = resp->item_out;
1001 rev = allocate_event(disp);
1006 * At this point, rev contains the event we want to fill in, and
1007 * resp contains the information on the place to send it to.
1008 * Send the event off.
1010 dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1012 rev->result = ISC_R_SUCCESS;
1014 rev->addr = tcpmsg->address;
1015 if (queue_response) {
1016 ISC_LIST_APPEND(resp->items, rev, ev_link);
1018 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1019 resp->action, resp->arg, resp, NULL, NULL);
1020 request_log(disp, resp, LVL(90),
1021 "[b] Sent event %p buffer %p len %d to task %p",
1022 rev, rev->buffer.base, rev->buffer.length,
1024 resp->item_out = ISC_TRUE;
1025 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1031 * Restart recv() to get the next packet.
1036 UNLOCK(&disp->lock);
1038 isc_event_free(&ev_in);
1042 * disp must be locked.
1045 startrecv(dns_dispatch_t *disp) {
1047 isc_region_t region;
1049 if (disp->shutting_down == 1)
1052 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1055 if (disp->recv_pending != 0)
1058 if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1061 switch (disp->socktype) {
1063 * UDP reads are always maximal.
1065 case isc_sockettype_udp:
1066 region.length = disp->mgr->buffersize;
1067 region.base = allocate_udp_buffer(disp);
1068 if (region.base == NULL)
1070 res = isc_socket_recv(disp->socket, ®ion, 1,
1071 disp->task, udp_recv, disp);
1072 if (res != ISC_R_SUCCESS) {
1073 free_buffer(disp, region.base, region.length);
1074 disp->shutdown_why = res;
1075 disp->shutting_down = 1;
1079 INSIST(disp->recv_pending == 0);
1080 disp->recv_pending = 1;
1083 case isc_sockettype_tcp:
1084 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task,
1086 if (res != ISC_R_SUCCESS) {
1087 disp->shutdown_why = res;
1088 disp->shutting_down = 1;
1092 INSIST(disp->recv_pending == 0);
1093 disp->recv_pending = 1;
1099 * Mgr must be locked when calling this function.
1101 static isc_boolean_t
1102 destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1103 mgr_log(mgr, LVL(90),
1104 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1105 "epool=%d, rpool=%d, dpool=%d",
1106 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1107 isc_mempool_getallocated(mgr->epool),
1108 isc_mempool_getallocated(mgr->rpool),
1109 isc_mempool_getallocated(mgr->dpool));
1110 if (!MGR_IS_SHUTTINGDOWN(mgr))
1112 if (!ISC_LIST_EMPTY(mgr->list))
1114 if (isc_mempool_getallocated(mgr->epool) != 0)
1116 if (isc_mempool_getallocated(mgr->rpool) != 0)
1118 if (isc_mempool_getallocated(mgr->dpool) != 0)
1125 * Mgr must be unlocked when calling this function.
1128 destroy_mgr(dns_dispatchmgr_t **mgrp) {
1130 dns_dispatchmgr_t *mgr;
1139 DESTROYLOCK(&mgr->lock);
1142 DESTROYLOCK(&mgr->arc4_lock);
1144 isc_mempool_destroy(&mgr->epool);
1145 isc_mempool_destroy(&mgr->rpool);
1146 isc_mempool_destroy(&mgr->dpool);
1147 isc_mempool_destroy(&mgr->bpool);
1149 DESTROYLOCK(&mgr->pool_lock);
1151 if (mgr->entropy != NULL)
1152 isc_entropy_detach(&mgr->entropy);
1153 if (mgr->qid != NULL)
1154 qid_destroy(mctx, &mgr->qid);
1156 DESTROYLOCK(&mgr->buffer_lock);
1158 if (mgr->blackhole != NULL)
1159 dns_acl_detach(&mgr->blackhole);
1161 if (mgr->portlist != NULL)
1162 dns_portlist_detach(&mgr->portlist);
1164 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1165 isc_mem_detach(&mctx);
1169 create_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1170 isc_socket_t **sockp)
1173 isc_result_t result;
1176 result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1177 isc_sockettype_udp, &sock);
1178 if (result != ISC_R_SUCCESS)
1181 #ifndef ISC_ALLOW_MAPPED
1182 isc_socket_ipv6only(sock, ISC_TRUE);
1184 result = isc_socket_bind(sock, local);
1185 if (result != ISC_R_SUCCESS) {
1186 isc_socket_detach(&sock);
1191 return (ISC_R_SUCCESS);
1199 dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1200 dns_dispatchmgr_t **mgrp)
1202 dns_dispatchmgr_t *mgr;
1203 isc_result_t result;
1205 REQUIRE(mctx != NULL);
1206 REQUIRE(mgrp != NULL && *mgrp == NULL);
1208 mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1210 return (ISC_R_NOMEMORY);
1213 isc_mem_attach(mctx, &mgr->mctx);
1215 mgr->blackhole = NULL;
1216 mgr->portlist = NULL;
1218 result = isc_mutex_init(&mgr->lock);
1219 if (result != ISC_R_SUCCESS)
1222 result = isc_mutex_init(&mgr->arc4_lock);
1223 if (result != ISC_R_SUCCESS)
1226 result = isc_mutex_init(&mgr->buffer_lock);
1227 if (result != ISC_R_SUCCESS)
1228 goto kill_arc4_lock;
1230 result = isc_mutex_init(&mgr->pool_lock);
1231 if (result != ISC_R_SUCCESS)
1232 goto kill_buffer_lock;
1235 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
1236 &mgr->epool) != ISC_R_SUCCESS) {
1237 result = ISC_R_NOMEMORY;
1238 goto kill_pool_lock;
1242 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
1243 &mgr->rpool) != ISC_R_SUCCESS) {
1244 result = ISC_R_NOMEMORY;
1249 if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
1250 &mgr->dpool) != ISC_R_SUCCESS) {
1251 result = ISC_R_NOMEMORY;
1255 isc_mempool_setname(mgr->epool, "dispmgr_epool");
1256 isc_mempool_setfreemax(mgr->epool, 1024);
1257 isc_mempool_associatelock(mgr->epool, &mgr->pool_lock);
1259 isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
1260 isc_mempool_setfreemax(mgr->rpool, 1024);
1261 isc_mempool_associatelock(mgr->rpool, &mgr->pool_lock);
1263 isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
1264 isc_mempool_setfreemax(mgr->dpool, 1024);
1265 isc_mempool_associatelock(mgr->dpool, &mgr->pool_lock);
1268 mgr->buffersize = 0;
1269 mgr->maxbuffers = 0;
1271 mgr->entropy = NULL;
1274 ISC_LIST_INIT(mgr->list);
1275 mgr->magic = DNS_DISPATCHMGR_MAGIC;
1277 if (entropy != NULL)
1278 isc_entropy_attach(entropy, &mgr->entropy);
1280 dispatch_arc4init(&mgr->arc4ctx);
1283 return (ISC_R_SUCCESS);
1286 isc_mempool_destroy(&mgr->rpool);
1288 isc_mempool_destroy(&mgr->epool);
1290 DESTROYLOCK(&mgr->pool_lock);
1292 DESTROYLOCK(&mgr->buffer_lock);
1294 DESTROYLOCK(&mgr->arc4_lock);
1296 DESTROYLOCK(&mgr->lock);
1298 isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1299 isc_mem_detach(&mctx);
1305 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
1306 REQUIRE(VALID_DISPATCHMGR(mgr));
1307 if (mgr->blackhole != NULL)
1308 dns_acl_detach(&mgr->blackhole);
1309 dns_acl_attach(blackhole, &mgr->blackhole);
1313 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
1314 REQUIRE(VALID_DISPATCHMGR(mgr));
1315 return (mgr->blackhole);
1319 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
1320 dns_portlist_t *portlist)
1322 REQUIRE(VALID_DISPATCHMGR(mgr));
1323 if (mgr->portlist != NULL)
1324 dns_portlist_detach(&mgr->portlist);
1325 if (portlist != NULL)
1326 dns_portlist_attach(portlist, &mgr->portlist);
1330 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
1331 REQUIRE(VALID_DISPATCHMGR(mgr));
1332 return (mgr->portlist);
1336 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
1337 unsigned int buffersize, unsigned int maxbuffers,
1338 unsigned int buckets, unsigned int increment)
1340 isc_result_t result;
1342 REQUIRE(VALID_DISPATCHMGR(mgr));
1343 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
1344 REQUIRE(maxbuffers > 0);
1345 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
1346 REQUIRE(increment > buckets);
1349 * Keep some number of items around. This should be a config
1350 * option. For now, keep 8, but later keep at least two even
1351 * if the caller wants less. This allows us to ensure certain
1352 * things, like an event can be "freed" and the next allocation
1353 * will always succeed.
1355 * Note that if limits are placed on anything here, we use one
1356 * event internally, so the actual limit should be "wanted + 1."
1364 LOCK(&mgr->buffer_lock);
1365 if (mgr->bpool != NULL) {
1366 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
1367 mgr->maxbuffers = maxbuffers;
1368 UNLOCK(&mgr->buffer_lock);
1369 return (ISC_R_SUCCESS);
1372 if (isc_mempool_create(mgr->mctx, buffersize,
1373 &mgr->bpool) != ISC_R_SUCCESS) {
1374 return (ISC_R_NOMEMORY);
1377 isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
1378 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
1379 isc_mempool_associatelock(mgr->bpool, &mgr->pool_lock);
1381 result = qid_allocate(mgr, buckets, increment, &mgr->qid);
1382 if (result != ISC_R_SUCCESS)
1385 mgr->buffersize = buffersize;
1386 mgr->maxbuffers = maxbuffers;
1387 UNLOCK(&mgr->buffer_lock);
1388 return (ISC_R_SUCCESS);
1391 isc_mempool_destroy(&mgr->bpool);
1392 UNLOCK(&mgr->buffer_lock);
1393 return (ISC_R_NOMEMORY);
1397 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
1398 dns_dispatchmgr_t *mgr;
1399 isc_boolean_t killit;
1401 REQUIRE(mgrp != NULL);
1402 REQUIRE(VALID_DISPATCHMGR(*mgrp));
1408 mgr->state |= MGR_SHUTTINGDOWN;
1410 killit = destroy_mgr_ok(mgr);
1413 mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
1419 static isc_boolean_t
1420 blacklisted(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
1421 isc_sockaddr_t *sockaddrp)
1423 isc_sockaddr_t sockaddr;
1424 isc_result_t result;
1426 REQUIRE(sock != NULL || sockaddrp != NULL);
1428 if (mgr->portlist == NULL)
1432 sockaddrp = &sockaddr;
1433 result = isc_socket_getsockname(sock, sockaddrp);
1434 if (result != ISC_R_SUCCESS)
1438 if (mgr->portlist != NULL &&
1439 dns_portlist_match(mgr->portlist, isc_sockaddr_pf(sockaddrp),
1440 isc_sockaddr_getport(sockaddrp)))
1445 #define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask)))
1447 static isc_boolean_t
1448 local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
1449 isc_sockaddr_t sockaddr;
1450 isc_result_t result;
1456 * Don't match wildcard ports against newly blacklisted ports.
1458 if (disp->mgr->portlist != NULL &&
1459 isc_sockaddr_getport(addr) == 0 &&
1460 isc_sockaddr_getport(&disp->local) == 0 &&
1461 blacklisted(disp->mgr, disp->socket, NULL))
1465 * Check if we match the binding <address,port>.
1466 * Wildcard ports match/fail here.
1468 if (isc_sockaddr_equal(&disp->local, addr))
1470 if (isc_sockaddr_getport(addr) == 0)
1474 * Check if we match a bound wildcard port <address,port>.
1476 if (!isc_sockaddr_eqaddr(&disp->local, addr))
1478 result = isc_socket_getsockname(disp->socket, &sockaddr);
1479 if (result != ISC_R_SUCCESS)
1482 return (isc_sockaddr_equal(&sockaddr, addr));
1486 * Requires mgr be locked.
1488 * No dispatcher can be locked by this thread when calling this function.
1492 * If a matching dispatcher is found, it is locked after this function
1493 * returns, and must be unlocked by the caller.
/*
 * dispatch_find: walk mgr->list looking for a dispatch that is not
 * shutting down, whose attributes agree with 'attributes' on the bits in
 * 'mask', and for which local_addr_match() accepts 'local'.
 *
 * The PRIVATE attribute is cleared from 'attributes' and added to 'mask'
 * before the search, so a dispatch carrying the PRIVATE bit cannot pass
 * the ATTRMATCH() test.
 *
 * 'result' is set to ISC_R_NOTFOUND or ISC_R_SUCCESS.
 *
 * NOTE(review): lines are missing from this excerpt: locking of each
 * candidate, the loop exit on a match, the store through 'dispp' and the
 * final return are not shown -- confirm against the full file.
 */
1496 dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
1497 unsigned int attributes, unsigned int mask,
1498 dns_dispatch_t **dispp)
1500 dns_dispatch_t *disp;
1501 isc_result_t result;
1504 * Make certain that we will not match a private dispatch.
1506 attributes &= ~DNS_DISPATCHATTR_PRIVATE;
1507 mask |= DNS_DISPATCHATTR_PRIVATE;
1509 disp = ISC_LIST_HEAD(mgr->list);
1510 while (disp != NULL) {
/* All three conditions must hold for 'disp' to be a match. */
1512 if ((disp->shutting_down == 0)
1513 && ATTRMATCH(disp->attributes, attributes, mask)
1514 && local_addr_match(disp, local))
/* Release this candidate's lock and advance to the next list entry. */
1516 UNLOCK(&disp->lock);
1517 disp = ISC_LIST_NEXT(disp, link);
1521 result = ISC_R_NOTFOUND;
1526 result = ISC_R_SUCCESS;
/*
 * qid_allocate: allocate and initialize a QID table with 'buckets' hash
 * buckets, using memory from mgr->mctx.
 *
 * Requires (asserted below): 'mgr' is valid; buckets < 2097169;
 * increment > buckets; qidp != NULL and *qidp == NULL.
 *
 * Returns ISC_R_SUCCESS; ISC_R_NOMEMORY when an allocation fails; or
 * ISC_R_UNEXPECTED when the table's mutex cannot be initialized.  On the
 * failure paths visible here, memory obtained so far is returned to
 * mgr->mctx before returning.
 *
 * NOTE(review): lines are missing from this excerpt: the local
 * declarations, the NULL test after the first allocation and the store
 * through 'qidp' are not shown -- confirm against the full file.
 */
1533 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
1534 unsigned int increment, dns_qid_t **qidp)
1539 REQUIRE(VALID_DISPATCHMGR(mgr));
1540 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
1541 REQUIRE(increment > buckets);
1542 REQUIRE(qidp != NULL && *qidp == NULL);
/* The table header itself. */
1544 qid = isc_mem_get(mgr->mctx, sizeof(*qid));
1546 return (ISC_R_NOMEMORY);
/* The bucket array: one list head per bucket. */
1548 qid->qid_table = isc_mem_get(mgr->mctx,
1549 buckets * sizeof(dns_displist_t));
1550 if (qid->qid_table == NULL) {
1551 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
1552 return (ISC_R_NOMEMORY);
/* Mutex setup failed: give back both allocations. */
1555 if (isc_mutex_init(&qid->lock) != ISC_R_SUCCESS) {
1556 UNEXPECTED_ERROR(__FILE__, __LINE__, "isc_mutex_init failed");
1557 isc_mem_put(mgr->mctx, qid->qid_table,
1558 buckets * sizeof(dns_displist_t));
1559 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
1560 return (ISC_R_UNEXPECTED);
/* Every bucket starts out as an empty list. */
1563 for (i = 0; i < buckets; i++)
1564 ISC_LIST_INIT(qid->qid_table[i]);
/* Record the table geometry and mark the structure valid. */
1566 qid->qid_nbuckets = buckets;
1567 qid->qid_increment = increment;
1568 qid->magic = QID_MAGIC;
1570 return (ISC_R_SUCCESS);
/*
 * qid_destroy: release a QID table.
 *
 * Returns the bucket array (qid_nbuckets list heads) to 'mctx', destroys
 * the table's lock, then returns the dns_qid_t itself to 'mctx'.
 *
 * Requires (asserted below): 'qidp' is non-NULL and the table is valid.
 *
 * NOTE(review): lines are missing from this excerpt: the declaration and
 * assignment of 'qid' and any clearing of '*qidp' or of the magic number
 * are not shown -- confirm against the full file.
 */
1574 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
1577 REQUIRE(qidp != NULL);
1580 REQUIRE(VALID_QID(qid));
/* The size passed here must equal the size used at allocation time. */
1584 isc_mem_put(mctx, qid->qid_table,
1585 qid->qid_nbuckets * sizeof(dns_displist_t));
1586 DESTROYLOCK(&qid->lock);
1587 isc_mem_put(mctx, qid, sizeof(*qid));
1591 * Allocate and set important limits.
/*
 * dispatch_allocate: take a dispatch structure from mgr->dpool and give
 * its fields their initial values.
 *
 * Requires (asserted below): 'mgr' is valid; dispp != NULL and
 * *dispp == NULL.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMEMORY right after the pool
 * allocation.  'res' is set to ISC_R_UNEXPECTED when the mutex cannot be
 * initialized and to ISC_R_NOMEMORY when no failsafe event can be
 * allocated.
 *
 * NOTE(review): lines are missing from this excerpt: the declaration of
 * 'res', several field initializations, the store through 'dispp', the
 * gotos/labels of the cleanup path and its final return are not shown --
 * confirm against the full file.
 */
1594 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
1595 dns_dispatch_t **dispp)
1597 dns_dispatch_t *disp;
1600 REQUIRE(VALID_DISPATCHMGR(mgr));
1601 REQUIRE(dispp != NULL && *dispp == NULL);
1604 * Set up the dispatcher, mostly. Don't bother setting some of
1605 * the options that are controlled by tcp vs. udp, etc.
1608 disp = isc_mempool_get(mgr->dpool);
1610 return (ISC_R_NOMEMORY);
1614 disp->maxrequests = maxrequests;
1615 disp->attributes = 0;
1616 ISC_LINK_INIT(disp, link);
1618 disp->recv_pending = 0;
1619 memset(&disp->local, 0, sizeof(disp->local));
1620 disp->localport = 0;
1621 disp->shutting_down = 0;
1622 disp->shutdown_out = 0;
1623 disp->connected = 0;
1624 disp->tcpmsg_valid = 0;
1625 disp->shutdown_why = ISC_R_UNEXPECTED;
1627 disp->tcpbuffers = 0;
1630 if (isc_mutex_init(&disp->lock) != ISC_R_SUCCESS) {
1631 res = ISC_R_UNEXPECTED;
1632 UNEXPECTED_ERROR(__FILE__, __LINE__, "isc_mutex_init failed");
/* Event stored for later use; do_cancel() sends disp->failsafe_ev. */
1636 disp->failsafe_ev = allocate_event(disp);
1637 if (disp->failsafe_ev == NULL) {
1638 res = ISC_R_NOMEMORY;
/* All set up: mark the structure valid. */
1642 disp->magic = DISPATCH_MAGIC;
1645 return (ISC_R_SUCCESS);
/* Cleanup: destroy the lock and return the structure to the pool. */
1651 DESTROYLOCK(&disp->lock);
1653 isc_mempool_put(mgr->dpool, disp);
1660 * MUST be unlocked, and not used by anything.
/*
 * dispatch_free: release a dispatch structure and what it owns.
 *
 * Visible steps: invalidate the TCP message state if it was set up;
 * insist that no TCP buffers, requests or pending receives remain;
 * return the failsafe event to mgr->epool; destroy the QID table if the
 * dispatch has one; destroy the lock; return the structure to
 * mgr->dpool.
 *
 * Requires (asserted below): '*dispp' is a valid dispatch and its
 * manager is valid.
 *
 * NOTE(review): lines are missing from this excerpt: the assignments of
 * 'disp' and 'mgr' and any clearing of '*dispp' or of the magic number
 * are not shown -- confirm against the full file.
 */
1663 dispatch_free(dns_dispatch_t **dispp)
1665 dns_dispatch_t *disp;
1666 dns_dispatchmgr_t *mgr;
1668 REQUIRE(VALID_DISPATCH(*dispp));
1673 REQUIRE(VALID_DISPATCHMGR(mgr));
1675 if (disp->tcpmsg_valid) {
1676 dns_tcpmsg_invalidate(&disp->tcpmsg);
1677 disp->tcpmsg_valid = 0;
/* Nothing may still be outstanding on this dispatch. */
1680 INSIST(disp->tcpbuffers == 0);
1681 INSIST(disp->requests == 0);
1682 INSIST(disp->recv_pending == 0);
1684 isc_mempool_put(mgr->epool, disp->failsafe_ev);
1685 disp->failsafe_ev = NULL;
/* Destroy the QID table only if this dispatch has one. */
1687 if (disp->qid != NULL)
1688 qid_destroy(mgr->mctx, &disp->qid);
1690 DESTROYLOCK(&disp->lock);
1692 isc_mempool_put(mgr->dpool, disp);
/*
 * dns_dispatch_createtcp: create a dispatch on top of the existing TCP
 * socket 'sock'.
 *
 * Requires (asserted below): 'mgr' is valid; 'sock' is a TCP socket;
 * 'attributes' has DNS_DISPATCHATTR_TCP set and DNS_DISPATCHATTR_UDP
 * clear.  DNS_DISPATCHATTR_PRIVATE is always added to 'attributes'.
 *
 * Visible steps: allocate the dispatch and its own QID table, attach to
 * 'sock', create a task and a control event, initialize the TCP message
 * state, append the dispatch to mgr->list and log.  Returns
 * ISC_R_SUCCESS on that path.
 *
 * NOTE(review): lines are missing from this excerpt: locking of the
 * manager, the gotos and labels of the failure path, the store through
 * 'dispp', any use of 'buffersize'/'maxbuffers' and the failure return
 * are not shown -- confirm against the full file.
 */
1696 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
1697 isc_taskmgr_t *taskmgr, unsigned int buffersize,
1698 unsigned int maxbuffers, unsigned int maxrequests,
1699 unsigned int buckets, unsigned int increment,
1700 unsigned int attributes, dns_dispatch_t **dispp)
1702 isc_result_t result;
1703 dns_dispatch_t *disp;
1708 REQUIRE(VALID_DISPATCHMGR(mgr));
1709 REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
1710 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
1711 REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
1713 attributes |= DNS_DISPATCHATTR_PRIVATE; /* XXXMLG */
1718 * dispatch_allocate() checks mgr for us.
1719 * qid_allocate() checks buckets and increment for us.
1722 result = dispatch_allocate(mgr, maxrequests, &disp);
1723 if (result != ISC_R_SUCCESS) {
/* The QID table is stored in the dispatch itself (disp->qid). */
1728 result = qid_allocate(mgr, buckets, increment, &disp->qid);
1729 if (result != ISC_R_SUCCESS)
1730 goto deallocate_dispatch;
/* Take our own reference to the caller's socket. */
1732 disp->socktype = isc_sockettype_tcp;
1733 disp->socket = NULL;
1734 isc_socket_attach(sock, &disp->socket);
1737 result = isc_task_create(taskmgr, 0, &disp->task);
1738 if (result != ISC_R_SUCCESS)
1741 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
1742 DNS_EVENT_DISPATCHCONTROL,
1744 sizeof(isc_event_t));
1745 if (disp->ctlevent == NULL)
1748 isc_task_setname(disp->task, "tcpdispatch", disp);
/* tcpmsg_valid tells dispatch_free() to invalidate disp->tcpmsg. */
1750 dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
1751 disp->tcpmsg_valid = 1;
1753 disp->attributes = attributes;
1756 * Append it to the dispatcher list.
1758 ISC_LIST_APPEND(mgr->list, disp, link);
1761 mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
1762 dispatch_log(disp, LVL(90), "created task %p", disp->task);
1766 return (ISC_R_SUCCESS);
/* Failure path: undo the steps above in reverse order. */
1772 isc_task_detach(&disp->task);
1774 isc_socket_detach(&disp->socket);
1775 deallocate_dispatch:
1776 dispatch_free(&disp);
/*
 * dns_dispatch_getudp: obtain a UDP dispatch for 'localaddr', reusing an
 * existing one found by dispatch_find() when possible and otherwise
 * creating a new one with dispatch_createudp().
 *
 * Requires (asserted below): 'mgr' valid; 'sockmgr', 'localaddr' and
 * 'taskmgr' non-NULL; 512 <= buffersize < 64K; maxbuffers > 0;
 * buckets < 2097169; increment > buckets; dispp != NULL and
 * *dispp == NULL; DNS_DISPATCHATTR_TCP not set in 'attributes'.  With
 * DNS_DISPATCHATTR_RANDOMPORT, 'localaddr' must have port 0.
 *
 * Returns ISC_R_SUCCESS on both the reuse and the create path.
 *
 * NOTE(review): lines are missing from this excerpt: locking of the
 * manager, the reference taken on a reused dispatch, the stores through
 * 'dispp' and the failure returns are not shown -- confirm against the
 * full file.
 */
1784 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
1785 isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
1786 unsigned int buffersize,
1787 unsigned int maxbuffers, unsigned int maxrequests,
1788 unsigned int buckets, unsigned int increment,
1789 unsigned int attributes, unsigned int mask,
1790 dns_dispatch_t **dispp)
1792 isc_result_t result;
1793 dns_dispatch_t *disp = NULL;
1795 REQUIRE(VALID_DISPATCHMGR(mgr));
1796 REQUIRE(sockmgr != NULL);
1797 REQUIRE(localaddr != NULL);
1798 REQUIRE(taskmgr != NULL);
1799 REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
1800 REQUIRE(maxbuffers > 0);
1801 REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */
1802 REQUIRE(increment > buckets);
1803 REQUIRE(dispp != NULL && *dispp == NULL);
1804 REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
/* Hand the buffer and QID table parameters to the manager. */
1806 result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
1807 buckets, increment);
1808 if (result != ISC_R_SUCCESS)
1813 if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) != 0) {
1814 REQUIRE(isc_sockaddr_getport(localaddr) == 0);
1819 * First, see if we have a dispatcher that matches.
1822 result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
1823 if (result == ISC_R_SUCCESS) {
/* A reused dispatch keeps the larger of the two request limits. */
1826 if (disp->maxrequests < maxrequests)
1827 disp->maxrequests = maxrequests;
/*
 * The caller asked for NOLISTEN and the dispatch does not have it yet:
 * set it and cancel any receive that is still pending on the socket.
 */
1829 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 &&
1830 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1832 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
1833 if (disp->recv_pending != 0)
1834 isc_socket_cancel(disp->socket, disp->task,
1835 ISC_SOCKCANCEL_RECV);
1838 UNLOCK(&disp->lock);
1843 return (ISC_R_SUCCESS);
/* Nothing suitable was found: create a new UDP dispatch. */
1850 result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
1851 maxrequests, attributes, &disp);
1852 if (result != ISC_R_SUCCESS) {
1859 return (ISC_R_SUCCESS);
1863 * mgr should be locked.
/*
 * Default value for DNS_DISPATCH_HELD when it has not been defined
 * already.  dispatch_createudp() uses it as the size of its held[]
 * socket array.
 */
1866 #ifndef DNS_DISPATCH_HELD
1867 #define DNS_DISPATCH_HELD 20U
/*
 * dispatch_createudp: create a new UDP dispatch bound to 'localaddr'.
 *
 * Visible steps: allocate the dispatch; obtain a socket, checking
 * candidate ports with blacklisted(); record the socket, 'localaddr' and
 * 'localport' in the dispatch; create a task and a control event; set
 * the UDP attribute (clearing TCP); append the dispatch to mgr->list and
 * log.  Sockets parked in held[] are detached at the end.
 *
 * 'result' is set to ISC_R_FAILURE when no acceptable port is found
 * after the retry limit.
 *
 * NOTE(review): many lines are missing from this excerpt (the embedded
 * line numbers skip): the retry labels/gotos, the declaration of 'prt',
 * where 'localport' is updated, the store through 'dispp' and all return
 * statements are not shown, so the exact retry flow cannot be read from
 * here -- confirm against the full file.
 */
1871 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
1872 isc_taskmgr_t *taskmgr,
1873 isc_sockaddr_t *localaddr,
1874 unsigned int maxrequests,
1875 unsigned int attributes,
1876 dns_dispatch_t **dispp)
1878 isc_result_t result;
1879 dns_dispatch_t *disp;
1880 isc_socket_t *sock = NULL;
1881 isc_socket_t *held[DNS_DISPATCH_HELD];
1882 unsigned int i = 0, j = 0, k = 0;
1883 isc_sockaddr_t localaddr_bound;
1884 in_port_t localport = 0;
1887 * dispatch_allocate() checks mgr for us.
1890 result = dispatch_allocate(mgr, maxrequests, &disp);
1891 if (result != ISC_R_SUCCESS)
1895 * Try to allocate a socket that is not on the blacklist.
1896 * Hold up to DNS_DISPATCH_HELD sockets to prevent the OS
1897 * from returning the same port to us too quickly.
/* No sockets held yet; work on a private copy of the requested address. */
1899 memset(held, 0, sizeof(held));
1900 localaddr_bound = *localaddr;
/*
 * RANDOMPORT: choose a port ourselves and bind to it explicitly.
 * NOTE(review): presumably dispatch_arc4uniformrandom() returns a value
 * below its second argument, giving ports 1024..65535 -- confirm.
 */
1902 if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) != 0) {
1905 /* XXX: should the range be configurable? */
1906 prt = 1024 + dispatch_arc4uniformrandom(mgr, 65535 - 1023);
1907 isc_sockaddr_setport(&localaddr_bound, prt);
/* A blacklisted pick clears RANDOMPORT from 'attributes'. */
1908 if (blacklisted(mgr, NULL, &localaddr_bound)) {
1910 attributes &= ~DNS_DISPATCHATTR_RANDOMPORT;
/* So does finding the chosen port already in use. */
1913 result = create_socket(sockmgr, &localaddr_bound, &sock);
1914 if (result == ISC_R_ADDRINUSE) {
1916 attributes &= ~DNS_DISPATCHATTR_RANDOMPORT;
/* Bind using the caller's address exactly as given. */
1921 result = create_socket(sockmgr, localaddr, &sock);
1922 if (result != ISC_R_SUCCESS)
1923 goto deallocate_dispatch;
/*
 * Caller asked for port 0 and RANDOMPORT is not set: check the socket's
 * own address against the blacklist.
 */
1924 if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) == 0 &&
1925 isc_sockaddr_getport(localaddr) == 0 &&
1926 blacklisted(mgr, sock, NULL))
/* Drop whatever socket currently occupies slot i of held[]. */
1928 if (held[i] != NULL)
1929 isc_socket_detach(&held[i]);
1932 if (i == DNS_DISPATCH_HELD)
/* Give up once the attempt counter j reaches 0xffff. */
1934 if (j++ == 0xffffU) {
1935 mgr_log(mgr, ISC_LOG_ERROR, "avoid-v%s-udp-ports: "
1936 "unable to allocate a non-blacklisted port",
1937 isc_sockaddr_pf(localaddr) == AF_INET ?
1939 result = ISC_R_FAILURE;
1940 goto deallocate_dispatch;
/* Record the socket and addressing details in the dispatch. */
1945 disp->socktype = isc_sockettype_udp;
1946 disp->socket = sock;
1947 disp->local = *localaddr;
1948 disp->localport = localport;
1951 result = isc_task_create(taskmgr, 0, &disp->task);
1952 if (result != ISC_R_SUCCESS)
1955 disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
1956 DNS_EVENT_DISPATCHCONTROL,
1958 sizeof(isc_event_t));
1959 if (disp->ctlevent == NULL)
1962 isc_task_setname(disp->task, "udpdispatch", disp);
/* Force the attribute word to say UDP and not TCP. */
1964 attributes &= ~DNS_DISPATCHATTR_TCP;
1965 attributes |= DNS_DISPATCHATTR_UDP;
1966 disp->attributes = attributes;
1969 * Append it to the dispatcher list.
1971 ISC_LIST_APPEND(mgr->list, disp, link);
1973 mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
1974 dispatch_log(disp, LVL(90), "created task %p", disp->task);
1975 dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
/* Failure path: undo the steps above in reverse order. */
1985 isc_task_detach(&disp->task);
1987 isc_socket_detach(&disp->socket);
1988 deallocate_dispatch:
1989 dispatch_free(&disp);
/* Detach every socket still parked in held[]. */
1991 for (i = 0; i < DNS_DISPATCH_HELD; i++)
1992 if (held[i] != NULL)
1993 isc_socket_detach(&held[i]);
/*
 * dns_dispatch_attach: attach to dispatch 'disp' through 'dispp'.
 *
 * Requires (asserted below): 'disp' is valid; dispp != NULL and
 * *dispp == NULL.
 *
 * NOTE(review): only the argument checks and the final unlock are
 * visible in this excerpt; the lock acquisition, whatever is updated
 * while the lock is held and the store through 'dispp' are on lines not
 * shown -- confirm against the full file.
 */
1998 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
1999 REQUIRE(VALID_DISPATCH(disp));
2000 REQUIRE(dispp != NULL && *dispp == NULL);
2004 UNLOCK(&disp->lock);
2010 * It is important to lock the manager while we are deleting the dispatch,
2011 * since dns_dispatch_getudp will call dispatch_find, which returns to
2012 * the caller a dispatch but does not attach to it until later. _getudp
2013 * locks the manager, however, so locking it here will keep us from attaching
2014 * to a dispatcher that is in the process of going away.
/*
 * dns_dispatch_detach: give up a reference to the dispatch '*dispp'.
 *
 * When the reference count is zero, any pending receive on the socket is
 * cancelled and the dispatch is marked as shutting down.  The count is
 * logged, destroy_disp_ok() is consulted while the dispatch lock is
 * still held, and after the unlock the control event may be sent to the
 * dispatch's task.
 *
 * Requires (asserted below): dispp != NULL and '*dispp' is a valid
 * dispatch.
 *
 * NOTE(review): lines are missing from this excerpt: the assignment of
 * 'disp', the lock acquisition, the refcount decrement and the condition
 * guarding isc_task_send() (presumably 'killit') are not shown --
 * confirm against the full file.
 */
2017 dns_dispatch_detach(dns_dispatch_t **dispp) {
2018 dns_dispatch_t *disp;
2019 isc_boolean_t killit;
2021 REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
2028 INSIST(disp->refcount > 0);
/* No references left: stop receiving and begin shutting down. */
2031 if (disp->refcount == 0) {
2032 if (disp->recv_pending > 0)
2033 isc_socket_cancel(disp->socket, disp->task,
2034 ISC_SOCKCANCEL_RECV);
2035 disp->shutting_down = 1;
2038 dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
/* Decide under the lock; the event itself is sent after unlocking. */
2040 killit = destroy_disp_ok(disp);
2041 UNLOCK(&disp->lock);
2043 isc_task_send(disp->task, &disp->ctlevent);
/*
 * dns_dispatch_addresponse: register a response entry for destination
 * 'dest' on dispatch 'disp', to be handled by 'action' on 'task'.
 *
 * Requires (asserted below): 'disp' valid; 'task', 'dest' and 'idp'
 * non-NULL; resp != NULL and *resp == NULL.
 *
 * Returns ISC_R_SUCCESS, or:
 *   ISC_R_SHUTTINGDOWN  the dispatch is shutting down;
 *   ISC_R_QUOTA         disp->requests has reached disp->maxrequests;
 *   ISC_R_NOMORE        returned after the ID search loop (up to 64
 *                       candidate IDs);
 *   ISC_R_NOMEMORY      returned after the pool allocation.
 * The dispatch lock is released before each of these returns.
 *
 * NOTE(review): lines are missing from this excerpt: lock acquisition,
 * locking of the QID table, the conditions guarding the ISC_R_NOMORE and
 * ISC_R_NOMEMORY returns, the request/refcount accounting, several
 * 'res' field assignments, the statement controlled by the final 'if',
 * and the stores through 'idp' and 'resp' are not shown -- confirm
 * against the full file.
 */
2047 dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
2048 isc_task_t *task, isc_taskaction_t action, void *arg,
2049 dns_messageid_t *idp, dns_dispentry_t **resp)
2051 dns_dispentry_t *res;
2052 unsigned int bucket;
2058 REQUIRE(VALID_DISPATCH(disp));
2059 REQUIRE(task != NULL);
2060 REQUIRE(dest != NULL);
2061 REQUIRE(resp != NULL && *resp == NULL);
2062 REQUIRE(idp != NULL);
/* Refuse new work on a dispatch that is shutting down. */
2066 if (disp->shutting_down == 1) {
2067 UNLOCK(&disp->lock);
2068 return (ISC_R_SHUTTINGDOWN);
/* Enforce the per-dispatch request limit. */
2071 if (disp->requests >= disp->maxrequests) {
2072 UNLOCK(&disp->lock);
2073 return (ISC_R_QUOTA);
2077 * Try somewhat hard to find an unique ID.
/* The first candidate ID comes from the manager's random generator. */
2079 id = (dns_messageid_t)dispatch_arc4random(disp->mgr);
2080 qid = DNS_QID(disp);
2082 bucket = dns_hash(qid, dest, id, disp->localport);
/*
 * Probe up to 64 candidates, stepping the ID by qid_increment and
 * rehashing each time.
 */
2084 for (i = 0; i < 64; i++) {
2085 if (bucket_search(qid, dest, id, disp->localport, bucket) ==
2090 id += qid->qid_increment;
2092 bucket = dns_hash(qid, dest, id, disp->localport);
2097 UNLOCK(&disp->lock);
2098 return (ISC_R_NOMORE);
2101 res = isc_mempool_get(disp->mgr->rpool);
2104 UNLOCK(&disp->lock);
2105 return (ISC_R_NOMEMORY);
/* Fill in the new entry and link it into its hash bucket. */
2111 isc_task_attach(task, &res->task);
2114 res->port = disp->localport;
2115 res->bucket = bucket;
2117 res->action = action;
2119 res->item_out = ISC_FALSE;
2120 ISC_LIST_INIT(res->items);
2121 ISC_LINK_INIT(res, link);
2122 res->magic = RESPONSE_MAGIC;
2123 ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
2126 request_log(disp, res, LVL(90),
2127 "attached to task %p", res->task);
/* True for a UDP dispatch or one with the CONNECTED attribute set. */
2129 if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
2130 ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0))
2133 UNLOCK(&disp->lock);
2138 return (ISC_R_SUCCESS);
/*
 * dns_dispatch_starttcp: mark dispatch 'disp' as connected by setting
 * DNS_DISPATCHATTR_CONNECTED in its attributes, after logging the call.
 *
 * Requires (asserted below): 'disp' is a valid dispatch.
 *
 * NOTE(review): the lock acquisition and any further statement between
 * setting the attribute and the unlock are on lines not shown in this
 * excerpt -- confirm against the full file.
 */
2142 dns_dispatch_starttcp(dns_dispatch_t *disp) {
2144 REQUIRE(VALID_DISPATCH(disp));
2146 dispatch_log(disp, LVL(90), "starttcp %p", disp->task);
2149 disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
2151 UNLOCK(&disp->lock);
/*
 * dns_dispatch_removeresponse: remove the response entry '*resp' from
 * its dispatch and release what the entry holds.
 *
 * Visible steps: when the reference count is zero, cancel any pending
 * receive and mark the dispatch as shutting down; unlink the entry from
 * its QID bucket; reclaim an event that was posted but not yet
 * delivered; free the event and its buffer; detach from the entry's
 * task; free every event still queued on the entry; return the entry to
 * the manager's response pool; consult destroy_disp_ok(); unlock; and
 * possibly send the control event to the dispatch's task.
 *
 * Requires (asserted below): resp != NULL and '*resp' is a valid
 * response; its dispatch and manager are valid; if 'sockevent' is
 * non-NULL then '*sockevent' is non-NULL.
 *
 * NOTE(review): many lines are missing from this excerpt: the
 * assignments of 'res', 'disp', 'mgr' and 'ev', all locking, the
 * request/refcount decrements, the branch structure around the event
 * handling, the statement controlled by the shutting_down test and the
 * condition guarding isc_task_send() are not shown -- confirm against
 * the full file.
 */
2155 dns_dispatch_removeresponse(dns_dispentry_t **resp,
2156 dns_dispatchevent_t **sockevent)
2158 dns_dispatchmgr_t *mgr;
2159 dns_dispatch_t *disp;
2160 dns_dispentry_t *res;
2161 dns_dispatchevent_t *ev;
2162 unsigned int bucket;
2163 isc_boolean_t killit;
2165 isc_eventlist_t events;
2168 REQUIRE(resp != NULL);
2169 REQUIRE(VALID_RESPONSE(*resp));
2175 REQUIRE(VALID_DISPATCH(disp));
2177 REQUIRE(VALID_DISPATCHMGR(mgr));
2179 qid = DNS_QID(disp);
2181 if (sockevent != NULL) {
2182 REQUIRE(*sockevent != NULL);
2191 INSIST(disp->requests > 0);
2193 INSIST(disp->refcount > 0);
/* No references left: stop receiving and begin shutting down. */
2196 if (disp->refcount == 0) {
2197 if (disp->recv_pending > 0)
2198 isc_socket_cancel(disp->socket, disp->task,
2199 ISC_SOCKCANCEL_RECV);
2200 disp->shutting_down = 1;
/* Take the entry out of the hash bucket recorded at insertion time. */
2203 bucket = res->bucket;
2206 ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
2209 if (ev == NULL && res->item_out) {
2211 * We've posted our event, but the caller hasn't gotten it
2212 * yet. Take it back.
2214 ISC_LIST_INIT(events);
2215 n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
2218 * We had better have gotten it back.
2221 ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
/* Release the event together with any buffer attached to it. */
2225 REQUIRE(res->item_out == ISC_TRUE);
2226 res->item_out = ISC_FALSE;
2227 if (ev->buffer.base != NULL)
2228 free_buffer(disp, ev->buffer.base, ev->buffer.length);
2229 free_event(disp, ev);
2232 request_log(disp, res, LVL(90), "detaching from task %p", res->task);
2233 isc_task_detach(&res->task);
2236 * Free any buffered requests as well
2238 ev = ISC_LIST_HEAD(res->items);
2239 while (ev != NULL) {
2240 ISC_LIST_UNLINK(res->items, ev, ev_link);
2241 if (ev->buffer.base != NULL)
2242 free_buffer(disp, ev->buffer.base, ev->buffer.length);
2243 free_event(disp, ev);
2244 ev = ISC_LIST_HEAD(res->items);
2247 isc_mempool_put(disp->mgr->rpool, res);
2248 if (disp->shutting_down == 1)
/* Decide under the lock; the event itself is sent after unlocking. */
2253 killit = destroy_disp_ok(disp);
2254 UNLOCK(&disp->lock);
2256 isc_task_send(disp->task, &disp->ctlevent);
/*
 * do_cancel: deliver the dispatch's failsafe event, carrying
 * disp->shutdown_why as its result, to one response handler.
 *
 * The handler chosen is the first entry in the QID table, in
 * linear_first()/linear_next() order, whose item_out flag is false.
 * The event is initialized with that entry's action and argument and no
 * buffer; shutdown_out is set on the dispatch, the entry's item_out is
 * set, and the event is sent to the entry's task.
 *
 * NOTE(review): lines are missing from this excerpt: the early returns
 * (after the shutdown_out test and when no handler is found), the loop's
 * increment clause and any locking of the QID table are not shown --
 * confirm against the full file.
 */
2260 do_cancel(dns_dispatch_t *disp) {
2261 dns_dispatchevent_t *ev;
2262 dns_dispentry_t *resp;
2265 if (disp->shutdown_out == 1)
2268 qid = DNS_QID(disp);
2271 * Search for the first response handler without packets outstanding.
2274 for (resp = linear_first(qid);
2275 resp != NULL && resp->item_out != ISC_FALSE;
2277 resp = linear_next(qid, resp);
2279 * No one to send the cancel event to, so nothing to do.
2285 * Send the shutdown failsafe event to this resp.
2287 ev = disp->failsafe_ev;
2288 ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
2289 resp->action, resp->arg, resp, NULL, NULL);
/* The failsafe event reports the shutdown reason and has no buffer. */
2290 ev->result = disp->shutdown_why;
2291 ev->buffer.base = NULL;
2292 ev->buffer.length = 0;
2293 disp->shutdown_out = 1;
2294 request_log(disp, resp, LVL(10),
2295 "cancel: failsafe event %p -> task %p",
2297 resp->item_out = ISC_TRUE;
2298 isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
/*
 * dns_dispatch_getsocket: return the socket stored in dispatch 'disp'.
 *
 * Requires (asserted below): 'disp' is a valid dispatch.  No attach is
 * performed on the returned socket in the lines visible here.
 */
2304 dns_dispatch_getsocket(dns_dispatch_t *disp) {
2305 REQUIRE(VALID_DISPATCH(disp));
2307 return (disp->socket);
/*
 * dns_dispatch_getlocaladdress: copy the dispatch's local address into
 * '*addrp'.
 *
 * Requires (asserted below): 'disp' is valid and 'addrp' is non-NULL.
 *
 * Returns ISC_R_SUCCESS for a UDP dispatch, after copying disp->local;
 * ISC_R_NOTIMPLEMENTED for any other socket type, leaving '*addrp'
 * untouched in the lines visible here.
 */
2311 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
2313 REQUIRE(VALID_DISPATCH(disp));
2314 REQUIRE(addrp != NULL);
2316 if (disp->socktype == isc_sockettype_udp) {
2317 *addrp = disp->local;
2318 return (ISC_R_SUCCESS);
2320 return (ISC_R_NOTIMPLEMENTED);
/*
 * dns_dispatch_cancel: put dispatch 'disp' into the shutting-down state
 * with ISC_R_CANCELED recorded as the reason.
 *
 * If the dispatch is already shutting down, the lock is released and
 * nothing else visible here is done.
 *
 * Requires (asserted below): 'disp' is a valid dispatch.
 *
 * NOTE(review): lines are missing from this excerpt: the lock
 * acquisition, the return in the already-shutting-down branch and any
 * call made between setting the state and the final unlock are not
 * shown -- confirm against the full file.
 */
2324 dns_dispatch_cancel(dns_dispatch_t *disp) {
2325 REQUIRE(VALID_DISPATCH(disp));
2329 if (disp->shutting_down == 1) {
2330 UNLOCK(&disp->lock);
2334 disp->shutdown_why = ISC_R_CANCELED;
2335 disp->shutting_down = 1;
2338 UNLOCK(&disp->lock);
/*
 * dns_dispatch_changeattributes: replace the attribute bits of 'disp'
 * selected by 'mask' with the corresponding bits of 'attributes'.
 *
 * A change of DNS_DISPATCHATTR_NOLISTEN gets extra handling first:
 *   - set on the dispatch, clear in 'attributes': the bit is cleared
 *     (a further statement in this branch is on a line not shown);
 *   - turning it on: the bit is set and any pending receive on the
 *     socket is cancelled.
 *
 * Requires (asserted below): 'disp' is a valid dispatch.
 *
 * NOTE(review): lines are missing from this excerpt: the lock
 * acquisition, the statement following the clearing of NOLISTEN and the
 * middle line of the 'else if' condition are not shown -- confirm
 * against the full file.
 */
2344 dns_dispatch_changeattributes(dns_dispatch_t *disp,
2345 unsigned int attributes, unsigned int mask)
2347 REQUIRE(VALID_DISPATCH(disp));
2350 * Should check for valid attributes here!
2355 if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
2356 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
2357 (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
2358 disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
2360 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
2362 (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
2363 disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2364 if (disp->recv_pending != 0)
2365 isc_socket_cancel(disp->socket, disp->task,
2366 ISC_SOCKCANCEL_RECV);
/* Generic update: clear the masked bits, then copy them in. */
2370 disp->attributes &= ~mask;
2371 disp->attributes |= (attributes & mask);
2372 UNLOCK(&disp->lock);
/*
 * dns_dispatch_importrecv: hand a copy of a received socket event to the
 * dispatch's task.
 *
 * A new socket event of type DNS_EVENT_IMPORTRECVDONE, with udp_recv as
 * its action and 'disp' as its argument, is allocated.  The first
 * sevent->n bytes of the original data are copied into a buffer from
 * allocate_udp_buffer(), the remaining socket-event fields are copied
 * over, and the new event is sent to disp->task.
 *
 * Requires (asserted below): 'disp' is valid and has
 * DNS_DISPATCHATTR_NOLISTEN set; 'event' is non-NULL.  The received
 * length must not exceed the manager's buffer size (INSIST).
 *
 * NOTE(review): lines are missing from this excerpt: the declaration of
 * 'buf', the NULL test on it and the returns taken on allocation failure
 * are not shown -- confirm against the full file.
 */
2376 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
2378 isc_socketevent_t *sevent, *newsevent;
2380 REQUIRE(VALID_DISPATCH(disp));
2381 REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
2382 REQUIRE(event != NULL);
2384 sevent = (isc_socketevent_t *)event;
/* Checked before the memcpy() of sevent->n bytes into 'buf' below. */
2386 INSIST(sevent->n <= disp->mgr->buffersize);
2387 newsevent = (isc_socketevent_t *)
2388 isc_event_allocate(disp->mgr->mctx, NULL,
2389 DNS_EVENT_IMPORTRECVDONE, udp_recv,
2390 disp, sizeof(isc_socketevent_t));
2391 if (newsevent == NULL)
2394 buf = allocate_udp_buffer(disp);
/* Free the freshly allocated event again. */
2396 isc_event_free(ISC_EVENT_PTR(&newsevent));
/* Copy the payload and the metadata of the original event. */
2399 memcpy(buf, sevent->region.base, sevent->n);
2400 newsevent->region.base = buf;
2401 newsevent->region.length = disp->mgr->buffersize;
2402 newsevent->n = sevent->n;
2403 newsevent->result = sevent->result;
2404 newsevent->address = sevent->address;
2405 newsevent->timestamp = sevent->timestamp;
2406 newsevent->pktinfo = sevent->pktinfo;
2407 newsevent->attributes = sevent->attributes;
2409 isc_task_send(disp->task, ISC_EVENT_PTR(&newsevent));
/*
 * dns_dispatchmgr_dump: print one line per dispatch on mgr->list to
 * standard output, showing the dispatch pointer and its formatted local
 * address.
 *
 * NOTE(review): the declaration of the 'foo' text buffer and the end of
 * the function are on lines not shown in this excerpt; the function may
 * also continue past the end of the visible text -- confirm against the
 * full file.
 */
2414 dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
2415 dns_dispatch_t *disp;
2418 disp = ISC_LIST_HEAD(mgr->list);
2419 while (disp != NULL) {
2420 isc_sockaddr_format(&disp->local, foo, sizeof(foo));
2421 printf("\tdispatch %p, addr %s\n", disp, foo);
2422 disp = ISC_LIST_NEXT(disp, link);