acpi.4: Add some missing references.
[dragonfly.git] / contrib / bind-9.3 / lib / dns / dispatch.c
1 /*
2  * Copyright (C) 2004, 2006  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: dispatch.c,v 1.101.2.6.2.13.6.4 2007/06/27 04:19:50 marka Exp $ */
19
20 #include <config.h>
21
22 #include <stdlib.h>
23 #include <sys/types.h>
24 #include <unistd.h>
25
26 #include <isc/entropy.h>
27 #include <isc/mem.h>
28 #include <isc/mutex.h>
29 #include <isc/print.h>
30 #include <isc/random.h>
31 #include <isc/string.h>
32 #include <isc/task.h>
33 #include <isc/time.h>
34 #include <isc/util.h>
35
36 #include <dns/acl.h>
37 #include <dns/dispatch.h>
38 #include <dns/events.h>
39 #include <dns/log.h>
40 #include <dns/message.h>
41 #include <dns/portlist.h>
42 #include <dns/tcpmsg.h>
43 #include <dns/types.h>
44
/* List of dispatch entries; dns_qid_t.qid_table is an array of these. */
45 typedef ISC_LIST(dns_dispentry_t)       dns_displist_t;
46
47 typedef struct dns_qid {
48         unsigned int    magic;
49         unsigned int    qid_nbuckets;   /* hash table size */
50         unsigned int    qid_increment;  /* id increment on collision */
51         isc_mutex_t     lock;
52         dns_displist_t  *qid_table;     /* the table itself */
53 } dns_qid_t;
54
55 /* ARC4 Random generator state */
56 typedef struct arc4ctx {
57         isc_uint8_t     i;
58         isc_uint8_t     j;
59         isc_uint8_t     s[256];
60         int             count;
61 } arc4ctx_t;
62
63 struct dns_dispatchmgr {
64         /* Unlocked. */
65         unsigned int                    magic;
66         isc_mem_t                      *mctx;
67         dns_acl_t                      *blackhole;
68         dns_portlist_t                 *portlist;
69
70         /* Locked by "lock". */
71         isc_mutex_t                     lock;
72         unsigned int                    state;
73         ISC_LIST(dns_dispatch_t)        list;
74
75         /* Locked by arc4_lock. */
76         isc_mutex_t                     arc4_lock;
77         arc4ctx_t                       arc4ctx;    /*%< ARC4 context for QID */
78
79         /* locked by buffer lock */
80         dns_qid_t                       *qid;
81         isc_mutex_t                     buffer_lock;
82         unsigned int                    buffers;    /* allocated buffers */
83         unsigned int                    buffersize; /* size of each buffer */
84         unsigned int                    maxbuffers; /* max buffers */
85
86         /* Locked internally. */
87         isc_mutex_t                     pool_lock;
88         isc_mempool_t                  *epool;  /* memory pool for events */
89         isc_mempool_t                  *rpool;  /* memory pool for replies */
90         isc_mempool_t                  *dpool;  /* dispatch allocations */
91         isc_mempool_t                  *bpool;  /* memory pool for buffers */
92
93         isc_entropy_t                  *entropy; /* entropy source */
94 };
95
/* Flag bit kept in the 'state' member; tested by MGR_IS_SHUTTINGDOWN(). */
96 #define MGR_SHUTTINGDOWN                0x00000001U
/* True when the MGR_SHUTTINGDOWN bit is set in (l)->state. */
97 #define MGR_IS_SHUTTINGDOWN(l)  (((l)->state & MGR_SHUTTINGDOWN) != 0)
98
/* True when DNS_DISPATCHATTR_PRIVATE is set in (d)->attributes. */
99 #define IS_PRIVATE(d)   (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
100
101 struct dns_dispentry {
102         unsigned int                    magic;
103         dns_dispatch_t                 *disp;
104         dns_messageid_t                 id;
105         in_port_t                       port;
106         unsigned int                    bucket;
107         isc_sockaddr_t                  host;
108         isc_task_t                     *task;
109         isc_taskaction_t                action;
110         void                           *arg;
111         isc_boolean_t                   item_out;
112         ISC_LIST(dns_dispatchevent_t)   items;
113         ISC_LINK(dns_dispentry_t)       link;
114 };
115
/*
 * NOTE(review): presumably a sentinel value for dns_dispentry.bucket;
 * its use is not visible in this part of the file -- confirm.
 */
116 #define INVALID_BUCKET          (0xffffdead)
117
118 struct dns_dispatch {
119         /* Unlocked. */
120         unsigned int            magic;          /* magic */
121         dns_dispatchmgr_t      *mgr;            /* dispatch manager */
122         isc_task_t             *task;           /* internal task */
123         isc_socket_t           *socket;         /* isc socket attached to */
124         isc_sockaddr_t          local;          /* local address */
125         in_port_t               localport;      /* local UDP port */
126         unsigned int            maxrequests;    /* max requests */
127         isc_event_t            *ctlevent;
128
129         /* Locked by mgr->lock. */
130         ISC_LINK(dns_dispatch_t) link;
131
132         /* Locked by "lock". */
133         isc_mutex_t             lock;           /* locks all below */
134         isc_sockettype_t        socktype;
135         unsigned int            attributes;
136         unsigned int            refcount;       /* number of users */
137         dns_dispatchevent_t    *failsafe_ev;    /* failsafe cancel event */
138         unsigned int            shutting_down : 1,
139                                 shutdown_out : 1,
140                                 connected : 1,
141                                 tcpmsg_valid : 1,
142                                 recv_pending : 1; /* is a recv() pending? */
143         isc_result_t            shutdown_why;
144         unsigned int            requests;       /* how many requests we have */
145         unsigned int            tcpbuffers;     /* allocated buffers */
146         dns_tcpmsg_t            tcpmsg;         /* for tcp streams */
147         dns_qid_t               *qid;
148 };
149
/*
 * Magic values and the matching validity checks.  In this file
 * VALID_RESPONSE() is applied to dns_dispentry_t pointers and
 * VALID_DISPATCH() to dns_dispatch_t pointers; the QID and manager
 * variants presumably pair with dns_qid_t and dns_dispatchmgr_t.
 */
150 #define QID_MAGIC               ISC_MAGIC('Q', 'i', 'd', ' ')
151 #define VALID_QID(e)            ISC_MAGIC_VALID((e), QID_MAGIC)
152
153 #define RESPONSE_MAGIC          ISC_MAGIC('D', 'r', 's', 'p')
154 #define VALID_RESPONSE(e)       ISC_MAGIC_VALID((e), RESPONSE_MAGIC)
155
156 #define DISPATCH_MAGIC          ISC_MAGIC('D', 'i', 's', 'p')
157 #define VALID_DISPATCH(e)       ISC_MAGIC_VALID((e), DISPATCH_MAGIC)
158
159 #define DNS_DISPATCHMGR_MAGIC   ISC_MAGIC('D', 'M', 'g', 'r')
160 #define VALID_DISPATCHMGR(e)    ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
161
/*
 * Select the QID table for 'disp': the dispatch's own table when its
 * socket type is TCP, otherwise the table held by its manager.
 *
 * The whole expansion is parenthesized so the conditional cannot be
 * re-associated with operators surrounding the macro invocation.
 */
#define DNS_QID(disp) (((disp)->socktype == isc_sockettype_tcp) ? \
                       (disp)->qid : (disp)->mgr->qid)
164 /*
165  * Statics.
166  */
167 static dns_dispentry_t *bucket_search(dns_qid_t *, isc_sockaddr_t *,
168                                       dns_messageid_t, in_port_t, unsigned int);
169 static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
170 static void destroy_disp(isc_task_t *task, isc_event_t *event);
171 static void udp_recv(isc_task_t *, isc_event_t *);
172 static void tcp_recv(isc_task_t *, isc_event_t *);
173 static void startrecv(dns_dispatch_t *);
174 static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
175                              in_port_t);
176 static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
177 static void *allocate_udp_buffer(dns_dispatch_t *disp);
178 static inline void free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
179 static inline dns_dispatchevent_t *allocate_event(dns_dispatch_t *disp);
180 static void do_cancel(dns_dispatch_t *disp);
181 static dns_dispentry_t *linear_first(dns_qid_t *disp);
182 static dns_dispentry_t *linear_next(dns_qid_t *disp,
183                                     dns_dispentry_t *resp);
184 static void dispatch_free(dns_dispatch_t **dispp);
185 static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
186                                        isc_socketmgr_t *sockmgr,
187                                        isc_taskmgr_t *taskmgr,
188                                        isc_sockaddr_t *localaddr,
189                                        unsigned int maxrequests,
190                                        unsigned int attributes,
191                                        dns_dispatch_t **dispp);
192 static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
193 static void destroy_mgr(dns_dispatchmgr_t **mgrp);
194 static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
195                                  unsigned int increment, dns_qid_t **qidp);
196 static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
197
/* Shorthand for ISC_LOG_DEBUG(x); used for the log levels in this file. */
198 #define LVL(x) ISC_LOG_DEBUG(x)
199
200 static void
201 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
202      ISC_FORMAT_PRINTF(3, 4);
203
204 static void
205 mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
206         char msgbuf[2048];
207         va_list ap;
208
209         if (! isc_log_wouldlog(dns_lctx, level))
210                 return;
211
212         va_start(ap, fmt);
213         vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
214         va_end(ap);
215
216         isc_log_write(dns_lctx,
217                       DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
218                       level, "dispatchmgr %p: %s", mgr, msgbuf);
219 }
220
221 static void
222 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
223      ISC_FORMAT_PRINTF(3, 4);
224
225 static void
226 dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
227         char msgbuf[2048];
228         va_list ap;
229
230         if (! isc_log_wouldlog(dns_lctx, level))
231                 return;
232
233         va_start(ap, fmt);
234         vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
235         va_end(ap);
236
237         isc_log_write(dns_lctx,
238                       DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
239                       level, "dispatch %p: %s", disp, msgbuf);
240 }
241
242 static void
243 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
244             int level, const char *fmt, ...)
245      ISC_FORMAT_PRINTF(4, 5);
246
247 static void
248 request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
249             int level, const char *fmt, ...)
250 {
251         char msgbuf[2048];
252         char peerbuf[256];
253         va_list ap;
254
255         if (! isc_log_wouldlog(dns_lctx, level))
256                 return;
257
258         va_start(ap, fmt);
259         vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
260         va_end(ap);
261
262         if (VALID_RESPONSE(resp)) {
263                 isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
264                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
265                               DNS_LOGMODULE_DISPATCH, level,
266                               "dispatch %p response %p %s: %s", disp, resp,
267                               peerbuf, msgbuf);
268         } else {
269                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
270                               DNS_LOGMODULE_DISPATCH, level,
271                               "dispatch %p req/resp %p: %s", disp, resp,
272                               msgbuf);
273         }
274 }
275
276 /*
277  * ARC4 random number generator obtained from OpenBSD
278  */
279 static void
280 dispatch_arc4init(arc4ctx_t *actx) {
281         int n;
282         for (n = 0; n < 256; n++)
283                 actx->s[n] = n;
284         actx->i = 0;
285         actx->j = 0;
286         actx->count = 0;
287 }
288
289 static void
290 dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
291         int n;
292         isc_uint8_t si;
293
294         actx->i--;
295         for (n = 0; n < 256; n++) {
296                 actx->i = (actx->i + 1);
297                 si = actx->s[actx->i];
298                 actx->j = (actx->j + si + dat[n % datlen]);
299                 actx->s[actx->i] = actx->s[actx->j];
300                 actx->s[actx->j] = si;
301         }
302         actx->j = actx->i;
303 }
304
305 static inline isc_uint8_t
306 dispatch_arc4get8(arc4ctx_t *actx) {
307         isc_uint8_t si, sj;
308
309         actx->i = (actx->i + 1);
310         si = actx->s[actx->i];
311         actx->j = (actx->j + si);
312         sj = actx->s[actx->j];
313         actx->s[actx->i] = sj;
314         actx->s[actx->j] = si;
315
316         return (actx->s[(si + sj) & 0xff]);
317 }
318
319 static inline isc_uint16_t
320 dispatch_arc4get16(arc4ctx_t *actx) {
321         isc_uint16_t val;
322
323         val = dispatch_arc4get8(actx) << 8;
324         val |= dispatch_arc4get8(actx);
325
326         return (val);
327 }
328
329 static void
330 dispatch_arc4stir(dns_dispatchmgr_t *mgr) {
331         int i;
332         union {
333                 unsigned char rnd[128];
334                 isc_uint32_t rnd32[32];
335         } rnd;
336         isc_result_t result;
337
338         if (mgr->entropy != NULL) {
339                 /*
340                  * We accept any quality of random data to avoid blocking.
341                  */
342                 result = isc_entropy_getdata(mgr->entropy, rnd.rnd,
343                                              sizeof(rnd), NULL, 0);
344                 RUNTIME_CHECK(result == ISC_R_SUCCESS);
345         } else {
346                 for (i = 0; i < 32; i++)
347                         isc_random_get(&rnd.rnd32[i]);
348         }
349         dispatch_arc4addrandom(&mgr->arc4ctx, rnd.rnd, sizeof(rnd.rnd));
350
351         /*
352          * Discard early keystream, as per recommendations in:
353          * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
354          */
355         for (i = 0; i < 256; i++)
356                 (void)dispatch_arc4get8(&mgr->arc4ctx);
357
358         /*
359          * Derived from OpenBSD's implementation.  The rationale is not clear,
360          * but should be conservative enough in safety, and reasonably large
361          * for efficiency.
362          */
363         mgr->arc4ctx.count = 1600000;
364 }
365
366 static isc_uint16_t
367 dispatch_arc4random(dns_dispatchmgr_t *mgr) {
368         isc_uint16_t result;
369
370         LOCK(&mgr->arc4_lock);
371         mgr->arc4ctx.count -= sizeof(isc_uint16_t);
372         if (mgr->arc4ctx.count <= 0)
373                 dispatch_arc4stir(mgr);
374         result = dispatch_arc4get16(&mgr->arc4ctx);
375         UNLOCK(&mgr->arc4_lock);
376         return (result);
377 }
378
379 static isc_uint16_t
380 dispatch_arc4uniformrandom(dns_dispatchmgr_t *mgr, isc_uint16_t upper_bound) {
381         isc_uint16_t min, r;
382         /* The caller must hold the manager lock. */
383
384         if (upper_bound < 2)
385                 return (0);
386
387         /*
388          * Ensure the range of random numbers [min, 0xffff] be a multiple of
389          * upper_bound and contain at least a half of the 16 bit range.
390          */
391
392         if (upper_bound > 0x8000)
393                 min = 1 + ~upper_bound; /* 0x8000 - upper_bound */
394         else
395                 min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
396
397         /*
398          * This could theoretically loop forever but each retry has
399          * p > 0.5 (worst case, usually far better) of selecting a
400          * number inside the range we need, so it should rarely need
401          * to re-roll.
402          */
403         for (;;) {
404                 r = dispatch_arc4random(mgr);
405                 if (r >= min)
406                         break;
407         }
408
409         return (r % upper_bound);
410 }
411
412 /*
413  * Return a hash of the destination and message id.
414  */
415 static isc_uint32_t
416 dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
417          in_port_t port)
418 {
419         unsigned int ret;
420
421         ret = isc_sockaddr_hash(dest, ISC_TRUE);
422         ret ^= (id << 16) | port;
423         ret %= qid->qid_nbuckets;
424
425         INSIST(ret < qid->qid_nbuckets);
426
427         return (ret);
428 }
429
430 /*
431  * Find the first entry in 'qid'.  Returns NULL if there are no entries.
432  */
433 static dns_dispentry_t *
434 linear_first(dns_qid_t *qid) {
435         dns_dispentry_t *ret;
436         unsigned int bucket;
437
438         bucket = 0;
439
440         while (bucket < qid->qid_nbuckets) {
441                 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
442                 if (ret != NULL)
443                         return (ret);
444                 bucket++;
445         }
446
447         return (NULL);
448 }
449
450 /*
451  * Find the next entry after 'resp' in 'qid'.  Return NULL if there are
452  * no more entries.
453  */
454 static dns_dispentry_t *
455 linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
456         dns_dispentry_t *ret;
457         unsigned int bucket;
458
459         ret = ISC_LIST_NEXT(resp, link);
460         if (ret != NULL)
461                 return (ret);
462
463         bucket = resp->bucket;
464         bucket++;
465         while (bucket < qid->qid_nbuckets) {
466                 ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
467                 if (ret != NULL)
468                         return (ret);
469                 bucket++;
470         }
471
472         return (NULL);
473 }
474
475 /*
476  * The dispatch must be locked.
477  */
478 static isc_boolean_t
479 destroy_disp_ok(dns_dispatch_t *disp)
480 {
481         if (disp->refcount != 0)
482                 return (ISC_FALSE);
483
484         if (disp->recv_pending != 0)
485                 return (ISC_FALSE);
486
487         if (disp->shutting_down == 0)
488                 return (ISC_FALSE);
489
490         return (ISC_TRUE);
491 }
492
493
494 /*
495  * Called when refcount reaches 0 (and safe to destroy).
496  *
497  * The dispatcher must not be locked.
498  * The manager must be locked.
499  */
500 static void
501 destroy_disp(isc_task_t *task, isc_event_t *event) {
502         dns_dispatch_t *disp;
503         dns_dispatchmgr_t *mgr;
504         isc_boolean_t killmgr;
505
506         INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
507
508         UNUSED(task);
509
510         disp = event->ev_arg;
511         mgr = disp->mgr;
512
513         LOCK(&mgr->lock);
514         ISC_LIST_UNLINK(mgr->list, disp, link);
515
516         dispatch_log(disp, LVL(90),
517                      "shutting down; detaching from sock %p, task %p",
518                      disp->socket, disp->task);
519
520         isc_socket_detach(&disp->socket);
521         isc_task_detach(&disp->task);
522         isc_event_free(&event);
523
524         dispatch_free(&disp);
525
526         killmgr = destroy_mgr_ok(mgr);
527         UNLOCK(&mgr->lock);
528         if (killmgr)
529                 destroy_mgr(&mgr);
530 }
531
532
533 /*
534  * Find an entry for query ID 'id' and socket address 'dest' in 'qid'.
535  * Return NULL if no such entry exists.
536  */
537 static dns_dispentry_t *
538 bucket_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
539               in_port_t port, unsigned int bucket)
540 {
541         dns_dispentry_t *res;
542
543         REQUIRE(bucket < qid->qid_nbuckets);
544
545         res = ISC_LIST_HEAD(qid->qid_table[bucket]);
546
547         while (res != NULL) {
548                 if ((res->id == id) && isc_sockaddr_equal(dest, &res->host) &&
549                     res->port == port) {
550                         return (res);
551                 }
552                 res = ISC_LIST_NEXT(res, link);
553         }
554
555         return (NULL);
556 }
557
558 static void
559 free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
560         INSIST(buf != NULL && len != 0);
561
562
563         switch (disp->socktype) {
564         case isc_sockettype_tcp:
565                 INSIST(disp->tcpbuffers > 0);
566                 disp->tcpbuffers--;
567                 isc_mem_put(disp->mgr->mctx, buf, len);
568                 break;
569         case isc_sockettype_udp:
570                 LOCK(&disp->mgr->buffer_lock);
571                 INSIST(disp->mgr->buffers > 0);
572                 INSIST(len == disp->mgr->buffersize);
573                 disp->mgr->buffers--;
574                 isc_mempool_put(disp->mgr->bpool, buf);
575                 UNLOCK(&disp->mgr->buffer_lock);
576                 break;
577         default:
578                 INSIST(0);
579                 break;
580         }
581 }
582
583 static void *
584 allocate_udp_buffer(dns_dispatch_t *disp) {
585         void *temp;
586
587         LOCK(&disp->mgr->buffer_lock);
588         temp = isc_mempool_get(disp->mgr->bpool);
589
590         if (temp != NULL)
591                 disp->mgr->buffers++;
592         UNLOCK(&disp->mgr->buffer_lock);
593
594         return (temp);
595 }
596
597 static inline void
598 free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
599         if (disp->failsafe_ev == ev) {
600                 INSIST(disp->shutdown_out == 1);
601                 disp->shutdown_out = 0;
602
603                 return;
604         }
605
606         isc_mempool_put(disp->mgr->epool, ev);
607 }
608
609 static inline dns_dispatchevent_t *
610 allocate_event(dns_dispatch_t *disp) {
611         dns_dispatchevent_t *ev;
612
613         ev = isc_mempool_get(disp->mgr->epool);
614         if (ev == NULL)
615                 return (NULL);
616         ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
617                        NULL, NULL, NULL, NULL, NULL);
618
619         return (ev);
620 }
621
622 /*
623  * General flow:
624  *
625  * If I/O result == CANCELED or error, free the buffer.
626  *
627  * If query, free the buffer, restart.
628  *
629  * If response:
630  *      Allocate event, fill in details.
631  *              If cannot allocate, free buffer, restart.
632  *      find target.  If not found, free buffer, restart.
633  *      if event queue is not empty, queue.  else, send.
634  *      restart.
635  */
/*
 * Task action for a receive event on a UDP dispatch; ev_in->ev_arg is
 * the dispatch.  disp->lock is taken on entry, and qid->lock is held
 * from the bucket search until the response event has been queued or
 * sent.
 *
 * On every path the receive buffer is either handed to the response
 * event (rev->buffer) or returned with free_buffer(), and ev_in is
 * freed.  The receive is restarted via startrecv() except on the
 * shutting-down and socket-error paths, which return early.
 */
636 static void
637 udp_recv(isc_task_t *task, isc_event_t *ev_in) {
638         isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
639         dns_dispatch_t *disp = ev_in->ev_arg;
640         dns_messageid_t id;
641         isc_result_t dres;
642         isc_buffer_t source;
643         unsigned int flags;
644         dns_dispentry_t *resp;
645         dns_dispatchevent_t *rev;
646         unsigned int bucket;
647         isc_boolean_t killit;
648         isc_boolean_t queue_response;
649         dns_dispatchmgr_t *mgr;
650         dns_qid_t *qid;
651         isc_netaddr_t netaddr;
652         int match;
653
654         UNUSED(task);
655
656         LOCK(&disp->lock);
657
658         mgr = disp->mgr;
            /* This handler always uses the manager's QID table. */
659         qid = mgr->qid;
660
661         dispatch_log(disp, LVL(90),
662                      "got packet: requests %d, buffers %d, recvs %d",
663                      disp->requests, disp->mgr->buffers, disp->recv_pending);
664
665         if (ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
666                 /*
667                  * Unless the receive event was imported from a listening
668                  * interface, in which case the event type is
669                  * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
670                  */
671                 INSIST(disp->recv_pending != 0);
672                 disp->recv_pending = 0;
673         }
674
675         if (disp->shutting_down) {
676                 /*
677                  * This dispatcher is shutting down.
678                  */
679                 free_buffer(disp, ev->region.base, ev->region.length);
680
681                 isc_event_free(&ev_in);
682                 ev = NULL;
683
                    /*
                     * Decide under the lock whether the dispatch can go;
                     * if so, post its control event to its own task.
                     * NOTE(review): presumably that event runs
                     * destroy_disp() -- confirm where ctlevent is set up.
                     */
684                 killit = destroy_disp_ok(disp);
685                 UNLOCK(&disp->lock);
686                 if (killit)
687                         isc_task_send(disp->task, &disp->ctlevent);
688
689                 return;
690         }
691
            /*
             * Socket-level failure: drop the buffer and return.  The
             * receive is not restarted on this path.
             */
692         if (ev->result != ISC_R_SUCCESS) {
693                 free_buffer(disp, ev->region.base, ev->region.length);
694
695                 if (ev->result != ISC_R_CANCELED)
696                         dispatch_log(disp, ISC_LOG_ERROR,
697                                      "odd socket result in udp_recv(): %s",
698                                      isc_result_totext(ev->result));
699
700                 UNLOCK(&disp->lock);
701                 isc_event_free(&ev_in);
702                 return;
703         }
704
705         /*
706          * If this is from a blackholed address, drop it.
707          */
708         isc_netaddr_fromsockaddr(&netaddr, &ev->address);
709         if (disp->mgr->blackhole != NULL &&
710             dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
711                           NULL, &match, NULL) == ISC_R_SUCCESS &&
712             match > 0)
713         {
714                 if (isc_log_wouldlog(dns_lctx, LVL(10))) {
715                         char netaddrstr[ISC_NETADDR_FORMATSIZE];
716                         isc_netaddr_format(&netaddr, netaddrstr,
717                                            sizeof(netaddrstr));
718                         dispatch_log(disp, LVL(10),
719                                      "blackholed packet from %s",
720                                      netaddrstr);
721                 }
722                 free_buffer(disp, ev->region.base, ev->region.length);
723                 goto restart;
724         }
725
726         /*
727          * Peek into the buffer to see what we can see.
728          */
729         isc_buffer_init(&source, ev->region.base, ev->region.length);
730         isc_buffer_add(&source, ev->n);
731         dres = dns_message_peekheader(&source, &id, &flags);
732         if (dres != ISC_R_SUCCESS) {
733                 free_buffer(disp, ev->region.base, ev->region.length);
734                 dispatch_log(disp, LVL(10), "got garbage packet");
735                 goto restart;
736         }
737
738         dispatch_log(disp, LVL(92),
739                      "got valid DNS message header, /QR %c, id %u",
740                      ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
741
742         /*
743          * Look at flags.  If query, drop it. If response,
744          * look to see where it goes.
745          */
746         queue_response = ISC_FALSE;
747         if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
748                 /* query */
749                 free_buffer(disp, ev->region.base, ev->region.length);
750                 goto restart;
751         }
752
753         /* response */
            /*
             * The entry is looked up by sender address, message id and
             * this dispatch's local port.  qid->lock is held from here
             * to the "unlock" label.
             */
754         bucket = dns_hash(qid, &ev->address, id, disp->localport);
755         LOCK(&qid->lock);
756         resp = bucket_search(qid, &ev->address, id, disp->localport, bucket);
757         dispatch_log(disp, LVL(90),
758                      "search for response in bucket %d: %s",
759                      bucket, (resp == NULL ? "not found" : "found"));
760
761         if (resp == NULL) {
762                 free_buffer(disp, ev->region.base, ev->region.length);
763                 goto unlock;
764         } 
765
766         /*
767          * Now that we have the original dispatch the query was sent
768          * from check that the address and port the response was
769          * sent to make sense.
770          */
771         if (disp != resp->disp) {
772                 isc_sockaddr_t a1;
773                 isc_sockaddr_t a2;
774                 
775                 /*
776                  * Check that the socket types and ports match.
777                  */
778                 if (disp->socktype != resp->disp->socktype ||
779                     isc_sockaddr_getport(&disp->local) !=
780                     isc_sockaddr_getport(&resp->disp->local)) {
781                         free_buffer(disp, ev->region.base, ev->region.length);
782                         goto unlock;
783                 }
784
785                 /*
786                  * If both dispatches are bound to an address then fail as
787                  * the addresses can't be equal (enforced by the IP stack).  
788                  *
789                  * Note under Linux a packet can be sent out via IPv4 socket
790                  * and the response be received via a IPv6 socket.
791                  * 
792                  * Requests sent out via IPv6 should always come back in
793                  * via IPv6.
794                  */
795                 if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
796                     isc_sockaddr_pf(&disp->local) != PF_INET6) {
797                         free_buffer(disp, ev->region.base, ev->region.length);
798                         goto unlock;
799                 }
                    /*
                     * a1 and a2 are the "any" address of each dispatch's
                     * protocol family; the packet is dropped only when
                     * neither local address equals its "any" address.
                     */
800                 isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
801                 isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
802                 if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
803                     !isc_sockaddr_eqaddr(&a2, &disp->local)) {
804                         free_buffer(disp, ev->region.base, ev->region.length);
805                         goto unlock;
806                 }
807         }
808
            /*
             * If an event is already out to the entry's task, this one
             * is queued on the entry instead of being sent.  The event
             * comes from the pool of the entry's own dispatch.
             */
809         queue_response = resp->item_out;
810         rev = allocate_event(resp->disp);
811         if (rev == NULL) {
812                 free_buffer(disp, ev->region.base, ev->region.length);
813                 goto unlock;
814         }
815
816         /*
817          * At this point, rev contains the event we want to fill in, and
818          * resp contains the information on the place to send it to.
819          * Send the event off.
820          */
821         isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
822         isc_buffer_add(&rev->buffer, ev->n);
823         rev->result = ISC_R_SUCCESS;
824         rev->id = id;
825         rev->addr = ev->address;
826         rev->pktinfo = ev->pktinfo;
827         rev->attributes = ev->attributes;
828         if (queue_response) {
829                 ISC_LIST_APPEND(resp->items, rev, ev_link);
830         } else {
831                 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
832                                DNS_EVENT_DISPATCH,
833                                resp->action, resp->arg, resp, NULL, NULL);
834                 request_log(disp, resp, LVL(90),
835                             "[a] Sent event %p buffer %p len %d to task %p",
836                             rev, rev->buffer.base, rev->buffer.length,
837                             resp->task);
838                 resp->item_out = ISC_TRUE;
839                 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
840         }
841  unlock:
842         UNLOCK(&qid->lock);
843
844         /*
845          * Restart recv() to get the next packet.
846          */
847  restart:
848         startrecv(disp);
849
850         UNLOCK(&disp->lock);
851
852         isc_event_free(&ev_in);
853 }
854
855 /*
856  * Task action run when a TCP message read on a dispatcher completes;
857  * ev_in->ev_arg is the dispatcher and the message is in disp->tcpmsg.
858  *
859  * General flow:
860  *
861  * If I/O result != SUCCESS (CANCELED, EOF, or error), or refcount is 0,
862  * cancel as needed, mark the dispatcher shutting down, and return
863  * without restarting the read.
864  *
865  * If the header is unparsable or the message is a query, restart.
866  * If response: find target by <address, id, localport>.  If not found
867  * or no event can be allocated, restart.  Else queue (if an event is
868  * already out) or send, then restart.
869  */
870 static void
871 tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
872         dns_dispatch_t *disp = ev_in->ev_arg;
873         dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
874         dns_messageid_t id;
875         isc_result_t dres;
876         unsigned int flags;
877         dns_dispentry_t *resp;
878         dns_dispatchevent_t *rev;
879         unsigned int bucket;
880         isc_boolean_t killit;
881         isc_boolean_t queue_response;
882         dns_qid_t *qid;
883         int level;
884         char buf[ISC_SOCKADDR_FORMATSIZE];
885
886         UNUSED(task);
887
888         REQUIRE(VALID_DISPATCH(disp));
889
890         qid = disp->qid;
891
892         dispatch_log(disp, LVL(90),
893                      "got TCP packet: requests %d, buffers %d, recvs %d",
894                      disp->requests, disp->tcpbuffers, disp->recv_pending);
895         /* NOTE(review): the counters logged above are read before LOCK. */
896         LOCK(&disp->lock);
897
898         INSIST(disp->recv_pending != 0);
899         disp->recv_pending = 0;
900
901         if (disp->refcount == 0) {
902                 /*
903                  * This dispatcher is shutting down.  Force cancelation.
904                  */
905                 tcpmsg->result = ISC_R_CANCELED;
906         }
907
908         if (tcpmsg->result != ISC_R_SUCCESS) {
909                 switch (tcpmsg->result) {
910                 case ISC_R_CANCELED:
911                         break;
912                         
913                 case ISC_R_EOF:
914                         dispatch_log(disp, LVL(90), "shutting down on EOF");
915                         do_cancel(disp);
916                         break;
917
918                 case ISC_R_CONNECTIONRESET:
919                         level = ISC_LOG_INFO;
920                         goto logit; /* same handling as default, lower level */
921
922                 default:
923                         level = ISC_LOG_ERROR;
924                 logit:
925                         isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
926                         dispatch_log(disp, level, "shutting down due to TCP "
927                                      "receive error: %s: %s", buf,
928                                      isc_result_totext(tcpmsg->result));
929                         do_cancel(disp);
930                         break;
931                 }
932
933                 /*
934                  * The event is statically allocated in the tcpmsg
935                  * structure, and destroy_disp() frees the tcpmsg, so we must
936                  * free the event *before* calling destroy_disp().
937                  */
938                 isc_event_free(&ev_in);
939
940                 disp->shutting_down = 1;
941                 disp->shutdown_why = tcpmsg->result;
942
943                 /*
944                  * If destroy_disp_ok() agrees, post the control event.
945                  */
946                 killit = destroy_disp_ok(disp);
947                 UNLOCK(&disp->lock);
948                 if (killit)
949                         isc_task_send(disp->task, &disp->ctlevent);
950                 return;
951         }
952
953         dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
954                      tcpmsg->result,
955                      tcpmsg->buffer.length, tcpmsg->buffer.base);
956
957         /*
958          * Peek into the buffer to see what we can see.
959          */
960         dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
961         if (dres != ISC_R_SUCCESS) {
962                 dispatch_log(disp, LVL(10), "got garbage packet");
963                 goto restart;
964         }
965
966         dispatch_log(disp, LVL(92),
967                      "got valid DNS message header, /QR %c, id %u",
968                      ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
969
970         /*
971          * Nothing is allocated at this point; the event is allocated
972          * further down, once a matching response entry has been found.
973          */
974
975         /*
976          * Look at flags.  If query, drop it. If response,
977          * look to see where it goes.
978          */
979         queue_response = ISC_FALSE;
980         if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
981                 /*
982                  * Query.
983                  */
984                 goto restart;
985         }
986
987         /*
988          * Response.
989          */
990         bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
991         LOCK(&qid->lock);
992         resp = bucket_search(qid, &tcpmsg->address, id, disp->localport,
993                              bucket);
994         dispatch_log(disp, LVL(90),
995                      "search for response in bucket %d: %s",
996                      bucket, (resp == NULL ? "not found" : "found"));
997
998         if (resp == NULL)
999                 goto unlock;
1000         queue_response = resp->item_out; /* an event is already out */
1001         rev = allocate_event(disp);
1002         if (rev == NULL)
1003                 goto unlock;
1004
1005         /*
1006          * rev is the event to fill in and resp is the entry to deliver it
1007          * to.  dns_tcpmsg_keepbuffer() presumably hands the message buffer
1008          * over to rev (TODO confirm); tcpbuffers is bumped to account for it.
1009          */
1010         dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1011         disp->tcpbuffers++;
1012         rev->result = ISC_R_SUCCESS;
1013         rev->id = id;
1014         rev->addr = tcpmsg->address;
1015         if (queue_response) {
1016                 ISC_LIST_APPEND(resp->items, rev, ev_link);
1017         } else {
1018                 ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1019                                resp->action, resp->arg, resp, NULL, NULL);
1020                 request_log(disp, resp, LVL(90),
1021                             "[b] Sent event %p buffer %p len %d to task %p",
1022                             rev, rev->buffer.base, rev->buffer.length,
1023                             resp->task);
1024                 resp->item_out = ISC_TRUE;
1025                 isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1026         }
1027  unlock:
1028         UNLOCK(&qid->lock);
1029
1030         /*
1031          * Restart recv() to get the next packet.
1032          */
1033  restart:
1034         startrecv(disp);
1035
1036         UNLOCK(&disp->lock);
1037
1038         isc_event_free(&ev_in);
1039 }
1040
1041 /*
1042  * disp must be locked.
1043  */
1044 static void
1045 startrecv(dns_dispatch_t *disp) {
1046         isc_result_t res;
1047         isc_region_t region;
1048
1049         if (disp->shutting_down == 1)
1050                 return;
1051
1052         if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1053                 return;
1054
1055         if (disp->recv_pending != 0)
1056                 return;
1057
1058         if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1059                 return;
1060
1061         switch (disp->socktype) {
1062                 /*
1063                  * UDP reads are always maximal.
1064                  */
1065         case isc_sockettype_udp:
1066                 region.length = disp->mgr->buffersize;
1067                 region.base = allocate_udp_buffer(disp);
1068                 if (region.base == NULL)
1069                         return;
1070                 res = isc_socket_recv(disp->socket, &region, 1,
1071                                       disp->task, udp_recv, disp);
1072                 if (res != ISC_R_SUCCESS) {
1073                         free_buffer(disp, region.base, region.length);
1074                         disp->shutdown_why = res;
1075                         disp->shutting_down = 1;
1076                         do_cancel(disp);
1077                         return;
1078                 }
1079                 INSIST(disp->recv_pending == 0);
1080                 disp->recv_pending = 1;
1081                 break;
1082
1083         case isc_sockettype_tcp:
1084                 res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task,
1085                                              tcp_recv, disp);
1086                 if (res != ISC_R_SUCCESS) {
1087                         disp->shutdown_why = res;
1088                         disp->shutting_down = 1;
1089                         do_cancel(disp);
1090                         return;
1091                 }
1092                 INSIST(disp->recv_pending == 0);
1093                 disp->recv_pending = 1;
1094                 break;
1095         }
1096 }
1097
1098 /*
1099  * Mgr must be locked when calling this function.
1100  */
1101 static isc_boolean_t
1102 destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1103         mgr_log(mgr, LVL(90),
1104                 "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1105                 "epool=%d, rpool=%d, dpool=%d",
1106                 MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1107                 isc_mempool_getallocated(mgr->epool),
1108                 isc_mempool_getallocated(mgr->rpool),
1109                 isc_mempool_getallocated(mgr->dpool));
1110         if (!MGR_IS_SHUTTINGDOWN(mgr))
1111                 return (ISC_FALSE);
1112         if (!ISC_LIST_EMPTY(mgr->list))
1113                 return (ISC_FALSE);
1114         if (isc_mempool_getallocated(mgr->epool) != 0)
1115                 return (ISC_FALSE);
1116         if (isc_mempool_getallocated(mgr->rpool) != 0)
1117                 return (ISC_FALSE);
1118         if (isc_mempool_getallocated(mgr->dpool) != 0)
1119                 return (ISC_FALSE);
1120
1121         return (ISC_TRUE);
1122 }
1123
1124 /*
1125  * Mgr must be unlocked when calling this function.
1126  */
1127 static void
1128 destroy_mgr(dns_dispatchmgr_t **mgrp) {
1129         isc_mem_t *mctx;
1130         dns_dispatchmgr_t *mgr;
1131
1132         mgr = *mgrp;
1133         *mgrp = NULL;
1134
1135         mctx = mgr->mctx;
1136
1137         mgr->magic = 0;
1138         mgr->mctx = NULL;
1139         DESTROYLOCK(&mgr->lock);
1140         mgr->state = 0;
1141
1142         DESTROYLOCK(&mgr->arc4_lock);
1143
1144         isc_mempool_destroy(&mgr->epool);
1145         isc_mempool_destroy(&mgr->rpool);
1146         isc_mempool_destroy(&mgr->dpool);
1147         isc_mempool_destroy(&mgr->bpool);
1148
1149         DESTROYLOCK(&mgr->pool_lock);
1150
1151         if (mgr->entropy != NULL)
1152                 isc_entropy_detach(&mgr->entropy);
1153         if (mgr->qid != NULL)
1154                 qid_destroy(mctx, &mgr->qid);
1155
1156         DESTROYLOCK(&mgr->buffer_lock);
1157
1158         if (mgr->blackhole != NULL)
1159                 dns_acl_detach(&mgr->blackhole);
1160
1161         if (mgr->portlist != NULL)
1162                 dns_portlist_detach(&mgr->portlist);
1163
1164         isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1165         isc_mem_detach(&mctx);
1166 }
1167
1168 static isc_result_t
1169 create_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1170               isc_socket_t **sockp)
1171 {
1172         isc_socket_t *sock;
1173         isc_result_t result;
1174
1175         sock = NULL;
1176         result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1177                                    isc_sockettype_udp, &sock);
1178         if (result != ISC_R_SUCCESS)
1179                 return (result);
1180
1181 #ifndef ISC_ALLOW_MAPPED
1182         isc_socket_ipv6only(sock, ISC_TRUE);
1183 #endif
1184         result = isc_socket_bind(sock, local);
1185         if (result != ISC_R_SUCCESS) {
1186                 isc_socket_detach(&sock);
1187                 return (result);
1188         }
1189
1190         *sockp = sock;
1191         return (ISC_R_SUCCESS);
1192 }
1193
1194 /*
1195  * Publics.
1196  */
1197
1198 isc_result_t
1199 dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1200                        dns_dispatchmgr_t **mgrp)
1201 {
1202         dns_dispatchmgr_t *mgr;
1203         isc_result_t result;
1204
1205         REQUIRE(mctx != NULL);
1206         REQUIRE(mgrp != NULL && *mgrp == NULL);
1207
1208         mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1209         if (mgr == NULL)
1210                 return (ISC_R_NOMEMORY);
1211
1212         mgr->mctx = NULL;
1213         isc_mem_attach(mctx, &mgr->mctx);
1214
1215         mgr->blackhole = NULL;
1216         mgr->portlist = NULL;
1217
1218         result = isc_mutex_init(&mgr->lock);
1219         if (result != ISC_R_SUCCESS)
1220                 goto deallocate;
1221
1222         result = isc_mutex_init(&mgr->arc4_lock);
1223         if (result != ISC_R_SUCCESS)
1224                 goto kill_lock;
1225
1226         result = isc_mutex_init(&mgr->buffer_lock);
1227         if (result != ISC_R_SUCCESS)
1228                 goto kill_arc4_lock;
1229
1230         result = isc_mutex_init(&mgr->pool_lock);
1231         if (result != ISC_R_SUCCESS)
1232                 goto kill_buffer_lock;
1233
1234         mgr->epool = NULL;
1235         if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
1236                                &mgr->epool) != ISC_R_SUCCESS) {
1237                 result = ISC_R_NOMEMORY;
1238                 goto kill_pool_lock;
1239         }
1240
1241         mgr->rpool = NULL;
1242         if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
1243                                &mgr->rpool) != ISC_R_SUCCESS) {
1244                 result = ISC_R_NOMEMORY;
1245                 goto kill_epool;
1246         }
1247
1248         mgr->dpool = NULL;
1249         if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
1250                                &mgr->dpool) != ISC_R_SUCCESS) {
1251                 result = ISC_R_NOMEMORY;
1252                 goto kill_rpool;
1253         }
1254
1255         isc_mempool_setname(mgr->epool, "dispmgr_epool");
1256         isc_mempool_setfreemax(mgr->epool, 1024);
1257         isc_mempool_associatelock(mgr->epool, &mgr->pool_lock);
1258
1259         isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
1260         isc_mempool_setfreemax(mgr->rpool, 1024);
1261         isc_mempool_associatelock(mgr->rpool, &mgr->pool_lock);
1262
1263         isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
1264         isc_mempool_setfreemax(mgr->dpool, 1024);
1265         isc_mempool_associatelock(mgr->dpool, &mgr->pool_lock);
1266
1267         mgr->buffers = 0;
1268         mgr->buffersize = 0;
1269         mgr->maxbuffers = 0;
1270         mgr->bpool = NULL;
1271         mgr->entropy = NULL;
1272         mgr->qid = NULL;
1273         mgr->state = 0;
1274         ISC_LIST_INIT(mgr->list);
1275         mgr->magic = DNS_DISPATCHMGR_MAGIC;
1276
1277         if (entropy != NULL)
1278                 isc_entropy_attach(entropy, &mgr->entropy);
1279
1280         dispatch_arc4init(&mgr->arc4ctx);
1281
1282         *mgrp = mgr;
1283         return (ISC_R_SUCCESS);
1284
1285  kill_rpool:
1286         isc_mempool_destroy(&mgr->rpool);
1287  kill_epool:
1288         isc_mempool_destroy(&mgr->epool);
1289  kill_pool_lock:
1290         DESTROYLOCK(&mgr->pool_lock);
1291  kill_buffer_lock:
1292         DESTROYLOCK(&mgr->buffer_lock);
1293  kill_arc4_lock:
1294         DESTROYLOCK(&mgr->arc4_lock);
1295  kill_lock:
1296         DESTROYLOCK(&mgr->lock);
1297  deallocate:
1298         isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1299         isc_mem_detach(&mctx);
1300
1301         return (result);
1302 }
1303
1304 void
1305 dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
1306         REQUIRE(VALID_DISPATCHMGR(mgr));
1307         if (mgr->blackhole != NULL)
1308                 dns_acl_detach(&mgr->blackhole);
1309         dns_acl_attach(blackhole, &mgr->blackhole);
1310 }
1311
1312 dns_acl_t *
1313 dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
1314         REQUIRE(VALID_DISPATCHMGR(mgr));
1315         return (mgr->blackhole);
1316 }
1317
1318 void
1319 dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
1320                                  dns_portlist_t *portlist)
1321 {
1322         REQUIRE(VALID_DISPATCHMGR(mgr));
1323         if (mgr->portlist != NULL)
1324                 dns_portlist_detach(&mgr->portlist);
1325         if (portlist != NULL)
1326                 dns_portlist_attach(portlist, &mgr->portlist);
1327 }
1328
1329 dns_portlist_t *
1330 dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
1331         REQUIRE(VALID_DISPATCHMGR(mgr));
1332         return (mgr->portlist);
1333 }
1334
1335 static isc_result_t
1336 dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
1337                         unsigned int buffersize, unsigned int maxbuffers,
1338                         unsigned int buckets, unsigned int increment)
1339 {
1340         isc_result_t result;
1341
1342         REQUIRE(VALID_DISPATCHMGR(mgr));
1343         REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
1344         REQUIRE(maxbuffers > 0);
1345         REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
1346         REQUIRE(increment > buckets);
1347
1348         /*
1349          * Keep some number of items around.  This should be a config
1350          * option.  For now, keep 8, but later keep at least two even
1351          * if the caller wants less.  This allows us to ensure certain
1352          * things, like an event can be "freed" and the next allocation
1353          * will always succeed.
1354          *
1355          * Note that if limits are placed on anything here, we use one
1356          * event internally, so the actual limit should be "wanted + 1."
1357          *
1358          * XXXMLG
1359          */
1360
1361         if (maxbuffers < 8)
1362                 maxbuffers = 8;
1363
1364         LOCK(&mgr->buffer_lock);
1365         if (mgr->bpool != NULL) {
1366                 isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
1367                 mgr->maxbuffers = maxbuffers;
1368                 UNLOCK(&mgr->buffer_lock);
1369                 return (ISC_R_SUCCESS);
1370         }
1371
1372         if (isc_mempool_create(mgr->mctx, buffersize,
1373                                &mgr->bpool) != ISC_R_SUCCESS) {
1374                 return (ISC_R_NOMEMORY);
1375         }
1376
1377         isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
1378         isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
1379         isc_mempool_associatelock(mgr->bpool, &mgr->pool_lock);
1380
1381         result = qid_allocate(mgr, buckets, increment, &mgr->qid);
1382         if (result != ISC_R_SUCCESS)
1383                 goto cleanup;
1384
1385         mgr->buffersize = buffersize;
1386         mgr->maxbuffers = maxbuffers;
1387         UNLOCK(&mgr->buffer_lock);
1388         return (ISC_R_SUCCESS);
1389
1390  cleanup:
1391         isc_mempool_destroy(&mgr->bpool);
1392         UNLOCK(&mgr->buffer_lock);
1393         return (ISC_R_NOMEMORY);
1394 }
1395
1396 void
1397 dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
1398         dns_dispatchmgr_t *mgr;
1399         isc_boolean_t killit;
1400
1401         REQUIRE(mgrp != NULL);
1402         REQUIRE(VALID_DISPATCHMGR(*mgrp));
1403
1404         mgr = *mgrp;
1405         *mgrp = NULL;
1406
1407         LOCK(&mgr->lock);
1408         mgr->state |= MGR_SHUTTINGDOWN;
1409
1410         killit = destroy_mgr_ok(mgr);
1411         UNLOCK(&mgr->lock);
1412
1413         mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
1414
1415         if (killit)
1416                 destroy_mgr(&mgr);
1417 }
1418
1419 static isc_boolean_t
1420 blacklisted(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
1421             isc_sockaddr_t *sockaddrp)
1422 {
1423         isc_sockaddr_t sockaddr;
1424         isc_result_t result;
1425
1426         REQUIRE(sock != NULL || sockaddrp != NULL);
1427
1428         if (mgr->portlist == NULL)
1429                 return (ISC_FALSE);
1430
1431         if (sock != NULL) {
1432                 sockaddrp = &sockaddr;
1433                 result = isc_socket_getsockname(sock, sockaddrp);
1434                 if (result != ISC_R_SUCCESS)
1435                         return (ISC_FALSE);
1436         }
1437
1438         if (mgr->portlist != NULL &&
1439             dns_portlist_match(mgr->portlist, isc_sockaddr_pf(sockaddrp),
1440                                isc_sockaddr_getport(sockaddrp)))
1441                 return (ISC_TRUE);
1442         return (ISC_FALSE);
1443 }
1444
/*
 * True when _a1 and _a2 agree on every bit selected by _mask
 * (i.e. no masked bit differs between them).
 */
#define ATTRMATCH(_a1, _a2, _mask) ((((_a1) ^ (_a2)) & (_mask)) == 0)
1446
1447 static isc_boolean_t
1448 local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
1449         isc_sockaddr_t sockaddr;
1450         isc_result_t result;
1451
1452         if (addr == NULL)
1453                 return (ISC_TRUE);
1454
1455         /*
1456          * Don't match wildcard ports against newly blacklisted ports.
1457          */
1458         if (disp->mgr->portlist != NULL &&
1459             isc_sockaddr_getport(addr) == 0 &&
1460             isc_sockaddr_getport(&disp->local) == 0 &&
1461             blacklisted(disp->mgr, disp->socket, NULL))
1462                 return (ISC_FALSE);
1463
1464         /*
1465          * Check if we match the binding <address,port>.
1466          * Wildcard ports match/fail here.
1467          */
1468         if (isc_sockaddr_equal(&disp->local, addr))
1469                 return (ISC_TRUE);
1470         if (isc_sockaddr_getport(addr) == 0)
1471                 return (ISC_FALSE);
1472
1473         /*
1474          * Check if we match a bound wildcard port <address,port>.
1475          */
1476         if (!isc_sockaddr_eqaddr(&disp->local, addr))
1477                 return (ISC_FALSE);
1478         result = isc_socket_getsockname(disp->socket, &sockaddr);
1479         if (result != ISC_R_SUCCESS)
1480                 return (ISC_FALSE);
1481
1482         return (isc_sockaddr_equal(&sockaddr, addr));
1483 }
1484
1485 /*
1486  * Requires mgr be locked.
1487  *
1488  * No dispatcher can be locked by this thread when calling this function.
1489  *
1490  *
1491  * NOTE:
1492  *      If a matching dispatcher is found, it is locked after this function
1493  *      returns, and must be unlocked by the caller.
1494  */
1495 static isc_result_t
1496 dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
1497               unsigned int attributes, unsigned int mask,
1498               dns_dispatch_t **dispp)
1499 {
1500         dns_dispatch_t *disp;
1501         isc_result_t result;
1502
1503         /*
1504          * Make certain that we will not match a private dispatch.
1505          */
1506         attributes &= ~DNS_DISPATCHATTR_PRIVATE;
1507         mask |= DNS_DISPATCHATTR_PRIVATE;
1508
1509         disp = ISC_LIST_HEAD(mgr->list);
1510         while (disp != NULL) {
1511                 LOCK(&disp->lock);
1512                 if ((disp->shutting_down == 0)
1513                     && ATTRMATCH(disp->attributes, attributes, mask)
1514                     && local_addr_match(disp, local))
1515                         break;
1516                 UNLOCK(&disp->lock);
1517                 disp = ISC_LIST_NEXT(disp, link);
1518         }
1519
1520         if (disp == NULL) {
1521                 result = ISC_R_NOTFOUND;
1522                 goto out;
1523         }
1524
1525         *dispp = disp;
1526         result = ISC_R_SUCCESS;
1527  out:
1528
1529         return (result);
1530 }
1531
1532 static isc_result_t
1533 qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
1534              unsigned int increment, dns_qid_t **qidp)
1535 {
1536         dns_qid_t *qid;
1537         unsigned int i;
1538
1539         REQUIRE(VALID_DISPATCHMGR(mgr));
1540         REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
1541         REQUIRE(increment > buckets);
1542         REQUIRE(qidp != NULL && *qidp == NULL);
1543
1544         qid = isc_mem_get(mgr->mctx, sizeof(*qid));
1545         if (qid == NULL)
1546                 return (ISC_R_NOMEMORY);
1547
1548         qid->qid_table = isc_mem_get(mgr->mctx,
1549                                      buckets * sizeof(dns_displist_t));
1550         if (qid->qid_table == NULL) {
1551                 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
1552                 return (ISC_R_NOMEMORY);
1553         }
1554
1555         if (isc_mutex_init(&qid->lock) != ISC_R_SUCCESS) {
1556                 UNEXPECTED_ERROR(__FILE__, __LINE__, "isc_mutex_init failed");
1557                 isc_mem_put(mgr->mctx, qid->qid_table,
1558                             buckets * sizeof(dns_displist_t));
1559                 isc_mem_put(mgr->mctx, qid, sizeof(*qid));
1560                 return (ISC_R_UNEXPECTED);
1561         }
1562
1563         for (i = 0; i < buckets; i++)
1564                 ISC_LIST_INIT(qid->qid_table[i]);
1565
1566         qid->qid_nbuckets = buckets;
1567         qid->qid_increment = increment;
1568         qid->magic = QID_MAGIC;
1569         *qidp = qid;
1570         return (ISC_R_SUCCESS);
1571 }
1572
1573 static void
1574 qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
1575         dns_qid_t *qid;
1576
1577         REQUIRE(qidp != NULL);
1578         qid = *qidp;
1579
1580         REQUIRE(VALID_QID(qid));
1581
1582         *qidp = NULL;
1583         qid->magic = 0;
1584         isc_mem_put(mctx, qid->qid_table,
1585                     qid->qid_nbuckets * sizeof(dns_displist_t));
1586         DESTROYLOCK(&qid->lock);
1587         isc_mem_put(mctx, qid, sizeof(*qid));
1588 }
1589
1590 /*
1591  * Allocate and set important limits.
1592  */
1593 static isc_result_t
1594 dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
1595                   dns_dispatch_t **dispp)
1596 {
1597         dns_dispatch_t *disp;
1598         isc_result_t res;
1599
1600         REQUIRE(VALID_DISPATCHMGR(mgr));
1601         REQUIRE(dispp != NULL && *dispp == NULL);
1602
1603         /*
1604          * Set up the dispatcher, mostly.  Don't bother setting some of
1605          * the options that are controlled by tcp vs. udp, etc.
1606          */
1607
1608         disp = isc_mempool_get(mgr->dpool);
1609         if (disp == NULL)
1610                 return (ISC_R_NOMEMORY);
1611
1612         disp->magic = 0;
1613         disp->mgr = mgr;
1614         disp->maxrequests = maxrequests;
1615         disp->attributes = 0;
1616         ISC_LINK_INIT(disp, link);
1617         disp->refcount = 1;
1618         disp->recv_pending = 0;
1619         memset(&disp->local, 0, sizeof(disp->local));
1620         disp->localport = 0;
1621         disp->shutting_down = 0;
1622         disp->shutdown_out = 0;
1623         disp->connected = 0;
1624         disp->tcpmsg_valid = 0;
1625         disp->shutdown_why = ISC_R_UNEXPECTED;
1626         disp->requests = 0;
1627         disp->tcpbuffers = 0;
1628         disp->qid = NULL;
1629
1630         if (isc_mutex_init(&disp->lock) != ISC_R_SUCCESS) {
1631                 res = ISC_R_UNEXPECTED;
1632                 UNEXPECTED_ERROR(__FILE__, __LINE__, "isc_mutex_init failed");
1633                 goto deallocate;
1634         }
1635
1636         disp->failsafe_ev = allocate_event(disp);
1637         if (disp->failsafe_ev == NULL) {
1638                 res = ISC_R_NOMEMORY;
1639                 goto kill_lock;
1640         }
1641
1642         disp->magic = DISPATCH_MAGIC;
1643
1644         *dispp = disp;
1645         return (ISC_R_SUCCESS);
1646
1647         /*
1648          * error returns
1649          */
1650  kill_lock:
1651         DESTROYLOCK(&disp->lock);
1652  deallocate:
1653         isc_mempool_put(mgr->dpool, disp);
1654
1655         return (res);
1656 }
1657
1658
1659 /*
1660  * MUST be unlocked, and not used by anthing.
1661  */
1662 static void
1663 dispatch_free(dns_dispatch_t **dispp)
1664 {
1665         dns_dispatch_t *disp;
1666         dns_dispatchmgr_t *mgr;
1667
1668         REQUIRE(VALID_DISPATCH(*dispp));
1669         disp = *dispp;
1670         *dispp = NULL;
1671
1672         mgr = disp->mgr;
1673         REQUIRE(VALID_DISPATCHMGR(mgr));
1674
1675         if (disp->tcpmsg_valid) {
1676                 dns_tcpmsg_invalidate(&disp->tcpmsg);
1677                 disp->tcpmsg_valid = 0;
1678         }
1679
1680         INSIST(disp->tcpbuffers == 0);
1681         INSIST(disp->requests == 0);
1682         INSIST(disp->recv_pending == 0);
1683
1684         isc_mempool_put(mgr->epool, disp->failsafe_ev);
1685         disp->failsafe_ev = NULL;
1686
1687         if (disp->qid != NULL)
1688                 qid_destroy(mgr->mctx, &disp->qid);
1689         disp->mgr = NULL;
1690         DESTROYLOCK(&disp->lock);
1691         disp->magic = 0;
1692         isc_mempool_put(mgr->dpool, disp);
1693 }
1694
1695 isc_result_t
1696 dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
1697                        isc_taskmgr_t *taskmgr, unsigned int buffersize,
1698                        unsigned int maxbuffers, unsigned int maxrequests,
1699                        unsigned int buckets, unsigned int increment,
1700                        unsigned int attributes, dns_dispatch_t **dispp)
1701 {
1702         isc_result_t result;
1703         dns_dispatch_t *disp;
1704
1705         UNUSED(maxbuffers);
1706         UNUSED(buffersize);
1707
1708         REQUIRE(VALID_DISPATCHMGR(mgr));
1709         REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
1710         REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
1711         REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
1712
1713         attributes |= DNS_DISPATCHATTR_PRIVATE;  /* XXXMLG */
1714
1715         LOCK(&mgr->lock);
1716
1717         /*
1718          * dispatch_allocate() checks mgr for us.
1719          * qid_allocate() checks buckets and increment for us.
1720          */
1721         disp = NULL;
1722         result = dispatch_allocate(mgr, maxrequests, &disp);
1723         if (result != ISC_R_SUCCESS) {
1724                 UNLOCK(&mgr->lock);
1725                 return (result);
1726         }
1727
1728         result = qid_allocate(mgr, buckets, increment, &disp->qid);
1729         if (result != ISC_R_SUCCESS)
1730                 goto deallocate_dispatch;
1731
1732         disp->socktype = isc_sockettype_tcp;
1733         disp->socket = NULL;
1734         isc_socket_attach(sock, &disp->socket);
1735
1736         disp->task = NULL;
1737         result = isc_task_create(taskmgr, 0, &disp->task);
1738         if (result != ISC_R_SUCCESS)
1739                 goto kill_socket;
1740
1741         disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
1742                                             DNS_EVENT_DISPATCHCONTROL,
1743                                             destroy_disp, disp,
1744                                             sizeof(isc_event_t));
1745         if (disp->ctlevent == NULL)
1746                 goto kill_task;
1747
1748         isc_task_setname(disp->task, "tcpdispatch", disp);
1749
1750         dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
1751         disp->tcpmsg_valid = 1;
1752
1753         disp->attributes = attributes;
1754
1755         /*
1756          * Append it to the dispatcher list.
1757          */
1758         ISC_LIST_APPEND(mgr->list, disp, link);
1759         UNLOCK(&mgr->lock);
1760
1761         mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
1762         dispatch_log(disp, LVL(90), "created task %p", disp->task);
1763
1764         *dispp = disp;
1765
1766         return (ISC_R_SUCCESS);
1767
1768         /*
1769          * Error returns.
1770          */
1771  kill_task:
1772         isc_task_detach(&disp->task);
1773  kill_socket:
1774         isc_socket_detach(&disp->socket);
1775  deallocate_dispatch:
1776         dispatch_free(&disp);
1777
1778         UNLOCK(&mgr->lock);
1779
1780         return (result);
1781 }
1782
1783 isc_result_t
1784 dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
1785                     isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
1786                     unsigned int buffersize,
1787                     unsigned int maxbuffers, unsigned int maxrequests,
1788                     unsigned int buckets, unsigned int increment,
1789                     unsigned int attributes, unsigned int mask,
1790                     dns_dispatch_t **dispp)
1791 {
1792         isc_result_t result;
1793         dns_dispatch_t *disp = NULL;
1794
1795         REQUIRE(VALID_DISPATCHMGR(mgr));
1796         REQUIRE(sockmgr != NULL);
1797         REQUIRE(localaddr != NULL);
1798         REQUIRE(taskmgr != NULL);
1799         REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
1800         REQUIRE(maxbuffers > 0);
1801         REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
1802         REQUIRE(increment > buckets);
1803         REQUIRE(dispp != NULL && *dispp == NULL);
1804         REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
1805
1806         result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
1807                                         buckets, increment);
1808         if (result != ISC_R_SUCCESS)
1809                 return (result);
1810
1811         LOCK(&mgr->lock);
1812
1813         if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) != 0) {
1814                 REQUIRE(isc_sockaddr_getport(localaddr) == 0);
1815                 goto createudp;
1816         }
1817
1818         /*
1819          * First, see if we have a dispatcher that matches.
1820          */
1821         disp = NULL;
1822         result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
1823         if (result == ISC_R_SUCCESS) {
1824                 disp->refcount++;
1825
1826                 if (disp->maxrequests < maxrequests)
1827                         disp->maxrequests = maxrequests;
1828
1829                 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 &&
1830                     (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1831                 {
1832                         disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
1833                         if (disp->recv_pending != 0)
1834                                 isc_socket_cancel(disp->socket, disp->task,
1835                                                   ISC_SOCKCANCEL_RECV);
1836                 }
1837
1838                 UNLOCK(&disp->lock);
1839                 UNLOCK(&mgr->lock);
1840
1841                 *dispp = disp;
1842
1843                 return (ISC_R_SUCCESS);
1844         }
1845
1846  createudp:
1847         /*
1848          * Nope, create one.
1849          */
1850         result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
1851                                     maxrequests, attributes, &disp);
1852         if (result != ISC_R_SUCCESS) {
1853                 UNLOCK(&mgr->lock);
1854                 return (result);
1855         }
1856
1857         UNLOCK(&mgr->lock);
1858         *dispp = disp;
1859         return (ISC_R_SUCCESS);
1860 }
1861
1862 /*
1863  * mgr should be locked.
1864  */
1865
1866 #ifndef DNS_DISPATCH_HELD
1867 #define DNS_DISPATCH_HELD 20U
1868 #endif
1869
1870 static isc_result_t
1871 dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
1872                    isc_taskmgr_t *taskmgr,
1873                    isc_sockaddr_t *localaddr,
1874                    unsigned int maxrequests,
1875                    unsigned int attributes,
1876                    dns_dispatch_t **dispp)
1877 {
1878         isc_result_t result;
1879         dns_dispatch_t *disp;
1880         isc_socket_t *sock = NULL;
1881         isc_socket_t *held[DNS_DISPATCH_HELD];
1882         unsigned int i = 0, j = 0, k = 0;
1883         isc_sockaddr_t localaddr_bound;
1884         in_port_t localport = 0;
1885
1886         /*
1887          * dispatch_allocate() checks mgr for us.
1888          */
1889         disp = NULL;
1890         result = dispatch_allocate(mgr, maxrequests, &disp);
1891         if (result != ISC_R_SUCCESS)
1892                 return (result);
1893
1894         /*
1895          * Try to allocate a socket that is not on the blacklist.
1896          * Hold up to DNS_DISPATCH_HELD sockets to prevent the OS
1897          * from returning the same port to us too quickly.
1898          */
1899         memset(held, 0, sizeof(held));
1900         localaddr_bound = *localaddr;
1901  getsocket:
1902         if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) != 0) {
1903                 in_port_t prt;
1904
1905                 /* XXX: should the range be configurable? */
1906                 prt = 1024 + dispatch_arc4uniformrandom(mgr, 65535 - 1023);
1907                 isc_sockaddr_setport(&localaddr_bound, prt);
1908                 if (blacklisted(mgr, NULL, &localaddr_bound)) {
1909                         if (++k == 1024)
1910                                 attributes &= ~DNS_DISPATCHATTR_RANDOMPORT;
1911                         goto getsocket;
1912                 }
1913                 result = create_socket(sockmgr, &localaddr_bound, &sock);
1914                 if (result == ISC_R_ADDRINUSE) {
1915                         if (++k == 1024)
1916                                 attributes &= ~DNS_DISPATCHATTR_RANDOMPORT;
1917                         goto getsocket;
1918                 }
1919                 localport = prt;
1920         } else
1921                 result = create_socket(sockmgr, localaddr, &sock);
1922         if (result != ISC_R_SUCCESS)
1923                 goto deallocate_dispatch;
1924         if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) == 0 &&
1925             isc_sockaddr_getport(localaddr) == 0 &&
1926             blacklisted(mgr, sock, NULL))
1927         {
1928                 if (held[i] != NULL)
1929                         isc_socket_detach(&held[i]);
1930                 held[i++] = sock;
1931                 sock = NULL;
1932                 if (i == DNS_DISPATCH_HELD)
1933                         i = 0;
1934                 if (j++ == 0xffffU) {
1935                         mgr_log(mgr, ISC_LOG_ERROR, "avoid-v%s-udp-ports: "
1936                                 "unable to allocate a non-blacklisted port",
1937                                 isc_sockaddr_pf(localaddr) == AF_INET ?
1938                                         "4" : "6");
1939                         result = ISC_R_FAILURE;
1940                         goto deallocate_dispatch;
1941                 }
1942                 goto getsocket;
1943         }
1944
1945         disp->socktype = isc_sockettype_udp;
1946         disp->socket = sock;
1947         disp->local = *localaddr;
1948         disp->localport = localport;
1949
1950         disp->task = NULL;
1951         result = isc_task_create(taskmgr, 0, &disp->task);
1952         if (result != ISC_R_SUCCESS)
1953                 goto kill_socket;
1954
1955         disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
1956                                             DNS_EVENT_DISPATCHCONTROL,
1957                                             destroy_disp, disp,
1958                                             sizeof(isc_event_t));
1959         if (disp->ctlevent == NULL)
1960                 goto kill_task;
1961
1962         isc_task_setname(disp->task, "udpdispatch", disp);
1963
1964         attributes &= ~DNS_DISPATCHATTR_TCP;
1965         attributes |= DNS_DISPATCHATTR_UDP;
1966         disp->attributes = attributes;
1967
1968         /*
1969          * Append it to the dispatcher list.
1970          */
1971         ISC_LIST_APPEND(mgr->list, disp, link);
1972
1973         mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
1974         dispatch_log(disp, LVL(90), "created task %p", disp->task);
1975         dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
1976
1977         *dispp = disp;
1978
1979         goto cleanheld;
1980
1981         /*
1982          * Error returns.
1983          */
1984  kill_task:
1985         isc_task_detach(&disp->task);
1986  kill_socket:
1987         isc_socket_detach(&disp->socket);
1988  deallocate_dispatch:
1989         dispatch_free(&disp);
1990  cleanheld:
1991         for (i = 0; i < DNS_DISPATCH_HELD; i++)
1992                 if (held[i] != NULL)
1993                         isc_socket_detach(&held[i]);
1994         return (result);
1995 }
1996
1997 void
1998 dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
1999         REQUIRE(VALID_DISPATCH(disp));
2000         REQUIRE(dispp != NULL && *dispp == NULL);
2001
2002         LOCK(&disp->lock);
2003         disp->refcount++;
2004         UNLOCK(&disp->lock);
2005
2006         *dispp = disp;
2007 }
2008
2009 /*
2010  * It is important to lock the manager while we are deleting the dispatch,
2011  * since dns_dispatch_getudp will call dispatch_find, which returns to
2012  * the caller a dispatch but does not attach to it until later.  _getudp
2013  * locks the manager, however, so locking it here will keep us from attaching
2014  * to a dispatcher that is in the process of going away.
2015  */
2016 void
2017 dns_dispatch_detach(dns_dispatch_t **dispp) {
2018         dns_dispatch_t *disp;
2019         isc_boolean_t killit;
2020
2021         REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
2022
2023         disp = *dispp;
2024         *dispp = NULL;
2025
2026         LOCK(&disp->lock);
2027
2028         INSIST(disp->refcount > 0);
2029         disp->refcount--;
2030         killit = ISC_FALSE;
2031         if (disp->refcount == 0) {
2032                 if (disp->recv_pending > 0)
2033                         isc_socket_cancel(disp->socket, disp->task,
2034                                           ISC_SOCKCANCEL_RECV);
2035                 disp->shutting_down = 1;
2036         }
2037
2038         dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
2039
2040         killit = destroy_disp_ok(disp);
2041         UNLOCK(&disp->lock);
2042         if (killit)
2043                 isc_task_send(disp->task, &disp->ctlevent);
2044 }
2045
2046 isc_result_t
2047 dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
2048                          isc_task_t *task, isc_taskaction_t action, void *arg,
2049                          dns_messageid_t *idp, dns_dispentry_t **resp)
2050 {
2051         dns_dispentry_t *res;
2052         unsigned int bucket;
2053         dns_messageid_t id;
2054         int i;
2055         isc_boolean_t ok;
2056         dns_qid_t *qid;
2057
2058         REQUIRE(VALID_DISPATCH(disp));
2059         REQUIRE(task != NULL);
2060         REQUIRE(dest != NULL);
2061         REQUIRE(resp != NULL && *resp == NULL);
2062         REQUIRE(idp != NULL);
2063
2064         LOCK(&disp->lock);
2065
2066         if (disp->shutting_down == 1) {
2067                 UNLOCK(&disp->lock);
2068                 return (ISC_R_SHUTTINGDOWN);
2069         }
2070
2071         if (disp->requests >= disp->maxrequests) {
2072                 UNLOCK(&disp->lock);
2073                 return (ISC_R_QUOTA);
2074         }
2075
2076         /*
2077          * Try somewhat hard to find an unique ID.
2078          */
2079         id = (dns_messageid_t)dispatch_arc4random(disp->mgr);
2080         qid = DNS_QID(disp);
2081         LOCK(&qid->lock);
2082         bucket = dns_hash(qid, dest, id, disp->localport);
2083         ok = ISC_FALSE;
2084         for (i = 0; i < 64; i++) {
2085                 if (bucket_search(qid, dest, id, disp->localport, bucket) ==
2086                     NULL) {
2087                         ok = ISC_TRUE;
2088                         break;
2089                 }
2090                 id += qid->qid_increment;
2091                 id &= 0x0000ffff;
2092                 bucket = dns_hash(qid, dest, id, disp->localport);
2093         }
2094
2095         if (!ok) {
2096                 UNLOCK(&qid->lock);
2097                 UNLOCK(&disp->lock);
2098                 return (ISC_R_NOMORE);
2099         }
2100
2101         res = isc_mempool_get(disp->mgr->rpool);
2102         if (res == NULL) {
2103                 UNLOCK(&qid->lock);
2104                 UNLOCK(&disp->lock);
2105                 return (ISC_R_NOMEMORY);
2106         }
2107
2108         disp->refcount++;
2109         disp->requests++;
2110         res->task = NULL;
2111         isc_task_attach(task, &res->task);
2112         res->disp = disp;
2113         res->id = id;
2114         res->port = disp->localport;
2115         res->bucket = bucket;
2116         res->host = *dest;
2117         res->action = action;
2118         res->arg = arg;
2119         res->item_out = ISC_FALSE;
2120         ISC_LIST_INIT(res->items);
2121         ISC_LINK_INIT(res, link);
2122         res->magic = RESPONSE_MAGIC;
2123         ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
2124         UNLOCK(&qid->lock);
2125
2126         request_log(disp, res, LVL(90),
2127                     "attached to task %p", res->task);
2128
2129         if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
2130             ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0))
2131                 startrecv(disp);
2132
2133         UNLOCK(&disp->lock);
2134
2135         *idp = id;
2136         *resp = res;
2137
2138         return (ISC_R_SUCCESS);
2139 }
2140
2141 void
2142 dns_dispatch_starttcp(dns_dispatch_t *disp) {
2143
2144         REQUIRE(VALID_DISPATCH(disp));
2145
2146         dispatch_log(disp, LVL(90), "starttcp %p", disp->task);
2147
2148         LOCK(&disp->lock);
2149         disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
2150         startrecv(disp);
2151         UNLOCK(&disp->lock);
2152 }
2153
2154 void
2155 dns_dispatch_removeresponse(dns_dispentry_t **resp,
2156                             dns_dispatchevent_t **sockevent)
2157 {
2158         dns_dispatchmgr_t *mgr;
2159         dns_dispatch_t *disp;
2160         dns_dispentry_t *res;
2161         dns_dispatchevent_t *ev;
2162         unsigned int bucket;
2163         isc_boolean_t killit;
2164         unsigned int n;
2165         isc_eventlist_t events;
2166         dns_qid_t *qid;
2167
2168         REQUIRE(resp != NULL);
2169         REQUIRE(VALID_RESPONSE(*resp));
2170
2171         res = *resp;
2172         *resp = NULL;
2173
2174         disp = res->disp;
2175         REQUIRE(VALID_DISPATCH(disp));
2176         mgr = disp->mgr;
2177         REQUIRE(VALID_DISPATCHMGR(mgr));
2178
2179         qid = DNS_QID(disp);
2180
2181         if (sockevent != NULL) {
2182                 REQUIRE(*sockevent != NULL);
2183                 ev = *sockevent;
2184                 *sockevent = NULL;
2185         } else {
2186                 ev = NULL;
2187         }
2188
2189         LOCK(&disp->lock);
2190
2191         INSIST(disp->requests > 0);
2192         disp->requests--;
2193         INSIST(disp->refcount > 0);
2194         disp->refcount--;
2195         killit = ISC_FALSE;
2196         if (disp->refcount == 0) {
2197                 if (disp->recv_pending > 0)
2198                         isc_socket_cancel(disp->socket, disp->task,
2199                                           ISC_SOCKCANCEL_RECV);
2200                 disp->shutting_down = 1;
2201         }
2202
2203         bucket = res->bucket;
2204
2205         LOCK(&qid->lock);
2206         ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
2207         UNLOCK(&qid->lock);
2208
2209         if (ev == NULL && res->item_out) {
2210                 /*
2211                  * We've posted our event, but the caller hasn't gotten it
2212                  * yet.  Take it back.
2213                  */
2214                 ISC_LIST_INIT(events);
2215                 n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
2216                                     NULL, &events);
2217                 /*
2218                  * We had better have gotten it back.
2219                  */
2220                 INSIST(n == 1);
2221                 ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
2222         }
2223
2224         if (ev != NULL) {
2225                 REQUIRE(res->item_out == ISC_TRUE);
2226                 res->item_out = ISC_FALSE;
2227                 if (ev->buffer.base != NULL)
2228                         free_buffer(disp, ev->buffer.base, ev->buffer.length);
2229                 free_event(disp, ev);
2230         }
2231
2232         request_log(disp, res, LVL(90), "detaching from task %p", res->task);
2233         isc_task_detach(&res->task);
2234
2235         /*
2236          * Free any buffered requests as well
2237          */
2238         ev = ISC_LIST_HEAD(res->items);
2239         while (ev != NULL) {
2240                 ISC_LIST_UNLINK(res->items, ev, ev_link);
2241                 if (ev->buffer.base != NULL)
2242                         free_buffer(disp, ev->buffer.base, ev->buffer.length);
2243                 free_event(disp, ev);
2244                 ev = ISC_LIST_HEAD(res->items);
2245         }
2246         res->magic = 0;
2247         isc_mempool_put(disp->mgr->rpool, res);
2248         if (disp->shutting_down == 1)
2249                 do_cancel(disp);
2250         else
2251                 startrecv(disp);
2252
2253         killit = destroy_disp_ok(disp);
2254         UNLOCK(&disp->lock);
2255         if (killit)
2256                 isc_task_send(disp->task, &disp->ctlevent);
2257 }
2258
2259 static void
2260 do_cancel(dns_dispatch_t *disp) {
2261         dns_dispatchevent_t *ev;
2262         dns_dispentry_t *resp;
2263         dns_qid_t *qid;
2264
2265         if (disp->shutdown_out == 1)
2266                 return;
2267
2268         qid = DNS_QID(disp);
2269
2270         /*
2271          * Search for the first response handler without packets outstanding.
2272          */
2273         LOCK(&qid->lock);
2274         for (resp = linear_first(qid);
2275              resp != NULL && resp->item_out != ISC_FALSE;
2276              /* Empty. */)
2277                 resp = linear_next(qid, resp);
2278         /*
2279          * No one to send the cancel event to, so nothing to do.
2280          */
2281         if (resp == NULL)
2282                 goto unlock;
2283
2284         /*
2285          * Send the shutdown failsafe event to this resp.
2286          */
2287         ev = disp->failsafe_ev;
2288         ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
2289                        resp->action, resp->arg, resp, NULL, NULL);
2290         ev->result = disp->shutdown_why;
2291         ev->buffer.base = NULL;
2292         ev->buffer.length = 0;
2293         disp->shutdown_out = 1;
2294         request_log(disp, resp, LVL(10),
2295                     "cancel: failsafe event %p -> task %p",
2296                     ev, resp->task);
2297         resp->item_out = ISC_TRUE;
2298         isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
2299  unlock:
2300         UNLOCK(&qid->lock);
2301 }
2302
2303 isc_socket_t *
2304 dns_dispatch_getsocket(dns_dispatch_t *disp) {
2305         REQUIRE(VALID_DISPATCH(disp));
2306
2307         return (disp->socket);
2308 }
2309
2310 isc_result_t
2311 dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
2312
2313         REQUIRE(VALID_DISPATCH(disp));
2314         REQUIRE(addrp != NULL);
2315
2316         if (disp->socktype == isc_sockettype_udp) {
2317                 *addrp = disp->local;
2318                 return (ISC_R_SUCCESS);
2319         }
2320         return (ISC_R_NOTIMPLEMENTED);
2321 }
2322
2323 void
2324 dns_dispatch_cancel(dns_dispatch_t *disp) {
2325         REQUIRE(VALID_DISPATCH(disp));
2326
2327         LOCK(&disp->lock);
2328
2329         if (disp->shutting_down == 1) {
2330                 UNLOCK(&disp->lock);
2331                 return;
2332         }
2333
2334         disp->shutdown_why = ISC_R_CANCELED;
2335         disp->shutting_down = 1;
2336         do_cancel(disp);
2337
2338         UNLOCK(&disp->lock);
2339
2340         return;
2341 }
2342
2343 void
2344 dns_dispatch_changeattributes(dns_dispatch_t *disp,
2345                               unsigned int attributes, unsigned int mask)
2346 {
2347         REQUIRE(VALID_DISPATCH(disp));
2348
2349         /* XXXMLG
2350          * Should check for valid attributes here!
2351          */
2352
2353         LOCK(&disp->lock);
2354
2355         if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
2356                 if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
2357                     (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
2358                         disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
2359                         startrecv(disp);
2360                 } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
2361                            == 0 &&
2362                            (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
2363                         disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2364                         if (disp->recv_pending != 0)
2365                                 isc_socket_cancel(disp->socket, disp->task,
2366                                                   ISC_SOCKCANCEL_RECV);
2367                 }
2368         }
2369
2370         disp->attributes &= ~mask;
2371         disp->attributes |= (attributes & mask);
2372         UNLOCK(&disp->lock);
2373 }
2374
2375 void
2376 dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
2377         void *buf;
2378         isc_socketevent_t *sevent, *newsevent;
2379
2380         REQUIRE(VALID_DISPATCH(disp));
2381         REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
2382         REQUIRE(event != NULL);
2383
2384         sevent = (isc_socketevent_t *)event;
2385
2386         INSIST(sevent->n <= disp->mgr->buffersize);
2387         newsevent = (isc_socketevent_t *)
2388                     isc_event_allocate(disp->mgr->mctx, NULL,
2389                                       DNS_EVENT_IMPORTRECVDONE, udp_recv,
2390                                       disp, sizeof(isc_socketevent_t));
2391         if (newsevent == NULL)
2392                 return;
2393
2394         buf = allocate_udp_buffer(disp);
2395         if (buf == NULL) {
2396                 isc_event_free(ISC_EVENT_PTR(&newsevent));
2397                 return;
2398         }
2399         memcpy(buf, sevent->region.base, sevent->n);
2400         newsevent->region.base = buf;
2401         newsevent->region.length = disp->mgr->buffersize;
2402         newsevent->n = sevent->n;
2403         newsevent->result = sevent->result;
2404         newsevent->address = sevent->address;
2405         newsevent->timestamp = sevent->timestamp;
2406         newsevent->pktinfo = sevent->pktinfo;
2407         newsevent->attributes = sevent->attributes;
2408         
2409         isc_task_send(disp->task, ISC_EVENT_PTR(&newsevent));
2410 }
2411
#if 0
/*
 * Debugging aid (compiled out): print every dispatcher on the manager's
 * list together with its formatted local address.
 */
void
dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
        dns_dispatch_t *disp;
        char addrtext[1024];

        for (disp = ISC_LIST_HEAD(mgr->list);
             disp != NULL;
             disp = ISC_LIST_NEXT(disp, link))
        {
                isc_sockaddr_format(&disp->local, addrtext, sizeof(addrtext));
                printf("\tdispatch %p, addr %s\n", disp, addrtext);
        }
}
#endif