Merge from vendor branch GCC:
[dragonfly.git] / contrib / bind-9.2.4rc7 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.168.2.18 2004/05/23 11:05:21 marka Exp $ */
19
20 /*
21  * Principal Author: Bob Halley
22  */
23
24 #include <config.h>
25
26 #include <isc/event.h>
27 #include <isc/mem.h>
28 #include <isc/print.h>
29 #include <isc/mutex.h>
30 #include <isc/random.h>
31 #include <isc/refcount.h>
32 #include <isc/rwlock.h>
33 #include <isc/string.h>
34 #include <isc/task.h>
35 #include <isc/util.h>
36
37 #include <dns/db.h>
38 #include <dns/dbiterator.h>
39 #include <dns/events.h>
40 #include <dns/fixedname.h>
41 #include <dns/log.h>
42 #include <dns/masterdump.h>
43 #include <dns/rbt.h>
44 #include <dns/rdata.h>
45 #include <dns/rdataset.h>
46 #include <dns/rdatasetiter.h>
47 #include <dns/rdataslab.h>
48 #include <dns/result.h>
49 #include <dns/zonekey.h>
50
51 #ifdef DNS_RBTDB_VERSION64
52 #include "rbtdb64.h"
53 #else
54 #include "rbtdb.h"
55 #endif
56
57 #ifdef DNS_RBTDB_VERSION64
58 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
59 #else
60 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
61 #endif
62
63 /*
64  * Note that "impmagic" is not the first four bytes of the struct, so
65  * ISC_MAGIC_VALID cannot be used.
66  */
67 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
68                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
69
70 #ifdef DNS_RBTDB_VERSION64
71 typedef isc_uint64_t                    rbtdb_serial_t;
72 #else
73 typedef isc_uint32_t                    rbtdb_serial_t;
74 #endif
75
76 typedef isc_uint32_t                    rbtdb_rdatatype_t;
77
78 #define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
79 #define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
80 #define RBTDB_RDATATYPE_VALUE(b, e)     (((e) << 16) | (b))
81
82 #define RBTDB_RDATATYPE_SIGNXT \
83                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_sig, dns_rdatatype_nxt)
84 #define RBTDB_RDATATYPE_SIGNS \
85                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_sig, dns_rdatatype_ns)
86 #define RBTDB_RDATATYPE_SIGCNAME \
87                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_sig, dns_rdatatype_cname)
88 #define RBTDB_RDATATYPE_SIGDNAME \
89                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_sig, dns_rdatatype_dname)
90 #define RBTDB_RDATATYPE_NCACHEANY \
91                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
92
93 typedef struct rdatasetheader {
94         /*
95          * Locked by the owning node's lock.
96          */
97         rbtdb_serial_t                  serial;
98         dns_ttl_t                       ttl;
99         rbtdb_rdatatype_t               type;
100         isc_uint16_t                    attributes;
101         dns_trust_t                     trust;
102         /*
103          * We don't use the LIST macros, because the LIST structure has
104          * both head and tail pointers, and is doubly linked.
105          */
106         struct rdatasetheader           *next;
107         struct rdatasetheader           *down;
108 } rdatasetheader_t;
109
110 #define RDATASET_ATTR_NONEXISTENT       0x0001
111 #define RDATASET_ATTR_STALE             0x0002
112 #define RDATASET_ATTR_IGNORE            0x0004
113 #define RDATASET_ATTR_RETAIN            0x0008
114 #define RDATASET_ATTR_NXDOMAIN          0x0010
115
116 /*
117  * XXX
118  * When the cache will pre-expire data (due to memory low or other
119  * situations) before the rdataset's TTL has expired, it MUST
120  * respect the RETAIN bit and not expire the data until its TTL is
121  * expired.
122  */
123
124 #undef IGNORE                   /* WIN32 winbase.h defines this. */
125
126 #define EXISTS(header) \
127         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
128 #define NONEXISTENT(header) \
129         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
130 #define IGNORE(header) \
131         (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
132 #define RETAIN(header) \
133         (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
134 #define NXDOMAIN(header) \
135         (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
136
137 #define DEFAULT_NODE_LOCK_COUNT         7               /* Should be prime. */
138
139 typedef struct {
140         isc_mutex_t                     lock;
141         /* Locked by lock. */
142         unsigned int                    references;
143         isc_boolean_t                   exiting;
144 } rbtdb_nodelock_t;
145
146 typedef struct rbtdb_changed {
147         dns_rbtnode_t *                 node;
148         isc_boolean_t                   dirty;
149         ISC_LINK(struct rbtdb_changed)  link;
150 } rbtdb_changed_t;
151
152 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
153
154 typedef struct rbtdb_version {
155         /* Not locked */
156         rbtdb_serial_t                  serial;
157         /* Locked by database lock. */
158         isc_boolean_t                   writer;
159         unsigned int                    references;
160         isc_boolean_t                   commit_ok;
161         rbtdb_changedlist_t             changed_list;
162         ISC_LINK(struct rbtdb_version)  link;
163 } rbtdb_version_t;
164
165 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
166
167 typedef struct {
168         /* Unlocked. */
169         dns_db_t                        common;
170         isc_mutex_t                     lock;
171         isc_rwlock_t                    tree_lock;
172         unsigned int                    node_lock_count;
173         rbtdb_nodelock_t *              node_locks;
174         dns_rbtnode_t *                 origin_node;
175         /* Locked by lock. */
176         unsigned int                    active;
177         isc_refcount_t                  references;
178         unsigned int                    attributes;
179         rbtdb_serial_t                  current_serial;
180         rbtdb_serial_t                  least_serial;
181         rbtdb_serial_t                  next_serial;
182         rbtdb_version_t *               current_version;
183         rbtdb_version_t *               future_version;
184         rbtdb_versionlist_t             open_versions;
185         isc_boolean_t                   overmem;
186         isc_task_t *                    task;
187         /* Locked by tree_lock. */
188         dns_rbt_t *                     tree;
189         isc_boolean_t                   secure;
190 } dns_rbtdb_t;
191
192 #define RBTDB_ATTR_LOADED               0x01
193 #define RBTDB_ATTR_LOADING              0x02
194
195 /*
196  * Search Context
197  */
198 typedef struct {
199         dns_rbtdb_t *           rbtdb;
200         rbtdb_version_t *       rbtversion;
201         rbtdb_serial_t          serial;
202         unsigned int            options;
203         dns_rbtnodechain_t      chain;
204         isc_boolean_t           copy_name;
205         isc_boolean_t           need_cleanup;
206         isc_boolean_t           wild;
207         dns_rbtnode_t *         zonecut;
208         rdatasetheader_t *      zonecut_rdataset;
209         rdatasetheader_t *      zonecut_sigrdataset;
210         dns_fixedname_t         zonecut_name;
211         isc_stdtime_t           now;
212 } rbtdb_search_t;
213
214 /*
215  * Load Context
216  */
217 typedef struct {
218         dns_rbtdb_t *           rbtdb;
219         isc_stdtime_t           now;
220 } rbtdb_load_t;
221
222 static void rdataset_disassociate(dns_rdataset_t *rdataset);
223 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
224 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
225 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
226 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
227 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
228
229 static dns_rdatasetmethods_t rdataset_methods = {
230         rdataset_disassociate,
231         rdataset_first,
232         rdataset_next,
233         rdataset_current,
234         rdataset_clone,
235         rdataset_count
236 };
237
238 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
239 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
240 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
241 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
242                                  dns_rdataset_t *rdataset);
243
244 static dns_rdatasetitermethods_t rdatasetiter_methods = {
245         rdatasetiter_destroy,
246         rdatasetiter_first,
247         rdatasetiter_next,
248         rdatasetiter_current
249 };
250
251 typedef struct rbtdb_rdatasetiter {
252         dns_rdatasetiter_t              common;
253         rdatasetheader_t *              current;
254 } rbtdb_rdatasetiter_t;
255
256 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
257 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
258 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
259 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
260                                         dns_name_t *name);
261 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
262 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
263 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
264                                            dns_dbnode_t **nodep,
265                                            dns_name_t *name);
266 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
267 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
268                                           dns_name_t *name);
269
270 static dns_dbiteratormethods_t dbiterator_methods = {
271         dbiterator_destroy,
272         dbiterator_first,
273         dbiterator_last,
274         dbiterator_seek,
275         dbiterator_prev,
276         dbiterator_next,
277         dbiterator_current,
278         dbiterator_pause,
279         dbiterator_origin
280 };
281
282 #define DELETION_BATCH_MAX 64
283
284 /*
285  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
286  */
287 typedef struct rbtdb_dbiterator {
288         dns_dbiterator_t                common;
289         isc_boolean_t                   paused;
290         isc_boolean_t                   new_origin;
291         isc_rwlocktype_t                tree_locked;
292         isc_result_t                    result;
293         dns_fixedname_t                 name;
294         dns_fixedname_t                 origin;
295         dns_rbtnodechain_t              chain;
296         dns_rbtnode_t                   *node;
297         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
298         int                             delete;
299 } rbtdb_dbiterator_t;
300
301
302 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
303 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
304
305 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
306                        isc_event_t *event);
307
308 /*
309  * Locking
310  *
311  * If a routine is going to lock more than one lock in this module, then
312  * the locking must be done in the following order:
313  *
314  *      Tree Lock
315  *
316  *      Node Lock       (Only one from the set may be locked at one time by
317  *                       any caller)
318  *
319  *      Database Lock
320  *
321  * Failure to follow this hierarchy can result in deadlock.
322  */
323
324 /*
325  * Deleting Nodes
326  *
327  * Currently there is no deletion of nodes from the database, except when
328  * the database is being destroyed.
329  *
330  * If node deletion is added in the future, then for zone databases the node
331  * for the origin of the zone MUST NOT be deleted.
332  */
333
334
335 /*
336  * DB Routines
337  */
338
339 static void
340 attach(dns_db_t *source, dns_db_t **targetp) {
341         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
342
343         REQUIRE(VALID_RBTDB(rbtdb));
344
345         isc_refcount_increment(&rbtdb->references, NULL);
346
347         *targetp = source;
348 }
349
350 static void
351 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
352         dns_rbtdb_t *rbtdb = event->ev_arg;
353
354         UNUSED(task);
355
356         free_rbtdb(rbtdb, ISC_TRUE, event);
357 }
358
359 static void
360 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
361         unsigned int i;
362         isc_ondestroy_t ondest;
363         isc_result_t result;
364         char buf[DNS_NAME_FORMATSIZE];
365
366         REQUIRE(EMPTY(rbtdb->open_versions));
367         REQUIRE(rbtdb->future_version == NULL);
368
369         if (rbtdb->current_version != NULL)
370                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
371                             sizeof (rbtdb_version_t));
372  again:
373         if (rbtdb->tree != NULL) {
374                 result = dns_rbt_destroy2(&rbtdb->tree,
375                                           (rbtdb->task != NULL) ? 5 : 0);
376                 if (result == ISC_R_QUOTA) {
377                         INSIST(rbtdb->task != NULL);
378                         if (event == NULL)
379                                 event = isc_event_allocate(rbtdb->common.mctx,
380                                                            NULL,
381                                                          DNS_EVENT_FREESTORAGE,
382                                                            free_rbtdb_callback,
383                                                            rbtdb,
384                                                            sizeof(isc_event_t));
385                         if (event == NULL)
386                                 goto again;
387                         isc_task_send(rbtdb->task, &event);
388                         return;
389                 }
390                 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
391         }
392         if (event != NULL)
393                 isc_event_free(&event);
394         if (log) {
395                 if (dns_name_dynamic(&rbtdb->common.origin))
396                         dns_name_format(&rbtdb->common.origin, buf,
397                                         sizeof(buf));
398                 else
399                         strcpy(buf, "<UNKNOWN>");
400                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
401                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
402                               "done free_rbtdb(%s)", buf);
403         }
404         if (dns_name_dynamic(&rbtdb->common.origin))
405                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
406         for (i = 0; i < rbtdb->node_lock_count; i++)
407                 DESTROYLOCK(&rbtdb->node_locks[i].lock);
408         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
409                     rbtdb->node_lock_count * sizeof (rbtdb_nodelock_t));
410         isc_rwlock_destroy(&rbtdb->tree_lock);
411         isc_refcount_destroy(&rbtdb->references);
412         if (rbtdb->task != NULL)
413                 isc_task_detach(&rbtdb->task);
414         DESTROYLOCK(&rbtdb->lock);
415         rbtdb->common.magic = 0;
416         rbtdb->common.impmagic = 0;
417         ondest = rbtdb->common.ondest;
418         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
419         isc_ondestroy_notify(&ondest, rbtdb);
420 }
421
422 static inline void
423 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
424         isc_boolean_t want_free = ISC_FALSE;
425         unsigned int i;
426         unsigned int inactive = 0;
427
428         /* XXX check for open versions here */
429
430         /*
431          * Even though there are no external direct references, there still
432          * may be nodes in use.
433          */
434         for (i = 0; i < rbtdb->node_lock_count; i++) {
435                 LOCK(&rbtdb->node_locks[i].lock);
436                 rbtdb->node_locks[i].exiting = ISC_TRUE;
437                 if (rbtdb->node_locks[i].references == 0)
438                         inactive++;
439                 UNLOCK(&rbtdb->node_locks[i].lock);
440         }
441
442         if (inactive != 0) {
443                 LOCK(&rbtdb->lock);
444                 rbtdb->active -= inactive;
445                 if (rbtdb->active == 0)
446                         want_free = ISC_TRUE;
447                 UNLOCK(&rbtdb->lock);
448                 if (want_free) {
449                         char buf[DNS_NAME_FORMATSIZE];
450                         if (dns_name_dynamic(&rbtdb->common.origin))
451                                 dns_name_format(&rbtdb->common.origin, buf,
452                                                 sizeof(buf));
453                         else
454                                 strcpy(buf, "<UNKNOWN>");
455                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
456                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
457                                       "calling free_rbtdb(%s)", buf);
458                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
459                 }
460         }
461 }
462
463 static void
464 detach(dns_db_t **dbp) {
465         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
466         unsigned int refs;
467
468         REQUIRE(VALID_RBTDB(rbtdb));
469
470         isc_refcount_decrement(&rbtdb->references, &refs);
471
472         if (refs == 0)
473                 maybe_free_rbtdb(rbtdb);
474
475         *dbp = NULL;
476 }
477
478 static void
479 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
480         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
481         rbtdb_version_t *version;
482
483         REQUIRE(VALID_RBTDB(rbtdb));
484
485         LOCK(&rbtdb->lock);
486         version = rbtdb->current_version;
487         if (version->references == 0)
488                 PREPEND(rbtdb->open_versions, version, link);
489         version->references++;
490         UNLOCK(&rbtdb->lock);
491
492         *versionp = (dns_dbversion_t *)version;
493 }
494
495 static inline rbtdb_version_t *
496 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
497                  unsigned int references, isc_boolean_t writer)
498 {
499         rbtdb_version_t *version;
500
501         version = isc_mem_get(mctx, sizeof *version);
502         if (version == NULL)
503                 return (NULL);
504         version->serial = serial;
505         version->references = references;
506         version->writer = writer;
507         version->commit_ok = ISC_FALSE;
508         ISC_LIST_INIT(version->changed_list);
509         ISC_LINK_INIT(version, link);
510
511         return (version);
512 }
513
514 static isc_result_t
515 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
516         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
517         rbtdb_version_t *version;
518
519         REQUIRE(VALID_RBTDB(rbtdb));
520         REQUIRE(versionp != NULL && *versionp == NULL);
521         REQUIRE(rbtdb->future_version == NULL);
522
523         LOCK(&rbtdb->lock);
524         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
525         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
526                                    ISC_TRUE);
527         if (version != NULL) {
528                 version->commit_ok = ISC_TRUE;
529                 rbtdb->next_serial++;
530                 rbtdb->future_version = version;
531         }
532         UNLOCK(&rbtdb->lock);
533
534         if (version == NULL)
535                 return (ISC_R_NOMEMORY);
536
537         *versionp = version;
538
539         return (ISC_R_SUCCESS);
540 }
541
542 static void
543 attachversion(dns_db_t *db, dns_dbversion_t *source,
544               dns_dbversion_t **targetp)
545 {
546         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
547         rbtdb_version_t *rbtversion = source;
548
549         REQUIRE(VALID_RBTDB(rbtdb));
550
551         LOCK(&rbtdb->lock);
552
553         INSIST(rbtversion->references > 0);
554         rbtversion->references++;
555         INSIST(rbtversion->references != 0);
556
557         UNLOCK(&rbtdb->lock);
558
559         *targetp = rbtversion;
560 }
561
562 static rbtdb_changed_t *
563 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
564             dns_rbtnode_t *node)
565 {
566         rbtdb_changed_t *changed;
567
568         /*
569          * Caller must be holding the node lock.
570          */
571
572         changed = isc_mem_get(rbtdb->common.mctx, sizeof *changed);
573
574         LOCK(&rbtdb->lock);
575
576         REQUIRE(version->writer);
577
578         if (changed != NULL) {
579                 INSIST(node->references > 0);
580                 node->references++;
581                 INSIST(node->references != 0);
582                 changed->node = node;
583                 changed->dirty = ISC_FALSE;
584                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
585         } else
586                 version->commit_ok = ISC_FALSE;
587
588         UNLOCK(&rbtdb->lock);
589
590         return (changed);
591 }
592
593 static inline void
594 free_rdataset(isc_mem_t *mctx, rdatasetheader_t *rdataset) {
595         unsigned int size;
596
597         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
598                 size = sizeof *rdataset;
599         else
600                 size = dns_rdataslab_size((unsigned char *)rdataset,
601                                           sizeof *rdataset);
602         isc_mem_put(mctx, rdataset, size);
603 }
604
605 static inline void
606 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
607         rdatasetheader_t *header, *dcurrent;
608         isc_boolean_t make_dirty = ISC_FALSE;
609
610         /*
611          * Caller must hold the node lock.
612          */
613
614         /*
615          * We set the IGNORE attribute on rdatasets with serial number
616          * 'serial'.  When the reference count goes to zero, these rdatasets
617          * will be cleaned up; until that time, they will be ignored.
618          */
619         for (header = node->data; header != NULL; header = header->next) {
620                 if (header->serial == serial) {
621                         header->attributes |= RDATASET_ATTR_IGNORE;
622                         make_dirty = ISC_TRUE;
623                 }
624                 for (dcurrent = header->down;
625                      dcurrent != NULL;
626                      dcurrent = dcurrent->down) {
627                         if (dcurrent->serial == serial) {
628                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
629                                 make_dirty = ISC_TRUE;
630                         }
631                 }
632         }
633         if (make_dirty)
634                 node->dirty = 1;
635 }
636
637 static inline void
638 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
639         rdatasetheader_t *current, *dcurrent, *top_prev, *top_next, *down_next;
640         isc_mem_t *mctx = rbtdb->common.mctx;
641
642         /*
643          * Caller must be holding the node lock.
644          */
645
646         top_prev = NULL;
647         for (current = node->data; current != NULL; current = top_next) {
648                 top_next = current->next;
649                 dcurrent = current->down;
650                 if (dcurrent != NULL) {
651                         do {
652                                 down_next = dcurrent->down;
653                                 free_rdataset(mctx, dcurrent);
654                                 dcurrent = down_next;
655                         } while (dcurrent != NULL);
656                         current->down = NULL;
657                 }
658                 /*
659                  * If current is nonexistent or stale, we can clean it up.
660                  */
661                 if ((current->attributes &
662                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
663                         if (top_prev != NULL)
664                                 top_prev->next = current->next;
665                         else
666                                 node->data = current->next;
667                         free_rdataset(mctx, current);
668                 } else
669                         top_prev = current;
670         }
671         node->dirty = 0;
672 }
673
674 static inline void
675 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
676                 rbtdb_serial_t least_serial)
677 {
678         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
679         rdatasetheader_t *top_prev, *top_next;
680         isc_mem_t *mctx = rbtdb->common.mctx;
681         isc_boolean_t still_dirty = ISC_FALSE;
682
683         /*
684          * Caller must be holding the node lock.
685          */
686         REQUIRE(least_serial != 0);
687
688         top_prev = NULL;
689         for (current = node->data; current != NULL; current = top_next) {
690                 top_next = current->next;
691
692                 /*
693                  * First, we clean up any instances of multiple rdatasets
694                  * with the same serial number, or that have the IGNORE
695                  * attribute.
696                  */
697                 dparent = current;
698                 for (dcurrent = current->down;
699                      dcurrent != NULL;
700                      dcurrent = down_next) {
701                         down_next = dcurrent->down;
702                         INSIST(dcurrent->serial <= dparent->serial);
703                         if (dcurrent->serial == dparent->serial ||
704                             IGNORE(dcurrent)) {
705                                 if (down_next != NULL)
706                                         down_next->next = dparent;
707                                 dparent->down = down_next;
708                                 free_rdataset(mctx, dcurrent);
709                         } else
710                                 dparent = dcurrent;
711                 }
712
713                 /*
714                  * We've now eliminated all IGNORE datasets with the possible
715                  * exception of current, which we now check.
716                  */
717                 if (IGNORE(current)) {
718                         down_next = current->down;
719                         if (down_next == NULL) {
720                                 if (top_prev != NULL)
721                                         top_prev->next = current->next;
722                                 else
723                                         node->data = current->next;
724                                 free_rdataset(mctx, current);
725                                 /*
726                                  * current no longer exists, so we can
727                                  * just continue with the loop.
728                                  */
729                                 continue;
730                         } else {
731                                 /*
732                                  * Pull up current->down, making it the new
733                                  * current.
734                                  */
735                                 if (top_prev != NULL)
736                                         top_prev->next = down_next;
737                                 else
738                                         node->data = down_next;
739                                 down_next->next = top_next;
740                                 free_rdataset(mctx, current);
741                                 current = down_next;
742                         }
743                 }
744
745                 /*
746                  * We now try to find the first down node less than the
747                  * least serial.
748                  */
749                 dparent = current;
750                 for (dcurrent = current->down;
751                      dcurrent != NULL;
752                      dcurrent = down_next) {
753                         down_next = dcurrent->down;
754                         if (dcurrent->serial < least_serial)
755                                 break;
756                         dparent = dcurrent;
757                 }
758
759                 /*
760                  * If there is a such an rdataset, delete it and any older
761                  * versions.
762                  */
763                 if (dcurrent != NULL) {
764                         do {
765                                 down_next = dcurrent->down;
766                                 INSIST(dcurrent->serial <= least_serial);
767                                 free_rdataset(mctx, dcurrent);
768                                 dcurrent = down_next;
769                         } while (dcurrent != NULL);
770                         dparent->down = NULL;
771                 }
772
773                 /*
774                  * Note.  The serial number of 'current' might be less than
775                  * least_serial too, but we cannot delete it because it is
776                  * the most recent version, unless it is a NONEXISTENT
777                  * rdataset.
778                  */
779                 if (current->down != NULL) {
780                         still_dirty = ISC_TRUE;
781                         top_prev = current;
782                 } else {
783                         /*
784                          * If this is a NONEXISTENT rdataset, we can delete it.
785                          */
786                         if ((current->attributes & RDATASET_ATTR_NONEXISTENT)
787                             != 0) {
788                                 if (top_prev != NULL)
789                                         top_prev->next = current->next;
790                                 else
791                                         node->data = current->next;
792                                 free_rdataset(mctx, current);
793                         } else
794                                 top_prev = current;
795                 }
796         }
797         if (!still_dirty)
798                 node->dirty = 0;
799 }
800
801 static inline void
802 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
803         if (node->references == 0) {
804                 rbtdb->node_locks[node->locknum].references++;
805                 INSIST(rbtdb->node_locks[node->locknum].references != 0);
806         }
807         node->references++;
808         INSIST(node->references != 0);
809 }
810
811 static void
812 no_references(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
813               rbtdb_serial_t least_serial, isc_rwlocktype_t lock)
814 {
815         isc_result_t result;
816         isc_boolean_t write_locked;
817         unsigned int locknum;
818
819         /*
820          * Caller must be holding the node lock.
821          */
822
823         REQUIRE(node->references == 0);
824
825         if (node->dirty) {
826                 if (IS_CACHE(rbtdb))
827                         clean_cache_node(rbtdb, node);
828                 else {
829                         if (least_serial == 0) {
830                                 /*
831                                  * Caller doesn't know the least serial.
832                                  * Get it.
833                                  */
834                                 LOCK(&rbtdb->lock);
835                                 least_serial = rbtdb->least_serial;
836                                 UNLOCK(&rbtdb->lock);
837                         }
838                         clean_zone_node(rbtdb, node, least_serial);
839                 }
840         }
841
842         locknum = node->locknum;
843
844         INSIST(rbtdb->node_locks[locknum].references > 0);
845         rbtdb->node_locks[locknum].references--;
846
847         /*
848          * XXXDCL should this only be done for cache zones?
849          */
850         if (node->data != NULL || node->down != NULL)
851                 return;
852
853         /*
854          * XXXDCL need to add a deferred delete method for ISC_R_LOCKBUSY.
855          */
856         if (lock != isc_rwlocktype_write) {
857                 /*
858                  * Locking hierarchy notwithstanding, we don't need to free
859                  * the node lock before acquiring the tree write lock because
860                  * we only do a trylock.
861                  */
862                 if (lock == isc_rwlocktype_read)
863                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
864                 else
865                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
866                                                     isc_rwlocktype_write);
867                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
868                               result == ISC_R_LOCKBUSY);
869  
870                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
871         } else
872                 write_locked = ISC_TRUE;
873
874         if (write_locked) {
875                 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
876                         char printname[DNS_NAME_FORMATSIZE];
877
878                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
879                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
880                                       "no_references: delete from rbt: %p %s",
881                                       node,
882                                       dns_rbt_formatnodename(node, printname,
883                                                            sizeof(printname)));
884                 }
885
886                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
887                 if (result != ISC_R_SUCCESS)
888                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
889                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
890                                       "no_references: dns_rbt_deletenode: %s",
891                                       isc_result_totext(result));
892         }
893
894         /*
895          * Relock a read lock, or unlock the write lock if no lock was held.
896          */
897         if (lock == isc_rwlocktype_none)
898                 if (write_locked)
899                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
900
901         if (lock == isc_rwlocktype_read)
902                 if (write_locked)
903                         isc_rwlock_downgrade(&rbtdb->tree_lock);
904 }
905
906 static inline void
907 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
908                    rbtdb_changedlist_t *cleanup_list)
909 {
910         /*
911          * Caller must be holding the database lock.
912          */
913
914         rbtdb->least_serial = version->serial;
915         *cleanup_list = version->changed_list;
916         ISC_LIST_INIT(version->changed_list);
917 }
918
919 static inline void
920 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
921         rbtdb_changed_t *changed, *next_changed;
922
923         /*
924          * If the changed record is dirty, then
925          * an update created multiple versions of
926          * a given rdataset.  We keep this list
927          * until we're the least open version, at
928          * which point it's safe to get rid of any
929          * older versions.
930          *
931          * If the changed record isn't dirty, then
932          * we don't need it anymore since we're
933          * committing and not rolling back.
934          *
935          * The caller must be holding the database lock.
936          */
937         for (changed = HEAD(version->changed_list);
938              changed != NULL;
939              changed = next_changed) {
940                 next_changed = NEXT(changed, link);
941                 if (!changed->dirty) {
942                         UNLINK(version->changed_list,
943                                changed, link);
944                         APPEND(*cleanup_list,
945                                changed, link);
946                 }
947         }
948 }
949
950 static void
951 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
952         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
953         rbtdb_version_t *version, *cleanup_version, *least_greater;
954         isc_boolean_t rollback = ISC_FALSE;
955         rbtdb_changedlist_t cleanup_list;
956         rbtdb_changed_t *changed, *next_changed;
957         rbtdb_serial_t serial, least_serial;
958         dns_rbtnode_t *rbtnode;
959         isc_mutex_t *lock;
960
961         REQUIRE(VALID_RBTDB(rbtdb));
962         version = (rbtdb_version_t *)*versionp;
963
964         cleanup_version = NULL;
965         ISC_LIST_INIT(cleanup_list);
966
967         LOCK(&rbtdb->lock);
968         INSIST(version->references > 0);
969         INSIST(!version->writer || !(commit && version->references > 1));
970         version->references--;
971         serial = version->serial;
972         if (version->references == 0) {
973                 if (version->writer) {
974                         if (commit) {
975                                 INSIST(version->commit_ok);
976                                 INSIST(version == rbtdb->future_version);
977                                 if (EMPTY(rbtdb->open_versions)) {
978                                         /*
979                                          * We're going to become the least open
980                                          * version.
981                                          */
982                                         make_least_version(rbtdb, version,
983                                                            &cleanup_list);
984                                 } else {
985                                         /*
986                                          * Some other open version is the
987                                          * least version.  We can't cleanup
988                                          * records that were changed in this
989                                          * version because the older versions
990                                          * may still be in use by an open
991                                          * version.
992                                          *
993                                          * We can, however, discard the
994                                          * changed records for things that
995                                          * we've added that didn't exist in
996                                          * prior versions.
997                                          */
998                                         cleanup_nondirty(version,
999                                                          &cleanup_list);
1000                                 }
1001                                 /*
1002                                  * If the (soon to be former) current version
1003                                  * isn't being used by anyone, we can clean
1004                                  * it up.
1005                                  */
1006                                 if (rbtdb->current_version->references == 0) {
1007                                         cleanup_version =
1008                                                 rbtdb->current_version;
1009                                         APPENDLIST(version->changed_list,
1010                                                  cleanup_version->changed_list,
1011                                                    link);
1012                                 }
1013                                 /*
1014                                  * Become the current version.
1015                                  */
1016                                 version->writer = ISC_FALSE;
1017                                 rbtdb->current_version = version;
1018                                 rbtdb->current_serial = version->serial;
1019                                 rbtdb->future_version = NULL;
1020                         } else {
1021                                 /*
1022                                  * We're rolling back this transaction.
1023                                  */
1024                                 cleanup_list = version->changed_list;
1025                                 ISC_LIST_INIT(version->changed_list);
1026                                 rollback = ISC_TRUE;
1027                                 cleanup_version = version;
1028                                 rbtdb->future_version = NULL;
1029                         }
1030                 } else {
1031                         if (version != rbtdb->current_version) {
1032                                 /*
1033                                  * There are no external or internal references
1034                                  * to this version and it can be cleaned up.
1035                                  */
1036                                 cleanup_version = version;
1037
1038                                 /*
1039                                  * Find the version with the least serial
1040                                  * number greater than ours.
1041                                  */
1042                                 least_greater = PREV(version, link);
1043                                 if (least_greater == NULL)
1044                                         least_greater = rbtdb->current_version;
1045
1046                                 INSIST(version->serial < least_greater->serial);
1047                                 /*
1048                                  * Is this the least open version?
1049                                  */
1050                                 if (version->serial == rbtdb->least_serial) {
1051                                         /*
1052                                          * Yes.  Install the new least open
1053                                          * version.
1054                                          */
1055                                         make_least_version(rbtdb,
1056                                                            least_greater,
1057                                                            &cleanup_list);
1058                                 } else {
1059                                         /*
1060                                          * Add any unexecuted cleanups to
1061                                          * those of the least greater version.
1062                                          */
1063                                         APPENDLIST(least_greater->changed_list,
1064                                                    version->changed_list,
1065                                                    link);
1066                                 }
1067                         } else if (version->serial == rbtdb->least_serial)
1068                                 INSIST(EMPTY(version->changed_list));
1069                         UNLINK(rbtdb->open_versions, version, link);
1070                 }
1071         }
1072         least_serial = rbtdb->least_serial;
1073         UNLOCK(&rbtdb->lock);
1074
1075         if (cleanup_version != NULL) {
1076                 INSIST(EMPTY(cleanup_version->changed_list));
1077                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
1078                             sizeof *cleanup_version);
1079         }
1080
1081         if (!EMPTY(cleanup_list)) {
1082                 for (changed = HEAD(cleanup_list);
1083                      changed != NULL;
1084                      changed = next_changed) {
1085                         next_changed = NEXT(changed, link);
1086                         rbtnode = changed->node;
1087                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
1088
1089                         LOCK(lock);
1090
1091                         INSIST(rbtnode->references > 0);
1092                         rbtnode->references--;
1093                         if (rollback)
1094                                 rollback_node(rbtnode, serial);
1095
1096                         if (rbtnode->references == 0)
1097                                 no_references(rbtdb, rbtnode, least_serial,
1098                                               isc_rwlocktype_none);
1099
1100                         UNLOCK(lock);
1101
1102                         isc_mem_put(rbtdb->common.mctx, changed,
1103                                     sizeof *changed);
1104                 }
1105         }
1106
1107         *versionp = NULL;
1108 }
1109
1110 /*
1111  * Add the necessary magic for the wildcard name 'name'
1112  * to be found in 'rbtdb'.
1113  *
1114  * In order for wildcard matching to work correctly in
1115  * zone_find(), we must ensure that a node for the wildcarding
1116  * level exists in the database, and has its 'find_callback'
1117  * and 'wild' bits set.
1118  *
1119  * E.g. if the wildcard name is "*.sub.example." then we
1120  * must ensure that "sub.example." exists and is marked as
1121  * a wildcard level.
1122  */
1123 static isc_result_t
1124 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
1125         isc_result_t result;
1126         dns_name_t foundname;
1127         dns_offsets_t offsets;
1128         unsigned int n;
1129         dns_rbtnode_t *node = NULL;
1130
1131         dns_name_init(&foundname, offsets);
1132         n = dns_name_countlabels(name);
1133         INSIST(n >= 2);
1134         n--;
1135         dns_name_getlabelsequence(name, 1, n, &foundname);
1136         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
1137         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
1138                 return (result);
1139         node->find_callback = 1;
1140         node->wild = 1;
1141         return (ISC_R_SUCCESS);
1142 }
1143
1144 static isc_result_t
1145 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
1146         isc_result_t result;
1147         dns_name_t foundname;
1148         dns_offsets_t offsets;
1149         unsigned int n, l, i;
1150
1151         dns_name_init(&foundname, offsets);
1152         n = dns_name_countlabels(name);
1153         l = dns_name_countlabels(&rbtdb->common.origin);
1154         i = l + 1;
1155         while (i < n) {
1156                 dns_rbtnode_t *node = NULL;     /* dummy */
1157                 dns_name_getlabelsequence(name, n - i, i, &foundname);
1158                 if (dns_name_iswildcard(&foundname)) {
1159                         result = add_wildcard_magic(rbtdb, &foundname);
1160                         if (result != ISC_R_SUCCESS)
1161                                 return (result);
1162                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
1163                                                  &node);
1164                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
1165                                 return (result);
1166                 }
1167                 i++;
1168         }
1169         return (ISC_R_SUCCESS);
1170 }
1171
1172 static isc_result_t
1173 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
1174          dns_dbnode_t **nodep)
1175 {
1176         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1177         dns_rbtnode_t *node = NULL;
1178         dns_name_t nodename;
1179         unsigned int locknum;
1180         isc_result_t result;
1181         isc_rwlocktype_t locktype = isc_rwlocktype_read;
1182
1183         REQUIRE(VALID_RBTDB(rbtdb));
1184
1185         dns_name_init(&nodename, NULL);
1186         RWLOCK(&rbtdb->tree_lock, locktype);
1187         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
1188                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
1189         if (result != ISC_R_SUCCESS) {
1190                 RWUNLOCK(&rbtdb->tree_lock, locktype);
1191                 if (!create) {
1192                         if (result == DNS_R_PARTIALMATCH)
1193                                 result = ISC_R_NOTFOUND;
1194                         return (result);
1195                 }
1196                 /*
1197                  * It would be nice to try to upgrade the lock instead of
1198                  * unlocking then relocking.
1199                  */
1200                 locktype = isc_rwlocktype_write;
1201                 RWLOCK(&rbtdb->tree_lock, locktype);
1202                 node = NULL;
1203                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
1204                 if (result == ISC_R_SUCCESS) {
1205                         dns_rbt_namefromnode(node, &nodename);
1206 #ifdef DNS_RBT_USEHASH
1207                         node->locknum = node->hashval % rbtdb->node_lock_count;
1208 #else
1209                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
1210                                 rbtdb->node_lock_count;
1211 #endif
1212                         add_empty_wildcards(rbtdb, name);
1213
1214                         if (dns_name_iswildcard(name)) {
1215                                 result = add_wildcard_magic(rbtdb, name);
1216                                 if (result != ISC_R_SUCCESS) {
1217                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
1218                                         return (result);
1219                                 }
1220                         }
1221                 } else if (result != ISC_R_EXISTS) {
1222                         RWUNLOCK(&rbtdb->tree_lock, locktype);
1223                         return (result);
1224                 }
1225         }
1226         locknum = node->locknum;
1227         LOCK(&rbtdb->node_locks[locknum].lock);
1228         new_reference(rbtdb, node);
1229         UNLOCK(&rbtdb->node_locks[locknum].lock);
1230         RWUNLOCK(&rbtdb->tree_lock, locktype);
1231
1232         *nodep = (dns_dbnode_t *)node;
1233
1234         return (ISC_R_SUCCESS);
1235 }
1236
1237 static isc_result_t
1238 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
1239         rbtdb_search_t *search = arg;
1240         rdatasetheader_t *header, *header_next;
1241         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
1242         rdatasetheader_t *found;
1243         isc_result_t result;
1244         dns_rbtnode_t *onode;
1245
1246         /*
1247          * We only want to remember the topmost zone cut, since it's the one
1248          * that counts, so we'll just continue if we've already found a
1249          * zonecut.
1250          */
1251         if (search->zonecut != NULL)
1252                 return (DNS_R_CONTINUE);
1253
1254         found = NULL;
1255         result = DNS_R_CONTINUE;
1256         onode = search->rbtdb->origin_node;
1257
1258         LOCK(&(search->rbtdb->node_locks[node->locknum].lock));
1259
1260         /*
1261          * Look for an NS or DNAME rdataset active in our version.
1262          */
1263         ns_header = NULL;
1264         dname_header = NULL;
1265         sigdname_header = NULL;
1266         for (header = node->data; header != NULL; header = header_next) {
1267                 header_next = header->next;
1268                 if (header->type == dns_rdatatype_ns ||
1269                     header->type == dns_rdatatype_dname ||
1270                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
1271                         do {
1272                                 if (header->serial <= search->serial &&
1273                                     !IGNORE(header)) {
1274                                         /*
1275                                          * Is this a "this rdataset doesn't
1276                                          * exist" record?
1277                                          */
1278                                         if (NONEXISTENT(header))
1279                                                 header = NULL;
1280                                         break;
1281                                 } else
1282                                         header = header->down;
1283                         } while (header != NULL);
1284                         if (header != NULL) {
1285                                 if (header->type == dns_rdatatype_dname)
1286                                         dname_header = header;
1287                                 else if (header->type == 
1288                                            RBTDB_RDATATYPE_SIGDNAME)
1289                                         sigdname_header = header;
1290                                 else if (node != onode ||
1291                                          IS_STUB(search->rbtdb)) {
1292                                         /*
1293                                          * We've found an NS rdataset that
1294                                          * isn't at the origin node.  We check
1295                                          * that they're not at the origin node,
1296                                          * because otherwise we'd erroneously
1297                                          * treat the zone top as if it were
1298                                          * a delegation.
1299                                          */
1300                                         ns_header = header;
1301                                 }
1302                         }
1303                 }
1304         }
1305
1306         /*
1307          * Did we find anything?
1308          */
1309         if (dname_header != NULL) {
1310                 /*
1311                  * Note that DNAME has precedence over NS if both exist.
1312                  */
1313                 found = dname_header;
1314                 search->zonecut_sigrdataset = sigdname_header;
1315         } else if (ns_header != NULL) {
1316                 found = ns_header;
1317                 search->zonecut_sigrdataset = NULL;
1318         }
1319
1320         if (found != NULL) {
1321                 /*
1322                  * We increment the reference count on node to ensure that
1323                  * search->zonecut_rdataset will still be valid later.
1324                  */
1325                 new_reference(search->rbtdb, node);
1326                 search->zonecut = node;
1327                 search->zonecut_rdataset = found;
1328                 search->need_cleanup = ISC_TRUE;
1329                 /*
1330                  * Since we've found a zonecut, anything beneath it is
1331                  * glue and is not subject to wildcard matching, so we
1332                  * may clear search->wild.
1333                  */
1334                 search->wild = ISC_FALSE;
1335                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
1336                         /*
1337                          * If the caller does not want to find glue, then
1338                          * this is the best answer and the search should
1339                          * stop now.
1340                          */
1341                         result = DNS_R_PARTIALMATCH;
1342                 } else {
1343                         dns_name_t *zcname;
1344
1345                         /*
1346                          * The search will continue beneath the zone cut.
1347                          * This may or may not be the best match.  In case it
1348                          * is, we need to remember the node name.
1349                          */
1350                         zcname = dns_fixedname_name(&search->zonecut_name);
1351                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
1352                                       ISC_R_SUCCESS);
1353                         search->copy_name = ISC_TRUE;
1354                 }
1355         } else {
1356                 /*
1357                  * There is no zonecut at this node which is active in this
1358                  * version.
1359                  *
1360                  * If this is a "wild" node and the caller hasn't disabled
1361                  * wildcard matching, remember that we've seen a wild node
1362                  * in case we need to go searching for wildcard matches
1363                  * later on.
1364                  */
1365                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
1366                         search->wild = ISC_TRUE;
1367         }
1368
1369         UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock));
1370
1371         return (result);
1372 }
1373
1374 static inline void
1375 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1376               rdatasetheader_t *header, isc_stdtime_t now,
1377               dns_rdataset_t *rdataset)
1378 {
1379         unsigned char *raw;
1380
1381         /*
1382          * Caller must be holding the node lock.
1383          */
1384
1385         if (rdataset == NULL)
1386                 return;
1387
1388         new_reference(rbtdb, node);
1389
1390         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
1391
1392         rdataset->methods = &rdataset_methods;
1393         rdataset->rdclass = rbtdb->common.rdclass;
1394         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
1395         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
1396         rdataset->ttl = header->ttl - now;
1397         rdataset->trust = header->trust;
1398         if (NXDOMAIN(header))
1399                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
1400         rdataset->private1 = rbtdb;
1401         rdataset->private2 = node;
1402         raw = (unsigned char *)header + sizeof(*header);
1403         rdataset->private3 = raw;
1404         
1405         /*
1406          * Reset iterator state.
1407          */
1408         rdataset->privateuint4 = 0;
1409         rdataset->private5 = NULL;
1410 }
1411
1412 static inline isc_result_t
1413 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
1414                  dns_name_t *foundname, dns_rdataset_t *rdataset,
1415                  dns_rdataset_t *sigrdataset)
1416 {
1417         isc_result_t result;
1418         dns_name_t *zcname;
1419         rbtdb_rdatatype_t type;
1420         dns_rbtnode_t *node;
1421
1422         /*
1423          * The caller MUST NOT be holding any node locks.
1424          */
1425
1426         node = search->zonecut;
1427         type = search->zonecut_rdataset->type;
1428
1429         /*
1430          * If we have to set foundname, we do it before anything else.
1431          * If we were to set foundname after we had set nodep or bound the
1432          * rdataset, then we'd have to undo that work if dns_name_concatenate()
1433          * failed.  By setting foundname first, there's nothing to undo if
1434          * we have trouble.
1435          */
1436         if (foundname != NULL && search->copy_name) {
1437                 zcname = dns_fixedname_name(&search->zonecut_name);
1438                 result = dns_name_copy(zcname, foundname, NULL);
1439                 if (result != ISC_R_SUCCESS)
1440                         return (result);
1441         }
1442         if (nodep != NULL) {
1443                 /*
1444                  * Note that we don't have to increment the node's reference
1445                  * count here because we're going to use the reference we
1446                  * already have in the search block.
1447                  */
1448                 *nodep = node;
1449                 search->need_cleanup = ISC_FALSE;
1450         }
1451         if (rdataset != NULL) {
1452                 LOCK(&(search->rbtdb->node_locks[node->locknum].lock));
1453                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
1454                               search->now, rdataset);
1455                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
1456                         bind_rdataset(search->rbtdb, node,
1457                                       search->zonecut_sigrdataset,
1458                                       search->now, sigrdataset);
1459                 UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock));
1460         }
1461
1462         if (type == dns_rdatatype_dname)
1463                 return (DNS_R_DNAME);
1464         return (DNS_R_DELEGATION);
1465 }
1466
1467 static inline isc_boolean_t
1468 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
1469            dns_rbtnode_t *node)
1470 {
1471         unsigned char *raw;
1472         unsigned int count, size;
1473         dns_name_t ns_name;
1474         isc_boolean_t valid = ISC_FALSE;
1475         dns_offsets_t offsets;
1476         isc_region_t region;
1477         rdatasetheader_t *header;
1478
1479         /*
1480          * No additional locking is required.
1481          */
1482
1483         /*
1484          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
1485          * if it occurs at a zone cut, but is not valid below it.
1486          */
1487         if (type == dns_rdatatype_ns) {
1488                 if (node != search->zonecut) {
1489                         return (ISC_FALSE);
1490                 }
1491         } else if (type != dns_rdatatype_a &&
1492                    type != dns_rdatatype_aaaa &&
1493                    type != dns_rdatatype_a6) {
1494                 return (ISC_FALSE);
1495         }
1496
1497         header = search->zonecut_rdataset;
1498         raw = (unsigned char *)header + sizeof *header;
1499         count = raw[0] * 256 + raw[1];
1500         raw += 2;
1501
1502         while (count > 0) {
1503                 count--;
1504                 size = raw[0] * 256 + raw[1];
1505                 raw += 2;
1506                 region.base = raw;
1507                 region.length = size;
1508                 raw += size;
1509                 /*
1510                  * XXX Until we have rdata structures, we have no choice but
1511                  * to directly access the rdata format.
1512                  */
1513                 dns_name_init(&ns_name, offsets);
1514                 dns_name_fromregion(&ns_name, &region);
1515                 if (dns_name_compare(&ns_name, name) == 0) {
1516                         valid = ISC_TRUE;
1517                         break;
1518                 }
1519         }
1520
1521         return (valid);
1522 }
1523
1524 static inline isc_boolean_t
1525 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
1526             dns_name_t *name)
1527 {
1528         dns_fixedname_t fnext;
1529         dns_fixedname_t forigin;
1530         dns_name_t *next;
1531         dns_name_t *origin;
1532         dns_name_t prefix;
1533         dns_rbtdb_t *rbtdb;
1534         dns_rbtnode_t *node;
1535         isc_result_t result;
1536         isc_boolean_t answer = ISC_FALSE;
1537         rdatasetheader_t *header;
1538
1539         rbtdb = search->rbtdb;
1540
1541         dns_name_init(&prefix, NULL);
1542         dns_fixedname_init(&fnext);
1543         next = dns_fixedname_name(&fnext);
1544         dns_fixedname_init(&forigin);
1545         origin = dns_fixedname_name(&forigin);
1546
1547         result = dns_rbtnodechain_next(chain, NULL, NULL);
1548         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
1549                 node = NULL;
1550                 result = dns_rbtnodechain_current(chain, &prefix,
1551                                                   origin, &node);
1552                 if (result != ISC_R_SUCCESS)
1553                         break;
1554                 LOCK(&(rbtdb->node_locks[node->locknum].lock));
1555                 for (header = node->data;
1556                      header != NULL;
1557                      header = header->next) {
1558                         if (header->serial <= search->serial &&
1559                             !IGNORE(header) && EXISTS(header))
1560                                 break;
1561                 }
1562                 UNLOCK(&(rbtdb->node_locks[node->locknum].lock));
1563                 if (header != NULL)
1564                         break;
1565                 result = dns_rbtnodechain_next(chain, NULL, NULL);
1566         }
1567         if (result == ISC_R_SUCCESS)
1568                 result = dns_name_concatenate(&prefix, origin, next, NULL);
1569         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
1570                 answer = ISC_TRUE;
1571         return (answer);
1572 }
1573
1574 static inline isc_boolean_t
1575 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
1576         dns_fixedname_t fnext;
1577         dns_fixedname_t forigin;
1578         dns_fixedname_t fprev;
1579         dns_name_t *next;
1580         dns_name_t *origin;
1581         dns_name_t *prev;
1582         dns_name_t name;
1583         dns_name_t rname;
1584         dns_name_t tname;
1585         dns_rbtdb_t *rbtdb;
1586         dns_rbtnode_t *node;
1587         dns_rbtnodechain_t chain;
1588         isc_boolean_t check_next = ISC_TRUE;
1589         isc_boolean_t check_prev = ISC_TRUE;
1590         isc_boolean_t answer = ISC_FALSE;
1591         isc_result_t result;
1592         rdatasetheader_t *header;
1593         unsigned int n;
1594
1595         rbtdb = search->rbtdb;
1596
1597         dns_name_init(&name, NULL);
1598         dns_name_init(&tname, NULL);
1599         dns_name_init(&rname, NULL);
1600         dns_fixedname_init(&fnext);
1601         next = dns_fixedname_name(&fnext);
1602         dns_fixedname_init(&fprev);
1603         prev = dns_fixedname_name(&fprev);
1604         dns_fixedname_init(&forigin);
1605         origin = dns_fixedname_name(&forigin);
1606
1607         /*
1608          * Find if qname is at or below a empty node.
1609          * Use our own copy of the chain.
1610          */
1611
1612         chain = search->chain;
1613         do {
1614                 node = NULL;
1615                 result = dns_rbtnodechain_current(&chain, &name,
1616                                                   origin, &node);
1617                 if (result != ISC_R_SUCCESS)
1618                         break;
1619                 LOCK(&(rbtdb->node_locks[node->locknum].lock));
1620                 for (header = node->data;
1621                      header != NULL;
1622                      header = header->next) {
1623                         if (header->serial <= search->serial &&
1624                             !IGNORE(header) && EXISTS(header))
1625                                 break;
1626                 }
1627                 UNLOCK(&(rbtdb->node_locks[node->locknum].lock));
1628                 if (header != NULL)
1629                         break;
1630                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
1631         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
1632         if (result == ISC_R_SUCCESS)
1633                 result = dns_name_concatenate(&name, origin, prev, NULL);
1634         if (result != ISC_R_SUCCESS)
1635                 check_prev = ISC_FALSE;
1636
1637         result = dns_rbtnodechain_next(&chain, NULL, NULL);
1638         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
1639                 node = NULL;
1640                 result = dns_rbtnodechain_current(&chain, &name,
1641                                                   origin, &node);
1642                 if (result != ISC_R_SUCCESS)
1643                         break;
1644                 LOCK(&(rbtdb->node_locks[node->locknum].lock));
1645                 for (header = node->data;
1646                      header != NULL;
1647                      header = header->next) {
1648                         if (header->serial <= search->serial &&
1649                             !IGNORE(header) && EXISTS(header))
1650                                 break;
1651                 }
1652                 UNLOCK(&(rbtdb->node_locks[node->locknum].lock));
1653                 if (header != NULL)
1654                         break;
1655                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
1656         }
1657         if (result == ISC_R_SUCCESS)
1658                 result = dns_name_concatenate(&name, origin, next, NULL);
1659         if (result != ISC_R_SUCCESS)
1660                 check_next = ISC_FALSE;
1661
1662         dns_name_clone(qname, &rname);
1663
1664         /*
1665          * Remove the wildcard label to find the terminal name.
1666          */
1667         n = dns_name_countlabels(wname);
1668         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
1669
1670         do {
1671                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
1672                     (check_next && dns_name_issubdomain(next, &rname))) {
1673                         answer = ISC_TRUE;
1674                         break;
1675                 }
1676                 /*
1677                  * Remove the left hand label.
1678                  */
1679                 n = dns_name_countlabels(&rname);
1680                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
1681         } while (!dns_name_equal(&rname, &tname));
1682         return (answer);
1683 }
1684
1685 static inline isc_result_t
1686 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
1687               dns_name_t *qname)
1688 {
1689         unsigned int i, j;
1690         dns_rbtnode_t *node, *level_node, *wnode;
1691         rdatasetheader_t *header;
1692         isc_result_t result = ISC_R_NOTFOUND;
1693         dns_name_t name;
1694         dns_name_t *wname;
1695         dns_fixedname_t fwname;
1696         dns_rbtdb_t *rbtdb;
1697         isc_boolean_t done, wild, active;
1698         dns_rbtnodechain_t wchain;
1699
1700         /*
1701          * Caller must be holding the tree lock and MUST NOT be holding
1702          * any node locks.
1703          */
1704
1705         /*
1706          * Examine each ancestor level.  If the level's wild bit
1707          * is set, then construct the corresponding wildcard name and
1708          * search for it.  If the wildcard node exists, and is active in
1709          * this version, we're done.  If not, then we next check to see
1710          * if the ancestor is active in this version.  If so, then there
1711          * can be no possible wildcard match and again we're done.  If not,
1712          * continue the search.
1713          */
1714
1715         rbtdb = search->rbtdb;
1716         i = search->chain.level_matches;
1717         done = ISC_FALSE;
1718         node = *nodep;
1719         do {
1720                 LOCK(&(rbtdb->node_locks[node->locknum].lock));
1721
1722                 /*
1723                  * First we try to figure out if this node is active in
1724                  * the search's version.  We do this now, even though we
1725                  * may not need the information, because it simplifies the
1726                  * locking and code flow.
1727                  */
1728                 for (header = node->data;
1729                      header != NULL;
1730                      header = header->next) {
1731                         if (header->serial <= search->serial &&
1732                             !IGNORE(header) && EXISTS(header))
1733                                 break;
1734                 }
1735                 if (header != NULL)
1736                         active = ISC_TRUE;
1737                 else
1738                         active = ISC_FALSE;
1739
1740                 if (node->wild)
1741                         wild = ISC_TRUE;
1742                 else
1743                         wild = ISC_FALSE;
1744
1745                 UNLOCK(&(rbtdb->node_locks[node->locknum].lock));
1746
1747                 if (wild) {
1748                         /*
1749                          * Construct the wildcard name for this level.
1750                          */
1751                         dns_name_init(&name, NULL);
1752                         dns_rbt_namefromnode(node, &name);
1753                         dns_fixedname_init(&fwname);
1754                         wname = dns_fixedname_name(&fwname);
1755                         result = dns_name_concatenate(dns_wildcardname, &name,
1756                                                       wname, NULL);
1757                         j = i;
1758                         while (result == ISC_R_SUCCESS && j != 0) {
1759                                 j--;
1760                                 level_node = search->chain.levels[j];
1761                                 dns_name_init(&name, NULL);
1762                                 dns_rbt_namefromnode(level_node, &name);
1763                                 result = dns_name_concatenate(wname,
1764                                                               &name,
1765                                                               wname,
1766                                                               NULL);
1767                         }
1768                         if (result != ISC_R_SUCCESS)
1769                                 break;
1770
1771                         wnode = NULL;
1772                         dns_rbtnodechain_init(&wchain, NULL);
1773                         result = dns_rbt_findnode(rbtdb->tree, wname,
1774                                                   NULL, &wnode, &wchain,
1775                                                   DNS_RBTFIND_EMPTYDATA,
1776                                                   NULL, NULL);
1777                         if (result == ISC_R_SUCCESS) {
1778                             /*
1779                              * We have found the wildcard node.  If it
1780                              * is active in the search's version, we're
1781                              * done.
1782                              */
1783                             LOCK(&(rbtdb->node_locks[wnode->locknum].lock));
1784                             for (header = wnode->data;
1785                                  header != NULL;
1786                                  header = header->next) {
1787                                     if (header->serial <= search->serial &&
1788                                         !IGNORE(header) && EXISTS(header))
1789                                             break;
1790                             }
1791                             UNLOCK(&(rbtdb->node_locks[wnode->locknum].lock));
1792                             if (header != NULL ||
1793                                 activeempty(search, &wchain, wname)) {
1794                                     if (activeemtpynode(search, qname, wname))
1795                                                 return (ISC_R_NOTFOUND);
1796                                     /*
1797                                      * The wildcard node is active!
1798                                      *
1799                                      * Note: result is still ISC_R_SUCCESS
1800                                      * so we don't have to set it.
1801                                      */
1802                                     *nodep = wnode;
1803                                     break;
1804                             }
1805                         } else if (result != ISC_R_NOTFOUND &&
1806                                    result != DNS_R_PARTIALMATCH) {
1807                                 /*
1808                                  * An error has occurred.  Bail out.
1809                                  */
1810                                 break;
1811                         }
1812                 }
1813
1814                 if (active) {
1815                         /*
1816                          * The level node is active.  Any wildcarding
1817                          * present at higher levels has no
1818                          * effect and we're done.
1819                          */
1820                         result = ISC_R_NOTFOUND;
1821                         break;
1822                 }
1823
1824                 if (i > 0) {
1825                         i--;
1826                         node = search->chain.levels[i];
1827                 } else
1828                         done = ISC_TRUE;
1829         } while (!done);
1830
1831         return (result);
1832 }
1833
1834 static inline isc_result_t
1835 find_closest_nxt(rbtdb_search_t *search, dns_dbnode_t **nodep,
1836                  dns_name_t *foundname, dns_rdataset_t *rdataset,
1837                  dns_rdataset_t *sigrdataset)
1838 {
1839         dns_rbtnode_t *node;
1840         rdatasetheader_t *header, *header_next, *found, *foundsig;
1841         isc_boolean_t empty_node;
1842         isc_result_t result;
1843         dns_fixedname_t fname, forigin;
1844         dns_name_t *name, *origin;
1845
1846         do {
1847                 node = NULL;
1848                 dns_fixedname_init(&fname);
1849                 name = dns_fixedname_name(&fname);
1850                 dns_fixedname_init(&forigin);
1851                 origin = dns_fixedname_name(&forigin);
1852                 result = dns_rbtnodechain_current(&search->chain, name,
1853                                                   origin, &node);
1854                 if (result != ISC_R_SUCCESS)
1855                         return (result);
1856                 LOCK(&(search->rbtdb->node_locks[node->locknum].lock));
1857                 found = NULL;
1858                 foundsig = NULL;
1859                 empty_node = ISC_TRUE;
1860                 for (header = node->data;
1861                      header != NULL;
1862                      header = header_next) {
1863                         header_next = header->next;
1864                         /*
1865                          * Look for an active, extant NXT or SIG NXT.
1866                          */
1867                         do {
1868                                 if (header->serial <= search->serial &&
1869                                     !IGNORE(header)) {
1870                                         /*
1871                                          * Is this a "this rdataset doesn't
1872                                          * exist" record?
1873                                          */
1874                                         if ((header->attributes &
1875                                              RDATASET_ATTR_NONEXISTENT) != 0)
1876                                                 header = NULL;
1877                                         break;
1878                                 } else
1879                                         header = header->down;
1880                         } while (header != NULL);
1881                         if (header != NULL) {
1882                                 /*
1883                                  * We now know that there is at least one
1884                                  * active rdataset at this node.
1885                                  */
1886                                 empty_node = ISC_FALSE;
1887                                 if (header->type == dns_rdatatype_nxt) {
1888                                         found = header;
1889                                         if (foundsig != NULL)
1890                                                 break;
1891                                 } else if (header->type ==
1892                                            RBTDB_RDATATYPE_SIGNXT) {
1893                                         foundsig = header;
1894                                         if (found != NULL)
1895                                                 break;
1896                                 }
1897                         }
1898                 }
1899                 if (!empty_node) {
1900                         if (found != NULL && foundsig != NULL) {
1901                                 /*
1902                                  * We've found the right NXT record.
1903                                  *
1904                                  * Note: for this to really be the right
1905                                  * NXT record, it's essential that the NXT
1906                                  * records of any nodes obscured by a zone
1907                                  * cut have been removed; we assume this is
1908                                  * the case.
1909                                  */
1910                                 result = dns_name_concatenate(name, origin,
1911                                                               foundname, NULL);
1912                                 if (result == ISC_R_SUCCESS) {
1913                                         if (nodep != NULL) {
1914                                                 new_reference(search->rbtdb,
1915                                                               node);
1916                                                 *nodep = node;
1917                                         }
1918                                         bind_rdataset(search->rbtdb, node,
1919                                                       found, search->now,
1920                                                       rdataset);
1921                                         bind_rdataset(search->rbtdb, node,
1922                                                       foundsig, search->now,
1923                                                       sigrdataset);
1924                                 }
1925                         } else if (found == NULL && foundsig == NULL) {
1926                                 /*
1927                                  * This node is active, but has no NXT or
1928                                  * SIG NXT.  That means it's glue or
1929                                  * other obscured zone data that isn't
1930                                  * relevant for our search.  Treat the
1931                                  * node as if it were empty and keep looking.
1932                                  */
1933                                 empty_node = ISC_TRUE;
1934                                 result = dns_rbtnodechain_prev(&search->chain,
1935                                                                NULL, NULL);
1936                         } else {
1937                                 /*
1938                                  * We found an active node, but either the
1939                                  * NXT or the SIG NXT is missing.  This
1940                                  * shouldn't happen.
1941                                  */
1942                                 result = DNS_R_BADDB;
1943                         }
1944                 } else {
1945                         /*
1946                          * This node isn't active.  We've got to keep
1947                          * looking.
1948                          */
1949                         result = dns_rbtnodechain_prev(&search->chain, NULL,
1950                                                        NULL);
1951                 }
1952                 UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock));
1953         } while (empty_node && result == ISC_R_SUCCESS);
1954
1955         /*
1956          * If the result is ISC_R_NOMORE, then we got to the beginning of
1957          * the database and didn't find a NXT record.  This shouldn't
1958          * happen.
1959          */
1960         if (result == ISC_R_NOMORE)
1961                 result = DNS_R_BADDB;
1962
1963         return (result);
1964 }
1965
1966 static isc_result_t
1967 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
1968           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
1969           dns_dbnode_t **nodep, dns_name_t *foundname,
1970           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
1971 {
1972         dns_rbtnode_t *node = NULL;
1973         isc_result_t result;
1974         rbtdb_search_t search;
1975         isc_boolean_t cname_ok = ISC_TRUE;
1976         isc_boolean_t close_version = ISC_FALSE;
1977         isc_boolean_t maybe_zonecut = ISC_FALSE;
1978         isc_boolean_t at_zonecut = ISC_FALSE;
1979         isc_boolean_t wild;
1980         isc_boolean_t empty_node;
1981         isc_mutex_t *lock;
1982         rdatasetheader_t *header, *header_next, *found, *nxtheader;
1983         rdatasetheader_t *foundsig, *cnamesig, *nxtsig;
1984         rbtdb_rdatatype_t sigtype;
1985         isc_boolean_t active;
1986         dns_rbtnodechain_t chain;
1987
1988
1989         search.rbtdb = (dns_rbtdb_t *)db;
1990
1991         REQUIRE(VALID_RBTDB(search.rbtdb));
1992
1993         /*
1994          * We don't care about 'now'.
1995          */
1996         UNUSED(now);
1997
1998         /*
1999          * If the caller didn't supply a version, attach to the current
2000          * version.
2001          */
2002         if (version == NULL) {
2003                 currentversion(db, &version);
2004                 close_version = ISC_TRUE;
2005         }
2006
2007         search.rbtversion = version;
2008         search.serial = search.rbtversion->serial;
2009         search.options = options;
2010         search.copy_name = ISC_FALSE;
2011         search.need_cleanup = ISC_FALSE;
2012         search.wild = ISC_FALSE;
2013         search.zonecut = NULL;
2014         dns_fixedname_init(&search.zonecut_name);
2015         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
2016         search.now = 0;
2017
2018         /*
2019          * 'wild' will be true iff. we've matched a wildcard.
2020          */
2021         wild = ISC_FALSE;
2022
2023         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
2024
2025         /*
2026          * Search down from the root of the tree.  If, while going down, we
2027          * encounter a callback node, zone_zonecut_callback() will search the
2028          * rdatasets at the zone cut for active DNAME or NS rdatasets.
2029          */
2030         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
2031                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
2032                                   zone_zonecut_callback, &search);
2033
2034         if (result == DNS_R_PARTIALMATCH) {
2035         partial_match:
2036                 if (search.zonecut != NULL) {
2037                     result = setup_delegation(&search, nodep, foundname,
2038                                               rdataset, sigrdataset);
2039                     goto tree_exit;
2040                 }
2041
2042                 if (search.wild) {
2043                         /*
2044                          * At least one of the levels in the search chain
2045                          * potentially has a wildcard.  For each such level,
2046                          * we must see if there's a matching wildcard active
2047                          * in the current version.
2048                          */
2049                         result = find_wildcard(&search, &node, name);
2050                         if (result == ISC_R_SUCCESS) {
2051                                 result = dns_name_copy(name, foundname, NULL);
2052                                 if (result != ISC_R_SUCCESS)
2053                                         goto tree_exit;
2054                                 wild = ISC_TRUE;
2055                                 goto found;
2056                         }
2057                         else if (result != ISC_R_NOTFOUND)
2058                                 goto tree_exit;
2059                 }
2060
2061                 chain = search.chain;
2062                 active = activeempty(&search, &chain, name);
2063
2064                 /*
2065                  * If we're here, then the name does not exist, is not
2066                  * beneath a zonecut, and there's no matching wildcard.
2067                  */
2068                 if (search.rbtdb->secure) {
2069                         result = find_closest_nxt(&search, nodep, foundname,
2070                                                   rdataset, sigrdataset);
2071                         if (result == ISC_R_SUCCESS)
2072                                 result = active ? DNS_R_EMPTYNAME :
2073                                                   DNS_R_NXDOMAIN;
2074                 } else
2075                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
2076                 goto tree_exit;
2077         } else if (result != ISC_R_SUCCESS)
2078                 goto tree_exit;
2079
2080  found:
2081         /*
2082          * We have found a node whose name is the desired name, or we
2083          * have matched a wildcard.
2084          */
2085
2086         if (search.zonecut != NULL) {
2087                 /*
2088                  * If we're beneath a zone cut, we don't want to look for
2089                  * CNAMEs because they're not legitimate zone glue.
2090                  */
2091                 cname_ok = ISC_FALSE;
2092         } else {
2093                 /*
2094                  * The node may be a zone cut itself.  If it might be one,
2095                  * make sure we check for it later.
2096                  */
2097                 if (node->find_callback &&
2098                     (node != search.rbtdb->origin_node ||
2099                      IS_STUB(search.rbtdb)))
2100                         maybe_zonecut = ISC_TRUE;
2101         }
2102
2103         /*
2104          * Certain DNSSEC types are not subject to CNAME matching
2105          * (RFC 2535, section 2.3.5).
2106          *
2107          * We don't check for SIG, because we don't store SIG records
2108          * directly.
2109          */
2110         if (type == dns_rdatatype_key || type == dns_rdatatype_nxt)
2111                 cname_ok = ISC_FALSE;
2112
2113         /*
2114          * We now go looking for rdata...
2115          */
2116
2117         LOCK(&(search.rbtdb->node_locks[node->locknum].lock));
2118
2119         found = NULL;
2120         foundsig = NULL;
2121         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_sig, type);
2122         nxtheader = NULL;
2123         nxtsig = NULL;
2124         cnamesig = NULL;
2125         empty_node = ISC_TRUE;
2126         for (header = node->data; header != NULL; header = header_next) {
2127                 header_next = header->next;
2128                 /*
2129                  * Look for an active, extant rdataset.
2130                  */
2131                 do {
2132                         if (header->serial <= search.serial &&
2133                             !IGNORE(header)) {
2134                                 /*
2135                                  * Is this a "this rdataset doesn't
2136                                  * exist" record?
2137                                  */
2138                                 if ((header->attributes &
2139                                      RDATASET_ATTR_NONEXISTENT) != 0)
2140                                         header = NULL;
2141                                 break;
2142                         } else
2143                                 header = header->down;
2144                 } while (header != NULL);
2145                 if (header != NULL) {
2146                         /*
2147                          * We now know that there is at least one active
2148                          * rdataset at this node.
2149                          */
2150                         empty_node = ISC_FALSE;
2151
2152                         /*
2153                          * Do special zone cut handling, if requested.
2154                          */
2155                         if (maybe_zonecut &&
2156                             header->type == dns_rdatatype_ns) {
2157                                 /*
2158                                  * We increment the reference count on node to
2159                                  * ensure that search->zonecut_rdataset will
2160                                  * still be valid later.
2161                                  */
2162                                 new_reference(search.rbtdb, node);
2163                                 search.zonecut = node;
2164                                 search.zonecut_rdataset = header;
2165                                 search.zonecut_sigrdataset = NULL;
2166                                 search.need_cleanup = ISC_TRUE;
2167                                 maybe_zonecut = ISC_FALSE;
2168                                 at_zonecut = ISC_TRUE;
2169                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
2170                                     && type != dns_rdatatype_nxt
2171                                     && type != dns_rdatatype_key) {
2172                                         /*
2173                                          * Glue is not OK, but any answer we
2174                                          * could return would be glue.  Return
2175                                          * the delegation.
2176                                          */
2177                                         found = NULL;
2178                                         break;
2179                                 }
2180                                 if (found != NULL && foundsig != NULL)
2181                                         break;
2182                         }
2183
2184                         /*
2185                          * If we found a type we were looking for,
2186                          * remember it.
2187                          */
2188                         if (header->type == type ||
2189                             type == dns_rdatatype_any ||
2190                             (header->type == dns_rdatatype_cname &&
2191                              cname_ok)) {
2192                                 /*
2193                                  * We've found the answer!
2194                                  */
2195                                 found = header;
2196                                 if (header->type == dns_rdatatype_cname &&
2197                                     cname_ok) {
2198                                         /*
2199                                          * We may be finding a CNAME instead
2200                                          * of the desired type.
2201                                          *
2202                                          * If we've already got the CNAME SIG,
2203                                          * use it, otherwise change sigtype
2204                                          * so that we find it.
2205                                          */
2206                                         if (cnamesig != NULL)
2207                                                 foundsig = cnamesig;
2208                                         else
2209                                                 sigtype =
2210                                                     RBTDB_RDATATYPE_SIGCNAME;
2211                                 }
2212                                 /*
2213                                  * If we've got all we need, end the search.
2214                                  */
2215                                 if (!maybe_zonecut && foundsig != NULL)
2216                                         break;
2217                         } else if (header->type == sigtype) {
2218                                 /*
2219                                  * We've found the SIG rdataset for our
2220                                  * target type.  Remember it.
2221                                  */
2222                                 foundsig = header;
2223                                 /*
2224                                  * If we've got all we need, end the search.
2225                                  */
2226                                 if (!maybe_zonecut && found != NULL)
2227                                         break;
2228                         } else if (header->type == dns_rdatatype_nxt) {
2229                                 /*
2230                                  * Remember a NXT rdataset even if we're
2231                                  * not specifically looking for it, because
2232                                  * we might need it later.
2233                                  */
2234                                 nxtheader = header;
2235                         } else if (header->type == RBTDB_RDATATYPE_SIGNXT) {
2236                                 /*
2237                                  * If we need the NXT rdataset, we'll also
2238                                  * need its signature.
2239                                  */
2240                                 nxtsig = header;
2241                         } else if (cname_ok &&
2242                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
2243                                 /*
2244                                  * If we get a CNAME match, we'll also need
2245                                  * its signature.
2246                                  */
2247                                 cnamesig = header;
2248                         }
2249                 }
2250         }
2251
2252         if (empty_node) {
2253                 /*
2254                  * We have an exact match for the name, but there are no
2255                  * active rdatasets in the desired version.  That means that
2256                  * this node doesn't exist in the desired version, and that
2257                  * we really have a partial match.
2258                  */
2259                 if (!wild) {
2260                         UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
2261                         goto partial_match;
2262                 }
2263         }
2264
2265         /*
2266          * If we didn't find what we were looking for...
2267          */
2268         if (found == NULL) {
2269                 if (search.zonecut != NULL) {
2270                     /*
2271                      * We were trying to find glue at a node beneath a
2272                      * zone cut, but didn't.
2273                      *
2274                      * Return the delegation.
2275                      */
2276                     UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
2277                     result = setup_delegation(&search, nodep, foundname,
2278                                               rdataset, sigrdataset);
2279                     goto tree_exit;
2280                 }
2281                 /*
2282                  * The desired type doesn't exist.
2283                  */
2284                 result = DNS_R_NXRRSET;
2285                 if (search.rbtdb->secure &&
2286                     (nxtheader == NULL || nxtsig == NULL)) {
2287                         /*
2288                          * The zone is secure but there's no NXT,
2289                          * or the NXT has no signature!
2290                          */
2291                         if (!wild) {
2292                                 result = DNS_R_BADDB;
2293                                 goto node_exit;
2294                         }
2295                         UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
2296                         result = find_closest_nxt(&search, nodep, foundname,
2297                                                   rdataset, sigrdataset);
2298                         if (result == ISC_R_SUCCESS)
2299                                 result = DNS_R_EMPTYWILD;
2300                         goto tree_exit;
2301                 }
2302                 if (nodep != NULL) {
2303                         new_reference(search.rbtdb, node);
2304                         *nodep = node;
2305                 }
2306                 if (search.rbtdb->secure) {
2307                         bind_rdataset(search.rbtdb, node, nxtheader,
2308                                       0, rdataset);
2309                         bind_rdataset(search.rbtdb, node, nxtsig,
2310                                       0, sigrdataset);
2311                 }
2312                 goto node_exit;
2313         }
2314
2315         /*
2316          * We found what we were looking for, or we found a CNAME.
2317          */
2318
2319         if (type != found->type &&
2320             type != dns_rdatatype_any &&
2321             found->type == dns_rdatatype_cname) {
2322                 /*
2323                  * We weren't doing an ANY query and we found a CNAME instead
2324                  * of the type we were looking for, so we need to indicate
2325                  * that result to the caller.
2326                  */
2327                 result = DNS_R_CNAME;
2328         } else if (search.zonecut != NULL) {
2329                 /*
2330                  * If we're beneath a zone cut, we must indicate that the
2331                  * result is glue, unless we're actually at the zone cut
2332                  * and the type is NXT or KEY.
2333                  */
2334                 if (search.zonecut == node) {
2335                         if (type == dns_rdatatype_nxt ||
2336                             type == dns_rdatatype_key)
2337                                 result = ISC_R_SUCCESS;
2338                         else if (type == dns_rdatatype_any)
2339                                 result = DNS_R_ZONECUT;
2340                         else
2341                                 result = DNS_R_GLUE;
2342                 } else
2343                         result = DNS_R_GLUE;
2344                 /*
2345                  * We might have found data that isn't glue, but was occluded
2346                  * by a dynamic update.  If the caller cares about this, they
2347                  * will have told us to validate glue.
2348                  *
2349                  * XXX We should cache the glue validity state!
2350                  */
2351                 if (result == DNS_R_GLUE &&
2352                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
2353                     !valid_glue(&search, foundname, type, node)) {
2354                     UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
2355                     result = setup_delegation(&search, nodep, foundname,
2356                                               rdataset, sigrdataset);
2357                     goto tree_exit;
2358                 }
2359         } else {
2360                 /*
2361                  * An ordinary successful query!
2362                  */
2363                 result = ISC_R_SUCCESS;
2364         }
2365
2366         if (nodep != NULL) {
2367                 if (!at_zonecut)
2368                         new_reference(search.rbtdb, node);
2369                 else
2370                         search.need_cleanup = ISC_FALSE;
2371                 *nodep = node;
2372         }
2373
2374         if (type != dns_rdatatype_any) {
2375                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
2376                 if (foundsig != NULL)
2377                         bind_rdataset(search.rbtdb, node, foundsig, 0,
2378                                       sigrdataset);
2379         }
2380
2381  node_exit:
2382         UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
2383
2384  tree_exit:
2385         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
2386
2387         /*
2388          * If we found a zonecut but aren't going to use it, we have to
2389          * let go of it.
2390          */
2391         if (search.need_cleanup) {
2392                 node = search.zonecut;
2393                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
2394
2395                 LOCK(lock);
2396                 INSIST(node->references > 0);
2397                 node->references--;
2398                 if (node->references == 0)
2399                         no_references(search.rbtdb, node, 0,
2400                                       isc_rwlocktype_none);
2401
2402                 UNLOCK(lock);
2403         }
2404
2405         if (close_version)
2406                 closeversion(db, &version, ISC_FALSE);
2407
2408         dns_rbtnodechain_reset(&search.chain);
2409
2410         return (result);
2411 }
2412
2413 static isc_result_t
2414 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
2415                  isc_stdtime_t now, dns_dbnode_t **nodep,
2416                  dns_name_t *foundname,
2417                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
2418 {
2419         UNUSED(db);
2420         UNUSED(name);
2421         UNUSED(options);
2422         UNUSED(now);
2423         UNUSED(nodep);
2424         UNUSED(foundname);
2425         UNUSED(rdataset);
2426         UNUSED(sigrdataset);
2427
2428         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
2429
2430         return (ISC_R_NOTIMPLEMENTED);
2431 }
2432
2433 static isc_result_t
2434 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2435         rbtdb_search_t *search = arg;
2436         rdatasetheader_t *header, *header_prev, *header_next;
2437         rdatasetheader_t *dname_header, *sigdname_header;
2438         isc_result_t result;
2439
2440         /* XXX comment */
2441
2442         REQUIRE(search->zonecut == NULL);
2443
2444         /*
2445          * Keep compiler silent.
2446          */
2447         UNUSED(name);
2448
2449         LOCK(&(search->rbtdb->node_locks[node->locknum].lock));
2450
2451         /*
2452          * Look for a DNAME or SIG DNAME rdataset.
2453          */
2454         dname_header = NULL;
2455         sigdname_header = NULL;
2456         header_prev = NULL;
2457         for (header = node->data; header != NULL; header = header_next) {
2458                 header_next = header->next;
2459                 if (header->ttl <= search->now) {
2460                         /*
2461                          * This rdataset is stale.  If no one else is
2462                          * using the node, we can clean it up right
2463                          * now, otherwise we mark it as stale, and
2464                          * the node as dirty, so it will get cleaned
2465                          * up later.
2466                          */
2467                         if (node->references == 0) {
2468                                 INSIST(header->down == NULL);
2469                                 if (header_prev != NULL)
2470                                         header_prev->next =
2471                                                 header->next;
2472                                 else
2473                                         node->data = header->next;
2474                                 free_rdataset(search->rbtdb->common.mctx,
2475                                               header);
2476                         } else {
2477                                 header->attributes |=
2478                                         RDATASET_ATTR_STALE;
2479                                 node->dirty = 1;
2480                                 header_prev = header;
2481                         }
2482                 } else if (header->type == dns_rdatatype_dname &&
2483                            EXISTS(header)) {
2484                         dname_header = header;
2485                         header_prev = header;
2486                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
2487                          EXISTS(header)) {
2488                         sigdname_header = header;
2489                         header_prev = header;
2490                 } else
2491                         header_prev = header;
2492         }
2493
2494         if (dname_header != NULL &&
2495             (dname_header->trust != dns_trust_pending ||
2496              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
2497                 /*
2498                  * We increment the reference count on node to ensure that
2499                  * search->zonecut_rdataset will still be valid later.
2500                  */
2501                 new_reference(search->rbtdb, node);
2502                 search->zonecut = node;
2503                 search->zonecut_rdataset = dname_header;
2504                 search->zonecut_sigrdataset = sigdname_header;
2505                 search->need_cleanup = ISC_TRUE;
2506                 result = DNS_R_PARTIALMATCH;
2507         } else
2508                 result = DNS_R_CONTINUE;
2509
2510         UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock));
2511
2512         return (result);
2513 }
2514
2515 static inline isc_result_t
2516 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
2517                      dns_dbnode_t **nodep, dns_name_t *foundname,
2518                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
2519 {
2520         unsigned int i;
2521         dns_rbtnode_t *level_node;
2522         rdatasetheader_t *header, *header_prev, *header_next;
2523         rdatasetheader_t *found, *foundsig;
2524         isc_result_t result = ISC_R_NOTFOUND;
2525         dns_name_t name;
2526         dns_rbtdb_t *rbtdb;
2527         isc_boolean_t done;
2528
2529         /*
2530          * Caller must be holding the tree lock.
2531          */
2532
2533         rbtdb = search->rbtdb;
2534         i = search->chain.level_matches;
2535         done = ISC_FALSE;
2536         do {
2537                 LOCK(&(rbtdb->node_locks[node->locknum].lock));
2538
2539                 /*
2540                  * Look for NS and SIG NS rdatasets.
2541                  */
2542                 found = NULL;
2543                 foundsig = NULL;
2544                 header_prev = NULL;
2545                 for (header = node->data;
2546                      header != NULL;
2547                      header = header_next) {
2548                         header_next = header->next;
2549                         if (header->ttl <= search->now) {
2550                                 /*
2551                                  * This rdataset is stale.  If no one else is
2552                                  * using the node, we can clean it up right
2553                                  * now, otherwise we mark it as stale, and
2554                                  * the node as dirty, so it will get cleaned
2555                                  * up later.
2556                                  */
2557                                 if (node->references == 0) {
2558                                         INSIST(header->down == NULL);
2559                                         if (header_prev != NULL)
2560                                                 header_prev->next =
2561                                                         header->next;
2562                                         else
2563                                                 node->data = header->next;
2564                                         free_rdataset(rbtdb->common.mctx,
2565                                                       header);
2566                                 } else {
2567                                         header->attributes |=
2568                                                 RDATASET_ATTR_STALE;
2569                                         node->dirty = 1;
2570                                         header_prev = header;
2571                                 }
2572                         } else if ((header->attributes &
2573                                     RDATASET_ATTR_NONEXISTENT) == 0) {
2574                                 /*
2575                                  * We've found an extant rdataset.  See if
2576                                  * we're interested in it.
2577                                  */
2578                                 if (header->type == dns_rdatatype_ns) {
2579                                         found = header;
2580                                         if (foundsig != NULL)
2581                                                 break;
2582                                 } else if (header->type ==
2583                                            RBTDB_RDATATYPE_SIGNS) {
2584                                         foundsig = header;
2585                                         if (found != NULL)
2586                                                 break;
2587                                 }
2588                                 header_prev = header;
2589                         } else
2590                                 header_prev = header;
2591                 }
2592
2593                 if (found != NULL) {
2594                         /*
2595                          * If we have to set foundname, we do it before
2596                          * anything else.  If we were to set foundname after
2597                          * we had set nodep or bound the rdataset, then we'd
2598                          * have to undo that work if dns_name_concatenate()
2599                          * failed.  By setting foundname first, there's
2600                          * nothing to undo if we have trouble.
2601                          */
2602                         if (foundname != NULL) {
2603                                 dns_name_init(&name, NULL);
2604                                 dns_rbt_namefromnode(node, &name);
2605                                 result = dns_name_copy(&name, foundname, NULL);
2606                                 while (result == ISC_R_SUCCESS && i > 0) {
2607                                         i--;
2608                                         level_node = search->chain.levels[i];
2609                                         dns_name_init(&name, NULL);
2610                                         dns_rbt_namefromnode(level_node,
2611                                                              &name);
2612                                         result =
2613                                                 dns_name_concatenate(foundname,
2614                                                                      &name,
2615                                                                      foundname,
2616                                                                      NULL);
2617                                 }
2618                                 if (result != ISC_R_SUCCESS) {
2619                                         *nodep = NULL;
2620                                         goto node_exit;
2621                                 }
2622                         }
2623                         result = DNS_R_DELEGATION;
2624                         if (nodep != NULL) {
2625                                 new_reference(search->rbtdb, node);
2626                                 *nodep = node;
2627                         }
2628                         bind_rdataset(search->rbtdb, node, found, search->now,
2629                                       rdataset);
2630                         if (foundsig != NULL)
2631                                 bind_rdataset(search->rbtdb, node, foundsig,
2632                                               search->now, sigrdataset);
2633                 }
2634
2635         node_exit:
2636                 UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock));
2637
2638                 if (found == NULL && i > 0) {
2639                         i--;
2640                         node = search->chain.levels[i];
2641                 } else
2642                         done = ISC_TRUE;
2643
2644         } while (!done);
2645
2646         return (result);
2647 }
2648
2649 static isc_result_t
2650 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
2651            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
2652            dns_dbnode_t **nodep, dns_name_t *foundname,
2653            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
2654 {
2655         dns_rbtnode_t *node = NULL;
2656         isc_result_t result;
2657         rbtdb_search_t search;
2658         isc_boolean_t cname_ok = ISC_TRUE;
2659         isc_boolean_t empty_node;
2660         isc_mutex_t *lock;
2661         rdatasetheader_t *header, *header_prev, *header_next;
2662         rdatasetheader_t *found, *nsheader;
2663         rdatasetheader_t *foundsig, *nssig, *cnamesig;
2664         rbtdb_rdatatype_t sigtype, nxtype;
2665
2666         UNUSED(version);
2667
2668         search.rbtdb = (dns_rbtdb_t *)db;
2669
2670         REQUIRE(VALID_RBTDB(search.rbtdb));
2671         REQUIRE(version == NULL);
2672
2673         if (now == 0)
2674                 isc_stdtime_get(&now);
2675
2676         search.rbtversion = NULL;
2677         search.serial = 1;
2678         search.options = options;
2679         search.copy_name = ISC_FALSE;
2680         search.need_cleanup = ISC_FALSE;
2681         search.wild = ISC_FALSE;
2682         search.zonecut = NULL;
2683         dns_fixedname_init(&search.zonecut_name);
2684         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
2685         search.now = now;
2686
2687         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
2688
2689         /*
2690          * Search down from the root of the tree.  If, while going down, we
2691          * encounter a callback node, cache_zonecut_callback() will search the
2692          * rdatasets at the zone cut for a DNAME rdataset.
2693          */
2694         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
2695                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
2696                                   cache_zonecut_callback, &search);
2697
2698         if (result == DNS_R_PARTIALMATCH) {
2699                 if (search.zonecut != NULL) {
2700                     result = setup_delegation(&search, nodep, foundname,
2701                                               rdataset, sigrdataset);
2702                     goto tree_exit;
2703                 } else {
2704                 find_ns:
2705                         result = find_deepest_zonecut(&search, node, nodep,
2706                                                       foundname, rdataset,
2707                                                       sigrdataset);
2708                         goto tree_exit;
2709                 }
2710         } else if (result != ISC_R_SUCCESS)
2711                 goto tree_exit;
2712
2713         /*
2714          * Certain DNSSEC types are not subject to CNAME matching
2715          * (RFC 2535, section 2.3.5).
2716          *
2717          * We don't check for SIG, because we don't store SIG records
2718          * directly.
2719          */
2720         if (type == dns_rdatatype_key || type == dns_rdatatype_nxt)
2721                 cname_ok = ISC_FALSE;
2722
2723         /*
2724          * We now go looking for rdata...
2725          */
2726
2727         LOCK(&(search.rbtdb->node_locks[node->locknum].lock));
2728
2729         found = NULL;
2730         foundsig = NULL;
2731         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_sig, type);
2732         nxtype = RBTDB_RDATATYPE_VALUE(0, type);
2733         nsheader = NULL;
2734         nssig = NULL;
2735         cnamesig = NULL;
2736         empty_node = ISC_TRUE;
2737         header_prev = NULL;
2738         for (header = node->data; header != NULL; header = header_next) {
2739                 header_next = header->next;
2740                 if (header->ttl <= now) {
2741                         /*
2742                          * This rdataset is stale.  If no one else is using the
2743                          * node, we can clean it up right now, otherwise we
2744                          * mark it as stale, and the node as dirty, so it will
2745                          * get cleaned up later.
2746                          */
2747                         if (node->references == 0) {
2748                                 INSIST(header->down == NULL);
2749                                 if (header_prev != NULL)
2750                                         header_prev->next = header->next;
2751                                 else
2752                                         node->data = header->next;
2753                                 free_rdataset(search.rbtdb->common.mctx,
2754                                               header);
2755                         } else {
2756                                 header->attributes |= RDATASET_ATTR_STALE;
2757                                 node->dirty = 1;
2758                                 header_prev = header;
2759                         }
2760                 } else if ((header->attributes & RDATASET_ATTR_NONEXISTENT)
2761                            == 0) {
2762                         /*
2763                          * We now know that there is at least one active
2764                          * non-stale rdataset at this node.
2765                          */
2766                         empty_node = ISC_FALSE;
2767
2768                         /*
2769                          * If we found a type we were looking for, remember
2770                          * it.
2771                          */
2772                         if (header->type == type ||
2773                             (type == dns_rdatatype_any &&
2774                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
2775                             (cname_ok && header->type ==
2776                              dns_rdatatype_cname)) {
2777                                 /*
2778                                  * We've found the answer.
2779                                  */
2780                                 found = header;
2781                                 if (header->type == dns_rdatatype_cname &&
2782                                     cname_ok &&
2783                                     cnamesig != NULL) {
2784                                         /*
2785                                          * If we've already got the CNAME SIG,
2786                                          * use it, otherwise change sigtype
2787                                          * so that we find it.
2788                                          */
2789                                         if (cnamesig != NULL)
2790                                                 foundsig = cnamesig;
2791                                         else
2792                                                 sigtype =
2793                                                     RBTDB_RDATATYPE_SIGCNAME;
2794                                         foundsig = cnamesig;
2795                                 }
2796                         } else if (header->type == sigtype) {
2797                                 /*
2798                                  * We've found the SIG rdataset for our
2799                                  * target type.  Remember it.
2800                                  */
2801                                 foundsig = header;
2802                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
2803                                    header->type == nxtype) {
2804                                 /*
2805                                  * We've found a negative cache entry.
2806                                  */
2807                                 found = header;
2808                         } else if (header->type == dns_rdatatype_ns) {
2809                                 /*
2810                                  * Remember a NS rdataset even if we're
2811                                  * not specifically looking for it, because
2812                                  * we might need it later.
2813                                  */
2814                                 nsheader = header;
2815                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
2816                                 /*
2817                                  * If we need the NS rdataset, we'll also
2818                                  * need its signature.
2819                                  */
2820                                 nssig = header;
2821                         } else if (cname_ok &&
2822                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
2823                                 /*
2824                                  * If we get a CNAME match, we'll also need
2825                                  * its signature.
2826                                  */
2827                                 cnamesig = header;
2828                         }
2829                         header_prev = header;
2830                 } else
2831                         header_prev = header;
2832         }
2833
2834         if (empty_node) {
2835                 /*
2836                  * We have an exact match for the name, but there are no
2837                  * extant rdatasets.  That means that this node doesn't
2838                  * meaningfully exist, and that we really have a partial match.
2839                  */
2840                 UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
2841                 goto find_ns;
2842         }
2843
2844         /*
2845          * If we didn't find what we were looking for...
2846          */
2847         if (found == NULL ||
2848             (found->trust == dns_trust_glue &&
2849              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
2850             (found->trust == dns_trust_pending &&
2851              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
2852                 /*
2853                  * If there is an NS rdataset at this node, then this is the
2854                  * deepest zone cut.
2855                  */
2856                 if (nsheader != NULL) {
2857                         if (nodep != NULL) {
2858                                 new_reference(search.rbtdb, node);
2859                                 *nodep = node;
2860                         }
2861                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
2862                                       rdataset);
2863                         if (nssig != NULL)
2864                                 bind_rdataset(search.rbtdb, node, nssig,
2865                                               search.now, sigrdataset);
2866                         result = DNS_R_DELEGATION;
2867                         goto node_exit;
2868                 }
2869
2870                 /*
2871                  * Go find the deepest zone cut.
2872                  */
2873                 UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
2874                 goto find_ns;
2875         }
2876
2877         /*
2878          * We found what we were looking for, or we found a CNAME.
2879          */
2880
2881         if (nodep != NULL) {
2882                 new_reference(search.rbtdb, node);
2883                 *nodep = node;
2884         }
2885
2886         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
2887                 /*
2888                  * We found a negative cache entry.
2889                  */
2890                 if (NXDOMAIN(found))
2891                         result = DNS_R_NCACHENXDOMAIN;
2892                 else
2893                         result = DNS_R_NCACHENXRRSET;
2894         } else if (type != found->type &&
2895                    type != dns_rdatatype_any &&
2896                    found->type == dns_rdatatype_cname) {
2897                 /*
2898                  * We weren't doing an ANY query and we found a CNAME instead
2899                  * of the type we were looking for, so we need to indicate
2900                  * that result to the caller.
2901                  */
2902                 result = DNS_R_CNAME;
2903         } else {
2904                 /*
2905                  * An ordinary successful query!
2906                  */
2907                 result = ISC_R_SUCCESS;
2908         }
2909
2910         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
2911             result == DNS_R_NCACHENXRRSET) {
2912                 bind_rdataset(search.rbtdb, node, found, search.now,
2913                               rdataset);
2914                 if (foundsig != NULL)
2915                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
2916                                       sigrdataset);
2917         }
2918
2919  node_exit:
2920         UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
2921
2922  tree_exit:
2923         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
2924
2925         /*
2926          * If we found a zonecut but aren't going to use it, we have to
2927          * let go of it.
2928          */
2929         if (search.need_cleanup) {
2930                 node = search.zonecut;
2931                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
2932
2933                 LOCK(lock);
2934                 INSIST(node->references > 0);
2935                 node->references--;
2936                 if (node->references == 0)
2937                         no_references(search.rbtdb, node, 0,
2938                                       isc_rwlocktype_none);
2939                 UNLOCK(lock);
2940         }
2941
2942         dns_rbtnodechain_reset(&search.chain);
2943
2944         return (result);
2945 }
2946
2947 static isc_result_t
2948 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
2949                   isc_stdtime_t now, dns_dbnode_t **nodep,
2950                   dns_name_t *foundname,
2951                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
2952 {
2953         dns_rbtnode_t *node = NULL;
2954         isc_result_t result;
2955         rbtdb_search_t search;
2956         rdatasetheader_t *header, *header_prev, *header_next;
2957         rdatasetheader_t *found, *foundsig;
2958         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
2959
2960         search.rbtdb = (dns_rbtdb_t *)db;
2961
2962         REQUIRE(VALID_RBTDB(search.rbtdb));
2963
2964         if (now == 0)
2965                 isc_stdtime_get(&now);
2966
2967         search.rbtversion = NULL;
2968         search.serial = 1;
2969         search.options = options;
2970         search.copy_name = ISC_FALSE;
2971         search.need_cleanup = ISC_FALSE;
2972         search.wild = ISC_FALSE;
2973         search.zonecut = NULL;
2974         dns_fixedname_init(&search.zonecut_name);
2975         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
2976         search.now = now;
2977
2978         if ((options & DNS_DBFIND_NOEXACT) != 0)
2979                 rbtoptions |= DNS_RBTFIND_NOEXACT;
2980
2981         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
2982
2983         /*
2984          * Search down from the root of the tree.
2985          */
2986         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
2987                                   &search.chain, rbtoptions, NULL, &search);
2988
2989         if (result == DNS_R_PARTIALMATCH) {
2990         find_ns:
2991                 result = find_deepest_zonecut(&search, node, nodep, foundname,
2992                                               rdataset, sigrdataset);
2993                 goto tree_exit;
2994         } else if (result != ISC_R_SUCCESS)
2995                 goto tree_exit;
2996
2997         /*
2998          * We now go looking for an NS rdataset at the node.
2999          */
3000
3001         LOCK(&(search.rbtdb->node_locks[node->locknum].lock));
3002
3003         found = NULL;
3004         foundsig = NULL;
3005         header_prev = NULL;
3006         for (header = node->data; header != NULL; header = header_next) {
3007                 header_next = header->next;
3008                 if (header->ttl <= now) {
3009                         /*
3010                          * This rdataset is stale.  If no one else is using the
3011                          * node, we can clean it up right now, otherwise we
3012                          * mark it as stale, and the node as dirty, so it will
3013                          * get cleaned up later.
3014                          */
3015                         if (node->references == 0) {
3016                                 INSIST(header->down == NULL);
3017                                 if (header_prev != NULL)
3018                                         header_prev->next = header->next;
3019                                 else
3020                                         node->data = header->next;
3021                                 free_rdataset(search.rbtdb->common.mctx,
3022                                               header);
3023                         } else {
3024                                 header->attributes |= RDATASET_ATTR_STALE;
3025                                 node->dirty = 1;
3026                                 header_prev = header;
3027                         }
3028                 } else if ((header->attributes & RDATASET_ATTR_NONEXISTENT)
3029                            == 0) {
3030                         /*
3031                          * If we found a type we were looking for, remember
3032                          * it.
3033                          */
3034                         if (header->type == dns_rdatatype_ns) {
3035                                 /*
3036                                  * Remember a NS rdataset even if we're
3037                                  * not specifically looking for it, because
3038                                  * we might need it later.
3039                                  */
3040                                 found = header;
3041                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
3042                                 /*
3043                                  * If we need the NS rdataset, we'll also
3044                                  * need its signature.
3045                                  */
3046                                 foundsig = header;
3047                         }
3048                         header_prev = header;
3049                 } else
3050                         header_prev = header;
3051         }
3052
3053         if (found == NULL) {
3054                 /*
3055                  * No NS records here.
3056                  */
3057                 UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
3058                 goto find_ns;
3059         }
3060
3061         if (nodep != NULL) {
3062                 new_reference(search.rbtdb, node);
3063                 *nodep = node;
3064         }
3065
3066         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
3067         if (foundsig != NULL)
3068                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
3069                               sigrdataset);
3070
3071         UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
3072
3073  tree_exit:
3074         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3075
3076         INSIST(!search.need_cleanup);
3077
3078         dns_rbtnodechain_reset(&search.chain);
3079
3080         if (result == DNS_R_DELEGATION)
3081                 result = ISC_R_SUCCESS;
3082
3083         return (result);
3084 }
3085
3086 static void
3087 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
3088         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3089         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
3090
3091         REQUIRE(VALID_RBTDB(rbtdb));
3092
3093         LOCK(&rbtdb->node_locks[node->locknum].lock);
3094         INSIST(node->references > 0);
3095         node->references++;
3096         INSIST(node->references != 0);                  /* Catch overflow. */
3097         UNLOCK(&rbtdb->node_locks[node->locknum].lock);
3098
3099         *targetp = source;
3100 }
3101
3102 static void
3103 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
3104         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3105         dns_rbtnode_t *node;
3106         isc_boolean_t want_free = ISC_FALSE;
3107         isc_boolean_t inactive = ISC_FALSE;
3108         unsigned int locknum;
3109
3110         REQUIRE(VALID_RBTDB(rbtdb));
3111         REQUIRE(targetp != NULL && *targetp != NULL);
3112
3113         node = (dns_rbtnode_t *)(*targetp);
3114         locknum = node->locknum;
3115
3116         LOCK(&rbtdb->node_locks[locknum].lock);
3117
3118         INSIST(node->references > 0);
3119         node->references--;
3120         if (node->references == 0) {
3121                 no_references(rbtdb, node, 0, isc_rwlocktype_none);
3122                 if (rbtdb->node_locks[locknum].references == 0 &&
3123                     rbtdb->node_locks[locknum].exiting)
3124                         inactive = ISC_TRUE;
3125         }
3126
3127         UNLOCK(&rbtdb->node_locks[locknum].lock);
3128
3129         *targetp = NULL;
3130
3131         if (inactive) {
3132                 LOCK(&rbtdb->lock);
3133                 rbtdb->active--;
3134                 if (rbtdb->active == 0)
3135                         want_free = ISC_TRUE;
3136                 UNLOCK(&rbtdb->lock);
3137                 if (want_free) {
3138                         char buf[DNS_NAME_FORMATSIZE];
3139                         if (dns_name_dynamic(&rbtdb->common.origin))
3140                                 dns_name_format(&rbtdb->common.origin, buf,
3141                                                 sizeof(buf));
3142                         else
3143                                 strcpy(buf, "<UNKNOWN>");
3144                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3145                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
3146                                       "calling free_rbtdb(%s)", buf);
3147                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
3148                 }
3149         }
3150 }
3151
3152 static isc_result_t
3153 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
3154         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3155         dns_rbtnode_t *rbtnode = node;
3156         rdatasetheader_t *header;
3157         isc_boolean_t force_expire = ISC_FALSE;
3158         /*
3159          * These are the category and module used by the cache cleaner.
3160          */
3161         isc_boolean_t log = ISC_FALSE;
3162         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
3163         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
3164         int level = ISC_LOG_DEBUG(2);
3165         char printname[DNS_NAME_FORMATSIZE];
3166
3167         REQUIRE(VALID_RBTDB(rbtdb));
3168
3169         /*
3170          * Caller must hold a tree lock.
3171          */
3172
3173         if (now == 0)
3174                 isc_stdtime_get(&now);
3175
3176         if (rbtdb->overmem) {
3177                 isc_uint32_t val;
3178
3179                 isc_random_get(&val);
3180                 /*
3181                  * XXXDCL Could stand to have a better policy, like LRU.
3182                  */
3183                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
3184
3185                 /*
3186                  * Note that 'log' can be true IFF rbtdb->overmem is also true.
3187                  * rbtdb->ovemem can currently only be true for cache databases
3188                  * -- hence all of the "overmem cache" log strings.
3189                  */
3190                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
3191                 if (log)
3192                         isc_log_write(dns_lctx, category, module, level,
3193                                       "overmem cache: %s %s",
3194                                       force_expire ? "FORCE" : "check",
3195                                       dns_rbt_formatnodename(rbtnode,
3196                                                            printname,
3197                                                            sizeof(printname)));
3198         }
3199
3200         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3201
3202         for (header = rbtnode->data; header != NULL; header = header->next)
3203                 if (header->ttl <= now) {
3204                         /*
3205                          * We don't check if rbtnode->references == 0 and try
3206                          * to free like we do in cache_find(), because
3207                          * rbtnode->references must be non-zero.  This is so
3208                          * because 'node' is an argument to the function.
3209                          */
3210                         header->attributes |= RDATASET_ATTR_STALE;
3211                         rbtnode->dirty = 1;
3212                         if (log)
3213                                 isc_log_write(dns_lctx, category, module,
3214                                               level, "overmem cache: stale %s",
3215                                               printname);
3216                 } else if (force_expire) {
3217                         if (! RETAIN(header)) {
3218                                 header->ttl = 0;
3219                                 header->attributes |= RDATASET_ATTR_STALE;
3220                                 rbtnode->dirty = 1;
3221                         } else if (log) {
3222                                 isc_log_write(dns_lctx, category, module,
3223                                               level, "overmem cache: "
3224                                               "reprieve by RETAIN() %s",
3225                                               printname);
3226                         }
3227                 } else if (rbtdb->overmem && log)
3228                         isc_log_write(dns_lctx, category, module, level,
3229                                       "overmem cache: saved %s", printname);
3230
3231         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3232
3233         return (ISC_R_SUCCESS);
3234 }
3235
3236 static void
3237 overmem(dns_db_t *db, isc_boolean_t overmem) {
3238         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3239
3240         if (IS_CACHE(rbtdb)) {
3241                 rbtdb->overmem = overmem;
3242         }
3243 }
3244
3245 static void
3246 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
3247         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3248         dns_rbtnode_t *rbtnode = node;
3249         isc_boolean_t first;
3250
3251         REQUIRE(VALID_RBTDB(rbtdb));
3252
3253         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3254
3255         fprintf(out, "node %p, %u references, locknum = %u\n",
3256                 rbtnode, rbtnode->references, rbtnode->locknum);
3257         if (rbtnode->data != NULL) {
3258                 rdatasetheader_t *current, *top_next;
3259
3260                 for (current = rbtnode->data; current != NULL;
3261                      current = top_next) {
3262                         top_next = current->next;
3263                         first = ISC_TRUE;
3264                         fprintf(out, "\ttype %u", current->type);
3265                         do {
3266                                 if (!first)
3267                                         fprintf(out, "\t");
3268                                 first = ISC_FALSE;
3269                                 fprintf(out,
3270                                         "\tserial = %lu, ttl = %u, "
3271                                         "trust = %u, attributes = %u\n",
3272                                         (unsigned long)current->serial,
3273                                         current->ttl,
3274                                         current->trust,
3275                                         current->attributes);
3276                                 current = current->down;
3277                         } while (current != NULL);
3278                 }
3279         } else
3280                 fprintf(out, "(empty)\n");
3281
3282         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3283 }
3284
3285 static isc_result_t
3286 createiterator(dns_db_t *db, isc_boolean_t relative_names,
3287                dns_dbiterator_t **iteratorp)
3288 {
3289         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3290         rbtdb_dbiterator_t *rbtdbiter;
3291
3292         REQUIRE(VALID_RBTDB(rbtdb));
3293
3294         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof *rbtdbiter);
3295         if (rbtdbiter == NULL)
3296                 return (ISC_R_NOMEMORY);
3297
3298         rbtdbiter->common.methods = &dbiterator_methods;
3299         rbtdbiter->common.db = NULL;
3300         dns_db_attach(db, &rbtdbiter->common.db);
3301         rbtdbiter->common.relative_names = relative_names;
3302         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
3303         rbtdbiter->common.cleaning = ISC_FALSE;
3304         rbtdbiter->paused = ISC_TRUE;
3305         rbtdbiter->tree_locked = isc_rwlocktype_none;
3306         rbtdbiter->result = ISC_R_SUCCESS;
3307         dns_fixedname_init(&rbtdbiter->name);
3308         dns_fixedname_init(&rbtdbiter->origin);
3309         rbtdbiter->node = NULL;
3310         rbtdbiter->delete = 0;
3311         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
3312         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
3313
3314         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
3315
3316         return (ISC_R_SUCCESS);
3317 }
3318
3319 static isc_result_t
3320 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
3321                   dns_rdatatype_t type, dns_rdatatype_t covers,
3322                   isc_stdtime_t now, dns_rdataset_t *rdataset,
3323                   dns_rdataset_t *sigrdataset)
3324 {
3325         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3326         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
3327         rdatasetheader_t *header, *header_next, *found, *foundsig;
3328         rbtdb_serial_t serial;
3329         rbtdb_version_t *rbtversion = version;
3330         isc_boolean_t close_version = ISC_FALSE;
3331         rbtdb_rdatatype_t matchtype, sigmatchtype;
3332
3333         REQUIRE(VALID_RBTDB(rbtdb));
3334         REQUIRE(type != dns_rdatatype_any);
3335
3336         if (rbtversion == NULL) {
3337                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
3338                 close_version = ISC_TRUE;
3339         }
3340         serial = rbtversion->serial;
3341         now = 0;
3342
3343         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3344
3345         found = NULL;
3346         foundsig = NULL;
3347         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
3348         if (covers == 0)
3349                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_sig, type);
3350         else
3351                 sigmatchtype = 0;
3352
3353         for (header = rbtnode->data; header != NULL; header = header_next) {
3354                 header_next = header->next;
3355                 do {
3356                         if (header->serial <= serial &&
3357                             !IGNORE(header)) {
3358                                 /*
3359                                  * Is this a "this rdataset doesn't
3360                                  * exist" record?
3361                                  */
3362                                 if ((header->attributes &
3363                                      RDATASET_ATTR_NONEXISTENT) != 0)
3364                                         header = NULL;
3365                                 break;
3366                         } else
3367                                 header = header->down;
3368                 } while (header != NULL);
3369                 if (header != NULL) {
3370                         /*
3371                          * We have an active, extant rdataset.  If it's a
3372                          * type we're looking for, remember it.
3373                          */
3374                         if (header->type == matchtype) {
3375                                 found = header;
3376                                 if (foundsig != NULL)
3377                                         break;
3378                         } else if (header->type == sigmatchtype) {
3379                                 foundsig = header;
3380                                 if (found != NULL)
3381                                         break;
3382                         }
3383                 }
3384         }
3385         if (found != NULL) {
3386                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
3387                 if (foundsig != NULL)
3388                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
3389                                       sigrdataset);
3390         }
3391
3392         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3393
3394         if (close_version)
3395                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
3396                              ISC_FALSE);
3397
3398         if (found == NULL)
3399                 return (ISC_R_NOTFOUND);
3400
3401         return (ISC_R_SUCCESS);
3402 }
3403
3404 static isc_result_t
3405 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
3406                    dns_rdatatype_t type, dns_rdatatype_t covers,
3407                    isc_stdtime_t now, dns_rdataset_t *rdataset,
3408                    dns_rdataset_t *sigrdataset)
3409 {
3410         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3411         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
3412         rdatasetheader_t *header, *header_next, *found, *foundsig;
3413         rbtdb_rdatatype_t matchtype, sigmatchtype, nxtype;
3414         isc_result_t result;
3415
3416         REQUIRE(VALID_RBTDB(rbtdb));
3417         REQUIRE(type != dns_rdatatype_any);
3418
3419         UNUSED(version);
3420
3421         result = ISC_R_SUCCESS;
3422
3423         if (now == 0)
3424                 isc_stdtime_get(&now);
3425
3426         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3427
3428         found = NULL;
3429         foundsig = NULL;
3430         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
3431         nxtype = RBTDB_RDATATYPE_VALUE(0, type);
3432         if (covers == 0)
3433                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_sig, type);
3434         else
3435                 sigmatchtype = 0;
3436
3437         for (header = rbtnode->data; header != NULL; header = header_next) {
3438                 header_next = header->next;
3439                 if (header->ttl <= now) {
3440                         /*
3441                          * We don't check if rbtnode->references == 0 and try
3442                          * to free like we do in cache_find(), because
3443                          * rbtnode->references must be non-zero.  This is so
3444                          * because 'node' is an argument to the function.
3445                          */
3446                         header->attributes |= RDATASET_ATTR_STALE;
3447                         rbtnode->dirty = 1;
3448                 } else if ((header->attributes & RDATASET_ATTR_NONEXISTENT) ==
3449                            0) {
3450                         if (header->type == matchtype)
3451                                 found = header;
3452                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
3453                                  header->type == nxtype)
3454                                 found = header;
3455                         else if (header->type == sigmatchtype)
3456                                 foundsig = header;
3457                 }
3458         }
3459         if (found != NULL) {
3460                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
3461                 if (foundsig != NULL)
3462                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
3463                                       sigrdataset);
3464         }
3465
3466         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3467
3468         if (found == NULL)
3469                 return (ISC_R_NOTFOUND);
3470
3471         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
3472                 /*
3473                  * We found a negative cache entry.
3474                  */
3475                 if (NXDOMAIN(found))
3476                         result = DNS_R_NCACHENXDOMAIN;
3477                 else
3478                         result = DNS_R_NCACHENXRRSET;
3479         }
3480
3481         return (result);
3482 }
3483
3484 static isc_result_t
3485 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
3486              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
3487 {
3488         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3489         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
3490         rbtdb_version_t *rbtversion = version;
3491         rbtdb_rdatasetiter_t *iterator;
3492
3493         REQUIRE(VALID_RBTDB(rbtdb));
3494
3495         iterator = isc_mem_get(rbtdb->common.mctx, sizeof *iterator);
3496         if (iterator == NULL)
3497                 return (ISC_R_NOMEMORY);
3498
3499         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
3500                 now = 0;
3501                 if (rbtversion == NULL)
3502                         currentversion(db,
3503                                  (dns_dbversion_t **) (void *)(&rbtversion));
3504                 else {
3505                         LOCK(&rbtdb->lock);
3506                         INSIST(rbtversion->references > 0);
3507                         rbtversion->references++;
3508                         INSIST(rbtversion->references != 0);
3509                         UNLOCK(&rbtdb->lock);
3510                 }
3511         } else {
3512                 if (now == 0)
3513                         isc_stdtime_get(&now);
3514                 rbtversion = NULL;
3515         }
3516
3517         iterator->common.magic = DNS_RDATASETITER_MAGIC;
3518         iterator->common.methods = &rdatasetiter_methods;
3519         iterator->common.db = db;
3520         iterator->common.node = node;
3521         iterator->common.version = (dns_dbversion_t *)rbtversion;
3522         iterator->common.now = now;
3523
3524         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3525
3526         INSIST(rbtnode->references > 0);
3527         rbtnode->references++;
3528         INSIST(rbtnode->references != 0);
3529         iterator->current = NULL;
3530
3531         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3532
3533         *iteratorp = (dns_rdatasetiter_t *)iterator;
3534
3535         return (ISC_R_SUCCESS);
3536 }
3537
3538 static isc_boolean_t
3539 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
3540         rdatasetheader_t *header, *header_next;
3541         isc_boolean_t cname, other_data;
3542         dns_rdatatype_t rdtype;
3543
3544         /*
3545          * The caller must hold the node lock.
3546          */
3547
3548         /*
3549          * Look for CNAME and "other data" rdatasets active in our version.
3550          */
3551         cname = ISC_FALSE;
3552         other_data = ISC_FALSE;
3553         for (header = node->data; header != NULL; header = header_next) {
3554                 header_next = header->next;
3555                 if (header->type == dns_rdatatype_cname) {
3556                         /*
3557                          * Look for an active extant CNAME.
3558                          */
3559                         do {
3560                                 if (header->serial <= serial &&
3561                                     !IGNORE(header)) {
3562                                         /*
3563                                          * Is this a "this rdataset doesn't
3564                                          * exist" record?
3565                                          */
3566                                         if (NONEXISTENT(header))
3567                                                 header = NULL;
3568                                         break;
3569                                 } else
3570                                         header = header->down;
3571                         } while (header != NULL);
3572                         if (header != NULL)
3573                                 cname = ISC_TRUE;
3574                 } else {
3575                         /*
3576                          * Look for active extant "other data".
3577                          *
3578                          * "Other data" is any rdataset whose type is not
3579                          * KEY, SIG KEY, NXT, SIG NXT, or SIG CNAME.
3580                          */
3581                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
3582                         if (rdtype == dns_rdatatype_sig)
3583                                 rdtype = RBTDB_RDATATYPE_EXT(header->type);
3584                         if (rdtype != dns_rdatatype_nxt &&
3585                             rdtype != dns_rdatatype_key &&
3586                             rdtype != dns_rdatatype_cname) {
3587                                 /*
3588                                  * We've found a type that isn't
3589                                  * NXT, KEY, CNAME, or one of their
3590                                  * signatures.  Is it active and extant?
3591                                  */
3592                                 do {
3593                                         if (header->serial <= serial &&
3594                                             !IGNORE(header)) {
3595                                                 /*
3596                                                  * Is this a "this rdataset
3597                                                  * doesn't exist" record?
3598                                                  */
3599                                                 if (NONEXISTENT(header))
3600                                                         header = NULL;
3601                                                 break;
3602                                         } else
3603                                                 header = header->down;
3604                                 } while (header != NULL);
3605                                 if (header != NULL)
3606                                         other_data = ISC_TRUE;
3607                         }
3608                 }
3609         }
3610
3611         if (cname && other_data)
3612                 return (ISC_TRUE);
3613
3614         return (ISC_FALSE);
3615 }
3616
3617 static isc_result_t
3618 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
3619     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
3620     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
3621 {
3622         rbtdb_changed_t *changed = NULL;
3623         rdatasetheader_t *topheader, *topheader_prev, *header;
3624         unsigned char *merged;
3625         isc_result_t result;
3626         isc_boolean_t header_nx;
3627         isc_boolean_t newheader_nx;
3628         isc_boolean_t merge;
3629         dns_rdatatype_t nxtype, rdtype, covers;
3630         dns_trust_t trust;
3631
3632         /*
3633          * Add an rdatasetheader_t to a node.
3634          */
3635
3636         /*
3637          * Caller must be holding the node lock.
3638          */
3639
3640         if ((options & DNS_DBADD_MERGE) != 0) {
3641                 REQUIRE(rbtversion != NULL);
3642                 merge = ISC_TRUE;
3643         } else
3644                 merge = ISC_FALSE;
3645
3646         if ((options & DNS_DBADD_FORCE) != 0)
3647                 trust = dns_trust_ultimate;
3648         else
3649                 trust = newheader->trust;
3650
3651         if (rbtversion != NULL && !loading) {
3652                 /*
3653                  * We always add a changed record, even if no changes end up
3654                  * being made to this node, because it's harmless and
3655                  * simplifies the code.
3656                  */
3657                 changed = add_changed(rbtdb, rbtversion, rbtnode);
3658                 if (changed == NULL) {
3659                         free_rdataset(rbtdb->common.mctx, newheader);
3660                         return (ISC_R_NOMEMORY);
3661                 }
3662         }
3663
3664         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
3665         topheader_prev = NULL;
3666
3667         nxtype = 0;
3668         if (rbtversion == NULL && !newheader_nx) {
3669                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
3670                 if (rdtype == 0) {
3671                         /*
3672                          * We're adding a negative cache entry.
3673                          */
3674                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
3675                         if (covers == dns_rdatatype_any) {
3676                                 /*
3677                                  * We're adding an NXDOMAIN negative cache
3678                                  * entry.
3679                                  *
3680                                  * We make all other data stale so that the
3681                                  * only rdataset that can be found at this
3682                                  * node is the NXDOMAIN negative cache entry.
3683                                  */
3684                                 for (topheader = rbtnode->data;
3685                                      topheader != NULL;
3686                                      topheader = topheader->next) {
3687                                         topheader->ttl = 0;
3688                                         topheader->attributes |=
3689                                                 RDATASET_ATTR_STALE;
3690                                 }
3691                                 rbtnode->dirty = 1;
3692                                 goto find_header;
3693                         }
3694                         nxtype = RBTDB_RDATATYPE_VALUE(covers, 0);
3695                 } else {
3696                         /*
3697                          * We're adding something that isn't a
3698                          * negative cache entry.  Look for an extant
3699                          * non-stale NXDOMAIN negative cache entry.
3700                          */
3701                         for (topheader = rbtnode->data;
3702                              topheader != NULL;
3703                              topheader = topheader->next) {
3704                                 if (NXDOMAIN(topheader))
3705                                         break;
3706                         }
3707                         if (topheader != NULL && EXISTS(topheader) &&
3708                             topheader->ttl > now) {
3709                                 /*
3710                                  * Found one.
3711                                  */
3712                                 if (trust < topheader->trust) {
3713                                         /*
3714                                          * The NXDOMAIN is more trusted.
3715                                          */
3716                                         free_rdataset(rbtdb->common.mctx,
3717                                                       newheader);
3718                                         if (addedrdataset != NULL)
3719                                                 bind_rdataset(rbtdb, rbtnode,
3720                                                               topheader, now,
3721                                                               addedrdataset);
3722                                         return (DNS_R_UNCHANGED);
3723                                 }
3724                                 /*
3725                                  * The new rdataset is better.  Expire the
3726                                  * NXDOMAIN.
3727                                  */
3728                                 topheader->ttl = 0;
3729                                 topheader->attributes |= RDATASET_ATTR_STALE;
3730                                 rbtnode->dirty = 1;
3731                                 topheader = NULL;
3732                                 goto find_header;
3733                         }
3734                         nxtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
3735                 }
3736         }
3737
3738         for (topheader = rbtnode->data;
3739              topheader != NULL;
3740              topheader = topheader->next) {
3741                 if (topheader->type == newheader->type ||
3742                     topheader->type == nxtype)
3743                         break;
3744                 topheader_prev = topheader;
3745         }
3746
3747  find_header:
3748         /*
3749          * If header isn't NULL, we've found the right type.  There may be
3750          * IGNORE rdatasets between the top of the chain and the first real
3751          * data.  We skip over them.
3752          */
3753         header = topheader;
3754         while (header != NULL && IGNORE(header))
3755                 header = header->down;
3756         if (header != NULL) {
3757                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
3758
3759                 /*
3760                  * Deleting an already non-existent rdataset has no effect.
3761                  */
3762                 if (header_nx && newheader_nx) {
3763                         free_rdataset(rbtdb->common.mctx, newheader);
3764                         return (DNS_R_UNCHANGED);
3765                 }
3766
3767                 /*
3768                  * Trying to add an rdataset with lower trust to a cache DB
3769                  * has no effect, provided that the cache data isn't stale.
3770                  */
3771                 if (rbtversion == NULL && trust < header->trust &&
3772                     (header->ttl > now || header_nx)) {
3773                         free_rdataset(rbtdb->common.mctx, newheader);
3774                         if (addedrdataset != NULL)
3775                                 bind_rdataset(rbtdb, rbtnode, header, now,
3776                                               addedrdataset);
3777                         return (DNS_R_UNCHANGED);
3778                 }
3779
3780                 /*
3781                  * Don't merge if a nonexistent rdataset is involved.
3782                  */
3783                 if (merge && (header_nx || newheader_nx))
3784                         merge = ISC_FALSE;
3785
3786                 /*
3787                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
3788                  * that is the union of 'newheader' and 'header'.
3789                  */
3790                 if (merge) {
3791                         unsigned int flags = 0;
3792                         INSIST(rbtversion->serial >= header->serial);
3793                         merged = NULL;
3794                         result = ISC_R_SUCCESS;
3795                         
3796                         if ((options & DNS_DBADD_EXACT) != 0)
3797                                 flags |= DNS_RDATASLAB_EXACT;
3798                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
3799                              newheader->ttl != header->ttl)
3800                                         result = DNS_R_NOTEXACT;
3801                         else if (newheader->ttl != header->ttl)
3802                                 flags |= DNS_RDATASLAB_FORCE;
3803                         if (result == ISC_R_SUCCESS)
3804                                 result = dns_rdataslab_merge(
3805                                              (unsigned char *)header,
3806                                              (unsigned char *)newheader,
3807                                              (unsigned int)(sizeof *newheader),
3808                                              rbtdb->common.mctx,
3809                                              rbtdb->common.rdclass,
3810                                              (dns_rdatatype_t)header->type,
3811                                              flags, &merged);
3812                         if (result == ISC_R_SUCCESS) {
3813                                 /*
3814                                  * If 'header' has the same serial number as
3815                                  * we do, we could clean it up now if we knew
3816                                  * that our caller had no references to it.
3817                                  * We don't know this, however, so we leave it
3818                                  * alone.  It will get cleaned up when
3819                                  * clean_zone_node() runs.
3820                                  */
3821                                 free_rdataset(rbtdb->common.mctx, newheader);
3822                                 newheader = (rdatasetheader_t *)merged;
3823                         } else {
3824                                 free_rdataset(rbtdb->common.mctx, newheader);
3825                                 return (result);
3826                         }
3827                 }
3828                 /*
3829                  * Don't replace existing NS, A and AAAA RRsets
3830                  * in the cache if they are already exist.  This
3831                  * prevents named being locked to old servers.
3832                  */
3833                 if (IS_CACHE(rbtdb) && header->ttl > now &&
3834                     header->type == dns_rdatatype_ns &&
3835                     !header_nx && !newheader_nx &&
3836                     header->trust == newheader->trust &&
3837                     dns_rdataslab_equalx((unsigned char *)header,
3838                                          (unsigned char *)newheader,
3839                                          (unsigned int)(sizeof(*newheader)),
3840                                          rbtdb->common.rdclass,
3841                                          (dns_rdatatype_t)header->type)) {
3842                         /*
3843                          * Honour the new ttl if it is less than the
3844                          * older one.
3845                          */
3846                         if (header->ttl > newheader->ttl)
3847                                 header->ttl = newheader->ttl;
3848                         free_rdataset(rbtdb->common.mctx, newheader);
3849                         if (addedrdataset != NULL)
3850                                 bind_rdataset(rbtdb, rbtnode, header, now,
3851                                               addedrdataset);
3852                         return (ISC_R_SUCCESS);
3853                 }
3854                 if (IS_CACHE(rbtdb) && header->ttl > now &&
3855                     (header->type == dns_rdatatype_a ||
3856                      header->type == dns_rdatatype_aaaa) &&
3857                     !header_nx && !newheader_nx &&
3858                     header->trust == newheader->trust &&
3859                     dns_rdataslab_equal((unsigned char *)header,
3860                                         (unsigned char *)newheader,
3861                                         (unsigned int)(sizeof(*newheader)))) {
3862                         /*
3863                          * Honour the new ttl if it is less than the
3864                          * older one.
3865                          */
3866                         if (header->ttl > newheader->ttl)
3867                                 header->ttl = newheader->ttl;
3868                         free_rdataset(rbtdb->common.mctx, newheader);
3869                         if (addedrdataset != NULL)
3870                                 bind_rdataset(rbtdb, rbtnode, header, now,
3871                                               addedrdataset);
3872                         return (ISC_R_SUCCESS);
3873                 }
3874                 INSIST(rbtversion == NULL ||
3875                        rbtversion->serial >= topheader->serial);
3876                 if (topheader_prev != NULL)
3877                         topheader_prev->next = newheader;
3878                 else
3879                         rbtnode->data = newheader;
3880                 newheader->next = topheader->next;
3881                 if (loading) {
3882                         /*
3883                          * There are no other references to 'header' when
3884                          * loading, so we MAY clean up 'header' now.
3885                          * Since we don't generate changed records when
3886                          * loading, we MUST clean up 'header' now.
3887                          */
3888                         newheader->down = NULL;
3889                         free_rdataset(rbtdb->common.mctx, header);
3890                 } else {
3891                         newheader->down = topheader;
3892                         topheader->next = newheader;
3893                         rbtnode->dirty = 1;
3894                         if (changed != NULL)
3895                                 changed->dirty = ISC_TRUE;
3896                 }
3897         } else {
3898                 /*
3899                  * No non-IGNORED rdatasets of the given type exist at
3900                  * this node.
3901                  */
3902
3903                 /*
3904                  * If we're trying to delete the type, don't bother.
3905                  */
3906                 if (newheader_nx) {
3907                         free_rdataset(rbtdb->common.mctx, newheader);
3908                         return (DNS_R_UNCHANGED);
3909                 }
3910
3911                 if (topheader != NULL) {
3912                         /*
3913                          * We have an list of rdatasets of the given type,
3914                          * but they're all marked IGNORE.  We simply insert
3915                          * the new rdataset at the head of the list.
3916                          *
3917                          * Ignored rdatasets cannot occur during loading, so
3918                          * we INSIST on it.
3919                          */
3920                         INSIST(!loading);
3921                         INSIST(rbtversion == NULL ||
3922                                rbtversion->serial >= topheader->serial);
3923                         if (topheader_prev != NULL)
3924                                 topheader_prev->next = newheader;
3925                         else
3926                                 rbtnode->data = newheader;
3927                         newheader->next = topheader->next;
3928                         newheader->down = topheader;
3929                         topheader->next = newheader;
3930                         rbtnode->dirty = 1;
3931                         if (changed != NULL)
3932                                 changed->dirty = ISC_TRUE;
3933                 } else {
3934                         /*
3935                          * No rdatasets of the given type exist at the node.
3936                          */
3937                         newheader->next = rbtnode->data;
3938                         newheader->down = NULL;
3939                         rbtnode->data = newheader;
3940                 }
3941         }
3942
3943         /*
3944          * Check if the node now contains CNAME and other data.
3945          */
3946         if (rbtversion != NULL &&
3947             cname_and_other_data(rbtnode, rbtversion->serial))
3948                 return (DNS_R_CNAMEANDOTHER);
3949
3950         if (addedrdataset != NULL)
3951                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
3952
3953         return (ISC_R_SUCCESS);
3954 }
3955
3956 static inline isc_boolean_t
3957 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
3958                 rbtdb_rdatatype_t type)
3959 {
3960         if (IS_CACHE(rbtdb)) {
3961                 if (type == dns_rdatatype_dname)
3962                         return (ISC_TRUE);
3963                 else
3964                         return (ISC_FALSE);
3965         } else if (type == dns_rdatatype_dname ||
3966                    (type == dns_rdatatype_ns &&
3967                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
3968                 return (ISC_TRUE);
3969         return (ISC_FALSE);
3970 }
3971
3972 static isc_result_t
3973 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
3974             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
3975             dns_rdataset_t *addedrdataset)
3976 {
3977         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3978         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
3979         rbtdb_version_t *rbtversion = version;
3980         isc_region_t region;
3981         rdatasetheader_t *newheader;
3982         isc_result_t result;
3983         isc_boolean_t delegating;
3984
3985         REQUIRE(VALID_RBTDB(rbtdb));
3986
3987         if (rbtversion == NULL) {
3988                 if (now == 0)
3989                         isc_stdtime_get(&now);
3990         } else
3991                 now = 0;
3992
3993         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
3994                                             &region,
3995                                             sizeof (rdatasetheader_t));
3996         if (result != ISC_R_SUCCESS)
3997                 return (result);
3998
3999         newheader = (rdatasetheader_t *)region.base;
4000         newheader->ttl = rdataset->ttl + now;
4001         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
4002                                                 rdataset->covers);
4003         newheader->attributes = 0;
4004         newheader->trust = rdataset->trust;
4005         if (rbtversion != NULL) {
4006                 newheader->serial = rbtversion->serial;
4007                 now = 0;
4008         } else {
4009                 newheader->serial = 1;
4010                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
4011                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
4012         }
4013
4014         /*
4015          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
4016          * just DNAME for the cache), then we need to set the callback bit
4017          * on the node, and to do that we must be holding an exclusive lock
4018          * on the tree.
4019          */
4020         if (delegating_type(rbtdb, rbtnode, rdataset->type)) {
4021                 delegating = ISC_TRUE;
4022                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4023         } else
4024                 delegating = ISC_FALSE;
4025
4026         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4027
4028         result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
4029                      addedrdataset, now);
4030         if (result == ISC_R_SUCCESS && delegating)
4031                 rbtnode->find_callback = 1;
4032
4033         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4034
4035         if (delegating)
4036                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4037
4038         return (result);
4039 }
4040
4041 static isc_result_t
4042 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4043                  dns_rdataset_t *rdataset, unsigned int options,
4044                  dns_rdataset_t *newrdataset)
4045 {
4046         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4047         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4048         rbtdb_version_t *rbtversion = version;
4049         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
4050         unsigned char *subresult;
4051         isc_region_t region;
4052         isc_result_t result;
4053         rbtdb_changed_t *changed;
4054
4055         REQUIRE(VALID_RBTDB(rbtdb));
4056
4057         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
4058                                             &region,
4059                                             sizeof (rdatasetheader_t));
4060         if (result != ISC_R_SUCCESS)
4061                 return (result);
4062         newheader = (rdatasetheader_t *)region.base;
4063         newheader->ttl = rdataset->ttl;
4064         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
4065                                                 rdataset->covers);
4066         newheader->attributes = 0;
4067         newheader->serial = rbtversion->serial;
4068         newheader->trust = 0;
4069
4070         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4071
4072         changed = add_changed(rbtdb, rbtversion, rbtnode);
4073         if (changed == NULL) {
4074                 free_rdataset(rbtdb->common.mctx, newheader);
4075                 return (ISC_R_NOMEMORY);
4076         }
4077
4078         topheader_prev = NULL;
4079         for (topheader = rbtnode->data;
4080              topheader != NULL;
4081              topheader = topheader->next) {
4082                 if (topheader->type == newheader->type)
4083                         break;
4084                 topheader_prev = topheader;
4085         }
4086         /*
4087          * If header isn't NULL, we've found the right type.  There may be
4088          * IGNORE rdatasets between the top of the chain and the first real
4089          * data.  We skip over them.
4090          */
4091         header = topheader;
4092         while (header != NULL && IGNORE(header))
4093                 header = header->down;
4094         if (header != NULL && EXISTS(header)) {
4095                 unsigned int flags = 0;
4096                 subresult = NULL;
4097                 result = ISC_R_SUCCESS;
4098                 if ((options & DNS_DBSUB_EXACT) != 0) {
4099                         flags |= DNS_RDATASLAB_EXACT;
4100                         if (newheader->ttl != header->ttl)
4101                                 result = DNS_R_NOTEXACT;
4102                 }
4103                 if (result == ISC_R_SUCCESS)
4104                         result = dns_rdataslab_subtract(
4105                                         (unsigned char *)header,
4106                                         (unsigned char *)newheader,
4107                                         (unsigned int)(sizeof *newheader),
4108                                         rbtdb->common.mctx,
4109                                         rbtdb->common.rdclass,
4110                                         (dns_rdatatype_t)header->type,
4111                                         flags, &subresult);
4112                 if (result == ISC_R_SUCCESS) {
4113                         free_rdataset(rbtdb->common.mctx, newheader);
4114                         newheader = (rdatasetheader_t *)subresult;
4115                         /*
4116                          * We have to set the serial since the rdataslab
4117                          * subtraction routine copies the reserved portion of
4118                          * header, not newheader.
4119                          */
4120                         newheader->serial = rbtversion->serial;
4121                 } else if (result == DNS_R_NXRRSET) {
4122                         /*
4123                          * This subtraction would remove all of the rdata;
4124                          * add a nonexistent header instead.
4125                          */
4126                         free_rdataset(rbtdb->common.mctx, newheader);
4127                         newheader = isc_mem_get(rbtdb->common.mctx,
4128                                                 sizeof *newheader);
4129                         if (newheader == NULL) {
4130                                 result = ISC_R_NOMEMORY;
4131                                 goto unlock;
4132                         }
4133                         newheader->ttl = 0;
4134                         newheader->type = topheader->type;
4135                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
4136                         newheader->trust = 0;
4137                         newheader->serial = rbtversion->serial;
4138                 } else {
4139                         free_rdataset(rbtdb->common.mctx, newheader);
4140                         goto unlock;
4141                 }
4142
4143                 /*
4144                  * If we're here, we want to link newheader in front of
4145                  * topheader.
4146                  */
4147                 INSIST(rbtversion->serial >= topheader->serial);
4148                 if (topheader_prev != NULL)
4149                         topheader_prev->next = newheader;
4150                 else
4151                         rbtnode->data = newheader;
4152                 newheader->next = topheader->next;
4153                 newheader->down = topheader;
4154                 topheader->next = newheader;
4155                 rbtnode->dirty = 1;
4156                 changed->dirty = ISC_TRUE;
4157         } else {
4158                 /*
4159                  * The rdataset doesn't exist, so we don't need to do anything
4160                  * to satisfy the deletion request.
4161                  */
4162                 free_rdataset(rbtdb->common.mctx, newheader);
4163                 if ((options & DNS_DBSUB_EXACT) != 0)
4164                         result = DNS_R_NOTEXACT;
4165                 else
4166                         result = DNS_R_UNCHANGED;                       
4167         }
4168
4169         if (result == ISC_R_SUCCESS && newrdataset != NULL)
4170                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
4171
4172  unlock:
4173         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4174
4175         return (result);
4176 }
4177
4178 static isc_result_t
4179 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4180                dns_rdatatype_t type, dns_rdatatype_t covers)
4181 {
4182         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4183         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4184         rbtdb_version_t *rbtversion = version;
4185         isc_result_t result;
4186         rdatasetheader_t *newheader;
4187
4188         REQUIRE(VALID_RBTDB(rbtdb));
4189
4190         if (type == dns_rdatatype_any)
4191                 return (ISC_R_NOTIMPLEMENTED);
4192         if (type == dns_rdatatype_sig && covers == 0)
4193                 return (ISC_R_NOTIMPLEMENTED);
4194
4195         newheader = isc_mem_get(rbtdb->common.mctx, sizeof *newheader);
4196         if (newheader == NULL)
4197                 return (ISC_R_NOMEMORY);
4198         newheader->ttl = 0;
4199         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
4200         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
4201         newheader->trust = 0;
4202         if (rbtversion != NULL)
4203                 newheader->serial = rbtversion->serial;
4204         else
4205                 newheader->serial = 0;
4206
4207         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4208
4209         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
4210                      ISC_FALSE, NULL, 0);
4211
4212         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4213
4214         return (result);
4215 }
4216
4217 static isc_result_t
4218 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
4219         rbtdb_load_t *loadctx = arg;
4220         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
4221         dns_rbtnode_t *node;
4222         isc_result_t result;
4223         isc_region_t region;
4224         rdatasetheader_t *newheader;
4225
4226         /*
4227          * This routine does no node locking.  See comments in
4228          * 'load' below for more information on loading and
4229          * locking.
4230          */
4231
4232
4233         /*
4234          * SOA records are only allowed at top of zone.
4235          */
4236         if (rdataset->type == dns_rdatatype_soa &&
4237             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
4238                 return (DNS_R_NOTZONETOP);
4239
4240         add_empty_wildcards(rbtdb, name);
4241
4242         if (dns_name_iswildcard(name)) {
4243                 /*
4244                  * NS record owners cannot legally be wild cards.
4245                  */
4246                 if (rdataset->type == dns_rdatatype_ns)
4247                         return (DNS_R_INVALIDNS);
4248                 result = add_wildcard_magic(rbtdb, name);
4249                 if (result != ISC_R_SUCCESS)
4250                         return (result);
4251         }
4252
4253         node = NULL;
4254         result = dns_rbt_addnode(rbtdb->tree, name, &node);
4255         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
4256                 return (result);
4257         if (result != ISC_R_EXISTS) {
4258                 dns_name_t foundname;
4259                 dns_name_init(&foundname, NULL);
4260                 dns_rbt_namefromnode(node, &foundname);
4261 #ifdef DNS_RBT_USEHASH
4262                 node->locknum = node->hashval % rbtdb->node_lock_count;
4263 #else
4264                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
4265                         rbtdb->node_lock_count;
4266 #endif
4267         }
4268
4269         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
4270                                             &region,
4271                                             sizeof (rdatasetheader_t));
4272         if (result != ISC_R_SUCCESS)
4273                 return (result);
4274         newheader = (rdatasetheader_t *)region.base;
4275         newheader->ttl = rdataset->ttl + loadctx->now; /* XXX overflow check */
4276         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
4277                                                 rdataset->covers);
4278         newheader->attributes = 0;
4279         newheader->trust = rdataset->trust;
4280         newheader->serial = 1;
4281
4282         result = add(rbtdb, node, rbtdb->current_version, newheader,
4283                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
4284         if (result == ISC_R_SUCCESS &&
4285             delegating_type(rbtdb, node, rdataset->type))
4286                 node->find_callback = 1;
4287         else if (result == DNS_R_UNCHANGED)
4288                 result = ISC_R_SUCCESS;
4289
4290         return (result);
4291 }
4292
4293 static isc_result_t
4294 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
4295         rbtdb_load_t *loadctx;
4296         dns_rbtdb_t *rbtdb;
4297
4298         rbtdb = (dns_rbtdb_t *)db;
4299
4300         REQUIRE(VALID_RBTDB(rbtdb));
4301
4302         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof *loadctx);
4303         if (loadctx == NULL)
4304                 return (ISC_R_NOMEMORY);
4305
4306         loadctx->rbtdb = rbtdb;
4307         if (IS_CACHE(rbtdb))
4308                 isc_stdtime_get(&loadctx->now);
4309         else
4310                 loadctx->now = 0;
4311
4312         LOCK(&rbtdb->lock);
4313
4314         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
4315                 == 0);
4316         rbtdb->attributes |= RBTDB_ATTR_LOADING;
4317
4318         UNLOCK(&rbtdb->lock);
4319
4320         *addp = loading_addrdataset;
4321         *dbloadp = loadctx;
4322
4323         return (ISC_R_SUCCESS);
4324 }
4325
4326 static isc_boolean_t
4327 iszonesecure(dns_db_t *db, dns_dbnode_t *origin) {
4328         dns_rdataset_t keyset;
4329         dns_rdataset_t nxtset, signxtset;
4330         isc_boolean_t haszonekey = ISC_FALSE;
4331         isc_boolean_t hasnxt = ISC_FALSE;
4332         isc_result_t result;
4333
4334         dns_rdataset_init(&keyset);
4335         result = dns_db_findrdataset(db, origin, NULL, dns_rdatatype_key, 0,
4336                                      0, &keyset, NULL);
4337         if (result == ISC_R_SUCCESS) {
4338                 dns_rdata_t keyrdata = DNS_RDATA_INIT;
4339                 result = dns_rdataset_first(&keyset);
4340                 while (result == ISC_R_SUCCESS) {
4341                         dns_rdataset_current(&keyset, &keyrdata);
4342                         if (dns_zonekey_iszonekey(&keyrdata)) {
4343                                 haszonekey = ISC_TRUE;
4344                                 break;
4345                         }
4346                         result = dns_rdataset_next(&keyset);
4347                 }
4348                 dns_rdataset_disassociate(&keyset);
4349         }
4350         if (!haszonekey)
4351                 return (ISC_FALSE);
4352
4353         dns_rdataset_init(&nxtset);
4354         dns_rdataset_init(&signxtset);
4355         result = dns_db_findrdataset(db, origin, NULL, dns_rdatatype_nxt, 0,
4356                                      0, &nxtset, &signxtset);
4357         if (result == ISC_R_SUCCESS) {
4358                 if (dns_rdataset_isassociated(&signxtset)) {
4359                         hasnxt = ISC_TRUE;
4360                         dns_rdataset_disassociate(&signxtset);
4361                 }
4362                 dns_rdataset_disassociate(&nxtset);
4363         }
4364         return (hasnxt);
4365
4366 }
4367
4368 static isc_result_t
4369 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
4370         rbtdb_load_t *loadctx;
4371         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4372
4373         REQUIRE(VALID_RBTDB(rbtdb));
4374         REQUIRE(dbloadp != NULL);
4375         loadctx = *dbloadp;
4376         REQUIRE(loadctx->rbtdb == rbtdb);
4377
4378         LOCK(&rbtdb->lock);
4379
4380         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
4381         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
4382
4383         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
4384         rbtdb->attributes |= RBTDB_ATTR_LOADED;
4385
4386         UNLOCK(&rbtdb->lock);
4387
4388         /*
4389          * If there's a KEY rdataset at the zone origin containing a
4390          * zone key, we consider the zone secure.
4391          */
4392         if (! IS_CACHE(rbtdb))
4393                 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
4394
4395         *dbloadp = NULL;
4396
4397         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof *loadctx);
4398
4399         return (ISC_R_SUCCESS);
4400 }
4401
4402 static isc_result_t
4403 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename) {
4404         dns_rbtdb_t *rbtdb;
4405
4406         rbtdb = (dns_rbtdb_t *)db;
4407
4408         REQUIRE(VALID_RBTDB(rbtdb));
4409
4410         return (dns_master_dump(rbtdb->common.mctx, db, version,
4411                                 &dns_master_style_default,
4412                                 filename));
4413 }
4414
4415 static void
4416 delete_callback(void *data, void *arg) {
4417         dns_rbtdb_t *rbtdb = arg;
4418         rdatasetheader_t *current, *next;
4419
4420         for (current = data; current != NULL; current = next) {
4421                 next = current->next;
4422                 free_rdataset(rbtdb->common.mctx, current);
4423         }
4424 }
4425
4426 static isc_boolean_t
4427 issecure(dns_db_t *db) {
4428         dns_rbtdb_t *rbtdb;
4429         isc_boolean_t secure;
4430
4431         rbtdb = (dns_rbtdb_t *)db;
4432
4433         REQUIRE(VALID_RBTDB(rbtdb));
4434
4435         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4436         secure = rbtdb->secure;
4437         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4438
4439         return (secure);
4440 }
4441
4442 static unsigned int
4443 nodecount(dns_db_t *db) {
4444         dns_rbtdb_t *rbtdb;
4445         unsigned int count;
4446
4447         rbtdb = (dns_rbtdb_t *)db;
4448
4449         REQUIRE(VALID_RBTDB(rbtdb));
4450
4451         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4452         count = dns_rbt_nodecount(rbtdb->tree);
4453         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4454
4455         return (count);
4456 }
4457
4458 static void
4459 settask(dns_db_t *db, isc_task_t *task) {
4460         dns_rbtdb_t *rbtdb;
4461
4462         rbtdb = (dns_rbtdb_t *)db;
4463
4464         REQUIRE(VALID_RBTDB(rbtdb));
4465
4466         LOCK(&rbtdb->lock);
4467         if (rbtdb->task != NULL)
4468                 isc_task_detach(&rbtdb->task);
4469         if (task != NULL)
4470                 isc_task_attach(task, &rbtdb->task);
4471         UNLOCK(&rbtdb->lock);
4472 }
4473
4474 static isc_boolean_t
4475 ispersistent(dns_db_t *db) {
4476         UNUSED(db);
4477         return (ISC_FALSE);
4478 }
4479
4480 static dns_dbmethods_t zone_methods = {
4481         attach,
4482         detach,
4483         beginload,
4484         endload,
4485         dump,
4486         currentversion,
4487         newversion,
4488         attachversion,
4489         closeversion,
4490         findnode,
4491         zone_find,
4492         zone_findzonecut,
4493         attachnode,
4494         detachnode,
4495         expirenode,
4496         printnode,
4497         createiterator,
4498         zone_findrdataset,
4499         allrdatasets,
4500         addrdataset,
4501         subtractrdataset,
4502         deleterdataset,
4503         issecure,
4504         nodecount,
4505         ispersistent,
4506         overmem,
4507         settask
4508 };
4509
4510 static dns_dbmethods_t cache_methods = {
4511         attach,
4512         detach,
4513         beginload,
4514         endload,
4515         dump,
4516         currentversion,
4517         newversion,
4518         attachversion,
4519         closeversion,
4520         findnode,
4521         cache_find,
4522         cache_findzonecut,
4523         attachnode,
4524         detachnode,
4525         expirenode,
4526         printnode,
4527         createiterator,
4528         cache_findrdataset,
4529         allrdatasets,
4530         addrdataset,
4531         subtractrdataset,
4532         deleterdataset,
4533         issecure,
4534         nodecount,
4535         ispersistent,
4536         overmem,
4537         settask
4538 };
4539
4540 isc_result_t
4541 #ifdef DNS_RBTDB_VERSION64
4542 dns_rbtdb64_create
4543 #else
4544 dns_rbtdb_create
4545 #endif
4546                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
4547                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
4548                  void *driverarg, dns_db_t **dbp)
4549 {
4550         dns_rbtdb_t *rbtdb;
4551         isc_result_t result;
4552         int i;
4553         dns_name_t name;
4554
4555         /* Keep the compiler happy. */
4556         UNUSED(argc);
4557         UNUSED(argv);
4558         UNUSED(driverarg);
4559
4560         rbtdb = isc_mem_get(mctx, sizeof *rbtdb);
4561         if (rbtdb == NULL)
4562                 return (ISC_R_NOMEMORY);
4563         memset(rbtdb, '\0', sizeof *rbtdb);
4564         dns_name_init(&rbtdb->common.origin, NULL);
4565         rbtdb->common.attributes = 0;
4566         if (type == dns_dbtype_cache) {
4567                 rbtdb->common.methods = &cache_methods;
4568                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
4569         } else if (type == dns_dbtype_stub) {
4570                 rbtdb->common.methods = &zone_methods;
4571                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
4572         } else
4573                 rbtdb->common.methods = &zone_methods;
4574         rbtdb->common.rdclass = rdclass;
4575         rbtdb->common.mctx = NULL;
4576
4577         result = isc_mutex_init(&rbtdb->lock);
4578         if (result != ISC_R_SUCCESS) {
4579                 isc_mem_put(mctx, rbtdb, sizeof *rbtdb);
4580                 UNEXPECTED_ERROR(__FILE__, __LINE__,
4581                                  "isc_mutex_init() failed: %s",
4582                                  isc_result_totext(result));
4583                 return (ISC_R_UNEXPECTED);
4584         }
4585
4586         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
4587         if (result != ISC_R_SUCCESS) {
4588                 DESTROYLOCK(&rbtdb->lock);
4589                 isc_mem_put(mctx, rbtdb, sizeof *rbtdb);
4590                 UNEXPECTED_ERROR(__FILE__, __LINE__,
4591                                  "isc_rwlock_init() failed: %s",
4592                                  isc_result_totext(result));
4593                 return (ISC_R_UNEXPECTED);
4594         }
4595
4596         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
4597
4598         if (rbtdb->node_lock_count == 0)
4599                 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
4600         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
4601                                         sizeof (rbtdb_nodelock_t));
4602         rbtdb->active = rbtdb->node_lock_count;
4603         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
4604                 result = isc_mutex_init(&rbtdb->node_locks[i].lock);
4605                 if (result != ISC_R_SUCCESS) {
4606                         i--;
4607                         while (i >= 0) {
4608                                 DESTROYLOCK(&rbtdb->node_locks[i].lock);
4609                                 i--;
4610                         }
4611                         isc_mem_put(mctx, rbtdb->node_locks,
4612                                     rbtdb->node_lock_count *
4613                                     sizeof (rbtdb_nodelock_t));
4614                         isc_rwlock_destroy(&rbtdb->tree_lock);
4615                         DESTROYLOCK(&rbtdb->lock);
4616                         isc_mem_put(mctx, rbtdb, sizeof *rbtdb);
4617                         UNEXPECTED_ERROR(__FILE__, __LINE__,
4618                                          "isc_mutex_init() failed: %s",
4619                                          isc_result_totext(result));
4620                         return (ISC_R_UNEXPECTED);
4621                 }
4622                 rbtdb->node_locks[i].references = 0;
4623                 rbtdb->node_locks[i].exiting = ISC_FALSE;
4624         }
4625
4626         /*
4627          * Attach to the mctx.  The database will persist so long as there
4628          * are references to it, and attaching to the mctx ensures that our
4629          * mctx won't disappear out from under us.
4630          */
4631         isc_mem_attach(mctx, &rbtdb->common.mctx);
4632
4633         /*
4634          * Make a copy of the origin name.
4635          */
4636         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
4637         if (result != ISC_R_SUCCESS) {
4638                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
4639                 return (result);
4640         }
4641
4642         /*
4643          * Make the Red-Black Tree.
4644          */
4645         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
4646         if (result != ISC_R_SUCCESS) {
4647                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
4648                 return (result);
4649         }
4650         /*
4651          * In order to set the node callback bit correctly in zone databases,
4652          * we need to know if the node has the origin name of the zone.
4653          * In loading_addrdataset() we could simply compare the new name
4654          * to the origin name, but this is expensive.  Also, we don't know the
4655          * node name in addrdataset(), so we need another way of knowing the
4656          * zone's top.
4657          *
4658          * We now explicitly create a node for the zone's origin, and then
4659          * we simply remember the node's address.  This is safe, because
4660          * the top-of-zone node can never be deleted, nor can its address
4661          * change.
4662          */
4663         if (! IS_CACHE(rbtdb)) {
4664                 rbtdb->origin_node = NULL;
4665                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
4666                                          &rbtdb->origin_node);
4667                 if (result != ISC_R_SUCCESS) {
4668                         INSIST(result != ISC_R_EXISTS);
4669                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
4670                         return (result);
4671                 }
4672                 /*
4673                  * We need to give the origin node the right locknum.
4674                  */
4675                 dns_name_init(&name, NULL);
4676                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
4677 #ifdef DNS_RBT_USEHASH
4678                 rbtdb->origin_node->locknum =
4679                         rbtdb->origin_node->hashval %
4680                         rbtdb->node_lock_count;
4681 #else
4682                 rbtdb->origin_node->locknum =
4683                         dns_name_hash(&name, ISC_TRUE) %
4684                         rbtdb->node_lock_count;
4685 #endif
4686         }
4687
4688         /*
4689          * Misc. Initialization.
4690          */
4691         isc_refcount_init(&rbtdb->references, 1);
4692         rbtdb->attributes = 0;
4693         rbtdb->secure = ISC_FALSE;
4694         rbtdb->overmem = ISC_FALSE;
4695         rbtdb->task = NULL;
4696
4697         /*
4698          * Version Initialization.
4699          */
4700         rbtdb->current_serial = 1;
4701         rbtdb->least_serial = 1;
4702         rbtdb->next_serial = 2;
4703         rbtdb->current_version = allocate_version(mctx, 1, 0, ISC_FALSE);
4704         if (rbtdb->current_version == NULL) {
4705                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
4706                 return (ISC_R_NOMEMORY);
4707         }
4708         rbtdb->future_version = NULL;
4709         ISC_LIST_INIT(rbtdb->open_versions);
4710
4711         isc_ondestroy_init(&rbtdb->common.ondest);
4712
4713         rbtdb->common.magic = DNS_DB_MAGIC;
4714         rbtdb->common.impmagic = RBTDB_MAGIC;
4715
4716         *dbp = (dns_db_t *)rbtdb;
4717
4718         return (ISC_R_SUCCESS);
4719 }
4720
4721
4722 /*
4723  * Slabbed Rdataset Methods
4724  */
4725
4726 static void
4727 rdataset_disassociate(dns_rdataset_t *rdataset) {
4728         dns_db_t *db = rdataset->private1;
4729         dns_dbnode_t *node = rdataset->private2;
4730
4731         detachnode(db, &node);
4732 }
4733
4734 static isc_result_t
4735 rdataset_first(dns_rdataset_t *rdataset) {
4736         unsigned char *raw = rdataset->private3;
4737         unsigned int count;
4738
4739         count = raw[0] * 256 + raw[1];
4740         if (count == 0) {
4741                 rdataset->private5 = NULL;
4742                 return (ISC_R_NOMORE);
4743         }
4744         raw += 2;
4745         /*
4746          * The privateuint4 field is the number of rdata beyond the cursor
4747          * position, so we decrement the total count by one before storing
4748          * it.
4749          */
4750         count--;
4751         rdataset->privateuint4 = count;
4752         rdataset->private5 = raw;
4753
4754         return (ISC_R_SUCCESS);
4755 }
4756
4757 static isc_result_t
4758 rdataset_next(dns_rdataset_t *rdataset) {
4759         unsigned int count;
4760         unsigned int length;
4761         unsigned char *raw;
4762
4763         count = rdataset->privateuint4;
4764         if (count == 0)
4765                 return (ISC_R_NOMORE);
4766         count--;
4767         rdataset->privateuint4 = count;
4768         raw = rdataset->private5;
4769         length = raw[0] * 256 + raw[1];
4770         raw += length + 2;
4771         rdataset->private5 = raw;
4772
4773         return (ISC_R_SUCCESS);
4774 }
4775
4776 static void
4777 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
4778         unsigned char *raw = rdataset->private5;
4779         isc_region_t r;
4780
4781         REQUIRE(raw != NULL);
4782
4783         r.length = raw[0] * 256 + raw[1];
4784         raw += 2;
4785         r.base = raw;
4786         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
4787 }
4788
4789 static void
4790 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
4791         dns_db_t *db = source->private1;
4792         dns_dbnode_t *node = source->private2;
4793         dns_dbnode_t *cloned_node;
4794
4795         attachnode(db, node, &cloned_node);
4796         *target = *source;
4797
4798         /*
4799          * Reset iterator state.
4800          */
4801         target->privateuint4 = 0;
4802         target->private5 = NULL;
4803 }
4804
4805 static unsigned int
4806 rdataset_count(dns_rdataset_t *rdataset) {
4807         unsigned char *raw = rdataset->private3;
4808         unsigned int count;
4809
4810         count = raw[0] * 256 + raw[1];
4811
4812         return (count);
4813 }
4814
4815
4816 /*
4817  * Rdataset Iterator Methods
4818  */
4819
4820 static void
4821 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
4822         rbtdb_rdatasetiter_t *rbtiterator;
4823
4824         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
4825
4826         if (rbtiterator->common.version != NULL)
4827                 closeversion(rbtiterator->common.db,
4828                              &rbtiterator->common.version, ISC_FALSE);
4829         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
4830         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
4831                     sizeof *rbtiterator);
4832
4833         *iteratorp = NULL;
4834 }
4835
4836 static isc_result_t
4837 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
4838         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
4839         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
4840         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
4841         rbtdb_version_t *rbtversion = rbtiterator->common.version;
4842         rdatasetheader_t *header, *top_next;
4843         rbtdb_serial_t serial;
4844         isc_stdtime_t now;
4845
4846         if (IS_CACHE(rbtdb)) {
4847                 serial = 1;
4848                 now = rbtiterator->common.now;
4849         } else {
4850                 serial = rbtversion->serial;
4851                 now = 0;
4852         }
4853
4854         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4855
4856         for (header = rbtnode->data; header != NULL; header = top_next) {
4857                 top_next = header->next;
4858                 do {
4859                         if (header->serial <= serial && !IGNORE(header)) {
4860                                 /*
4861                                  * Is this a "this rdataset doesn't exist"
4862                                  * record?  Or is it too old in the cache?
4863                                  *
4864                                  * Note: unlike everywhere else, we
4865                                  * check for now > header->ttl instead
4866                                  * of now >= header->ttl.  This allows
4867                                  * ANY and SIG queries for 0 TTL
4868                                  * rdatasets to work.
4869                                  */
4870                                 if (NONEXISTENT(header) ||
4871                                     (now != 0 && now > header->ttl))
4872                                         header = NULL;
4873                                 break;
4874                         } else
4875                                 header = header->down;
4876                 } while (header != NULL);
4877                 if (header != NULL)
4878                         break;
4879         }
4880
4881         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4882
4883         rbtiterator->current = header;
4884
4885         if (header == NULL)
4886                 return (ISC_R_NOMORE);
4887
4888         return (ISC_R_SUCCESS);
4889 }
4890
4891 static isc_result_t
4892 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
4893         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
4894         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
4895         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
4896         rbtdb_version_t *rbtversion = rbtiterator->common.version;
4897         rdatasetheader_t *header, *top_next;
4898         rbtdb_serial_t serial;
4899         isc_stdtime_t now;
4900         rbtdb_rdatatype_t type;
4901
4902         header = rbtiterator->current;
4903         if (header == NULL)
4904                 return (ISC_R_NOMORE);
4905
4906         if (IS_CACHE(rbtdb)) {
4907                 serial = 1;
4908                 now = rbtiterator->common.now;
4909         } else {
4910                 serial = rbtversion->serial;
4911                 now = 0;
4912         }
4913
4914         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4915
4916         type = header->type;
4917         for (header = header->next; header != NULL; header = top_next) {
4918                 top_next = header->next;
4919                 if (header->type != type) {
4920                         do {
4921                                 if (header->serial <= serial &&
4922                                     !IGNORE(header)) {
4923                                         /*
4924                                          * Is this a "this rdataset doesn't
4925                                          * exist" record?
4926                                          *
4927                                          * Note: unlike everywhere else, we
4928                                          * check for now > header->ttl instead
4929                                          * of now >= header->ttl.  This allows
4930                                          * ANY and SIG queries for 0 TTL
4931                                          * rdatasets to work.
4932                                          */
4933                                         if ((header->attributes &
4934                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
4935                                             (now != 0 && now > header->ttl))
4936                                                 header = NULL;
4937                                         break;
4938                                 } else
4939                                         header = header->down;
4940                         } while (header != NULL);
4941                         if (header != NULL)
4942                                 break;
4943                 }
4944         }
4945
4946         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4947
4948         rbtiterator->current = header;
4949
4950         if (header == NULL)
4951                 return (ISC_R_NOMORE);
4952
4953         return (ISC_R_SUCCESS);
4954 }
4955
4956 static void
4957 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
4958         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
4959         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
4960         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
4961         rdatasetheader_t *header;
4962
4963         header = rbtiterator->current;
4964         REQUIRE(header != NULL);
4965
4966         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4967
4968         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
4969                       rdataset);
4970
4971         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4972 }
4973
4974
4975 /*
4976  * Database Iterator Methods
4977  */
4978
4979 static inline void
4980 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
4981         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
4982         dns_rbtnode_t *node = rbtdbiter->node;
4983
4984         if (node == NULL)
4985                 return;
4986
4987         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
4988         LOCK(&rbtdb->node_locks[node->locknum].lock);
4989         new_reference(rbtdb, node);
4990         UNLOCK(&rbtdb->node_locks[node->locknum].lock);
4991 }
4992
4993 static inline void
4994 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
4995         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
4996         dns_rbtnode_t *node = rbtdbiter->node;
4997         isc_mutex_t *lock;
4998
4999         if (node == NULL)
5000                 return;
5001
5002         lock = &rbtdb->node_locks[node->locknum].lock;
5003         LOCK(lock);
5004         INSIST(rbtdbiter->node->references > 0);
5005         if (--node->references == 0)
5006                 no_references(rbtdb, node, 0, rbtdbiter->tree_locked);
5007         UNLOCK(lock);
5008
5009         rbtdbiter->node = NULL;
5010 }
5011
5012 static void
5013 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
5014         dns_rbtnode_t *node;
5015         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
5016         isc_boolean_t was_read_locked = ISC_FALSE;
5017         isc_mutex_t *lock;
5018         int i;
5019
5020         if (rbtdbiter->delete != 0) {
5021                 /*
5022                  * Note that "%d node of %d in tree" can report things like
5023                  * "flush_deletions: 59 nodes of 41 in tree".  This means
5024                  * That some nodes appear on the deletions list more than
5025                  * once.  Only the last occurence will actually be deleted.
5026                  */
5027                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
5028                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
5029                               "flush_deletions: %d nodes of %d in tree",
5030                               rbtdbiter->delete,
5031                               dns_rbt_nodecount(rbtdb->tree));
5032
5033                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
5034                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5035                         was_read_locked = ISC_TRUE;
5036                 }
5037                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
5038                 rbtdbiter->tree_locked = isc_rwlocktype_write;
5039
5040                 for (i = 0; i < rbtdbiter->delete; i++) {
5041                         node = rbtdbiter->deletions[i];
5042                         lock = &rbtdb->node_locks[node->locknum].lock;
5043
5044                         LOCK(lock);
5045                         INSIST(node->references > 0);
5046                         node->references--;
5047                         if (node->references == 0)
5048                                 no_references(rbtdb, node, 0,
5049                                               rbtdbiter->tree_locked);
5050                         UNLOCK(lock);
5051                 }
5052
5053                 rbtdbiter->delete = 0;
5054
5055                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
5056                 if (was_read_locked) {
5057                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5058                         rbtdbiter->tree_locked = isc_rwlocktype_read;
5059
5060                 } else {
5061                         rbtdbiter->tree_locked = isc_rwlocktype_none;
5062                 }
5063         }
5064 }
5065
5066 static inline void
5067 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
5068         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
5069
5070         REQUIRE(rbtdbiter->paused);
5071         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
5072
5073         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5074         rbtdbiter->tree_locked = isc_rwlocktype_read;
5075
5076         rbtdbiter->paused = ISC_FALSE;
5077 }
5078
5079 static void
5080 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
5081         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
5082         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
5083         dns_db_t *db = NULL;
5084
5085         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
5086                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5087                 rbtdbiter->tree_locked = isc_rwlocktype_none;
5088         } else
5089                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
5090
5091         dereference_iter_node(rbtdbiter);
5092
5093         flush_deletions(rbtdbiter);
5094
5095         dns_db_attach(rbtdbiter->common.db, &db);
5096         dns_db_detach(&rbtdbiter->common.db);
5097
5098         dns_rbtnodechain_reset(&rbtdbiter->chain);
5099         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
5100         dns_db_detach(&db);
5101
5102         *iteratorp = NULL;
5103 }
5104
5105 static isc_result_t
5106 dbiterator_first(dns_dbiterator_t *iterator) {
5107         isc_result_t result;
5108         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
5109         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
5110         dns_name_t *name, *origin;
5111
5112         if (rbtdbiter->result != ISC_R_SUCCESS &&
5113             rbtdbiter->result != ISC_R_NOMORE)
5114                 return (rbtdbiter->result);
5115
5116         if (rbtdbiter->paused)
5117                 resume_iteration(rbtdbiter);
5118
5119         dereference_iter_node(rbtdbiter);
5120
5121         name = dns_fixedname_name(&rbtdbiter->name);
5122         origin = dns_fixedname_name(&rbtdbiter->origin);
5123         dns_rbtnodechain_reset(&rbtdbiter->chain);
5124
5125         result = dns_rbtnodechain_first(&rbtdbiter->chain, rbtdb->tree, name,
5126                                         origin);
5127
5128         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
5129                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
5130                                                   NULL, &rbtdbiter->node);
5131                 if (result == ISC_R_SUCCESS) {
5132                         rbtdbiter->new_origin = ISC_TRUE;
5133                         reference_iter_node(rbtdbiter);
5134                 }
5135         } else {
5136                 INSIST(result == ISC_R_NOTFOUND);
5137                 result = ISC_R_NOMORE; /* The tree is empty. */
5138         }
5139
5140         rbtdbiter->result = result;
5141
5142         return (result);
5143 }
5144
5145 static isc_result_t
5146 dbiterator_last(dns_dbiterator_t *iterator) {
5147         isc_result_t result;
5148         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
5149         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
5150         dns_name_t *name, *origin;
5151
5152         if (rbtdbiter->result != ISC_R_SUCCESS &&
5153             rbtdbiter->result != ISC_R_NOMORE)
5154                 return (rbtdbiter->result);
5155
5156         if (rbtdbiter->paused)
5157                 resume_iteration(rbtdbiter);
5158
5159         dereference_iter_node(rbtdbiter);
5160
5161         name = dns_fixedname_name(&rbtdbiter->name);
5162         origin = dns_fixedname_name(&rbtdbiter->origin);
5163         dns_rbtnodechain_reset(&rbtdbiter->chain);
5164
5165         result = dns_rbtnodechain_last(&rbtdbiter->chain, rbtdb->tree, name,
5166                                        origin);
5167         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
5168                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
5169                                                   NULL, &rbtdbiter->node);
5170                 if (result == ISC_R_SUCCESS) {
5171                         rbtdbiter->new_origin = ISC_TRUE;
5172                         reference_iter_node(rbtdbiter);
5173                 }
5174         } else {
5175                 INSIST(result == ISC_R_NOTFOUND);
5176                 result = ISC_R_NOMORE; /* The tree is empty. */
5177         }
5178
5179         rbtdbiter->result = result;
5180
5181         return (result);
5182 }
5183
5184 static isc_result_t
5185 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
5186         isc_result_t result;
5187         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
5188         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
5189         dns_name_t *iname, *origin;
5190
5191         if (rbtdbiter->result != ISC_R_SUCCESS &&
5192             rbtdbiter->result != ISC_R_NOMORE)
5193                 return (rbtdbiter->result);
5194
5195         if (rbtdbiter->paused)
5196                 resume_iteration(rbtdbiter);
5197
5198         dereference_iter_node(rbtdbiter);
5199
5200         iname = dns_fixedname_name(&rbtdbiter->name);
5201         origin = dns_fixedname_name(&rbtdbiter->origin);
5202         dns_rbtnodechain_reset(&rbtdbiter->chain);
5203
5204         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &rbtdbiter->node,
5205                                   &rbtdbiter->chain, DNS_RBTFIND_EMPTYDATA,
5206                                   NULL, NULL);
5207         if (result == ISC_R_SUCCESS) {
5208                 result = dns_rbtnodechain_current(&rbtdbiter->chain, iname,
5209                                                   origin, NULL);
5210                 if (result == ISC_R_SUCCESS) {
5211                         rbtdbiter->new_origin = ISC_TRUE;
5212                         reference_iter_node(rbtdbiter);
5213                 }
5214
5215         } else if (result == DNS_R_PARTIALMATCH)
5216                 result = ISC_R_NOTFOUND;
5217
5218         rbtdbiter->result = result;
5219
5220         return (result);
5221 }
5222
5223 static isc_result_t
5224 dbiterator_prev(dns_dbiterator_t *iterator) {
5225         isc_result_t result;
5226         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
5227         dns_name_t *name, *origin;
5228
5229         REQUIRE(rbtdbiter->node != NULL);
5230
5231         if (rbtdbiter->result != ISC_R_SUCCESS)
5232                 return (rbtdbiter->result);
5233
5234         if (rbtdbiter->paused)
5235                 resume_iteration(rbtdbiter);
5236
5237         name = dns_fixedname_name(&rbtdbiter->name);
5238         origin = dns_fixedname_name(&rbtdbiter->origin);
5239         result = dns_rbtnodechain_prev(&rbtdbiter->chain, name, origin);
5240
5241         dereference_iter_node(rbtdbiter);
5242
5243         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
5244                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
5245                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
5246                                                   NULL, &rbtdbiter->node);
5247         }
5248
5249         if (result == ISC_R_SUCCESS)
5250                 reference_iter_node(rbtdbiter);
5251
5252         rbtdbiter->result = result;
5253
5254         return (result);
5255 }
5256
5257 static isc_result_t
5258 dbiterator_next(dns_dbiterator_t *iterator) {
5259         isc_result_t result;
5260         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
5261         dns_name_t *name, *origin;
5262
5263         REQUIRE(rbtdbiter->node != NULL);
5264
5265         if (rbtdbiter->result != ISC_R_SUCCESS)
5266                 return (rbtdbiter->result);
5267
5268         if (rbtdbiter->paused)
5269                 resume_iteration(rbtdbiter);
5270
5271         name = dns_fixedname_name(&rbtdbiter->name);
5272         origin = dns_fixedname_name(&rbtdbiter->origin);
5273         result = dns_rbtnodechain_next(&rbtdbiter->chain, name, origin);
5274
5275         dereference_iter_node(rbtdbiter);
5276
5277         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
5278                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
5279                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
5280                                                   NULL, &rbtdbiter->node);
5281         }
5282         if (result == ISC_R_SUCCESS)
5283                 reference_iter_node(rbtdbiter);
5284
5285         rbtdbiter->result = result;
5286
5287         return (result);
5288 }
5289
5290 static isc_result_t
5291 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
5292                    dns_name_t *name)
5293 {
5294         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
5295         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
5296         dns_rbtnode_t *node = rbtdbiter->node;
5297         isc_result_t result;
5298         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
5299         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
5300
5301         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
5302         REQUIRE(rbtdbiter->node != NULL);
5303
5304         if (rbtdbiter->paused)
5305                 resume_iteration(rbtdbiter);
5306
5307         if (name != NULL) {
5308                 if (rbtdbiter->common.relative_names)
5309                         origin = NULL;
5310                 result = dns_name_concatenate(nodename, origin, name, NULL);
5311                 if (result != ISC_R_SUCCESS)
5312                         return (result);
5313                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
5314                         result = DNS_R_NEWORIGIN;
5315         } else
5316                 result = ISC_R_SUCCESS;
5317
5318         LOCK(&rbtdb->node_locks[node->locknum].lock);
5319         new_reference(rbtdb, node);
5320         UNLOCK(&rbtdb->node_locks[node->locknum].lock);
5321
5322         *nodep = rbtdbiter->node;
5323
5324         if (iterator->cleaning && result == ISC_R_SUCCESS) {
5325                 isc_result_t expire_result;
5326
5327                 /*
5328                  * If the deletion array is full, flush it before trying
5329                  * to expire the current node.  The current node can't
5330                  * fully deleted while the iteration cursor is still on it.
5331                  */
5332                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
5333                         flush_deletions(rbtdbiter);
5334
5335                 expire_result = expirenode(iterator->db, *nodep, 0);
5336
5337                 /*
5338                  * expirenode() currently always returns success.
5339                  */
5340                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
5341                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
5342                         LOCK(&rbtdb->node_locks[node->locknum].lock);
5343                         node->references++;
5344                         UNLOCK(&rbtdb->node_locks[node->locknum].lock);
5345                 }
5346         }
5347
5348         return (result);
5349 }
5350
5351 static isc_result_t
5352 dbiterator_pause(dns_dbiterator_t *iterator) {
5353         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
5354         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
5355
5356         if (rbtdbiter->result != ISC_R_SUCCESS &&
5357             rbtdbiter->result != ISC_R_NOMORE)
5358                 return (rbtdbiter->result);
5359
5360         if (rbtdbiter->paused)
5361                 return (ISC_R_SUCCESS);
5362
5363         rbtdbiter->paused = ISC_TRUE;
5364
5365         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
5366                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
5367                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5368                 rbtdbiter->tree_locked = isc_rwlocktype_none;
5369         }
5370
5371         flush_deletions(rbtdbiter);
5372
5373         return (ISC_R_SUCCESS);
5374 }
5375
5376 static isc_result_t
5377 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
5378         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
5379         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
5380
5381         if (rbtdbiter->result != ISC_R_SUCCESS)
5382                 return (rbtdbiter->result);
5383
5384         return (dns_name_copy(origin, name, NULL));
5385 }