Merge from vendor branch TCPDUMP:
[dragonfly.git] / contrib / bind-9.3 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004, 2005  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.168.2.11.2.22 2005/10/14 01:38:48 marka Exp $ */
19
20 /*
21  * Principal Author: Bob Halley
22  */
23
24 #include <config.h>
25
26 #include <isc/event.h>
27 #include <isc/mem.h>
28 #include <isc/print.h>
29 #include <isc/mutex.h>
30 #include <isc/random.h>
31 #include <isc/refcount.h>
32 #include <isc/rwlock.h>
33 #include <isc/string.h>
34 #include <isc/task.h>
35 #include <isc/util.h>
36
37 #include <dns/db.h>
38 #include <dns/dbiterator.h>
39 #include <dns/events.h>
40 #include <dns/fixedname.h>
41 #include <dns/log.h>
42 #include <dns/masterdump.h>
43 #include <dns/rbt.h>
44 #include <dns/rdata.h>
45 #include <dns/rdataset.h>
46 #include <dns/rdatasetiter.h>
47 #include <dns/rdataslab.h>
48 #include <dns/result.h>
49 #include <dns/zonekey.h>
50
51 #ifdef DNS_RBTDB_VERSION64
52 #include "rbtdb64.h"
53 #else
54 #include "rbtdb.h"
55 #endif
56
57 #ifdef DNS_RBTDB_VERSION64
58 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
59 #else
60 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
61 #endif
62
63 /*
64  * Note that "impmagic" is not the first four bytes of the struct, so
65  * ISC_MAGIC_VALID cannot be used.
66  */
67 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
68                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
69
70 #ifdef DNS_RBTDB_VERSION64
71 typedef isc_uint64_t                    rbtdb_serial_t;
72 /*
73  * Make casting easier in symbolic debuggers by using different names
74  * for the 64 bit version.
75  */
76 #define dns_rbtdb_t dns_rbtdb64_t
77 #define rdatasetheader_t rdatasetheader64_t
78 #define rbtdb_version_t rbtdb_version64_t
79 #else
80 typedef isc_uint32_t                    rbtdb_serial_t;
81 #endif
82
83 typedef isc_uint32_t                    rbtdb_rdatatype_t;
84
85 #define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
86 #define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
87 #define RBTDB_RDATATYPE_VALUE(b, e)     (((e) << 16) | (b))
88
89 #define RBTDB_RDATATYPE_SIGNSEC \
90                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
91 #define RBTDB_RDATATYPE_SIGNS \
92                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
93 #define RBTDB_RDATATYPE_SIGCNAME \
94                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
95 #define RBTDB_RDATATYPE_SIGDNAME \
96                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
97 #define RBTDB_RDATATYPE_NCACHEANY \
98                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
99
100 /*
101  * Allow clients with a virtual time of upto 5 minutes in the past to see
102  * records that would have otherwise have expired.
103  */
104 #define RBTDB_VIRTUAL 300
105
106 struct noqname {
107         dns_name_t name;
108         void *     nsec;
109         void *     nsecsig;
110 };
111
112 typedef struct rdatasetheader {
113         /*
114          * Locked by the owning node's lock.
115          */
116         rbtdb_serial_t                  serial;
117         dns_ttl_t                       ttl;
118         rbtdb_rdatatype_t               type;
119         isc_uint16_t                    attributes;
120         dns_trust_t                     trust;
121         struct noqname                  *noqname;
122         /*
123          * We don't use the LIST macros, because the LIST structure has
124          * both head and tail pointers, and is doubly linked.
125          */
126
127         struct rdatasetheader           *next;
128         /*
129          * If this is the top header for an rdataset, 'next' points
130          * to the top header for the next rdataset (i.e., the next type).
131          * Otherwise, it points up to the header whose down pointer points
132          * at this header.
133          */
134           
135         struct rdatasetheader           *down;
136         /*
137          * Points to the header for the next older version of
138          * this rdataset.
139          */
140
141         isc_uint32_t                    count;
142         /*
143          * Monotonously increased every time this rdataset is bound so that
144          * it is used as the base of the starting point in DNS responses
145          * when the "cyclic" rrset-order is required.  Since the ordering
146          * should not be so crucial, no lock is set for the counter for
147          * performance reasons.
148          */
149 } rdatasetheader_t;
150
151 #define RDATASET_ATTR_NONEXISTENT       0x0001
152 #define RDATASET_ATTR_STALE             0x0002
153 #define RDATASET_ATTR_IGNORE            0x0004
154 #define RDATASET_ATTR_RETAIN            0x0008
155 #define RDATASET_ATTR_NXDOMAIN          0x0010
156
157 /*
158  * XXX
159  * When the cache will pre-expire data (due to memory low or other
160  * situations) before the rdataset's TTL has expired, it MUST
161  * respect the RETAIN bit and not expire the data until its TTL is
162  * expired.
163  */
164
165 #undef IGNORE                   /* WIN32 winbase.h defines this. */
166
167 #define EXISTS(header) \
168         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
169 #define NONEXISTENT(header) \
170         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
171 #define IGNORE(header) \
172         (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
173 #define RETAIN(header) \
174         (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
175 #define NXDOMAIN(header) \
176         (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
177
178 #define DEFAULT_NODE_LOCK_COUNT         7               /* Should be prime. */
179
180 typedef struct {
181         isc_mutex_t                     lock;
182         /* Locked by lock. */
183         unsigned int                    references;
184         isc_boolean_t                   exiting;
185 } rbtdb_nodelock_t;
186
187 typedef struct rbtdb_changed {
188         dns_rbtnode_t *                 node;
189         isc_boolean_t                   dirty;
190         ISC_LINK(struct rbtdb_changed)  link;
191 } rbtdb_changed_t;
192
193 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
194
195 typedef struct rbtdb_version {
196         /* Not locked */
197         rbtdb_serial_t                  serial;
198         /* Locked by database lock. */
199         isc_boolean_t                   writer;
200         unsigned int                    references;
201         isc_boolean_t                   commit_ok;
202         rbtdb_changedlist_t             changed_list;
203         ISC_LINK(struct rbtdb_version)  link;
204 } rbtdb_version_t;
205
206 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
207
208 typedef struct {
209         /* Unlocked. */
210         dns_db_t                        common;
211         isc_mutex_t                     lock;
212         isc_rwlock_t                    tree_lock;
213         unsigned int                    node_lock_count;
214         rbtdb_nodelock_t *              node_locks;
215         dns_rbtnode_t *                 origin_node;
216         /* Locked by lock. */
217         unsigned int                    active;
218         isc_refcount_t                  references;
219         unsigned int                    attributes;
220         rbtdb_serial_t                  current_serial;
221         rbtdb_serial_t                  least_serial;
222         rbtdb_serial_t                  next_serial;
223         rbtdb_version_t *               current_version;
224         rbtdb_version_t *               future_version;
225         rbtdb_versionlist_t             open_versions;
226         isc_boolean_t                   overmem;
227         isc_task_t *                    task;
228         /* Locked by tree_lock. */
229         dns_rbt_t *                     tree;
230         isc_boolean_t                   secure;
231 } dns_rbtdb_t;
232
233 #define RBTDB_ATTR_LOADED               0x01
234 #define RBTDB_ATTR_LOADING              0x02
235
236 /*
237  * Search Context
238  */
239 typedef struct {
240         dns_rbtdb_t *           rbtdb;
241         rbtdb_version_t *       rbtversion;
242         rbtdb_serial_t          serial;
243         unsigned int            options;
244         dns_rbtnodechain_t      chain;
245         isc_boolean_t           copy_name;
246         isc_boolean_t           need_cleanup;
247         isc_boolean_t           wild;
248         dns_rbtnode_t *         zonecut;
249         rdatasetheader_t *      zonecut_rdataset;
250         rdatasetheader_t *      zonecut_sigrdataset;
251         dns_fixedname_t         zonecut_name;
252         isc_stdtime_t           now;
253 } rbtdb_search_t;
254
255 /*
256  * Load Context
257  */
258 typedef struct {
259         dns_rbtdb_t *           rbtdb;
260         isc_stdtime_t           now;
261 } rbtdb_load_t;
262
263 static void rdataset_disassociate(dns_rdataset_t *rdataset);
264 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
265 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
266 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
267 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
268 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
269 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
270                                         dns_name_t *name,
271                                         dns_rdataset_t *nsec,
272                                         dns_rdataset_t *nsecsig);
273
274 static dns_rdatasetmethods_t rdataset_methods = {
275         rdataset_disassociate,
276         rdataset_first,
277         rdataset_next,
278         rdataset_current,
279         rdataset_clone,
280         rdataset_count,
281         NULL,
282         rdataset_getnoqname
283 };
284
285 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
286 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
287 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
288 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
289                                  dns_rdataset_t *rdataset);
290
291 static dns_rdatasetitermethods_t rdatasetiter_methods = {
292         rdatasetiter_destroy,
293         rdatasetiter_first,
294         rdatasetiter_next,
295         rdatasetiter_current
296 };
297
298 typedef struct rbtdb_rdatasetiter {
299         dns_rdatasetiter_t              common;
300         rdatasetheader_t *              current;
301 } rbtdb_rdatasetiter_t;
302
303 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
304 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
305 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
306 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
307                                         dns_name_t *name);
308 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
309 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
310 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
311                                            dns_dbnode_t **nodep,
312                                            dns_name_t *name);
313 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
314 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
315                                           dns_name_t *name);
316
317 static dns_dbiteratormethods_t dbiterator_methods = {
318         dbiterator_destroy,
319         dbiterator_first,
320         dbiterator_last,
321         dbiterator_seek,
322         dbiterator_prev,
323         dbiterator_next,
324         dbiterator_current,
325         dbiterator_pause,
326         dbiterator_origin
327 };
328
329 #define DELETION_BATCH_MAX 64
330
331 /*
332  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
333  */
334 typedef struct rbtdb_dbiterator {
335         dns_dbiterator_t                common;
336         isc_boolean_t                   paused;
337         isc_boolean_t                   new_origin;
338         isc_rwlocktype_t                tree_locked;
339         isc_result_t                    result;
340         dns_fixedname_t                 name;
341         dns_fixedname_t                 origin;
342         dns_rbtnodechain_t              chain;
343         dns_rbtnode_t                   *node;
344         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
345         int                             delete;
346 } rbtdb_dbiterator_t;
347
348
349 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
350 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
351
352 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
353                        isc_event_t *event);
354
355 /*
356  * Locking
357  *
358  * If a routine is going to lock more than one lock in this module, then
359  * the locking must be done in the following order:
360  *
361  *      Tree Lock
362  *
363  *      Node Lock       (Only one from the set may be locked at one time by
364  *                       any caller)
365  *
366  *      Database Lock
367  *
368  * Failure to follow this hierarchy can result in deadlock.
369  */
370
371 /*
372  * Deleting Nodes
373  *
374  * Currently there is no deletion of nodes from the database, except when
375  * the database is being destroyed.
376  *
377  * If node deletion is added in the future, then for zone databases the node
378  * for the origin of the zone MUST NOT be deleted.
379  */
380
381
382 /*
383  * DB Routines
384  */
385
386 static void
387 attach(dns_db_t *source, dns_db_t **targetp) {
388         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
389
390         REQUIRE(VALID_RBTDB(rbtdb));
391
392         isc_refcount_increment(&rbtdb->references, NULL);
393
394         *targetp = source;
395 }
396
397 static void
398 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
399         dns_rbtdb_t *rbtdb = event->ev_arg;
400
401         UNUSED(task);
402
403         free_rbtdb(rbtdb, ISC_TRUE, event);
404 }
405
406 static void
407 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
408         unsigned int i;
409         isc_ondestroy_t ondest;
410         isc_result_t result;
411         char buf[DNS_NAME_FORMATSIZE];
412
413         REQUIRE(EMPTY(rbtdb->open_versions));
414         REQUIRE(rbtdb->future_version == NULL);
415
416         if (rbtdb->current_version != NULL)
417                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
418                             sizeof(rbtdb_version_t));
419  again:
420         if (rbtdb->tree != NULL) {
421                 result = dns_rbt_destroy2(&rbtdb->tree,
422                                           (rbtdb->task != NULL) ? 1000 : 0);
423                 if (result == ISC_R_QUOTA) {
424                         INSIST(rbtdb->task != NULL);
425                         if (event == NULL)
426                                 event = isc_event_allocate(rbtdb->common.mctx,
427                                                            NULL,
428                                                          DNS_EVENT_FREESTORAGE,
429                                                            free_rbtdb_callback,
430                                                            rbtdb,
431                                                            sizeof(isc_event_t));
432                         if (event == NULL)
433                                 goto again;
434                         isc_task_send(rbtdb->task, &event);
435                         return;
436                 }
437                 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
438         }
439         if (event != NULL)
440                 isc_event_free(&event);
441         if (log) {
442                 if (dns_name_dynamic(&rbtdb->common.origin))
443                         dns_name_format(&rbtdb->common.origin, buf,
444                                         sizeof(buf));
445                 else
446                         strcpy(buf, "<UNKNOWN>");
447                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
448                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
449                               "done free_rbtdb(%s)", buf);
450         }
451         if (dns_name_dynamic(&rbtdb->common.origin))
452                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
453         for (i = 0; i < rbtdb->node_lock_count; i++)
454                 DESTROYLOCK(&rbtdb->node_locks[i].lock);
455         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
456                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
457         isc_rwlock_destroy(&rbtdb->tree_lock);
458         isc_refcount_destroy(&rbtdb->references);
459         if (rbtdb->task != NULL)
460                 isc_task_detach(&rbtdb->task);
461         DESTROYLOCK(&rbtdb->lock);
462         rbtdb->common.magic = 0;
463         rbtdb->common.impmagic = 0;
464         ondest = rbtdb->common.ondest;
465         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
466         isc_ondestroy_notify(&ondest, rbtdb);
467 }
468
469 static inline void
470 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
471         isc_boolean_t want_free = ISC_FALSE;
472         unsigned int i;
473         unsigned int inactive = 0;
474
475         /* XXX check for open versions here */
476
477         /*
478          * Even though there are no external direct references, there still
479          * may be nodes in use.
480          */
481         for (i = 0; i < rbtdb->node_lock_count; i++) {
482                 LOCK(&rbtdb->node_locks[i].lock);
483                 rbtdb->node_locks[i].exiting = ISC_TRUE;
484                 if (rbtdb->node_locks[i].references == 0)
485                         inactive++;
486                 UNLOCK(&rbtdb->node_locks[i].lock);
487         }
488
489         if (inactive != 0) {
490                 LOCK(&rbtdb->lock);
491                 rbtdb->active -= inactive;
492                 if (rbtdb->active == 0)
493                         want_free = ISC_TRUE;
494                 UNLOCK(&rbtdb->lock);
495                 if (want_free) {
496                         char buf[DNS_NAME_FORMATSIZE];
497                         if (dns_name_dynamic(&rbtdb->common.origin))
498                                 dns_name_format(&rbtdb->common.origin, buf,
499                                                 sizeof(buf));
500                         else
501                                 strcpy(buf, "<UNKNOWN>");
502                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
503                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
504                                       "calling free_rbtdb(%s)", buf);
505                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
506                 }
507         }
508 }
509
510 static void
511 detach(dns_db_t **dbp) {
512         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
513         unsigned int refs;
514
515         REQUIRE(VALID_RBTDB(rbtdb));
516
517         isc_refcount_decrement(&rbtdb->references, &refs);
518
519         if (refs == 0)
520                 maybe_free_rbtdb(rbtdb);
521
522         *dbp = NULL;
523 }
524
525 static void
526 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
527         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
528         rbtdb_version_t *version;
529
530         REQUIRE(VALID_RBTDB(rbtdb));
531
532         LOCK(&rbtdb->lock);
533         version = rbtdb->current_version;
534         if (version->references == 0)
535                 PREPEND(rbtdb->open_versions, version, link);
536         version->references++;
537         UNLOCK(&rbtdb->lock);
538
539         *versionp = (dns_dbversion_t *)version;
540 }
541
542 static inline rbtdb_version_t *
543 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
544                  unsigned int references, isc_boolean_t writer)
545 {
546         rbtdb_version_t *version;
547
548         version = isc_mem_get(mctx, sizeof(*version));
549         if (version == NULL)
550                 return (NULL);
551         version->serial = serial;
552         version->references = references;
553         version->writer = writer;
554         version->commit_ok = ISC_FALSE;
555         ISC_LIST_INIT(version->changed_list);
556         ISC_LINK_INIT(version, link);
557
558         return (version);
559 }
560
561 static isc_result_t
562 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
563         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
564         rbtdb_version_t *version;
565
566         REQUIRE(VALID_RBTDB(rbtdb));
567         REQUIRE(versionp != NULL && *versionp == NULL);
568         REQUIRE(rbtdb->future_version == NULL);
569
570         LOCK(&rbtdb->lock);
571         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
572         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
573                                    ISC_TRUE);
574         if (version != NULL) {
575                 version->commit_ok = ISC_TRUE;
576                 rbtdb->next_serial++;
577                 rbtdb->future_version = version;
578         }
579         UNLOCK(&rbtdb->lock);
580
581         if (version == NULL)
582                 return (ISC_R_NOMEMORY);
583
584         *versionp = version;
585
586         return (ISC_R_SUCCESS);
587 }
588
589 static void
590 attachversion(dns_db_t *db, dns_dbversion_t *source,
591               dns_dbversion_t **targetp)
592 {
593         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
594         rbtdb_version_t *rbtversion = source;
595
596         REQUIRE(VALID_RBTDB(rbtdb));
597
598         LOCK(&rbtdb->lock);
599
600         INSIST(rbtversion->references > 0);
601         rbtversion->references++;
602         INSIST(rbtversion->references != 0);
603
604         UNLOCK(&rbtdb->lock);
605
606         *targetp = rbtversion;
607 }
608
609 static rbtdb_changed_t *
610 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
611             dns_rbtnode_t *node)
612 {
613         rbtdb_changed_t *changed;
614
615         /*
616          * Caller must be holding the node lock.
617          */
618
619         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
620
621         LOCK(&rbtdb->lock);
622
623         REQUIRE(version->writer);
624
625         if (changed != NULL) {
626                 INSIST(node->references > 0);
627                 node->references++;
628                 INSIST(node->references != 0);
629                 changed->node = node;
630                 changed->dirty = ISC_FALSE;
631                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
632         } else
633                 version->commit_ok = ISC_FALSE;
634
635         UNLOCK(&rbtdb->lock);
636
637         return (changed);
638 }
639
640 static inline void
641 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
642
643         if (dns_name_dynamic(&(*noqname)->name))
644                 dns_name_free(&(*noqname)->name, mctx);
645         if ((*noqname)->nsec != NULL)
646                 isc_mem_put(mctx, (*noqname)->nsec,
647                             dns_rdataslab_size((*noqname)->nsec, 0));
648         if ((*noqname)->nsec != NULL)
649                 isc_mem_put(mctx, (*noqname)->nsecsig,
650                             dns_rdataslab_size((*noqname)->nsecsig, 0));
651         isc_mem_put(mctx, *noqname, sizeof(**noqname));
652         *noqname = NULL;
653 }
654
655 static inline void
656 free_rdataset(isc_mem_t *mctx, rdatasetheader_t *rdataset) {
657         unsigned int size;
658
659         if (rdataset->noqname != NULL)
660                 free_noqname(mctx, &rdataset->noqname);
661         
662         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
663                 size = sizeof(*rdataset);
664         else
665                 size = dns_rdataslab_size((unsigned char *)rdataset,
666                                           sizeof(*rdataset));
667         isc_mem_put(mctx, rdataset, size);
668 }
669
670 static inline void
671 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
672         rdatasetheader_t *header, *dcurrent;
673         isc_boolean_t make_dirty = ISC_FALSE;
674
675         /*
676          * Caller must hold the node lock.
677          */
678
679         /*
680          * We set the IGNORE attribute on rdatasets with serial number
681          * 'serial'.  When the reference count goes to zero, these rdatasets
682          * will be cleaned up; until that time, they will be ignored.
683          */
684         for (header = node->data; header != NULL; header = header->next) {
685                 if (header->serial == serial) {
686                         header->attributes |= RDATASET_ATTR_IGNORE;
687                         make_dirty = ISC_TRUE;
688                 }
689                 for (dcurrent = header->down;
690                      dcurrent != NULL;
691                      dcurrent = dcurrent->down) {
692                         if (dcurrent->serial == serial) {
693                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
694                                 make_dirty = ISC_TRUE;
695                         }
696                 }
697         }
698         if (make_dirty)
699                 node->dirty = 1;
700 }
701
702 static inline void
703 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
704         rdatasetheader_t *current, *dcurrent, *top_prev, *top_next, *down_next;
705         isc_mem_t *mctx = rbtdb->common.mctx;
706
707         /*
708          * Caller must be holding the node lock.
709          */
710
711         top_prev = NULL;
712         for (current = node->data; current != NULL; current = top_next) {
713                 top_next = current->next;
714                 dcurrent = current->down;
715                 if (dcurrent != NULL) {
716                         do {
717                                 down_next = dcurrent->down;
718                                 free_rdataset(mctx, dcurrent);
719                                 dcurrent = down_next;
720                         } while (dcurrent != NULL);
721                         current->down = NULL;
722                 }
723                 /*
724                  * If current is nonexistent or stale, we can clean it up.
725                  */
726                 if ((current->attributes &
727                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
728                         if (top_prev != NULL)
729                                 top_prev->next = current->next;
730                         else
731                                 node->data = current->next;
732                         free_rdataset(mctx, current);
733                 } else
734                         top_prev = current;
735         }
736         node->dirty = 0;
737 }
738
739 static inline void
740 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
741                 rbtdb_serial_t least_serial)
742 {
743         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
744         rdatasetheader_t *top_prev, *top_next;
745         isc_mem_t *mctx = rbtdb->common.mctx;
746         isc_boolean_t still_dirty = ISC_FALSE;
747
748         /*
749          * Caller must be holding the node lock.
750          */
751         REQUIRE(least_serial != 0);
752
753         top_prev = NULL;
754         for (current = node->data; current != NULL; current = top_next) {
755                 top_next = current->next;
756
757                 /*
758                  * First, we clean up any instances of multiple rdatasets
759                  * with the same serial number, or that have the IGNORE
760                  * attribute.
761                  */
762                 dparent = current;
763                 for (dcurrent = current->down;
764                      dcurrent != NULL;
765                      dcurrent = down_next) {
766                         down_next = dcurrent->down;
767                         INSIST(dcurrent->serial <= dparent->serial);
768                         if (dcurrent->serial == dparent->serial ||
769                             IGNORE(dcurrent)) {
770                                 if (down_next != NULL)
771                                         down_next->next = dparent;
772                                 dparent->down = down_next;
773                                 free_rdataset(mctx, dcurrent);
774                         } else
775                                 dparent = dcurrent;
776                 }
777
778                 /*
779                  * We've now eliminated all IGNORE datasets with the possible
780                  * exception of current, which we now check.
781                  */
782                 if (IGNORE(current)) {
783                         down_next = current->down;
784                         if (down_next == NULL) {
785                                 if (top_prev != NULL)
786                                         top_prev->next = current->next;
787                                 else
788                                         node->data = current->next;
789                                 free_rdataset(mctx, current);
790                                 /*
791                                  * current no longer exists, so we can
792                                  * just continue with the loop.
793                                  */
794                                 continue;
795                         } else {
796                                 /*
797                                  * Pull up current->down, making it the new
798                                  * current.
799                                  */
800                                 if (top_prev != NULL)
801                                         top_prev->next = down_next;
802                                 else
803                                         node->data = down_next;
804                                 down_next->next = top_next;
805                                 free_rdataset(mctx, current);
806                                 current = down_next;
807                         }
808                 }
809
810                 /*
811                  * We now try to find the first down node less than the
812                  * least serial.
813                  */
814                 dparent = current;
815                 for (dcurrent = current->down;
816                      dcurrent != NULL;
817                      dcurrent = down_next) {
818                         down_next = dcurrent->down;
819                         if (dcurrent->serial < least_serial)
820                                 break;
821                         dparent = dcurrent;
822                 }
823
824                 /*
825                  * If there is a such an rdataset, delete it and any older
826                  * versions.
827                  */
828                 if (dcurrent != NULL) {
829                         do {
830                                 down_next = dcurrent->down;
831                                 INSIST(dcurrent->serial <= least_serial);
832                                 free_rdataset(mctx, dcurrent);
833                                 dcurrent = down_next;
834                         } while (dcurrent != NULL);
835                         dparent->down = NULL;
836                 }
837
838                 /*
839                  * Note.  The serial number of 'current' might be less than
840                  * least_serial too, but we cannot delete it because it is
841                  * the most recent version, unless it is a NONEXISTENT
842                  * rdataset.
843                  */
844                 if (current->down != NULL) {
845                         still_dirty = ISC_TRUE;
846                         top_prev = current;
847                 } else {
848                         /*
849                          * If this is a NONEXISTENT rdataset, we can delete it.
850                          */
851                         if (NONEXISTENT(current)) {
852                                 if (top_prev != NULL)
853                                         top_prev->next = current->next;
854                                 else
855                                         node->data = current->next;
856                                 free_rdataset(mctx, current);
857                         } else
858                                 top_prev = current;
859                 }
860         }
861         if (!still_dirty)
862                 node->dirty = 0;
863 }
864
865 static inline void
866 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
867         if (node->references == 0) {
868                 rbtdb->node_locks[node->locknum].references++;
869                 INSIST(rbtdb->node_locks[node->locknum].references != 0);
870         }
871         node->references++;
872         INSIST(node->references != 0);
873 }
874
875 static void
876 no_references(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
877               rbtdb_serial_t least_serial, isc_rwlocktype_t lock)
878 {
879         isc_result_t result;
880         isc_boolean_t write_locked;
881         unsigned int locknum;
882
883         /*
884          * Caller must be holding the node lock.
885          */
886
887         REQUIRE(node->references == 0);
888
889         if (node->dirty) {
890                 if (IS_CACHE(rbtdb))
891                         clean_cache_node(rbtdb, node);
892                 else {
893                         if (least_serial == 0) {
894                                 /*
895                                  * Caller doesn't know the least serial.
896                                  * Get it.
897                                  */
898                                 LOCK(&rbtdb->lock);
899                                 least_serial = rbtdb->least_serial;
900                                 UNLOCK(&rbtdb->lock);
901                         }
902                         clean_zone_node(rbtdb, node, least_serial);
903                 }
904         }
905
906         locknum = node->locknum;
907
908         INSIST(rbtdb->node_locks[locknum].references > 0);
909         rbtdb->node_locks[locknum].references--;
910
911         /*
912          * XXXDCL should this only be done for cache zones?
913          */
914         if (node->data != NULL || node->down != NULL)
915                 return;
916
917         /*
918          * XXXDCL need to add a deferred delete method for ISC_R_LOCKBUSY.
919          */
920         if (lock != isc_rwlocktype_write) {
921                 /*
922                  * Locking hierarchy notwithstanding, we don't need to free
923                  * the node lock before acquiring the tree write lock because
924                  * we only do a trylock.
925                  */
926                 if (lock == isc_rwlocktype_read)
927                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
928                 else
929                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
930                                                     isc_rwlocktype_write);
931                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
932                               result == ISC_R_LOCKBUSY);
933  
934                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
935         } else
936                 write_locked = ISC_TRUE;
937
938         if (write_locked) {
939                 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
940                         char printname[DNS_NAME_FORMATSIZE];
941
942                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
943                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
944                                       "no_references: delete from rbt: %p %s",
945                                       node,
946                                       dns_rbt_formatnodename(node, printname,
947                                                            sizeof(printname)));
948                 }
949
950                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
951                 if (result != ISC_R_SUCCESS)
952                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
953                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
954                                       "no_references: dns_rbt_deletenode: %s",
955                                       isc_result_totext(result));
956         }
957
958         /*
959          * Relock a read lock, or unlock the write lock if no lock was held.
960          */
961         if (lock == isc_rwlocktype_none)
962                 if (write_locked)
963                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
964
965         if (lock == isc_rwlocktype_read)
966                 if (write_locked)
967                         isc_rwlock_downgrade(&rbtdb->tree_lock);
968 }
969
970 static inline void
971 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
972                    rbtdb_changedlist_t *cleanup_list)
973 {
974         /*
975          * Caller must be holding the database lock.
976          */
977
978         rbtdb->least_serial = version->serial;
979         *cleanup_list = version->changed_list;
980         ISC_LIST_INIT(version->changed_list);
981 }
982
983 static inline void
984 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
985         rbtdb_changed_t *changed, *next_changed;
986
987         /*
988          * If the changed record is dirty, then
989          * an update created multiple versions of
990          * a given rdataset.  We keep this list
991          * until we're the least open version, at
992          * which point it's safe to get rid of any
993          * older versions.
994          *
995          * If the changed record isn't dirty, then
996          * we don't need it anymore since we're
997          * committing and not rolling back.
998          *
999          * The caller must be holding the database lock.
1000          */
1001         for (changed = HEAD(version->changed_list);
1002              changed != NULL;
1003              changed = next_changed) {
1004                 next_changed = NEXT(changed, link);
1005                 if (!changed->dirty) {
1006                         UNLINK(version->changed_list,
1007                                changed, link);
1008                         APPEND(*cleanup_list,
1009                                changed, link);
1010                 }
1011         }
1012 }
1013
1014 static void
1015 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
1016         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1017         rbtdb_version_t *version, *cleanup_version, *least_greater;
1018         isc_boolean_t rollback = ISC_FALSE;
1019         rbtdb_changedlist_t cleanup_list;
1020         rbtdb_changed_t *changed, *next_changed;
1021         rbtdb_serial_t serial, least_serial;
1022         dns_rbtnode_t *rbtnode;
1023         isc_mutex_t *lock;
1024
1025         REQUIRE(VALID_RBTDB(rbtdb));
1026         version = (rbtdb_version_t *)*versionp;
1027
1028         cleanup_version = NULL;
1029         ISC_LIST_INIT(cleanup_list);
1030
1031         LOCK(&rbtdb->lock);
1032         INSIST(version->references > 0);
1033         INSIST(!version->writer || !(commit && version->references > 1));
1034         version->references--;
1035         serial = version->serial;
1036         if (version->references == 0) {
1037                 if (version->writer) {
1038                         if (commit) {
1039                                 INSIST(version->commit_ok);
1040                                 INSIST(version == rbtdb->future_version);
1041                                 if (EMPTY(rbtdb->open_versions)) {
1042                                         /*
1043                                          * We're going to become the least open
1044                                          * version.
1045                                          */
1046                                         make_least_version(rbtdb, version,
1047                                                            &cleanup_list);
1048                                 } else {
1049                                         /*
1050                                          * Some other open version is the
1051                                          * least version.  We can't cleanup
1052                                          * records that were changed in this
1053                                          * version because the older versions
1054                                          * may still be in use by an open
1055                                          * version.
1056                                          *
1057                                          * We can, however, discard the
1058                                          * changed records for things that
1059                                          * we've added that didn't exist in
1060                                          * prior versions.
1061                                          */
1062                                         cleanup_nondirty(version,
1063                                                          &cleanup_list);
1064                                 }
1065                                 /*
1066                                  * If the (soon to be former) current version
1067                                  * isn't being used by anyone, we can clean
1068                                  * it up.
1069                                  */
1070                                 if (rbtdb->current_version->references == 0) {
1071                                         cleanup_version =
1072                                                 rbtdb->current_version;
1073                                         APPENDLIST(version->changed_list,
1074                                                  cleanup_version->changed_list,
1075                                                    link);
1076                                 }
1077                                 /*
1078                                  * Become the current version.
1079                                  */
1080                                 version->writer = ISC_FALSE;
1081                                 rbtdb->current_version = version;
1082                                 rbtdb->current_serial = version->serial;
1083                                 rbtdb->future_version = NULL;
1084                         } else {
1085                                 /*
1086                                  * We're rolling back this transaction.
1087                                  */
1088                                 cleanup_list = version->changed_list;
1089                                 ISC_LIST_INIT(version->changed_list);
1090                                 rollback = ISC_TRUE;
1091                                 cleanup_version = version;
1092                                 rbtdb->future_version = NULL;
1093                         }
1094                 } else {
1095                         if (version != rbtdb->current_version) {
1096                                 /*
1097                                  * There are no external or internal references
1098                                  * to this version and it can be cleaned up.
1099                                  */
1100                                 cleanup_version = version;
1101
1102                                 /*
1103                                  * Find the version with the least serial
1104                                  * number greater than ours.
1105                                  */
1106                                 least_greater = PREV(version, link);
1107                                 if (least_greater == NULL)
1108                                         least_greater = rbtdb->current_version;
1109
1110                                 INSIST(version->serial < least_greater->serial);
1111                                 /*
1112                                  * Is this the least open version?
1113                                  */
1114                                 if (version->serial == rbtdb->least_serial) {
1115                                         /*
1116                                          * Yes.  Install the new least open
1117                                          * version.
1118                                          */
1119                                         make_least_version(rbtdb,
1120                                                            least_greater,
1121                                                            &cleanup_list);
1122                                 } else {
1123                                         /*
1124                                          * Add any unexecuted cleanups to
1125                                          * those of the least greater version.
1126                                          */
1127                                         APPENDLIST(least_greater->changed_list,
1128                                                    version->changed_list,
1129                                                    link);
1130                                 }
1131                         } else if (version->serial == rbtdb->least_serial)
1132                                 INSIST(EMPTY(version->changed_list));
1133                         UNLINK(rbtdb->open_versions, version, link);
1134                 }
1135         }
1136         least_serial = rbtdb->least_serial;
1137         UNLOCK(&rbtdb->lock);
1138
1139         if (cleanup_version != NULL) {
1140                 INSIST(EMPTY(cleanup_version->changed_list));
1141                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
1142                             sizeof(*cleanup_version));
1143         }
1144
1145         if (!EMPTY(cleanup_list)) {
1146                 for (changed = HEAD(cleanup_list);
1147                      changed != NULL;
1148                      changed = next_changed) {
1149                         next_changed = NEXT(changed, link);
1150                         rbtnode = changed->node;
1151                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
1152
1153                         LOCK(lock);
1154
1155                         INSIST(rbtnode->references > 0);
1156                         rbtnode->references--;
1157                         if (rollback)
1158                                 rollback_node(rbtnode, serial);
1159
1160                         if (rbtnode->references == 0)
1161                                 no_references(rbtdb, rbtnode, least_serial,
1162                                               isc_rwlocktype_none);
1163
1164                         UNLOCK(lock);
1165
1166                         isc_mem_put(rbtdb->common.mctx, changed,
1167                                     sizeof(*changed));
1168                 }
1169         }
1170
1171         *versionp = NULL;
1172 }
1173
1174 /*
1175  * Add the necessary magic for the wildcard name 'name'
1176  * to be found in 'rbtdb'.
1177  *
1178  * In order for wildcard matching to work correctly in
1179  * zone_find(), we must ensure that a node for the wildcarding
1180  * level exists in the database, and has its 'find_callback'
1181  * and 'wild' bits set.
1182  *
1183  * E.g. if the wildcard name is "*.sub.example." then we
1184  * must ensure that "sub.example." exists and is marked as
1185  * a wildcard level.
1186  */
1187 static isc_result_t
1188 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
1189         isc_result_t result;
1190         dns_name_t foundname;
1191         dns_offsets_t offsets;
1192         unsigned int n;
1193         dns_rbtnode_t *node = NULL;
1194
1195         dns_name_init(&foundname, offsets);
1196         n = dns_name_countlabels(name);
1197         INSIST(n >= 2);
1198         n--;
1199         dns_name_getlabelsequence(name, 1, n, &foundname);
1200         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
1201         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
1202                 return (result);
1203         node->find_callback = 1;
1204         node->wild = 1;
1205         return (ISC_R_SUCCESS);
1206 }
1207
1208 static isc_result_t
1209 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
1210         isc_result_t result;
1211         dns_name_t foundname;
1212         dns_offsets_t offsets;
1213         unsigned int n, l, i;
1214
1215         dns_name_init(&foundname, offsets);
1216         n = dns_name_countlabels(name);
1217         l = dns_name_countlabels(&rbtdb->common.origin);
1218         i = l + 1;
1219         while (i < n) {
1220                 dns_rbtnode_t *node = NULL;     /* dummy */
1221                 dns_name_getlabelsequence(name, n - i, i, &foundname);
1222                 if (dns_name_iswildcard(&foundname)) {
1223                         result = add_wildcard_magic(rbtdb, &foundname);
1224                         if (result != ISC_R_SUCCESS)
1225                                 return (result);
1226                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
1227                                                  &node);
1228                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
1229                                 return (result);
1230                 }
1231                 i++;
1232         }
1233         return (ISC_R_SUCCESS);
1234 }
1235
1236 static isc_result_t
1237 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
1238          dns_dbnode_t **nodep)
1239 {
1240         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1241         dns_rbtnode_t *node = NULL;
1242         dns_name_t nodename;
1243         unsigned int locknum;
1244         isc_result_t result;
1245         isc_rwlocktype_t locktype = isc_rwlocktype_read;
1246
1247         REQUIRE(VALID_RBTDB(rbtdb));
1248
1249         dns_name_init(&nodename, NULL);
1250         RWLOCK(&rbtdb->tree_lock, locktype);
1251         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
1252                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
1253         if (result != ISC_R_SUCCESS) {
1254                 RWUNLOCK(&rbtdb->tree_lock, locktype);
1255                 if (!create) {
1256                         if (result == DNS_R_PARTIALMATCH)
1257                                 result = ISC_R_NOTFOUND;
1258                         return (result);
1259                 }
1260                 /*
1261                  * It would be nice to try to upgrade the lock instead of
1262                  * unlocking then relocking.
1263                  */
1264                 locktype = isc_rwlocktype_write;
1265                 RWLOCK(&rbtdb->tree_lock, locktype);
1266                 node = NULL;
1267                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
1268                 if (result == ISC_R_SUCCESS) {
1269                         dns_rbt_namefromnode(node, &nodename);
1270 #ifdef DNS_RBT_USEHASH
1271                         node->locknum = node->hashval % rbtdb->node_lock_count;
1272 #else
1273                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
1274                                 rbtdb->node_lock_count;
1275 #endif
1276                         add_empty_wildcards(rbtdb, name);
1277
1278                         if (dns_name_iswildcard(name)) {
1279                                 result = add_wildcard_magic(rbtdb, name);
1280                                 if (result != ISC_R_SUCCESS) {
1281                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
1282                                         return (result);
1283                                 }
1284                         }
1285                 } else if (result != ISC_R_EXISTS) {
1286                         RWUNLOCK(&rbtdb->tree_lock, locktype);
1287                         return (result);
1288                 }
1289         }
1290         locknum = node->locknum;
1291         LOCK(&rbtdb->node_locks[locknum].lock);
1292         new_reference(rbtdb, node);
1293         UNLOCK(&rbtdb->node_locks[locknum].lock);
1294         RWUNLOCK(&rbtdb->tree_lock, locktype);
1295
1296         *nodep = (dns_dbnode_t *)node;
1297
1298         return (ISC_R_SUCCESS);
1299 }
1300
1301 static isc_result_t
1302 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
1303         rbtdb_search_t *search = arg;
1304         rdatasetheader_t *header, *header_next;
1305         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
1306         rdatasetheader_t *found;
1307         isc_result_t result;
1308         dns_rbtnode_t *onode;
1309
1310         /*
1311          * We only want to remember the topmost zone cut, since it's the one
1312          * that counts, so we'll just continue if we've already found a
1313          * zonecut.
1314          */
1315         if (search->zonecut != NULL)
1316                 return (DNS_R_CONTINUE);
1317
1318         found = NULL;
1319         result = DNS_R_CONTINUE;
1320         onode = search->rbtdb->origin_node;
1321
1322         LOCK(&(search->rbtdb->node_locks[node->locknum].lock));
1323
1324         /*
1325          * Look for an NS or DNAME rdataset active in our version.
1326          */
1327         ns_header = NULL;
1328         dname_header = NULL;
1329         sigdname_header = NULL;
1330         for (header = node->data; header != NULL; header = header_next) {
1331                 header_next = header->next;
1332                 if (header->type == dns_rdatatype_ns ||
1333                     header->type == dns_rdatatype_dname ||
1334                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
1335                         do {
1336                                 if (header->serial <= search->serial &&
1337                                     !IGNORE(header)) {
1338                                         /*
1339                                          * Is this a "this rdataset doesn't
1340                                          * exist" record?
1341                                          */
1342                                         if (NONEXISTENT(header))
1343                                                 header = NULL;
1344                                         break;
1345                                 } else
1346                                         header = header->down;
1347                         } while (header != NULL);
1348                         if (header != NULL) {
1349                                 if (header->type == dns_rdatatype_dname)
1350                                         dname_header = header;
1351                                 else if (header->type == 
1352                                            RBTDB_RDATATYPE_SIGDNAME)
1353                                         sigdname_header = header;
1354                                 else if (node != onode ||
1355                                          IS_STUB(search->rbtdb)) {
1356                                         /*
1357                                          * We've found an NS rdataset that
1358                                          * isn't at the origin node.  We check
1359                                          * that they're not at the origin node,
1360                                          * because otherwise we'd erroneously
1361                                          * treat the zone top as if it were
1362                                          * a delegation.
1363                                          */
1364                                         ns_header = header;
1365                                 }
1366                         }
1367                 }
1368         }
1369
1370         /*
1371          * Did we find anything?
1372          */
1373         if (dname_header != NULL) {
1374                 /*
1375                  * Note that DNAME has precedence over NS if both exist.
1376                  */
1377                 found = dname_header;
1378                 search->zonecut_sigrdataset = sigdname_header;
1379         } else if (ns_header != NULL) {
1380                 found = ns_header;
1381                 search->zonecut_sigrdataset = NULL;
1382         }
1383
1384         if (found != NULL) {
1385                 /*
1386                  * We increment the reference count on node to ensure that
1387                  * search->zonecut_rdataset will still be valid later.
1388                  */
1389                 new_reference(search->rbtdb, node);
1390                 search->zonecut = node;
1391                 search->zonecut_rdataset = found;
1392                 search->need_cleanup = ISC_TRUE;
1393                 /*
1394                  * Since we've found a zonecut, anything beneath it is
1395                  * glue and is not subject to wildcard matching, so we
1396                  * may clear search->wild.
1397                  */
1398                 search->wild = ISC_FALSE;
1399                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
1400                         /*
1401                          * If the caller does not want to find glue, then
1402                          * this is the best answer and the search should
1403                          * stop now.
1404                          */
1405                         result = DNS_R_PARTIALMATCH;
1406                 } else {
1407                         dns_name_t *zcname;
1408
1409                         /*
1410                          * The search will continue beneath the zone cut.
1411                          * This may or may not be the best match.  In case it
1412                          * is, we need to remember the node name.
1413                          */
1414                         zcname = dns_fixedname_name(&search->zonecut_name);
1415                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
1416                                       ISC_R_SUCCESS);
1417                         search->copy_name = ISC_TRUE;
1418                 }
1419         } else {
1420                 /*
1421                  * There is no zonecut at this node which is active in this
1422                  * version.
1423                  *
1424                  * If this is a "wild" node and the caller hasn't disabled
1425                  * wildcard matching, remember that we've seen a wild node
1426                  * in case we need to go searching for wildcard matches
1427                  * later on.
1428                  */
1429                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
1430                         search->wild = ISC_TRUE;
1431         }
1432
1433         UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock));
1434
1435         return (result);
1436 }
1437
1438 static inline void
1439 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1440               rdatasetheader_t *header, isc_stdtime_t now,
1441               dns_rdataset_t *rdataset)
1442 {
1443         unsigned char *raw;
1444
1445         /*
1446          * Caller must be holding the node lock.
1447          */
1448
1449         if (rdataset == NULL)
1450                 return;
1451
1452         new_reference(rbtdb, node);
1453
1454         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
1455
1456         rdataset->methods = &rdataset_methods;
1457         rdataset->rdclass = rbtdb->common.rdclass;
1458         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
1459         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
1460         rdataset->ttl = header->ttl - now;
1461         rdataset->trust = header->trust;
1462         if (NXDOMAIN(header))
1463                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
1464         rdataset->private1 = rbtdb;
1465         rdataset->private2 = node;
1466         raw = (unsigned char *)header + sizeof(*header);
1467         rdataset->private3 = raw;
1468         rdataset->count = header->count++;
1469         if (header->count == ISC_UINT32_MAX)
1470                 header->count = 0;
1471
1472         /*
1473          * Reset iterator state.
1474          */
1475         rdataset->privateuint4 = 0;
1476         rdataset->private5 = NULL;
1477
1478         /*
1479          * Add noqname proof.
1480          */
1481         rdataset->private6 = header->noqname;
1482         if (rdataset->private6 != NULL)
1483                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
1484 }
1485
1486 static inline isc_result_t
1487 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
1488                  dns_name_t *foundname, dns_rdataset_t *rdataset,
1489                  dns_rdataset_t *sigrdataset)
1490 {
1491         isc_result_t result;
1492         dns_name_t *zcname;
1493         rbtdb_rdatatype_t type;
1494         dns_rbtnode_t *node;
1495
1496         /*
1497          * The caller MUST NOT be holding any node locks.
1498          */
1499
1500         node = search->zonecut;
1501         type = search->zonecut_rdataset->type;
1502
1503         /*
1504          * If we have to set foundname, we do it before anything else.
1505          * If we were to set foundname after we had set nodep or bound the
1506          * rdataset, then we'd have to undo that work if dns_name_copy()
1507          * failed.  By setting foundname first, there's nothing to undo if
1508          * we have trouble.
1509          */
1510         if (foundname != NULL && search->copy_name) {
1511                 zcname = dns_fixedname_name(&search->zonecut_name);
1512                 result = dns_name_copy(zcname, foundname, NULL);
1513                 if (result != ISC_R_SUCCESS)
1514                         return (result);
1515         }
1516         if (nodep != NULL) {
1517                 /*
1518                  * Note that we don't have to increment the node's reference
1519                  * count here because we're going to use the reference we
1520                  * already have in the search block.
1521                  */
1522                 *nodep = node;
1523                 search->need_cleanup = ISC_FALSE;
1524         }
1525         if (rdataset != NULL) {
1526                 LOCK(&(search->rbtdb->node_locks[node->locknum].lock));
1527                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
1528                               search->now, rdataset);
1529                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
1530                         bind_rdataset(search->rbtdb, node,
1531                                       search->zonecut_sigrdataset,
1532                                       search->now, sigrdataset);
1533                 UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock));
1534         }
1535
1536         if (type == dns_rdatatype_dname)
1537                 return (DNS_R_DNAME);
1538         return (DNS_R_DELEGATION);
1539 }
1540
1541 static inline isc_boolean_t
1542 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
1543            dns_rbtnode_t *node)
1544 {
1545         unsigned char *raw;
1546         unsigned int count, size;
1547         dns_name_t ns_name;
1548         isc_boolean_t valid = ISC_FALSE;
1549         dns_offsets_t offsets;
1550         isc_region_t region;
1551         rdatasetheader_t *header;
1552
1553         /*
1554          * No additional locking is required.
1555          */
1556
1557         /*
1558          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
1559          * if it occurs at a zone cut, but is not valid below it.
1560          */
1561         if (type == dns_rdatatype_ns) {
1562                 if (node != search->zonecut) {
1563                         return (ISC_FALSE);
1564                 }
1565         } else if (type != dns_rdatatype_a &&
1566                    type != dns_rdatatype_aaaa &&
1567                    type != dns_rdatatype_a6) {
1568                 return (ISC_FALSE);
1569         }
1570
1571         header = search->zonecut_rdataset;
1572         raw = (unsigned char *)header + sizeof(*header);
1573         count = raw[0] * 256 + raw[1];
1574         raw += 2;
1575
1576         while (count > 0) {
1577                 count--;
1578                 size = raw[0] * 256 + raw[1];
1579                 raw += 2;
1580                 region.base = raw;
1581                 region.length = size;
1582                 raw += size;
1583                 /*
1584                  * XXX Until we have rdata structures, we have no choice but
1585                  * to directly access the rdata format.
1586                  */
1587                 dns_name_init(&ns_name, offsets);
1588                 dns_name_fromregion(&ns_name, &region);
1589                 if (dns_name_compare(&ns_name, name) == 0) {
1590                         valid = ISC_TRUE;
1591                         break;
1592                 }
1593         }
1594
1595         return (valid);
1596 }
1597
1598 static inline isc_boolean_t
1599 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
1600             dns_name_t *name)
1601 {
1602         dns_fixedname_t fnext;
1603         dns_fixedname_t forigin;
1604         dns_name_t *next;
1605         dns_name_t *origin;
1606         dns_name_t prefix;
1607         dns_rbtdb_t *rbtdb;
1608         dns_rbtnode_t *node;
1609         isc_result_t result;
1610         isc_boolean_t answer = ISC_FALSE;
1611         rdatasetheader_t *header;
1612
1613         rbtdb = search->rbtdb;
1614
1615         dns_name_init(&prefix, NULL);
1616         dns_fixedname_init(&fnext);
1617         next = dns_fixedname_name(&fnext);
1618         dns_fixedname_init(&forigin);
1619         origin = dns_fixedname_name(&forigin);
1620
1621         result = dns_rbtnodechain_next(chain, NULL, NULL);
1622         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
1623                 node = NULL;
1624                 result = dns_rbtnodechain_current(chain, &prefix,
1625                                                   origin, &node);
1626                 if (result != ISC_R_SUCCESS)
1627                         break;
1628                 LOCK(&(rbtdb->node_locks[node->locknum].lock));
1629                 for (header = node->data;
1630                      header != NULL;
1631                      header = header->next) {
1632                         if (header->serial <= search->serial &&
1633                             !IGNORE(header) && EXISTS(header))
1634                                 break;
1635                 }
1636                 UNLOCK(&(rbtdb->node_locks[node->locknum].lock));
1637                 if (header != NULL)
1638                         break;
1639                 result = dns_rbtnodechain_next(chain, NULL, NULL);
1640         }
1641         if (result == ISC_R_SUCCESS)
1642                 result = dns_name_concatenate(&prefix, origin, next, NULL);
1643         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
1644                 answer = ISC_TRUE;
1645         return (answer);
1646 }
1647
1648 static inline isc_boolean_t
1649 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
1650         dns_fixedname_t fnext;
1651         dns_fixedname_t forigin;
1652         dns_fixedname_t fprev;
1653         dns_name_t *next;
1654         dns_name_t *origin;
1655         dns_name_t *prev;
1656         dns_name_t name;
1657         dns_name_t rname;
1658         dns_name_t tname;
1659         dns_rbtdb_t *rbtdb;
1660         dns_rbtnode_t *node;
1661         dns_rbtnodechain_t chain;
1662         isc_boolean_t check_next = ISC_TRUE;
1663         isc_boolean_t check_prev = ISC_TRUE;
1664         isc_boolean_t answer = ISC_FALSE;
1665         isc_result_t result;
1666         rdatasetheader_t *header;
1667         unsigned int n;
1668
1669         rbtdb = search->rbtdb;
1670
1671         dns_name_init(&name, NULL);
1672         dns_name_init(&tname, NULL);
1673         dns_name_init(&rname, NULL);
1674         dns_fixedname_init(&fnext);
1675         next = dns_fixedname_name(&fnext);
1676         dns_fixedname_init(&fprev);
1677         prev = dns_fixedname_name(&fprev);
1678         dns_fixedname_init(&forigin);
1679         origin = dns_fixedname_name(&forigin);
1680
1681         /*
1682          * Find if qname is at or below a empty node.
1683          * Use our own copy of the chain.
1684          */
1685
1686         chain = search->chain;
1687         do {
1688                 node = NULL;
1689                 result = dns_rbtnodechain_current(&chain, &name,
1690                                                   origin, &node);
1691                 if (result != ISC_R_SUCCESS)
1692                         break;
1693                 LOCK(&(rbtdb->node_locks[node->locknum].lock));
1694                 for (header = node->data;
1695                      header != NULL;
1696                      header = header->next) {
1697                         if (header->serial <= search->serial &&
1698                             !IGNORE(header) && EXISTS(header))
1699                                 break;
1700                 }
1701                 UNLOCK(&(rbtdb->node_locks[node->locknum].lock));
1702                 if (header != NULL)
1703                         break;
1704                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
1705         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
1706         if (result == ISC_R_SUCCESS)
1707                 result = dns_name_concatenate(&name, origin, prev, NULL);
1708         if (result != ISC_R_SUCCESS)
1709                 check_prev = ISC_FALSE;
1710
1711         result = dns_rbtnodechain_next(&chain, NULL, NULL);
1712         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
1713                 node = NULL;
1714                 result = dns_rbtnodechain_current(&chain, &name,
1715                                                   origin, &node);
1716                 if (result != ISC_R_SUCCESS)
1717                         break;
1718                 LOCK(&(rbtdb->node_locks[node->locknum].lock));
1719                 for (header = node->data;
1720                      header != NULL;
1721                      header = header->next) {
1722                         if (header->serial <= search->serial &&
1723                             !IGNORE(header) && EXISTS(header))
1724                                 break;
1725                 }
1726                 UNLOCK(&(rbtdb->node_locks[node->locknum].lock));
1727                 if (header != NULL)
1728                         break;
1729                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
1730         }
1731         if (result == ISC_R_SUCCESS)
1732                 result = dns_name_concatenate(&name, origin, next, NULL);
1733         if (result != ISC_R_SUCCESS)
1734                 check_next = ISC_FALSE;
1735
1736         dns_name_clone(qname, &rname);
1737
1738         /*
1739          * Remove the wildcard label to find the terminal name.
1740          */
1741         n = dns_name_countlabels(wname);
1742         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
1743
1744         do {
1745                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
1746                     (check_next && dns_name_issubdomain(next, &rname))) {
1747                         answer = ISC_TRUE;
1748                         break;
1749                 }
1750                 /*
1751                  * Remove the left hand label.
1752                  */
1753                 n = dns_name_countlabels(&rname);
1754                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
1755         } while (!dns_name_equal(&rname, &tname));
1756         return (answer);
1757 }
1758
1759 static inline isc_result_t
1760 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
1761               dns_name_t *qname)
1762 {
1763         unsigned int i, j;
1764         dns_rbtnode_t *node, *level_node, *wnode;
1765         rdatasetheader_t *header;
1766         isc_result_t result = ISC_R_NOTFOUND;
1767         dns_name_t name;
1768         dns_name_t *wname;
1769         dns_fixedname_t fwname;
1770         dns_rbtdb_t *rbtdb;
1771         isc_boolean_t done, wild, active;
1772         dns_rbtnodechain_t wchain;
1773
1774         /*
1775          * Caller must be holding the tree lock and MUST NOT be holding
1776          * any node locks.
1777          */
1778
1779         /*
1780          * Examine each ancestor level.  If the level's wild bit
1781          * is set, then construct the corresponding wildcard name and
1782          * search for it.  If the wildcard node exists, and is active in
1783          * this version, we're done.  If not, then we next check to see
1784          * if the ancestor is active in this version.  If so, then there
1785          * can be no possible wildcard match and again we're done.  If not,
1786          * continue the search.
1787          */
1788
1789         rbtdb = search->rbtdb;
1790         i = search->chain.level_matches;
1791         done = ISC_FALSE;
1792         node = *nodep;
1793         do {
1794                 LOCK(&(rbtdb->node_locks[node->locknum].lock));
1795
1796                 /*
1797                  * First we try to figure out if this node is active in
1798                  * the search's version.  We do this now, even though we
1799                  * may not need the information, because it simplifies the
1800                  * locking and code flow.
1801                  */
1802                 for (header = node->data;
1803                      header != NULL;
1804                      header = header->next) {
1805                         if (header->serial <= search->serial &&
1806                             !IGNORE(header) && EXISTS(header))
1807                                 break;
1808                 }
1809                 if (header != NULL)
1810                         active = ISC_TRUE;
1811                 else
1812                         active = ISC_FALSE;
1813
1814                 if (node->wild)
1815                         wild = ISC_TRUE;
1816                 else
1817                         wild = ISC_FALSE;
1818
1819                 UNLOCK(&(rbtdb->node_locks[node->locknum].lock));
1820
1821                 if (wild) {
1822                         /*
1823                          * Construct the wildcard name for this level.
1824                          */
1825                         dns_name_init(&name, NULL);
1826                         dns_rbt_namefromnode(node, &name);
1827                         dns_fixedname_init(&fwname);
1828                         wname = dns_fixedname_name(&fwname);
1829                         result = dns_name_concatenate(dns_wildcardname, &name,
1830                                                       wname, NULL);
1831                         j = i;
1832                         while (result == ISC_R_SUCCESS && j != 0) {
1833                                 j--;
1834                                 level_node = search->chain.levels[j];
1835                                 dns_name_init(&name, NULL);
1836                                 dns_rbt_namefromnode(level_node, &name);
1837                                 result = dns_name_concatenate(wname,
1838                                                               &name,
1839                                                               wname,
1840                                                               NULL);
1841                         }
1842                         if (result != ISC_R_SUCCESS)
1843                                 break;
1844
1845                         wnode = NULL;
1846                         dns_rbtnodechain_init(&wchain, NULL);
1847                         result = dns_rbt_findnode(rbtdb->tree, wname,
1848                                                   NULL, &wnode, &wchain,
1849                                                   DNS_RBTFIND_EMPTYDATA,
1850                                                   NULL, NULL);
1851                         if (result == ISC_R_SUCCESS) {
1852                             /*
1853                              * We have found the wildcard node.  If it
1854                              * is active in the search's version, we're
1855                              * done.
1856                              */
1857                             LOCK(&(rbtdb->node_locks[wnode->locknum].lock));
1858                             for (header = wnode->data;
1859                                  header != NULL;
1860                                  header = header->next) {
1861                                     if (header->serial <= search->serial &&
1862                                         !IGNORE(header) && EXISTS(header))
1863                                             break;
1864                             }
1865                             UNLOCK(&(rbtdb->node_locks[wnode->locknum].lock));
1866                             if (header != NULL ||
1867                                 activeempty(search, &wchain, wname)) {
1868                                     if (activeemtpynode(search, qname, wname))
1869                                                 return (ISC_R_NOTFOUND);
1870                                     /*
1871                                      * The wildcard node is active!
1872                                      *
1873                                      * Note: result is still ISC_R_SUCCESS
1874                                      * so we don't have to set it.
1875                                      */
1876                                     *nodep = wnode;
1877                                     break;
1878                             }
1879                         } else if (result != ISC_R_NOTFOUND &&
1880                                    result != DNS_R_PARTIALMATCH) {
1881                                 /*
1882                                  * An error has occurred.  Bail out.
1883                                  */
1884                                 break;
1885                         }
1886                 }
1887
1888                 if (active) {
1889                         /*
1890                          * The level node is active.  Any wildcarding
1891                          * present at higher levels has no
1892                          * effect and we're done.
1893                          */
1894                         result = ISC_R_NOTFOUND;
1895                         break;
1896                 }
1897
1898                 if (i > 0) {
1899                         i--;
1900                         node = search->chain.levels[i];
1901                 } else
1902                         done = ISC_TRUE;
1903         } while (!done);
1904
1905         return (result);
1906 }
1907
1908 static inline isc_result_t
1909 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
1910                   dns_name_t *foundname, dns_rdataset_t *rdataset,
1911                   dns_rdataset_t *sigrdataset, isc_boolean_t need_sig)
1912 {
1913         dns_rbtnode_t *node;
1914         rdatasetheader_t *header, *header_next, *found, *foundsig;
1915         isc_boolean_t empty_node;
1916         isc_result_t result;
1917         dns_fixedname_t fname, forigin;
1918         dns_name_t *name, *origin;
1919
1920         do {
1921                 node = NULL;
1922                 dns_fixedname_init(&fname);
1923                 name = dns_fixedname_name(&fname);
1924                 dns_fixedname_init(&forigin);
1925                 origin = dns_fixedname_name(&forigin);
1926                 result = dns_rbtnodechain_current(&search->chain, name,
1927                                                   origin, &node);
1928                 if (result != ISC_R_SUCCESS)
1929                         return (result);
1930                 LOCK(&(search->rbtdb->node_locks[node->locknum].lock));
1931                 found = NULL;
1932                 foundsig = NULL;
1933                 empty_node = ISC_TRUE;
1934                 for (header = node->data;
1935                      header != NULL;
1936                      header = header_next) {
1937                         header_next = header->next;
1938                         /*
1939                          * Look for an active, extant NSEC or RRSIG NSEC.
1940                          */
1941                         do {
1942                                 if (header->serial <= search->serial &&
1943                                     !IGNORE(header)) {
1944                                         /*
1945                                          * Is this a "this rdataset doesn't
1946                                          * exist" record?
1947                                          */
1948                                         if (NONEXISTENT(header))
1949                                                 header = NULL;
1950                                         break;
1951                                 } else
1952                                         header = header->down;
1953                         } while (header != NULL);
1954                         if (header != NULL) {
1955                                 /*
1956                                  * We now know that there is at least one
1957                                  * active rdataset at this node.
1958                                  */
1959                                 empty_node = ISC_FALSE;
1960                                 if (header->type == dns_rdatatype_nsec) {
1961                                         found = header;
1962                                         if (foundsig != NULL)
1963                                                 break;
1964                                 } else if (header->type ==
1965                                            RBTDB_RDATATYPE_SIGNSEC) {
1966                                         foundsig = header;
1967                                         if (found != NULL)
1968                                                 break;
1969                                 }
1970                         }
1971                 }
1972                 if (!empty_node) {
1973                         if (found != NULL &&
1974                             (foundsig != NULL || !need_sig))
1975                         {
1976                                 /*
1977                                  * We've found the right NSEC record.
1978                                  *
1979                                  * Note: for this to really be the right
1980                                  * NSEC record, it's essential that the NSEC
1981                                  * records of any nodes obscured by a zone
1982                                  * cut have been removed; we assume this is
1983                                  * the case.
1984                                  */
1985                                 result = dns_name_concatenate(name, origin,
1986                                                               foundname, NULL);
1987                                 if (result == ISC_R_SUCCESS) {
1988                                         if (nodep != NULL) {
1989                                                 new_reference(search->rbtdb,
1990                                                               node);
1991                                                 *nodep = node;
1992                                         }
1993                                         bind_rdataset(search->rbtdb, node,
1994                                                       found, search->now,
1995                                                       rdataset);
1996                                         if (foundsig != NULL)
1997                                                 bind_rdataset(search->rbtdb,
1998                                                               node,
1999                                                               foundsig,
2000                                                               search->now,
2001                                                               sigrdataset);
2002                                 }
2003                         } else if (found == NULL && foundsig == NULL) {
2004                                 /*
2005                                  * This node is active, but has no NSEC or
2006                                  * RRSIG NSEC.  That means it's glue or
2007                                  * other obscured zone data that isn't
2008                                  * relevant for our search.  Treat the
2009                                  * node as if it were empty and keep looking.
2010                                  */
2011                                 empty_node = ISC_TRUE;
2012                                 result = dns_rbtnodechain_prev(&search->chain,
2013                                                                NULL, NULL);
2014                         } else {
2015                                 /*
2016                                  * We found an active node, but either the
2017                                  * NSEC or the RRSIG NSEC is missing.  This
2018                                  * shouldn't happen.
2019                                  */
2020                                 result = DNS_R_BADDB;
2021                         }
2022                 } else {
2023                         /*
2024                          * This node isn't active.  We've got to keep
2025                          * looking.
2026                          */
2027                         result = dns_rbtnodechain_prev(&search->chain, NULL,
2028                                                        NULL);
2029                 }
2030                 UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock));
2031         } while (empty_node && result == ISC_R_SUCCESS);
2032
2033         /*
2034          * If the result is ISC_R_NOMORE, then we got to the beginning of
2035          * the database and didn't find a NSEC record.  This shouldn't
2036          * happen.
2037          */
2038         if (result == ISC_R_NOMORE)
2039                 result = DNS_R_BADDB;
2040
2041         return (result);
2042 }
2043
2044 static isc_result_t
2045 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
2046           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
2047           dns_dbnode_t **nodep, dns_name_t *foundname,
2048           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
2049 {
2050         dns_rbtnode_t *node = NULL;
2051         isc_result_t result;
2052         rbtdb_search_t search;
2053         isc_boolean_t cname_ok = ISC_TRUE;
2054         isc_boolean_t close_version = ISC_FALSE;
2055         isc_boolean_t maybe_zonecut = ISC_FALSE;
2056         isc_boolean_t at_zonecut = ISC_FALSE;
2057         isc_boolean_t wild;
2058         isc_boolean_t empty_node;
2059         isc_mutex_t *lock;
2060         rdatasetheader_t *header, *header_next, *found, *nsecheader;
2061         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
2062         rbtdb_rdatatype_t sigtype;
2063         isc_boolean_t active;
2064         dns_rbtnodechain_t chain;
2065
2066
2067         search.rbtdb = (dns_rbtdb_t *)db;
2068
2069         REQUIRE(VALID_RBTDB(search.rbtdb));
2070
2071         /*
2072          * We don't care about 'now'.
2073          */
2074         UNUSED(now);
2075
2076         /*
2077          * If the caller didn't supply a version, attach to the current
2078          * version.
2079          */
2080         if (version == NULL) {
2081                 currentversion(db, &version);
2082                 close_version = ISC_TRUE;
2083         }
2084
2085         search.rbtversion = version;
2086         search.serial = search.rbtversion->serial;
2087         search.options = options;
2088         search.copy_name = ISC_FALSE;
2089         search.need_cleanup = ISC_FALSE;
2090         search.wild = ISC_FALSE;
2091         search.zonecut = NULL;
2092         dns_fixedname_init(&search.zonecut_name);
2093         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
2094         search.now = 0;
2095
2096         /*
2097          * 'wild' will be true iff. we've matched a wildcard.
2098          */
2099         wild = ISC_FALSE;
2100
2101         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
2102
2103         /*
2104          * Search down from the root of the tree.  If, while going down, we
2105          * encounter a callback node, zone_zonecut_callback() will search the
2106          * rdatasets at the zone cut for active DNAME or NS rdatasets.
2107          */
2108         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
2109                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
2110                                   zone_zonecut_callback, &search);
2111
2112         if (result == DNS_R_PARTIALMATCH) {
2113         partial_match:
2114                 if (search.zonecut != NULL) {
2115                     result = setup_delegation(&search, nodep, foundname,
2116                                               rdataset, sigrdataset);
2117                     goto tree_exit;
2118                 }
2119
2120                 if (search.wild) {
2121                         /*
2122                          * At least one of the levels in the search chain
2123                          * potentially has a wildcard.  For each such level,
2124                          * we must see if there's a matching wildcard active
2125                          * in the current version.
2126                          */
2127                         result = find_wildcard(&search, &node, name);
2128                         if (result == ISC_R_SUCCESS) {
2129                                 result = dns_name_copy(name, foundname, NULL);
2130                                 if (result != ISC_R_SUCCESS)
2131                                         goto tree_exit;
2132                                 wild = ISC_TRUE;
2133                                 goto found;
2134                         }
2135                         else if (result != ISC_R_NOTFOUND)
2136                                 goto tree_exit;
2137                 }
2138
2139                 chain = search.chain;
2140                 active = activeempty(&search, &chain, name);
2141
2142                 /*
2143                  * If we're here, then the name does not exist, is not
2144                  * beneath a zonecut, and there's no matching wildcard.
2145                  */
2146                 if (search.rbtdb->secure ||
2147                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
2148                 {
2149                         result = find_closest_nsec(&search, nodep, foundname,
2150                                                   rdataset, sigrdataset,
2151                                                   search.rbtdb->secure);
2152                         if (result == ISC_R_SUCCESS)
2153                                 result = active ? DNS_R_EMPTYNAME :
2154                                                   DNS_R_NXDOMAIN;
2155                 } else
2156                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
2157                 goto tree_exit;
2158         } else if (result != ISC_R_SUCCESS)
2159                 goto tree_exit;
2160
2161  found:
2162         /*
2163          * We have found a node whose name is the desired name, or we
2164          * have matched a wildcard.
2165          */
2166
2167         if (search.zonecut != NULL) {
2168                 /*
2169                  * If we're beneath a zone cut, we don't want to look for
2170                  * CNAMEs because they're not legitimate zone glue.
2171                  */
2172                 cname_ok = ISC_FALSE;
2173         } else {
2174                 /*
2175                  * The node may be a zone cut itself.  If it might be one,
2176                  * make sure we check for it later.
2177                  */
2178                 if (node->find_callback &&
2179                     (node != search.rbtdb->origin_node ||
2180                      IS_STUB(search.rbtdb)) &&
2181                     !dns_rdatatype_atparent(type))
2182                         maybe_zonecut = ISC_TRUE;
2183         }
2184
2185         /*
2186          * Certain DNSSEC types are not subject to CNAME matching
2187          * (RFC 2535, section 2.3.5).
2188          *
2189          * We don't check for RRSIG, because we don't store RRSIG records
2190          * directly.
2191          */
2192         if (type == dns_rdatatype_dnskey || type == dns_rdatatype_nsec)
2193                 cname_ok = ISC_FALSE;
2194
2195         /*
2196          * We now go looking for rdata...
2197          */
2198
2199         LOCK(&(search.rbtdb->node_locks[node->locknum].lock));
2200
2201         found = NULL;
2202         foundsig = NULL;
2203         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
2204         nsecheader = NULL;
2205         nsecsig = NULL;
2206         cnamesig = NULL;
2207         empty_node = ISC_TRUE;
2208         for (header = node->data; header != NULL; header = header_next) {
2209                 header_next = header->next;
2210                 /*
2211                  * Look for an active, extant rdataset.
2212                  */
2213                 do {
2214                         if (header->serial <= search.serial &&
2215                             !IGNORE(header)) {
2216                                 /*
2217                                  * Is this a "this rdataset doesn't
2218                                  * exist" record?
2219                                  */
2220                                 if (NONEXISTENT(header))
2221                                         header = NULL;
2222                                 break;
2223                         } else
2224                                 header = header->down;
2225                 } while (header != NULL);
2226                 if (header != NULL) {
2227                         /*
2228                          * We now know that there is at least one active
2229                          * rdataset at this node.
2230                          */
2231                         empty_node = ISC_FALSE;
2232
2233                         /*
2234                          * Do special zone cut handling, if requested.
2235                          */
2236                         if (maybe_zonecut &&
2237                             header->type == dns_rdatatype_ns) {
2238                                 /*
2239                                  * We increment the reference count on node to
2240                                  * ensure that search->zonecut_rdataset will
2241                                  * still be valid later.
2242                                  */
2243                                 new_reference(search.rbtdb, node);
2244                                 search.zonecut = node;
2245                                 search.zonecut_rdataset = header;
2246                                 search.zonecut_sigrdataset = NULL;
2247                                 search.need_cleanup = ISC_TRUE;
2248                                 maybe_zonecut = ISC_FALSE;
2249                                 at_zonecut = ISC_TRUE;
2250                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
2251                                     && type != dns_rdatatype_nsec
2252                                     && type != dns_rdatatype_dnskey) {
2253                                         /*
2254                                          * Glue is not OK, but any answer we
2255                                          * could return would be glue.  Return
2256                                          * the delegation.
2257                                          */
2258                                         found = NULL;
2259                                         break;
2260                                 }
2261                                 if (found != NULL && foundsig != NULL)
2262                                         break;
2263                         }
2264
2265                         /*
2266                          * If we found a type we were looking for,
2267                          * remember it.
2268                          */
2269                         if (header->type == type ||
2270                             type == dns_rdatatype_any ||
2271                             (header->type == dns_rdatatype_cname &&
2272                              cname_ok)) {
2273                                 /*
2274                                  * We've found the answer!
2275                                  */
2276                                 found = header;
2277                                 if (header->type == dns_rdatatype_cname &&
2278                                     cname_ok) {
2279                                         /*
2280                                          * We may be finding a CNAME instead
2281                                          * of the desired type.
2282                                          *
2283                                          * If we've already got the CNAME RRSIG,
2284                                          * use it, otherwise change sigtype
2285                                          * so that we find it.
2286                                          */
2287                                         if (cnamesig != NULL)
2288                                                 foundsig = cnamesig;
2289                                         else
2290                                                 sigtype =
2291                                                     RBTDB_RDATATYPE_SIGCNAME;
2292                                 }
2293                                 /*
2294                                  * If we've got all we need, end the search.
2295                                  */
2296                                 if (!maybe_zonecut && foundsig != NULL)
2297                                         break;
2298                         } else if (header->type == sigtype) {
2299                                 /*
2300                                  * We've found the RRSIG rdataset for our
2301                                  * target type.  Remember it.
2302                                  */
2303                                 foundsig = header;
2304                                 /*
2305                                  * If we've got all we need, end the search.
2306                                  */
2307                                 if (!maybe_zonecut && found != NULL)
2308                                         break;
2309                         } else if (header->type == dns_rdatatype_nsec) {
2310                                 /*
2311                                  * Remember a NSEC rdataset even if we're
2312                                  * not specifically looking for it, because
2313                                  * we might need it later.
2314                                  */
2315                                 nsecheader = header;
2316                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC) {
2317                                 /*
2318                                  * If we need the NSEC rdataset, we'll also
2319                                  * need its signature.
2320                                  */
2321                                 nsecsig = header;
2322                         } else if (cname_ok &&
2323                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
2324                                 /*
2325                                  * If we get a CNAME match, we'll also need
2326                                  * its signature.
2327                                  */
2328                                 cnamesig = header;
2329                         }
2330                 }
2331         }
2332
2333         if (empty_node) {
2334                 /*
2335                  * We have an exact match for the name, but there are no
2336                  * active rdatasets in the desired version.  That means that
2337                  * this node doesn't exist in the desired version, and that
2338                  * we really have a partial match.
2339                  */
2340                 if (!wild) {
2341                         UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
2342                         goto partial_match;
2343                 }
2344         }
2345
2346         /*
2347          * If we didn't find what we were looking for...
2348          */
2349         if (found == NULL) {
2350                 if (search.zonecut != NULL) {
2351                     /*
2352                      * We were trying to find glue at a node beneath a
2353                      * zone cut, but didn't.
2354                      *
2355                      * Return the delegation.
2356                      */
2357                     UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
2358                     result = setup_delegation(&search, nodep, foundname,
2359                                               rdataset, sigrdataset);
2360                     goto tree_exit;
2361                 }
2362                 /*
2363                  * The desired type doesn't exist.
2364                  */
2365                 result = DNS_R_NXRRSET;
2366                 if (search.rbtdb->secure &&
2367                     (nsecheader == NULL || nsecsig == NULL)) {
2368                         /*
2369                          * The zone is secure but there's no NSEC,
2370                          * or the NSEC has no signature!
2371                          */
2372                         if (!wild) {
2373                                 result = DNS_R_BADDB;
2374                                 goto node_exit;
2375                         }
2376                         
2377                         UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
2378                         result = find_closest_nsec(&search, nodep, foundname,
2379                                                   rdataset, sigrdataset,
2380                                                   search.rbtdb->secure);
2381                         if (result == ISC_R_SUCCESS)
2382                                 result = DNS_R_EMPTYWILD;
2383                         goto tree_exit;
2384                 }
2385                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
2386                     nsecheader == NULL)
2387                 {
2388                         /*
2389                          * There's no NSEC record, and we were told
2390                          * to find one.
2391                          */
2392                         result = DNS_R_BADDB;
2393                         goto node_exit;
2394                 }
2395                 if (nodep != NULL) {
2396                         new_reference(search.rbtdb, node);
2397                         *nodep = node;
2398                 }
2399                 if (search.rbtdb->secure ||
2400                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
2401                 {
2402                         bind_rdataset(search.rbtdb, node, nsecheader,
2403                                       0, rdataset);
2404                         if (nsecsig != NULL)
2405                                 bind_rdataset(search.rbtdb, node,
2406                                               nsecsig, 0, sigrdataset);
2407                 }
2408                 if (wild)
2409                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
2410                 goto node_exit;
2411         }
2412
2413         /*
2414          * We found what we were looking for, or we found a CNAME.
2415          */
2416
2417         if (type != found->type &&
2418             type != dns_rdatatype_any &&
2419             found->type == dns_rdatatype_cname) {
2420                 /*
2421                  * We weren't doing an ANY query and we found a CNAME instead
2422                  * of the type we were looking for, so we need to indicate
2423                  * that result to the caller.
2424                  */
2425                 result = DNS_R_CNAME;
2426         } else if (search.zonecut != NULL) {
2427                 /*
2428                  * If we're beneath a zone cut, we must indicate that the
2429                  * result is glue, unless we're actually at the zone cut
2430                  * and the type is NSEC or KEY.
2431                  */
2432                 if (search.zonecut == node) {
2433                         if (type == dns_rdatatype_nsec ||
2434                             type == dns_rdatatype_dnskey)
2435                                 result = ISC_R_SUCCESS;
2436                         else if (type == dns_rdatatype_any)
2437                                 result = DNS_R_ZONECUT;
2438                         else
2439                                 result = DNS_R_GLUE;
2440                 } else
2441                         result = DNS_R_GLUE;
2442                 /*
2443                  * We might have found data that isn't glue, but was occluded
2444                  * by a dynamic update.  If the caller cares about this, they
2445                  * will have told us to validate glue.
2446                  *
2447                  * XXX We should cache the glue validity state!
2448                  */
2449                 if (result == DNS_R_GLUE &&
2450                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
2451                     !valid_glue(&search, foundname, type, node)) {
2452                     UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
2453                     result = setup_delegation(&search, nodep, foundname,
2454                                               rdataset, sigrdataset);
2455                     goto tree_exit;
2456                 }
2457         } else {
2458                 /*
2459                  * An ordinary successful query!
2460                  */
2461                 result = ISC_R_SUCCESS;
2462         }
2463
2464         if (nodep != NULL) {
2465                 if (!at_zonecut)
2466                         new_reference(search.rbtdb, node);
2467                 else
2468                         search.need_cleanup = ISC_FALSE;
2469                 *nodep = node;
2470         }
2471
2472         if (type != dns_rdatatype_any) {
2473                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
2474                 if (foundsig != NULL)
2475                         bind_rdataset(search.rbtdb, node, foundsig, 0,
2476                                       sigrdataset);
2477         }
2478
2479         if (wild)
2480                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
2481
2482  node_exit:
2483         UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
2484
2485  tree_exit:
2486         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
2487
2488         /*
2489          * If we found a zonecut but aren't going to use it, we have to
2490          * let go of it.
2491          */
2492         if (search.need_cleanup) {
2493                 node = search.zonecut;
2494                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
2495
2496                 LOCK(lock);
2497                 INSIST(node->references > 0);
2498                 node->references--;
2499                 if (node->references == 0)
2500                         no_references(search.rbtdb, node, 0,
2501                                       isc_rwlocktype_none);
2502
2503                 UNLOCK(lock);
2504         }
2505
2506         if (close_version)
2507                 closeversion(db, &version, ISC_FALSE);
2508
2509         dns_rbtnodechain_reset(&search.chain);
2510
2511         return (result);
2512 }
2513
2514 static isc_result_t
2515 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
2516                  isc_stdtime_t now, dns_dbnode_t **nodep,
2517                  dns_name_t *foundname,
2518                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
2519 {
2520         UNUSED(db);
2521         UNUSED(name);
2522         UNUSED(options);
2523         UNUSED(now);
2524         UNUSED(nodep);
2525         UNUSED(foundname);
2526         UNUSED(rdataset);
2527         UNUSED(sigrdataset);
2528
2529         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
2530
2531         return (ISC_R_NOTIMPLEMENTED);
2532 }
2533
2534 static isc_result_t
2535 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2536         rbtdb_search_t *search = arg;
2537         rdatasetheader_t *header, *header_prev, *header_next;
2538         rdatasetheader_t *dname_header, *sigdname_header;
2539         isc_result_t result;
2540
2541         /* XXX comment */
2542
2543         REQUIRE(search->zonecut == NULL);
2544
2545         /*
2546          * Keep compiler silent.
2547          */
2548         UNUSED(name);
2549
2550         LOCK(&(search->rbtdb->node_locks[node->locknum].lock));
2551
2552         /*
2553          * Look for a DNAME or RRSIG DNAME rdataset.
2554          */
2555         dname_header = NULL;
2556         sigdname_header = NULL;
2557         header_prev = NULL;
2558         for (header = node->data; header != NULL; header = header_next) {
2559                 header_next = header->next;
2560                 if (header->ttl <= search->now) {
2561                         /*
2562                          * This rdataset is stale.  If no one else is
2563                          * using the node, we can clean it up right
2564                          * now, otherwise we mark it as stale, and
2565                          * the node as dirty, so it will get cleaned
2566                          * up later.
2567                          */
2568                         if (node->references == 0) {
2569                                 INSIST(header->down == NULL);
2570                                 if (header_prev != NULL)
2571                                         header_prev->next =
2572                                                 header->next;
2573                                 else
2574                                         node->data = header->next;
2575                                 free_rdataset(search->rbtdb->common.mctx,
2576                                               header);
2577                         } else {
2578                                 header->attributes |=
2579                                         RDATASET_ATTR_STALE;
2580                                 node->dirty = 1;
2581                                 header_prev = header;
2582                         }
2583                 } else if (header->type == dns_rdatatype_dname &&
2584                            EXISTS(header)) {
2585                         dname_header = header;
2586                         header_prev = header;
2587                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
2588                          EXISTS(header)) {
2589                         sigdname_header = header;
2590                         header_prev = header;
2591                 } else
2592                         header_prev = header;
2593         }
2594
2595         if (dname_header != NULL &&
2596             (dname_header->trust != dns_trust_pending ||
2597              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
2598                 /*
2599                  * We increment the reference count on node to ensure that
2600                  * search->zonecut_rdataset will still be valid later.
2601                  */
2602                 new_reference(search->rbtdb, node);
2603                 search->zonecut = node;
2604                 search->zonecut_rdataset = dname_header;
2605                 search->zonecut_sigrdataset = sigdname_header;
2606                 search->need_cleanup = ISC_TRUE;
2607                 result = DNS_R_PARTIALMATCH;
2608         } else
2609                 result = DNS_R_CONTINUE;
2610
2611         UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock));
2612
2613         return (result);
2614 }
2615
2616 static inline isc_result_t
2617 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
2618                      dns_dbnode_t **nodep, dns_name_t *foundname,
2619                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
2620 {
2621         unsigned int i;
2622         dns_rbtnode_t *level_node;
2623         rdatasetheader_t *header, *header_prev, *header_next;
2624         rdatasetheader_t *found, *foundsig;
2625         isc_result_t result = ISC_R_NOTFOUND;
2626         dns_name_t name;
2627         dns_rbtdb_t *rbtdb;
2628         isc_boolean_t done;
2629
2630         /*
2631          * Caller must be holding the tree lock.
2632          */
2633
2634         rbtdb = search->rbtdb;
2635         i = search->chain.level_matches;
2636         done = ISC_FALSE;
2637         do {
2638                 LOCK(&(rbtdb->node_locks[node->locknum].lock));
2639
2640                 /*
2641                  * Look for NS and RRSIG NS rdatasets.
2642                  */
2643                 found = NULL;
2644                 foundsig = NULL;
2645                 header_prev = NULL;
2646                 for (header = node->data;
2647                      header != NULL;
2648                      header = header_next) {
2649                         header_next = header->next;
2650                         if (header->ttl <= search->now) {
2651                                 /*
2652                                  * This rdataset is stale.  If no one else is
2653                                  * using the node, we can clean it up right
2654                                  * now, otherwise we mark it as stale, and
2655                                  * the node as dirty, so it will get cleaned
2656                                  * up later.
2657                                  */
2658                                 if (node->references == 0) {
2659                                         INSIST(header->down == NULL);
2660                                         if (header_prev != NULL)
2661                                                 header_prev->next =
2662                                                         header->next;
2663                                         else
2664                                                 node->data = header->next;
2665                                         free_rdataset(rbtdb->common.mctx,
2666                                                       header);
2667                                 } else {
2668                                         header->attributes |=
2669                                                 RDATASET_ATTR_STALE;
2670                                         node->dirty = 1;
2671                                         header_prev = header;
2672                                 }
2673                         } else if (EXISTS(header)) {
2674                                 /*
2675                                  * We've found an extant rdataset.  See if
2676                                  * we're interested in it.
2677                                  */
2678                                 if (header->type == dns_rdatatype_ns) {
2679                                         found = header;
2680                                         if (foundsig != NULL)
2681                                                 break;
2682                                 } else if (header->type ==
2683                                            RBTDB_RDATATYPE_SIGNS) {
2684                                         foundsig = header;
2685                                         if (found != NULL)
2686                                                 break;
2687                                 }
2688                                 header_prev = header;
2689                         } else
2690                                 header_prev = header;
2691                 }
2692
2693                 if (found != NULL) {
2694                         /*
2695                          * If we have to set foundname, we do it before
2696                          * anything else.  If we were to set foundname after
2697                          * we had set nodep or bound the rdataset, then we'd
2698                          * have to undo that work if dns_name_concatenate()
2699                          * failed.  By setting foundname first, there's
2700                          * nothing to undo if we have trouble.
2701                          */
2702                         if (foundname != NULL) {
2703                                 dns_name_init(&name, NULL);
2704                                 dns_rbt_namefromnode(node, &name);
2705                                 result = dns_name_copy(&name, foundname, NULL);
2706                                 while (result == ISC_R_SUCCESS && i > 0) {
2707                                         i--;
2708                                         level_node = search->chain.levels[i];
2709                                         dns_name_init(&name, NULL);
2710                                         dns_rbt_namefromnode(level_node,
2711                                                              &name);
2712                                         result =
2713                                                 dns_name_concatenate(foundname,
2714                                                                      &name,
2715                                                                      foundname,
2716                                                                      NULL);
2717                                 }
2718                                 if (result != ISC_R_SUCCESS) {
2719                                         *nodep = NULL;
2720                                         goto node_exit;
2721                                 }
2722                         }
2723                         result = DNS_R_DELEGATION;
2724                         if (nodep != NULL) {
2725                                 new_reference(search->rbtdb, node);
2726                                 *nodep = node;
2727                         }
2728                         bind_rdataset(search->rbtdb, node, found, search->now,
2729                                       rdataset);
2730                         if (foundsig != NULL)
2731                                 bind_rdataset(search->rbtdb, node, foundsig,
2732                                               search->now, sigrdataset);
2733                 }
2734
2735         node_exit:
2736                 UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock));
2737
2738                 if (found == NULL && i > 0) {
2739                         i--;
2740                         node = search->chain.levels[i];
2741                 } else
2742                         done = ISC_TRUE;
2743
2744         } while (!done);
2745
2746         return (result);
2747 }
2748
2749 static isc_result_t
2750 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
2751                   isc_stdtime_t now, dns_name_t *foundname,
2752                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
2753 {
2754         dns_rbtnode_t *node;
2755         rdatasetheader_t *header, *header_next, *header_prev;
2756         rdatasetheader_t *found, *foundsig;
2757         isc_boolean_t empty_node;
2758         isc_result_t result;
2759         dns_fixedname_t fname, forigin;
2760         dns_name_t *name, *origin;
2761         rbtdb_rdatatype_t matchtype, sigmatchtype;
2762
2763         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
2764         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
2765                                              dns_rdatatype_nsec);
2766         
2767         do {
2768                 node = NULL;
2769                 dns_fixedname_init(&fname);
2770                 name = dns_fixedname_name(&fname);
2771                 dns_fixedname_init(&forigin);
2772                 origin = dns_fixedname_name(&forigin);
2773                 result = dns_rbtnodechain_current(&search->chain, name,
2774                                                   origin, &node);
2775                 if (result != ISC_R_SUCCESS)
2776                         return (result);
2777                 LOCK(&(search->rbtdb->node_locks[node->locknum].lock));
2778                 found = NULL;
2779                 foundsig = NULL;
2780                 empty_node = ISC_TRUE;
2781                 header_prev = NULL;
2782                 for (header = node->data;
2783                      header != NULL;
2784                      header = header_next) {
2785                         header_next = header->next;
2786                         if (header->ttl <= now) {
2787                                 /*
2788                                  * This rdataset is stale.  If no one else is
2789                                  * using the node, we can clean it up right
2790                                  * now, otherwise we mark it as stale, and the
2791                                  * node as dirty, so it will get cleaned up 
2792                                  * later.
2793                                  */
2794                                 if (header->ttl > search->now - RBTDB_VIRTUAL)
2795                                         header_prev = header;
2796                                 else if (node->references == 0) {
2797                                         INSIST(header->down == NULL);
2798                                         if (header_prev != NULL)
2799                                                 header_prev->next =
2800                                                         header->next;
2801                                         else
2802                                                 node->data = header->next;
2803                                         free_rdataset(search->rbtdb->common.mctx,
2804                                                       header);
2805                                 } else {
2806                                         header->attributes |=
2807                                                  RDATASET_ATTR_STALE;
2808                                         node->dirty = 1;
2809                                         header_prev = header;
2810                                 }
2811                                 continue;
2812                         }
2813                         if (NONEXISTENT(header) || NXDOMAIN(header)) {
2814                                 header_prev = header;
2815                                 continue;
2816                         }
2817                         empty_node = ISC_FALSE;
2818                         if (header->type == matchtype)
2819                                 found = header;
2820                         else if (header->type == sigmatchtype)
2821                                 foundsig = header;
2822                         header_prev = header;
2823                 }
2824                 if (found != NULL) {
2825                         result = dns_name_concatenate(name, origin,
2826                                                       foundname, NULL);
2827                         if (result != ISC_R_SUCCESS)
2828                                 goto unlock_node;
2829                         bind_rdataset(search->rbtdb, node, found,
2830                                       now, rdataset);
2831                         if (foundsig != NULL)
2832                                 bind_rdataset(search->rbtdb, node, foundsig,
2833                                               now, sigrdataset);
2834                         new_reference(search->rbtdb, node);
2835                         *nodep = node;
2836                         result = DNS_R_COVERINGNSEC;
2837                 } else if (!empty_node) {
2838                         result = ISC_R_NOTFOUND;
2839                 }else
2840                         result = dns_rbtnodechain_prev(&search->chain, NULL,
2841                                                        NULL);
2842  unlock_node:
2843                 UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock));
2844         } while (empty_node && result == ISC_R_SUCCESS);
2845         return (result);
2846 }
2847
2848 static isc_result_t
2849 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
2850            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
2851            dns_dbnode_t **nodep, dns_name_t *foundname,
2852            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
2853 {
2854         dns_rbtnode_t *node = NULL;
2855         isc_result_t result;
2856         rbtdb_search_t search;
2857         isc_boolean_t cname_ok = ISC_TRUE;
2858         isc_boolean_t empty_node;
2859         isc_mutex_t *lock;
2860         rdatasetheader_t *header, *header_prev, *header_next;
2861         rdatasetheader_t *found, *nsheader;
2862         rdatasetheader_t *foundsig, *nssig, *cnamesig;
2863         rbtdb_rdatatype_t sigtype, nsectype;
2864
2865         UNUSED(version);
2866
2867         search.rbtdb = (dns_rbtdb_t *)db;
2868
2869         REQUIRE(VALID_RBTDB(search.rbtdb));
2870         REQUIRE(version == NULL);
2871
2872         if (now == 0)
2873                 isc_stdtime_get(&now);
2874
2875         search.rbtversion = NULL;
2876         search.serial = 1;
2877         search.options = options;
2878         search.copy_name = ISC_FALSE;
2879         search.need_cleanup = ISC_FALSE;
2880         search.wild = ISC_FALSE;
2881         search.zonecut = NULL;
2882         dns_fixedname_init(&search.zonecut_name);
2883         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
2884         search.now = now;
2885
2886         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
2887
2888         /*
2889          * Search down from the root of the tree.  If, while going down, we
2890          * encounter a callback node, cache_zonecut_callback() will search the
2891          * rdatasets at the zone cut for a DNAME rdataset.
2892          */
2893         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
2894                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
2895                                   cache_zonecut_callback, &search);
2896
2897         if (result == DNS_R_PARTIALMATCH) {
2898                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
2899                         result = find_coveringnsec(&search, nodep, now,
2900                                                    foundname, rdataset,
2901                                                    sigrdataset);
2902                         if (result == DNS_R_COVERINGNSEC)
2903                                 goto tree_exit;
2904                 }
2905                 if (search.zonecut != NULL) {
2906                     result = setup_delegation(&search, nodep, foundname,
2907                                               rdataset, sigrdataset);
2908                     goto tree_exit;
2909                 } else {
2910                 find_ns:
2911                         result = find_deepest_zonecut(&search, node, nodep,
2912                                                       foundname, rdataset,
2913                                                       sigrdataset);
2914                         goto tree_exit;
2915                 }
2916         } else if (result != ISC_R_SUCCESS)
2917                 goto tree_exit;
2918
2919         /*
2920          * Certain DNSSEC types are not subject to CNAME matching
2921          * (RFC 2535, section 2.3.5).
2922          *
2923          * We don't check for RRSIG, because we don't store RRSIG records
2924          * directly.
2925          */
2926         if (type == dns_rdatatype_dnskey || type == dns_rdatatype_nsec)
2927                 cname_ok = ISC_FALSE;
2928
2929         /*
2930          * We now go looking for rdata...
2931          */
2932
2933         LOCK(&(search.rbtdb->node_locks[node->locknum].lock));
2934
2935         found = NULL;
2936         foundsig = NULL;
2937         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
2938         nsectype = RBTDB_RDATATYPE_VALUE(0, type);
2939         nsheader = NULL;
2940         nssig = NULL;
2941         cnamesig = NULL;
2942         empty_node = ISC_TRUE;
2943         header_prev = NULL;
2944         for (header = node->data; header != NULL; header = header_next) {
2945                 header_next = header->next;
2946                 if (header->ttl <= now) {
2947                         /*
2948                          * This rdataset is stale.  If no one else is using the
2949                          * node, we can clean it up right now, otherwise we
2950                          * mark it as stale, and the node as dirty, so it will
2951                          * get cleaned up later.
2952                          */
2953                         if (header->ttl > now - RBTDB_VIRTUAL)
2954                                 header_prev = header;
2955                         else if (node->references == 0) {
2956                                 INSIST(header->down == NULL);
2957                                 if (header_prev != NULL)
2958                                         header_prev->next = header->next;
2959                                 else
2960                                         node->data = header->next;
2961                                 free_rdataset(search.rbtdb->common.mctx,
2962                                               header);
2963                         } else {
2964                                 header->attributes |= RDATASET_ATTR_STALE;
2965                                 node->dirty = 1;
2966                                 header_prev = header;
2967                         }
2968                 } else if (EXISTS(header)) {
2969                         /*
2970                          * We now know that there is at least one active
2971                          * non-stale rdataset at this node.
2972                          */
2973                         empty_node = ISC_FALSE;
2974
2975                         /*
2976                          * If we found a type we were looking for, remember
2977                          * it.
2978                          */
2979                         if (header->type == type ||
2980                             (type == dns_rdatatype_any &&
2981                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
2982                             (cname_ok && header->type ==
2983                              dns_rdatatype_cname)) {
2984                                 /*
2985                                  * We've found the answer.
2986                                  */
2987                                 found = header;
2988                                 if (header->type == dns_rdatatype_cname &&
2989                                     cname_ok &&
2990                                     cnamesig != NULL) {
2991                                         /*
2992                                          * If we've already got the CNAME RRSIG,
2993                                          * use it, otherwise change sigtype
2994                                          * so that we find it.
2995                                          */
2996                                         if (cnamesig != NULL)
2997                                                 foundsig = cnamesig;
2998                                         else
2999                                                 sigtype =
3000                                                     RBTDB_RDATATYPE_SIGCNAME;
3001                                         foundsig = cnamesig;
3002                                 }
3003                         } else if (header->type == sigtype) {
3004                                 /*
3005                                  * We've found the RRSIG rdataset for our
3006                                  * target type.  Remember it.
3007                                  */
3008                                 foundsig = header;
3009                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
3010                                    header->type == nsectype) {
3011                                 /*
3012                                  * We've found a negative cache entry.
3013                                  */
3014                                 found = header;
3015                         } else if (header->type == dns_rdatatype_ns) {
3016                                 /*
3017                                  * Remember a NS rdataset even if we're
3018                                  * not specifically looking for it, because
3019                                  * we might need it later.
3020                                  */
3021                                 nsheader = header;
3022                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
3023                                 /*
3024                                  * If we need the NS rdataset, we'll also
3025                                  * need its signature.
3026                                  */
3027                                 nssig = header;
3028                         } else if (cname_ok &&
3029                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3030                                 /*
3031                                  * If we get a CNAME match, we'll also need
3032                                  * its signature.
3033                                  */
3034                                 cnamesig = header;
3035                         }
3036                         header_prev = header;
3037                 } else
3038                         header_prev = header;
3039         }
3040
3041         if (empty_node) {
3042                 /*
3043                  * We have an exact match for the name, but there are no
3044                  * extant rdatasets.  That means that this node doesn't
3045                  * meaningfully exist, and that we really have a partial match.
3046                  */
3047                 UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
3048                 goto find_ns;
3049         }
3050
3051         /*
3052          * If we didn't find what we were looking for...
3053          */
3054         if (found == NULL ||
3055             (found->trust == dns_trust_glue &&
3056              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
3057             (found->trust == dns_trust_pending &&
3058              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
3059                 /*
3060                  * If there is an NS rdataset at this node, then this is the
3061                  * deepest zone cut.
3062                  */
3063                 if (nsheader != NULL) {
3064                         if (nodep != NULL) {
3065                                 new_reference(search.rbtdb, node);
3066                                 *nodep = node;
3067                         }
3068                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
3069                                       rdataset);
3070                         if (nssig != NULL)
3071                                 bind_rdataset(search.rbtdb, node, nssig,
3072                                               search.now, sigrdataset);
3073                         result = DNS_R_DELEGATION;
3074                         goto node_exit;
3075                 }
3076
3077                 /*
3078                  * Go find the deepest zone cut.
3079                  */
3080                 UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
3081                 goto find_ns;
3082         }
3083
3084         /*
3085          * We found what we were looking for, or we found a CNAME.
3086          */
3087
3088         if (nodep != NULL) {
3089                 new_reference(search.rbtdb, node);
3090                 *nodep = node;
3091         }
3092
3093         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
3094                 /*
3095                  * We found a negative cache entry.
3096                  */
3097                 if (NXDOMAIN(found))
3098                         result = DNS_R_NCACHENXDOMAIN;
3099                 else
3100                         result = DNS_R_NCACHENXRRSET;
3101         } else if (type != found->type &&
3102                    type != dns_rdatatype_any &&
3103                    found->type == dns_rdatatype_cname) {
3104                 /*
3105                  * We weren't doing an ANY query and we found a CNAME instead
3106                  * of the type we were looking for, so we need to indicate
3107                  * that result to the caller.
3108                  */
3109                 result = DNS_R_CNAME;
3110         } else {
3111                 /*
3112                  * An ordinary successful query!
3113                  */
3114                 result = ISC_R_SUCCESS;
3115         }
3116
3117         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
3118             result == DNS_R_NCACHENXRRSET) {
3119                 bind_rdataset(search.rbtdb, node, found, search.now,
3120                               rdataset);
3121                 if (foundsig != NULL)
3122                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
3123                                       sigrdataset);
3124         }
3125
3126  node_exit:
3127         UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
3128
3129  tree_exit:
3130         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3131
3132         /*
3133          * If we found a zonecut but aren't going to use it, we have to
3134          * let go of it.
3135          */
3136         if (search.need_cleanup) {
3137                 node = search.zonecut;
3138                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3139
3140                 LOCK(lock);
3141                 INSIST(node->references > 0);
3142                 node->references--;
3143                 if (node->references == 0)
3144                         no_references(search.rbtdb, node, 0,
3145                                       isc_rwlocktype_none);
3146                 UNLOCK(lock);
3147         }
3148
3149         dns_rbtnodechain_reset(&search.chain);
3150
3151         return (result);
3152 }
3153
3154 static isc_result_t
3155 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3156                   isc_stdtime_t now, dns_dbnode_t **nodep,
3157                   dns_name_t *foundname,
3158                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3159 {
3160         dns_rbtnode_t *node = NULL;
3161         isc_result_t result;
3162         rbtdb_search_t search;
3163         rdatasetheader_t *header, *header_prev, *header_next;
3164         rdatasetheader_t *found, *foundsig;
3165         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
3166
3167         search.rbtdb = (dns_rbtdb_t *)db;
3168
3169         REQUIRE(VALID_RBTDB(search.rbtdb));
3170
3171         if (now == 0)
3172                 isc_stdtime_get(&now);
3173
3174         search.rbtversion = NULL;
3175         search.serial = 1;
3176         search.options = options;
3177         search.copy_name = ISC_FALSE;
3178         search.need_cleanup = ISC_FALSE;
3179         search.wild = ISC_FALSE;
3180         search.zonecut = NULL;
3181         dns_fixedname_init(&search.zonecut_name);
3182         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3183         search.now = now;
3184
3185         if ((options & DNS_DBFIND_NOEXACT) != 0)
3186                 rbtoptions |= DNS_RBTFIND_NOEXACT;
3187
3188         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3189
3190         /*
3191          * Search down from the root of the tree.
3192          */
3193         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
3194                                   &search.chain, rbtoptions, NULL, &search);
3195
3196         if (result == DNS_R_PARTIALMATCH) {
3197         find_ns:
3198                 result = find_deepest_zonecut(&search, node, nodep, foundname,
3199                                               rdataset, sigrdataset);
3200                 goto tree_exit;
3201         } else if (result != ISC_R_SUCCESS)
3202                 goto tree_exit;
3203
3204         /*
3205          * We now go looking for an NS rdataset at the node.
3206          */
3207
3208         LOCK(&(search.rbtdb->node_locks[node->locknum].lock));
3209
3210         found = NULL;
3211         foundsig = NULL;
3212         header_prev = NULL;
3213         for (header = node->data; header != NULL; header = header_next) {
3214                 header_next = header->next;
3215                 if (header->ttl <= now) {
3216                         /*
3217                          * This rdataset is stale.  If no one else is using the
3218                          * node, we can clean it up right now, otherwise we
3219                          * mark it as stale, and the node as dirty, so it will
3220                          * get cleaned up later.
3221                          */
3222                         if (header->ttl > now - RBTDB_VIRTUAL)
3223                                 header_prev = header;
3224                         else if (node->references == 0) {
3225                                 INSIST(header->down == NULL);
3226                                 if (header_prev != NULL)
3227                                         header_prev->next = header->next;
3228                                 else
3229                                         node->data = header->next;
3230                                 free_rdataset(search.rbtdb->common.mctx,
3231                                               header);
3232                         } else {
3233                                 header->attributes |= RDATASET_ATTR_STALE;
3234                                 node->dirty = 1;
3235                                 header_prev = header;
3236                         }
3237                 } else if (EXISTS(header)) {
3238                         /*
3239                          * If we found a type we were looking for, remember
3240                          * it.
3241                          */
3242                         if (header->type == dns_rdatatype_ns) {
3243                                 /*
3244                                  * Remember a NS rdataset even if we're
3245                                  * not specifically looking for it, because
3246                                  * we might need it later.
3247                                  */
3248                                 found = header;
3249                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
3250                                 /*
3251                                  * If we need the NS rdataset, we'll also
3252                                  * need its signature.
3253                                  */
3254                                 foundsig = header;
3255                         }
3256                         header_prev = header;
3257                 } else
3258                         header_prev = header;
3259         }
3260
3261         if (found == NULL) {
3262                 /*
3263                  * No NS records here.
3264                  */
3265                 UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
3266                 goto find_ns;
3267         }
3268
3269         if (nodep != NULL) {
3270                 new_reference(search.rbtdb, node);
3271                 *nodep = node;
3272         }
3273
3274         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
3275         if (foundsig != NULL)
3276                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
3277                               sigrdataset);
3278
3279         UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock));
3280
3281  tree_exit:
3282         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3283
3284         INSIST(!search.need_cleanup);
3285
3286         dns_rbtnodechain_reset(&search.chain);
3287
3288         if (result == DNS_R_DELEGATION)
3289                 result = ISC_R_SUCCESS;
3290
3291         return (result);
3292 }
3293
3294 static void
3295 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
3296         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3297         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
3298
3299         REQUIRE(VALID_RBTDB(rbtdb));
3300         REQUIRE(targetp != NULL && *targetp == NULL);
3301
3302         LOCK(&rbtdb->node_locks[node->locknum].lock);
3303         INSIST(node->references > 0);
3304         node->references++;
3305         INSIST(node->references != 0);                  /* Catch overflow. */
3306         UNLOCK(&rbtdb->node_locks[node->locknum].lock);
3307
3308         *targetp = source;
3309 }
3310
3311 static void
3312 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
3313         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3314         dns_rbtnode_t *node;
3315         isc_boolean_t want_free = ISC_FALSE;
3316         isc_boolean_t inactive = ISC_FALSE;
3317         unsigned int locknum;
3318
3319         REQUIRE(VALID_RBTDB(rbtdb));
3320         REQUIRE(targetp != NULL && *targetp != NULL);
3321
3322         node = (dns_rbtnode_t *)(*targetp);
3323         locknum = node->locknum;
3324
3325         LOCK(&rbtdb->node_locks[locknum].lock);
3326
3327         INSIST(node->references > 0);
3328         node->references--;
3329         if (node->references == 0) {
3330                 no_references(rbtdb, node, 0, isc_rwlocktype_none);
3331                 if (rbtdb->node_locks[locknum].references == 0 &&
3332                     rbtdb->node_locks[locknum].exiting)
3333                         inactive = ISC_TRUE;
3334         }
3335
3336         UNLOCK(&rbtdb->node_locks[locknum].lock);
3337
3338         *targetp = NULL;
3339
3340         if (inactive) {
3341                 LOCK(&rbtdb->lock);
3342                 rbtdb->active--;
3343                 if (rbtdb->active == 0)
3344                         want_free = ISC_TRUE;
3345                 UNLOCK(&rbtdb->lock);
3346                 if (want_free) {
3347                         char buf[DNS_NAME_FORMATSIZE];
3348                         if (dns_name_dynamic(&rbtdb->common.origin))
3349                                 dns_name_format(&rbtdb->common.origin, buf,
3350                                                 sizeof(buf));
3351                         else
3352                                 strcpy(buf, "<UNKNOWN>");
3353                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3354                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
3355                                       "calling free_rbtdb(%s)", buf);
3356                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
3357                 }
3358         }
3359 }
3360
3361 static isc_result_t
3362 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
3363         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3364         dns_rbtnode_t *rbtnode = node;
3365         rdatasetheader_t *header;
3366         isc_boolean_t force_expire = ISC_FALSE;
3367         /*
3368          * These are the category and module used by the cache cleaner.
3369          */
3370         isc_boolean_t log = ISC_FALSE;
3371         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
3372         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
3373         int level = ISC_LOG_DEBUG(2);
3374         char printname[DNS_NAME_FORMATSIZE];
3375
3376         REQUIRE(VALID_RBTDB(rbtdb));
3377
3378         /*
3379          * Caller must hold a tree lock.
3380          */
3381
3382         if (now == 0)
3383                 isc_stdtime_get(&now);
3384
3385         if (rbtdb->overmem) {
3386                 isc_uint32_t val;
3387
3388                 isc_random_get(&val);
3389                 /*
3390                  * XXXDCL Could stand to have a better policy, like LRU.
3391                  */
3392                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
3393
3394                 /*
3395                  * Note that 'log' can be true IFF rbtdb->overmem is also true.
3396                  * rbtdb->ovemem can currently only be true for cache databases
3397                  * -- hence all of the "overmem cache" log strings.
3398                  */
3399                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
3400                 if (log)
3401                         isc_log_write(dns_lctx, category, module, level,
3402                                       "overmem cache: %s %s",
3403                                       force_expire ? "FORCE" : "check",
3404                                       dns_rbt_formatnodename(rbtnode,
3405                                                            printname,
3406                                                            sizeof(printname)));
3407         }
3408
3409         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3410
3411         for (header = rbtnode->data; header != NULL; header = header->next)
3412                 if (header->ttl <= now - RBTDB_VIRTUAL) {
3413                         /*
3414                          * We don't check if rbtnode->references == 0 and try
3415                          * to free like we do in cache_find(), because
3416                          * rbtnode->references must be non-zero.  This is so
3417                          * because 'node' is an argument to the function.
3418                          */
3419                         header->attributes |= RDATASET_ATTR_STALE;
3420                         rbtnode->dirty = 1;
3421                         if (log)
3422                                 isc_log_write(dns_lctx, category, module,
3423                                               level, "overmem cache: stale %s",
3424                                               printname);
3425                 } else if (force_expire) {
3426                         if (! RETAIN(header)) {
3427                                 header->ttl = 0;
3428                                 header->attributes |= RDATASET_ATTR_STALE;
3429                                 rbtnode->dirty = 1;
3430                         } else if (log) {
3431                                 isc_log_write(dns_lctx, category, module,
3432                                               level, "overmem cache: "
3433                                               "reprieve by RETAIN() %s",
3434                                               printname);
3435                         }
3436                 } else if (rbtdb->overmem && log)
3437                         isc_log_write(dns_lctx, category, module, level,
3438                                       "overmem cache: saved %s", printname);
3439
3440         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3441
3442         return (ISC_R_SUCCESS);
3443 }
3444
3445 static void
3446 overmem(dns_db_t *db, isc_boolean_t overmem) {
3447         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3448
3449         if (IS_CACHE(rbtdb)) {
3450                 rbtdb->overmem = overmem;
3451         }
3452 }
3453
3454 static void
3455 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
3456         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3457         dns_rbtnode_t *rbtnode = node;
3458         isc_boolean_t first;
3459
3460         REQUIRE(VALID_RBTDB(rbtdb));
3461
3462         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3463
3464         fprintf(out, "node %p, %u references, locknum = %u\n",
3465                 rbtnode, rbtnode->references, rbtnode->locknum);
3466         if (rbtnode->data != NULL) {
3467                 rdatasetheader_t *current, *top_next;
3468
3469                 for (current = rbtnode->data; current != NULL;
3470                      current = top_next) {
3471                         top_next = current->next;
3472                         first = ISC_TRUE;
3473                         fprintf(out, "\ttype %u", current->type);
3474                         do {
3475                                 if (!first)
3476                                         fprintf(out, "\t");
3477                                 first = ISC_FALSE;
3478                                 fprintf(out,
3479                                         "\tserial = %lu, ttl = %u, "
3480                                         "trust = %u, attributes = %u\n",
3481                                         (unsigned long)current->serial,
3482                                         current->ttl,
3483                                         current->trust,
3484                                         current->attributes);
3485                                 current = current->down;
3486                         } while (current != NULL);
3487                 }
3488         } else
3489                 fprintf(out, "(empty)\n");
3490
3491         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3492 }
3493
3494 static isc_result_t
3495 createiterator(dns_db_t *db, isc_boolean_t relative_names,
3496                dns_dbiterator_t **iteratorp)
3497 {
3498         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3499         rbtdb_dbiterator_t *rbtdbiter;
3500
3501         REQUIRE(VALID_RBTDB(rbtdb));
3502
3503         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
3504         if (rbtdbiter == NULL)
3505                 return (ISC_R_NOMEMORY);
3506
3507         rbtdbiter->common.methods = &dbiterator_methods;
3508         rbtdbiter->common.db = NULL;
3509         dns_db_attach(db, &rbtdbiter->common.db);
3510         rbtdbiter->common.relative_names = relative_names;
3511         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
3512         rbtdbiter->common.cleaning = ISC_FALSE;
3513         rbtdbiter->paused = ISC_TRUE;
3514         rbtdbiter->tree_locked = isc_rwlocktype_none;
3515         rbtdbiter->result = ISC_R_SUCCESS;
3516         dns_fixedname_init(&rbtdbiter->name);
3517         dns_fixedname_init(&rbtdbiter->origin);
3518         rbtdbiter->node = NULL;
3519         rbtdbiter->delete = 0;
3520         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
3521         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
3522
3523         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
3524
3525         return (ISC_R_SUCCESS);
3526 }
3527
3528 static isc_result_t
3529 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
3530                   dns_rdatatype_t type, dns_rdatatype_t covers,
3531                   isc_stdtime_t now, dns_rdataset_t *rdataset,
3532                   dns_rdataset_t *sigrdataset)
3533 {
3534         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3535         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
3536         rdatasetheader_t *header, *header_next, *found, *foundsig;
3537         rbtdb_serial_t serial;
3538         rbtdb_version_t *rbtversion = version;
3539         isc_boolean_t close_version = ISC_FALSE;
3540         rbtdb_rdatatype_t matchtype, sigmatchtype;
3541
3542         REQUIRE(VALID_RBTDB(rbtdb));
3543         REQUIRE(type != dns_rdatatype_any);
3544
3545         if (rbtversion == NULL) {
3546                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
3547                 close_version = ISC_TRUE;
3548         }
3549         serial = rbtversion->serial;
3550         now = 0;
3551
3552         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3553
3554         found = NULL;
3555         foundsig = NULL;
3556         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
3557         if (covers == 0)
3558                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3559         else
3560                 sigmatchtype = 0;
3561
3562         for (header = rbtnode->data; header != NULL; header = header_next) {
3563                 header_next = header->next;
3564                 do {
3565                         if (header->serial <= serial &&
3566                             !IGNORE(header)) {
3567                                 /*
3568                                  * Is this a "this rdataset doesn't
3569                                  * exist" record?
3570                                  */
3571                                 if (NONEXISTENT(header))
3572                                         header = NULL;
3573                                 break;
3574                         } else
3575                                 header = header->down;
3576                 } while (header != NULL);
3577                 if (header != NULL) {
3578                         /*
3579                          * We have an active, extant rdataset.  If it's a
3580                          * type we're looking for, remember it.
3581                          */
3582                         if (header->type == matchtype) {
3583                                 found = header;
3584                                 if (foundsig != NULL)
3585                                         break;
3586                         } else if (header->type == sigmatchtype) {
3587                                 foundsig = header;
3588                                 if (found != NULL)
3589                                         break;
3590                         }
3591                 }
3592         }
3593         if (found != NULL) {
3594                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
3595                 if (foundsig != NULL)
3596                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
3597                                       sigrdataset);
3598         }
3599
3600         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3601
3602         if (close_version)
3603                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
3604                              ISC_FALSE);
3605
3606         if (found == NULL)
3607                 return (ISC_R_NOTFOUND);
3608
3609         return (ISC_R_SUCCESS);
3610 }
3611
3612 static isc_result_t
3613 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
3614                    dns_rdatatype_t type, dns_rdatatype_t covers,
3615                    isc_stdtime_t now, dns_rdataset_t *rdataset,
3616                    dns_rdataset_t *sigrdataset)
3617 {
3618         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3619         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
3620         rdatasetheader_t *header, *header_next, *found, *foundsig;
3621         rbtdb_rdatatype_t matchtype, sigmatchtype, nsectype;
3622         isc_result_t result;
3623
3624         REQUIRE(VALID_RBTDB(rbtdb));
3625         REQUIRE(type != dns_rdatatype_any);
3626
3627         UNUSED(version);
3628
3629         result = ISC_R_SUCCESS;
3630
3631         if (now == 0)
3632                 isc_stdtime_get(&now);
3633
3634         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3635
3636         found = NULL;
3637         foundsig = NULL;
3638         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
3639         nsectype = RBTDB_RDATATYPE_VALUE(0, type);
3640         if (covers == 0)
3641                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3642         else
3643                 sigmatchtype = 0;
3644
3645         for (header = rbtnode->data; header != NULL; header = header_next) {
3646                 header_next = header->next;
3647                 if (header->ttl <= now) {
3648                         /*
3649                          * We don't check if rbtnode->references == 0 and try
3650                          * to free like we do in cache_find(), because
3651                          * rbtnode->references must be non-zero.  This is so
3652                          * because 'node' is an argument to the function.
3653                          */
3654                         if (header->ttl <= now - RBTDB_VIRTUAL) {
3655                                 header->attributes |= RDATASET_ATTR_STALE;
3656                                 rbtnode->dirty = 1;
3657                         }
3658                 } else if (EXISTS(header)) {
3659                         if (header->type == matchtype)
3660                                 found = header;
3661                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
3662                                  header->type == nsectype)
3663                                 found = header;
3664                         else if (header->type == sigmatchtype)
3665                                 foundsig = header;
3666                 }
3667         }
3668         if (found != NULL) {
3669                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
3670                 if (foundsig != NULL)
3671                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
3672                                       sigrdataset);
3673         }
3674
3675         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3676
3677         if (found == NULL)
3678                 return (ISC_R_NOTFOUND);
3679
3680         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
3681                 /*
3682                  * We found a negative cache entry.
3683                  */
3684                 if (NXDOMAIN(found))
3685                         result = DNS_R_NCACHENXDOMAIN;
3686                 else
3687                         result = DNS_R_NCACHENXRRSET;
3688         }
3689
3690         return (result);
3691 }
3692
3693 static isc_result_t
3694 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
3695              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
3696 {
3697         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
3698         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
3699         rbtdb_version_t *rbtversion = version;
3700         rbtdb_rdatasetiter_t *iterator;
3701
3702         REQUIRE(VALID_RBTDB(rbtdb));
3703
3704         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
3705         if (iterator == NULL)
3706                 return (ISC_R_NOMEMORY);
3707
3708         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
3709                 now = 0;
3710                 if (rbtversion == NULL)
3711                         currentversion(db,
3712                                  (dns_dbversion_t **) (void *)(&rbtversion));
3713                 else {
3714                         LOCK(&rbtdb->lock);
3715                         INSIST(rbtversion->references > 0);
3716                         rbtversion->references++;
3717                         INSIST(rbtversion->references != 0);
3718                         UNLOCK(&rbtdb->lock);
3719                 }
3720         } else {
3721                 if (now == 0)
3722                         isc_stdtime_get(&now);
3723                 rbtversion = NULL;
3724         }
3725
3726         iterator->common.magic = DNS_RDATASETITER_MAGIC;
3727         iterator->common.methods = &rdatasetiter_methods;
3728         iterator->common.db = db;
3729         iterator->common.node = node;
3730         iterator->common.version = (dns_dbversion_t *)rbtversion;
3731         iterator->common.now = now;
3732
3733         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3734
3735         INSIST(rbtnode->references > 0);
3736         rbtnode->references++;
3737         INSIST(rbtnode->references != 0);
3738         iterator->current = NULL;
3739
3740         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
3741
3742         *iteratorp = (dns_rdatasetiter_t *)iterator;
3743
3744         return (ISC_R_SUCCESS);
3745 }
3746
3747 static isc_boolean_t
3748 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
3749         rdatasetheader_t *header, *header_next;
3750         isc_boolean_t cname, other_data;
3751         dns_rdatatype_t rdtype;
3752
3753         /*
3754          * The caller must hold the node lock.
3755          */
3756
3757         /*
3758          * Look for CNAME and "other data" rdatasets active in our version.
3759          */
3760         cname = ISC_FALSE;
3761         other_data = ISC_FALSE;
3762         for (header = node->data; header != NULL; header = header_next) {
3763                 header_next = header->next;
3764                 if (header->type == dns_rdatatype_cname) {
3765                         /*
3766                          * Look for an active extant CNAME.
3767                          */
3768                         do {
3769                                 if (header->serial <= serial &&
3770                                     !IGNORE(header)) {
3771                                         /*
3772                                          * Is this a "this rdataset doesn't
3773                                          * exist" record?
3774                                          */
3775                                         if (NONEXISTENT(header))
3776                                                 header = NULL;
3777                                         break;
3778                                 } else
3779                                         header = header->down;
3780                         } while (header != NULL);
3781                         if (header != NULL)
3782                                 cname = ISC_TRUE;
3783                 } else {
3784                         /*
3785                          * Look for active extant "other data".
3786                          *
3787                          * "Other data" is any rdataset whose type is not
3788                          * DNSKEY, RRSIG DNSKEY, NSEC, RRSIG NSEC,
3789                          * or RRSIG CNAME.
3790                          */
3791                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
3792                         if (rdtype == dns_rdatatype_rrsig ||
3793                             rdtype == dns_rdatatype_sig)
3794                                 rdtype = RBTDB_RDATATYPE_EXT(header->type);
3795                         if (rdtype != dns_rdatatype_nsec &&
3796                             rdtype != dns_rdatatype_dnskey &&
3797                             rdtype != dns_rdatatype_nxt &&
3798                             rdtype != dns_rdatatype_key &&
3799                             rdtype != dns_rdatatype_cname) {
3800                                 /*
3801                                  * We've found a type that isn't
3802                                  * NSEC, KEY, CNAME, or one of their
3803                                  * signatures.  Is it active and extant?
3804                                  */
3805                                 do {
3806                                         if (header->serial <= serial &&
3807                                             !IGNORE(header)) {
3808                                                 /*
3809                                                  * Is this a "this rdataset
3810                                                  * doesn't exist" record?
3811                                                  */
3812                                                 if (NONEXISTENT(header))
3813                                                         header = NULL;
3814                                                 break;
3815                                         } else
3816                                                 header = header->down;
3817                                 } while (header != NULL);
3818                                 if (header != NULL)
3819                                         other_data = ISC_TRUE;
3820                         }
3821                 }
3822         }
3823
3824         if (cname && other_data)
3825                 return (ISC_TRUE);
3826
3827         return (ISC_FALSE);
3828 }
3829
3830 static isc_result_t
3831 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
3832     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
3833     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
3834 {
3835         rbtdb_changed_t *changed = NULL;
3836         rdatasetheader_t *topheader, *topheader_prev, *header;
3837         unsigned char *merged;
3838         isc_result_t result;
3839         isc_boolean_t header_nx;
3840         isc_boolean_t newheader_nx;
3841         isc_boolean_t merge;
3842         dns_rdatatype_t nsectype, rdtype, covers;
3843         dns_trust_t trust;
3844
3845         /*
3846          * Add an rdatasetheader_t to a node.
3847          */
3848
3849         /*
3850          * Caller must be holding the node lock.
3851          */
3852
3853         if ((options & DNS_DBADD_MERGE) != 0) {
3854                 REQUIRE(rbtversion != NULL);
3855                 merge = ISC_TRUE;
3856         } else
3857                 merge = ISC_FALSE;
3858
3859         if ((options & DNS_DBADD_FORCE) != 0)
3860                 trust = dns_trust_ultimate;
3861         else
3862                 trust = newheader->trust;
3863
3864         if (rbtversion != NULL && !loading) {
3865                 /*
3866                  * We always add a changed record, even if no changes end up
3867                  * being made to this node, because it's harmless and
3868                  * simplifies the code.
3869                  */
3870                 changed = add_changed(rbtdb, rbtversion, rbtnode);
3871                 if (changed == NULL) {
3872                         free_rdataset(rbtdb->common.mctx, newheader);
3873                         return (ISC_R_NOMEMORY);
3874                 }
3875         }
3876
3877         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
3878         topheader_prev = NULL;
3879
3880         nsectype = 0;
3881         if (rbtversion == NULL && !newheader_nx) {
3882                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
3883                 if (rdtype == 0) {
3884                         /*
3885                          * We're adding a negative cache entry.
3886                          */
3887                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
3888                         if (covers == dns_rdatatype_any) {
3889                                 /*
3890                                  * We're adding an NXDOMAIN negative cache
3891                                  * entry.
3892                                  *
3893                                  * We make all other data stale so that the
3894                                  * only rdataset that can be found at this
3895                                  * node is the NXDOMAIN negative cache entry.
3896                                  */
3897                                 for (topheader = rbtnode->data;
3898                                      topheader != NULL;
3899                                      topheader = topheader->next) {
3900                                         topheader->ttl = 0;
3901                                         topheader->attributes |=
3902                                                 RDATASET_ATTR_STALE;
3903                                 }
3904                                 rbtnode->dirty = 1;
3905                                 goto find_header;
3906                         }
3907                         nsectype = RBTDB_RDATATYPE_VALUE(covers, 0);
3908                 } else {
3909                         /*
3910                          * We're adding something that isn't a
3911                          * negative cache entry.  Look for an extant
3912                          * non-stale NXDOMAIN negative cache entry.
3913                          */
3914                         for (topheader = rbtnode->data;
3915                              topheader != NULL;
3916                              topheader = topheader->next) {
3917                                 if (NXDOMAIN(topheader))
3918                                         break;
3919                         }
3920                         if (topheader != NULL && EXISTS(topheader) &&
3921                             topheader->ttl > now) {
3922                                 /*
3923                                  * Found one.
3924                                  */
3925                                 if (trust < topheader->trust) {
3926                                         /*
3927                                          * The NXDOMAIN is more trusted.
3928                                          */
3929                                         free_rdataset(rbtdb->common.mctx,
3930                                                       newheader);
3931                                         if (addedrdataset != NULL)
3932                                                 bind_rdataset(rbtdb, rbtnode,
3933                                                               topheader, now,
3934                                                               addedrdataset);
3935                                         return (DNS_R_UNCHANGED);
3936                                 }
3937                                 /*
3938                                  * The new rdataset is better.  Expire the
3939                                  * NXDOMAIN.
3940                                  */
3941                                 topheader->ttl = 0;
3942                                 topheader->attributes |= RDATASET_ATTR_STALE;
3943                                 rbtnode->dirty = 1;
3944                                 topheader = NULL;
3945                                 goto find_header;
3946                         }
3947                         nsectype = RBTDB_RDATATYPE_VALUE(0, rdtype);
3948                 }
3949         }
3950
3951         for (topheader = rbtnode->data;
3952              topheader != NULL;
3953              topheader = topheader->next) {
3954                 if (topheader->type == newheader->type ||
3955                     topheader->type == nsectype)
3956                         break;
3957                 topheader_prev = topheader;
3958         }
3959
3960  find_header:
3961         /*
3962          * If header isn't NULL, we've found the right type.  There may be
3963          * IGNORE rdatasets between the top of the chain and the first real
3964          * data.  We skip over them.
3965          */
3966         header = topheader;
3967         while (header != NULL && IGNORE(header))
3968                 header = header->down;
3969         if (header != NULL) {
3970                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
3971
3972                 /*
3973                  * Deleting an already non-existent rdataset has no effect.
3974                  */
3975                 if (header_nx && newheader_nx) {
3976                         free_rdataset(rbtdb->common.mctx, newheader);
3977                         return (DNS_R_UNCHANGED);
3978                 }
3979
3980                 /*
3981                  * Trying to add an rdataset with lower trust to a cache DB
3982                  * has no effect, provided that the cache data isn't stale.
3983                  */
3984                 if (rbtversion == NULL && trust < header->trust &&
3985                     (header->ttl > now || header_nx)) {
3986                         free_rdataset(rbtdb->common.mctx, newheader);
3987                         if (addedrdataset != NULL)
3988                                 bind_rdataset(rbtdb, rbtnode, header, now,
3989                                               addedrdataset);
3990                         return (DNS_R_UNCHANGED);
3991                 }
3992
3993                 /*
3994                  * Don't merge if a nonexistent rdataset is involved.
3995                  */
3996                 if (merge && (header_nx || newheader_nx))
3997                         merge = ISC_FALSE;
3998
3999                 /*
4000                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
4001                  * that is the union of 'newheader' and 'header'.
4002                  */
4003                 if (merge) {
4004                         unsigned int flags = 0;
4005                         INSIST(rbtversion->serial >= header->serial);
4006                         merged = NULL;
4007                         result = ISC_R_SUCCESS;
4008                         
4009                         if ((options & DNS_DBADD_EXACT) != 0)
4010                                 flags |= DNS_RDATASLAB_EXACT;
4011                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
4012                              newheader->ttl != header->ttl)
4013                                         result = DNS_R_NOTEXACT;
4014                         else if (newheader->ttl != header->ttl)
4015                                 flags |= DNS_RDATASLAB_FORCE;
4016                         if (result == ISC_R_SUCCESS)
4017                                 result = dns_rdataslab_merge(
4018                                              (unsigned char *)header,
4019                                              (unsigned char *)newheader,
4020                                              (unsigned int)(sizeof(*newheader)),
4021                                              rbtdb->common.mctx,
4022                                              rbtdb->common.rdclass,
4023                                              (dns_rdatatype_t)header->type,
4024                                              flags, &merged);
4025                         if (result == ISC_R_SUCCESS) {
4026                                 /*
4027                                  * If 'header' has the same serial number as
4028                                  * we do, we could clean it up now if we knew
4029                                  * that our caller had no references to it.
4030                                  * We don't know this, however, so we leave it
4031                                  * alone.  It will get cleaned up when
4032                                  * clean_zone_node() runs.
4033                                  */
4034                                 free_rdataset(rbtdb->common.mctx, newheader);
4035                                 newheader = (rdatasetheader_t *)merged;
4036                         } else {
4037                                 free_rdataset(rbtdb->common.mctx, newheader);
4038                                 return (result);
4039                         }
4040                 }
4041                 /*
4042                  * Don't replace existing NS, A and AAAA RRsets
4043                  * in the cache if they are already exist.  This
4044                  * prevents named being locked to old servers.
4045                  * Don't lower trust of existing record if the
4046                  * update is forced.
4047                  */
4048                 if (IS_CACHE(rbtdb) && header->ttl > now &&
4049                     header->type == dns_rdatatype_ns &&
4050                     !header_nx && !newheader_nx &&
4051                     header->trust >= newheader->trust &&
4052                     dns_rdataslab_equalx((unsigned char *)header,
4053                                          (unsigned char *)newheader,
4054                                          (unsigned int)(sizeof(*newheader)),
4055                                          rbtdb->common.rdclass,
4056                                          (dns_rdatatype_t)header->type)) {
4057                         /*
4058                          * Honour the new ttl if it is less than the
4059                          * older one.
4060                          */
4061                         if (header->ttl > newheader->ttl)
4062                                 header->ttl = newheader->ttl;
4063                         if (header->noqname == NULL &&
4064                             newheader->noqname != NULL) {
4065                                 header->noqname = newheader->noqname;
4066                                 newheader->noqname = NULL;
4067                         }
4068                         free_rdataset(rbtdb->common.mctx, newheader);
4069                         if (addedrdataset != NULL)
4070                                 bind_rdataset(rbtdb, rbtnode, header, now,
4071                                               addedrdataset);
4072                         return (ISC_R_SUCCESS);
4073                 }
4074                 if (IS_CACHE(rbtdb) && header->ttl > now &&
4075                     (header->type == dns_rdatatype_a ||
4076                      header->type == dns_rdatatype_aaaa) &&
4077                     !header_nx && !newheader_nx &&
4078                     header->trust >= newheader->trust &&
4079                     dns_rdataslab_equal((unsigned char *)header,
4080                                         (unsigned char *)newheader,
4081                                         (unsigned int)(sizeof(*newheader)))) {
4082                         /*
4083                          * Honour the new ttl if it is less than the
4084                          * older one.
4085                          */
4086                         if (header->ttl > newheader->ttl)
4087                                 header->ttl = newheader->ttl;
4088                         if (header->noqname == NULL &&
4089                             newheader->noqname != NULL) {
4090                                 header->noqname = newheader->noqname;
4091                                 newheader->noqname = NULL;
4092                         }
4093                         free_rdataset(rbtdb->common.mctx, newheader);
4094                         if (addedrdataset != NULL)
4095                                 bind_rdataset(rbtdb, rbtnode, header, now,
4096                                               addedrdataset);
4097                         return (ISC_R_SUCCESS);
4098                 }
4099                 INSIST(rbtversion == NULL ||
4100                        rbtversion->serial >= topheader->serial);
4101                 if (topheader_prev != NULL)
4102                         topheader_prev->next = newheader;
4103                 else
4104                         rbtnode->data = newheader;
4105                 newheader->next = topheader->next;
4106                 if (loading) {
4107                         /*
4108                          * There are no other references to 'header' when
4109                          * loading, so we MAY clean up 'header' now.
4110                          * Since we don't generate changed records when
4111                          * loading, we MUST clean up 'header' now.
4112                          */
4113                         newheader->down = NULL;
4114                         free_rdataset(rbtdb->common.mctx, header);
4115                 } else {
4116                         newheader->down = topheader;
4117                         topheader->next = newheader;
4118                         rbtnode->dirty = 1;
4119                         if (changed != NULL)
4120                                 changed->dirty = ISC_TRUE;
4121                 }
4122         } else {
4123                 /*
4124                  * No non-IGNORED rdatasets of the given type exist at
4125                  * this node.
4126                  */
4127
4128                 /*
4129                  * If we're trying to delete the type, don't bother.
4130                  */
4131                 if (newheader_nx) {
4132                         free_rdataset(rbtdb->common.mctx, newheader);
4133                         return (DNS_R_UNCHANGED);
4134                 }
4135
4136                 if (topheader != NULL) {
4137                         /*
4138                          * We have an list of rdatasets of the given type,
4139                          * but they're all marked IGNORE.  We simply insert
4140                          * the new rdataset at the head of the list.
4141                          *
4142                          * Ignored rdatasets cannot occur during loading, so
4143                          * we INSIST on it.
4144                          */
4145                         INSIST(!loading);
4146                         INSIST(rbtversion == NULL ||
4147                                rbtversion->serial >= topheader->serial);
4148                         if (topheader_prev != NULL)
4149                                 topheader_prev->next = newheader;
4150                         else
4151                                 rbtnode->data = newheader;
4152                         newheader->next = topheader->next;
4153                         newheader->down = topheader;
4154                         topheader->next = newheader;
4155                         rbtnode->dirty = 1;
4156                         if (changed != NULL)
4157                                 changed->dirty = ISC_TRUE;
4158                 } else {
4159                         /*
4160                          * No rdatasets of the given type exist at the node.
4161                          */
4162                         newheader->next = rbtnode->data;
4163                         newheader->down = NULL;
4164                         rbtnode->data = newheader;
4165                 }
4166         }
4167
4168         /*
4169          * Check if the node now contains CNAME and other data.
4170          */
4171         if (rbtversion != NULL &&
4172             cname_and_other_data(rbtnode, rbtversion->serial))
4173                 return (DNS_R_CNAMEANDOTHER);
4174
4175         if (addedrdataset != NULL)
4176                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
4177
4178         return (ISC_R_SUCCESS);
4179 }
4180
4181 static inline isc_boolean_t
4182 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
4183                 rbtdb_rdatatype_t type)
4184 {
4185         if (IS_CACHE(rbtdb)) {
4186                 if (type == dns_rdatatype_dname)
4187                         return (ISC_TRUE);
4188                 else
4189                         return (ISC_FALSE);
4190         } else if (type == dns_rdatatype_dname ||
4191                    (type == dns_rdatatype_ns &&
4192                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
4193                 return (ISC_TRUE);
4194         return (ISC_FALSE);
4195 }
4196
4197 static inline isc_result_t
4198 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
4199            dns_rdataset_t *rdataset)
4200 {
4201         struct noqname *noqname;
4202         isc_mem_t *mctx = rbtdb->common.mctx;
4203         dns_name_t name;
4204         dns_rdataset_t nsec, nsecsig;
4205         isc_result_t result;
4206         isc_region_t r;
4207
4208         dns_name_init(&name, NULL);
4209         dns_rdataset_init(&nsec);
4210         dns_rdataset_init(&nsecsig);
4211
4212         result = dns_rdataset_getnoqname(rdataset, &name, &nsec, &nsecsig);
4213         RUNTIME_CHECK(result == ISC_R_SUCCESS);
4214
4215         noqname = isc_mem_get(mctx, sizeof(*noqname));
4216         if (noqname == NULL) {
4217                 result = ISC_R_NOMEMORY;
4218                 goto cleanup;
4219         }
4220         dns_name_init(&noqname->name, NULL);
4221         noqname->nsec = NULL;
4222         noqname->nsecsig = NULL;
4223         result = dns_name_dup(&name, mctx, &noqname->name);
4224         if (result != ISC_R_SUCCESS)
4225                 goto cleanup;
4226         result = dns_rdataslab_fromrdataset(&nsec, mctx, &r, 0);
4227         if (result != ISC_R_SUCCESS)
4228                 goto cleanup;
4229         noqname->nsec = r.base;
4230         result = dns_rdataslab_fromrdataset(&nsecsig, mctx, &r, 0);
4231         if (result != ISC_R_SUCCESS)
4232                 goto cleanup;
4233         noqname->nsecsig = r.base;
4234         dns_rdataset_disassociate(&nsec);
4235         dns_rdataset_disassociate(&nsecsig);
4236         newheader->noqname = noqname;
4237         return (ISC_R_SUCCESS);
4238
4239 cleanup:
4240         dns_rdataset_disassociate(&nsec);
4241         dns_rdataset_disassociate(&nsecsig);
4242         free_noqname(mctx, &noqname);
4243         return(result);
4244 }
4245
4246 static isc_result_t
4247 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4248             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
4249             dns_rdataset_t *addedrdataset)
4250 {
4251         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4252         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4253         rbtdb_version_t *rbtversion = version;
4254         isc_region_t region;
4255         rdatasetheader_t *newheader;
4256         isc_result_t result;
4257         isc_boolean_t delegating;
4258
4259         REQUIRE(VALID_RBTDB(rbtdb));
4260
4261         if (rbtversion == NULL) {
4262                 if (now == 0)
4263                         isc_stdtime_get(&now);
4264         } else
4265                 now = 0;
4266
4267         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
4268                                             &region,
4269                                             sizeof(rdatasetheader_t));
4270         if (result != ISC_R_SUCCESS)
4271                 return (result);
4272
4273         newheader = (rdatasetheader_t *)region.base;
4274         newheader->ttl = rdataset->ttl + now;
4275         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
4276                                                 rdataset->covers);
4277         newheader->attributes = 0;
4278         newheader->noqname = NULL;
4279         newheader->count = 0;
4280         newheader->trust = rdataset->trust;
4281         if (rbtversion != NULL) {
4282                 newheader->serial = rbtversion->serial;
4283                 now = 0;
4284         } else {
4285                 newheader->serial = 1;
4286                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
4287                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
4288                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
4289                         result = addnoqname(rbtdb, newheader, rdataset);
4290                         if (result != ISC_R_SUCCESS) {
4291                                 free_rdataset(rbtdb->common.mctx, newheader);
4292                                 return (result);
4293                         }
4294                 }
4295         }
4296
4297         /*
4298          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
4299          * just DNAME for the cache), then we need to set the callback bit
4300          * on the node, and to do that we must be holding an exclusive lock
4301          * on the tree.
4302          */
4303         if (delegating_type(rbtdb, rbtnode, rdataset->type)) {
4304                 delegating = ISC_TRUE;
4305                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4306         } else
4307                 delegating = ISC_FALSE;
4308
4309         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4310
4311         result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
4312                      addedrdataset, now);
4313         if (result == ISC_R_SUCCESS && delegating)
4314                 rbtnode->find_callback = 1;
4315
4316         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4317
4318         if (delegating)
4319                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
4320
4321         return (result);
4322 }
4323
4324 static isc_result_t
4325 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4326                  dns_rdataset_t *rdataset, unsigned int options,
4327                  dns_rdataset_t *newrdataset)
4328 {
4329         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4330         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4331         rbtdb_version_t *rbtversion = version;
4332         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
4333         unsigned char *subresult;
4334         isc_region_t region;
4335         isc_result_t result;
4336         rbtdb_changed_t *changed;
4337
4338         REQUIRE(VALID_RBTDB(rbtdb));
4339
4340         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
4341                                             &region,
4342                                             sizeof(rdatasetheader_t));
4343         if (result != ISC_R_SUCCESS)
4344                 return (result);
4345         newheader = (rdatasetheader_t *)region.base;
4346         newheader->ttl = rdataset->ttl;
4347         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
4348                                                 rdataset->covers);
4349         newheader->attributes = 0;
4350         newheader->serial = rbtversion->serial;
4351         newheader->trust = 0;
4352         newheader->noqname = NULL;
4353         newheader->count = 0;
4354
4355         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4356
4357         changed = add_changed(rbtdb, rbtversion, rbtnode);
4358         if (changed == NULL) {
4359                 free_rdataset(rbtdb->common.mctx, newheader);
4360                 UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4361                 return (ISC_R_NOMEMORY);
4362         }
4363
4364         topheader_prev = NULL;
4365         for (topheader = rbtnode->data;
4366              topheader != NULL;
4367              topheader = topheader->next) {
4368                 if (topheader->type == newheader->type)
4369                         break;
4370                 topheader_prev = topheader;
4371         }
4372         /*
4373          * If header isn't NULL, we've found the right type.  There may be
4374          * IGNORE rdatasets between the top of the chain and the first real
4375          * data.  We skip over them.
4376          */
4377         header = topheader;
4378         while (header != NULL && IGNORE(header))
4379                 header = header->down;
4380         if (header != NULL && EXISTS(header)) {
4381                 unsigned int flags = 0;
4382                 subresult = NULL;
4383                 result = ISC_R_SUCCESS;
4384                 if ((options & DNS_DBSUB_EXACT) != 0) {
4385                         flags |= DNS_RDATASLAB_EXACT;
4386                         if (newheader->ttl != header->ttl)
4387                                 result = DNS_R_NOTEXACT;
4388                 }
4389                 if (result == ISC_R_SUCCESS)
4390                         result = dns_rdataslab_subtract(
4391                                         (unsigned char *)header,
4392                                         (unsigned char *)newheader,
4393                                         (unsigned int)(sizeof(*newheader)),
4394                                         rbtdb->common.mctx,
4395                                         rbtdb->common.rdclass,
4396                                         (dns_rdatatype_t)header->type,
4397                                         flags, &subresult);
4398                 if (result == ISC_R_SUCCESS) {
4399                         free_rdataset(rbtdb->common.mctx, newheader);
4400                         newheader = (rdatasetheader_t *)subresult;
4401                         /*
4402                          * We have to set the serial since the rdataslab
4403                          * subtraction routine copies the reserved portion of
4404                          * header, not newheader.
4405                          */
4406                         newheader->serial = rbtversion->serial;
4407                 } else if (result == DNS_R_NXRRSET) {
4408                         /*
4409                          * This subtraction would remove all of the rdata;
4410                          * add a nonexistent header instead.
4411                          */
4412                         free_rdataset(rbtdb->common.mctx, newheader);
4413                         newheader = isc_mem_get(rbtdb->common.mctx,
4414                                                 sizeof(*newheader));
4415                         if (newheader == NULL) {
4416                                 result = ISC_R_NOMEMORY;
4417                                 goto unlock;
4418                         }
4419                         newheader->ttl = 0;
4420                         newheader->type = topheader->type;
4421                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
4422                         newheader->trust = 0;
4423                         newheader->serial = rbtversion->serial;
4424                         newheader->noqname = NULL;
4425                         newheader->count = 0;
4426                 } else {
4427                         free_rdataset(rbtdb->common.mctx, newheader);
4428                         goto unlock;
4429                 }
4430
4431                 /*
4432                  * If we're here, we want to link newheader in front of
4433                  * topheader.
4434                  */
4435                 INSIST(rbtversion->serial >= topheader->serial);
4436                 if (topheader_prev != NULL)
4437                         topheader_prev->next = newheader;
4438                 else
4439                         rbtnode->data = newheader;
4440                 newheader->next = topheader->next;
4441                 newheader->down = topheader;
4442                 topheader->next = newheader;
4443                 rbtnode->dirty = 1;
4444                 changed->dirty = ISC_TRUE;
4445         } else {
4446                 /*
4447                  * The rdataset doesn't exist, so we don't need to do anything
4448                  * to satisfy the deletion request.
4449                  */
4450                 free_rdataset(rbtdb->common.mctx, newheader);
4451                 if ((options & DNS_DBSUB_EXACT) != 0)
4452                         result = DNS_R_NOTEXACT;
4453                 else
4454                         result = DNS_R_UNCHANGED;                       
4455         }
4456
4457         if (result == ISC_R_SUCCESS && newrdataset != NULL)
4458                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
4459
4460  unlock:
4461         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4462
4463         return (result);
4464 }
4465
4466 static isc_result_t
4467 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4468                dns_rdatatype_t type, dns_rdatatype_t covers)
4469 {
4470         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4471         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4472         rbtdb_version_t *rbtversion = version;
4473         isc_result_t result;
4474         rdatasetheader_t *newheader;
4475
4476         REQUIRE(VALID_RBTDB(rbtdb));
4477
4478         if (type == dns_rdatatype_any)
4479                 return (ISC_R_NOTIMPLEMENTED);
4480         if (type == dns_rdatatype_rrsig && covers == 0)
4481                 return (ISC_R_NOTIMPLEMENTED);
4482
4483         newheader = isc_mem_get(rbtdb->common.mctx, sizeof(*newheader));
4484         if (newheader == NULL)
4485                 return (ISC_R_NOMEMORY);
4486         newheader->ttl = 0;
4487         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
4488         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
4489         newheader->trust = 0;
4490         newheader->noqname = NULL;
4491         if (rbtversion != NULL)
4492                 newheader->serial = rbtversion->serial;
4493         else
4494                 newheader->serial = 0;
4495         newheader->count = 0;
4496
4497         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4498
4499         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
4500                      ISC_FALSE, NULL, 0);
4501
4502         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4503
4504         return (result);
4505 }
4506
4507 static isc_result_t
4508 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
4509         rbtdb_load_t *loadctx = arg;
4510         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
4511         dns_rbtnode_t *node;
4512         isc_result_t result;
4513         isc_region_t region;
4514         rdatasetheader_t *newheader;
4515
4516         /*
4517          * This routine does no node locking.  See comments in
4518          * 'load' below for more information on loading and
4519          * locking.
4520          */
4521
4522
4523         /*
4524          * SOA records are only allowed at top of zone.
4525          */
4526         if (rdataset->type == dns_rdatatype_soa &&
4527             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
4528                 return (DNS_R_NOTZONETOP);
4529
4530         add_empty_wildcards(rbtdb, name);
4531
4532         if (dns_name_iswildcard(name)) {
4533                 /*
4534                  * NS record owners cannot legally be wild cards.
4535                  */
4536                 if (rdataset->type == dns_rdatatype_ns)
4537                         return (DNS_R_INVALIDNS);
4538                 result = add_wildcard_magic(rbtdb, name);
4539                 if (result != ISC_R_SUCCESS)
4540                         return (result);
4541         }
4542
4543         node = NULL;
4544         result = dns_rbt_addnode(rbtdb->tree, name, &node);
4545         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
4546                 return (result);
4547         if (result != ISC_R_EXISTS) {
4548                 dns_name_t foundname;
4549                 dns_name_init(&foundname, NULL);
4550                 dns_rbt_namefromnode(node, &foundname);
4551 #ifdef DNS_RBT_USEHASH
4552                 node->locknum = node->hashval % rbtdb->node_lock_count;
4553 #else
4554                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
4555                         rbtdb->node_lock_count;
4556 #endif
4557         }
4558
4559         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
4560                                             &region,
4561                                             sizeof(rdatasetheader_t));
4562         if (result != ISC_R_SUCCESS)
4563                 return (result);
4564         newheader = (rdatasetheader_t *)region.base;
4565         newheader->ttl = rdataset->ttl + loadctx->now; /* XXX overflow check */
4566         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
4567                                                 rdataset->covers);
4568         newheader->attributes = 0;
4569         newheader->trust = rdataset->trust;
4570         newheader->serial = 1;
4571         newheader->noqname = NULL;
4572         newheader->count = 0;
4573
4574         result = add(rbtdb, node, rbtdb->current_version, newheader,
4575                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
4576         if (result == ISC_R_SUCCESS &&
4577             delegating_type(rbtdb, node, rdataset->type))
4578                 node->find_callback = 1;
4579         else if (result == DNS_R_UNCHANGED)
4580                 result = ISC_R_SUCCESS;
4581
4582         return (result);
4583 }
4584
4585 static isc_result_t
4586 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
4587         rbtdb_load_t *loadctx;
4588         dns_rbtdb_t *rbtdb;
4589
4590         rbtdb = (dns_rbtdb_t *)db;
4591
4592         REQUIRE(VALID_RBTDB(rbtdb));
4593
4594         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
4595         if (loadctx == NULL)
4596                 return (ISC_R_NOMEMORY);
4597
4598         loadctx->rbtdb = rbtdb;
4599         if (IS_CACHE(rbtdb))
4600                 isc_stdtime_get(&loadctx->now);
4601         else
4602                 loadctx->now = 0;
4603
4604         LOCK(&rbtdb->lock);
4605
4606         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
4607                 == 0);
4608         rbtdb->attributes |= RBTDB_ATTR_LOADING;
4609
4610         UNLOCK(&rbtdb->lock);
4611
4612         *addp = loading_addrdataset;
4613         *dbloadp = loadctx;
4614
4615         return (ISC_R_SUCCESS);
4616 }
4617
4618 static isc_boolean_t
4619 iszonesecure(dns_db_t *db, dns_dbnode_t *origin) {
4620         dns_rdataset_t keyset;
4621         dns_rdataset_t nsecset, signsecset;
4622         isc_boolean_t haszonekey = ISC_FALSE;
4623         isc_boolean_t hasnsec = ISC_FALSE;
4624         isc_result_t result;
4625
4626         dns_rdataset_init(&keyset);
4627         result = dns_db_findrdataset(db, origin, NULL, dns_rdatatype_dnskey, 0,
4628                                      0, &keyset, NULL);
4629         if (result == ISC_R_SUCCESS) {
4630                 dns_rdata_t keyrdata = DNS_RDATA_INIT;
4631                 result = dns_rdataset_first(&keyset);
4632                 while (result == ISC_R_SUCCESS) {
4633                         dns_rdataset_current(&keyset, &keyrdata);
4634                         if (dns_zonekey_iszonekey(&keyrdata)) {
4635                                 haszonekey = ISC_TRUE;
4636                                 break;
4637                         }
4638                         result = dns_rdataset_next(&keyset);
4639                 }
4640                 dns_rdataset_disassociate(&keyset);
4641         }
4642         if (!haszonekey)
4643                 return (ISC_FALSE);
4644
4645         dns_rdataset_init(&nsecset);
4646         dns_rdataset_init(&signsecset);
4647         result = dns_db_findrdataset(db, origin, NULL, dns_rdatatype_nsec, 0,
4648                                      0, &nsecset, &signsecset);
4649         if (result == ISC_R_SUCCESS) {
4650                 if (dns_rdataset_isassociated(&signsecset)) {
4651                         hasnsec = ISC_TRUE;
4652                         dns_rdataset_disassociate(&signsecset);
4653                 }
4654                 dns_rdataset_disassociate(&nsecset);
4655         }
4656         return (hasnsec);
4657
4658 }
4659
4660 static isc_result_t
4661 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
4662         rbtdb_load_t *loadctx;
4663         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4664
4665         REQUIRE(VALID_RBTDB(rbtdb));
4666         REQUIRE(dbloadp != NULL);
4667         loadctx = *dbloadp;
4668         REQUIRE(loadctx->rbtdb == rbtdb);
4669
4670         LOCK(&rbtdb->lock);
4671
4672         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
4673         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
4674
4675         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
4676         rbtdb->attributes |= RBTDB_ATTR_LOADED;
4677
4678         UNLOCK(&rbtdb->lock);
4679
4680         /*
4681          * If there's a KEY rdataset at the zone origin containing a
4682          * zone key, we consider the zone secure.
4683          */
4684         if (! IS_CACHE(rbtdb))
4685                 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
4686
4687         *dbloadp = NULL;
4688
4689         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
4690
4691         return (ISC_R_SUCCESS);
4692 }
4693
4694 static isc_result_t
4695 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename) {
4696         dns_rbtdb_t *rbtdb;
4697
4698         rbtdb = (dns_rbtdb_t *)db;
4699
4700         REQUIRE(VALID_RBTDB(rbtdb));
4701
4702         return (dns_master_dump(rbtdb->common.mctx, db, version,
4703                                 &dns_master_style_default,
4704                                 filename));
4705 }
4706
4707 static void
4708 delete_callback(void *data, void *arg) {
4709         dns_rbtdb_t *rbtdb = arg;
4710         rdatasetheader_t *current, *next;
4711
4712         for (current = data; current != NULL; current = next) {
4713                 next = current->next;
4714                 free_rdataset(rbtdb->common.mctx, current);
4715         }
4716 }
4717
4718 static isc_boolean_t
4719 issecure(dns_db_t *db) {
4720         dns_rbtdb_t *rbtdb;
4721         isc_boolean_t secure;
4722
4723         rbtdb = (dns_rbtdb_t *)db;
4724
4725         REQUIRE(VALID_RBTDB(rbtdb));
4726
4727         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4728         secure = rbtdb->secure;
4729         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4730
4731         return (secure);
4732 }
4733
4734 static unsigned int
4735 nodecount(dns_db_t *db) {
4736         dns_rbtdb_t *rbtdb;
4737         unsigned int count;
4738
4739         rbtdb = (dns_rbtdb_t *)db;
4740
4741         REQUIRE(VALID_RBTDB(rbtdb));
4742
4743         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4744         count = dns_rbt_nodecount(rbtdb->tree);
4745         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4746
4747         return (count);
4748 }
4749
4750 static void
4751 settask(dns_db_t *db, isc_task_t *task) {
4752         dns_rbtdb_t *rbtdb;
4753
4754         rbtdb = (dns_rbtdb_t *)db;
4755
4756         REQUIRE(VALID_RBTDB(rbtdb));
4757
4758         LOCK(&rbtdb->lock);
4759         if (rbtdb->task != NULL)
4760                 isc_task_detach(&rbtdb->task);
4761         if (task != NULL)
4762                 isc_task_attach(task, &rbtdb->task);
4763         UNLOCK(&rbtdb->lock);
4764 }
4765
4766 static isc_boolean_t
4767 ispersistent(dns_db_t *db) {
4768         UNUSED(db);
4769         return (ISC_FALSE);
4770 }
4771
4772 static dns_dbmethods_t zone_methods = {
4773         attach,
4774         detach,
4775         beginload,
4776         endload,
4777         dump,
4778         currentversion,
4779         newversion,
4780         attachversion,
4781         closeversion,
4782         findnode,
4783         zone_find,
4784         zone_findzonecut,
4785         attachnode,
4786         detachnode,
4787         expirenode,
4788         printnode,
4789         createiterator,
4790         zone_findrdataset,
4791         allrdatasets,
4792         addrdataset,
4793         subtractrdataset,
4794         deleterdataset,
4795         issecure,
4796         nodecount,
4797         ispersistent,
4798         overmem,
4799         settask
4800 };
4801
4802 static dns_dbmethods_t cache_methods = {
4803         attach,
4804         detach,
4805         beginload,
4806         endload,
4807         dump,
4808         currentversion,
4809         newversion,
4810         attachversion,
4811         closeversion,
4812         findnode,
4813         cache_find,
4814         cache_findzonecut,
4815         attachnode,
4816         detachnode,
4817         expirenode,
4818         printnode,
4819         createiterator,
4820         cache_findrdataset,
4821         allrdatasets,
4822         addrdataset,
4823         subtractrdataset,
4824         deleterdataset,
4825         issecure,
4826         nodecount,
4827         ispersistent,
4828         overmem,
4829         settask
4830 };
4831
4832 isc_result_t
4833 #ifdef DNS_RBTDB_VERSION64
4834 dns_rbtdb64_create
4835 #else
4836 dns_rbtdb_create
4837 #endif
4838                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
4839                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
4840                  void *driverarg, dns_db_t **dbp)
4841 {
4842         dns_rbtdb_t *rbtdb;
4843         isc_result_t result;
4844         int i;
4845         dns_name_t name;
4846
4847         /* Keep the compiler happy. */
4848         UNUSED(argc);
4849         UNUSED(argv);
4850         UNUSED(driverarg);
4851
4852         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
4853         if (rbtdb == NULL)
4854                 return (ISC_R_NOMEMORY);
4855         memset(rbtdb, '\0', sizeof(*rbtdb));
4856         dns_name_init(&rbtdb->common.origin, NULL);
4857         rbtdb->common.attributes = 0;
4858         if (type == dns_dbtype_cache) {
4859                 rbtdb->common.methods = &cache_methods;
4860                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
4861         } else if (type == dns_dbtype_stub) {
4862                 rbtdb->common.methods = &zone_methods;
4863                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
4864         } else
4865                 rbtdb->common.methods = &zone_methods;
4866         rbtdb->common.rdclass = rdclass;
4867         rbtdb->common.mctx = NULL;
4868
4869         result = isc_mutex_init(&rbtdb->lock);
4870         if (result != ISC_R_SUCCESS) {
4871                 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
4872                 UNEXPECTED_ERROR(__FILE__, __LINE__,
4873                                  "isc_mutex_init() failed: %s",
4874                                  isc_result_totext(result));
4875                 return (ISC_R_UNEXPECTED);
4876         }
4877
4878         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
4879         if (result != ISC_R_SUCCESS) {
4880                 DESTROYLOCK(&rbtdb->lock);
4881                 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
4882                 UNEXPECTED_ERROR(__FILE__, __LINE__,
4883                                  "isc_rwlock_init() failed: %s",
4884                                  isc_result_totext(result));
4885                 return (ISC_R_UNEXPECTED);
4886         }
4887
4888         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
4889
4890         if (rbtdb->node_lock_count == 0)
4891                 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
4892         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
4893                                         sizeof(rbtdb_nodelock_t));
4894         rbtdb->active = rbtdb->node_lock_count;
4895         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
4896                 result = isc_mutex_init(&rbtdb->node_locks[i].lock);
4897                 if (result != ISC_R_SUCCESS) {
4898                         i--;
4899                         while (i >= 0) {
4900                                 DESTROYLOCK(&rbtdb->node_locks[i].lock);
4901                                 i--;
4902                         }
4903                         isc_mem_put(mctx, rbtdb->node_locks,
4904                                     rbtdb->node_lock_count *
4905                                     sizeof(rbtdb_nodelock_t));
4906                         isc_rwlock_destroy(&rbtdb->tree_lock);
4907                         DESTROYLOCK(&rbtdb->lock);
4908                         isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
4909                         UNEXPECTED_ERROR(__FILE__, __LINE__,
4910                                          "isc_mutex_init() failed: %s",
4911                                          isc_result_totext(result));
4912                         return (ISC_R_UNEXPECTED);
4913                 }
4914                 rbtdb->node_locks[i].references = 0;
4915                 rbtdb->node_locks[i].exiting = ISC_FALSE;
4916         }
4917
4918         /*
4919          * Attach to the mctx.  The database will persist so long as there
4920          * are references to it, and attaching to the mctx ensures that our
4921          * mctx won't disappear out from under us.
4922          */
4923         isc_mem_attach(mctx, &rbtdb->common.mctx);
4924
4925         /*
4926          * Must be initalized before free_rbtdb() is called.
4927          */
4928         isc_ondestroy_init(&rbtdb->common.ondest);
4929
4930         /*
4931          * Make a copy of the origin name.
4932          */
4933         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
4934         if (result != ISC_R_SUCCESS) {
4935                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
4936                 return (result);
4937         }
4938
4939         /*
4940          * Make the Red-Black Tree.
4941          */
4942         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
4943         if (result != ISC_R_SUCCESS) {
4944                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
4945                 return (result);
4946         }
4947         /*
4948          * In order to set the node callback bit correctly in zone databases,
4949          * we need to know if the node has the origin name of the zone.
4950          * In loading_addrdataset() we could simply compare the new name
4951          * to the origin name, but this is expensive.  Also, we don't know the
4952          * node name in addrdataset(), so we need another way of knowing the
4953          * zone's top.
4954          *
4955          * We now explicitly create a node for the zone's origin, and then
4956          * we simply remember the node's address.  This is safe, because
4957          * the top-of-zone node can never be deleted, nor can its address
4958          * change.
4959          */
4960         if (! IS_CACHE(rbtdb)) {
4961                 rbtdb->origin_node = NULL;
4962                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
4963                                          &rbtdb->origin_node);
4964                 if (result != ISC_R_SUCCESS) {
4965                         INSIST(result != ISC_R_EXISTS);
4966                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
4967                         return (result);
4968                 }
4969                 /*
4970                  * We need to give the origin node the right locknum.
4971                  */
4972                 dns_name_init(&name, NULL);
4973                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
4974 #ifdef DNS_RBT_USEHASH
4975                 rbtdb->origin_node->locknum =
4976                         rbtdb->origin_node->hashval %
4977                         rbtdb->node_lock_count;
4978 #else
4979                 rbtdb->origin_node->locknum =
4980                         dns_name_hash(&name, ISC_TRUE) %
4981                         rbtdb->node_lock_count;
4982 #endif
4983         }
4984
4985         /*
4986          * Misc. Initialization.
4987          */
4988         isc_refcount_init(&rbtdb->references, 1);
4989         rbtdb->attributes = 0;
4990         rbtdb->secure = ISC_FALSE;
4991         rbtdb->overmem = ISC_FALSE;
4992         rbtdb->task = NULL;
4993
4994         /*
4995          * Version Initialization.
4996          */
4997         rbtdb->current_serial = 1;
4998         rbtdb->least_serial = 1;
4999         rbtdb->next_serial = 2;
5000         rbtdb->current_version = allocate_version(mctx, 1, 0, ISC_FALSE);
5001         if (rbtdb->current_version == NULL) {
5002                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
5003                 return (ISC_R_NOMEMORY);
5004         }
5005         rbtdb->future_version = NULL;
5006         ISC_LIST_INIT(rbtdb->open_versions);
5007
5008         rbtdb->common.magic = DNS_DB_MAGIC;
5009         rbtdb->common.impmagic = RBTDB_MAGIC;
5010
5011         *dbp = (dns_db_t *)rbtdb;
5012
5013         return (ISC_R_SUCCESS);
5014 }
5015
5016
5017 /*
5018  * Slabbed Rdataset Methods
5019  */
5020
5021 static void
5022 rdataset_disassociate(dns_rdataset_t *rdataset) {
5023         dns_db_t *db = rdataset->private1;
5024         dns_dbnode_t *node = rdataset->private2;
5025
5026         detachnode(db, &node);
5027 }
5028
5029 static isc_result_t
5030 rdataset_first(dns_rdataset_t *rdataset) {
5031         unsigned char *raw = rdataset->private3;
5032         unsigned int count;
5033
5034         count = raw[0] * 256 + raw[1];
5035         if (count == 0) {
5036                 rdataset->private5 = NULL;
5037                 return (ISC_R_NOMORE);
5038         }
5039         raw += 2;
5040         /*
5041          * The privateuint4 field is the number of rdata beyond the cursor
5042          * position, so we decrement the total count by one before storing
5043          * it.
5044          */
5045         count--;
5046         rdataset->privateuint4 = count;
5047         rdataset->private5 = raw;
5048
5049         return (ISC_R_SUCCESS);
5050 }
5051
5052 static isc_result_t
5053 rdataset_next(dns_rdataset_t *rdataset) {
5054         unsigned int count;
5055         unsigned int length;
5056         unsigned char *raw;
5057
5058         count = rdataset->privateuint4;
5059         if (count == 0)
5060                 return (ISC_R_NOMORE);
5061         count--;
5062         rdataset->privateuint4 = count;
5063         raw = rdataset->private5;
5064         length = raw[0] * 256 + raw[1];
5065         raw += length + 2;
5066         rdataset->private5 = raw;
5067
5068         return (ISC_R_SUCCESS);
5069 }
5070
5071 static void
5072 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
5073         unsigned char *raw = rdataset->private5;
5074         isc_region_t r;
5075
5076         REQUIRE(raw != NULL);
5077
5078         r.length = raw[0] * 256 + raw[1];
5079         raw += 2;
5080         r.base = raw;
5081         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
5082 }
5083
5084 static void
5085 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
5086         dns_db_t *db = source->private1;
5087         dns_dbnode_t *node = source->private2;
5088         dns_dbnode_t *cloned_node = NULL;
5089
5090         attachnode(db, node, &cloned_node);
5091         *target = *source;
5092
5093         /*
5094          * Reset iterator state.
5095          */
5096         target->privateuint4 = 0;
5097         target->private5 = NULL;
5098 }
5099
5100 static unsigned int
5101 rdataset_count(dns_rdataset_t *rdataset) {
5102         unsigned char *raw = rdataset->private3;
5103         unsigned int count;
5104
5105         count = raw[0] * 256 + raw[1];
5106
5107         return (count);
5108 }
5109
5110 static isc_result_t
5111 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
5112                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
5113 {
5114         dns_db_t *db = rdataset->private1;
5115         dns_dbnode_t *node = rdataset->private2;
5116         dns_dbnode_t *cloned_node;
5117         struct noqname *noqname = rdataset->private6;
5118
5119         cloned_node = NULL;
5120         attachnode(db, node, &cloned_node);
5121         nsec->methods = &rdataset_methods;
5122         nsec->rdclass = db->rdclass;
5123         nsec->type = dns_rdatatype_nsec;
5124         nsec->covers = 0;
5125         nsec->ttl = rdataset->ttl;
5126         nsec->trust = rdataset->trust;
5127         nsec->private1 = rdataset->private1;
5128         nsec->private2 = rdataset->private2;
5129         nsec->private3 = noqname->nsec;
5130         nsec->privateuint4 = 0;
5131         nsec->private5 = NULL;
5132         nsec->private6 = NULL;
5133
5134         cloned_node = NULL;
5135         attachnode(db, node, &cloned_node);
5136         nsecsig->methods = &rdataset_methods;
5137         nsecsig->rdclass = db->rdclass;
5138         nsecsig->type = dns_rdatatype_rrsig;
5139         nsecsig->covers = dns_rdatatype_nsec;
5140         nsecsig->ttl = rdataset->ttl;
5141         nsecsig->trust = rdataset->trust;
5142         nsecsig->private1 = rdataset->private1;
5143         nsecsig->private2 = rdataset->private2;
5144         nsecsig->private3 = noqname->nsecsig;
5145         nsecsig->privateuint4 = 0;
5146         nsecsig->private5 = NULL;
5147         nsec->private6 = NULL;
5148
5149         dns_name_clone(&noqname->name, name);
5150
5151         return (ISC_R_SUCCESS);
5152 }
5153
5154 /*
5155  * Rdataset Iterator Methods
5156  */
5157
5158 static void
5159 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
5160         rbtdb_rdatasetiter_t *rbtiterator;
5161
5162         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
5163
5164         if (rbtiterator->common.version != NULL)
5165                 closeversion(rbtiterator->common.db,
5166                              &rbtiterator->common.version, ISC_FALSE);
5167         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
5168         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
5169                     sizeof(*rbtiterator));
5170
5171         *iteratorp = NULL;
5172 }
5173
5174 static isc_result_t
5175 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
5176         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
5177         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
5178         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
5179         rbtdb_version_t *rbtversion = rbtiterator->common.version;
5180         rdatasetheader_t *header, *top_next;
5181         rbtdb_serial_t serial;
5182         isc_stdtime_t now;
5183
5184         if (IS_CACHE(rbtdb)) {
5185                 serial = 1;
5186                 now = rbtiterator->common.now;
5187         } else {
5188                 serial = rbtversion->serial;
5189                 now = 0;
5190         }
5191
5192         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5193
5194         for (header = rbtnode->data; header != NULL; header = top_next) {
5195                 top_next = header->next;
5196                 do {
5197                         if (header->serial <= serial && !IGNORE(header)) {
5198                                 /*
5199                                  * Is this a "this rdataset doesn't exist"
5200                                  * record?  Or is it too old in the cache?
5201                                  *
5202                                  * Note: unlike everywhere else, we
5203                                  * check for now > header->ttl instead
5204                                  * of now >= header->ttl.  This allows
5205                                  * ANY and RRSIG queries for 0 TTL
5206                                  * rdatasets to work.
5207                                  */
5208                                 if (NONEXISTENT(header) ||
5209                                     (now != 0 && now > header->ttl))
5210                                         header = NULL;
5211                                 break;
5212                         } else
5213                                 header = header->down;
5214                 } while (header != NULL);
5215                 if (header != NULL)
5216                         break;
5217         }
5218
5219         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5220
5221         rbtiterator->current = header;
5222
5223         if (header == NULL)
5224                 return (ISC_R_NOMORE);
5225
5226         return (ISC_R_SUCCESS);
5227 }
5228
5229 static isc_result_t
5230 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
5231         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
5232         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
5233         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
5234         rbtdb_version_t *rbtversion = rbtiterator->common.version;
5235         rdatasetheader_t *header, *top_next;
5236         rbtdb_serial_t serial;
5237         isc_stdtime_t now;
5238         rbtdb_rdatatype_t type;
5239
5240         header = rbtiterator->current;
5241         if (header == NULL)
5242                 return (ISC_R_NOMORE);
5243
5244         if (IS_CACHE(rbtdb)) {
5245                 serial = 1;
5246                 now = rbtiterator->common.now;
5247         } else {
5248                 serial = rbtversion->serial;
5249                 now = 0;
5250         }
5251
5252         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5253
5254         type = header->type;
5255         for (header = header->next; header != NULL; header = top_next) {
5256                 top_next = header->next;
5257                 if (header->type != type) {
5258                         do {
5259                                 if (header->serial <= serial &&
5260                                     !IGNORE(header)) {
5261                                         /*
5262                                          * Is this a "this rdataset doesn't
5263                                          * exist" record?
5264                                          *
5265                                          * Note: unlike everywhere else, we
5266                                          * check for now > header->ttl instead
5267                                          * of now >= header->ttl.  This allows
5268                                          * ANY and RRSIG queries for 0 TTL
5269                                          * rdatasets to work.
5270                                          */
5271                                         if ((header->attributes &
5272                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
5273                                             (now != 0 && now > header->ttl))
5274                                                 header = NULL;
5275                                         break;
5276                                 } else
5277                                         header = header->down;
5278                         } while (header != NULL);
5279                         if (header != NULL)
5280                                 break;
5281                 }
5282         }
5283
5284         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5285
5286         rbtiterator->current = header;
5287
5288         if (header == NULL)
5289                 return (ISC_R_NOMORE);
5290
5291         return (ISC_R_SUCCESS);
5292 }
5293
5294 static void
5295 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
5296         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
5297         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
5298         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
5299         rdatasetheader_t *header;
5300
5301         header = rbtiterator->current;
5302         REQUIRE(header != NULL);
5303
5304         LOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5305
5306         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
5307                       rdataset);
5308
5309         UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5310 }
5311
5312
5313 /*
5314  * Database Iterator Methods
5315  */
5316
5317 static inline void
5318 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
5319         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
5320         dns_rbtnode_t *node = rbtdbiter->node;
5321
5322         if (node == NULL)
5323                 return;
5324
5325         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
5326         LOCK(&rbtdb->node_locks[node->locknum].lock);
5327         new_reference(rbtdb, node);
5328         UNLOCK(&rbtdb->node_locks[node->locknum].lock);
5329 }
5330
5331 static inline void
5332 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
5333         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
5334         dns_rbtnode_t *node = rbtdbiter->node;
5335         isc_mutex_t *lock;
5336
5337         if (node == NULL)
5338                 return;
5339
5340         lock = &rbtdb->node_locks[node->locknum].lock;
5341         LOCK(lock);
5342         INSIST(rbtdbiter->node->references > 0);
5343         if (--node->references == 0)
5344                 no_references(rbtdb, node, 0, rbtdbiter->tree_locked);
5345         UNLOCK(lock);
5346
5347         rbtdbiter->node = NULL;
5348 }
5349
5350 static void
5351 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
5352         dns_rbtnode_t *node;
5353         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
5354         isc_boolean_t was_read_locked = ISC_FALSE;
5355         isc_mutex_t *lock;
5356         int i;
5357
5358         if (rbtdbiter->delete != 0) {
5359                 /*
5360                  * Note that "%d node of %d in tree" can report things like
5361                  * "flush_deletions: 59 nodes of 41 in tree".  This means
5362                  * That some nodes appear on the deletions list more than
5363                  * once.  Only the last occurence will actually be deleted.
5364                  */
5365                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
5366                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
5367                               "flush_deletions: %d nodes of %d in tree",
5368                               rbtdbiter->delete,
5369                               dns_rbt_nodecount(rbtdb->tree));
5370
5371                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
5372                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5373                         was_read_locked = ISC_TRUE;
5374                 }
5375                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
5376                 rbtdbiter->tree_locked = isc_rwlocktype_write;
5377
5378                 for (i = 0; i < rbtdbiter->delete; i++) {
5379                         node = rbtdbiter->deletions[i];
5380                         lock = &rbtdb->node_locks[node->locknum].lock;
5381
5382                         LOCK(lock);
5383                         INSIST(node->references > 0);
5384                         node->references--;
5385                         if (node->references == 0)
5386                                 no_references(rbtdb, node, 0,
5387                                               rbtdbiter->tree_locked);
5388                         UNLOCK(lock);
5389                 }
5390
5391                 rbtdbiter->delete = 0;
5392
5393                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
5394                 if (was_read_locked) {
5395                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5396                         rbtdbiter->tree_locked = isc_rwlocktype_read;
5397
5398                 } else {
5399                         rbtdbiter->tree_locked = isc_rwlocktype_none;
5400                 }
5401         }
5402 }
5403
5404 static inline void
5405 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
5406         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
5407
5408         REQUIRE(rbtdbiter->paused);
5409         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
5410
5411         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5412         rbtdbiter->tree_locked = isc_rwlocktype_read;
5413
5414         rbtdbiter->paused = ISC_FALSE;
5415 }
5416
5417 static void
5418 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
5419         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
5420         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
5421         dns_db_t *db = NULL;
5422
5423         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
5424                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5425                 rbtdbiter->tree_locked = isc_rwlocktype_none;
5426         } else
5427                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
5428
5429         dereference_iter_node(rbtdbiter);
5430
5431         flush_deletions(rbtdbiter);
5432
5433         dns_db_attach(rbtdbiter->common.db, &db);
5434         dns_db_detach(&rbtdbiter->common.db);
5435
5436         dns_rbtnodechain_reset(&rbtdbiter->chain);
5437         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
5438         dns_db_detach(&db);
5439
5440         *iteratorp = NULL;
5441 }
5442
5443 static isc_result_t
5444 dbiterator_first(dns_dbiterator_t *iterator) {
5445         isc_result_t result;
5446         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
5447         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
5448         dns_name_t *name, *origin;
5449
5450         if (rbtdbiter->result != ISC_R_SUCCESS &&
5451             rbtdbiter->result != ISC_R_NOMORE)
5452                 return (rbtdbiter->result);
5453
5454         if (rbtdbiter->paused)
5455                 resume_iteration(rbtdbiter);
5456
5457         dereference_iter_node(rbtdbiter);
5458
5459         name = dns_fixedname_name(&rbtdbiter->name);
5460         origin = dns_fixedname_name(&rbtdbiter->origin);
5461         dns_rbtnodechain_reset(&rbtdbiter->chain);
5462
5463         result = dns_rbtnodechain_first(&rbtdbiter->chain, rbtdb->tree, name,
5464                                         origin);
5465
5466         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
5467                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
5468                                                   NULL, &rbtdbiter->node);
5469                 if (result == ISC_R_SUCCESS) {
5470                         rbtdbiter->new_origin = ISC_TRUE;
5471                         reference_iter_node(rbtdbiter);
5472                 }
5473         } else {
5474                 INSIST(result == ISC_R_NOTFOUND);
5475                 result = ISC_R_NOMORE; /* The tree is empty. */
5476         }
5477
5478         rbtdbiter->result = result;
5479
5480         return (result);
5481 }
5482
5483 static isc_result_t
5484 dbiterator_last(dns_dbiterator_t *iterator) {
5485         isc_result_t result;
5486         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
5487         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
5488         dns_name_t *name, *origin;
5489
5490         if (rbtdbiter->result != ISC_R_SUCCESS &&
5491             rbtdbiter->result != ISC_R_NOMORE)
5492                 return (rbtdbiter->result);
5493
5494         if (rbtdbiter->paused)
5495                 resume_iteration(rbtdbiter);
5496
5497         dereference_iter_node(rbtdbiter);
5498
5499         name = dns_fixedname_name(&rbtdbiter->name);
5500         origin = dns_fixedname_name(&rbtdbiter->origin);
5501         dns_rbtnodechain_reset(&rbtdbiter->chain);
5502
5503         result = dns_rbtnodechain_last(&rbtdbiter->chain, rbtdb->tree, name,
5504                                        origin);
5505         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
5506                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
5507                                                   NULL, &rbtdbiter->node);
5508                 if (result == ISC_R_SUCCESS) {
5509                         rbtdbiter->new_origin = ISC_TRUE;
5510                         reference_iter_node(rbtdbiter);
5511                 }
5512         } else {
5513                 INSIST(result == ISC_R_NOTFOUND);
5514                 result = ISC_R_NOMORE; /* The tree is empty. */
5515         }
5516
5517         rbtdbiter->result = result;
5518
5519         return (result);
5520 }
5521
5522 static isc_result_t
5523 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
5524         isc_result_t result;
5525         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
5526         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
5527         dns_name_t *iname, *origin;
5528
5529         if (rbtdbiter->result != ISC_R_SUCCESS &&
5530             rbtdbiter->result != ISC_R_NOMORE)
5531                 return (rbtdbiter->result);
5532
5533         if (rbtdbiter->paused)
5534                 resume_iteration(rbtdbiter);
5535
5536         dereference_iter_node(rbtdbiter);
5537
5538         iname = dns_fixedname_name(&rbtdbiter->name);
5539         origin = dns_fixedname_name(&rbtdbiter->origin);
5540         dns_rbtnodechain_reset(&rbtdbiter->chain);
5541
5542         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &rbtdbiter->node,
5543                                   &rbtdbiter->chain, DNS_RBTFIND_EMPTYDATA,
5544                                   NULL, NULL);
5545         if (result == ISC_R_SUCCESS) {
5546                 result = dns_rbtnodechain_current(&rbtdbiter->chain, iname,
5547                                                   origin, NULL);
5548                 if (result == ISC_R_SUCCESS) {
5549                         rbtdbiter->new_origin = ISC_TRUE;
5550                         reference_iter_node(rbtdbiter);
5551                 }
5552
5553         } else if (result == DNS_R_PARTIALMATCH)
5554                 result = ISC_R_NOTFOUND;
5555
5556         rbtdbiter->result = result;
5557
5558         return (result);
5559 }
5560
5561 static isc_result_t
5562 dbiterator_prev(dns_dbiterator_t *iterator) {
5563         isc_result_t result;
5564         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
5565         dns_name_t *name, *origin;
5566
5567         REQUIRE(rbtdbiter->node != NULL);
5568
5569         if (rbtdbiter->result != ISC_R_SUCCESS)
5570                 return (rbtdbiter->result);
5571
5572         if (rbtdbiter->paused)
5573                 resume_iteration(rbtdbiter);
5574
5575         name = dns_fixedname_name(&rbtdbiter->name);
5576         origin = dns_fixedname_name(&rbtdbiter->origin);
5577         result = dns_rbtnodechain_prev(&rbtdbiter->chain, name, origin);
5578
5579         dereference_iter_node(rbtdbiter);
5580
5581         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
5582                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
5583                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
5584                                                   NULL, &rbtdbiter->node);
5585         }
5586
5587         if (result == ISC_R_SUCCESS)
5588                 reference_iter_node(rbtdbiter);
5589
5590         rbtdbiter->result = result;
5591
5592         return (result);
5593 }
5594
5595 static isc_result_t
5596 dbiterator_next(dns_dbiterator_t *iterator) {
5597         isc_result_t result;
5598         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
5599         dns_name_t *name, *origin;
5600
5601         REQUIRE(rbtdbiter->node != NULL);
5602
5603         if (rbtdbiter->result != ISC_R_SUCCESS)
5604                 return (rbtdbiter->result);
5605
5606         if (rbtdbiter->paused)
5607                 resume_iteration(rbtdbiter);
5608
5609         name = dns_fixedname_name(&rbtdbiter->name);
5610         origin = dns_fixedname_name(&rbtdbiter->origin);
5611         result = dns_rbtnodechain_next(&rbtdbiter->chain, name, origin);
5612
5613         dereference_iter_node(rbtdbiter);
5614
5615         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
5616                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
5617                 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
5618                                                   NULL, &rbtdbiter->node);
5619         }
5620         if (result == ISC_R_SUCCESS)
5621                 reference_iter_node(rbtdbiter);
5622
5623         rbtdbiter->result = result;
5624
5625         return (result);
5626 }
5627
5628 static isc_result_t
5629 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
5630                    dns_name_t *name)
5631 {
5632         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
5633         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
5634         dns_rbtnode_t *node = rbtdbiter->node;
5635         isc_result_t result;
5636         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
5637         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
5638
5639         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
5640         REQUIRE(rbtdbiter->node != NULL);
5641
5642         if (rbtdbiter->paused)
5643                 resume_iteration(rbtdbiter);
5644
5645         if (name != NULL) {
5646                 if (rbtdbiter->common.relative_names)
5647                         origin = NULL;
5648                 result = dns_name_concatenate(nodename, origin, name, NULL);
5649                 if (result != ISC_R_SUCCESS)
5650                         return (result);
5651                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
5652                         result = DNS_R_NEWORIGIN;
5653         } else
5654                 result = ISC_R_SUCCESS;
5655
5656         LOCK(&rbtdb->node_locks[node->locknum].lock);
5657         new_reference(rbtdb, node);
5658         UNLOCK(&rbtdb->node_locks[node->locknum].lock);
5659
5660         *nodep = rbtdbiter->node;
5661
5662         if (iterator->cleaning && result == ISC_R_SUCCESS) {
5663                 isc_result_t expire_result;
5664
5665                 /*
5666                  * If the deletion array is full, flush it before trying
5667                  * to expire the current node.  The current node can't
5668                  * fully deleted while the iteration cursor is still on it.
5669                  */
5670                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
5671                         flush_deletions(rbtdbiter);
5672
5673                 expire_result = expirenode(iterator->db, *nodep, 0);
5674
5675                 /*
5676                  * expirenode() currently always returns success.
5677                  */
5678                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
5679                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
5680                         LOCK(&rbtdb->node_locks[node->locknum].lock);
5681                         node->references++;
5682                         UNLOCK(&rbtdb->node_locks[node->locknum].lock);
5683                 }
5684         }
5685
5686         return (result);
5687 }
5688
5689 static isc_result_t
5690 dbiterator_pause(dns_dbiterator_t *iterator) {
5691         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
5692         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
5693
5694         if (rbtdbiter->result != ISC_R_SUCCESS &&
5695             rbtdbiter->result != ISC_R_NOMORE)
5696                 return (rbtdbiter->result);
5697
5698         if (rbtdbiter->paused)
5699                 return (ISC_R_SUCCESS);
5700
5701         rbtdbiter->paused = ISC_TRUE;
5702
5703         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
5704                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
5705                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5706                 rbtdbiter->tree_locked = isc_rwlocktype_none;
5707         }
5708
5709         flush_deletions(rbtdbiter);
5710
5711         return (ISC_R_SUCCESS);
5712 }
5713
5714 static isc_result_t
5715 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
5716         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
5717         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
5718
5719         if (rbtdbiter->result != ISC_R_SUCCESS)
5720                 return (rbtdbiter->result);
5721
5722         return (dns_name_copy(origin, name, NULL));
5723 }