2 * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: rbtdb.c,v 1.248.12.18 2009/05/06 23:34:47 jinmei Exp $ */
23 * Principal Author: Bob Halley
29 #include <isc/event.h>
31 #include <isc/platform.h>
32 #include <isc/print.h>
33 #include <isc/mutex.h>
34 #include <isc/random.h>
35 #include <isc/refcount.h>
36 #include <isc/rwlock.h>
37 #include <isc/string.h>
42 #include <dns/acache.h>
44 #include <dns/dbiterator.h>
45 #include <dns/events.h>
46 #include <dns/fixedname.h>
49 #include <dns/masterdump.h>
51 #include <dns/rdata.h>
52 #include <dns/rdataset.h>
53 #include <dns/rdatasetiter.h>
54 #include <dns/rdataslab.h>
55 #include <dns/result.h>
56 #include <dns/stats.h>
59 #include <dns/zonekey.h>
61 #ifdef DNS_RBTDB_VERSION64
67 #ifdef DNS_RBTDB_VERSION64
68 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '8')
70 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
74 * Note that "impmagic" is not the first four bytes of the struct, so
75 * ISC_MAGIC_VALID cannot be used.
77 #define VALID_RBTDB(rbtdb) ((rbtdb) != NULL && \
78 (rbtdb)->common.impmagic == RBTDB_MAGIC)
80 #ifdef DNS_RBTDB_VERSION64
81 typedef isc_uint64_t rbtdb_serial_t;
83 * Make casting easier in symbolic debuggers by using different names
84 * for the 64 bit version.
86 #define dns_rbtdb_t dns_rbtdb64_t
87 #define rdatasetheader_t rdatasetheader64_t
88 #define rbtdb_version_t rbtdb_version64_t
90 typedef isc_uint32_t rbtdb_serial_t;
93 typedef isc_uint32_t rbtdb_rdatatype_t;
/*
 * An rbtdb_rdatatype_t packs two values: a "base" type in the low 16 bits
 * and an "ext" type shifted above them.  BASE() and EXT() extract each half;
 * VALUE(b, e) combines them.
 */
95 #define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type) & 0xFFFF))
96 #define RBTDB_RDATATYPE_EXT(type) ((dns_rdatatype_t)((type) >> 16))
97 #define RBTDB_RDATATYPE_VALUE(b, e) (((e) << 16) | (b))
/*
 * Pre-built combined values: base rrsig with ext nsec / ns / cname / dname,
 * and base 0 with ext any.
 */
99 #define RBTDB_RDATATYPE_SIGNSEC \
100 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
101 #define RBTDB_RDATATYPE_SIGNS \
102 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
103 #define RBTDB_RDATATYPE_SIGCNAME \
104 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
105 #define RBTDB_RDATATYPE_SIGDNAME \
106 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
107 #define RBTDB_RDATATYPE_NCACHEANY \
108 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
111 * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
112 * Using rwlock is effective with regard to lookup performance only when
113 * it is implemented in an efficient way.
114 * Otherwise, it is generally wise to stick to the simple locking since rwlock
115 * would require more memory or can even make lookups slower due to its own
116 * overhead (when it internally calls mutex locks).
118 #ifdef ISC_RWLOCK_USEATOMIC
119 #define DNS_RBTDB_USERWLOCK 1
121 #define DNS_RBTDB_USERWLOCK 0
/*
 * Database-lock wrappers.  The first group maps onto the isc_rwlock/RWLOCK
 * primitives and honours the lock type 't'.
 */
124 #if DNS_RBTDB_USERWLOCK
125 #define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
126 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
127 #define RBTDB_LOCK(l, t) RWLOCK((l), (t))
128 #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
/*
 * Mutex-based variants: the lock type 't' is accepted but ignored.
 * NOTE(review): presumably the #else branch; the directive line itself is
 * not visible in this extract.
 */
130 #define RBTDB_INITLOCK(l) isc_mutex_init(l)
131 #define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
132 #define RBTDB_LOCK(l, t) LOCK(l)
133 #define RBTDB_UNLOCK(l, t) UNLOCK(l)
137 * Since node locking is sensitive to both performance and memory footprint,
138 * we need some trick here. If we have both high-performance rwlock and
139 * high performance and small-memory reference counters, we use rwlock for
140 * node lock and isc_refcount for node references. In this case, we don't have
141 * to protect the access to the counters by locks.
142 * Otherwise, we simply use ordinary mutex lock for node locking, and use
143 * simple integers as reference counters which is protected by the lock.
144 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
145 * NODE_UNLOCK. In some other cases, however, we need to protect reference
146 * counters first and then protect other parts of a node as read-only data.
147 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
148 * provided for these special cases. When we can use the efficient backend
149 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
150 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
151 * section including the access to the reference counter.
152 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
153 * section is also protected by NODE_STRONGLOCK().
/*
 * Node-lock wrappers, first variant: nodelock_t is an isc_rwlock_t.
 * NODE_STRONGLOCK/NODE_STRONGUNLOCK expand to nothing, while the WEAK
 * macros forward to NODE_LOCK/NODE_UNLOCK.
 */
155 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
156 typedef isc_rwlock_t nodelock_t;
158 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
159 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
160 #define NODE_LOCK(l, t) RWLOCK((l), (t))
161 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
162 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
164 #define NODE_STRONGLOCK(l) ((void)0)
165 #define NODE_STRONGUNLOCK(l) ((void)0)
166 #define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
167 #define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
168 #define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
/*
 * Second variant: nodelock_t is an isc_mutex_t.  The lock type 't' is
 * ignored, NODE_TRYUPGRADE always yields ISC_R_SUCCESS, the STRONG macros
 * take/release the mutex, and the WEAK macros expand to nothing.
 * NOTE(review): presumably the #else branch; the directive line itself is
 * not visible in this extract.
 */
170 typedef isc_mutex_t nodelock_t;
172 #define NODE_INITLOCK(l) isc_mutex_init(l)
173 #define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
174 #define NODE_LOCK(l, t) LOCK(l)
175 #define NODE_UNLOCK(l, t) UNLOCK(l)
176 #define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
178 #define NODE_STRONGLOCK(l) LOCK(l)
179 #define NODE_STRONGUNLOCK(l) UNLOCK(l)
180 #define NODE_WEAKLOCK(l, t) ((void)0)
181 #define NODE_WEAKUNLOCK(l, t) ((void)0)
182 #define NODE_WEAKDOWNGRADE(l) ((void)0)
186 * Whether to rate-limit updating the LRU to avoid possible thread contention.
187 * Our performance measurement has shown the cost is marginal, so it's defined
188 * to be 0 by default either with or without threads.
190 #ifndef DNS_RBTDB_LIMITLRUUPDATE
191 #define DNS_RBTDB_LIMITLRUUPDATE 0
195 * Allow clients with a virtual time of up to 5 minutes in the past to see
196 * records that would otherwise have expired.
198 #define RBTDB_VIRTUAL 300
206 typedef struct acachectl acachectl_t;
208 typedef struct rdatasetheader {
210 * Locked by the owning node's lock.
212 rbtdb_serial_t serial;
214 rbtdb_rdatatype_t type;
215 isc_uint16_t attributes;
217 struct noqname *noqname;
219 * We don't use the LIST macros, because the LIST structure has
220 * both head and tail pointers, and is doubly linked.
223 struct rdatasetheader *next;
225 * If this is the top header for an rdataset, 'next' points
226 * to the top header for the next rdataset (i.e., the next type).
227 * Otherwise, it points up to the header whose down pointer points
231 struct rdatasetheader *down;
233 * Points to the header for the next older version of
239 * Monotonically increased every time this rdataset is bound so that
240 * it is used as the base of the starting point in DNS responses
241 * when the "cyclic" rrset-order is required. Since the ordering
242 * should not be so crucial, no lock is set for the counter for
243 * performance reasons.
246 acachectl_t *additional_auth;
247 acachectl_t *additional_glue;
250 isc_stdtime_t last_used;
251 ISC_LINK(struct rdatasetheader) lru_link;
253 * Used for LRU-based cache management. We should probably make
254 * these cache-DB specific. We might also make it a pointer and
255 * ensure only the top header has a valid link to save memory.
256 * The linked-list is locked by the rbtdb->lrulock.
260 * It's possible this should not be here anymore, but instead
261 * referenced from the bucket's heap directly.
266 unsigned int heap_index;
268 * Used for TTL-based cache cleaning.
272 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
273 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
/*
 * Bit flags for the 'attributes' field of an rdatasetheader.  Each value is
 * a distinct single bit so flags can be OR-ed together and tested with '&'.
 */
275 #define RDATASET_ATTR_NONEXISTENT 0x0001
276 #define RDATASET_ATTR_STALE 0x0002
277 #define RDATASET_ATTR_IGNORE 0x0004
278 #define RDATASET_ATTR_RETAIN 0x0008
279 #define RDATASET_ATTR_NXDOMAIN 0x0010
280 #define RDATASET_ATTR_RESIGN 0x0020
281 #define RDATASET_ATTR_STATCOUNT 0x0040
283 typedef struct acache_cbarg {
284 dns_rdatasetadditional_t type;
288 rdatasetheader_t *header;
292 dns_acacheentry_t *entry;
293 acache_cbarg_t *cbarg;
298 * When the cache will pre-expire data (due to memory low or other
299 * situations) before the rdataset's TTL has expired, it MUST
300 * respect the RETAIN bit and not expire the data until its TTL is
304 #undef IGNORE /* WIN32 winbase.h defines this. */
/*
 * Predicates over (header)->attributes.  EXISTS() and NONEXISTENT() are
 * complementary tests of RDATASET_ATTR_NONEXISTENT; the others test the
 * flag of the same name.
 */
306 #define EXISTS(header) \
307 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
308 #define NONEXISTENT(header) \
309 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
310 #define IGNORE(header) \
311 (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
312 #define RETAIN(header) \
313 (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
314 #define NXDOMAIN(header) \
315 (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
317 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
320 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
321 * There is a tradeoff issue about configuring this value: if this is too
322 * small, it may cause heavier contention between threads; if this is too large,
323 * LRU purge algorithm won't work well (entries tend to be purged prematurely).
324 * The default value should work well for most environments, but this can
325 * also be configurable at compilation time via the
326 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
327 * 1 due to the assumption of overmem_purge().
329 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
330 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
331 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
333 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
336 #define DEFAULT_CACHE_NODE_LOCK_COUNT 16
337 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
341 /* Protected in the refcount routines. */
342 isc_refcount_t references;
343 /* Locked by lock. */
344 isc_boolean_t exiting;
347 typedef struct rbtdb_changed {
348 dns_rbtnode_t * node;
350 ISC_LINK(struct rbtdb_changed) link;
353 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
355 typedef struct rbtdb_version {
357 rbtdb_serial_t serial;
359 * Protected in the refcount routines.
360 * XXXJT: should we change the lock policy based on the refcount
363 isc_refcount_t references;
364 /* Locked by database lock. */
365 isc_boolean_t writer;
366 isc_boolean_t commit_ok;
367 rbtdb_changedlist_t changed_list;
368 ISC_LINK(struct rbtdb_version) link;
371 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
376 #if DNS_RBTDB_USERWLOCK
381 isc_rwlock_t tree_lock;
382 unsigned int node_lock_count;
383 rbtdb_nodelock_t * node_locks;
384 dns_rbtnode_t * origin_node;
385 dns_stats_t * rrsetstats; /* cache DB only */
386 /* Locked by lock. */
388 isc_refcount_t references;
389 unsigned int attributes;
390 rbtdb_serial_t current_serial;
391 rbtdb_serial_t least_serial;
392 rbtdb_serial_t next_serial;
393 rbtdb_version_t * current_version;
394 rbtdb_version_t * future_version;
395 rbtdb_versionlist_t open_versions;
396 isc_boolean_t overmem;
398 dns_dbnode_t *soanode;
399 dns_dbnode_t *nsnode;
402 * This is a linked list used to implement the LRU cache. There will
403 * be node_lock_count linked lists here. Nodes in bucket 1 will be
404 * placed on the linked list rdatasets[1].
406 rdatasetheaderlist_t *rdatasets;
409 * Temporary storage for stale cache nodes and dynamically deleted
410 * nodes that await being cleaned up.
412 rbtnodelist_t *deadnodes;
415 * Heaps. Each of these is used for TTL based expiry.
419 /* Locked by tree_lock. */
421 isc_boolean_t secure;
424 unsigned int quantum;
427 #define RBTDB_ATTR_LOADED 0x01
428 #define RBTDB_ATTR_LOADING 0x02
435 rbtdb_version_t * rbtversion;
436 rbtdb_serial_t serial;
437 unsigned int options;
438 dns_rbtnodechain_t chain;
439 isc_boolean_t copy_name;
440 isc_boolean_t need_cleanup;
442 dns_rbtnode_t * zonecut;
443 rdatasetheader_t * zonecut_rdataset;
444 rdatasetheader_t * zonecut_sigrdataset;
445 dns_fixedname_t zonecut_name;
457 static void rdataset_disassociate(dns_rdataset_t *rdataset);
458 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
459 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
460 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
461 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
462 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
463 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
465 dns_rdataset_t *nsec,
466 dns_rdataset_t *nsecsig);
467 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
468 dns_rdatasetadditional_t type,
469 dns_rdatatype_t qtype,
470 dns_acache_t *acache,
473 dns_dbversion_t **versionp,
474 dns_dbnode_t **nodep,
478 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
479 dns_rdatasetadditional_t type,
480 dns_rdatatype_t qtype,
481 dns_acache_t *acache,
484 dns_dbversion_t *version,
487 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
488 dns_rdataset_t *rdataset,
489 dns_rdatasetadditional_t type,
490 dns_rdatatype_t qtype);
491 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
493 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
495 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
496 isc_boolean_t tree_locked);
497 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
498 isc_stdtime_t now, isc_boolean_t tree_locked);
499 static void prune_tree(isc_task_t *task, isc_event_t *event);
501 static dns_rdatasetmethods_t rdataset_methods = {
502 rdataset_disassociate,
510 rdataset_getadditional,
511 rdataset_setadditional,
512 rdataset_putadditional
515 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
516 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
517 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
518 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
519 dns_rdataset_t *rdataset);
521 static dns_rdatasetitermethods_t rdatasetiter_methods = {
522 rdatasetiter_destroy,
/*
 * rdataset iterator state: the generic dns_rdatasetiter_t followed by a
 * pointer to the rdatasetheader the iterator is currently positioned on.
 */
528 typedef struct rbtdb_rdatasetiter {
529 dns_rdatasetiter_t common;
530 rdatasetheader_t * current;
531 } rbtdb_rdatasetiter_t;
533 static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
534 static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
535 static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
536 static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
538 static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
539 static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
540 static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
541 dns_dbnode_t **nodep,
543 static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
544 static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
547 static dns_dbiteratormethods_t dbiterator_methods = {
559 #define DELETION_BATCH_MAX 64
562 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
/*
 * Database iterator state: the generic dns_dbiterator_t, pause/origin
 * flags, the tree lock type recorded in 'tree_locked', fixed-name storage
 * for the current name and origin, the rbt node chain, and a fixed-size
 * array of up to DELETION_BATCH_MAX nodes ('deletions').
 * NOTE(review): some short member lines are missing from this extract.
 */
564 typedef struct rbtdb_dbiterator {
565 dns_dbiterator_t common;
566 isc_boolean_t paused;
567 isc_boolean_t new_origin;
568 isc_rwlocktype_t tree_locked;
570 dns_fixedname_t name;
571 dns_fixedname_t origin;
572 dns_rbtnodechain_t chain;
574 dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
576 } rbtdb_dbiterator_t;
/*
 * Database-kind predicates: test the DNS_DBATTR_STUB / DNS_DBATTR_CACHE
 * bits in (rbtdb)->common.attributes.
 */
579 #define IS_STUB(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
580 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
582 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
584 static void overmem(dns_db_t *db, isc_boolean_t overmem);
587 * 'init_count' is used to initialize 'newheader->count' which in turn
588 * is used to determine where in the cycle rrset-order cyclic starts.
589 * We don't lock this as we don't care about simultaneous updates.
592 * Both init_count and header->count can be ISC_UINT32_MAX.
593 * The count on the returned rdataset however can't be as
594 * that indicates that the database does not implement cyclic
597 static unsigned int init_count;
602 * If a routine is going to lock more than one lock in this module, then
603 * the locking must be done in the following order:
607 * Node Lock (Only one from the set may be locked at one time by
612 * Failure to follow this hierarchy can result in deadlock.
618 * For zone databases the node for the origin of the zone MUST NOT be deleted.
/*
 * attach: take an additional reference on the database 'source'.
 *
 * The generic handle is cast to dns_rbtdb_t, validated with VALID_RBTDB(),
 * and its 'references' counter is incremented.
 * NOTE(review): short lines (return type, closing brace, and presumably the
 * store into *targetp) are missing from this extract -- confirm against the
 * full file.
 */
627 attach(dns_db_t *source, dns_db_t **targetp) {
628 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
/* Reject handles whose implementation magic is not RBTDB_MAGIC. */
630 REQUIRE(VALID_RBTDB(rbtdb));
/* Second argument is NULL here; other call sites pass a &refs out-parameter. */
632 isc_refcount_increment(&rbtdb->references, NULL);
/*
 * free_rbtdb_callback: task event handler that calls free_rbtdb() on the
 * database stored in event->ev_arg, with log = ISC_TRUE and the event
 * itself passed through.
 */
638 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
639 dns_rbtdb_t *rbtdb = event->ev_arg;
643 free_rbtdb(rbtdb, ISC_TRUE, event);
/*
 * update_rrsetstats: adjust the rrset statistics counter that corresponds
 * to 'header' in rbtdb->rrsetstats.
 *
 * The counter key is DNS_RDATASTATSTYPE_VALUE(base, statattributes):
 *   - NXDOMAIN(header): the NXDOMAIN attribute with base left at 0;
 *   - base part of header->type is 0: the NXRRSET attribute, keyed on the
 *     ext part of the type;
 *   - presumably otherwise: no attribute, keyed on the base part.
 * NOTE(review): the test selecting between the increment and decrement
 * calls is not visible in this extract; presumably it is 'increment'.
 */
647 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
648 isc_boolean_t increment)
650 dns_rdatastatstype_t statattributes = 0;
651 dns_rdatastatstype_t base = 0;
652 dns_rdatastatstype_t type;
654 /* At the moment we count statistics only for cache DB */
655 INSIST(IS_CACHE(rbtdb));
657 if (NXDOMAIN(header))
658 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
659 else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
660 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
661 base = RBTDB_RDATATYPE_EXT(header->type);
663 base = RBTDB_RDATATYPE_BASE(header->type);
665 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
667 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
669 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
/*
 * set_ttl: store 'newttl' in header->rdh_ttl and, when the header sits on a
 * TTL heap, notify that heap of the change.
 *
 * The heap is rbtdb->heaps[header->node->locknum].  Nothing further applies
 * when heap_index is 0, when the TTL did not change, or when the heap array
 * or the bucket's heap is NULL.
 * NOTE(review): the early-return statements and the comparison that picks
 * isc_heap_increased() versus isc_heap_decreased() are missing from this
 * extract.
 */
673 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
678 oldttl = header->rdh_ttl;
679 header->rdh_ttl = newttl;
682 * It's possible the rbtdb is not a cache. If this is the case,
683 * we will not have a heap, and we move on. If we do, though,
684 * we might need to adjust things.
686 if (header->heap_index == 0 || newttl == oldttl)
688 idx = header->node->locknum;
689 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
691 heap = rbtdb->heaps[idx];
694 isc_heap_increased(heap, header->heap_index);
696 isc_heap_decreased(heap, header->heap_index);
700 * This function allows the heap code to rank the priority of each
701 * element. It returns ISC_TRUE if v1 happens "sooner" than v2.
/*
 * Heap ordering callback: both arguments are rdatasetheader_t pointers and
 * are compared by their rdh_ttl fields.
 * NOTE(review): the return statements are missing from this extract.
 */
704 ttl_sooner(void *v1, void *v2) {
705 rdatasetheader_t *h1 = v1;
706 rdatasetheader_t *h2 = v2;
708 if (h1->rdh_ttl < h2->rdh_ttl)
714 * This function sets the heap index into the header.
/*
 * Heap index callback: 'what' is an rdatasetheader_t; records 'index' in
 * its heap_index field (set_ttl() and free_rdataset() treat 0 as "not on a
 * heap").
 */
717 ttl_set_index(void *what, unsigned int index) {
718 rdatasetheader_t *h = what;
720 h->heap_index = index;
724 * Work out how many nodes can be deleted in the time between two
725 * requests to the nameserver. Smooth the resulting number and use it
726 * as an estimate for the number of nodes to be deleted in the next
/*
 * adjust_quantum: derive a new deletion quantum from the previous one
 * ('old') and the time elapsed since 'start'.
 *
 * interval = 1000000 / pps microseconds, where pps is taken from dns_pps.
 * The raw estimate is old * interval / usecs, then smoothed as
 * (new + 3 * old) / 4 and logged at debug level 1.
 * NOTE(review): clamping of pps/new, the zero-elapsed-time branch and the
 * return statements are missing from this extract.
 * NOTE(review): 'new' is computed from unsigned operands; if it is declared
 * unsigned int (declaration not visible), "%u" rather than "%d" would be
 * the matching conversion in the log call.
 */
730 adjust_quantum(unsigned int old, isc_time_t *start) {
731 unsigned int pps = dns_pps; /* packets per second */
732 unsigned int interval;
741 interval = 1000000 / pps; /* interval in usec */
744 usecs = isc_time_microdiff(&end, start);
747 * We were unable to measure the amount of time taken.
748 * Double the nodes deleted next time.
755 new = old * interval;
756 new /= (unsigned int)usecs;
763 new = (new + old * 3) / 4;
765 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
766 ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
/*
 * free_rbtdb: tear down an rbtdb and release its memory.
 *
 * Visible steps, in order: release the current version; unlink every node
 * from the per-bucket deadnodes lists; destroy the tree with a quantum
 * (100 when a task is attached, otherwise 0); on ISC_R_QUOTA allocate a
 * DNS_EVENT_FREESTORAGE event and send it to rbtdb->task; otherwise free
 * the origin name, node locks, LRU lists, deadnode lists, TTL heaps, stats,
 * locks and finally the structure itself, then notify 'ondest'.
 * NOTE(review): many short lines (declarations, braces, returns, labels)
 * are missing from this extract; control flow between the visible
 * statements should be confirmed against the full file.
 */
772 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
774 isc_ondestroy_t ondest;
776 char buf[DNS_NAME_FORMATSIZE];
/*
 * For class-IN cache databases, call overmem() with (isc_boolean_t)-1.
 * NOTE(review): the meaning of this sentinel is defined by overmem(), whose
 * body is not visible here.
 */
779 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
780 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
782 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
783 REQUIRE(rbtdb->future_version == NULL);
/* Drop the current version: unref, unlink from open_versions, free. */
785 if (rbtdb->current_version != NULL) {
788 isc_refcount_decrement(&rbtdb->current_version->references,
791 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
792 isc_refcount_destroy(&rbtdb->current_version->references);
793 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
794 sizeof(rbtdb_version_t));
798 * We assume the number of remaining dead nodes is reasonably small;
799 * the overhead of unlinking all nodes here should be negligible.
801 for (i = 0; i < rbtdb->node_lock_count; i++) {
804 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
805 while (node != NULL) {
806 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
807 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
/* A non-zero quantum is only used when a task is attached to the database. */
812 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
814 if (rbtdb->tree != NULL) {
815 isc_time_now(&start);
816 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
/* ISC_R_QUOTA: re-tune the quantum and send an event to the task. */
817 if (result == ISC_R_QUOTA) {
818 INSIST(rbtdb->task != NULL);
819 if (rbtdb->quantum != 0)
820 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
823 event = isc_event_allocate(rbtdb->common.mctx,
825 DNS_EVENT_FREESTORAGE,
828 sizeof(isc_event_t));
831 isc_task_send(rbtdb->task, &event);
834 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
837 isc_event_free(&event);
/* Format the origin for the log message, or use a placeholder. */
839 if (dns_name_dynamic(&rbtdb->common.origin))
840 dns_name_format(&rbtdb->common.origin, buf,
843 strcpy(buf, "<UNKNOWN>");
844 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
845 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
846 "done free_rbtdb(%s)", buf);
848 if (dns_name_dynamic(&rbtdb->common.origin))
849 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
/* Destroy each bucket's reference counter and node lock. */
850 for (i = 0; i < rbtdb->node_lock_count; i++) {
851 isc_refcount_destroy(&rbtdb->node_locks[i].references);
852 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
856 * Clean up LRU cache objects.
858 if (rbtdb->rdatasets != NULL) {
859 for (i = 0; i < rbtdb->node_lock_count; i++)
860 INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
861 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
862 rbtdb->node_lock_count *
863 sizeof(rdatasetheaderlist_t));
866 * Clean up dead node buckets.
868 if (rbtdb->deadnodes != NULL) {
869 for (i = 0; i < rbtdb->node_lock_count; i++)
870 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
871 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
872 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
875 * Clean up TTL heap cache objects.
877 if (rbtdb->heaps != NULL) {
878 for (i = 0; i < rbtdb->node_lock_count; i++)
879 isc_heap_destroy(&rbtdb->heaps[i]);
880 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
881 rbtdb->node_lock_count *
882 sizeof(isc_heap_t *));
885 if (rbtdb->rrsetstats != NULL)
886 dns_stats_detach(&rbtdb->rrsetstats);
888 isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
889 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
890 isc_rwlock_destroy(&rbtdb->tree_lock);
891 isc_refcount_destroy(&rbtdb->references);
892 if (rbtdb->task != NULL)
893 isc_task_detach(&rbtdb->task);
895 RBTDB_DESTROYLOCK(&rbtdb->lock);
/* Invalidate both magic numbers before the memory is returned. */
896 rbtdb->common.magic = 0;
897 rbtdb->common.impmagic = 0;
/* Copy 'ondest' out first: the structure is freed before the notify call. */
898 ondest = rbtdb->common.ondest;
899 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
900 isc_ondestroy_notify(&ondest, rbtdb);
/*
 * maybe_free_rbtdb: start database destruction if nothing is still active.
 *
 * Detaches the cached soanode/nsnode, marks every node-lock bucket as
 * exiting (under that bucket's lock), subtracts 'inactive' from
 * rbtdb->active under the database lock, and sets want_free when the active
 * count reaches 0.  The visible tail logs and calls free_rbtdb().
 * NOTE(review): the comparison completing the isc_refcount_current() test,
 * the update of 'inactive' and the 'if (want_free)' guard are missing from
 * this extract.
 */
904 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
905 isc_boolean_t want_free = ISC_FALSE;
907 unsigned int inactive = 0;
909 /* XXX check for open versions here */
911 if (rbtdb->soanode != NULL)
912 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
913 if (rbtdb->nsnode != NULL)
914 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
917 * Even though there are no external direct references, there still
918 * may be nodes in use.
920 for (i = 0; i < rbtdb->node_lock_count; i++) {
921 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
922 rbtdb->node_locks[i].exiting = ISC_TRUE;
923 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
924 if (isc_refcount_current(&rbtdb->node_locks[i].references)
/* 'active' is read and modified only while the database lock is held. */
931 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
932 rbtdb->active -= inactive;
933 if (rbtdb->active == 0)
934 want_free = ISC_TRUE;
935 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
937 char buf[DNS_NAME_FORMATSIZE];
938 if (dns_name_dynamic(&rbtdb->common.origin))
939 dns_name_format(&rbtdb->common.origin, buf,
942 strcpy(buf, "<UNKNOWN>");
943 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
944 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
945 "calling free_rbtdb(%s)", buf);
946 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * detach: drop one reference on the database *dbp.
 *
 * Validates the handle, decrements rbtdb->references (the new count is
 * returned through 'refs'), and calls maybe_free_rbtdb().
 * NOTE(review): the condition guarding maybe_free_rbtdb() (presumably
 * refs == 0) and the clearing of *dbp are missing from this extract.
 */
952 detach(dns_db_t **dbp) {
953 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
956 REQUIRE(VALID_RBTDB(rbtdb));
958 isc_refcount_decrement(&rbtdb->references, &refs);
961 maybe_free_rbtdb(rbtdb);
/*
 * currentversion: hand out a referenced pointer to the database's current
 * version through *versionp.
 *
 * rbtdb->current_version is read and its reference count incremented while
 * the database lock is held in read mode, so the pointer and the new
 * reference are taken together.
 */
967 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
968 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
969 rbtdb_version_t *version;
972 REQUIRE(VALID_RBTDB(rbtdb));
974 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
975 version = rbtdb->current_version;
976 isc_refcount_increment(&version->references, &refs);
977 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
979 *versionp = (dns_dbversion_t *)version;
/*
 * allocate_version: allocate and initialise an rbtdb_version_t from 'mctx'.
 *
 * Sets serial, initialises the reference counter to 'references', records
 * 'writer', clears commit_ok, and initialises changed_list and the list
 * link.  If isc_refcount_init() fails the allocation is returned to 'mctx'.
 * NOTE(review): the NULL checks and return statements are missing from
 * this extract; newversion() treats a NULL result as out-of-memory.
 */
982 static inline rbtdb_version_t *
983 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
984 unsigned int references, isc_boolean_t writer)
987 rbtdb_version_t *version;
989 version = isc_mem_get(mctx, sizeof(*version));
992 version->serial = serial;
993 result = isc_refcount_init(&version->references, references);
994 if (result != ISC_R_SUCCESS) {
995 isc_mem_put(mctx, version, sizeof(*version));
998 version->writer = writer;
999 version->commit_ok = ISC_FALSE;
1000 ISC_LIST_INIT(version->changed_list);
1001 ISC_LINK_INIT(version, link);
/*
 * newversion: open a new version of the database.
 *
 * Requires that *versionp is NULL and that no future version is already
 * open.  Under the database write lock a version with serial
 * rbtdb->next_serial and one reference is allocated; on success it is
 * marked commit_ok, next_serial is advanced, and the version becomes
 * rbtdb->future_version.
 *
 * Returns ISC_R_NOMEMORY when allocation fails, otherwise ISC_R_SUCCESS
 * with the new version stored in *versionp.
 */
1007 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1008 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1009 rbtdb_version_t *version;
1011 REQUIRE(VALID_RBTDB(rbtdb));
1012 REQUIRE(versionp != NULL && *versionp == NULL);
1013 REQUIRE(rbtdb->future_version == NULL);
1015 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1016 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
1017 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1019 if (version != NULL) {
1020 version->commit_ok = ISC_TRUE;
1021 rbtdb->next_serial++;
1022 rbtdb->future_version = version;
1024 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/* The failure is reported only after the database lock has been released. */
1026 if (version == NULL)
1027 return (ISC_R_NOMEMORY);
1029 *versionp = version;
1031 return (ISC_R_SUCCESS);
/*
 * attachversion: take an additional reference on version 'source' and
 * return it through *targetp.
 *
 * Only the version's own reference counter is touched; no database lock is
 * taken in the visible code.
 */
1035 attachversion(dns_db_t *db, dns_dbversion_t *source,
1036 dns_dbversion_t **targetp)
1038 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1039 rbtdb_version_t *rbtversion = source;
1042 REQUIRE(VALID_RBTDB(rbtdb));
1044 isc_refcount_increment(&rbtversion->references, &refs);
1047 *targetp = rbtversion;
/*
 * add_changed: record that 'node' was modified in writer version 'version'.
 *
 * A new rbtdb_changed_t is allocated before the database write lock is
 * taken.  With the lock held and the allocation successful, the node's
 * reference count is incremented and the entry (dirty = ISC_FALSE) is
 * appended to version->changed_list.  version->commit_ok is also cleared
 * here.
 * NOTE(review): the 'else' that presumably ties the commit_ok assignment to
 * allocation failure, and the return statement, are missing from this
 * extract.
 */
1050 static rbtdb_changed_t *
1051 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1052 dns_rbtnode_t *node)
1054 rbtdb_changed_t *changed;
1058 * Caller must be holding the node lock if its reference must be
1059 * protected by the lock.
1062 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1064 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1066 REQUIRE(version->writer);
1068 if (changed != NULL) {
1069 dns_rbtnode_refincrement(node, &refs);
1071 changed->node = node;
1072 changed->dirty = ISC_FALSE;
1073 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1075 version->commit_ok = ISC_FALSE;
1077 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * free_acachearray: free an array of acachectl_t entries associated with
 * 'header'.
 *
 * The element count is read as a big-endian 16-bit value from the first two
 * bytes that follow the header structure.  Every entry must already have
 * NULL 'entry' and 'cbarg' pointers.
 * NOTE(review): the remainder of the parameter list (the 'array' argument)
 * and any NULL-array early return are missing from this extract.
 */
1083 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1088 unsigned char *raw; /* RDATASLAB */
1091 * The caller must be holding the corresponding node lock.
1097 raw = (unsigned char *)header + sizeof(*header);
1098 count = raw[0] * 256 + raw[1];
1101 * Sanity check: since an additional cache entry has a reference to
1102 * the original DB node (in the callback arg), there should be no
1103 * acache entries when the node can be freed.
1105 for (i = 0; i < count; i++)
1106 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1108 isc_mem_put(mctx, array, count * sizeof(acachectl_t));
/*
 * free_noqname: release a 'struct noqname' and what it owns.
 *
 * Frees the name if it is dynamically allocated, then the nsec and nsecsig
 * slabs when present (their sizes come from dns_rdataslab_size() with a
 * reserve length of 0), and finally the structure itself.
 * NOTE(review): resetting *noqname to NULL, if done, is not visible in this
 * extract.
 */
1112 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1114 if (dns_name_dynamic(&(*noqname)->name))
1115 dns_name_free(&(*noqname)->name, mctx);
1116 if ((*noqname)->nsec != NULL)
1117 isc_mem_put(mctx, (*noqname)->nsec,
1118 dns_rdataslab_size((*noqname)->nsec, 0));
1119 if ((*noqname)->nsecsig != NULL)
1120 isc_mem_put(mctx, (*noqname)->nsecsig,
1121 dns_rdataslab_size((*noqname)->nsecsig, 0));
1122 isc_mem_put(mctx, *noqname, sizeof(**noqname));
/*
 * init_rdataset: initialise the LRU list link of header 'h'.
 * NOTE(review): the fprintf() trace below is presumably inside a debugging
 * #if block whose directive lines are missing from this extract; other
 * field initialisation may also be missing.
 */
1127 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1129 ISC_LINK_INIT(h, lru_link);
1133 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1134 fprintf(stderr, "initialized header: %p\n", h);
/*
 * new_rdataset: allocate an rdatasetheader_t from 'mctx' and initialise it
 * with init_rdataset().
 * NOTE(review): the allocation-failure check, the return statement and the
 * directives that presumably guard the fprintf() trace are missing from
 * this extract.
 */
1140 static inline rdatasetheader_t *
1141 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1143 rdatasetheader_t *h;
1145 h = isc_mem_get(mctx, sizeof(*h));
1150 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1151 fprintf(stderr, "allocated header: %p\n", h);
1153 init_rdataset(rbtdb, h);
/*
 * free_rdataset: release one rdataset header and everything attached to
 * it.
 *
 * Steps visible here: decrement the rrset statistics if the header exists
 * and was counted (STATCOUNT); for cache databases, unlink it from the
 * bucket's LRU list and remove it from the bucket's TTL heap; free the
 * noqname data and both additional-data arrays; finally return the memory.
 * The size returned to 'mctx' is sizeof(*rdataset) for NONEXISTENT headers
 * and the dns_rdataslab_size() result otherwise.
 */
1158 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1162 if (EXISTS(rdataset) &&
1163 (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1164 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
/* Bucket index for both the LRU list and the heap is the node's locknum. */
1167 if (IS_CACHE(rbtdb) && ISC_LINK_LINKED(rdataset, lru_link)) {
1168 int idx = rdataset->node->locknum;
1169 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, lru_link);
1170 if (rdataset->heap_index != 0) {
1171 isc_heap_delete(rbtdb->heaps[idx],
1172 rdataset->heap_index);
1174 rdataset->heap_index = 0;
1177 if (rdataset->noqname != NULL)
1178 free_noqname(mctx, &rdataset->noqname);
1180 free_acachearray(mctx, rdataset, rdataset->additional_auth);
1181 free_acachearray(mctx, rdataset, rdataset->additional_glue);
1183 if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1184 size = sizeof(*rdataset);
1186 size = dns_rdataslab_size((unsigned char *)rdataset,
1188 isc_mem_put(mctx, rdataset, size);
/*
 * rollback_node: mark every rdataset of 'node' that carries serial
 * 'serial' as ignored.
 *
 * Walks the top-level header list via 'next' and each header's older
 * versions via 'down', setting RDATASET_ATTR_IGNORE on matches and
 * recording in make_dirty that something changed.
 * NOTE(review): what is done with make_dirty afterwards is not visible in
 * this extract.
 */
1192 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1193 rdatasetheader_t *header, *dcurrent;
1194 isc_boolean_t make_dirty = ISC_FALSE;
1197 * Caller must hold the node lock.
1201 * We set the IGNORE attribute on rdatasets with serial number
1202 * 'serial'. When the reference count goes to zero, these rdatasets
1203 * will be cleaned up; until that time, they will be ignored.
1205 for (header = node->data; header != NULL; header = header->next) {
1206 if (header->serial == serial) {
1207 header->attributes |= RDATASET_ATTR_IGNORE;
1208 make_dirty = ISC_TRUE;
1210 for (dcurrent = header->down;
1212 dcurrent = dcurrent->down) {
1213 if (dcurrent->serial == serial) {
1214 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1215 make_dirty = ISC_TRUE;
/*
 * clean_stale_headers: free every header on the 'down' chain below 'top'.
 *
 * The next pointer is saved before each free_rdataset() call because the
 * header's memory is released by that call.
 * NOTE(review): resetting top->down, if done, is not visible in this
 * extract.
 */
1224 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1226 rdatasetheader_t *d, *down_next;
1228 for (d = top->down; d != NULL; d = down_next) {
1229 down_next = d->down;
1230 free_rdataset(rbtdb, mctx, d);
/*
 * clean_cache_node: prune a cache node's rdataset list.
 *
 * For each top-level header, all older versions on its 'down' chain are
 * freed; the header itself is unlinked from the 'next' list and freed when
 * it is NONEXISTENT or STALE.
 * NOTE(review): the initialisation and update of top_prev, and any
 * trailing bookkeeping on the node, are missing from this extract.
 */
1236 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1237 rdatasetheader_t *current, *top_prev, *top_next;
1238 isc_mem_t *mctx = rbtdb->common.mctx;
1241 * Caller must be holding the node lock.
/* top_next is captured first because 'current' may be freed below. */
1245 for (current = node->data; current != NULL; current = top_next) {
1246 top_next = current->next;
1247 clean_stale_headers(rbtdb, mctx, current);
1249 * If current is nonexistent or stale, we can clean it up.
1251 if ((current->attributes &
1252 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1253 if (top_prev != NULL)
1254 top_prev->next = current->next;
1256 node->data = current->next;
1257 free_rdataset(rbtdb, mctx, current);
/*
 * clean_zone_node: prune rdataset versions at a zone node that no open
 * version can still need.
 *
 * 'least_serial' must be non-zero (enforced by REQUIRE).  For every
 * top-level header at 'node' the visible code:
 *   1. walks the ->down chain and frees entries whose serial equals that
 *      of dparent (a second condition of the same test is not visible
 *      here), relinking dparent->down and the ->next of the successor;
 *   2. if the top-level header is flagged IGNORE, either removes it
 *      outright (no ->down entry) or promotes its first ->down entry into
 *      the top-level list in its place;
 *   3. looks for the first ->down entry with serial < least_serial and
 *      frees it together with everything below it, then cuts the chain at
 *      dparent;
 *   4. sets still_dirty when a ->down chain remains;
 *   5. removes a top-level header that is NONEXISTENT.
 *
 * NOTE(review): several statements (loop conditions, dparent/top_prev
 * updates, the branch structure around steps 4 and 5, and whatever
 * consumes still_dirty) are not visible in this excerpt -- confirm against
 * the full file.
 */
1265 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1266 rbtdb_serial_t least_serial)
1268 rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1269 rdatasetheader_t *top_prev, *top_next;
1270 isc_mem_t *mctx = rbtdb->common.mctx;
1271 isc_boolean_t still_dirty = ISC_FALSE;
1274 * Caller must be holding the node lock.
1276 REQUIRE(least_serial != 0);
1279 for (current = node->data; current != NULL; current = top_next) {
1280 top_next = current->next;
1283 * First, we clean up any instances of multiple rdatasets
1284 * with the same serial number, or that have the IGNORE
1288 for (dcurrent = current->down;
1290 dcurrent = down_next) {
1291 down_next = dcurrent->down;
1292 INSIST(dcurrent->serial <= dparent->serial);
1293 if (dcurrent->serial == dparent->serial ||
1295 if (down_next != NULL)
1296 down_next->next = dparent;
1297 dparent->down = down_next;
1298 free_rdataset(rbtdb, mctx, dcurrent);
1304 * We've now eliminated all IGNORE datasets with the possible
1305 * exception of current, which we now check.
1307 if (IGNORE(current)) {
1308 down_next = current->down;
1309 if (down_next == NULL) {
1310 if (top_prev != NULL)
1311 top_prev->next = current->next;
1313 node->data = current->next;
1314 free_rdataset(rbtdb, mctx, current);
1316 * current no longer exists, so we can
1317 * just continue with the loop.
1322 * Pull up current->down, making it the new
1325 if (top_prev != NULL)
1326 top_prev->next = down_next;
1328 node->data = down_next;
1329 down_next->next = top_next;
1330 free_rdataset(rbtdb, mctx, current);
1331 current = down_next;
1336 * We now try to find the first down node less than the
1340 for (dcurrent = current->down;
1342 dcurrent = down_next) {
1343 down_next = dcurrent->down;
1344 if (dcurrent->serial < least_serial)
1350 * If there is a such an rdataset, delete it and any older
1353 if (dcurrent != NULL) {
1355 down_next = dcurrent->down;
1356 INSIST(dcurrent->serial <= least_serial);
1357 free_rdataset(rbtdb, mctx, dcurrent);
1358 dcurrent = down_next;
1359 } while (dcurrent != NULL);
1360 dparent->down = NULL;
1364 * Note. The serial number of 'current' might be less than
1365 * least_serial too, but we cannot delete it because it is
1366 * the most recent version, unless it is a NONEXISTENT
1369 if (current->down != NULL) {
1370 still_dirty = ISC_TRUE;
1374 * If this is a NONEXISTENT rdataset, we can delete it.
1376 if (NONEXISTENT(current)) {
1377 if (top_prev != NULL)
1378 top_prev->next = current->next;
1380 node->data = current->next;
1381 free_rdataset(rbtdb, mctx, current);
1391 * Clean up dead nodes. These are nodes which have no references, and
1392 * have no data. They are dead but we could not or chose not to delete
1393 * them when we deleted all the data at that node because we did not want
1394 * to wait for the tree write lock.
1396 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
/*
 * cleanup_dead_nodes: delete queued dead nodes of lock bucket 'bucketnum'
 * from the tree.
 *
 * Repeatedly takes the head of rbtdb->deadnodes[bucketnum], unlinks it,
 * asserts that it has no references and no data, and removes it with
 * dns_rbt_deletenode().  A failed deletion is only logged at warning
 * level; the loop then moves on to the new list head.  The loop stops when
 * the list is empty or 'count' is no longer positive.
 *
 * NOTE(review): 'count' starts at 10, but the statement that decrements it
 * is not visible in this excerpt -- confirm.
 */
1399 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1400 dns_rbtnode_t *node;
1401 isc_result_t result;
1402 int count = 10; /* XXXJT: should be adjustable */
1404 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1405 while (node != NULL && count > 0) {
1406 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1409 * Since we're holding a tree write lock, it should be
1410 * impossible for this node to be referenced by others.
1412 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1413 node->data == NULL);
1415 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1416 if (result != ISC_R_SUCCESS)
1417 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1418 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1419 "cleanup_dead_nodes: "
1420 "dns_rbt_deletenode: %s",
1421 isc_result_totext(result));
1422 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1428 * Caller must be holding the node lock if its reference must be protected
/*
 * new_reference: take a reference on 'node'.
 *
 * Increments the reference counter of the node.  When the resulting count
 * is 1, i.e. this is the first reference to the node, the reference
 * counter of the lock bucket the node belongs to
 * (rbtdb->node_locks[node->locknum].references) is incremented as well.
 * Both resulting counts are asserted to be non-zero.
 */
1432 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1433 unsigned int lockrefs, noderefs;
1434 isc_refcount_t *lockref;
1436 dns_rbtnode_refincrement0(node, &noderefs);
1437 if (noderefs == 1) { /* this is the first reference to the node */
1438 lockref = &rbtdb->node_locks[node->locknum].references;
1439 isc_refcount_increment0(lockref, &lockrefs);
1440 INSIST(lockrefs != 0);
1442 INSIST(noderefs != 0);
1446 * This function is assumed to be called when a node is newly referenced
1447 * and can be in the deadnode list. In that case the node must be retrieved
1448 * from the list because it is going to be used. In addition, if the caller
1449 * happens to hold a write lock on the tree, it's a good chance to purge dead
1451 * Note: while a new reference is gained in multiple places, there are only very
1452 * few cases where the node can be in the deadnode list (only empty nodes can
1453 * have been added to the list).
/*
 * reactivate_node: gain a reference on 'node' and, if needed, take it off
 * the dead-node list of its lock bucket.
 *
 *   rbtdb        - database that owns 'node'.
 *   node         - node being referenced.
 *   treelocktype - type of tree lock held by the caller.
 *
 * Under the "strong" lock of the bucket the node is referenced with
 * new_reference().  A weak read lock is then used to decide whether more
 * work is required: need_relock is set when the node is linked on
 * 'deadlink', or when the bucket has dead nodes and the caller holds the
 * tree write lock.  Under the weak write lock the node is unlinked from
 * the dead list if it is on it and, when the tree is write-locked,
 * cleanup_dead_nodes() is run for the bucket.
 *
 * NOTE(review): the test of need_relock that guards the write-locked
 * section is not visible in this excerpt -- confirm.
 */
1456 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1457 isc_rwlocktype_t treelocktype)
1459 isc_boolean_t need_relock = ISC_FALSE;
1461 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1462 new_reference(rbtdb, node);
1464 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1465 isc_rwlocktype_read);
1466 if (ISC_LINK_LINKED(node, deadlink))
1467 need_relock = ISC_TRUE;
1468 else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1469 treelocktype == isc_rwlocktype_write)
1470 need_relock = ISC_TRUE;
1471 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1472 isc_rwlocktype_read);
1474 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1475 isc_rwlocktype_write);
1476 if (ISC_LINK_LINKED(node, deadlink))
1477 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1479 if (treelocktype == isc_rwlocktype_write)
1480 cleanup_dead_nodes(rbtdb, node->locknum);
1481 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1482 isc_rwlocktype_write);
1485 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1489 * Caller must be holding the node lock; either the "strong", read or write
1490 * lock. Note that the lock must be held even when node references are
1491 * atomically modified; in that case the decrement operation itself does not
1492 * have to be protected, but we must avoid a race condition where multiple
1493 * threads are decreasing the reference to zero simultaneously and at least
1494 * one of them is going to free the node.
1495 * This function returns ISC_TRUE if and only if the node reference decreases
/*
 * decrement_reference: drop one reference on 'node' and clean up or
 * delete the node when that becomes possible.
 *
 *   rbtdb        - database that owns 'node'.
 *   node         - node whose reference is released.
 *   least_serial - least open serial used for zone-node cleanup; when 0
 *                  the value is read from rbtdb->least_serial under the
 *                  database read lock.
 *   nlock        - type of node lock held by the caller; a read lock is
 *                  swapped for a write lock on the slow path and
 *                  downgraded again before leaving.
 *   tlock        - type of tree lock held by the caller; unless it is
 *                  already a write lock, a try-upgrade or try-lock is
 *                  attempted and undone before leaving.
 *   pruning      - true when called from prune_tree(); suppresses the
 *                  dispatch of another pruning event.
 *
 * Fast path: a clean node that still has data or a ->down pointer only has
 * its counter decremented, and ISC_TRUE is returned iff it reached zero.
 * Slow path: a dirty node without remaining references is cleaned
 * (clean_cache_node() or clean_zone_node()); if the node then has neither
 * data nor ->down, it is either deleted from the tree (tree write lock
 * obtained and count still zero), handed to a pruning event, or queued on
 * rbtdb->deadnodes[bucket] for later removal.
 *
 * The final return value is no_reference, which starts as ISC_TRUE and is
 * cleared when a pruning event (holding a fresh reference) is sent.
 *
 * NOTE(review): several early-return and else lines are not visible in
 * this excerpt; this summary covers only what can be read here.
 */
1498 static isc_boolean_t
1499 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1500 rbtdb_serial_t least_serial,
1501 isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1502 isc_boolean_t pruning)
1504 isc_result_t result;
1505 isc_boolean_t write_locked;
1506 rbtdb_nodelock_t *nodelock;
1507 unsigned int refs, nrefs;
1508 int bucket = node->locknum;
1509 isc_boolean_t no_reference;
1511 nodelock = &rbtdb->node_locks[bucket];
1513 /* Handle easy and typical case first. */
1514 if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1515 dns_rbtnode_refdecrement(node, &nrefs);
1516 INSIST((int)nrefs >= 0);
1518 isc_refcount_decrement(&nodelock->references, &refs);
1519 INSIST((int)refs >= 0);
1521 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1524 /* Upgrade the lock? */
1525 if (nlock == isc_rwlocktype_read) {
1526 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1527 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1529 dns_rbtnode_refdecrement(node, &nrefs);
1530 INSIST((int)nrefs >= 0);
1532 /* Restore the lock? */
1533 if (nlock == isc_rwlocktype_read)
1534 NODE_WEAKDOWNGRADE(&nodelock->lock);
1538 if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1539 if (IS_CACHE(rbtdb))
1540 clean_cache_node(rbtdb, node);
1542 if (least_serial == 0) {
1544 * Caller doesn't know the least serial.
1547 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1548 least_serial = rbtdb->least_serial;
1549 RBTDB_UNLOCK(&rbtdb->lock,
1550 isc_rwlocktype_read);
1552 clean_zone_node(rbtdb, node, least_serial);
1556 isc_refcount_decrement(&nodelock->references, &refs);
1557 INSIST((int)refs >= 0);
1560 * XXXDCL should this only be done for cache zones?
1562 if (node->data != NULL || node->down != NULL) {
1563 /* Restore the lock? */
1564 if (nlock == isc_rwlocktype_read)
1565 NODE_WEAKDOWNGRADE(&nodelock->lock);
1570 * Attempt to switch to a write lock on the tree. If this fails,
1571 * we will add this node to a linked list of nodes in this locking
1572 * bucket which we will free later.
1574 if (tlock != isc_rwlocktype_write) {
1576 * Locking hierarchy notwithstanding, we don't need to free
1577 * the node lock before acquiring the tree write lock because
1578 * we only do a trylock.
1580 if (tlock == isc_rwlocktype_read)
1581 result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1583 result = isc_rwlock_trylock(&rbtdb->tree_lock,
1584 isc_rwlocktype_write);
1585 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1586 result == ISC_R_LOCKBUSY);
1588 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1590 write_locked = ISC_TRUE;
1592 no_reference = ISC_TRUE;
1593 if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1595 * We can now delete the node if the reference counter is
1596 * zero. This should be typically the case, but a different
1597 * thread may still gain a (new) reference just before the
1598 * current thread locks the tree (e.g., in findnode()).
1602 * If this node is the only one in the level it's in, deleting
1603 * this node may recursively make its parent the only node in
1604 * the parent level; if so, and if no one is currently using
1605 * the parent node, this is almost the only opportunity to
1606 * clean it up. But the recursive cleanup is not that trivial
1607 * since the child and parent may be in different lock buckets,
1608 * which would cause a lock order reversal problem. To avoid
1609 * the trouble, we'll dispatch a separate event for batch
1610 * cleaning. We need to check whether we're deleting the node
1611 * as a result of pruning to avoid infinite dispatching.
1612 * Note: pruning happens only when a task has been set for the
1613 * rbtdb. If the user of the rbtdb chooses not to set a task,
1614 * it's their responsibility to purge stale leaves (e.g. by
1615 * periodic walk-through).
1617 if (!pruning && node->parent != NULL &&
1618 node->parent->down == node && node->left == NULL &&
1619 node->right == NULL && rbtdb->task != NULL) {
1623 ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1626 sizeof(isc_event_t));
1628 new_reference(rbtdb, node);
1630 attach((dns_db_t *)rbtdb, &db);
1632 isc_task_send(rbtdb->task, &ev);
1633 no_reference = ISC_FALSE;
1636 * XXX: this is a weird situation. We could
1637 * ignore this error case, but then the stale
1638 * node will unlikely be purged except via a
1639 * rare condition such as manual cleanup. So
1640 * we queue it in the deadnodes list, hoping
1641 * the memory shortage is temporary and the node
1642 * will be deleted later.
1644 isc_log_write(dns_lctx,
1645 DNS_LOGCATEGORY_DATABASE,
1646 DNS_LOGMODULE_CACHE,
1648 "decrement_reference: failed to "
1649 "allocate pruning event");
1650 INSIST(!ISC_LINK_LINKED(node, deadlink));
1651 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1655 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1656 char printname[DNS_NAME_FORMATSIZE];
1658 isc_log_write(dns_lctx,
1659 DNS_LOGCATEGORY_DATABASE,
1660 DNS_LOGMODULE_CACHE,
1662 "decrement_reference: "
1663 "delete from rbt: %p %s",
1665 dns_rbt_formatnodename(node,
1667 sizeof(printname)));
1670 INSIST(!ISC_LINK_LINKED(node, deadlink));
1671 result = dns_rbt_deletenode(rbtdb->tree, node,
1673 if (result != ISC_R_SUCCESS) {
1674 isc_log_write(dns_lctx,
1675 DNS_LOGCATEGORY_DATABASE,
1676 DNS_LOGMODULE_CACHE,
1678 "decrement_reference: "
1679 "dns_rbt_deletenode: %s",
1680 isc_result_totext(result));
1683 } else if (dns_rbtnode_refcurrent(node) == 0) {
1684 INSIST(!ISC_LINK_LINKED(node, deadlink));
1685 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1688 /* Restore the lock? */
1689 if (nlock == isc_rwlocktype_read)
1690 NODE_WEAKDOWNGRADE(&nodelock->lock);
1693 * Relock a read lock, or unlock the write lock if no lock was held.
1695 if (tlock == isc_rwlocktype_none)
1697 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1699 if (tlock == isc_rwlocktype_read)
1701 isc_rwlock_downgrade(&rbtdb->tree_lock);
1703 return (no_reference);
1707 * Prune the tree by recursively cleaning-up single leaves. In the worst
1708 * case, the number of iterations is the number of tree levels, which is at
1709 * most the maximum number of domain name labels, i.e., 127. In practice, this
1710 * should be much smaller (only a few times), and even the worst case would be
1711 * acceptable for a single event.
/*
 * prune_tree: task event handler that removes a leaf node and then walks
 * up through parents that have become empty.
 *
 *   task  - task delivering the event (not referenced in the visible code).
 *   event - ev_sender is the rbtdb, ev_arg the node to prune; the event is
 *           freed here.
 *
 * With the tree write lock and the write lock of the node bucket held,
 * each iteration releases the reference on 'node' through
 * decrement_reference() with pruning set to ISC_TRUE.  If the parent
 * exists and no longer has a ->down pointer, the parent becomes the next
 * candidate: the bucket lock is switched when the parent lives in another
 * bucket, a reference on the parent is taken, and the parent is unlinked
 * from the dead-node list if it is on it.  Finally both locks are dropped
 * and the database reference is detached.
 *
 * NOTE(review): the assignments that advance 'node' to the parent (or end
 * the loop) are not visible in this excerpt -- confirm.
 */
1714 prune_tree(isc_task_t *task, isc_event_t *event) {
1715 dns_rbtdb_t *rbtdb = event->ev_sender;
1716 dns_rbtnode_t *node = event->ev_arg;
1717 dns_rbtnode_t *parent;
1718 unsigned int locknum;
1722 isc_event_free(&event);
1724 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1725 locknum = node->locknum;
1726 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1728 parent = node->parent;
1729 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1730 isc_rwlocktype_write, ISC_TRUE);
1732 if (parent != NULL && parent->down == NULL) {
1734 * node was the only down child of the parent and has
1735 * just been removed. We'll then need to examine the
1736 * parent. Keep the lock if possible; otherwise,
1737 * release the old lock and acquire one for the parent.
1739 if (parent->locknum != locknum) {
1740 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1741 isc_rwlocktype_write);
1742 locknum = parent->locknum;
1743 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1744 isc_rwlocktype_write);
1748 * We need to gain a reference to the node before
1749 * decrementing it in the next iteration. In addition,
1750 * if the node is in the dead-nodes list, extract it
1751 * from the list beforehand as we do in
1752 * reactivate_node().
1754 new_reference(rbtdb, parent);
1755 if (ISC_LINK_LINKED(parent, deadlink)) {
1756 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1763 } while (node != NULL);
1764 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1765 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1767 detach((dns_db_t **)&rbtdb);
/*
 * make_least_version: make 'version' the least open version of 'rbtdb'.
 *
 * Records version->serial as rbtdb->least_serial and hands the
 * changed_list of the version to the caller through '*cleanup_list',
 * leaving version->changed_list re-initialised (empty).
 */
1771 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1772 rbtdb_changedlist_t *cleanup_list)
1775 * Caller must be holding the database lock.
1778 rbtdb->least_serial = version->serial;
1779 *cleanup_list = version->changed_list;
1780 ISC_LIST_INIT(version->changed_list);
/*
 * cleanup_nondirty: move the non-dirty change records of 'version' onto
 * '*cleanup_list'.
 *
 * next_changed is fetched before a record is unlinked so that the walk
 * over version->changed_list survives the removal.  Records whose 'dirty'
 * flag is set stay on the version.
 */
1784 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1785 rbtdb_changed_t *changed, *next_changed;
1788 * If the changed record is dirty, then
1789 * an update created multiple versions of
1790 * a given rdataset. We keep this list
1791 * until we're the least open version, at
1792 * which point it's safe to get rid of any
1795 * If the changed record isn't dirty, then
1796 * we don't need it anymore since we're
1797 * committing and not rolling back.
1799 * The caller must be holding the database lock.
1801 for (changed = HEAD(version->changed_list);
1803 changed = next_changed) {
1804 next_changed = NEXT(changed, link);
1805 if (!changed->dirty) {
1806 UNLINK(version->changed_list,
1808 APPEND(*cleanup_list,
/*
 * iszonesecure: examine the node 'origin' of 'db' for DNSSEC data.
 *
 * First the DNSKEY rdataset at 'origin' is fetched and scanned; haszonekey
 * is set when dns_zonekey_iszonekey() accepts one of the keys.  Then the
 * NSEC rdataset is fetched together with its signature rdataset
 * (signsecset).  Every rdataset that was obtained is disassociated again.
 *
 * NOTE(review): the statements that set hasnsec and compute the return
 * value are not visible in this excerpt; presumably the zone is reported
 * secure only when both a zone key and a signed NSEC exist -- confirm.
 */
1814 static isc_boolean_t
1815 iszonesecure(dns_db_t *db, dns_dbnode_t *origin) {
1816 dns_rdataset_t keyset;
1817 dns_rdataset_t nsecset, signsecset;
1818 isc_boolean_t haszonekey = ISC_FALSE;
1819 isc_boolean_t hasnsec = ISC_FALSE;
1820 isc_result_t result;
1822 dns_rdataset_init(&keyset);
1823 result = dns_db_findrdataset(db, origin, NULL, dns_rdatatype_dnskey, 0,
1825 if (result == ISC_R_SUCCESS) {
1826 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1827 result = dns_rdataset_first(&keyset);
1828 while (result == ISC_R_SUCCESS) {
1829 dns_rdataset_current(&keyset, &keyrdata);
1830 if (dns_zonekey_iszonekey(&keyrdata)) {
1831 haszonekey = ISC_TRUE;
1834 result = dns_rdataset_next(&keyset);
1836 dns_rdataset_disassociate(&keyset);
1841 dns_rdataset_init(&nsecset);
1842 dns_rdataset_init(&signsecset);
1843 result = dns_db_findrdataset(db, origin, NULL, dns_rdatatype_nsec, 0,
1844 0, &nsecset, &signsecset);
1845 if (result == ISC_R_SUCCESS) {
1846 if (dns_rdataset_isassociated(&signsecset)) {
1848 dns_rdataset_disassociate(&signsecset);
1850 dns_rdataset_disassociate(&nsecset);
/*
 * closeversion: release the reference of the caller on '*versionp' and,
 * when it was the last one, commit or roll back the version and clean up.
 *
 *   db       - the database (an rbtdb).
 *   versionp - version handle being closed.
 *   commit   - for a writer version, selects commit rather than rollback
 *              (NOTE(review): the test itself is not visible in this
 *              excerpt -- confirm).
 *
 * If other references remain after the decrement, only a sanity check that
 * the version is not the writer is made under the database read lock.
 * Otherwise, under the database write lock:
 *   - committing writer: the previous current version loses the reference
 *     held by the database and leaves open_versions; this version becomes
 *     the least version (if no other version is open) or has only its
 *     non-dirty change records queued for cleanup; it is then installed as
 *     current_version and gains the reference held by the database;
 *   - rolled-back writer: its whole changed_list is queued for cleanup and
 *     'rollback' is set;
 *   - reader: a version that is no longer current is scheduled for
 *     freeing, either making the next greater version the least version
 *     or passing its pending cleanups on to it.
 * After the database lock is dropped, the secure status of a zone is
 * refreshed after a writer commit, the obsolete version structure is
 * freed, and each queued change record is processed under the tree write
 * lock and the node write lock: dead nodes are purged, the node reference
 * is released with decrement_reference(), and the record is freed.
 *
 * NOTE(review): rollback_node() is presumably called only when 'rollback'
 * is set; the guarding condition is not visible here -- confirm.
 */
1856 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
1857 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1858 rbtdb_version_t *version, *cleanup_version, *least_greater;
1859 isc_boolean_t rollback = ISC_FALSE;
1860 rbtdb_changedlist_t cleanup_list;
1861 rbtdb_changed_t *changed, *next_changed;
1862 rbtdb_serial_t serial, least_serial;
1863 dns_rbtnode_t *rbtnode;
1865 isc_boolean_t writer;
1867 REQUIRE(VALID_RBTDB(rbtdb));
1868 version = (rbtdb_version_t *)*versionp;
1870 cleanup_version = NULL;
1871 ISC_LIST_INIT(cleanup_list);
1873 isc_refcount_decrement(&version->references, &refs);
1874 if (refs > 0) { /* typical and easy case first */
1876 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1877 INSIST(!version->writer);
1878 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1883 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1884 serial = version->serial;
1885 writer = version->writer;
1886 if (version->writer) {
1889 rbtdb_version_t *cur_version;
1891 INSIST(version->commit_ok);
1892 INSIST(version == rbtdb->future_version);
1894 * The current version is going to be replaced.
1895 * Release the (likely last) reference to it from the
1896 * DB itself and unlink it from the open list.
1898 cur_version = rbtdb->current_version;
1899 isc_refcount_decrement(&cur_version->references,
1902 if (cur_version->serial == rbtdb->least_serial)
1903 INSIST(EMPTY(cur_version->changed_list));
1904 UNLINK(rbtdb->open_versions,
1907 if (EMPTY(rbtdb->open_versions)) {
1909 * We're going to become the least open
1912 make_least_version(rbtdb, version,
1916 * Some other open version is the
1917 * least version. We can't cleanup
1918 * records that were changed in this
1919 * version because the older versions
1920 * may still be in use by an open
1923 * We can, however, discard the
1924 * changed records for things that
1925 * we've added that didn't exist in
1928 cleanup_nondirty(version, &cleanup_list);
1931 * If the (soon to be former) current version
1932 * isn't being used by anyone, we can clean
1936 cleanup_version = cur_version;
1937 APPENDLIST(version->changed_list,
1938 cleanup_version->changed_list,
1942 * Become the current version.
1944 version->writer = ISC_FALSE;
1945 rbtdb->current_version = version;
1946 rbtdb->current_serial = version->serial;
1947 rbtdb->future_version = NULL;
1950 * Keep the current version in the open list, and
1951 * gain a reference for the DB itself (see the DB
1952 * creation function below). This must be the only
1953 * case where we need to increment the counter from
1954 * zero and need to use isc_refcount_increment0().
1956 isc_refcount_increment0(&version->references,
1958 INSIST(cur_ref == 1);
1959 PREPEND(rbtdb->open_versions,
1960 rbtdb->current_version, link);
1963 * We're rolling back this transaction.
1965 cleanup_list = version->changed_list;
1966 ISC_LIST_INIT(version->changed_list);
1967 rollback = ISC_TRUE;
1968 cleanup_version = version;
1969 rbtdb->future_version = NULL;
1972 if (version != rbtdb->current_version) {
1974 * There are no external or internal references
1975 * to this version and it can be cleaned up.
1977 cleanup_version = version;
1980 * Find the version with the least serial
1981 * number greater than ours.
1983 least_greater = PREV(version, link);
1984 if (least_greater == NULL)
1985 least_greater = rbtdb->current_version;
1987 INSIST(version->serial < least_greater->serial);
1989 * Is this the least open version?
1991 if (version->serial == rbtdb->least_serial) {
1993 * Yes. Install the new least open
1996 make_least_version(rbtdb,
2001 * Add any unexecuted cleanups to
2002 * those of the least greater version.
2004 APPENDLIST(least_greater->changed_list,
2005 version->changed_list,
2008 } else if (version->serial == rbtdb->least_serial)
2009 INSIST(EMPTY(version->changed_list));
2010 UNLINK(rbtdb->open_versions, version, link);
2012 least_serial = rbtdb->least_serial;
2013 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2016 * Update the zone's secure status.
2018 if (writer && commit && !IS_CACHE(rbtdb))
2019 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
2021 if (cleanup_version != NULL) {
2022 INSIST(EMPTY(cleanup_version->changed_list));
2023 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2024 sizeof(*cleanup_version));
2027 if (!EMPTY(cleanup_list)) {
2029 * We acquire a tree write lock here in order to make sure
2030 * that stale nodes will be removed in decrement_reference().
2031 * If we didn't have the lock, those nodes could miss the
2032 * chance to be removed until the server stops. The write lock
2033 * is expensive, but this event should be rare enough to justify
2036 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2037 for (changed = HEAD(cleanup_list);
2039 changed = next_changed) {
2042 next_changed = NEXT(changed, link);
2043 rbtnode = changed->node;
2044 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2046 NODE_LOCK(lock, isc_rwlocktype_write);
2048 * This is a good opportunity to purge any dead nodes,
2051 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2054 rollback_node(rbtnode, serial);
2055 decrement_reference(rbtdb, rbtnode, least_serial,
2056 isc_rwlocktype_write,
2057 isc_rwlocktype_write, ISC_FALSE);
2059 NODE_UNLOCK(lock, isc_rwlocktype_write);
2061 isc_mem_put(rbtdb->common.mctx, changed,
2064 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2072 * Add the necessary magic for the wildcard name 'name'
2073 * to be found in 'rbtdb'.
2075 * In order for wildcard matching to work correctly in
2076 * zone_find(), we must ensure that a node for the wildcarding
2077 * level exists in the database, and has its 'find_callback'
2078 * and 'wild' bits set.
2080 * E.g. if the wildcard name is "*.sub.example." then we
2081 * must ensure that "sub.example." exists and is marked as
/*
 * add_wildcard_magic: make sure the node just above the wildcard 'name'
 * exists in the tree and is flagged for the find callback.
 *
 * The label sequence of 'name' starting at label 1 (i.e. without its
 * leftmost label) is extracted into foundname and added to the tree;
 * ISC_R_EXISTS is tolerated.  The resulting node gets find_callback = 1.
 * Returns ISC_R_SUCCESS on the visible success path.
 *
 * NOTE(review): the adjustment of 'n' before the label extraction, the
 * error return, and the setting of the 'wild' bit mentioned in the
 * existing description are not visible in this excerpt -- confirm.
 */
2085 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2086 isc_result_t result;
2087 dns_name_t foundname;
2088 dns_offsets_t offsets;
2090 dns_rbtnode_t *node = NULL;
2092 dns_name_init(&foundname, offsets);
2093 n = dns_name_countlabels(name);
2096 dns_name_getlabelsequence(name, 1, n, &foundname);
2097 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2098 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2100 node->find_callback = 1;
2102 return (ISC_R_SUCCESS);
/*
 * add_empty_wildcards: install wildcard support nodes for suffixes of
 * 'name' that are themselves wildcard names.
 *
 * n is the label count of 'name' and l that of the database origin.  For
 * each suffix made of the last i labels of 'name': when the suffix is a
 * wildcard name, add_wildcard_magic() is applied to it and the suffix is
 * added to the tree (ISC_R_EXISTS is tolerated).  Returns ISC_R_SUCCESS
 * once the loop completes.
 *
 * NOTE(review): the loop bounds for i and the error returns are not
 * visible in this excerpt; presumably i ranges between l and n -- confirm.
 */
2106 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2107 isc_result_t result;
2108 dns_name_t foundname;
2109 dns_offsets_t offsets;
2110 unsigned int n, l, i;
2112 dns_name_init(&foundname, offsets);
2113 n = dns_name_countlabels(name);
2114 l = dns_name_countlabels(&rbtdb->common.origin);
2117 dns_rbtnode_t *node = NULL; /* dummy */
2118 dns_name_getlabelsequence(name, n - i, i, &foundname);
2119 if (dns_name_iswildcard(&foundname)) {
2120 result = add_wildcard_magic(rbtdb, &foundname);
2121 if (result != ISC_R_SUCCESS)
2123 result = dns_rbt_addnode(rbtdb->tree, &foundname,
2125 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2130 return (ISC_R_SUCCESS);
/*
 * findnode: look up the node for 'name', adding it when necessary, and
 * return it referenced in '*nodep'.
 *
 *   db     - the database (an rbtdb).
 *   name   - name to find.
 *   create - NOTE(review): presumably selects whether a missing node is
 *            added; its use is not visible in this excerpt -- confirm.
 *   nodep  - receives the node on success.
 *
 * The lookup runs under the tree read lock with DNS_RBTFIND_EMPTYDATA.
 * When it does not succeed the read lock is released; the visible code
 * maps DNS_R_PARTIALMATCH to ISC_R_NOTFOUND and re-locks the tree for
 * writing in order to add the node.  A newly added node gets its lock
 * bucket number (from node->hashval when DNS_RBT_USEHASH is defined,
 * otherwise from dns_name_hash()), add_empty_wildcards() is run, and
 * add_wildcard_magic() is applied when 'name' itself is a wildcard.  The
 * node is passed to reactivate_node() before the tree lock is released,
 * and ISC_R_SUCCESS is returned.
 *
 * NOTE(review): the result of add_empty_wildcards() is not checked in the
 * visible code.
 */
2134 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2135 dns_dbnode_t **nodep)
2137 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2138 dns_rbtnode_t *node = NULL;
2139 dns_name_t nodename;
2140 isc_result_t result;
2141 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2143 REQUIRE(VALID_RBTDB(rbtdb));
2145 dns_name_init(&nodename, NULL);
2146 RWLOCK(&rbtdb->tree_lock, locktype);
2147 result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2148 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2149 if (result != ISC_R_SUCCESS) {
2150 RWUNLOCK(&rbtdb->tree_lock, locktype);
2152 if (result == DNS_R_PARTIALMATCH)
2153 result = ISC_R_NOTFOUND;
2157 * It would be nice to try to upgrade the lock instead of
2158 * unlocking then relocking.
2160 locktype = isc_rwlocktype_write;
2161 RWLOCK(&rbtdb->tree_lock, locktype);
2163 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2164 if (result == ISC_R_SUCCESS) {
2165 dns_rbt_namefromnode(node, &nodename);
2166 #ifdef DNS_RBT_USEHASH
2167 node->locknum = node->hashval % rbtdb->node_lock_count;
2169 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2170 rbtdb->node_lock_count;
2172 add_empty_wildcards(rbtdb, name);
2174 if (dns_name_iswildcard(name)) {
2175 result = add_wildcard_magic(rbtdb, name);
2176 if (result != ISC_R_SUCCESS) {
2177 RWUNLOCK(&rbtdb->tree_lock, locktype);
2181 } else if (result != ISC_R_EXISTS) {
2182 RWUNLOCK(&rbtdb->tree_lock, locktype);
2186 reactivate_node(rbtdb, node, locktype);
2187 RWUNLOCK(&rbtdb->tree_lock, locktype);
2189 *nodep = (dns_dbnode_t *)node;
2191 return (ISC_R_SUCCESS);
/*
 * zone_zonecut_callback: search callback that detects a zone cut (NS or
 * DNAME) at 'node'.
 *
 *   node - node being visited.
 *   name - name of 'node'; copied into search->zonecut_name when the
 *          search continues below the cut.
 *   arg  - the rbtdb_search_t describing the search.
 *
 * If a zone cut has already been recorded, DNS_R_CONTINUE is returned at
 * once.  Otherwise, under the node read lock, headers of type NS, DNAME or
 * SIGDNAME are examined for a version with serial <= search->serial.
 * DNAME takes precedence over NS.  When a cut is found, a reference on the
 * node is taken, the cut is stored in the search block (zonecut,
 * zonecut_rdataset, zonecut_sigrdataset, need_cleanup) and search->wild is
 * cleared; without DNS_DBFIND_GLUEOK the result becomes
 * DNS_R_PARTIALMATCH.  When no cut is found, search->wild is set for a
 * "wild" node unless DNS_DBFIND_NOWILD is given.
 *
 * NOTE(review): an NS header counts only away from the origin node or in
 * a stub database (see the node != onode test); the assignments to
 * ns_header and the final return are not visible in this excerpt --
 * confirm.
 */
2195 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2196 rbtdb_search_t *search = arg;
2197 rdatasetheader_t *header, *header_next;
2198 rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2199 rdatasetheader_t *found;
2200 isc_result_t result;
2201 dns_rbtnode_t *onode;
2204 * We only want to remember the topmost zone cut, since it's the one
2205 * that counts, so we'll just continue if we've already found a
2208 if (search->zonecut != NULL)
2209 return (DNS_R_CONTINUE);
2212 result = DNS_R_CONTINUE;
2213 onode = search->rbtdb->origin_node;
2215 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2216 isc_rwlocktype_read);
2219 * Look for an NS or DNAME rdataset active in our version.
2222 dname_header = NULL;
2223 sigdname_header = NULL;
2224 for (header = node->data; header != NULL; header = header_next) {
2225 header_next = header->next;
2226 if (header->type == dns_rdatatype_ns ||
2227 header->type == dns_rdatatype_dname ||
2228 header->type == RBTDB_RDATATYPE_SIGDNAME) {
2230 if (header->serial <= search->serial &&
2233 * Is this a "this rdataset doesn't
2236 if (NONEXISTENT(header))
2240 header = header->down;
2241 } while (header != NULL);
2242 if (header != NULL) {
2243 if (header->type == dns_rdatatype_dname)
2244 dname_header = header;
2245 else if (header->type ==
2246 RBTDB_RDATATYPE_SIGDNAME)
2247 sigdname_header = header;
2248 else if (node != onode ||
2249 IS_STUB(search->rbtdb)) {
2251 * We've found an NS rdataset that
2252 * isn't at the origin node. We check
2253 * that they're not at the origin node,
2254 * because otherwise we'd erroneously
2255 * treat the zone top as if it were
2265 * Did we find anything?
2267 if (dname_header != NULL) {
2269 * Note that DNAME has precedence over NS if both exist.
2271 found = dname_header;
2272 search->zonecut_sigrdataset = sigdname_header;
2273 } else if (ns_header != NULL) {
2275 search->zonecut_sigrdataset = NULL;
2278 if (found != NULL) {
2280 * We increment the reference count on node to ensure that
2281 * search->zonecut_rdataset will still be valid later.
2283 new_reference(search->rbtdb, node);
2284 search->zonecut = node;
2285 search->zonecut_rdataset = found;
2286 search->need_cleanup = ISC_TRUE;
2288 * Since we've found a zonecut, anything beneath it is
2289 * glue and is not subject to wildcard matching, so we
2290 * may clear search->wild.
2292 search->wild = ISC_FALSE;
2293 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2295 * If the caller does not want to find glue, then
2296 * this is the best answer and the search should
2299 result = DNS_R_PARTIALMATCH;
2304 * The search will continue beneath the zone cut.
2305 * This may or may not be the best match. In case it
2306 * is, we need to remember the node name.
2308 zcname = dns_fixedname_name(&search->zonecut_name);
2309 RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2311 search->copy_name = ISC_TRUE;
2315 * There is no zonecut at this node which is active in this
2318 * If this is a "wild" node and the caller hasn't disabled
2319 * wildcard matching, remember that we've seen a wild node
2320 * in case we need to go searching for wildcard matches
2323 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2324 search->wild = ISC_TRUE;
2327 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2328 isc_rwlocktype_read);
/*
 * bind_rdataset: attach 'rdataset' to the data stored behind 'header'.
 *
 *   rbtdb    - owning database; stored in private1 and source of rdclass.
 *   node     - node holding the header; stored in private2, and a
 *              reference on it is taken with new_reference().
 *   header   - rdataset header; the raw slab begins right after it and is
 *              stored in private3.
 *   now      - subtracted from header->rdh_ttl to give the TTL.
 *   rdataset - disassociated rdataset to fill in (methods must be NULL).
 *              NULL is tested for first; NOTE(review): presumably the
 *              function then does nothing -- that line is not visible.
 *
 * Type and covered type are derived from header->type with
 * RBTDB_RDATATYPE_BASE and RBTDB_RDATATYPE_EXT.  header->count is
 * post-incremented as a side effect, and a value of ISC_UINT32_MAX is
 * replaced by 0 in the rdataset.  The iterator state (privateuint4,
 * private5) is reset, and a noqname proof, when present, is exposed
 * through private6 together with DNS_RDATASETATTR_NOQNAME.
 */
2334 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2335 rdatasetheader_t *header, isc_stdtime_t now,
2336 dns_rdataset_t *rdataset)
2338 unsigned char *raw; /* RDATASLAB */
2341 * Caller must be holding the node reader lock.
2342 * XXXJT: technically, we need a writer lock, since we'll increment
2343 * the header count below. However, since the actual counter value
2344 * doesn't matter, we prioritize performance here. (We may want to
2345 * use atomic increment when available).
2348 if (rdataset == NULL)
2351 new_reference(rbtdb, node);
2353 INSIST(rdataset->methods == NULL); /* We must be disassociated. */
2355 rdataset->methods = &rdataset_methods;
2356 rdataset->rdclass = rbtdb->common.rdclass;
2357 rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2358 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2359 rdataset->ttl = header->rdh_ttl - now;
2360 rdataset->trust = header->trust;
2361 if (NXDOMAIN(header))
2362 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2363 rdataset->private1 = rbtdb;
2364 rdataset->private2 = node;
2365 raw = (unsigned char *)header + sizeof(*header);
2366 rdataset->private3 = raw;
2367 rdataset->count = header->count++;
2368 if (rdataset->count == ISC_UINT32_MAX)
2369 rdataset->count = 0;
2372 * Reset iterator state.
2374 rdataset->privateuint4 = 0;
2375 rdataset->private5 = NULL;
2378 * Add noqname proof.
2380 rdataset->private6 = header->noqname;
2381 if (rdataset->private6 != NULL)
2382 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
/*
 * setup_delegation: report the zone cut remembered in 'search'.
 *
 *   search      - search block holding zonecut, zonecut_rdataset and
 *                 zonecut_sigrdataset.
 *   nodep       - if not NULL, the reference already held by the search
 *                 block is handed over (need_cleanup is cleared).
 *   foundname   - if not NULL and search->copy_name is set, receives a
 *                 copy of the zone cut name; this is done before anything
 *                 else so that a failure leaves nothing to undo.
 *   rdataset    - if not NULL, bound to the zone cut rdataset under the
 *                 node read lock.
 *   sigrdataset - if not NULL and a signature rdataset is recorded, bound
 *                 to it as well.
 *
 * Returns DNS_R_DNAME when the zone cut rdataset is of type DNAME,
 * otherwise DNS_R_DELEGATION.
 *
 * NOTE(review): a dns_name_copy() failure is presumably returned to the
 * caller; that return line is not visible in this excerpt -- confirm.
 */
2385 static inline isc_result_t
2386 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2387 dns_name_t *foundname, dns_rdataset_t *rdataset,
2388 dns_rdataset_t *sigrdataset)
2390 isc_result_t result;
2392 rbtdb_rdatatype_t type;
2393 dns_rbtnode_t *node;
2396 * The caller MUST NOT be holding any node locks.
2399 node = search->zonecut;
2400 type = search->zonecut_rdataset->type;
2403 * If we have to set foundname, we do it before anything else.
2404 * If we were to set foundname after we had set nodep or bound the
2405 * rdataset, then we'd have to undo that work if dns_name_copy()
2406 * failed. By setting foundname first, there's nothing to undo if
2409 if (foundname != NULL && search->copy_name) {
2410 zcname = dns_fixedname_name(&search->zonecut_name);
2411 result = dns_name_copy(zcname, foundname, NULL);
2412 if (result != ISC_R_SUCCESS)
2415 if (nodep != NULL) {
2417 * Note that we don't have to increment the node's reference
2418 * count here because we're going to use the reference we
2419 * already have in the search block.
2422 search->need_cleanup = ISC_FALSE;
2424 if (rdataset != NULL) {
2425 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2426 isc_rwlocktype_read);
2427 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2428 search->now, rdataset);
2429 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2430 bind_rdataset(search->rbtdb, node,
2431 search->zonecut_sigrdataset,
2432 search->now, sigrdataset);
2433 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2434 isc_rwlocktype_read);
2437 if (type == dns_rdatatype_dname)
2438 return (DNS_R_DNAME);
2439 return (DNS_R_DELEGATION);
/*
 * valid_glue: decide whether an rdataset of 'type' at 'node', owned by
 * 'name', is acceptable glue for the zone cut recorded in 'search'.
 *
 *   search - search block; zonecut and zonecut_rdataset are read.
 *   name   - owner name compared against the names stored in the zone
 *            cut rdataset.
 *   type   - candidate type: NS is considered only at the zone cut node
 *            itself; apart from that only A, AAAA and A6 pass the type
 *            test.
 *   node   - node at which the candidate rdataset lives.
 *
 * The raw slab behind search->zonecut_rdataset is decoded by hand: a
 * 16-bit big-endian record count, then per record a 16-bit big-endian
 * length (with DNS_RDATASET_FIXED the leading count is followed by
 * 4 * count further bytes that are skipped).  Each record is wrapped in an
 * isc_region_t and turned into ns_name for comparison with 'name'.
 *
 * Fix: the region argument of dns_name_fromregion() must be the address of
 * the local 'region' variable; the token had been corrupted into a
 * non-ASCII character sequence.
 *
 * NOTE(review): the loop header, the returns, and the statement that sets
 * 'valid' are not visible in this excerpt -- confirm against the full file.
 */
2442 static inline isc_boolean_t
2443 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2444 dns_rbtnode_t *node)
2446 unsigned char *raw; /* RDATASLAB */
2447 unsigned int count, size;
2449 isc_boolean_t valid = ISC_FALSE;
2450 dns_offsets_t offsets;
2451 isc_region_t region;
2452 rdatasetheader_t *header;
2455 * No additional locking is required.
2459 * Valid glue types are A, AAAA, A6. NS is also a valid glue type
2460 * if it occurs at a zone cut, but is not valid below it.
2462 if (type == dns_rdatatype_ns) {
2463 if (node != search->zonecut) {
2466 } else if (type != dns_rdatatype_a &&
2467 type != dns_rdatatype_aaaa &&
2468 type != dns_rdatatype_a6) {
2472 header = search->zonecut_rdataset;
2473 raw = (unsigned char *)header + sizeof(*header);
2474 count = raw[0] * 256 + raw[1];
2475 #if DNS_RDATASET_FIXED
2476 raw += 2 + (4 * count);
2483 size = raw[0] * 256 + raw[1];
2484 #if DNS_RDATASET_FIXED
2490 region.length = size;
2493 * XXX Until we have rdata structures, we have no choice but
2494 * to directly access the rdata format.
2496 dns_name_init(&ns_name, offsets);
2497 dns_name_fromregion(&ns_name, &region);
2498 if (dns_name_compare(&ns_name, name) == 0) {
2507 static inline isc_boolean_t
2508 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2511 dns_fixedname_t fnext;
2512 dns_fixedname_t forigin;
2517 dns_rbtnode_t *node;
2518 isc_result_t result;
2519 isc_boolean_t answer = ISC_FALSE;
2520 rdatasetheader_t *header;
2522 rbtdb = search->rbtdb;
2524 dns_name_init(&prefix, NULL);
2525 dns_fixedname_init(&fnext);
2526 next = dns_fixedname_name(&fnext);
2527 dns_fixedname_init(&forigin);
2528 origin = dns_fixedname_name(&forigin);
2530 result = dns_rbtnodechain_next(chain, NULL, NULL);
2531 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2533 result = dns_rbtnodechain_current(chain, &prefix,
2535 if (result != ISC_R_SUCCESS)
2537 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2538 isc_rwlocktype_read);
2539 for (header = node->data;
2541 header = header->next) {
2542 if (header->serial <= search->serial &&
2543 !IGNORE(header) && EXISTS(header))
2546 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2547 isc_rwlocktype_read);
2550 result = dns_rbtnodechain_next(chain, NULL, NULL);
2552 if (result == ISC_R_SUCCESS)
2553 result = dns_name_concatenate(&prefix, origin, next, NULL);
2554 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
/*
 * activeemtpynode (identifier spelled this way throughout; find_wildcard()
 * calls it under this name): used by find_wildcard() to veto a wildcard
 * match.  find_wildcard() returns ISC_R_NOTFOUND when this returns true.
 *
 * Working on a private copy of search->chain, the function:
 *   1. walks backwards (dns_rbtnodechain_prev) to the nearest node holding
 *      a header visible in the search's version and builds its name into
 *      'prev'; any failure clears check_prev;
 *   2. walks forwards (dns_rbtnodechain_next) the same way and builds
 *      'next'; any failure clears check_next;
 *   3. strips the leading (wildcard) label from 'wname' to get 'tname',
 *      then repeatedly tests whether 'prev' or 'next' is a subdomain of
 *      'rname' (initially 'qname'), removing the left-most label of
 *      'rname' each round until it equals 'tname'.
 *
 * 'answer' starts out ISC_FALSE.
 *
 * NOTE(review): a hit in step 3 presumably means qname sits at or below
 * an empty node that has live data beneath it -- confirm.
 */
2559 static inline isc_boolean_t
2560 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2561 dns_fixedname_t fnext;
2562 dns_fixedname_t forigin;
2563 dns_fixedname_t fprev;
2571 dns_rbtnode_t *node;
2572 dns_rbtnodechain_t chain;
2573 isc_boolean_t check_next = ISC_TRUE;
2574 isc_boolean_t check_prev = ISC_TRUE;
2575 isc_boolean_t answer = ISC_FALSE;
2576 isc_result_t result;
2577 rdatasetheader_t *header;
2580 rbtdb = search->rbtdb;
2582 dns_name_init(&name, NULL);
2583 dns_name_init(&tname, NULL);
2584 dns_name_init(&rname, NULL);
2585 dns_fixedname_init(&fnext);
2586 next = dns_fixedname_name(&fnext);
2587 dns_fixedname_init(&fprev);
2588 prev = dns_fixedname_name(&fprev);
2589 dns_fixedname_init(&forigin);
2590 origin = dns_fixedname_name(&forigin);
2593 * Find if qname is at or below a empty node.
2594 * Use our own copy of the chain.
2597 chain = search->chain;
2600 result = dns_rbtnodechain_current(&chain, &name,
2602 if (result != ISC_R_SUCCESS)
2604 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2605 isc_rwlocktype_read);
/* Look for any header that is visible in the search's version. */
2606 for (header = node->data;
2608 header = header->next) {
2609 if (header->serial <= search->serial &&
2610 !IGNORE(header) && EXISTS(header))
2613 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2614 isc_rwlocktype_read);
2617 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2618 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
/* If the walk or the name build failed, skip the 'prev' comparison. */
2619 if (result == ISC_R_SUCCESS)
2620 result = dns_name_concatenate(&name, origin, prev, NULL);
2621 if (result != ISC_R_SUCCESS)
2622 check_prev = ISC_FALSE;
2624 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2625 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2627 result = dns_rbtnodechain_current(&chain, &name,
2629 if (result != ISC_R_SUCCESS)
2631 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2632 isc_rwlocktype_read);
2633 for (header = node->data;
2635 header = header->next) {
2636 if (header->serial <= search->serial &&
2637 !IGNORE(header) && EXISTS(header))
2640 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2641 isc_rwlocktype_read);
2644 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2646 if (result == ISC_R_SUCCESS)
2647 result = dns_name_concatenate(&name, origin, next, NULL);
2648 if (result != ISC_R_SUCCESS)
2649 check_next = ISC_FALSE;
2651 dns_name_clone(qname, &rname);
2654 * Remove the wildcard label to find the terminal name.
2656 n = dns_name_countlabels(wname);
2657 dns_name_getlabelsequence(wname, 1, n - 1, &tname);
2660 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
2661 (check_next && dns_name_issubdomain(next, &rname))) {
2666 * Remove the left hand label.
2668 n = dns_name_countlabels(&rname);
2669 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
2670 } while (!dns_name_equal(&rname, &tname));
/*
 * find_wildcard: look for a wildcard that can answer the query name at
 * one of the ancestor levels recorded in search->chain.
 *
 * 'result' starts out ISC_R_NOTFOUND and the level index starts at
 * search->chain.level_matches.  For a level being examined the code:
 *   - scans the level node's headers, under its read lock, for one that
 *     is visible in the search's version;
 *   - builds the wildcard owner name by concatenating dns_wildcardname
 *     with the level node's name and then with the names of the levels
 *     in search->chain.levels[];
 *   - looks that name up in rbtdb->tree with DNS_RBTFIND_EMPTYDATA;
 *   - on a hit, accepts the wildcard node if it has a header visible in
 *     the search's version or if activeempty() says so, unless
 *     activeemtpynode() vetoes the match, in which case ISC_R_NOTFOUND
 *     is returned at once;
 *   - treats lookup results other than ISC_R_NOTFOUND and
 *     DNS_R_PARTIALMATCH as errors;
 *   - sets the result to ISC_R_NOTFOUND when the level node itself is
 *     active, since wildcards further up then have no effect.
 *
 * The caller must hold the tree lock (see the comment in the body).
 */
2674 static inline isc_result_t
2675 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
2679 dns_rbtnode_t *node, *level_node, *wnode;
2680 rdatasetheader_t *header;
2681 isc_result_t result = ISC_R_NOTFOUND;
2684 dns_fixedname_t fwname;
2686 isc_boolean_t done, wild, active;
2687 dns_rbtnodechain_t wchain;
2690 * Caller must be holding the tree lock and MUST NOT be holding
2695 * Examine each ancestor level. If the level's wild bit
2696 * is set, then construct the corresponding wildcard name and
2697 * search for it. If the wildcard node exists, and is active in
2698 * this version, we're done. If not, then we next check to see
2699 * if the ancestor is active in this version. If so, then there
2700 * can be no possible wildcard match and again we're done. If not,
2701 * continue the search.
2704 rbtdb = search->rbtdb;
2705 i = search->chain.level_matches;
2709 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2710 isc_rwlocktype_read);
2713 * First we try to figure out if this node is active in
2714 * the search's version. We do this now, even though we
2715 * may not need the information, because it simplifies the
2716 * locking and code flow.
2718 for (header = node->data;
2720 header = header->next) {
2721 if (header->serial <= search->serial &&
2722 !IGNORE(header) && EXISTS(header))
2735 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2736 isc_rwlocktype_read);
2740 * Construct the wildcard name for this level.
2742 dns_name_init(&name, NULL);
2743 dns_rbt_namefromnode(node, &name);
2744 dns_fixedname_init(&fwname);
2745 wname = dns_fixedname_name(&fwname);
2746 result = dns_name_concatenate(dns_wildcardname, &name,
2749 while (result == ISC_R_SUCCESS && j != 0) {
2751 level_node = search->chain.levels[j];
2752 dns_name_init(&name, NULL);
2753 dns_rbt_namefromnode(level_node, &name);
2754 result = dns_name_concatenate(wname,
2759 if (result != ISC_R_SUCCESS)
2763 dns_rbtnodechain_init(&wchain, NULL);
2764 result = dns_rbt_findnode(rbtdb->tree, wname,
2765 NULL, &wnode, &wchain,
2766 DNS_RBTFIND_EMPTYDATA,
2768 if (result == ISC_R_SUCCESS) {
2772 * We have found the wildcard node. If it
2773 * is active in the search's version, we're
2776 lock = &rbtdb->node_locks[wnode->locknum].lock;
2777 NODE_LOCK(lock, isc_rwlocktype_read);
/* Scan the wildcard node for a header visible in the search's version. */
2778 for (header = wnode->data;
2780 header = header->next) {
2781 if (header->serial <= search->serial &&
2782 !IGNORE(header) && EXISTS(header))
2785 NODE_UNLOCK(lock, isc_rwlocktype_read);
2786 if (header != NULL ||
2787 activeempty(search, &wchain, wname)) {
2788 if (activeemtpynode(search, qname,
2790 return (ISC_R_NOTFOUND);
2793 * The wildcard node is active!
2795 * Note: result is still ISC_R_SUCCESS
2796 * so we don't have to set it.
2801 } else if (result != ISC_R_NOTFOUND &&
2802 result != DNS_R_PARTIALMATCH) {
2804 * An error has occurred. Bail out.
2812 * The level node is active. Any wildcarding
2813 * present at higher levels has no
2814 * effect and we're done.
2816 result = ISC_R_NOTFOUND;
2822 node = search->chain.levels[i];
/*
 * find_closest_nsec: starting at the node search->chain currently points
 * to, step backwards through the tree (dns_rbtnodechain_prev) until a
 * node is found that carries an NSEC rdataset visible in the search's
 * version.  When 'need_sig' is true the RRSIG NSEC must be there as well.
 *
 * Each node's headers are examined under that node's read lock.  Nodes
 * with no active rdataset, and active nodes with neither NSEC nor
 * RRSIG NSEC, are treated as empty and the walk continues.
 *
 * On a hit the owner name is built from 'name' and 'origin' with
 * dns_name_concatenate(); on success of that call a reference is taken
 * when 'nodep' is non-NULL and the NSEC (and its signature, if found)
 * is bound with bind_rdataset().
 *
 * Results visible here:
 *   DNS_R_BADDB  an active node has only one of NSEC / RRSIG NSEC, or
 *                the walk ended with ISC_R_NOMORE (start of the database
 *                reached without finding an NSEC);
 *   otherwise the result of the chain / name operations.
 */
2830 static inline isc_result_t
2831 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
2832 dns_name_t *foundname, dns_rdataset_t *rdataset,
2833 dns_rdataset_t *sigrdataset, isc_boolean_t need_sig)
2835 dns_rbtnode_t *node;
2836 rdatasetheader_t *header, *header_next, *found, *foundsig;
2837 isc_boolean_t empty_node;
2838 isc_result_t result;
2839 dns_fixedname_t fname, forigin;
2840 dns_name_t *name, *origin;
2844 dns_fixedname_init(&fname);
2845 name = dns_fixedname_name(&fname);
2846 dns_fixedname_init(&forigin);
2847 origin = dns_fixedname_name(&forigin);
2848 result = dns_rbtnodechain_current(&search->chain, name,
2850 if (result != ISC_R_SUCCESS)
2852 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2853 isc_rwlocktype_read);
2856 empty_node = ISC_TRUE;
/* The node counts as empty until an active header is seen below. */
2857 for (header = node->data;
2859 header = header_next) {
2860 header_next = header->next;
2862 * Look for an active, extant NSEC or RRSIG NSEC.
2865 if (header->serial <= search->serial &&
2868 * Is this a "this rdataset doesn't
2871 if (NONEXISTENT(header))
2875 header = header->down;
2876 } while (header != NULL);
2877 if (header != NULL) {
2879 * We now know that there is at least one
2880 * active rdataset at this node.
2882 empty_node = ISC_FALSE;
2883 if (header->type == dns_rdatatype_nsec) {
2885 if (foundsig != NULL)
2887 } else if (header->type ==
2888 RBTDB_RDATATYPE_SIGNSEC) {
2896 if (found != NULL &&
2897 (foundsig != NULL || !need_sig))
2900 * We've found the right NSEC record.
2902 * Note: for this to really be the right
2903 * NSEC record, it's essential that the NSEC
2904 * records of any nodes obscured by a zone
2905 * cut have been removed; we assume this is
2908 result = dns_name_concatenate(name, origin,
2910 if (result == ISC_R_SUCCESS) {
2911 if (nodep != NULL) {
2912 new_reference(search->rbtdb,
2916 bind_rdataset(search->rbtdb, node,
2919 if (foundsig != NULL)
2920 bind_rdataset(search->rbtdb,
2926 } else if (found == NULL && foundsig == NULL) {
2928 * This node is active, but has no NSEC or
2929 * RRSIG NSEC. That means it's glue or
2930 * other obscured zone data that isn't
2931 * relevant for our search. Treat the
2932 * node as if it were empty and keep looking.
2934 empty_node = ISC_TRUE;
2935 result = dns_rbtnodechain_prev(&search->chain,
2939 * We found an active node, but either the
2940 * NSEC or the RRSIG NSEC is missing. This
2943 result = DNS_R_BADDB;
2947 * This node isn't active. We've got to keep
2950 result = dns_rbtnodechain_prev(&search->chain, NULL,
2953 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2954 isc_rwlocktype_read);
2955 } while (empty_node && result == ISC_R_SUCCESS);
2958 * If the result is ISC_R_NOMORE, then we got to the beginning of
2959 * the database and didn't find a NSEC record. This shouldn't
2962 if (result == ISC_R_NOMORE)
2963 result = DNS_R_BADDB;
/*
 * zone_find: look up 'name' / 'type' in a zone database.
 *
 * Parameters (as used in the body):
 *   db          the database; cast to dns_rbtdb_t and checked with
 *               VALID_RBTDB.
 *   version     version to search; when NULL the current version is
 *               attached via currentversion() and close_version is set
 *               (a closeversion() call appears near the end).
 *   options     DNS_DBFIND_* flags; GLUEOK, VALIDATEGLUE and FORCENSEC
 *               are consulted here.
 *   now         not used by the zone lookup (see the comment below).
 *   nodep       if non-NULL, receives a referenced node.
 *   foundname   receives the name found; DNS_NAMEATTR_WILDCARD is set
 *               on it on the wildcard paths.
 *   rdataset, sigrdataset
 *               bound with bind_rdataset() to the answer or to the
 *               NSEC proof and its signature.
 *
 * Flow: the tree is searched under the tree read lock with
 * dns_rbt_findnode(), using zone_zonecut_callback to note zone cuts on
 * the way down.
 *   - Partial match: a recorded zone cut yields setup_delegation();
 *     otherwise find_wildcard() is tried, and failing that the result
 *     is DNS_R_EMPTYNAME or DNS_R_NXDOMAIN (chosen by activeempty()),
 *     with find_closest_nsec() supplying the NSEC when the zone is
 *     secure or DNS_DBFIND_FORCENSEC is set.
 *   - Exact (or wildcard) match: the node's headers are scanned under
 *     the node read lock for the requested type, its RRSIG, a CNAME
 *     (when cname_ok), NSEC / RRSIG NSEC, and an NS that makes the
 *     node itself a zone cut.
 *
 * Result codes assigned in the visible code: ISC_R_SUCCESS, DNS_R_CNAME,
 * DNS_R_GLUE, DNS_R_ZONECUT, DNS_R_NXRRSET, DNS_R_EMPTYWILD,
 * DNS_R_EMPTYNAME, DNS_R_NXDOMAIN, DNS_R_BADDB, plus whatever
 * setup_delegation(), find_wildcard() and find_closest_nsec() return.
 *
 * Before returning, the tree lock is released, an unused zone cut
 * reference is dropped when search.need_cleanup is set, and the search
 * chain is reset.
 */
2969 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
2970 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
2971 dns_dbnode_t **nodep, dns_name_t *foundname,
2972 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
2974 dns_rbtnode_t *node = NULL;
2975 isc_result_t result;
2976 rbtdb_search_t search;
2977 isc_boolean_t cname_ok = ISC_TRUE;
2978 isc_boolean_t close_version = ISC_FALSE;
2979 isc_boolean_t maybe_zonecut = ISC_FALSE;
2980 isc_boolean_t at_zonecut = ISC_FALSE;
2982 isc_boolean_t empty_node;
2983 rdatasetheader_t *header, *header_next, *found, *nsecheader;
2984 rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
2985 rbtdb_rdatatype_t sigtype;
2986 isc_boolean_t active;
2987 dns_rbtnodechain_t chain;
2991 search.rbtdb = (dns_rbtdb_t *)db;
2993 REQUIRE(VALID_RBTDB(search.rbtdb));
2996 * We don't care about 'now'.
3001 * If the caller didn't supply a version, attach to the current
3004 if (version == NULL) {
3005 currentversion(db, &version);
3006 close_version = ISC_TRUE;
3009 search.rbtversion = version;
3010 search.serial = search.rbtversion->serial;
3011 search.options = options;
3012 search.copy_name = ISC_FALSE;
3013 search.need_cleanup = ISC_FALSE;
3014 search.wild = ISC_FALSE;
3015 search.zonecut = NULL;
3016 dns_fixedname_init(&search.zonecut_name);
3017 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3021 * 'wild' will be true iff. we've matched a wildcard.
3025 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3028 * Search down from the root of the tree. If, while going down, we
3029 * encounter a callback node, zone_zonecut_callback() will search the
3030 * rdatasets at the zone cut for active DNAME or NS rdatasets.
3032 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
3033 &search.chain, DNS_RBTFIND_EMPTYDATA,
3034 zone_zonecut_callback, &search);
3036 if (result == DNS_R_PARTIALMATCH) {
3038 if (search.zonecut != NULL) {
3039 result = setup_delegation(&search, nodep, foundname,
3040 rdataset, sigrdataset);
3046 * At least one of the levels in the search chain
3047 * potentially has a wildcard. For each such level,
3048 * we must see if there's a matching wildcard active
3049 * in the current version.
3051 result = find_wildcard(&search, &node, name);
3052 if (result == ISC_R_SUCCESS) {
3053 result = dns_name_copy(name, foundname, NULL);
3054 if (result != ISC_R_SUCCESS)
3059 else if (result != ISC_R_NOTFOUND)
3063 chain = search.chain;
3064 active = activeempty(&search, &chain, name);
3067 * If we're here, then the name does not exist, is not
3068 * beneath a zonecut, and there's no matching wildcard.
3070 if (search.rbtdb->secure ||
3071 (search.options & DNS_DBFIND_FORCENSEC) != 0)
3073 result = find_closest_nsec(&search, nodep, foundname,
3074 rdataset, sigrdataset,
3075 search.rbtdb->secure);
3076 if (result == ISC_R_SUCCESS)
3077 result = active ? DNS_R_EMPTYNAME :
3080 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3082 } else if (result != ISC_R_SUCCESS)
3087 * We have found a node whose name is the desired name, or we
3088 * have matched a wildcard.
3091 if (search.zonecut != NULL) {
3093 * If we're beneath a zone cut, we don't want to look for
3094 * CNAMEs because they're not legitimate zone glue.
3096 cname_ok = ISC_FALSE;
3099 * The node may be a zone cut itself. If it might be one,
3100 * make sure we check for it later.
3102 if (node->find_callback &&
3103 (node != search.rbtdb->origin_node ||
3104 IS_STUB(search.rbtdb)) &&
3105 !dns_rdatatype_atparent(type))
3106 maybe_zonecut = ISC_TRUE;
3110 * Certain DNSSEC types are not subject to CNAME matching
3111 * (RFC4035, section 2.5 and RFC3007).
3113 * We don't check for RRSIG, because we don't store RRSIG records
3116 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3117 cname_ok = ISC_FALSE;
3120 * We now go looking for rdata...
3123 NODE_LOCK(&(search.rbtdb->node_locks[node->locknum].lock),
3124 isc_rwlocktype_read);
3128 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3132 empty_node = ISC_TRUE;
/* header_next is saved first because 'header' is re-pointed inside the loop. */
3133 for (header = node->data; header != NULL; header = header_next) {
3134 header_next = header->next;
3136 * Look for an active, extant rdataset.
3139 if (header->serial <= search.serial &&
3142 * Is this a "this rdataset doesn't
3145 if (NONEXISTENT(header))
3149 header = header->down;
3150 } while (header != NULL);
3151 if (header != NULL) {
3153 * We now know that there is at least one active
3154 * rdataset at this node.
3156 empty_node = ISC_FALSE;
3159 * Do special zone cut handling, if requested.
3161 if (maybe_zonecut &&
3162 header->type == dns_rdatatype_ns) {
3164 * We increment the reference count on node to
3165 * ensure that search->zonecut_rdataset will
3166 * still be valid later.
3168 new_reference(search.rbtdb, node);
3169 search.zonecut = node;
3170 search.zonecut_rdataset = header;
3171 search.zonecut_sigrdataset = NULL;
3172 search.need_cleanup = ISC_TRUE;
3173 maybe_zonecut = ISC_FALSE;
3174 at_zonecut = ISC_TRUE;
3176 * It is not clear if KEY should still be
3177 * allowed at the parent side of the zone
3178 * cut or not. It is needed for RFC3007
3179 * validated updates.
3181 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3182 && type != dns_rdatatype_nsec
3183 && type != dns_rdatatype_key) {
3185 * Glue is not OK, but any answer we
3186 * could return would be glue. Return
3192 if (found != NULL && foundsig != NULL)
3197 * If we found a type we were looking for,
3200 if (header->type == type ||
3201 type == dns_rdatatype_any ||
3202 (header->type == dns_rdatatype_cname &&
3205 * We've found the answer!
3208 if (header->type == dns_rdatatype_cname &&
3211 * We may be finding a CNAME instead
3212 * of the desired type.
3214 * If we've already got the CNAME RRSIG,
3215 * use it, otherwise change sigtype
3216 * so that we find it.
3218 if (cnamesig != NULL)
3219 foundsig = cnamesig;
3222 RBTDB_RDATATYPE_SIGCNAME;
3225 * If we've got all we need, end the search.
3227 if (!maybe_zonecut && foundsig != NULL)
3229 } else if (header->type == sigtype) {
3231 * We've found the RRSIG rdataset for our
3232 * target type. Remember it.
3236 * If we've got all we need, end the search.
3238 if (!maybe_zonecut && found != NULL)
3240 } else if (header->type == dns_rdatatype_nsec) {
3242 * Remember a NSEC rdataset even if we're
3243 * not specifically looking for it, because
3244 * we might need it later.
3246 nsecheader = header;
3247 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC) {
3249 * If we need the NSEC rdataset, we'll also
3250 * need its signature.
3253 } else if (cname_ok &&
3254 header->type == RBTDB_RDATATYPE_SIGCNAME) {
3256 * If we get a CNAME match, we'll also need
3266 * We have an exact match for the name, but there are no
3267 * active rdatasets in the desired version. That means that
3268 * this node doesn't exist in the desired version, and that
3269 * we really have a partial match.
3272 lock = &search.rbtdb->node_locks[node->locknum].lock;
3273 NODE_UNLOCK(lock, isc_rwlocktype_read);
3279 * If we didn't find what we were looking for...
3281 if (found == NULL) {
3282 if (search.zonecut != NULL) {
3284 * We were trying to find glue at a node beneath a
3285 * zone cut, but didn't.
3287 * Return the delegation.
3289 lock = &search.rbtdb->node_locks[node->locknum].lock;
3290 NODE_UNLOCK(lock, isc_rwlocktype_read);
3291 result = setup_delegation(&search, nodep, foundname,
3292 rdataset, sigrdataset);
3296 * The desired type doesn't exist.
3298 result = DNS_R_NXRRSET;
3299 if (search.rbtdb->secure &&
3300 (nsecheader == NULL || nsecsig == NULL)) {
3302 * The zone is secure but there's no NSEC,
3303 * or the NSEC has no signature!
3306 result = DNS_R_BADDB;
3310 lock = &search.rbtdb->node_locks[node->locknum].lock;
3311 NODE_UNLOCK(lock, isc_rwlocktype_read);
3312 result = find_closest_nsec(&search, nodep, foundname,
3313 rdataset, sigrdataset,
3314 search.rbtdb->secure);
3315 if (result == ISC_R_SUCCESS)
3316 result = DNS_R_EMPTYWILD;
3319 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3323 * There's no NSEC record, and we were told
3326 result = DNS_R_BADDB;
3329 if (nodep != NULL) {
3330 new_reference(search.rbtdb, node);
3333 if (search.rbtdb->secure ||
3334 (search.options & DNS_DBFIND_FORCENSEC) != 0)
3336 bind_rdataset(search.rbtdb, node, nsecheader,
3338 if (nsecsig != NULL)
3339 bind_rdataset(search.rbtdb, node,
3340 nsecsig, 0, sigrdataset);
3343 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3348 * We found what we were looking for, or we found a CNAME.
3351 if (type != found->type &&
3352 type != dns_rdatatype_any &&
3353 found->type == dns_rdatatype_cname) {
3355 * We weren't doing an ANY query and we found a CNAME instead
3356 * of the type we were looking for, so we need to indicate
3357 * that result to the caller.
3359 result = DNS_R_CNAME;
3360 } else if (search.zonecut != NULL) {
3362 * If we're beneath a zone cut, we must indicate that the
3363 * result is glue, unless we're actually at the zone cut
3364 * and the type is NSEC or KEY.
3366 if (search.zonecut == node) {
3368 * It is not clear if KEY should still be
3369 * allowed at the parent side of the zone
3370 * cut or not. It is needed for RFC3007
3371 * validated updates.
3373 if (type == dns_rdatatype_nsec ||
3374 type == dns_rdatatype_key)
3375 result = ISC_R_SUCCESS;
3376 else if (type == dns_rdatatype_any)
3377 result = DNS_R_ZONECUT;
3379 result = DNS_R_GLUE;
3381 result = DNS_R_GLUE;
3383 * We might have found data that isn't glue, but was occluded
3384 * by a dynamic update. If the caller cares about this, they
3385 * will have told us to validate glue.
3387 * XXX We should cache the glue validity state!
3389 if (result == DNS_R_GLUE &&
3390 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3391 !valid_glue(&search, foundname, type, node)) {
3392 lock = &search.rbtdb->node_locks[node->locknum].lock;
3393 NODE_UNLOCK(lock, isc_rwlocktype_read);
3394 result = setup_delegation(&search, nodep, foundname,
3395 rdataset, sigrdataset);
3400 * An ordinary successful query!
3402 result = ISC_R_SUCCESS;
3405 if (nodep != NULL) {
3407 new_reference(search.rbtdb, node);
3409 search.need_cleanup = ISC_FALSE;
3413 if (type != dns_rdatatype_any) {
3414 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3415 if (foundsig != NULL)
3416 bind_rdataset(search.rbtdb, node, foundsig, 0,
3421 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3424 NODE_UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock),
3425 isc_rwlocktype_read);
3428 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3431 * If we found a zonecut but aren't going to use it, we have to
3434 if (search.need_cleanup) {
/* Give back the reference that was taken on the zone cut node. */
3435 node = search.zonecut;
3436 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3438 NODE_LOCK(lock, isc_rwlocktype_read);
3439 decrement_reference(search.rbtdb, node, 0,
3440 isc_rwlocktype_read, isc_rwlocktype_none,
3442 NODE_UNLOCK(lock, isc_rwlocktype_read);
3446 closeversion(db, &version, ISC_FALSE);
3448 dns_rbtnodechain_reset(&search.chain);
/*
 * zone_findzonecut: placeholder for the zone database.  The visible body
 * marks an argument as UNUSED, reports a FATAL_ERROR naming this function,
 * and has ISC_R_NOTIMPLEMENTED as its return value.
 *
 * NOTE(review): whether control ever reaches the return statement depends
 * on how FATAL_ERROR is defined, which is not shown here.
 */
3454 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3455 isc_stdtime_t now, dns_dbnode_t **nodep,
3456 dns_name_t *foundname,
3457 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3466 UNUSED(sigrdataset);
3468 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3470 return (ISC_R_NOTIMPLEMENTED);
/*
 * cache_zonecut_callback: callback handed to dns_rbt_findnode() by
 * cache_find().  'arg' is the rbtdb_search_t of the lookup in progress;
 * search->zonecut must still be NULL on entry.
 *
 * The node's headers are scanned under the node lock (taken as a read
 * lock) for a DNAME and an RRSIG DNAME rdataset.  Along the way, headers
 * whose rdh_ttl is not later than search->now are handled as stale:
 *   - if the TTL is older than search->now - RBTDB_VIRTUAL and the lock
 *     is, or can be upgraded to, a write lock, then the header is freed
 *     when dns_rbtnode_refcurrent(node) is 0, and otherwise marked with
 *     RDATASET_ATTR_STALE;
 *   - the lock is not downgraded again; it is released at the end with
 *     whatever type it has by then.
 *
 * If a DNAME header was found and either its trust is not
 * dns_trust_pending or DNS_DBFIND_PENDINGOK is set, a reference is taken
 * on the node, the search block's zonecut fields are filled in,
 * need_cleanup is set, and the result is DNS_R_PARTIALMATCH.  Otherwise
 * the result is DNS_R_CONTINUE.
 */
3474 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3475 rbtdb_search_t *search = arg;
3476 rdatasetheader_t *header, *header_prev, *header_next;
3477 rdatasetheader_t *dname_header, *sigdname_header;
3478 isc_result_t result;
3480 isc_rwlocktype_t locktype;
3484 REQUIRE(search->zonecut == NULL);
3487 * Keep compiler silent.
3491 lock = &(search->rbtdb->node_locks[node->locknum].lock);
3492 locktype = isc_rwlocktype_read;
3493 NODE_LOCK(lock, locktype);
3496 * Look for a DNAME or RRSIG DNAME rdataset.
3498 dname_header = NULL;
3499 sigdname_header = NULL;
3501 for (header = node->data; header != NULL; header = header_next) {
3502 header_next = header->next;
3503 if (header->rdh_ttl <= search->now) {
3505 * This rdataset is stale. If no one else is
3506 * using the node, we can clean it up right
3507 * now, otherwise we mark it as stale, and
3508 * the node as dirty, so it will get cleaned
3511 if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
3512 (locktype == isc_rwlocktype_write ||
3513 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3515 * We update the node's status only when we
3516 * can get write access; otherwise, we leave
3517 * others to this work. Periodical cleaning
3518 * will eventually take the job as the last
3520 * We won't downgrade the lock, since other
3521 * rdatasets are probably stale, too.
3523 locktype = isc_rwlocktype_write;
3525 if (dns_rbtnode_refcurrent(node) == 0) {
3529 * header->down can be non-NULL if the
3530 * refcount has just decremented to 0
3531 * but decrement_reference() has not
3532 * performed clean_cache_node(), in
3533 * which case we need to purge the
3534 * stale headers first.
3536 mctx = search->rbtdb->common.mctx;
3537 clean_stale_headers(search->rbtdb,
3540 if (header_prev != NULL)
3544 node->data = header->next;
3545 free_rdataset(search->rbtdb, mctx,
3548 header->attributes |=
3549 RDATASET_ATTR_STALE;
3551 header_prev = header;
3554 header_prev = header;
3555 } else if (header->type == dns_rdatatype_dname &&
3557 dname_header = header;
3558 header_prev = header;
3559 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
3561 sigdname_header = header;
3562 header_prev = header;
3564 header_prev = header;
3567 if (dname_header != NULL &&
3568 (dname_header->trust != dns_trust_pending ||
3569 (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
3571 * We increment the reference count on node to ensure that
3572 * search->zonecut_rdataset will still be valid later.
3574 new_reference(search->rbtdb, node);
3575 INSIST(!ISC_LINK_LINKED(node, deadlink));
/* Hand the DNAME (and its signature, if one was seen) to the search block. */
3576 search->zonecut = node;
3577 search->zonecut_rdataset = dname_header;
3578 search->zonecut_sigrdataset = sigdname_header;
3579 search->need_cleanup = ISC_TRUE;
3580 result = DNS_R_PARTIALMATCH;
3582 result = DNS_R_CONTINUE;
3584 NODE_UNLOCK(lock, locktype);
/*
 * find_deepest_zonecut: starting at 'node' and then moving through the
 * levels stored in search->chain.levels[] (index starting from
 * search->chain.level_matches), look for a node that holds an extant NS
 * rdataset; an RRSIG NS found at the same node is kept as its signature.
 *
 * 'result' starts out ISC_R_NOTFOUND.  Each node is examined under its
 * node lock, taken as a read lock.  Headers whose rdh_ttl is not later
 * than search->now are handled as stale: if old enough and a write lock
 * is held or obtained via NODE_TRYUPGRADE, the header is either freed or
 * marked with RDATASET_ATTR_STALE, depending on
 * dns_rbtnode_refcurrent(node).
 *
 * When an NS rdataset is found:
 *   - 'foundname', if non-NULL, is built first from the node's name and
 *     the names of the remaining chain levels, so that nothing needs
 *     undoing if that fails;
 *   - the result becomes DNS_R_DELEGATION, a reference is taken when
 *     'nodep' is non-NULL, and the NS and its signature are bound with
 *     bind_rdataset() using search->now;
 *   - if need_headerupdate() says so, update_header() is called for the
 *     header(s) concerned while holding the write lock.
 *
 * The caller must hold the tree lock (see the comment in the body).
 */
3589 static inline isc_result_t
3590 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
3591 dns_dbnode_t **nodep, dns_name_t *foundname,
3592 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3595 dns_rbtnode_t *level_node;
3596 rdatasetheader_t *header, *header_prev, *header_next;
3597 rdatasetheader_t *found, *foundsig;
3598 isc_result_t result = ISC_R_NOTFOUND;
3603 isc_rwlocktype_t locktype;
3606 * Caller must be holding the tree lock.
3609 rbtdb = search->rbtdb;
3610 i = search->chain.level_matches;
3613 locktype = isc_rwlocktype_read;
3614 lock = &rbtdb->node_locks[node->locknum].lock;
3615 NODE_LOCK(lock, locktype);
3618 * Look for NS and RRSIG NS rdatasets.
3623 for (header = node->data;
3625 header = header_next) {
3626 header_next = header->next;
3627 if (header->rdh_ttl <= search->now) {
3629 * This rdataset is stale. If no one else is
3630 * using the node, we can clean it up right
3631 * now, otherwise we mark it as stale, and
3632 * the node as dirty, so it will get cleaned
3635 if ((header->rdh_ttl <= search->now -
3637 (locktype == isc_rwlocktype_write ||
3638 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3640 * We update the node's status only
3641 * when we can get write access.
3643 locktype = isc_rwlocktype_write;
3645 if (dns_rbtnode_refcurrent(node)
3649 m = search->rbtdb->common.mctx;
3650 clean_stale_headers(
3653 if (header_prev != NULL)
3659 free_rdataset(rbtdb, m,
3662 header->attributes |=
3663 RDATASET_ATTR_STALE;
3665 header_prev = header;
3668 header_prev = header;
3669 } else if (EXISTS(header)) {
3671 * We've found an extant rdataset. See if
3672 * we're interested in it.
3674 if (header->type == dns_rdatatype_ns) {
3676 if (foundsig != NULL)
3678 } else if (header->type ==
3679 RBTDB_RDATATYPE_SIGNS) {
3684 header_prev = header;
3686 header_prev = header;
3689 if (found != NULL) {
3691 * If we have to set foundname, we do it before
3692 * anything else. If we were to set foundname after
3693 * we had set nodep or bound the rdataset, then we'd
3694 * have to undo that work if dns_name_concatenate()
3695 * failed. By setting foundname first, there's
3696 * nothing to undo if we have trouble.
3698 if (foundname != NULL) {
3699 dns_name_init(&name, NULL);
3700 dns_rbt_namefromnode(node, &name);
3701 result = dns_name_copy(&name, foundname, NULL);
3702 while (result == ISC_R_SUCCESS && i > 0) {
3704 level_node = search->chain.levels[i];
3705 dns_name_init(&name, NULL);
3706 dns_rbt_namefromnode(level_node,
3709 dns_name_concatenate(foundname,
3714 if (result != ISC_R_SUCCESS) {
3719 result = DNS_R_DELEGATION;
3720 if (nodep != NULL) {
3721 new_reference(search->rbtdb, node);
3724 bind_rdataset(search->rbtdb, node, found, search->now,
3726 if (foundsig != NULL)
3727 bind_rdataset(search->rbtdb, node, foundsig,
3728 search->now, sigrdataset);
3729 if (need_headerupdate(found, search->now) ||
3730 (foundsig != NULL &&
3731 need_headerupdate(foundsig, search->now))) {
/*
 * Switch to the write lock (if not already held) before calling
 * update_header().  The lock is released and re-taken, so
 * need_headerupdate() is evaluated again afterwards.
 */
3732 if (locktype != isc_rwlocktype_write) {
3733 NODE_UNLOCK(lock, locktype);
3734 NODE_LOCK(lock, isc_rwlocktype_write);
3735 locktype = isc_rwlocktype_write;
3737 if (need_headerupdate(found, search->now))
3738 update_header(search->rbtdb, found,
3740 if (foundsig != NULL &&
3741 need_headerupdate(foundsig, search->now)) {
3742 update_header(search->rbtdb, foundsig,
3749 NODE_UNLOCK(lock, locktype);
3751 if (found == NULL && i > 0) {
3753 node = search->chain.levels[i];
/*
 * find_coveringnsec: cache lookup helper used by cache_find() when
 * DNS_DBFIND_COVERINGNSEC is set.  Starting at the node search->chain
 * currently points to, it steps backwards (dns_rbtnodechain_prev) as long
 * as the nodes visited are empty, looking for an NSEC rdataset
 * (matchtype) and its RRSIG (sigmatchtype).
 *
 * Each node is examined under its node lock, taken as a read lock.
 * Headers whose rdh_ttl is not later than 'now' are handled as stale: if
 * old enough and a write lock is held or obtained via NODE_TRYUPGRADE,
 * the header is either freed or marked with RDATASET_ATTR_STALE,
 * depending on dns_rbtnode_refcurrent(node).  Headers that are
 * NONEXISTENT, or whose RBTDB_RDATATYPE_BASE is 0, do not make the node
 * count as non-empty.
 *
 * Results visible here:
 *   DNS_R_COVERINGNSEC  an NSEC was found; the owner name was built with
 *                       dns_name_concatenate(), the rdataset(s) bound
 *                       and a node reference taken;
 *   ISC_R_NOTFOUND      a non-empty node without an NSEC was reached;
 *   otherwise the result of the chain / name operations.
 */
3763 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3764 isc_stdtime_t now, dns_name_t *foundname,
3765 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3767 dns_rbtnode_t *node;
3768 rdatasetheader_t *header, *header_next, *header_prev;
3769 rdatasetheader_t *found, *foundsig;
3770 isc_boolean_t empty_node;
3771 isc_result_t result;
3772 dns_fixedname_t fname, forigin;
3773 dns_name_t *name, *origin;
3774 rbtdb_rdatatype_t matchtype, sigmatchtype;
3776 isc_rwlocktype_t locktype;
3778 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
3779 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
3780 dns_rdatatype_nsec);
3784 dns_fixedname_init(&fname);
3785 name = dns_fixedname_name(&fname);
3786 dns_fixedname_init(&forigin);
3787 origin = dns_fixedname_name(&forigin);
3788 result = dns_rbtnodechain_current(&search->chain, name,
3790 if (result != ISC_R_SUCCESS)
3792 locktype = isc_rwlocktype_read;
3793 lock = &(search->rbtdb->node_locks[node->locknum].lock);
3794 NODE_LOCK(lock, locktype);
3797 empty_node = ISC_TRUE;
3799 for (header = node->data;
3801 header = header_next) {
3802 header_next = header->next;
3803 if (header->rdh_ttl <= now) {
3805 * This rdataset is stale. If no one else is
3806 * using the node, we can clean it up right
3807 * now, otherwise we mark it as stale, and the
3808 * node as dirty, so it will get cleaned up
3811 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
3812 (locktype == isc_rwlocktype_write ||
3813 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3815 * We update the node's status only
3816 * when we can get write access.
3818 locktype = isc_rwlocktype_write;
3820 if (dns_rbtnode_refcurrent(node)
3824 m = search->rbtdb->common.mctx;
3825 clean_stale_headers(
3828 if (header_prev != NULL)
3832 node->data = header->next;
3833 free_rdataset(search->rbtdb, m,
3836 header->attributes |=
3837 RDATASET_ATTR_STALE;
3839 header_prev = header;
3842 header_prev = header;
3845 if (NONEXISTENT(header) ||
3846 RBTDB_RDATATYPE_BASE(header->type) == 0) {
3847 header_prev = header;
3850 empty_node = ISC_FALSE;
/* Note NSEC and RRSIG(NSEC) headers as they are met. */
3851 if (header->type == matchtype)
3853 else if (header->type == sigmatchtype)
3855 header_prev = header;
3857 if (found != NULL) {
3858 result = dns_name_concatenate(name, origin,
3860 if (result != ISC_R_SUCCESS)
3862 bind_rdataset(search->rbtdb, node, found,
3864 if (foundsig != NULL)
3865 bind_rdataset(search->rbtdb, node, foundsig,
3867 new_reference(search->rbtdb, node);
3869 result = DNS_R_COVERINGNSEC;
3870 } else if (!empty_node) {
3871 result = ISC_R_NOTFOUND;
3873 result = dns_rbtnodechain_prev(&search->chain, NULL,
3876 NODE_UNLOCK(lock, locktype);
3877 } while (empty_node && result == ISC_R_SUCCESS);
/*
 * cache_find() -- look up 'name' / 'type' in a cache database.
 *
 * NOTE(review): the embedded line numbers in this listing have gaps, so
 * braces, labels, jumps and some statements are not shown.  The notes
 * added here describe only what the visible lines establish; anything
 * that depends on a missing line is marked as an assumption.
 *
 * Requirements visible below: 'db' must be a valid rbtdb and 'version'
 * must be NULL.
 *
 * Result codes assigned in the visible code: DNS_R_DELEGATION (no usable
 * answer, but an NS rdataset exists at the node), DNS_R_NCACHENXDOMAIN /
 * DNS_R_NCACHENXRRSET (negative cache entry), DNS_R_CNAME (a CNAME was
 * found instead of the requested type) and ISC_R_SUCCESS.  Other results
 * come from find_coveringnsec(), setup_delegation() and
 * find_deepest_zonecut().
 */
3882 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3883 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3884 dns_dbnode_t **nodep, dns_name_t *foundname,
3885 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3887 dns_rbtnode_t *node = NULL;
3888 isc_result_t result;
3889 rbtdb_search_t search;
3890 isc_boolean_t cname_ok = ISC_TRUE;
3891 isc_boolean_t empty_node;
3893 isc_rwlocktype_t locktype;
3894 rdatasetheader_t *header, *header_prev, *header_next;
3895 rdatasetheader_t *found, *nsheader;
3896 rdatasetheader_t *foundsig, *nssig, *cnamesig;
3897 rdatasetheader_t *update, *updatesig;
3898 rbtdb_rdatatype_t sigtype, negtype;
3902 search.rbtdb = (dns_rbtdb_t *)db;
3904 REQUIRE(VALID_RBTDB(search.rbtdb));
3905 REQUIRE(version == NULL);
/*
 * NOTE(review): presumably executed only when the caller did not supply
 * a time; the guarding test is on a line that is not shown -- confirm.
 */
3908 isc_stdtime_get(&now);
3910 search.rbtversion = NULL;
3912 search.options = options;
3913 search.copy_name = ISC_FALSE;
3914 search.need_cleanup = ISC_FALSE;
3915 search.wild = ISC_FALSE;
3916 search.zonecut = NULL;
3917 dns_fixedname_init(&search.zonecut_name);
3918 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
/*
 * The tree lock is taken in read mode here and released with the
 * matching RWUNLOCK near the end of the function.
 */
3923 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3926 * Search down from the root of the tree. If, while going down, we
3927 * encounter a callback node, cache_zonecut_callback() will search the
3928 * rdatasets at the zone cut for a DNAME rdataset.
3930 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
3931 &search.chain, DNS_RBTFIND_EMPTYDATA,
3932 cache_zonecut_callback, &search);
/*
 * Partial match: optionally try for a covering NSEC (only when the
 * caller set DNS_DBFIND_COVERINGNSEC), then use the zone cut recorded
 * in 'search' if there is one, or go looking for the deepest zone cut.
 * NOTE(review): the else/goto lines that separate these alternatives
 * are not shown.
 */
3934 if (result == DNS_R_PARTIALMATCH) {
3935 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
3936 result = find_coveringnsec(&search, nodep, now,
3937 foundname, rdataset,
3939 if (result == DNS_R_COVERINGNSEC)
3942 if (search.zonecut != NULL) {
3943 result = setup_delegation(&search, nodep, foundname,
3944 rdataset, sigrdataset);
3948 result = find_deepest_zonecut(&search, node, nodep,
3949 foundname, rdataset,
3953 } else if (result != ISC_R_SUCCESS)
3957 * Certain DNSSEC types are not subject to CNAME matching
3958 * (RFC4035, section 2.5 and RFC3007).
3960 * We don't check for RRSIG, because we don't store RRSIG records
3963 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3964 cname_ok = ISC_FALSE;
3967 * We now go looking for rdata...
/*
 * The node lock is taken in read mode.  'locktype' records the mode
 * currently held so that every later NODE_UNLOCK uses the right mode,
 * including after a successful NODE_TRYUPGRADE in the loop below.
 */
3970 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3971 locktype = isc_rwlocktype_read;
3972 NODE_LOCK(lock, locktype);
/*
 * sigtype is the RRSIG entry covering 'type'; negtype is the negative
 * cache entry (base type 0) for 'type'.
 */
3976 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3977 negtype = RBTDB_RDATATYPE_VALUE(0, type);
3981 empty_node = ISC_TRUE;
/*
 * header_next is captured before the body runs because the body may
 * unlink and free 'header'.
 */
3983 for (header = node->data; header != NULL; header = header_next) {
3984 header_next = header->next;
3985 if (header->rdh_ttl <= now) {
3987 * This rdataset is stale. If no one else is using the
3988 * node, we can clean it up right now, otherwise we
3989 * mark it as stale, and the node as dirty, so it will
3990 * get cleaned up later.
/*
 * Only entries whose TTL is at least RBTDB_VIRTUAL behind 'now' are
 * touched, and only if the write lock is already held or the
 * try-upgrade succeeds.
 */
3992 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
3993 (locktype == isc_rwlocktype_write ||
3994 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3996 * We update the node's status only when we
3997 * can get write access.
3999 locktype = isc_rwlocktype_write;
4001 if (dns_rbtnode_refcurrent(node) == 0) {
4004 mctx = search.rbtdb->common.mctx;
4005 clean_stale_headers(search.rbtdb, mctx,
4007 if (header_prev != NULL)
4011 node->data = header->next;
4012 free_rdataset(search.rbtdb, mctx,
4015 header->attributes |=
4016 RDATASET_ATTR_STALE;
4018 header_prev = header;
4021 header_prev = header;
4022 } else if (EXISTS(header)) {
4024 * We now know that there is at least one active
4025 * non-stale rdataset at this node.
4027 empty_node = ISC_FALSE;
4030 * If we found a type we were looking for, remember
4033 if (header->type == type ||
4034 (type == dns_rdatatype_any &&
4035 RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4036 (cname_ok && header->type ==
4037 dns_rdatatype_cname)) {
4039 * We've found the answer.
4042 if (header->type == dns_rdatatype_cname &&
4046 * If we've already got the CNAME RRSIG,
4047 * use it, otherwise change sigtype
4048 * so that we find it.
4050 if (cnamesig != NULL)
4051 foundsig = cnamesig;
4054 RBTDB_RDATATYPE_SIGCNAME;
4055 foundsig = cnamesig;
4057 } else if (header->type == sigtype) {
4059 * We've found the RRSIG rdataset for our
4060 * target type. Remember it.
4063 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4064 header->type == negtype) {
4066 * We've found a negative cache entry.
4069 } else if (header->type == dns_rdatatype_ns) {
4071 * Remember a NS rdataset even if we're
4072 * not specifically looking for it, because
4073 * we might need it later.
4076 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4078 * If we need the NS rdataset, we'll also
4079 * need its signature.
4082 } else if (cname_ok &&
4083 header->type == RBTDB_RDATATYPE_SIGCNAME) {
4085 * If we get a CNAME match, we'll also need
4090 header_prev = header;
4092 header_prev = header;
4097 * We have an exact match for the name, but there are no
4098 * extant rdatasets. That means that this node doesn't
4099 * meaningfully exist, and that we really have a partial match.
4101 NODE_UNLOCK(lock, locktype);
4106 * If we didn't find what we were looking for...
/*
 * Glue-trust and pending-trust data count as "not found" unless the
 * caller passed DNS_DBFIND_GLUEOK / DNS_DBFIND_PENDINGOK respectively.
 */
4108 if (found == NULL ||
4109 (found->trust == dns_trust_glue &&
4110 ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4111 (found->trust == dns_trust_pending &&
4112 ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4114 * If there is an NS rdataset at this node, then this is the
4117 if (nsheader != NULL) {
4118 if (nodep != NULL) {
4119 new_reference(search.rbtdb, node);
4120 INSIST(!ISC_LINK_LINKED(node, deadlink));
4123 bind_rdataset(search.rbtdb, node, nsheader, search.now,
4125 if (need_headerupdate(nsheader, search.now))
4127 if (nssig != NULL) {
4128 bind_rdataset(search.rbtdb, node, nssig,
4129 search.now, sigrdataset);
4130 if (need_headerupdate(nssig, search.now))
4133 result = DNS_R_DELEGATION;
4138 * Go find the deepest zone cut.
4140 NODE_UNLOCK(lock, locktype);
4145 * We found what we were looking for, or we found a CNAME.
4148 if (nodep != NULL) {
4149 new_reference(search.rbtdb, node);
4150 INSIST(!ISC_LINK_LINKED(node, deadlink));
4154 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4156 * We found a negative cache entry.
4158 if (NXDOMAIN(found))
4159 result = DNS_R_NCACHENXDOMAIN;
4161 result = DNS_R_NCACHENXRRSET;
4162 } else if (type != found->type &&
4163 type != dns_rdatatype_any &&
4164 found->type == dns_rdatatype_cname) {
4166 * We weren't doing an ANY query and we found a CNAME instead
4167 * of the type we were looking for, so we need to indicate
4168 * that result to the caller.
4170 result = DNS_R_CNAME;
4173 * An ordinary successful query!
4175 result = ISC_R_SUCCESS;
/*
 * For a positive ANY query no rdataset is bound here; rdatasets are
 * bound only for a specific type or for a negative-cache result.
 */
4178 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4179 result == DNS_R_NCACHENXRRSET) {
4180 bind_rdataset(search.rbtdb, node, found, search.now,
4182 if (need_headerupdate(found, search.now))
4184 if (foundsig != NULL) {
4185 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4187 if (need_headerupdate(foundsig, search.now))
4188 updatesig = foundsig;
/*
 * Headers selected for refresh are updated under the write lock.  If
 * only the read lock is held it is released and re-acquired in write
 * mode (not upgraded in place), and need_headerupdate() is evaluated
 * again after relocking before update_header() is called.
 */
4193 if ((update != NULL || updatesig != NULL) &&
4194 locktype != isc_rwlocktype_write) {
4195 NODE_UNLOCK(lock, locktype);
4196 NODE_LOCK(lock, isc_rwlocktype_write);
4197 locktype = isc_rwlocktype_write;
4199 if (update != NULL && need_headerupdate(update, search.now))
4200 update_header(search.rbtdb, update, search.now);
4201 if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4202 update_header(search.rbtdb, updatesig, search.now);
4204 NODE_UNLOCK(lock, locktype);
4207 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4210 * If we found a zonecut but aren't going to use it, we have to
/*
 * The reference on search.zonecut is dropped under that node's own
 * lock, taken in read mode; 'node' and 'lock' are reused for it.
 */
4213 if (search.need_cleanup) {
4214 node = search.zonecut;
4215 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4217 NODE_LOCK(lock, isc_rwlocktype_read);
4218 decrement_reference(search.rbtdb, node, 0,
4219 isc_rwlocktype_read, isc_rwlocktype_none,
4221 NODE_UNLOCK(lock, isc_rwlocktype_read);
4224 dns_rbtnodechain_reset(&search.chain);
/*
 * cache_findzonecut() -- find NS data for the zone cut at or above
 * 'name' in a cache database.
 *
 * NOTE(review): the embedded line numbers have gaps; braces, labels,
 * jumps and some statements are not shown.  Notes describe only the
 * visible lines.
 *
 * The tree is searched with DNS_RBTFIND_EMPTYDATA, plus
 * DNS_RBTFIND_NOEXACT when the caller sets DNS_DBFIND_NOEXACT.  A
 * DNS_R_DELEGATION result is converted to ISC_R_SUCCESS at the end.
 */
4230 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4231 isc_stdtime_t now, dns_dbnode_t **nodep,
4232 dns_name_t *foundname,
4233 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4235 dns_rbtnode_t *node = NULL;
4237 isc_result_t result;
4238 rbtdb_search_t search;
4239 rdatasetheader_t *header, *header_prev, *header_next;
4240 rdatasetheader_t *found, *foundsig;
4241 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4242 isc_rwlocktype_t locktype;
4244 search.rbtdb = (dns_rbtdb_t *)db;
4246 REQUIRE(VALID_RBTDB(search.rbtdb));
/*
 * NOTE(review): presumably executed only when the caller did not supply
 * a time; the guarding test is on a line that is not shown -- confirm.
 */
4249 isc_stdtime_get(&now);
4251 search.rbtversion = NULL;
4253 search.options = options;
4254 search.copy_name = ISC_FALSE;
4255 search.need_cleanup = ISC_FALSE;
4256 search.wild = ISC_FALSE;
4257 search.zonecut = NULL;
4258 dns_fixedname_init(&search.zonecut_name);
4259 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4262 if ((options & DNS_DBFIND_NOEXACT) != 0)
4263 rbtoptions |= DNS_RBTFIND_NOEXACT;
/* Tree lock held in read mode until the matching RWUNLOCK below. */
4265 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4268 * Search down from the root of the tree.
/* No callback is passed to dns_rbt_findnode() here (NULL). */
4270 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4271 &search.chain, rbtoptions, NULL, &search);
4273 if (result == DNS_R_PARTIALMATCH) {
4275 result = find_deepest_zonecut(&search, node, nodep, foundname,
4276 rdataset, sigrdataset);
4278 } else if (result != ISC_R_SUCCESS)
4282 * We now go looking for an NS rdataset at the node.
/*
 * Node lock taken in read mode; 'locktype' tracks the mode currently
 * held, which becomes write after a successful NODE_TRYUPGRADE.
 */
4285 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4286 locktype = isc_rwlocktype_read;
4287 NODE_LOCK(lock, locktype);
/*
 * header_next is captured first because the body may unlink and free
 * 'header'.
 */
4292 for (header = node->data; header != NULL; header = header_next) {
4293 header_next = header->next;
4294 if (header->rdh_ttl <= now) {
4296 * This rdataset is stale. If no one else is using the
4297 * node, we can clean it up right now, otherwise we
4298 * mark it as stale, and the node as dirty, so it will
4299 * get cleaned up later.
4301 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4302 (locktype == isc_rwlocktype_write ||
4303 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4305 * We update the node's status only when we
4306 * can get write access.
4308 locktype = isc_rwlocktype_write;
4310 if (dns_rbtnode_refcurrent(node) == 0) {
4313 mctx = search.rbtdb->common.mctx;
4314 clean_stale_headers(search.rbtdb, mctx,
4316 if (header_prev != NULL)
4320 node->data = header->next;
4321 free_rdataset(search.rbtdb, mctx,
4324 header->attributes |=
4325 RDATASET_ATTR_STALE;
4327 header_prev = header;
4330 header_prev = header;
4331 } else if (EXISTS(header)) {
4333 * If we found a type we were looking for, remember
4336 if (header->type == dns_rdatatype_ns) {
4338 * Remember a NS rdataset even if we're
4339 * not specifically looking for it, because
4340 * we might need it later.
4343 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4345 * If we need the NS rdataset, we'll also
4346 * need its signature.
4350 header_prev = header;
4352 header_prev = header;
/*
 * NOTE(review): after the unlock in this branch the code presumably
 * continues the search elsewhere; the following lines are not shown.
 */
4355 if (found == NULL) {
4357 * No NS records here.
4359 NODE_UNLOCK(lock, locktype);
4363 if (nodep != NULL) {
4364 new_reference(search.rbtdb, node);
4365 INSIST(!ISC_LINK_LINKED(node, deadlink));
4369 bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4370 if (foundsig != NULL)
4371 bind_rdataset(search.rbtdb, node, foundsig, search.now,
/*
 * Header refresh needs the write lock.  A read lock is released and
 * re-acquired in write mode, and need_headerupdate() is evaluated again
 * after relocking before update_header() is called.
 */
4374 if (need_headerupdate(found, search.now) ||
4375 (foundsig != NULL && need_headerupdate(foundsig, search.now))) {
4376 if (locktype != isc_rwlocktype_write) {
4377 NODE_UNLOCK(lock, locktype);
4378 NODE_LOCK(lock, isc_rwlocktype_write);
4379 locktype = isc_rwlocktype_write;
4381 if (need_headerupdate(found, search.now))
4382 update_header(search.rbtdb, found, search.now);
4383 if (foundsig != NULL &&
4384 need_headerupdate(foundsig, search.now)) {
4385 update_header(search.rbtdb, foundsig, search.now);
4389 NODE_UNLOCK(lock, locktype);
4392 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
/* This function passes no zone-cut callback, so no cleanup is expected. */
4394 INSIST(!search.need_cleanup);
4396 dns_rbtnodechain_reset(&search.chain);
/* Callers of this function see a delegation as plain success. */
4398 if (result == DNS_R_DELEGATION)
4399 result = ISC_R_SUCCESS;
/*
 * attachnode() -- take an additional reference on node 'source'.
 *
 * 'db' must be a valid rbtdb; 'targetp' must be non-NULL and point to a
 * NULL pointer.  The node's reference count is incremented while the
 * node's lock is held via NODE_STRONGLOCK.
 *
 * NOTE(review): the declaration of 'refs' and the store through
 * 'targetp' are on lines not shown in this listing.
 */
4405 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4406 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4407 dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4410 REQUIRE(VALID_RBTDB(rbtdb));
4411 REQUIRE(targetp != NULL && *targetp == NULL);
4413 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4414 dns_rbtnode_refincrement(node, &refs);
4416 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * detachnode() -- release the node reference held in '*targetp'.
 *
 * 'db' must be a valid rbtdb; 'targetp' and '*targetp' must be
 * non-NULL.  The reference is dropped with decrement_reference() under
 * the node lock in read mode.  The database itself may be freed from
 * here via free_rbtdb().
 *
 * NOTE(review): the tests on 'inactive' and 'want_free' that guard the
 * later sections, and the update of rbtdb->active, are on lines not
 * shown in this listing.
 */
4422 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4423 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4424 dns_rbtnode_t *node;
4425 isc_boolean_t want_free = ISC_FALSE;
4426 isc_boolean_t inactive = ISC_FALSE;
4427 rbtdb_nodelock_t *nodelock;
4429 REQUIRE(VALID_RBTDB(rbtdb));
4430 REQUIRE(targetp != NULL && *targetp != NULL);
4432 node = (dns_rbtnode_t *)(*targetp);
4433 nodelock = &rbtdb->node_locks[node->locknum];
4435 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
/*
 * The lock bucket is considered inactive when decrement_reference()
 * returns true, the bucket has no remaining references, and the bucket
 * is flagged as exiting.
 */
4437 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4438 isc_rwlocktype_none, ISC_FALSE)) {
4439 if (isc_refcount_current(&nodelock->references) == 0 &&
4440 nodelock->exiting) {
4441 inactive = ISC_TRUE;
4445 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
/* rbtdb->active is examined under the database write lock. */
4450 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4452 if (rbtdb->active == 0)
4453 want_free = ISC_TRUE;
4454 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * The origin name is formatted for the log message only when
 * dns_name_dynamic() reports true for it; otherwise the literal
 * "<UNKNOWN>" is logged.  buf is DNS_NAME_FORMATSIZE bytes.
 */
4456 char buf[DNS_NAME_FORMATSIZE];
4457 if (dns_name_dynamic(&rbtdb->common.origin))
4458 dns_name_format(&rbtdb->common.origin, buf,
4461 strcpy(buf, "<UNKNOWN>");
4462 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4463 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4464 "calling free_rbtdb(%s)", buf);
4465 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * expirenode() -- mark expired rdatasets at 'node' as stale and, when
 * the database is over its memory limit, possibly force-expire the
 * node's rdatasets.
 *
 * Always returns ISC_R_SUCCESS in the visible code.
 *
 * NOTE(review): the embedded line numbers have gaps; some braces,
 * declarations (e.g. 'val') and conditions are not shown.
 */
4471 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4472 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4473 dns_rbtnode_t *rbtnode = node;
4474 rdatasetheader_t *header;
4475 isc_boolean_t force_expire = ISC_FALSE;
4477 * These are the category and module used by the cache cleaner.
4479 isc_boolean_t log = ISC_FALSE;
4480 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4481 isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4482 int level = ISC_LOG_DEBUG(2);
4483 char printname[DNS_NAME_FORMATSIZE];
4485 REQUIRE(VALID_RBTDB(rbtdb));
4488 * Caller must hold a tree lock.
/*
 * NOTE(review): presumably executed only when the caller did not supply
 * a time; the guarding test is on a line that is not shown -- confirm.
 */
4492 isc_stdtime_get(&now);
4494 if (rbtdb->overmem) {
4497 isc_random_get(&val);
4499 * XXXDCL Could stand to have a better policy, like LRU.
/*
 * Forced expiry is chosen only for nodes with no 'down' pointer, and
 * then only when the random value is a multiple of 4.
 */
4501 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4504 * Note that 'log' can be true IFF rbtdb->overmem is also true.
4505 * rbtdb->overmem can currently only be true for cache
4506 * databases -- hence all of the "overmem cache" log strings.
4508 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
4510 isc_log_write(dns_lctx, category, module, level,
4511 "overmem cache: %s %s",
4512 force_expire ? "FORCE" : "check",
4513 dns_rbt_formatnodename(rbtnode,
4515 sizeof(printname)));
4519 * We may not need write access, but this code path is not performance
4520 * sensitive, so it should be okay to always lock as a writer.
4522 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4523 isc_rwlocktype_write);
/*
 * Per header: an entry at least RBTDB_VIRTUAL past its TTL is marked
 * stale; otherwise, under forced expiry, an entry without RETAIN() gets
 * TTL 0 and is marked stale, while a RETAIN() entry is only logged as
 * reprieved.
 */
4525 for (header = rbtnode->data; header != NULL; header = header->next)
4526 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
4528 * We don't check if refcurrent(rbtnode) == 0 and try
4529 * to free like we do in cache_find(), because
4530 * refcurrent(rbtnode) must be non-zero. This is so
4531 * because 'node' is an argument to the function.
4533 header->attributes |= RDATASET_ATTR_STALE;
4536 isc_log_write(dns_lctx, category, module,
4537 level, "overmem cache: stale %s",
4539 } else if (force_expire) {
4540 if (! RETAIN(header)) {
4541 set_ttl(rbtdb, header, 0);
4542 header->attributes |= RDATASET_ATTR_STALE;
4545 isc_log_write(dns_lctx, category, module,
4546 level, "overmem cache: "
4547 "reprieve by RETAIN() %s",
4550 } else if (rbtdb->overmem && log)
4551 isc_log_write(dns_lctx, category, module, level,
4552 "overmem cache: saved %s", printname);
4554 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4555 isc_rwlocktype_write);
4557 return (ISC_R_SUCCESS);
/*
 * overmem() -- record the over-memory state on the database.
 *
 * The flag is stored only when IS_CACHE(rbtdb) is true; for any other
 * database the call has no visible effect.
 */
4561 overmem(dns_db_t *db, isc_boolean_t overmem) {
4562 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4564 if (IS_CACHE(rbtdb))
4565 rbtdb->overmem = overmem;
/*
 * printnode() -- write a debugging dump of 'node' to 'out'.
 *
 * Prints the node address, its current reference count and lock number,
 * then for each rdataset header in the node's list walks the 'down'
 * chain printing serial, ttl, trust and attributes.  Prints "(empty)"
 * when the node has no data.  The node lock is held in read mode for
 * the whole dump.
 *
 * NOTE(review): several lines of the original (loop openers, some
 * fprintf arguments) are not shown in this listing.
 */
4569 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
4570 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4571 dns_rbtnode_t *rbtnode = node;
4572 isc_boolean_t first;
4574 REQUIRE(VALID_RBTDB(rbtdb));
4576 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4577 isc_rwlocktype_read);
4579 fprintf(out, "node %p, %u references, locknum = %u\n",
4580 rbtnode, dns_rbtnode_refcurrent(rbtnode),
4582 if (rbtnode->data != NULL) {
4583 rdatasetheader_t *current, *top_next;
/*
 * top_next is saved before the inner walk because 'current' is advanced
 * down the version chain and ends up NULL.
 */
4585 for (current = rbtnode->data; current != NULL;
4586 current = top_next) {
4587 top_next = current->next;
4589 fprintf(out, "\ttype %u", current->type);
4595 "\tserial = %lu, ttl = %u, "
4596 "trust = %u, attributes = %u\n",
4597 (unsigned long)current->serial,
4600 current->attributes);
4601 current = current->down;
4602 } while (current != NULL);
4605 fprintf(out, "(empty)\n");
4607 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4608 isc_rwlocktype_read);
/*
 * createiterator() -- allocate and initialise a database iterator.
 *
 * The iterator is allocated from the database's memory context and
 * holds an attached reference to 'db'.  It starts out paused, with no
 * tree lock held, result ISC_R_SUCCESS, no current node and an empty
 * deletion list.
 *
 * Returns ISC_R_NOMEMORY if the allocation fails, otherwise stores the
 * iterator in '*iteratorp' and returns ISC_R_SUCCESS.
 */
4612 createiterator(dns_db_t *db, isc_boolean_t relative_names,
4613 dns_dbiterator_t **iteratorp)
4615 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4616 rbtdb_dbiterator_t *rbtdbiter;
4618 REQUIRE(VALID_RBTDB(rbtdb));
4620 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
4621 if (rbtdbiter == NULL)
4622 return (ISC_R_NOMEMORY);
4624 rbtdbiter->common.methods = &dbiterator_methods;
/* common.db is cleared first, then set by dns_db_attach(). */
4625 rbtdbiter->common.db = NULL;
4626 dns_db_attach(db, &rbtdbiter->common.db);
4627 rbtdbiter->common.relative_names = relative_names;
4628 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
4629 rbtdbiter->common.cleaning = ISC_FALSE;
4630 rbtdbiter->paused = ISC_TRUE;
4631 rbtdbiter->tree_locked = isc_rwlocktype_none;
4632 rbtdbiter->result = ISC_R_SUCCESS;
4633 dns_fixedname_init(&rbtdbiter->name);
4634 dns_fixedname_init(&rbtdbiter->origin);
4635 rbtdbiter->node = NULL;
4636 rbtdbiter->delete = 0;
4637 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
4638 dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
4640 *iteratorp = (dns_dbiterator_t *)rbtdbiter;
4642 return (ISC_R_SUCCESS);
/*
 * zone_findrdataset() -- find the rdataset of 'type' / 'covers' at
 * 'node' as seen by 'version' in a zone database.
 *
 * 'type' must not be dns_rdatatype_any.  If 'version' is NULL the
 * current version is opened here and closed again before returning.
 * Returns ISC_R_NOTFOUND or ISC_R_SUCCESS.
 *
 * NOTE(review): the embedded line numbers have gaps; the assignments to
 * 'found' / 'foundsig', loop openers and several conditions are not
 * shown in this listing.
 */
4646 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4647 dns_rdatatype_t type, dns_rdatatype_t covers,
4648 isc_stdtime_t now, dns_rdataset_t *rdataset,
4649 dns_rdataset_t *sigrdataset)
4651 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4652 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4653 rdatasetheader_t *header, *header_next, *found, *foundsig;
4654 rbtdb_serial_t serial;
4655 rbtdb_version_t *rbtversion = version;
4656 isc_boolean_t close_version = ISC_FALSE;
4657 rbtdb_rdatatype_t matchtype, sigmatchtype;
4659 REQUIRE(VALID_RBTDB(rbtdb));
4660 REQUIRE(type != dns_rdatatype_any);
/* close_version records that the version opened here must be closed. */
4662 if (rbtversion == NULL) {
4663 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
4664 close_version = ISC_TRUE;
4666 serial = rbtversion->serial;
4669 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4670 isc_rwlocktype_read);
4674 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
/*
 * NOTE(review): presumably computed only when covers == 0, with the
 * alternative on a line not shown -- confirm against the full source.
 */
4676 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
/*
 * header_next is saved up front because the inner walk advances
 * 'header' down the version chain.
 */
4680 for (header = rbtnode->data; header != NULL; header = header_next) {
4681 header_next = header->next;
4683 if (header->serial <= serial &&
4686 * Is this a "this rdataset doesn't
4689 if (NONEXISTENT(header))
4693 header = header->down;
4694 } while (header != NULL);
4695 if (header != NULL) {
4697 * We have an active, extant rdataset. If it's a
4698 * type we're looking for, remember it.
4700 if (header->type == matchtype) {
4702 if (foundsig != NULL)
4704 } else if (header->type == sigmatchtype) {
4711 if (found != NULL) {
4712 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
4713 if (foundsig != NULL)
4714 bind_rdataset(rbtdb, rbtnode, foundsig, now,
4718 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4719 isc_rwlocktype_read);
4722 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
4726 return (ISC_R_NOTFOUND);
4728 return (ISC_R_SUCCESS);
/*
 * cache_findrdataset() -- find the rdataset of 'type' / 'covers' at
 * 'node' in a cache database.
 *
 * 'type' must not be dns_rdatatype_any.  Results visible below:
 * ISC_R_NOTFOUND, ISC_R_SUCCESS (initial value of 'result'), and
 * DNS_R_NCACHENXDOMAIN / DNS_R_NCACHENXRRSET when the entry found is a
 * negative cache entry (base type 0).
 *
 * NOTE(review): the embedded line numbers have gaps; the assignments to
 * 'found' / 'foundsig', some conditions and the final return are not
 * shown in this listing.
 */
4732 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4733 dns_rdatatype_t type, dns_rdatatype_t covers,
4734 isc_stdtime_t now, dns_rdataset_t *rdataset,
4735 dns_rdataset_t *sigrdataset)
4737 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4738 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4739 rdatasetheader_t *header, *header_next, *found, *foundsig;
4740 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
4741 isc_result_t result;
4743 isc_rwlocktype_t locktype;
4745 REQUIRE(VALID_RBTDB(rbtdb));
4746 REQUIRE(type != dns_rdatatype_any);
4750 result = ISC_R_SUCCESS;
/*
 * NOTE(review): presumably executed only when the caller did not supply
 * a time; the guarding test is on a line that is not shown -- confirm.
 */
4753 isc_stdtime_get(&now);
/*
 * Node lock taken in read mode; 'locktype' tracks the mode currently
 * held, which becomes write after a successful NODE_TRYUPGRADE.
 */
4755 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
4756 locktype = isc_rwlocktype_read;
4757 NODE_LOCK(lock, locktype);
4761 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
4762 negtype = RBTDB_RDATATYPE_VALUE(0, type);
4764 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4768 for (header = rbtnode->data; header != NULL; header = header_next) {
4769 header_next = header->next;
4770 if (header->rdh_ttl <= now) {
4771 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4772 (locktype == isc_rwlocktype_write ||
4773 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4775 * We update the node's status only when we
4776 * can get write access.
4778 locktype = isc_rwlocktype_write;
4781 * We don't check if refcurrent(rbtnode) == 0
4782 * and try to free like we do in cache_find(),
4783 * because refcurrent(rbtnode) must be
4784 * non-zero. This is so because 'node' is an
4785 * argument to the function.
4787 header->attributes |= RDATASET_ATTR_STALE;
/*
 * A live header matches on the exact type, on a negative entry
 * (NCACHEANY or the negative type for 'type'), or on the covering
 * signature type.
 */
4790 } else if (EXISTS(header)) {
4791 if (header->type == matchtype)
4793 else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4794 header->type == negtype)
4796 else if (header->type == sigmatchtype)
4800 if (found != NULL) {
4801 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
4802 if (foundsig != NULL)
4803 bind_rdataset(rbtdb, rbtnode, foundsig, now,
4807 NODE_UNLOCK(lock, locktype);
4810 return (ISC_R_NOTFOUND);
4812 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4814 * We found a negative cache entry.
4816 if (NXDOMAIN(found))
4817 result = DNS_R_NCACHENXDOMAIN;
4819 result = DNS_R_NCACHENXRRSET;
/*
 * allrdatasets() -- create an iterator over the rdatasets at 'node'.
 *
 * The iterator is allocated from the database's memory context and
 * records 'db', 'node', the version and 'now'.  A reference is taken on
 * the node under NODE_STRONGLOCK.  Returns ISC_R_NOMEMORY if the
 * allocation fails, otherwise stores the iterator in '*iteratorp' and
 * returns ISC_R_SUCCESS.
 *
 * NOTE(review): the embedded line numbers have gaps; the else branches
 * around the version handling and the declaration of 'refs' are not
 * shown in this listing.
 */
4826 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4827 isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
4829 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4830 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4831 rbtdb_version_t *rbtversion = version;
4832 rbtdb_rdatasetiter_t *iterator;
4835 REQUIRE(VALID_RBTDB(rbtdb));
4837 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
4838 if (iterator == NULL)
4839 return (ISC_R_NOMEMORY);
/*
 * Databases without DNS_DBATTR_CACHE take the version path: a version
 * is obtained when none was supplied, and a version reference count is
 * incremented.  NOTE(review): which of these runs in which case depends
 * on lines not shown.  The isc_stdtime_get() call presumably belongs to
 * the cache path when no time was supplied -- confirm.
 */
4841 if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
4843 if (rbtversion == NULL)
4845 (dns_dbversion_t **) (void *)(&rbtversion));
4849 isc_refcount_increment(&rbtversion->references,
4855 isc_stdtime_get(&now);
4859 iterator->common.magic = DNS_RDATASETITER_MAGIC;
4860 iterator->common.methods = &rdatasetiter_methods;
4861 iterator->common.db = db;
4862 iterator->common.node = node;
4863 iterator->common.version = (dns_dbversion_t *)rbtversion;
4864 iterator->common.now = now;
4866 NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4868 dns_rbtnode_refincrement(rbtnode, &refs);
4871 iterator->current = NULL;
4873 NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4875 *iteratorp = (dns_rdatasetiter_t *)iterator;
4877 return (ISC_R_SUCCESS);
/*
 * cname_and_other_data() -- check whether 'node', as seen at 'serial',
 * holds both a CNAME and some other data.
 *
 * Types whose base type is KEY, SIG, NSEC or RRSIG are not counted as
 * "other data".  For each top-level header the 'down' chain is walked
 * to find the entry visible at 'serial'.
 *
 * NOTE(review): the embedded line numbers have gaps; the return
 * statements, the assignments to 'cname' and several conditions are not
 * shown in this listing.
 */
4880 static isc_boolean_t
4881 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
4882 rdatasetheader_t *header, *header_next;
4883 isc_boolean_t cname, other_data;
4884 dns_rdatatype_t rdtype;
4887 * The caller must hold the node lock.
4891 * Look for CNAME and "other data" rdatasets active in our version.
4894 other_data = ISC_FALSE;
/*
 * header_next is saved up front because the inner walks advance
 * 'header' down the version chain.
 */
4895 for (header = node->data; header != NULL; header = header_next) {
4896 header_next = header->next;
4897 if (header->type == dns_rdatatype_cname) {
4899 * Look for an active extant CNAME.
4902 if (header->serial <= serial &&
4905 * Is this a "this rdataset doesn't
4908 if (NONEXISTENT(header))
4912 header = header->down;
4913 } while (header != NULL);
4918 * Look for active extant "other data".
4920 * "Other data" is any rdataset whose type is not
4921 * KEY, NSEC, SIG or RRSIG.
4923 rdtype = RBTDB_RDATATYPE_BASE(header->type);
4924 if (rdtype != dns_rdatatype_key &&
4925 rdtype != dns_rdatatype_sig &&
4926 rdtype != dns_rdatatype_nsec &&
4927 rdtype != dns_rdatatype_rrsig) {
4929 * Is it active and extant?
4932 if (header->serial <= serial &&
4935 * Is this a "this rdataset
4936 * doesn't exist" record?
4938 if (NONEXISTENT(header))
4942 header = header->down;
4943 } while (header != NULL);
4945 other_data = ISC_TRUE;
4950 if (cname && other_data)
4957 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
4958 rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
4959 dns_rdataset_t *addedrdataset, isc_stdtime_t now)
4961 rbtdb_changed_t *changed = NULL;
4962 rdatasetheader_t *topheader, *topheader_prev, *header;
4963 unsigned char *merged;
4964 isc_result_t result;
4965 isc_boolean_t header_nx;
4966 isc_boolean_t newheader_nx;
4967 isc_boolean_t merge;
4968 dns_rdatatype_t rdtype, covers;
4969 rbtdb_rdatatype_t negtype;
4973 * Add an rdatasetheader_t to a node.
4977 * Caller must be holding the node lock.
4980 if ((options & DNS_DBADD_MERGE) != 0) {
4981 REQUIRE(rbtversion != NULL);
4986 if ((options & DNS_DBADD_FORCE) != 0)
4987 trust = dns_trust_ultimate;
4989 trust = newheader->trust;
4991 if (rbtversion != NULL && !loading) {
4993 * We always add a changed record, even if no changes end up
4994 * being made to this node, because it's harmless and
4995 * simplifies the code.
4997 changed = add_changed(rbtdb, rbtversion, rbtnode);
4998 if (changed == NULL) {
4999 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5000 return (ISC_R_NOMEMORY);
5004 newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5005 topheader_prev = NULL;
5008 if (rbtversion == NULL && !newheader_nx) {
5009 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5012 * We're adding a negative cache entry.
5014 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5015 if (covers == dns_rdatatype_any) {
5017 * We're adding an negative cache entry
5018 * which covers all types (NXDOMAIN,
5019 * NODATA(QTYPE=ANY)).
5021 * We make all other data stale so that the
5022 * only rdataset that can be found at this
5023 * node is the negative cache entry.
5025 for (topheader = rbtnode->data;
5027 topheader = topheader->next) {
5028 set_ttl(rbtdb, topheader, 0);
5029 topheader->attributes |=
5030 RDATASET_ATTR_STALE;
5035 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5038 * We're adding something that isn't a
5039 * negative cache entry. Look for an extant
5040 * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5043 for (topheader = rbtnode->data;
5045 topheader = topheader->next) {
5046 if (topheader->type ==
5047 RBTDB_RDATATYPE_NCACHEANY)
5050 if (topheader != NULL && EXISTS(topheader) &&
5051 topheader->rdh_ttl > now) {
5055 if (trust < topheader->trust) {
5057 * The NXDOMAIN/NODATA(QTYPE=ANY)
5060 free_rdataset(rbtdb,
5063 if (addedrdataset != NULL)
5064 bind_rdataset(rbtdb, rbtnode,
5067 return (DNS_R_UNCHANGED);
5070 * The new rdataset is better. Expire the
5071 * NXDOMAIN/NODATA(QTYPE=ANY).
5073 set_ttl(rbtdb, topheader, 0);
5074 topheader->attributes |= RDATASET_ATTR_STALE;
5079 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5083 for (topheader = rbtnode->data;
5085 topheader = topheader->next) {
5086 if (topheader->type == newheader->type ||
5087 topheader->type == negtype)
5089 topheader_prev = topheader;
5094 * If header isn't NULL, we've found the right type. There may be
5095 * IGNORE rdatasets between the top of the chain and the first real
5096 * data. We skip over them.
5099 while (header != NULL && IGNORE(header))
5100 header = header->down;
5101 if (header != NULL) {
5102 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5105 * Deleting an already non-existent rdataset has no effect.
5107 if (header_nx && newheader_nx) {
5108 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5109 return (DNS_R_UNCHANGED);
5113 * Trying to add an rdataset with lower trust to a cache DB
5114 * has no effect, provided that the cache data isn't stale.
5116 if (rbtversion == NULL && trust < header->trust &&
5117 (header->rdh_ttl > now || header_nx)) {
5118 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5119 if (addedrdataset != NULL)
5120 bind_rdataset(rbtdb, rbtnode, header, now,
5122 return (DNS_R_UNCHANGED);
5126 * Don't merge if a nonexistent rdataset is involved.
5128 if (merge && (header_nx || newheader_nx))
5132 * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5133 * that is the union of 'newheader' and 'header'.
5136 unsigned int flags = 0;
5137 INSIST(rbtversion->serial >= header->serial);
5139 result = ISC_R_SUCCESS;
5141 if ((options & DNS_DBADD_EXACT) != 0)
5142 flags |= DNS_RDATASLAB_EXACT;
5143 if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5144 newheader->rdh_ttl != header->rdh_ttl)
5145 result = DNS_R_NOTEXACT;
5146 else if (newheader->rdh_ttl != header->rdh_ttl)
5147 flags |= DNS_RDATASLAB_FORCE;
5148 if (result == ISC_R_SUCCESS)
5149 result = dns_rdataslab_merge(
5150 (unsigned char *)header,
5151 (unsigned char *)newheader,
5152 (unsigned int)(sizeof(*newheader)),
5154 rbtdb->common.rdclass,
5155 (dns_rdatatype_t)header->type,
5157 if (result == ISC_R_SUCCESS) {
5159 * If 'header' has the same serial number as
5160 * we do, we could clean it up now if we knew
5161 * that our caller had no references to it.
5162 * We don't know this, however, so we leave it
5163 * alone. It will get cleaned up when
5164 * clean_zone_node() runs.
5166 free_rdataset(rbtdb, rbtdb->common.mctx,
5168 newheader = (rdatasetheader_t *)merged;
5170 free_rdataset(rbtdb, rbtdb->common.mctx,
5176 * Don't replace existing NS, A and AAAA RRsets
5177 * in the cache if they are already exist. This
5178 * prevents named being locked to old servers.
5179 * Don't lower trust of existing record if the
5182 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5183 header->type == dns_rdatatype_ns &&
5184 !header_nx && !newheader_nx &&
5185 header->trust >= newheader->trust &&
5186 dns_rdataslab_equalx((unsigned char *)header,
5187 (unsigned char *)newheader,
5188 (unsigned int)(sizeof(*newheader)),
5189 rbtdb->common.rdclass,
5190 (dns_rdatatype_t)header->type)) {
5192 * Honour the new ttl if it is less than the
5195 if (header->rdh_ttl > newheader->rdh_ttl)
5196 set_ttl(rbtdb, header, newheader->rdh_ttl);
5197 if (header->noqname == NULL &&
5198 newheader->noqname != NULL) {
5199 header->noqname = newheader->noqname;
5200 newheader->noqname = NULL;
5202 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5203 if (addedrdataset != NULL)
5204 bind_rdataset(rbtdb, rbtnode, header, now,
5206 return (ISC_R_SUCCESS);
5208 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5209 (header->type == dns_rdatatype_a ||
5210 header->type == dns_rdatatype_aaaa) &&
5211 !header_nx && !newheader_nx &&
5212 header->trust >= newheader->trust &&
5213 dns_rdataslab_equal((unsigned char *)header,
5214 (unsigned char *)newheader,
5215 (unsigned int)(sizeof(*newheader)))) {
5217 * Honour the new ttl if it is less than the
5220 if (header->rdh_ttl > newheader->rdh_ttl)
5221 set_ttl(rbtdb, header, newheader->rdh_ttl);
5222 if (header->noqname == NULL &&
5223 newheader->noqname != NULL) {
5224 header->noqname = newheader->noqname;
5225 newheader->noqname = NULL;
5227 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5228 if (addedrdataset != NULL)
5229 bind_rdataset(rbtdb, rbtnode, header, now,
5231 return (ISC_R_SUCCESS);
5233 INSIST(rbtversion == NULL ||
5234 rbtversion->serial >= topheader->serial);
5235 if (topheader_prev != NULL)
5236 topheader_prev->next = newheader;
5238 rbtnode->data = newheader;
5239 newheader->next = topheader->next;
5242 * There are no other references to 'header' when
5243 * loading, so we MAY clean up 'header' now.
5244 * Since we don't generate changed records when
5245 * loading, we MUST clean up 'header' now.
5247 newheader->down = NULL;
5248 free_rdataset(rbtdb, rbtdb->common.mctx, header);
5250 newheader->down = topheader;
5251 topheader->next = newheader;
5253 if (changed != NULL)
5254 changed->dirty = ISC_TRUE;
5255 if (rbtversion == NULL) {
5256 set_ttl(rbtdb, header, 0);
5257 header->attributes |= RDATASET_ATTR_STALE;
5259 if (IS_CACHE(rbtdb)) {
5260 int idx = newheader->node->locknum;
5262 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5263 newheader, lru_link);
5266 * XXXMLG We don't check the return value
5267 * here. If it fails, we will not do TTL
5268 * based expiry on this node. However, we
5269 * will do it on the LRU side, so memory
5270 * will not leak... for long.
5272 isc_heap_insert(rbtdb->heaps[idx], newheader);
5277 * No non-IGNORED rdatasets of the given type exist at
5282 * If we're trying to delete the type, don't bother.
5285 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5286 return (DNS_R_UNCHANGED);
5289 if (topheader != NULL) {
5291 * We have a list of rdatasets of the given type,
5292 * but they're all marked IGNORE. We simply insert
5293 * the new rdataset at the head of the list.
5295 * Ignored rdatasets cannot occur during loading, so
5299 INSIST(rbtversion == NULL ||
5300 rbtversion->serial >= topheader->serial);
5301 if (topheader_prev != NULL)
5302 topheader_prev->next = newheader;
5304 rbtnode->data = newheader;
5305 newheader->next = topheader->next;
5306 newheader->down = topheader;
5307 topheader->next = newheader;
5309 if (changed != NULL)
5310 changed->dirty = ISC_TRUE;
5313 * No rdatasets of the given type exist at the node.
5315 newheader->next = rbtnode->data;
5316 newheader->down = NULL;
5317 rbtnode->data = newheader;
5319 if (IS_CACHE(rbtdb)) {
5320 int idx = newheader->node->locknum;
5321 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5322 newheader, lru_link);
5323 isc_heap_insert(rbtdb->heaps[idx], newheader);
5328 * Check if the node now contains CNAME and other data.
5330 if (rbtversion != NULL &&
5331 cname_and_other_data(rbtnode, rbtversion->serial))
5332 return (DNS_R_CNAMEANDOTHER);
5334 if (addedrdataset != NULL)
5335 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5337 return (ISC_R_SUCCESS);
/*
 * Test whether an rdataset of 'type' stored at 'node' is a delegation type
 * for this database.  For a cache only DNAME is tested.  For other
 * databases the test is DNAME, or NS when 'node' is not the origin node or
 * the database is a stub.
 */
5340 static inline isc_boolean_t
5341 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5342 rbtdb_rdatatype_t type)
5344 if (IS_CACHE(rbtdb)) {
5345 if (type == dns_rdatatype_dname)
5349 } else if (type == dns_rdatatype_dname ||
5350 (type == dns_rdatatype_ns &&
5351 (node != rbtdb->origin_node || IS_STUB(rbtdb))))
/*
 * Copy the NOQNAME proof attached to 'rdataset' (owner name, NSEC rdataset
 * and its signature rdataset) into a newly allocated struct noqname taken
 * from the database's memory context, and store it in newheader->noqname.
 *
 * Returns ISC_R_SUCCESS once the copy is attached to 'newheader'.  On a
 * failure the temporary rdatasets are disassociated and the partly built
 * copy is released with free_noqname(); the failing result is presumably
 * returned to the caller (the return statement is not visible here).
 */
5356 static inline isc_result_t
5357 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5358 dns_rdataset_t *rdataset)
5360 struct noqname *noqname;
5361 isc_mem_t *mctx = rbtdb->common.mctx;
5363 dns_rdataset_t nsec, nsecsig;
5364 isc_result_t result;
5367 dns_name_init(&name, NULL);
5368 dns_rdataset_init(&nsec);
5369 dns_rdataset_init(&nsecsig);
/* Fetch the proof from the source rdataset; this step must succeed. */
5371 result = dns_rdataset_getnoqname(rdataset, &name, &nsec, &nsecsig);
5372 RUNTIME_CHECK(result == ISC_R_SUCCESS);
/* Allocate the private copy that the header will own. */
5374 noqname = isc_mem_get(mctx, sizeof(*noqname));
5375 if (noqname == NULL) {
5376 result = ISC_R_NOMEMORY;
/* Start from an empty structure, then duplicate the name and both slabs. */
5379 dns_name_init(&noqname->name, NULL);
5380 noqname->nsec = NULL;
5381 noqname->nsecsig = NULL;
5382 result = dns_name_dup(&name, mctx, &noqname->name);
5383 if (result != ISC_R_SUCCESS)
5385 result = dns_rdataslab_fromrdataset(&nsec, mctx, &r, 0);
5386 if (result != ISC_R_SUCCESS)
5388 noqname->nsec = r.base;
5389 result = dns_rdataslab_fromrdataset(&nsecsig, mctx, &r, 0);
5390 if (result != ISC_R_SUCCESS)
5392 noqname->nsecsig = r.base;
/* Success: release the temporary rdatasets and attach the copy. */
5393 dns_rdataset_disassociate(&nsec);
5394 dns_rdataset_disassociate(&nsecsig);
5395 newheader->noqname = noqname;
5396 return (ISC_R_SUCCESS);
/* Failure: release the temporary rdatasets and free the partial copy. */
5399 dns_rdataset_disassociate(&nsec);
5400 dns_rdataset_disassociate(&nsecsig);
5401 free_noqname(mctx, &noqname);
/*
 * Add 'rdataset' to 'node' in 'version' (or without a version).
 *
 * The rdataset is converted into a slab with room reserved for an
 * rdatasetheader_t, the header fields are filled in from the rdataset, and
 * the header is handed to add() while the node's lock is held for writing.
 * The tree write lock is additionally taken when the type is a delegating
 * one or when a cache database is in the overmem state.  For cache
 * databases some cleaning (overmem purge, dead-node cleanup, expiry of the
 * heap's top element) is done on the way.
 *
 * On success with a delegating type the node's find_callback bit is set.
 * For a successful unversioned add to a non-cache database the secure
 * status is recomputed.
 */
5406 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5407 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
5408 dns_rdataset_t *addedrdataset)
5410 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5411 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5412 rbtdb_version_t *rbtversion = version;
5413 isc_region_t region;
5414 rdatasetheader_t *newheader;
5415 rdatasetheader_t *header;
5416 isc_result_t result;
5417 isc_boolean_t delegating;
5418 isc_boolean_t tree_locked = ISC_FALSE;
5420 REQUIRE(VALID_RBTDB(rbtdb));
/* Unversioned adds may take 'now' from the clock. */
5422 if (rbtversion == NULL) {
5424 isc_stdtime_get(&now);
/* Build the slab, reserving space in front for the header. */
5428 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5430 sizeof(rdatasetheader_t));
5431 if (result != ISC_R_SUCCESS)
5434 newheader = (rdatasetheader_t *)region.base;
5435 init_rdataset(rbtdb, newheader);
5436 set_ttl(rbtdb, newheader, rdataset->ttl + now);
5437 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5439 newheader->attributes = 0;
5440 newheader->noqname = NULL;
5441 newheader->count = init_count++;
5442 newheader->trust = rdataset->trust;
5443 newheader->additional_auth = NULL;
5444 newheader->additional_glue = NULL;
5445 newheader->last_used = now;
5446 newheader->node = rbtnode;
/* Versioned adds use the version's serial; the other assignment uses 1. */
5447 if (rbtversion != NULL) {
5448 newheader->serial = rbtversion->serial;
5451 newheader->serial = 1;
5452 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
5453 newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
5454 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
5455 result = addnoqname(rbtdb, newheader, rdataset);
5456 if (result != ISC_R_SUCCESS) {
5457 free_rdataset(rbtdb, rbtdb->common.mctx,
5465 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
5466 * just DNAME for the cache), then we need to set the callback bit
5469 if (delegating_type(rbtdb, rbtnode, rdataset->type))
5470 delegating = ISC_TRUE;
5472 delegating = ISC_FALSE;
5475 * If we're adding a delegation type or the DB is a cache in an overmem
5476 * state, hold an exclusive lock on the tree. In the latter case
5477 * the lock does not necessarily have to be acquired but it will help
5478 * purge stale entries more effectively.
5480 if (delegating || (IS_CACHE(rbtdb) && rbtdb->overmem)) {
5481 tree_locked = ISC_TRUE;
5482 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
5485 if (IS_CACHE(rbtdb) && rbtdb->overmem)
5486 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
5488 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5489 isc_rwlocktype_write);
/* Count the new header in the rrset statistics when they exist. */
5491 if (rbtdb->rrsetstats != NULL) {
5492 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
5493 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
5496 if (IS_CACHE(rbtdb)) {
5498 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
5500 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
5501 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
5502 expire_header(rbtdb, header, tree_locked);
5505 * If we've been holding a write lock on the tree just for
5506 * cleaning, we can release it now. However, we still need the
5509 if (tree_locked && !delegating) {
5510 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
5511 tree_locked = ISC_FALSE;
/* Hand the header to add(); the callback bit is set only on success. */
5515 result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
5516 addedrdataset, now);
5517 if (result == ISC_R_SUCCESS && delegating)
5518 rbtnode->find_callback = 1;
5520 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5521 isc_rwlocktype_write);
5524 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
5527 * Update the zone's secure status. If version is non-NULL
5528 * this is deferred until closeversion() is called.
5530 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
5531 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
/*
 * Remove the rdata in 'rdataset' from the rdataset of the same type at
 * 'node' in 'version'.
 *
 * 'rdataset' is turned into a slab, the matching type chain is located on
 * the node, and dns_rdataslab_subtract() computes the remainder.  The
 * remainder, or a NONEXISTENT header when the subtraction would remove
 * every rdata (DNS_R_NXRRSET), is linked in front of the existing chain
 * and the change record is marked dirty.  When no existing rdataset is
 * found the result is DNS_R_NOTEXACT if DNS_DBSUB_EXACT was requested and
 * DNS_R_UNCHANGED otherwise.  With DNS_DBSUB_EXACT a TTL mismatch also
 * yields DNS_R_NOTEXACT.
 *
 * On success 'newrdataset', if non-NULL, is bound to the new header.
 * Note that rbtversion->serial is dereferenced unconditionally, so a
 * non-NULL 'version' is needed.
 */
5537 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5538 dns_rdataset_t *rdataset, unsigned int options,
5539 dns_rdataset_t *newrdataset)
5541 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5542 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5543 rbtdb_version_t *rbtversion = version;
5544 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
5545 unsigned char *subresult;
5546 isc_region_t region;
5547 isc_result_t result;
5548 rbtdb_changed_t *changed;
5550 REQUIRE(VALID_RBTDB(rbtdb));
/* Build a slab for the rdata to subtract, with room for a header. */
5552 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5554 sizeof(rdatasetheader_t));
5555 if (result != ISC_R_SUCCESS)
5557 newheader = (rdatasetheader_t *)region.base;
5558 init_rdataset(rbtdb, newheader);
5559 set_ttl(rbtdb, newheader, rdataset->ttl);
5560 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5562 newheader->attributes = 0;
5563 newheader->serial = rbtversion->serial;
5564 newheader->trust = 0;
5565 newheader->noqname = NULL;
5566 newheader->count = init_count++;
5567 newheader->additional_auth = NULL;
5568 newheader->additional_glue = NULL;
5569 newheader->last_used = 0;
5570 newheader->node = rbtnode;
/* The chain manipulation below runs under the node's write lock. */
5572 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5573 isc_rwlocktype_write);
/* Without a change record the operation is abandoned. */
5575 changed = add_changed(rbtdb, rbtversion, rbtnode);
5576 if (changed == NULL) {
5577 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5578 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5579 isc_rwlocktype_write);
5580 return (ISC_R_NOMEMORY);
/* Look for the top-of-chain header of the type being subtracted. */
5583 topheader_prev = NULL;
5584 for (topheader = rbtnode->data;
5586 topheader = topheader->next) {
5587 if (topheader->type == newheader->type)
5589 topheader_prev = topheader;
5592 * If header isn't NULL, we've found the right type. There may be
5593 * IGNORE rdatasets between the top of the chain and the first real
5594 * data. We skip over them.
5597 while (header != NULL && IGNORE(header))
5598 header = header->down;
5599 if (header != NULL && EXISTS(header)) {
5600 unsigned int flags = 0;
5602 result = ISC_R_SUCCESS;
5603 if ((options & DNS_DBSUB_EXACT) != 0) {
5604 flags |= DNS_RDATASLAB_EXACT;
5605 if (newheader->rdh_ttl != header->rdh_ttl)
5606 result = DNS_R_NOTEXACT;
5608 if (result == ISC_R_SUCCESS)
5609 result = dns_rdataslab_subtract(
5610 (unsigned char *)header,
5611 (unsigned char *)newheader,
5612 (unsigned int)(sizeof(*newheader)),
5614 rbtdb->common.rdclass,
5615 (dns_rdatatype_t)header->type,
5617 if (result == ISC_R_SUCCESS) {
5618 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5619 newheader = (rdatasetheader_t *)subresult;
5621 * We have to set the serial since the rdataslab
5622 * subtraction routine copies the reserved portion of
5623 * header, not newheader.
5625 newheader->serial = rbtversion->serial;
5627 * XXXJT: dns_rdataslab_subtract() copied the pointers
5628 * to additional info. We need to clear these fields
5629 * to avoid having duplicated references.
5631 newheader->additional_auth = NULL;
5632 newheader->additional_glue = NULL;
5633 } else if (result == DNS_R_NXRRSET) {
5635 * This subtraction would remove all of the rdata;
5636 * add a nonexistent header instead.
5638 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5639 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
5640 if (newheader == NULL) {
5641 result = ISC_R_NOMEMORY;
5644 set_ttl(rbtdb, newheader, 0);
5645 newheader->type = topheader->type;
5646 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
5647 newheader->trust = 0;
5648 newheader->serial = rbtversion->serial;
5649 newheader->noqname = NULL;
5650 newheader->count = 0;
5651 newheader->additional_auth = NULL;
5652 newheader->additional_glue = NULL;
5653 newheader->node = rbtnode;
5654 newheader->last_used = 0;
5656 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5661 * If we're here, we want to link newheader in front of
5664 INSIST(rbtversion->serial >= topheader->serial);
5665 if (topheader_prev != NULL)
5666 topheader_prev->next = newheader;
5668 rbtnode->data = newheader;
5669 newheader->next = topheader->next;
5670 newheader->down = topheader;
5671 topheader->next = newheader;
5673 changed->dirty = ISC_TRUE;
5676 * The rdataset doesn't exist, so we don't need to do anything
5677 * to satisfy the deletion request.
5679 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5680 if ((options & DNS_DBSUB_EXACT) != 0)
5681 result = DNS_R_NOTEXACT;
5683 result = DNS_R_UNCHANGED;
5686 if (result == ISC_R_SUCCESS && newrdataset != NULL)
5687 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
5690 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5691 isc_rwlocktype_write);
5694 * Update the zone's secure status. If version is non-NULL
5695 * this is deferred until closeversion() is called.
5697 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
5698 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
/*
 * Delete the rdataset of 'type' (and 'covers') at 'node'.
 *
 * Deletion is recorded by adding a header flagged
 * RDATASET_ATTR_NONEXISTENT through add() with DNS_DBADD_FORCE while the
 * node's write lock is held.  Type ANY, and RRSIG with 'covers' equal to
 * 0, are rejected with ISC_R_NOTIMPLEMENTED; ISC_R_NOMEMORY is returned
 * when the header cannot be allocated.
 */
5704 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5705 dns_rdatatype_t type, dns_rdatatype_t covers)
5707 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5708 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5709 rbtdb_version_t *rbtversion = version;
5710 isc_result_t result;
5711 rdatasetheader_t *newheader;
5713 REQUIRE(VALID_RBTDB(rbtdb));
5715 if (type == dns_rdatatype_any)
5716 return (ISC_R_NOTIMPLEMENTED);
5717 if (type == dns_rdatatype_rrsig && covers == 0)
5718 return (ISC_R_NOTIMPLEMENTED);
/* Build the "nonexistent" marker header. */
5720 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
5721 if (newheader == NULL)
5722 return (ISC_R_NOMEMORY);
5723 set_ttl(rbtdb, newheader, 0);
5724 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
5725 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
5726 newheader->trust = 0;
5727 newheader->noqname = NULL;
5728 newheader->additional_auth = NULL;
5729 newheader->additional_glue = NULL;
5730 if (rbtversion != NULL)
5731 newheader->serial = rbtversion->serial;
5733 newheader->serial = 0;
5734 newheader->count = 0;
5735 newheader->last_used = 0;
5736 newheader->node = rbtnode;
5738 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5739 isc_rwlocktype_write);
5741 result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
5742 ISC_FALSE, NULL, 0);
5744 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5745 isc_rwlocktype_write);
5748 * Update the zone's secure status. If version is non-NULL
5749 * this is deferred until closeversion() is called.
5751 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
5752 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
/*
 * Add-rdataset callback used while a database is being loaded; beginload()
 * hands it out and 'arg' is the rbtdb_load_t load context.
 *
 * SOA records away from the origin of a non-cache database are rejected
 * with DNS_R_NOTZONETOP and NS records at wildcard names with
 * DNS_R_INVALIDNS.  Otherwise the node for 'name' is created or found, a
 * slab header is built from 'rdataset', and add() is called with
 * DNS_DBADD_MERGE and its loading flag set.  DNS_R_UNCHANGED from add() is
 * mapped to ISC_R_SUCCESS, and the node's find_callback bit is set when a
 * delegating type was added successfully.
 */
5758 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
5759 rbtdb_load_t *loadctx = arg;
5760 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
5761 dns_rbtnode_t *node;
5762 isc_result_t result;
5763 isc_region_t region;
5764 rdatasetheader_t *newheader;
5767 * This routine does no node locking. See comments in
5768 * 'load' below for more information on loading and
5774 * SOA records are only allowed at top of zone.
5776 if (rdataset->type == dns_rdatatype_soa &&
5777 !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
5778 return (DNS_R_NOTZONETOP);
5780 add_empty_wildcards(rbtdb, name);
5782 if (dns_name_iswildcard(name)) {
5784 * NS record owners cannot legally be wild cards.
5786 if (rdataset->type == dns_rdatatype_ns)
5787 return (DNS_R_INVALIDNS);
5788 result = add_wildcard_magic(rbtdb, name);
5789 if (result != ISC_R_SUCCESS)
/* Create the node, or find it if it already exists. */
5794 result = dns_rbt_addnode(rbtdb->tree, name, &node);
5795 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
/* A newly created node still needs its lock bucket number. */
5797 if (result != ISC_R_EXISTS) {
5798 dns_name_t foundname;
5799 dns_name_init(&foundname, NULL);
5800 dns_rbt_namefromnode(node, &foundname);
5801 #ifdef DNS_RBT_USEHASH
5802 node->locknum = node->hashval % rbtdb->node_lock_count;
5804 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
5805 rbtdb->node_lock_count;
/* Build the slab, reserving space in front for the header. */
5809 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5811 sizeof(rdatasetheader_t));
5812 if (result != ISC_R_SUCCESS)
5814 newheader = (rdatasetheader_t *)region.base;
5815 init_rdataset(rbtdb, newheader);
5816 set_ttl(rbtdb, newheader,
5817 rdataset->ttl + loadctx->now); /* XXX overflow check */
5818 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5820 newheader->attributes = 0;
5821 newheader->trust = rdataset->trust;
5822 newheader->serial = 1;
5823 newheader->noqname = NULL;
5824 newheader->count = init_count++;
5825 newheader->additional_auth = NULL;
5826 newheader->additional_glue = NULL;
5827 /* won't be used, but initialize anyway */
5828 newheader->last_used = 0;
5829 newheader->node = node;
5831 result = add(rbtdb, node, rbtdb->current_version, newheader,
5832 DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
5833 if (result == ISC_R_SUCCESS &&
5834 delegating_type(rbtdb, node, rdataset->type))
5835 node->find_callback = 1;
5836 else if (result == DNS_R_UNCHANGED)
5837 result = ISC_R_SUCCESS;
/*
 * Begin loading the database.
 *
 * Allocates a load context that records the database (and, for a cache,
 * the current time), sets RBTDB_ATTR_LOADING while holding the database
 * write lock, and returns loading_addrdataset through 'addp'.  The REQUIRE
 * inside the locked region checks the LOADED/LOADING attribute bits.
 * Returns ISC_R_NOMEMORY when the context cannot be allocated.
 */
5843 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
5844 rbtdb_load_t *loadctx;
5847 rbtdb = (dns_rbtdb_t *)db;
5849 REQUIRE(VALID_RBTDB(rbtdb));
5851 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
5852 if (loadctx == NULL)
5853 return (ISC_R_NOMEMORY);
5855 loadctx->rbtdb = rbtdb;
5856 if (IS_CACHE(rbtdb))
5857 isc_stdtime_get(&loadctx->now);
5861 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5863 REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
5865 rbtdb->attributes |= RBTDB_ATTR_LOADING;
5867 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5869 *addp = loading_addrdataset;
5872 return (ISC_R_SUCCESS);
/*
 * Finish loading the database.
 *
 * Requires that the load context belongs to this database and that the
 * database is LOADING and not yet LOADED.  Under the database write lock
 * the LOADING bit is cleared and the LOADED bit set; for non-cache
 * databases the secure status is then recomputed.  The load context is
 * returned to the memory context before ISC_R_SUCCESS is returned.
 */
5876 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
5877 rbtdb_load_t *loadctx;
5878 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5880 REQUIRE(VALID_RBTDB(rbtdb));
5881 REQUIRE(dbloadp != NULL);
5883 REQUIRE(loadctx->rbtdb == rbtdb);
5885 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5887 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
5888 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
5890 rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
5891 rbtdb->attributes |= RBTDB_ATTR_LOADED;
5893 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5896 * If there's a KEY rdataset at the zone origin containing a
5897 * zone key, we consider the zone secure.
5899 if (! IS_CACHE(rbtdb))
5900 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
5904 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
5906 return (ISC_R_SUCCESS);
/*
 * Dump 'version' of the database to 'filename' in 'masterformat' by
 * calling dns_master_dump2() with the default master-file style, and
 * return its result.
 */
5910 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
5911 dns_masterformat_t masterformat) {
5914 rbtdb = (dns_rbtdb_t *)db;
5916 REQUIRE(VALID_RBTDB(rbtdb));
5918 return (dns_master_dump2(rbtdb->common.mctx, db, version,
5919 &dns_master_style_default,
5920 filename, masterformat));
/*
 * Node-data deleter registered with dns_rbt_create(); 'arg' is the
 * database.  Frees each rdatasetheader on the 'next' chain that starts at
 * 'data'.  The successor is read before the current header is freed.
 */
5924 delete_callback(void *data, void *arg) {
5925 dns_rbtdb_t *rbtdb = arg;
5926 rdatasetheader_t *current, *next;
5928 for (current = data; current != NULL; current = next) {
5929 next = current->next;
5930 free_rdataset(rbtdb, rbtdb->common.mctx, current);
/*
 * Read the database's 'secure' flag while holding the tree lock for
 * reading.  The value read is presumably what gets returned (the return
 * statement is not visible here).
 */
5934 static isc_boolean_t
5935 issecure(dns_db_t *db) {
5937 isc_boolean_t secure;
5939 rbtdb = (dns_rbtdb_t *)db;
5941 REQUIRE(VALID_RBTDB(rbtdb));
5943 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5944 secure = rbtdb->secure;
5945 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Obtain the number of nodes in the database's tree from
 * dns_rbt_nodecount() while holding the tree lock for reading.  The count
 * is presumably what gets returned (the return statement is not visible
 * here).
 */
5951 nodecount(dns_db_t *db) {
5955 rbtdb = (dns_rbtdb_t *)db;
5957 REQUIRE(VALID_RBTDB(rbtdb));
5959 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5960 count = dns_rbt_nodecount(rbtdb->tree);
5961 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Replace the task associated with the database.  Under the database
 * write lock any task currently held is detached, and the database then
 * attaches to 'task'.
 */
5967 settask(dns_db_t *db, isc_task_t *task) {
5970 rbtdb = (dns_rbtdb_t *)db;
5972 REQUIRE(VALID_RBTDB(rbtdb));
5974 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5975 if (rbtdb->task != NULL)
5976 isc_task_detach(&rbtdb->task);
5978 isc_task_attach(task, &rbtdb->task);
5979 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * The "is persistent" query for this database implementation.
 * NOTE(review): the body is not visible here; confirm the returned value
 * against the full source.
 */
5982 static isc_boolean_t
5983 ispersistent(dns_db_t *db) {
5989 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
5990 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5991 dns_rbtnode_t *onode;
5992 isc_result_t result = ISC_R_SUCCESS;
5994 REQUIRE(VALID_RBTDB(rbtdb));
5995 REQUIRE(nodep != NULL && *nodep == NULL);
5997 /* Note that the access to origin_node doesn't require a DB lock */
5998 onode = (dns_rbtnode_t *)rbtdb->origin_node;
5999 if (onode != NULL) {
6000 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6001 new_reference(rbtdb, onode);
6002 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6004 *nodep = rbtdb->origin_node;
6006 INSIST(!IS_CACHE(rbtdb));
6007 result = ISC_R_NOTFOUND;
/*
 * Return the database's rrset statistics object.  The database must be a
 * cache.
 */
6013 static dns_stats_t *
6014 getrrsetstats(dns_db_t *db) {
6015 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6017 REQUIRE(VALID_RBTDB(rbtdb));
6018 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6020 return (rbtdb->rrsetstats);
/*
 * Method table installed in common.methods for databases that are not
 * caches (the create routine uses it for both stub and ordinary types).
 */
6023 static dns_dbmethods_t zone_methods = {
/*
 * Method table installed in common.methods for cache databases.
 */
6056 static dns_dbmethods_t cache_methods = {
/*
 * Create a red-black-tree database of the requested 'type' (cache, stub or
 * other) for 'origin' and 'rdclass', and return it through 'dbp'.
 *
 * Construction is staged: the database structure, its locks, the
 * per-bucket arrays (node locks, dead-node lists and, for caches, the LRU
 * lists and TTL heaps), the memory-context attachment, the origin name,
 * the tree, the origin node (non-cache only) and the initial version.
 * Failures before the memory context is attached unwind through the
 * cleanup code at the end of the function; later failures release
 * everything through free_rbtdb().
 */
6090 #ifdef DNS_RBTDB_VERSION64
6095 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6096 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6097 void *driverarg, dns_db_t **dbp)
6100 isc_result_t result;
6104 /* Keep the compiler happy. */
6109 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6111 return (ISC_R_NOMEMORY);
6113 memset(rbtdb, '\0', sizeof(*rbtdb));
6114 dns_name_init(&rbtdb->common.origin, NULL);
6115 rbtdb->common.attributes = 0;
/* Choose the method table and attribute bits for the database type. */
6116 if (type == dns_dbtype_cache) {
6117 rbtdb->common.methods = &cache_methods;
6118 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
6119 } else if (type == dns_dbtype_stub) {
6120 rbtdb->common.methods = &zone_methods;
6121 rbtdb->common.attributes |= DNS_DBATTR_STUB;
6123 rbtdb->common.methods = &zone_methods;
6124 rbtdb->common.rdclass = rdclass;
6125 rbtdb->common.mctx = NULL;
6127 result = RBTDB_INITLOCK(&rbtdb->lock);
6128 if (result != ISC_R_SUCCESS)
6131 result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
6132 if (result != ISC_R_SUCCESS)
6136 * Initialize node_lock_count in a generic way to support future
6137 * extension which allows the user to specify this value on creation.
6138 * Note that when specified for a cache DB it must be larger than 1
6139 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
6141 if (rbtdb->node_lock_count == 0) {
6142 if (IS_CACHE(rbtdb))
6143 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
6145 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
6146 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
6147 result = ISC_R_RANGE;
6148 goto cleanup_tree_lock;
6150 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
/* One lock structure per lock bucket. */
6151 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
6152 sizeof(rbtdb_nodelock_t));
6153 if (rbtdb->node_locks == NULL) {
6154 result = ISC_R_NOMEMORY;
6155 goto cleanup_tree_lock;
/* Caches also get rrset statistics and per-bucket header lists and heaps. */
6158 rbtdb->rrsetstats = NULL;
6159 if (IS_CACHE(rbtdb)) {
6160 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
6161 if (result != ISC_R_SUCCESS)
6162 goto cleanup_node_locks;
6163 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
6164 sizeof(rdatasetheaderlist_t));
6165 if (rbtdb->rdatasets == NULL) {
6166 result = ISC_R_NOMEMORY;
6167 goto cleanup_rrsetstats;
6169 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6170 ISC_LIST_INIT(rbtdb->rdatasets[i]);
6175 rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
6176 sizeof(isc_heap_t *));
6177 if (rbtdb->heaps == NULL) {
6178 result = ISC_R_NOMEMORY;
6179 goto cleanup_rdatasets;
6181 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6182 rbtdb->heaps[i] = NULL;
6183 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
6184 result = isc_heap_create(mctx, ttl_sooner,
6187 if (result != ISC_R_SUCCESS)
6191 rbtdb->rdatasets = NULL;
6192 rbtdb->heaps = NULL;
/* Per-bucket lists of dead nodes. */
6195 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
6196 sizeof(rbtnodelist_t));
6197 if (rbtdb->deadnodes == NULL) {
6198 result = ISC_R_NOMEMORY;
6201 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6202 ISC_LIST_INIT(rbtdb->deadnodes[i]);
6204 rbtdb->active = rbtdb->node_lock_count;
/* Initialise each bucket's lock and reference counter. */
6206 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
6207 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
6208 if (result == ISC_R_SUCCESS) {
6209 result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
6210 if (result != ISC_R_SUCCESS)
6211 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
6213 if (result != ISC_R_SUCCESS) {
/*
 * NOTE(review): 'references' is initialised to 0 above, so decrementing it
 * before destroying it looks questionable -- confirm against the
 * isc_refcount contract.
 */
6215 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
6216 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
6217 isc_refcount_destroy(&rbtdb->node_locks[i].references);
6219 goto cleanup_deadnodes;
6221 rbtdb->node_locks[i].exiting = ISC_FALSE;
6225 * Attach to the mctx. The database will persist so long as there
6226 * are references to it, and attaching to the mctx ensures that our
6227 * mctx won't disappear out from under us.
6229 isc_mem_attach(mctx, &rbtdb->common.mctx);
6232 * Must be initialized before free_rbtdb() is called.
6234 isc_ondestroy_init(&rbtdb->common.ondest);
6237 * Make a copy of the origin name.
6239 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
6240 if (result != ISC_R_SUCCESS) {
6241 free_rbtdb(rbtdb, ISC_FALSE, NULL);
6246 * Make the Red-Black Tree.
6248 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
6249 if (result != ISC_R_SUCCESS) {
6250 free_rbtdb(rbtdb, ISC_FALSE, NULL);
6254 * In order to set the node callback bit correctly in zone databases,
6255 * we need to know if the node has the origin name of the zone.
6256 * In loading_addrdataset() we could simply compare the new name
6257 * to the origin name, but this is expensive. Also, we don't know the
6258 * node name in addrdataset(), so we need another way of knowing the
6261 * We now explicitly create a node for the zone's origin, and then
6262 * we simply remember the node's address. This is safe, because
6263 * the top-of-zone node can never be deleted, nor can its address
6266 if (!IS_CACHE(rbtdb)) {
6267 rbtdb->origin_node = NULL;
6268 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
6269 &rbtdb->origin_node);
6270 if (result != ISC_R_SUCCESS) {
6271 INSIST(result != ISC_R_EXISTS);
6272 free_rbtdb(rbtdb, ISC_FALSE, NULL);
6276 * We need to give the origin node the right locknum.
6278 dns_name_init(&name, NULL);
6279 dns_rbt_namefromnode(rbtdb->origin_node, &name);
6280 #ifdef DNS_RBT_USEHASH
6281 rbtdb->origin_node->locknum =
6282 rbtdb->origin_node->hashval %
6283 rbtdb->node_lock_count;
6285 rbtdb->origin_node->locknum =
6286 dns_name_hash(&name, ISC_TRUE) %
6287 rbtdb->node_lock_count;
6292 * Misc. Initialization.
6294 result = isc_refcount_init(&rbtdb->references, 1);
6295 if (result != ISC_R_SUCCESS) {
6296 free_rbtdb(rbtdb, ISC_FALSE, NULL);
6299 rbtdb->attributes = 0;
6300 rbtdb->secure = ISC_FALSE;
6301 rbtdb->overmem = ISC_FALSE;
6305 * Version Initialization.
6307 rbtdb->current_serial = 1;
6308 rbtdb->least_serial = 1;
6309 rbtdb->next_serial = 2;
6310 rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
6311 if (rbtdb->current_version == NULL) {
6312 isc_refcount_decrement(&rbtdb->references, NULL);
6313 isc_refcount_destroy(&rbtdb->references);
6314 free_rbtdb(rbtdb, ISC_FALSE, NULL);
6315 return (ISC_R_NOMEMORY);
6317 rbtdb->future_version = NULL;
6318 ISC_LIST_INIT(rbtdb->open_versions);
6320 * Keep the current version in the open list so that list operation
6321 * won't happen in normal lookup operations.
6323 PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
/* Setting the magic numbers marks the structure as a valid database. */
6325 rbtdb->common.magic = DNS_DB_MAGIC;
6326 rbtdb->common.impmagic = RBTDB_MAGIC;
6328 *dbp = (dns_db_t *)rbtdb;
6330 return (ISC_R_SUCCESS);
/* Error unwinding: resources are released in reverse order of acquisition. */
6333 isc_mem_put(mctx, rbtdb->deadnodes,
6334 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
6337 if (rbtdb->heaps != NULL) {
6338 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
6339 if (rbtdb->heaps[i] != NULL)
6340 isc_heap_destroy(&rbtdb->heaps[i]);
6341 isc_mem_put(mctx, rbtdb->heaps,
6342 rbtdb->node_lock_count * sizeof(isc_heap_t *));
6346 if (rbtdb->rdatasets != NULL)
6347 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
6348 sizeof(rdatasetheaderlist_t));
6350 if (rbtdb->rrsetstats != NULL)
6351 dns_stats_detach(&rbtdb->rrsetstats);
6354 isc_mem_put(mctx, rbtdb->node_locks,
6355 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
6358 isc_rwlock_destroy(&rbtdb->tree_lock);
6361 RBTDB_DESTROYLOCK(&rbtdb->lock);
6364 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
6370 * Slabbed Rdataset Methods
/*
 * Disassociate a slab-backed rdataset: private1 holds the database and
 * private2 the node, and the node reference is released with detachnode().
 */
6374 rdataset_disassociate(dns_rdataset_t *rdataset) {
6375 dns_db_t *db = rdataset->private1;
6376 dns_dbnode_t *node = rdataset->private2;
6378 detachnode(db, &node);
/*
 * Move the rdataset cursor to the first rdata in the slab (private3).
 *
 * The rdata count is read from the slab's first two bytes, most
 * significant byte first.  The cursor position is stored in private5 and
 * privateuint4 tracks the rdata beyond it.  ISC_R_NOMORE is returned, with
 * private5 cleared, on the branch that presumably handles an empty slab
 * (its condition is not visible here); otherwise ISC_R_SUCCESS.
 */
6382 rdataset_first(dns_rdataset_t *rdataset) {
6383 unsigned char *raw = rdataset->private3; /* RDATASLAB */
6386 count = raw[0] * 256 + raw[1];
6388 rdataset->private5 = NULL;
6389 return (ISC_R_NOMORE);
6392 #if DNS_RDATASET_FIXED
6393 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
6394 raw += 2 + (4 * count);
6400 * The privateuint4 field is the number of rdata beyond the
6401 * cursor position, so we decrement the total count by one
6402 * before storing it.
6404 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
6405 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
6406 * to the first entry in the offset table.
6409 rdataset->privateuint4 = count;
6410 rdataset->private5 = raw;
6412 return (ISC_R_SUCCESS);
/*
 * Advance the rdataset cursor by one rdata.
 *
 * privateuint4 holds the number of rdata beyond the cursor.  When none
 * remain ISC_R_NOMORE is presumably returned (the test is not visible
 * here).  Otherwise the stored count is updated and private5 is moved
 * past the current record, or past one offset-table entry in the
 * DNS_RDATASET_FIXED load-order case, and ISC_R_SUCCESS is returned.
 */
6416 rdataset_next(dns_rdataset_t *rdataset) {
6418 unsigned int length;
6419 unsigned char *raw; /* RDATASLAB */
6421 count = rdataset->privateuint4;
6423 return (ISC_R_NOMORE);
6425 rdataset->privateuint4 = count;
6428 * Skip forward one record (length + 4) or one offset (4).
6430 raw = rdataset->private5;
6431 #if DNS_RDATASET_FIXED
6432 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
6434 length = raw[0] * 256 + raw[1];
6436 #if DNS_RDATASET_FIXED
6438 rdataset->private5 = raw + 4; /* length(2) + order(2) */
6440 rdataset->private5 = raw + 2; /* length(2) */
6443 return (ISC_R_SUCCESS);
/*
 * Fill 'rdata' with the rdata at the cursor position (private5), which
 * must be non-NULL.
 *
 * In the DNS_RDATASET_FIXED load-order case the cursor points at a 4-byte
 * offset, most significant byte first, which is resolved relative to the
 * slab in private3.  The record length is read from two bytes, most
 * significant first, and the resulting region is handed to
 * dns_rdata_fromregion() with the rdataset's class and type.
 */
6447 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
6448 unsigned char *raw = rdataset->private5; /* RDATASLAB */
6449 #if DNS_RDATASET_FIXED
6450 unsigned int offset;
6454 REQUIRE(raw != NULL);
6457 * Find the start of the record if not already in private5
6458 * then skip the length and order fields.
6460 #if DNS_RDATASET_FIXED
6461 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
6462 offset = (raw[0] << 24) + (raw[1] << 16) +
6463 (raw[2] << 8) + raw[3];
6464 raw = rdataset->private3;
6468 r.length = raw[0] * 256 + raw[1];
6470 #if DNS_RDATASET_FIXED
6476 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
/*
 * Clone 'source' into 'target'.  An additional reference to the source's
 * node is taken with attachnode(), and the target's iterator state
 * (privateuint4 and private5) is reset.
 */
6480 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
6481 dns_db_t *db = source->private1;
6482 dns_dbnode_t *node = source->private2;
6483 dns_dbnode_t *cloned_node = NULL;
6485 attachnode(db, node, &cloned_node);
6489 * Reset iterator state.
6491 target->privateuint4 = 0;
6492 target->private5 = NULL;
/*
 * Read the number of rdata in the slab (private3) from its first two
 * bytes, most significant byte first.
 */
6496 rdataset_count(dns_rdataset_t *rdataset) {
6497 unsigned char *raw = rdataset->private3; /* RDATASLAB */
6500 count = raw[0] * 256 + raw[1];
6506 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
6507 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
6509 dns_db_t *db = rdataset->private1;
6510 dns_dbnode_t *node = rdataset->private2;
6511 dns_dbnode_t *cloned_node;
6512 struct noqname *noqname = rdataset->private6;
6515 attachnode(db, node, &cloned_node);
6516 nsec->methods = &rdataset_methods;
6517 nsec->rdclass = db->rdclass;
6518 nsec->type = dns_rdatatype_nsec;
6520 nsec->ttl = rdataset->ttl;
6521 nsec->trust = rdataset->trust;
6522 nsec->private1 = rdataset->private1;
6523 nsec->private2 = rdataset->private2;
6524 nsec->private3 = noqname->nsec;
6525 nsec->privateuint4 = 0;
6526 nsec->private5 = NULL;
6527 nsec->private6 = NULL;
6530 attachnode(db, node, &cloned_node);
6531 nsecsig->methods = &rdataset_methods;
6532 nsecsig->rdclass = db->rdclass;
6533 nsecsig->type = dns_rdatatype_rrsig;
6534 nsecsig->covers = dns_rdatatype_nsec;
6535 nsecsig->ttl = rdataset->ttl;
6536 nsecsig->trust = rdataset->trust;
6537 nsecsig->private1 = rdataset->private1;
6538 nsecsig->private2 = rdataset->private2;
6539 nsecsig->private3 = noqname->nsecsig;
6540 nsecsig->privateuint4 = 0;
6541 nsecsig->private5 = NULL;
6542 nsec->private6 = NULL;
6544 dns_name_clone(&noqname->name, name);
6546 return (ISC_R_SUCCESS);
6550 * Rdataset Iterator Methods
/*
 * Destroy an rdataset iterator: close the version it holds (if any)
 * without committing, release its node reference, and return the iterator
 * structure to the database's memory context.
 */
6554 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
6555 rbtdb_rdatasetiter_t *rbtiterator;
6557 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
6559 if (rbtiterator->common.version != NULL)
6560 closeversion(rbtiterator->common.db,
6561 &rbtiterator->common.version, ISC_FALSE);
6562 detachnode(rbtiterator->common.db, &rbtiterator->common.node);
6563 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
6564 sizeof(*rbtiterator));
/*
 * Position the iterator on the first usable rdataset header of its node.
 *
 * With the node's read lock held, the top-level header list (linked by
 * 'next') is scanned; for each entry the 'down' chain is followed looking
 * for a header whose serial is not newer than the iterator's serial and
 * which is not marked IGNORE.  Such a header is rejected if it is a
 * NONEXISTENT marker or if 'now' is non-zero and past its TTL.
 *
 * The selected header (or NULL) is stored in rbtiterator->current.  Both
 * ISC_R_NOMORE and ISC_R_SUCCESS are returned; presumably ISC_R_NOMORE
 * when no header was selected -- the deciding test is not in this extract.
 */
6570 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
6571 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
6572 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
6573 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
6574 rbtdb_version_t *rbtversion = rbtiterator->common.version;
6575 rdatasetheader_t *header, *top_next;
6576 rbtdb_serial_t serial;
6579 if (IS_CACHE(rbtdb)) {
6581 now = rbtiterator->common.now;
6583 serial = rbtversion->serial;
6587 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6588 isc_rwlocktype_read);
6590 for (header = rbtnode->data; header != NULL; header = top_next) {
6591 top_next = header->next;
6593 if (header->serial <= serial && !IGNORE(header)) {
6595 * Is this a "this rdataset doesn't exist"
6596 * record? Or is it too old in the cache?
6598 * Note: unlike everywhere else, we
6599 * check for now > header->ttl instead
6600 * of now >= header->ttl. This allows
6601 * ANY and RRSIG queries for 0 TTL
6602 * rdatasets to work.
6604 if (NONEXISTENT(header) ||
6605 (now != 0 && now > header->rdh_ttl))
6609 header = header->down;
6610 } while (header != NULL);
6615 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6616 isc_rwlocktype_read);
6618 rbtiterator->current = header;
6621 return (ISC_R_NOMORE);
6623 return (ISC_R_SUCCESS);
/*
 * Advance the iterator to the next usable rdataset header of its node.
 *
 * Starting after rbtiterator->current, the top-level list is scanned with
 * the node's read lock held.  Headers whose type equals the current
 * header's type, or the computed 'negtype' counterpart of it, are skipped.
 * For the others the 'down' chain is searched for a header visible at the
 * iterator's serial that is neither a NONEXISTENT marker nor (when 'now'
 * is non-zero) past its TTL.
 *
 * The selected header (or NULL) is stored in rbtiterator->current.  Both
 * ISC_R_NOMORE and ISC_R_SUCCESS are returned; the tests that choose
 * between them are not in this extract.
 */
6627 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
6628 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
6629 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
6630 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
6631 rbtdb_version_t *rbtversion = rbtiterator->common.version;
6632 rdatasetheader_t *header, *top_next;
6633 rbtdb_serial_t serial;
6635 rbtdb_rdatatype_t type, negtype;
6636 dns_rdatatype_t rdtype, covers;
6638 header = rbtiterator->current;
6640 return (ISC_R_NOMORE);
6642 if (IS_CACHE(rbtdb)) {
6644 now = rbtiterator->common.now;
6646 serial = rbtversion->serial;
6650 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6651 isc_rwlocktype_read);
6653 type = header->type;
6654 rdtype = RBTDB_RDATATYPE_BASE(header->type);
6656 covers = RBTDB_RDATATYPE_EXT(header->type);
6657 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
6659 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
6660 for (header = header->next; header != NULL; header = top_next) {
6661 top_next = header->next;
6663 * If not walking back up the down list.
6665 if (header->type != type && header->type != negtype) {
6667 if (header->serial <= serial &&
6670 * Is this a "this rdataset doesn't
6673 * Note: unlike everywhere else, we
6674 * check for now > header->ttl instead
6675 * of now >= header->ttl. This allows
6676 * ANY and RRSIG queries for 0 TTL
6677 * rdatasets to work.
6679 if ((header->attributes &
6680 RDATASET_ATTR_NONEXISTENT) != 0 ||
6681 (now != 0 && now > header->rdh_ttl))
6685 header = header->down;
6686 } while (header != NULL);
6692 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6693 isc_rwlocktype_read);
6695 rbtiterator->current = header;
6698 return (ISC_R_NOMORE);
6700 return (ISC_R_SUCCESS);
/*
 * Bind 'rdataset' to the header the iterator currently points at.
 *
 * The iterator must be positioned (current header non-NULL is REQUIREd).
 * bind_rdataset() is called with the node's read lock held.
 */
6704 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
6705 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
6706 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
6707 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
6708 rdatasetheader_t *header;
6710 header = rbtiterator->current;
6711 REQUIRE(header != NULL);
6713 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6714 isc_rwlocktype_read);
6716 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
6719 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6720 isc_rwlocktype_read);
6725 * Database Iterator Methods
/*
 * Reactivate the database iterator's current node via reactivate_node(),
 * passing along the tree lock mode the iterator holds.  The iterator must
 * hold the tree lock in some mode (INSIST).
 */
6729 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
6730 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6731 dns_rbtnode_t *node = rbtdbiter->node;
6736 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
6737 reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
/*
 * Release the database iterator's hold on its current node: with the
 * node's read lock held, call decrement_reference() (telling it which
 * tree lock mode the iterator holds), then clear rbtdbiter->node.
 */
6741 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
6742 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6743 dns_rbtnode_t *node = rbtdbiter->node;
6749 lock = &rbtdb->node_locks[node->locknum].lock;
6750 NODE_LOCK(lock, isc_rwlocktype_read);
6751 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
6752 rbtdbiter->tree_locked, ISC_FALSE);
6753 NODE_UNLOCK(lock, isc_rwlocktype_read);
6755 rbtdbiter->node = NULL;
/*
 * Process the iterator's batch of nodes queued for deletion.
 *
 * Nothing is done when the batch is empty.  Otherwise the batch size is
 * logged, the tree lock is taken in write mode (first dropping the read
 * lock if the iterator held one), and decrement_reference() is called for
 * every queued node under that node's read lock.  The batch counter is
 * then reset and the tree write lock released; the read lock is re-taken
 * if it was held on entry, otherwise the iterator is marked as holding no
 * tree lock.
 */
6759 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
6760 dns_rbtnode_t *node;
6761 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6762 isc_boolean_t was_read_locked = ISC_FALSE;
6766 if (rbtdbiter->delete != 0) {
6768 * Note that "%d node of %d in tree" can report things like
6769 * "flush_deletions: 59 nodes of 41 in tree". This means
6770 * That some nodes appear on the deletions list more than
6771 * once. Only the last occurence will actually be deleted.
6773 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
6774 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
6775 "flush_deletions: %d nodes of %d in tree",
6777 dns_rbt_nodecount(rbtdb->tree));
6779 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
6780 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6781 was_read_locked = ISC_TRUE;
6783 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6784 rbtdbiter->tree_locked = isc_rwlocktype_write;
6786 for (i = 0; i < rbtdbiter->delete; i++) {
6787 node = rbtdbiter->deletions[i];
6788 lock = &rbtdb->node_locks[node->locknum].lock;
6790 NODE_LOCK(lock, isc_rwlocktype_read);
6791 decrement_reference(rbtdb, node, 0,
6792 isc_rwlocktype_read,
6793 rbtdbiter->tree_locked, ISC_FALSE);
6794 NODE_UNLOCK(lock, isc_rwlocktype_read);
6797 rbtdbiter->delete = 0;
6799 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6800 if (was_read_locked) {
6801 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6802 rbtdbiter->tree_locked = isc_rwlocktype_read;
6805 rbtdbiter->tree_locked = isc_rwlocktype_none;
/*
 * Resume a paused database iterator.  The iterator must be paused and must
 * not hold the tree lock; the tree read lock is acquired, recorded in
 * tree_locked, and the paused flag is cleared.
 */
6811 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
6812 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6814 REQUIRE(rbtdbiter->paused);
6815 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
6817 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6818 rbtdbiter->tree_locked = isc_rwlocktype_read;
6820 rbtdbiter->paused = ISC_FALSE;
/*
 * Destroy a database iterator.
 *
 * Any tree read lock still held is released, the current node is
 * dereferenced and pending deletions are flushed.  A temporary reference
 * to the database is taken in 'db' before the iterator's own reference is
 * dropped, so that db->mctx is still usable when the iterator's memory is
 * released.
 */
6824 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
6825 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
6826 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6827 dns_db_t *db = NULL;
6829 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
6830 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6831 rbtdbiter->tree_locked = isc_rwlocktype_none;
6833 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
6835 dereference_iter_node(rbtdbiter);
6837 flush_deletions(rbtdbiter);
6839 dns_db_attach(rbtdbiter->common.db, &db);
6840 dns_db_detach(&rbtdbiter->common.db);
6842 dns_rbtnodechain_reset(&rbtdbiter->chain);
6843 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
/*
 * Move the database iterator to the first node of the tree.
 *
 * If the iterator is in an error state (stored result other than
 * ISC_R_SUCCESS or ISC_R_NOMORE) that result is returned unchanged.
 * Otherwise iteration is resumed if paused, the previous node is released,
 * the chain is reset and positioned with dns_rbtnodechain_first().  On
 * success the node is fetched from the chain, new_origin is set and the
 * node is referenced.  An ISC_R_NOTFOUND from the chain (empty tree) is
 * turned into ISC_R_NOMORE.  The outcome is recorded in rbtdbiter->result.
 */
6850 dbiterator_first(dns_dbiterator_t *iterator) {
6851 isc_result_t result;
6852 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6853 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6854 dns_name_t *name, *origin;
6856 if (rbtdbiter->result != ISC_R_SUCCESS &&
6857 rbtdbiter->result != ISC_R_NOMORE)
6858 return (rbtdbiter->result);
6860 if (rbtdbiter->paused)
6861 resume_iteration(rbtdbiter);
6863 dereference_iter_node(rbtdbiter);
6865 name = dns_fixedname_name(&rbtdbiter->name);
6866 origin = dns_fixedname_name(&rbtdbiter->origin);
6867 dns_rbtnodechain_reset(&rbtdbiter->chain);
6869 result = dns_rbtnodechain_first(&rbtdbiter->chain, rbtdb->tree, name,
6872 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
6873 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
6874 NULL, &rbtdbiter->node);
6875 if (result == ISC_R_SUCCESS) {
6876 rbtdbiter->new_origin = ISC_TRUE;
6877 reference_iter_node(rbtdbiter);
6880 INSIST(result == ISC_R_NOTFOUND);
6881 result = ISC_R_NOMORE; /* The tree is empty. */
6884 rbtdbiter->result = result;
/*
 * Move the database iterator to the last node of the tree.
 *
 * Same sequence as dbiterator_first(), except that the chain is positioned
 * with dns_rbtnodechain_last(): error state is returned unchanged, a
 * paused iterator is resumed, the previous node is released, and on
 * success the new node is fetched and referenced with new_origin set.  An
 * ISC_R_NOTFOUND (empty tree) becomes ISC_R_NOMORE.  The outcome is
 * recorded in rbtdbiter->result.
 */
6890 dbiterator_last(dns_dbiterator_t *iterator) {
6891 isc_result_t result;
6892 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6893 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6894 dns_name_t *name, *origin;
6896 if (rbtdbiter->result != ISC_R_SUCCESS &&
6897 rbtdbiter->result != ISC_R_NOMORE)
6898 return (rbtdbiter->result);
6900 if (rbtdbiter->paused)
6901 resume_iteration(rbtdbiter);
6903 dereference_iter_node(rbtdbiter);
6905 name = dns_fixedname_name(&rbtdbiter->name);
6906 origin = dns_fixedname_name(&rbtdbiter->origin);
6907 dns_rbtnodechain_reset(&rbtdbiter->chain);
6909 result = dns_rbtnodechain_last(&rbtdbiter->chain, rbtdb->tree, name,
6911 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
6912 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
6913 NULL, &rbtdbiter->node);
6914 if (result == ISC_R_SUCCESS) {
6915 rbtdbiter->new_origin = ISC_TRUE;
6916 reference_iter_node(rbtdbiter);
6919 INSIST(result == ISC_R_NOTFOUND);
6920 result = ISC_R_NOMORE; /* The tree is empty. */
6923 rbtdbiter->result = result;
/*
 * Position the database iterator on the node named 'name'.
 *
 * An iterator in an error state returns that stored result.  Otherwise
 * iteration is resumed if paused, the previous node is released, the chain
 * is reset and dns_rbt_findnode() is called with DNS_RBTFIND_EMPTYDATA.
 * On an exact match the chain's current names are loaded, new_origin is
 * set and the node is referenced.  DNS_R_PARTIALMATCH is reported as
 * ISC_R_NOTFOUND.  The outcome is recorded in rbtdbiter->result.
 *
 * NOTE(review): on DNS_R_PARTIALMATCH rbtdbiter->node keeps whatever
 * dns_rbt_findnode() stored in it, and reference_iter_node() is not called
 * for it.  A later dereference_iter_node() would then drop a reference
 * this iterator never took -- confirm against dns_rbt_findnode()'s
 * contract.
 */
6929 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
6930 isc_result_t result;
6931 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6932 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6933 dns_name_t *iname, *origin;
6935 if (rbtdbiter->result != ISC_R_SUCCESS &&
6936 rbtdbiter->result != ISC_R_NOMORE)
6937 return (rbtdbiter->result);
6939 if (rbtdbiter->paused)
6940 resume_iteration(rbtdbiter);
6942 dereference_iter_node(rbtdbiter);
6944 iname = dns_fixedname_name(&rbtdbiter->name);
6945 origin = dns_fixedname_name(&rbtdbiter->origin);
6946 dns_rbtnodechain_reset(&rbtdbiter->chain);
6948 result = dns_rbt_findnode(rbtdb->tree, name, NULL, &rbtdbiter->node,
6949 &rbtdbiter->chain, DNS_RBTFIND_EMPTYDATA,
6951 if (result == ISC_R_SUCCESS) {
6952 result = dns_rbtnodechain_current(&rbtdbiter->chain, iname,
6954 if (result == ISC_R_SUCCESS) {
6955 rbtdbiter->new_origin = ISC_TRUE;
6956 reference_iter_node(rbtdbiter);
6959 } else if (result == DNS_R_PARTIALMATCH)
6960 result = ISC_R_NOTFOUND;
6962 rbtdbiter->result = result;
/*
 * Step the database iterator back to the previous node.
 *
 * The iterator must currently have a node (REQUIRE).  If the stored result
 * is not ISC_R_SUCCESS it is returned unchanged.  Otherwise iteration is
 * resumed if paused, the chain is moved with dns_rbtnodechain_prev(), and
 * the old node is released.  When the chain moved (ISC_R_SUCCESS or
 * DNS_R_NEWORIGIN), new_origin records whether the origin changed and the
 * new node is fetched and, on success, referenced.  The outcome is
 * recorded in rbtdbiter->result.
 */
6968 dbiterator_prev(dns_dbiterator_t *iterator) {
6969 isc_result_t result;
6970 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6971 dns_name_t *name, *origin;
6973 REQUIRE(rbtdbiter->node != NULL);
6975 if (rbtdbiter->result != ISC_R_SUCCESS)
6976 return (rbtdbiter->result);
6978 if (rbtdbiter->paused)
6979 resume_iteration(rbtdbiter);
6981 name = dns_fixedname_name(&rbtdbiter->name);
6982 origin = dns_fixedname_name(&rbtdbiter->origin);
6983 result = dns_rbtnodechain_prev(&rbtdbiter->chain, name, origin);
6985 dereference_iter_node(rbtdbiter);
6987 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
6988 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
6989 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
6990 NULL, &rbtdbiter->node);
6993 if (result == ISC_R_SUCCESS)
6994 reference_iter_node(rbtdbiter);
6996 rbtdbiter->result = result;
/*
 * Advance the database iterator to the next node.
 *
 * The iterator must currently have a node (REQUIRE).  If the stored result
 * is not ISC_R_SUCCESS it is returned unchanged.  Otherwise iteration is
 * resumed if paused, the chain is moved with dns_rbtnodechain_next(), and
 * the old node is released.  When the chain moved (ISC_R_SUCCESS or
 * DNS_R_NEWORIGIN), new_origin records whether the origin changed and the
 * new node is fetched and, on success, referenced.  The outcome is
 * recorded in rbtdbiter->result.
 */
7002 dbiterator_next(dns_dbiterator_t *iterator) {
7003 isc_result_t result;
7004 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7005 dns_name_t *name, *origin;
7007 REQUIRE(rbtdbiter->node != NULL);
7009 if (rbtdbiter->result != ISC_R_SUCCESS)
7010 return (rbtdbiter->result);
7012 if (rbtdbiter->paused)
7013 resume_iteration(rbtdbiter);
7015 name = dns_fixedname_name(&rbtdbiter->name);
7016 origin = dns_fixedname_name(&rbtdbiter->origin);
7017 result = dns_rbtnodechain_next(&rbtdbiter->chain, name, origin);
7019 dereference_iter_node(rbtdbiter);
7021 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
7022 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7023 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
7024 NULL, &rbtdbiter->node);
7026 if (result == ISC_R_SUCCESS)
7027 reference_iter_node(rbtdbiter);
7029 rbtdbiter->result = result;
/*
 * Hand the iterator's current node to the caller through '*nodep'.
 *
 * The iterator must be in the ISC_R_SUCCESS state with a current node
 * (both REQUIREd); a paused iterator is resumed first.  The node name is
 * built with dns_name_concatenate() from the iterator's stored name and
 * origin, and DNS_R_NEWORIGIN is reported when relative names are in use
 * and the origin has just changed.  A new reference to the node is taken
 * under the node lock before it is stored in '*nodep'.
 *
 * When the iterator is in cleaning mode and the result is ISC_R_SUCCESS,
 * the node is passed to expirenode(); if the node then has no 'down'
 * pointer it is appended to the deletions batch (flushing the batch first
 * if it is already at DELETION_BATCH_MAX) and its reference count is
 * incremented under the node lock.
 */
7035 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
7038 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7039 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7040 dns_rbtnode_t *node = rbtdbiter->node;
7041 isc_result_t result;
7042 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
7043 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
7045 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
7046 REQUIRE(rbtdbiter->node != NULL);
7048 if (rbtdbiter->paused)
7049 resume_iteration(rbtdbiter);
7052 if (rbtdbiter->common.relative_names)
7054 result = dns_name_concatenate(nodename, origin, name, NULL);
7055 if (result != ISC_R_SUCCESS)
7057 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
7058 result = DNS_R_NEWORIGIN;
7060 result = ISC_R_SUCCESS;
7062 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
7063 new_reference(rbtdb, node);
7064 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
7066 *nodep = rbtdbiter->node;
7068 if (iterator->cleaning && result == ISC_R_SUCCESS) {
7069 isc_result_t expire_result;
7072 * If the deletion array is full, flush it before trying
7073 * to expire the current node. The current node can't
7074 * fully deleted while the iteration cursor is still on it.
7076 if (rbtdbiter->delete == DELETION_BATCH_MAX)
7077 flush_deletions(rbtdbiter);
7079 expire_result = expirenode(iterator->db, *nodep, 0);
7082 * expirenode() currently always returns success.
7084 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
7087 rbtdbiter->deletions[rbtdbiter->delete++] = node;
7088 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
7089 dns_rbtnode_refincrement(node, &refs);
7091 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * Pause the database iterator so it holds no tree lock between calls.
 *
 * An iterator in an error state returns that stored result; an already
 * paused iterator returns ISC_R_SUCCESS immediately.  Otherwise the paused
 * flag is set, the tree read lock (the only mode expected here, per the
 * INSIST) is released if held, and pending deletions are flushed.
 */
7099 dbiterator_pause(dns_dbiterator_t *iterator) {
7100 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7101 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7103 if (rbtdbiter->result != ISC_R_SUCCESS &&
7104 rbtdbiter->result != ISC_R_NOMORE)
7105 return (rbtdbiter->result);
7107 if (rbtdbiter->paused)
7108 return (ISC_R_SUCCESS);
7110 rbtdbiter->paused = ISC_TRUE;
7112 if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
7113 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
7114 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7115 rbtdbiter->tree_locked = isc_rwlocktype_none;
7118 flush_deletions(rbtdbiter);
7120 return (ISC_R_SUCCESS);
/*
 * Copy the iterator's stored origin name into 'name'.  If the iterator's
 * stored result is not ISC_R_SUCCESS, that result is returned instead;
 * otherwise the result of dns_name_copy() is returned.
 */
7124 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
7125 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7126 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
7128 if (rbtdbiter->result != ISC_R_SUCCESS)
7129 return (rbtdbiter->result);
7131 return (dns_name_copy(origin, name, NULL));
7135 * Additional cache routines.
/*
 * Look up the additional-section cache entry associated with the rdata
 * the rdataset is currently positioned on.
 *
 * The slot index is total_count - current_count - 1, where total_count is
 * the 16-bit count at the start of the raw slab and current_count comes
 * from rdataset->privateuint4.  The per-header array is chosen by 'type'
 * (additional_auth or additional_glue).
 *
 * With the node's read lock held: if there is no array, or the slot is
 * empty, a query miss is counted on 'acache' (for a missing array only
 * when 'type' is not dns_rdatasetadditional_fromcache) and
 * ISC_R_NOTFOUND is returned.  Otherwise the entry is attached under the
 * lock, the lock is released, and dns_acache_getentry() fills in the
 * caller's output pointers before the entry is detached again.
 */
7138 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
7139 dns_rdatatype_t qtype, dns_acache_t *acache,
7140 dns_zone_t **zonep, dns_db_t **dbp,
7141 dns_dbversion_t **versionp, dns_dbnode_t **nodep,
7142 dns_name_t *fname, dns_message_t *msg,
7145 dns_rbtdb_t *rbtdb = rdataset->private1;
7146 dns_rbtnode_t *rbtnode = rdataset->private2;
7147 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7148 unsigned int current_count = rdataset->privateuint4;
7150 rdatasetheader_t *header;
7151 nodelock_t *nodelock;
7152 unsigned int total_count;
7153 acachectl_t *acarray;
7154 dns_acacheentry_t *entry;
7155 isc_result_t result;
7157 UNUSED(qtype); /* we do not use this value at least for now */
7160 header = (struct rdatasetheader *)(raw - sizeof(*header));
7162 total_count = raw[0] * 256 + raw[1];
7163 INSIST(total_count > current_count);
7164 count = total_count - current_count - 1;
7168 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
7169 NODE_LOCK(nodelock, isc_rwlocktype_read);
7172 case dns_rdatasetadditional_fromauth:
7173 acarray = header->additional_auth;
7175 case dns_rdatasetadditional_fromcache:
7178 case dns_rdatasetadditional_fromglue:
7179 acarray = header->additional_glue;
7185 if (acarray == NULL) {
7186 if (type != dns_rdatasetadditional_fromcache)
7187 dns_acache_countquerymiss(acache);
7188 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
7189 return (ISC_R_NOTFOUND);
7192 if (acarray[count].entry == NULL) {
7193 dns_acache_countquerymiss(acache);
7194 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
7195 return (ISC_R_NOTFOUND);
7199 dns_acache_attachentry(acarray[count].entry, &entry);
7201 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
7203 result = dns_acache_getentry(entry, zonep, dbp, versionp,
7204 nodep, fname, msg, now);
7206 dns_acache_detachentry(&entry);
/*
 * Callback run for an additional-cache entry; 'arg' carries the
 * acache_cbarg_t that was registered with the entry.
 *
 * With the node's write lock held, the per-header array selected by
 * cbarg->type is consulted.  If the slot cbarg->count still refers to
 * 'entry', the slot's entry and cbarg pointers are cleared.  The cbarg is
 * returned to the database's memory context and the entry is detached.
 * After the lock is released, the node and database references obtained
 * from the cbarg are dropped.
 */
7212 acache_callback(dns_acacheentry_t *entry, void **arg) {
7214 dns_rbtnode_t *rbtnode;
7215 nodelock_t *nodelock;
7216 acachectl_t *acarray = NULL;
7217 acache_cbarg_t *cbarg;
7220 REQUIRE(arg != NULL);
7224 * The caller must hold the entry lock.
7227 rbtdb = (dns_rbtdb_t *)cbarg->db;
7228 rbtnode = (dns_rbtnode_t *)cbarg->node;
7230 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
7231 NODE_LOCK(nodelock, isc_rwlocktype_write);
7233 switch (cbarg->type) {
7234 case dns_rdatasetadditional_fromauth:
7235 acarray = cbarg->header->additional_auth;
7237 case dns_rdatasetadditional_fromglue:
7238 acarray = cbarg->header->additional_glue;
7244 count = cbarg->count;
7245 if (acarray != NULL && acarray[count].entry == entry) {
7246 acarray[count].entry = NULL;
7247 INSIST(acarray[count].cbarg == cbarg);
7248 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
7249 acarray[count].cbarg = NULL;
7251 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
7253 dns_acache_detachentry(&entry);
7255 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
7257 dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
7258 dns_db_detach((dns_db_t **)(void*)&rbtdb);
/*
 * Cancel an additional-cache entry and release its callback argument.
 *
 * 'mctx', 'entry' and '*cbargp' must all be non-NULL.  The entry is
 * cancelled with dns_acache_cancelentry(), the node and database
 * references held by the cbarg are dropped, and the cbarg itself is
 * returned to 'mctx'.
 */
7264 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
7265 acache_cbarg_t **cbargp)
7267 acache_cbarg_t *cbarg;
7269 REQUIRE(mctx != NULL);
7270 REQUIRE(entry != NULL);
7271 REQUIRE(cbargp != NULL && *cbargp != NULL);
7275 dns_acache_cancelentry(entry);
7276 dns_db_detachnode(cbarg->db, &cbarg->node);
7277 dns_db_detach(&cbarg->db);
7279 isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
/*
 * Store additional-section cache data for the rdata the rdataset is
 * currently positioned on.
 *
 * Nothing is stored for dns_rdatasetadditional_fromcache (ISC_R_SUCCESS is
 * returned at once).  Otherwise:
 *   1. A callback argument is allocated (ISC_R_NOMEMORY on failure) and
 *      given its own references to the database and node.
 *   2. A new acache entry is created and filled in with the caller's
 *      zone/db/version/node/name.
 *   3. With the node's write lock held, the per-header array for 'type' is
 *      looked up, allocated with total_count empty slots if missing, and
 *      the slot (total_count - current_count - 1) is pointed at the new
 *      entry and cbarg.
 *   4. Any entry previously in that slot is cancelled and detached only
 *      after the node lock has been released.
 *
 * The trailing cleanup code releases the new entry (or, when there is no
 * entry, the cbarg together with its db/node references).
 */
7285 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
7286 dns_rdatatype_t qtype, dns_acache_t *acache,
7287 dns_zone_t *zone, dns_db_t *db,
7288 dns_dbversion_t *version, dns_dbnode_t *node,
7291 dns_rbtdb_t *rbtdb = rdataset->private1;
7292 dns_rbtnode_t *rbtnode = rdataset->private2;
7293 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7294 unsigned int current_count = rdataset->privateuint4;
7295 rdatasetheader_t *header;
7296 unsigned int total_count, count;
7297 nodelock_t *nodelock;
7298 isc_result_t result;
7299 acachectl_t *acarray;
7300 dns_acacheentry_t *newentry, *oldentry = NULL;
7301 acache_cbarg_t *newcbarg, *oldcbarg = NULL;
7305 if (type == dns_rdatasetadditional_fromcache)
7306 return (ISC_R_SUCCESS);
7308 header = (struct rdatasetheader *)(raw - sizeof(*header));
7310 total_count = raw[0] * 256 + raw[1];
7311 INSIST(total_count > current_count);
7312 count = total_count - current_count - 1; /* should be private data */
7314 newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
7315 if (newcbarg == NULL)
7316 return (ISC_R_NOMEMORY);
7317 newcbarg->type = type;
7318 newcbarg->count = count;
7319 newcbarg->header = header;
7320 newcbarg->db = NULL;
7321 dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
7322 newcbarg->node = NULL;
7323 dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
7326 result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
7327 acache_callback, newcbarg, &newentry);
7328 if (result != ISC_R_SUCCESS)
7330 /* Set cache data in the new entry. */
7331 result = dns_acache_setentry(acache, newentry, zone, db,
7332 version, node, fname);
7333 if (result != ISC_R_SUCCESS)
7336 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
7337 NODE_LOCK(nodelock, isc_rwlocktype_write);
7341 case dns_rdatasetadditional_fromauth:
7342 acarray = header->additional_auth;
7344 case dns_rdatasetadditional_fromglue:
7345 acarray = header->additional_glue;
7351 if (acarray == NULL) {
7354 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
7355 sizeof(acachectl_t));
7357 if (acarray == NULL) {
7358 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
7362 for (i = 0; i < total_count; i++) {
7363 acarray[i].entry = NULL;
7364 acarray[i].cbarg = NULL;
7368 case dns_rdatasetadditional_fromauth:
7369 header->additional_auth = acarray;
7371 case dns_rdatasetadditional_fromglue:
7372 header->additional_glue = acarray;
7378 if (acarray[count].entry != NULL) {
7380 * Swap the entry. Delay cleaning-up the old entry since
7381 * it would require a node lock.
7383 oldentry = acarray[count].entry;
7384 INSIST(acarray[count].cbarg != NULL);
7385 oldcbarg = acarray[count].cbarg;
7387 acarray[count].entry = newentry;
7388 acarray[count].cbarg = newcbarg;
7390 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
7392 if (oldentry != NULL) {
7393 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
7394 dns_acache_detachentry(&oldentry);
7397 return (ISC_R_SUCCESS);
7400 if (newcbarg != NULL) {
7401 if (newentry != NULL) {
7402 acache_cancelentry(rbtdb->common.mctx, newentry,
7404 dns_acache_detachentry(&newentry);
7406 dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
7407 dns_db_detach(&newcbarg->db);
7408 isc_mem_put(rbtdb->common.mctx, newcbarg,
/*
 * Remove the additional-section cache entry associated with the rdata the
 * rdataset is currently positioned on.
 *
 * Nothing is done for dns_rdatasetadditional_fromcache (ISC_R_SUCCESS).
 * Otherwise, with the node's write lock held, the per-header array for
 * 'type' is looked up; ISC_R_NOTFOUND is returned if the array is missing
 * or the slot (total_count - current_count - 1) is empty.  The slot's
 * entry and cbarg are cleared under the lock; the entry is cancelled and
 * detached after the lock has been released.
 */
7417 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
7418 dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
7420 dns_rbtdb_t *rbtdb = rdataset->private1;
7421 dns_rbtnode_t *rbtnode = rdataset->private2;
7422 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7423 unsigned int current_count = rdataset->privateuint4;
7424 rdatasetheader_t *header;
7425 nodelock_t *nodelock;
7426 unsigned int total_count, count;
7427 acachectl_t *acarray;
7428 dns_acacheentry_t *entry;
7429 acache_cbarg_t *cbarg;
7431 UNUSED(qtype); /* we do not use this value at least for now */
7434 if (type == dns_rdatasetadditional_fromcache)
7435 return (ISC_R_SUCCESS);
7437 header = (struct rdatasetheader *)(raw - sizeof(*header));
7439 total_count = raw[0] * 256 + raw[1];
7440 INSIST(total_count > current_count);
7441 count = total_count - current_count - 1;
7446 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
7447 NODE_LOCK(nodelock, isc_rwlocktype_write);
7450 case dns_rdatasetadditional_fromauth:
7451 acarray = header->additional_auth;
7453 case dns_rdatasetadditional_fromglue:
7454 acarray = header->additional_glue;
7460 if (acarray == NULL) {
7461 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
7462 return (ISC_R_NOTFOUND);
7465 entry = acarray[count].entry;
7466 if (entry == NULL) {
7467 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
7468 return (ISC_R_NOTFOUND);
7471 acarray[count].entry = NULL;
7472 cbarg = acarray[count].cbarg;
7473 acarray[count].cbarg = NULL;
7475 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
7477 if (entry != NULL) {
7479 acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
7480 dns_acache_detachentry(&entry);
7483 return (ISC_R_SUCCESS);
7487 * Routines for LRU-based cache management.
7491 * See if a given cache entry that is being reused needs to be updated
7492 * in the LRU-list. From the LRU management point of view, this function is
7493 * expected to return true for almost all cases. When used with threads,
7494 * however, this may cause a non-negligible performance penalty because a
7495 * writer lock will have to be acquired before updating the list.
7496 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
7497 * function returns true if the entry has not been updated for some period of
7498 * time. We differentiate the NS or glue address case and the others since
7499 * experiments have shown that the former tends to be accessed relatively
7500 * infrequently and the cost of a cache miss is higher (e.g., a missing NS record
7501 * may cause external queries at a higher level zone, involving more
7504 * Caller must hold the node (read or write) lock.
/*
 * Decide whether 'header' should have its LRU position refreshed at time
 * 'now'.  Headers flagged NONEXISTENT or STALE are tested first.  When
 * DNS_RBTDB_LIMITLRUUPDATE is non-zero, NS headers and glue-trust A/AAAA
 * headers qualify once 60 seconds have passed since last_used; all other
 * headers qualify after 300 seconds.
 */
7506 static inline isc_boolean_t
7507 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
7508 if ((header->attributes &
7509 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
7512 #if DNS_RBTDB_LIMITLRUUPDATE
7513 if (header->type == dns_rdatatype_ns ||
7514 (header->trust == dns_trust_glue &&
7515 (header->type == dns_rdatatype_a ||
7516 header->type == dns_rdatatype_aaaa))) {
7518 * Glue records are updated if at least 60 seconds have passed
7519 * since the previous update time.
7521 return (header->last_used + 60 <= now);
7524 /* Other records are updated if 5 minutes have passed. */
7525 return (header->last_used + 300 <= now);
7534 * Update the timestamp of a given cache entry and move it to the head
7535 * of the corresponding LRU list.
7537 * Caller must hold the node (write) lock.
7539 * Note that we do NOT touch the heap here, as the TTL has not changed.
/*
 * Refresh 'header' in its LRU list: the header must already be linked
 * (INSIST); it is unlinked from the list for its node's lock bucket, its
 * last_used time is set to 'now', and it is put back at the head of the
 * same list.
 */
7542 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
7545 /* To be checked: can we really assume this? XXXMLG */
7546 INSIST(ISC_LINK_LINKED(header, lru_link));
7548 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum],
7550 header->last_used = now;
7551 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum],
7556 * Purge some expired and/or stale (i.e. unused for some period) cache entries
7557 * under an overmem condition. To recover from this condition quickly, up to
7558 * 2 entries will be purged. This process is triggered while adding a new
7559 * entry, and we specifically avoid purging entries in the same LRU bucket as
7560 * the one to which the new entry will belong. Otherwise, we might purge
7561 * entries of the same name of different RR types while adding RRsets from a
7562 * single response (consider the case where we're adding A and AAAA glue records
7563 * of the same NS name).
/*
 * Expire a limited number of cache headers from lock buckets other than
 * 'locknum_start'.
 *
 * Buckets are visited in circular order starting just after
 * 'locknum_start', stopping when the walk returns to it or the purge
 * budget (purgecount) is used up.  For each bucket, with its node write
 * lock held:
 *   - the element at index 1 of the bucket's heap is expired if its TTL is
 *     at or before now - RBTDB_VIRTUAL;
 *   - then the bucket's LRU list is walked from the tail, unlinking and
 *     expiring headers while budget remains.
 *
 * 'tree_locked' is passed through to expire_header().
 */
7566 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
7567 isc_stdtime_t now, isc_boolean_t tree_locked)
7569 rdatasetheader_t *header, *header_prev;
7570 unsigned int locknum;
7573 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
7574 locknum != locknum_start && purgecount > 0;
7575 locknum = (locknum + 1) % rbtdb->node_lock_count) {
7576 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
7577 isc_rwlocktype_write);
7579 header = isc_heap_element(rbtdb->heaps[locknum], 1);
7580 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
7581 expire_header(rbtdb, header, tree_locked);
7585 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
7586 header != NULL && purgecount > 0;
7587 header = header_prev) {
7588 header_prev = ISC_LIST_PREV(header, lru_link);
7590 * Unlink the entry at this point to avoid checking it
7591 * again even if it's currently used someone else and
7592 * cannot be purged at this moment. This entry won't be
7593 * referenced any more (so unlinking is safe) since the
7594 * TTL was reset to 0.
7596 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
7598 expire_header(rbtdb, header, tree_locked);
7602 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
7603 isc_rwlocktype_write);
/*
 * Force 'header' to expire: its TTL is set to 0 via set_ttl(), it is
 * flagged STALE, and its node is marked dirty.
 *
 * If the node's current reference count is zero, a reference is taken and
 * immediately released through decrement_reference(), which is told the
 * node lock is held in write mode and that the tree lock is held in write
 * mode when 'tree_locked' is true (otherwise not held).
 */
7608 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
7609 isc_boolean_t tree_locked)
7611 set_ttl(rbtdb, header, 0);
7612 header->attributes |= RDATASET_ATTR_STALE;
7613 header->node->dirty = 1;
7616 * Caller must hold the node (write) lock.
7619 if (dns_rbtnode_refcurrent(header->node) == 0) {
7621 * If no one else is using the node, we can clean it up now.
7622 * We first need to gain a new reference to the node to meet a
7623 * requirement of decrement_reference().
7625 new_reference(rbtdb, header->node);
7626 decrement_reference(rbtdb, header->node, 0,
7627 isc_rwlocktype_write,
7628 tree_locked ? isc_rwlocktype_write :
7629 isc_rwlocktype_none, ISC_FALSE);