2 * Copyright (C) 2004-2010 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: rbtdb.c,v 1.248.12.18.2.5 2010/02/26 00:26:54 marka Exp $ */
23 * Principal Author: Bob Halley
29 #include <isc/event.h>
31 #include <isc/platform.h>
32 #include <isc/print.h>
33 #include <isc/mutex.h>
34 #include <isc/random.h>
35 #include <isc/refcount.h>
36 #include <isc/rwlock.h>
37 #include <isc/string.h>
42 #include <dns/acache.h>
44 #include <dns/dbiterator.h>
45 #include <dns/events.h>
46 #include <dns/fixedname.h>
49 #include <dns/masterdump.h>
51 #include <dns/rdata.h>
52 #include <dns/rdataset.h>
53 #include <dns/rdatasetiter.h>
54 #include <dns/rdataslab.h>
55 #include <dns/result.h>
56 #include <dns/stats.h>
59 #include <dns/zonekey.h>
61 #ifdef DNS_RBTDB_VERSION64
67 #ifdef DNS_RBTDB_VERSION64
68 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '8')
70 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
74 * Note that "impmagic" is not the first four bytes of the struct, so
75 * ISC_MAGIC_VALID cannot be used.
77 #define VALID_RBTDB(rbtdb) ((rbtdb) != NULL && \
78 (rbtdb)->common.impmagic == RBTDB_MAGIC)
80 #ifdef DNS_RBTDB_VERSION64
81 typedef isc_uint64_t rbtdb_serial_t;
83 * Make casting easier in symbolic debuggers by using different names
84 * for the 64 bit version.
86 #define dns_rbtdb_t dns_rbtdb64_t
87 #define rdatasetheader_t rdatasetheader64_t
88 #define rbtdb_version_t rbtdb_version64_t
90 typedef isc_uint32_t rbtdb_serial_t;
93 typedef isc_uint32_t rbtdb_rdatatype_t;
/*
 * An rbtdb_rdatatype_t packs two 16-bit fields into one 32-bit value:
 * the "base" type occupies the low 16 bits and the "extended" type the
 * high 16 bits.
 *
 *	RBTDB_RDATATYPE_BASE(type)	extracts the low 16 bits.
 *	RBTDB_RDATATYPE_EXT(type)	extracts the high 16 bits.
 *	RBTDB_RDATATYPE_VALUE(b, e)	combines base 'b' and extended type 'e'.
 *
 * The operands of RBTDB_RDATATYPE_VALUE() are widened to isc_uint32_t
 * before shifting.  An operand narrower than int would otherwise be
 * promoted to (signed) int, and shifting a value >= 0x8000 left by 16
 * overflows int, which is undefined behavior.  'b' is masked so that it
 * can never spill into the extended half.
 */
#define RBTDB_RDATATYPE_BASE(type)	((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)	((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
	((rbtdb_rdatatype_t)((((isc_uint32_t)(e)) << 16) | \
			     (((isc_uint32_t)(b)) & 0xFFFF)))
99 #define RBTDB_RDATATYPE_SIGNSEC \
100 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
101 #define RBTDB_RDATATYPE_SIGNS \
102 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
103 #define RBTDB_RDATATYPE_SIGCNAME \
104 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
105 #define RBTDB_RDATATYPE_SIGDNAME \
106 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
107 #define RBTDB_RDATATYPE_NCACHEANY \
108 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
111 * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
112 * Using rwlock is effective with regard to lookup performance only when
113 * it is implemented in an efficient way.
114 * Otherwise, it is generally wise to stick to the simple locking since rwlock
115 * would require more memory or can even make lookups slower due to its own
116 * overhead (when it internally calls mutex locks).
118 #ifdef ISC_RWLOCK_USEATOMIC
119 #define DNS_RBTDB_USERWLOCK 1
121 #define DNS_RBTDB_USERWLOCK 0
124 #if DNS_RBTDB_USERWLOCK
125 #define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
126 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
127 #define RBTDB_LOCK(l, t) RWLOCK((l), (t))
128 #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
130 #define RBTDB_INITLOCK(l) isc_mutex_init(l)
131 #define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
132 #define RBTDB_LOCK(l, t) LOCK(l)
133 #define RBTDB_UNLOCK(l, t) UNLOCK(l)
137 * Since node locking is sensitive to both performance and memory footprint,
138 * we need some trick here. If we have both high-performance rwlock and
139 * high performance and small-memory reference counters, we use rwlock for
140 * node lock and isc_refcount for node references. In this case, we don't have
141 * to protect the access to the counters by locks.
142 * Otherwise, we simply use ordinary mutex lock for node locking, and use
143 * simple integers as reference counters which is protected by the lock.
144 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
145 * NODE_UNLOCK. In some other cases, however, we need to protect reference
146 * counters first and then protect other parts of a node as read-only data.
147 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
148 * provided for these special cases. When we can use the efficient backend
149 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
150 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
151 * section including the access to the reference counter.
152 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
153 * section is also protected by NODE_STRONGLOCK().
155 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
156 typedef isc_rwlock_t nodelock_t;
158 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
159 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
160 #define NODE_LOCK(l, t) RWLOCK((l), (t))
161 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
162 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
164 #define NODE_STRONGLOCK(l) ((void)0)
165 #define NODE_STRONGUNLOCK(l) ((void)0)
166 #define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
167 #define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
168 #define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
170 typedef isc_mutex_t nodelock_t;
172 #define NODE_INITLOCK(l) isc_mutex_init(l)
173 #define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
174 #define NODE_LOCK(l, t) LOCK(l)
175 #define NODE_UNLOCK(l, t) UNLOCK(l)
176 #define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
178 #define NODE_STRONGLOCK(l) LOCK(l)
179 #define NODE_STRONGUNLOCK(l) UNLOCK(l)
180 #define NODE_WEAKLOCK(l, t) ((void)0)
181 #define NODE_WEAKUNLOCK(l, t) ((void)0)
182 #define NODE_WEAKDOWNGRADE(l) ((void)0)
186 * Whether to rate-limit updating the LRU to avoid possible thread contention.
187 * Our performance measurement has shown the cost is marginal, so it's defined
188 * to be 0 by default either with or without threads.
190 #ifndef DNS_RBTDB_LIMITLRUUPDATE
191 #define DNS_RBTDB_LIMITLRUUPDATE 0
195 * Allow clients with a virtual time of up to 5 minutes in the past to see
196 * records that would otherwise have expired.
198 #define RBTDB_VIRTUAL 300
206 typedef struct acachectl acachectl_t;
208 typedef struct rdatasetheader {
210 * Locked by the owning node's lock.
212 rbtdb_serial_t serial;
214 rbtdb_rdatatype_t type;
215 isc_uint16_t attributes;
217 struct noqname *noqname;
219 * We don't use the LIST macros, because the LIST structure has
220 * both head and tail pointers, and is doubly linked.
223 struct rdatasetheader *next;
225 * If this is the top header for an rdataset, 'next' points
226 * to the top header for the next rdataset (i.e., the next type).
227 * Otherwise, it points up to the header whose down pointer points
231 struct rdatasetheader *down;
233 * Points to the header for the next older version of
239 * Monotonically increased every time this rdataset is bound so that
240 * it is used as the base of the starting point in DNS responses
241 * when the "cyclic" rrset-order is required. Since the ordering
242 * should not be so crucial, no lock is set for the counter for
243 * performance reasons.
246 acachectl_t *additional_auth;
247 acachectl_t *additional_glue;
250 isc_stdtime_t last_used;
251 ISC_LINK(struct rdatasetheader) lru_link;
253 * Used for LRU-based cache management. We should probably make
254 * these cache-DB specific. We might also make it a pointer and
255 * ensure only the top header has a valid link to save memory.
256 * The linked-list is locked by the rbtdb->lrulock.
260 * It's possible this should not be here anymore, but instead
261 * referenced from the bucket's heap directly.
266 unsigned int heap_index;
268 * Used for TTL-based cache cleaning.
272 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
273 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
/*
 * Bit flags stored in rdatasetheader_t.attributes.
 *
 * NONEXISTENT	tested by EXISTS()/NONEXISTENT(); free_rdataset() sizes such
 *		a header as sizeof(header) only, and clean_cache_node() frees
 *		top headers that carry it.
 * STALE	clean_cache_node() frees top headers that carry it.
 * IGNORE	set by rollback_node() on headers whose serial is being
 *		rolled back; tested by IGNORE().
 * RETAIN	tested by RETAIN().
 * NXDOMAIN	tested by NXDOMAIN(); update_rrsetstats() accounts such
 *		headers under the NXDOMAIN statistics attribute.
 * RESIGN	NOTE(review): not referenced in the part of the file visible
 *		here; presumably related to re-signing -- confirm.
 * STATCOUNT	free_rdataset() only decrements the rrset statistics for
 *		existing headers that carry it.
 */
275 #define RDATASET_ATTR_NONEXISTENT 0x0001
276 #define RDATASET_ATTR_STALE 0x0002
277 #define RDATASET_ATTR_IGNORE 0x0004
278 #define RDATASET_ATTR_RETAIN 0x0008
279 #define RDATASET_ATTR_NXDOMAIN 0x0010
280 #define RDATASET_ATTR_RESIGN 0x0020
281 #define RDATASET_ATTR_STATCOUNT 0x0040
283 typedef struct acache_cbarg {
284 dns_rdatasetadditional_t type;
288 rdatasetheader_t *header;
292 dns_acacheentry_t *entry;
293 acache_cbarg_t *cbarg;
298 * When the cache will pre-expire data (due to memory low or other
299 * situations) before the rdataset's TTL has expired, it MUST
300 * respect the RETAIN bit and not expire the data until its TTL is
304 #undef IGNORE /* WIN32 winbase.h defines this. */
/*
 * Predicates on rdatasetheader_t.attributes.  Each one evaluates to 1 or 0.
 * RDATASET_HASATTR() is the common "is this flag bit set?" test; EXISTS()
 * is simply the negation of NONEXISTENT().
 */
#define RDATASET_HASATTR(header, flag) \
	(((header)->attributes & (flag)) != 0)

#define NONEXISTENT(header)	RDATASET_HASATTR(header, RDATASET_ATTR_NONEXISTENT)
#define EXISTS(header)		(!NONEXISTENT(header))
#define IGNORE(header)		RDATASET_HASATTR(header, RDATASET_ATTR_IGNORE)
#define RETAIN(header)		RDATASET_HASATTR(header, RDATASET_ATTR_RETAIN)
#define NXDOMAIN(header)	RDATASET_HASATTR(header, RDATASET_ATTR_NXDOMAIN)
317 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
320 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
321 * There is a tradeoff issue about configuring this value: if this is too
322 * small, it may cause heavier contention between threads; if this is too large,
323 * LRU purge algorithm won't work well (entries tend to be purged prematurely).
324 * The default value should work well for most environments, but this can
325 * also be configurable at compilation time via the
326 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
327 * 1 due to the assumption of overmem_purge().
329 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
330 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
331 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
333 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
336 #define DEFAULT_CACHE_NODE_LOCK_COUNT 16
337 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
341 /* Protected in the refcount routines. */
342 isc_refcount_t references;
343 /* Locked by lock. */
344 isc_boolean_t exiting;
347 typedef struct rbtdb_changed {
348 dns_rbtnode_t * node;
350 ISC_LINK(struct rbtdb_changed) link;
353 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
355 typedef struct rbtdb_version {
357 rbtdb_serial_t serial;
359 * Protected in the refcount routines.
360 * XXXJT: should we change the lock policy based on the refcount
363 isc_refcount_t references;
364 /* Locked by database lock. */
365 isc_boolean_t writer;
366 isc_boolean_t commit_ok;
367 rbtdb_changedlist_t changed_list;
368 ISC_LINK(struct rbtdb_version) link;
371 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
376 #if DNS_RBTDB_USERWLOCK
381 isc_rwlock_t tree_lock;
382 unsigned int node_lock_count;
383 rbtdb_nodelock_t * node_locks;
384 dns_rbtnode_t * origin_node;
385 dns_stats_t * rrsetstats; /* cache DB only */
386 /* Locked by lock. */
388 isc_refcount_t references;
389 unsigned int attributes;
390 rbtdb_serial_t current_serial;
391 rbtdb_serial_t least_serial;
392 rbtdb_serial_t next_serial;
393 rbtdb_version_t * current_version;
394 rbtdb_version_t * future_version;
395 rbtdb_versionlist_t open_versions;
396 isc_boolean_t overmem;
398 dns_dbnode_t *soanode;
399 dns_dbnode_t *nsnode;
402 * This is a linked list used to implement the LRU cache. There will
403 * be node_lock_count linked lists here. Nodes in bucket 1 will be
404 * placed on the linked list rdatasets[1].
406 rdatasetheaderlist_t *rdatasets;
409 * Temporary storage for stale cache nodes and dynamically deleted
410 * nodes that await being cleaned up.
412 rbtnodelist_t *deadnodes;
415 * Heaps. Each of these is used for TTL based expiry.
419 /* Locked by tree_lock. */
421 isc_boolean_t secure;
424 unsigned int quantum;
427 #define RBTDB_ATTR_LOADED 0x01
428 #define RBTDB_ATTR_LOADING 0x02
435 rbtdb_version_t * rbtversion;
436 rbtdb_serial_t serial;
437 unsigned int options;
438 dns_rbtnodechain_t chain;
439 isc_boolean_t copy_name;
440 isc_boolean_t need_cleanup;
442 dns_rbtnode_t * zonecut;
443 rdatasetheader_t * zonecut_rdataset;
444 rdatasetheader_t * zonecut_sigrdataset;
445 dns_fixedname_t zonecut_name;
457 static void rdataset_disassociate(dns_rdataset_t *rdataset);
458 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
459 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
460 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
461 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
462 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
463 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
465 dns_rdataset_t *nsec,
466 dns_rdataset_t *nsecsig);
467 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
468 dns_rdatasetadditional_t type,
469 dns_rdatatype_t qtype,
470 dns_acache_t *acache,
473 dns_dbversion_t **versionp,
474 dns_dbnode_t **nodep,
478 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
479 dns_rdatasetadditional_t type,
480 dns_rdatatype_t qtype,
481 dns_acache_t *acache,
484 dns_dbversion_t *version,
487 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
488 dns_rdataset_t *rdataset,
489 dns_rdatasetadditional_t type,
490 dns_rdatatype_t qtype);
491 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
493 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
495 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
496 isc_boolean_t tree_locked);
497 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
498 isc_stdtime_t now, isc_boolean_t tree_locked);
499 static void prune_tree(isc_task_t *task, isc_event_t *event);
500 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
501 static void rdataset_expire(dns_rdataset_t *rdataset);
503 static dns_rdatasetmethods_t rdataset_methods = {
504 rdataset_disassociate,
512 rdataset_getadditional,
513 rdataset_setadditional,
514 rdataset_putadditional,
519 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
520 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
521 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
522 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
523 dns_rdataset_t *rdataset);
525 static dns_rdatasetitermethods_t rdatasetiter_methods = {
526 rdatasetiter_destroy,
532 typedef struct rbtdb_rdatasetiter {
533 dns_rdatasetiter_t common;
534 rdatasetheader_t * current;
535 } rbtdb_rdatasetiter_t;
537 static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
538 static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
539 static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
540 static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
542 static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
543 static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
544 static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
545 dns_dbnode_t **nodep,
547 static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
548 static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
551 static dns_dbiteratormethods_t dbiterator_methods = {
563 #define DELETION_BATCH_MAX 64
566 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
568 typedef struct rbtdb_dbiterator {
569 dns_dbiterator_t common;
570 isc_boolean_t paused;
571 isc_boolean_t new_origin;
572 isc_rwlocktype_t tree_locked;
574 dns_fixedname_t name;
575 dns_fixedname_t origin;
576 dns_rbtnodechain_t chain;
578 dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
580 } rbtdb_dbiterator_t;
/*
 * Database-kind predicates: test a DNS_DBATTR_* bit in the common
 * attributes of an rbtdb.  Each one evaluates to 1 or 0.
 */
#define RBTDB_HASDBATTR(rbtdb, flag) \
	(((rbtdb)->common.attributes & (flag)) != 0)

#define IS_STUB(rbtdb)	RBTDB_HASDBATTR(rbtdb, DNS_DBATTR_STUB)
#define IS_CACHE(rbtdb)	RBTDB_HASDBATTR(rbtdb, DNS_DBATTR_CACHE)
586 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
588 static void overmem(dns_db_t *db, isc_boolean_t overmem);
591 * 'init_count' is used to initialize 'newheader->count' which in turn
592 * is used to determine where in the cycle rrset-order cyclic starts.
593 * We don't lock this as we don't care about simultaneous updates.
596 * Both init_count and header->count can be ISC_UINT32_MAX.
597 * The count on the returned rdataset however can't be as
598 * that indicates that the database does not implement cyclic
601 static unsigned int init_count;
606 * If a routine is going to lock more than one lock in this module, then
607 * the locking must be done in the following order:
611 * Node Lock (Only one from the set may be locked at one time by
616 * Failure to follow this hierarchy can result in deadlock.
622 * For zone databases the node for the origin of the zone MUST NOT be deleted.
631 attach(dns_db_t *source, dns_db_t **targetp) {
632 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
634 REQUIRE(VALID_RBTDB(rbtdb));
636 isc_refcount_increment(&rbtdb->references, NULL);
642 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
643 dns_rbtdb_t *rbtdb = event->ev_arg;
647 free_rbtdb(rbtdb, ISC_TRUE, event);
651 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
652 isc_boolean_t increment)
654 dns_rdatastatstype_t statattributes = 0;
655 dns_rdatastatstype_t base = 0;
656 dns_rdatastatstype_t type;
658 /* At the moment we count statistics only for cache DB */
659 INSIST(IS_CACHE(rbtdb));
661 if (NXDOMAIN(header))
662 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
663 else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
664 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
665 base = RBTDB_RDATATYPE_EXT(header->type);
667 base = RBTDB_RDATATYPE_BASE(header->type);
669 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
671 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
673 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
677 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
682 oldttl = header->rdh_ttl;
683 header->rdh_ttl = newttl;
686 * It's possible the rbtdb is not a cache. If this is the case,
687 * we will not have a heap, and we move on. If we do, though,
688 * we might need to adjust things.
690 if (header->heap_index == 0 || newttl == oldttl)
692 idx = header->node->locknum;
693 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
695 heap = rbtdb->heaps[idx];
698 isc_heap_increased(heap, header->heap_index);
700 isc_heap_decreased(heap, header->heap_index);
704 * This function allows the heap code to rank the priority of each
705 * element. It returns ISC_TRUE if v1 happens "sooner" than v2.
708 ttl_sooner(void *v1, void *v2) {
709 rdatasetheader_t *h1 = v1;
710 rdatasetheader_t *h2 = v2;
712 if (h1->rdh_ttl < h2->rdh_ttl)
718 * This function sets the heap index into the header.
721 ttl_set_index(void *what, unsigned int index) {
722 rdatasetheader_t *h = what;
724 h->heap_index = index;
728 * Work out how many nodes can be deleted in the time between two
729 * requests to the nameserver. Smooth the resulting number and use it
730 * as an estimate for the number of nodes to be deleted in the next
734 adjust_quantum(unsigned int old, isc_time_t *start) {
735 unsigned int pps = dns_pps; /* packets per second */
736 unsigned int interval;
745 interval = 1000000 / pps; /* interval in usec */
748 usecs = isc_time_microdiff(&end, start);
751 * We were unable to measure the amount of time taken.
752 * Double the nodes deleted next time.
759 new = old * interval;
760 new /= (unsigned int)usecs;
767 new = (new + old * 3) / 4;
769 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
770 ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
776 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
778 isc_ondestroy_t ondest;
780 char buf[DNS_NAME_FORMATSIZE];
783 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
784 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
786 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
787 REQUIRE(rbtdb->future_version == NULL);
789 if (rbtdb->current_version != NULL) {
792 isc_refcount_decrement(&rbtdb->current_version->references,
795 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
796 isc_refcount_destroy(&rbtdb->current_version->references);
797 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
798 sizeof(rbtdb_version_t));
802 * We assume the number of remaining dead nodes is reasonably small;
803 * the overhead of unlinking all nodes here should be negligible.
805 for (i = 0; i < rbtdb->node_lock_count; i++) {
808 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
809 while (node != NULL) {
810 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
811 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
816 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
818 if (rbtdb->tree != NULL) {
819 isc_time_now(&start);
820 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
821 if (result == ISC_R_QUOTA) {
822 INSIST(rbtdb->task != NULL);
823 if (rbtdb->quantum != 0)
824 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
827 event = isc_event_allocate(rbtdb->common.mctx,
829 DNS_EVENT_FREESTORAGE,
832 sizeof(isc_event_t));
835 isc_task_send(rbtdb->task, &event);
838 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
841 isc_event_free(&event);
843 if (dns_name_dynamic(&rbtdb->common.origin))
844 dns_name_format(&rbtdb->common.origin, buf,
847 strcpy(buf, "<UNKNOWN>");
848 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
849 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
850 "done free_rbtdb(%s)", buf);
852 if (dns_name_dynamic(&rbtdb->common.origin))
853 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
854 for (i = 0; i < rbtdb->node_lock_count; i++) {
855 isc_refcount_destroy(&rbtdb->node_locks[i].references);
856 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
860 * Clean up LRU cache objects.
862 if (rbtdb->rdatasets != NULL) {
863 for (i = 0; i < rbtdb->node_lock_count; i++)
864 INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
865 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
866 rbtdb->node_lock_count *
867 sizeof(rdatasetheaderlist_t));
870 * Clean up dead node buckets.
872 if (rbtdb->deadnodes != NULL) {
873 for (i = 0; i < rbtdb->node_lock_count; i++)
874 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
875 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
876 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
879 * Clean up TTL heap cache objects.
881 if (rbtdb->heaps != NULL) {
882 for (i = 0; i < rbtdb->node_lock_count; i++)
883 isc_heap_destroy(&rbtdb->heaps[i]);
884 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
885 rbtdb->node_lock_count *
886 sizeof(isc_heap_t *));
889 if (rbtdb->rrsetstats != NULL)
890 dns_stats_detach(&rbtdb->rrsetstats);
892 isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
893 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
894 isc_rwlock_destroy(&rbtdb->tree_lock);
895 isc_refcount_destroy(&rbtdb->references);
896 if (rbtdb->task != NULL)
897 isc_task_detach(&rbtdb->task);
899 RBTDB_DESTROYLOCK(&rbtdb->lock);
900 rbtdb->common.magic = 0;
901 rbtdb->common.impmagic = 0;
902 ondest = rbtdb->common.ondest;
903 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
904 isc_ondestroy_notify(&ondest, rbtdb);
908 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
909 isc_boolean_t want_free = ISC_FALSE;
911 unsigned int inactive = 0;
913 /* XXX check for open versions here */
915 if (rbtdb->soanode != NULL)
916 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
917 if (rbtdb->nsnode != NULL)
918 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
921 * Even though there are no external direct references, there still
922 * may be nodes in use.
924 for (i = 0; i < rbtdb->node_lock_count; i++) {
925 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
926 rbtdb->node_locks[i].exiting = ISC_TRUE;
927 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
928 if (isc_refcount_current(&rbtdb->node_locks[i].references)
935 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
936 rbtdb->active -= inactive;
937 if (rbtdb->active == 0)
938 want_free = ISC_TRUE;
939 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
941 char buf[DNS_NAME_FORMATSIZE];
942 if (dns_name_dynamic(&rbtdb->common.origin))
943 dns_name_format(&rbtdb->common.origin, buf,
946 strcpy(buf, "<UNKNOWN>");
947 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
948 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
949 "calling free_rbtdb(%s)", buf);
950 free_rbtdb(rbtdb, ISC_TRUE, NULL);
956 detach(dns_db_t **dbp) {
957 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
960 REQUIRE(VALID_RBTDB(rbtdb));
962 isc_refcount_decrement(&rbtdb->references, &refs);
965 maybe_free_rbtdb(rbtdb);
971 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
972 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
973 rbtdb_version_t *version;
976 REQUIRE(VALID_RBTDB(rbtdb));
978 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
979 version = rbtdb->current_version;
980 isc_refcount_increment(&version->references, &refs);
981 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
983 *versionp = (dns_dbversion_t *)version;
986 static inline rbtdb_version_t *
987 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
988 unsigned int references, isc_boolean_t writer)
991 rbtdb_version_t *version;
993 version = isc_mem_get(mctx, sizeof(*version));
996 version->serial = serial;
997 result = isc_refcount_init(&version->references, references);
998 if (result != ISC_R_SUCCESS) {
999 isc_mem_put(mctx, version, sizeof(*version));
1002 version->writer = writer;
1003 version->commit_ok = ISC_FALSE;
1004 ISC_LIST_INIT(version->changed_list);
1005 ISC_LINK_INIT(version, link);
1011 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1012 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1013 rbtdb_version_t *version;
1015 REQUIRE(VALID_RBTDB(rbtdb));
1016 REQUIRE(versionp != NULL && *versionp == NULL);
1017 REQUIRE(rbtdb->future_version == NULL);
1019 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1020 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
1021 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1023 if (version != NULL) {
1024 version->commit_ok = ISC_TRUE;
1025 rbtdb->next_serial++;
1026 rbtdb->future_version = version;
1028 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1030 if (version == NULL)
1031 return (ISC_R_NOMEMORY);
1033 *versionp = version;
1035 return (ISC_R_SUCCESS);
1039 attachversion(dns_db_t *db, dns_dbversion_t *source,
1040 dns_dbversion_t **targetp)
1042 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1043 rbtdb_version_t *rbtversion = source;
1046 REQUIRE(VALID_RBTDB(rbtdb));
1048 isc_refcount_increment(&rbtversion->references, &refs);
1051 *targetp = rbtversion;
1054 static rbtdb_changed_t *
1055 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1056 dns_rbtnode_t *node)
1058 rbtdb_changed_t *changed;
1062 * Caller must be holding the node lock if its reference must be
1063 * protected by the lock.
1066 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1068 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1070 REQUIRE(version->writer);
1072 if (changed != NULL) {
1073 dns_rbtnode_refincrement(node, &refs);
1075 changed->node = node;
1076 changed->dirty = ISC_FALSE;
1077 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1079 version->commit_ok = ISC_FALSE;
1081 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1087 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1092 unsigned char *raw; /* RDATASLAB */
1095 * The caller must be holding the corresponding node lock.
1101 raw = (unsigned char *)header + sizeof(*header);
1102 count = raw[0] * 256 + raw[1];
1105 * Sanity check: since an additional cache entry has a reference to
1106 * the original DB node (in the callback arg), there should be no
1107 * acache entries when the node can be freed.
1109 for (i = 0; i < count; i++)
1110 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1112 isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1116 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1118 if (dns_name_dynamic(&(*noqname)->name))
1119 dns_name_free(&(*noqname)->name, mctx);
1120 if ((*noqname)->nsec != NULL)
1121 isc_mem_put(mctx, (*noqname)->nsec,
1122 dns_rdataslab_size((*noqname)->nsec, 0));
1123 if ((*noqname)->nsecsig != NULL)
1124 isc_mem_put(mctx, (*noqname)->nsecsig,
1125 dns_rdataslab_size((*noqname)->nsecsig, 0));
1126 isc_mem_put(mctx, *noqname, sizeof(**noqname));
1131 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1133 ISC_LINK_INIT(h, lru_link);
1137 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1138 fprintf(stderr, "initialized header: %p\n", h);
1144 static inline rdatasetheader_t *
1145 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1147 rdatasetheader_t *h;
1149 h = isc_mem_get(mctx, sizeof(*h));
1154 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1155 fprintf(stderr, "allocated header: %p\n", h);
1157 init_rdataset(rbtdb, h);
1162 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1166 if (EXISTS(rdataset) &&
1167 (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1168 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1171 if (IS_CACHE(rbtdb) && ISC_LINK_LINKED(rdataset, lru_link)) {
1172 int idx = rdataset->node->locknum;
1173 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, lru_link);
1174 if (rdataset->heap_index != 0) {
1175 isc_heap_delete(rbtdb->heaps[idx],
1176 rdataset->heap_index);
1178 rdataset->heap_index = 0;
1181 if (rdataset->noqname != NULL)
1182 free_noqname(mctx, &rdataset->noqname);
1184 free_acachearray(mctx, rdataset, rdataset->additional_auth);
1185 free_acachearray(mctx, rdataset, rdataset->additional_glue);
1187 if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1188 size = sizeof(*rdataset);
1190 size = dns_rdataslab_size((unsigned char *)rdataset,
1192 isc_mem_put(mctx, rdataset, size);
1196 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1197 rdatasetheader_t *header, *dcurrent;
1198 isc_boolean_t make_dirty = ISC_FALSE;
1201 * Caller must hold the node lock.
1205 * We set the IGNORE attribute on rdatasets with serial number
1206 * 'serial'. When the reference count goes to zero, these rdatasets
1207 * will be cleaned up; until that time, they will be ignored.
1209 for (header = node->data; header != NULL; header = header->next) {
1210 if (header->serial == serial) {
1211 header->attributes |= RDATASET_ATTR_IGNORE;
1212 make_dirty = ISC_TRUE;
1214 for (dcurrent = header->down;
1216 dcurrent = dcurrent->down) {
1217 if (dcurrent->serial == serial) {
1218 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1219 make_dirty = ISC_TRUE;
1228 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1230 rdatasetheader_t *d, *down_next;
1232 for (d = top->down; d != NULL; d = down_next) {
1233 down_next = d->down;
1234 free_rdataset(rbtdb, mctx, d);
/*
 * clean_cache_node -- drop stale data from a cache node.
 *
 * For each top-level header in node->data the down chain is released via
 * clean_stale_headers().  Then, if the header carries
 * RDATASET_ATTR_NONEXISTENT or RDATASET_ATTR_STALE, it is unlinked from
 * the top-level list (through top_prev->next, or node->data when there is
 * no predecessor) and freed.  top_next is captured at the start of each
 * iteration because current may be freed before the loop advances.
 * NOTE(review): the initialisation and advancing of top_prev, and whatever
 * follows the loop, are on lines not present in this listing.
 */
1240 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1241 rdatasetheader_t *current, *top_prev, *top_next;
1242 isc_mem_t *mctx = rbtdb->common.mctx;
1245 * Caller must be holding the node lock.
1249 for (current = node->data; current != NULL; current = top_next) {
1250 top_next = current->next;
1251 clean_stale_headers(rbtdb, mctx, current);
1253 * If current is nonexistent or stale, we can clean it up.
1255 if ((current->attributes &
1256 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1257 if (top_prev != NULL)
1258 top_prev->next = current->next;
1260 node->data = current->next;
1261 free_rdataset(rbtdb, mctx, current);
/*
 * clean_zone_node -- prune obsolete rdataset versions from a zone node.
 *
 * least_serial: serial of the oldest version still open; must be non-zero
 *               (enforced by REQUIRE).
 *
 * For every top-level header (current) the function, in order:
 *   1. walks the down chain and frees entries that duplicate the serial of
 *      their parent (a second condition on that test is not visible; the
 *      original comment names the IGNORE attribute), relinking
 *      dparent->down and the back pointer kept in down_next->next;
 *   2. if current itself is IGNOREd, either unlinks and frees it (no down
 *      entry) or promotes current->down into the top-level list in its
 *      place;
 *   3. looks for the first down entry with serial < least_serial and frees
 *      it together with everything below it, terminating the chain at
 *      dparent;
 *   4. sets still_dirty when a down chain remains; a NONEXISTENT current
 *      can be unlinked and freed.  How these two checks nest is not
 *      visible here.
 * NOTE(review): several short lines (loop conditions, else branches,
 * updates of dparent and top_prev, the final use of still_dirty) are
 * missing from this listing; this summary covers only what is visible.
 */
1269 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1270 rbtdb_serial_t least_serial)
1272 rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1273 rdatasetheader_t *top_prev, *top_next;
1274 isc_mem_t *mctx = rbtdb->common.mctx;
1275 isc_boolean_t still_dirty = ISC_FALSE;
1278 * Caller must be holding the node lock.
1280 REQUIRE(least_serial != 0);
1283 for (current = node->data; current != NULL; current = top_next) {
1284 top_next = current->next;
1287 * First, we clean up any instances of multiple rdatasets
1288 * with the same serial number, or that have the IGNORE
1292 for (dcurrent = current->down;
1294 dcurrent = down_next) {
1295 down_next = dcurrent->down;
1296 INSIST(dcurrent->serial <= dparent->serial);
1297 if (dcurrent->serial == dparent->serial ||
1299 if (down_next != NULL)
1300 down_next->next = dparent;
1301 dparent->down = down_next;
1302 free_rdataset(rbtdb, mctx, dcurrent);
1308 * We've now eliminated all IGNORE datasets with the possible
1309 * exception of current, which we now check.
1311 if (IGNORE(current)) {
1312 down_next = current->down;
1313 if (down_next == NULL) {
1314 if (top_prev != NULL)
1315 top_prev->next = current->next;
1317 node->data = current->next;
1318 free_rdataset(rbtdb, mctx, current);
1320 * current no longer exists, so we can
1321 * just continue with the loop.
1326 * Pull up current->down, making it the new
1329 if (top_prev != NULL)
1330 top_prev->next = down_next;
1332 node->data = down_next;
1333 down_next->next = top_next;
1334 free_rdataset(rbtdb, mctx, current);
1335 current = down_next;
1340 * We now try to find the first down node less than the
1344 for (dcurrent = current->down;
1346 dcurrent = down_next) {
1347 down_next = dcurrent->down;
1348 if (dcurrent->serial < least_serial)
1354 * If there is a such an rdataset, delete it and any older
1357 if (dcurrent != NULL) {
1359 down_next = dcurrent->down;
1360 INSIST(dcurrent->serial <= least_serial);
1361 free_rdataset(rbtdb, mctx, dcurrent);
1362 dcurrent = down_next;
1363 } while (dcurrent != NULL);
1364 dparent->down = NULL;
1368 * Note. The serial number of 'current' might be less than
1369 * least_serial too, but we cannot delete it because it is
1370 * the most recent version, unless it is a NONEXISTENT
1373 if (current->down != NULL) {
1374 still_dirty = ISC_TRUE;
1378 * If this is a NONEXISTENT rdataset, we can delete it.
1380 if (NONEXISTENT(current)) {
1381 if (top_prev != NULL)
1382 top_prev->next = current->next;
1384 node->data = current->next;
1385 free_rdataset(rbtdb, mctx, current);
1395 * Clean up dead nodes. These are nodes which have no references, and
1396 * have no data. They are dead but we could not or chose not to delete
1397 * them when we deleted all the data at that node because we did not want
1398 * to wait for the tree write lock.
1400 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
/*
 * cleanup_dead_nodes -- delete queued dead nodes of one lock bucket.
 *
 * bucketnum: index into rbtdb->deadnodes.
 *
 * Repeatedly takes the head of rbtdb->deadnodes[bucketnum], unlinks it,
 * asserts that it has no references and no data, and removes it from
 * rbtdb->tree with dns_rbt_deletenode().  A failed delete is only logged
 * as a warning; the next head of the list is fetched regardless.  The loop
 * condition stops the work when the list is empty or count is no longer
 * positive; count starts at 10.
 * NOTE(review): the statement that decrements count is not visible in this
 * listing -- TODO confirm.
 */
1403 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1404 dns_rbtnode_t *node;
1405 isc_result_t result;
1406 int count = 10; /* XXXJT: should be adjustable */
1408 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1409 while (node != NULL && count > 0) {
1410 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1413 * Since we're holding a tree write lock, it should be
1414 * impossible for this node to be referenced by others.
1416 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1417 node->data == NULL);
1419 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1420 if (result != ISC_R_SUCCESS)
1421 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1422 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1423 "cleanup_dead_nodes: "
1424 "dns_rbt_deletenode: %s",
1425 isc_result_totext(result));
1426 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1432 * Caller must be holding the node lock if its reference must be protected
/*
 * new_reference -- take one more reference on node.
 *
 * Increments the node reference counter with dns_rbtnode_refincrement0().
 * When the new value is 1 the node was previously unreferenced, so the
 * reference counter of its lock bucket
 * (rbtdb->node_locks[node->locknum].references) is incremented as well.
 * The resulting counter values are asserted to be non-zero.
 */
1436 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1437 unsigned int lockrefs, noderefs;
1438 isc_refcount_t *lockref;
1440 dns_rbtnode_refincrement0(node, &noderefs);
1441 if (noderefs == 1) { /* this is the first reference to the node */
1442 lockref = &rbtdb->node_locks[node->locknum].references;
1443 isc_refcount_increment0(lockref, &lockrefs);
1444 INSIST(lockrefs != 0);
1446 INSIST(noderefs != 0);
1450 * This function is assumed to be called when a node is newly referenced
1451 * and can be in the deadnode list. In that case the node must be retrieved
1452 * from the list because it is going to be used. In addition, if the caller
1453 * happens to hold a write lock on the tree, it's a good chance to purge dead
1455 * Note: while a new reference is gained in multiple places, there are only very
1456 * few cases where the node can be in the deadnode list (only empty nodes can
1457 * have been added to the list).
/*
 * reactivate_node -- reference a node and pull it off the dead-node list.
 *
 * treelocktype: kind of tree lock the caller holds.
 *
 * Under the strong node lock the node gains a reference through
 * new_reference().  A weak read lock is then used to decide whether more
 * work is needed: need_relock is set when the node is linked on deadlink,
 * or when the bucket has dead nodes and the caller holds the tree write
 * lock.  The weak write-locked section unlinks the node from
 * rbtdb->deadnodes[node->locknum] if it is linked and, with the tree write
 * lock held, calls cleanup_dead_nodes() for the bucket.
 * NOTE(review): the test of need_relock that presumably guards the
 * write-locked section is on a line missing from this listing.
 */
1460 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1461 isc_rwlocktype_t treelocktype)
1463 isc_boolean_t need_relock = ISC_FALSE;
1465 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1466 new_reference(rbtdb, node);
1468 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1469 isc_rwlocktype_read);
1470 if (ISC_LINK_LINKED(node, deadlink))
1471 need_relock = ISC_TRUE;
1472 else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1473 treelocktype == isc_rwlocktype_write)
1474 need_relock = ISC_TRUE;
1475 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1476 isc_rwlocktype_read);
1478 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1479 isc_rwlocktype_write);
1480 if (ISC_LINK_LINKED(node, deadlink))
1481 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1483 if (treelocktype == isc_rwlocktype_write)
1484 cleanup_dead_nodes(rbtdb, node->locknum);
1485 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1486 isc_rwlocktype_write);
1489 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1493 * Caller must be holding the node lock; either the "strong", read or write
1494 * lock. Note that the lock must be held even when node references are
1495 * atomically modified; in that case the decrement operation itself does not
1496 * have to be protected, but we must avoid a race condition where multiple
1497 * threads are decreasing the reference to zero simultaneously and at least
1498 * one of them is going to free the node.
1499 * This function returns ISC_TRUE if and only if the node reference decreases
/*
 * decrement_reference -- release one reference on node and clean up.
 *
 * least_serial: least open serial handed to clean_zone_node(); 0 means the
 *               caller does not know it, in which case rbtdb->least_serial
 *               is read under the database read lock.
 * nlock:        node lock type held by the caller; a read lock is swapped
 *               for a write lock on the slow path and downgraded again
 *               before returning.
 * tlock:        tree lock type held by the caller (none, read or write).
 * pruning:      true when called from prune_tree(); suppresses sending a
 *               further pruning event.
 *
 * Fast path: for a clean node that still has data or a down pointer, only
 * the node and bucket counters are decremented; the result is whether the
 * node counter reached zero.
 *
 * Slow path: after the decrement, a dirty node with no references is
 * cleaned with clean_cache_node() or clean_zone_node().  If the node still
 * has data or a down pointer the lock is restored.  Otherwise a tree write
 * lock is obtained with a try-lock or try-upgrade unless the caller
 * already holds one.  With the write lock and no references, the node is
 * either handed to rbtdb->task as a pruning event (sole down child with no
 * siblings, task set, not already pruning) or deleted from the tree.  If
 * the event cannot be allocated, or the first test fails while the node is
 * unreferenced, the node is appended to rbtdb->deadnodes[bucket].
 * no_reference is the return value; it is set to ISC_FALSE when a pruning
 * event took a new reference.  Finally the node and tree locks are put
 * back into the state described by nlock and tlock.
 * NOTE(review): many short lines (else branches, early returns, labels)
 * are missing from this listing, so the exact branch structure should be
 * checked against the complete file.
 */
1502 static isc_boolean_t
1503 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1504 rbtdb_serial_t least_serial,
1505 isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1506 isc_boolean_t pruning)
1508 isc_result_t result;
1509 isc_boolean_t write_locked;
1510 rbtdb_nodelock_t *nodelock;
1511 unsigned int refs, nrefs;
1512 int bucket = node->locknum;
1513 isc_boolean_t no_reference;
1515 nodelock = &rbtdb->node_locks[bucket];
1517 /* Handle easy and typical case first. */
1518 if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1519 dns_rbtnode_refdecrement(node, &nrefs);
1520 INSIST((int)nrefs >= 0);
1522 isc_refcount_decrement(&nodelock->references, &refs);
1523 INSIST((int)refs >= 0);
1525 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1528 /* Upgrade the lock? */
1529 if (nlock == isc_rwlocktype_read) {
1530 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1531 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1533 dns_rbtnode_refdecrement(node, &nrefs);
1534 INSIST((int)nrefs >= 0);
1536 /* Restore the lock? */
1537 if (nlock == isc_rwlocktype_read)
1538 NODE_WEAKDOWNGRADE(&nodelock->lock);
1542 if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1543 if (IS_CACHE(rbtdb))
1544 clean_cache_node(rbtdb, node);
1546 if (least_serial == 0) {
1548 * Caller doesn't know the least serial.
1551 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1552 least_serial = rbtdb->least_serial;
1553 RBTDB_UNLOCK(&rbtdb->lock,
1554 isc_rwlocktype_read);
1556 clean_zone_node(rbtdb, node, least_serial);
1560 isc_refcount_decrement(&nodelock->references, &refs);
1561 INSIST((int)refs >= 0);
1564 * XXXDCL should this only be done for cache zones?
1566 if (node->data != NULL || node->down != NULL) {
1567 /* Restore the lock? */
1568 if (nlock == isc_rwlocktype_read)
1569 NODE_WEAKDOWNGRADE(&nodelock->lock);
1574 * Attempt to switch to a write lock on the tree. If this fails,
1575 * we will add this node to a linked list of nodes in this locking
1576 * bucket which we will free later.
1578 if (tlock != isc_rwlocktype_write) {
1580 * Locking hierarchy notwithstanding, we don't need to free
1581 * the node lock before acquiring the tree write lock because
1582 * we only do a trylock.
1584 if (tlock == isc_rwlocktype_read)
1585 result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1587 result = isc_rwlock_trylock(&rbtdb->tree_lock,
1588 isc_rwlocktype_write);
1589 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1590 result == ISC_R_LOCKBUSY);
1592 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1594 write_locked = ISC_TRUE;
1596 no_reference = ISC_TRUE;
1597 if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1599 * We can now delete the node if the reference counter is
1600 * zero. This should be typically the case, but a different
1601 * thread may still gain a (new) reference just before the
1602 * current thread locks the tree (e.g., in findnode()).
1606 * If this node is the only one in the level it's in, deleting
1607 * this node may recursively make its parent the only node in
1608 * the parent level; if so, and if no one is currently using
1609 * the parent node, this is almost the only opportunity to
1610 * clean it up. But the recursive cleanup is not that trivial
1611 * since the child and parent may be in different lock buckets,
1612 * which would cause a lock order reversal problem. To avoid
1613 * the trouble, we'll dispatch a separate event for batch
1614 * cleaning. We need to check whether we're deleting the node
1615 * as a result of pruning to avoid infinite dispatching.
1616 * Note: pruning happens only when a task has been set for the
1617 * rbtdb. If the user of the rbtdb chooses not to set a task,
1618 * it's their responsibility to purge stale leaves (e.g. by
1619 * periodic walk-through).
1621 if (!pruning && node->parent != NULL &&
1622 node->parent->down == node && node->left == NULL &&
1623 node->right == NULL && rbtdb->task != NULL) {
1627 ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1630 sizeof(isc_event_t));
1632 new_reference(rbtdb, node);
1634 attach((dns_db_t *)rbtdb, &db);
1636 isc_task_send(rbtdb->task, &ev);
1637 no_reference = ISC_FALSE;
1640 * XXX: this is a weird situation. We could
1641 * ignore this error case, but then the stale
1642 * node will unlikely be purged except via a
1643 * rare condition such as manual cleanup. So
1644 * we queue it in the deadnodes list, hoping
1645 * the memory shortage is temporary and the node
1646 * will be deleted later.
1648 isc_log_write(dns_lctx,
1649 DNS_LOGCATEGORY_DATABASE,
1650 DNS_LOGMODULE_CACHE,
1652 "decrement_reference: failed to "
1653 "allocate pruning event");
1654 INSIST(!ISC_LINK_LINKED(node, deadlink));
1655 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1659 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1660 char printname[DNS_NAME_FORMATSIZE];
1662 isc_log_write(dns_lctx,
1663 DNS_LOGCATEGORY_DATABASE,
1664 DNS_LOGMODULE_CACHE,
1666 "decrement_reference: "
1667 "delete from rbt: %p %s",
1669 dns_rbt_formatnodename(node,
1671 sizeof(printname)));
1674 INSIST(!ISC_LINK_LINKED(node, deadlink));
1675 result = dns_rbt_deletenode(rbtdb->tree, node,
1677 if (result != ISC_R_SUCCESS) {
1678 isc_log_write(dns_lctx,
1679 DNS_LOGCATEGORY_DATABASE,
1680 DNS_LOGMODULE_CACHE,
1682 "decrement_reference: "
1683 "dns_rbt_deletenode: %s",
1684 isc_result_totext(result));
1687 } else if (dns_rbtnode_refcurrent(node) == 0) {
1688 INSIST(!ISC_LINK_LINKED(node, deadlink));
1689 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1692 /* Restore the lock? */
1693 if (nlock == isc_rwlocktype_read)
1694 NODE_WEAKDOWNGRADE(&nodelock->lock);
1697 * Relock a read lock, or unlock the write lock if no lock was held.
1699 if (tlock == isc_rwlocktype_none)
1701 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1703 if (tlock == isc_rwlocktype_read)
1705 isc_rwlock_downgrade(&rbtdb->tree_lock);
1707 return (no_reference);
1711 * Prune the tree by recursively cleaning-up single leaves. In the worst
1712 * case, the number of iterations is the number of tree levels, which is at
1713 * most the maximum number of domain name labels, i.e., 127. In practice, this
1714 * should be much smaller (only a few times), and even the worst case would be
1715 * acceptable for a single event.
/*
 * prune_tree -- task event handler that releases a leaf node and then
 * walks up through parents that became childless as a result.
 *
 * event: carries the database in ev_sender and the node in ev_arg; it is
 *        freed before any lock is taken.
 *
 * With the tree write lock and the write lock of the node bucket held,
 * each iteration remembers node->parent and drops the reference on node
 * with pruning set to ISC_TRUE.  If the parent exists and now has no down
 * pointer, the held node lock is exchanged for the one of the parent
 * bucket when the buckets differ, the parent gains a reference, and it is
 * unlinked from the dead-node list if it is linked there.  The loop runs
 * while node is not NULL; afterwards both locks are released and detach()
 * is called on the database pointer.
 * NOTE(review): the statements that advance node to parent (or set it to
 * NULL) are on lines missing from this listing.
 */
1718 prune_tree(isc_task_t *task, isc_event_t *event) {
1719 dns_rbtdb_t *rbtdb = event->ev_sender;
1720 dns_rbtnode_t *node = event->ev_arg;
1721 dns_rbtnode_t *parent;
1722 unsigned int locknum;
1726 isc_event_free(&event);
1728 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1729 locknum = node->locknum;
1730 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1732 parent = node->parent;
1733 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1734 isc_rwlocktype_write, ISC_TRUE);
1736 if (parent != NULL && parent->down == NULL) {
1738 * node was the only down child of the parent and has
1739 * just been removed. We'll then need to examine the
1740 * parent. Keep the lock if possible; otherwise,
1741 * release the old lock and acquire one for the parent.
1743 if (parent->locknum != locknum) {
1744 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1745 isc_rwlocktype_write);
1746 locknum = parent->locknum;
1747 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1748 isc_rwlocktype_write);
1752 * We need to gain a reference to the node before
1753 * decrementing it in the next iteration. In addition,
1754 * if the node is in the dead-nodes list, extract it
1755 * from the list beforehand as we do in
1756 * reactivate_node().
1758 new_reference(rbtdb, parent);
1759 if (ISC_LINK_LINKED(parent, deadlink)) {
1760 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1767 } while (node != NULL);
1768 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1769 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1771 detach((dns_db_t **)&rbtdb);
/*
 * make_least_version -- install version as the least open version.
 *
 * Stores version->serial in rbtdb->least_serial and hands the whole
 * changed_list of version over to *cleanup_list, leaving the version with
 * a freshly initialised, empty list.  The previous content of
 * *cleanup_list is overwritten, not appended to.
 */
1775 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1776 rbtdb_changedlist_t *cleanup_list)
1779 * Caller must be holding the database lock.
1782 rbtdb->least_serial = version->serial;
1783 *cleanup_list = version->changed_list;
1784 ISC_LIST_INIT(version->changed_list);
/*
 * cleanup_nondirty -- move non-dirty change records to the cleanup list.
 *
 * Iterates over version->changed_list; every record whose dirty flag is
 * clear is unlinked from that list and appended to *cleanup_list.  Dirty
 * records stay on the version.  next_changed is fetched before the record
 * may be unlinked so the walk survives the removal.
 */
1788 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1789 rbtdb_changed_t *changed, *next_changed;
1792 * If the changed record is dirty, then
1793 * an update created multiple versions of
1794 * a given rdataset. We keep this list
1795 * until we're the least open version, at
1796 * which point it's safe to get rid of any
1799 * If the changed record isn't dirty, then
1800 * we don't need it anymore since we're
1801 * committing and not rolling back.
1803 * The caller must be holding the database lock.
1805 for (changed = HEAD(version->changed_list);
1807 changed = next_changed) {
1808 next_changed = NEXT(changed, link);
1809 if (!changed->dirty) {
1810 UNLINK(version->changed_list,
1812 APPEND(*cleanup_list,
/*
 * iszonesecure -- inspect the zone origin for DNSSEC material.
 *
 * db:     database to query.
 * origin: node at which the DNSKEY and NSEC rdatasets are looked up.
 *
 * Looks up the DNSKEY rdataset at origin and sets haszonekey when
 * dns_zonekey_iszonekey() accepts one of its records.  It then looks up
 * the NSEC rdataset together with a second rdataset, signsecset
 * (presumably the covering signatures), and checks whether signsecset is
 * associated.  Rdatasets that were obtained are disassociated again.
 * NOTE(review): the lines that assign hasnsec and compute the return value
 * are missing from this listing; presumably the result is true only with
 * both a zone key and a signed NSEC -- TODO confirm.
 */
1818 static isc_boolean_t
1819 iszonesecure(dns_db_t *db, dns_dbnode_t *origin) {
1820 dns_rdataset_t keyset;
1821 dns_rdataset_t nsecset, signsecset;
1822 isc_boolean_t haszonekey = ISC_FALSE;
1823 isc_boolean_t hasnsec = ISC_FALSE;
1824 isc_result_t result;
1826 dns_rdataset_init(&keyset);
1827 result = dns_db_findrdataset(db, origin, NULL, dns_rdatatype_dnskey, 0,
1829 if (result == ISC_R_SUCCESS) {
1830 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1831 result = dns_rdataset_first(&keyset);
1832 while (result == ISC_R_SUCCESS) {
1833 dns_rdataset_current(&keyset, &keyrdata);
1834 if (dns_zonekey_iszonekey(&keyrdata)) {
1835 haszonekey = ISC_TRUE;
1838 result = dns_rdataset_next(&keyset);
1840 dns_rdataset_disassociate(&keyset);
1845 dns_rdataset_init(&nsecset);
1846 dns_rdataset_init(&signsecset);
1847 result = dns_db_findrdataset(db, origin, NULL, dns_rdatatype_nsec, 0,
1848 0, &nsecset, &signsecset);
1849 if (result == ISC_R_SUCCESS) {
1850 if (dns_rdataset_isassociated(&signsecset)) {
1852 dns_rdataset_disassociate(&signsecset);
1854 dns_rdataset_disassociate(&nsecset);
/*
 * closeversion -- drop a reference to *versionp and, when it was the last
 * one, commit or discard the version.
 *
 * commit: whether a writer version is committed or rolled back.  The test
 *         of this flag on the writer path is on a line not shown; it is
 *         visibly used when the secure status is updated.
 *
 * If references remain after the decrement, only a sanity check that the
 * version is not the writer is visible.  Otherwise, under the database
 * write lock:
 *   - writer, commit path: the old current version loses the reference
 *     held by the database and (presumably only when that was its last
 *     reference) is unlinked from open_versions; the closing version
 *     either becomes the least open version (make_least_version) or hands
 *     over its non-dirty change records (cleanup_nondirty); it then
 *     becomes current_version, gains the reference held by the database
 *     and is prepended to open_versions.
 *   - writer, rollback path: the whole changed_list becomes the cleanup
 *     list, rollback is set and the version is scheduled for freeing.
 *   - reader: a version other than current_version is scheduled for
 *     freeing; if it was the least open version a new least version is
 *     installed (presumably least_greater), otherwise its pending change
 *     records are appended to least_greater; the version is unlinked from
 *     open_versions.
 * After the lock is released the secure flag is recomputed for a committed
 * writer on a non-cache database, the scheduled version is freed, and
 * every record on the cleanup list is processed under the tree write lock:
 * dead nodes of the bucket are purged, rollback_node() is applied
 * (presumably only when rolling back; the guard is not visible), the node
 * reference is released and the record is freed.
 * NOTE(review): declarations, else branches and the final statements are
 * partly missing from this listing.
 */
1860 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
1861 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1862 rbtdb_version_t *version, *cleanup_version, *least_greater;
1863 isc_boolean_t rollback = ISC_FALSE;
1864 rbtdb_changedlist_t cleanup_list;
1865 rbtdb_changed_t *changed, *next_changed;
1866 rbtdb_serial_t serial, least_serial;
1867 dns_rbtnode_t *rbtnode;
1869 isc_boolean_t writer;
1871 REQUIRE(VALID_RBTDB(rbtdb));
1872 version = (rbtdb_version_t *)*versionp;
1874 cleanup_version = NULL;
1875 ISC_LIST_INIT(cleanup_list);
1877 isc_refcount_decrement(&version->references, &refs);
1878 if (refs > 0) { /* typical and easy case first */
1880 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1881 INSIST(!version->writer);
1882 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1887 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1888 serial = version->serial;
1889 writer = version->writer;
1890 if (version->writer) {
1893 rbtdb_version_t *cur_version;
1895 INSIST(version->commit_ok);
1896 INSIST(version == rbtdb->future_version);
1898 * The current version is going to be replaced.
1899 * Release the (likely last) reference to it from the
1900 * DB itself and unlink it from the open list.
1902 cur_version = rbtdb->current_version;
1903 isc_refcount_decrement(&cur_version->references,
1906 if (cur_version->serial == rbtdb->least_serial)
1907 INSIST(EMPTY(cur_version->changed_list));
1908 UNLINK(rbtdb->open_versions,
1911 if (EMPTY(rbtdb->open_versions)) {
1913 * We're going to become the least open
1916 make_least_version(rbtdb, version,
1920 * Some other open version is the
1921 * least version. We can't cleanup
1922 * records that were changed in this
1923 * version because the older versions
1924 * may still be in use by an open
1927 * We can, however, discard the
1928 * changed records for things that
1929 * we've added that didn't exist in
1932 cleanup_nondirty(version, &cleanup_list);
1935 * If the (soon to be former) current version
1936 * isn't being used by anyone, we can clean
1940 cleanup_version = cur_version;
1941 APPENDLIST(version->changed_list,
1942 cleanup_version->changed_list,
1946 * Become the current version.
1948 version->writer = ISC_FALSE;
1949 rbtdb->current_version = version;
1950 rbtdb->current_serial = version->serial;
1951 rbtdb->future_version = NULL;
1954 * Keep the current version in the open list, and
1955 * gain a reference for the DB itself (see the DB
1956 * creation function below). This must be the only
1957 * case where we need to increment the counter from
1958 * zero and need to use isc_refcount_increment0().
1960 isc_refcount_increment0(&version->references,
1962 INSIST(cur_ref == 1);
1963 PREPEND(rbtdb->open_versions,
1964 rbtdb->current_version, link);
1967 * We're rolling back this transaction.
1969 cleanup_list = version->changed_list;
1970 ISC_LIST_INIT(version->changed_list);
1971 rollback = ISC_TRUE;
1972 cleanup_version = version;
1973 rbtdb->future_version = NULL;
1976 if (version != rbtdb->current_version) {
1978 * There are no external or internal references
1979 * to this version and it can be cleaned up.
1981 cleanup_version = version;
1984 * Find the version with the least serial
1985 * number greater than ours.
1987 least_greater = PREV(version, link);
1988 if (least_greater == NULL)
1989 least_greater = rbtdb->current_version;
1991 INSIST(version->serial < least_greater->serial);
1993 * Is this the least open version?
1995 if (version->serial == rbtdb->least_serial) {
1997 * Yes. Install the new least open
2000 make_least_version(rbtdb,
2005 * Add any unexecuted cleanups to
2006 * those of the least greater version.
2008 APPENDLIST(least_greater->changed_list,
2009 version->changed_list,
2012 } else if (version->serial == rbtdb->least_serial)
2013 INSIST(EMPTY(version->changed_list));
2014 UNLINK(rbtdb->open_versions, version, link);
2016 least_serial = rbtdb->least_serial;
2017 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2020 * Update the zone's secure status.
2022 if (writer && commit && !IS_CACHE(rbtdb))
2023 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
2025 if (cleanup_version != NULL) {
2026 INSIST(EMPTY(cleanup_version->changed_list));
2027 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2028 sizeof(*cleanup_version));
2031 if (!EMPTY(cleanup_list)) {
2033 * We acquire a tree write lock here in order to make sure
2034 * that stale nodes will be removed in decrement_reference().
2035 * If we didn't have the lock, those nodes could miss the
2036 * chance to be removed until the server stops. The write lock
2037 * is expensive, but this event should be rare enough to justify
2040 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2041 for (changed = HEAD(cleanup_list);
2043 changed = next_changed) {
2046 next_changed = NEXT(changed, link);
2047 rbtnode = changed->node;
2048 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2050 NODE_LOCK(lock, isc_rwlocktype_write);
2052 * This is a good opportunity to purge any dead nodes,
2055 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2058 rollback_node(rbtnode, serial);
2059 decrement_reference(rbtdb, rbtnode, least_serial,
2060 isc_rwlocktype_write,
2061 isc_rwlocktype_write, ISC_FALSE);
2063 NODE_UNLOCK(lock, isc_rwlocktype_write);
2065 isc_mem_put(rbtdb->common.mctx, changed,
2068 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2076 * Add the necessary magic for the wildcard name 'name'
2077 * to be found in 'rbtdb'.
2079 * In order for wildcard matching to work correctly in
2080 * zone_find(), we must ensure that a node for the wildcarding
2081 * level exists in the database, and has its 'find_callback'
2082 * and 'wild' bits set.
2084 * E.g. if the wildcard name is "*.sub.example." then we
2085 * must ensure that "sub.example." exists and is marked as
/*
 * add_wildcard_magic -- make sure the node one level above a wildcard
 * name exists and is flagged for the find callback.
 *
 * name: the wildcard name.
 *
 * Builds foundname from name starting at label index 1 (that is, without
 * the leading label), adds it to rbtdb->tree with dns_rbt_addnode(), and
 * sets find_callback on the resulting node.  ISC_R_EXISTS from the add is
 * not treated as a failure.  The visible exit returns ISC_R_SUCCESS.
 * NOTE(review): the adjustment of n before the label slice, the error
 * return, and the setting of the wild bit described in the comment that
 * precedes this function are on lines missing from this listing.
 */
2089 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2090 isc_result_t result;
2091 dns_name_t foundname;
2092 dns_offsets_t offsets;
2094 dns_rbtnode_t *node = NULL;
2096 dns_name_init(&foundname, offsets);
2097 n = dns_name_countlabels(name);
2100 dns_name_getlabelsequence(name, 1, n, &foundname);
2101 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2102 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2104 node->find_callback = 1;
2106 return (ISC_R_SUCCESS);
/*
 * add_empty_wildcards -- add tree nodes for wildcard ancestors of name.
 *
 * n is the label count of name and l that of the database origin.  For
 * each value of i the suffix of name consisting of its last i labels is
 * placed in foundname.  When that suffix is itself a wildcard name,
 * add_wildcard_magic() is applied to it and the suffix is added to
 * rbtdb->tree; ISC_R_EXISTS from the add is tolerated.  The visible exit
 * returns ISC_R_SUCCESS.
 * NOTE(review): the loop header (start and end values of i, presumably
 * derived from l and n) and the error returns are missing from this
 * listing.
 */
2110 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2111 isc_result_t result;
2112 dns_name_t foundname;
2113 dns_offsets_t offsets;
2114 unsigned int n, l, i;
2116 dns_name_init(&foundname, offsets);
2117 n = dns_name_countlabels(name);
2118 l = dns_name_countlabels(&rbtdb->common.origin);
2121 dns_rbtnode_t *node = NULL; /* dummy */
2122 dns_name_getlabelsequence(name, n - i, i, &foundname);
2123 if (dns_name_iswildcard(&foundname)) {
2124 result = add_wildcard_magic(rbtdb, &foundname);
2125 if (result != ISC_R_SUCCESS)
2127 result = dns_rbt_addnode(rbtdb->tree, &foundname,
2129 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2134 return (ISC_R_SUCCESS);
/*
 * findnode -- look up, and optionally create, the tree node for name.
 *
 * create: going by its name, whether a missing node may be added; the
 *         line that tests it is not visible in this listing.
 * nodep:  receives the node on success.
 *
 * The lookup runs under the tree read lock with DNS_RBTFIND_EMPTYDATA.
 * On a miss the read lock is dropped, DNS_R_PARTIALMATCH is mapped to
 * ISC_R_NOTFOUND, and the tree is locked again for writing before
 * dns_rbt_addnode() is called.  A freshly added node gets its locknum from
 * a hash (node->hashval with DNS_RBT_USEHASH, otherwise dns_name_hash())
 * modulo node_lock_count; empty wildcard ancestors are added, and wildcard
 * magic is applied when name is a wildcard.  ISC_R_EXISTS from the add is
 * not treated as an error.  Finally the node is referenced through
 * reactivate_node(), the tree lock is released and the node is stored in
 * *nodep.
 * NOTE(review): the return value of add_empty_wildcards() is not checked
 * here, unlike that of add_wildcard_magic().
 */
2138 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2139 dns_dbnode_t **nodep)
2141 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2142 dns_rbtnode_t *node = NULL;
2143 dns_name_t nodename;
2144 isc_result_t result;
2145 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2147 REQUIRE(VALID_RBTDB(rbtdb));
2149 dns_name_init(&nodename, NULL);
2150 RWLOCK(&rbtdb->tree_lock, locktype);
2151 result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2152 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2153 if (result != ISC_R_SUCCESS) {
2154 RWUNLOCK(&rbtdb->tree_lock, locktype);
2156 if (result == DNS_R_PARTIALMATCH)
2157 result = ISC_R_NOTFOUND;
2161 * It would be nice to try to upgrade the lock instead of
2162 * unlocking then relocking.
2164 locktype = isc_rwlocktype_write;
2165 RWLOCK(&rbtdb->tree_lock, locktype);
2167 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2168 if (result == ISC_R_SUCCESS) {
2169 dns_rbt_namefromnode(node, &nodename);
2170 #ifdef DNS_RBT_USEHASH
2171 node->locknum = node->hashval % rbtdb->node_lock_count;
2173 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2174 rbtdb->node_lock_count;
2176 add_empty_wildcards(rbtdb, name);
2178 if (dns_name_iswildcard(name)) {
2179 result = add_wildcard_magic(rbtdb, name);
2180 if (result != ISC_R_SUCCESS) {
2181 RWUNLOCK(&rbtdb->tree_lock, locktype);
2185 } else if (result != ISC_R_EXISTS) {
2186 RWUNLOCK(&rbtdb->tree_lock, locktype);
2190 reactivate_node(rbtdb, node, locktype);
2191 RWUNLOCK(&rbtdb->tree_lock, locktype);
2193 *nodep = (dns_dbnode_t *)node;
2195 return (ISC_R_SUCCESS);
/*
 * zone_zonecut_callback -- per-node callback that records the topmost
 * zone cut (NS or DNAME) met during a search.
 *
 * node: node being visited.
 * name: name of that node; copied into the search block when needed.
 * arg:  the rbtdb_search_t of the search in progress.
 *
 * Returns DNS_R_CONTINUE at once when search->zonecut is already set.
 * Otherwise, under the node read lock, the headers at node are scanned for
 * the NS, DNAME and SIGDNAME types; for each, the down chain is followed
 * looking for a version with serial <= search->serial, and NONEXISTENT
 * entries are checked for.  A DNAME takes precedence over an NS; an NS is
 * only considered when node is not the origin node or the database is a
 * stub.  When a cut is found the node is referenced, the search block is
 * updated (zonecut, zonecut_rdataset, need_cleanup set, wild cleared) and,
 * unless DNS_DBFIND_GLUEOK is set, the result becomes DNS_R_PARTIALMATCH;
 * otherwise (going by the original comment) the node name is copied into
 * search->zonecut_name and copy_name is set.  When no cut is found, a wild
 * node sets search->wild unless DNS_DBFIND_NOWILD is given.
 * NOTE(review): the assignments to ns_header and found on the NS path, the
 * else branches and the final return are on lines missing from this
 * listing.
 */
2199 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2200 rbtdb_search_t *search = arg;
2201 rdatasetheader_t *header, *header_next;
2202 rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2203 rdatasetheader_t *found;
2204 isc_result_t result;
2205 dns_rbtnode_t *onode;
2208 * We only want to remember the topmost zone cut, since it's the one
2209 * that counts, so we'll just continue if we've already found a
2212 if (search->zonecut != NULL)
2213 return (DNS_R_CONTINUE);
2216 result = DNS_R_CONTINUE;
2217 onode = search->rbtdb->origin_node;
2219 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2220 isc_rwlocktype_read);
2223 * Look for an NS or DNAME rdataset active in our version.
2226 dname_header = NULL;
2227 sigdname_header = NULL;
2228 for (header = node->data; header != NULL; header = header_next) {
2229 header_next = header->next;
2230 if (header->type == dns_rdatatype_ns ||
2231 header->type == dns_rdatatype_dname ||
2232 header->type == RBTDB_RDATATYPE_SIGDNAME) {
2234 if (header->serial <= search->serial &&
2237 * Is this a "this rdataset doesn't
2240 if (NONEXISTENT(header))
2244 header = header->down;
2245 } while (header != NULL);
2246 if (header != NULL) {
2247 if (header->type == dns_rdatatype_dname)
2248 dname_header = header;
2249 else if (header->type ==
2250 RBTDB_RDATATYPE_SIGDNAME)
2251 sigdname_header = header;
2252 else if (node != onode ||
2253 IS_STUB(search->rbtdb)) {
2255 * We've found an NS rdataset that
2256 * isn't at the origin node. We check
2257 * that they're not at the origin node,
2258 * because otherwise we'd erroneously
2259 * treat the zone top as if it were
2269 * Did we find anything?
2271 if (dname_header != NULL) {
2273 * Note that DNAME has precedence over NS if both exist.
2275 found = dname_header;
2276 search->zonecut_sigrdataset = sigdname_header;
2277 } else if (ns_header != NULL) {
2279 search->zonecut_sigrdataset = NULL;
2282 if (found != NULL) {
2284 * We increment the reference count on node to ensure that
2285 * search->zonecut_rdataset will still be valid later.
2287 new_reference(search->rbtdb, node);
2288 search->zonecut = node;
2289 search->zonecut_rdataset = found;
2290 search->need_cleanup = ISC_TRUE;
2292 * Since we've found a zonecut, anything beneath it is
2293 * glue and is not subject to wildcard matching, so we
2294 * may clear search->wild.
2296 search->wild = ISC_FALSE;
2297 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2299 * If the caller does not want to find glue, then
2300 * this is the best answer and the search should
2303 result = DNS_R_PARTIALMATCH;
2308 * The search will continue beneath the zone cut.
2309 * This may or may not be the best match. In case it
2310 * is, we need to remember the node name.
2312 zcname = dns_fixedname_name(&search->zonecut_name);
2313 RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2315 search->copy_name = ISC_TRUE;
2319 * There is no zonecut at this node which is active in this
2322 * If this is a "wild" node and the caller hasn't disabled
2323 * wildcard matching, remember that we've seen a wild node
2324 * in case we need to go searching for wildcard matches
2327 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2328 search->wild = ISC_TRUE;
2331 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2332 isc_rwlocktype_read);
/*
 * bind_rdataset -- attach rdataset to the slab data behind header.
 *
 * node:     node that owns header; it gains a reference.
 * header:   rdataset header whose data is exposed.
 * now:      subtracted from header->rdh_ttl to form the TTL stored in the
 *           rdataset.
 * rdataset: target; it is tested against NULL first (the statement taken
 *           in that case is on a line not shown) and must be
 *           disassociated (methods == NULL).
 *
 * The rdataset is filled in with the method table, the database class,
 * the base type and covered type decoded from header->type, the TTL, the
 * trust level and, for NXDOMAIN headers, DNS_RDATASETATTR_NXDOMAIN.
 * private1, private2 and private3 hold the database, the node and the raw
 * slab that starts directly after the header structure.  count takes the
 * current value of header->count, which is post-incremented; a value of
 * ISC_UINT32_MAX is replaced by 0.  The iterator state (privateuint4,
 * private5) is reset, and private6 carries header->noqname, with
 * DNS_RDATASETATTR_NOQNAME set when it is non-NULL.
 */
2338 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2339 rdatasetheader_t *header, isc_stdtime_t now,
2340 dns_rdataset_t *rdataset)
2342 unsigned char *raw; /* RDATASLAB */
2345 * Caller must be holding the node reader lock.
2346 * XXXJT: technically, we need a writer lock, since we'll increment
2347 * the header count below. However, since the actual counter value
2348 * doesn't matter, we prioritize performance here. (We may want to
2349 * use atomic increment when available).
2352 if (rdataset == NULL)
2355 new_reference(rbtdb, node);
2357 INSIST(rdataset->methods == NULL); /* We must be disassociated. */
2359 rdataset->methods = &rdataset_methods;
2360 rdataset->rdclass = rbtdb->common.rdclass;
2361 rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2362 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2363 rdataset->ttl = header->rdh_ttl - now;
2364 rdataset->trust = header->trust;
2365 if (NXDOMAIN(header))
2366 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2367 rdataset->private1 = rbtdb;
2368 rdataset->private2 = node;
2369 raw = (unsigned char *)header + sizeof(*header);
2370 rdataset->private3 = raw;
2371 rdataset->count = header->count++;
2372 if (rdataset->count == ISC_UINT32_MAX)
2373 rdataset->count = 0;
2376 * Reset iterator state.
2378 rdataset->privateuint4 = 0;
2379 rdataset->private5 = NULL;
2382 * Add noqname proof.
2384 rdataset->private6 = header->noqname;
2385 if (rdataset->private6 != NULL)
2386 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
/*
 * setup_delegation -- hand the zone cut recorded in search to the caller.
 *
 * foundname:   if non-NULL and search->copy_name is set, receives the
 *              saved zone cut name; this is done first so that nothing
 *              needs undoing when the copy fails.
 * nodep:       if non-NULL, takes over the node reference already held by
 *              the search block (need_cleanup is cleared); the store into
 *              *nodep is on a line not shown.
 * rdataset:    if non-NULL, bound to the zone cut rdataset under the node
 *              read lock.
 * sigrdataset: bound as well when both it and search->zonecut_sigrdataset
 *              are non-NULL; only attempted when rdataset is non-NULL.
 *
 * Returns DNS_R_DNAME when the zone cut rdataset has type DNAME and
 * DNS_R_DELEGATION otherwise.  A failed name copy presumably returns that
 * error (the return line is not visible in this listing).
 */
2389 static inline isc_result_t
2390 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2391 dns_name_t *foundname, dns_rdataset_t *rdataset,
2392 dns_rdataset_t *sigrdataset)
2394 isc_result_t result;
2396 rbtdb_rdatatype_t type;
2397 dns_rbtnode_t *node;
2400 * The caller MUST NOT be holding any node locks.
2403 node = search->zonecut;
2404 type = search->zonecut_rdataset->type;
2407 * If we have to set foundname, we do it before anything else.
2408 * If we were to set foundname after we had set nodep or bound the
2409 * rdataset, then we'd have to undo that work if dns_name_copy()
2410 * failed. By setting foundname first, there's nothing to undo if
2413 if (foundname != NULL && search->copy_name) {
2414 zcname = dns_fixedname_name(&search->zonecut_name);
2415 result = dns_name_copy(zcname, foundname, NULL);
2416 if (result != ISC_R_SUCCESS)
2419 if (nodep != NULL) {
2421 * Note that we don't have to increment the node's reference
2422 * count here because we're going to use the reference we
2423 * already have in the search block.
2426 search->need_cleanup = ISC_FALSE;
2428 if (rdataset != NULL) {
2429 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2430 isc_rwlocktype_read);
2431 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2432 search->now, rdataset);
2433 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2434 bind_rdataset(search->rbtdb, node,
2435 search->zonecut_sigrdataset,
2436 search->now, sigrdataset);
2437 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2438 isc_rwlocktype_read);
2441 if (type == dns_rdatatype_dname)
2442 return (DNS_R_DNAME);
2443 return (DNS_R_DELEGATION);
/*
 * valid_glue -- decide whether data of the given type at node is
 * acceptable glue for the zone cut recorded in search.
 *
 * name: owner name of the candidate glue.
 * type: rdata type of the candidate glue.
 * node: node at which the candidate was found.
 *
 * An NS rdataset is only considered when node is the zone cut node itself;
 * any other type must be A, AAAA or A6.  The raw slab that follows the
 * zone cut rdataset header is then decoded by hand: the first two bytes
 * give the record count (big endian), and with DNS_RDATASET_FIXED an
 * additional 4 * count bytes are skipped.  For each record a two-byte
 * length is read, the record is wrapped in region, converted to a name
 * (ns_name) and compared with name.
 * NOTE(review): the loop header, the pointer advances, the assignment to
 * valid on a match and the final return are on lines missing from this
 * listing; presumably the result is true exactly when one NS target equals
 * name -- TODO confirm.
 *
 * The second argument of dns_name_fromregion() is the address of the
 * local region; the listing had it corrupted into a registered-trademark
 * character.
 */
2446 static inline isc_boolean_t
2447 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2448 dns_rbtnode_t *node)
2450 unsigned char *raw; /* RDATASLAB */
2451 unsigned int count, size;
2453 isc_boolean_t valid = ISC_FALSE;
2454 dns_offsets_t offsets;
2455 isc_region_t region;
2456 rdatasetheader_t *header;
2459 * No additional locking is required.
2463 * Valid glue types are A, AAAA, A6. NS is also a valid glue type
2464 * if it occurs at a zone cut, but is not valid below it.
2466 if (type == dns_rdatatype_ns) {
2467 if (node != search->zonecut) {
2470 } else if (type != dns_rdatatype_a &&
2471 type != dns_rdatatype_aaaa &&
2472 type != dns_rdatatype_a6) {
2476 header = search->zonecut_rdataset;
2477 raw = (unsigned char *)header + sizeof(*header);
2478 count = raw[0] * 256 + raw[1];
2479 #if DNS_RDATASET_FIXED
2480 raw += 2 + (4 * count);
2487 size = raw[0] * 256 + raw[1];
2488 #if DNS_RDATASET_FIXED
2494 region.length = size;
2497 * XXX Until we have rdata structures, we have no choice but
2498 * to directly access the rdata format.
2500 dns_name_init(&ns_name, offsets);
2501 dns_name_fromregion(&ns_name, &region);
2502 if (dns_name_compare(&ns_name, name) == 0) {
/*
 * activeempty: advance 'chain' to following nodes and look for one that
 * has an rdataset header which is visible in the search's version
 * (serial <= search->serial), is not ignored, and exists.  The full name
 * of the node the walk stops on is then built from its prefix and origin
 * and tested with dns_name_issubdomain() against 'name'.
 *
 * Each node's headers are examined under that node's read lock.
 *
 * NOTE(review): the remaining parameter(s), the loop exits and the final
 * return are not visible in this view; 'answer' starts as ISC_FALSE and is
 * presumably set when the subdomain test succeeds -- confirm.
 */
2511 static inline isc_boolean_t
2512 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2515 dns_fixedname_t fnext;
2516 dns_fixedname_t forigin;
2521 dns_rbtnode_t *node;
2522 isc_result_t result;
2523 isc_boolean_t answer = ISC_FALSE;
2524 rdatasetheader_t *header;
2526 rbtdb = search->rbtdb;
2528 dns_name_init(&prefix, NULL);
2529 dns_fixedname_init(&fnext);
2530 next = dns_fixedname_name(&fnext);
2531 dns_fixedname_init(&forigin);
2532 origin = dns_fixedname_name(&forigin);
/* Step forward through the tree one node at a time. */
2534 result = dns_rbtnodechain_next(chain, NULL, NULL);
2535 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2537 result = dns_rbtnodechain_current(chain, &prefix,
2539 if (result != ISC_R_SUCCESS)
2541 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2542 isc_rwlocktype_read);
/* Scan this node's headers for one active in the search's version. */
2543 for (header = node->data;
2545 header = header->next) {
2546 if (header->serial <= search->serial &&
2547 !IGNORE(header) && EXISTS(header))
2550 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2551 isc_rwlocktype_read);
2554 result = dns_rbtnodechain_next(chain, NULL, NULL);
/* Build the absolute name of the node found and test it against 'name'. */
2556 if (result == ISC_R_SUCCESS)
2557 result = dns_name_concatenate(&prefix, origin, next, NULL);
2558 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
/*
 * activeemtpynode: determine whether 'qname' is at or below an empty node,
 * given the wildcard name 'wname'.
 *
 * Working on a private copy of search->chain, the function walks backwards
 * and then forwards, looking in each direction for a node with a header
 * active in the search's version, and builds the names 'prev' and 'next'
 * from what it finds.  If a direction yields no usable name, the matching
 * check_prev / check_next flag is cleared.
 *
 * It then strips labels from the left of a clone of 'qname' one at a time,
 * testing whether 'prev' or 'next' is a subdomain of the shortened name,
 * until the name equals 'wname' without its leading wildcard label.
 *
 * NOTE(review): the loop exits and the return are not visible in this
 * view; 'answer' starts as ISC_FALSE and is presumably set when one of the
 * subdomain tests succeeds -- confirm.
 */
2563 static inline isc_boolean_t
2564 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2565 dns_fixedname_t fnext;
2566 dns_fixedname_t forigin;
2567 dns_fixedname_t fprev;
2575 dns_rbtnode_t *node;
2576 dns_rbtnodechain_t chain;
2577 isc_boolean_t check_next = ISC_TRUE;
2578 isc_boolean_t check_prev = ISC_TRUE;
2579 isc_boolean_t answer = ISC_FALSE;
2580 isc_result_t result;
2581 rdatasetheader_t *header;
2584 rbtdb = search->rbtdb;
2586 dns_name_init(&name, NULL);
2587 dns_name_init(&tname, NULL);
2588 dns_name_init(&rname, NULL);
2589 dns_fixedname_init(&fnext);
2590 next = dns_fixedname_name(&fnext);
2591 dns_fixedname_init(&fprev);
2592 prev = dns_fixedname_name(&fprev);
2593 dns_fixedname_init(&forigin);
2594 origin = dns_fixedname_name(&forigin);
2597 * Find if qname is at or below a empty node.
2598 * Use our own copy of the chain.
2601 chain = search->chain;
/* Backward walk: examine the current node, then keep stepping to the previous one. */
2604 result = dns_rbtnodechain_current(&chain, &name,
2606 if (result != ISC_R_SUCCESS)
2608 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2609 isc_rwlocktype_read);
2610 for (header = node->data;
2612 header = header->next) {
2613 if (header->serial <= search->serial &&
2614 !IGNORE(header) && EXISTS(header))
2617 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2618 isc_rwlocktype_read);
2621 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2622 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
/* Record the predecessor's full name, or disable the 'prev' check. */
2623 if (result == ISC_R_SUCCESS)
2624 result = dns_name_concatenate(&name, origin, prev, NULL);
2625 if (result != ISC_R_SUCCESS)
2626 check_prev = ISC_FALSE;
/* Forward walk over the following nodes. */
2628 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2629 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2631 result = dns_rbtnodechain_current(&chain, &name,
2633 if (result != ISC_R_SUCCESS)
2635 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2636 isc_rwlocktype_read);
2637 for (header = node->data;
2639 header = header->next) {
2640 if (header->serial <= search->serial &&
2641 !IGNORE(header) && EXISTS(header))
2644 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2645 isc_rwlocktype_read);
2648 result = dns_rbtnodechain_next(&chain, NULL, NULL);
/* Record the successor's full name, or disable the 'next' check. */
2650 if (result == ISC_R_SUCCESS)
2651 result = dns_name_concatenate(&name, origin, next, NULL);
2652 if (result != ISC_R_SUCCESS)
2653 check_next = ISC_FALSE;
/* 'rname' is shortened from the left on each pass of the final loop. */
2655 dns_name_clone(qname, &rname);
2658 * Remove the wildcard label to find the terminal name.
2660 n = dns_name_countlabels(wname);
2661 dns_name_getlabelsequence(wname, 1, n - 1, &tname);
2664 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
2665 (check_next && dns_name_issubdomain(next, &rname))) {
2670 * Remove the left hand label.
2672 n = dns_name_countlabels(&rname);
2673 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
2674 } while (!dns_name_equal(&rname, &tname));
/*
 * find_wildcard: search the ancestor levels recorded in search->chain for
 * a wildcard that is active in the search's version.
 *
 * For a level, the wildcard name is built by prepending dns_wildcardname
 * to the level node's name and appending the names of the enclosing
 * levels, and is then looked up with dns_rbt_findnode().  A wildcard node
 * is accepted when it has an active header or activeempty() reports true,
 * unless activeemtpynode() vetoes the match, in which case
 * ISC_R_NOTFOUND is returned.
 *
 * 'result' starts as ISC_R_NOTFOUND.  Lookup results other than
 * ISC_R_NOTFOUND and DNS_R_PARTIALMATCH are treated as errors.
 *
 * NOTE(review): the remaining parameters, the outer loop construct, the
 * use of 'wild' / 'active' / 'done' and the store into '*nodep' are not
 * visible in this view -- confirm against the full file.
 */
2678 static inline isc_result_t
2679 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
2683 dns_rbtnode_t *node, *level_node, *wnode;
2684 rdatasetheader_t *header;
2685 isc_result_t result = ISC_R_NOTFOUND;
2688 dns_fixedname_t fwname;
2690 isc_boolean_t done, wild, active;
2691 dns_rbtnodechain_t wchain;
2694 * Caller must be holding the tree lock and MUST NOT be holding
2699 * Examine each ancestor level. If the level's wild bit
2700 * is set, then construct the corresponding wildcard name and
2701 * search for it. If the wildcard node exists, and is active in
2702 * this version, we're done. If not, then we next check to see
2703 * if the ancestor is active in this version. If so, then there
2704 * can be no possible wildcard match and again we're done. If not,
2705 * continue the search.
2708 rbtdb = search->rbtdb;
2709 i = search->chain.level_matches;
2713 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2714 isc_rwlocktype_read);
2717 * First we try to figure out if this node is active in
2718 * the search's version. We do this now, even though we
2719 * may not need the information, because it simplifies the
2720 * locking and code flow.
2722 for (header = node->data;
2724 header = header->next) {
2725 if (header->serial <= search->serial &&
2726 !IGNORE(header) && EXISTS(header))
2739 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2740 isc_rwlocktype_read);
2744 * Construct the wildcard name for this level.
2746 dns_name_init(&name, NULL);
2747 dns_rbt_namefromnode(node, &name);
2748 dns_fixedname_init(&fwname);
2749 wname = dns_fixedname_name(&fwname);
2750 result = dns_name_concatenate(dns_wildcardname, &name,
/* Append the name of each enclosing level to complete the wildcard name. */
2753 while (result == ISC_R_SUCCESS && j != 0) {
2755 level_node = search->chain.levels[j];
2756 dns_name_init(&name, NULL);
2757 dns_rbt_namefromnode(level_node, &name);
2758 result = dns_name_concatenate(wname,
2763 if (result != ISC_R_SUCCESS)
/* Look the wildcard name up using a separate chain ('wchain'). */
2767 dns_rbtnodechain_init(&wchain, NULL);
2768 result = dns_rbt_findnode(rbtdb->tree, wname,
2769 NULL, &wnode, &wchain,
2770 DNS_RBTFIND_EMPTYDATA,
2772 if (result == ISC_R_SUCCESS) {
2776 * We have found the wildcard node. If it
2777 * is active in the search's version, we're
2780 lock = &rbtdb->node_locks[wnode->locknum].lock;
2781 NODE_LOCK(lock, isc_rwlocktype_read);
2782 for (header = wnode->data;
2784 header = header->next) {
2785 if (header->serial <= search->serial &&
2786 !IGNORE(header) && EXISTS(header))
2789 NODE_UNLOCK(lock, isc_rwlocktype_read);
/* Accept an active wildcard node, or one activeempty() reports as active. */
2790 if (header != NULL ||
2791 activeempty(search, &wchain, wname)) {
2792 if (activeemtpynode(search, qname,
2794 return (ISC_R_NOTFOUND);
2797 * The wildcard node is active!
2799 * Note: result is still ISC_R_SUCCESS
2800 * so we don't have to set it.
2805 } else if (result != ISC_R_NOTFOUND &&
2806 result != DNS_R_PARTIALMATCH) {
2808 * An error has occurred. Bail out.
2816 * The level node is active. Any wildcarding
2817 * present at higher levels has no
2818 * effect and we're done.
2820 result = ISC_R_NOTFOUND;
2826 node = search->chain.levels[i];
/*
 * find_closest_nsec: starting at the current position of search->chain,
 * walk backwards through the tree until a node with an active NSEC
 * rdataset (and, when 'need_sig' is true, its RRSIG) is found.
 *
 * On a match the node's full name is built from 'name' and 'origin', a
 * reference is taken when 'nodep' is non-NULL, and the NSEC and optional
 * signature rdatasets are bound.  Nodes that are inactive, or that are
 * active but carry neither NSEC nor RRSIG NSEC, are treated as empty and
 * skipped.
 *
 * Returns DNS_R_BADDB when an active node has only one of NSEC / RRSIG
 * NSEC, or when the walk ends with ISC_R_NOMORE.
 *
 * NOTE(review): the enclosing loop opener, the assignments to 'found' and
 * 'foundsig', and the final return are not visible in this view.
 */
2834 static inline isc_result_t
2835 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
2836 dns_name_t *foundname, dns_rdataset_t *rdataset,
2837 dns_rdataset_t *sigrdataset, isc_boolean_t need_sig)
2839 dns_rbtnode_t *node;
2840 rdatasetheader_t *header, *header_next, *found, *foundsig;
2841 isc_boolean_t empty_node;
2842 isc_result_t result;
2843 dns_fixedname_t fname, forigin;
2844 dns_name_t *name, *origin;
2848 dns_fixedname_init(&fname);
2849 name = dns_fixedname_name(&fname);
2850 dns_fixedname_init(&forigin);
2851 origin = dns_fixedname_name(&forigin);
2852 result = dns_rbtnodechain_current(&search->chain, name,
2854 if (result != ISC_R_SUCCESS)
2856 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2857 isc_rwlocktype_read);
/* Assume the node is empty until an active rdataset is seen. */
2860 empty_node = ISC_TRUE;
2861 for (header = node->data;
2863 header = header_next) {
/* Save the successor first: 'header' is reassigned via ->down below. */
2864 header_next = header->next;
2866 * Look for an active, extant NSEC or RRSIG NSEC.
2869 if (header->serial <= search->serial &&
2872 * Is this a "this rdataset doesn't
2875 if (NONEXISTENT(header))
2879 header = header->down;
2880 } while (header != NULL);
2881 if (header != NULL) {
2883 * We now know that there is at least one
2884 * active rdataset at this node.
2886 empty_node = ISC_FALSE;
2887 if (header->type == dns_rdatatype_nsec) {
2889 if (foundsig != NULL)
2891 } else if (header->type ==
2892 RBTDB_RDATATYPE_SIGNSEC) {
2900 if (found != NULL &&
2901 (foundsig != NULL || !need_sig))
2904 * We've found the right NSEC record.
2906 * Note: for this to really be the right
2907 * NSEC record, it's essential that the NSEC
2908 * records of any nodes obscured by a zone
2909 * cut have been removed; we assume this is
2912 result = dns_name_concatenate(name, origin,
2914 if (result == ISC_R_SUCCESS) {
2915 if (nodep != NULL) {
2916 new_reference(search->rbtdb,
2920 bind_rdataset(search->rbtdb, node,
2923 if (foundsig != NULL)
2924 bind_rdataset(search->rbtdb,
2930 } else if (found == NULL && foundsig == NULL) {
2932 * This node is active, but has no NSEC or
2933 * RRSIG NSEC. That means it's glue or
2934 * other obscured zone data that isn't
2935 * relevant for our search. Treat the
2936 * node as if it were empty and keep looking.
2938 empty_node = ISC_TRUE;
2939 result = dns_rbtnodechain_prev(&search->chain,
2943 * We found an active node, but either the
2944 * NSEC or the RRSIG NSEC is missing. This
2947 result = DNS_R_BADDB;
2951 * This node isn't active. We've got to keep
2954 result = dns_rbtnodechain_prev(&search->chain, NULL,
2957 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2958 isc_rwlocktype_read);
/* Keep walking backwards while nodes are empty and the chain has more. */
2959 } while (empty_node && result == ISC_R_SUCCESS);
2962 * If the result is ISC_R_NOMORE, then we got to the beginning of
2963 * the database and didn't find a NSEC record. This shouldn't
2966 if (result == ISC_R_NOMORE)
2967 result = DNS_R_BADDB;
/*
 * zone_find: look up 'name' / 'type' in a zone database.
 *
 * When 'version' is NULL the current version is attached via
 * currentversion() and 'close_version' is set; closeversion() is called
 * near the end of the function.  The tree lock is held for reading from
 * RWLOCK() to RWUNLOCK().
 *
 * Outcomes visible in this function:
 *   - partial match with a zone cut: the result of setup_delegation();
 *   - partial match, wildcard found: processing continues at the wildcard
 *     node and 'foundname' is given the WILDCARD attribute;
 *   - partial match, nothing found: DNS_R_EMPTYNAME or DNS_R_NXDOMAIN
 *     (optionally after find_closest_nsec());
 *   - name found but type absent: DNS_R_NXRRSET, DNS_R_EMPTYWILD, or
 *     DNS_R_BADDB when a required NSEC or its signature is missing;
 *   - type found: ISC_R_SUCCESS, DNS_R_CNAME, DNS_R_ZONECUT or DNS_R_GLUE;
 *     glue failing valid_glue() is replaced by the delegation.
 *
 * A reference taken on a zone cut node that ends up unused is dropped
 * when search.need_cleanup is still set on exit.
 *
 * NOTE(review): labels, gotos, several assignments and the final return
 * are not visible in this view; the summary above is limited to what the
 * visible lines show.
 */
2973 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
2974 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
2975 dns_dbnode_t **nodep, dns_name_t *foundname,
2976 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
2978 dns_rbtnode_t *node = NULL;
2979 isc_result_t result;
2980 rbtdb_search_t search;
2981 isc_boolean_t cname_ok = ISC_TRUE;
2982 isc_boolean_t close_version = ISC_FALSE;
2983 isc_boolean_t maybe_zonecut = ISC_FALSE;
2984 isc_boolean_t at_zonecut = ISC_FALSE;
2986 isc_boolean_t empty_node;
2987 rdatasetheader_t *header, *header_next, *found, *nsecheader;
2988 rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
2989 rbtdb_rdatatype_t sigtype;
2990 isc_boolean_t active;
2991 dns_rbtnodechain_t chain;
2995 search.rbtdb = (dns_rbtdb_t *)db;
2997 REQUIRE(VALID_RBTDB(search.rbtdb));
3000 * We don't care about 'now'.
3005 * If the caller didn't supply a version, attach to the current
3008 if (version == NULL) {
3009 currentversion(db, &version);
3010 close_version = ISC_TRUE;
/* Initialise the search block shared with the helper functions. */
3013 search.rbtversion = version;
3014 search.serial = search.rbtversion->serial;
3015 search.options = options;
3016 search.copy_name = ISC_FALSE;
3017 search.need_cleanup = ISC_FALSE;
3018 search.wild = ISC_FALSE;
3019 search.zonecut = NULL;
3020 dns_fixedname_init(&search.zonecut_name);
3021 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3025 * 'wild' will be true iff. we've matched a wildcard.
3029 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3032 * Search down from the root of the tree. If, while going down, we
3033 * encounter a callback node, zone_zonecut_callback() will search the
3034 * rdatasets at the zone cut for active DNAME or NS rdatasets.
3036 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
3037 &search.chain, DNS_RBTFIND_EMPTYDATA,
3038 zone_zonecut_callback, &search);
3040 if (result == DNS_R_PARTIALMATCH) {
3042 if (search.zonecut != NULL) {
3043 result = setup_delegation(&search, nodep, foundname,
3044 rdataset, sigrdataset);
3050 * At least one of the levels in the search chain
3051 * potentially has a wildcard. For each such level,
3052 * we must see if there's a matching wildcard active
3053 * in the current version.
3055 result = find_wildcard(&search, &node, name);
3056 if (result == ISC_R_SUCCESS) {
3057 result = dns_name_copy(name, foundname, NULL);
3058 if (result != ISC_R_SUCCESS)
3063 else if (result != ISC_R_NOTFOUND)
/* activeempty() advances the chain it is given, so pass it a copy. */
3067 chain = search.chain;
3068 active = activeempty(&search, &chain, name);
3071 * If we're here, then the name does not exist, is not
3072 * beneath a zonecut, and there's no matching wildcard.
3074 if (search.rbtdb->secure ||
3075 (search.options & DNS_DBFIND_FORCENSEC) != 0)
3077 result = find_closest_nsec(&search, nodep, foundname,
3078 rdataset, sigrdataset,
3079 search.rbtdb->secure);
3080 if (result == ISC_R_SUCCESS)
3081 result = active ? DNS_R_EMPTYNAME :
3084 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3086 } else if (result != ISC_R_SUCCESS)
3091 * We have found a node whose name is the desired name, or we
3092 * have matched a wildcard.
3095 if (search.zonecut != NULL) {
3097 * If we're beneath a zone cut, we don't want to look for
3098 * CNAMEs because they're not legitimate zone glue.
3100 cname_ok = ISC_FALSE;
3103 * The node may be a zone cut itself. If it might be one,
3104 * make sure we check for it later.
3106 if (node->find_callback &&
3107 (node != search.rbtdb->origin_node ||
3108 IS_STUB(search.rbtdb)) &&
3109 !dns_rdatatype_atparent(type))
3110 maybe_zonecut = ISC_TRUE;
3114 * Certain DNSSEC types are not subject to CNAME matching
3115 * (RFC4035, section 2.5 and RFC3007).
3117 * We don't check for RRSIG, because we don't store RRSIG records
3120 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3121 cname_ok = ISC_FALSE;
3124 * We now go looking for rdata...
3127 NODE_LOCK(&(search.rbtdb->node_locks[node->locknum].lock),
3128 isc_rwlocktype_read);
/* Type value identifying the RRSIG rdataset that covers 'type'. */
3132 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3136 empty_node = ISC_TRUE;
3137 for (header = node->data; header != NULL; header = header_next) {
/* Save the successor first: 'header' is reassigned via ->down below. */
3138 header_next = header->next;
3140 * Look for an active, extant rdataset.
3143 if (header->serial <= search.serial &&
3146 * Is this a "this rdataset doesn't
3149 if (NONEXISTENT(header))
3153 header = header->down;
3154 } while (header != NULL);
3155 if (header != NULL) {
3157 * We now know that there is at least one active
3158 * rdataset at this node.
3160 empty_node = ISC_FALSE;
3163 * Do special zone cut handling, if requested.
3165 if (maybe_zonecut &&
3166 header->type == dns_rdatatype_ns) {
3168 * We increment the reference count on node to
3169 * ensure that search->zonecut_rdataset will
3170 * still be valid later.
3172 new_reference(search.rbtdb, node);
3173 search.zonecut = node;
3174 search.zonecut_rdataset = header;
3175 search.zonecut_sigrdataset = NULL;
3176 search.need_cleanup = ISC_TRUE;
3177 maybe_zonecut = ISC_FALSE;
3178 at_zonecut = ISC_TRUE;
3180 * It is not clear if KEY should still be
3181 * allowed at the parent side of the zone
3182 * cut or not. It is needed for RFC3007
3183 * validated updates.
3185 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3186 && type != dns_rdatatype_nsec
3187 && type != dns_rdatatype_key) {
3189 * Glue is not OK, but any answer we
3190 * could return would be glue. Return
3196 if (found != NULL && foundsig != NULL)
3201 * If we found a type we were looking for,
3204 if (header->type == type ||
3205 type == dns_rdatatype_any ||
3206 (header->type == dns_rdatatype_cname &&
3209 * We've found the answer!
3212 if (header->type == dns_rdatatype_cname &&
3215 * We may be finding a CNAME instead
3216 * of the desired type.
3218 * If we've already got the CNAME RRSIG,
3219 * use it, otherwise change sigtype
3220 * so that we find it.
3222 if (cnamesig != NULL)
3223 foundsig = cnamesig;
3226 RBTDB_RDATATYPE_SIGCNAME;
3229 * If we've got all we need, end the search.
3231 if (!maybe_zonecut && foundsig != NULL)
3233 } else if (header->type == sigtype) {
3235 * We've found the RRSIG rdataset for our
3236 * target type. Remember it.
3240 * If we've got all we need, end the search.
3242 if (!maybe_zonecut && found != NULL)
3244 } else if (header->type == dns_rdatatype_nsec) {
3246 * Remember a NSEC rdataset even if we're
3247 * not specifically looking for it, because
3248 * we might need it later.
3250 nsecheader = header;
3251 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC) {
3253 * If we need the NSEC rdataset, we'll also
3254 * need its signature.
3257 } else if (cname_ok &&
3258 header->type == RBTDB_RDATATYPE_SIGCNAME) {
3260 * If we get a CNAME match, we'll also need
3270 * We have an exact match for the name, but there are no
3271 * active rdatasets in the desired version. That means that
3272 * this node doesn't exist in the desired version, and that
3273 * we really have a partial match.
3276 lock = &search.rbtdb->node_locks[node->locknum].lock;
3277 NODE_UNLOCK(lock, isc_rwlocktype_read);
3283 * If we didn't find what we were looking for...
3285 if (found == NULL) {
3286 if (search.zonecut != NULL) {
3288 * We were trying to find glue at a node beneath a
3289 * zone cut, but didn't.
3291 * Return the delegation.
3293 lock = &search.rbtdb->node_locks[node->locknum].lock;
3294 NODE_UNLOCK(lock, isc_rwlocktype_read);
3295 result = setup_delegation(&search, nodep, foundname,
3296 rdataset, sigrdataset);
3300 * The desired type doesn't exist.
3302 result = DNS_R_NXRRSET;
3303 if (search.rbtdb->secure &&
3304 (nsecheader == NULL || nsecsig == NULL)) {
3306 * The zone is secure but there's no NSEC,
3307 * or the NSEC has no signature!
3310 result = DNS_R_BADDB;
3314 lock = &search.rbtdb->node_locks[node->locknum].lock;
3315 NODE_UNLOCK(lock, isc_rwlocktype_read);
3316 result = find_closest_nsec(&search, nodep, foundname,
3317 rdataset, sigrdataset,
3318 search.rbtdb->secure);
3319 if (result == ISC_R_SUCCESS)
3320 result = DNS_R_EMPTYWILD;
3323 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3327 * There's no NSEC record, and we were told
3330 result = DNS_R_BADDB;
3333 if (nodep != NULL) {
3334 new_reference(search.rbtdb, node);
3337 if (search.rbtdb->secure ||
3338 (search.options & DNS_DBFIND_FORCENSEC) != 0)
3340 bind_rdataset(search.rbtdb, node, nsecheader,
3342 if (nsecsig != NULL)
3343 bind_rdataset(search.rbtdb, node,
3344 nsecsig, 0, sigrdataset);
3347 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3352 * We found what we were looking for, or we found a CNAME.
3355 if (type != found->type &&
3356 type != dns_rdatatype_any &&
3357 found->type == dns_rdatatype_cname) {
3359 * We weren't doing an ANY query and we found a CNAME instead
3360 * of the type we were looking for, so we need to indicate
3361 * that result to the caller.
3363 result = DNS_R_CNAME;
3364 } else if (search.zonecut != NULL) {
3366 * If we're beneath a zone cut, we must indicate that the
3367 * result is glue, unless we're actually at the zone cut
3368 * and the type is NSEC or KEY.
3370 if (search.zonecut == node) {
3372 * It is not clear if KEY should still be
3373 * allowed at the parent side of the zone
3374 * cut or not. It is needed for RFC3007
3375 * validated updates.
3377 if (type == dns_rdatatype_nsec ||
3378 type == dns_rdatatype_key)
3379 result = ISC_R_SUCCESS;
3380 else if (type == dns_rdatatype_any)
3381 result = DNS_R_ZONECUT;
3383 result = DNS_R_GLUE;
3385 result = DNS_R_GLUE;
3387 * We might have found data that isn't glue, but was occluded
3388 * by a dynamic update. If the caller cares about this, they
3389 * will have told us to validate glue.
3391 * XXX We should cache the glue validity state!
3393 if (result == DNS_R_GLUE &&
3394 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3395 !valid_glue(&search, foundname, type, node)) {
/* Release the node lock first: setup_delegation() takes node locks itself. */
3396 lock = &search.rbtdb->node_locks[node->locknum].lock;
3397 NODE_UNLOCK(lock, isc_rwlocktype_read);
3398 result = setup_delegation(&search, nodep, foundname,
3399 rdataset, sigrdataset);
3404 * An ordinary successful query!
3406 result = ISC_R_SUCCESS;
3409 if (nodep != NULL) {
3411 new_reference(search.rbtdb, node);
3413 search.need_cleanup = ISC_FALSE;
/* For an ANY query no single rdataset is bound. */
3417 if (type != dns_rdatatype_any) {
3418 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3419 if (foundsig != NULL)
3420 bind_rdataset(search.rbtdb, node, foundsig, 0,
3425 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3428 NODE_UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock),
3429 isc_rwlocktype_read);
3432 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3435 * If we found a zonecut but aren't going to use it, we have to
3438 if (search.need_cleanup) {
3439 node = search.zonecut;
3440 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3442 NODE_LOCK(lock, isc_rwlocktype_read);
3443 decrement_reference(search.rbtdb, node, 0,
3444 isc_rwlocktype_read, isc_rwlocktype_none,
3446 NODE_UNLOCK(lock, isc_rwlocktype_read);
3450 closeversion(db, &version, ISC_FALSE);
3452 dns_rbtnodechain_reset(&search.chain);
/*
 * zone_findzonecut: placeholder for the "find zone cut" operation on a
 * zone database.  It is not expected to be called: it reports a fatal
 * error through FATAL_ERROR() and, should that return, yields
 * ISC_R_NOTIMPLEMENTED.  At least 'sigrdataset' is marked unused.
 */
3458 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3459 isc_stdtime_t now, dns_dbnode_t **nodep,
3460 dns_name_t *foundname,
3461 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3470 UNUSED(sigrdataset);
3472 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3474 return (ISC_R_NOTIMPLEMENTED);
/*
 * cache_zonecut_callback: node callback for cache lookups; 'arg' is the
 * rbtdb_search_t of the lookup in progress.  It requires that no zone cut
 * has been recorded yet (search->zonecut == NULL).
 *
 * The node's headers are scanned under the node lock for DNAME and RRSIG
 * DNAME rdatasets.  Headers whose TTL is at or before search->now are
 * stale; when the lock can be upgraded to a write lock they are either
 * freed (node reference count is zero) or marked RDATASET_ATTR_STALE.
 *
 * If a DNAME was found and its trust is not pending (or
 * DNS_DBFIND_PENDINGOK is set), a reference is taken on the node, it is
 * recorded as the search's zone cut, and the result is
 * DNS_R_PARTIALMATCH; otherwise the result is DNS_R_CONTINUE.
 *
 * NOTE(review): the second half of the DNAME / RRSIG DNAME conditions and
 * the final return are not visible in this view.
 */
3478 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3479 rbtdb_search_t *search = arg;
3480 rdatasetheader_t *header, *header_prev, *header_next;
3481 rdatasetheader_t *dname_header, *sigdname_header;
3482 isc_result_t result;
3484 isc_rwlocktype_t locktype;
3488 REQUIRE(search->zonecut == NULL);
3491 * Keep compiler silent.
/* Start with a read lock; 'locktype' tracks an upgrade to write. */
3495 lock = &(search->rbtdb->node_locks[node->locknum].lock);
3496 locktype = isc_rwlocktype_read;
3497 NODE_LOCK(lock, locktype);
3500 * Look for a DNAME or RRSIG DNAME rdataset.
3502 dname_header = NULL;
3503 sigdname_header = NULL;
3505 for (header = node->data; header != NULL; header = header_next) {
/* Save the successor now: 'header' may be freed below. */
3506 header_next = header->next;
3507 if (header->rdh_ttl <= search->now) {
3509 * This rdataset is stale. If no one else is
3510 * using the node, we can clean it up right
3511 * now, otherwise we mark it as stale, and
3512 * the node as dirty, so it will get cleaned
3515 if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
3516 (locktype == isc_rwlocktype_write ||
3517 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3519 * We update the node's status only when we
3520 * can get write access; otherwise, we leave
3521 * others to this work. Periodical cleaning
3522 * will eventually take the job as the last
3524 * We won't downgrade the lock, since other
3525 * rdatasets are probably stale, too.
3527 locktype = isc_rwlocktype_write;
3529 if (dns_rbtnode_refcurrent(node) == 0) {
3533 * header->down can be non-NULL if the
3534 * refcount has just decremented to 0
3535 * but decrement_reference() has not
3536 * performed clean_cache_node(), in
3537 * which case we need to purge the
3538 * stale headers first.
3540 mctx = search->rbtdb->common.mctx;
3541 clean_stale_headers(search->rbtdb,
3544 if (header_prev != NULL)
3548 node->data = header->next;
3549 free_rdataset(search->rbtdb, mctx,
3552 header->attributes |=
3553 RDATASET_ATTR_STALE;
3555 header_prev = header;
3558 header_prev = header;
3559 } else if (header->type == dns_rdatatype_dname &&
3561 dname_header = header;
3562 header_prev = header;
3563 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
3565 sigdname_header = header;
3566 header_prev = header;
3568 header_prev = header;
/* Use the DNAME only if its trust is not pending, or pending data is allowed. */
3571 if (dname_header != NULL &&
3572 (!DNS_TRUST_PENDING(dname_header->trust) ||
3573 (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
3575 * We increment the reference count on node to ensure that
3576 * search->zonecut_rdataset will still be valid later.
3578 new_reference(search->rbtdb, node);
3579 INSIST(!ISC_LINK_LINKED(node, deadlink));
3580 search->zonecut = node;
3581 search->zonecut_rdataset = dname_header;
3582 search->zonecut_sigrdataset = sigdname_header;
3583 search->need_cleanup = ISC_TRUE;
3584 result = DNS_R_PARTIALMATCH;
3586 result = DNS_R_CONTINUE;
/* Unlock with whichever lock type is currently held. */
3588 NODE_UNLOCK(lock, locktype);
/*
 * find_deepest_zonecut: starting at 'node' and moving up through the
 * levels recorded in search->chain, look for the first node holding an
 * extant, unexpired NS rdataset (and its RRSIG, if present).
 *
 * Stale headers are handled as in the other cache lookups: with write
 * access they are freed when the node is unreferenced, or marked
 * RDATASET_ATTR_STALE otherwise.
 *
 * When an NS rdataset is found, 'foundname' (if non-NULL) is built from
 * the node's name plus the names of the enclosing levels, the result
 * becomes DNS_R_DELEGATION, a node reference is taken when 'nodep' is
 * non-NULL, and the rdatasets are bound.  If need_headerupdate() says so,
 * the node lock is re-acquired for writing and update_header() is applied.
 *
 * 'result' starts as ISC_R_NOTFOUND.  The caller must hold the tree lock.
 *
 * NOTE(review): the outer loop construct, the assignments to 'found' and
 * 'foundsig', and the final return are not visible in this view.
 */
3593 static inline isc_result_t
3594 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
3595 dns_dbnode_t **nodep, dns_name_t *foundname,
3596 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3599 dns_rbtnode_t *level_node;
3600 rdatasetheader_t *header, *header_prev, *header_next;
3601 rdatasetheader_t *found, *foundsig;
3602 isc_result_t result = ISC_R_NOTFOUND;
3607 isc_rwlocktype_t locktype;
3610 * Caller must be holding the tree lock.
3613 rbtdb = search->rbtdb;
3614 i = search->chain.level_matches;
/* Start with a read lock; 'locktype' tracks any later change to write. */
3617 locktype = isc_rwlocktype_read;
3618 lock = &rbtdb->node_locks[node->locknum].lock;
3619 NODE_LOCK(lock, locktype);
3622 * Look for NS and RRSIG NS rdatasets.
3627 for (header = node->data;
3629 header = header_next) {
/* Save the successor now: 'header' may be freed below. */
3630 header_next = header->next;
3631 if (header->rdh_ttl <= search->now) {
3633 * This rdataset is stale. If no one else is
3634 * using the node, we can clean it up right
3635 * now, otherwise we mark it as stale, and
3636 * the node as dirty, so it will get cleaned
3639 if ((header->rdh_ttl <= search->now -
3641 (locktype == isc_rwlocktype_write ||
3642 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3644 * We update the node's status only
3645 * when we can get write access.
3647 locktype = isc_rwlocktype_write;
3649 if (dns_rbtnode_refcurrent(node)
3653 m = search->rbtdb->common.mctx;
3654 clean_stale_headers(
3657 if (header_prev != NULL)
3663 free_rdataset(rbtdb, m,
3666 header->attributes |=
3667 RDATASET_ATTR_STALE;
3669 header_prev = header;
3672 header_prev = header;
3673 } else if (EXISTS(header)) {
3675 * We've found an extant rdataset. See if
3676 * we're interested in it.
3678 if (header->type == dns_rdatatype_ns) {
3680 if (foundsig != NULL)
3682 } else if (header->type ==
3683 RBTDB_RDATATYPE_SIGNS) {
3688 header_prev = header;
3690 header_prev = header;
3693 if (found != NULL) {
3695 * If we have to set foundname, we do it before
3696 * anything else. If we were to set foundname after
3697 * we had set nodep or bound the rdataset, then we'd
3698 * have to undo that work if dns_name_concatenate()
3699 * failed. By setting foundname first, there's
3700 * nothing to undo if we have trouble.
3702 if (foundname != NULL) {
3703 dns_name_init(&name, NULL);
3704 dns_rbt_namefromnode(node, &name);
3705 result = dns_name_copy(&name, foundname, NULL);
/* Append the name of each enclosing level to complete 'foundname'. */
3706 while (result == ISC_R_SUCCESS && i > 0) {
3708 level_node = search->chain.levels[i];
3709 dns_name_init(&name, NULL);
3710 dns_rbt_namefromnode(level_node,
3713 dns_name_concatenate(foundname,
3718 if (result != ISC_R_SUCCESS) {
3723 result = DNS_R_DELEGATION;
3724 if (nodep != NULL) {
3725 new_reference(search->rbtdb, node);
3728 bind_rdataset(search->rbtdb, node, found, search->now,
3730 if (foundsig != NULL)
3731 bind_rdataset(search->rbtdb, node, foundsig,
3732 search->now, sigrdataset);
3733 if (need_headerupdate(found, search->now) ||
3734 (foundsig != NULL &&
3735 need_headerupdate(foundsig, search->now))) {
/*
 * The lock is released and re-taken for writing (not upgraded in place),
 * so need_headerupdate() is evaluated again below.
 */
3736 if (locktype != isc_rwlocktype_write) {
3737 NODE_UNLOCK(lock, locktype);
3738 NODE_LOCK(lock, isc_rwlocktype_write);
3739 locktype = isc_rwlocktype_write;
3741 if (need_headerupdate(found, search->now))
3742 update_header(search->rbtdb, found,
3744 if (foundsig != NULL &&
3745 need_headerupdate(foundsig, search->now)) {
3746 update_header(search->rbtdb, foundsig,
3753 NODE_UNLOCK(lock, locktype);
/* Nothing found here: move to the next node recorded in the chain. */
3755 if (found == NULL && i > 0) {
3757 node = search->chain.levels[i];
/*
 * find_coveringnsec: starting at the current position of search->chain,
 * walk backwards through the cache tree looking for a node that holds an
 * unexpired NSEC rdataset (and, if present, its RRSIG).
 *
 * Headers whose TTL is at or before 'now' are stale; with write access
 * they are freed when the node is unreferenced, or marked
 * RDATASET_ATTR_STALE otherwise.  Non-existent headers and headers whose
 * base type is 0 are skipped.
 *
 * When an NSEC is found, the node's name is built from 'name' and
 * 'origin', the rdatasets are bound, a node reference is taken and the
 * result is DNS_R_COVERINGNSEC.  A non-empty node without an NSEC ends
 * the search with ISC_R_NOTFOUND; empty nodes are skipped via
 * dns_rbtnodechain_prev().
 *
 * NOTE(review): the enclosing loop opener, the assignments to 'found' and
 * 'foundsig', the store into '*nodep' and the final return are not visible
 * in this view.
 */
3767 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3768 isc_stdtime_t now, dns_name_t *foundname,
3769 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3771 dns_rbtnode_t *node;
3772 rdatasetheader_t *header, *header_next, *header_prev;
3773 rdatasetheader_t *found, *foundsig;
3774 isc_boolean_t empty_node;
3775 isc_result_t result;
3776 dns_fixedname_t fname, forigin;
3777 dns_name_t *name, *origin;
3778 rbtdb_rdatatype_t matchtype, sigmatchtype;
3780 isc_rwlocktype_t locktype;
/* Type values for the NSEC rdataset and for the RRSIG covering it. */
3782 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
3783 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
3784 dns_rdatatype_nsec);
3788 dns_fixedname_init(&fname);
3789 name = dns_fixedname_name(&fname);
3790 dns_fixedname_init(&forigin);
3791 origin = dns_fixedname_name(&forigin);
3792 result = dns_rbtnodechain_current(&search->chain, name,
3794 if (result != ISC_R_SUCCESS)
/* Start with a read lock; 'locktype' tracks an upgrade to write. */
3796 locktype = isc_rwlocktype_read;
3797 lock = &(search->rbtdb->node_locks[node->locknum].lock);
3798 NODE_LOCK(lock, locktype);
3801 empty_node = ISC_TRUE;
3803 for (header = node->data;
3805 header = header_next) {
/* Save the successor now: 'header' may be freed below. */
3806 header_next = header->next;
3807 if (header->rdh_ttl <= now) {
3809 * This rdataset is stale. If no one else is
3810 * using the node, we can clean it up right
3811 * now, otherwise we mark it as stale, and the
3812 * node as dirty, so it will get cleaned up
3815 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
3816 (locktype == isc_rwlocktype_write ||
3817 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3819 * We update the node's status only
3820 * when we can get write access.
3822 locktype = isc_rwlocktype_write;
3824 if (dns_rbtnode_refcurrent(node)
3828 m = search->rbtdb->common.mctx;
3829 clean_stale_headers(
3832 if (header_prev != NULL)
3836 node->data = header->next;
3837 free_rdataset(search->rbtdb, m,
3840 header->attributes |=
3841 RDATASET_ATTR_STALE;
3843 header_prev = header;
3846 header_prev = header;
/* Skip headers that are non-existent or whose base type is 0. */
3849 if (NONEXISTENT(header) ||
3850 RBTDB_RDATATYPE_BASE(header->type) == 0) {
3851 header_prev = header;
3854 empty_node = ISC_FALSE;
3855 if (header->type == matchtype)
3857 else if (header->type == sigmatchtype)
3859 header_prev = header;
3861 if (found != NULL) {
3862 result = dns_name_concatenate(name, origin,
3864 if (result != ISC_R_SUCCESS)
3866 bind_rdataset(search->rbtdb, node, found,
3868 if (foundsig != NULL)
3869 bind_rdataset(search->rbtdb, node, foundsig,
3871 new_reference(search->rbtdb, node);
3873 result = DNS_R_COVERINGNSEC;
3874 } else if (!empty_node) {
3875 result = ISC_R_NOTFOUND;
3877 result = dns_rbtnodechain_prev(&search->chain, NULL,
3880 NODE_UNLOCK(lock, locktype);
/* Keep walking backwards while nodes are empty and the chain has more. */
3881 } while (empty_node && result == ISC_R_SUCCESS);
/*
 * cache_find: look up 'name' / 'type' in a cache database.
 *
 * Requires a valid rbtdb and version == NULL.  The tree is searched with
 * dns_rbt_findnode() while tree_lock is held for reading, with
 * cache_zonecut_callback() invoked on the way down.  On
 * DNS_R_PARTIALMATCH the visible code hands off to find_coveringnsec()
 * (when DNS_DBFIND_COVERINGNSEC is set in the options),
 * setup_delegation() (when search.zonecut was recorded) or
 * find_deepest_zonecut().
 *
 * On an exact match the node's header list is scanned under the node
 * lock.  Headers with rdh_ttl <= now are treated as stale: those at or
 * below now - RBTDB_VIRTUAL are unlinked and freed when write access is
 * obtained and dns_rbtnode_refcurrent(node) is 0, and are otherwise
 * marked RDATASET_ATTR_STALE.  Live headers are compared against the
 * requested type, its RRSIG type, the negative-cache types, NS and
 * RRSIG(NS), and, while cname_ok is true, CNAME and RRSIG(CNAME).
 * cname_ok is cleared for KEY and NSEC queries.
 *
 * Result codes assigned in the visible code: DNS_R_DELEGATION (an NS
 * header exists but no answer of acceptable trust was found),
 * DNS_R_NCACHENXDOMAIN, DNS_R_NCACHENXRRSET, DNS_R_CNAME and
 * ISC_R_SUCCESS.  Headers flagged by need_headerupdate() are refreshed
 * with update_header() once the node lock is held for writing.  If
 * search.need_cleanup is set, the reference on search.zonecut is dropped
 * with decrement_reference() before the node chain is reset.
 *
 * NOTE(review): short lines (braces, gotos, simple assignments such as
 * the ones that record matching headers, and the final return) are not
 * present in this listing; confirm the control flow between the visible
 * statements against the complete source before relying on this summary.
 */
3886 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3887 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3888 dns_dbnode_t **nodep, dns_name_t *foundname,
3889 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3891 dns_rbtnode_t *node = NULL;
3892 isc_result_t result;
3893 rbtdb_search_t search;
3894 isc_boolean_t cname_ok = ISC_TRUE;
3895 isc_boolean_t empty_node;
3897 isc_rwlocktype_t locktype;
3898 rdatasetheader_t *header, *header_prev, *header_next;
3899 rdatasetheader_t *found, *nsheader;
3900 rdatasetheader_t *foundsig, *nssig, *cnamesig;
3901 rdatasetheader_t *update, *updatesig;
3902 rbtdb_rdatatype_t sigtype, negtype;
3906 search.rbtdb = (dns_rbtdb_t *)db;
3908 REQUIRE(VALID_RBTDB(search.rbtdb));
3909 REQUIRE(version == NULL);
3912 isc_stdtime_get(&now);
3914 search.rbtversion = NULL;
3916 search.options = options;
3917 search.copy_name = ISC_FALSE;
3918 search.need_cleanup = ISC_FALSE;
3919 search.wild = ISC_FALSE;
3920 search.zonecut = NULL;
3921 dns_fixedname_init(&search.zonecut_name);
3922 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3927 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3930 * Search down from the root of the tree. If, while going down, we
3931 * encounter a callback node, cache_zonecut_callback() will search the
3932 * rdatasets at the zone cut for a DNAME rdataset.
3934 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
3935 &search.chain, DNS_RBTFIND_EMPTYDATA,
3936 cache_zonecut_callback, &search);
3938 if (result == DNS_R_PARTIALMATCH) {
3939 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
3940 result = find_coveringnsec(&search, nodep, now,
3941 foundname, rdataset,
3943 if (result == DNS_R_COVERINGNSEC)
3946 if (search.zonecut != NULL) {
3947 result = setup_delegation(&search, nodep, foundname,
3948 rdataset, sigrdataset);
3952 result = find_deepest_zonecut(&search, node, nodep,
3953 foundname, rdataset,
3957 } else if (result != ISC_R_SUCCESS)
3961 * Certain DNSSEC types are not subject to CNAME matching
3962 * (RFC4035, section 2.5 and RFC3007).
3964 * We don't check for RRSIG, because we don't store RRSIG records
3967 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3968 cname_ok = ISC_FALSE;
3971 * We now go looking for rdata...
3974 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3975 locktype = isc_rwlocktype_read;
3976 NODE_LOCK(lock, locktype);
3980 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3981 negtype = RBTDB_RDATATYPE_VALUE(0, type);
3985 empty_node = ISC_TRUE;
3987 for (header = node->data; header != NULL; header = header_next) {
3988 header_next = header->next;
3989 if (header->rdh_ttl <= now) {
3991 * This rdataset is stale. If no one else is using the
3992 * node, we can clean it up right now, otherwise we
3993 * mark it as stale, and the node as dirty, so it will
3994 * get cleaned up later.
3996 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
3997 (locktype == isc_rwlocktype_write ||
3998 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4000 * We update the node's status only when we
4001 * can get write access.
4003 locktype = isc_rwlocktype_write;
4005 if (dns_rbtnode_refcurrent(node) == 0) {
4008 mctx = search.rbtdb->common.mctx;
4009 clean_stale_headers(search.rbtdb, mctx,
4011 if (header_prev != NULL)
4015 node->data = header->next;
4016 free_rdataset(search.rbtdb, mctx,
4019 header->attributes |=
4020 RDATASET_ATTR_STALE;
4022 header_prev = header;
4025 header_prev = header;
4026 } else if (EXISTS(header)) {
4028 * We now know that there is at least one active
4029 * non-stale rdataset at this node.
4031 empty_node = ISC_FALSE;
4034 * If we found a type we were looking for, remember
4037 if (header->type == type ||
4038 (type == dns_rdatatype_any &&
4039 RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4040 (cname_ok && header->type ==
4041 dns_rdatatype_cname)) {
4043 * We've found the answer.
4046 if (header->type == dns_rdatatype_cname &&
4050 * If we've already got the CNAME RRSIG,
4051 * use it, otherwise change sigtype
4052 * so that we find it.
4054 if (cnamesig != NULL)
4055 foundsig = cnamesig;
4058 RBTDB_RDATATYPE_SIGCNAME;
4059 foundsig = cnamesig;
4061 } else if (header->type == sigtype) {
4063 * We've found the RRSIG rdataset for our
4064 * target type. Remember it.
4067 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4068 header->type == negtype) {
4070 * We've found a negative cache entry.
4073 } else if (header->type == dns_rdatatype_ns) {
4075 * Remember a NS rdataset even if we're
4076 * not specifically looking for it, because
4077 * we might need it later.
4080 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4082 * If we need the NS rdataset, we'll also
4083 * need its signature.
4086 } else if (cname_ok &&
4087 header->type == RBTDB_RDATATYPE_SIGCNAME) {
4089 * If we get a CNAME match, we'll also need
4094 header_prev = header;
4096 header_prev = header;
4101 * We have an exact match for the name, but there are no
4102 * extant rdatasets. That means that this node doesn't
4103 * meaningfully exist, and that we really have a partial match.
4105 NODE_UNLOCK(lock, locktype);
4110 * If we didn't find what we were looking for...
4112 if (found == NULL ||
4113 (DNS_TRUST_ADDITIONAL(found->trust) &&
4114 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4115 (found->trust == dns_trust_glue &&
4116 ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4117 (DNS_TRUST_PENDING(found->trust) &&
4118 ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4120 * If there is an NS rdataset at this node, then this is the
4123 if (nsheader != NULL) {
4124 if (nodep != NULL) {
4125 new_reference(search.rbtdb, node);
4126 INSIST(!ISC_LINK_LINKED(node, deadlink));
4129 bind_rdataset(search.rbtdb, node, nsheader, search.now,
4131 if (need_headerupdate(nsheader, search.now))
4133 if (nssig != NULL) {
4134 bind_rdataset(search.rbtdb, node, nssig,
4135 search.now, sigrdataset);
4136 if (need_headerupdate(nssig, search.now))
4139 result = DNS_R_DELEGATION;
4144 * Go find the deepest zone cut.
4146 NODE_UNLOCK(lock, locktype);
4151 * We found what we were looking for, or we found a CNAME.
4154 if (nodep != NULL) {
4155 new_reference(search.rbtdb, node);
4156 INSIST(!ISC_LINK_LINKED(node, deadlink));
4160 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4162 * We found a negative cache entry.
4164 if (NXDOMAIN(found))
4165 result = DNS_R_NCACHENXDOMAIN;
4167 result = DNS_R_NCACHENXRRSET;
4168 } else if (type != found->type &&
4169 type != dns_rdatatype_any &&
4170 found->type == dns_rdatatype_cname) {
4172 * We weren't doing an ANY query and we found a CNAME instead
4173 * of the type we were looking for, so we need to indicate
4174 * that result to the caller.
4176 result = DNS_R_CNAME;
4179 * An ordinary successful query!
4181 result = ISC_R_SUCCESS;
4184 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4185 result == DNS_R_NCACHENXRRSET) {
4186 bind_rdataset(search.rbtdb, node, found, search.now,
4188 if (need_headerupdate(found, search.now))
4190 if (foundsig != NULL) {
4191 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4193 if (need_headerupdate(foundsig, search.now))
4194 updatesig = foundsig;
4199 if ((update != NULL || updatesig != NULL) &&
4200 locktype != isc_rwlocktype_write) {
4201 NODE_UNLOCK(lock, locktype);
4202 NODE_LOCK(lock, isc_rwlocktype_write);
4203 locktype = isc_rwlocktype_write;
4205 if (update != NULL && need_headerupdate(update, search.now))
4206 update_header(search.rbtdb, update, search.now);
4207 if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4208 update_header(search.rbtdb, updatesig, search.now);
4210 NODE_UNLOCK(lock, locktype);
4213 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4216 * If we found a zonecut but aren't going to use it, we have to
4219 if (search.need_cleanup) {
4220 node = search.zonecut;
4221 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4223 NODE_LOCK(lock, isc_rwlocktype_read);
4224 decrement_reference(search.rbtdb, node, 0,
4225 isc_rwlocktype_read, isc_rwlocktype_none,
4227 NODE_UNLOCK(lock, isc_rwlocktype_read);
4230 dns_rbtnodechain_reset(&search.chain);
/*
 * cache_findzonecut: find the NS rdataset (and its RRSIG, if cached) at
 * or above 'name' in a cache database.
 *
 * Requires a valid rbtdb.  DNS_DBFIND_NOEXACT in 'options' is translated
 * to DNS_RBTFIND_NOEXACT for dns_rbt_findnode(), which runs with
 * tree_lock held for reading and without a callback.  A
 * DNS_R_PARTIALMATCH result is passed on to find_deepest_zonecut().
 *
 * On an exact match the node's headers are scanned under the node lock.
 * Stale headers (rdh_ttl <= now) are handled as in cache_find(): freed
 * when write access is available and dns_rbtnode_refcurrent(node) is 0,
 * otherwise marked RDATASET_ATTR_STALE.  Live headers of type NS and
 * RRSIG(NS) are the ones of interest.  The NS header is bound to
 * 'rdataset' and the signature header is bound as well when present;
 * both are refreshed through update_header() under a write lock when
 * need_headerupdate() asks for it.
 *
 * Before returning, DNS_R_DELEGATION is mapped to ISC_R_SUCCESS, and
 * search.need_cleanup is asserted to be false.
 *
 * NOTE(review): the assignments that record the NS / RRSIG(NS) headers,
 * the branch taken when no NS header is found, and the return statement
 * are on lines missing from this listing; verify against the full source.
 */
4236 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4237 isc_stdtime_t now, dns_dbnode_t **nodep,
4238 dns_name_t *foundname,
4239 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4241 dns_rbtnode_t *node = NULL;
4243 isc_result_t result;
4244 rbtdb_search_t search;
4245 rdatasetheader_t *header, *header_prev, *header_next;
4246 rdatasetheader_t *found, *foundsig;
4247 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4248 isc_rwlocktype_t locktype;
4250 search.rbtdb = (dns_rbtdb_t *)db;
4252 REQUIRE(VALID_RBTDB(search.rbtdb));
4255 isc_stdtime_get(&now);
4257 search.rbtversion = NULL;
4259 search.options = options;
4260 search.copy_name = ISC_FALSE;
4261 search.need_cleanup = ISC_FALSE;
4262 search.wild = ISC_FALSE;
4263 search.zonecut = NULL;
4264 dns_fixedname_init(&search.zonecut_name);
4265 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4268 if ((options & DNS_DBFIND_NOEXACT) != 0)
4269 rbtoptions |= DNS_RBTFIND_NOEXACT;
4271 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4274 * Search down from the root of the tree.
4276 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4277 &search.chain, rbtoptions, NULL, &search);
4279 if (result == DNS_R_PARTIALMATCH) {
4281 result = find_deepest_zonecut(&search, node, nodep, foundname,
4282 rdataset, sigrdataset);
4284 } else if (result != ISC_R_SUCCESS)
4288 * We now go looking for an NS rdataset at the node.
4291 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4292 locktype = isc_rwlocktype_read;
4293 NODE_LOCK(lock, locktype);
4298 for (header = node->data; header != NULL; header = header_next) {
4299 header_next = header->next;
4300 if (header->rdh_ttl <= now) {
4302 * This rdataset is stale. If no one else is using the
4303 * node, we can clean it up right now, otherwise we
4304 * mark it as stale, and the node as dirty, so it will
4305 * get cleaned up later.
4307 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4308 (locktype == isc_rwlocktype_write ||
4309 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4311 * We update the node's status only when we
4312 * can get write access.
4314 locktype = isc_rwlocktype_write;
4316 if (dns_rbtnode_refcurrent(node) == 0) {
4319 mctx = search.rbtdb->common.mctx;
4320 clean_stale_headers(search.rbtdb, mctx,
4322 if (header_prev != NULL)
4326 node->data = header->next;
4327 free_rdataset(search.rbtdb, mctx,
4330 header->attributes |=
4331 RDATASET_ATTR_STALE;
4333 header_prev = header;
4336 header_prev = header;
4337 } else if (EXISTS(header)) {
4339 * If we found a type we were looking for, remember
4342 if (header->type == dns_rdatatype_ns) {
4344 * Remember a NS rdataset even if we're
4345 * not specifically looking for it, because
4346 * we might need it later.
4349 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4351 * If we need the NS rdataset, we'll also
4352 * need its signature.
4356 header_prev = header;
4358 header_prev = header;
4361 if (found == NULL) {
4363 * No NS records here.
4365 NODE_UNLOCK(lock, locktype);
4369 if (nodep != NULL) {
4370 new_reference(search.rbtdb, node);
4371 INSIST(!ISC_LINK_LINKED(node, deadlink));
4375 bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4376 if (foundsig != NULL)
4377 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4380 if (need_headerupdate(found, search.now) ||
4381 (foundsig != NULL && need_headerupdate(foundsig, search.now))) {
4382 if (locktype != isc_rwlocktype_write) {
4383 NODE_UNLOCK(lock, locktype);
4384 NODE_LOCK(lock, isc_rwlocktype_write);
4385 locktype = isc_rwlocktype_write;
4387 if (need_headerupdate(found, search.now))
4388 update_header(search.rbtdb, found, search.now);
4389 if (foundsig != NULL &&
4390 need_headerupdate(foundsig, search.now)) {
4391 update_header(search.rbtdb, foundsig, search.now);
4395 NODE_UNLOCK(lock, locktype);
4398 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4400 INSIST(!search.need_cleanup);
4402 dns_rbtnodechain_reset(&search.chain);
4404 if (result == DNS_R_DELEGATION)
4405 result = ISC_R_SUCCESS;
/*
 * attachnode: take an additional reference on the node 'source'.
 *
 * Requires a valid rbtdb and a non-NULL 'targetp' whose target is NULL.
 * The node's reference count is incremented with
 * dns_rbtnode_refincrement() while the node's lock bucket is held via
 * NODE_STRONGLOCK.
 *
 * NOTE(review): the store into *targetp and the declaration of 'refs'
 * are on lines not present in this listing; presumably *targetp receives
 * 'source' -- confirm against the full source.
 */
4411 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4412 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4413 dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4416 REQUIRE(VALID_RBTDB(rbtdb));
4417 REQUIRE(targetp != NULL && *targetp == NULL);
4419 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4420 dns_rbtnode_refincrement(node, &refs);
4422 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * detachnode: release the node reference held in *targetp.
 *
 * Requires a valid rbtdb and a non-NULL 'targetp' with a non-NULL target.
 * The reference is dropped with decrement_reference() while the node's
 * lock bucket is held for reading.  When that call returns true and the
 * bucket has no remaining references while it is flagged 'exiting',
 * 'inactive' is set.  Further down, rbtdb->active is examined under the
 * database write lock and 'want_free' is set once it is 0; the final
 * visible statements log "calling free_rbtdb(<origin>)" at debug level 1
 * and call free_rbtdb().  The origin is printed as "<UNKNOWN>" when the
 * origin name is not dynamic.
 *
 * NOTE(review): the tests on 'inactive' and 'want_free', the update of
 * rbtdb->active and the clearing of *targetp are on lines missing from
 * this listing; presumably free_rbtdb() runs only when 'want_free' is
 * true -- confirm against the full source.
 */
4428 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4429 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4430 dns_rbtnode_t *node;
4431 isc_boolean_t want_free = ISC_FALSE;
4432 isc_boolean_t inactive = ISC_FALSE;
4433 rbtdb_nodelock_t *nodelock;
4435 REQUIRE(VALID_RBTDB(rbtdb));
4436 REQUIRE(targetp != NULL && *targetp != NULL);
4438 node = (dns_rbtnode_t *)(*targetp);
4439 nodelock = &rbtdb->node_locks[node->locknum];
4441 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4443 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4444 isc_rwlocktype_none, ISC_FALSE)) {
4445 if (isc_refcount_current(&nodelock->references) == 0 &&
4446 nodelock->exiting) {
4447 inactive = ISC_TRUE;
4451 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4456 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4458 if (rbtdb->active == 0)
4459 want_free = ISC_TRUE;
4460 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
4462 char buf[DNS_NAME_FORMATSIZE];
4463 if (dns_name_dynamic(&rbtdb->common.origin))
4464 dns_name_format(&rbtdb->common.origin, buf,
4467 strcpy(buf, "<UNKNOWN>");
4468 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4469 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4470 "calling free_rbtdb(%s)", buf);
4471 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * expirenode: mark expired rdatasets at 'node' as stale.
 *
 * Requires a valid rbtdb; per the comment in the body, the caller must
 * hold a tree lock.  While rbtdb->overmem is set, a random value is drawn
 * and 'force_expire' becomes true for a node whose 'down' pointer is NULL
 * when that value is a multiple of 4.
 *
 * With the node's lock bucket held for writing, each top-level header is
 * examined:
 *   - rdh_ttl <= now - RBTDB_VIRTUAL: RDATASET_ATTR_STALE is set;
 *   - otherwise, under force_expire: unless RETAIN(header) is true, the
 *     TTL is set to 0 and RDATASET_ATTR_STALE is set;
 *   - otherwise the header is left alone.
 * Each outcome has an "overmem cache: ..." message logged at
 * ISC_LOG_DEBUG(2); 'log' records whether that level would be logged.
 *
 * Always returns ISC_R_SUCCESS.
 *
 * NOTE(review): the tests guarding isc_stdtime_get() and the individual
 * log calls are on lines missing from this listing.
 */
4477 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4478 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4479 dns_rbtnode_t *rbtnode = node;
4480 rdatasetheader_t *header;
4481 isc_boolean_t force_expire = ISC_FALSE;
4483 * These are the category and module used by the cache cleaner.
4485 isc_boolean_t log = ISC_FALSE;
4486 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4487 isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4488 int level = ISC_LOG_DEBUG(2);
4489 char printname[DNS_NAME_FORMATSIZE];
4491 REQUIRE(VALID_RBTDB(rbtdb));
4494 * Caller must hold a tree lock.
4498 isc_stdtime_get(&now);
4500 if (rbtdb->overmem) {
4503 isc_random_get(&val);
4505 * XXXDCL Could stand to have a better policy, like LRU.
4507 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4510 * Note that 'log' can be true IFF rbtdb->overmem is also true.
4511 * rbtdb->overmem can currently only be true for cache
4512 * databases -- hence all of the "overmem cache" log strings.
4514 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
4516 isc_log_write(dns_lctx, category, module, level,
4517 "overmem cache: %s %s",
4518 force_expire ? "FORCE" : "check",
4519 dns_rbt_formatnodename(rbtnode,
4521 sizeof(printname)));
4525 * We may not need write access, but this code path is not performance
4526 * sensitive, so it should be okay to always lock as a writer.
4528 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4529 isc_rwlocktype_write);
4531 for (header = rbtnode->data; header != NULL; header = header->next)
4532 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
4534 * We don't check if refcurrent(rbtnode) == 0 and try
4535 * to free like we do in cache_find(), because
4536 * refcurrent(rbtnode) must be non-zero. This is so
4537 * because 'node' is an argument to the function.
4539 header->attributes |= RDATASET_ATTR_STALE;
4542 isc_log_write(dns_lctx, category, module,
4543 level, "overmem cache: stale %s",
4545 } else if (force_expire) {
4546 if (! RETAIN(header)) {
4547 set_ttl(rbtdb, header, 0);
4548 header->attributes |= RDATASET_ATTR_STALE;
4551 isc_log_write(dns_lctx, category, module,
4552 level, "overmem cache: "
4553 "reprieve by RETAIN() %s",
4556 } else if (rbtdb->overmem && log)
4557 isc_log_write(dns_lctx, category, module, level,
4558 "overmem cache: saved %s", printname);
4560 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4561 isc_rwlocktype_write);
4563 return (ISC_R_SUCCESS);
/*
 * overmem: record the over-memory state of the database.
 *
 * Stores the 'overmem' argument in rbtdb->overmem, but only when
 * IS_CACHE(rbtdb) is true; for other databases the call has no effect in
 * the visible code.
 */
4567 overmem(dns_db_t *db, isc_boolean_t overmem) {
4568 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4570 if (IS_CACHE(rbtdb))
4571 rbtdb->overmem = overmem;
/*
 * printnode: dump a description of 'node' to the stdio stream 'out'.
 *
 * Requires a valid rbtdb.  With the node's lock bucket held for reading,
 * prints the node address, its current reference count and its lock
 * number.  If the node has data, every top-level header is visited via
 * 'next' and, for each one, every entry reachable through 'down' is
 * printed with its type, serial, TTL, trust and attributes.  A node
 * without data is reported as "(empty)".
 *
 * NOTE(review): a few argument lines of the fprintf() calls and the
 * opening of the inner do/while loop are missing from this listing.
 */
4575 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
4576 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4577 dns_rbtnode_t *rbtnode = node;
4578 isc_boolean_t first;
4580 REQUIRE(VALID_RBTDB(rbtdb));
4582 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4583 isc_rwlocktype_read);
4585 fprintf(out, "node %p, %u references, locknum = %u\n",
4586 rbtnode, dns_rbtnode_refcurrent(rbtnode),
4588 if (rbtnode->data != NULL) {
4589 rdatasetheader_t *current, *top_next;
4591 for (current = rbtnode->data; current != NULL;
4592 current = top_next) {
4593 top_next = current->next;
4595 fprintf(out, "\ttype %u", current->type);
4601 "\tserial = %lu, ttl = %u, "
4602 "trust = %u, attributes = %u\n",
4603 (unsigned long)current->serial,
4606 current->attributes);
4607 current = current->down;
4608 } while (current != NULL);
4611 fprintf(out, "(empty)\n");
4613 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4614 isc_rwlocktype_read);
/*
 * createiterator: allocate and initialize a database iterator for 'db'.
 *
 * Requires a valid rbtdb.  The iterator is allocated from the database's
 * memory context; ISC_R_NOMEMORY is returned if that allocation fails.
 * The new iterator holds its own attachment to 'db' (dns_db_attach()),
 * remembers 'relative_names', starts out paused with no tree lock held
 * (isc_rwlocktype_none), has no current node, an empty deletion list and
 * a freshly initialized node chain.
 *
 * On success the iterator is stored in *iteratorp and ISC_R_SUCCESS is
 * returned.
 */
4618 createiterator(dns_db_t *db, isc_boolean_t relative_names,
4619 dns_dbiterator_t **iteratorp)
4621 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4622 rbtdb_dbiterator_t *rbtdbiter;
4624 REQUIRE(VALID_RBTDB(rbtdb));
4626 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
4627 if (rbtdbiter == NULL)
4628 return (ISC_R_NOMEMORY);
4630 rbtdbiter->common.methods = &dbiterator_methods;
4631 rbtdbiter->common.db = NULL;
4632 dns_db_attach(db, &rbtdbiter->common.db);
4633 rbtdbiter->common.relative_names = relative_names;
4634 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
4635 rbtdbiter->common.cleaning = ISC_FALSE;
4636 rbtdbiter->paused = ISC_TRUE;
4637 rbtdbiter->tree_locked = isc_rwlocktype_none;
4638 rbtdbiter->result = ISC_R_SUCCESS;
4639 dns_fixedname_init(&rbtdbiter->name);
4640 dns_fixedname_init(&rbtdbiter->origin);
4641 rbtdbiter->node = NULL;
4642 rbtdbiter->delete = 0;
4643 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
4644 dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
4646 *iteratorp = (dns_dbiterator_t *)rbtdbiter;
4648 return (ISC_R_SUCCESS);
/*
 * zone_findrdataset: find the rdataset of 'type' / 'covers' at 'node' as
 * seen by 'version' in a zone database.
 *
 * Requires a valid rbtdb and type != dns_rdatatype_any.  When 'version'
 * is NULL the current version is obtained with currentversion() and
 * 'close_version' is set; closeversion() is called near the end of the
 * function.  The search serial is taken from the version in use.
 *
 * With the node's lock bucket held for reading, each top-level header is
 * followed through its 'down' chain looking for an entry whose serial is
 * <= the search serial (the remainder of that test is on a line not
 * visible here); a NONEXISTENT entry gets special handling on lines that
 * are also not visible.  Surviving headers are compared with 'matchtype'
 * and with the RRSIG type covering 'type'.  A match is bound to
 * 'rdataset', and its signature to the signature rdataset when one was
 * found.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOTFOUND (presumably when no matching
 * header was found -- the guarding test is on a missing line).
 */
4652 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4653 dns_rdatatype_t type, dns_rdatatype_t covers,
4654 isc_stdtime_t now, dns_rdataset_t *rdataset,
4655 dns_rdataset_t *sigrdataset)
4657 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4658 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4659 rdatasetheader_t *header, *header_next, *found, *foundsig;
4660 rbtdb_serial_t serial;
4661 rbtdb_version_t *rbtversion = version;
4662 isc_boolean_t close_version = ISC_FALSE;
4663 rbtdb_rdatatype_t matchtype, sigmatchtype;
4665 REQUIRE(VALID_RBTDB(rbtdb));
4666 REQUIRE(type != dns_rdatatype_any);
4668 if (rbtversion == NULL) {
4669 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
4670 close_version = ISC_TRUE;
4672 serial = rbtversion->serial;
4675 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4676 isc_rwlocktype_read);
4680 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
4682 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4686 for (header = rbtnode->data; header != NULL; header = header_next) {
4687 header_next = header->next;
4689 if (header->serial <= serial &&
4692 * Is this a "this rdataset doesn't
4695 if (NONEXISTENT(header))
4699 header = header->down;
4700 } while (header != NULL);
4701 if (header != NULL) {
4703 * We have an active, extant rdataset. If it's a
4704 * type we're looking for, remember it.
4706 if (header->type == matchtype) {
4708 if (foundsig != NULL)
4710 } else if (header->type == sigmatchtype) {
4717 if (found != NULL) {
4718 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
4719 if (foundsig != NULL)
4720 bind_rdataset(rbtdb, rbtnode, foundsig, now,
4724 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4725 isc_rwlocktype_read);
4728 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
4732 return (ISC_R_NOTFOUND);
4734 return (ISC_R_SUCCESS);
/*
 * cache_findrdataset: find the rdataset of 'type' / 'covers' at 'node'
 * in a cache database.
 *
 * Requires a valid rbtdb and type != dns_rdatatype_any.  The node's
 * top-level headers are scanned under the node lock, which starts out as
 * a read lock.  A header with rdh_ttl <= now is not considered; if it is
 * also at or below now - RBTDB_VIRTUAL and the lock is (or can be
 * upgraded to) a write lock, it is marked RDATASET_ATTR_STALE.  It is
 * never freed here because the caller holds a reference on the node.
 *
 * Live headers are compared with 'matchtype', with the negative-cache
 * types (RBTDB_RDATATYPE_NCACHEANY or the negative type for 'type') and
 * with the RRSIG type covering 'type'.  A match is bound to 'rdataset',
 * and its signature to the signature rdataset when one was found.
 *
 * Visible result codes: ISC_R_NOTFOUND, DNS_R_NCACHENXDOMAIN or
 * DNS_R_NCACHENXRRSET for a negative entry, otherwise the initial
 * ISC_R_SUCCESS stored in 'result'.
 *
 * NOTE(review): the assignments that record matching headers, the test
 * guarding the ISC_R_NOTFOUND return and the final return are on lines
 * missing from this listing.
 */
4738 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4739 dns_rdatatype_t type, dns_rdatatype_t covers,
4740 isc_stdtime_t now, dns_rdataset_t *rdataset,
4741 dns_rdataset_t *sigrdataset)
4743 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4744 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4745 rdatasetheader_t *header, *header_next, *found, *foundsig;
4746 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
4747 isc_result_t result;
4749 isc_rwlocktype_t locktype;
4751 REQUIRE(VALID_RBTDB(rbtdb));
4752 REQUIRE(type != dns_rdatatype_any);
4756 result = ISC_R_SUCCESS;
4759 isc_stdtime_get(&now);
4761 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
4762 locktype = isc_rwlocktype_read;
4763 NODE_LOCK(lock, locktype);
4767 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
4768 negtype = RBTDB_RDATATYPE_VALUE(0, type);
4770 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4774 for (header = rbtnode->data; header != NULL; header = header_next) {
4775 header_next = header->next;
4776 if (header->rdh_ttl <= now) {
4777 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4778 (locktype == isc_rwlocktype_write ||
4779 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4781 * We update the node's status only when we
4782 * can get write access.
4784 locktype = isc_rwlocktype_write;
4787 * We don't check if refcurrent(rbtnode) == 0
4788 * and try to free like we do in cache_find(),
4789 * because refcurrent(rbtnode) must be
4790 * non-zero. This is so because 'node' is an
4791 * argument to the function.
4793 header->attributes |= RDATASET_ATTR_STALE;
4796 } else if (EXISTS(header)) {
4797 if (header->type == matchtype)
4799 else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4800 header->type == negtype)
4802 else if (header->type == sigmatchtype)
4806 if (found != NULL) {
4807 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
4808 if (foundsig != NULL)
4809 bind_rdataset(rbtdb, rbtnode, foundsig, now,
4813 NODE_UNLOCK(lock, locktype);
4816 return (ISC_R_NOTFOUND);
4818 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4820 * We found a negative cache entry.
4822 if (NXDOMAIN(found))
4823 result = DNS_R_NCACHENXDOMAIN;
4825 result = DNS_R_NCACHENXRRSET;
/*
 * allrdatasets: create an rdataset iterator over 'node'.
 *
 * Requires a valid rbtdb.  The iterator is allocated from the database's
 * memory context; ISC_R_NOMEMORY is returned if that fails.
 *
 * For a database without DNS_DBATTR_CACHE set, a NULL 'version' is
 * replaced by the current version, and the other visible path increments
 * the version's reference count.  isc_stdtime_get() is used to fill in
 * 'now' on a path whose guarding test is not visible here.
 *
 * The iterator records the db, node, version and time, and a reference
 * on the node is taken under NODE_STRONGLOCK.  The iterator starts with
 * no current header.  On success it is stored in *iteratorp and
 * ISC_R_SUCCESS is returned.
 *
 * NOTE(review): the else branches and the handling of 'version' / 'now'
 * for cache databases are on lines missing from this listing.
 */
4832 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
4833 isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
4835 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4836 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
4837 rbtdb_version_t *rbtversion = version;
4838 rbtdb_rdatasetiter_t *iterator;
4841 REQUIRE(VALID_RBTDB(rbtdb));
4843 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
4844 if (iterator == NULL)
4845 return (ISC_R_NOMEMORY);
4847 if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
4849 if (rbtversion == NULL)
4851 (dns_dbversion_t **) (void *)(&rbtversion));
4855 isc_refcount_increment(&rbtversion->references,
4861 isc_stdtime_get(&now);
4865 iterator->common.magic = DNS_RDATASETITER_MAGIC;
4866 iterator->common.methods = &rdatasetiter_methods;
4867 iterator->common.db = db;
4868 iterator->common.node = node;
4869 iterator->common.version = (dns_dbversion_t *)rbtversion;
4870 iterator->common.now = now;
4872 NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4874 dns_rbtnode_refincrement(rbtnode, &refs);
4877 iterator->current = NULL;
4879 NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
4881 *iteratorp = (dns_rdatasetiter_t *)iterator;
4883 return (ISC_R_SUCCESS);
/*
 * cname_and_other_data: check whether 'node' holds both a CNAME and
 * "other data" in the version identified by 'serial'.
 *
 * Per the comment in the body, the caller must hold the node lock.  Each
 * top-level header is examined.  A CNAME header has its 'down' chain
 * searched for an entry with serial <= 'serial' (the rest of that test is
 * on a line not visible here); an entry that is NONEXISTENT gets special
 * handling on a line that is also not visible.  Any other header whose
 * base type is not KEY, SIG, NSEC or RRSIG is searched the same way and,
 * when such an entry is found, 'other_data' is set.
 *
 * The final visible statement tests 'cname && other_data'; presumably
 * the function returns ISC_TRUE in that case and ISC_FALSE otherwise --
 * the return statements are on lines missing from this listing.
 */
4886 static isc_boolean_t
4887 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
4888 rdatasetheader_t *header, *header_next;
4889 isc_boolean_t cname, other_data;
4890 dns_rdatatype_t rdtype;
4893 * The caller must hold the node lock.
4897 * Look for CNAME and "other data" rdatasets active in our version.
4900 other_data = ISC_FALSE;
4901 for (header = node->data; header != NULL; header = header_next) {
4902 header_next = header->next;
4903 if (header->type == dns_rdatatype_cname) {
4905 * Look for an active extant CNAME.
4908 if (header->serial <= serial &&
4911 * Is this a "this rdataset doesn't
4914 if (NONEXISTENT(header))
4918 header = header->down;
4919 } while (header != NULL);
4924 * Look for active extant "other data".
4926 * "Other data" is any rdataset whose type is not
4927 * KEY, NSEC, SIG or RRSIG.
4929 rdtype = RBTDB_RDATATYPE_BASE(header->type);
4930 if (rdtype != dns_rdatatype_key &&
4931 rdtype != dns_rdatatype_sig &&
4932 rdtype != dns_rdatatype_nsec &&
4933 rdtype != dns_rdatatype_rrsig) {
4935 * Is it active and extant?
4938 if (header->serial <= serial &&
4941 * Is this a "this rdataset
4942 * doesn't exist" record?
4944 if (NONEXISTENT(header))
4948 header = header->down;
4949 } while (header != NULL);
4951 other_data = ISC_TRUE;
4956 if (cname && other_data)
/*
 * add: attach the rdataset header 'newheader' to 'rbtnode'.
 *
 * Per the comment in the body, the caller must hold the node lock.
 * DNS_DBADD_MERGE requires a non-NULL 'rbtversion'.  DNS_DBADD_FORCE
 * makes the new data compare as dns_trust_ultimate; otherwise the trust
 * of 'newheader' is used.  For a versioned add that is not part of
 * loading, a changed record is created first; if that fails 'newheader'
 * is freed and ISC_R_NOMEMORY is returned.
 *
 * Cache adds (rbtversion == NULL) of existing data first deal with
 * negative-cache interactions:
 *   - a new negative entry covering dns_rdatatype_any expires every
 *     header at the node (TTL 0, RDATASET_ATTR_STALE);
 *   - new positive data looks for an RBTDB_RDATATYPE_NCACHEANY header.
 *     If one exists and is not stale, the add is refused with
 *     DNS_R_UNCHANGED when the new trust is lower; otherwise that
 *     negative entry is expired.
 *
 * The header list is then searched for the same type or for 'negtype',
 * skipping IGNORE entries on the 'down' chain.  When a usable header is
 * found:
 *   - deleting something already nonexistent returns DNS_R_UNCHANGED;
 *   - in a cache, lower-trust data never replaces a header that is not
 *     stale (DNS_R_UNCHANGED);
 *   - with merging, dns_rdataslab_merge() builds the union, honouring
 *     DNS_DBADD_EXACT and DNS_DBADD_EXACTTTL;
 *   - in a cache, a still-valid NS, A or AAAA header with equal rdata
 *     and at least equal trust is kept: only a smaller TTL and a missing
 *     'noqname' are taken over from 'newheader', which is then freed
 *     (ISC_R_SUCCESS);
 *   - otherwise 'newheader' is linked in place of the old top header.
 *     When loading, the old header is freed; otherwise it stays
 *     reachable through 'down', and in a cache it is expired.
 * When no usable header exists, 'newheader' is placed ahead of an
 * all-IGNORE chain or pushed onto the front of the node's list.  Cache
 * headers are also put on the per-lock LRU list and TTL heap; the result
 * of isc_heap_insert() is deliberately not checked.
 *
 * For versioned adds, DNS_R_CNAMEANDOTHER is returned when the node now
 * has a CNAME together with other data.  Otherwise 'addedrdataset', if
 * not NULL, is bound to 'newheader' and ISC_R_SUCCESS is returned.
 *
 * NOTE(review): many short lines (braces, else, gotos, the declaration of
 * 'trust', simple assignments such as those to 'merge' and 'header') are
 * missing from this listing; verify the exact branching against the
 * complete source before changing this function.
 */
4963 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
4964 rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
4965 dns_rdataset_t *addedrdataset, isc_stdtime_t now)
4967 rbtdb_changed_t *changed = NULL;
4968 rdatasetheader_t *topheader, *topheader_prev, *header;
4969 unsigned char *merged;
4970 isc_result_t result;
4971 isc_boolean_t header_nx;
4972 isc_boolean_t newheader_nx;
4973 isc_boolean_t merge;
4974 dns_rdatatype_t rdtype, covers;
4975 rbtdb_rdatatype_t negtype;
4979 * Add an rdatasetheader_t to a node.
4983 * Caller must be holding the node lock.
4986 if ((options & DNS_DBADD_MERGE) != 0) {
4987 REQUIRE(rbtversion != NULL);
4992 if ((options & DNS_DBADD_FORCE) != 0)
4993 trust = dns_trust_ultimate;
4995 trust = newheader->trust;
4997 if (rbtversion != NULL && !loading) {
4999 * We always add a changed record, even if no changes end up
5000 * being made to this node, because it's harmless and
5001 * simplifies the code.
5003 changed = add_changed(rbtdb, rbtversion, rbtnode);
5004 if (changed == NULL) {
5005 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5006 return (ISC_R_NOMEMORY);
5010 newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5011 topheader_prev = NULL;
5014 if (rbtversion == NULL && !newheader_nx) {
5015 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5018 * We're adding a negative cache entry.
5020 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5021 if (covers == dns_rdatatype_any) {
5023 * We're adding an negative cache entry
5024 * which covers all types (NXDOMAIN,
5025 * NODATA(QTYPE=ANY)).
5027 * We make all other data stale so that the
5028 * only rdataset that can be found at this
5029 * node is the negative cache entry.
5031 for (topheader = rbtnode->data;
5033 topheader = topheader->next) {
5034 set_ttl(rbtdb, topheader, 0);
5035 topheader->attributes |=
5036 RDATASET_ATTR_STALE;
5041 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5044 * We're adding something that isn't a
5045 * negative cache entry. Look for an extant
5046 * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5049 for (topheader = rbtnode->data;
5051 topheader = topheader->next) {
5052 if (topheader->type ==
5053 RBTDB_RDATATYPE_NCACHEANY)
5056 if (topheader != NULL && EXISTS(topheader) &&
5057 topheader->rdh_ttl > now) {
5061 if (trust < topheader->trust) {
5063 * The NXDOMAIN/NODATA(QTYPE=ANY)
5066 free_rdataset(rbtdb,
5069 if (addedrdataset != NULL)
5070 bind_rdataset(rbtdb, rbtnode,
5073 return (DNS_R_UNCHANGED);
5076 * The new rdataset is better. Expire the
5077 * NXDOMAIN/NODATA(QTYPE=ANY).
5079 set_ttl(rbtdb, topheader, 0);
5080 topheader->attributes |= RDATASET_ATTR_STALE;
5085 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5089 for (topheader = rbtnode->data;
5091 topheader = topheader->next) {
5092 if (topheader->type == newheader->type ||
5093 topheader->type == negtype)
5095 topheader_prev = topheader;
5100 * If header isn't NULL, we've found the right type. There may be
5101 * IGNORE rdatasets between the top of the chain and the first real
5102 * data. We skip over them.
5105 while (header != NULL && IGNORE(header))
5106 header = header->down;
5107 if (header != NULL) {
5108 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5111 * Deleting an already non-existent rdataset has no effect.
5113 if (header_nx && newheader_nx) {
5114 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5115 return (DNS_R_UNCHANGED);
5119 * Trying to add an rdataset with lower trust to a cache DB
5120 * has no effect, provided that the cache data isn't stale.
5122 if (rbtversion == NULL && trust < header->trust &&
5123 (header->rdh_ttl > now || header_nx)) {
5124 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5125 if (addedrdataset != NULL)
5126 bind_rdataset(rbtdb, rbtnode, header, now,
5128 return (DNS_R_UNCHANGED);
5132 * Don't merge if a nonexistent rdataset is involved.
5134 if (merge && (header_nx || newheader_nx))
5138 * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5139 * that is the union of 'newheader' and 'header'.
5142 unsigned int flags = 0;
5143 INSIST(rbtversion->serial >= header->serial);
5145 result = ISC_R_SUCCESS;
5147 if ((options & DNS_DBADD_EXACT) != 0)
5148 flags |= DNS_RDATASLAB_EXACT;
5149 if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5150 newheader->rdh_ttl != header->rdh_ttl)
5151 result = DNS_R_NOTEXACT;
5152 else if (newheader->rdh_ttl != header->rdh_ttl)
5153 flags |= DNS_RDATASLAB_FORCE;
5154 if (result == ISC_R_SUCCESS)
5155 result = dns_rdataslab_merge(
5156 (unsigned char *)header,
5157 (unsigned char *)newheader,
5158 (unsigned int)(sizeof(*newheader)),
5160 rbtdb->common.rdclass,
5161 (dns_rdatatype_t)header->type,
5163 if (result == ISC_R_SUCCESS) {
5165 * If 'header' has the same serial number as
5166 * we do, we could clean it up now if we knew
5167 * that our caller had no references to it.
5168 * We don't know this, however, so we leave it
5169 * alone. It will get cleaned up when
5170 * clean_zone_node() runs.
5172 free_rdataset(rbtdb, rbtdb->common.mctx,
5174 newheader = (rdatasetheader_t *)merged;
5176 free_rdataset(rbtdb, rbtdb->common.mctx,
5182 * Don't replace existing NS, A and AAAA RRsets
5183 * in the cache if they are already exist. This
5184 * prevents named being locked to old servers.
5185 * Don't lower trust of existing record if the
5188 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5189 header->type == dns_rdatatype_ns &&
5190 !header_nx && !newheader_nx &&
5191 header->trust >= newheader->trust &&
5192 dns_rdataslab_equalx((unsigned char *)header,
5193 (unsigned char *)newheader,
5194 (unsigned int)(sizeof(*newheader)),
5195 rbtdb->common.rdclass,
5196 (dns_rdatatype_t)header->type)) {
5198 * Honour the new ttl if it is less than the
5201 if (header->rdh_ttl > newheader->rdh_ttl)
5202 set_ttl(rbtdb, header, newheader->rdh_ttl);
5203 if (header->noqname == NULL &&
5204 newheader->noqname != NULL) {
5205 header->noqname = newheader->noqname;
5206 newheader->noqname = NULL;
5208 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5209 if (addedrdataset != NULL)
5210 bind_rdataset(rbtdb, rbtnode, header, now,
5212 return (ISC_R_SUCCESS);
5214 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5215 (header->type == dns_rdatatype_a ||
5216 header->type == dns_rdatatype_aaaa) &&
5217 !header_nx && !newheader_nx &&
5218 header->trust >= newheader->trust &&
5219 dns_rdataslab_equal((unsigned char *)header,
5220 (unsigned char *)newheader,
5221 (unsigned int)(sizeof(*newheader)))) {
5223 * Honour the new ttl if it is less than the
5226 if (header->rdh_ttl > newheader->rdh_ttl)
5227 set_ttl(rbtdb, header, newheader->rdh_ttl);
5228 if (header->noqname == NULL &&
5229 newheader->noqname != NULL) {
5230 header->noqname = newheader->noqname;
5231 newheader->noqname = NULL;
5233 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5234 if (addedrdataset != NULL)
5235 bind_rdataset(rbtdb, rbtnode, header, now,
5237 return (ISC_R_SUCCESS);
5239 INSIST(rbtversion == NULL ||
5240 rbtversion->serial >= topheader->serial);
5241 if (topheader_prev != NULL)
5242 topheader_prev->next = newheader;
5244 rbtnode->data = newheader;
5245 newheader->next = topheader->next;
5248 * There are no other references to 'header' when
5249 * loading, so we MAY clean up 'header' now.
5250 * Since we don't generate changed records when
5251 * loading, we MUST clean up 'header' now.
5253 newheader->down = NULL;
5254 free_rdataset(rbtdb, rbtdb->common.mctx, header);
5256 newheader->down = topheader;
5257 topheader->next = newheader;
5259 if (changed != NULL)
5260 changed->dirty = ISC_TRUE;
5261 if (rbtversion == NULL) {
5262 set_ttl(rbtdb, header, 0);
5263 header->attributes |= RDATASET_ATTR_STALE;
5265 if (IS_CACHE(rbtdb)) {
5266 int idx = newheader->node->locknum;
5268 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5269 newheader, lru_link);
5272 * XXXMLG We don't check the return value
5273 * here. If it fails, we will not do TTL
5274 * based expiry on this node. However, we
5275 * will do it on the LRU side, so memory
5276 * will not leak... for long.
5278 isc_heap_insert(rbtdb->heaps[idx], newheader);
5283 * No non-IGNORED rdatasets of the given type exist at
5288 * If we're trying to delete the type, don't bother.
5291 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5292 return (DNS_R_UNCHANGED);
5295 if (topheader != NULL) {
5297 * We have an list of rdatasets of the given type,
5298 * but they're all marked IGNORE. We simply insert
5299 * the new rdataset at the head of the list.
5301 * Ignored rdatasets cannot occur during loading, so
5305 INSIST(rbtversion == NULL ||
5306 rbtversion->serial >= topheader->serial);
5307 if (topheader_prev != NULL)
5308 topheader_prev->next = newheader;
5310 rbtnode->data = newheader;
5311 newheader->next = topheader->next;
5312 newheader->down = topheader;
5313 topheader->next = newheader;
5315 if (changed != NULL)
5316 changed->dirty = ISC_TRUE;
5319 * No rdatasets of the given type exist at the node.
5321 newheader->next = rbtnode->data;
5322 newheader->down = NULL;
5323 rbtnode->data = newheader;
5325 if (IS_CACHE(rbtdb)) {
5326 int idx = newheader->node->locknum;
5327 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5328 newheader, lru_link);
5329 isc_heap_insert(rbtdb->heaps[idx], newheader);
5334 * Check if the node now contains CNAME and other data.
5336 if (rbtversion != NULL &&
5337 cname_and_other_data(rbtnode, rbtversion->serial))
5338 return (DNS_R_CNAMEANDOTHER);
5340 if (addedrdataset != NULL)
5341 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5343 return (ISC_R_SUCCESS);
5346 static inline isc_boolean_t
5347 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5348 rbtdb_rdatatype_t type)
5350 if (IS_CACHE(rbtdb)) {
5351 if (type == dns_rdatatype_dname)
5355 } else if (type == dns_rdatatype_dname ||
5356 (type == dns_rdatatype_ns &&
5357 (node != rbtdb->origin_node || IS_STUB(rbtdb))))
5362 static inline isc_result_t
5363 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5364 dns_rdataset_t *rdataset)
5366 struct noqname *noqname;
5367 isc_mem_t *mctx = rbtdb->common.mctx;
5369 dns_rdataset_t nsec, nsecsig;
5370 isc_result_t result;
5373 dns_name_init(&name, NULL);
5374 dns_rdataset_init(&nsec);
5375 dns_rdataset_init(&nsecsig);
5377 result = dns_rdataset_getnoqname(rdataset, &name, &nsec, &nsecsig);
5378 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5380 noqname = isc_mem_get(mctx, sizeof(*noqname));
5381 if (noqname == NULL) {
5382 result = ISC_R_NOMEMORY;
5385 dns_name_init(&noqname->name, NULL);
5386 noqname->nsec = NULL;
5387 noqname->nsecsig = NULL;
5388 result = dns_name_dup(&name, mctx, &noqname->name);
5389 if (result != ISC_R_SUCCESS)
5391 result = dns_rdataslab_fromrdataset(&nsec, mctx, &r, 0);
5392 if (result != ISC_R_SUCCESS)
5394 noqname->nsec = r.base;
5395 result = dns_rdataslab_fromrdataset(&nsecsig, mctx, &r, 0);
5396 if (result != ISC_R_SUCCESS)
5398 noqname->nsecsig = r.base;
5399 dns_rdataset_disassociate(&nsec);
5400 dns_rdataset_disassociate(&nsecsig);
5401 newheader->noqname = noqname;
5402 return (ISC_R_SUCCESS);
5405 dns_rdataset_disassociate(&nsec);
5406 dns_rdataset_disassociate(&nsecsig);
5407 free_noqname(mctx, &noqname);
5412 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5413 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
5414 dns_rdataset_t *addedrdataset)
5416 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5417 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5418 rbtdb_version_t *rbtversion = version;
5419 isc_region_t region;
5420 rdatasetheader_t *newheader;
5421 rdatasetheader_t *header;
5422 isc_result_t result;
5423 isc_boolean_t delegating;
5424 isc_boolean_t tree_locked = ISC_FALSE;
5426 REQUIRE(VALID_RBTDB(rbtdb));
5428 if (rbtversion == NULL) {
5430 isc_stdtime_get(&now);
5434 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5436 sizeof(rdatasetheader_t));
5437 if (result != ISC_R_SUCCESS)
5440 newheader = (rdatasetheader_t *)region.base;
5441 init_rdataset(rbtdb, newheader);
5442 set_ttl(rbtdb, newheader, rdataset->ttl + now);
5443 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5445 newheader->attributes = 0;
5446 newheader->noqname = NULL;
5447 newheader->count = init_count++;
5448 newheader->trust = rdataset->trust;
5449 newheader->additional_auth = NULL;
5450 newheader->additional_glue = NULL;
5451 newheader->last_used = now;
5452 newheader->node = rbtnode;
5453 if (rbtversion != NULL) {
5454 newheader->serial = rbtversion->serial;
5457 newheader->serial = 1;
5458 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
5459 newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
5460 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
5461 result = addnoqname(rbtdb, newheader, rdataset);
5462 if (result != ISC_R_SUCCESS) {
5463 free_rdataset(rbtdb, rbtdb->common.mctx,
5471 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
5472 * just DNAME for the cache), then we need to set the callback bit
5475 if (delegating_type(rbtdb, rbtnode, rdataset->type))
5476 delegating = ISC_TRUE;
5478 delegating = ISC_FALSE;
5481 * If we're adding a delegation type or the DB is a cache in an overmem
5482 * state, hold an exclusive lock on the tree. In the latter case
5483 * the lock does not necessarily have to be acquired but it will help
5484 * purge stale entries more effectively.
5486 if (delegating || (IS_CACHE(rbtdb) && rbtdb->overmem)) {
5487 tree_locked = ISC_TRUE;
5488 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
5491 if (IS_CACHE(rbtdb) && rbtdb->overmem)
5492 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
5494 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5495 isc_rwlocktype_write);
5497 if (rbtdb->rrsetstats != NULL) {
5498 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
5499 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
5502 if (IS_CACHE(rbtdb)) {
5504 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
5506 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
5507 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
5508 expire_header(rbtdb, header, tree_locked);
5511 * If we've been holding a write lock on the tree just for
5512 * cleaning, we can release it now. However, we still need the
5515 if (tree_locked && !delegating) {
5516 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
5517 tree_locked = ISC_FALSE;
5521 result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
5522 addedrdataset, now);
5523 if (result == ISC_R_SUCCESS && delegating)
5524 rbtnode->find_callback = 1;
5526 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5527 isc_rwlocktype_write);
5530 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
5533 * Update the zone's secure status. If version is non-NULL
5534 * this is deferred until closeversion() is called.
5536 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
5537 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
5543 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5544 dns_rdataset_t *rdataset, unsigned int options,
5545 dns_rdataset_t *newrdataset)
5547 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5548 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5549 rbtdb_version_t *rbtversion = version;
5550 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
5551 unsigned char *subresult;
5552 isc_region_t region;
5553 isc_result_t result;
5554 rbtdb_changed_t *changed;
5556 REQUIRE(VALID_RBTDB(rbtdb));
5558 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5560 sizeof(rdatasetheader_t));
5561 if (result != ISC_R_SUCCESS)
5563 newheader = (rdatasetheader_t *)region.base;
5564 init_rdataset(rbtdb, newheader);
5565 set_ttl(rbtdb, newheader, rdataset->ttl);
5566 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5568 newheader->attributes = 0;
5569 newheader->serial = rbtversion->serial;
5570 newheader->trust = 0;
5571 newheader->noqname = NULL;
5572 newheader->count = init_count++;
5573 newheader->additional_auth = NULL;
5574 newheader->additional_glue = NULL;
5575 newheader->last_used = 0;
5576 newheader->node = rbtnode;
5578 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5579 isc_rwlocktype_write);
5581 changed = add_changed(rbtdb, rbtversion, rbtnode);
5582 if (changed == NULL) {
5583 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5584 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5585 isc_rwlocktype_write);
5586 return (ISC_R_NOMEMORY);
5589 topheader_prev = NULL;
5590 for (topheader = rbtnode->data;
5592 topheader = topheader->next) {
5593 if (topheader->type == newheader->type)
5595 topheader_prev = topheader;
5598 * If header isn't NULL, we've found the right type. There may be
5599 * IGNORE rdatasets between the top of the chain and the first real
5600 * data. We skip over them.
5603 while (header != NULL && IGNORE(header))
5604 header = header->down;
5605 if (header != NULL && EXISTS(header)) {
5606 unsigned int flags = 0;
5608 result = ISC_R_SUCCESS;
5609 if ((options & DNS_DBSUB_EXACT) != 0) {
5610 flags |= DNS_RDATASLAB_EXACT;
5611 if (newheader->rdh_ttl != header->rdh_ttl)
5612 result = DNS_R_NOTEXACT;
5614 if (result == ISC_R_SUCCESS)
5615 result = dns_rdataslab_subtract(
5616 (unsigned char *)header,
5617 (unsigned char *)newheader,
5618 (unsigned int)(sizeof(*newheader)),
5620 rbtdb->common.rdclass,
5621 (dns_rdatatype_t)header->type,
5623 if (result == ISC_R_SUCCESS) {
5624 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5625 newheader = (rdatasetheader_t *)subresult;
5627 * We have to set the serial since the rdataslab
5628 * subtraction routine copies the reserved portion of
5629 * header, not newheader.
5631 newheader->serial = rbtversion->serial;
5633 * XXXJT: dns_rdataslab_subtract() copied the pointers
5634 * to additional info. We need to clear these fields
5635 * to avoid having duplicated references.
5637 newheader->additional_auth = NULL;
5638 newheader->additional_glue = NULL;
5639 } else if (result == DNS_R_NXRRSET) {
5641 * This subtraction would remove all of the rdata;
5642 * add a nonexistent header instead.
5644 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5645 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
5646 if (newheader == NULL) {
5647 result = ISC_R_NOMEMORY;
5650 set_ttl(rbtdb, newheader, 0);
5651 newheader->type = topheader->type;
5652 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
5653 newheader->trust = 0;
5654 newheader->serial = rbtversion->serial;
5655 newheader->noqname = NULL;
5656 newheader->count = 0;
5657 newheader->additional_auth = NULL;
5658 newheader->additional_glue = NULL;
5659 newheader->node = rbtnode;
5660 newheader->last_used = 0;
5662 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5667 * If we're here, we want to link newheader in front of
5670 INSIST(rbtversion->serial >= topheader->serial);
5671 if (topheader_prev != NULL)
5672 topheader_prev->next = newheader;
5674 rbtnode->data = newheader;
5675 newheader->next = topheader->next;
5676 newheader->down = topheader;
5677 topheader->next = newheader;
5679 changed->dirty = ISC_TRUE;
5682 * The rdataset doesn't exist, so we don't need to do anything
5683 * to satisfy the deletion request.
5685 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5686 if ((options & DNS_DBSUB_EXACT) != 0)
5687 result = DNS_R_NOTEXACT;
5689 result = DNS_R_UNCHANGED;
5692 if (result == ISC_R_SUCCESS && newrdataset != NULL)
5693 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
5696 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5697 isc_rwlocktype_write);
5700 * Update the zone's secure status. If version is non-NULL
5701 * this is deferred until closeversion() is called.
5703 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
5704 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
5710 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5711 dns_rdatatype_t type, dns_rdatatype_t covers)
5713 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5714 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5715 rbtdb_version_t *rbtversion = version;
5716 isc_result_t result;
5717 rdatasetheader_t *newheader;
5719 REQUIRE(VALID_RBTDB(rbtdb));
5721 if (type == dns_rdatatype_any)
5722 return (ISC_R_NOTIMPLEMENTED);
5723 if (type == dns_rdatatype_rrsig && covers == 0)
5724 return (ISC_R_NOTIMPLEMENTED);
5726 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
5727 if (newheader == NULL)
5728 return (ISC_R_NOMEMORY);
5729 set_ttl(rbtdb, newheader, 0);
5730 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
5731 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
5732 newheader->trust = 0;
5733 newheader->noqname = NULL;
5734 newheader->additional_auth = NULL;
5735 newheader->additional_glue = NULL;
5736 if (rbtversion != NULL)
5737 newheader->serial = rbtversion->serial;
5739 newheader->serial = 0;
5740 newheader->count = 0;
5741 newheader->last_used = 0;
5742 newheader->node = rbtnode;
5744 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5745 isc_rwlocktype_write);
5747 result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
5748 ISC_FALSE, NULL, 0);
5750 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5751 isc_rwlocktype_write);
5754 * Update the zone's secure status. If version is non-NULL
5755 * this is deferred until closeversion() is called.
5757 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
5758 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
5764 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
5765 rbtdb_load_t *loadctx = arg;
5766 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
5767 dns_rbtnode_t *node;
5768 isc_result_t result;
5769 isc_region_t region;
5770 rdatasetheader_t *newheader;
5773 * This routine does no node locking. See comments in
5774 * 'load' below for more information on loading and
5780 * SOA records are only allowed at top of zone.
5782 if (rdataset->type == dns_rdatatype_soa &&
5783 !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
5784 return (DNS_R_NOTZONETOP);
5786 add_empty_wildcards(rbtdb, name);
5788 if (dns_name_iswildcard(name)) {
5790 * NS record owners cannot legally be wild cards.
5792 if (rdataset->type == dns_rdatatype_ns)
5793 return (DNS_R_INVALIDNS);
5794 result = add_wildcard_magic(rbtdb, name);
5795 if (result != ISC_R_SUCCESS)
5800 result = dns_rbt_addnode(rbtdb->tree, name, &node);
5801 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
5803 if (result != ISC_R_EXISTS) {
5804 dns_name_t foundname;
5805 dns_name_init(&foundname, NULL);
5806 dns_rbt_namefromnode(node, &foundname);
5807 #ifdef DNS_RBT_USEHASH
5808 node->locknum = node->hashval % rbtdb->node_lock_count;
5810 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
5811 rbtdb->node_lock_count;
5815 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5817 sizeof(rdatasetheader_t));
5818 if (result != ISC_R_SUCCESS)
5820 newheader = (rdatasetheader_t *)region.base;
5821 init_rdataset(rbtdb, newheader);
5822 set_ttl(rbtdb, newheader,
5823 rdataset->ttl + loadctx->now); /* XXX overflow check */
5824 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5826 newheader->attributes = 0;
5827 newheader->trust = rdataset->trust;
5828 newheader->serial = 1;
5829 newheader->noqname = NULL;
5830 newheader->count = init_count++;
5831 newheader->additional_auth = NULL;
5832 newheader->additional_glue = NULL;
5833 /* won't be used, but initialize anyway */
5834 newheader->last_used = 0;
5835 newheader->node = node;
5837 result = add(rbtdb, node, rbtdb->current_version, newheader,
5838 DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
5839 if (result == ISC_R_SUCCESS &&
5840 delegating_type(rbtdb, node, rdataset->type))
5841 node->find_callback = 1;
5842 else if (result == DNS_R_UNCHANGED)
5843 result = ISC_R_SUCCESS;
5849 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
5850 rbtdb_load_t *loadctx;
5853 rbtdb = (dns_rbtdb_t *)db;
5855 REQUIRE(VALID_RBTDB(rbtdb));
5857 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
5858 if (loadctx == NULL)
5859 return (ISC_R_NOMEMORY);
5861 loadctx->rbtdb = rbtdb;
5862 if (IS_CACHE(rbtdb))
5863 isc_stdtime_get(&loadctx->now);
5867 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5869 REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
5871 rbtdb->attributes |= RBTDB_ATTR_LOADING;
5873 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5875 *addp = loading_addrdataset;
5878 return (ISC_R_SUCCESS);
5882 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
5883 rbtdb_load_t *loadctx;
5884 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5886 REQUIRE(VALID_RBTDB(rbtdb));
5887 REQUIRE(dbloadp != NULL);
5889 REQUIRE(loadctx->rbtdb == rbtdb);
5891 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5893 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
5894 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
5896 rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
5897 rbtdb->attributes |= RBTDB_ATTR_LOADED;
5899 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5902 * If there's a KEY rdataset at the zone origin containing a
5903 * zone key, we consider the zone secure.
5905 if (! IS_CACHE(rbtdb))
5906 rbtdb->secure = iszonesecure(db, rbtdb->origin_node);
5910 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
5912 return (ISC_R_SUCCESS);
5916 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
5917 dns_masterformat_t masterformat) {
5920 rbtdb = (dns_rbtdb_t *)db;
5922 REQUIRE(VALID_RBTDB(rbtdb));
5924 return (dns_master_dump2(rbtdb->common.mctx, db, version,
5925 &dns_master_style_default,
5926 filename, masterformat));
5930 delete_callback(void *data, void *arg) {
5931 dns_rbtdb_t *rbtdb = arg;
5932 rdatasetheader_t *current, *next;
5934 for (current = data; current != NULL; current = next) {
5935 next = current->next;
5936 free_rdataset(rbtdb, rbtdb->common.mctx, current);
5940 static isc_boolean_t
5941 issecure(dns_db_t *db) {
5943 isc_boolean_t secure;
5945 rbtdb = (dns_rbtdb_t *)db;
5947 REQUIRE(VALID_RBTDB(rbtdb));
5949 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5950 secure = rbtdb->secure;
5951 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5957 nodecount(dns_db_t *db) {
5961 rbtdb = (dns_rbtdb_t *)db;
5963 REQUIRE(VALID_RBTDB(rbtdb));
5965 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5966 count = dns_rbt_nodecount(rbtdb->tree);
5967 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
5973 settask(dns_db_t *db, isc_task_t *task) {
5976 rbtdb = (dns_rbtdb_t *)db;
5978 REQUIRE(VALID_RBTDB(rbtdb));
5980 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5981 if (rbtdb->task != NULL)
5982 isc_task_detach(&rbtdb->task);
5984 isc_task_attach(task, &rbtdb->task);
5985 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5988 static isc_boolean_t
5989 ispersistent(dns_db_t *db) {
5995 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
5996 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5997 dns_rbtnode_t *onode;
5998 isc_result_t result = ISC_R_SUCCESS;
6000 REQUIRE(VALID_RBTDB(rbtdb));
6001 REQUIRE(nodep != NULL && *nodep == NULL);
6003 /* Note that the access to origin_node doesn't require a DB lock */
6004 onode = (dns_rbtnode_t *)rbtdb->origin_node;
6005 if (onode != NULL) {
6006 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6007 new_reference(rbtdb, onode);
6008 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6010 *nodep = rbtdb->origin_node;
6012 INSIST(!IS_CACHE(rbtdb));
6013 result = ISC_R_NOTFOUND;
6019 static dns_stats_t *
6020 getrrsetstats(dns_db_t *db) {
6021 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6023 REQUIRE(VALID_RBTDB(rbtdb));
6024 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6026 return (rbtdb->rrsetstats);
/*
 * Method table that the create function installs in rbtdb->common.methods
 * for every database type other than dns_dbtype_cache (zone and stub).
 * NOTE(review): the table entries are not present in this listing.
 */
6029 static dns_dbmethods_t zone_methods = {
/*
 * Method table that the create function installs in rbtdb->common.methods
 * when the database type is dns_dbtype_cache.
 * NOTE(review): the table entries are not present in this listing.
 */
6062 static dns_dbmethods_t cache_methods = {
/*
 * Create an rbtdb database and return it through 'dbp'.
 *
 * Steps visible below: the dns_rbtdb_t is allocated from 'mctx' and zeroed;
 * cache_methods or zone_methods is selected from 'type'; the DB lock and the
 * tree lock are initialized; the node-lock array is allocated; for a cache
 * the rrset statistics, the per-bucket rdataset lists and the per-bucket
 * heaps are created; the dead-node lists and every node lock and reference
 * counter are initialized; 'mctx' is attached; 'origin' is copied; the tree
 * is created; a non-cache database gets its origin node and that node's
 * lock bucket; finally the reference count, flags and version 1 are set up
 * and the magic numbers are stored.
 *
 * Failures before the mctx is attached unwind through the cleanup code at
 * the end of the function; later failures call free_rbtdb().
 *
 * NOTE(review): lines shorter than about 20 characters (braces, short
 * declarations, the cleanup labels, several 'goto'/'return' statements and
 * the trailing arguments of isc_heap_create()) are missing from this
 * listing.  Confirm against the complete file before relying on details.
 */
6096 #ifdef DNS_RBTDB_VERSION64
6101 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6102 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6103 void *driverarg, dns_db_t **dbp)
6106 isc_result_t result;
6110 /* Keep the compiler happy. */
6115 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6117 return (ISC_R_NOMEMORY);
6119 memset(rbtdb, '\0', sizeof(*rbtdb));
6120 dns_name_init(&rbtdb->common.origin, NULL);
6121 rbtdb->common.attributes = 0;
/* Pick the method table and attribute bits from the requested type. */
6122 if (type == dns_dbtype_cache) {
6123 rbtdb->common.methods = &cache_methods;
6124 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
6125 } else if (type == dns_dbtype_stub) {
6126 rbtdb->common.methods = &zone_methods;
6127 rbtdb->common.attributes |= DNS_DBATTR_STUB;
6129 rbtdb->common.methods = &zone_methods;
6130 rbtdb->common.rdclass = rdclass;
6131 rbtdb->common.mctx = NULL;
6133 result = RBTDB_INITLOCK(&rbtdb->lock);
6134 if (result != ISC_R_SUCCESS)
6137 result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
6138 if (result != ISC_R_SUCCESS)
6142 * Initialize node_lock_count in a generic way to support future
6143 * extension which allows the user to specify this value on creation.
6144 * Note that when specified for a cache DB it must be larger than 1
6145 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
/* node_lock_count is 0 here because the structure was just zeroed. */
6147 if (rbtdb->node_lock_count == 0) {
6148 if (IS_CACHE(rbtdb))
6149 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
6151 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
6152 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
6153 result = ISC_R_RANGE;
6154 goto cleanup_tree_lock;
6156 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
6157 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
6158 sizeof(rbtdb_nodelock_t));
6159 if (rbtdb->node_locks == NULL) {
6160 result = ISC_R_NOMEMORY;
6161 goto cleanup_tree_lock;
/*
 * Cache-only structures: rrset statistics, one rdataset list per lock
 * bucket, and one heap per lock bucket ordered by ttl_sooner.  Heap slots
 * are set to NULL first so a partially built array can be torn down.
 */
6164 rbtdb->rrsetstats = NULL;
6165 if (IS_CACHE(rbtdb)) {
6166 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
6167 if (result != ISC_R_SUCCESS)
6168 goto cleanup_node_locks;
6169 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
6170 sizeof(rdatasetheaderlist_t));
6171 if (rbtdb->rdatasets == NULL) {
6172 result = ISC_R_NOMEMORY;
6173 goto cleanup_rrsetstats;
6175 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6176 ISC_LIST_INIT(rbtdb->rdatasets[i]);
6181 rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
6182 sizeof(isc_heap_t *));
6183 if (rbtdb->heaps == NULL) {
6184 result = ISC_R_NOMEMORY;
6185 goto cleanup_rdatasets;
6187 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6188 rbtdb->heaps[i] = NULL;
6189 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
6190 result = isc_heap_create(mctx, ttl_sooner,
6193 if (result != ISC_R_SUCCESS)
6197 rbtdb->rdatasets = NULL;
6198 rbtdb->heaps = NULL;
6201 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
6202 sizeof(rbtnodelist_t));
6203 if (rbtdb->deadnodes == NULL) {
6204 result = ISC_R_NOMEMORY;
6207 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6208 ISC_LIST_INIT(rbtdb->deadnodes[i]);
6210 rbtdb->active = rbtdb->node_lock_count;
/*
 * Initialize each bucket's lock and reference counter.  If the counter
 * cannot be initialized the lock just created is destroyed again; on any
 * failure the entries visible below undo locks and counters before the
 * jump to cleanup_deadnodes.
 */
6212 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
6213 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
6214 if (result == ISC_R_SUCCESS) {
6215 result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
6216 if (result != ISC_R_SUCCESS)
6217 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
6219 if (result != ISC_R_SUCCESS) {
6221 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
6222 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
6223 isc_refcount_destroy(&rbtdb->node_locks[i].references);
6225 goto cleanup_deadnodes;
6227 rbtdb->node_locks[i].exiting = ISC_FALSE;
6231 * Attach to the mctx. The database will persist so long as there
6232 * are references to it, and attaching to the mctx ensures that our
6233 * mctx won't disappear out from under us.
6235 isc_mem_attach(mctx, &rbtdb->common.mctx);
6238 * Must be initialized before free_rbtdb() is called.
6240 isc_ondestroy_init(&rbtdb->common.ondest);
6243 * Make a copy of the origin name.
6245 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
6246 if (result != ISC_R_SUCCESS) {
6247 free_rbtdb(rbtdb, ISC_FALSE, NULL);
6252 * Make the Red-Black Tree.
6254 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
6255 if (result != ISC_R_SUCCESS) {
6256 free_rbtdb(rbtdb, ISC_FALSE, NULL);
6260 * In order to set the node callback bit correctly in zone databases,
6261 * we need to know if the node has the origin name of the zone.
6262 * In loading_addrdataset() we could simply compare the new name
6263 * to the origin name, but this is expensive. Also, we don't know the
6264 * node name in addrdataset(), so we need another way of knowing the
6267 * We now explicitly create a node for the zone's origin, and then
6268 * we simply remember the node's address. This is safe, because
6269 * the top-of-zone node can never be deleted, nor can its address
6272 if (!IS_CACHE(rbtdb)) {
6273 rbtdb->origin_node = NULL;
6274 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
6275 &rbtdb->origin_node);
6276 if (result != ISC_R_SUCCESS) {
6277 INSIST(result != ISC_R_EXISTS);
6278 free_rbtdb(rbtdb, ISC_FALSE, NULL);
6282 * We need to give the origin node the right locknum.
6284 dns_name_init(&name, NULL);
6285 dns_rbt_namefromnode(rbtdb->origin_node, &name);
6286 #ifdef DNS_RBT_USEHASH
6287 rbtdb->origin_node->locknum =
6288 rbtdb->origin_node->hashval %
6289 rbtdb->node_lock_count;
6291 rbtdb->origin_node->locknum =
6292 dns_name_hash(&name, ISC_TRUE) %
6293 rbtdb->node_lock_count;
6298 * Misc. Initialization.
6300 result = isc_refcount_init(&rbtdb->references, 1);
6301 if (result != ISC_R_SUCCESS) {
6302 free_rbtdb(rbtdb, ISC_FALSE, NULL);
6305 rbtdb->attributes = 0;
6306 rbtdb->secure = ISC_FALSE;
6307 rbtdb->overmem = ISC_FALSE;
6311 * Version Initialization.
6313 rbtdb->current_serial = 1;
6314 rbtdb->least_serial = 1;
6315 rbtdb->next_serial = 2;
6316 rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
6317 if (rbtdb->current_version == NULL) {
6318 isc_refcount_decrement(&rbtdb->references, NULL);
6319 isc_refcount_destroy(&rbtdb->references);
6320 free_rbtdb(rbtdb, ISC_FALSE, NULL);
6321 return (ISC_R_NOMEMORY);
6323 rbtdb->future_version = NULL;
6324 ISC_LIST_INIT(rbtdb->open_versions);
6326 * Keep the current version in the open list so that list operation
6327 * won't happen in normal lookup operations.
6329 PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
6331 rbtdb->common.magic = DNS_DB_MAGIC;
6332 rbtdb->common.impmagic = RBTDB_MAGIC;
6334 *dbp = (dns_db_t *)rbtdb;
6336 return (ISC_R_SUCCESS);
/*
 * Error unwind for failures that happen before the mctx is attached.  The
 * statements release, in reverse order of acquisition: the dead-node lists,
 * the heaps and their array, the rdataset lists, the rrset statistics, the
 * node-lock array, the tree lock, the DB lock and finally the dns_rbtdb_t
 * itself.  The heap, rdataset and statistics steps are guarded by NULL
 * checks so they are harmless for non-cache databases.
 */
6339 isc_mem_put(mctx, rbtdb->deadnodes,
6340 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
6343 if (rbtdb->heaps != NULL) {
6344 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
6345 if (rbtdb->heaps[i] != NULL)
6346 isc_heap_destroy(&rbtdb->heaps[i]);
6347 isc_mem_put(mctx, rbtdb->heaps,
6348 rbtdb->node_lock_count * sizeof(isc_heap_t *));
6352 if (rbtdb->rdatasets != NULL)
6353 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
6354 sizeof(rdatasetheaderlist_t));
6356 if (rbtdb->rrsetstats != NULL)
6357 dns_stats_detach(&rbtdb->rrsetstats);
6360 isc_mem_put(mctx, rbtdb->node_locks,
6361 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
6364 isc_rwlock_destroy(&rbtdb->tree_lock);
6367 RBTDB_DESTROYLOCK(&rbtdb->lock);
6370 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
6376 * Slabbed Rdataset Methods
6380 rdataset_disassociate(dns_rdataset_t *rdataset) {
6381 dns_db_t *db = rdataset->private1;
6382 dns_dbnode_t *node = rdataset->private2;
6384 detachnode(db, &node);
6388 rdataset_first(dns_rdataset_t *rdataset) {
6389 unsigned char *raw = rdataset->private3; /* RDATASLAB */
6392 count = raw[0] * 256 + raw[1];
6394 rdataset->private5 = NULL;
6395 return (ISC_R_NOMORE);
6398 #if DNS_RDATASET_FIXED
6399 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
6400 raw += 2 + (4 * count);
6406 * The privateuint4 field is the number of rdata beyond the
6407 * cursor position, so we decrement the total count by one
6408 * before storing it.
6410 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
6411 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
6412 * to the first entry in the offset table.
6415 rdataset->privateuint4 = count;
6416 rdataset->private5 = raw;
6418 return (ISC_R_SUCCESS);
6422 rdataset_next(dns_rdataset_t *rdataset) {
6424 unsigned int length;
6425 unsigned char *raw; /* RDATASLAB */
6427 count = rdataset->privateuint4;
6429 return (ISC_R_NOMORE);
6431 rdataset->privateuint4 = count;
6434 * Skip forward one record (length + 4) or one offset (4).
6436 raw = rdataset->private5;
6437 #if DNS_RDATASET_FIXED
6438 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
6440 length = raw[0] * 256 + raw[1];
6442 #if DNS_RDATASET_FIXED
6444 rdataset->private5 = raw + 4; /* length(2) + order(2) */
6446 rdataset->private5 = raw + 2; /* length(2) */
6449 return (ISC_R_SUCCESS);
6453 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
6454 unsigned char *raw = rdataset->private5; /* RDATASLAB */
6455 #if DNS_RDATASET_FIXED
6456 unsigned int offset;
6460 REQUIRE(raw != NULL);
6463 * Find the start of the record if not already in private5
6464 * then skip the length and order fields.
6466 #if DNS_RDATASET_FIXED
6467 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
6468 offset = (raw[0] << 24) + (raw[1] << 16) +
6469 (raw[2] << 8) + raw[3];
6470 raw = rdataset->private3;
6474 r.length = raw[0] * 256 + raw[1];
6476 #if DNS_RDATASET_FIXED
6482 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
6486 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
6487 dns_db_t *db = source->private1;
6488 dns_dbnode_t *node = source->private2;
6489 dns_dbnode_t *cloned_node = NULL;
6491 attachnode(db, node, &cloned_node);
6495 * Reset iterator state.
6497 target->privateuint4 = 0;
6498 target->private5 = NULL;
6502 rdataset_count(dns_rdataset_t *rdataset) {
6503 unsigned char *raw = rdataset->private3; /* RDATASLAB */
6506 count = raw[0] * 256 + raw[1];
6512 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
6513 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
6515 dns_db_t *db = rdataset->private1;
6516 dns_dbnode_t *node = rdataset->private2;
6517 dns_dbnode_t *cloned_node;
6518 struct noqname *noqname = rdataset->private6;
6521 attachnode(db, node, &cloned_node);
6522 nsec->methods = &rdataset_methods;
6523 nsec->rdclass = db->rdclass;
6524 nsec->type = dns_rdatatype_nsec;
6526 nsec->ttl = rdataset->ttl;
6527 nsec->trust = rdataset->trust;
6528 nsec->private1 = rdataset->private1;
6529 nsec->private2 = rdataset->private2;
6530 nsec->private3 = noqname->nsec;
6531 nsec->privateuint4 = 0;
6532 nsec->private5 = NULL;
6533 nsec->private6 = NULL;
6536 attachnode(db, node, &cloned_node);
6537 nsecsig->methods = &rdataset_methods;
6538 nsecsig->rdclass = db->rdclass;
6539 nsecsig->type = dns_rdatatype_rrsig;
6540 nsecsig->covers = dns_rdatatype_nsec;
6541 nsecsig->ttl = rdataset->ttl;
6542 nsecsig->trust = rdataset->trust;
6543 nsecsig->private1 = rdataset->private1;
6544 nsecsig->private2 = rdataset->private2;
6545 nsecsig->private3 = noqname->nsecsig;
6546 nsecsig->privateuint4 = 0;
6547 nsecsig->private5 = NULL;
6548 nsec->private6 = NULL;
6550 dns_name_clone(&noqname->name, name);
6552 return (ISC_R_SUCCESS);
6556 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
6557 dns_rbtdb_t *rbtdb = rdataset->private1;
6558 dns_rbtnode_t *rbtnode = rdataset->private2;
6559 rdatasetheader_t *header = rdataset->private3;
6562 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6563 isc_rwlocktype_write);
6564 header->trust = rdataset->trust = trust;
6565 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6566 isc_rwlocktype_write);
6570 rdataset_expire(dns_rdataset_t *rdataset) {
6571 dns_rbtdb_t *rbtdb = rdataset->private1;
6572 dns_rbtnode_t *rbtnode = rdataset->private2;
6573 rdatasetheader_t *header = rdataset->private3;
6576 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6577 isc_rwlocktype_write);
6578 expire_header(rbtdb, header, ISC_FALSE);
6579 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6580 isc_rwlocktype_write);
6584 * Rdataset Iterator Methods
6588 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
6589 rbtdb_rdatasetiter_t *rbtiterator;
6591 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
6593 if (rbtiterator->common.version != NULL)
6594 closeversion(rbtiterator->common.db,
6595 &rbtiterator->common.version, ISC_FALSE);
6596 detachnode(rbtiterator->common.db, &rbtiterator->common.node);
6597 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
6598 sizeof(*rbtiterator));
6604 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
6605 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
6606 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
6607 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
6608 rbtdb_version_t *rbtversion = rbtiterator->common.version;
6609 rdatasetheader_t *header, *top_next;
6610 rbtdb_serial_t serial;
6613 if (IS_CACHE(rbtdb)) {
6615 now = rbtiterator->common.now;
6617 serial = rbtversion->serial;
6621 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6622 isc_rwlocktype_read);
6624 for (header = rbtnode->data; header != NULL; header = top_next) {
6625 top_next = header->next;
6627 if (header->serial <= serial && !IGNORE(header)) {
6629 * Is this a "this rdataset doesn't exist"
6630 * record? Or is it too old in the cache?
6632 * Note: unlike everywhere else, we
6633 * check for now > header->ttl instead
6634 * of now >= header->ttl. This allows
6635 * ANY and RRSIG queries for 0 TTL
6636 * rdatasets to work.
6638 if (NONEXISTENT(header) ||
6639 (now != 0 && now > header->rdh_ttl))
6643 header = header->down;
6644 } while (header != NULL);
6649 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6650 isc_rwlocktype_read);
6652 rbtiterator->current = header;
6655 return (ISC_R_NOMORE);
6657 return (ISC_R_SUCCESS);
6661 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
6662 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
6663 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
6664 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
6665 rbtdb_version_t *rbtversion = rbtiterator->common.version;
6666 rdatasetheader_t *header, *top_next;
6667 rbtdb_serial_t serial;
6669 rbtdb_rdatatype_t type, negtype;
6670 dns_rdatatype_t rdtype, covers;
6672 header = rbtiterator->current;
6674 return (ISC_R_NOMORE);
6676 if (IS_CACHE(rbtdb)) {
6678 now = rbtiterator->common.now;
6680 serial = rbtversion->serial;
6684 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6685 isc_rwlocktype_read);
6687 type = header->type;
6688 rdtype = RBTDB_RDATATYPE_BASE(header->type);
6690 covers = RBTDB_RDATATYPE_EXT(header->type);
6691 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
6693 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
6694 for (header = header->next; header != NULL; header = top_next) {
6695 top_next = header->next;
6697 * If not walking back up the down list.
6699 if (header->type != type && header->type != negtype) {
6701 if (header->serial <= serial &&
6704 * Is this a "this rdataset doesn't
6707 * Note: unlike everywhere else, we
6708 * check for now > header->ttl instead
6709 * of now >= header->ttl. This allows
6710 * ANY and RRSIG queries for 0 TTL
6711 * rdatasets to work.
6713 if ((header->attributes &
6714 RDATASET_ATTR_NONEXISTENT) != 0 ||
6715 (now != 0 && now > header->rdh_ttl))
6719 header = header->down;
6720 } while (header != NULL);
6726 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6727 isc_rwlocktype_read);
6729 rbtiterator->current = header;
6732 return (ISC_R_NOMORE);
6734 return (ISC_R_SUCCESS);
6738 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
6739 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
6740 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
6741 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
6742 rdatasetheader_t *header;
6744 header = rbtiterator->current;
6745 REQUIRE(header != NULL);
6747 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6748 isc_rwlocktype_read);
6750 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
6753 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6754 isc_rwlocktype_read);
6759 * Database Iterator Methods
6763 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
6764 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6765 dns_rbtnode_t *node = rbtdbiter->node;
6770 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
6771 reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
6775 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
6776 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6777 dns_rbtnode_t *node = rbtdbiter->node;
6783 lock = &rbtdb->node_locks[node->locknum].lock;
6784 NODE_LOCK(lock, isc_rwlocktype_read);
6785 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
6786 rbtdbiter->tree_locked, ISC_FALSE);
6787 NODE_UNLOCK(lock, isc_rwlocktype_read);
6789 rbtdbiter->node = NULL;
6793 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
6794 dns_rbtnode_t *node;
6795 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6796 isc_boolean_t was_read_locked = ISC_FALSE;
6800 if (rbtdbiter->delete != 0) {
6802 * Note that "%d node of %d in tree" can report things like
6803 * "flush_deletions: 59 nodes of 41 in tree". This means
6804 * That some nodes appear on the deletions list more than
6805 * once. Only the last occurence will actually be deleted.
6807 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
6808 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
6809 "flush_deletions: %d nodes of %d in tree",
6811 dns_rbt_nodecount(rbtdb->tree));
6813 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
6814 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6815 was_read_locked = ISC_TRUE;
6817 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6818 rbtdbiter->tree_locked = isc_rwlocktype_write;
6820 for (i = 0; i < rbtdbiter->delete; i++) {
6821 node = rbtdbiter->deletions[i];
6822 lock = &rbtdb->node_locks[node->locknum].lock;
6824 NODE_LOCK(lock, isc_rwlocktype_read);
6825 decrement_reference(rbtdb, node, 0,
6826 isc_rwlocktype_read,
6827 rbtdbiter->tree_locked, ISC_FALSE);
6828 NODE_UNLOCK(lock, isc_rwlocktype_read);
6831 rbtdbiter->delete = 0;
6833 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6834 if (was_read_locked) {
6835 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6836 rbtdbiter->tree_locked = isc_rwlocktype_read;
6839 rbtdbiter->tree_locked = isc_rwlocktype_none;
6845 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
6846 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6848 REQUIRE(rbtdbiter->paused);
6849 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
6851 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6852 rbtdbiter->tree_locked = isc_rwlocktype_read;
6854 rbtdbiter->paused = ISC_FALSE;
6858 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
6859 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
6860 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
6861 dns_db_t *db = NULL;
6863 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
6864 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6865 rbtdbiter->tree_locked = isc_rwlocktype_none;
6867 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
6869 dereference_iter_node(rbtdbiter);
6871 flush_deletions(rbtdbiter);
6873 dns_db_attach(rbtdbiter->common.db, &db);
6874 dns_db_detach(&rbtdbiter->common.db);
6876 dns_rbtnodechain_reset(&rbtdbiter->chain);
6877 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
6884 dbiterator_first(dns_dbiterator_t *iterator) {
6885 isc_result_t result;
6886 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6887 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6888 dns_name_t *name, *origin;
6890 if (rbtdbiter->result != ISC_R_SUCCESS &&
6891 rbtdbiter->result != ISC_R_NOMORE)
6892 return (rbtdbiter->result);
6894 if (rbtdbiter->paused)
6895 resume_iteration(rbtdbiter);
6897 dereference_iter_node(rbtdbiter);
6899 name = dns_fixedname_name(&rbtdbiter->name);
6900 origin = dns_fixedname_name(&rbtdbiter->origin);
6901 dns_rbtnodechain_reset(&rbtdbiter->chain);
6903 result = dns_rbtnodechain_first(&rbtdbiter->chain, rbtdb->tree, name,
6906 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
6907 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
6908 NULL, &rbtdbiter->node);
6909 if (result == ISC_R_SUCCESS) {
6910 rbtdbiter->new_origin = ISC_TRUE;
6911 reference_iter_node(rbtdbiter);
6914 INSIST(result == ISC_R_NOTFOUND);
6915 result = ISC_R_NOMORE; /* The tree is empty. */
6918 rbtdbiter->result = result;
6924 dbiterator_last(dns_dbiterator_t *iterator) {
6925 isc_result_t result;
6926 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6927 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6928 dns_name_t *name, *origin;
6930 if (rbtdbiter->result != ISC_R_SUCCESS &&
6931 rbtdbiter->result != ISC_R_NOMORE)
6932 return (rbtdbiter->result);
6934 if (rbtdbiter->paused)
6935 resume_iteration(rbtdbiter);
6937 dereference_iter_node(rbtdbiter);
6939 name = dns_fixedname_name(&rbtdbiter->name);
6940 origin = dns_fixedname_name(&rbtdbiter->origin);
6941 dns_rbtnodechain_reset(&rbtdbiter->chain);
6943 result = dns_rbtnodechain_last(&rbtdbiter->chain, rbtdb->tree, name,
6945 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
6946 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
6947 NULL, &rbtdbiter->node);
6948 if (result == ISC_R_SUCCESS) {
6949 rbtdbiter->new_origin = ISC_TRUE;
6950 reference_iter_node(rbtdbiter);
6953 INSIST(result == ISC_R_NOTFOUND);
6954 result = ISC_R_NOMORE; /* The tree is empty. */
6957 rbtdbiter->result = result;
6963 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
6964 isc_result_t result;
6965 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
6966 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
6967 dns_name_t *iname, *origin;
6969 if (rbtdbiter->result != ISC_R_SUCCESS &&
6970 rbtdbiter->result != ISC_R_NOMORE)
6971 return (rbtdbiter->result);
6973 if (rbtdbiter->paused)
6974 resume_iteration(rbtdbiter);
6976 dereference_iter_node(rbtdbiter);
6978 iname = dns_fixedname_name(&rbtdbiter->name);
6979 origin = dns_fixedname_name(&rbtdbiter->origin);
6980 dns_rbtnodechain_reset(&rbtdbiter->chain);
6982 result = dns_rbt_findnode(rbtdb->tree, name, NULL, &rbtdbiter->node,
6983 &rbtdbiter->chain, DNS_RBTFIND_EMPTYDATA,
6985 if (result == ISC_R_SUCCESS) {
6986 result = dns_rbtnodechain_current(&rbtdbiter->chain, iname,
6988 if (result == ISC_R_SUCCESS) {
6989 rbtdbiter->new_origin = ISC_TRUE;
6990 reference_iter_node(rbtdbiter);
6993 } else if (result == DNS_R_PARTIALMATCH)
6994 result = ISC_R_NOTFOUND;
6996 rbtdbiter->result = result;
7002 dbiterator_prev(dns_dbiterator_t *iterator) {
7003 isc_result_t result;
7004 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7005 dns_name_t *name, *origin;
7007 REQUIRE(rbtdbiter->node != NULL);
7009 if (rbtdbiter->result != ISC_R_SUCCESS)
7010 return (rbtdbiter->result);
7012 if (rbtdbiter->paused)
7013 resume_iteration(rbtdbiter);
7015 name = dns_fixedname_name(&rbtdbiter->name);
7016 origin = dns_fixedname_name(&rbtdbiter->origin);
7017 result = dns_rbtnodechain_prev(&rbtdbiter->chain, name, origin);
7019 dereference_iter_node(rbtdbiter);
7021 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
7022 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7023 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
7024 NULL, &rbtdbiter->node);
7027 if (result == ISC_R_SUCCESS)
7028 reference_iter_node(rbtdbiter);
7030 rbtdbiter->result = result;
7036 dbiterator_next(dns_dbiterator_t *iterator) {
7037 isc_result_t result;
7038 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7039 dns_name_t *name, *origin;
7041 REQUIRE(rbtdbiter->node != NULL);
7043 if (rbtdbiter->result != ISC_R_SUCCESS)
7044 return (rbtdbiter->result);
7046 if (rbtdbiter->paused)
7047 resume_iteration(rbtdbiter);
7049 name = dns_fixedname_name(&rbtdbiter->name);
7050 origin = dns_fixedname_name(&rbtdbiter->origin);
7051 result = dns_rbtnodechain_next(&rbtdbiter->chain, name, origin);
7053 dereference_iter_node(rbtdbiter);
7055 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
7056 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7057 result = dns_rbtnodechain_current(&rbtdbiter->chain, NULL,
7058 NULL, &rbtdbiter->node);
7060 if (result == ISC_R_SUCCESS)
7061 reference_iter_node(rbtdbiter);
7063 rbtdbiter->result = result;
7069 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
7072 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7073 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7074 dns_rbtnode_t *node = rbtdbiter->node;
7075 isc_result_t result;
7076 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
7077 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
7079 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
7080 REQUIRE(rbtdbiter->node != NULL);
7082 if (rbtdbiter->paused)
7083 resume_iteration(rbtdbiter);
7086 if (rbtdbiter->common.relative_names)
7088 result = dns_name_concatenate(nodename, origin, name, NULL);
7089 if (result != ISC_R_SUCCESS)
7091 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
7092 result = DNS_R_NEWORIGIN;
7094 result = ISC_R_SUCCESS;
7096 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
7097 new_reference(rbtdb, node);
7098 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
7100 *nodep = rbtdbiter->node;
7102 if (iterator->cleaning && result == ISC_R_SUCCESS) {
7103 isc_result_t expire_result;
7106 * If the deletion array is full, flush it before trying
7107 * to expire the current node. The current node can't
7108 * fully deleted while the iteration cursor is still on it.
7110 if (rbtdbiter->delete == DELETION_BATCH_MAX)
7111 flush_deletions(rbtdbiter);
7113 expire_result = expirenode(iterator->db, *nodep, 0);
7116 * expirenode() currently always returns success.
7118 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
7121 rbtdbiter->deletions[rbtdbiter->delete++] = node;
7122 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
7123 dns_rbtnode_refincrement(node, &refs);
7125 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
7133 dbiterator_pause(dns_dbiterator_t *iterator) {
7134 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7135 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7137 if (rbtdbiter->result != ISC_R_SUCCESS &&
7138 rbtdbiter->result != ISC_R_NOMORE)
7139 return (rbtdbiter->result);
7141 if (rbtdbiter->paused)
7142 return (ISC_R_SUCCESS);
7144 rbtdbiter->paused = ISC_TRUE;
7146 if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
7147 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
7148 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7149 rbtdbiter->tree_locked = isc_rwlocktype_none;
7152 flush_deletions(rbtdbiter);
7154 return (ISC_R_SUCCESS);
7158 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
7159 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7160 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
7162 if (rbtdbiter->result != ISC_R_SUCCESS)
7163 return (rbtdbiter->result);
7165 return (dns_name_copy(origin, name, NULL));
7169 * Additional cache routines.
7172 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
7173 dns_rdatatype_t qtype, dns_acache_t *acache,
7174 dns_zone_t **zonep, dns_db_t **dbp,
7175 dns_dbversion_t **versionp, dns_dbnode_t **nodep,
7176 dns_name_t *fname, dns_message_t *msg,
7179 dns_rbtdb_t *rbtdb = rdataset->private1;
7180 dns_rbtnode_t *rbtnode = rdataset->private2;
7181 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7182 unsigned int current_count = rdataset->privateuint4;
7184 rdatasetheader_t *header;
7185 nodelock_t *nodelock;
7186 unsigned int total_count;
7187 acachectl_t *acarray;
7188 dns_acacheentry_t *entry;
7189 isc_result_t result;
7191 UNUSED(qtype); /* we do not use this value at least for now */
7194 header = (struct rdatasetheader *)(raw - sizeof(*header));
7196 total_count = raw[0] * 256 + raw[1];
7197 INSIST(total_count > current_count);
7198 count = total_count - current_count - 1;
7202 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
7203 NODE_LOCK(nodelock, isc_rwlocktype_read);
7206 case dns_rdatasetadditional_fromauth:
7207 acarray = header->additional_auth;
7209 case dns_rdatasetadditional_fromcache:
7212 case dns_rdatasetadditional_fromglue:
7213 acarray = header->additional_glue;
7219 if (acarray == NULL) {
7220 if (type != dns_rdatasetadditional_fromcache)
7221 dns_acache_countquerymiss(acache);
7222 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
7223 return (ISC_R_NOTFOUND);
7226 if (acarray[count].entry == NULL) {
7227 dns_acache_countquerymiss(acache);
7228 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
7229 return (ISC_R_NOTFOUND);
7233 dns_acache_attachentry(acarray[count].entry, &entry);
7235 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
7237 result = dns_acache_getentry(entry, zonep, dbp, versionp,
7238 nodep, fname, msg, now);
7240 dns_acache_detachentry(&entry);
7246 acache_callback(dns_acacheentry_t *entry, void **arg) {
7248 dns_rbtnode_t *rbtnode;
7249 nodelock_t *nodelock;
7250 acachectl_t *acarray = NULL;
7251 acache_cbarg_t *cbarg;
7254 REQUIRE(arg != NULL);
7258 * The caller must hold the entry lock.
7261 rbtdb = (dns_rbtdb_t *)cbarg->db;
7262 rbtnode = (dns_rbtnode_t *)cbarg->node;
7264 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
7265 NODE_LOCK(nodelock, isc_rwlocktype_write);
7267 switch (cbarg->type) {
7268 case dns_rdatasetadditional_fromauth:
7269 acarray = cbarg->header->additional_auth;
7271 case dns_rdatasetadditional_fromglue:
7272 acarray = cbarg->header->additional_glue;
7278 count = cbarg->count;
7279 if (acarray != NULL && acarray[count].entry == entry) {
7280 acarray[count].entry = NULL;
7281 INSIST(acarray[count].cbarg == cbarg);
7282 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
7283 acarray[count].cbarg = NULL;
7285 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
7287 dns_acache_detachentry(&entry);
7289 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
7291 dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
7292 dns_db_detach((dns_db_t **)(void*)&rbtdb);
7298 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
7299 acache_cbarg_t **cbargp)
7301 acache_cbarg_t *cbarg;
7303 REQUIRE(mctx != NULL);
7304 REQUIRE(entry != NULL);
7305 REQUIRE(cbargp != NULL && *cbargp != NULL);
7309 dns_acache_cancelentry(entry);
7310 dns_db_detachnode(cbarg->db, &cbarg->node);
7311 dns_db_detach(&cbarg->db);
7313 isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
7319 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
7320 dns_rdatatype_t qtype, dns_acache_t *acache,
7321 dns_zone_t *zone, dns_db_t *db,
7322 dns_dbversion_t *version, dns_dbnode_t *node,
7325 dns_rbtdb_t *rbtdb = rdataset->private1;
7326 dns_rbtnode_t *rbtnode = rdataset->private2;
7327 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7328 unsigned int current_count = rdataset->privateuint4;
7329 rdatasetheader_t *header;
7330 unsigned int total_count, count;
7331 nodelock_t *nodelock;
7332 isc_result_t result;
7333 acachectl_t *acarray;
7334 dns_acacheentry_t *newentry, *oldentry = NULL;
7335 acache_cbarg_t *newcbarg, *oldcbarg = NULL;
7339 if (type == dns_rdatasetadditional_fromcache)
7340 return (ISC_R_SUCCESS);
7342 header = (struct rdatasetheader *)(raw - sizeof(*header));
7344 total_count = raw[0] * 256 + raw[1];
7345 INSIST(total_count > current_count);
7346 count = total_count - current_count - 1; /* should be private data */
7348 newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
7349 if (newcbarg == NULL)
7350 return (ISC_R_NOMEMORY);
7351 newcbarg->type = type;
7352 newcbarg->count = count;
7353 newcbarg->header = header;
7354 newcbarg->db = NULL;
7355 dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
7356 newcbarg->node = NULL;
7357 dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
7360 result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
7361 acache_callback, newcbarg, &newentry);
7362 if (result != ISC_R_SUCCESS)
7364 /* Set cache data in the new entry. */
7365 result = dns_acache_setentry(acache, newentry, zone, db,
7366 version, node, fname);
7367 if (result != ISC_R_SUCCESS)
7370 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
7371 NODE_LOCK(nodelock, isc_rwlocktype_write);
7375 case dns_rdatasetadditional_fromauth:
7376 acarray = header->additional_auth;
7378 case dns_rdatasetadditional_fromglue:
7379 acarray = header->additional_glue;
7385 if (acarray == NULL) {
7388 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
7389 sizeof(acachectl_t));
7391 if (acarray == NULL) {
7392 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
7396 for (i = 0; i < total_count; i++) {
7397 acarray[i].entry = NULL;
7398 acarray[i].cbarg = NULL;
7402 case dns_rdatasetadditional_fromauth:
7403 header->additional_auth = acarray;
7405 case dns_rdatasetadditional_fromglue:
7406 header->additional_glue = acarray;
7412 if (acarray[count].entry != NULL) {
7414 * Swap the entry. Delay cleaning-up the old entry since
7415 * it would require a node lock.
7417 oldentry = acarray[count].entry;
7418 INSIST(acarray[count].cbarg != NULL);
7419 oldcbarg = acarray[count].cbarg;
7421 acarray[count].entry = newentry;
7422 acarray[count].cbarg = newcbarg;
7424 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
7426 if (oldentry != NULL) {
7427 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
7428 dns_acache_detachentry(&oldentry);
7431 return (ISC_R_SUCCESS);
7434 if (newcbarg != NULL) {
7435 if (newentry != NULL) {
7436 acache_cancelentry(rbtdb->common.mctx, newentry,
7438 dns_acache_detachentry(&newentry);
7440 dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
7441 dns_db_detach(&newcbarg->db);
7442 isc_mem_put(rbtdb->common.mctx, newcbarg,
7451 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
7452 dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
7454 dns_rbtdb_t *rbtdb = rdataset->private1;
7455 dns_rbtnode_t *rbtnode = rdataset->private2;
7456 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7457 unsigned int current_count = rdataset->privateuint4;
7458 rdatasetheader_t *header;
7459 nodelock_t *nodelock;
7460 unsigned int total_count, count;
7461 acachectl_t *acarray;
7462 dns_acacheentry_t *entry;
7463 acache_cbarg_t *cbarg;
7465 UNUSED(qtype); /* we do not use this value at least for now */
7468 if (type == dns_rdatasetadditional_fromcache)
7469 return (ISC_R_SUCCESS);
7471 header = (struct rdatasetheader *)(raw - sizeof(*header));
7473 total_count = raw[0] * 256 + raw[1];
7474 INSIST(total_count > current_count);
7475 count = total_count - current_count - 1;
7480 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
7481 NODE_LOCK(nodelock, isc_rwlocktype_write);
7484 case dns_rdatasetadditional_fromauth:
7485 acarray = header->additional_auth;
7487 case dns_rdatasetadditional_fromglue:
7488 acarray = header->additional_glue;
7494 if (acarray == NULL) {
7495 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
7496 return (ISC_R_NOTFOUND);
7499 entry = acarray[count].entry;
7500 if (entry == NULL) {
7501 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
7502 return (ISC_R_NOTFOUND);
7505 acarray[count].entry = NULL;
7506 cbarg = acarray[count].cbarg;
7507 acarray[count].cbarg = NULL;
7509 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
7511 if (entry != NULL) {
7513 acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
7514 dns_acache_detachentry(&entry);
7517 return (ISC_R_SUCCESS);
7521 * Routines for LRU-based cache management.
7525 * See if a given cache entry that is being reused needs to be updated
7526 * in the LRU-list. From the LRU management point of view, this function is
7527 * expected to return true for almost all cases. When used with threads,
7528 * however, this may cause a non-negligible performance penalty because a
7529 * writer lock will have to be acquired before updating the list.
7530 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
7531 * function returns true if the entry has not been updated for some period of
7532 * time. We differentiate the NS or glue address case and the others since
7533 * experiments have shown that the former tends to be accessed relatively
7534 infrequently and the cost of cache miss is higher (e.g., a missing NS record
7535 * may cause external queries at a higher level zone, involving more
7538 * Caller must hold the node (read or write) lock.
7540 static inline isc_boolean_t
7541 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
7542 if ((header->attributes &
7543 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
7546 #if DNS_RBTDB_LIMITLRUUPDATE
7547 if (header->type == dns_rdatatype_ns ||
7548 (header->trust == dns_trust_glue &&
7549 (header->type == dns_rdatatype_a ||
7550 header->type == dns_rdatatype_aaaa))) {
7552 * Glue records are updated if at least 60 seconds have passed
7553 * since the previous update time.
7555 return (header->last_used + 60 <= now);
7558 /* Other records are updated if 5 minutes have passed. */
7559 return (header->last_used + 300 <= now);
7568 * Update the timestamp of a given cache entry and move it to the head
7569 * of the corresponding LRU list.
7571 * Caller must hold the node (write) lock.
7573 * Note that we do NOT touch the heap here, as the TTL has not changed.
7576 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
7579 /* To be checked: can we really assume this? XXXMLG */
7580 INSIST(ISC_LINK_LINKED(header, lru_link));
7582 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum],
7584 header->last_used = now;
7585 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum],
7590 * Purge some expired and/or stale (i.e. unused for some period) cache entries
7591 * under an overmem condition. To recover from this condition quickly, up to
7592 * 2 entries will be purged. This process is triggered while adding a new
7593 * entry, and we specifically avoid purging entries in the same LRU bucket as
7594 * the one to which the new entry will belong. Otherwise, we might purge
7595 * entries of the same name of different RR types while adding RRsets from a
7596 * single response (consider the case where we're adding A and AAAA glue records
7597 * of the same NS name).
7600 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
7601 isc_stdtime_t now, isc_boolean_t tree_locked)
7603 rdatasetheader_t *header, *header_prev;
7604 unsigned int locknum;
7607 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
7608 locknum != locknum_start && purgecount > 0;
7609 locknum = (locknum + 1) % rbtdb->node_lock_count) {
7610 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
7611 isc_rwlocktype_write);
7613 header = isc_heap_element(rbtdb->heaps[locknum], 1);
7614 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
7615 expire_header(rbtdb, header, tree_locked);
7619 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
7620 header != NULL && purgecount > 0;
7621 header = header_prev) {
7622 header_prev = ISC_LIST_PREV(header, lru_link);
7624 * Unlink the entry at this point to avoid checking it
7625 * again even if it's currently used someone else and
7626 * cannot be purged at this moment. This entry won't be
7627 * referenced any more (so unlinking is safe) since the
7628 * TTL was reset to 0.
7630 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
7632 expire_header(rbtdb, header, tree_locked);
7636 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
7637 isc_rwlocktype_write);
7642 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
7643 isc_boolean_t tree_locked)
7645 set_ttl(rbtdb, header, 0);
7646 header->attributes |= RDATASET_ATTR_STALE;
7647 header->node->dirty = 1;
7650 * Caller must hold the node (write) lock.
7653 if (dns_rbtnode_refcurrent(header->node) == 0) {
7655 * If no one else is using the node, we can clean it up now.
7656 * We first need to gain a new reference to the node to meet a
7657 * requirement of decrement_reference().
7659 new_reference(rbtdb, header->node);
7660 decrement_reference(rbtdb, header->node, 0,
7661 isc_rwlocktype_write,
7662 tree_locked ? isc_rwlocktype_write :
7663 isc_rwlocktype_none, ISC_FALSE);