/* rbtdb.c revision b56bd9b59f590ade778ac6621fb5bede4001d8ae */
/*
* Copyright (C) 2004-2016 Internet Systems Consortium, Inc. ("ISC")
* Copyright (C) 1999-2003 Internet Software Consortium.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
* INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
* LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
/*! \file */
/*
* Principal Author: Bob Halley
*/
#include <config.h>
/* #define inline */
#ifdef HAVE_INTTYPES_H
#include <inttypes.h> /* uintptr_t */
#endif
#include <isc/platform.h>
#include <isc/refcount.h>
#include <dns/callbacks.h>
#include <dns/dbiterator.h>
#include <dns/fixedname.h>
#include <dns/masterdump.h>
#include <dns/rdataset.h>
#include <dns/rdatasetiter.h>
#include <dns/rdataslab.h>
#include <dns/rdatastruct.h>
#ifndef WIN32
#include <sys/mman.h>
#else
#define PROT_READ 0x01
#define PROT_WRITE 0x02
#define MAP_PRIVATE 0x0002
#define MAP_FAILED ((void *)-1)
#endif
#ifdef DNS_RBTDB_VERSION64
#include "rbtdb64.h"
#else
#include "rbtdb.h"
#endif
#ifdef DNS_RBTDB_VERSION64
#else
#endif
} while (0)
/*
* This is the map file header for RBTDB images. It is populated, and then
* written, as the LAST thing done to the file. Writing this last (with
* zeros in the header area initially) will ensure that the header is only
* valid when the RBTDB image is also valid.
*/
typedef struct rbtdb_file_header rbtdb_file_header_t;
/* Header length, always the same size regardless of structure size */
#define RBTDB_HEADER_LENGTH 1024
struct rbtdb_file_header {
char version1[32];
unsigned int bigendian:1;
};
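/*
 * A minimal sketch of the write-last idea described above, using plain
 * stdio: reserve RBTDB_HEADER_LENGTH zero bytes up front, write the image,
 * then seek back and write the real header.  This is only an illustration;
 * the actual serialization is done by the serialize()/rbt_datawriter()
 * routines later in this file, and the helper name below is hypothetical.
 */
#if 0
static isc_result_t
sketch_write_image(FILE *f, const void *image, size_t image_size,
		   const rbtdb_file_header_t *header)
{
	char zeros[RBTDB_HEADER_LENGTH] = { 0 };

	/* Reserve the header area; an all-zero header is never valid. */
	if (fwrite(zeros, 1, sizeof(zeros), f) != sizeof(zeros))
		return (ISC_R_FAILURE);

	/* Write the RBTDB image itself. */
	if (fwrite(image, 1, image_size, f) != image_size)
		return (ISC_R_FAILURE);

	/* Only now go back and fill in the real header. */
	if (fseek(f, 0L, SEEK_SET) != 0 ||
	    fwrite(header, 1, sizeof(*header), f) != sizeof(*header))
		return (ISC_R_FAILURE);

	return (ISC_R_SUCCESS);
}
#endif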
/*%
* Note that "impmagic" is not the first four bytes of the struct, so
* ISC_MAGIC_VALID cannot be used.
*/
#ifdef DNS_RBTDB_VERSION64
typedef isc_uint64_t rbtdb_serial_t;
/*%
* Make casting easier in symbolic debuggers by using different names
* for the 64 bit version.
*/
#define dns_rbtdb_t dns_rbtdb64_t
#define rdatasetheader_t rdatasetheader64_t
#define rbtdb_version_t rbtdb_version64_t
#define FILE_VERSION FILE_VERSION64
#define init_count init_count64
#define cache_methods cache_methods64
#define rdataset_methods rdataset_methods64
#define slab_methods slab_methods64
#define zone_methods zone_methods64
#define acache_callback acache_callback64
#define activeempty activeempty64
#define activeemtpynode activeemtpynode64
#define add_changed add_changed64
#define addrdataset addrdataset64
#define allrdatasets allrdatasets64
#define attachnode attachnode64
#define attachversion attachversion64
#define beginload beginload64
#define bind_rdataset bind_rdataset64
#define cache_find cache_find64
#define cache_findzonecut cache_findzonecut64
#define closeversion closeversion64
#define createiterator createiterator64
#define currentversion currentversion64
#define dbiterator_first dbiterator_first64
#define dbiterator_last dbiterator_last64
#define dbiterator_next dbiterator_next64
#define dbiterator_origin dbiterator_origin64
#define dbiterator_pause dbiterator_pause64
#define dbiterator_prev dbiterator_prev64
#define dbiterator_seek dbiterator_seek64
#define delete_callback delete_callback64
#define delete_node delete_node64
#define deleterdataset deleterdataset64
#define deserialize32 deserialize64
#define detachnode detachnode64
#define expire_header expire_header64
#define expirenode expirenode64
#define find_closest_nsec find_closest_nsec64
#define find_coveringnsec find_coveringnsec64
#define findnode findnode64
#define findnodeintree findnodeintree64
#define findnsec3node findnsec3node64
#define flush_deletions flush_deletions64
#define free_acachearray free_acachearray64
#define free_noqname free_noqname64
#define free_rbtdb free_rbtdb64
#define free_rdataset free_rdataset64
#define getoriginnode getoriginnode64
#define getrrsetstats getrrsetstats64
#define getsigningtime getsigningtime64
#define hashsize hashsize64
#define init_file_version init_file_version64
#define isdnssec isdnssec64
#define ispersistent ispersistent64
#define issecure issecure64
#define iszonesecure iszonesecure64
#define loadnode loadnode64
#define matchparams matchparams64
#define maybe_free_rbtdb maybe_free_rbtdb64
#define new_reference new_reference64
#define newversion newversion64
#define nodecount nodecount64
#define printnode printnode64
#define prune_tree prune_tree64
#define rbt_datafixer rbt_datafixer64
#define rbt_datawriter rbt_datawriter64
#define rdataset_clone rdataset_clone64
#define rdataset_count rdataset_count64
#define rdataset_current rdataset_current64
#define rdataset_expire rdataset_expire64
#define rdataset_first rdataset_first64
#define rdataset_next rdataset_next64
#define rdataset_settrust rdataset_settrust64
#define rdatasetiter_next rdatasetiter_next64
#define reactivate_node reactivate_node64
#define resign_delete resign_delete64
#define resign_insert resign_insert64
#define resign_sooner resign_sooner64
#define resigned resigned64
#define rpz_attach rpz_attach64
#define rpz_ready rpz_ready64
#define serialize serialize64
#define set_index set_index64
#define setcachestats setcachestats64
#define setownercase setownercase64
#define setsigningtime setsigningtime64
#define setup_delegation setup_delegation64
#define subtractrdataset subtractrdataset64
#define ttl_sooner ttl_sooner64
#define update_cachestats update_cachestats64
#define update_header update_header64
#define update_newheader update_newheader64
#define update_rrsetstats update_rrsetstats64
#define zone_find zone_find64
#define zone_findrdataset zone_findrdataset64
#define zone_findzonecut zone_findzonecut64
#else
typedef isc_uint32_t rbtdb_serial_t;
#endif
typedef isc_uint32_t rbtdb_rdatatype_t;
#define RBTDB_RDATATYPE_SIGNSEC \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
#define RBTDB_RDATATYPE_SIGNSEC3 \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
#define RBTDB_RDATATYPE_SIGNS \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
#define RBTDB_RDATATYPE_SIGCNAME \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
#define RBTDB_RDATATYPE_SIGDNAME \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
#define RBTDB_RDATATYPE_SIGDDS \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds)
#define RBTDB_RDATATYPE_NCACHEANY \
		RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
/*
* We use rwlock for the DB lock only when ISC_RWLOCK_USEATOMIC is nonzero.
* Using rwlock is effective with regard to lookup performance only when
* it is implemented in an efficient way.
* Otherwise, it is generally wise to stick with simple locking, since rwlock
* would require more memory or could even make lookups slower due to its own
* overhead (when it internally calls mutex locks).
*/
#ifdef ISC_RWLOCK_USEATOMIC
#define DNS_RBTDB_USERWLOCK 1
#else
#define DNS_RBTDB_USERWLOCK 0
#endif
#if DNS_RBTDB_USERWLOCK
#define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
#define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
#define RBTDB_LOCK(l, t) RWLOCK((l), (t))
#define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
#else
#define RBTDB_INITLOCK(l) isc_mutex_init(l)
#define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
#define RBTDB_LOCK(l, t) LOCK(l)
#define RBTDB_UNLOCK(l, t) UNLOCK(l)
#endif
/*
* Since node locking is sensitive to both performance and memory footprint,
* we need some trick here. If we have both a high-performance rwlock and
* high-performance, small-memory reference counters, we use the rwlock for
* the node lock and isc_refcount for node references. In this case, we don't
* have to protect access to the counters with locks.
* Otherwise, we simply use an ordinary mutex for node locking, and plain
* integers as reference counters, protected by that lock.
* In most cases, we can simply use wrapper macros such as NODE_LOCK and
* NODE_UNLOCK. In some other cases, however, we need to protect reference
* counters first and then protect other parts of a node as read-only data.
* Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
* provided for these special cases. When we can use the efficient backend
* routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
* Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
* section including the access to the reference counter.
* Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
* section is also protected by NODE_STRONGLOCK().
*/
#if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
typedef isc_rwlock_t nodelock_t;
#define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
#define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
#define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
#define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
#define NODE_STRONGLOCK(l) ((void)0)
#define NODE_STRONGUNLOCK(l) ((void)0)
#define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
#define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
#define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
#else
typedef isc_mutex_t nodelock_t;
#define NODE_INITLOCK(l) isc_mutex_init(l)
#define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
#define NODE_UNLOCK(l, t) UNLOCK(l)
#define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
#define NODE_STRONGLOCK(l) LOCK(l)
#define NODE_STRONGUNLOCK(l) UNLOCK(l)
#define NODE_WEAKLOCK(l, t) ((void)0)
#define NODE_WEAKUNLOCK(l, t) ((void)0)
#define NODE_WEAKDOWNGRADE(l) ((void)0)
#endif
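/*
 * A minimal sketch of the two reference-counting patterns described above.
 * With the efficient rwlock/refcount backend, NODE_STRONGLOCK() is a no-op
 * and a weak (read) lock plus an atomic counter suffices; with the mutex
 * fallback the weak macros are no-ops and NODE_STRONGLOCK() is a real mutex
 * that also protects the plain-integer counter.  The helper name is
 * hypothetical and dns_rbtnode_refincrement() is used as declared in
 * <dns/rbt.h>.
 */
#if 0
static void
sketch_ref_node(dns_rbtnode_t *node, nodelock_t *nodelock) {
	unsigned int refs;

	NODE_STRONGLOCK(nodelock);
	NODE_WEAKLOCK(nodelock, isc_rwlocktype_read);
	/* Read other node members and bump the reference count. */
	dns_rbtnode_refincrement(node, &refs);
	NODE_WEAKUNLOCK(nodelock, isc_rwlocktype_read);
	NODE_STRONGUNLOCK(nodelock);

	UNUSED(refs);
}
#endif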
/*%
* Whether to rate-limit updating the LRU to avoid possible thread contention.
* Our performance measurements have shown the cost is marginal, so it is
* defined to 0 by default, with or without threads.
*/
#ifndef DNS_RBTDB_LIMITLRUUPDATE
#define DNS_RBTDB_LIMITLRUUPDATE 0
#endif
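/*
 * A minimal sketch of what rate-limited LRU updating means: an entry's
 * position in the LRU list is refreshed only if it has not been touched
 * for a while.  The 'last_used' field of the rdatasetheader defined below
 * and the ten-second interval are assumptions made for illustration only.
 */
#if 0
static isc_boolean_t
sketch_need_lru_update(const rdatasetheader_t *header, isc_stdtime_t now) {
	const isc_stdtime_t interval = 10;	/* seconds; arbitrary choice */

	/* Only touch the LRU list if the entry hasn't been used recently. */
	return (ISC_TF(header->last_used + interval <= now));
}
#endif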
/*
* Allow clients with a virtual time of up to 5 minutes in the past to see
* records that would otherwise have expired.
*/
#define RBTDB_VIRTUAL 300
struct noqname {
void * neg;
void * negsig;
};
typedef struct acachectl acachectl_t;
typedef struct rdatasetheader {
/*%
* Locked by the owning node's lock.
*/
unsigned int is_mmapped : 1;
unsigned int next_is_relative : 1;
unsigned int node_is_relative : 1;
unsigned int resign_lsb : 1;
/*%<
* We don't use the LIST macros, because the LIST structure has
* both head and tail pointers, and is doubly linked.
*/
struct rdatasetheader *next;
/*%<
* If this is the top header for an rdataset, 'next' points
* to the top header for the next rdataset (i.e., the next type).
* Otherwise, it points up to the header whose down pointer points
* at this header.
*/
struct rdatasetheader *down;
/*%<
* Points to the header for the next older version of
* this rdataset.
*/
/*%<
* Monotonically increased every time this rdataset is bound, so that
* it can be used as the base of the starting point in DNS responses
* when the "cyclic" rrset-order is required. Since the ordering
* should not be so crucial, no lock is set for the counter for
* performance reasons.
*/
unsigned int heap_index;
/*%<
* Used for TTL-based cache cleaning.
*/
/*%<
* Case vector. If the bit is set then the corresponding
* character in the owner name needs to have its 0x20 bit cleared
* (i.e., AND'd with ~0x20), rendering that character upper case.
*/
unsigned char upper[32];
} rdatasetheader_t;
#define RDATASET_ATTR_NONEXISTENT 0x0001
#define RDATASET_ATTR_STALE 0x0002
#define RDATASET_ATTR_IGNORE 0x0004
#define RDATASET_ATTR_RETAIN 0x0008
#define RDATASET_ATTR_NXDOMAIN 0x0010
#define RDATASET_ATTR_RESIGN 0x0020
#define RDATASET_ATTR_STATCOUNT 0x0040
#define RDATASET_ATTR_OPTOUT 0x0080
#define RDATASET_ATTR_NEGATIVE 0x0100
#define RDATASET_ATTR_PREFETCH 0x0200
#define RDATASET_ATTR_CASESET 0x0400
#define RDATASET_ATTR_ZEROTTL 0x0800
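/*
 * A minimal sketch of how the 'next' and 'down' pointers described above
 * tie the headers together: 'next' links the top header of each rdataset
 * (i.e., each type) at a node, while 'down' links progressively older
 * versions of the same rdataset.  The helper name is hypothetical.
 */
#if 0
static void
sketch_walk_headers(rdatasetheader_t *top) {
	rdatasetheader_t *header, *dcur;

	for (header = top; header != NULL; header = header->next) {
		/* 'header' is the newest header for one type. */
		for (dcur = header->down; dcur != NULL; dcur = dcur->down) {
			/* 'dcur' is an older version of that rdataset. */
		}
	}
}
#endif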
typedef struct acache_cbarg {
unsigned int count;
struct acachectl {
};
/*
* XXX
* When the cache will pre-expire data (due to memory low or other
* situations) before the rdataset's TTL has expired, it MUST
* respect the RETAIN bit and not expire the data until its TTL is
* expired.
*/
#define NONEXISTENT(header) \
	(((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
/*%
* Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
* There is a tradeoff in configuring this value: if it is too small, it may
* cause heavier contention between threads; if it is too large, the LRU purge
* algorithm won't work well (entries tend to be purged prematurely).
* The default value should work well for most environments, but it can
* also be configured at compile time via the
* DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
* 1 due to the assumption of overmem_purge().
*/
#ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
#if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
#error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
#else
#define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
#endif
#else
#define DEFAULT_CACHE_NODE_LOCK_COUNT 16
#endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
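/*
 * A minimal sketch of how a node is mapped to one of the lock buckets
 * configured above.  Deriving the bucket from the node's hash value with a
 * simple modulo is an assumption made for illustration; the helper name is
 * hypothetical.
 */
#if 0
static unsigned int
sketch_lock_bucket(unsigned int hashval, unsigned int node_lock_count) {
	/* node_lock_count must be larger than 1; see the #error above. */
	return (hashval % node_lock_count);
}
#endif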
typedef struct {
/* Protected in the refcount routines. */
/* Locked by lock. */
typedef struct rbtdb_changed {
typedef enum {
typedef struct dns_rbtdb dns_rbtdb_t;
/* Reason for expiring a record from cache */
typedef enum {
} expire_t;
typedef struct rbtdb_version {
/* Not locked */
dns_rbtdb_t * rbtdb;
/*
* Protected in the refcount routines.
* XXXJT: should we change the lock policy based on the refcount
* performance?
*/
/* Locked by database lock. */
/* NSEC3 parameters */
unsigned char salt[DNS_NSEC3_SALTSIZE];
struct dns_rbtdb {
/* Unlocked. */
/* Locks the data in this struct */
#else
#endif
/* Locks the tree structure (prevents nodes appearing/disappearing) */
/* Locks for individual tree nodes */
unsigned int node_lock_count;
/* Locked by lock. */
unsigned int active;
unsigned int attributes;
isc_task_t * task;
/*
* This is a linked list used to implement the LRU cache. There will
* be node_lock_count linked lists here. Nodes in bucket 1 will be
* placed on the linked list rdatasets[1].
*/
/*%
* Temporary storage for stale cache nodes and dynamically deleted
* nodes that await being cleaned up.
*/
/*
* Heaps. These are used for TTL based expiry in a cache,
* or for zone resigning in a zone DB. hmctx is the memory
* context to use for the heap (which differs from the main
* database memory context in the case of a cache).
*/
isc_heap_t **heaps;
/*
* Base values for the mmap() code.
*/
void * mmap_location;
/* Locked by tree_lock. */
/* Unlocked */
unsigned int quantum;
};
#define RBTDB_ATTR_LOADED 0x01
#define RBTDB_ATTR_LOADING 0x02
/*%
* Search Context
*/
typedef struct {
dns_rbtdb_t * rbtdb;
unsigned int options;
/*%
* Load Context
*/
typedef struct {
dns_rbtdb_t * rbtdb;
} rbtdb_load_t;
dns_zone_t **zonep,
dns_name_t *fname);
const dns_name_t *name);
dns_name_t *name);
static dns_rdatasetmethods_t rdataset_methods = {
NULL,
NULL,
};
static dns_rdatasetmethods_t slab_methods = {
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
};
static dns_rdatasetitermethods_t rdatasetiter_methods = {
};
typedef struct rbtdb_rdatasetiter {
dns_name_t *name);
dns_name_t *name);
dns_name_t *name);
static dns_dbiteratormethods_t dbiterator_methods = {
};
#define DELETION_BATCH_MAX 64
/*
* If 'paused' is ISC_TRUE, then the tree lock is not being held.
*/
typedef struct rbtdb_dbiterator {
int delete;
isc_event_t *event);
/* Pad to 32 bytes */
/*%
* 'init_count' is used to initialize 'newheader->count', which in turn
* is used to determine where in the cycle the "cyclic" rrset-order starts.
* We don't lock this as we don't care about simultaneous updates.
*
* Note:
* Both init_count and header->count can be ISC_UINT32_MAX.
* The count on the returned rdataset however can't be as
* that indicates that the database does not implement cyclic
* processing.
*/
static unsigned int init_count;
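/*
 * A minimal sketch of how the bind count can seed the "cyclic" rrset-order:
 * the starting position is the count modulo the number of RRs in the set.
 * The modulo computation and the helper name are assumptions made for
 * illustration; ISC_UINT32_MAX marks "cyclic processing not supported".
 */
#if 0
static unsigned int
sketch_cyclic_start(isc_uint32_t count, unsigned int nrdatas) {
	if (count == ISC_UINT32_MAX || nrdatas == 0)
		return (0);	/* no cyclic processing available */
	return ((unsigned int)(count % nrdatas));
}
#endif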
/*
* Locking
*
* If a routine is going to lock more than one lock in this module, then
* the locking must be done in the following order:
*
* Tree Lock
*
* Node Lock (Only one from the set may be locked at one time by
* any caller)
*
* Database Lock
*
* Failure to follow this hierarchy can result in deadlock.
*/
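/*
 * A minimal sketch of the locking order documented above: acquire the tree
 * lock, then (at most) one node lock, then the database lock, and release
 * them in the reverse order.  The member names 'tree_lock',
 * 'node_locks[].lock' and 'lock' follow this file's conventions but are
 * assumptions as written here; the helper name is hypothetical.
 */
#if 0
static void
sketch_lock_order(dns_rbtdb_t *rbtdb, unsigned int bucket) {
	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);		/* 1: tree */
	NODE_LOCK(&rbtdb->node_locks[bucket].lock,		/* 2: node */
		  isc_rwlocktype_read);
	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);		/* 3: database */

	/* ... critical section ... */

	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
	NODE_UNLOCK(&rbtdb->node_locks[bucket].lock, isc_rwlocktype_read);
	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
}
#endif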
/*
* Deleting Nodes
*
* For zone databases the node for the origin of the zone MUST NOT be deleted.
*/
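/*
 * A minimal sketch of the rule above: for zone databases the origin node is
 * never a deletion candidate.  The 'origin_node' member name is an
 * assumption here and the helper name is hypothetical.
 */
#if 0
static isc_boolean_t
sketch_may_delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
	/* The node for the origin of the zone MUST NOT be deleted. */
	return (ISC_TF(node != rbtdb->origin_node));
}
#endif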
/*
* Debugging routines
*/
#ifdef DEBUG
static void
isc_buffer_t b;
isc_region_t r;
do {
isc_buffer_putuint8(&b, 0);
} while (size > 0);
}
#endif
/*
* DB Routines
*/
static void
}
static void
}
static void
return;
switch (result) {
case ISC_R_SUCCESS:
case DNS_R_CNAME:
case DNS_R_DNAME:
case DNS_R_DELEGATION:
case DNS_R_NCACHENXDOMAIN:
case DNS_R_NCACHENXRRSET:
break;
default:
}
}
static void
{
dns_rdatastatstype_t base = 0;
/* At the moment we count statistics only for cache DB */
else {
}
} else
if (increment)
else
}
static void
int idx;
return;
}
/*
* It's possible the rbtdb is not a cache. If this is the case,
* we will not have a heap, and we move on. If we do, though,
* we might need to adjust things.
*/
return;
return;
else
}
/*%
* These functions allow the heap code to rank the priority of each
* element. Each returns ISC_TRUE if v1 happens "sooner" than v2.
*/
static isc_boolean_t
return (ISC_TRUE);
return (ISC_FALSE);
}
static isc_boolean_t
return (ISC_TRUE);
return (ISC_FALSE);
}
/*%
* This function sets the heap index into the header.
*/
static void
rdatasetheader_t *h = what;
h->heap_index = idx;
}
/*%
* Work out how many nodes can be deleted in the time between two
* requests to the nameserver. Smooth the resulting number and use it
* as an estimate for the number of nodes to be deleted in the next
* iteration.
*/
static unsigned int
unsigned int interval;
unsigned int new;
if (pps < 100)
pps = 100;
isc_time_now(&end);
if (interval == 0)
interval = 1;
if (usecs == 0) {
/*
* We were unable to measure the amount of time taken.
* Double the nodes deleted next time.
*/
old *= 2;
if (old > 1000)
old = 1000;
return (old);
}
if (new == 0)
new = 1;
else if (new > 1000)
new = 1000;
/* Smooth */
return (new);
}
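/*
 * A minimal sketch of the smoothing step mentioned above: blend the new
 * estimate with the previous quantum so a single noisy measurement cannot
 * swing the deletion rate too far, and clamp to the same 1..1000 range.
 * The 3:1 weighting and the helper name are assumptions for illustration.
 */
#if 0
static unsigned int
sketch_smooth_quantum(unsigned int old, unsigned int estimate) {
	unsigned int smoothed = (estimate * 3 + old) / 4;

	if (smoothed == 0)
		smoothed = 1;
	else if (smoothed > 1000)
		smoothed = 1000;
	return (smoothed);
}
#endif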
static void
unsigned int i;
char buf[DNS_NAME_FORMATSIZE];
unsigned int refs;
&refs);
sizeof(rbtdb_version_t));
}
/*
* We assume the number of remaining dead nodes is reasonably small;
* the overhead of unlinking all nodes here should be negligible.
*/
for (i = 0; i < rbtdb->node_lock_count; i++) {
}
}
for (;;) {
/*
* pick the next tree to (start to) destroy
*/
/*
* we're finished after clear cutting
*/
break;
}
}
if (result == ISC_R_QUOTA) {
&start);
NULL,
sizeof(isc_event_t));
continue;
return;
}
}
if (log) {
sizeof(buf));
else
"done free_rbtdb(%s)", buf);
}
for (i = 0; i < rbtdb->node_lock_count; i++) {
}
/*
* Clean up LRU / re-signing order lists.
*/
for (i = 0; i < rbtdb->node_lock_count; i++)
sizeof(rdatasetheaderlist_t));
}
/*
* Clean up dead node buckets.
*/
for (i = 0; i < rbtdb->node_lock_count; i++)
}
/*
* Clean up heap objects.
*/
for (i = 0; i < rbtdb->node_lock_count; i++)
}
/*
* We must be cleaning up after a failed zone loading.
*/
}
}
{
sizeof(dns_dbonupdatelistener_t));
}
}
static inline void
unsigned int i;
unsigned int inactive = 0;
/* XXX check for open versions here */
/*
* Even though there are no external direct references, there still
* may be nodes in use.
*/
for (i = 0; i < rbtdb->node_lock_count; i++) {
== 0) {
inactive++;
}
}
if (inactive != 0) {
if (want_free) {
char buf[DNS_NAME_FORMATSIZE];
sizeof(buf));
else
"calling free_rbtdb(%s)", buf);
}
}
}
static void
unsigned int refs;
if (refs == 0)
}
static void
unsigned int refs;
}
static inline rbtdb_version_t *
{
return (NULL);
if (result != ISC_R_SUCCESS) {
return (NULL);
}
return (version);
}
static isc_result_t
ISC_TRUE);
} else {
version->iterations = 0;
version->salt_length = 0;
}
rbtdb->next_serial++;
}
return (ISC_R_NOMEMORY);
return (ISC_R_SUCCESS);
}
static void
{
unsigned int refs;
*targetp = rbtversion;
}
static rbtdb_changed_t *
{
unsigned int refs;
/*
* Caller must be holding the node lock if its reference must be
* protected by the lock.
*/
} else
return (changed);
}
static void
{
unsigned int count;
unsigned int i;
unsigned char *raw; /* RDATASLAB */
/*
* The caller must be holding the corresponding node lock.
*/
return;
/*
* Sanity check: since an additional cache entry has a reference to
* the original DB node (in the callback arg), there should be no
* acache entries when the node can be freed.
*/
for (i = 0; i < count; i++)
}
static inline void
}
static inline void
ISC_LINK_INIT(h, link);
h->heap_index = 0;
h->is_mmapped = 0;
h->next_is_relative = 0;
h->node_is_relative = 0;
#if TRACE_HEADER
#else
#endif
}
/*
* Update the copied values of 'next' and 'node' if they are relative.
*/
static void
char *p;
if (old->next_is_relative) {
p = (char *) old;
}
if (old->node_is_relative) {
p = (char *) old;
}
}
}
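/*
 * A minimal sketch of the relative-pointer fixup above: when
 * 'next_is_relative' is set, the stored 'next' value is a byte offset from
 * the old header (as it sits in the mapped image), so converting it back
 * to a pointer means adding that offset to the old header's address.  The
 * local names here are hypothetical.
 */
#if 0
static void
sketch_fix_relative_next(rdatasetheader_t *newh, rdatasetheader_t *old) {
	char *base = (char *)old;

	if (old->next_is_relative) {
		/* 'next' holds an offset from 'old', not a real pointer. */
		newh->next = (rdatasetheader_t *)(base + (uintptr_t)old->next);
	}
}
#endif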
static inline rdatasetheader_t *
rdatasetheader_t *h;
h = isc_mem_get(mctx, sizeof(*h));
if (h == NULL)
return (NULL);
#if TRACE_HEADER
#endif
init_rdataset(rbtdb, h);
h->rdh_ttl = 0;
return (h);
}
static inline void
unsigned int size;
int idx;
}
}
if (rdataset->heap_index != 0)
rdataset->heap_index = 0;
else
sizeof(*rdataset));
return;
}
static inline void
/*
* Caller must hold the node lock.
*/
/*
* We set the IGNORE attribute on rdatasets with serial number
* 'serial'. When the reference count goes to zero, these rdatasets
* will be cleaned up; until that time, they will be ignored.
*/
}
}
}
}
if (make_dirty)
}
static inline void
/*
* If we are already stale there is nothing to do.
*/
return;
/*
* If we have not been counted then there is nothing to do.
*/
return;
}
static inline void
{
rdatasetheader_t *d, *down_next;
}
}
static inline void
/*
* Caller must be holding the node lock.
*/
/*
* If current is nonexistent or stale, we can clean it up.
*/
if ((current->attributes &
(RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
else
} else
}
}
static inline void
{
/*
* Caller must be holding the node lock.
*/
REQUIRE(least_serial != 0);
/*
* First, we clean up any instances of multiple rdatasets
* with the same serial number, or that have the IGNORE
* attribute.
*/
} else
}
/*
* We've now eliminated all IGNORE datasets with the possible
* exception of current, which we now check.
*/
else
/*
* current no longer exists, so we can
* just continue with the loop.
*/
continue;
} else {
/*
* Pull up current->down, making it the new
* current.
*/
else
}
}
/*
* We now try to find the first down node less than the
* least serial.
*/
break;
}
/*
* If there is a such an rdataset, delete it and any older
* versions.
*/
do {
}
/*
* Note. The serial number of 'current' might be less than
* least_serial too, but we cannot delete it because it is
* the most recent version, unless it is a NONEXISTENT
* rdataset.
*/
} else {
/*
* If this is a NONEXISTENT rdataset, we can delete it.
*/
if (NONEXISTENT(current)) {
else
} else
}
}
if (!still_dirty)
}
static void
unsigned int node_has_rpz;
char printname[DNS_NAME_FORMATSIZE];
ISC_LOG_DEBUG(1),
"delete_node(): %p %s (bucket %d)",
node,
}
case DNS_RBT_NSEC_NORMAL:
/*
* Though this may be wasteful, it has to be done before
* node is deleted.
*/
if (result == ISC_R_SUCCESS &&
break;
case DNS_RBT_NSEC_HAS_NSEC:
/*
* Delete the corresponding node from the auxiliary NSEC
* tree before deleting from the main tree.
*/
if (result != ISC_R_SUCCESS) {
"delete_node: "
"dns_rbt_findnode(nsec): %s",
} else {
if (result != ISC_R_SUCCESS) {
"delete_node(): "
"dns_rbt_deletenode(nsecnode): %s",
}
}
if (result == ISC_R_SUCCESS &&
break;
case DNS_RBT_NSEC_NSEC:
break;
case DNS_RBT_NSEC_NSEC3:
break;
}
if (result != ISC_R_SUCCESS) {
"delete_node(): "
"dns_rbt_deletenode: %s",
}
}
/*
* Caller must be holding the node lock.
*/
static inline void
}
}
/*%
* Clean up dead nodes. These are nodes which have no references, and
* have no data. They are dead but we could not or chose not to delete
* them when we deleted all the data at that node because we did not want
* to wait for the tree write lock.
*
* The caller must hold a tree write lock and bucketnum'th node (write) lock.
*/
static void
/*
* Since we're holding a tree write lock, it should be
* impossible for this node to be referenced by others.
*/
{
sizeof(isc_event_t));
} else {
}
} else {
}
count--;
}
}
/*
* This function is assumed to be called when a node is newly referenced
* and can be in the deadnode list. In that case the node must be retrieved
* from the list because it is going to be used. In addition, if the caller
* happens to hold a write lock on the tree, it's a good opportunity to purge dead
* nodes.
* Note: while a new reference is gained in multiple places, there are only very
* few cases where the node can be in the deadnode list (only empty nodes can
* have been added to the list).
*/
static inline void
{
/*
* Check if we can possibly cleanup the dead node. If so, upgrade
* the node lock below to perform the cleanup.
*/
}
/*
* Upgrade the lock and test if we still need to unlink.
*/
if (maybe_cleanup)
}
}
/*
* Caller must be holding the node lock; either the "strong", read or write
* lock. Note that the lock must be held even when node references are
* atomically modified; in that case the decrement operation itself does not
* have to be protected, but we must avoid a race condition where multiple
* threads are decreasing the reference to zero simultaneously and at least
* one of them is going to free the node.
* This function returns ISC_TRUE if and only if the node reference decreases
* to zero.
*/
static isc_boolean_t
{
#define KEEP_NODE(n, r) \
/* Handle easy and typical case first. */
if (nrefs == 0) {
}
}
/* Upgrade the lock? */
if (nlock == isc_rwlocktype_read) {
}
if (nrefs > 0) {
/* Restore the lock? */
if (nlock == isc_rwlocktype_read)
return (ISC_FALSE);
}
else {
if (least_serial == 0) {
/*
* Caller doesn't know the least serial.
* Get it.
*/
}
}
}
/*
* Attempt to switch to a write lock on the tree. If this fails,
* we will add this node to a linked list of nodes in this locking
* bucket which we will free later.
*/
if (tlock != isc_rwlocktype_write) {
/*
* Locking hierarchy notwithstanding, we don't need to free
* the node lock before acquiring the tree write lock because
* we only do a trylock.
*/
if (tlock == isc_rwlocktype_read)
else
result == ISC_R_LOCKBUSY);
} else
goto restore_locks;
if (write_locked) {
/*
* We can now delete the node.
*/
/*
* If this node is the only one in the level it's in, deleting
* this node may recursively make its parent the only node in
* the parent level; if so, and if no one is currently using
* the parent node, this is almost the only opportunity to
* clean it up. But the recursive cleanup is not that trivial
* since the child and parent may be in different lock buckets,
* which would cause a lock order reversal problem. To avoid
* the trouble, we'll dispatch a separate event for batch
* cleaning. We need to check whether we're deleting the node
* as a result of pruning to avoid infinite dispatching.
* Note: pruning happens only when a task has been set for the
* rbtdb. If the user of the rbtdb chooses not to set a task,
* it's their responsibility to purge stale leaves (e.g. by
* periodic walk-through).
*/
sizeof(isc_event_t));
} else {
/*
* XXX: this is a weird situation. We could
* ignore this error case, but then the stale
* node would be unlikely to be purged except under a
* rare condition such as manual cleanup. So
* we queue it in the deadnodes list, hoping
* the memory shortage is temporary and the node
* will be deleted later.
*/
"decrement_reference: failed to "
"allocate pruning event");
deadlink);
}
} else {
}
} else {
}
/* Restore the lock? */
if (nlock == isc_rwlocktype_read)
/*
* Relock a read lock, or unlock the write lock if no lock was held.
*/
if (tlock == isc_rwlocktype_none)
if (write_locked)
if (tlock == isc_rwlocktype_read)
if (write_locked)
return (no_reference);
}
/*
* Prune the tree by recursively cleaning-up single leaves. In the worst
* case, the number of iterations is the number of tree levels, which is at
* most the maximum number of domain name labels, i.e., 127. In practice, this
* should be much smaller (only a few times), and even the worst case would be
* acceptable for a single event.
*/
static void
unsigned int locknum;
do {
/*
* node was the only down child of the parent and has
* just been removed. We'll then need to examine the
* parent. Keep the lock if possible; otherwise,
* release the old lock and acquire one for the parent.
*/
}
/*
* We need to gain a reference to the node before
* decrementing it in the next iteration. In addition,
* if the node is in the dead-nodes list, extract it
* from the list beforehand as we do in
* reactivate_node().
*/
} else
}
static inline void
{
/*
* Caller must be holding the database lock.
*/
}
static inline void
/*
* If the changed record is dirty, then
* an update created multiple versions of
* a given rdataset. We keep this list
* until we're the least open version, at
* which point it's safe to get rid of any
* older versions.
*
* If the changed record isn't dirty, then
* we don't need it anymore since we're
* committing and not rolling back.
*
* The caller must be holding the database lock.
*/
changed = next_changed) {
}
}
}
static void
if (result == ISC_R_SUCCESS) {
while (result == ISC_R_SUCCESS) {
if (dns_zonekey_iszonekey(&keyrdata)) {
break;
}
}
}
if (!haszonekey) {
return;
}
0, 0, &nsecset, &signsecset);
if (result == ISC_R_SUCCESS) {
if (dns_rdataset_isassociated(&signsecset)) {
}
}
/*
*/
else
}
/*%<
* Walk the origin node looking for NSEC3PARAM records.
* Cache the nsec3 parameters.
*/
static void
unsigned char *raw; /* RDATASLAB */
header = header_next) {
do {
if (NONEXISTENT(header))
break;
} else
/*
* Find an NSEC3PARAM with a supported algorithm.
*/
#else
raw += 2;
#endif
while (count-- > 0U) {
raw += 4;
#else
raw += 2;
#endif
®ion);
NULL);
continue;
if (nsec3param.flags != 0)
continue;
/*
* Look for a better algorithm than the
* unknown test algorithm.
*/
goto unlock;
}
}
}
}
static void
unsigned int locknum;
unsigned int refs;
}
if (again)
else {
if (refs == 0)
}
}
static void
unsigned int refs;
if (refs > 0) { /* typical and easy case first */
if (commit) {
}
goto end;
}
/*
* Update the zone's secure status in version before making
* it the current version.
*/
if (commit) {
unsigned cur_ref;
/*
* The current version is going to be replaced.
* Release the (likely last) reference to it from the
* DB itself and unlink it from the open list.
*/
&cur_ref);
if (cur_ref == 0) {
cur_version, link);
}
/*
* We're going to become the least open
* version.
*/
&cleanup_list);
} else {
/*
* Some other open version is the
* least version. We can't cleanup
* records that were changed in this
* version because the older versions
* may still be in use by an open
* version.
*
* We can, however, discard the
* changed records for things that
* we've added that didn't exist in
* prior versions.
*/
}
/*
* If the (soon to be former) current version
* isn't being used by anyone, we can clean
* it up.
*/
if (cur_ref == 0) {
link);
}
/*
* Become the current version.
*/
/*
* Keep the current version in the open list, and
* gain a reference for the DB itself (see the DB
* creation function below). This must be the only
* case where we need to increment the counter from
* zero and need to use isc_refcount_increment0().
*/
&cur_ref);
} else {
/*
* We're rolling back this transaction.
*/
}
} else {
/*
* There are no external or internal references
* to this version and it can be cleaned up.
*/
/*
* Find the version with the least serial
* number greater than ours.
*/
if (least_greater == NULL)
/*
* Is this the least open version?
*/
/*
* Yes. Install the new least open
* version.
*/
&cleanup_list);
} else {
/*
* Add any unexecuted cleanups to
* those of the least greater version.
*/
link);
}
}
if (cleanup_version != NULL) {
sizeof(*cleanup_version));
}
/*
*/
header);
if (result != ISC_R_SUCCESS)
"Unable to reinsert header to "
"re-signing heap: %s\n",
}
}
if (!EMPTY(cleanup_list)) {
rbtdb, sizeof(isc_event_t));
/*
* We acquire a tree write lock here in order to make
* sure that stale nodes will be removed in
* decrement_reference(). If we didn't have the lock,
* those nodes could miss the chance to be removed
* until the server stops. The write lock is
* expensive, but this event should be rare enough
* to justify the cost.
*/
}
changed = next_changed) {
/*
* This is a good opportunity to purge any dead nodes,
* so use it.
*/
if (rollback)
sizeof(*changed));
}
} else
}
end:
}
/*
* Add the necessary magic for the wildcard name 'name'
* to be found in 'rbtdb'.
*
* In order for wildcard matching to work correctly in
* zone_find(), we must ensure that a node for the wildcarding
* level exists in the database, and has its 'find_callback'
* and 'wild' bits set.
*
* E.g. if the wildcard name is "*.sub.example." then we
* must ensure that "sub.example." exists and is marked as
* a wildcard level.
*/
static isc_result_t
unsigned int n;
n = dns_name_countlabels(name);
INSIST(n >= 2);
n--;
return (result);
if (result == ISC_R_SUCCESS)
return (ISC_R_SUCCESS);
}
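/*
 * A minimal sketch of the wildcard magic described above: strip the leading
 * "*" label to get the wildcarding level ("sub.example." for
 * "*.sub.example."), make sure a node exists there, and set its
 * 'find_callback' and 'wild' bits.  The exact calls and the helper name are
 * assumptions for illustration.
 */
#if 0
static isc_result_t
sketch_wildcard_magic(dns_rbtdb_t *rbtdb, const dns_name_t *name) {
	dns_name_t foundname;
	dns_offsets_t offsets;
	unsigned int n;
	dns_rbtnode_t *node = NULL;
	isc_result_t result;

	dns_name_init(&foundname, offsets);
	n = dns_name_countlabels(name);		/* at least 2 labels */
	/* Drop the leading "*" label: the wildcarding level remains. */
	dns_name_getlabelsequence(name, 1, n - 1, &foundname);

	result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
	if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
		return (result);
	node->find_callback = 1;	/* run the search callback here */
	node->wild = 1;			/* this level owns a wildcard */
	return (ISC_R_SUCCESS);
}
#endif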
static isc_result_t
unsigned int n, l, i;
n = dns_name_countlabels(name);
i = l + 1;
while (i < n) {
if (dns_name_iswildcard(&foundname)) {
if (result != ISC_R_SUCCESS)
return (result);
&node);
return (result);
if (result == ISC_R_SUCCESS)
}
i++;
}
return (ISC_R_SUCCESS);
}
static isc_result_t
{
if (result != ISC_R_SUCCESS) {
if (!create) {
if (result == DNS_R_PARTIALMATCH)
return (result);
}
/*
* It would be nice to try to upgrade the lock instead of
* unlocking then relocking.
*/
if (result == ISC_R_SUCCESS) {
#ifdef DNS_RBT_USEHASH
#else
#endif
if (dns_name_iswildcard(name)) {
if (result != ISC_R_SUCCESS) {
return (result);
}
}
}
} else if (result != ISC_R_EXISTS) {
return (result);
}
}
/*
* Always try to add the policy zone data, because this node might
* already have been implicitly created by the previous addition of
* a longer domain. A common example is adding *.example.com
* (implicitly creating example.com) followed by explicitly adding
* example.com.
*/
if (result == ISC_R_SUCCESS)
/*
* It is too late to give up, so merely complain.
*/
"dns_rpz_add(): %s",
}
}
return (ISC_R_SUCCESS);
}
static isc_result_t
{
}
static isc_result_t
{
}
static isc_result_t
/*
* We only want to remember the topmost zone cut, since it's the one
* that counts, so we'll just continue if we've already found a
* zonecut.
*/
return (DNS_R_CONTINUE);
/*
* Look for an NS or DNAME rdataset active in our version.
*/
dname_header = NULL;
do {
/*
* Is this a "this rdataset doesn't
* exist" record?
*/
if (NONEXISTENT(header))
break;
} else
/*
* We've found an NS rdataset that
* isn't at the origin node. We check
* that they're not at the origin node,
* because otherwise we'd erroneously
* treat the zone top as if it were
* a delegation.
*/
}
}
}
}
/*
* Did we find anything?
*/
/*
* Note that NS has precedence over DNAME if both exist
* in a zone. Otherwise DNAME takes precedence over NS.
*/
} else if (dname_header != NULL) {
}
/*
* We increment the reference count on node to ensure that
* search->zonecut_rdataset will still be valid later.
*/
/*
* Since we've found a zonecut, anything beneath it is
* glue and is not subject to wildcard matching, so we
* may clear search->wild.
*/
/*
* If the caller does not want to find glue, then
* this is the best answer and the search should
* stop now.
*/
} else {
/*
* The search will continue beneath the zone cut.
* This may or may not be the best match. In case it
* is, we need to remember the node name.
*/
}
} else {
/*
* There is no zonecut at this node which is active in this
* version.
*
* If this is a "wild" node and the caller hasn't disabled
* wildcard matching, remember that we've seen a wild node
* in case we need to go searching for wildcard matches
* later on.
*/
}
return (result);
}
static inline void
{
unsigned char *raw; /* RDATASLAB */
/*
* Caller must be holding the node reader lock.
* XXXJT: technically, we need a writer lock, since we'll increment
* the header count below. However, since the actual counter value
* doesn't matter, we prioritize performance here. (We may want to
* use atomic increment when available).
*/
return;
/*
* Reset iterator state.
*/
rdataset->privateuint4 = 0;
/*
* Add noqname proof.
*/
/*
* Copy out re-signing information.
*/
} else
}
static inline isc_result_t
{
/*
* The caller MUST NOT be holding any node locks.
*/
/*
* If we have to set foundname, we do it before anything else.
* If we were to set foundname after we had set nodep or bound the
* rdataset, then we'd have to undo that work if dns_name_copy()
* failed. By setting foundname first, there's nothing to undo if
* we have trouble.
*/
if (result != ISC_R_SUCCESS)
return (result);
}
/*
* Note that we don't have to increment the node's reference
* count here because we're going to use the reference we
* already have in the search block.
*/
}
}
if (type == dns_rdatatype_dname)
return (DNS_R_DNAME);
return (DNS_R_DELEGATION);
}
static inline isc_boolean_t
{
unsigned char *raw; /* RDATASLAB */
/*
* No additional locking is required.
*/
/*
* Valid glue types are A, AAAA, A6. NS is also a valid glue type
* if it occurs at a zone cut, but is not valid below it.
*/
if (type == dns_rdatatype_ns) {
return (ISC_FALSE);
}
} else if (type != dns_rdatatype_a &&
type != dns_rdatatype_aaaa &&
type != dns_rdatatype_a6) {
return (ISC_FALSE);
}
#else
raw += 2;
#endif
while (count > 0) {
count--;
raw += 4;
#else
raw += 2;
#endif
/*
* XXX Until we have rdata structures, we have no choice but
* to directly access the rdata format.
*/
break;
}
}
return (valid);
}
static inline isc_boolean_t
{
if (result != ISC_R_SUCCESS)
break;
break;
}
break;
}
if (result == ISC_R_SUCCESS)
return (answer);
}
static inline isc_boolean_t
unsigned int n;
/*
* Find if qname is at or below an empty node.
* Use our own copy of the chain.
*/
do {
if (result != ISC_R_SUCCESS)
break;
break;
}
break;
if (result == ISC_R_SUCCESS)
if (result != ISC_R_SUCCESS)
if (result != ISC_R_SUCCESS)
break;
break;
}
break;
}
if (result == ISC_R_SUCCESS)
if (result != ISC_R_SUCCESS)
/*
* Remove the wildcard label to find the terminal name.
*/
n = dns_name_countlabels(wname);
do {
break;
}
/*
* Remove the left hand label.
*/
n = dns_name_countlabels(&rname);
return (answer);
}
static inline isc_result_t
{
unsigned int i, j;
/*
* Caller must be holding the tree lock and MUST NOT be holding
* any node locks.
*/
/*
* Examine each ancestor level. If the level's wild bit
* is set, then construct the corresponding wildcard name and
* search for it. If the wildcard node exists, and is active in
* this version, we're done. If not, then we next check to see
* if the ancestor is active in this version. If so, then there
* can be no possible wildcard match and again we're done. If not,
* continue the search.
*/
do {
/*
* First we try to figure out if this node is active in
* the search's version. We do this now, even though we
* may not need the information, because it simplifies the
* locking and code flow.
*/
break;
}
else
else
if (wild) {
/*
* Construct the wildcard name for this level.
*/
j = i;
while (result == ISC_R_SUCCESS && j != 0) {
j--;
&name,
NULL);
}
if (result != ISC_R_SUCCESS)
break;
if (result == ISC_R_SUCCESS) {
/*
* We have found the wildcard node. If it
* is active in the search's version, we're
* done.
*/
break;
}
wname)) {
return (ISC_R_NOTFOUND);
}
/*
* The wildcard node is active!
*
* Note: result is still ISC_R_SUCCESS
* so we don't have to set it.
*/
break;
}
} else if (result != ISC_R_NOTFOUND &&
result != DNS_R_PARTIALMATCH) {
/*
* An error has occurred. Bail out.
*/
break;
}
}
if (active) {
/*
* The level node is active. Any wildcarding
* present at higher levels has no
* effect and we're done.
*/
break;
}
if (i > 0) {
i--;
} else
} while (!done);
return (result);
}
static isc_boolean_t
{
unsigned char *raw; /* RDATASLAB */
#else
raw += 2;
#endif
while (count-- > 0) {
raw += 4;
#else
raw += 2;
#endif
nsec3.salt_length) == 0)
return (ISC_TRUE);
}
return (ISC_FALSE);
}
/*
*/
static inline isc_result_t
{
if (type == dns_rdatatype_nsec3) {
return (result);
nodep);
return (result);
}
for (;;) {
if (*firstp) {
/*
* Construct the name of the second node to check.
* It is the first node sought in the NSEC tree.
*/
if (result != ISC_R_SUCCESS)
return (result);
if (result == ISC_R_SUCCESS) {
/*
* Since this was the first loop, finding the
* name in the NSEC tree implies that the first
* node checked in the main tree had an
* unacceptable NSEC record.
* Try the previous node in the NSEC tree.
*/
if (result == DNS_R_NEWORIGIN)
} else if (result == ISC_R_NOTFOUND ||
result == DNS_R_PARTIALMATCH) {
if (result == ISC_R_NOTFOUND)
}
} else {
/*
* This is a second or later trip through the auxiliary
* tree for the name of a third or earlier NSEC node in
* the main tree. Previous trips through the NSEC tree
* must have found nodes in the main tree with NSEC
* records. Perhaps they lacked signature records.
*/
if (result == DNS_R_NEWORIGIN)
}
if (result != ISC_R_SUCCESS)
return (result);
/*
* Construct the name to seek in the main tree.
*/
if (result != ISC_R_SUCCESS)
return (result);
if (result == ISC_R_SUCCESS)
return (result);
/*
* There should always be a node in the main tree with the
* same name as the node in the auxiliary NSEC tree, except for
* nodes in the auxiliary tree that are awaiting deletion.
*/
"previous_closest_nsec(): %s",
return (DNS_R_BADDB);
}
}
}
/*
* Find the NSEC/NSEC3 that is at or before the current point on the
* search chain. For NSEC3 records only NSEC3 records that match the
* current NSEC3PARAM record are considered.
*/
static inline isc_result_t
{
} else {
}
/*
* Use the auxiliary tree only starting with the second node in the
* hope that the original node will be right much of the time.
*/
if (result != ISC_R_SUCCESS)
return (result);
do {
header = header_next) {
/*
* Look for an active, extant NSEC or RRSIG NSEC.
*/
do {
/*
* Is this a "this rdataset doesn't
* exist" record?
*/
if (NONEXISTENT(header))
break;
} else
/*
* We now know that there is at least one
* active rdataset at this node.
*/
break;
break;
}
}
}
if (!empty_node) {
NULL);
/*
*
* Note: for this to really be the right
* NSEC record, it's essential that the NSEC
* records of any nodes obscured by a zone
* cut have been removed; we assume this is
* the case.
*/
if (result == ISC_R_SUCCESS) {
node);
}
rdataset);
node,
}
/*
* This node is active, but has no NSEC or
* RRSIG NSEC. That means it's glue or
* other obscured zone data that isn't
* relevant for our search. Treat the
* node as if it were empty and keep looking.
*/
&prevnode,
&first);
} else {
/*
* We found an active node, but either the
* NSEC or the RRSIG NSEC is missing. This
* shouldn't happen.
*/
}
} else {
/*
* This node isn't active. We've got to keep
* looking.
*/
}
if (!first)
goto again;
}
}
/*
* If the result is ISC_R_NOMORE, then we got to the beginning of
* the database and didn't find an NSEC record. This shouldn't
* happen.
*/
if (result == ISC_R_NOMORE)
return (result);
}
static isc_result_t
{
/*
* We don't care about 'now'.
*/
/*
* If the caller didn't supply a version, attach to the current
* version.
*/
}
/*
* 'wild' will be true iff we've matched a wildcard.
*/
/*
* Search down from the root of the tree. If, while going down, we
* encounter a callback node, zone_zonecut_callback() will search the
* rdatasets at the zone cut for active DNAME or NS rdatasets.
*/
if (result == DNS_R_PARTIALMATCH) {
goto tree_exit;
}
/*
* At least one of the levels in the search chain
* potentially has a wildcard. For each such level,
* we must see if there's a matching wildcard active
* in the current version.
*/
if (result == ISC_R_SUCCESS) {
if (result != ISC_R_SUCCESS)
goto tree_exit;
goto found;
}
else if (result != ISC_R_NOTFOUND)
goto tree_exit;
}
/*
* If we're here, then the name does not exist, is not
* beneath a zonecut, and there's no matching wildcard.
*/
{
if (result == ISC_R_SUCCESS)
} else
goto tree_exit;
} else if (result != ISC_R_SUCCESS)
goto tree_exit;
/*
* We have found a node whose name is the desired name, or we
* have matched a wildcard.
*/
/*
* If we're beneath a zone cut, we don't want to look for
* CNAMEs because they're not legitimate zone glue.
*/
} else {
/*
* The node may be a zone cut itself. If it might be one,
* make sure we check for it later.
*
* DS records live above the zone cut in ordinary zone so
* we want to ignore any referral.
*
* Stub zones don't have anything "above" the delegation so
* we always return a referral.
*/
if (node->find_callback &&
!dns_rdatatype_atparent(type)) ||
}
/*
* Certain DNSSEC types are not subject to CNAME matching
* (RFC4035, section 2.5 and RFC3007).
*
* We don't check for RRSIG, because we don't store RRSIG records
* directly.
*/
/*
* We now go looking for rdata...
*/
nsecheader = NULL;
/*
* Look for an active, extant rdataset.
*/
do {
/*
* Is this a "this rdataset doesn't
* exist" record?
*/
if (NONEXISTENT(header))
break;
} else
/*
* We now know that there is at least one active
* rdataset at this node.
*/
/*
* Do special zone cut handling, if requested.
*/
if (maybe_zonecut &&
/*
* We increment the reference count on node to
* ensure that search->zonecut_rdataset will
* still be valid later.
*/
/*
* It is not clear if KEY should still be
* allowed at the parent side of the zone
* cut or not. It is needed for RFC3007
* validated updates.
*/
&& type != dns_rdatatype_nsec
&& type != dns_rdatatype_key) {
/*
* Glue is not OK, but any answer we
* could return would be glue. Return
* the delegation.
*/
break;
}
break;
}
/*
* If the NSEC3 record doesn't match the chain
* we are using, behave as if it isn't here.
*/
goto partial_match;
}
/*
* If we found a type we were looking for,
* remember it.
*/
type == dns_rdatatype_any ||
cname_ok)) {
/*
* We've found the answer!
*/
cname_ok) {
/*
* We may be finding a CNAME instead
* of the desired type.
*
* If we've already got the CNAME RRSIG,
* use it, otherwise change sigtype
* so that we find it.
*/
else
sigtype =
}
/*
* If we've got all we need, end the search.
*/
break;
/*
* We've found the RRSIG rdataset for our
* target type. Remember it.
*/
/*
* If we've got all we need, end the search.
*/
break;
/*
* Remember a NSEC rdataset even if we're
* not specifically looking for it, because
* we might need it later.
*/
nsecheader = header;
/*
* If we need the NSEC rdataset, we'll also
* need its signature.
*/
} else if (cname_ok &&
/*
* If we get a CNAME match, we'll also need
* its signature.
*/
}
}
}
if (empty_node) {
/*
* We have an exact match for the name, but there are no
* active rdatasets in the desired version. That means that
* this node doesn't exist in the desired version, and that
* we really have a partial match.
*/
if (!wild) {
goto partial_match;
}
}
/*
* If we didn't find what we were looking for...
*/
/*
* We were trying to find glue at a node beneath a
* zone cut, but didn't.
*
* Return the delegation.
*/
goto tree_exit;
}
/*
* The desired type doesn't exist.
*/
/*
* The zone is secure but there's no NSEC,
* or the NSEC has no signature!
*/
if (!wild) {
goto node_exit;
}
if (result == ISC_R_SUCCESS)
goto tree_exit;
}
nsecheader == NULL)
{
/*
* There's no NSEC record, and we were told
* to find one.
*/
goto node_exit;
}
}
{
0, rdataset);
nsecsig, 0, sigrdataset);
}
if (wild)
goto node_exit;
}
/*
* We found what we were looking for, or we found a CNAME.
*/
type != dns_rdatatype_any &&
/*
* We weren't doing an ANY query and we found a CNAME instead
* of the type we were looking for, so we need to indicate
* that result to the caller.
*/
/*
* If we're beneath a zone cut, we must indicate that the
* result is glue, unless we're actually at the zone cut
* and the type is NSEC or KEY.
*/
/*
* It is not clear if KEY should still be
* allowed at the parent side of the zone
* cut or not. It is needed for RFC3007
* validated updates.
*/
if (type == dns_rdatatype_nsec ||
type == dns_rdatatype_nsec3 ||
else if (type == dns_rdatatype_any)
else
result = DNS_R_GLUE;
} else
result = DNS_R_GLUE;
/*
* We might have found data that isn't glue, but was occluded
* by a dynamic update. If the caller cares about this, they
* will have told us to validate glue.
*
* XXX We should cache the glue validity state!
*/
if (result == DNS_R_GLUE &&
goto tree_exit;
}
} else {
/*
* An ordinary successful query!
*/
}
if (!at_zonecut)
else
}
if (type != dns_rdatatype_any) {
}
if (wild)
/*
* If we found a zonecut but aren't going to use it, we have to
* let go of it.
*/
if (search.need_cleanup) {
}
if (close_version)
return (result);
}
static isc_result_t
{
/* NOTREACHED */
return (ISC_R_NOTIMPLEMENTED);
}
static isc_boolean_t
{
#if !defined(ISC_RWLOCK_USEATOMIC) || !defined(DNS_RBT_USEISCREFCOUNT)
#endif
/*
* This rdataset is stale. If no one else is using the
* node, we can clean it up right now, otherwise we mark
* it as stale, and the node as dirty, so it will get
* cleaned up later.
*/
(*locktype == isc_rwlocktype_write ||
{
/*
* We update the node's status only when we can
* get write access; otherwise, we leave others
* to do this work. Periodic cleaning will
* eventually take care of it as a last resort.
* We won't downgrade the lock, since other
* rdatasets are probably stale, too.
*/
if (dns_rbtnode_refcurrent(node) == 0) {
/*
* header->down can be non-NULL if the
* refcount has just decremented to 0
* but decrement_reference() has not
* performed clean_cache_node(), in
* which case we need to purge the stale
* headers first.
*/
if (*header_prev != NULL)
else
} else {
*header_prev = header;
}
} else
*header_prev = header;
return (ISC_TRUE);
}
return (ISC_FALSE);
}
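/*
 * A minimal sketch of the TTL arithmetic behind the staleness handling
 * above, treating 'rdh_ttl' as the absolute expiration time (as it is for
 * cache headers): data is stale once that time has passed, but it is only
 * eligible for cleanup once it has been stale for longer than RBTDB_VIRTUAL,
 * so clients with a virtual time up to 5 minutes in the past can still see
 * it.  The helper name is hypothetical.
 */
#if 0
static isc_boolean_t
sketch_header_expired(const rdatasetheader_t *header, isc_stdtime_t now) {
	if (header->rdh_ttl >= now)
		return (ISC_FALSE);	/* still active */
	return (ISC_TF(header->rdh_ttl < now - RBTDB_VIRTUAL));
}
#endif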
static isc_result_t
/* XXX comment */
/*
* Keep compiler silent.
*/
/*
* Look for a DNAME or RRSIG DNAME rdataset.
*/
dname_header = NULL;
header_prev = NULL;
&header_prev)) {
/* Do nothing. */
} else
}
if (dname_header != NULL &&
/*
* We increment the reference count on node to ensure that
* search->zonecut_rdataset will still be valid later.
*/
} else
return (result);
}
static inline isc_result_t
{
unsigned int i;
/*
* Caller must be holding the tree lock.
*/
do {
/*
* Look for NS and RRSIG NS rdatasets.
*/
header_prev = NULL;
&header_prev)) {
/* Do nothing. */
/*
* We've found an extant rdataset. See if
* we're interested in it.
*/
break;
break;
}
} else
}
/*
* If we have to set foundname, we do it before
* anything else. If we were to set foundname after
* we had set nodep or bound the rdataset, then we'd
* have to undo that work if dns_name_concatenate()
* failed. By setting foundname first, there's
* nothing to undo if we have trouble.
*/
while (result == ISC_R_SUCCESS && i > 0) {
i--;
&name);
result =
&name,
NULL);
}
if (result != ISC_R_SUCCESS) {
goto node_exit;
}
}
}
rdataset);
if (locktype != isc_rwlocktype_write) {
}
}
}
}
i--;
} else
} while (!done);
return (result);
}
static isc_result_t
{
do {
if (result != ISC_R_SUCCESS)
return (result);
header_prev = NULL;
&header_prev)) {
continue;
}
if (NONEXISTENT(header) ||
continue;
}
}
if (result != ISC_R_SUCCESS)
goto unlock_node;
now, sigrdataset);
} else if (!empty_node) {
} else
NULL);
return (result);
}
/*
* Connect this RBTDB to the response policy zone summary data for the view.
*/
static void
dns_rbtdb_t * rbtdb;
}
/*
* Enable this RBTDB as a response policy zone.
*/
static isc_result_t
dns_rbtdb_t * rbtdb;
} else {
}
return (result);
}
static isc_result_t
{
if (now == 0)
/*
* Search down from the root of the tree. If, while going down, we
* encounter a callback node, cache_zonecut_callback() will search the
* rdatasets at the zone cut for a DNAME rdataset.
*/
if (result == DNS_R_PARTIALMATCH) {
if (result == DNS_R_COVERINGNSEC)
goto tree_exit;
}
goto tree_exit;
} else {
goto tree_exit;
}
} else if (result != ISC_R_SUCCESS)
goto tree_exit;
/*
* Certain DNSSEC types are not subject to CNAME matching
* (RFC4035, section 2.5 and RFC3007).
*
* We don't check for RRSIG, because we don't store RRSIG records
* directly.
*/
/*
* We now go looking for rdata...
*/
header_prev = NULL;
&header_prev)) {
/* Do nothing. */
/*
* We now know that there is at least one active
* non-stale rdataset at this node.
*/
/*
* If we found a type we were looking for, remember
* it.
*/
(type == dns_rdatatype_any &&
/*
* We've found the answer.
*/
cname_ok &&
/*
* If we've already got the
* CNAME RRSIG, use it.
*/
}
/*
* We've found the RRSIG rdataset for our
* target type. Remember it.
*/
/*
* We've found a negative cache entry.
*/
/*
* Remember an NS rdataset even if we're
* not specifically looking for it, because
* we might need it later.
*/
/*
* If we need the NS rdataset, we'll also
* need its signature.
*/
} else if (cname_ok &&
/*
* If we get a CNAME match, we'll also need
* its signature.
*/
}
} else
}
if (empty_node) {
/*
* We have an exact match for the name, but there are no
* extant rdatasets. That means that this node doesn't
* meaningfully exist, and that we really have a partial match.
*/
goto find_ns;
}
/*
* If we didn't find what we were looking for...
*/
((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
((options & DNS_DBFIND_GLUEOK) == 0)) ||
((options & DNS_DBFIND_PENDINGOK) == 0))) {
/*
* If there is an NS rdataset at this node, then this is the
* deepest zone cut.
*/
}
rdataset);
}
goto node_exit;
}
/*
* Go find the deepest zone cut.
*/
goto find_ns;
}
/*
* We found what we were looking for, or we found a CNAME.
*/
}
/*
* We found a negative cache entry.
*/
else
type != dns_rdatatype_any &&
/*
* We weren't doing an ANY query and we found a CNAME instead
* of the type we were looking for, so we need to indicate
* that result to the caller.
*/
} else {
/*
* An ordinary successful query!
*/
}
result == DNS_R_NCACHENXRRSET) {
rdataset);
}
}
locktype != isc_rwlocktype_write) {
}
/*
* If we found a zonecut but aren't going to use it, we have to
* let go of it.
*/
if (search.need_cleanup) {
}
return (result);
}
static isc_result_t
{
unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
if (now == 0)
if ((options & DNS_DBFIND_NOEXACT) != 0)
/*
* Search down from the root of the tree.
*/
if (result == DNS_R_PARTIALMATCH) {
goto tree_exit;
} else if (result != ISC_R_SUCCESS)
goto tree_exit;
/*
* We now go looking for an NS rdataset at the node.
*/
header_prev = NULL;
&header_prev)) {
/* Do nothing. */
/*
* If we found a type we were looking for, remember
* it.
*/
/*
* Remember an NS rdataset even if we're
* not specifically looking for it, because
* we might need it later.
*/
/*
* If we need the NS rdataset, we'll also
* need its signature.
*/
}
} else
}
/*
* No NS records here.
*/
goto find_ns;
}
}
if (locktype != isc_rwlocktype_write) {
}
}
}
if (result == DNS_R_DELEGATION)
return (result);
}
static void
unsigned int refs;
}
static void
}
}
if (inactive) {
if (want_free) {
char buf[DNS_NAME_FORMATSIZE];
sizeof(buf));
else
"calling free_rbtdb(%s)", buf);
}
}
}
static isc_result_t
/*
* These are the category and module used by the cache cleaner.
*/
char printname[DNS_NAME_FORMATSIZE];
/*
* Caller must hold a tree lock.
*/
if (now == 0)
/*
* XXXDCL Could stand to have a better policy, like LRU.
*/
/*
* Note that 'log' can be true IFF overmem is also true.
* overmem can currently only be true for cache
* databases -- hence all of the "overmem cache" log strings.
*/
if (log)
"overmem cache: %s %s",
sizeof(printname)));
}
/*
* We may not need write access, but this code path is not performance
* sensitive, so it should be okay to always lock as a writer.
*/
/*
* We don't check if refcurrent(rbtnode) == 0 and try
* to free like we do in cache_find(), because
* refcurrent(rbtnode) must be non-zero. This is so
* because 'node' is an argument to the function.
*/
if (log)
level, "overmem cache: stale %s",
} else if (force_expire) {
} else if (log) {
level, "overmem cache: "
"reprieve by RETAIN() %s",
}
"overmem cache: saved %s", printname);
return (ISC_R_SUCCESS);
}
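/*
 * A minimal sketch of an overmem purge pass over one LRU bucket: the least
 * recently used entries sit at the tail of rdatasets[locknum], so they are
 * expired first, a few per call.  The purge count, the expire_header()
 * call signature and the helper name are assumptions for illustration.
 */
#if 0
static void
sketch_overmem_purge_bucket(dns_rbtdb_t *rbtdb, unsigned int locknum) {
	rdatasetheader_t *header, *prev;
	unsigned int purged = 0;

	for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
	     header != NULL && purged < 2;	/* a couple per call */
	     header = prev) {
		prev = ISC_LIST_PREV(header, link);
		/* Oldest entries are at the tail of the LRU list. */
		expire_header(rbtdb, header, ISC_TRUE, expire_lru);
		purged++;
	}
}
#endif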
static void
/* This is an empty callback. See adb.c:water() */
return;
}
static void
do {
if (!first)
"\tserial = %lu, ttl = %u, "
"trust = %u, attributes = %u, "
"resign = %u\n",
}
} else
}
static isc_result_t
{
return (ISC_R_NOMEMORY);
else
return (ISC_R_SUCCESS);
}
static isc_result_t
{
if (rbtversion == NULL) {
}
now = 0;
if (covers == 0)
else
sigmatchtype = 0;
do {
/*
* Is this a "this rdataset doesn't
* exist" record?
*/
if (NONEXISTENT(header))
break;
} else
/*
* We have an active, extant rdataset. If it's a
* type we're looking for, remember it.
*/
break;
break;
}
}
}
}
if (close_version)
return (ISC_R_NOTFOUND);
return (ISC_R_SUCCESS);
}
static isc_result_t
{
if (now == 0)
if (covers == 0)
else
sigmatchtype = 0;
(locktype == isc_rwlocktype_write ||
/*
* We update the node's status only when we
* can get write access.
*/
/*
* We don't check if refcurrent(rbtnode) == 0
* and try to free like we do in cache_find(),
* because refcurrent(rbtnode) must be
* non-zero. This is so because 'node' is an
* argument to the function.
*/
}
}
}
}
return (ISC_R_NOTFOUND);
/*
* We found a negative cache entry.
*/
else
}
return (result);
}
static isc_result_t
{
unsigned int refs;
return (ISC_R_NOMEMORY);
now = 0;
if (rbtversion == NULL)
(dns_dbversion_t **) (void *)(&rbtversion));
else {
&refs);
}
} else {
if (now == 0)
rbtversion = NULL;
}
return (ISC_R_SUCCESS);
}
static isc_boolean_t
/*
* The caller must hold the node lock.
*/
/*
* Look for CNAME and "other data" rdatasets active in our version.
*/
/*
* Look for an active extant CNAME.
*/
do {
/*
* Is this a "this rdataset doesn't
* exist" record?
*/
if (NONEXISTENT(header))
break;
} else
} else {
/*
* Look for active extant "other data".
*
* "Other data" is any rdataset whose type is not
* KEY, NSEC, SIG or RRSIG.
*/
if (rdtype != dns_rdatatype_key &&
rdtype != dns_rdatatype_sig &&
rdtype != dns_rdatatype_nsec &&
rdtype != dns_rdatatype_rrsig) {
/*
* Is it active and extant?
*/
do {
/*
* Is this a "this rdataset
* doesn't exist" record?
*/
if (NONEXISTENT(header))
break;
} else
}
}
}
if (cname && other_data)
return (ISC_TRUE);
return (ISC_FALSE);
}
static isc_result_t
return (result);
}
static void
{
/*
* Remove the old header from the heap
*/
header->heap_index);
header->heap_index = 0;
}
}
}
static isc_result_t
{
unsigned char *merged;
int idx;
/*
* Add an rdatasetheader_t to a node.
*/
/*
* Caller must be holding the node lock.
*/
if ((options & DNS_DBADD_MERGE) != 0) {
} else
if ((options & DNS_DBADD_FORCE) != 0)
else
/*
* We always add a changed record, even if no changes end up
* being made to this node, because it's harmless and
* simplifies the code.
*/
return (ISC_R_NOMEMORY);
}
}
negtype = 0;
/*
* We're adding a negative cache entry.
*/
/*
* If we're adding a negative cache entry
* which covers all types (NXDOMAIN,
* NODATA(QTYPE=ANY)), we make all other
* data stale so that the
* only rdataset that can be found at this
* node is the negative cache entry.
*
* Otherwise look for any RRSIGs of the
* given type so they can be marked stale
* later.
*/
if (covers == dns_rdatatype_any) {
}
if (covers == dns_rdatatype_any)
goto find_header;
} else {
/*
* We're adding something that isn't a
* negative cache entry. Look for an extant
* cache entry. If we're adding an RRSIG, also
* check for an extant non-stale NODATA ncache
* entry which covers the same type as the RRSIG.
*/
RBTDB_RDATATYPE_VALUE(0, covers))) {
break;
}
}
/*
* Found one.
*/
/*
* The existing negative cache entry is more trusted.
*/
if (addedrdataset != NULL)
return (DNS_R_UNCHANGED);
}
/*
* The new rdataset is better. Expire the
* ncache entry.
*/
goto find_header;
}
}
}
break;
}
/*
* If header isn't NULL, we've found the right type. There may be
* IGNORE rdatasets between the top of the chain and the first real
* data. We skip over them.
*/
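/*
 * Illustrative sketch (not from the original code): skipping the IGNORE
 * headers mentioned above.  The 'down' pointer and the IGNORE() test are
 * assumed names used here only for illustration.
 */
#if 0
	/* Step past headers marked IGNORE until real data is reached. */
	while (header != NULL && IGNORE(header))
		header = header->down;
#endif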
/*
* Deleting an already non-existent rdataset has no effect.
*/
if (header_nx && newheader_nx) {
return (DNS_R_UNCHANGED);
}
/*
* Trying to add an rdataset with lower trust to a cache DB
* has no effect, provided that the cache data isn't stale.
*/
if (addedrdataset != NULL)
return (DNS_R_UNCHANGED);
}
/*
* Don't merge if a nonexistent rdataset is involved.
*/
/*
* If 'merge' is ISC_TRUE, we'll try to create a new rdataset
* that is the union of 'newheader' and 'header'.
*/
if (merge) {
unsigned int flags = 0;
if ((options & DNS_DBADD_EXACT) != 0)
if ((options & DNS_DBADD_EXACTTTL) != 0 &&
if (result == ISC_R_SUCCESS)
(unsigned char *)header,
(unsigned char *)newheader,
(unsigned int)(sizeof(*newheader)),
if (result == ISC_R_SUCCESS) {
/*
* If 'header' has the same serial number as
* we do, we could clean it up now if we knew
* that our caller had no references to it.
* We don't know this, however, so we leave it
* alone. It will get cleaned up when
* clean_zone_node() runs.
*/
}
} else {
return (result);
}
}
/*
* Don't replace existing NS, A and AAAA RRsets
* in the cache if they already exist. This
* prevents named from being locked to old
* servers. Don't lower the trust of an
* existing record if the update is forced.
*/
!header_nx && !newheader_nx &&
dns_rdataslab_equalx((unsigned char *)header,
(unsigned char *)newheader,
(unsigned int)(sizeof(*newheader)),
/*
* Honour the new ttl if it is less than the
* older one.
*/
}
}
if (addedrdataset != NULL)
return (ISC_R_SUCCESS);
}
/*
* If we will be replacing an NS RRset, force its TTL
* to be no more than the current NS RRset's TTL. This
* ensures that delegations that are withdrawn are honoured.
*/
!header_nx && !newheader_nx &&
}
}
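/*
 * Illustrative sketch (not the original code) of the TTL clamp described
 * in the comment above; 'rdh_ttl' is the TTL field referenced elsewhere
 * in this file, and the surrounding condition is assumed.
 */
#if 0
	/* Never let a replacement NS RRset outlive the one it replaces. */
	if (newheader->rdh_ttl > header->rdh_ttl)
		newheader->rdh_ttl = header->rdh_ttl;
#endif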
(options & DNS_DBADD_PREFETCH) == 0 &&
!header_nx && !newheader_nx &&
dns_rdataslab_equal((unsigned char *)header,
(unsigned char *)newheader,
(unsigned int)(sizeof(*newheader)))) {
/*
* Honour the new ttl if it is less than the
* older one.
*/
}
}
if (addedrdataset != NULL)
return (ISC_R_SUCCESS);
}
if (topheader_prev != NULL)
else
if (loading) {
/*
* There are no other references to 'header' when
* loading, so we MAY clean up 'header' now.
* Since we don't generate changed records when
* loading, we MUST clean up 'header' now.
*/
if (result != ISC_R_SUCCESS)
return (result);
}
} else {
if (rbtversion == NULL) {
}
}
/*
* XXXMLG We don't check the return value
* here. If it fails, we will not do TTL
* based expiry on this node. However, we
* will do it on the LRU side, so memory
* will not leak... for long.
*/
if (result != ISC_R_SUCCESS)
return (result);
}
}
} else {
/*
* No non-IGNORED rdatasets of the given type exist at
* this node.
*/
/*
* If we're trying to delete the type, don't bother.
*/
if (newheader_nx) {
return (DNS_R_UNCHANGED);
}
/*
* We have a list of rdatasets of the given type,
* but they're all marked IGNORE. We simply insert
* the new rdataset at the head of the list.
*
* Ignored rdatasets cannot occur during loading, so
* we INSIST on it.
*/
if (topheader_prev != NULL)
else
} else {
/*
* No rdatasets of the given type exist at the node.
*/
}
if (result != ISC_R_SUCCESS)
return (result);
}
}
/*
* Check if the node now contains CNAME and other data.
*/
if (rbtversion != NULL &&
return (DNS_R_CNAMEANDOTHER);
if (addedrdataset != NULL)
return (ISC_R_SUCCESS);
}
static inline isc_boolean_t
{
if (type == dns_rdatatype_dname)
return (ISC_TRUE);
else
return (ISC_FALSE);
} else if (type == dns_rdatatype_dname ||
(type == dns_rdatatype_ns &&
return (ISC_TRUE);
return (ISC_FALSE);
}
static inline isc_result_t
{
isc_region_t r;
goto cleanup;
}
if (result != ISC_R_SUCCESS)
goto cleanup;
if (result != ISC_R_SUCCESS)
goto cleanup;
if (result != ISC_R_SUCCESS)
goto cleanup;
return (ISC_R_SUCCESS);
return(result);
}
static inline isc_result_t
{
isc_region_t r;
goto cleanup;
}
if (result != ISC_R_SUCCESS)
goto cleanup;
if (result != ISC_R_SUCCESS)
goto cleanup;
if (result != ISC_R_SUCCESS)
goto cleanup;
return (ISC_R_SUCCESS);
return(result);
}
static dns_dbmethods_t zone_methods;
static isc_result_t
{
if (rbtversion == NULL) {
if (now == 0)
} else
now = 0;
&region, sizeof(rdatasetheader_t));
if (result != ISC_R_SUCCESS)
return (result);
newheader->attributes = 0;
if (rbtversion != NULL) {
now = 0;
} else {
newheader->resign_lsb = 0;
}
} else {
newheader->resign_lsb = 0;
if (result != ISC_R_SUCCESS) {
return (result);
}
}
if (result != ISC_R_SUCCESS) {
return (result);
}
}
}
/*
* If we're adding a delegation type (e.g. NS or DNAME for a zone,
* just DNAME for the cache), then we need to set the callback bit
* on the node.
*/
else
/*
* Add to the auxiliary NSEC tree if we're adding an NSEC record.
*/
else
/*
* If we're adding a delegation type, adding to the auxiliary NSEC tree,
* or the DB is a cache in an overmem state, hold an exclusive lock on
* the tree. In the latter case the lock does not necessarily have to
* be acquired but it will help purge stale entries more effectively.
*/
}
if (cache_is_overmem)
}
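/*
 * Illustrative sketch (assumptions only) of the locking decision described
 * in the comment above; 'delegating', 'rbtdb->tree_lock' and RWLOCK() are
 * assumed to be the usual names for these pieces.
 */
#if 0
	if (delegating || newnsec || cache_is_overmem) {
		tree_locked = ISC_TRUE;
		RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
	}
#endif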
if (tree_locked)
/*
* If we've been holding a write lock on the tree just for
* cleaning, we can release it now. However, we still need the
* node lock.
*/
}
}
if (newnsec) {
if (result == ISC_R_SUCCESS) {
} else if (result == ISC_R_EXISTS) {
}
}
if (result == ISC_R_SUCCESS)
if (tree_locked)
/*
* Update the zone's secure status. If version is non-NULL
* this is deferred until closeversion() is called.
*/
return (result);
}
static isc_result_t
{
unsigned char *subresult;
&region, sizeof(rdatasetheader_t));
if (result != ISC_R_SUCCESS)
return (result);
newheader->attributes = 0;
} else {
newheader->resign_lsb = 0;
}
return (ISC_R_NOMEMORY);
}
break;
}
/*
* If header isn't NULL, we've found the right type. There may be
* IGNORE rdatasets between the top of the chain and the first real
* data. We skip over them.
*/
unsigned int flags = 0;
if ((options & DNS_DBSUB_EXACT) != 0) {
}
if (result == ISC_R_SUCCESS)
(unsigned char *)header,
(unsigned char *)newheader,
(unsigned int)(sizeof(*newheader)),
if (result == ISC_R_SUCCESS) {
/*
* We have to set the serial since the rdataslab
* subtraction routine copies the reserved portion of
* header, not newheader.
*/
/*
* XXXJT: dns_rdataslab_subtract() copied the pointers
* to additional info. We need to clear these fields
* to avoid having duplicated references.
*/
} else if (result == DNS_R_NXRRSET) {
/*
* This subtraction would remove all of the rdata;
* add a nonexistent header instead.
*/
goto unlock;
}
newheader->resign_lsb = 0;
} else {
goto unlock;
}
/*
* If we're here, we want to link newheader in front of
* topheader.
*/
if (topheader_prev != NULL)
else
} else {
/*
* The rdataset doesn't exist, so we don't need to do anything
* to satisfy the deletion request.
*/
if ((options & DNS_DBSUB_EXACT) != 0)
else
}
(options & DNS_DBSUB_WANTOLD) != 0)
/*
* Update the zone's secure status. If version is non-NULL
* this is deferred until closeversion() is called.
*/
return (result);
}
static isc_result_t
{
if (type == dns_rdatatype_any)
return (ISC_R_NOTIMPLEMENTED);
return (ISC_R_NOTIMPLEMENTED);
return (ISC_R_NOMEMORY);
if (rbtversion != NULL)
else
/*
* Update the zone's secure status. If version is non-NULL
* this is deferred until closeversion() is called.
*/
return (result);
}
/*
* Load a non-NSEC3 node into the main tree and, optionally, into the
* auxiliary NSEC tree.
*/
static isc_result_t
{
name);
if (rpzresult == ISC_R_SUCCESS) {
} else if (noderesult != ISC_R_EXISTS) {
/*
* Remove the node we just added above.
*/
if (tmpresult != ISC_R_SUCCESS)
"loading_addrdataset: "
"dns_rbt_deletenode: %s after "
"dns_rbt_addnode(NSEC): %s",
}
}
if (!hasnsec)
goto done;
if (noderesult == ISC_R_EXISTS) {
/*
* Add a node to the auxiliary NSEC tree for an old node
* just now getting an NSEC record.
*/
goto done;
} else if (noderesult != ISC_R_SUCCESS)
goto done;
/*
* Build the auxiliary tree for NSECs as we go.
* This tree speeds searches for closest NSECs that would otherwise
* need to examine many irrelevant nodes in large TLDs.
*
* Add nodes to the auxiliary tree after corresponding nodes have
* been added to the main tree.
*/
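/*
 * Illustrative sketch (not the original code): mirroring the name into the
 * auxiliary NSEC tree with dns_rbt_addnode(); 'rbtdb->nsec' is an assumed
 * name for that tree.
 */
#if 0
	dns_rbtnode_t *nsecnode = NULL;
	nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
#endif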
if (nsecresult == ISC_R_SUCCESS) {
goto done;
}
if (nsecresult == ISC_R_EXISTS) {
#if 1 /* 0 */
"addnode: NSEC node already exists");
#endif
goto done;
}
if (noderesult == ISC_R_SUCCESS) {
unsigned int node_has_rpz;
/*
* Remove the node we just added above.
*/
if (tmpresult == ISC_R_SUCCESS) {
/*
* Clean rpz entries added above.
*/
} else {
"loading_addrdataset: "
"dns_rbt_deletenode: %s after "
"dns_rbt_addnode(NSEC): %s",
}
}
/*
* Set the error condition to be returned.
*/
done:
return (noderesult);
}
static isc_result_t
/*
* This routine does no node locking. See comments in
* 'load' below for more information on loading and
* locking.
*/
/*
* SOA records are only allowed at top of zone.
*/
return (DNS_R_NOTZONETOP);
if (dns_name_iswildcard(name)) {
/*
* NS record owners cannot legally be wild cards.
*/
return (DNS_R_INVALIDNS);
/*
* NSEC3 record owners cannot legally be wild cards.
*/
return (DNS_R_INVALIDNSEC3);
if (result != ISC_R_SUCCESS)
return (result);
}
if (result == ISC_R_SUCCESS)
} else {
}
return (result);
if (result == ISC_R_SUCCESS) {
#ifdef DNS_RBT_USEHASH
#else
#endif
}
&region,
sizeof(rdatasetheader_t));
if (result != ISC_R_SUCCESS)
return (result);
newheader->attributes = 0;
} else {
newheader->resign_lsb = 0;
}
if (result == ISC_R_SUCCESS &&
else if (result == DNS_R_UNCHANGED)
return (result);
}
static isc_result_t
{
unsigned char *p;
p = (unsigned char *) header;
#ifdef DEBUG
size - sizeof(rdatasetheader_t));
#endif
header->node_is_relative = 0;
if (result != ISC_R_SUCCESS)
return (result);
}
return (ISC_R_INVALIDFILE);
header->next_is_relative = 0;
return (ISC_R_INVALIDFILE);
}
}
return (ISC_R_SUCCESS);
}
/*
* Load the RBT database from the image in 'f'
*/
static isc_result_t
int fd;
char *base;
/*
* TODO CKB: since this is read-write (had to be to add nodes later)
* we will need to lock the file or the nodes in it before modifying
* the nodes in the file.
*/
/* Map in the whole file in one go */
flags = MAP_PRIVATE;
#ifdef MAP_FILE
#endif
return (ISC_R_FAILURE);
if (result != ISC_R_SUCCESS)
goto cleanup;
&origin_node, NULL,
if (result != ISC_R_SUCCESS)
goto cleanup;
}
if (result != ISC_R_SUCCESS)
goto cleanup;
}
if (result != ISC_R_SUCCESS)
goto cleanup;
}
/*
* We have successfully loaded all the RBT trees; now update
* rbtdb to use them.
*/
}
}
}
return (ISC_R_SUCCESS);
return (result);
}
static isc_result_t
return (ISC_R_NOMEMORY);
else
if (result != ISC_R_SUCCESS) {
sizeof(*loadctx));
return (result);
}
}
== 0);
return (ISC_R_SUCCESS);
}
static isc_result_t
/*
* If there's a KEY rdataset at the zone origin containing a
* zone key, we consider the zone secure.
*/
return (ISC_R_SUCCESS);
}
/*
* helper function to handle writing out the rdataset data pointed to
* by the void *data pointer in the dns_rbtnode
*/
static isc_result_t
{
unsigned char *p;
char pad[sizeof(char *)];
do {
if (NONEXISTENT(header))
break;
} else
continue;
sizeof(rdatasetheader_t));
p = (unsigned char *) header;
return (ISC_R_RANGE);
/*
* Round size up to the next pointer-sized offset so it
* will be properly aligned when read back in.
*/
}
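/*
 * The round-up mentioned above can be done with the usual bit trick; this
 * is an illustrative sketch assuming 'size' is a size_t and the target
 * alignment is sizeof(char *).
 */
#if 0
	size = (size + sizeof(char *) - 1) & ~(sizeof(char *) - 1);
#endif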
#ifdef DEBUG
sizeof(rdatasetheader_t));
size - sizeof(rdatasetheader_t));
#endif
sizeof(rdatasetheader_t));
size - sizeof(rdatasetheader_t));
/*
* Pad to force alignment.
*/
}
}
return (result);
}
/*
* Write out a zeroed header as a placeholder. Doing this ensures
* that the file will not be read while it is partially written, should
* writing fail or be interrupted.
*/
static isc_result_t
char buffer[RBTDB_HEADER_LENGTH];
return (result);
}
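/*
 * Standalone, hedged sketch of the placeholder-header pattern described
 * above, written with plain <stdio.h> rather than the isc file routines;
 * the function name and its use of FILE * are illustrative only.
 */
#if 0
#include <stdio.h>
#include <string.h>

static int
write_with_placeholder(FILE *fp, const void *header, size_t headerlen,
		       const void *data, size_t datalen)
{
	char zeros[RBTDB_HEADER_LENGTH];

	if (headerlen > sizeof(zeros))
		return (-1);
	memset(zeros, 0, sizeof(zeros));
	/* 1. Reserve space with a zeroed header. */
	if (fwrite(zeros, 1, headerlen, fp) != headerlen)
		return (-1);
	/* 2. Write the body of the image. */
	if (fwrite(data, 1, datalen, fp) != datalen)
		return (-1);
	/* 3. Seek back and write the real header last. */
	if (fseek(fp, 0L, SEEK_SET) != 0 ||
	    fwrite(header, 1, headerlen, fp) != headerlen)
		return (-1);
	return (fflush(fp) == 0 ? 0 : -1);
}
#endif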
static void
init_file_version(void) {
int n;
INSIST(n > 0 && (unsigned int)n < sizeof(FILE_VERSION));
}
/*
* Write the file header out, recording the locations of the three
* RBTs used in the rbtdb: tree, nsec, and nsec3, and including NodeDump
* version information and any information stored in the rbtdb object
* itself that should be stored here.
*/
static isc_result_t
{
return (result);
}
static isc_result_t
/* Ensure we're writing to a plain file */
/*
* first, write out a zeroed header to store rbtdb information
*
* then for each of the three trees, store the current position
* in the file and call dns_rbt_serialize_tree
*
* finally, write out the rbtdb header, storing the locations of the
* rbtheaders
*
* NOTE: need to do something better with the return codes, &= will
* not work.
*/
version, &tree_location));
version, &nsec_location));
version, &nsec3_location));
return (result);
}
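/*
 * The NOTE above observes that folding results together with '&=' loses
 * the first failure.  A hedged sketch of a goto-on-failure macro (a common
 * ISC-style idiom; the name here is hypothetical) follows.
 */
#if 0
#define CHECKRESULT(op)						\
	do {							\
		result = (op);					\
		if (result != ISC_R_SUCCESS)			\
			goto failure;				\
	} while (0)
#endif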
static isc_result_t
{
filename, masterformat));
}
static void
unsigned int locknum;
}
}
static isc_boolean_t
return (secure);
}
static isc_boolean_t
return (dnssec);
}
static unsigned int
unsigned int count;
return (count);
}
static size_t
return (size);
}
static void
}
static isc_boolean_t
return (ISC_FALSE);
}
static isc_result_t
/* Note that the access to origin_node doesn't require a DB lock */
} else {
}
return (result);
}
static isc_result_t
{
if (rbtversion == NULL)
if (rbtversion->havensec3) {
}
if (salt_length != NULL)
if (iterations != NULL)
}
return (result);
}
static isc_result_t
header--;
if (header->heap_index != 0) {
if (resign == 0) {
header->heap_index);
header->heap_index = 0;
header->heap_index);
header->heap_index);
}
return (result);
}
static isc_result_t
{
unsigned int i;
unsigned int locknum;
for (i = 0; i < rbtdb->node_lock_count; i++) {
continue;
}
} else
}
goto unlock;
return (result);
}
static void
{
header--;
if (header->heap_index == 0)
return;
/*
* Delete from heap and save to re-signed list so that it can
* be restored if we back out of this change.
*/
}
static isc_result_t
return (ISC_R_SUCCESS);
}
static dns_stats_t *
return (rbtdb->rrsetstats);
}
static dns_dbmethods_t zone_methods = {
dump,
NULL,
NULL,
NULL,
NULL,
NULL,
};
static dns_dbmethods_t cache_methods = {
NULL,
dump,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
};
#ifdef DNS_RBTDB_VERSION64
#else
#endif
{
int i;
isc_boolean_t (*sooner)(void *, void *);
/* Keep the compiler happy. */
return (ISC_R_NOMEMORY);
/*
* If argv[0] exists, it points to a memory context to use for heap
*/
if (argc != 0)
if (type == dns_dbtype_cache) {
} else if (type == dns_dbtype_stub) {
} else
if (result != ISC_R_SUCCESS)
goto cleanup_rbtdb;
if (result != ISC_R_SUCCESS)
goto cleanup_lock;
/*
* Initialize node_lock_count in a generic way to support future
* extension which allows the user to specify this value on creation.
* Note that when specified for a cache DB it must be larger than 1
* as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
*/
if (rbtdb->node_lock_count == 0) {
else
goto cleanup_tree_lock;
}
sizeof(rbtdb_nodelock_t));
goto cleanup_tree_lock;
}
if (result != ISC_R_SUCCESS)
goto cleanup_node_locks;
sizeof(rdatasetheaderlist_t));
goto cleanup_rrsetstats;
}
for (i = 0; i < (int)rbtdb->node_lock_count; i++)
} else
/*
* Create the heaps.
*/
sizeof(isc_heap_t *));
goto cleanup_rdatasets;
}
for (i = 0; i < (int)rbtdb->node_lock_count; i++)
for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
if (result != ISC_R_SUCCESS)
goto cleanup_heaps;
}
/*
* Create deadnode lists.
*/
sizeof(rbtnodelist_t));
goto cleanup_heaps;
}
for (i = 0; i < (int)rbtdb->node_lock_count; i++)
for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
if (result == ISC_R_SUCCESS) {
if (result != ISC_R_SUCCESS)
}
if (result != ISC_R_SUCCESS) {
while (i-- > 0) {
}
goto cleanup_deadnodes;
}
}
/*
* Attach to the mctx. The database will persist so long as there
* are references to it, and attaching to the mctx ensures that our
* mctx won't disappear out from under us.
*/
/*
* Must be initialized before free_rbtdb() is called.
*/
/*
* Make a copy of the origin name.
*/
if (result != ISC_R_SUCCESS) {
return (result);
}
/*
* Make the Red-Black Trees.
*/
if (result != ISC_R_SUCCESS) {
return (result);
}
if (result != ISC_R_SUCCESS) {
return (result);
}
if (result != ISC_R_SUCCESS) {
return (result);
}
/*
* In order to set the node callback bit correctly in zone databases,
* we need to know if the node has the origin name of the zone.
* In loading_addrdataset() we could simply compare the new name
* to the origin name, but this is expensive. Also, we don't know the
* node name in addrdataset(), so we need another way of knowing the
* zone's top.
*
* We now explicitly create a node for the zone's origin, and then
* we simply remember the node's address. This is safe, because
* the top-of-zone node can never be deleted, nor can its address
* change.
*/
&rbtdb->origin_node);
if (result != ISC_R_SUCCESS) {
return (result);
}
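/*
 * Once the origin node's address has been remembered as described above,
 * testing for the zone top is a plain pointer comparison.  Illustrative
 * sketch; 'node' stands for an arbitrary dns_rbtnode_t pointer.
 */
#if 0
	isc_boolean_t at_zone_top = ISC_TF(node == rbtdb->origin_node);
#endif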
/*
* We need to give the origin node the right locknum.
*/
#ifdef DNS_RBT_USEHASH
#else
#endif
/*
* Add an apex node to the NSEC3 tree so that NSEC3 searches
* return partial matches when there is only a single NSEC3
* record in the tree.
*/
&nsec3node);
if (result != ISC_R_SUCCESS) {
return (result);
}
/*
* We need to give the nsec3 origin node the right locknum.
*/
#ifdef DNS_RBT_USEHASH
#else
#endif
}
/*
* Misc. Initialization.
*/
if (result != ISC_R_SUCCESS) {
return (result);
}
rbtdb->attributes = 0;
/*
* Version Initialization.
*/
return (ISC_R_NOMEMORY);
}
/*
* Keep the current version in the open list so that list operations
* won't happen during normal lookups.
*/
return (ISC_R_SUCCESS);
for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
}
sizeof(rdatasetheaderlist_t));
return (result);
}
/*
* Slabbed Rdataset Methods
*/
static void
}
static isc_result_t
unsigned int count;
if (count == 0) {
return (ISC_R_NOMORE);
}
else
#endif
raw += 2;
/*
* The privateuint4 field is the number of rdata beyond the
* cursor position, so we decrement the total count by one
* before storing it.
*
* If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
* first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
* to the first entry in the offset table.
*/
count--;
return (ISC_R_SUCCESS);
}
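/*
 * Hedged sketch of how the iterator obtains its record count: an rdataslab
 * is assumed to begin with a 16-bit, big-endian count, which is what the
 * 'raw += 2' above steps over.
 */
#if 0
	count = ((unsigned int)raw[0] << 8) | raw[1];
	raw += 2;
#endif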
static isc_result_t
unsigned int count;
unsigned int length;
unsigned char *raw; /* RDATASLAB */
if (count == 0)
return (ISC_R_NOMORE);
count--;
/*
* Skip forward one record (length + 4) or one offset (4).
*/
#endif
}
#else
#endif
return (ISC_R_SUCCESS);
}
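/*
 * Hedged sketch of the "length + 4" skip described above: each record is
 * assumed to carry a 2-byte length and a 2-byte order field before the
 * rdata, while offset-table entries are 4 bytes each.
 */
#if 0
	length = ((unsigned int)raw[0] << 8) | raw[1];
	raw += length + 4;	/* 2-byte length + 2-byte order + rdata */
#endif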
static void
unsigned int offset;
#endif
unsigned int length;
isc_region_t r;
unsigned int flags = 0;
/*
* Find the start of the record if it is not already in private5,
* then skip the length and order fields.
*/
}
#endif
raw += 4;
#else
raw += 2;
#endif
if (*raw & DNS_RDATASLAB_OFFLINE)
length--;
raw++;
}
}
static void
/*
* Reset iterator state.
*/
target->privateuint4 = 0;
}
static unsigned int
unsigned int count;
return (count);
}
static isc_result_t
{
cloned_node = NULL;
nsec->privateuint4 = 0;
cloned_node = NULL;
nsecsig->privateuint4 = 0;
return (ISC_R_SUCCESS);
}
static isc_result_t
{
cloned_node = NULL;
nsec->privateuint4 = 0;
cloned_node = NULL;
nsecsig->privateuint4 = 0;
return (ISC_R_SUCCESS);
}
static void
header--;
}
static void
header--;
}
static void
header--;
}
/*
* Rdataset Iterator Methods
*/
static void
sizeof(*rbtiterator));
}
static isc_result_t
serial = 1;
} else {
now = 0;
}
do {
/*
* Is this a "this rdataset doesn't exist"
* record? Or is it too old in the cache?
*
* Note: unlike everywhere else, we
* check for now > header->rdh_ttl instead
* of now >= header->rdh_ttl. This allows
* ANY and RRSIG queries for 0 TTL
* rdatasets to work.
*/
if (NONEXISTENT(header) ||
break;
} else
break;
}
return (ISC_R_NOMORE);
return (ISC_R_SUCCESS);
}
static isc_result_t
return (ISC_R_NOMORE);
serial = 1;
} else {
now = 0;
}
} else
/*
* If not walking back up the down list.
*/
do {
/*
* Is this a "this rdataset doesn't
* exist" record?
*
* Note: unlike everywhere else, we
* check for now > header->ttl instead
* of now >= header->ttl. This allows
* ANY and RRSIG queries for 0 TTL
* rdatasets to work.
*/
if ((header->attributes &
RDATASET_ATTR_NONEXISTENT) != 0 ||
break;
} else
break;
}
}
return (ISC_R_NOMORE);
return (ISC_R_SUCCESS);
}
static void
rdataset);
}
/*
* Database Iterator Methods
*/
static inline void
return;
}
static inline void
return;
}
static void
int i;
/*
* Note that "%d node of %d in tree" can report things like
* "flush_deletions: 59 nodes of 41 in tree". This means
* that some nodes appear on the deletions list more than
* once. Only the last occurrence will actually be deleted.
*/
"flush_deletions: %d nodes of %d in tree",
}
}
if (was_read_locked) {
} else {
}
}
}
static inline void
}
static void
} else
dns_db_detach(&db);
}
static isc_result_t
} else {
origin);
}
}
if (result == ISC_R_SUCCESS) {
}
} else {
}
return (result);
}
static isc_result_t
}
}
if (result == ISC_R_SUCCESS) {
}
} else {
}
return (result);
}
static isc_result_t
} else {
/*
* Stay on main chain if not found on either chain.
*/
if (result == DNS_R_PARTIALMATCH) {
if (tresult == ISC_R_SUCCESS) {
}
}
}
if (tresult == ISC_R_SUCCESS) {
} else {
}
} else
return (result);
}
static isc_result_t
if (result == ISC_R_NOTFOUND)
}
}
if (result == ISC_R_SUCCESS)
return (result);
}
static isc_result_t
if (result == ISC_R_NOTFOUND)
}
}
if (result == ISC_R_SUCCESS)
return (result);
}
static isc_result_t
{
if (result != ISC_R_SUCCESS)
return (result);
} else
/*
* If the deletion array is full, flush it before trying
* to expire the current node. The current node can't be
* fully deleted while the iteration cursor is still on it.
*/
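/*
 * Hedged sketch of the flush-then-record pattern described above;
 * DELETION_BATCH_MAX, delcnt and deletions are assumed names for the
 * iterator's batch limit and pending-deletion array.
 */
#if 0
	if (rbtdbiter->delcnt == DELETION_BATCH_MAX)
		flush_deletions(rbtdbiter);
	rbtdbiter->deletions[rbtdbiter->delcnt++] = node;
#endif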
/*
* expirenode() currently always returns success.
*/
unsigned int refs;
}
}
return (result);
}
static isc_result_t
return (ISC_R_SUCCESS);
}
return (ISC_R_SUCCESS);
}
static isc_result_t
}
/*%
* Additional cache routines.
*/
static isc_result_t
{
unsigned int count;
unsigned int total_count;
switch (type) {
break;
break;
break;
default:
INSIST(0);
}
if (type != dns_rdatasetadditional_fromcache)
return (ISC_R_NOTFOUND);
}
return (ISC_R_NOTFOUND);
}
return (result);
}
static void
unsigned int count;
/*
* The caller must hold the entry lock.
*/
break;
break;
default:
INSIST(0);
}
}
}
static void
{
if (dns_acache_cancelentry(entry)) {
}
}
static isc_result_t
{
unsigned int total_count, count;
if (type == dns_rdatasetadditional_fromcache)
return (ISC_R_SUCCESS);
return (ISC_R_NOMEMORY);
if (result != ISC_R_SUCCESS)
goto fail;
/* Set cache data in the new entry. */
if (result != ISC_R_SUCCESS)
goto fail;
switch (type) {
break;
break;
default:
INSIST(0);
}
unsigned int i;
sizeof(acachectl_t));
goto fail;
}
for (i = 0; i < total_count; i++) {
}
}
switch (type) {
break;
break;
default:
INSIST(0);
}
/*
* Swap the entry. Delay cleaning up the old entry since
* it would require a node lock.
*/
}
}
return (ISC_R_SUCCESS);
fail:
&newcbarg);
} else {
sizeof(*newcbarg));
}
}
return (result);
}
static isc_result_t
{
unsigned int total_count, count;
if (type == dns_rdatasetadditional_fromcache)
return (ISC_R_SUCCESS);
switch (type) {
break;
break;
default:
INSIST(0);
}
return (ISC_R_NOTFOUND);
}
return (ISC_R_NOTFOUND);
}
}
return (ISC_R_SUCCESS);
}
static void
unsigned int i;
/*
* We do not need to worry about label lengths as they are all
* less than or equal to 63.
*/
}
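/*
 * Hedged sketch of recording the owner name's case as a bitmap, one bit
 * per character (every label is at most 63 octets, as noted above); the
 * 'bits' array is illustrative and isupper() comes from <ctype.h>.
 */
#if 0
	for (i = 0; i < name->length; i++) {
		if (isupper((unsigned char)name->ndata[i]))
			bits[i / 8] |= (1 << (i % 8));
	}
#endif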
static void
}
static void
const rdatasetheader_t *header;
unsigned int i;
return;
/*
* Set the case bit if it does not match the recorded bit.
*/
}
}
/*%
* Routines for LRU-based cache management.
*/
/*%
* See if a given cache entry that is being reused needs to be updated
* in the LRU-list. From the LRU management point of view, this function is
* expected to return true for almost all cases. When used with threads,
* however, this may cause a non-negligible performance penalty because a
* writer lock will have to be acquired before updating the list.
* If DNS_RBTDB_LIMITLRUUPDATE is defined to be nonzero at compilation time, this
* function returns true if the entry has not been updated for some period of
* time. We differentiate the NS or glue address case and the others since
* experiments have shown that the former tends to be accessed relatively
* infrequently and the cost of a cache miss is higher (e.g., a missing NS
* record may trigger external queries to a higher-level zone, involving more
* transactions).
*
* Caller must hold the node (read or write) lock.
*/
static inline isc_boolean_t
if ((header->attributes &
return (ISC_FALSE);
/*
* Glue records are updated if at least 60 seconds have passed
* since the previous update time.
*/
}
/* Other records are updated if 5 minutes have passed. */
#else
return (ISC_TRUE);
#endif
}
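/*
 * Hedged sketch of the interval test described above; 'last_used' is the
 * assumed timestamp field and 'interval' would be 60 seconds for glue
 * records and 300 seconds otherwise.
 */
#if 0
	if (header->last_used + interval <= now)
		return (ISC_TRUE);
	return (ISC_FALSE);
#endif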
/*%
* Update the timestamp of a given cache entry and move it to the head
* of the corresponding LRU list.
*
* Caller must hold the node (write) lock.
*
* Note that we do NOT touch the heap here, as the TTL has not changed.
*/
static void
{
/* To be checked: can we really assume this? XXXMLG */
}
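/*
 * Hedged sketch of the move-to-front described above, using the ISC list
 * macros; the per-bucket list name 'rdatasets[...]' and the 'last_used'
 * field are assumptions.
 */
#if 0
	header->last_used = now;
	ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum],
			header, link);
	ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum],
			 header, link);
#endif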
/*%
* Purge expired or stale cache entries under an overmem condition. To
* recover from this condition quickly, up to
* 2 entries will be purged. This process is triggered while adding a new
* entry, and we specifically avoid purging entries in the same LRU bucket as
* the one to which the new entry will belong. Otherwise, we might purge
* entries of the same name but different RR types while adding RRsets from a
* single response (consider the case where we're adding A and AAAA glue records
* of the same NS name).
*/
static void
{
unsigned int locknum;
int purgecount = 2;
purgecount--;
}
header = header_prev) {
/*
* Unlink the entry at this point to avoid checking it
* again even if it's currently used by someone else and
* cannot be purged at this moment. This entry won't be
* referenced any more (so unlinking is safe) since the
* TTL was reset to 0.
*/
link);
purgecount--;
}
}
}
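/*
 * Hedged sketch of the bucket scan described above: visit the other LRU
 * buckets round-robin, skipping the bucket the new entry belongs to, and
 * stop after at most two purges.  All names here are assumptions.
 */
#if 0
	for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
	     locknum != locknum_start && purgecount > 0;
	     locknum = (locknum + 1) % rbtdb->node_lock_count) {
		/* expire the oldest entries in this bucket */
	}
#endif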
static void
{
/*
* Caller must hold the node (write) lock.
*/
/*
* If no one else is using the node, we can clean it up now.
* We first need to gain a new reference to the node to meet a
* requirement of decrement_reference().
*/
return;
switch (reason) {
case expire_ttl:
break;
case expire_lru:
break;
default:
break;
}
}
}