/*
* Copyright (C) 2012-2017 Internet Systems Consortium, Inc. ("ISC")
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
/*! \file */
/*
* Rate limit DNS responses.
*/
/* #define ISC_LIST_CHECKINIT */
#include <config.h>
#include <dns/rdatatype.h>
#include <dns/rdataclass.h>
static void
char *log_buf, unsigned int log_buf_len);
/*
* Get a modulus for a hash function that is tolerably likely to be
* relatively prime to most inputs. Of course, we get a prime for for initial
* values not larger than the square of the last prime. We often get a prime
* after that.
* This works well in practice for hash tables up to at least 100
* times the square of the last prime and better than a multiplicative hash.
*/
static int
3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41,
43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97,
#if 0
101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157,
163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227,
229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283,
293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367,
373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439,
443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509,
521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599,
601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661,
673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751,
757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829,
839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919,
929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997,1009,
#endif
};
unsigned int result;
++pp;
return (*pp);
}
if ((result & 1) == 0)
++result;
divisions = 0;
tries = 1;
do {
p = *pp++;
++divisions;
if ((result % p) == 0) {
++tries;
result += 2;
}
"%d hash_divisor() divisions in %d tries"
" to get %d from %d",
return (result);
}
/*
* Convert a timestamp to a number of seconds in the past.
*/
static inline int
int delta;
if (delta >= 0)
return (delta);
/*
* The timestamp is in the future. That future might result from
* re-ordered requests, because we use timestamps on requests
* instead of consulting a clock. Timestamps in the distant future are
* assumed to result from clock changes. When the clock changes to
* the past, make existing timestamps appear to be in the past.
*/
if (delta < -DNS_RRL_MAX_TIME_TRAVEL)
return (DNS_RRL_FOREVER);
return (0);
}
static inline int
if (!e->ts_valid)
return (DNS_RRL_FOREVER);
}
static inline void
unsigned int ts_gen;
int i, ts;
if (ts < 0) {
if (ts < -DNS_RRL_MAX_TIME_TRAVEL)
else
ts = 0;
}
/*
* Make a new timestamp base if the current base is too old.
* All entries older than DNS_RRL_MAX_WINDOW seconds are ancient,
* useless history. Their timestamps can be treated as if they are
* all the same.
* We only do arithmetic on more recent timestamps, so bases for
* older timestamps can be recycled provided the old timestamps are
* marked as ancient history.
* This loop is almost always very short because most entries are
* recycled after one second and any entries that need to be marked
* are older than (DNS_RRL_TS_BASES)*DNS_RRL_MAX_TS seconds.
*/
if (ts >= DNS_RRL_MAX_TS) {
{
}
if (i != 0)
"rrl new time base scanned %d entries"
" at %d for %d %d %d %d",
ts = 0;
}
}
static isc_result_t
unsigned int bsize;
dns_rrl_block_t *b;
dns_rrl_entry_t *e;
double rate;
int i;
rrl->max_entries != 0)
{
if (newsize <= 0)
return (ISC_R_SUCCESS);
}
/*
* Log expansions so that the user can tune max-table-size
* and min-table-size.
*/
"increase from %d to %d RRL entries with"
" %d bins; average search length %.1f",
}
if (b == NULL) {
"isc_mem_get(%d) failed for RRL entries",
bsize);
return (ISC_R_NOMEMORY);
}
e = b->entries;
for (i = 0; i < newsize; ++i, ++e) {
ISC_LINK_INIT(e, hlink);
}
return (ISC_R_SUCCESS);
}
static inline dns_rrl_bin_t *
}
static void
++old_bin)
{
ISC_LINK_INIT(e, hlink);
}
}
sizeof(*old_hash)
}
static isc_result_t
double rate;
/*
* Most searches fail and so go to the end of the chain.
* Use a small hash table load factor.
*/
"isc_mem_get(%d) failed for"
" RRL hash table",
hsize);
return (ISC_R_NOMEMORY);
}
"increase from %d to %d RRL bins for"
" %d entries; average search length %.1f",
}
return (ISC_R_SUCCESS);
}
static void
/*
* Make the entry most recently used.
*/
if (e == rrl->last_logged)
}
/*
* Expand the hash table if it is time and necessary.
* This will leave the newly referenced entry in a chain in the
* old hash table. It will migrate to the new hash table the next
* time it is used or be cut loose when the old hash table is destroyed.
*/
}
}
static inline isc_boolean_t
if (memcmp(a, b, sizeof(dns_rrl_key_t)) == 0)
return (ISC_TRUE);
return (ISC_FALSE);
}
static inline isc_uint32_t
int i;
}
return (hval);
}
/*
* Construct the hash table key.
* Use a hash of the DNS query name to save space in the database.
* Collisions result in legitimate rate limiting responses for one
* query name also limiting responses for other names to the
* same client. This is rare and benign enough given the large
* space costs compared to keeping the entire name in the database
* entry or the time costs of dynamic allocation.
*/
static void
const isc_sockaddr_t *client_addr,
{
int labels, i;
if (rtype == DNS_RRL_RTYPE_QUERY) {
} else if (rtype == DNS_RRL_RTYPE_REFERRAL ||
rtype == DNS_RRL_RTYPE_NODATA) {
/*
* Because there is no qtype in the empty answer sections of
* referral and NODATA responses, count them as the same.
*/
}
/*
* Ignore the first label of wildcards.
*/
{
key->s.qname_hash =
} else {
key->s.qname_hash =
}
}
case AF_INET:
break;
case AF_INET6:
for (i = 0; i < DNS_RRL_MAX_PREFIX/32; ++i)
break;
}
}
static inline dns_rrl_rate_t *
switch (rtype) {
case DNS_RRL_RTYPE_QUERY:
return (&rrl->responses_per_second);
case DNS_RRL_RTYPE_REFERRAL:
return (&rrl->referrals_per_second);
case DNS_RRL_RTYPE_NODATA:
return (&rrl->nodata_per_second);
case DNS_RRL_RTYPE_NXDOMAIN:
return (&rrl->nxdomains_per_second);
case DNS_RRL_RTYPE_ERROR:
return (&rrl->errors_per_second);
case DNS_RRL_RTYPE_ALL:
return (&rrl->all_per_second);
default:
INSIST(0);
}
return (NULL);
}
static int
rate = 1;
} else {
}
return (balance);
}
/*
* Search for an entry for a response and optionally create it.
*/
static dns_rrl_entry_t *
char *log_buf, unsigned int log_buf_len)
{
dns_rrl_entry_t *e;
/*
* Look for the entry in the current hash table.
*/
probes = 1;
e = ISC_LIST_HEAD(*new_bin);
while (e != NULL) {
return (e);
}
++probes;
e = ISC_LIST_NEXT(e, hlink);
}
/*
* Look in the old hash table.
*/
e = ISC_LIST_HEAD(*old_bin);
while (e != NULL) {
return (e);
}
e = ISC_LIST_NEXT(e, hlink);
}
/*
* Discard prevous hash table when all of its entries are old.
*/
}
if (!create)
return (NULL);
/*
* The entry does not exist, so create it by finding a free entry.
* Keep currently penalized and logged entries.
* Try to make more entries if none are idle.
* Steal the oldest entry if we cannot create more.
*/
e != NULL;
e = ISC_LIST_PREV(e, lru))
{
if (!ISC_LINK_LINKED(e, hlink))
break;
if (age <= 1) {
e = NULL;
break;
}
break;
}
if (e == NULL) {
}
if (e->logged)
if (ISC_LINK_LINKED(e, hlink)) {
else
}
return (e);
}
static void
const char *age_str;
if (age == DNS_RRL_FOREVER) {
age_str = "";
} else {
}
"rrl %08x %6s responses=%-3d %s",
}
static inline dns_rrl_result_t
char *log_buf, unsigned int log_buf_len)
{
/*
* Pick the rate counter.
*/
if (rate == 0)
return (DNS_RRL_RESULT_OK);
if (scale < 1.0) {
/*
* The limit for clients that have used TCP is not scaled.
*/
0, dns_rdatatype_none, NULL,
scale = 1.0;
}
}
if (scale < 1.0) {
if (new_rate < 1)
new_rate = 1;
"%d qps scaled %s by %.2f"
" from %d to %d",
}
}
/*
* Treat time jumps into the recent past as no time.
* Treat entries older than the window as if they were just created
* Credit other entries.
*/
if (age > 0) {
/*
* Credit tokens earned during elapsed time.
*/
e->slip_cnt = 0;
} else {
e->slip_cnt = 0;
}
}
/*
* Find the seconds since last log message without overflowing
* small counter. This counter is reset when an entry is
* created. It is not necessarily reset when some requests
* are answered provided other requests continue to be dropped
* or slipped. This can happen when the request rate is just
* at the limit.
*/
if (e->logged) {
}
}
/*
* Debit the entry for this response.
*/
if (--e->responses >= 0) {
return (DNS_RRL_RESULT_OK);
}
/*
* Drop this response unless it should slip or leak.
*/
if (new_slip < 2)
new_slip = 2;
"%d qps scaled slip"
" by %.2f from %d to %d",
}
}
if (e->slip_cnt++ == 0) {
e->slip_cnt = 0;
return (DNS_RRL_RESULT_SLIP);
e->slip_cnt = 0;
}
}
return (DNS_RRL_RESULT_DROP);
}
static inline dns_rrl_qname_buf_t *
return (NULL);
return (qbuf);
}
static inline void
}
}
static void
return;
}
}
/*
* Build strings for the logs
*/
static void
char *log_buf, unsigned int log_buf_len)
{
const char *rstr;
if (log_buf_len <= 1) {
if (log_buf_len == 1)
log_buf[0] = '\0';
return;
}
switch (rrl_result) {
case DNS_RRL_RESULT_OK:
break;
case DNS_RRL_RESULT_DROP:
break;
case DNS_RRL_RESULT_SLIP:
break;
default:
INSIST(0);
break;
}
case DNS_RRL_RTYPE_QUERY:
break;
case DNS_RRL_RTYPE_REFERRAL:
break;
case DNS_RRL_RTYPE_NODATA:
break;
case DNS_RRL_RTYPE_NXDOMAIN:
break;
case DNS_RRL_RTYPE_ERROR:
if (resp_result == ISC_R_SUCCESS) {
} else {
}
break;
case DNS_RRL_RTYPE_ALL:
break;
default:
INSIST(0);
}
if (plural)
else
} else {
}
if (msg_result != ISC_R_SUCCESS)
/*
* Capture the qname for the "stop limiting" message.
*/
} else {
"isc_mem_get(%d)"
" failed for RRL qname",
(int)sizeof(*qbuf));
}
}
qbuf->e = e;
NULL);
}
}
} else {
}
}
}
e->key.s.qname_hash);
}
/*
* We saved room for '\0'.
*/
}
static void
char *log_buf, unsigned int log_buf_len)
{
if (e->logged) {
make_log_buf(rrl, e,
: "stop limiting ",
"%s", log_buf);
free_qname(rrl, e);
--rrl->num_logged;
}
}
/*
* Log messages for streams that have stopped being rate limited.
*/
static void
char *log_buf, unsigned int log_buf_len)
{
dns_rrl_entry_t *e;
int age;
if (!e->logged)
continue;
if (now != 0) {
if (age < DNS_RRL_STOP_LOG_SECS ||
break;
}
if (rrl->num_logged <= 0)
break;
/*
* Too many messages could stall real work.
*/
if (--limit < 0) {
return;
}
}
if (e == NULL) {
}
rrl->last_logged = e;
}
/*
* Main rate limit interface.
*/
{
dns_rrl_entry_t *e;
int secs;
int exempt_match;
return (DNS_RRL_RESULT_OK);
}
/*
* Estimate total query per second rate when scaling by qps.
*/
qps = 0.0;
scale = 1.0;
} else {
++rrl->qps_responses;
if (secs <= 0) {
} else {
if (isc_log_wouldlog(dns_lctx,
"%d responses/%d seconds"
" = %d qps",
(int)qps);
rrl->qps_responses = 0;
}
}
}
/*
* Do maintenance once per second.
*/
/*
* Notice TCP responses when scaling limits by qps.
* Do not try to rate limit TCP responses.
*/
if (is_tcp) {
if (scale < 1.0) {
0, dns_rdatatype_none, NULL,
if (e != NULL) {
}
}
return (ISC_R_SUCCESS);
}
/*
* Find the right kind of entry, creating it if necessary.
* If that is impossible, then nothing more can be done
*/
switch (resp_result) {
case ISC_R_SUCCESS:
break;
case DNS_R_DELEGATION:
break;
case DNS_R_NXRRSET:
break;
case DNS_R_NXDOMAIN:
break;
default:
break;
}
if (e == NULL) {
return (DNS_RRL_RESULT_OK);
}
/*
* Do not worry about speed or releasing the lock.
* This message appears before messages from debit_rrl_entry().
*/
"%s", log_buf);
}
if (rrl->all_per_second.r != 0) {
/*
* We must debit the all-per-second token bucket if we have
* an all-per-second limit for the IP address.
* The all-per-second limit determines the log message
* when both limits are hit.
* The response limiting must continue if the
* all-per-second limiting lapses.
*/
0, dns_rdatatype_none, NULL,
return (DNS_RRL_RESULT_OK);
}
if (rrl_all_result != DNS_RRL_RESULT_OK) {
e = e_all;
make_log_buf(rrl, e,
"prefer all-per-second limiting ",
"%s", log_buf);
}
}
}
if (rrl_result == DNS_RRL_RESULT_OK) {
return (DNS_RRL_RESULT_OK);
}
/*
* Log occassionally in the rate-limit category.
*/
if (!e->logged) {
rrl->last_logged = e;
}
e->log_secs = 0;
/*
* Avoid holding the lock.
*/
if (!wouldlog) {
e = NULL;
}
"%s", log_buf);
}
/*
* Make a log message for the caller.
*/
if (wouldlog)
make_log_buf(rrl, e,
if (e != NULL) {
/*
* Do not save the qname unless we might need it for
* the ending log message.
*/
if (!e->logged)
free_qname(rrl, e);
}
return (rrl_result);
}
void
dns_rrl_block_t *b;
dns_rrl_hash_t *h;
int i;
return;
/*
* Assume the caller takes care of locking the view and anything else.
*/
if (rrl->num_logged > 0)
for (i = 0; i < DNS_RRL_QNAMES; ++i) {
break;
}
}
if (h != NULL)
if (h != NULL)
}
return (ISC_R_NOMEMORY);
if (result != ISC_R_SUCCESS) {
return (result);
}
if (result != ISC_R_SUCCESS) {
return (result);
}
if (result != ISC_R_SUCCESS) {
return (result);
}
return (ISC_R_SUCCESS);
}