Skip to content

Commit

Permalink
Fix cache update when serve expired is used (#1143)
Browse files Browse the repository at this point in the history
- Fix cache update when serve expired is used in order to not evict
  still usable expired records. Modules are forbidden to update the
  cache if their answer is DNSSEC unchecked or bogus and a valid
  (expired) entry already exists. Bogus replies from the validator are
  also discarded in favor of existing (expired) valid replies.

- serve-expired-ttl-reset should try to keep expired records in the
  cache in case they are reset.
  • Loading branch information
gthess committed Sep 24, 2024
1 parent 24ebca7 commit 2e398d5
Show file tree
Hide file tree
Showing 24 changed files with 1,282 additions and 69 deletions.
2 changes: 0 additions & 2 deletions cachedb/cachedb.c
Original file line number Diff line number Diff line change
Expand Up @@ -828,8 +828,6 @@ cachedb_handle_query(struct module_qstate* qstate,
/* In case we have expired data but there is a client timer for expired
* answers, pass execution to next module in order to try updating the
* data first.
* TODO: this needs revisit. The expired data stored from cachedb has
* 0 TTL which is picked up by iterator later when looking in the cache.
*/
if(qstate->env->cfg->serve_expired && msg_expired) {
qstate->return_msg = NULL;
Expand Down
4 changes: 4 additions & 0 deletions daemon/remote.c
Original file line number Diff line number Diff line change
Expand Up @@ -1953,6 +1953,8 @@ bogus_del_msg(struct lruhash_entry* e, void* arg)
struct reply_info* d = (struct reply_info*)e->data;
if(d->security == sec_status_bogus) {
d->ttl = inf->expired;
d->prefetch_ttl = inf->expired;
d->serve_expired_ttl = inf->expired;
inf->num_msgs++;
#ifdef USE_CACHEDB
if(inf->remcachedb && inf->worker->env.cachedb_enabled)
Expand Down Expand Up @@ -2035,6 +2037,8 @@ negative_del_msg(struct lruhash_entry* e, void* arg)
* or NOERROR rcode with ANCOUNT==0: a NODATA answer */
if(FLAGS_GET_RCODE(d->flags) != 0 || d->an_numrrsets == 0) {
d->ttl = inf->expired;
d->prefetch_ttl = inf->expired;
d->serve_expired_ttl = inf->expired;
inf->num_msgs++;
#ifdef USE_CACHEDB
if(inf->remcachedb && inf->worker->env.cachedb_enabled)
Expand Down
18 changes: 7 additions & 11 deletions daemon/worker.c
Original file line number Diff line number Diff line change
Expand Up @@ -661,22 +661,18 @@ answer_from_cache(struct worker* worker, struct query_info* qinfo,
if(rep->ttl < timenow) {
/* Check if we need to serve expired now */
if(worker->env.cfg->serve_expired &&
!worker->env.cfg->serve_expired_client_timeout
/* if serve-expired-client-timeout is set, serve
* an expired record without attempting recursion
* if the serve_expired_norec_ttl is set for the record
* as we know that recursion is currently failing. */
(!worker->env.cfg->serve_expired_client_timeout ||
timenow < rep->serve_expired_norec_ttl)
#ifdef USE_CACHEDB
&& !(worker->env.cachedb_enabled &&
worker->env.cfg->cachedb_check_when_serve_expired)
#endif
) {
if(worker->env.cfg->serve_expired_ttl &&
rep->serve_expired_ttl < timenow)
return 0;
/* Ignore expired failure answers */
if(FLAGS_GET_RCODE(rep->flags) !=
LDNS_RCODE_NOERROR &&
FLAGS_GET_RCODE(rep->flags) !=
LDNS_RCODE_NXDOMAIN &&
FLAGS_GET_RCODE(rep->flags) !=
LDNS_RCODE_YXDOMAIN)
if(!reply_info_can_answer_expired(rep, timenow))
return 0;
if(!rrset_array_lock(rep->ref, rep->rrset_count, 0))
return 0;
Expand Down
1 change: 1 addition & 0 deletions dns64/dns64.c
Original file line number Diff line number Diff line change
Expand Up @@ -847,6 +847,7 @@ dns64_adjust_a(int id, struct module_qstate* super, struct module_qstate* qstate
*/
cp = construct_reply_info_base(super->region, rep->flags, rep->qdcount,
rep->ttl, rep->prefetch_ttl, rep->serve_expired_ttl,
rep->serve_expired_norec_ttl,
rep->an_numrrsets, rep->ns_numrrsets, rep->ar_numrrsets,
rep->rrset_count, rep->security, LDNS_EDE_NONE);
if(!cp)
Expand Down
23 changes: 15 additions & 8 deletions iterator/iterator.c
Original file line number Diff line number Diff line change
Expand Up @@ -322,16 +322,21 @@ error_response_cache(struct module_qstate* qstate, int id, int rcode)
qstate->qinfo.qname, qstate->qinfo.qname_len,
qstate->qinfo.qtype, qstate->qinfo.qclass,
qstate->query_flags, 0,
qstate->env->cfg->serve_expired_ttl_reset)) != NULL) {
qstate->env->cfg->serve_expired)) != NULL) {
struct reply_info* rep = (struct reply_info*)msg->entry.data;
if(qstate->env->cfg->serve_expired &&
qstate->env->cfg->serve_expired_ttl_reset && rep &&
*qstate->env->now + qstate->env->cfg->serve_expired_ttl
> rep->serve_expired_ttl) {
verbose(VERB_ALGO, "reset serve-expired-ttl for "
if(qstate->env->cfg->serve_expired && rep) {
if(qstate->env->cfg->serve_expired_ttl_reset &&
*qstate->env->now + qstate->env->cfg->serve_expired_ttl
> rep->serve_expired_ttl) {
verbose(VERB_ALGO, "reset serve-expired-ttl for "
"response in cache");
rep->serve_expired_ttl = *qstate->env->now +
qstate->env->cfg->serve_expired_ttl;
}
verbose(VERB_ALGO, "set serve-expired-norec-ttl for "
"response in cache");
rep->serve_expired_ttl = *qstate->env->now +
qstate->env->cfg->serve_expired_ttl;
rep->serve_expired_norec_ttl = NORR_TTL +
*qstate->env->now;
}
if(rep && (FLAGS_GET_RCODE(rep->flags) ==
LDNS_RCODE_NOERROR ||
Expand Down Expand Up @@ -4046,6 +4051,8 @@ processClassResponse(struct module_qstate* qstate, int id,
to->rep->prefetch_ttl = from->rep->prefetch_ttl;
if(from->rep->serve_expired_ttl < to->rep->serve_expired_ttl)
to->rep->serve_expired_ttl = from->rep->serve_expired_ttl;
if(from->rep->serve_expired_norec_ttl < to->rep->serve_expired_norec_ttl)
to->rep->serve_expired_norec_ttl = from->rep->serve_expired_norec_ttl;
}
/* are we done? */
foriq->num_current_queries --;
Expand Down
51 changes: 35 additions & 16 deletions services/cache/dns.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ dns_cache_store_msg(struct module_env* env, struct query_info* qinfo,
size_t i;

/* store RRsets */
for(i=0; i<rep->rrset_count; i++) {
for(i=0; i<rep->rrset_count; i++) {
rep->ref[i].key = rep->rrsets[i];
rep->ref[i].id = rep->rrsets[i]->id;
}
Expand Down Expand Up @@ -197,6 +197,7 @@ dns_cache_store_msg(struct module_env* env, struct query_info* qinfo,
reply_info_sortref(rep);
if(!(e = query_info_entrysetup(qinfo, rep, hash))) {
log_err("store_msg: malloc failed");
reply_info_delete(rep, NULL);
return;
}
slabhash_insert(env->msg_cache, hash, &e->entry, rep, env->alloc);
Expand Down Expand Up @@ -607,22 +608,8 @@ tomsg(struct module_env* env, struct query_info* q, struct reply_info* r,
time_t now_control = now;
if(now > r->ttl) {
/* Check if we are allowed to serve expired */
if(allow_expired) {
if(env->cfg->serve_expired_ttl &&
r->serve_expired_ttl < now) {
return NULL;
}
/* Ignore expired failure answers */
if(FLAGS_GET_RCODE(r->flags) !=
LDNS_RCODE_NOERROR &&
FLAGS_GET_RCODE(r->flags) !=
LDNS_RCODE_NXDOMAIN &&
FLAGS_GET_RCODE(r->flags) !=
LDNS_RCODE_YXDOMAIN)
return 0;
} else {
if(!allow_expired || !reply_info_can_answer_expired(r, now))
return NULL;
}
/* Change the current time so we can pass the below TTL checks when
* serving expired data. */
now_control = r->ttl - env->cfg->serve_expired_reply_ttl;
Expand All @@ -641,6 +628,7 @@ tomsg(struct module_env* env, struct query_info* q, struct reply_info* r,
else
msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl);
msg->rep->serve_expired_ttl = msg->rep->ttl + SERVE_EXPIRED_TTL;
msg->rep->serve_expired_norec_ttl = 0;
msg->rep->security = r->security;
msg->rep->an_numrrsets = r->an_numrrsets;
msg->rep->ns_numrrsets = r->ns_numrrsets;
Expand Down Expand Up @@ -724,6 +712,7 @@ rrset_msg(struct ub_packed_rrset_key* rrset, struct regional* region,
msg->rep->ttl = d->ttl - now;
msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl);
msg->rep->serve_expired_ttl = msg->rep->ttl + SERVE_EXPIRED_TTL;
msg->rep->serve_expired_norec_ttl = 0;
msg->rep->security = sec_status_unchecked;
msg->rep->an_numrrsets = 1;
msg->rep->ns_numrrsets = 0;
Expand Down Expand Up @@ -763,6 +752,7 @@ synth_dname_msg(struct ub_packed_rrset_key* rrset, struct regional* region,
msg->rep->ttl = d->ttl - now;
msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl);
msg->rep->serve_expired_ttl = msg->rep->ttl + SERVE_EXPIRED_TTL;
msg->rep->serve_expired_norec_ttl = 0;
msg->rep->security = sec_status_unchecked;
msg->rep->an_numrrsets = 1;
msg->rep->ns_numrrsets = 0;
Expand Down Expand Up @@ -1070,6 +1060,35 @@ dns_cache_store(struct module_env* env, struct query_info* msgqinf,
struct regional* region, uint32_t flags, time_t qstarttime)
{
struct reply_info* rep = NULL;
if(SERVE_EXPIRED) {
/* We are serving expired records. Before caching, check if a
* useful expired record exists. */
struct msgreply_entry* e = msg_cache_lookup(env,
msgqinf->qname, msgqinf->qname_len, msgqinf->qtype,
msgqinf->qclass, flags, 0, 0);
if(e) {
struct reply_info* cached = e->entry.data;
if(cached->ttl < *env->now
&& reply_info_could_use_expired(cached, *env->now)
/* If we are validating make sure only
* validating modules can update such messages.
* In that case don't cache it and let a
* subsequent module handle the caching. For
* example, the iterator should not replace an
* expired secure answer with a fresh unchecked
* one and let the validator manage caching. */
&& cached->security != sec_status_bogus
&& (env->need_to_validate &&
msgrep->security == sec_status_unchecked)) {
verbose(VERB_ALGO, "a validated expired entry "
"could be overwritten, skip caching "
"the new message at this stage");
lock_rw_unlock(&e->entry.lock);
return 1;
}
lock_rw_unlock(&e->entry.lock);
}
}
/* alloc, malloc properly (not in region, like msg is) */
rep = reply_info_copy(msgrep, env->alloc, NULL);
if(!rep)
Expand Down
10 changes: 5 additions & 5 deletions services/cache/rrset.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ need_to_update_rrset(void* nd, void* cd, time_t timenow, int equal, int ns)
{
struct packed_rrset_data* newd = (struct packed_rrset_data*)nd;
struct packed_rrset_data* cached = (struct packed_rrset_data*)cd;
/* o if new data is expired, current data is better */
if( newd->ttl < timenow && cached->ttl >= timenow)
/* o if new data is expired, cached data is better */
if( newd->ttl < timenow && timenow <= cached->ttl)
return 0;
/* o store if rrset has been validated
* everything better than bogus data
Expand All @@ -140,9 +140,9 @@ need_to_update_rrset(void* nd, void* cd, time_t timenow, int equal, int ns)
if( cached->security == sec_status_bogus &&
newd->security != sec_status_bogus && !equal)
return 1;
/* o if current RRset is more trustworthy - insert it */
/* o if new RRset is more trustworthy - insert it */
if( newd->trust > cached->trust ) {
/* if the cached rrset is bogus, and this one equal,
/* if the cached rrset is bogus, and new is equal,
* do not update the TTL - let it expire. */
if(equal && cached->ttl >= timenow &&
cached->security == sec_status_bogus)
Expand All @@ -155,7 +155,7 @@ need_to_update_rrset(void* nd, void* cd, time_t timenow, int equal, int ns)
/* o same trust, but different in data - insert it */
if( newd->trust == cached->trust && !equal ) {
/* if this is type NS, do not 'stick' to owner that changes
* the NS RRset, but use the old TTL for the new data, and
* the NS RRset, but use the cached TTL for the new data, and
* update to fetch the latest data. ttl is not expired, because
* that check was before this one. */
if(ns) {
Expand Down
13 changes: 9 additions & 4 deletions services/mesh.c
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ int mesh_make_new_space(struct mesh_area* mesh, sldns_buffer* qbuf)

struct dns_msg*
mesh_serve_expired_lookup(struct module_qstate* qstate,
struct query_info* lookup_qinfo)
struct query_info* lookup_qinfo, int* is_expired)
{
hashvalue_type h;
struct lruhash_entry* e;
Expand All @@ -321,13 +321,15 @@ mesh_serve_expired_lookup(struct module_qstate* qstate,
time_t timenow = *qstate->env->now;
int must_validate = (!(qstate->query_flags&BIT_CD)
|| qstate->env->cfg->ignore_cd) && qstate->env->need_to_validate;
*is_expired = 0;
/* Lookup cache */
h = query_info_hash(lookup_qinfo, qstate->query_flags);
e = slabhash_lookup(qstate->env->msg_cache, h, lookup_qinfo, 0);
if(!e) return NULL;

key = (struct msgreply_entry*)e->key;
data = (struct reply_info*)e->data;
if(data->ttl < timenow) *is_expired = 1;
msg = tomsg(qstate->env, &key->key, data, qstate->region, timenow,
qstate->env->cfg->serve_expired, qstate->env->scratch);
if(!msg)
Expand Down Expand Up @@ -2176,6 +2178,7 @@ mesh_serve_expired_callback(void* arg)
int must_validate = (!(qstate->query_flags&BIT_CD)
|| qstate->env->cfg->ignore_cd) && qstate->env->need_to_validate;
int i = 0;
int is_expired;
if(!qstate->serve_expired_data) return;
verbose(VERB_ALGO, "Serve expired: Trying to reply with expired data");
comm_timer_delete(qstate->serve_expired_data->timer);
Expand All @@ -2193,7 +2196,7 @@ mesh_serve_expired_callback(void* arg)
fptr_ok(fptr_whitelist_serve_expired_lookup(
qstate->serve_expired_data->get_cached_answer));
msg = (*qstate->serve_expired_data->get_cached_answer)(qstate,
lookup_qinfo);
lookup_qinfo, &is_expired);
if(!msg)
return;
/* Reset these in case we pass a second time from here. */
Expand Down Expand Up @@ -2285,8 +2288,10 @@ mesh_serve_expired_callback(void* arg)

/* Add EDE Stale Answer (RCF8914). Ignore global ede as this is
* warning instead of an error */
if (r->edns.edns_present && qstate->env->cfg->ede_serve_expired &&
qstate->env->cfg->ede) {
if(r->edns.edns_present &&
qstate->env->cfg->ede_serve_expired &&
qstate->env->cfg->ede &&
is_expired) {
edns_opt_list_append_ede(&r->edns.opt_list_out,
mstate->s.region, LDNS_EDE_STALE_ANSWER, NULL);
}
Expand Down
3 changes: 2 additions & 1 deletion services/mesh.h
Original file line number Diff line number Diff line change
Expand Up @@ -673,11 +673,12 @@ void mesh_serve_expired_callback(void* arg);
* the same behavior as when replying from cache.
* @param qstate: the module qstate.
* @param lookup_qinfo: the query info to look for in the cache.
* @param is_expired: set if the cached answer is expired.
* @return dns_msg if a cached answer was found, otherwise NULL.
*/
struct dns_msg*
mesh_serve_expired_lookup(struct module_qstate* qstate,
struct query_info* lookup_qinfo);
struct query_info* lookup_qinfo, int* is_expired);

/**
* See if the mesh has space for more queries. You can allocate queries
Expand Down
4 changes: 4 additions & 0 deletions services/rpz.c
Original file line number Diff line number Diff line change
Expand Up @@ -1969,6 +1969,7 @@ rpz_synthesize_nodata(struct rpz* ATTR_UNUSED(r), struct module_qstate* ms,
0, /* ttl */
0, /* prettl */
0, /* expttl */
0, /* norecttl */
0, /* an */
0, /* ns */
0, /* ar */
Expand Down Expand Up @@ -1999,6 +2000,7 @@ rpz_synthesize_nxdomain(struct rpz* r, struct module_qstate* ms,
0, /* ttl */
0, /* prettl */
0, /* expttl */
0, /* norecttl */
0, /* an */
0, /* ns */
0, /* ar */
Expand Down Expand Up @@ -2031,6 +2033,7 @@ rpz_synthesize_localdata_from_rrset(struct rpz* ATTR_UNUSED(r), struct module_qs
0, /* ttl */
0, /* prettl */
0, /* expttl */
0, /* norecttl */
1, /* an */
0, /* ns */
0, /* ar */
Expand Down Expand Up @@ -2176,6 +2179,7 @@ rpz_synthesize_cname_override_msg(struct rpz* r, struct module_qstate* ms,
0, /* ttl */
0, /* prettl */
0, /* expttl */
0, /* norecttl */
1, /* an */
0, /* ns */
0, /* ar */
Expand Down
2 changes: 1 addition & 1 deletion testcode/unitmain.c
Original file line number Diff line number Diff line change
Expand Up @@ -1232,7 +1232,7 @@ static void edns_ede_answer_encode_test(void)
unit_assert(region);
rep = construct_reply_info_base(region,
LDNS_RCODE_NOERROR | BIT_QR, 1,
3600, 3600, 3600,
3600, 3600, 3600, 0,
0, 0, 0, 0,
sec_status_unchecked, LDNS_EDE_NONE);
unit_assert(rep);
Expand Down
2 changes: 1 addition & 1 deletion testdata/serve_expired_cached_servfail.rpl
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ RANGE_BEGIN 0 20
RANGE_END

; ns.example.com.
RANGE_BEGIN 30 100
RANGE_BEGIN 40 100
ADDRESS 1.2.3.4
ENTRY_BEGIN
MATCH opcode qtype qname
Expand Down
2 changes: 1 addition & 1 deletion testdata/serve_expired_cached_servfail_refresh.rpl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ stub-zone:
stub-addr: 1.2.3.4
CONFIG_END

SCENARIO_BEGIN Test serve-expired with client-timeout and a SERVFAIL upstream reply
SCENARIO_BEGIN Test serve-expired without client-timeout and a SERVFAIL upstream reply
; Scenario overview:
; - query for example.com. IN A
; - answer from upstream is SERVFAIL; will be cached for NORR_TTL(5)
Expand Down
Loading

0 comments on commit 2e398d5

Please sign in to comment.