From 07ea4d371b5ac4e3daa81f4eff5ff4c05fb1704f Mon Sep 17 00:00:00 2001 From: Daniel Mezzatto Date: Fri, 4 Apr 2014 19:14:55 -0300 Subject: [PATCH 1/2] New option "auto_eject_drop" A boolean value that controls if auto ejected hosts should be dropped from the hash ring. If set to false, failing hosts will immediately reply timeout. Defaults to true. See https://github.com/twitter/twemproxy/issues/213 for more information --- README.md | 1 + src/hashkit/nc_modula.c | 6 +++--- src/nc_conf.c | 12 ++++++++++++ src/nc_conf.h | 2 ++ src/nc_message.c | 8 ++++++++ src/nc_server.c | 18 ++++++++++++++++++ src/nc_server.h | 2 ++ 7 files changed, 46 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 90f47f12..7a08833e 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,7 @@ nutcracker can be configured through a YAML file specified by the -c or --conf-f + **redis**: A boolean value that controls if a server pool speaks redis or memcached protocol. Defaults to false. + **server_connections**: The maximum number of connections that can be opened to each server. By default, we open at most 1 server connection. + **auto_eject_hosts**: A boolean value that controls if server should be ejected temporarily when it fails consecutively server_failure_limit times. See [liveness recommendations](notes/recommendation.md#liveness) for information. Defaults to false. ++ **auto_eject_drop**: A boolean value that controls if auto ejected hosts should be dropped from the hash ring. If set to false, failing hosts will immediately reply timeout. Defaults to true. + **server_retry_timeout**: The timeout value in msec to wait for before retrying on a temporarily ejected server, when auto_eject_host is set to true. Defaults to 30000 msec. + **server_failure_limit**: The number of consecutive failures on a server that would lead to it being temporarily ejected when auto_eject_host is set to true. Defaults to 2. + **servers**: A list of server address, port and weight (name:port:weight or ip:port:weight) for this server pool. diff --git a/src/hashkit/nc_modula.c b/src/hashkit/nc_modula.c index 083f89a9..6edd2d20 100644 --- a/src/hashkit/nc_modula.c +++ b/src/hashkit/nc_modula.c @@ -53,7 +53,7 @@ modula_update(struct server_pool *pool) for (server_index = 0; server_index < nserver; server_index++) { struct server *server = array_get(&pool->server, server_index); - if (pool->auto_eject_hosts) { + if (pool->auto_eject_hosts && pool->auto_eject_drop) { if (server->next_retry <= now) { server->next_retry = 0LL; nlive_server++; @@ -68,7 +68,7 @@ modula_update(struct server_pool *pool) ASSERT(server->weight > 0); /* count weight only for live servers */ - if (!pool->auto_eject_hosts || server->next_retry <= now) { + if (!pool->auto_eject_hosts || !pool->auto_eject_drop || server->next_retry <= now) { total_weight += server->weight; } } @@ -116,7 +116,7 @@ modula_update(struct server_pool *pool) for (server_index = 0; server_index < nserver; server_index++) { struct server *server = array_get(&pool->server, server_index); - if (pool->auto_eject_hosts && server->next_retry > now) { + if (pool->auto_eject_hosts && pool->auto_eject_drop && server->next_retry > now) { continue; } diff --git a/src/nc_conf.c b/src/nc_conf.c index 24bb0ded..332575cb 100644 --- a/src/nc_conf.c +++ b/src/nc_conf.c @@ -82,6 +82,10 @@ static struct command conf_commands[] = { conf_set_bool, offsetof(struct conf_pool, auto_eject_hosts) }, + { string("auto_eject_drop"), + conf_set_bool, + offsetof(struct conf_pool, auto_eject_drop) }, + { string("server_connections"), conf_set_num, offsetof(struct conf_pool, server_connections) }, @@ -154,6 +158,7 @@ conf_server_each_transform(void *elem, void *data) s->next_retry = 0LL; s->failure_count = 0; + s->dead = 0; log_debug(LOG_VERB, "transform to server %"PRIu32" '%.*s'", s->idx, s->pname.len, s->pname.data); @@ -186,6 +191,7 @@ conf_pool_init(struct conf_pool *cp, struct string *name) cp->redis = CONF_UNSET_NUM; cp->preconnect = CONF_UNSET_NUM; cp->auto_eject_hosts = CONF_UNSET_NUM; + cp->auto_eject_drop = CONF_UNSET_NUM; cp->server_connections = CONF_UNSET_NUM; cp->server_retry_timeout = CONF_UNSET_NUM; cp->server_failure_limit = CONF_UNSET_NUM; @@ -277,6 +283,7 @@ conf_pool_each_transform(void *elem, void *data) sp->server_retry_timeout = (int64_t)cp->server_retry_timeout * 1000LL; sp->server_failure_limit = (uint32_t)cp->server_failure_limit; sp->auto_eject_hosts = cp->auto_eject_hosts ? 1 : 0; + sp->auto_eject_drop = cp->auto_eject_drop ? 1 : 0; sp->preconnect = cp->preconnect ? 1 : 0; status = server_init(&sp->server, &cp->server, sp); @@ -322,6 +329,7 @@ conf_dump(struct conf *cf) log_debug(LOG_VVERB, " redis: %d", cp->redis); log_debug(LOG_VVERB, " preconnect: %d", cp->preconnect); log_debug(LOG_VVERB, " auto_eject_hosts: %d", cp->auto_eject_hosts); + log_debug(LOG_VVERB, " auto_eject_drop: %d", cp->auto_eject_drop); log_debug(LOG_VVERB, " server_connections: %d", cp->server_connections); log_debug(LOG_VVERB, " server_retry_timeout: %d", @@ -1219,6 +1227,10 @@ conf_validate_pool(struct conf *cf, struct conf_pool *cp) cp->auto_eject_hosts = CONF_DEFAULT_AUTO_EJECT_HOSTS; } + if (cp->auto_eject_drop == CONF_UNSET_NUM) { + cp->auto_eject_drop = CONF_DEFAULT_AUTO_EJECT_DROP; + } + if (cp->server_connections == CONF_UNSET_NUM) { cp->server_connections = CONF_DEFAULT_SERVER_CONNECTIONS; } else if (cp->server_connections == 0) { diff --git a/src/nc_conf.h b/src/nc_conf.h index 648baae5..a6e6f0f9 100644 --- a/src/nc_conf.h +++ b/src/nc_conf.h @@ -49,6 +49,7 @@ #define CONF_DEFAULT_REDIS false #define CONF_DEFAULT_PRECONNECT false #define CONF_DEFAULT_AUTO_EJECT_HOSTS false +#define CONF_DEFAULT_AUTO_EJECT_DROP true #define CONF_DEFAULT_SERVER_RETRY_TIMEOUT 30 * 1000 /* in msec */ #define CONF_DEFAULT_SERVER_FAILURE_LIMIT 2 #define CONF_DEFAULT_SERVER_CONNECTIONS 1 @@ -83,6 +84,7 @@ struct conf_pool { int redis; /* redis: */ int preconnect; /* preconnect: */ int auto_eject_hosts; /* auto_eject_hosts: */ + int auto_eject_drop; /* auto_eject_drop: */ int server_connections; /* server_connections: */ int server_retry_timeout; /* server_retry_timeout: in msec */ int server_failure_limit; /* server_failure_limit: */ diff --git a/src/nc_message.c b/src/nc_message.c index 654cdf9a..73118506 100644 --- a/src/nc_message.c +++ b/src/nc_message.c @@ -145,6 +145,7 @@ void msg_tmo_insert(struct msg *msg, struct conn *conn) { struct rbnode *node; + struct server *server; int timeout; ASSERT(msg->request); @@ -155,6 +156,13 @@ msg_tmo_insert(struct msg *msg, struct conn *conn) return; } + server = conn->owner; + + /* insert already expired */ + if (server->dead) { + timeout = -1; + } + node = &msg->tmo_rbe; node->key = nc_msec_now() + timeout; node->data = conn; diff --git a/src/nc_server.c b/src/nc_server.c index 40705167..c19ea6c4 100644 --- a/src/nc_server.c +++ b/src/nc_server.c @@ -268,6 +268,11 @@ server_failure(struct context *ctx, struct server *server) return; } + /* avoid processing and changing the stats unnecessarily */ + if (server->dead) { + return; + } + now = nc_usec_now(); if (now < 0) { return; @@ -287,6 +292,11 @@ server_failure(struct context *ctx, struct server *server) server->failure_count = 0; server->next_retry = next; + /* only mark as dead if the config says so */ + if (!pool->auto_eject_drop) { + server->dead = 1; + } + status = server_pool_run(pool); if (status != NC_OK) { log_error("updating pool %"PRIu32" '%.*s' failed: %s", pool->idx, @@ -546,6 +556,7 @@ server_ok(struct context *ctx, struct conn *conn) server->failure_count); server->failure_count = 0; server->next_retry = 0LL; + server->dead = 0; } } @@ -665,6 +676,13 @@ server_pool_conn(struct context *ctx, struct server_pool *pool, uint8_t *key, return NULL; } + /* dead and not yet the time to retry */ + if (server->dead && nc_usec_now() < server->next_retry) { + return NULL; + } + + server->dead = 0; + /* pick a connection to a given server */ conn = server_conn(server); if (conn == NULL) { diff --git a/src/nc_server.h b/src/nc_server.h index dfe16faf..969d39ab 100644 --- a/src/nc_server.h +++ b/src/nc_server.h @@ -83,6 +83,7 @@ struct server { int64_t next_retry; /* next retry time in usec */ uint32_t failure_count; /* # consecutive failures */ + unsigned dead:1; /* server marked as dead */ }; struct server_pool { @@ -117,6 +118,7 @@ struct server_pool { int64_t server_retry_timeout; /* server retry timeout in usec */ uint32_t server_failure_limit; /* server failure limit */ unsigned auto_eject_hosts:1; /* auto_eject_hosts? */ + unsigned auto_eject_drop:1; /* drop auto ejected hosts? */ unsigned preconnect:1; /* preconnect? */ unsigned redis:1; /* redis? */ }; From 5b0ccbce1e1eca192264485656b0bca794ca85e3 Mon Sep 17 00:00:00 2001 From: Daniel Mezzatto Date: Mon, 7 Apr 2014 15:58:09 -0300 Subject: [PATCH 2/2] Code cleanup: - we dont need the timeout insertion code into the red-black tree since a dead server will not reach this code - forcing timeout error when a request is done against a dead server --- src/nc_message.c | 8 -------- src/nc_server.c | 1 + 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/nc_message.c b/src/nc_message.c index 73118506..654cdf9a 100644 --- a/src/nc_message.c +++ b/src/nc_message.c @@ -145,7 +145,6 @@ void msg_tmo_insert(struct msg *msg, struct conn *conn) { struct rbnode *node; - struct server *server; int timeout; ASSERT(msg->request); @@ -156,13 +155,6 @@ msg_tmo_insert(struct msg *msg, struct conn *conn) return; } - server = conn->owner; - - /* insert already expired */ - if (server->dead) { - timeout = -1; - } - node = &msg->tmo_rbe; node->key = nc_msec_now() + timeout; node->data = conn; diff --git a/src/nc_server.c b/src/nc_server.c index c19ea6c4..5037cef4 100644 --- a/src/nc_server.c +++ b/src/nc_server.c @@ -678,6 +678,7 @@ server_pool_conn(struct context *ctx, struct server_pool *pool, uint8_t *key, /* dead and not yet the time to retry */ if (server->dead && nc_usec_now() < server->next_retry) { + errno = ETIMEDOUT; return NULL; }