Skip to content

Commit

Permalink
New option "auto_eject_drop"
Browse files Browse the repository at this point in the history
A boolean value that controls whether auto-ejected hosts are dropped from the hash ring. If set to false, ejected hosts stay in the hash ring and requests routed to them fail immediately with a timeout until the retry time is reached. Defaults to true.

See twitter#213 for more information
  • Loading branch information
Daniel Mezzatto committed Apr 4, 2014
1 parent 8a4f5c0 commit 07ea4d3
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 3 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ nutcracker can be configured through a YAML file specified by the -c or --conf-f
+ **redis**: A boolean value that controls if a server pool speaks redis or memcached protocol. Defaults to false.
+ **server_connections**: The maximum number of connections that can be opened to each server. By default, we open at most 1 server connection.
+ **auto_eject_hosts**: A boolean value that controls whether a server should be ejected temporarily when it fails server_failure_limit times consecutively. See [liveness recommendations](notes/recommendation.md#liveness) for information. Defaults to false.
+ **auto_eject_drop**: A boolean value that controls whether auto-ejected hosts are dropped from the hash ring. If set to false, ejected hosts stay in the hash ring and requests routed to them fail immediately with a timeout until the retry time is reached. Defaults to true.
+ **server_retry_timeout**: The timeout value in msec to wait before retrying a temporarily ejected server, when auto_eject_hosts is set to true. Defaults to 30000 msec.
+ **server_failure_limit**: The number of consecutive failures on a server that would lead to it being temporarily ejected when auto_eject_hosts is set to true. Defaults to 2.
+ **servers**: A list of server address, port and weight (name:port:weight or ip:port:weight) for this server pool.
Expand Down
6 changes: 3 additions & 3 deletions src/hashkit/nc_modula.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ modula_update(struct server_pool *pool)
for (server_index = 0; server_index < nserver; server_index++) {
struct server *server = array_get(&pool->server, server_index);

if (pool->auto_eject_hosts) {
if (pool->auto_eject_hosts && pool->auto_eject_drop) {
if (server->next_retry <= now) {
server->next_retry = 0LL;
nlive_server++;
Expand All @@ -68,7 +68,7 @@ modula_update(struct server_pool *pool)
ASSERT(server->weight > 0);

/* count weight only for live servers */
if (!pool->auto_eject_hosts || server->next_retry <= now) {
if (!pool->auto_eject_hosts || !pool->auto_eject_drop || server->next_retry <= now) {
total_weight += server->weight;
}
}
Expand Down Expand Up @@ -116,7 +116,7 @@ modula_update(struct server_pool *pool)
for (server_index = 0; server_index < nserver; server_index++) {
struct server *server = array_get(&pool->server, server_index);

if (pool->auto_eject_hosts && server->next_retry > now) {
if (pool->auto_eject_hosts && pool->auto_eject_drop && server->next_retry > now) {
continue;
}

Expand Down
12 changes: 12 additions & 0 deletions src/nc_conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ static struct command conf_commands[] = {
conf_set_bool,
offsetof(struct conf_pool, auto_eject_hosts) },

{ string("auto_eject_drop"),
conf_set_bool,
offsetof(struct conf_pool, auto_eject_drop) },

{ string("server_connections"),
conf_set_num,
offsetof(struct conf_pool, server_connections) },
Expand Down Expand Up @@ -154,6 +158,7 @@ conf_server_each_transform(void *elem, void *data)

s->next_retry = 0LL;
s->failure_count = 0;
s->dead = 0;

log_debug(LOG_VERB, "transform to server %"PRIu32" '%.*s'",
s->idx, s->pname.len, s->pname.data);
Expand Down Expand Up @@ -186,6 +191,7 @@ conf_pool_init(struct conf_pool *cp, struct string *name)
cp->redis = CONF_UNSET_NUM;
cp->preconnect = CONF_UNSET_NUM;
cp->auto_eject_hosts = CONF_UNSET_NUM;
cp->auto_eject_drop = CONF_UNSET_NUM;
cp->server_connections = CONF_UNSET_NUM;
cp->server_retry_timeout = CONF_UNSET_NUM;
cp->server_failure_limit = CONF_UNSET_NUM;
Expand Down Expand Up @@ -277,6 +283,7 @@ conf_pool_each_transform(void *elem, void *data)
sp->server_retry_timeout = (int64_t)cp->server_retry_timeout * 1000LL;
sp->server_failure_limit = (uint32_t)cp->server_failure_limit;
sp->auto_eject_hosts = cp->auto_eject_hosts ? 1 : 0;
sp->auto_eject_drop = cp->auto_eject_drop ? 1 : 0;
sp->preconnect = cp->preconnect ? 1 : 0;

status = server_init(&sp->server, &cp->server, sp);
Expand Down Expand Up @@ -322,6 +329,7 @@ conf_dump(struct conf *cf)
log_debug(LOG_VVERB, " redis: %d", cp->redis);
log_debug(LOG_VVERB, " preconnect: %d", cp->preconnect);
log_debug(LOG_VVERB, " auto_eject_hosts: %d", cp->auto_eject_hosts);
log_debug(LOG_VVERB, " auto_eject_drop: %d", cp->auto_eject_drop);
log_debug(LOG_VVERB, " server_connections: %d",
cp->server_connections);
log_debug(LOG_VVERB, " server_retry_timeout: %d",
Expand Down Expand Up @@ -1219,6 +1227,10 @@ conf_validate_pool(struct conf *cf, struct conf_pool *cp)
cp->auto_eject_hosts = CONF_DEFAULT_AUTO_EJECT_HOSTS;
}

if (cp->auto_eject_drop == CONF_UNSET_NUM) {
cp->auto_eject_drop = CONF_DEFAULT_AUTO_EJECT_DROP;
}

if (cp->server_connections == CONF_UNSET_NUM) {
cp->server_connections = CONF_DEFAULT_SERVER_CONNECTIONS;
} else if (cp->server_connections == 0) {
Expand Down
2 changes: 2 additions & 0 deletions src/nc_conf.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#define CONF_DEFAULT_REDIS false
#define CONF_DEFAULT_PRECONNECT false
#define CONF_DEFAULT_AUTO_EJECT_HOSTS false
#define CONF_DEFAULT_AUTO_EJECT_DROP true
#define CONF_DEFAULT_SERVER_RETRY_TIMEOUT 30 * 1000 /* in msec */
#define CONF_DEFAULT_SERVER_FAILURE_LIMIT 2
#define CONF_DEFAULT_SERVER_CONNECTIONS 1
Expand Down Expand Up @@ -83,6 +84,7 @@ struct conf_pool {
int redis; /* redis: */
int preconnect; /* preconnect: */
int auto_eject_hosts; /* auto_eject_hosts: */
int auto_eject_drop; /* auto_eject_drop: */
int server_connections; /* server_connections: */
int server_retry_timeout; /* server_retry_timeout: in msec */
int server_failure_limit; /* server_failure_limit: */
Expand Down
8 changes: 8 additions & 0 deletions src/nc_message.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ void
msg_tmo_insert(struct msg *msg, struct conn *conn)
{
struct rbnode *node;
struct server *server;
int timeout;

ASSERT(msg->request);
Expand All @@ -155,6 +156,13 @@ msg_tmo_insert(struct msg *msg, struct conn *conn)
return;
}

server = conn->owner;

/* insert already expired */
if (server->dead) {
timeout = -1;
}

node = &msg->tmo_rbe;
node->key = nc_msec_now() + timeout;
node->data = conn;
Expand Down
18 changes: 18 additions & 0 deletions src/nc_server.c
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,11 @@ server_failure(struct context *ctx, struct server *server)
return;
}

/* avoid processing and changing the stats unnecessarily */
if (server->dead) {
return;
}

now = nc_usec_now();
if (now < 0) {
return;
Expand All @@ -287,6 +292,11 @@ server_failure(struct context *ctx, struct server *server)
server->failure_count = 0;
server->next_retry = next;

/* only mark as dead if the config says so */
if (!pool->auto_eject_drop) {
server->dead = 1;
}

status = server_pool_run(pool);
if (status != NC_OK) {
log_error("updating pool %"PRIu32" '%.*s' failed: %s", pool->idx,
Expand Down Expand Up @@ -546,6 +556,7 @@ server_ok(struct context *ctx, struct conn *conn)
server->failure_count);
server->failure_count = 0;
server->next_retry = 0LL;
server->dead = 0;
}
}

Expand Down Expand Up @@ -665,6 +676,13 @@ server_pool_conn(struct context *ctx, struct server_pool *pool, uint8_t *key,
return NULL;
}

/* dead and not yet the time to retry */
if (server->dead && nc_usec_now() < server->next_retry) {
return NULL;
}

server->dead = 0;

/* pick a connection to a given server */
conn = server_conn(server);
if (conn == NULL) {
Expand Down
2 changes: 2 additions & 0 deletions src/nc_server.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ struct server {

int64_t next_retry; /* next retry time in usec */
uint32_t failure_count; /* # consecutive failures */
unsigned dead:1; /* server marked as dead */
};

struct server_pool {
Expand Down Expand Up @@ -117,6 +118,7 @@ struct server_pool {
int64_t server_retry_timeout; /* server retry timeout in usec */
uint32_t server_failure_limit; /* server failure limit */
unsigned auto_eject_hosts:1; /* auto_eject_hosts? */
unsigned auto_eject_drop:1; /* drop auto ejected hosts? */
unsigned preconnect:1; /* preconnect? */
unsigned redis:1; /* redis? */
};
Expand Down

0 comments on commit 07ea4d3

Please sign in to comment.