Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New option "auto_eject_drop" #214

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ nutcracker can be configured through a YAML file specified by the -c or --conf-f
+ **redis**: A boolean value that controls if a server pool speaks redis or memcached protocol. Defaults to false.
+ **server_connections**: The maximum number of connections that can be opened to each server. By default, we open at most 1 server connection.
+ **auto_eject_hosts**: A boolean value that controls if server should be ejected temporarily when it fails consecutively server_failure_limit times. See [liveness recommendations](notes/recommendation.md#liveness) for information. Defaults to false.
+ **auto_eject_drop**: A boolean value that controls if auto ejected hosts should be dropped from the hash ring. If set to false, ejected hosts stay in the hash ring and requests routed to them fail immediately with a timeout until the retry time is reached. Defaults to true.
+ **server_retry_timeout**: The timeout value in msec to wait for before retrying on a temporarily ejected server, when auto_eject_hosts is set to true. Defaults to 30000 msec.
+ **server_failure_limit**: The number of consecutive failures on a server that would lead to it being temporarily ejected when auto_eject_hosts is set to true. Defaults to 2.
+ **servers**: A list of server address, port and weight (name:port:weight or ip:port:weight) for this server pool.
Expand Down
6 changes: 3 additions & 3 deletions src/hashkit/nc_modula.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ modula_update(struct server_pool *pool)
for (server_index = 0; server_index < nserver; server_index++) {
struct server *server = array_get(&pool->server, server_index);

if (pool->auto_eject_hosts) {
if (pool->auto_eject_hosts && pool->auto_eject_drop) {
if (server->next_retry <= now) {
server->next_retry = 0LL;
nlive_server++;
Expand All @@ -68,7 +68,7 @@ modula_update(struct server_pool *pool)
ASSERT(server->weight > 0);

/* count weight only for live servers */
if (!pool->auto_eject_hosts || server->next_retry <= now) {
if (!pool->auto_eject_hosts || !pool->auto_eject_drop || server->next_retry <= now) {
total_weight += server->weight;
}
}
Expand Down Expand Up @@ -116,7 +116,7 @@ modula_update(struct server_pool *pool)
for (server_index = 0; server_index < nserver; server_index++) {
struct server *server = array_get(&pool->server, server_index);

if (pool->auto_eject_hosts && server->next_retry > now) {
if (pool->auto_eject_hosts && pool->auto_eject_drop && server->next_retry > now) {
continue;
}

Expand Down
12 changes: 12 additions & 0 deletions src/nc_conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ static struct command conf_commands[] = {
conf_set_bool,
offsetof(struct conf_pool, auto_eject_hosts) },

{ string("auto_eject_drop"),
conf_set_bool,
offsetof(struct conf_pool, auto_eject_drop) },

{ string("server_connections"),
conf_set_num,
offsetof(struct conf_pool, server_connections) },
Expand Down Expand Up @@ -154,6 +158,7 @@ conf_server_each_transform(void *elem, void *data)

s->next_retry = 0LL;
s->failure_count = 0;
s->dead = 0;

log_debug(LOG_VERB, "transform to server %"PRIu32" '%.*s'",
s->idx, s->pname.len, s->pname.data);
Expand Down Expand Up @@ -186,6 +191,7 @@ conf_pool_init(struct conf_pool *cp, struct string *name)
cp->redis = CONF_UNSET_NUM;
cp->preconnect = CONF_UNSET_NUM;
cp->auto_eject_hosts = CONF_UNSET_NUM;
cp->auto_eject_drop = CONF_UNSET_NUM;
cp->server_connections = CONF_UNSET_NUM;
cp->server_retry_timeout = CONF_UNSET_NUM;
cp->server_failure_limit = CONF_UNSET_NUM;
Expand Down Expand Up @@ -277,6 +283,7 @@ conf_pool_each_transform(void *elem, void *data)
sp->server_retry_timeout = (int64_t)cp->server_retry_timeout * 1000LL;
sp->server_failure_limit = (uint32_t)cp->server_failure_limit;
sp->auto_eject_hosts = cp->auto_eject_hosts ? 1 : 0;
sp->auto_eject_drop = cp->auto_eject_drop ? 1 : 0;
sp->preconnect = cp->preconnect ? 1 : 0;

status = server_init(&sp->server, &cp->server, sp);
Expand Down Expand Up @@ -322,6 +329,7 @@ conf_dump(struct conf *cf)
log_debug(LOG_VVERB, " redis: %d", cp->redis);
log_debug(LOG_VVERB, " preconnect: %d", cp->preconnect);
log_debug(LOG_VVERB, " auto_eject_hosts: %d", cp->auto_eject_hosts);
log_debug(LOG_VVERB, " auto_eject_drop: %d", cp->auto_eject_drop);
log_debug(LOG_VVERB, " server_connections: %d",
cp->server_connections);
log_debug(LOG_VVERB, " server_retry_timeout: %d",
Expand Down Expand Up @@ -1219,6 +1227,10 @@ conf_validate_pool(struct conf *cf, struct conf_pool *cp)
cp->auto_eject_hosts = CONF_DEFAULT_AUTO_EJECT_HOSTS;
}

if (cp->auto_eject_drop == CONF_UNSET_NUM) {
cp->auto_eject_drop = CONF_DEFAULT_AUTO_EJECT_DROP;
}

if (cp->server_connections == CONF_UNSET_NUM) {
cp->server_connections = CONF_DEFAULT_SERVER_CONNECTIONS;
} else if (cp->server_connections == 0) {
Expand Down
2 changes: 2 additions & 0 deletions src/nc_conf.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
/* Pool defaults; conf_validate_pool() substitutes them for options left at CONF_UNSET_NUM. */
#define CONF_DEFAULT_REDIS false
#define CONF_DEFAULT_PRECONNECT false
#define CONF_DEFAULT_AUTO_EJECT_HOSTS false
#define CONF_DEFAULT_AUTO_EJECT_DROP true
/* Parenthesized so the product stays one operand when the macro is used inside a larger expression. */
#define CONF_DEFAULT_SERVER_RETRY_TIMEOUT (30 * 1000) /* in msec */
#define CONF_DEFAULT_SERVER_FAILURE_LIMIT 2
#define CONF_DEFAULT_SERVER_CONNECTIONS 1
Expand Down Expand Up @@ -83,6 +84,7 @@ struct conf_pool {
int redis; /* redis: */
int preconnect; /* preconnect: */
int auto_eject_hosts; /* auto_eject_hosts: */
int auto_eject_drop; /* auto_eject_drop: */
int server_connections; /* server_connections: */
int server_retry_timeout; /* server_retry_timeout: in msec */
int server_failure_limit; /* server_failure_limit: */
Expand Down
19 changes: 19 additions & 0 deletions src/nc_server.c
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,11 @@ server_failure(struct context *ctx, struct server *server)
return;
}

/* avoid processing and changing the stats unnecessarily */
if (server->dead) {
return;
}

now = nc_usec_now();
if (now < 0) {
return;
Expand All @@ -287,6 +292,11 @@ server_failure(struct context *ctx, struct server *server)
server->failure_count = 0;
server->next_retry = next;

/* only mark as dead if the config says so */
if (!pool->auto_eject_drop) {
server->dead = 1;
}

status = server_pool_run(pool);
if (status != NC_OK) {
log_error("updating pool %"PRIu32" '%.*s' failed: %s", pool->idx,
Expand Down Expand Up @@ -546,6 +556,7 @@ server_ok(struct context *ctx, struct conn *conn)
server->failure_count);
server->failure_count = 0;
server->next_retry = 0LL;
server->dead = 0;
}
}

Expand Down Expand Up @@ -665,6 +676,14 @@ server_pool_conn(struct context *ctx, struct server_pool *pool, uint8_t *key,
return NULL;
}

/* dead and not yet the time to retry */
if (server->dead && nc_usec_now() < server->next_retry) {
errno = ETIMEDOUT;
return NULL;
}

server->dead = 0;

/* pick a connection to a given server */
conn = server_conn(server);
if (conn == NULL) {
Expand Down
2 changes: 2 additions & 0 deletions src/nc_server.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ struct server {

int64_t next_retry; /* next retry time in usec */
uint32_t failure_count; /* # consecutive failures */
unsigned dead:1; /* server marked as dead */
};

struct server_pool {
Expand Down Expand Up @@ -117,6 +118,7 @@ struct server_pool {
int64_t server_retry_timeout; /* server retry timeout in usec */
uint32_t server_failure_limit; /* server failure limit */
unsigned auto_eject_hosts:1; /* auto_eject_hosts? */
unsigned auto_eject_drop:1; /* drop auto ejected hosts? */
unsigned preconnect:1; /* preconnect? */
unsigned redis:1; /* redis? */
};
Expand Down