Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add healthcheck support for http and smtp #8

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,26 @@
FROM python:3-alpine
# dumb-init is installed with pip in the RUN step below
ENTRYPOINT ["dumb-init", "--"]
CMD ["proxy"]
# the "-t .build" virtual package groups the compile-time dependencies so "apk del .build" can drop them again
RUN apk add --no-cache -t .build build-base &&\
# "healthcheck" is healthcheck.py, copied to /usr/local/bin/healthcheck by the COPY step below
HEALTHCHECK CMD ["healthcheck"]
RUN apk add --no-cache -t .build build-base curl-dev &&\
apk add --no-cache socat &&\
pip install --no-cache-dir dnspython dumb-init &&\
apk add --no-cache libcurl &&\
pip install --no-cache-dir dnspython dumb-init pycurl &&\
apk del .build
# Default configuration. "\$" keeps $TARGET literal in the value; healthcheck.py replaces it with TARGET at run time.
ENV NAMESERVERS="208.67.222.222 8.8.8.8 208.67.220.220 8.8.4.4" \
PORT="80 443" \
PRE_RESOLVE=0 \
MODE=tcp \
VERBOSE=0 \
MAX_CONNECTIONS=100 \
UDP_ANSWERS=1
UDP_ANSWERS=1 \
HTTP_HEALTHCHECK=0\
HTTP_HEALTHCHECK_URL="http://\$TARGET/"\
SMTP_HEALTHCHECK=0\
SMTP_HEALTHCHECK_URL="smtp://\$TARGET/"\
SMTP_HEALTHCHECK_COMMAND="HELP"
# Scripts are copied without the .py suffix so CMD / HEALTHCHECK can call them by bare name
# (presumably /usr/local/bin is on PATH in the base image -- confirm).
COPY proxy.py /usr/local/bin/proxy
COPY healthcheck.py /usr/local/bin/healthcheck

# Labels
ARG BUILD_DATE
Expand Down
63 changes: 63 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,36 @@ Use these environment variables:

Required. It's the host name where the incoming connections will be redirected to.

### `HTTP_HEALTHCHECK`

Default: `0`

Set to `1` to enable a healthcheck that uses pycurl HTTP requests. This is useful if the
target uses a deployment where the IP of the service changes frequently (e.g.
`accounts.google.com`) and you are using [`PRE_RESOLVE`](#pre_resolve).

#### Automatically restarting unhealthy proxies

When the HTTP healthcheck is enabled and fails, the container is only marked as unhealthy;
nothing restarts it automatically (see https://github.com/moby/moby/pull/22719).

If you want to restart your proxies automatically, you can use
https://github.com/willfarrell/docker-autoheal.

### `HTTP_HEALTHCHECK_URL`

Default: `http://$TARGET/`

URL to use in [`HTTP_HEALTHCHECK`](#http_healthcheck) if enabled. `$TARGET` gets
replaced inside the URL by the configured [`TARGET`](#target).

### `HTTP_HEALTHCHECK_TIMEOUT_MS`

Default: `2000`

Timeout in milliseconds for the HTTP healthcheck. It is used as the timeout both for
connecting and for receiving an answer, so a check may end up taking twice this time.

### `MODE`

Default: `tcp`
Expand Down Expand Up @@ -94,6 +124,39 @@ Set to `1` to force using the specified [nameservers](#nameservers) to resolve t

This is especially useful when using a network alias to whitelist an external API.

### `SMTP_HEALTHCHECK`

Default: `0`

Set to `1` to enable a healthcheck that uses pycurl SMTP requests. This is useful if the
target uses a deployment where the IP of the service changes frequently (e.g.
`smtp.eu.sparkpostmail.com`) and you are using [`PRE_RESOLVE`](#pre_resolve).

#### Automatically restarting unhealthy proxies

See [`HTTP_HEALTHCHECK`](#http_healthcheck).

### `SMTP_HEALTHCHECK_URL`

Default: `smtp://$TARGET/`

URL to use in [`SMTP_HEALTHCHECK`](#smtp_healthcheck) if enabled. `$TARGET` gets
replaced inside the URL by the configured [`TARGET`](#target).

### `SMTP_HEALTHCHECK_COMMAND`

Default: `HELP`

Allows changing the healthcheck command for servers that do not support `HELP` (e.g.
for [MailHog](https://github.com/mailhog/MailHog) you can use `QUIT`).

### `SMTP_HEALTHCHECK_TIMEOUT_MS`

Default: `2000`

Timeout in milliseconds for the SMTP healthcheck. It is used as the timeout both for
connecting and for receiving an answer, so a check may end up taking twice this time.

### `UDP_ANSWERS`

Default: `1`
Expand Down
184 changes: 184 additions & 0 deletions healthcheck.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
#!/usr/bin/env python3

import logging
import os

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("healthcheck")


def error(message, exception=None):
    """
    Log a failed check and abort the healthcheck.

    :param message: text written to the "healthcheck" logger at ERROR level
    :param exception: optional exception to raise after logging; when omitted the script exits with status 1
    :return: never returns normally
    """
    logger.error(message)
    if exception is not None:
        raise exception
    # the ``exit`` builtin is only injected by the ``site`` module for interactive sessions, so raise SystemExit
    # directly instead of relying on it
    raise SystemExit(1)


def http_healthcheck():
    """
    Use pycurl to check if the target server is still responding via proxy.py

    Configuration is read from the environment: HTTP_HEALTHCHECK_URL (default ``http://localhost/``, ``$TARGET`` is
    replaced by TARGET), HTTP_HEALTHCHECK_TIMEOUT_MS (default 2000) and TARGET (default ``localhost``).

    :raises pycurl.error: re-raised (after logging) when the request fails
    :return: None
    """
    from urllib.parse import urlsplit

    import pycurl

    check_url = os.environ.get("HTTP_HEALTHCHECK_URL", "http://localhost/")
    check_timeout_ms = int(os.environ.get("HTTP_HEALTHCHECK_TIMEOUT_MS", 2000))
    target = os.environ.get("TARGET", "localhost")
    check_url_with_target = check_url.replace("$TARGET", target)
    # parse the url properly instead of with a regex: a ":" in the path or in user info must not be mistaken for
    # the port, and https urls without an explicit port use 443 rather than 80
    parsed_url = urlsplit(check_url_with_target)
    port = parsed_url.port or (443 if parsed_url.scheme == "https" else 80)
    logger.info("checking %s via 127.0.0.1", check_url_with_target)
    request = None
    try:
        request = pycurl.Curl()
        request.setopt(pycurl.URL, check_url_with_target)
        # do not send the request to the target directly but use our own socat proxy process to check if it's still
        # working
        request.setopt(pycurl.RESOLVE, ["{}:{}:127.0.0.1".format(target, port)])
        request.setopt(pycurl.CONNECTTIMEOUT_MS, check_timeout_ms)
        request.setopt(pycurl.TIMEOUT_MS, check_timeout_ms)
        request.perform()
    except pycurl.error as e:
        error("error while checking http connection", e)
    finally:
        # release the curl handle even when the request failed
        if request is not None:
            request.close()


def smtp_healthcheck():
    """
    Use pycurl to check if the target server is still responding via proxy.py

    Configuration is read from the environment: SMTP_HEALTHCHECK_URL (default ``smtp://localhost/``, ``$TARGET`` is
    replaced by TARGET), SMTP_HEALTHCHECK_COMMAND (default ``HELP``), SMTP_HEALTHCHECK_TIMEOUT_MS (default 2000) and
    TARGET (default ``localhost``).

    :raises pycurl.error: re-raised (after logging) when the request fails
    :return: None
    """
    from urllib.parse import urlsplit

    import pycurl

    check_url = os.environ.get("SMTP_HEALTHCHECK_URL", "smtp://localhost/")
    check_command = os.environ.get("SMTP_HEALTHCHECK_COMMAND", "HELP")
    check_timeout_ms = int(os.environ.get("SMTP_HEALTHCHECK_TIMEOUT_MS", 2000))
    target = os.environ.get("TARGET", "localhost")
    check_url_with_target = check_url.replace("$TARGET", target)
    # parse the url properly instead of with a regex: a ":" in the path or in user info must not be mistaken for
    # the port, and smtps urls (which the old pattern could not match at all) default to 465
    parsed_url = urlsplit(check_url_with_target)
    port = parsed_url.port or (465 if parsed_url.scheme == "smtps" else 25)
    logger.info("checking %s via 127.0.0.1", check_url_with_target)
    request = None
    try:
        request = pycurl.Curl()
        request.setopt(pycurl.URL, check_url_with_target)
        request.setopt(pycurl.CUSTOMREQUEST, check_command)
        # do not send the request to the target directly but use our own socat proxy process to check if it's still
        # working
        request.setopt(pycurl.RESOLVE, ["{}:{}:127.0.0.1".format(target, port)])
        request.setopt(pycurl.CONNECTTIMEOUT_MS, check_timeout_ms)
        request.setopt(pycurl.TIMEOUT_MS, check_timeout_ms)
        request.perform()
    except pycurl.error as e:
        error("error while checking smtp connection", e)
    finally:
        # release the curl handle even when the request failed
        if request is not None:
            request.close()


def process_healthcheck():
    """
    Check that at least one socat process exists per port and no more than the number of configured max connections
    processes exist for each port.

    Reads PORT (whitespace separated list) and MAX_CONNECTIONS from the environment and aborts via ``error`` when a
    port has no socat process or has more than MAX_CONNECTIONS of them.

    :return: None
    """
    import subprocess

    ports = os.environ["PORT"].split()
    max_connections = int(os.environ["MAX_CONNECTIONS"])
    logger.info(
        "checking socat processes for port(s) %s having at least one and less than %d socat processes",
        ports,
        max_connections,
    )
    socat_processes = (
        subprocess.check_output(["sh", "-c", "grep -R socat /proc/[0-9]*/cmdline"])
        .decode("utf-8")
        .split("\n")
    )
    # every match mentions /proc/<pid>/cmdline, so the pid is the third "/"-separated token
    pids = [process.split("/")[2] for process in socat_processes if process]
    if len(pids) < len(ports):
        # if we have less than the number of ports socat processes we do not need to count processes per port and can
        # fail fast
        error("Expected at least %d socat processes" % len(ports))
    port_process_count = dict.fromkeys(ports, 0)
    for pid in pids:
        try:
            with open("/proc/%d/cmdline" % int(pid)) as fp:
                # arguments in /proc/.../cmdline are split by null bytes
                cmd = [part for part in fp.read().split("\x00") if part]
        except (FileNotFoundError, ProcessLookupError):
            # ignore processes no longer existing (possibly retrieved an answer)
            continue
        if len(cmd) < 3:
            # matched "socat" but has no connect argument at index 2 (e.g. the grep itself), so not a proxy process
            continue
        # the connect argument ends with :{ip}:{port} for our processes
        port = cmd[2].split(":")[-1]
        if port in port_process_count:
            # processes for ports we are not responsible for are skipped instead of crashing with KeyError
            port_process_count[port] += 1
    for port in ports:
        if port_process_count[port] == 0:
            error("Missing socat process(es) for port: %s" % port)
        if port_process_count[port] >= max_connections + 1:
            error(
                "More than %d + 1 socat process(es) for port: %s"
                % (max_connections, port)
            )


def preresolve_healthcheck():
    """
    Verify that every ip the running processes connect to is still returned when resolving TARGET.

    The ips are taken from the ``udp-connect:``/``tcp-connect:`` arguments found in /proc/*/cmdline and TARGET is
    resolved through the servers listed in NAMESERVERS. If consecutive lookups keep returning different answers
    the target is considered dns load-balanced and a flag file is written to the temp directory; once that file
    exists this check is skipped.

    :return: None
    """
    from tempfile import gettempdir

    flag_path = os.path.join(gettempdir(), "load_balancing_dns_detected")
    if os.path.exists(flag_path):
        # a previous run flagged the target as being dns load-balanced, so the resolver check stays disabled
        return

    import subprocess

    from dns.resolver import Resolver

    grep_output = subprocess.check_output(
        ["sh", "-c", "grep -R '\\(udp\\|tcp\\)-connect:' /proc/[0-9]*/cmdline"]
    )
    connect_ips = set()
    for line in grep_output.decode("utf-8").split("\n"):
        if line:
            connect_ips.add(line.split(":")[2])

    resolver = Resolver()
    resolver.nameservers = os.environ["NAMESERVERS"].split()
    target = os.environ["TARGET"]

    def lookup():
        # one fresh lookup of the target, reduced to the plain addresses
        return [answer.address for answer in resolver.resolve(target)]

    first_answer = lookup()
    for ip in connect_ips:
        logger.info(f"checking {target} resolves to {ip}")
        if ip in first_answer:
            continue
        second_answer = lookup()
        if second_answer == first_answer:
            # two identical answers without our ip: the pre-resolved address is stale
            error(
                f"{target} no longer resolves to {ip}, {first_answer}, {second_answer}"
            )
            continue
        # to make sure we didn't just hit the server switch in dns, we check again before deactivating
        # the healthcheck permanently (until the container restarts)
        third_answer = lookup()
        if third_answer == second_answer:
            continue
        logger.info(
            f"{target} seems to be load-balancing with dns ({first_answer} != {second_answer}), "
            f"deactivating the resolver healthcheck"
        )
        with open(flag_path, "w") as flag_file:
            flag_file.write(target)


# Run the checks in order; a failing check aborts the script through error() before later checks run.
# The process check always runs, the others are opt-in via their environment flag set to "1".
process_healthcheck()
# read PRE_RESOLVE like the other flags so an unset variable means "disabled" instead of raising KeyError
if os.environ.get("PRE_RESOLVE", "0") == "1":
    preresolve_healthcheck()
if os.environ.get("HTTP_HEALTHCHECK", "0") == "1":
    http_healthcheck()
if os.environ.get("SMTP_HEALTHCHECK", "0") == "1":
    smtp_healthcheck()
Loading
Loading