Skip to content

Commit

Permalink
Merge pull request #8 from ap-wtioit/master-enable_healthcheck_github
Browse files Browse the repository at this point in the history
add healthcheck support for http and smtp
  • Loading branch information
pedrobaeza committed Feb 7, 2024
2 parents d2cf614 + 4627f26 commit 118a9ac
Show file tree
Hide file tree
Showing 8 changed files with 972 additions and 308 deletions.
14 changes: 11 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,26 @@
FROM python:3-alpine
ENTRYPOINT ["dumb-init", "--"]
CMD ["proxy"]
RUN apk add --no-cache -t .build build-base &&\
HEALTHCHECK CMD ["healthcheck"]
RUN apk add --no-cache -t .build build-base curl-dev &&\
apk add --no-cache socat &&\
pip install --no-cache-dir dnspython dumb-init &&\
apk add --no-cache libcurl &&\
pip install --no-cache-dir dnspython dumb-init pycurl &&\
apk del .build
ENV NAMESERVERS="208.67.222.222 8.8.8.8 208.67.220.220 8.8.4.4" \
PORT="80 443" \
PRE_RESOLVE=0 \
MODE=tcp \
VERBOSE=0 \
MAX_CONNECTIONS=100 \
UDP_ANSWERS=1
UDP_ANSWERS=1 \
HTTP_HEALTHCHECK=0\
HTTP_HEALTHCHECK_URL="http://\$TARGET/"\
SMTP_HEALTHCHECK=0\
SMTP_HEALTHCHECK_URL="smtp://\$TARGET/"\
SMTP_HEALTHCHECK_COMMAND="HELP"
COPY proxy.py /usr/local/bin/proxy
COPY healthcheck.py /usr/local/bin/healthcheck

# Labels
ARG BUILD_DATE
Expand Down
63 changes: 63 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,36 @@ Use these environment variables:

Required. It's the host name where the incoming connections will be redirected to.

### `HTTP_HEALTHCHECK`

Default: `0`

Set to `1` to enable the healthcheck with pycurl http requests. This is useful if the
target uses a deployment where the IP of the service changes frequently (e.g.
`accounts.google.com`) and you are using [`PRE_RESOLVE`](#pre_resolve).

#### Automatically restarting unhealthy proxies

When you enable the http healthcheck and it fails, the container is only marked as
unhealthy; Docker itself does not restart it. (see https://github.com/moby/moby/pull/22719)

If you want to restart your proxies automatically, you can use
https://github.com/willfarrell/docker-autoheal.

### `HTTP_HEALTHCHECK_URL`

Default: `http://$TARGET/`

Url to use in [`HTTP_HEALTHCHECK`](#http_healthcheck) if enabled. `$TARGET` gets
replaced inside the url by the configured [`TARGET`](#target).

### `HTTP_HEALTHCHECK_TIMEOUT_MS`

Default: `2000`

Timeout in milliseconds for the http healthcheck. The same value is used as the timeout
for connecting and for receiving an answer, so a check may take up to twice this time.

### `MODE`

Default: `tcp`
Expand Down Expand Up @@ -94,6 +124,39 @@ Set to `1` to force using the specified [nameservers](#nameservers) to resolve t

This is especially useful when using a network alias to whitelist an external API.

### `SMTP_HEALTHCHECK`

Default: `0`

Set to `1` to enable the healthcheck with pycurl smtp requests. This is useful if the
target uses a deployment where the IP of the service changes frequently (e.g.
`smtp.eu.sparkpostmail.com`) and you are using [`PRE_RESOLVE`](#pre_resolve).

#### Automatically restarting unhealthy proxies

see [HTTP_HEALTHCHECK](#http_healthcheck)

### `SMTP_HEALTHCHECK_URL`

Default: `smtp://$TARGET/`

Url to use in [`SMTP_HEALTHCHECK`](#smtp_healthcheck) if enabled. `$TARGET` gets
replaced inside the url by the configured [`TARGET`](#target).

### `SMTP_HEALTHCHECK_COMMAND`

Default: `HELP`

Enables changing the healthcheck command for servers that do not support `HELP` (e.g.
for [MailHog](https://github.com/mailhog/MailHog) you can use `QUIT`)

### `SMTP_HEALTHCHECK_TIMEOUT_MS`

Default: `2000`

Timeout in milliseconds for the smtp healthcheck. The same value is used as the timeout
for connecting and for receiving an answer, so a check may take up to twice this time.

### `UDP_ANSWERS`

Default: `1`
Expand Down
184 changes: 184 additions & 0 deletions healthcheck.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
#!/usr/bin/env python3

import logging
import os

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("healthcheck")


def error(message, exception=None):
    """
    Log a failed healthcheck and abort the current run.

    :param message: text written to the "healthcheck" logger at ERROR level
    :param exception: optional exception to re-raise after logging; when omitted the process exits with status 1
    :raises SystemExit: with code 1 when no exception is given
    :return: never returns normally
    """
    logger.error(message)
    if exception is not None:
        raise exception
    # raise SystemExit directly instead of calling exit(): that helper is injected by the site module for
    # interactive use and is not available when the interpreter runs without site (python -S)
    raise SystemExit(1)


def http_healthcheck():
    """
    Use pycurl to check if the target server is still responding via proxy.py

    Reads HTTP_HEALTHCHECK_URL (default "http://localhost/"), HTTP_HEALTHCHECK_TIMEOUT_MS (default 2000) and
    TARGET (default "localhost") from the environment. "$TARGET" inside the url is replaced by TARGET and the
    request is pinned to 127.0.0.1 through curl's RESOLVE option.

    :raises pycurl.error: re-raised after logging when the request fails
    :raises ValueError: re-raised after logging when the url contains an invalid port
    :return: None
    """
    from urllib.parse import urlsplit

    import pycurl

    check_url = os.environ.get("HTTP_HEALTHCHECK_URL", "http://localhost/")
    check_timeout_ms = int(os.environ.get("HTTP_HEALTHCHECK_TIMEOUT_MS", 2000))
    target = os.environ.get("TARGET", "localhost")
    check_url_with_target = check_url.replace("$TARGET", target)
    url_parts = urlsplit(check_url_with_target)
    try:
        explicit_port = url_parts.port
    except ValueError as e:
        error("invalid port in http healthcheck url %s" % check_url_with_target, e)
    # the RESOLVE entry is keyed by host AND port, so the port has to be the one curl will really connect to:
    # without an explicit port that is 443 for https and 80 otherwise
    port = explicit_port or (443 if url_parts.scheme == "https" else 80)
    logger.info("checking %s via 127.0.0.1" % check_url_with_target)
    request = None
    try:
        request = pycurl.Curl()
        request.setopt(pycurl.URL, check_url_with_target)
        # do not send the request to the target directly but use our own socat proxy process to check if it's still
        # working
        request.setopt(pycurl.RESOLVE, ["{}:{}:127.0.0.1".format(target, port)])
        request.setopt(pycurl.CONNECTTIMEOUT_MS, check_timeout_ms)
        request.setopt(pycurl.TIMEOUT_MS, check_timeout_ms)
        request.perform()
    except pycurl.error as e:
        error("error while checking http connection", e)
    finally:
        # release the curl handle on the failure path as well
        if request is not None:
            request.close()


def smtp_healthcheck():
    """
    Use pycurl to check if the target server is still responding via proxy.py

    Reads SMTP_HEALTHCHECK_URL (default "smtp://localhost/"), SMTP_HEALTHCHECK_COMMAND (default "HELP"),
    SMTP_HEALTHCHECK_TIMEOUT_MS (default 2000) and TARGET (default "localhost") from the environment. "$TARGET"
    inside the url is replaced by TARGET and the request is pinned to 127.0.0.1 through curl's RESOLVE option.

    :raises pycurl.error: re-raised after logging when the request fails
    :raises ValueError: re-raised after logging when the url contains an invalid port
    :return: None
    """
    from urllib.parse import urlsplit

    import pycurl

    check_url = os.environ.get("SMTP_HEALTHCHECK_URL", "smtp://localhost/")
    check_command = os.environ.get("SMTP_HEALTHCHECK_COMMAND", "HELP")
    check_timeout_ms = int(os.environ.get("SMTP_HEALTHCHECK_TIMEOUT_MS", 2000))
    target = os.environ.get("TARGET", "localhost")
    check_url_with_target = check_url.replace("$TARGET", target)
    url_parts = urlsplit(check_url_with_target)
    try:
        explicit_port = url_parts.port
    except ValueError as e:
        error("invalid port in smtp healthcheck url %s" % check_url_with_target, e)
    # the RESOLVE entry is keyed by host AND port, so the port has to be the one curl will really connect to:
    # without an explicit port that is 465 for smtps and 25 otherwise
    port = explicit_port or (465 if url_parts.scheme == "smtps" else 25)
    logger.info("checking %s via 127.0.0.1" % check_url_with_target)
    request = None
    try:
        request = pycurl.Curl()
        request.setopt(pycurl.URL, check_url_with_target)
        request.setopt(pycurl.CUSTOMREQUEST, check_command)
        # do not send the request to the target directly but use our own socat proxy process to check if it's still
        # working
        request.setopt(pycurl.RESOLVE, ["{}:{}:127.0.0.1".format(target, port)])
        request.setopt(pycurl.CONNECTTIMEOUT_MS, check_timeout_ms)
        request.setopt(pycurl.TIMEOUT_MS, check_timeout_ms)
        request.perform()
    except pycurl.error as e:
        error("error while checking smtp connection", e)
    finally:
        # release the curl handle on the failure path as well
        if request is not None:
            request.close()


def process_healthcheck():
    """
    Check that at least one socat process exists per port and that the number of socat processes per port stays
    within the limit derived from the configured max connections.

    Reads PORT (whitespace separated list) and MAX_CONNECTIONS from the environment; both are required.

    :raises KeyError: when PORT or MAX_CONNECTIONS is not set
    :raises SystemExit: through error() when a port has no socat process or too many of them
    :return: None
    """
    import subprocess

    ports = os.environ["PORT"].split()
    max_connections = int(os.environ["MAX_CONNECTIONS"])
    logger.info(
        "checking socat processes for port(s) %s having at least one and less than %d socat processes"
        % (ports, max_connections)
    )
    # grep exits non-zero when nothing matched or when a /proc entry could not be read (e.g. the process ended
    # after the shell expanded the glob); neither should abort with a traceback, so the exit status is ignored
    # and whatever was matched is evaluated below
    grep_result = subprocess.run(
        ["sh", "-c", "grep -R socat /proc/[0-9]*/cmdline"],
        stdout=subprocess.PIPE,
        check=False,
    )
    socat_processes = grep_result.stdout.decode("utf-8", errors="replace").split("\n")
    pids = []
    for process in socat_processes:
        # matches look like /proc/{pid}/cmdline...; anything without a numeric pid in that position is skipped
        path_parts = process.split("/")
        if len(path_parts) > 2 and path_parts[2].isdigit():
            pids.append(path_parts[2])
    if len(pids) < len(ports):
        # if we have less than the number of ports socat processes we do not need to count processes per port and can
        # fail fast
        error("Expected at least %d socat processes" % len(ports))
    port_process_count = dict.fromkeys(ports, 0)
    for pid in pids:
        # foreach socat pid we detect the port it's for by checking the connect-to argument that ends with
        # :{ip}:{port} for our processes
        try:
            with open("/proc/%d/cmdline" % int(pid)) as fp:
                # arguments in /proc/.../cmdline are split by null bytes
                cmd = [part for part in fp.read().split("\x00") if part]
        except (FileNotFoundError, ProcessLookupError):
            # ignore processes no longer existing (possibly retrieved an answer)
            continue
        if len(cmd) < 3:
            # not one of our socat invocations (or a process that is exiting and has no arguments left)
            continue
        port = cmd[2].split(":")[-1]
        if port in port_process_count:
            port_process_count[port] += 1
    for port in ports:
        if port_process_count[port] == 0:
            error("Missing socat process(es) for port: %s" % port)
        # NOTE(review): this triggers at exactly max_connections + 1 processes although the message says
        # "More than"; confirm the intended limit before changing either side
        if port_process_count[port] >= max_connections + 1:
            error(
                "More than %d + 1 socat process(es) for port: %s"
                % (max_connections, port)
            )


def preresolve_healthcheck():
    """
    Check that the pre-resolved ip is still valid now for target

    The ips are taken from the third ":"-separated field of every /proc/{pid}/cmdline match for a udp-connect or
    tcp-connect argument and compared with what the configured NAMESERVERS return for TARGET. When consecutive
    lookups keep returning different answers, a flag file is written to the temp directory and later runs skip
    this check.

    :return: None
    """
    from tempfile import gettempdir

    load_balancing_dns_fs_flag = os.path.join(
        gettempdir(), "load_balancing_dns_detected"
    )
    if os.path.exists(load_balancing_dns_fs_flag):
        # a previous run flagged the target as being dns load-balanced, the resolver check stays disabled
        return

    import subprocess

    from dns.resolver import Resolver

    grep_output = subprocess.check_output(
        ["sh", "-c", "grep -R '\\(udp\\|tcp\\)-connect:' /proc/[0-9]*/cmdline"]
    ).decode("utf-8")
    pre_resolved_ips = set()
    for cmdline_match in grep_output.split("\n"):
        if cmdline_match:
            pre_resolved_ips.add(cmdline_match.split(":")[2])

    resolver = Resolver()
    resolver.nameservers = os.environ["NAMESERVERS"].split()
    target = os.environ["TARGET"]

    def lookup():
        # one fresh dns query for target, reduced to the list of returned addresses
        return [answer.address for answer in resolver.resolve(target)]

    resolved_ips = lookup()
    for ip in pre_resolved_ips:
        logger.info(f"checking {target} resolves to {ip}")
        if ip in resolved_ips:
            continue
        resolved_ips_2 = lookup()
        if resolved_ips_2 == resolved_ips:
            # two identical answers that both lack the ip: the pre-resolved address is stale
            error(
                f"{target} no longer resolves to {ip}, {resolved_ips}, {resolved_ips_2}"
            )
            continue
        # to make sure we didn't just hit the server switch in dns, we check again before deactivating
        # the healthcheck permanently (until the container restarts)
        resolved_ips_3 = lookup()
        if resolved_ips_3 == resolved_ips_2:
            continue
        logger.info(
            f"{target} seems to be load-balancing with dns ({resolved_ips} != {resolved_ips_2}), "
            f"deactivating the resolver healthcheck"
        )
        with open(load_balancing_dns_fs_flag, "w") as fp:
            fp.write(target)


# Run the checks in order; the first failing one aborts the script with a non-zero exit status.
process_healthcheck()
# PRE_RESOLVE is read with a "0" default like the two switches below, so an unset variable disables the check
# instead of failing with a KeyError
if os.environ.get("PRE_RESOLVE", "0") == "1":
    preresolve_healthcheck()
if os.environ.get("HTTP_HEALTHCHECK", "0") == "1":
    http_healthcheck()
if os.environ.get("SMTP_HEALTHCHECK", "0") == "1":
    smtp_healthcheck()
Loading

0 comments on commit 118a9ac

Please sign in to comment.