add healthcheck support for http and smtp

this enables the user to configure a healthcheck for containers that can be used to automatically restart proxies resolving to no longer active ips due to PRE_RESOLVE
Tecnativa · Jul 8, 2021 · 576ceef · 576ceef
1 parent a4e0d28
commit 576ceef
Show file tree

Hide file tree

Showing 6 changed files with 421 additions and 3 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,18 +1,26 @@
 FROM python:3-alpine
 ENTRYPOINT ["dumb-init", "--"]
 CMD ["proxy"]
-RUN apk add --no-cache -t .build build-base &&\
+HEALTHCHECK CMD ["healthcheck"]
+RUN apk add --no-cache -t .build build-base curl-dev &&\
     apk add --no-cache socat &&\
-    pip install --no-cache-dir dnspython dumb-init &&\
+    apk add --no-cache libcurl &&\
+    pip install --no-cache-dir dnspython dumb-init pycurl &&\
     apk del .build
 ENV NAMESERVERS="208.67.222.222 8.8.8.8 208.67.220.220 8.8.4.4" \
     PORT="80 443" \
     PRE_RESOLVE=0 \
     MODE=tcp \
     VERBOSE=0 \
     MAX_CONNECTIONS=100 \
-    UDP_ANSWERS=1
+    UDP_ANSWERS=1 \
+    HTTP_HEALTHCHECK=0 \
+    HTTP_HEALTHCHECK_URL="http://\$TARGET/" \
+    SMTP_HEALTHCHECK=0 \
+    SMTP_HEALTHCHECK_URL="smtp://\$TARGET/" \
+    SMTP_HEALTHCHECK_COMMAND="HELP"
 COPY proxy.py /usr/local/bin/proxy
+COPY healthcheck.py /usr/local/bin/healthcheck
 
 # Labels
 ARG BUILD_DATE

diff --git a/README.md b/README.md
@@ -35,6 +35,29 @@ Use these environment variables:
 
 Required. It's the host name where the incoming connections will be redirected to.
 
+### `HTTP_HEALTHCHECK`
+
+Default: `0`
+
+Set to `1` to enable a healthcheck using pycurl HTTP requests. This is useful if the
+target uses a deployment where the IP of the service changes frequently (e.g.
+`accounts.google.com`) and you are using [`PRE_RESOLVE`](#pre_resolve).
+
+#### Automatically restarting unhealthy proxies
+
+When the HTTP healthcheck is enabled and fails, the container is only marked as
+unhealthy; nothing restarts it automatically (see https://github.com/moby/moby/pull/22719).
+
+If you want to restart your proxies automatically, you can use
+https://github.com/willfarrell/docker-autoheal.
+
+### `HTTP_HEALTHCHECK_URL`
+
+Default: `http://$TARGET/`
+
+URL to use in [`HTTP_HEALTHCHECK`](#http_healthcheck) if enabled. `$TARGET` inside the
+URL gets replaced by the configured [`TARGET`](#target).
+
 ### `MODE`
 
 Default: `tcp`
@@ -94,6 +117,32 @@ Set to `1` to force using the specified [nameservers](#nameservers) to resolve t
 
 This is especially useful when using a network alias to whitelist an external API.
 
+### `SMTP_HEALTHCHECK`
+
+Default: `0`
+
+Set to `1` to enable a healthcheck using pycurl SMTP requests. This is useful if the
+target uses a deployment where the IP of the service changes frequently (e.g.
+`smtp.eu.sparkpostmail.com`) and you are using [`PRE_RESOLVE`](#pre_resolve).
+
+#### Automatically restarting unhealthy proxies
+
+See [`HTTP_HEALTHCHECK`](#http_healthcheck).
+
+### `SMTP_HEALTHCHECK_URL`
+
+Default: `smtp://$TARGET/`
+
+URL to use in [`SMTP_HEALTHCHECK`](#smtp_healthcheck) if enabled. `$TARGET` inside the
+URL gets replaced by the configured [`TARGET`](#target).
+
+### `SMTP_HEALTHCHECK_COMMAND`
+
+Default: `HELP`
+
+Allows changing the healthcheck command for servers that do not support `HELP` (e.g. for
+[MailHog](https://github.com/mailhog/MailHog) you can use `QUIT`).
+
 ### `UDP_ANSWERS`
 
 Default: `1`

diff --git a/healthcheck.py b/healthcheck.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+
+import logging
+import os
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("healthcheck")
+
+
+def error(message, exception=None):
+    """
+    Log ``message`` at ERROR level and abort the healthcheck.
+
+    :param message: text to log
+    :param exception: optional exception to raise after logging instead of exiting
+    :return: never returns; exits with status 1 or raises ``exception``
+    """
+    # sys.exit is used instead of the ``exit`` builtin, which is injected by the ``site`` module for interactive use
+    import sys
+
+    logger.error(message)
+    if exception is None:
+        sys.exit(1)
+    raise exception
+
+
+def http_healthcheck():
+    """
+    Use pycurl to check if the target server is still responding via proxy.py
+    :return: None
+    """
+    import re
+
+    import pycurl
+
+    check_url = os.environ.get("HTTP_HEALTHCHECK_URL", "http://localhost/")
+    target = os.environ.get("TARGET", "localhost")
+    check_url_with_target = check_url.replace("$TARGET", target)
+    port = re.search("https?://[^:]*(?::([^/]+))?", check_url_with_target)[1] or "80"
+    print("checking %s via 127.0.0.1" % check_url_with_target)
+    logger.info("checking %s via 127.0.0.1" % check_url_with_target)
+    try:
+        request = pycurl.Curl()
+        request.setopt(pycurl.URL, check_url_with_target)
+        # do not send the request to the target directly but use our own socat proxy process to check if it's still
+        # working
+        request.setopt(pycurl.RESOLVE, ["{}:{}:127.0.0.1".format(target, port)])
+        request.perform()
+        request.close()
+    except pycurl.error as e:
+        error("error while checking http connection", e)
+
+
+def smtp_healthcheck():
+    """
+    Use pycurl to check if the target server is still responding via proxy.py
+    :return: None
+    """
+    import re
+
+    import pycurl
+
+    check_url = os.environ.get("SMTP_HEALTHCHECK_URL", "smtp://localhost/")
+    check_command = os.environ.get("SMTP_HEALTHCHECK_COMMAND", "HELP")
+    target = os.environ.get("TARGET", "localhost")
+    check_url_with_target = check_url.replace("$TARGET", target)
+    port = re.search("smtp://[^:]*(?::([^/]+))?", check_url_with_target)[1] or "25"
+    logger.info("checking %s via 127.0.0.1" % check_url_with_target)
+    try:
+        request = pycurl.Curl()
+        request.setopt(pycurl.URL, check_url_with_target)
+        request.setopt(pycurl.CUSTOMREQUEST, check_command)
+        # do not send the request to the target directly but use our own socat proxy process to check if it's still
+        # working
+        request.setopt(pycurl.RESOLVE, ["{}:{}:127.0.0.1".format(target, port)])
+        request.perform()
+        request.close()
+    except pycurl.error as e:
+        error("error while checking smtp connection", e)
+
+
+def process_healthcheck():
+    """
+    Check that at least one socat process exists per port and no more than the number of configured max connections
+    processes exist for each port.
+    :return:
+    """
+    import subprocess
+
+    ports = os.environ["PORT"].split()
+    max_connections = int(os.environ["MAX_CONNECTIONS"])
+    logger.info(
+        "checking socat processes for port(s) %s having at least one and less than %d socat processes"
+        % (ports, max_connections)
+    )
+    socat_processes = (
+        subprocess.check_output(["sh", "-c", "grep -R socat /proc/[0-9]*/cmdline"])
+        .decode("utf-8")
+        .split("\n")
+    )
+    pids = [process.split("/")[2] for process in socat_processes if process]
+    if len(pids) < len(ports):
+        # if we have less than the number of ports socat processes we do not need to count processes per port and can
+        # fail fast
+        error("Expected at least %d socat processes" % len(ports))
+    port_process_count = {port: 0 for port in ports}
+    for pid in pids:
+        # foreach socat pid we detect the port it's for by checking the last argument (connect to) that ends with
+        # :{ip}:{port} for our processes
+        try:
+            with open("/proc/%d/cmdline" % int(pid), "rt") as fp:
+                # arguments in /proc/.../cmdline are split by null bytes
+                cmd = [part for part in "".join(fp.readlines()).split("\x00") if part]
+                port = cmd[2].split(":")[-1]
+                port_process_count[port] = port_process_count[port] + 1
+        except FileNotFoundError:
+            # ignore processes no longer existing (possibly retrieved an answer)
+            pass
+    for port in ports:
+        if port_process_count[port] == 0:
+            error("Missing socat process(es) for port: %s" % port)
+        if port_process_count[port] >= max_connections + 1:
+            error(
+                "More than %d + 1  socat process(es) for port: %s"
+                % (max_connections, port)
+            )
+
+
+process_healthcheck()
+if os.environ.get("HTTP_HEALTHCHECK", "0") == "1":
+    http_healthcheck()
+if os.environ.get("SMTP_HEALTHCHECK", "0") == "1":
+    smtp_healthcheck()
diff --git a/tests/run_tests.sh b/tests/run_tests.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+set -e
+for arg in "$@" ; do
+  echo arg $arg
+  if [[ "$arg" == "DEBUG" ]] ; then
+    DEBUG=1
+  else
+    TEST_FILTER="$arg"
+  fi
+done
+DEBUG=${DEBUG:-0}
+TEST_FILTER=${TEST_FILTER:-}
+
+# Tear down the compose project; with DEBUG=1 dump diagnostics first.
+# Registered as the EXIT handler below.
+function cleanup() {
+  if [[ $DEBUG == 1 ]]; then
+    # diagnostics are best effort: under `set -e` a failing command here would
+    # end the handler before the teardown below runs
+    docker-compose -f tests/test.yaml ps || true
+    docker-compose -f tests/test.yaml exec -T proxy_preresolve /usr/local/bin/healthcheck || true
+    docker-compose -f tests/test.yaml exec -T proxy_without_preresolve /usr/local/bin/healthcheck || true
+    docker-compose -f tests/test.yaml top || true
+    docker-compose -f tests/test.yaml logs || true
+  fi
+  docker-compose -f tests/test.yaml down -v --remove-orphans
+}
+trap cleanup EXIT
+
+# Run a command and print each line of its combined stdout/stderr after a prefix.
+# Arguments: $1 - prefix; remaining arguments - the command to run
+# Returns:   exit status of the command
+function with_prefix() {
+  local prefix line
+  prefix="$1"
+  shift
+  # IFS= keeps leading/trailing whitespace of each line; the `|| [[ -n ... ]]`
+  # still emits a final line that lacks a trailing newline
+  "$@" 2>&1 | while IFS= read -r line || [[ -n "$line" ]]; do
+    printf '%s %s\n' "$prefix" "$line"
+  done
+  return "${PIPESTATUS[0]}"
+}
+
+# Run every compose service named test_* (plus debug_* when DEBUG=1) whose name
+# contains TEST_FILTER, prefixing its output with the service name.
+# Globals: DEBUG (read), TEST_FILTER (read)
+function run_tests() {
+  # keep the loop variable out of the global scope
+  local service
+  for service in $(docker-compose -f tests/test.yaml config --services); do
+    if [[ ( $service == test_* || ( $DEBUG = 1 && $service == debug_* ) ) && $service == *"$TEST_FILTER"* ]] ; then
+      echo "running $service"
+      with_prefix "$service:" docker-compose -f tests/test.yaml run --rm "$service"
+    fi
+  done
+}
+
+# Replace the running "target" and "target_smtp" containers with fresh ones.
+# Globals: DEBUG (read)
+function change_target_ips() {
+  # keep the loop variable and the container id out of the global scope
+  local target target_container_id
+  for target in "target" "target_smtp"; do
+    #spin up a second target and remove the first target container to give it a new ip (simulates a new deployment of an external cloud service)
+    target_container_id=$(docker-compose -f tests/test.yaml ps -q "$target")
+    if [[ "$target_container_id" != "" ]] ; then
+      if [[ $DEBUG == 1 ]] ; then
+        docker inspect "$target_container_id" | grep '"IPAddress": "[^"]\+'
+      fi
+      docker-compose -f tests/test.yaml up -d --scale "$target=2" "$target"
+      docker stop "$target_container_id" | xargs echo "stopped ${target}_1"
+      docker rm "$target_container_id" | xargs echo "removed ${target}_1"
+      if [[ $DEBUG == 1 ]] ; then
+        target_container_id=$(docker-compose -f tests/test.yaml ps -q "$target")
+        docker inspect "$target_container_id" | grep '"IPAddress": "[^"]\+'
+      fi
+    fi
+  done
+  # give docker some time to restart unhealthy containers
+  sleep 5
+}
+
+with_prefix "build:" docker-compose -f tests/test.yaml build
+
+# make sure all tests pass when target is up
+run_tests
+
+# when target changes ip
+with_prefix "changing target_ip:" change_target_ips
+
+# all tests still should pass
+run_tests