From 4a4af106c822f23189e3eb96660173e156d7dbad Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Mon, 4 Mar 2024 14:28:45 -0600 Subject: [PATCH 01/27] Implement health checks and graceful termination --- .gitignore | 1 + Project.toml | 26 ++++ src/K8sDeputy.jl | 14 +++ src/graceful_termination.jl | 149 ++++++++++++++++++++++ src/health.jl | 63 ++++++++++ src/server.jl | 14 +++ test/graceful_termination.jl | 31 +++++ test/health.jl | 235 +++++++++++++++++++++++++++++++++++ test/runtests.jl | 22 ++++ 9 files changed, 555 insertions(+) create mode 100644 .gitignore create mode 100644 Project.toml create mode 100644 src/K8sDeputy.jl create mode 100644 src/graceful_termination.jl create mode 100644 src/health.jl create mode 100644 src/server.jl create mode 100644 test/graceful_termination.jl create mode 100644 test/health.jl create mode 100644 test/runtests.jl diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ba39cc5 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +Manifest.toml diff --git a/Project.toml b/Project.toml new file mode 100644 index 0000000..85bb45d --- /dev/null +++ b/Project.toml @@ -0,0 +1,26 @@ +name = "K8sDeputy" +uuid = "2481ae95-212f-4650-bb21-d53ea3caf09f" +authors = ["Beacon Biosignals, Inc"] +version = "0.1.0" + +[deps] +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" +Mocking = "78c3b35d-d492-501b-9361-3d52fe80e533" +Sockets = "6462fe0b-24de-5631-8697-dd941f90decc" + +[compat] +Aqua = "0.7" +Dates = "1" +HTTP = "1" +Mocking = "0.7" +Sockets = "1" +Test = "1" +julia = "1.6" + +[extras] +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Aqua", "Test"] diff --git a/src/K8sDeputy.jl b/src/K8sDeputy.jl new file mode 100644 index 0000000..90ea1ef --- /dev/null +++ b/src/K8sDeputy.jl @@ -0,0 +1,14 @@ +module K8sDeputy + +using Dates: Period, Second +using HTTP: HTTP +using Mocking +using Sockets: accept, connect, listen, 
localhost + +export Deputy, graceful_terminator, readied, shutdown, graceful_terminate + +include("graceful_termination.jl") +include("health.jl") +include("server.jl") + +end # module K8sDeputy diff --git a/src/graceful_termination.jl b/src/graceful_termination.jl new file mode 100644 index 0000000..18f7db2 --- /dev/null +++ b/src/graceful_termination.jl @@ -0,0 +1,149 @@ +# As Julia lacks user-defined signal handling and the default behavior for critical signals +# (i.e. SIGTERM, SIGABRT, SIGQUIT) is to report the signal and show a stack trace. As K8s +# utilizes SIGTERM by default to gracefully shutdown pods and we want to avoid logging +# unnecessary stack traces so we will utilize a `preStop` container hook as an alternative. +# +# Note it is possible to use the C function `sigaction` with a Julia callback function but +# from experimenting with this there are a few issues such as being unable to use locks or +# printing (`jl_safe_printf` does work). + +# Linux typically stores PID files in `/run` which requires root access. For systems with +# read-only file systems we need to support a user specified writable volume. +_deputy_ipc_dir() = get(tempdir, ENV, "DEPUTY_IPC_DIR") + +# Prefer using UNIX domain sockets but if the `DEPUTY_IPC_DIR` is set assume the file +# system is read-only and use a named pipe instead. +function _socket_path(name) + return haskey(ENV, "DEPUTY_IPC_DIR") ? joinpath(_deputy_ipc_dir(), name) : name +end + +# Following the Linux convention for pid files: +# https://refspecs.linuxfoundation.org/FHS_3.0/fhs/ch03s15.html +entrypoint_pid_file() = joinpath(_deputy_ipc_dir(), "julia-entrypoint.pid") +entrypoint_pid(pid::Integer) = write(entrypoint_pid_file(), string(pid) * "\n") + +function entrypoint_pid() + pid_file = entrypoint_pid_file() + return isfile(pid_file) ? 
parse(Int, readchomp(pid_file)) : 1 +end + +# https://docs.libuv.org/en/v1.x/process.html#c.uv_kill +uv_kill(pid::Integer, signum::Integer) = ccall(:uv_kill, Cint, (Cint, Cint), pid, signum) + +""" + graceful_terminator(f) -> Nothing + +Register a zero-argument function to be called when `graceful_terminate` is called targeting +this process. The user-defined function `f` is expected to call `exit` to terminate the +Julia process. The `graceful_terminator` function is only allowed to be called once within a +Julia process. + +## Examples + +```julia +app_status = AppStatus() +graceful_terminator(() -> shutdown(app_status)) +``` +## Kubernetes Setup + +When using Kubernetes (K8s) you can enable [graceful termination](https://cloud.google.com/blog/products/containers-kubernetes/kubernetes-best-practices-terminating-with-grace) +of a Julia process by defining a pod [`preStop`](https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks) +container hook. Typically, K8s initiates graceful termination via the `TERM` signal but +as Julia forcefully terminates when receiving this signal and Julia does not support +user-defined signal handlers we utilize `preStop` instead. + +The following K8s pod manifest snippet will specify K8s to call the user-defined function +specified by the `graceful_terminator`: + +```yaml +spec: + containers: + - lifecycle: + preStop: + exec: + command: ["julia", "-e", "using $(@__MODULE__()); graceful_terminate()"] +``` + +Additionally, the entrypoint for the container should not run Julia directly +as the init process (PID 1). Instead, users should define their entrypoint similarly to +`["/bin/sh", "-c", "julia entrypoint.jl; sleep 1"]` as this allows both the Julia +process and the `preStop` process to cleanly terminate. +""" +function graceful_terminator(f; set_entrypoint::Bool=true) + set_entrypoint && entrypoint_pid(getpid()) + + # Utilize UNIX domain sockets for the IPC. 
Avoid using network sockets here as we don't + # want to allow access to this functionality from outside of the localhost. Each process + # uses a distinct socket name allowing multiple Julia processes to make independent + # use of the graceful terminator. + server = listen(_socket_path("graceful-terminator.$(getpid())")) + + t = Threads.@spawn begin + while isopen(server) + sock = accept(server) + request = readline(sock) + + if request == "terminate" + try + f() # Expecting user-defined function to call `exit` + catch e + @error "User graceful terminator callback failed with exception:\n" * + sprint(showerror, e, catch_backtrace()) + end + end + + close(sock) + end + end + + # Useful only to report internal errors + @static if VERSION >= v"1.7.0-DEV.727" + errormonitor(t) + end + + return nothing +end + +""" + graceful_terminate(pid::Integer=entrypoint_pid(); wait::Bool=true) -> Nothing + +Initiates the execution of the `graceful_terminator` user callback in the process `pid`. See +`graceful_terminator` for more details. +""" +function graceful_terminate(pid::Integer=entrypoint_pid(); wait::Bool=true) + # As K8s doesn't provide a way to view the logs from the `preStop` command you can work + # around this by writing to the STDOUT of the `pid`. Only works while `pid` is running. + # https://stackoverflow.com/a/70708744 + # open("/proc/$pid/fd/1", "w") do io + # println(io, "preStop called") + # end + + sock = connect(_socket_path("graceful-terminator.$pid")) + println(sock, "terminate") + close(sock) + + # Wait for the `pid` to complete. We must block here as otherwise K8s sends a + # `TERM` signal immediately after the `preStop` completes. If we fail to wait the + # Julia process won't have a chance to perform a "clean" shutdown. If the Julia process + # takes longer than `terminationGracePeriodSeconds` to stop then K8s will forcefully + # terminate it with the `KILL` signal. 
+ # + # The `preStop` must complete before the container terminates otherwise K8s will + # report a `FailedPreStopHook` event. To avoid seeing this warning the Julia process + # should not be run directly as the container entrypoint but rather run as a subprocess + # of the entrypoint with a delay after the subprocess' termination. Doing this allows + # both the target Julia process and the `preStop` process to exit cleanly. + # + # https://cloud.google.com/blog/products/containers-kubernetes/kubernetes-best-practices-terminating-with-grace + # https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-termination + if wait + # The special "signal" 0 is used to check for process existence. + # https://man7.org/linux/man-pages/man2/kill.2.html + while uv_kill(pid, 0) == 0 + # Polling frequency should ideally be faster than the post-termination delay + sleep(0.1) + end + end + + return nothing +end diff --git a/src/health.jl b/src/health.jl new file mode 100644 index 0000000..e1f60c8 --- /dev/null +++ b/src/health.jl @@ -0,0 +1,63 @@ +mutable struct Deputy + ready::Bool + shutting_down::Bool + shutdown_handler::Any + shutdown_handler_timeout::Second +end + +function Deputy(; shutdown_handler=nothing, shutdown_handler_timeout::Period=Second(5)) + return Deputy(false, false, shutdown_handler, shutdown_handler_timeout) +end + +function readied(deputy::Deputy) + deputy.ready = true + return nothing +end + +function shutdown(deputy::Deputy) + # Abend if already shutting down + deputy.shutting_down && return nothing + deputy.shutting_down = true + + if !isnothing(deputy.shutdown_handler) + t = @async deputy.shutdown_handler() + + # Ensure the shutdown handler completes on-time and without exceptions + status = timedwait(deputy.shutdown_handler_timeout; pollint=Second(1)) do + return istaskdone(t) + end + + if istaskfailed(t) + @error "Shutdown handler failed" exception = TaskFailedException(t) + elseif status === :timed_out + @warn "Shutdown handler still 
running after $(deputy.shutdown_handler_timeout)" + end + end + + # Shutdown handlers should not call `exit` + @mock exit(1) + + return nothing +end + +function live_endpoint(deputy::Deputy) + return function (r::HTTP.Request) + @debug "liveness probed" + return if !deputy.shutting_down + HTTP.Response(200) + else + HTTP.Response(503) + end + end +end + +function ready_endpoint(deputy::Deputy) + return function (r::HTTP.Request) + @debug "readiness probed" + return if deputy.ready + HTTP.Response(200) + else + HTTP.Response(503) + end + end +end diff --git a/src/server.jl b/src/server.jl new file mode 100644 index 0000000..b22a59f --- /dev/null +++ b/src/server.jl @@ -0,0 +1,14 @@ +const DEFAULT_PORT = 8081 + +function _default_port() + name = "DEPUTY_HEALTH_CHECK_PORT" + return haskey(ENV, name) ? parse(Int, ENV[name]) : DEFAULT_PORT +end + +function serve!(deputy::Deputy, host=localhost, port::Integer=_default_port()) + router = HTTP.Router() + HTTP.register!(router, "/health/live", live_endpoint(deputy)) + HTTP.register!(router, "/health/ready", ready_endpoint(deputy)) + + return HTTP.serve!(router, host, port) +end diff --git a/test/graceful_termination.jl b/test/graceful_termination.jl new file mode 100644 index 0000000..fdb46e6 --- /dev/null +++ b/test/graceful_termination.jl @@ -0,0 +1,31 @@ +@testset "graceful_terminator / graceful_terminate" begin + code = quote + using K8sDeputy + atexit(() -> @info "SHUTDOWN COMPLETE") + graceful_terminator() do + @info "GRACEFUL TERMINATION HANDLER" + exit(2) + return nothing + end + sleep(60) + end + + cmd = `$(Base.julia_cmd()) --color=no -e $code` + buffer = IOBuffer() + p = run(pipeline(cmd; stdout=buffer, stderr=buffer); wait=false) + @test timedwait(() -> process_running(p), Second(5)) === :ok + + # Allow some time for Julia to start up and the graceful terminator to be registered. 
+ sleep(3) + + @test graceful_terminate(getpid(p)) === nothing # Blocks until the process exits + @test process_exited(p) + @test p.exitcode == 2 + + output = String(take!(buffer)) + expected = """ + [ Info: GRACEFUL TERMINATION HANDLER + [ Info: SHUTDOWN COMPLETE + """ + @test output == expected +end diff --git a/test/health.jl b/test/health.jl new file mode 100644 index 0000000..87f7986 --- /dev/null +++ b/test/health.jl @@ -0,0 +1,235 @@ +function exit_patcher(rc::Ref{Int}) + atexit_hooks = [] + return [@patch Base.atexit(f) = push!(atexit_hooks, f) + @patch function Base.exit(n) + rc[] = n + while !isempty(atexit_hooks) + pop!(atexit_hooks)() + end + end] +end + +@testset "Deputy" begin + @testset "basic" begin + deputy = Deputy() + @test !deputy.ready + @test !deputy.shutting_down + + readied(deputy) + @test deputy.ready + @test !deputy.shutting_down + end + + @testset "live_endpoint / ready_endpoint" begin + deputy = Deputy() + request = HTTP.Request() + + # Note: Users should not mutate the internal state of a `Deputy` + # TODO: Define `==(x::HTTP.Response, y::HTTP.Response)`. + + deputy.ready = false + r = ready_endpoint(deputy)(request) + @test r.status == 503 + @test isempty(String(r.body)) + + deputy.ready = true + r = ready_endpoint(deputy)(request) + @test r.status == 200 + @test isempty(String(r.body)) + + deputy.shutting_down = false + r = live_endpoint(deputy)(request) + @test r.status == 200 + @test isempty(String(r.body)) + + deputy.shutting_down = true + r = live_endpoint(deputy)(request) + @test r.status == 503 + @test isempty(String(r.body)) + end + + # Note: If a non-mocked `exit(0)` is called it may appear that all tests have passed. 
+ @testset "shutdown" begin + @testset "default handler" begin + deputy = Deputy() + + rc = Ref{Int}() + logs = [(:info, "SHUTDOWN COMPLETE")] + @test_logs(logs..., + apply(exit_patcher(rc)) do + @mock atexit(() -> @info "SHUTDOWN COMPLETE") + return shutdown(deputy) + end) + + @test isassigned(rc) + @test rc[] == 1 + end + + @testset "custom handler" begin + deputy = nothing + + shutdown_handler = function () + @info "SHUTDOWN HANDLER" + @info "shutting_down = $(deputy.shutting_down)" + end + + deputy = Deputy(; shutdown_handler) + + rc = Ref{Int}() + logs = [(:info, "SHUTDOWN HANDLER"), + (:info, "shutting_down = true"), + (:info, "SHUTDOWN COMPLETE")] + @test_logs(logs..., + apply(exit_patcher(rc)) do + @mock atexit(() -> @info "SHUTDOWN COMPLETE") + return shutdown(deputy) + end) + + @test isassigned(rc) + @test rc[] == 1 + end + + @testset "handler exception" begin + shutdown_handler = () -> error("failure") + deputy = Deputy(; shutdown_handler) + + rc = Ref{Int}() + logs = [(:error, "Shutdown handler failed"), + (:info, "SHUTDOWN COMPLETE")] + @test_logs(logs..., + apply(exit_patcher(rc)) do + @mock atexit(() -> @info "SHUTDOWN COMPLETE") + return shutdown(deputy) + end) + + @test isassigned(rc) + @test rc[] == 1 + end + + @testset "timeout" begin + shutdown_handler = function () + @info "SHUTDOWN HANDLER" + sleep(10) + @info "SHOULD NEVER BE SEEN" + return nothing + end + + deputy = Deputy(; shutdown_handler, shutdown_handler_timeout=Second(1)) + + rc = Ref{Int}() + logs = [(:info, "SHUTDOWN HANDLER"), + (:warn, "Shutdown handler still running after 1 second"), + (:info, "SHUTDOWN COMPLETE")] + @test_logs(logs..., + apply(exit_patcher(rc)) do + @mock atexit(() -> @info "SHUTDOWN COMPLETE") + return shutdown(deputy) + end) + + @test isassigned(rc) + @test rc[] == 1 + end + + @testset "exit" begin + code = quote + using K8sDeputy, Dates + + shutdown_handler() = @info "SHUTDOWN HANDLER" + atexit(() -> @info "SHUTDOWN COMPLETE") + + deputy = Deputy(; 
shutdown_handler, shutdown_handler_timeout=Second(1)) + shutdown(deputy) + end + + cmd = `$(Base.julia_cmd()) --color=no -e $code` + buffer = IOBuffer() + p = run(pipeline(cmd; stdout=buffer, stderr=buffer); wait=false) + + @test timedwait(() -> process_exited(p), Second(10)) === :ok + @test p.exitcode == 1 + + output = String(take!(buffer)) + expected = """ + [ Info: SHUTDOWN HANDLER + [ Info: SHUTDOWN COMPLETE + """ + @test output == expected + end + end + + @testset "serve!" begin + deputy = Deputy() + port = rand(EPHEMERAL_PORT_RANGE) + server = serve!(deputy, localhost, port) + + try + r = HTTP.get("http://localhost:$port/health/ready"; status_exception=false) + @test r.status == 503 + + r = HTTP.get("http://localhost:$port/health/live") + @test r.status == 200 + + readied(deputy) + + r = HTTP.get("http://localhost:$port/health/ready") + @test r.status == 200 + + r = HTTP.get("http://localhost:$port/health/live") + @test r.status == 200 + + # Faking shutting down. Normal usage would call `shutdown` but we don't want to + # terminate our test process. 
+ deputy.shutting_down = true + + r = HTTP.get("http://localhost:$port/health/ready") + @test r.status == 200 + + r = HTTP.get("http://localhost:$port/health/live"; status_exception=false) + @test r.status == 503 + finally + close(server) + end + end + + @testset "graceful termination" begin + port = rand(EPHEMERAL_PORT_RANGE) + code = quote + using K8sDeputy, Sockets + + shutdown_handler() = @info "SHUTDOWN HANDLER" + atexit(() -> @info "SHUTDOWN COMPLETE") + + deputy = Deputy(; shutdown_handler) + graceful_terminator() do + @info "GRACEFUL TERMINATION HANDLER" + shutdown(deputy) + return nothing + end + K8sDeputy.serve!(deputy, Sockets.localhost, $port) + readied(deputy) + sleep(60) + end + + cmd = `$(Base.julia_cmd()) --color=no -e $code` + buffer = IOBuffer() + p = run(pipeline(cmd; stdout=buffer, stderr=buffer); wait=false) + @test timedwait(() -> process_running(p), Second(5)) === :ok + @test timedwait(Second(10)) do + r = HTTP.get("http://localhost:$port/health/ready"; status_exception=false) + return r.status == 200 + end === :ok + + graceful_terminate(getpid(p)) # Blocks until the process exits + @test process_exited(p) + @test p.exitcode == 1 + + output = String(take!(buffer)) + expected = """ + [ Info: Listening on: 127.0.0.1:$port, thread id: 1 + [ Info: GRACEFUL TERMINATION HANDLER + [ Info: SHUTDOWN HANDLER + [ Info: SHUTDOWN COMPLETE + """ + @test output == expected + end +end diff --git a/test/runtests.jl b/test/runtests.jl new file mode 100644 index 0000000..38f87c9 --- /dev/null +++ b/test/runtests.jl @@ -0,0 +1,22 @@ +using Aqua: Aqua +using Dates: Second +using K8sDeputy +using K8sDeputy: ready_endpoint, live_endpoint, serve! 
+using HTTP: HTTP +using Mocking: Mocking, @mock, @patch, apply +using Sockets: localhost +using Test + +# https://en.wikipedia.org/wiki/Ephemeral_port +const EPHEMERAL_PORT_RANGE = 49152:65535 + +Mocking.activate() + +@testset "K8sDeputy.jl" begin + @testset "Aqua" begin + Aqua.test_all(K8sDeputy; ambiguities=false) + end + + include("graceful_termination.jl") + include("health.jl") +end From 0b702cea2a53de81215bd4ee02c5ffcbf24ca316 Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Mon, 4 Mar 2024 14:30:39 -0600 Subject: [PATCH 02/27] Add GitHub workflows --- .github/workflows/CI.yaml | 59 ++++++++++++++++++++++++ .github/workflows/DocPreviewCleanup.yaml | 39 ++++++++++++++++ .github/workflows/Documenter.yaml | 58 +++++++++++++++++++++++ .github/workflows/FormatCheck.yaml | 46 ++++++++++++++++++ 4 files changed, 202 insertions(+) create mode 100644 .github/workflows/CI.yaml create mode 100644 .github/workflows/DocPreviewCleanup.yaml create mode 100644 .github/workflows/Documenter.yaml create mode 100644 .github/workflows/FormatCheck.yaml diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml new file mode 100644 index 0000000..5c0dede --- /dev/null +++ b/.github/workflows/CI.yaml @@ -0,0 +1,59 @@ +--- +name: CI +on: + workflow_dispatch: + push: + branches: + - main + tags: ["*"] + paths: + - "src/**" + - "test/**" + - "Project.toml" + - ".github/workflows/CI.yaml" + pull_request: + paths: + - "src/**" + - "test/**" + - "Project.toml" + - ".github/workflows/CI.yaml" +concurrency: + # Skip intermediate builds: always. + # Cancel intermediate builds: only if it is a pull request build. 
+ group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.runs-on }} - ${{ matrix.arch }} - ${{ matrix.threads}} threads + # These permissions are needed to: + # - Delete old caches: https://github.com/julia-actions/cache#cache-retention + permissions: + actions: write + contents: read + runs-on: ${{ matrix.runs-on }} + strategy: + fail-fast: false + matrix: + version: + - "1.6" # Earliest version of Julia that the package is compatible with + - "1" # Latest Julia release + runs-on: + - ubuntu-latest + arch: + - x64 + threads: + - 1 + env: + JULIA_NUM_THREADS: ${{ matrix.threads }} + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v1 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/cache@v1 + - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v3 + with: + file: lcov.info diff --git a/.github/workflows/DocPreviewCleanup.yaml b/.github/workflows/DocPreviewCleanup.yaml new file mode 100644 index 0000000..bee2b01 --- /dev/null +++ b/.github/workflows/DocPreviewCleanup.yaml @@ -0,0 +1,39 @@ +--- +# remove PR previews once they're merged +# +name: Doc Preview Cleanup +on: + pull_request: + types: [closed] + +# Ensure that only one "Doc Preview Cleanup" workflow is force pushing at a time +concurrency: + group: doc-preview-cleanup + cancel-in-progress: false + +jobs: + doc-preview-cleanup: + runs-on: ubuntu-latest + permissions: + contents: write + env: + PR: ${{ github.event.number }} + steps: + - name: Checkout gh-pages branch + uses: actions/checkout@v4 + with: + ref: gh-pages + - name: Delete preview and history + push changes + run: | + preview_dir="previews/PR${PR?}" + if [ -d "${preview_dir}" ]; then + # Delete preview directory created by this PR + git rm -rf "${preview_dir}" + + # Commit the removed preview 
directories and truncate history + git config user.name "Documenter.jl" + git config user.email "documenter@juliadocs.github.io" + git commit -m "delete preview" + git branch gh-pages-new $(echo "squash history" | git commit-tree HEAD^{tree}) + git push --force origin gh-pages-new:gh-pages + fi diff --git a/.github/workflows/Documenter.yaml b/.github/workflows/Documenter.yaml new file mode 100644 index 0000000..ad2b5f2 --- /dev/null +++ b/.github/workflows/Documenter.yaml @@ -0,0 +1,58 @@ +--- +name: Documenter +on: + workflow_dispatch: + push: + tags: ["*"] + branches: + - main + paths: + - "docs/**" + - "src/**" + - "Project.toml" + - ".github/workflows/Documenter.yaml" + pull_request: + paths: + - "docs/**" + - "src/**" + - "Project.toml" + - ".github/workflows/Documenter.yaml" + - ".github/workflows/DocPreviewCleanup.yaml" +jobs: + docs: + name: Build + # These permissions are needed to: + # - Delete old caches: https://github.com/julia-actions/cache#usage + permissions: + actions: write + contents: read + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v1 + with: + version: "1" + show-versioninfo: true + - uses: julia-actions/cache@v1 + - name: Install dependencies + shell: julia --project=docs --color=yes {0} + run: | + using Pkg + Pkg.develop(PackageSpec(path=pwd())) + Pkg.instantiate() + - name: Build docs + uses: julia-actions/julia-docdeploy@v1 + with: + install-package: false # Avoid instantiating twice + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Preview URL + if: ${{ github.event_name == 'pull_request' }} + run: | + repo_owner="${repo%/*}" # e.g. JuliaLang + repo_name="${repo#*/}" # e.g. Example.jl + echo ":books: Documentation preview available at:" | tee -a "$GITHUB_STEP_SUMMARY" + echo "" | tee -a "$GITHUB_STEP_SUMMARY" + env: + repo: ${{ github.repository }} # e.g. 
JuliaLang/Example.jl + PR: ${{ github.event.number }} diff --git a/.github/workflows/FormatCheck.yaml b/.github/workflows/FormatCheck.yaml new file mode 100644 index 0000000..6b6df9b --- /dev/null +++ b/.github/workflows/FormatCheck.yaml @@ -0,0 +1,46 @@ +--- +name: Format Check +on: + push: + branches: + - main + tags: ["*"] + paths: + - "**/*.jl" + - ".github/workflows/FormatCheck.yaml" + pull_request: + paths: + - "**/*.jl" + - ".github/workflows/FormatCheck.yaml" +jobs: + format-check: + name: Julia + # These permissions are needed to: + # - Delete old caches: https://github.com/julia-actions/cache#usage + # - Post formatting suggestions: https://github.com/reviewdog/action-suggester#required-permissions + permissions: + actions: write + contents: read + pull-requests: write + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v1 + with: + version: "1" + - uses: julia-actions/cache@v1 + - name: Install JuliaFormatter + shell: julia --project=@format --color=yes {0} + run: | + using Pkg + Pkg.add(PackageSpec(; name="JuliaFormatter", version="1")) + - name: Check formatting + shell: julia --project=@format --color=yes {0} + run: | + using JuliaFormatter + format("."; verbose=true) || exit(1) + # Add formatting suggestions to non-draft PRs even when "Check formatting" fails + - uses: reviewdog/action-suggester@v1 + if: ${{ !cancelled() && github.event_name == 'pull_request' && github.event.pull_request.draft == false }} + with: + tool_name: JuliaFormatter From 3d1b7831b8acfe82aace80285ba03b5449825c33 Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Mon, 4 Mar 2024 15:52:55 -0600 Subject: [PATCH 03/27] Rename endpoint functions --- src/health.jl | 4 ++-- src/server.jl | 4 ++-- test/health.jl | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/health.jl b/src/health.jl index e1f60c8..0c0d73d 100644 --- a/src/health.jl +++ b/src/health.jl @@ -40,7 +40,7 @@ function shutdown(deputy::Deputy) 
return nothing end -function live_endpoint(deputy::Deputy) +function liveness_endpoint(deputy::Deputy) return function (r::HTTP.Request) @debug "liveness probed" return if !deputy.shutting_down @@ -51,7 +51,7 @@ function live_endpoint(deputy::Deputy) end end -function ready_endpoint(deputy::Deputy) +function readiness_endpoint(deputy::Deputy) return function (r::HTTP.Request) @debug "readiness probed" return if deputy.ready diff --git a/src/server.jl b/src/server.jl index b22a59f..d8459d5 100644 --- a/src/server.jl +++ b/src/server.jl @@ -7,8 +7,8 @@ end function serve!(deputy::Deputy, host=localhost, port::Integer=_default_port()) router = HTTP.Router() - HTTP.register!(router, "/health/live", live_endpoint(deputy)) - HTTP.register!(router, "/health/ready", ready_endpoint(deputy)) + HTTP.register!(router, "/health/live", liveness_endpoint(deputy)) + HTTP.register!(router, "/health/ready", readiness_endpoint(deputy)) return HTTP.serve!(router, host, port) end diff --git a/test/health.jl b/test/health.jl index 87f7986..4044397 100644 --- a/test/health.jl +++ b/test/health.jl @@ -20,7 +20,7 @@ end @test !deputy.shutting_down end - @testset "live_endpoint / ready_endpoint" begin + @testset "liveness_endpoint / readiness_endpoint" begin deputy = Deputy() request = HTTP.Request() @@ -28,22 +28,22 @@ end # TODO: Define `==(x::HTTP.Response, y::HTTP.Response)`. 
deputy.ready = false - r = ready_endpoint(deputy)(request) + r = readiness_endpoint(deputy)(request) @test r.status == 503 @test isempty(String(r.body)) deputy.ready = true - r = ready_endpoint(deputy)(request) + r = readiness_endpoint(deputy)(request) @test r.status == 200 @test isempty(String(r.body)) deputy.shutting_down = false - r = live_endpoint(deputy)(request) + r = liveness_endpoint(deputy)(request) @test r.status == 200 @test isempty(String(r.body)) deputy.shutting_down = true - r = live_endpoint(deputy)(request) + r = liveness_endpoint(deputy)(request) @test r.status == 503 @test isempty(String(r.body)) end From b3cf654bcae5e0261c2e9acdb2283f15ec43a459 Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Mon, 4 Mar 2024 16:01:29 -0600 Subject: [PATCH 04/27] Add docstrings to health check functions --- src/health.jl | 32 ++++++++++++++++++++++++++++++++ src/server.jl | 12 ++++++++++++ 2 files changed, 44 insertions(+) diff --git a/src/health.jl b/src/health.jl index 0c0d73d..240fc1e 100644 --- a/src/health.jl +++ b/src/health.jl @@ -5,15 +5,47 @@ mutable struct Deputy shutdown_handler_timeout::Second end +""" + Deputy(; shutdown_handler=nothing, shutdown_handler_timeout::Period=Second(5)) + +Construct an application `Deputy` which provides health check endpoints. + +## Keywords + +- `shutdown_handler` (optional): A zero-argument function which allows the user to provide + a custom callback function for when `shutdown(::Deputy)` is called. +- `shutdown_handler_timeout::Period` (optional): Specifies the maximum execution duration of + a `shutdown_handler`. +""" function Deputy(; shutdown_handler=nothing, shutdown_handler_timeout::Period=Second(5)) return Deputy(false, false, shutdown_handler, shutdown_handler_timeout) end +""" + readied(deputy::Deputy) -> Nothing + +Mark the application as "ready". Sets the readiness endpoint to respond with successful +responses. 
+""" function readied(deputy::Deputy) deputy.ready = true return nothing end +""" + shutdown(deputy::Deputy) -> Nothing + +Initiates a shutdown of the application by: + +1. Setting the liveness endpoint to respond with failures. +2. Executing the deputy's `shutdown_handler` (if defined). +3. Exiting the current Julia process. + +If a `deputy.shutdown_handler` is defined it must complete within the +`deputy.shutdown_handler_timeout` or a warning will be logged and the Julia process will +immediately exit. Any exceptions that occur in the `deputy.shutdown_handler` will also be +logged and result in the Julia process exiting. +""" function shutdown(deputy::Deputy) # Abend if already shutting down deputy.shutting_down && return nothing diff --git a/src/server.jl b/src/server.jl index d8459d5..ab02675 100644 --- a/src/server.jl +++ b/src/server.jl @@ -5,6 +5,18 @@ function _default_port() return haskey(ENV, name) ? parse(Int, ENV[name]) : DEFAULT_PORT end +""" + K8sDeputy.serve!(deputy::Deputy, [host], [port]) -> HTTP.Server + +Starts a non-blocking `HTTP.Server` responding to requests to `deputy` health checks. The +following health check endpoints are available: + +- `/health/live`: Is the server alive/running? +- `/health/ready`: Is the server ready (has `readied(deputy)` been called)? + +These endpoints will respond with HTTP status `200 OK` on success or +`503 Service Unavailable` on failure. 
+""" function serve!(deputy::Deputy, host=localhost, port::Integer=_default_port()) router = HTTP.Router() HTTP.register!(router, "/health/live", liveness_endpoint(deputy)) From 8b04c15cbd5634c5b153e1411a781de40b3b9f8b Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Tue, 5 Mar 2024 13:37:37 -0600 Subject: [PATCH 05/27] Add badges to README --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6ea28a5..d4ff251 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,6 @@ # K8sDeputy.jl -Provides K8s health checks and graceful termination support on behalf of Julia services + +[![docs](https://img.shields.io/badge/docs-dev-blue.svg)](https://beacon-biosignals.github.io/K8sDeputy.jl/dev) +[![CI](https://github.com/beacon-biosignals/K8sDeputy.jl/actions/workflows/CI.yaml/badge.svg)](https://github.com/beacon-biosignals/K8sDeputy.jl/actions/workflows/CI.yaml) + +Provides K8s health checks and graceful termination support on behalf of Julia services. 
From 754f1170ccd973463e3781513aa8cf61f0f2233a Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Tue, 5 Mar 2024 13:38:43 -0600 Subject: [PATCH 06/27] Add manual --- .gitignore | 1 + docs/Project.toml | 6 ++ docs/make.jl | 17 +++++ docs/src/api.md | 10 +++ docs/src/graceful_termination.md | 68 +++++++++++++++++++ docs/src/health_checks.md | 112 +++++++++++++++++++++++++++++++ docs/src/index.md | 6 ++ 7 files changed, 220 insertions(+) create mode 100644 docs/Project.toml create mode 100644 docs/make.jl create mode 100644 docs/src/api.md create mode 100644 docs/src/graceful_termination.md create mode 100644 docs/src/health_checks.md create mode 100644 docs/src/index.md diff --git a/.gitignore b/.gitignore index ba39cc5..06ba5eb 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ +docs/build Manifest.toml diff --git a/docs/Project.toml b/docs/Project.toml new file mode 100644 index 0000000..95d1b1a --- /dev/null +++ b/docs/Project.toml @@ -0,0 +1,6 @@ +[deps] +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +K8sDeputy = "2481ae95-212f-4650-bb21-d53ea3caf09f" + +[compat] +Documenter = "1.0.0" diff --git a/docs/make.jl b/docs/make.jl new file mode 100644 index 0000000..5708d95 --- /dev/null +++ b/docs/make.jl @@ -0,0 +1,17 @@ +using K8sDeputy +using Documenter + +pages = ["Home" => "index.md", + "Health Checks" => "health_checks.md", + "Graceful Termination" => "graceful_termination.md", + "API" => "api.md"] + +makedocs(; modules=[K8sDeputy], + format=Documenter.HTML(; prettyurls=get(ENV, "CI", nothing) == "true"), + sitename="K8sDeputy.jl", + authors="Beacon Biosignals", + pages) + +deploydocs(; repo="github.com/beacon-biosignals/K8sDeputy.jl.git", + push_preview=true, + devbranch="main") diff --git a/docs/src/api.md b/docs/src/api.md new file mode 100644 index 0000000..ef48f3e --- /dev/null +++ b/docs/src/api.md @@ -0,0 +1,10 @@ +# API + +```@docs +Deputy +K8sDeputy.serve! 
+readied +shutdown +graceful_terminator +graceful_terminate +``` diff --git a/docs/src/graceful_termination.md b/docs/src/graceful_termination.md new file mode 100644 index 0000000..f498bf6 --- /dev/null +++ b/docs/src/graceful_termination.md @@ -0,0 +1,68 @@ +# Graceful Termination + +Kubernetes (K8s) applications are expected to handle [graceful termination](https://cloud.google.com/blog/products/containers-kubernetes/kubernetes-best-practices-terminating-with-grace). Typically, +applications will initiate a graceful termination by handling the `TERM` signal when a K8s pod is to be terminated. + +At this point in time [Julia does not provide user-definable signal handlers](https://github.com/JuliaLang/julia/issues/14675) and the internal Julia signal handler for `TERM` results in the process reporting the signal (with a stack trace) to standard error and exiting. Julia provides users with [`atexit`](https://docs.julialang.org/en/v1/base/base/#Base.atexit) to define callbacks when Julia is terminating but unfortunately this callback system only allows for trivial actions to occur when Julia is shutdown due to handling the `TERM` signal. + +These limitations resulted in K8sDeputy.jl providing an alternative path for handling graceful termination Julia processes. This avoids logging unnecessary error messages and also provides a reliable shutdown callback system for graceful termination. + +## Interface + +The K8sDeputy.jl package provides the `graceful_terminator` function for registering a single user callback upon receiving a graceful termination event. The `graceful_terminate` function can be used from another Julia process to terminate the `graceful_terminator` caller process. 
For example run the following code in an interactive Julia REPL: + +```julia +using K8sDeputy +graceful_terminator(() -> (@info "Gracefully terminating..."; exit())) +``` + +In another terminal run the following code to initiate graceful termination: + +```sh +julia -e 'using K8sDeputy; graceful_terminate()' +``` + +Once `graceful_terminate` has been called the first process will: execute the callback, log the message, and exit the Julia process. + +!!! note + + By default the `graceful_terminator` function registers the caller Julia process as the "entrypoint" Julia process. Primarily, this allows for out-of-the-box support for Julia + applications running as non-[init](https://en.wikipedia.org/wiki/Init) processes but only allows one Julia process to be defined as the "entrypoint". If you require multiple Julia processes within to support graceful termination concurrently you can use `set_entrypoint=false` (e.g. `graceful_terminator(...; set_entrypoint=false)`) and pass in the target process ID to `graceful_terminate`. + +## Deputy Integration + +The `graceful_terminator` function can be combined with the deputy's `shutdown` function to allow graceful termination of the application and the deputy: + +```julia +using K8sDeputy +deputy = Deputy(; shutdown_handler=() -> @info "Shutting down") +server = K8sDeputy.serve!(deputy, "0.0.0.0") +graceful_terminator(() -> shutdown(deputy)) + +# Application code +``` + +## Kubernetes Setup + +To configure your K8s container resource to call `graceful_terminate` when terminating you can configure a [`preStop` hook](https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks): + +```yaml +apiVersion: v1 +kind: Pod +spec: + containers: + - name: app + # command: ["/bin/sh", "-c", "julia entrypoint.jl; sleep 1"] + lifecycle: + preStop: + exec: + command: ["julia", "-e", "using K8sDeputy; graceful_terminate()"] + # terminationGracePeriodSeconds: 30 +``` + +!!! 
note + + Applications with slow shutdown callbacks may want to consider specifying `terminationGracePeriodSeconds` which specifies the maximum duration a pod can take when gracefully terminating. Once the timeout is reached the processes running in the pod are forcibly halted with a `KILL` signal. + +Finally, the entrypoint for the container should also not directly use the Julia as [init](https://en.wikipedia.org/wiki/Init) process (PID 1). Instead, users should define their entrypoint similarly to +`["/bin/sh", "-c", "julia entrypoint.jl; sleep 1"]` as this allows the both the Julia process and the `preStop` process to cleanly terminate. diff --git a/docs/src/health_checks.md b/docs/src/health_checks.md new file mode 100644 index 0000000..6cdc899 --- /dev/null +++ b/docs/src/health_checks.md @@ -0,0 +1,112 @@ +# Health Checks + +K8sDeputy.jl provides the following health endpoints: + +- `/health/live` +- `/health/ready` + +These endpoints respond with HTTP status `200 OK` on success or `503 Service Unavailable` on failure. + +## Supporting liveness probes + +In order to enable [liveness probes](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command) you will need to start the K8sDeputy health check server from within your application: + +```julia +using K8sDeputy +deputy = Deputy() +K8sDeputy.serve!(deputy, "0.0.0.0") + +# Application code +``` + +!!! note + + We specify the HTTP service to listen to all addresses (i.e. `0.0.0.0`) on the container as the K8s [kubelet](https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/) which uses the `livenessProbe` executes the requests from outside of the container. + +Once `K8sDeputy.serve!` has been called the HTTP based liveness endpoint should now return successful responses. + +Probe requests prior to running `K8sDeputy.serve!` will return failure responses. 
Application developers should consider starting the health check endpoints before running slow application initialization code. Alternatively, an [`initialDelaySeconds`](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes) can be added to the `livenessProbe`. + +You'll also need to configure your K8s container resource to specify the `livenessProbe`. For example here's a partial manifest for a K8s pod: + +```yaml +apiVersion: v1 +kind: Pod +spec: + containers: + - name: app + ports: + - name: health-check + containerPort: 8081 # The default K8sDeputy.jl health check port + protocol: TCP + livenessProbe: + httpGet: + path: /health/live + port: health-check + timeoutSeconds: 5 +``` + +!!! note + + K8s probes require that applications must respond to the probe requests in under `timeoutSeconds` (defaults to 1 second). Since Julia's HTTP.jl server can be unresponsive we recommend using a `timeoutSeconds` of at least 5 seconds. + +## Supporting readiness probes + +Enabling [readiness probes](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes) is similar to [enabling the liveness probes](#supporting-liveness-probes) but requires an call to `readied`: + +```julia +using K8sDeputy +deputy = Deputy() +K8sDeputy.serve!(deputy, "0.0.0.0") + +# Application initialization code + +readied(deputy) + +# Application code +``` + +When you application is ready you should declare your application as "readied". Doing this causes the readiness endpoint to start returning successful responses. For K8s applications responding to network traffic this endpoint is critical for ensuring timely responses to external requests. Although, defining `readied` for non-network based applications is optional it can still be useful for administration/monitoring. 
+ +To configure your K8s container resource with a readiness probe you'll need to declare a `readinessProbe` in your manifest. For example here's a partial manifest for a K8s pod: + +```yaml +apiVersion: v1 +kind: Pod +spec: + containers: + - name: app + ports: + - name: health-check + containerPort: 8081 # Default K8sDeputy.jl health check port + protocol: TCP + readinessProbe: + httpGet: + path: /health/ready + port: health-check + timeoutSeconds: 5 +``` + +## Shutdown + +When it is time to shutdown your application you should inform the deputy by running the `shutdown` function: + +```julia +using K8sDeputy +deputy = Deputy(; shutdown_handler=() -> @info "Shutting down") +K8sDeputy.serve!(deputy, "0.0.0.0") + +try + # Application code +finally + shutdown(deputy) +end +``` + +Once `shutdown` is called the following occur: + +1. The liveness endpoint starts returning failure responses +2. The deputy's `shutdown_handler` is called +3. The Julia process is terminated + +By default the `shutdown_handler` only has 5 seconds to complete. If your `shutdown_handler` requires more time to execute you can change the timeout by using the keyword `shutdown_handler_timeout`. diff --git a/docs/src/index.md b/docs/src/index.md new file mode 100644 index 0000000..d4ff251 --- /dev/null +++ b/docs/src/index.md @@ -0,0 +1,6 @@ +# K8sDeputy.jl + +[![docs](https://img.shields.io/badge/docs-dev-blue.svg)](https://beacon-biosignals.github.io/K8sDeputy/dev) +[![CI](https://github.com/beacon-biosignals/K8sDeputy.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/beacon-biosignals/K8sDeputy.jl/actions/workflows/CI.yml) + +Provides K8s health checks and graceful termination support on behalf of Julia services. 
From 26a9d9b7a845b33e21c28b8aa2d372f8765615eb Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Tue, 5 Mar 2024 13:44:34 -0600 Subject: [PATCH 07/27] Add both stable/dev docs badges --- README.md | 3 ++- docs/src/index.md | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d4ff251..b971ab5 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # K8sDeputy.jl -[![docs](https://img.shields.io/badge/docs-dev-blue.svg)](https://beacon-biosignals.github.io/K8sDeputy/dev) [![CI](https://github.com/beacon-biosignals/K8sDeputy.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/beacon-biosignals/K8sDeputy.jl/actions/workflows/CI.yml) +[![Stable Documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://beacon-biosignals.github.io/K8sDeputy.jl/stable) +[![Dev Documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://beacon-biosignals.github.io/K8sDeputy.jl/dev) Provides K8s health checks and graceful termination support on behalf of Julia services. diff --git a/docs/src/index.md b/docs/src/index.md index d4ff251..b971ab5 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,6 +1,7 @@ # K8sDeputy.jl -[![docs](https://img.shields.io/badge/docs-dev-blue.svg)](https://beacon-biosignals.github.io/K8sDeputy/dev) [![CI](https://github.com/beacon-biosignals/K8sDeputy.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/beacon-biosignals/K8sDeputy.jl/actions/workflows/CI.yml) +[![Stable Documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://beacon-biosignals.github.io/K8sDeputy.jl/stable) +[![Dev Documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://beacon-biosignals.github.io/K8sDeputy.jl/dev) Provides K8s health checks and graceful termination support on behalf of Julia services. 
From 48ba8566baec4f8fe329a6ad6a5d9aabd88524f7 Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Tue, 5 Mar 2024 13:44:54 -0600 Subject: [PATCH 08/27] Specify YAS formatting --- .JuliaFormatter.toml | 1 + README.md | 1 + docs/src/index.md | 1 + 3 files changed, 3 insertions(+) create mode 100644 .JuliaFormatter.toml diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml new file mode 100644 index 0000000..857c3ae --- /dev/null +++ b/.JuliaFormatter.toml @@ -0,0 +1 @@ +style = "yas" diff --git a/README.md b/README.md index b971ab5..1552f28 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # K8sDeputy.jl [![CI](https://github.com/beacon-biosignals/K8sDeputy.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/beacon-biosignals/K8sDeputy.jl/actions/workflows/CI.yml) +[![Code Style: YASGuide](https://img.shields.io/badge/code%20style-yas-violet.svg)](https://github.com/jrevels/YASGuide) [![Stable Documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://beacon-biosignals.github.io/K8sDeputy.jl/stable) [![Dev Documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://beacon-biosignals.github.io/K8sDeputy.jl/dev) diff --git a/docs/src/index.md b/docs/src/index.md index b971ab5..1552f28 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,6 +1,7 @@ # K8sDeputy.jl [![CI](https://github.com/beacon-biosignals/K8sDeputy.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/beacon-biosignals/K8sDeputy.jl/actions/workflows/CI.yml) +[![Code Style: YASGuide](https://img.shields.io/badge/code%20style-yas-violet.svg)](https://github.com/jrevels/YASGuide) [![Stable Documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://beacon-biosignals.github.io/K8sDeputy.jl/stable) [![Dev Documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://beacon-biosignals.github.io/K8sDeputy.jl/dev) From e7b16a33c2dd4b03ee2c61c44388e43f96e55a7a Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Tue, 5 Mar 2024 13:46:00 
-0600 Subject: [PATCH 09/27] fixup! Rename endpoint functions --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 38f87c9..af18fe9 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,7 +1,7 @@ using Aqua: Aqua using Dates: Second using K8sDeputy -using K8sDeputy: ready_endpoint, live_endpoint, serve! +using K8sDeputy: readiness_endpoint, liveness_endpoint, serve! using HTTP: HTTP using Mocking: Mocking, @mock, @patch, apply using Sockets: localhost From 8634d3b4cfdaae5124007afe977548fd630907c9 Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Tue, 5 Mar 2024 13:49:46 -0600 Subject: [PATCH 10/27] Reference localhost more --- test/health.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/health.jl b/test/health.jl index 4044397..3326992 100644 --- a/test/health.jl +++ b/test/health.jl @@ -163,28 +163,28 @@ end server = serve!(deputy, localhost, port) try - r = HTTP.get("http://localhost:$port/health/ready"; status_exception=false) + r = HTTP.get("http://$localhost:$port/health/ready"; status_exception=false) @test r.status == 503 - r = HTTP.get("http://localhost:$port/health/live") + r = HTTP.get("http://$localhost:$port/health/live") @test r.status == 200 readied(deputy) - r = HTTP.get("http://localhost:$port/health/ready") + r = HTTP.get("http://$localhost:$port/health/ready") @test r.status == 200 - r = HTTP.get("http://localhost:$port/health/live") + r = HTTP.get("http://$localhost:$port/health/live") @test r.status == 200 # Faking shutting down. Normal usage would call `shutdown` but we don't want to # terminate our test process. 
deputy.shutting_down = true - r = HTTP.get("http://localhost:$port/health/ready") + r = HTTP.get("http://$localhost:$port/health/ready") @test r.status == 200 - r = HTTP.get("http://localhost:$port/health/live"; status_exception=false) + r = HTTP.get("http://$localhost:$port/health/live"; status_exception=false) @test r.status == 503 finally close(server) @@ -215,7 +215,7 @@ end p = run(pipeline(cmd; stdout=buffer, stderr=buffer); wait=false) @test timedwait(() -> process_running(p), Second(5)) === :ok @test timedwait(Second(10)) do - r = HTTP.get("http://localhost:$port/health/ready"; status_exception=false) + r = HTTP.get("http://$localhost:$port/health/ready"; status_exception=false) return r.status == 200 end === :ok @@ -225,7 +225,7 @@ end output = String(take!(buffer)) expected = """ - [ Info: Listening on: 127.0.0.1:$port, thread id: 1 + [ Info: Listening on: $localhost:$port, thread id: 1 [ Info: GRACEFUL TERMINATION HANDLER [ Info: SHUTDOWN HANDLER [ Info: SHUTDOWN COMPLETE From bfb54ef52320c7163a3bc2c73d1bf1eef55f92c7 Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Tue, 5 Mar 2024 14:00:58 -0600 Subject: [PATCH 11/27] Document keyword `set_entrypoint` --- src/graceful_termination.jl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/graceful_termination.jl b/src/graceful_termination.jl index 18f7db2..383383d 100644 --- a/src/graceful_termination.jl +++ b/src/graceful_termination.jl @@ -31,13 +31,21 @@ end uv_kill(pid::Integer, signum::Integer) = ccall(:uv_kill, Cint, (Cint, Cint), pid, signum) """ - graceful_terminator(f) -> Nothing + graceful_terminator(f; set_entrypoint::Bool=true) -> Nothing Register a zero-argument function to be called when `graceful_terminate` is called targeting this process. The user-defined function `f` is expected to call `exit` to terminate the Julia process. The `graceful_terminator` function is only allowed to be called once within a Julia process. 
+## Keywords + +- `set_entrypoint::Bool` (optional): Sets the calling Julia process as the "entrypoint" to + be targeted by default when running `graceful_terminate` in another Julia process. Users + who want to utilize `graceful_terminator` in multiple Julia processes should use + `set_entrypoint=false` and specify process IDs when calling `graceful_terminate`. Defaults + to `true`. + ## Examples ```julia From e6966983eaf90bf4e47c08523c9e0ed98fa72a8f Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Tue, 5 Mar 2024 14:02:30 -0600 Subject: [PATCH 12/27] Comment on dead code --- src/graceful_termination.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/graceful_termination.jl b/src/graceful_termination.jl index 383383d..d3a74f7 100644 --- a/src/graceful_termination.jl +++ b/src/graceful_termination.jl @@ -119,6 +119,9 @@ Initiates the execution of the `graceful_terminator` user callback in the proces `graceful_terminator` for more details. """ function graceful_terminate(pid::Integer=entrypoint_pid(); wait::Bool=true) + # Note: The following dead code has been left here purposefully as an example of how to + # view output when running via `preStop`. + # # As K8s doesn't provide a way to view the logs from the `preStop` command you can work # a round this by writing to the STDOUT of the `pid`. Only works while `pid` is running. 
# https://stackoverflow.com/a/70708744 @@ -126,6 +129,7 @@ function graceful_terminate(pid::Integer=entrypoint_pid(); wait::Bool=true) # println(io, "preStop called") # end + sock = connect(_socket_path("graceful-terminator.$pid")) println(sock, "terminate") close(sock) From 7b40f9984a1963f522f2bb90e5ae8cd20a3c61aa Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Tue, 5 Mar 2024 14:04:08 -0600 Subject: [PATCH 13/27] Log invalid graceful terminator requests --- src/graceful_termination.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/graceful_termination.jl b/src/graceful_termination.jl index d3a74f7..95b86c6 100644 --- a/src/graceful_termination.jl +++ b/src/graceful_termination.jl @@ -98,6 +98,8 @@ function graceful_terminator(f; set_entrypoint::Bool=true) @error "User graceful terminator callback failed with exception:\n" * sprint(showerror, e, catch_backtrace()) end + else + @warn "Graceful terminator received an invalid request: \"$request\"" end close(sock) From 75d2a18f3e323dcbc9288bf948d2c3f6898898bf Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Tue, 5 Mar 2024 14:31:51 -0600 Subject: [PATCH 14/27] Add argument documentation to `K8sDeputy.serve!` --- src/server.jl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/server.jl b/src/server.jl index ab02675..cf0b77f 100644 --- a/src/server.jl +++ b/src/server.jl @@ -6,7 +6,7 @@ function _default_port() end """ - K8sDeputy.serve!(deputy::Deputy, [host], [port]) -> HTTP.Server + K8sDeputy.serve!(deputy::Deputy, [host], [port::Integer]) -> HTTP.Server Starts a non-blocking `HTTP.Server` responding to requests to `deputy` health checks. The following health check endpoints are available: @@ -16,6 +16,13 @@ following health check endpoints are available: These endpoints will respond with HTTP status `200 OK` on success or `503 Service Unavailable` on failure. + +## Arguments + +- `host` (optional): The address to listen to for incoming requests. 
Defaults to + `Sockets.localhost`. +- `port::Integer` (optional): The port to listen on. Defaults to the port number specified + by the environmental variable `DEPUTY_HEALTH_CHECK_PORT`, otherwise `8081`. """ function serve!(deputy::Deputy, host=localhost, port::Integer=_default_port()) router = HTTP.Router() From 507519cd0e403131278c3d0d6881627674ac78b6 Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Tue, 5 Mar 2024 14:32:15 -0600 Subject: [PATCH 15/27] Add quickstart guide --- docs/make.jl | 1 + docs/src/graceful_termination.md | 31 ++++++++++++++- docs/src/quickstart.md | 66 ++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 docs/src/quickstart.md diff --git a/docs/make.jl b/docs/make.jl index 5708d95..040578f 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -2,6 +2,7 @@ using K8sDeputy using Documenter pages = ["Home" => "index.md", + "Quickstart" => "quickstart.md", "Health Checks" => "health_checks.md", "Graceful Termination" => "graceful_termination.md", "API" => "api.md"] diff --git a/docs/src/graceful_termination.md b/docs/src/graceful_termination.md index f498bf6..0ecf8bb 100644 --- a/docs/src/graceful_termination.md +++ b/docs/src/graceful_termination.md @@ -57,7 +57,7 @@ spec: preStop: exec: command: ["julia", "-e", "using K8sDeputy; graceful_terminate()"] - # terminationGracePeriodSeconds: 30 + # terminationGracePeriodSeconds: 30 ``` !!! note @@ -66,3 +66,32 @@ spec: Finally, the entrypoint for the container should also not directly use the Julia as [init](https://en.wikipedia.org/wiki/Init) process (PID 1). Instead, users should define their entrypoint similarly to `["/bin/sh", "-c", "julia entrypoint.jl; sleep 1"]` as this allows the both the Julia process and the `preStop` process to cleanly terminate. + +### Read-only Filesystem + +If you a read-only filesystem on your container you'll need to configure a writeable volume mount for K8sDeputy.jl. 
The `DEPUTY_IPC_DIR` environmental variable can be used to instruct K8sDeputy.jl where to store the named pipes it creates for interprocess communication: + +```yaml +apiVersion: v1 +kind: Pod +spec: + containers: + - name: app + # command: ["/bin/sh", "-c", "julia entrypoint.jl; sleep 1"] + env: + - name: DEPUTY_IPC_DIR + value: /mnt/deputy-ipc + lifecycle: + preStop: + exec: + command: ["julia", "-e", "using K8sDeputy; graceful_terminate()"] + securityContext: + readOnlyRootFilesystem: true + volumeMounts: + - mountPath: /mnt/deputy-ipc + name: deputy-ipc + volumes: + - name: deputy-ipc + emptyDir: + medium: Memory +``` diff --git a/docs/src/quickstart.md b/docs/src/quickstart.md new file mode 100644 index 0000000..a8b3e45 --- /dev/null +++ b/docs/src/quickstart.md @@ -0,0 +1,66 @@ +# Quickstart + +For users who want to get started quickly you can use the following template to incorporate liveness probes, readiness probes, graceful termination, binding to non-privileged ports, and read-only filesystem support. + +1. Add K8sDeputy.jl to your Julia project: `Pkg.add("K8sDeputy")` +2. Define the following `entrypoint.jl` in your application and include it in the `WORKDIR` of your `Dockerfile`: + + ```julia + using K8sDeputy + deputy = Deputy() + server = K8sDeputy.serve!(deputy, "0.0.0.0") + graceful_terminator(() -> shutdown(deputy)) + + # Application initialization code + + readied(deputy) + + # Application code + ``` + +3. 
Incorporate the following changes into your K8s resource manifest: + + ```yaml + apiVersion: v1 + kind: Pod + spec: + containers: + - name: app + command: ["/bin/sh", "-c", "julia entrypoint.jl; sleep 1"] + env: + - name: DEPUTY_HEALTH_CHECK_PORT + value: "44444" + - name: DEPUTY_IPC_DIR + value: /mnt/deputy-ipc + ports: + - name: health-check + containerPort: 44444 # Must match ENV `DEPUTY_HEALTH_CHECK_PORT` + protocol: TCP + livenessProbe: + httpGet: + path: /health/live + port: health-check + timeoutSeconds: 5 + readinessProbe: + httpGet: + path: /health/ready + port: health-check + timeoutSeconds: 5 + lifecycle: + preStop: + exec: + command: ["julia", "-e", "using K8sDeputy; graceful_terminate()"] + securityContext: + capabilities: + drop: + - all + readOnlyRootFilesystem: true + volumeMounts: + - mountPath: /mnt/deputy-ipc + name: deputy-ipc + terminationGracePeriodSeconds: 30 + volumes: + - name: deputy-ipc + emptyDir: + medium: Memory + ``` From a0e26701c8ffda515f748a3ffa99af684607432f Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Tue, 5 Mar 2024 14:42:44 -0600 Subject: [PATCH 16/27] Use default K8sDeputy.jl port in quickstart --- docs/src/quickstart.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/src/quickstart.md b/docs/src/quickstart.md index a8b3e45..6289c74 100644 --- a/docs/src/quickstart.md +++ b/docs/src/quickstart.md @@ -28,13 +28,11 @@ For users who want to get started quickly you can use the following template to - name: app command: ["/bin/sh", "-c", "julia entrypoint.jl; sleep 1"] env: - - name: DEPUTY_HEALTH_CHECK_PORT - value: "44444" - name: DEPUTY_IPC_DIR value: /mnt/deputy-ipc ports: - name: health-check - containerPort: 44444 # Must match ENV `DEPUTY_HEALTH_CHECK_PORT` + containerPort: 8081 # Default K8sDeputy.jl health check port protocol: TCP livenessProbe: httpGet: From c66f6bb4091d131567ab8f89446f03d8173f6621 Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Tue, 5 Mar 2024 14:51:09 -0600 Subject: 
[PATCH 
17/27] Formatting --- src/graceful_termination.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/graceful_termination.jl b/src/graceful_termination.jl index 95b86c6..7c9bce9 100644 --- a/src/graceful_termination.jl +++ b/src/graceful_termination.jl @@ -131,7 +131,6 @@ function graceful_terminate(pid::Integer=entrypoint_pid(); wait::Bool=true) # println(io, "preStop called") # end - sock = connect(_socket_path("graceful-terminator.$pid")) println(sock, "terminate") close(sock) From 74af1f94e3d6564dc548d35cb11c9adc6bc0a281 Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Tue, 5 Mar 2024 15:21:15 -0600 Subject: [PATCH 18/27] Add LICENSE file --- LICENSE | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..df399ac --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2023 Beacon Biosignals, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file From 84a32deed023aea533aa1c522fa25153591bdd9d Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Tue, 5 Mar 2024 15:22:06 -0600 Subject: [PATCH 19/27] fixup! Add LICENSE file --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index df399ac..3dd4d7e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2023 Beacon Biosignals, Inc. +Copyright (c) 2024 Beacon Biosignals, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 435a121900690fd0c71053074078a0fc0a594be3 Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Fri, 8 Mar 2024 09:34:28 -0600 Subject: [PATCH 20/27] Documentation corrections Co-authored-by: Glenn Moynihan --- docs/src/graceful_termination.md | 8 ++++---- docs/src/health_checks.md | 4 ++-- src/graceful_termination.jl | 8 ++++---- src/server.jl | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/src/graceful_termination.md b/docs/src/graceful_termination.md index 0ecf8bb..bce115b 100644 --- a/docs/src/graceful_termination.md +++ b/docs/src/graceful_termination.md @@ -27,7 +27,7 @@ Once `graceful_terminate` has been called the first process will: execute the ca !!! note By default the `graceful_terminator` function registers the caller Julia process as the "entrypoint" Julia process. Primarily, this allows for out-of-the-box support for Julia - applications running as non-[init](https://en.wikipedia.org/wiki/Init) processes but only allows one Julia process to be defined as the "entrypoint". If you require multiple Julia processes within to support graceful termination concurrently you can use `set_entrypoint=false` (e.g. `graceful_terminator(...; set_entrypoint=false)`) and pass in the target process ID to `graceful_terminate`. 
+ applications running as non-[init](https://en.wikipedia.org/wiki/Init) processes but only allows one Julia process to be defined as the "entrypoint". If you require multiple Julia processes to support graceful termination concurrently you can use `set_entrypoint=false` (e.g. `graceful_terminator(...; set_entrypoint=false)`) and pass in the target process ID to `graceful_terminate`. ## Deputy Integration @@ -64,12 +64,12 @@ spec: Applications with slow shutdown callbacks may want to consider specifying `terminationGracePeriodSeconds` which specifies the maximum duration a pod can take when gracefully terminating. Once the timeout is reached the processes running in the pod are forcibly halted with a `KILL` signal. -Finally, the entrypoint for the container should also not directly use the Julia as [init](https://en.wikipedia.org/wiki/Init) process (PID 1). Instead, users should define their entrypoint similarly to -`["/bin/sh", "-c", "julia entrypoint.jl; sleep 1"]` as this allows the both the Julia process and the `preStop` process to cleanly terminate. +Finally, the entrypoint for the container should also not directly use the Julia as the [init](https://en.wikipedia.org/wiki/Init) process (PID 1). Instead, users should define their entrypoint similarly to +`["/bin/sh", "-c", "julia entrypoint.jl; sleep 1"]` as this allows both the Julia process and the `preStop` process to cleanly terminate. ### Read-only Filesystem -If you a read-only filesystem on your container you'll need to configure a writeable volume mount for K8sDeputy.jl. The `DEPUTY_IPC_DIR` environmental variable can be used to instruct K8sDeputy.jl where to store the named pipes it creates for interprocess communication: +If you have a read-only filesystem on your container you'll need to configure a writeable volume mount for K8sDeputy.jl. 
The `DEPUTY_IPC_DIR` environmental variable can be used to instruct K8sDeputy.jl where to store the named pipes it creates for interprocess communication: ```yaml apiVersion: v1 diff --git a/docs/src/health_checks.md b/docs/src/health_checks.md index 6cdc899..7274428 100644 --- a/docs/src/health_checks.md +++ b/docs/src/health_checks.md @@ -66,7 +66,7 @@ readied(deputy) # Application code ``` -When you application is ready you should declare your application as "readied". Doing this causes the readiness endpoint to start returning successful responses. For K8s applications responding to network traffic this endpoint is critical for ensuring timely responses to external requests. Although, defining `readied` for non-network based applications is optional it can still be useful for administration/monitoring. +When your application is ready you should declare your application as "readied". Doing this causes the readiness endpoint to start returning successful responses. For K8s applications responding to network traffic this endpoint is critical for ensuring timely responses to external requests. Although, defining `readied` for non-network based applications is optional it can still be useful for administration/monitoring. To configure your K8s container resource with a readiness probe you'll need to declare a `readinessProbe` in your manifest. For example here's a partial manifest for a K8s pod: @@ -103,7 +103,7 @@ finally end ``` -Once `shutdown` is called the following occur: +Once `shutdown` is called the following occurs: 1. The liveness endpoint starts returning failure responses 2. 
The deputy's `shutdown_handler` is called diff --git a/src/graceful_termination.jl b/src/graceful_termination.jl index 7c9bce9..aae0c47 100644 --- a/src/graceful_termination.jl +++ b/src/graceful_termination.jl @@ -72,9 +72,9 @@ spec: command: ["julia", "-e", "using $(@__MODULE__()); graceful_terminate()"] ``` -Additionally, the entrypoint for the container should also not directly use the Julia -as init process (PID 1). Instead, users should define their entrypoint similarly to -`["/bin/sh", "-c", "julia entrypoint.jl; sleep 1"]` as this allows the both the Julia +Additionally, the entrypoint for the container should also not directly use the Julia process +as the init process (PID 1). Instead, users should define their entrypoint similarly to +`["/bin/sh", "-c", "julia entrypoint.jl; sleep 1"]` as this allows for both the Julia process and the `preStop` process to cleanly terminate. """ function graceful_terminator(f; set_entrypoint::Bool=true) @@ -135,7 +135,7 @@ function graceful_terminate(pid::Integer=entrypoint_pid(); wait::Bool=true) println(sock, "terminate") close(sock) - # Wait for the `pid` to complete. We must to block here as otherwise K8s sends a + # Wait for the `pid` to complete. We must block here as otherwise K8s sends a # `TERM` signal immediately after the `preStop` completes. If we fail to wait the # Julia process won't have a chance to perform a "clean" shutdown. If the Julia process # takes longer than `terminationGracePeriodSeconds` to stop then K8s will forcefully diff --git a/src/server.jl b/src/server.jl index cf0b77f..4703ab0 100644 --- a/src/server.jl +++ b/src/server.jl @@ -12,7 +12,7 @@ Starts a non-blocking `HTTP.Server` responding to requests to `deputy` health ch following health check endpoints are available: - `/health/live`: Is the server is alive/running? -- `/health/ready`: Is the server is ready (has `readied(deputy)` been called)? +- `/health/ready`: Is the server ready (has `readied(deputy)` been called)? 
These endpoints will respond with HTTP status `200 OK` on success or `503 Service Unavailable` on failure. From 98af4cff2fc3f9758d860f79d553426456559274 Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Fri, 8 Mar 2024 10:04:30 -0600 Subject: [PATCH 21/27] Indicate mutating health functions --- docs/src/api.md | 4 ++-- docs/src/graceful_termination.md | 4 ++-- docs/src/health_checks.md | 12 ++++++------ docs/src/quickstart.md | 4 ++-- src/K8sDeputy.jl | 2 +- src/health.jl | 10 +++++----- test/health.jl | 20 ++++++++++---------- 7 files changed, 28 insertions(+), 28 deletions(-) diff --git a/docs/src/api.md b/docs/src/api.md index ef48f3e..06460bc 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -3,8 +3,8 @@ ```@docs Deputy K8sDeputy.serve! -readied -shutdown +readied! +shutdown! graceful_terminator graceful_terminate ``` diff --git a/docs/src/graceful_termination.md b/docs/src/graceful_termination.md index bce115b..3a1d1de 100644 --- a/docs/src/graceful_termination.md +++ b/docs/src/graceful_termination.md @@ -31,13 +31,13 @@ Once `graceful_terminate` has been called the first process will: execute the ca ## Deputy Integration -The `graceful_terminator` function can be combined with the deputy's `shutdown` function to allow graceful termination of the application and the deputy: +The `graceful_terminator` function can be combined with the deputy's `shutdown!` function to allow graceful termination of the application and the deputy: ```julia using K8sDeputy deputy = Deputy(; shutdown_handler=() -> @info "Shutting down") server = K8sDeputy.serve!(deputy, "0.0.0.0") -graceful_terminator(() -> shutdown(deputy)) +graceful_terminator(() -> shutdown!(deputy)) # Application code ``` diff --git a/docs/src/health_checks.md b/docs/src/health_checks.md index 7274428..31d775b 100644 --- a/docs/src/health_checks.md +++ b/docs/src/health_checks.md @@ -52,7 +52,7 @@ spec: ## Supporting readiness probes -Enabling [readiness 
probes](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes) is similar to [enabling the liveness probes](#supporting-liveness-probes) but requires an call to `readied`: +Enabling [readiness probes](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes) is similar to [enabling the liveness probes](#supporting-liveness-probes) but requires a call to `readied!`: ```julia using K8sDeputy @@ -61,12 +61,12 @@ K8sDeputy.serve!(deputy, "0.0.0.0") # Application initialization code -readied(deputy) +readied!(deputy) # Application code ``` -When your application is ready you should declare your application as "readied". Doing this causes the readiness endpoint to start returning successful responses. For K8s applications responding to network traffic this endpoint is critical for ensuring timely responses to external requests. Although, defining `readied` for non-network based applications is optional it can still be useful for administration/monitoring. +When your application is ready you should declare your application as such with `readied!`. Doing this causes the readiness endpoint to start returning successful responses. For K8s applications responding to network traffic this endpoint is critical for ensuring timely responses to external requests. Although defining `readied!` for non-network based applications is optional, it can still be useful for administration/monitoring. To configure your K8s container resource with a readiness probe you'll need to declare a `readinessProbe` in your manifest.
For example here's a partial manifest for a K8s pod: @@ -89,7 +89,7 @@ spec: ## Shutdown -When it is time to shutdown your application you should inform the deputy by running the `shutdown` function: +When it is time to shutdown your application you should inform the deputy by running the `shutdown!` function: ```julia using K8sDeputy @@ -99,11 +99,11 @@ K8sDeputy.serve!(deputy, "0.0.0.0") try # Application code finally - shutdown(deputy) + shutdown!(deputy) end ``` -Once `shutdown` is called the following occurs: +Once `shutdown!` is called the following occurs: 1. The liveness endpoint starts returning failure responses 2. The deputy's `shutdown_handler` is called diff --git a/docs/src/quickstart.md b/docs/src/quickstart.md index 6289c74..4704bd2 100644 --- a/docs/src/quickstart.md +++ b/docs/src/quickstart.md @@ -9,11 +9,11 @@ For users who want to get started quickly you can use the following template to using K8sDeputy deputy = Deputy() server = K8sDeputy.serve!(deputy, "0.0.0.0") - graceful_terminator(() -> shutdown(deputy)) + graceful_terminator(() -> shutdown!(deputy)) # Application initialization code - readied(deputy) + readied!(deputy) # Application code ``` diff --git a/src/K8sDeputy.jl b/src/K8sDeputy.jl index 90ea1ef..16daeea 100644 --- a/src/K8sDeputy.jl +++ b/src/K8sDeputy.jl @@ -5,7 +5,7 @@ using HTTP: HTTP using Mocking using Sockets: accept, connect, listen, localhost -export Deputy, graceful_terminator, readied, shutdown, graceful_terminate +export Deputy, graceful_terminator, readied!, shutdown!, graceful_terminate include("graceful_termination.jl") include("health.jl") diff --git a/src/health.jl b/src/health.jl index 240fc1e..af9771c 100644 --- a/src/health.jl +++ b/src/health.jl @@ -22,22 +22,22 @@ function Deputy(; shutdown_handler=nothing, shutdown_handler_timeout::Period=Sec end """ - readied(deputy::Deputy) -> Nothing + readied!(deputy::Deputy) -> Nothing Mark the application as "ready". 
Sets the readiness endpoint to respond with successful responses. """ -function readied(deputy::Deputy) +function readied!(deputy::Deputy) deputy.ready = true return nothing end """ - shutdown(deputy::Deputy) -> Nothing + shutdown!(deputy::Deputy) -> Nothing Initiates a shutdown of the application by: -1. Setting the liveness endpoint to respond with failures. +1. Marking the application as shutting down ("non-live"). 2. Executing the deputy's `shutdown_handler` (if defined). 3. Exiting the current Julia process. @@ -46,7 +46,7 @@ If a `deputy.shutdown_handler` is defined it must complete within the immediately exit. Any exceptions that occur in the `deputy.shutdown_handler` will also be logged and result in the Julia process exiting. """ -function shutdown(deputy::Deputy) +function shutdown!(deputy::Deputy) # Abend if already shutting down deputy.shutting_down && return nothing deputy.shutting_down = true diff --git a/test/health.jl b/test/health.jl index 3326992..901cf0b 100644 --- a/test/health.jl +++ b/test/health.jl @@ -15,7 +15,7 @@ end @test !deputy.ready @test !deputy.shutting_down - readied(deputy) + readied!(deputy) @test deputy.ready @test !deputy.shutting_down end @@ -49,7 +49,7 @@ end end # Note: If a non-mocked `exit(0)` is called it may appear that all tests have passed. - @testset "shutdown" begin + @testset "shutdown!"
begin @testset "default handler" begin deputy = Deputy() @@ -58,7 +58,7 @@ end @test_logs(logs..., apply(exit_patcher(rc)) do @mock atexit(() -> @info "SHUTDOWN COMPLETE") - return shutdown(deputy) + return shutdown!(deputy) end) @test isassigned(rc) @@ -82,7 +82,7 @@ end @test_logs(logs..., apply(exit_patcher(rc)) do @mock atexit(() -> @info "SHUTDOWN COMPLETE") - return shutdown(deputy) + return shutdown!(deputy) end) @test isassigned(rc) @@ -99,7 +99,7 @@ end @test_logs(logs..., apply(exit_patcher(rc)) do @mock atexit(() -> @info "SHUTDOWN COMPLETE") - return shutdown(deputy) + return shutdown!(deputy) end) @test isassigned(rc) @@ -123,7 +123,7 @@ end @test_logs(logs..., apply(exit_patcher(rc)) do @mock atexit(() -> @info "SHUTDOWN COMPLETE") - return shutdown(deputy) + return shutdown!(deputy) end) @test isassigned(rc) @@ -138,7 +138,7 @@ end atexit(() -> @info "SHUTDOWN COMPLETE") deputy = Deputy(; shutdown_handler, shutdown_handler_timeout=Second(1)) - shutdown(deputy) + shutdown!(deputy) end cmd = `$(Base.julia_cmd()) --color=no -e $code` @@ -169,7 +169,7 @@ end r = HTTP.get("http://$localhost:$port/health/live") @test r.status == 200 - readied(deputy) + readied!(deputy) r = HTTP.get("http://$localhost:$port/health/ready") @test r.status == 200 @@ -202,11 +202,11 @@ end deputy = Deputy(; shutdown_handler) graceful_terminator() do @info "GRACEFUL TERMINATION HANDLER" - shutdown(deputy) + shutdown!(deputy) return nothing end K8sDeputy.serve!(deputy, Sockets.localhost, $port) - readied(deputy) + readied!(deputy) sleep(60) end From 351eb9dd92d93170eae52638a77139713bc66a9c Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Fri, 8 Mar 2024 10:06:53 -0600 Subject: [PATCH 22/27] Rename `entrypoint_pid(::Integer)` to `set_entrypoint_pid` --- src/graceful_termination.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/graceful_termination.jl b/src/graceful_termination.jl index aae0c47..9d74f1e 100644 --- a/src/graceful_termination.jl +++ 
b/src/graceful_termination.jl @@ -20,7 +20,7 @@ end # Following the Linux convention for pid files: # https://refspecs.linuxfoundation.org/FHS_3.0/fhs/ch03s15.html entrypoint_pid_file() = joinpath(_deputy_ipc_dir(), "julia-entrypoint.pid") -entrypoint_pid(pid::Integer) = write(entrypoint_pid_file(), string(pid) * "\n") +set_entrypoint_pid(pid::Integer) = write(entrypoint_pid_file(), string(pid) * "\n") function entrypoint_pid() pid_file = entrypoint_pid_file() @@ -78,7 +78,7 @@ as the init process (PID 1). Instead, users should define their entrypoint simil process and the `preStop` process to cleanly terminate. """ function graceful_terminator(f; set_entrypoint::Bool=true) - set_entrypoint && entrypoint_pid(getpid()) + set_entrypoint && set_entrypoint_pid(getpid()) # Utilize UNIX domain sockets for the IPC. Avoid using network sockets here as we don't # want to allow access to this functionality from outside of the localhost. Each process From b3b9eae3eb826edde8e845b5f6a286935ddf837e Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Fri, 8 Mar 2024 10:15:55 -0600 Subject: [PATCH 23/27] Document custom exit status --- src/health.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/health.jl b/src/health.jl index af9771c..ff54878 100644 --- a/src/health.jl +++ b/src/health.jl @@ -45,6 +45,9 @@ If a `deputy.shutdown_handler` is defined it must complete within the `deputy.shutdown_handler_timeout` or a warning will be logged and the Julia process will immediately exit. Any exceptions that occur in the `deputy.shutdown_handler` will also be logged and result in the Julia process exiting. + +A `shutdown_handler` may optionally call `exit` if a user wants to specify the exit status. +By default `shutdown!` uses an exit status of `1`. 
""" function shutdown!(deputy::Deputy) # Abend if already shutting down @@ -66,7 +69,9 @@ function shutdown!(deputy::Deputy) end end - # Shutdown handler's should not call `exit` + # Normally `shutdown!` is responsible for exiting the Julia process. However, a + # user-defined `shutdown_handler` may call `exit` but must do so prior before reaching + # the timeout. @mock exit(1) return nothing From 59ae13f38e7d453bdd152e30f6c339ed4febfc46 Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Fri, 8 Mar 2024 10:23:24 -0600 Subject: [PATCH 24/27] fixup! Indicate mutating health functions --- src/graceful_termination.jl | 2 +- src/health.jl | 2 +- src/server.jl | 2 +- test/health.jl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/graceful_termination.jl b/src/graceful_termination.jl index 9d74f1e..f3605aa 100644 --- a/src/graceful_termination.jl +++ b/src/graceful_termination.jl @@ -50,7 +50,7 @@ Julia process. ```julia app_status = AppStatus() -graceful_terminator(() -> shutdown(app_status)) +graceful_terminator(() -> shutdown!(app_status)) ``` ## Kubernetes Setup diff --git a/src/health.jl b/src/health.jl index ff54878..cebb3b5 100644 --- a/src/health.jl +++ b/src/health.jl @@ -13,7 +13,7 @@ Construct an application `Deputy` which provides health check endpoints. ## Keywords - `shutdown_handler` (optional): A zero-argument function which allows the user to provide - a custom callback function for when `shutdown(::Deputy)` is called. + a custom callback function for when `shutdown!(::Deputy)` is called. - `shutdown_handler_timeout::Period` (optional): Specifies the maximum execution duration of a `shutdown_handler`. """ diff --git a/src/server.jl b/src/server.jl index 4703ab0..66b9a2b 100644 --- a/src/server.jl +++ b/src/server.jl @@ -12,7 +12,7 @@ Starts a non-blocking `HTTP.Server` responding to requests to `deputy` health ch following health check endpoints are available: - `/health/live`: Is the server is alive/running? 
-- `/health/ready`: Is the server ready (has `readied(deputy)` been called)? +- `/health/ready`: Is the server ready (has `readied!(deputy)` been called)? These endpoints will respond with HTTP status `200 OK` on success or `503 Service Unavailable` on failure. diff --git a/test/health.jl b/test/health.jl index 901cf0b..36227a1 100644 --- a/test/health.jl +++ b/test/health.jl @@ -177,7 +177,7 @@ end r = HTTP.get("http://$localhost:$port/health/live") @test r.status == 200 - # Faking shutting down. Normal usage would call `shutdown` but we don't want to + # Faking shutting down. Normal usage would call `shutdown!` but we don't want to # terminate our test process. deputy.shutting_down = true From 4e7ed3c9cc3b59990f71dc6a90f5bcbb48eda16c Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Fri, 8 Mar 2024 14:17:10 -0600 Subject: [PATCH 25/27] Fix note syntax Co-authored-by: Dave Kleinschmidt --- docs/src/health_checks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/health_checks.md b/docs/src/health_checks.md index 31d775b..d28a207 100644 --- a/docs/src/health_checks.md +++ b/docs/src/health_checks.md @@ -46,7 +46,7 @@ spec: timeoutSeconds: 5 ``` -!!!note +!!! note K8s probes require that applications must respond to the probe requests in under `timeoutSeconds` (defaults to 1 second). Since Julia's HTTP.jl server can be unresponsive we recommend using a `timeoutSeconds` of at least 5 seconds. 
From 5097c6f58f7cbcac74c132706b05546609105029 Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Fri, 8 Mar 2024 14:18:08 -0600 Subject: [PATCH 26/27] Add test for gracefully terminating multiple Julia processes --- test/graceful_termination.jl | 91 ++++++++++++++++++++++++++---------- test/health.jl | 5 +- 2 files changed, 70 insertions(+), 26 deletions(-) diff --git a/test/graceful_termination.jl b/test/graceful_termination.jl index fdb46e6..075ae91 100644 --- a/test/graceful_termination.jl +++ b/test/graceful_termination.jl @@ -1,31 +1,74 @@ -@testset "graceful_terminator / graceful_terminate" begin - code = quote - using K8sDeputy - atexit(() -> @info "SHUTDOWN COMPLETE") - graceful_terminator() do - @info "GRACEFUL TERMINATION HANDLER" - exit(2) - return nothing +@testset "graceful_terminator" begin + @testset "Julia entrypoint" begin + code = quote + using K8sDeputy + atexit(() -> @info "SHUTDOWN COMPLETE") + graceful_terminator() do + @info "GRACEFUL TERMINATION HANDLER" + exit(2) + return nothing + end + sleep(60) end - sleep(60) + + cmd = `$(Base.julia_cmd()) --color=no -e $code` + buffer = IOBuffer() + p = run(pipeline(cmd; stdout=buffer, stderr=buffer); wait=false) + @test timedwait(() -> process_running(p), Second(5)) === :ok + + # Allow some time for Julia to startup and the graceful terminator to be registered. + sleep(3) + + # When no PID is passed in the process ID is read from the Julia entrypoint file. + # Blocks until the process terminates.
+ @test graceful_terminate() === nothing + + @test process_exited(p) + @test p.exitcode == 2 + + output = String(take!(buffer)) + expected = """ + [ Info: GRACEFUL TERMINATION HANDLER + [ Info: SHUTDOWN COMPLETE + """ + @test output == expected end - cmd = `$(Base.julia_cmd()) --color=no -e $code` - buffer = IOBuffer() - p = run(pipeline(cmd; stdout=buffer, stderr=buffer); wait=false) - @test timedwait(() -> process_running(p), Second(5)) === :ok + @testset "multiple Julia processes" begin + code = quote + using K8sDeputy + atexit(() -> @info "SHUTDOWN COMPLETE") + graceful_terminator(; set_entrypoint=false) do + @info "GRACEFUL TERMINATION HANDLER" + exit(2) + return nothing + end + sleep(60) + end + + cmd = `$(Base.julia_cmd()) --color=no -e $code` + buffer1 = IOBuffer() + buffer2 = IOBuffer() + p1 = run(pipeline(cmd; stdout=buffer1, stderr=buffer1); wait=false) + p2 = run(pipeline(cmd; stdout=buffer2, stderr=buffer2); wait=false) + @test timedwait(() -> process_running(p1) && process_running(p2), Second(5)) === :ok - # Allow some time for Julia to startup and the graceful terminator to be registered. - sleep(3) + # Allow some time for Julia to startup and the graceful terminator to be registered. 
+ sleep(3) - @test graceful_terminate(getpid(p)) === nothing # Blocks untils the HTTP server goes down - @test process_exited(p) - @test p.exitcode == 2 + # Blocks until the process terminates + @test graceful_terminate(getpid(p1)) === nothing + @test graceful_terminate(getpid(p2)) === nothing + @test process_exited(p1) + @test process_exited(p2) - output = String(take!(buffer)) - expected = """ - [ Info: GRACEFUL TERMINATION HANDLER - [ Info: SHUTDOWN COMPLETE - """ - @test output == expected + output1 = String(take!(buffer1)) + output2 = String(take!(buffer2)) + expected = """ + [ Info: GRACEFUL TERMINATION HANDLER + [ Info: SHUTDOWN COMPLETE + """ + @test output1 == expected + @test output2 == expected + end end diff --git a/test/health.jl b/test/health.jl index 36227a1..8d45143 100644 --- a/test/health.jl +++ b/test/health.jl @@ -200,7 +200,7 @@ end atexit(() -> @info "SHUTDOWN COMPLETE") deputy = Deputy(; shutdown_handler) - graceful_terminator() do + graceful_terminator(; set_entrypoint=false) do @info "GRACEFUL TERMINATION HANDLER" shutdown!(deputy) return nothing @@ -219,7 +219,8 @@ end return r.status == 200 end === :ok - graceful_terminate(getpid(p)) # Blocks untils the HTTP server goes down + # Blocks until the process terminates + graceful_terminate(getpid(p)) @test process_exited(p) @test p.exitcode == 1 From 7d4c703be55589c817ac13736fbd274b5ef21c7e Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Fri, 8 Mar 2024 14:24:54 -0600 Subject: [PATCH 27/27] Cross reference graceful termination --- docs/src/health_checks.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/src/health_checks.md b/docs/src/health_checks.md index d28a207..a2228cd 100644 --- a/docs/src/health_checks.md +++ b/docs/src/health_checks.md @@ -110,3 +110,5 @@ Once `shutdown!` is called the following occurs: 3. The Julia process is terminated By default the `shutdown_handler` only has 5 seconds to complete.
If your `shutdown_handler` requires more time to execute you can change the timeout by using the keyword `shutdown_handler_timeout`. + +Depending on your application you may want to define multiple calls to `shutdown!`. For example you may want to call `shutdown!` from within `graceful_terminator` to enable [graceful termination support](./graceful_termination.md) for your application.