-
Notifications
You must be signed in to change notification settings - Fork 16
fix: add backend startup heartbeat liveness probe #114
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
0f70fb0
068564a
011aeaa
c72be6e
d8f8200
8638ba9
cb423aa
b64664e
47f00a0
e43de52
4f4c5c9
15f582b
46e2f13
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,14 +1,20 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import atexit | ||
| import ctypes | ||
| import json | ||
| import os | ||
| import runpy | ||
| import sys | ||
| import threading | ||
| import time | ||
| from pathlib import Path | ||
|
|
||
| BACKEND_DIR = Path(__file__).resolve().parent | ||
| APP_DIR = BACKEND_DIR / "app" | ||
| _WINDOWS_DLL_DIRECTORY_HANDLES: list[object] = [] | ||
| STARTUP_HEARTBEAT_ENV = "ASTRBOT_BACKEND_STARTUP_HEARTBEAT_PATH" | ||
| STARTUP_HEARTBEAT_INTERVAL_SECONDS = 2.0 | ||
|
|
||
|
|
||
| def configure_stdio_utf8() -> None: | ||
|
|
@@ -113,15 +119,96 @@ def preload_windows_runtime_dlls() -> None: | |
| continue | ||
|
|
||
|
|
||
| configure_stdio_utf8() | ||
| configure_windows_dll_search_path() | ||
| preload_windows_runtime_dlls() | ||
def resolve_startup_heartbeat_path() -> Path | None:
    """Return the heartbeat file location configured through the environment.

    The variable named by ``STARTUP_HEARTBEAT_ENV`` is read and stripped of
    surrounding whitespace.

    Returns:
        A ``Path`` built from the configured value, or ``None`` when the
        variable is unset, empty, or contains only whitespace.
    """
    configured = os.environ.get(STARTUP_HEARTBEAT_ENV, "").strip()
    return Path(configured) if configured else None
|
|
||
|
|
||
def write_startup_heartbeat(
    path: Path, state: str, *, warn_on_error: bool = False
) -> bool:
    """Atomically write a JSON heartbeat record to *path*.

    The record holds the current process id, the given *state*, and the
    current wall-clock time in milliseconds. It is written to a temporary
    sibling file first and then renamed over *path*, so readers never see a
    partially written file.

    Args:
        path: Destination heartbeat file. Missing parent directories are
            created.
        state: Value stored under the ``"state"`` key.
        warn_on_error: When true, a failure is reported on ``sys.stderr``.

    Returns:
        ``True`` when the heartbeat was written, ``False`` on any failure.
        Failures never propagate: the heartbeat is best-effort.
    """
    temp_path = None
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "pid": os.getpid(),
            "state": state,
            "updated_at_ms": int(time.time() * 1000),
        }
        # Use a per-process, per-thread temp name. Writers can run
        # concurrently (e.g. a background heartbeat thread and an exit hook);
        # with a shared "<name>.tmp" one writer could rename the other's
        # half-written file or make the other's rename fail.
        temp_path = path.with_name(
            f"{path.name}.{os.getpid()}.{threading.get_ident()}.tmp"
        )
        temp_path.write_text(
            json.dumps(payload, separators=(",", ":")),
            encoding="utf-8",
        )
        temp_path.replace(path)
        return True
    except Exception as exc:
        # Deliberately broad: a heartbeat failure must never take down the
        # process that is being monitored.
        if temp_path is not None:
            # Do not leave a stale temp file behind after a failed write.
            try:
                temp_path.unlink()
            except OSError:
                pass
        if warn_on_error:
            print(
                f"[startup-heartbeat] failed to write heartbeat to {path}: {exc.__class__.__name__}: {exc}",
                file=sys.stderr,
            )
        return False
|
|
||
|
|
||
| def heartbeat_loop( | ||
sourcery-ai[bot] marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

**issue (complexity):** Consider simplifying the heartbeat warning logic and inlining the payload builder to make the heartbeat flow easier to follow and maintain.

You can reduce complexity in two small, targeted ways without changing the overall behavior (periodic atomic JSON writes + "stopping" heartbeat).

**1. Simplify heartbeat loop warning logic**

The current […] For example, throttle warnings to at most once every 10 seconds:

```python
WARNING_THROTTLE_SECONDS = 10.0


def heartbeat_loop(
    path: Path, interval_seconds: float, stop_event: threading.Event
) -> None:
    last_warning_time: float | None = None
    while not stop_event.wait(interval_seconds):
        now = time.time()
        warn_now = (
            last_warning_time is None
            or (now - last_warning_time) >= WARNING_THROTTLE_SECONDS
        )
        ok = write_startup_heartbeat(path, "starting", warn_on_error=warn_now)
        if warn_now and not ok:
            last_warning_time = now
```

This keeps log volume bounded while removing […]

**2. Inline** […]
|
||
| path: Path, interval_seconds: float, stop_event: threading.Event | ||
| ) -> None: | ||
| had_successful_write = False | ||
| warning_emitted = False | ||
|
|
||
| ok = write_startup_heartbeat(path, "starting", warn_on_error=True) | ||
| if ok: | ||
| had_successful_write = True | ||
| else: | ||
| warning_emitted = True | ||
|
|
||
| while not stop_event.wait(interval_seconds): | ||
| warn_now = (not warning_emitted) or (not had_successful_write) | ||
| ok = write_startup_heartbeat(path, "starting", warn_on_error=warn_now) | ||
| if ok: | ||
| had_successful_write = True | ||
| warning_emitted = False | ||
| elif warn_now: | ||
| warning_emitted = True | ||
|
|
||
|
|
||
def start_startup_heartbeat() -> None:
    """Start the background startup-heartbeat writer, if one is configured.

    Does nothing when ``resolve_startup_heartbeat_path()`` returns ``None``.
    Otherwise a daemon thread runs ``heartbeat_loop`` against the configured
    path, and an ``atexit`` hook stops that loop and records a final
    ``"stopping"`` heartbeat.
    """
    heartbeat_path = resolve_startup_heartbeat_path()
    if heartbeat_path is None:
        return

    stop_event = threading.Event()
    heartbeat_thread = threading.Thread(
        target=heartbeat_loop,
        args=(heartbeat_path, STARTUP_HEARTBEAT_INTERVAL_SECONDS, stop_event),
        name="astrbot-startup-heartbeat",
        daemon=True,
    )

    def on_exit() -> None:
        stop_event.set()
        # Give an in-flight "starting" write a bounded chance to finish before
        # the final record is written; otherwise it could land after
        # "stopping" and leave the wrong last state on disk.
        if heartbeat_thread.is_alive():
            heartbeat_thread.join(timeout=STARTUP_HEARTBEAT_INTERVAL_SECONDS)
        write_startup_heartbeat(heartbeat_path, "stopping", warn_on_error=True)

    # Register the hook before starting the thread so the "stopping" record is
    # still written even if the thread fails to start.
    atexit.register(on_exit)
    heartbeat_thread.start()
|
|
||
|
|
||
| def main() -> None: | ||
| configure_stdio_utf8() | ||
| configure_windows_dll_search_path() | ||
| preload_windows_runtime_dlls() | ||
| start_startup_heartbeat() | ||
|
|
||
| sys.path.insert(0, str(APP_DIR)) | ||
|
|
||
| main_file = APP_DIR / "main.py" | ||
| if not main_file.is_file(): | ||
| raise FileNotFoundError(f"Backend entrypoint not found: {main_file}") | ||
|
|
||
| sys.path.insert(0, str(APP_DIR)) | ||
| sys.argv[0] = str(main_file) | ||
| runpy.run_path(str(main_file), run_name="__main__") | ||
|
|
||
| main_file = APP_DIR / "main.py" | ||
| if not main_file.is_file(): | ||
| raise FileNotFoundError(f"Backend entrypoint not found: {main_file}") | ||
|
|
||
| sys.argv[0] = str(main_file) | ||
| runpy.run_path(str(main_file), run_name="__main__") | ||
| if __name__ == "__main__": | ||
| main() | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| import importlib.util | ||
| import unittest | ||
| from pathlib import Path | ||
| from unittest import mock | ||
|
|
||
|
|
||
| MODULE_PATH = Path(__file__).with_name("launch_backend.py") | ||
| SPEC = importlib.util.spec_from_file_location("launch_backend_under_test", MODULE_PATH) | ||
| if SPEC is None or SPEC.loader is None: | ||
| raise RuntimeError(f"Cannot load launch_backend module from {MODULE_PATH}") | ||
| launch_backend = importlib.util.module_from_spec(SPEC) | ||
| SPEC.loader.exec_module(launch_backend) | ||
|
|
||
|
|
||
class StartupHeartbeatTests(unittest.TestCase):
    """Checks which heartbeat writes are allowed to emit a warning."""

    def _patch(self, target, attribute, *args, **kwargs):
        """Patch ``target.attribute`` until the test ends; return the stand-in."""
        patcher = mock.patch.object(target, attribute, *args, **kwargs)
        replacement = patcher.start()
        self.addCleanup(patcher.stop)
        return replacement

    def _warn_flags_from_loop(self, wait_results, write_results):
        """Run ``heartbeat_loop`` with scripted results; return each ``warn_on_error``."""
        stop_event = mock.Mock()
        stop_event.wait.side_effect = wait_results
        write_mock = self._patch(
            launch_backend,
            "write_startup_heartbeat",
            side_effect=write_results,
        )
        launch_backend.heartbeat_loop(Path("/tmp/heartbeat.json"), 2.0, stop_event)
        return [call.kwargs["warn_on_error"] for call in write_mock.call_args_list]

    def test_repeated_failures_warn_before_first_success(self) -> None:
        # No write has succeeded yet, so every failure must still warn.
        flags = self._warn_flags_from_loop([False, True], [False, False])
        self.assertEqual(flags, [True, True])

    def test_repeated_failures_after_success_are_suppressed(self) -> None:
        # After a success, only the first failure in a row may warn.
        flags = self._warn_flags_from_loop(
            [False, False, True], [True, False, False]
        )
        self.assertEqual(flags, [True, True, False])

    def test_stop_failure_still_warns_after_earlier_failure(self) -> None:
        stop_event = mock.Mock()
        thread = mock.Mock()
        register = mock.Mock()

        write_mock = self._patch(
            launch_backend, "write_startup_heartbeat", return_value=False
        )
        self._patch(
            launch_backend,
            "resolve_startup_heartbeat_path",
            return_value=Path("/tmp/heartbeat.json"),
        )
        self._patch(launch_backend.threading, "Event", return_value=stop_event)
        self._patch(launch_backend.threading, "Thread", return_value=thread)
        self._patch(launch_backend.atexit, "register", register)

        launch_backend.start_startup_heartbeat()
        # Invoke the registered exit hook by hand, as interpreter shutdown would.
        on_exit = register.call_args.args[0]
        on_exit()

        recorded = write_mock.call_args_list
        self.assertEqual([call.args[1] for call in recorded], ["stopping"])
        self.assertEqual([call.kwargs["warn_on_error"] for call in recorded], [True])
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| unittest.main() |
Uh oh!
There was an error while loading. Please reload this page.