Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/environment-variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
| --- | --- | --- |
| `ASTRBOT_BACKEND_URL` | 后端基础 URL | 默认 `http://127.0.0.1:6185/` |
| `ASTRBOT_BACKEND_AUTO_START` | 是否自动拉起后端 | 默认 `1`(启用) |
| `ASTRBOT_BACKEND_TIMEOUT_MS` | 后端就绪等待超时 | 开发模式默认 `20000`;打包模式默认回退 `300000` |
| `ASTRBOT_BACKEND_TIMEOUT_MS` | 后端就绪等待超时 | 开发模式默认 `20000`;打包模式默认回退 `900000` |
| `ASTRBOT_BACKEND_STARTUP_IDLE_TIMEOUT_MS` | 后端启动 heartbeat 空闲超时 | 默认 `60000`,范围 `5000~900000` |
| `ASTRBOT_BACKEND_READY_HTTP_PATH` | 就绪探针 HTTP 路径 | 默认 `/api/stat/start-time` |
| `ASTRBOT_BACKEND_READY_PROBE_TIMEOUT_MS` | 就绪探针单次超时 | 默认回退到 `ASTRBOT_BACKEND_PING_TIMEOUT_MS` |
| `ASTRBOT_BACKEND_READY_POLL_INTERVAL_MS` | 就绪轮询间隔 | 默认 `300`,并按边界 clamp |
Expand Down Expand Up @@ -53,6 +54,7 @@
| 变量 | 用途 | 默认值/行为 |
| --- | --- | --- |
| `ASTRBOT_DESKTOP_CLIENT` | 标记桌面客户端环境 | 打包态启动后端时写入 `1` |
| `ASTRBOT_BACKEND_STARTUP_HEARTBEAT_PATH` | 桌面端写给后端启动器的 heartbeat 文件路径 | 打包态默认写到 `ASTRBOT_ROOT/data/backend-startup-heartbeat.json` |

## 4. 发布/CI(GitHub Actions)

Expand Down
105 changes: 96 additions & 9 deletions scripts/backend/templates/launch_backend.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
from __future__ import annotations

import atexit
import ctypes
import json
import os
import runpy
import sys
import threading
import time
from pathlib import Path

BACKEND_DIR = Path(__file__).resolve().parent
APP_DIR = BACKEND_DIR / "app"
_WINDOWS_DLL_DIRECTORY_HANDLES: list[object] = []
STARTUP_HEARTBEAT_ENV = "ASTRBOT_BACKEND_STARTUP_HEARTBEAT_PATH"
STARTUP_HEARTBEAT_INTERVAL_SECONDS = 2.0


def configure_stdio_utf8() -> None:
Expand Down Expand Up @@ -113,15 +119,96 @@ def preload_windows_runtime_dlls() -> None:
continue


configure_stdio_utf8()
configure_windows_dll_search_path()
preload_windows_runtime_dlls()
def resolve_startup_heartbeat_path() -> Path | None:
    """Return the heartbeat file path configured through the environment.

    The value of the variable named by ``STARTUP_HEARTBEAT_ENV`` is read and
    stripped of surrounding whitespace.

    Returns:
        A ``Path`` built from the stripped value, or ``None`` when the
        variable is unset or blank.
    """
    configured = os.environ.get(STARTUP_HEARTBEAT_ENV, "").strip()
    return Path(configured) if configured else None


def write_startup_heartbeat(
    path: Path, state: str, *, warn_on_error: bool = False
) -> bool:
    """Write a heartbeat JSON document to ``path`` through a sibling temp file.

    The document records the current process id, the given ``state`` and the
    current wall-clock time in milliseconds. Missing parent directories are
    created first. The content is written to ``<name>.tmp`` next to ``path``
    and then renamed over ``path``.

    Args:
        path: Destination of the heartbeat file.
        state: Value stored under the ``"state"`` key.
        warn_on_error: When true, a failure is reported on stderr.

    Returns:
        ``True`` once the file has been moved into place, ``False`` if any
        exception was raised along the way (it is never propagated).
    """
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        document = {
            "pid": os.getpid(),
            "state": state,
            "updated_at_ms": int(time.time() * 1000),
        }
        staging_path = path.with_name(f"{path.name}.tmp")
        # Compact separators keep the file as small as possible.
        serialized = json.dumps(document, separators=(",", ":"))
        staging_path.write_text(serialized, encoding="utf-8")
        # Swap the finished file in with a rename instead of writing in place.
        staging_path.replace(path)
    except Exception as exc:
        # Heartbeats are best effort: report if asked, never raise.
        if warn_on_error:
            reason = f"{exc.__class__.__name__}: {exc}"
            print(
                f"[startup-heartbeat] failed to write heartbeat to {path}: {reason}",
                file=sys.stderr,
            )
        return False
    return True


def heartbeat_loop(
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (complexity): Consider simplifying the heartbeat warning logic and inlining the payload builder to make the heartbeat flow easier to follow and maintain.

You can reduce complexity in two small, targeted ways without changing the overall behavior (periodic atomic JSON writes + “stopping” heartbeat).

1. Simplify heartbeat loop warning logic

The current heartbeat_loop has two flags plus a nested should_warn helper. You can replace that small state machine with a simple time-based throttling scheme, which is easier to read and reason about.

For example, throttle warnings to at most once every 10 seconds:

WARNING_THROTTLE_SECONDS = 10.0


def heartbeat_loop(
    path: Path, interval_seconds: float, stop_event: threading.Event
) -> None:
    last_warning_time: float | None = None

    while not stop_event.wait(interval_seconds):
        now = time.time()
        warn_now = (
            last_warning_time is None
            or (now - last_warning_time) >= WARNING_THROTTLE_SECONDS
        )

        ok = write_startup_heartbeat(path, "starting", warn_on_error=warn_now)

        if warn_now and not ok:
            last_warning_time = now

This keeps log volume bounded while removing had_successful_write, warning_emitted_since_last_success, and the nested function.

2. Inline build_heartbeat_payload into write_startup_heartbeat

build_heartbeat_payload only wraps a small dict and is only used from write_startup_heartbeat. Inlining it removes one layer in the call stack (build_heartbeat_payload → atomic_write_json → write_startup_heartbeat → heartbeat_loop), making the flow easier to follow.

def write_startup_heartbeat(
    path: Path, state: str, *, warn_on_error: bool = False
) -> bool:
    payload = {
        "pid": os.getpid(),
        "state": state,
        "updated_at_ms": int(time.time() * 1000),
    }

    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        atomic_write_json(path, payload)
        return True
    except Exception as exc:
        if warn_on_error:
            print(
                f"[startup-heartbeat] failed to write heartbeat to {path}: "
                f"{exc.__class__.__name__}: {exc}",
                file=sys.stderr,
            )
        return False

atomic_write_json remains a focused helper for the critical “write temp → replace” behavior, but the heartbeat logic is now more localized and straightforward.

path: Path, interval_seconds: float, stop_event: threading.Event
) -> None:
had_successful_write = False
warning_emitted = False

ok = write_startup_heartbeat(path, "starting", warn_on_error=True)
if ok:
had_successful_write = True
else:
warning_emitted = True

while not stop_event.wait(interval_seconds):
warn_now = (not warning_emitted) or (not had_successful_write)
ok = write_startup_heartbeat(path, "starting", warn_on_error=warn_now)
if ok:
had_successful_write = True
warning_emitted = False
elif warn_now:
warning_emitted = True


def start_startup_heartbeat() -> None:
    """Start the background startup-heartbeat writer, if one is configured.

    Does nothing when ``resolve_startup_heartbeat_path()`` returns ``None``.
    Otherwise a daemon thread runs ``heartbeat_loop`` against that path, and an
    ``atexit`` hook stops the loop and records a final ``"stopping"`` heartbeat.
    """
    heartbeat_path = resolve_startup_heartbeat_path()
    if heartbeat_path is None:
        return

    stop_event = threading.Event()
    worker = threading.Thread(
        target=heartbeat_loop,
        args=(heartbeat_path, STARTUP_HEARTBEAT_INTERVAL_SECONDS, stop_event),
        name="astrbot-startup-heartbeat",
        daemon=True,
    )

    def on_exit() -> None:
        stop_event.set()
        # The loop may be in the middle of a "starting" write. Both writers
        # share the same temp file, so let that write finish before recording
        # "stopping"; otherwise it could land afterwards and overwrite the
        # stop marker. The join is bounded so a stalled filesystem cannot
        # hang interpreter shutdown, and skipped if the thread never ran.
        if worker.is_alive():
            worker.join(timeout=1.0)
        write_startup_heartbeat(heartbeat_path, "stopping", warn_on_error=True)

    atexit.register(on_exit)
    worker.start()


def main() -> None:
configure_stdio_utf8()
configure_windows_dll_search_path()
preload_windows_runtime_dlls()
start_startup_heartbeat()

sys.path.insert(0, str(APP_DIR))

main_file = APP_DIR / "main.py"
if not main_file.is_file():
raise FileNotFoundError(f"Backend entrypoint not found: {main_file}")

sys.path.insert(0, str(APP_DIR))
sys.argv[0] = str(main_file)
runpy.run_path(str(main_file), run_name="__main__")

main_file = APP_DIR / "main.py"
if not main_file.is_file():
raise FileNotFoundError(f"Backend entrypoint not found: {main_file}")

sys.argv[0] = str(main_file)
runpy.run_path(str(main_file), run_name="__main__")
if __name__ == "__main__":
main()
87 changes: 87 additions & 0 deletions scripts/backend/templates/test_launch_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import importlib.util
import unittest
from pathlib import Path
from unittest import mock


MODULE_PATH = Path(__file__).with_name("launch_backend.py")
SPEC = importlib.util.spec_from_file_location("launch_backend_under_test", MODULE_PATH)
if SPEC is None or SPEC.loader is None:
raise RuntimeError(f"Cannot load launch_backend module from {MODULE_PATH}")
launch_backend = importlib.util.module_from_spec(SPEC)
SPEC.loader.exec_module(launch_backend)


class StartupHeartbeatTests(unittest.TestCase):
    """Checks when the startup heartbeat code requests failure warnings."""

    @staticmethod
    def _warn_flags(write_mock):
        # The ``warn_on_error`` keyword of every recorded write, in call order.
        return [
            recorded.kwargs["warn_on_error"]
            for recorded in write_mock.call_args_list
        ]

    def _run_loop(self, wait_results, write_results):
        # Drive ``heartbeat_loop`` with scripted wait()/write outcomes and
        # hand back the mock that stood in for ``write_startup_heartbeat``.
        stop_event = mock.Mock()
        stop_event.wait.side_effect = wait_results
        with mock.patch.object(
            launch_backend,
            "write_startup_heartbeat",
            side_effect=write_results,
        ) as write_mock:
            launch_backend.heartbeat_loop(Path("/tmp/heartbeat.json"), 2.0, stop_event)
        return write_mock

    def test_repeated_failures_warn_before_first_success(self) -> None:
        write_mock = self._run_loop([False, True], [False, False])

        self.assertEqual(self._warn_flags(write_mock), [True, True])

    def test_repeated_failures_after_success_are_suppressed(self) -> None:
        write_mock = self._run_loop([False, False, True], [True, False, False])

        self.assertEqual(self._warn_flags(write_mock), [True, True, False])

    def test_stop_failure_still_warns_after_earlier_failure(self) -> None:
        stop_event = mock.Mock()
        thread = mock.Mock()
        register = mock.Mock()

        # Patches are entered in the same order as listed; the exit hook must
        # run while ``write_startup_heartbeat`` is still replaced.
        with mock.patch.object(
            launch_backend, "write_startup_heartbeat", return_value=False
        ) as write_mock, mock.patch.object(
            launch_backend,
            "resolve_startup_heartbeat_path",
            return_value=Path("/tmp/heartbeat.json"),
        ), mock.patch.object(
            launch_backend.threading, "Event", return_value=stop_event
        ), mock.patch.object(
            launch_backend.threading, "Thread", return_value=thread
        ), mock.patch.object(
            launch_backend.atexit, "register", register
        ):
            launch_backend.start_startup_heartbeat()
            on_exit = register.call_args.args[0]
            on_exit()

        written_states = [recorded.args[1] for recorded in write_mock.call_args_list]
        self.assertEqual(written_states, ["stopping"])
        self.assertEqual(self._warn_flags(write_mock), [True])


if __name__ == "__main__":
unittest.main()
10 changes: 9 additions & 1 deletion src-tauri/src/app_constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::time::Duration;

pub(crate) const DEFAULT_BACKEND_URL: &str = "http://127.0.0.1:6185/";
pub(crate) const BACKEND_TIMEOUT_ENV: &str = "ASTRBOT_BACKEND_TIMEOUT_MS";
pub(crate) const PACKAGED_BACKEND_TIMEOUT_FALLBACK_MS: u64 = 5 * 60 * 1000;
pub(crate) const PACKAGED_BACKEND_TIMEOUT_FALLBACK_MS: u64 = 15 * 60 * 1000;
pub(crate) const GRACEFUL_RESTART_REQUEST_TIMEOUT_MS: u64 = 2_500;
pub(crate) const GRACEFUL_RESTART_START_TIME_TIMEOUT_MS: u64 = 1_800;
pub(crate) const GRACEFUL_RESTART_POLL_INTERVAL_MS: u64 = 350;
Expand All @@ -17,6 +17,14 @@ pub(crate) const BACKEND_READY_PROBE_TIMEOUT_ENV: &str = "ASTRBOT_BACKEND_READY_
pub(crate) const BACKEND_READY_PROBE_TIMEOUT_MIN_MS: u64 = 100;
pub(crate) const BACKEND_READY_PROBE_TIMEOUT_MAX_MS: u64 = 30_000;
pub(crate) const BACKEND_READY_TCP_PROBE_TIMEOUT_MAX_MS: u64 = 1_000;
pub(crate) const BACKEND_STARTUP_IDLE_TIMEOUT_ENV: &str = "ASTRBOT_BACKEND_STARTUP_IDLE_TIMEOUT_MS";
pub(crate) const DEFAULT_BACKEND_STARTUP_IDLE_TIMEOUT_MS: u64 = 60 * 1000;
pub(crate) const BACKEND_STARTUP_IDLE_TIMEOUT_MIN_MS: u64 = 5_000;
pub(crate) const BACKEND_STARTUP_IDLE_TIMEOUT_MAX_MS: u64 = 15 * 60 * 1000;
pub(crate) const BACKEND_STARTUP_HEARTBEAT_PATH_ENV: &str =
"ASTRBOT_BACKEND_STARTUP_HEARTBEAT_PATH";
pub(crate) const DEFAULT_BACKEND_STARTUP_HEARTBEAT_RELATIVE_PATH: &str =
"data/backend-startup-heartbeat.json";
pub(crate) const DEFAULT_BACKEND_PING_TIMEOUT_MS: u64 = 800;
pub(crate) const BACKEND_PING_TIMEOUT_MIN_MS: u64 = 50;
pub(crate) const BACKEND_PING_TIMEOUT_MAX_MS: u64 = 30_000;
Expand Down
1 change: 1 addition & 0 deletions src-tauri/src/app_helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ mod tests {
cwd: PathBuf::from("."),
root_dir: None,
webui_dir: None,
startup_heartbeat_path: None,
packaged_mode: false,
};

Expand Down
1 change: 1 addition & 0 deletions src-tauri/src/app_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ pub(crate) struct LaunchPlan {
pub(crate) cwd: PathBuf,
pub(crate) root_dir: Option<PathBuf>,
pub(crate) webui_dir: Option<PathBuf>,
pub(crate) startup_heartbeat_path: Option<PathBuf>,
pub(crate) packaged_mode: bool,
}

Expand Down
84 changes: 84 additions & 0 deletions src-tauri/src/backend/config.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::env;
use std::path::{Path, PathBuf};
use std::time::Duration;
use url::Url;

Expand All @@ -7,6 +8,8 @@ pub struct BackendReadinessConfig {
pub path: String,
pub probe_timeout_ms: u64,
pub poll_interval_ms: u64,
pub startup_idle_timeout_ms: u64,
pub startup_heartbeat_path: Option<PathBuf>,
}

pub fn resolve_backend_ready_http_path<F>(env_name: &str, default_path: &str, mut log: F) -> String
Expand Down Expand Up @@ -97,6 +100,44 @@ where
parse_clamped_timeout_env(raw, env_name, fallback_ms, min_ms, max_ms, log)
}

/// Resolves the backend startup idle timeout, in milliseconds.
///
/// Thin wrapper: every argument is forwarded unchanged to
/// `parse_clamped_timeout_env`, which owns the parsing, clamping and
/// logging rules.
pub fn resolve_backend_startup_idle_timeout_ms<F: FnMut(String)>(
    raw: &str,
    env_name: &str,
    fallback_ms: u64,
    min_ms: u64,
    max_ms: u64,
    log: F,
) -> u64 {
    parse_clamped_timeout_env(raw, env_name, fallback_ms, min_ms, max_ms, log)
}

/// Resolves where the backend startup heartbeat file lives.
///
/// `relative_path` is trimmed and joined onto the first available base
/// directory, tried in this order:
///
/// 1. `root_dir`, when provided;
/// 2. the `ASTRBOT_ROOT` environment variable, when it is readable as
///    Unicode and non-empty after trimming;
/// 3. `packaged_root`, when provided.
///
/// Returns `None` when `relative_path` is blank or no base directory is
/// available.
pub fn resolve_backend_startup_heartbeat_path(
    root_dir: Option<&Path>,
    packaged_root: Option<PathBuf>,
    relative_path: &str,
) -> Option<PathBuf> {
    let relative = relative_path.trim();
    if relative.is_empty() {
        return None;
    }

    // Only consulted when no explicit root directory was supplied.
    let env_root = || {
        env::var("ASTRBOT_ROOT")
            .ok()
            .map(|value| PathBuf::from(value.trim()))
            .filter(|candidate| !candidate.as_os_str().is_empty())
    };

    root_dir
        .map(Path::to_path_buf)
        .or_else(env_root)
        .or(packaged_root)
        .map(|base| base.join(relative))
}

#[allow(clippy::too_many_arguments)]
pub fn resolve_backend_readiness_config<F>(
ready_http_path_env: &str,
Expand Down Expand Up @@ -221,6 +262,8 @@ where
path,
probe_timeout_ms,
poll_interval_ms,
startup_idle_timeout_ms: 0,
startup_heartbeat_path: None,
}
}

Expand Down Expand Up @@ -260,6 +303,47 @@ mod tests {
assert_eq!(value, 3_000);
}

// A raw value above the allowed maximum must come back as the maximum.
#[test]
fn resolve_backend_startup_idle_timeout_clamps_large_value() {
    let fallback_ms = 60_000;
    let min_ms = 5_000;
    let max_ms = 300_000;

    let resolved = resolve_backend_startup_idle_timeout_ms(
        "999999",
        "TEST_STARTUP_IDLE_TIMEOUT_ENV",
        fallback_ms,
        min_ms,
        max_ms,
        |_message| {},
    );

    assert_eq!(resolved, 300_000);
}

// A raw value below the allowed minimum must come back as the minimum.
#[test]
fn resolve_backend_startup_idle_timeout_clamps_small_value() {
    let fallback_ms = 60_000;
    let min_ms = 5_000;
    let max_ms = 300_000;

    let resolved = resolve_backend_startup_idle_timeout_ms(
        "1000",
        "TEST_STARTUP_IDLE_TIMEOUT_ENV",
        fallback_ms,
        min_ms,
        max_ms,
        |_message| {},
    );

    assert_eq!(resolved, 5_000);
}

// When an explicit root directory is supplied it wins over the packaged root.
#[test]
fn resolve_backend_startup_heartbeat_path_prefers_root_dir() {
    let relative = "data/backend-startup-heartbeat.json";
    let explicit_root = Path::new("/tmp/astrbot-root");

    let resolved = resolve_backend_startup_heartbeat_path(
        Some(explicit_root),
        Some(PathBuf::from("/tmp/packaged-root")),
        relative,
    )
    .expect("expected heartbeat path");

    assert_eq!(resolved, explicit_root.join(relative));
}

#[test]
fn resolve_backend_timeout_uses_packaged_fallback_when_zero() {
let timeout = resolve_backend_timeout_ms(true, "TEST_TIMEOUT_ENV_MISSING", 20_000, 300_000);
Expand Down
3 changes: 3 additions & 0 deletions src-tauri/src/backend/launch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ impl BackendState {
if let Some(root_dir) = &plan.root_dir {
command.env("ASTRBOT_ROOT", root_dir);
}
if let Some(heartbeat_path) = plan.startup_heartbeat_path.as_ref() {
command.env(crate::BACKEND_STARTUP_HEARTBEAT_PATH_ENV, heartbeat_path);
}
if let Some(webui_dir) = &plan.webui_dir {
command.env("ASTRBOT_WEBUI_DIR", webui_dir);
}
Expand Down
Loading