Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/ARCHITECTURE.md
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ Renderer draws attention border / story overlay
### ADR-014: Agent Session Detection, Persistence, and Resumption

- **Decision:** Architect detects running AI agents at quit time, captures their session UUIDs, persists them in `persistence.toml`, and automatically resumes them on next launch. The quit-time teardown runs asynchronously on a background worker thread while the main thread keeps rendering terminal updates.
- **Context:** To persist an agent's session ID for resumption on next launch, Architect must capture the session UUID that the agent prints to the PTY during graceful shutdown. The quit sequence is: detect running agent via macOS `sysctl`/process inspection → start a background teardown worker → worker launches one teardown task per detected agent session in parallel; each task injects `Ctrl+C` twice (all supported agents), waits, retries once, and finally sends SIGTERM as last resort → main thread continues polling PTY output/rendering terminals, so users can see agents stopping in real time → a full-screen `quit_blocking_overlay` blocks all input and renders a shimmering gray veil while teardown is in progress → Architect extracts UUIDs only from PTY bytes captured after shutdown begins (not full history) and persists successful captures to `persistence.toml`.
- **Context:** To persist an agent's session ID for resumption on next launch, Architect must capture the session UUID that the agent prints to the PTY during graceful shutdown. The quit sequence is: detect running agent via macOS `sysctl`/process inspection → start a background teardown worker → worker launches one teardown task per detected agent session in parallel; each task injects `Ctrl+C` twice (all supported agents), waits, retries once, and finally sends SIGTERM as last resort → main thread continues polling PTY output/rendering terminals (including post-exit PTY drain while sessions are still allocated), so users can see agents stopping in real time and trailing output is not dropped → a full-screen `quit_blocking_overlay` blocks all input and renders a shimmering gray veil while teardown is in progress → after worker completion, runtime performs a bounded drain-until-quiet pass over all affected PTYs to capture trailing output that arrived after the worker reported done → Architect extracts UUIDs only from PTY bytes captured after shutdown begins (not full history) and persists successful captures to `persistence.toml`.
- **Agent detection strategy:** `session/state.detectForegroundAgent()` reads the foreground process-group leader's process image name (`kp_proc.p_comm`) via `sysctl KERN_PROC_PID`. If `p_comm` is `"claude"`, `"codex"`, or `"gemini"`, the agent is identified directly. If `p_comm` is `"node"`, `KERN_PROCARGS2` is read to inspect `argv[1]`; if the script path contains `"claude"`, `"codex"`, or `"gemini"`, the corresponding agent is matched. This uniform approach covers both direct binaries and Node.js-wrapped agents.
- **Resume-command injection:** On next launch, `app/runtime.zig` reads the persisted `agent_type` and `agent_session_id` from `persistence.toml`. If both are present, it appends the resume command (e.g., `claude --resume <uuid>`) to `session.pending_write` immediately after spawning the shell. The shell reads this input once it is ready, so no timing synchronization is needed.
- **Layer boundary:** `app/runtime.zig` owns quit orchestration (worker lifecycle, PTY exit signaling by fd, persistence timing) and UI blocking state. `session/state.zig` owns agent detection and session metadata access. `app/terminal_history.zig` owns text analysis (UUID extraction). UI components (`ui/components/quit_blocking_overlay.zig`) own the visual/input lock behavior.
Expand Down
63 changes: 62 additions & 1 deletion src/app/runtime.zig
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,9 @@ fn handleQuitRequest(
const quit_primary_wait_ms: u64 = 2500;
const quit_retry_wait_ms: u64 = 2500;
const quit_term_wait_ms: u64 = 500;
// Tuning knobs for the post-worker quit-capture drain (see drainQuitCaptureOutput):
// poll affected PTYs every `poll_ns`, stop once no capture buffer has grown for
// `quiet_ns`, and never keep draining longer than `max_ns` in total.
// `quiet`/`max` are i128 to match std.time.nanoTimestamp() arithmetic directly.
const quit_capture_drain_poll_ns: u64 = 20 * std.time.ns_per_ms;
const quit_capture_drain_quiet_ns: i128 = 250 * @as(i128, std.time.ns_per_ms);
const quit_capture_drain_max_ns: i128 = 2500 * @as(i128, std.time.ns_per_ms);

const QuitTeardownTask = struct {
session_idx: usize,
Expand Down Expand Up @@ -653,6 +656,47 @@ fn foregroundPgrp(slave_path_z: [:0]const u8, shell_pid: posix.pid_t) ?posix.pid
return fg_pgrp;
}

/// After the teardown worker reports done, keep polling the affected sessions'
/// PTYs so trailing output (e.g. the session UUID an agent prints during
/// shutdown) still lands in the quit-capture buffers. Polling stops once no
/// buffer has grown for `quit_capture_drain_quiet_ns`, or unconditionally after
/// `quit_capture_drain_max_ns` total (see shouldContinueQuitCaptureDrain).
fn drainQuitCaptureOutput(tasks: []const QuitTeardownTask, sessions: []const *SessionState) void {
    if (tasks.len == 0) return;

    // Baseline capture-buffer sizes as of worker completion, indexed by task slot.
    var seen_lengths: [grid_layout.max_terminals]usize = [_]usize{0} ** grid_layout.max_terminals;
    for (tasks, 0..) |task, slot| {
        seen_lengths[slot] = sessions[task.session_idx].quitCaptureBytes().len;
    }

    const drain_started_ns = std.time.nanoTimestamp();
    var last_growth_ns = drain_started_ns;

    while (true) {
        var grew = false;
        for (tasks, 0..) |task, slot| {
            const session = sessions[task.session_idx];
            // Best effort: a failed drain on one session must not stop the others.
            session.processOutput() catch |err| {
                log.warn("quit teardown: session {d} post-worker output drain failed: {}", .{ task.session_idx, err });
            };
            const captured = session.quitCaptureBytes().len;
            if (captured > seen_lengths[slot]) grew = true;
            seen_lengths[slot] = captured;
        }

        const now_ns = std.time.nanoTimestamp();
        if (grew) last_growth_ns = now_ns;

        if (!shouldContinueQuitCaptureDrain(drain_started_ns, last_growth_ns, now_ns)) break;
        std.Thread.sleep(quit_capture_drain_poll_ns);
    }
}

/// True while the post-worker drain loop should keep polling: the last capture
/// growth is still within the quiet window AND the overall drain budget has
/// not been exhausted. Pure so it can be unit-tested without clocks.
fn shouldContinueQuitCaptureDrain(start_ns: i128, last_growth_ns: i128, now_ns: i128) bool {
    const within_quiet_window = (now_ns - last_growth_ns) < quit_capture_drain_quiet_ns;
    const within_total_budget = (now_ns - start_ns) < quit_capture_drain_max_ns;
    return within_quiet_window and within_total_budget;
}

fn startQuitFlow(
quit_state: *QuitTeardownState,
sessions: []*SessionState,
Expand Down Expand Up @@ -2482,6 +2526,7 @@ pub fn run() !void {
if (quit_teardown.active) {
quit_blocking_overlay_component.setActive(false);
quit_teardown.join();
drainQuitCaptureOutput(quit_teardown.tasks[0..quit_teardown.task_count], sessions[0..]);
for (quit_teardown.tasks[0..quit_teardown.task_count]) |task| {
const session = sessions[task.session_idx];
session.stopQuitCapture();
Expand All @@ -2492,7 +2537,6 @@ pub fn run() !void {
session.agent_kind = null;
const text = session.quitCaptureBytes();
log.debug("quit teardown: session {d} extracted {d} bytes of terminal text", .{ task.session_idx, text.len });
log.debug("quit teardown: session {d} terminal text tail: {s}", .{ task.session_idx, text[@max(0, text.len -| 1000)..] });
if (terminal_history.extractLastUuid(text)) |uuid| {
log.info("quit teardown: session {d} captured session id: {s}", .{ task.session_idx, uuid });
session.agent_kind = task.agent_kind;
Expand Down Expand Up @@ -2550,6 +2594,23 @@ test "markTeardownComplete returns true only once" {
try std.testing.expect(!markTeardownComplete(&done));
}

test "shouldContinueQuitCaptureDrain stops after quiet window" {
    // With growth frozen at t=0, the drain continues one tick before the quiet
    // window elapses and stops exactly at the boundary (strict `<` comparison).
    const origin: i128 = 0;
    try std.testing.expect(shouldContinueQuitCaptureDrain(origin, origin, quit_capture_drain_quiet_ns - 1));
    try std.testing.expect(!shouldContinueQuitCaptureDrain(origin, origin, quit_capture_drain_quiet_ns));
}

test "shouldContinueQuitCaptureDrain stops after max window" {
    // Even with very recent capture growth, the total drain budget caps polling.
    // Assert both sides of the boundary: a degenerate predicate that always
    // returned false would otherwise pass this test.
    const start_ns: i128 = 0;
    const recent_growth_ns = quit_capture_drain_max_ns - 1;
    try std.testing.expect(shouldContinueQuitCaptureDrain(start_ns, recent_growth_ns, quit_capture_drain_max_ns - 1));
    try std.testing.expect(!shouldContinueQuitCaptureDrain(start_ns, recent_growth_ns, quit_capture_drain_max_ns));
}

const TestSwapError = error{InitFailed};

const TestResource = struct {
Expand Down
14 changes: 13 additions & 1 deletion src/session/state.zig
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ pub const SessionState = struct {
}

pub fn processOutput(self: *SessionState) ProcessOutputError!void {
if (!self.spawned or self.dead) return;
if (!shouldProcessOutput(self.spawned, self.dead)) return;

const shell = &(self.shell orelse return);
const stream = &(self.stream orelse return);
Expand Down Expand Up @@ -484,6 +484,11 @@ pub const SessionState = struct {
}
}

/// Gate for processOutput: drain whenever the session was ever spawned.
/// `dead` is deliberately ignored — a dead session's PTY may still hold
/// buffered bytes (e.g. an agent's shutdown output) that must be drained
/// for quit-capture. Kept as a pure function so the policy is testable.
fn shouldProcessOutput(spawned: bool, dead: bool) bool {
    _ = dead; // see doc comment: death must not stop draining
    return spawned;
}

/// Try to flush any queued stdin data; preserves ordering relative to new input.
pub fn flushPendingWrites(self: *SessionState) !void {
if (self.pending_write.items.len == 0) return;
Expand Down Expand Up @@ -842,6 +847,13 @@ test "pending write shrinks when empty and over threshold" {
try std.testing.expect(buf.capacity <= pending_write_shrink_threshold);
}

test "shouldProcessOutput keeps draining after process exit" {
    // Spawned sessions drain regardless of death; unspawned ones never do.
    try std.testing.expect(SessionState.shouldProcessOutput(true, true));
    try std.testing.expect(SessionState.shouldProcessOutput(true, false));
    try std.testing.expect(!SessionState.shouldProcessOutput(false, true));
    try std.testing.expect(!SessionState.shouldProcessOutput(false, false));
}

test "AgentKind.fromComm recognises known agent names" {
try std.testing.expectEqual(AgentKind.claude, AgentKind.fromComm("claude").?);
try std.testing.expectEqual(AgentKind.codex, AgentKind.fromComm("codex").?);
Expand Down