Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/ARCHITECTURE.md
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ Renderer draws attention border / story overlay
### ADR-014: Agent Session Detection, Persistence, and Resumption

- **Decision:** Architect detects running AI agents at quit time, captures their session UUIDs, persists them in `persistence.toml`, and automatically resumes them on next launch. The quit-time teardown runs asynchronously on a background worker thread while the main thread keeps rendering terminal updates.
- **Context:** To persist an agent's session ID for resumption on next launch, Architect must capture the session UUID that the agent prints to the PTY during graceful shutdown. The quit sequence is: detect running agent via macOS `sysctl`/process inspection → start a background teardown worker → worker launches one teardown task per detected agent session in parallel; each task injects `Ctrl+C` twice (all supported agents), waits, retries once, and finally sends SIGTERM as last resort → main thread continues polling PTY output/rendering terminals, so users can see agents stopping in real time → a full-screen `quit_blocking_overlay` blocks all input and renders a shimmering gray veil while teardown is in progress → Architect extracts UUIDs only from PTY bytes captured after shutdown begins (not full history) and persists successful captures to `persistence.toml`.
- **Context:** To persist an agent's session ID for resumption on next launch, Architect must capture the session UUID that the agent prints to the PTY during graceful shutdown. The quit sequence is: detect running agent via macOS `sysctl`/process inspection → start a background teardown worker → worker launches one teardown task per detected agent session in parallel; each task injects `Ctrl+C` twice (all supported agents), waits, retries once, and finally sends SIGTERM as last resort → main thread continues polling PTY output/rendering terminals (including a post-exit PTY drain, performed only for sessions whose quit capture is active and whose PTY is still allocated), so users can see agents stopping in real time and trailing output is not dropped → a full-screen `quit_blocking_overlay` blocks all input and renders a shimmering gray veil while teardown is in progress → after worker completion, runtime performs a bounded drain-until-quiet pass over all affected PTYs to capture trailing output that arrived after the worker reported done → Architect extracts UUIDs only from PTY bytes captured after shutdown begins (not full history) and persists successful captures to `persistence.toml`.
- **Agent detection strategy:** `session/state.detectForegroundAgent()` reads the foreground process-group leader's process image name (`kp_proc.p_comm`) via `sysctl KERN_PROC_PID`. If `p_comm` is `"claude"`, `"codex"`, or `"gemini"`, the agent is identified directly. If `p_comm` is `"node"`, `KERN_PROCARGS2` is read to inspect `argv[1]`; if the script path contains `"claude"`, `"codex"`, or `"gemini"`, the corresponding agent is matched. This uniform approach covers both direct binaries and Node.js-wrapped agents.
- **Resume-command injection:** On next launch, `app/runtime.zig` reads the persisted `agent_type` and `agent_session_id` from `persistence.toml`. If both are present, it appends the resume command (e.g., `claude --resume <uuid>`) to `session.pending_write` immediately after spawning the shell. The shell reads this input once it is ready, so no timing synchronization is needed.
- **Layer boundary:** `app/runtime.zig` owns quit orchestration (worker lifecycle, PTY exit signaling by fd, persistence timing) and UI blocking state. `session/state.zig` owns agent detection and session metadata access. `app/terminal_history.zig` owns text analysis (UUID extraction). UI components (`ui/components/quit_blocking_overlay.zig`) own the visual/input lock behavior.
Expand Down
63 changes: 62 additions & 1 deletion src/app/runtime.zig
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,9 @@ fn handleQuitRequest(
// Teardown task pacing: wait after the first Ctrl+C injection, after the
// retry, and after the final SIGTERM (per the ADR-014 quit sequence —
// TODO confirm exact usage sites, they are outside this hunk).
const quit_primary_wait_ms: u64 = 2500;
const quit_retry_wait_ms: u64 = 2500;
const quit_term_wait_ms: u64 = 500;
// Post-worker drain loop (see drainQuitCaptureOutput): poll the PTYs every
// 20ms, stop once no capture growth is seen for 250ms (quiet window), and
// stop unconditionally after 2.5s (hard cap).
const quit_capture_drain_poll_ns: u64 = 20 * std.time.ns_per_ms;
const quit_capture_drain_quiet_ns: i128 = 250 * @as(i128, std.time.ns_per_ms);
const quit_capture_drain_max_ns: i128 = 2500 * @as(i128, std.time.ns_per_ms);

const QuitTeardownTask = struct {
session_idx: usize,
Expand Down Expand Up @@ -653,6 +656,47 @@ fn foregroundPgrp(slave_path_z: [:0]const u8, shell_pid: posix.pid_t) ?posix.pid
return fg_pgrp;
}

/// Bounded drain-until-quiet pass run after the teardown worker joins.
/// Keeps polling the PTYs of the torn-down sessions so trailing output that
/// arrived after the worker reported done still lands in the quit capture.
/// Terminates via shouldContinueQuitCaptureDrain: either the capture has been
/// quiet for the quiet window, or the overall time budget is exhausted.
fn drainQuitCaptureOutput(tasks: []const QuitTeardownTask, sessions: []const *SessionState) void {
    if (tasks.len == 0) return;

    // Snapshot each task's current capture length so growth can be detected.
    var seen_lengths: [grid_layout.max_terminals]usize = [_]usize{0} ** grid_layout.max_terminals;
    for (tasks, 0..) |task, i| {
        seen_lengths[i] = sessions[task.session_idx].quitCaptureBytes().len;
    }

    const drain_start_ns = std.time.nanoTimestamp();
    var growth_seen_at_ns = drain_start_ns;

    while (true) {
        var grew_this_pass = false;
        for (tasks, 0..) |task, i| {
            const session = sessions[task.session_idx];
            // Best effort: one session's failed drain must not abort the rest.
            session.processOutput() catch |err| {
                log.warn("quit teardown: session {d} post-worker output drain failed: {}", .{ task.session_idx, err });
            };
            const captured_len = session.quitCaptureBytes().len;
            if (captured_len > seen_lengths[i]) grew_this_pass = true;
            seen_lengths[i] = captured_len;
        }

        const now_ns = std.time.nanoTimestamp();
        if (grew_this_pass) growth_seen_at_ns = now_ns;

        if (!shouldContinueQuitCaptureDrain(drain_start_ns, growth_seen_at_ns, now_ns)) break;
        std.Thread.sleep(quit_capture_drain_poll_ns);
    }
}

/// Decide whether the post-worker PTY drain loop should keep polling.
/// Continues only while capture growth was seen within the quiet window AND
/// the total drain time is still under the hard cap.
fn shouldContinueQuitCaptureDrain(start_ns: i128, last_growth_ns: i128, now_ns: i128) bool {
    // Stop once a full quiet window has elapsed without capture growth.
    if (now_ns - last_growth_ns >= quit_capture_drain_quiet_ns) return false;
    // Stop unconditionally once the overall time budget is spent.
    return now_ns - start_ns < quit_capture_drain_max_ns;
}

fn startQuitFlow(
quit_state: *QuitTeardownState,
sessions: []*SessionState,
Expand Down Expand Up @@ -2482,6 +2526,7 @@ pub fn run() !void {
if (quit_teardown.active) {
quit_blocking_overlay_component.setActive(false);
quit_teardown.join();
drainQuitCaptureOutput(quit_teardown.tasks[0..quit_teardown.task_count], sessions[0..]);
for (quit_teardown.tasks[0..quit_teardown.task_count]) |task| {
const session = sessions[task.session_idx];
session.stopQuitCapture();
Expand All @@ -2492,7 +2537,6 @@ pub fn run() !void {
session.agent_kind = null;
const text = session.quitCaptureBytes();
log.debug("quit teardown: session {d} extracted {d} bytes of terminal text", .{ task.session_idx, text.len });
log.debug("quit teardown: session {d} terminal text tail: {s}", .{ task.session_idx, text[@max(0, text.len -| 1000)..] });
if (terminal_history.extractLastUuid(text)) |uuid| {
log.info("quit teardown: session {d} captured session id: {s}", .{ task.session_idx, uuid });
session.agent_kind = task.agent_kind;
Expand Down Expand Up @@ -2550,6 +2594,23 @@ test "markTeardownComplete returns true only once" {
try std.testing.expect(!markTeardownComplete(&done));
}

test "shouldContinueQuitCaptureDrain stops after quiet window" {
    // Growth last happened at t=0; probe both sides of the quiet boundary.
    try std.testing.expect(shouldContinueQuitCaptureDrain(0, 0, quit_capture_drain_quiet_ns - 1));
    try std.testing.expect(!shouldContinueQuitCaptureDrain(0, 0, quit_capture_drain_quiet_ns));
}

test "shouldContinueQuitCaptureDrain stops after max window" {
    const start_ns: i128 = 0;
    const recent_growth_ns = quit_capture_drain_max_ns - 1;
    // At the hard cap the drain must stop even though growth was just seen.
    const at_max_boundary = quit_capture_drain_max_ns;
    try std.testing.expect(!shouldContinueQuitCaptureDrain(start_ns, recent_growth_ns, at_max_boundary));
    // One tick before the cap, with recent growth, the drain may continue.
    try std.testing.expect(shouldContinueQuitCaptureDrain(start_ns, recent_growth_ns, quit_capture_drain_max_ns - 1));
}

// Test-only error set: lets the tests below simulate a failing init.
const TestSwapError = error{InitFailed};

const TestResource = struct {
Expand Down
25 changes: 21 additions & 4 deletions src/session/state.zig
Original file line number Diff line number Diff line change
Expand Up @@ -455,15 +455,18 @@ pub const SessionState = struct {
}

pub fn processOutput(self: *SessionState) ProcessOutputError!void {
if (!self.spawned or self.dead) return;
if (!shouldProcessOutput(self.spawned, self.dead, self.quit_capture_active)) return;

const shell = &(self.shell orelse return);
const stream = &(self.stream orelse return);

while (true) {
const n = shell.read(&self.output_buf) catch |err| {
if (err == error.WouldBlock) return;
return err;
const n = shell.read(&self.output_buf) catch |err| switch (err) {
error.WouldBlock => return,
// Linux PTYs can report EIO after the slave side closes.
// Treat it as terminal EOF so normal dead sessions don't fail the runtime loop.
error.InputOutput => return,
else => return err,
};

if (n == 0) return;
Expand All @@ -484,6 +487,12 @@ pub const SessionState = struct {
}
}

/// Gate for processOutput: only spawned sessions are drained, and a dead
/// session is drained only while quit capture is still collecting bytes.
fn shouldProcessOutput(spawned: bool, dead: bool, quit_capture_active: bool) bool {
    if (!spawned) return false;
    return !dead or quit_capture_active;
}

/// Try to flush any queued stdin data; preserves ordering relative to new input.
pub fn flushPendingWrites(self: *SessionState) !void {
if (self.pending_write.items.len == 0) return;
Expand Down Expand Up @@ -842,6 +851,14 @@ test "pending write shrinks when empty and over threshold" {
try std.testing.expect(buf.capacity <= pending_write_shrink_threshold);
}

test "shouldProcessOutput drains dead sessions only during quit capture" {
    // Unspawned sessions never drain, regardless of the other flags.
    try std.testing.expect(!SessionState.shouldProcessOutput(false, false, false));
    try std.testing.expect(!SessionState.shouldProcessOutput(false, true, false));
    try std.testing.expect(!SessionState.shouldProcessOutput(false, false, true));
    try std.testing.expect(!SessionState.shouldProcessOutput(false, true, true));
    // Live sessions always drain; quit capture is irrelevant while alive.
    try std.testing.expect(SessionState.shouldProcessOutput(true, false, false));
    try std.testing.expect(SessionState.shouldProcessOutput(true, false, true));
    // Dead sessions drain only while quit capture is active.
    try std.testing.expect(!SessionState.shouldProcessOutput(true, true, false));
    try std.testing.expect(SessionState.shouldProcessOutput(true, true, true));
}

test "AgentKind.fromComm recognises known agent names" {
try std.testing.expectEqual(AgentKind.claude, AgentKind.fromComm("claude").?);
try std.testing.expectEqual(AgentKind.codex, AgentKind.fromComm("codex").?);
Expand Down