From 81f4a212b212d7528fb958a2c52c67b3f53b2cee Mon Sep 17 00:00:00 2001
From: Forketyfork <forketyfork@icloud.com>
Date: Tue, 24 Feb 2026 15:36:44 +0100
Subject: [PATCH 1/2] fix(runtime): preserve agent session capture during quit

Issue: Quitting Architect with multiple active agent sessions could miss resume IDs because trailing PTY output arrived after sessions were marked dead. Separately, the shutdown debug tail logging printed control-sequence-heavy terminal data into the launcher shell.

Solution: Keep draining PTY output for spawned sessions even after process exit so late bytes are still processed. Add a bounded post-worker drain-until-quiet pass before UUID extraction to capture trailing shutdown output deterministically. Remove raw terminal-tail debug logging to avoid control-sequence noise in the launcher shell while retaining concise capture diagnostics.
---
 docs/ARCHITECTURE.md  |  2 +-
 src/app/runtime.zig   | 63 ++++++++++++++++++++++++++++++++++++++++++-
 src/session/state.zig | 14 +++++++++-
 3 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index 2da3252..f3d0478 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -477,7 +477,7 @@ Renderer draws attention border / story overlay
 ### ADR-014: Agent Session Detection, Persistence, and Resumption
 
 - **Decision:** Architect detects running AI agents at quit time, captures their session UUIDs, persists them in `persistence.toml`, and automatically resumes them on next launch. The quit-time teardown runs asynchronously on a background worker thread while the main thread keeps rendering terminal updates.
-- **Context:** To persist an agent's session ID for resumption on next launch, Architect must capture the session UUID that the agent prints to the PTY during graceful shutdown. The quit sequence is: detect running agent via macOS `sysctl`/process inspection → start a background teardown worker → worker launches one teardown task per detected agent session in parallel; each task injects `Ctrl+C` twice (all supported agents), waits, retries once, and finally sends SIGTERM as last resort → main thread continues polling PTY output/rendering terminals, so users can see agents stopping in real time → a full-screen `quit_blocking_overlay` blocks all input and renders a shimmering gray veil while teardown is in progress → Architect extracts UUIDs only from PTY bytes captured after shutdown begins (not full history) and persists successful captures to `persistence.toml`.
+- **Context:** To persist an agent's session ID for resumption on next launch, Architect must capture the session UUID that the agent prints to the PTY during graceful shutdown. The quit sequence is: detect running agent via macOS `sysctl`/process inspection → start a background teardown worker → worker launches one teardown task per detected agent session in parallel; each task injects `Ctrl+C` twice (all supported agents), waits, retries once, and finally sends SIGTERM as last resort → main thread continues polling PTY output/rendering terminals (including post-exit PTY drain while sessions are still allocated), so users can see agents stopping in real time and trailing output is not dropped → a full-screen `quit_blocking_overlay` blocks all input and renders a shimmering gray veil while teardown is in progress → after worker completion, runtime performs a bounded drain-until-quiet pass over all affected PTYs to capture trailing output that arrived after the worker reported done → Architect extracts UUIDs only from PTY bytes captured after shutdown begins (not full history) and persists successful captures to `persistence.toml`.
 - **Agent detection strategy:** `session/state.detectForegroundAgent()` reads the foreground process-group leader's process image name (`kp_proc.p_comm`) via `sysctl KERN_PROC_PID`. If `p_comm` is `"claude"`, `"codex"`, or `"gemini"`, the agent is identified directly. If `p_comm` is `"node"`, `KERN_PROCARGS2` is read to inspect `argv[1]`; if the script path contains `"claude"`, `"codex"`, or `"gemini"`, the corresponding agent is matched. This uniform approach covers both direct binaries and Node.js-wrapped agents.
 - **Resume-command injection:** On next launch, `app/runtime.zig` reads the persisted `agent_type` and `agent_session_id` from `persistence.toml`. If both are present, it appends the resume command (e.g., `claude --resume <uuid>`) to `session.pending_write` immediately after spawning the shell. The shell reads this input once it is ready, so no timing synchronization is needed.
 - **Layer boundary:** `app/runtime.zig` owns quit orchestration (worker lifecycle, PTY exit signaling by fd, persistence timing) and UI blocking state. `session/state.zig` owns agent detection and session metadata access. `app/terminal_history.zig` owns text analysis (UUID extraction). UI components (`ui/components/quit_blocking_overlay.zig`) own the visual/input lock behavior.
diff --git a/src/app/runtime.zig b/src/app/runtime.zig
index 8bba3ca..7cd84b4 100644
--- a/src/app/runtime.zig
+++ b/src/app/runtime.zig
@@ -511,6 +511,9 @@ fn handleQuitRequest(
 const quit_primary_wait_ms: u64 = 2500;
 const quit_retry_wait_ms: u64 = 2500;
 const quit_term_wait_ms: u64 = 500;
+const quit_capture_drain_poll_ns: u64 = 20 * std.time.ns_per_ms;
+const quit_capture_drain_quiet_ns: i128 = 250 * @as(i128, std.time.ns_per_ms);
+const quit_capture_drain_max_ns: i128 = 2500 * @as(i128, std.time.ns_per_ms);
 
 const QuitTeardownTask = struct {
     session_idx: usize,
@@ -653,6 +656,47 @@ fn foregroundPgrp(slave_path_z: [:0]const u8, shell_pid: posix.pid_t) ?posix.pid
     return fg_pgrp;
 }
 
+fn drainQuitCaptureOutput(tasks: []const QuitTeardownTask, sessions: []const *SessionState) void {
+    if (tasks.len == 0) return;
+    // Drain trailing output for the given tasks. Indexing by task position assumes tasks.len <= grid_layout.max_terminals (TODO confirm).
+    var last_capture_lengths: [grid_layout.max_terminals]usize = [_]usize{0} ** grid_layout.max_terminals;
+    for (tasks, 0..) |task, idx| {
+        last_capture_lengths[idx] = sessions[task.session_idx].quitCaptureBytes().len;
+    }
+    // NOTE(review): std.time.nanoTimestamp() is calendar time, not a monotonic clock, so the elapsed values below can be skewed by clock adjustments.
+    const start_ns = std.time.nanoTimestamp();
+    var last_growth_ns = start_ns;
+    // Each pass pumps every task's session, records whether any capture buffer grew, then decides whether to poll again.
+    while (true) {
+        var saw_growth = false;
+        for (tasks, 0..) |task, idx| {
+            const session = sessions[task.session_idx];
+            session.processOutput() catch |err| {
+                log.warn("quit teardown: session {d} post-worker output drain failed: {}", .{ task.session_idx, err });
+            };
+            const new_len = session.quitCaptureBytes().len;
+            if (new_len > last_capture_lengths[idx]) {
+                saw_growth = true;
+            }
+            last_capture_lengths[idx] = new_len;
+        }
+        // Growth in any session during this pass restarts the quiet window.
+        const now_ns = std.time.nanoTimestamp();
+        if (saw_growth) {
+            last_growth_ns = now_ns;
+        }
+        // Stop once shouldContinueQuitCaptureDrain reports the quiet window or total budget is used up; otherwise wait one poll interval.
+        if (!shouldContinueQuitCaptureDrain(start_ns, last_growth_ns, now_ns)) break;
+        std.Thread.sleep(quit_capture_drain_poll_ns);
+    }
+}
+
+fn shouldContinueQuitCaptureDrain(start_ns: i128, last_growth_ns: i128, now_ns: i128) bool {
+    // Keep draining only while inside both the quiet window and the total budget; the calendar clock may step backwards, so a negative delta stops the drain.
+    if (now_ns < start_ns or now_ns < last_growth_ns) return false;
+    return now_ns - last_growth_ns < quit_capture_drain_quiet_ns and now_ns - start_ns < quit_capture_drain_max_ns;
+}
+
 fn startQuitFlow(
     quit_state: *QuitTeardownState,
     sessions: []*SessionState,
@@ -2482,6 +2526,7 @@ pub fn run() !void {
         if (quit_teardown.active) {
             quit_blocking_overlay_component.setActive(false);
             quit_teardown.join();
+            drainQuitCaptureOutput(quit_teardown.tasks[0..quit_teardown.task_count], sessions[0..]);
             for (quit_teardown.tasks[0..quit_teardown.task_count]) |task| {
                 const session = sessions[task.session_idx];
                 session.stopQuitCapture();
@@ -2492,7 +2537,6 @@ pub fn run() !void {
                 session.agent_kind = null;
                 const text = session.quitCaptureBytes();
                 log.debug("quit teardown: session {d} extracted {d} bytes of terminal text", .{ task.session_idx, text.len });
-                log.debug("quit teardown: session {d} terminal text tail: {s}", .{ task.session_idx, text[@max(0, text.len -| 1000)..] });
                 if (terminal_history.extractLastUuid(text)) |uuid| {
                     log.info("quit teardown: session {d} captured session id: {s}", .{ task.session_idx, uuid });
                     session.agent_kind = task.agent_kind;
@@ -2550,6 +2594,23 @@ test "markTeardownComplete returns true only once" {
     try std.testing.expect(!markTeardownComplete(&done));
 }
 
+test "shouldContinueQuitCaptureDrain stops after quiet window" {
+    // No growth since t=0 and the total budget is far away, so only the quiet window decides the outcome.
+    const quiet_ns = quit_capture_drain_quiet_ns;
+    const origin_ns: i128 = 0;
+
+    // Exactly at the boundary the drain stops; one nanosecond earlier it keeps going.
+    try std.testing.expect(!shouldContinueQuitCaptureDrain(origin_ns, origin_ns, quiet_ns));
+    try std.testing.expect(shouldContinueQuitCaptureDrain(origin_ns, origin_ns, quiet_ns - 1));
+}
+
+test "shouldContinueQuitCaptureDrain stops after max window" {
+    // Growth one nanosecond ago keeps the quiet window open, so only the overall cap can stop the drain.
+    const cap_ns = quit_capture_drain_max_ns;
+    const keep_going = shouldContinueQuitCaptureDrain(0, cap_ns - 1, cap_ns);
+    try std.testing.expect(!keep_going);
+}
+
 const TestSwapError = error{InitFailed};
 
 const TestResource = struct {
diff --git a/src/session/state.zig b/src/session/state.zig
index f3b4a24..23bec50 100644
--- a/src/session/state.zig
+++ b/src/session/state.zig
@@ -455,7 +455,7 @@ pub const SessionState = struct {
     }
 
     pub fn processOutput(self: *SessionState) ProcessOutputError!void {
-        if (!self.spawned or self.dead) return;
+        if (!shouldProcessOutput(self.spawned, self.dead)) return;
 
         const shell = &(self.shell orelse return);
         const stream = &(self.stream orelse return);
@@ -484,6 +484,11 @@ pub const SessionState = struct {
         }
     }
 
+    fn shouldProcessOutput(spawned: bool, dead: bool) bool {
+        _ = dead;
+        return spawned;
+    }
+
     /// Try to flush any queued stdin data; preserves ordering relative to new input.
     pub fn flushPendingWrites(self: *SessionState) !void {
         if (self.pending_write.items.len == 0) return;
@@ -842,6 +847,13 @@ test "pending write shrinks when empty and over threshold" {
     try std.testing.expect(buf.capacity <= pending_write_shrink_threshold);
 }
 
+test "shouldProcessOutput keeps draining after process exit" {
+    try std.testing.expect(!SessionState.shouldProcessOutput(false, false));
+    try std.testing.expect(!SessionState.shouldProcessOutput(false, true));
+    try std.testing.expect(SessionState.shouldProcessOutput(true, false));
+    try std.testing.expect(SessionState.shouldProcessOutput(true, true));
+}
+
 test "AgentKind.fromComm recognises known agent names" {
     try std.testing.expectEqual(AgentKind.claude, AgentKind.fromComm("claude").?);
     try std.testing.expectEqual(AgentKind.codex, AgentKind.fromComm("codex").?);

From 96d18c868148dba6ce286e326c39130d2f5ef182 Mon Sep 17 00:00:00 2001
From: Forketyfork <forketyfork@icloud.com>
Date: Tue, 24 Feb 2026 15:49:49 +0100
Subject: [PATCH 2/2] fix(session): avoid dead PTY reads outside quit capture

Address PR review comment: https://github.com/forketyfork/architect/pull/243#discussion_r2847569529

Limit post-exit output draining to sessions with active quit capture, and treat PTY EIO as terminal EOF during reads. This preserves quit-time UUID capture while avoiding runtime failures when dead sessions are polled in normal loops.
---
 docs/ARCHITECTURE.md  |  2 +-
 src/session/state.zig | 29 +++++++++++++++++------------
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index f3d0478..e883530 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -477,7 +477,7 @@ Renderer draws attention border / story overlay
 ### ADR-014: Agent Session Detection, Persistence, and Resumption
 
 - **Decision:** Architect detects running AI agents at quit time, captures their session UUIDs, persists them in `persistence.toml`, and automatically resumes them on next launch. The quit-time teardown runs asynchronously on a background worker thread while the main thread keeps rendering terminal updates.
-- **Context:** To persist an agent's session ID for resumption on next launch, Architect must capture the session UUID that the agent prints to the PTY during graceful shutdown. The quit sequence is: detect running agent via macOS `sysctl`/process inspection → start a background teardown worker → worker launches one teardown task per detected agent session in parallel; each task injects `Ctrl+C` twice (all supported agents), waits, retries once, and finally sends SIGTERM as last resort → main thread continues polling PTY output/rendering terminals (including post-exit PTY drain while sessions are still allocated), so users can see agents stopping in real time and trailing output is not dropped → a full-screen `quit_blocking_overlay` blocks all input and renders a shimmering gray veil while teardown is in progress → after worker completion, runtime performs a bounded drain-until-quiet pass over all affected PTYs to capture trailing output that arrived after the worker reported done → Architect extracts UUIDs only from PTY bytes captured after shutdown begins (not full history) and persists successful captures to `persistence.toml`.
+- **Context:** To persist an agent's session ID for resumption on next launch, Architect must capture the session UUID that the agent prints to the PTY during graceful shutdown. The quit sequence is: detect running agent via macOS `sysctl`/process inspection → start a background teardown worker → worker launches one teardown task per detected agent session in parallel; each task injects `Ctrl+C` twice (all supported agents), waits, retries once, and finally sends SIGTERM as last resort → main thread continues polling PTY output/rendering terminals (including post-exit PTY drain only for sessions with active quit capture while they are still allocated), so users can see agents stopping in real time and trailing output is not dropped → a full-screen `quit_blocking_overlay` blocks all input and renders a shimmering gray veil while teardown is in progress → after worker completion, runtime performs a bounded drain-until-quiet pass over all affected PTYs to capture trailing output that arrived after the worker reported done → Architect extracts UUIDs only from PTY bytes captured after shutdown begins (not full history) and persists successful captures to `persistence.toml`.
 - **Agent detection strategy:** `session/state.detectForegroundAgent()` reads the foreground process-group leader's process image name (`kp_proc.p_comm`) via `sysctl KERN_PROC_PID`. If `p_comm` is `"claude"`, `"codex"`, or `"gemini"`, the agent is identified directly. If `p_comm` is `"node"`, `KERN_PROCARGS2` is read to inspect `argv[1]`; if the script path contains `"claude"`, `"codex"`, or `"gemini"`, the corresponding agent is matched. This uniform approach covers both direct binaries and Node.js-wrapped agents.
 - **Resume-command injection:** On next launch, `app/runtime.zig` reads the persisted `agent_type` and `agent_session_id` from `persistence.toml`. If both are present, it appends the resume command (e.g., `claude --resume <uuid>`) to `session.pending_write` immediately after spawning the shell. The shell reads this input once it is ready, so no timing synchronization is needed.
 - **Layer boundary:** `app/runtime.zig` owns quit orchestration (worker lifecycle, PTY exit signaling by fd, persistence timing) and UI blocking state. `session/state.zig` owns agent detection and session metadata access. `app/terminal_history.zig` owns text analysis (UUID extraction). UI components (`ui/components/quit_blocking_overlay.zig`) own the visual/input lock behavior.
diff --git a/src/session/state.zig b/src/session/state.zig
index 23bec50..db71599 100644
--- a/src/session/state.zig
+++ b/src/session/state.zig
@@ -455,15 +455,18 @@ pub const SessionState = struct {
     }
 
     pub fn processOutput(self: *SessionState) ProcessOutputError!void {
-        if (!shouldProcessOutput(self.spawned, self.dead)) return;
+        if (!shouldProcessOutput(self.spawned, self.dead, self.quit_capture_active)) return;
 
         const shell = &(self.shell orelse return);
         const stream = &(self.stream orelse return);
 
         while (true) {
-            const n = shell.read(&self.output_buf) catch |err| {
-                if (err == error.WouldBlock) return;
-                return err;
+            const n = shell.read(&self.output_buf) catch |err| switch (err) {
+                error.WouldBlock => return,
+                // Linux PTYs can report EIO after the slave side closes.
+                // Treat it as terminal EOF so normal dead sessions don't fail the runtime loop.
+                error.InputOutput => return,
+                else => return err,
             };
 
             if (n == 0) return;
@@ -484,9 +487,10 @@ pub const SessionState = struct {
         }
     }
 
-    fn shouldProcessOutput(spawned: bool, dead: bool) bool {
-        _ = dead;
-        return spawned;
+    fn shouldProcessOutput(spawned: bool, dead: bool, quit_capture_active: bool) bool {
+        if (!spawned) return false;
+        if (!dead) return true;
+        return quit_capture_active;
     }
 
     /// Try to flush any queued stdin data; preserves ordering relative to new input.
@@ -847,11 +851,12 @@ test "pending write shrinks when empty and over threshold" {
     try std.testing.expect(buf.capacity <= pending_write_shrink_threshold);
 }
 
-test "shouldProcessOutput keeps draining after process exit" {
-    try std.testing.expect(!SessionState.shouldProcessOutput(false, false));
-    try std.testing.expect(!SessionState.shouldProcessOutput(false, true));
-    try std.testing.expect(SessionState.shouldProcessOutput(true, false));
-    try std.testing.expect(SessionState.shouldProcessOutput(true, true));
+test "shouldProcessOutput drains dead sessions only during quit capture" {
+    try std.testing.expect(!SessionState.shouldProcessOutput(false, false, false));
+    try std.testing.expect(!SessionState.shouldProcessOutput(false, true, false));
+    try std.testing.expect(SessionState.shouldProcessOutput(true, false, false));
+    try std.testing.expect(!SessionState.shouldProcessOutput(true, true, false));
+    try std.testing.expect(SessionState.shouldProcessOutput(true, true, true));
 }
 
 test "AgentKind.fromComm recognises known agent names" {