From b22f2ddfbaa02dd7756c2b91289cde99bde89c59 Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Fri, 8 May 2026 20:11:45 +0200 Subject: [PATCH 1/7] docs: add spec and plan for end-of-run failures recap Co-Authored-By: Claude Opus 4.7 (1M context) --- .../plans/2026-05-08-failures-recap.md | 410 ++++++++++++++++++ .../specs/2026-05-08-failures-recap-design.md | 168 +++++++ 2 files changed, 578 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-08-failures-recap.md create mode 100644 docs/superpowers/specs/2026-05-08-failures-recap-design.md diff --git a/docs/superpowers/plans/2026-05-08-failures-recap.md b/docs/superpowers/plans/2026-05-08-failures-recap.md new file mode 100644 index 0000000..4ce4f91 --- /dev/null +++ b/docs/superpowers/plans/2026-05-08-failures-recap.md @@ -0,0 +1,410 @@ +# End-of-Run Failures Recap Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** After the existing `--- Run complete ---` summary, when any tests failed, print a recap block listing every failed test (with its error) so the failure list survives `tail -N` and can be copied as a single block. + +**Architecture:** All changes live in `src/cli/run.ts`. A module-local `failures: FailureRecord[]` array is appended to in the `test:fail` handler; the `run:complete` handler renders the recap block before exit when the array is non-empty. No protocol changes, no new flags, no relay/browser changes. Tests use a real `WebSocketServer` that scripts messages to the `run()` function, with `process.exit` and `console.log` mocked to capture exit code and stdout. + +**Tech Stack:** TypeScript, Node `ws`, Vitest. Existing CLI in `src/cli/run.ts`. Spec: `docs/superpowers/specs/2026-05-08-failures-recap-design.md`. + +--- + +## File Structure + +| File | Change | +|---|---| +| `src/cli/run.ts` | Add `FailureRecord` interface + `failures` array; append on `test:fail`; render recap block in `run:complete` handler when non-empty. | +| `src/tests/cli/run.spec.ts` | **New file.** Two tests: recap appears with two failures; recap absent on a green run. Spins up a `WebSocketServer` on port 9880 that scripts the message stream to `run()`. | +| `README.md` | Two-sentence note appended at the end of the `## CLI run command` section describing the recap block. | + +Port 9880 is unused — existing test files use 9877 (relay), 9878 / 9879 (others). Keeps the convention of one port per test file. + +--- + +## Task 1: Add failing test — recap on failures + +**Files:** +- Create: `src/tests/cli/run.spec.ts` + +**Why TDD here:** The recap is purely an output change. A test that asserts the exact strings in stdout is the cheapest, most precise verification — it documents the format and catches accidental regressions of the very thing we're shipping. 
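
For reference, this is the exact block the test will assert on, as the Task 2 implementation renders it (the suite/test names are the harness's fixtures, not real suites):

```
Failed tests (2):
  × Checkout > state dropdown
    waitFor timed out after 2000ms. Last error: No select items found
  × Checkout > province dropdown
    waitFor timed out after 2000ms. Last error: No select items found
```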

- [ ] **Step 1: Create the test file with one failing test**

Write `src/tests/cli/run.spec.ts`:

```typescript
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { WebSocketServer, WebSocket as WsServerSocket } from 'ws';
import { run } from '../../cli/run';

const PORT = 9880;
const HOST = 'localhost';
const PATH = '/__twd/ws';

interface Harness {
  server: WebSocketServer;
  logs: string[];
  errors: string[];
  exitPromise: Promise<number>;
}

/**
 * Start a fake relay on PORT that, when the run() client sends `hello`,
 * replies with `{ type: 'connected', browser: true }` and then invokes
 * `script(ws)` so the test can stream lifecycle events.
 *
 * `process.exit` is mocked to resolve `exitPromise` with the exit code
 * instead of terminating the test runner. `console.log` / `console.error`
 * are captured into `logs` / `errors`.
 */
async function startHarness(
  script: (ws: WsServerSocket) => void,
): Promise<Harness> {
  const logs: string[] = [];
  const errors: string[] = [];

  vi.spyOn(console, 'log').mockImplementation((...args: unknown[]) => {
    logs.push(args.map(String).join(' '));
  });
  vi.spyOn(console, 'error').mockImplementation((...args: unknown[]) => {
    errors.push(args.map(String).join(' '));
  });

  let resolveExit!: (code: number) => void;
  const exitPromise = new Promise<number>((resolve) => {
    resolveExit = resolve;
  });
  vi.spyOn(process, 'exit').mockImplementation(((code?: number) => {
    resolveExit(code ?? 0);
    return undefined as never;
  }) as typeof process.exit);

  const server = new WebSocketServer({ port: PORT, path: PATH });
  await new Promise<void>((resolve) => server.on('listening', () => resolve()));

  server.on('connection', (ws) => {
    ws.on('message', (data) => {
      const msg = JSON.parse(data.toString());
      if (msg.type === 'hello') {
        ws.send(JSON.stringify({ type: 'connected', browser: true }));
      } else if (msg.type === 'run') {
        script(ws);
      }
    });
  });

  return { server, logs, errors, exitPromise };
}

async function stopHarness(h: Harness): Promise<void> {
  await new Promise<void>((resolve) => h.server.close(() => resolve()));
}

describe('cli run — failures recap', () => {
  let harness: Harness | undefined;

  beforeEach(() => {
    harness = undefined;
  });

  afterEach(async () => {
    if (harness) await stopHarness(harness);
    vi.restoreAllMocks();
  });

  it('prints the recap block when tests fail', async () => {
    harness = await startHarness((ws) => {
      ws.send(JSON.stringify({ type: 'run:start', testCount: 2 }));
      ws.send(
        JSON.stringify({ type: 'test:start', suite: 'Checkout', name: 'state dropdown' }),
      );
      ws.send(
        JSON.stringify({
          type: 'test:fail',
          suite: 'Checkout',
          name: 'state dropdown',
          duration: 70,
          error: 'waitFor timed out after 2000ms. Last error: No select items found',
        }),
      );
      ws.send(
        JSON.stringify({ type: 'test:start', suite: 'Checkout', name: 'province dropdown' }),
      );
      ws.send(
        JSON.stringify({
          type: 'test:fail',
          suite: 'Checkout',
          name: 'province dropdown',
          duration: 65,
          error: 'waitFor timed out after 2000ms. 
Last error: No select items found', + }), + ); + ws.send( + JSON.stringify({ + type: 'run:complete', + passed: 0, + failed: 2, + skipped: 0, + duration: 1500, + }), + ); + }); + + run({ port: PORT, host: HOST, path: PATH, timeout: 5000 }); + + const code = await harness.exitPromise; + const out = harness.logs.join('\n'); + + expect(out).toContain('Failed tests (2):'); + expect(out).toContain('Checkout > state dropdown'); + expect(out).toContain('Checkout > province dropdown'); + expect(out).toContain('waitFor timed out after 2000ms'); + expect(code).toBe(1); + }); +}); +``` + +- [ ] **Step 2: Run the test, confirm it fails** + +Run: `npx vitest run src/tests/cli/run.spec.ts` + +Expected: FAIL. The assertion `expect(out).toContain('Failed tests (2):')` fails because today's `run:complete` handler prints only the summary lines, no recap. + +- [ ] **Step 3: Commit the failing test** + +```bash +git add src/tests/cli/run.spec.ts +git commit -m "test: add failing test for end-of-run failures recap" +``` + +--- + +## Task 2: Implement the recap + +**Files:** +- Modify: `src/cli/run.ts:62-84` + +- [ ] **Step 1: Add the FailureRecord interface and failures array** + +In `src/cli/run.ts`, after the existing `RunOptions` interface (around line 10) and before `export function run`, add: + +```typescript +interface FailureRecord { + suite: string; + name: string; + error?: string; +} +``` + +Inside `run()`, alongside the other local state declarations (`runSent`, `runComplete`, `failed`, currently lines 19–21), add: + +```typescript + const failures: FailureRecord[] = []; +``` + +- [ ] **Step 2: Append to failures in the `test:fail` handler** + +Replace the `test:fail` case (currently lines 62–68): + +```typescript + case 'test:fail': + failed = true; + console.log(` FAIL: ${msg.suite} > ${msg.name} (${msg.duration}ms)`); + if (msg.error) { + console.log(` Error: ${msg.error}`); + } + break; +``` + +with: + +```typescript + case 'test:fail': + failed = true; + console.log(` FAIL: ${msg.suite} > ${msg.name} (${msg.duration}ms)`); + if (msg.error) { + console.log(` Error: ${msg.error}`); + } + failures.push({ suite: msg.suite, name: msg.name, error: msg.error }); + break; +``` + +- [ ] **Step 3: Render the recap block in `run:complete`** + +Replace the `run:complete` case (currently lines 74–84): + +```typescript + case 'run:complete': { + const duration = (msg.duration / 1000).toFixed(1); + console.log(`\n--- Run complete ---`); + console.log(`Passed: ${msg.passed} | Failed: ${msg.failed} | Skipped: ${msg.skipped}`); + console.log(`Duration: ${duration}s`); + runComplete = true; + clearTimeout(timer); + ws.close(); + process.exit(failed || msg.failed > 0 ? 1 : 0); + break; + } +``` + +with: + +```typescript + case 'run:complete': { + const duration = (msg.duration / 1000).toFixed(1); + console.log(`\n--- Run complete ---`); + console.log(`Passed: ${msg.passed} | Failed: ${msg.failed} | Skipped: ${msg.skipped}`); + console.log(`Duration: ${duration}s`); + + if (failures.length > 0) { + console.log(`\nFailed tests (${failures.length}):`); + for (const f of failures) { + console.log(` × ${f.suite} > ${f.name}`); + if (f.error) { + const indented = f.error.replace(/\n/g, '\n '); + console.log(` ${indented}`); + } + } + } + + runComplete = true; + clearTimeout(timer); + ws.close(); + process.exit(failed || msg.failed > 0 ? 1 : 0); + break; + } +``` + +- [ ] **Step 4: Run the failing test, confirm it now passes** + +Run: `npx vitest run src/tests/cli/run.spec.ts` + +Expected: PASS. 
The recap block is now printed and contains both failure entries plus the indented error lines. + +- [ ] **Step 5: Run the full test suite to confirm no regressions** + +Run: `npm test -- --run` + +Expected: all tests pass (existing 26 tests + the new one). + +- [ ] **Step 6: Commit** + +```bash +git add src/cli/run.ts +git commit -m "feat: print failed-tests recap block at end of run" +``` + +--- + +## Task 3: Add the green-run regression test + +**Files:** +- Modify: `src/tests/cli/run.spec.ts` + +- [ ] **Step 1: Add the second test case** + +Add this `it` block inside the existing `describe('cli run — failures recap', ...)`, after the first test: + +```typescript + it('does not print the recap on a green run', async () => { + harness = await startHarness((ws) => { + ws.send(JSON.stringify({ type: 'run:start', testCount: 1 })); + ws.send( + JSON.stringify({ type: 'test:start', suite: 'Smoke', name: 'works' }), + ); + ws.send( + JSON.stringify({ + type: 'test:pass', + suite: 'Smoke', + name: 'works', + duration: 12, + }), + ); + ws.send( + JSON.stringify({ + type: 'run:complete', + passed: 1, + failed: 0, + skipped: 0, + duration: 50, + }), + ); + }); + + run({ port: PORT, host: HOST, path: PATH, timeout: 5000 }); + + const code = await harness.exitPromise; + const out = harness.logs.join('\n'); + + expect(out).not.toContain('Failed tests'); + expect(out).toContain('--- Run complete ---'); + expect(code).toBe(0); + }); +``` + +- [ ] **Step 2: Run the file, confirm both tests pass** + +Run: `npx vitest run src/tests/cli/run.spec.ts` + +Expected: 2 passed. + +- [ ] **Step 3: Run the full test suite once more** + +Run: `npm test -- --run` + +Expected: all tests pass. + +- [ ] **Step 4: Commit** + +```bash +git add src/tests/cli/run.spec.ts +git commit -m "test: assert no recap is printed on green runs" +``` + +--- + +## Task 4: Document the recap in README + +**Files:** +- Modify: `README.md` (end of `## CLI run command` section, around line 187) + +- [ ] **Step 1: Append a short note after the existing flag table paragraph** + +In `README.md`, after the line `When --test is used and no tests match, the CLI prints the available test names so you can correct the filter.` (around line 187) and before the `---` separator (line 189), insert a blank line and then: + +```markdown +When any tests fail, the CLI prints a recap block at the very end of the output listing each failed test and its error. This survives `tail -N` truncation and is easy to copy as a single block. +``` + +- [ ] **Step 2: Verify by reading the updated section** + +Confirm the new sentence sits between the `--test` paragraph and the `---` separator and that surrounding markdown still renders. + +- [ ] **Step 3: Commit** + +```bash +git add README.md +git commit -m "docs: mention end-of-run failures recap in README" +``` + +--- + +## Manual smoke (optional, post-merge) + +Per the spec, run a real suite with one intentionally broken test against a local relay and confirm: + +1. The recap appears at the very end of the output. +2. `npx twd-relay run | tail -10` still shows the recap. +3. A fully green run prints **no** recap header. + +This is not a blocking step for merging; the unit tests cover the behavior. It is worth doing once before publishing a new version. + +--- + +## Out of Scope (per spec) + +- New CLI flags, reporter modes, color output. +- Changes to the abort path (`run:aborted` already prints a self-contained error). +- Cross-run aggregation, dedup/grouping, or truncation of long errors. 
- Version bump / publish — handled separately on `main` per the project's existing workflow.
diff --git a/docs/superpowers/specs/2026-05-08-failures-recap-design.md b/docs/superpowers/specs/2026-05-08-failures-recap-design.md
new file mode 100644
index 0000000..06a178d
--- /dev/null
+++ b/docs/superpowers/specs/2026-05-08-failures-recap-design.md
@@ -0,0 +1,168 @@
# End-of-Run Failures Recap

## Problem

`twd-relay run` prints test results in the order they happen — `RUN:` then `PASS:` / `FAIL:` / `SKIP:` per test, followed by a final 3-line summary (`Passed | Failed | Skipped`, `Duration`). When a run produces ~75+ tests, failures get buried in the stream:

```
  RUN: Suite > test 1
  PASS: Suite > test 1 (42ms)
  ... (74 more lines) ...
  FAIL: Suite > test 35 (70ms)
    Error: waitFor timed out after 2000ms.
  ... (40 more lines) ...
--- Run complete ---
Passed: 75 | Failed: 2 | Skipped: 0
Duration: 65.7s
```

Two real consequences:

1. **Truncated logs lose failures.** Anyone (CI logs, terminal scrollback, AI agents piping through `tail -N`) who only sees the tail of the output gets the summary numbers but not the names of the failing tests. They have to re-run with `grep` or rerun the whole suite to find what failed. Observed in practice: an AI agent piped through `tail -80`, saw the `Failed: 2` count but only one of the two failure lines, and burned a second 65 s run just to extract the second name.
2. **No single block to act on.** Even with full output, mentally collating "which tests failed" requires scanning the whole stream and matching `FAIL:` lines against `Error:` lines. There's no terminal section that says "here's what to investigate."

## Solution

After the existing `--- Run complete ---` summary, when `failed > 0`, print a recap block listing every failed test with its error:

```
--- Run complete ---
Passed: 75 | Failed: 2 | Skipped: 0
Duration: 65.7s

Failed tests (2):
  × Checkout New — JSON Order Flow > should show state dropdown for USA
    waitFor timed out after 2000ms. Last error: No select items found
  × Checkout New — JSON Order Flow > should show province dropdown for Canada
    waitFor timed out after 2000ms. Last error: No select items found
```

Properties that matter:

- **At the very end of output.** Survives any `tail -N` with `N ≥ ~10` regardless of suite size.
- **One block, one purpose.** Easy to copy/paste into an issue, a chat message, or another tool's input.
- **Per-failure error preserved.** No need to scroll back to find `Error:` lines.
- **Not printed when nothing failed.** Zero noise on green runs.

## Why this and not a `--reporter` flag

A `--reporter=minimal` flag (suppress `RUN:`/`PASS:` lines) was discussed as an alternative. The recap block subsumes its main use case — "I just want to see what failed" — without losing the per-test progress stream that's useful for watching long runs interactively. A reporter flag is still worth considering as a follow-up for very large suites or strict CI logs, but it's a strictly separate change. This spec scopes to the recap block.

## Implementation

All changes in `src/cli/run.ts`. No protocol changes, no relay/browser changes, no new flags.
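
For orientation, these are the two incoming messages this change touches, with the field shapes the handlers below rely on (a sketch only — example values are lifted from the test fixtures, not a full protocol description):

```ts
// Emitted once per failing test; `error` is optional.
const fail = {
  type: 'test:fail', suite: 'Checkout', name: 'state dropdown', duration: 70,
  error: 'waitFor timed out after 2000ms. Last error: No select items found',
};

// Ends the run; triggers the summary and, when failures were collected, the recap.
const complete = { type: 'run:complete', passed: 0, failed: 2, skipped: 0, duration: 1500 };
```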
+ +### Collect failures during the run + +Add a module-local array, append on `test:fail`: + +```ts +interface FailureRecord { + suite: string; + name: string; + error?: string; +} + +const failures: FailureRecord[] = []; +``` + +In the `test:fail` handler (currently lines 62–68), after the existing `console.log` calls: + +```ts +case 'test:fail': + failed = true; + console.log(` FAIL: ${msg.suite} > ${msg.name} (${msg.duration}ms)`); + if (msg.error) { + console.log(` Error: ${msg.error}`); + } + failures.push({ suite: msg.suite, name: msg.name, error: msg.error }); + break; +``` + +### Print the recap block on `run:complete` + +In the `run:complete` handler (currently lines 74–84), after the existing summary lines, before the `process.exit`: + +```ts +case 'run:complete': { + const duration = (msg.duration / 1000).toFixed(1); + console.log(`\n--- Run complete ---`); + console.log(`Passed: ${msg.passed} | Failed: ${msg.failed} | Skipped: ${msg.skipped}`); + console.log(`Duration: ${duration}s`); + + if (failures.length > 0) { + console.log(`\nFailed tests (${failures.length}):`); + for (const f of failures) { + console.log(` × ${f.suite} > ${f.name}`); + if (f.error) { + // Indent multi-line errors so they read as one block per failure + const indented = f.error.replace(/\n/g, '\n '); + console.log(` ${indented}`); + } + } + } + + runComplete = true; + clearTimeout(timer); + ws.close(); + process.exit(failed || msg.failed > 0 ? 1 : 0); + break; +} +``` + +### Handle the abort path + +When `run:aborted` fires (line 86), the run ends without a `run:complete` from the browser — the CLI exits via the abort handler. The recap is only useful when individual tests failed, not when the whole run was aborted (the abort message is already a clear single block). No change to the abort handler. + +The currently-running test that triggered the abort is **not** added to `failures` (it never fires `test:fail` — abort short-circuits the runner). This matches the existing semantics: the abort message names that test directly. + +## Output format details + +| Aspect | Choice | +|---|---| +| Header | `Failed tests (N):` — `N` matches both the summary's `Failed:` count and the recap entries. | +| Marker | `×` (Unicode multiplication sign). Visually distinct from `>`/`-` already used in the stream. ASCII-only repos can swap to `*` or `X`; not parameterized initially. | +| Blank line before recap | Yes, separates from the summary. | +| Blank line after recap | No — the next thing is process exit; trailing newline only. | +| Multi-line errors | Re-indented so each `\n` lines up under the test name. Preserves stack-trace readability without breaking the per-failure visual block. | +| Long suite/test names | Not wrapped. The user's terminal handles wrapping; truncating would lose information. | +| Color | None for now. The existing CLI output is plain text; introducing color is a separate cross-cutting decision. | + +## Files changed + +| File | Change | +|---|---| +| `src/cli/run.ts` | Add `failures: FailureRecord[]` collected on `test:fail`; print recap block in `run:complete` handler when non-empty. | +| `src/tests/cli/run.spec.ts` (new or extended, depending on existing coverage) | Test: with two simulated `test:fail` events followed by `run:complete`, the captured stdout contains the `Failed tests (2):` header and both test names + error strings in order. Test: a green run (no `test:fail`) does not emit the recap header. 
| +| `README.md` | Short note in the run-output section: failed tests are repeated in a recap block at the end of the run for easy scanning. | + +## Edge cases + +| Scenario | Behavior | +|---|---| +| No failures | No recap block printed. | +| 1 failure | `Failed tests (1):` followed by one entry. Singular form not used (keep template uniform). | +| `test:fail` with no `error` field | Test name printed without an indented error line. | +| Multi-line `error` (stack trace) | Each line indented to align under the test name; reads as a block. | +| Test name contains `>` | Rendered as-is. The `Suite > Name` pattern is already established by `RUN:`/`PASS:` lines. | +| `run:complete` arrives before any `test:fail` events but reports `failed > 0` | Should not happen given the protocol, but the recap simply doesn't print (we go by collected events, not the count). The summary line still says `Failed: N`, so the discrepancy is visible. | +| Abort path (`run:aborted`) | No recap block — abort handler already prints a self-contained error and exits. Failures collected before the abort tick are not reported (run did not complete normally). | +| Same test fails twice in one run | Not possible with the current protocol; if it ever happens, both entries appear. No dedup. | + +## Testing approach + +Two tests in the existing CLI test harness pattern: + +- **Recap on failures.** Drive the message switch with: `connected`/`browser:connected` → `run:start` → `test:start`/`test:fail` × 2 → `run:complete`. Capture stdout; assert it contains `Failed tests (2):`, both suite/name strings, and the error substrings. +- **No recap on green run.** Same but with `test:pass` events and `failed: 0`. Assert stdout does **not** contain `Failed tests`. + +Manual smoke: run a real suite with one intentionally-broken test against the local relay; confirm the recap appears at the very end and survives `npx twd-relay run | tail -10`. + +## Non-goals + +- New CLI flags. The recap is unconditional when failures exist; no opt-out needed (it's additive and small). +- Reporter modes (`--reporter=minimal`, `--reporter=json`). Discussed above; separate change if pursued. +- Color output. Cross-cutting decision out of scope here. +- Aggregating failures across multiple runs. Single-run scope only. +- Dedup or grouping (e.g. "3 failures in suite X"). Flat list keeps the implementation trivial; group-by can be added later if real suites grow large enough that it matters. +- Truncating long error messages. Information loss isn't worth the savings for typical TWD failures (1–3 lines). 
From ed2559538a8426ed6f4182c921498c8400735ac5 Mon Sep 17 00:00:00 2001
From: kevinccbsg
Date: Fri, 8 May 2026 20:12:52 +0200
Subject: [PATCH 2/7] test: add failing test for end-of-run failures recap

---
 src/tests/cli/run.spec.ts | 129 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)
 create mode 100644 src/tests/cli/run.spec.ts

diff --git a/src/tests/cli/run.spec.ts b/src/tests/cli/run.spec.ts
new file mode 100644
index 0000000..13493ff
--- /dev/null
+++ b/src/tests/cli/run.spec.ts
@@ -0,0 +1,129 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { WebSocketServer, WebSocket as WsServerSocket } from 'ws';
+import { run } from '../../cli/run';
+
+const PORT = 9880;
+const HOST = 'localhost';
+const PATH = '/__twd/ws';
+
+interface Harness {
+  server: WebSocketServer;
+  logs: string[];
+  errors: string[];
+  exitPromise: Promise<number>;
+}
+
+/**
+ * Start a fake relay on PORT that, when the run() client sends `hello`,
+ * replies with `{ type: 'connected', browser: true }` and then invokes
+ * `script(ws)` so the test can stream lifecycle events.
+ *
+ * `process.exit` is mocked to resolve `exitPromise` with the exit code
+ * instead of terminating the test runner. `console.log` / `console.error`
+ * are captured into `logs` / `errors`.
+ */
+async function startHarness(
+  script: (ws: WsServerSocket) => void,
+): Promise<Harness> {
+  const logs: string[] = [];
+  const errors: string[] = [];
+
+  vi.spyOn(console, 'log').mockImplementation((...args: unknown[]) => {
+    logs.push(args.map(String).join(' '));
+  });
+  vi.spyOn(console, 'error').mockImplementation((...args: unknown[]) => {
+    errors.push(args.map(String).join(' '));
+  });
+
+  let resolveExit!: (code: number) => void;
+  const exitPromise = new Promise<number>((resolve) => {
+    resolveExit = resolve;
+  });
+  vi.spyOn(process, 'exit').mockImplementation(((code?: number) => {
+    resolveExit(code ?? 0);
+    return undefined as never;
+  }) as typeof process.exit);
+
+  const server = new WebSocketServer({ port: PORT, path: PATH });
+  await new Promise<void>((resolve) => server.on('listening', () => resolve()));
+
+  server.on('connection', (ws) => {
+    ws.on('message', (data) => {
+      const msg = JSON.parse(data.toString());
+      if (msg.type === 'hello') {
+        ws.send(JSON.stringify({ type: 'connected', browser: true }));
+      } else if (msg.type === 'run') {
+        script(ws);
+      }
+    });
+  });
+
+  return { server, logs, errors, exitPromise };
+}
+
+async function stopHarness(h: Harness): Promise<void> {
+  await new Promise<void>((resolve) => h.server.close(() => resolve()));
+}
+
+describe('cli run — failures recap', () => {
+  let harness: Harness | undefined;
+
+  beforeEach(() => {
+    harness = undefined;
+  });
+
+  afterEach(async () => {
+    if (harness) await stopHarness(harness);
+    vi.restoreAllMocks();
+  });
+
+  it('prints the recap block when tests fail', async () => {
+    harness = await startHarness((ws) => {
+      ws.send(JSON.stringify({ type: 'run:start', testCount: 2 }));
+      ws.send(
+        JSON.stringify({ type: 'test:start', suite: 'Checkout', name: 'state dropdown' }),
+      );
+      ws.send(
+        JSON.stringify({
+          type: 'test:fail',
+          suite: 'Checkout',
+          name: 'state dropdown',
+          duration: 70,
+          error: 'waitFor timed out after 2000ms. Last error: No select items found',
+        }),
+      );
+      ws.send(
+        JSON.stringify({ type: 'test:start', suite: 'Checkout', name: 'province dropdown' }),
+      );
+      ws.send(
+        JSON.stringify({
+          type: 'test:fail',
+          suite: 'Checkout',
+          name: 'province dropdown',
+          duration: 65,
+          error: 'waitFor timed out after 2000ms. 
Last error: No select items found', + }), + ); + ws.send( + JSON.stringify({ + type: 'run:complete', + passed: 0, + failed: 2, + skipped: 0, + duration: 1500, + }), + ); + }); + + run({ port: PORT, host: HOST, path: PATH, timeout: 5000 }); + + const code = await harness.exitPromise; + const out = harness.logs.join('\n'); + + expect(out).toContain('Failed tests (2):'); + expect(out).toContain('Checkout > state dropdown'); + expect(out).toContain('Checkout > province dropdown'); + expect(out).toContain('waitFor timed out after 2000ms'); + expect(code).toBe(1); + }); +}); From 0c43e1488d50085dbb52480903b4c15f07d9bf8a Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Fri, 8 May 2026 20:16:32 +0200 Subject: [PATCH 3/7] feat: print failed-tests recap block at end of run --- src/cli/run.ts | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/cli/run.ts b/src/cli/run.ts index 60d096f..2a9c80d 100644 --- a/src/cli/run.ts +++ b/src/cli/run.ts @@ -9,6 +9,12 @@ export interface RunOptions { maxTestDurationMs?: number; } +interface FailureRecord { + suite: string; + name: string; + error?: string; +} + export function run(options: RunOptions): void { const { port, timeout, path, host, testNames, maxTestDurationMs } = options; const url = `ws://${host}:${port}${path}`; @@ -19,6 +25,7 @@ export function run(options: RunOptions): void { let runSent = false; let runComplete = false; let failed = false; + const failures: FailureRecord[] = []; const timer = setTimeout(() => { console.error(`\nTimeout: no run:complete received within ${timeout / 1000}s`); @@ -65,6 +72,7 @@ export function run(options: RunOptions): void { if (msg.error) { console.log(` Error: ${msg.error}`); } + failures.push({ suite: msg.suite, name: msg.name, error: msg.error }); break; case 'test:skip': @@ -76,6 +84,18 @@ export function run(options: RunOptions): void { console.log(`\n--- Run complete ---`); console.log(`Passed: ${msg.passed} | Failed: ${msg.failed} | Skipped: ${msg.skipped}`); console.log(`Duration: ${duration}s`); + + if (failures.length > 0) { + console.log(`\nFailed tests (${failures.length}):`); + for (const f of failures) { + console.log(` × ${f.suite} > ${f.name}`); + if (f.error) { + const indented = f.error.replace(/\n/g, '\n '); + console.log(` ${indented}`); + } + } + } + runComplete = true; clearTimeout(timer); ws.close(); From 309fca314c7a15c59d385ae00c7b3f959a3b60c8 Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Fri, 8 May 2026 20:22:57 +0200 Subject: [PATCH 4/7] test: assert no recap is printed on green runs --- src/tests/cli/run.spec.ts | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/tests/cli/run.spec.ts b/src/tests/cli/run.spec.ts index 13493ff..6b89080 100644 --- a/src/tests/cli/run.spec.ts +++ b/src/tests/cli/run.spec.ts @@ -126,4 +126,39 @@ describe('cli run — failures recap', () => { expect(out).toContain('waitFor timed out after 2000ms'); expect(code).toBe(1); }); + + it('does not print the recap on a green run', async () => { + harness = await startHarness((ws) => { + ws.send(JSON.stringify({ type: 'run:start', testCount: 1 })); + ws.send( + JSON.stringify({ type: 'test:start', suite: 'Smoke', name: 'works' }), + ); + ws.send( + JSON.stringify({ + type: 'test:pass', + suite: 'Smoke', + name: 'works', + duration: 12, + }), + ); + ws.send( + JSON.stringify({ + type: 'run:complete', + passed: 1, + failed: 0, + skipped: 0, + duration: 50, + }), + ); + }); + + run({ port: PORT, host: HOST, path: PATH, timeout: 5000 }); + + const code = await 
harness.exitPromise; + const out = harness.logs.join('\n'); + + expect(out).not.toContain('Failed tests'); + expect(out).toContain('--- Run complete ---'); + expect(code).toBe(0); + }); }); From ea71b2fe84a3c8bcfaa2117cb4290cdd36ac4787 Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Fri, 8 May 2026 20:24:19 +0200 Subject: [PATCH 5/7] test: bump cli test port to 9886 to avoid vite port-counter overlap --- src/tests/cli/run.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/cli/run.spec.ts b/src/tests/cli/run.spec.ts index 6b89080..0646499 100644 --- a/src/tests/cli/run.spec.ts +++ b/src/tests/cli/run.spec.ts @@ -2,7 +2,7 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { WebSocketServer, WebSocket as WsServerSocket } from 'ws'; import { run } from '../../cli/run'; -const PORT = 9880; +const PORT = 9886; const HOST = 'localhost'; const PATH = '/__twd/ws'; From 9192148de8e75c0363ab261ef41b7ab464ef410b Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Fri, 8 May 2026 20:25:08 +0200 Subject: [PATCH 6/7] docs: mention end-of-run failures recap in README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index e46cbe1..0915bc5 100644 --- a/README.md +++ b/README.md @@ -186,6 +186,8 @@ twd-relay run --test "login" --test "signup" When `--test` is used and no tests match, the CLI prints the available test names so you can correct the filter. +When any tests fail, the CLI prints a recap block at the very end of the output listing each failed test and its error. This survives `tail -N` truncation and is easy to copy as a single block. + --- ## License From ea85a6d69f885a4d48e62b6a4afc9eb2e49d67c0 Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Fri, 8 May 2026 20:27:53 +0200 Subject: [PATCH 7/7] test: cover empty-error and multi-line error edge cases --- src/tests/cli/run.spec.ts | 77 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/src/tests/cli/run.spec.ts b/src/tests/cli/run.spec.ts index 0646499..541be1e 100644 --- a/src/tests/cli/run.spec.ts +++ b/src/tests/cli/run.spec.ts @@ -161,4 +161,81 @@ describe('cli run — failures recap', () => { expect(out).toContain('--- Run complete ---'); expect(code).toBe(0); }); + + it('omits the indented error line when test:fail has no error field', async () => { + harness = await startHarness((ws) => { + ws.send(JSON.stringify({ type: 'run:start', testCount: 1 })); + ws.send( + JSON.stringify({ type: 'test:start', suite: 'Lonely', name: 'no error info' }), + ); + ws.send( + JSON.stringify({ + type: 'test:fail', + suite: 'Lonely', + name: 'no error info', + duration: 5, + }), + ); + ws.send( + JSON.stringify({ + type: 'run:complete', + passed: 0, + failed: 1, + skipped: 0, + duration: 100, + }), + ); + }); + + run({ port: PORT, host: HOST, path: PATH, timeout: 5000 }); + + const code = await harness.exitPromise; + const out = harness.logs.join('\n'); + + expect(out).toContain('Failed tests (1):'); + expect(out).toContain('× Lonely > no error info'); + // Find the recap section and confirm there's no indented error line under it. 
+ const recapStart = out.indexOf('Failed tests (1):'); + const recap = out.slice(recapStart); + // The line after `× Lonely > no error info` should NOT start with 4 spaces of error text + expect(recap).not.toMatch(/× Lonely > no error info\n {4}\S/); + expect(code).toBe(1); + }); + + it('indents each line of a multi-line error under the failure entry', async () => { + harness = await startHarness((ws) => { + ws.send(JSON.stringify({ type: 'run:start', testCount: 1 })); + ws.send( + JSON.stringify({ type: 'test:start', suite: 'Stacky', name: 'throws with stack' }), + ); + ws.send( + JSON.stringify({ + type: 'test:fail', + suite: 'Stacky', + name: 'throws with stack', + duration: 8, + error: 'Boom\n at frame1\n at frame2', + }), + ); + ws.send( + JSON.stringify({ + type: 'run:complete', + passed: 0, + failed: 1, + skipped: 0, + duration: 100, + }), + ); + }); + + run({ port: PORT, host: HOST, path: PATH, timeout: 5000 }); + + const code = await harness.exitPromise; + const out = harness.logs.join('\n'); + + // Recap section should have all three error lines aligned under the test name (4-space indent). + const recap = out.slice(out.indexOf('Failed tests (1):')); + expect(recap).toContain(' Boom\n at frame1\n at frame2'); + expect(code).toBe(1); + }); });