From b22f2ddfbaa02dd7756c2b91289cde99bde89c59 Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Fri, 8 May 2026 20:11:45 +0200 Subject: [PATCH 1/7] docs: add spec and plan for end-of-run failures recap Co-Authored-By: Claude Opus 4.7 (1M context) --- .../plans/2026-05-08-failures-recap.md | 410 ++++++++++++++++++ .../specs/2026-05-08-failures-recap-design.md | 168 +++++++ 2 files changed, 578 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-08-failures-recap.md create mode 100644 docs/superpowers/specs/2026-05-08-failures-recap-design.md diff --git a/docs/superpowers/plans/2026-05-08-failures-recap.md b/docs/superpowers/plans/2026-05-08-failures-recap.md new file mode 100644 index 0000000..4ce4f91 --- /dev/null +++ b/docs/superpowers/plans/2026-05-08-failures-recap.md @@ -0,0 +1,410 @@ +# End-of-Run Failures Recap Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** After the existing `--- Run complete ---` summary, when any tests failed, print a recap block listing every failed test (with its error) so the failure list survives `tail -N` and can be copied as a single block. + +**Architecture:** All changes live in `src/cli/run.ts`. A module-local `failures: FailureRecord[]` array is appended to in the `test:fail` handler; the `run:complete` handler renders the recap block before exit when the array is non-empty. No protocol changes, no new flags, no relay/browser changes. Tests use a real `WebSocketServer` that scripts messages to the `run()` function, with `process.exit` and `console.log` mocked to capture exit code and stdout. + +**Tech Stack:** TypeScript, Node `ws`, Vitest. Existing CLI in `src/cli/run.ts`. Spec: `docs/superpowers/specs/2026-05-08-failures-recap-design.md`. + +--- + +## File Structure + +| File | Change | +|---|---| +| `src/cli/run.ts` | Add `FailureRecord` interface + `failures` array; append on `test:fail`; render recap block in `run:complete` handler when non-empty. | +| `src/tests/cli/run.spec.ts` | **New file.** Two tests: recap appears with two failures; recap absent on a green run. Spins up a `WebSocketServer` on port 9880 that scripts the message stream to `run()`. | +| `README.md` | Two-sentence note appended at the end of the `## CLI run command` section describing the recap block. | + +Port 9880 is unused — existing test files use 9877 (relay), 9878 / 9879 (others). Keeps the convention of one port per test file. + +--- + +## Task 1: Add failing test — recap on failures + +**Files:** +- Create: `src/tests/cli/run.spec.ts` + +**Why TDD here:** The recap is purely an output change. A test that asserts the exact strings in stdout is the cheapest, most precise verification — it documents the format and catches accidental regressions of the very thing we're shipping. 
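
For reference, this is the exact block the test will assert on, as the Task 2 implementation renders it (the suite/test names are the harness's fixtures, not real suites):

```
Failed tests (2):
  × Checkout > state dropdown
    waitFor timed out after 2000ms. Last error: No select items found
  × Checkout > province dropdown
    waitFor timed out after 2000ms. Last error: No select items found
```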

- [ ] **Step 1: Create the test file with one failing test**

Write `src/tests/cli/run.spec.ts`:

```typescript
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { WebSocketServer, WebSocket as WsServerSocket } from 'ws';
import { run } from '../../cli/run';

const PORT = 9880;
const HOST = 'localhost';
const PATH = '/__twd/ws';

interface Harness {
  server: WebSocketServer;
  logs: string[];
  errors: string[];
  exitPromise: Promise<number>;
}

/**
 * Start a fake relay on PORT that, when the run() client sends `hello`,
 * replies with `{ type: 'connected', browser: true }` and then invokes
 * `script(ws)` so the test can stream lifecycle events.
 *
 * `process.exit` is mocked to resolve `exitPromise` with the exit code
 * instead of terminating the test runner. `console.log` / `console.error`
 * are captured into `logs` / `errors`.
 */
async function startHarness(
  script: (ws: WsServerSocket) => void,
): Promise<Harness> {
  const logs: string[] = [];
  const errors: string[] = [];

  vi.spyOn(console, 'log').mockImplementation((...args: unknown[]) => {
    logs.push(args.map(String).join(' '));
  });
  vi.spyOn(console, 'error').mockImplementation((...args: unknown[]) => {
    errors.push(args.map(String).join(' '));
  });

  let resolveExit!: (code: number) => void;
  const exitPromise = new Promise<number>((resolve) => {
    resolveExit = resolve;
  });
  vi.spyOn(process, 'exit').mockImplementation(((code?: number) => {
    resolveExit(code ?? 0);
    return undefined as never;
  }) as typeof process.exit);

  const server = new WebSocketServer({ port: PORT, path: PATH });
  await new Promise<void>((resolve) => server.on('listening', () => resolve()));

  server.on('connection', (ws) => {
    ws.on('message', (data) => {
      const msg = JSON.parse(data.toString());
      if (msg.type === 'hello') {
        ws.send(JSON.stringify({ type: 'connected', browser: true }));
      } else if (msg.type === 'run') {
        script(ws);
      }
    });
  });

  return { server, logs, errors, exitPromise };
}

async function stopHarness(h: Harness): Promise<void> {
  await new Promise<void>((resolve) => h.server.close(() => resolve()));
}

describe('cli run — failures recap', () => {
  let harness: Harness | undefined;

  beforeEach(() => {
    harness = undefined;
  });

  afterEach(async () => {
    if (harness) await stopHarness(harness);
    vi.restoreAllMocks();
  });

  it('prints the recap block when tests fail', async () => {
    harness = await startHarness((ws) => {
      ws.send(JSON.stringify({ type: 'run:start', testCount: 2 }));
      ws.send(
        JSON.stringify({ type: 'test:start', suite: 'Checkout', name: 'state dropdown' }),
      );
      ws.send(
        JSON.stringify({
          type: 'test:fail',
          suite: 'Checkout',
          name: 'state dropdown',
          duration: 70,
          error: 'waitFor timed out after 2000ms. Last error: No select items found',
        }),
      );
      ws.send(
        JSON.stringify({ type: 'test:start', suite: 'Checkout', name: 'province dropdown' }),
      );
      ws.send(
        JSON.stringify({
          type: 'test:fail',
          suite: 'Checkout',
          name: 'province dropdown',
          duration: 65,
          error: 'waitFor timed out after 2000ms. 
Last error: No select items found', + }), + ); + ws.send( + JSON.stringify({ + type: 'run:complete', + passed: 0, + failed: 2, + skipped: 0, + duration: 1500, + }), + ); + }); + + run({ port: PORT, host: HOST, path: PATH, timeout: 5000 }); + + const code = await harness.exitPromise; + const out = harness.logs.join('\n'); + + expect(out).toContain('Failed tests (2):'); + expect(out).toContain('Checkout > state dropdown'); + expect(out).toContain('Checkout > province dropdown'); + expect(out).toContain('waitFor timed out after 2000ms'); + expect(code).toBe(1); + }); +}); +``` + +- [ ] **Step 2: Run the test, confirm it fails** + +Run: `npx vitest run src/tests/cli/run.spec.ts` + +Expected: FAIL. The assertion `expect(out).toContain('Failed tests (2):')` fails because today's `run:complete` handler prints only the summary lines, no recap. + +- [ ] **Step 3: Commit the failing test** + +```bash +git add src/tests/cli/run.spec.ts +git commit -m "test: add failing test for end-of-run failures recap" +``` + +--- + +## Task 2: Implement the recap + +**Files:** +- Modify: `src/cli/run.ts:62-84` + +- [ ] **Step 1: Add the FailureRecord interface and failures array** + +In `src/cli/run.ts`, after the existing `RunOptions` interface (around line 10) and before `export function run`, add: + +```typescript +interface FailureRecord { + suite: string; + name: string; + error?: string; +} +``` + +Inside `run()`, alongside the other local state declarations (`runSent`, `runComplete`, `failed`, currently lines 19–21), add: + +```typescript + const failures: FailureRecord[] = []; +``` + +- [ ] **Step 2: Append to failures in the `test:fail` handler** + +Replace the `test:fail` case (currently lines 62–68): + +```typescript + case 'test:fail': + failed = true; + console.log(` FAIL: ${msg.suite} > ${msg.name} (${msg.duration}ms)`); + if (msg.error) { + console.log(` Error: ${msg.error}`); + } + break; +``` + +with: + +```typescript + case 'test:fail': + failed = true; + console.log(` FAIL: ${msg.suite} > ${msg.name} (${msg.duration}ms)`); + if (msg.error) { + console.log(` Error: ${msg.error}`); + } + failures.push({ suite: msg.suite, name: msg.name, error: msg.error }); + break; +``` + +- [ ] **Step 3: Render the recap block in `run:complete`** + +Replace the `run:complete` case (currently lines 74–84): + +```typescript + case 'run:complete': { + const duration = (msg.duration / 1000).toFixed(1); + console.log(`\n--- Run complete ---`); + console.log(`Passed: ${msg.passed} | Failed: ${msg.failed} | Skipped: ${msg.skipped}`); + console.log(`Duration: ${duration}s`); + runComplete = true; + clearTimeout(timer); + ws.close(); + process.exit(failed || msg.failed > 0 ? 1 : 0); + break; + } +``` + +with: + +```typescript + case 'run:complete': { + const duration = (msg.duration / 1000).toFixed(1); + console.log(`\n--- Run complete ---`); + console.log(`Passed: ${msg.passed} | Failed: ${msg.failed} | Skipped: ${msg.skipped}`); + console.log(`Duration: ${duration}s`); + + if (failures.length > 0) { + console.log(`\nFailed tests (${failures.length}):`); + for (const f of failures) { + console.log(` × ${f.suite} > ${f.name}`); + if (f.error) { + const indented = f.error.replace(/\n/g, '\n '); + console.log(` ${indented}`); + } + } + } + + runComplete = true; + clearTimeout(timer); + ws.close(); + process.exit(failed || msg.failed > 0 ? 1 : 0); + break; + } +``` + +- [ ] **Step 4: Run the failing test, confirm it now passes** + +Run: `npx vitest run src/tests/cli/run.spec.ts` + +Expected: PASS. 
The recap block is now printed and contains both failure entries plus the indented error lines. + +- [ ] **Step 5: Run the full test suite to confirm no regressions** + +Run: `npm test -- --run` + +Expected: all tests pass (existing 26 tests + the new one). + +- [ ] **Step 6: Commit** + +```bash +git add src/cli/run.ts +git commit -m "feat: print failed-tests recap block at end of run" +``` + +--- + +## Task 3: Add the green-run regression test + +**Files:** +- Modify: `src/tests/cli/run.spec.ts` + +- [ ] **Step 1: Add the second test case** + +Add this `it` block inside the existing `describe('cli run — failures recap', ...)`, after the first test: + +```typescript + it('does not print the recap on a green run', async () => { + harness = await startHarness((ws) => { + ws.send(JSON.stringify({ type: 'run:start', testCount: 1 })); + ws.send( + JSON.stringify({ type: 'test:start', suite: 'Smoke', name: 'works' }), + ); + ws.send( + JSON.stringify({ + type: 'test:pass', + suite: 'Smoke', + name: 'works', + duration: 12, + }), + ); + ws.send( + JSON.stringify({ + type: 'run:complete', + passed: 1, + failed: 0, + skipped: 0, + duration: 50, + }), + ); + }); + + run({ port: PORT, host: HOST, path: PATH, timeout: 5000 }); + + const code = await harness.exitPromise; + const out = harness.logs.join('\n'); + + expect(out).not.toContain('Failed tests'); + expect(out).toContain('--- Run complete ---'); + expect(code).toBe(0); + }); +``` + +- [ ] **Step 2: Run the file, confirm both tests pass** + +Run: `npx vitest run src/tests/cli/run.spec.ts` + +Expected: 2 passed. + +- [ ] **Step 3: Run the full test suite once more** + +Run: `npm test -- --run` + +Expected: all tests pass. + +- [ ] **Step 4: Commit** + +```bash +git add src/tests/cli/run.spec.ts +git commit -m "test: assert no recap is printed on green runs" +``` + +--- + +## Task 4: Document the recap in README + +**Files:** +- Modify: `README.md` (end of `## CLI run command` section, around line 187) + +- [ ] **Step 1: Append a short note after the existing flag table paragraph** + +In `README.md`, after the line `When --test is used and no tests match, the CLI prints the available test names so you can correct the filter.` (around line 187) and before the `---` separator (line 189), insert a blank line and then: + +```markdown +When any tests fail, the CLI prints a recap block at the very end of the output listing each failed test and its error. This survives `tail -N` truncation and is easy to copy as a single block. +``` + +- [ ] **Step 2: Verify by reading the updated section** + +Confirm the new sentence sits between the `--test` paragraph and the `---` separator and that surrounding markdown still renders. + +- [ ] **Step 3: Commit** + +```bash +git add README.md +git commit -m "docs: mention end-of-run failures recap in README" +``` + +--- + +## Manual smoke (optional, post-merge) + +Per the spec, run a real suite with one intentionally broken test against a local relay and confirm: + +1. The recap appears at the very end of the output. +2. `npx twd-relay run | tail -10` still shows the recap. +3. A fully green run prints **no** recap header. + +This is not a blocking step for merging; the unit tests cover the behavior. It is worth doing once before publishing a new version. + +--- + +## Out of Scope (per spec) + +- New CLI flags, reporter modes, color output. +- Changes to the abort path (`run:aborted` already prints a self-contained error). +- Cross-run aggregation, dedup/grouping, or truncation of long errors. 
- Version bump / publish — handled separately on `main` per the project's existing workflow.
diff --git a/docs/superpowers/specs/2026-05-08-failures-recap-design.md b/docs/superpowers/specs/2026-05-08-failures-recap-design.md
new file mode 100644
index 0000000..06a178d
--- /dev/null
+++ b/docs/superpowers/specs/2026-05-08-failures-recap-design.md
@@ -0,0 +1,168 @@
# End-of-Run Failures Recap

## Problem

`twd-relay run` prints test results in the order they happen — `RUN:` then `PASS:` / `FAIL:` / `SKIP:` per test, followed by a final 3-line summary (`Passed | Failed | Skipped`, `Duration`). When a run produces ~75+ tests, failures get buried in the stream:

```
  RUN: Suite > test 1
  PASS: Suite > test 1 (42ms)
  ... (74 more lines) ...
  FAIL: Suite > test 35 (70ms)
    Error: waitFor timed out after 2000ms.
  ... (40 more lines) ...
--- Run complete ---
Passed: 75 | Failed: 2 | Skipped: 0
Duration: 65.7s
```

Two real consequences:

1. **Truncated logs lose failures.** Anyone (CI logs, terminal scrollback, AI agents piping through `tail -N`) who only sees the tail of the output gets the summary numbers but not the names of the failing tests. They have to re-run with `grep` or rerun the whole suite to find what failed. Observed in practice: an AI agent piped through `tail -80`, saw the `Failed: 2` count but only one of the two failure lines, and burned a second 65 s run just to extract the second name.
2. **No single block to act on.** Even with full output, mentally collating "which tests failed" requires scanning the whole stream and matching `FAIL:` lines against `Error:` lines. There's no terminal section that says "here's what to investigate."

## Solution

After the existing `--- Run complete ---` summary, when `failed > 0`, print a recap block listing every failed test with its error:

```
--- Run complete ---
Passed: 75 | Failed: 2 | Skipped: 0
Duration: 65.7s

Failed tests (2):
  × Checkout New — JSON Order Flow > should show state dropdown for USA
    waitFor timed out after 2000ms. Last error: No select items found
  × Checkout New — JSON Order Flow > should show province dropdown for Canada
    waitFor timed out after 2000ms. Last error: No select items found
```

Properties that matter:

- **At the very end of output.** Survives any `tail -N` with `N ≥ ~10` regardless of suite size.
- **One block, one purpose.** Easy to copy/paste into an issue, a chat message, or another tool's input.
- **Per-failure error preserved.** No need to scroll back to find `Error:` lines.
- **Not printed when nothing failed.** Zero noise on green runs.

## Why this and not a `--reporter` flag

A `--reporter=minimal` flag (suppress `RUN:`/`PASS:` lines) was discussed as an alternative. The recap block subsumes its main use case — "I just want to see what failed" — without losing the per-test progress stream that's useful for watching long runs interactively. A reporter flag is still worth considering as a follow-up for very large suites or strict CI logs, but it's a strictly separate change. This spec scopes to the recap block.

## Implementation

All changes in `src/cli/run.ts`. No protocol changes, no relay/browser changes, no new flags.
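
For orientation, these are the two incoming messages this change touches, with the field shapes the handlers below rely on (a sketch only — example values are lifted from the test fixtures, not a full protocol description):

```ts
// Emitted once per failing test; `error` is optional.
const fail = {
  type: 'test:fail', suite: 'Checkout', name: 'state dropdown', duration: 70,
  error: 'waitFor timed out after 2000ms. Last error: No select items found',
};

// Ends the run; triggers the summary and, when failures were collected, the recap.
const complete = { type: 'run:complete', passed: 0, failed: 2, skipped: 0, duration: 1500 };
```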
+ +### Collect failures during the run + +Add a module-local array, append on `test:fail`: + +```ts +interface FailureRecord { + suite: string; + name: string; + error?: string; +} + +const failures: FailureRecord[] = []; +``` + +In the `test:fail` handler (currently lines 62–68), after the existing `console.log` calls: + +```ts +case 'test:fail': + failed = true; + console.log(` FAIL: ${msg.suite} > ${msg.name} (${msg.duration}ms)`); + if (msg.error) { + console.log(` Error: ${msg.error}`); + } + failures.push({ suite: msg.suite, name: msg.name, error: msg.error }); + break; +``` + +### Print the recap block on `run:complete` + +In the `run:complete` handler (currently lines 74–84), after the existing summary lines, before the `process.exit`: + +```ts +case 'run:complete': { + const duration = (msg.duration / 1000).toFixed(1); + console.log(`\n--- Run complete ---`); + console.log(`Passed: ${msg.passed} | Failed: ${msg.failed} | Skipped: ${msg.skipped}`); + console.log(`Duration: ${duration}s`); + + if (failures.length > 0) { + console.log(`\nFailed tests (${failures.length}):`); + for (const f of failures) { + console.log(` × ${f.suite} > ${f.name}`); + if (f.error) { + // Indent multi-line errors so they read as one block per failure + const indented = f.error.replace(/\n/g, '\n '); + console.log(` ${indented}`); + } + } + } + + runComplete = true; + clearTimeout(timer); + ws.close(); + process.exit(failed || msg.failed > 0 ? 1 : 0); + break; +} +``` + +### Handle the abort path + +When `run:aborted` fires (line 86), the run ends without a `run:complete` from the browser — the CLI exits via the abort handler. The recap is only useful when individual tests failed, not when the whole run was aborted (the abort message is already a clear single block). No change to the abort handler. + +The currently-running test that triggered the abort is **not** added to `failures` (it never fires `test:fail` — abort short-circuits the runner). This matches the existing semantics: the abort message names that test directly. + +## Output format details + +| Aspect | Choice | +|---|---| +| Header | `Failed tests (N):` — `N` matches both the summary's `Failed:` count and the recap entries. | +| Marker | `×` (Unicode multiplication sign). Visually distinct from `>`/`-` already used in the stream. ASCII-only repos can swap to `*` or `X`; not parameterized initially. | +| Blank line before recap | Yes, separates from the summary. | +| Blank line after recap | No — the next thing is process exit; trailing newline only. | +| Multi-line errors | Re-indented so each `\n` lines up under the test name. Preserves stack-trace readability without breaking the per-failure visual block. | +| Long suite/test names | Not wrapped. The user's terminal handles wrapping; truncating would lose information. | +| Color | None for now. The existing CLI output is plain text; introducing color is a separate cross-cutting decision. | + +## Files changed + +| File | Change | +|---|---| +| `src/cli/run.ts` | Add `failures: FailureRecord[]` collected on `test:fail`; print recap block in `run:complete` handler when non-empty. | +| `src/tests/cli/run.spec.ts` (new or extended, depending on existing coverage) | Test: with two simulated `test:fail` events followed by `run:complete`, the captured stdout contains the `Failed tests (2):` header and both test names + error strings in order. Test: a green run (no `test:fail`) does not emit the recap header. 
| +| `README.md` | Short note in the run-output section: failed tests are repeated in a recap block at the end of the run for easy scanning. | + +## Edge cases + +| Scenario | Behavior | +|---|---| +| No failures | No recap block printed. | +| 1 failure | `Failed tests (1):` followed by one entry. Singular form not used (keep template uniform). | +| `test:fail` with no `error` field | Test name printed without an indented error line. | +| Multi-line `error` (stack trace) | Each line indented to align under the test name; reads as a block. | +| Test name contains `>` | Rendered as-is. The `Suite > Name` pattern is already established by `RUN:`/`PASS:` lines. | +| `run:complete` arrives before any `test:fail` events but reports `failed > 0` | Should not happen given the protocol, but the recap simply doesn't print (we go by collected events, not the count). The summary line still says `Failed: N`, so the discrepancy is visible. | +| Abort path (`run:aborted`) | No recap block — abort handler already prints a self-contained error and exits. Failures collected before the abort tick are not reported (run did not complete normally). | +| Same test fails twice in one run | Not possible with the current protocol; if it ever happens, both entries appear. No dedup. | + +## Testing approach + +Two tests in the existing CLI test harness pattern: + +- **Recap on failures.** Drive the message switch with: `connected`/`browser:connected` → `run:start` → `test:start`/`test:fail` × 2 → `run:complete`. Capture stdout; assert it contains `Failed tests (2):`, both suite/name strings, and the error substrings. +- **No recap on green run.** Same but with `test:pass` events and `failed: 0`. Assert stdout does **not** contain `Failed tests`. + +Manual smoke: run a real suite with one intentionally-broken test against the local relay; confirm the recap appears at the very end and survives `npx twd-relay run | tail -10`. + +## Non-goals + +- New CLI flags. The recap is unconditional when failures exist; no opt-out needed (it's additive and small). +- Reporter modes (`--reporter=minimal`, `--reporter=json`). Discussed above; separate change if pursued. +- Color output. Cross-cutting decision out of scope here. +- Aggregating failures across multiple runs. Single-run scope only. +- Dedup or grouping (e.g. "3 failures in suite X"). Flat list keeps the implementation trivial; group-by can be added later if real suites grow large enough that it matters. +- Truncating long error messages. Information loss isn't worth the savings for typical TWD failures (1–3 lines). 
From ed2559538a8426ed6f4182c921498c8400735ac5 Mon Sep 17 00:00:00 2001
From: kevinccbsg
Date: Fri, 8 May 2026 20:12:52 +0200
Subject: [PATCH 2/7] test: add failing test for end-of-run failures recap

---
 src/tests/cli/run.spec.ts | 129 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)
 create mode 100644 src/tests/cli/run.spec.ts

diff --git a/src/tests/cli/run.spec.ts b/src/tests/cli/run.spec.ts
new file mode 100644
index 0000000..13493ff
--- /dev/null
+++ b/src/tests/cli/run.spec.ts
@@ -0,0 +1,129 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { WebSocketServer, WebSocket as WsServerSocket } from 'ws';
+import { run } from '../../cli/run';
+
+const PORT = 9880;
+const HOST = 'localhost';
+const PATH = '/__twd/ws';
+
+interface Harness {
+  server: WebSocketServer;
+  logs: string[];
+  errors: string[];
+  exitPromise: Promise<number>;
+}
+
+/**
+ * Start a fake relay on PORT that, when the run() client sends `hello`,
+ * replies with `{ type: 'connected', browser: true }` and then invokes
+ * `script(ws)` so the test can stream lifecycle events.
+ *
+ * `process.exit` is mocked to resolve `exitPromise` with the exit code
+ * instead of terminating the test runner. `console.log` / `console.error`
+ * are captured into `logs` / `errors`.
+ */
+async function startHarness(
+  script: (ws: WsServerSocket) => void,
+): Promise<Harness> {
+  const logs: string[] = [];
+  const errors: string[] = [];
+
+  vi.spyOn(console, 'log').mockImplementation((...args: unknown[]) => {
+    logs.push(args.map(String).join(' '));
+  });
+  vi.spyOn(console, 'error').mockImplementation((...args: unknown[]) => {
+    errors.push(args.map(String).join(' '));
+  });
+
+  let resolveExit!: (code: number) => void;
+  const exitPromise = new Promise<number>((resolve) => {
+    resolveExit = resolve;
+  });
+  vi.spyOn(process, 'exit').mockImplementation(((code?: number) => {
+    resolveExit(code ?? 0);
+    return undefined as never;
+  }) as typeof process.exit);
+
+  const server = new WebSocketServer({ port: PORT, path: PATH });
+  await new Promise<void>((resolve) => server.on('listening', () => resolve()));
+
+  server.on('connection', (ws) => {
+    ws.on('message', (data) => {
+      const msg = JSON.parse(data.toString());
+      if (msg.type === 'hello') {
+        ws.send(JSON.stringify({ type: 'connected', browser: true }));
+      } else if (msg.type === 'run') {
+        script(ws);
+      }
+    });
+  });
+
+  return { server, logs, errors, exitPromise };
+}
+
+async function stopHarness(h: Harness): Promise<void> {
+  await new Promise<void>((resolve) => h.server.close(() => resolve()));
+}
+
+describe('cli run — failures recap', () => {
+  let harness: Harness | undefined;
+
+  beforeEach(() => {
+    harness = undefined;
+  });
+
+  afterEach(async () => {
+    if (harness) await stopHarness(harness);
+    vi.restoreAllMocks();
+  });
+
+  it('prints the recap block when tests fail', async () => {
+    harness = await startHarness((ws) => {
+      ws.send(JSON.stringify({ type: 'run:start', testCount: 2 }));
+      ws.send(
+        JSON.stringify({ type: 'test:start', suite: 'Checkout', name: 'state dropdown' }),
+      );
+      ws.send(
+        JSON.stringify({
+          type: 'test:fail',
+          suite: 'Checkout',
+          name: 'state dropdown',
+          duration: 70,
+          error: 'waitFor timed out after 2000ms. Last error: No select items found',
+        }),
+      );
+      ws.send(
+        JSON.stringify({ type: 'test:start', suite: 'Checkout', name: 'province dropdown' }),
+      );
+      ws.send(
+        JSON.stringify({
+          type: 'test:fail',
+          suite: 'Checkout',
+          name: 'province dropdown',
+          duration: 65,
+          error: 'waitFor timed out after 2000ms. 
Last error: No select items found', + }), + ); + ws.send( + JSON.stringify({ + type: 'run:complete', + passed: 0, + failed: 2, + skipped: 0, + duration: 1500, + }), + ); + }); + + run({ port: PORT, host: HOST, path: PATH, timeout: 5000 }); + + const code = await harness.exitPromise; + const out = harness.logs.join('\n'); + + expect(out).toContain('Failed tests (2):'); + expect(out).toContain('Checkout > state dropdown'); + expect(out).toContain('Checkout > province dropdown'); + expect(out).toContain('waitFor timed out after 2000ms'); + expect(code).toBe(1); + }); +}); From 0c43e1488d50085dbb52480903b4c15f07d9bf8a Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Fri, 8 May 2026 20:16:32 +0200 Subject: [PATCH 3/7] feat: print failed-tests recap block at end of run --- src/cli/run.ts | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/cli/run.ts b/src/cli/run.ts index 60d096f..2a9c80d 100644 --- a/src/cli/run.ts +++ b/src/cli/run.ts @@ -9,6 +9,12 @@ export interface RunOptions { maxTestDurationMs?: number; } +interface FailureRecord { + suite: string; + name: string; + error?: string; +} + export function run(options: RunOptions): void { const { port, timeout, path, host, testNames, maxTestDurationMs } = options; const url = `ws://${host}:${port}${path}`; @@ -19,6 +25,7 @@ export function run(options: RunOptions): void { let runSent = false; let runComplete = false; let failed = false; + const failures: FailureRecord[] = []; const timer = setTimeout(() => { console.error(`\nTimeout: no run:complete received within ${timeout / 1000}s`); @@ -65,6 +72,7 @@ export function run(options: RunOptions): void { if (msg.error) { console.log(` Error: ${msg.error}`); } + failures.push({ suite: msg.suite, name: msg.name, error: msg.error }); break; case 'test:skip': @@ -76,6 +84,18 @@ export function run(options: RunOptions): void { console.log(`\n--- Run complete ---`); console.log(`Passed: ${msg.passed} | Failed: ${msg.failed} | Skipped: ${msg.skipped}`); console.log(`Duration: ${duration}s`); + + if (failures.length > 0) { + console.log(`\nFailed tests (${failures.length}):`); + for (const f of failures) { + console.log(` × ${f.suite} > ${f.name}`); + if (f.error) { + const indented = f.error.replace(/\n/g, '\n '); + console.log(` ${indented}`); + } + } + } + runComplete = true; clearTimeout(timer); ws.close(); From 309fca314c7a15c59d385ae00c7b3f959a3b60c8 Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Fri, 8 May 2026 20:22:57 +0200 Subject: [PATCH 4/7] test: assert no recap is printed on green runs --- src/tests/cli/run.spec.ts | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/tests/cli/run.spec.ts b/src/tests/cli/run.spec.ts index 13493ff..6b89080 100644 --- a/src/tests/cli/run.spec.ts +++ b/src/tests/cli/run.spec.ts @@ -126,4 +126,39 @@ describe('cli run — failures recap', () => { expect(out).toContain('waitFor timed out after 2000ms'); expect(code).toBe(1); }); + + it('does not print the recap on a green run', async () => { + harness = await startHarness((ws) => { + ws.send(JSON.stringify({ type: 'run:start', testCount: 1 })); + ws.send( + JSON.stringify({ type: 'test:start', suite: 'Smoke', name: 'works' }), + ); + ws.send( + JSON.stringify({ + type: 'test:pass', + suite: 'Smoke', + name: 'works', + duration: 12, + }), + ); + ws.send( + JSON.stringify({ + type: 'run:complete', + passed: 1, + failed: 0, + skipped: 0, + duration: 50, + }), + ); + }); + + run({ port: PORT, host: HOST, path: PATH, timeout: 5000 }); + + const code = await 
harness.exitPromise; + const out = harness.logs.join('\n'); + + expect(out).not.toContain('Failed tests'); + expect(out).toContain('--- Run complete ---'); + expect(code).toBe(0); + }); }); From ea71b2fe84a3c8bcfaa2117cb4290cdd36ac4787 Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Fri, 8 May 2026 20:24:19 +0200 Subject: [PATCH 5/7] test: bump cli test port to 9886 to avoid vite port-counter overlap --- src/tests/cli/run.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/cli/run.spec.ts b/src/tests/cli/run.spec.ts index 6b89080..0646499 100644 --- a/src/tests/cli/run.spec.ts +++ b/src/tests/cli/run.spec.ts @@ -2,7 +2,7 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { WebSocketServer, WebSocket as WsServerSocket } from 'ws'; import { run } from '../../cli/run'; -const PORT = 9880; +const PORT = 9886; const HOST = 'localhost'; const PATH = '/__twd/ws'; From 9192148de8e75c0363ab261ef41b7ab464ef410b Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Fri, 8 May 2026 20:25:08 +0200 Subject: [PATCH 6/7] docs: mention end-of-run failures recap in README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index e46cbe1..0915bc5 100644 --- a/README.md +++ b/README.md @@ -186,6 +186,8 @@ twd-relay run --test "login" --test "signup" When `--test` is used and no tests match, the CLI prints the available test names so you can correct the filter. +When any tests fail, the CLI prints a recap block at the very end of the output listing each failed test and its error. This survives `tail -N` truncation and is easy to copy as a single block. + --- ## License From ea85a6d69f885a4d48e62b6a4afc9eb2e49d67c0 Mon Sep 17 00:00:00 2001 From: kevinccbsg Date: Fri, 8 May 2026 20:27:53 +0200 Subject: [PATCH 7/7] test: cover empty-error and multi-line error edge cases --- src/tests/cli/run.spec.ts | 77 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/src/tests/cli/run.spec.ts b/src/tests/cli/run.spec.ts index 0646499..541be1e 100644 --- a/src/tests/cli/run.spec.ts +++ b/src/tests/cli/run.spec.ts @@ -161,4 +161,81 @@ describe('cli run — failures recap', () => { expect(out).toContain('--- Run complete ---'); expect(code).toBe(0); }); + + it('omits the indented error line when test:fail has no error field', async () => { + harness = await startHarness((ws) => { + ws.send(JSON.stringify({ type: 'run:start', testCount: 1 })); + ws.send( + JSON.stringify({ type: 'test:start', suite: 'Lonely', name: 'no error info' }), + ); + ws.send( + JSON.stringify({ + type: 'test:fail', + suite: 'Lonely', + name: 'no error info', + duration: 5, + }), + ); + ws.send( + JSON.stringify({ + type: 'run:complete', + passed: 0, + failed: 1, + skipped: 0, + duration: 100, + }), + ); + }); + + run({ port: PORT, host: HOST, path: PATH, timeout: 5000 }); + + const code = await harness.exitPromise; + const out = harness.logs.join('\n'); + + expect(out).toContain('Failed tests (1):'); + expect(out).toContain('× Lonely > no error info'); + // Find the recap section and confirm there's no indented error line under it. 
+ const recapStart = out.indexOf('Failed tests (1):'); + const recap = out.slice(recapStart); + // The line after `× Lonely > no error info` should NOT start with 4 spaces of error text + expect(recap).not.toMatch(/× Lonely > no error info\n {4}\S/); + expect(code).toBe(1); + }); + + it('indents each line of a multi-line error under the failure entry', async () => { + harness = await startHarness((ws) => { + ws.send(JSON.stringify({ type: 'run:start', testCount: 1 })); + ws.send( + JSON.stringify({ type: 'test:start', suite: 'Stacky', name: 'throws with stack' }), + ); + ws.send( + JSON.stringify({ + type: 'test:fail', + suite: 'Stacky', + name: 'throws with stack', + duration: 8, + error: 'Boom\n at frame1\n at frame2', + }), + ); + ws.send( + JSON.stringify({ + type: 'run:complete', + passed: 0, + failed: 1, + skipped: 0, + duration: 100, + }), + ); + }); + + run({ port: PORT, host: HOST, path: PATH, timeout: 5000 }); + + const code = await harness.exitPromise; + const out = harness.logs.join('\n'); + + // Recap section should have all three error lines aligned under the test name (4-space indent). + const recap = out.slice(out.indexOf('Failed tests (1):')); + expect(recap).toContain(' Boom\n at frame1\n at frame2'); + expect(code).toBe(1); + }); });