From 49abf3a4cb9762c162e6a8b1ebdfc911148065ae Mon Sep 17 00:00:00 2001 From: Steve Nims Date: Fri, 30 Jan 2026 21:59:48 -0500 Subject: [PATCH 1/2] feat: use interrupt() as graceful timeout alternative (#347) Replace single-tier AbortController.abort() timeout with a two-tier mechanism: soft timeout calls query.interrupt() for graceful stop, hard timeout calls controller.abort() as fallback after grace period. New config fields timeout_strategy and interrupt_grace_ms control behavior. Default: "interrupt_first" with 10s grace period. "abort_only" preserves legacy behavior. Co-Authored-By: Claude Opus 4.5 --- CLAUDE.md | 4 +- src/config/defaults.ts | 2 + src/config/schema.ts | 6 + src/stages/3-execution/agent-executor.ts | 72 ++++++- src/stages/3-execution/session-batching.ts | 33 +++- src/stages/3-execution/timeout-strategy.ts | 86 +++++++++ src/stages/3-execution/transcript-builder.ts | 11 ++ src/stages/4-evaluation/metrics.ts | 1 + src/types/config.ts | 15 ++ src/types/index.ts | 1 + src/types/transcript.ts | 3 +- tests/mocks/sdk-mock.ts | 19 +- .../stages/3-execution/agent-executor.test.ts | 47 +++++ .../3-execution/session-batching.test.ts | 81 ++++++++ .../3-execution/timeout-strategy.test.ts | 182 ++++++++++++++++++ 15 files changed, 547 insertions(+), 16 deletions(-) create mode 100644 src/stages/3-execution/timeout-strategy.ts create mode 100644 tests/unit/stages/3-execution/timeout-strategy.test.ts diff --git a/CLAUDE.md b/CLAUDE.md index 092448b..3dc73c5 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,11 +6,11 @@ | Tool | Use When | | --------------------------------- | -------------------------------------- | -| Serena `find_symbol` | Know the symbol name - TRY FIRST | +| Serena `find_symbol` | Know the symbol name - TRY FIRST | | Serena `find_referencing_symbols` | Find all usages of a symbol | | Serena `get_symbols_overview` | Understand file structure | | `rg "pattern"` | Regex/text patterns (not symbol-based) | -| Built-in `Grep` / `Glob` | Fallback when above tools insufficient | +| Built-in `Grep` / `Glob` | Fallback when above tools insufficient | ### Edit (prefer in order) diff --git a/src/config/defaults.ts b/src/config/defaults.ts index d21b47c..a9c94d2 100644 --- a/src/config/defaults.ts +++ b/src/config/defaults.ts @@ -45,6 +45,8 @@ export const DEFAULT_EXECUTION = { disallowed_tools: ["Write", "Edit", "Bash"] as string[], num_reps: 1, additional_plugins: [] as string[], + timeout_strategy: "interrupt_first" as const, + interrupt_grace_ms: 10000, }; /** diff --git a/src/config/schema.ts b/src/config/schema.ts index 0e2cb23..f045fbe 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -116,6 +116,8 @@ export const SessionStrategySchema = z.enum([ "batched_by_component", ]); +const TimeoutStrategySchema = z.enum(["interrupt_first", "abort_only"]); + /** * Generation configuration schema. */ @@ -160,6 +162,10 @@ export const ExecutionConfigSchema = z.object({ * - 10000-30000: Complex reasoning or multi-step tasks */ max_thinking_tokens: z.number().int().min(100).max(100000).optional(), + /** Timeout strategy: "interrupt_first" tries graceful interrupt before hard abort */ + timeout_strategy: TimeoutStrategySchema.default("interrupt_first"), + /** Grace period (ms) after interrupt before hard abort. Only for "interrupt_first". */ + interrupt_grace_ms: z.number().int().min(1000).max(60000).default(10000), }); /** diff --git a/src/stages/3-execution/agent-executor.ts b/src/stages/3-execution/agent-executor.ts index fcf02c7..b68af70 100644 --- a/src/stages/3-execution/agent-executor.ts +++ b/src/stages/3-execution/agent-executor.ts @@ -31,9 +31,15 @@ import { type SettingSource, type ModelUsage, } from "./sdk-client.js"; +import { + createTimeout, + type QueryHolder, + type TimeoutConfig, +} from "./timeout-strategy.js"; import { buildTranscript, createErrorEvent, + createInterruptedError, type TranscriptBuilderContext, } from "./transcript-builder.js"; @@ -213,12 +219,16 @@ interface ExecutionContext { hookCollector: ReturnType; /** Abort controller for timeout */ controller: AbortController; - /** Timeout ID (for cleanup) */ - timeout: ReturnType; + /** Cleanup function for timeout timers */ + cleanup: () => void; /** Execution start timestamp */ startTime: number; /** Whether the SDK Stop hook fired (clean completion) */ stopReceived: boolean; + /** Whether an interrupt was fired (for error classification) */ + interrupted: { value: boolean }; + /** Mutable query reference for timeout callbacks */ + queryHolder: QueryHolder; } /** @@ -227,14 +237,21 @@ interface ExecutionContext { * @param timeoutMs - Timeout in milliseconds * @returns Execution context for scenario execution */ -function prepareExecutionContext(timeoutMs: number): ExecutionContext { +function prepareExecutionContext( + timeoutConfig: TimeoutConfig, +): ExecutionContext { const messages: SDKMessage[] = []; const detectedTools: ToolCapture[] = []; const subagentCaptures: SubagentCapture[] = []; const errors: TranscriptErrorEvent[] = []; const hookCollector = createHookResponseCollector(); const controller = new AbortController(); - const timeout = setTimeout(() => controller.abort(), timeoutMs); + const queryHolder: QueryHolder = { query: undefined }; + const { cleanup, interrupted } = createTimeout( + controller, + queryHolder, + timeoutConfig, + ); const startTime = Date.now(); return { @@ -244,9 +261,11 @@ function prepareExecutionContext(timeoutMs: number): ExecutionContext { errors, hookCollector, controller, - timeout, + cleanup, startTime, stopReceived: false, + interrupted, + queryHolder, }; } @@ -353,6 +372,11 @@ export function deriveTerminationType( return "timeout"; } + const hasInterrupt = errors.some((e) => e.error_type === "interrupted"); + if (hasInterrupt) { + return "interrupted"; + } + if (errors.length > 0) { return "error"; } @@ -504,7 +528,11 @@ export async function executeScenario( } = options; // Prepare execution context - const ctx = prepareExecutionContext(config.timeout_ms); + const ctx = prepareExecutionContext({ + timeout_ms: config.timeout_ms, + timeout_strategy: config.timeout_strategy ?? "interrupt_first", + interrupt_grace_ms: config.interrupt_grace_ms ?? 10000, + }); // Set up capture infrastructure const { plugins, hooksConfig } = setupCaptureInfrastructure( @@ -537,6 +565,7 @@ export async function executeScenario( // Use provided query function or real SDK const q = queryFn ? queryFn(queryInput) : executeQuery(queryInput); queryObject = q; + ctx.queryHolder.query = q; try { for await (const message of q) { @@ -563,7 +592,17 @@ export async function executeScenario( // Ensure cleanup if error occurred outside the retry wrapper queryObject?.close(); } finally { - clearTimeout(ctx.timeout); + ctx.cleanup(); + } + + // If interrupted but no AbortError was recorded, add an interrupted error + if ( + ctx.interrupted.value && + !ctx.errors.some((e) => e.error_type === "timeout") + ) { + ctx.errors.push( + createInterruptedError("Execution interrupted by soft timeout"), + ); } return finalizeExecution(ctx, scenario, pluginName, config.model); @@ -591,7 +630,11 @@ export async function executeScenarioWithCheckpoint( } = options; // Prepare execution context - const ctx = prepareExecutionContext(config.timeout_ms); + const ctx = prepareExecutionContext({ + timeout_ms: config.timeout_ms, + timeout_strategy: config.timeout_strategy ?? "interrupt_first", + interrupt_grace_ms: config.interrupt_grace_ms ?? 10000, + }); let userMessageId: string | undefined; // Set up capture infrastructure @@ -625,6 +668,7 @@ export async function executeScenarioWithCheckpoint( await withRetry(async () => { const q = queryFn ? queryFn(queryInput) : executeQuery(queryInput); queryObject = q; + ctx.queryHolder.query = q; try { for await (const message of q) { @@ -656,7 +700,17 @@ export async function executeScenarioWithCheckpoint( // Ensure cleanup if error occurred outside the retry wrapper queryObject?.close(); } finally { - clearTimeout(ctx.timeout); + ctx.cleanup(); + } + + // If interrupted but no AbortError was recorded, add an interrupted error + if ( + ctx.interrupted.value && + !ctx.errors.some((e) => e.error_type === "timeout") + ) { + ctx.errors.push( + createInterruptedError("Execution interrupted by soft timeout"), + ); } return finalizeExecution(ctx, scenario, pluginName, config.model); diff --git a/src/stages/3-execution/session-batching.ts b/src/stages/3-execution/session-batching.ts index 0903e0a..bd5f8a7 100644 --- a/src/stages/3-execution/session-batching.ts +++ b/src/stages/3-execution/session-batching.ts @@ -28,8 +28,10 @@ import { type SDKMessage, type SettingSource, } from "./sdk-client.js"; +import { createTimeout, type QueryHolder } from "./timeout-strategy.js"; import { buildTranscript, + createInterruptedError, type TranscriptBuilderContext, } from "./transcript-builder.js"; @@ -281,6 +283,8 @@ interface ExecuteScenarioWithRetryOptions { sharedStatelessHooks?: StatelessHooks | undefined; /** Optional rate limiter function */ rateLimiter?: ((fn: () => Promise) => Promise) | undefined; + /** Mutable query holder for timeout interrupt access */ + queryHolder?: QueryHolder | undefined; } /** @@ -331,6 +335,7 @@ async function executeScenarioWithRetry( sharedCaptureMaps, sharedStatelessHooks, rateLimiter, + queryHolder, } = options; const scenarioMessages: SDKMessage[] = []; @@ -397,6 +402,11 @@ async function executeScenarioWithRetry( await withRetry(async () => { const q = queryFn ? queryFn(queryInput) : executeQuery(queryInput); + // Assign query to holder so timeout callback can access it + if (queryHolder) { + queryHolder.query = q; + } + try { for await (const message of q) { scenarioMessages.push(message); @@ -734,7 +744,12 @@ export async function executeBatch( sharedCaptureMaps.subagentCaptureMap.clear(); const controller = new AbortController(); - const timeout = setTimeout(() => controller.abort(), config.timeout_ms); + const queryHolder: QueryHolder = { query: undefined }; + const timeout = createTimeout(controller, queryHolder, { + timeout_ms: config.timeout_ms, + timeout_strategy: config.timeout_strategy ?? "interrupt_first", + interrupt_grace_ms: config.interrupt_grace_ms ?? 10000, + }); try { const executionResult = await executeScenarioWithRetry({ @@ -753,14 +768,26 @@ export async function executeBatch( sharedCaptureMaps, sharedStatelessHooks, rateLimiter: options.rateLimiter, + queryHolder, }); + // If interrupted but no AbortError was recorded, add an interrupted error + const errors = executionResult.errors; + if ( + timeout.interrupted.value && + !errors.some((e) => e.error_type === "timeout") + ) { + errors.push( + createInterruptedError("Execution interrupted by soft timeout"), + ); + } + const result = buildScenarioResult({ scenario, messages: executionResult.messages, detectedTools: executionResult.detectedTools, hookResponses: executionResult.hookResponses, - errors: executionResult.errors, + errors, subagentCaptures: executionResult.subagentCaptures, pluginName, model: config.model, @@ -803,7 +830,7 @@ export async function executeBatch( `Batch: scenario ${String(scenarioIndex + 1)} failed, continuing with batch: ${err instanceof Error ? err.message : String(err)}`, ); } finally { - clearTimeout(timeout); + timeout.cleanup(); } } diff --git a/src/stages/3-execution/timeout-strategy.ts b/src/stages/3-execution/timeout-strategy.ts new file mode 100644 index 0000000..0181f70 --- /dev/null +++ b/src/stages/3-execution/timeout-strategy.ts @@ -0,0 +1,86 @@ +/** + * Two-tier timeout strategy for graceful query interruption. + * + * Supports two strategies: + * - "abort_only": Single hard abort after timeout_ms (legacy behavior) + * - "interrupt_first": Soft interrupt at timeout_ms, hard abort after grace period + */ + +import type { Query } from "./sdk-client.js"; +import type { TimeoutStrategy } from "../../types/index.js"; + +/** Mutable holder for a Query reference, allowing timeout callbacks to access the query lazily. */ +export interface QueryHolder { + query: Query | undefined; +} + +/** Result of createTimeout with cleanup and state tracking. */ +export interface TwoTierTimeout { + /** Call to clear all pending timers */ + cleanup: () => void; + /** Whether an interrupt was fired (for error classification) */ + interrupted: { value: boolean }; + /** Mutable reference to the query — set after query creation */ + queryHolder: QueryHolder; +} + +export interface TimeoutConfig { + timeout_ms: number; + timeout_strategy: TimeoutStrategy; + interrupt_grace_ms: number; +} + +/** + * Create a timeout mechanism for scenario execution. + * + * - `abort_only`: Single `setTimeout(() => abort(), timeout_ms)` + * - `interrupt_first`: Soft timeout calls `query.interrupt()`, then schedules + * hard abort after `interrupt_grace_ms`. If query isn't created yet, falls + * through to hard abort immediately. + */ +export function createTimeout( + controller: AbortController, + queryHolder: QueryHolder, + config: TimeoutConfig, +): TwoTierTimeout { + const interrupted = { value: false }; + let softTimer: ReturnType | undefined; + let hardTimer: ReturnType | undefined; + + const hardAbort = (): void => { + controller.abort(); + }; + + if (config.timeout_strategy === "abort_only") { + softTimer = setTimeout(hardAbort, config.timeout_ms); + } else { + // interrupt_first strategy + softTimer = setTimeout(() => { + const query = queryHolder.query; + if (query) { + interrupted.value = true; + query.interrupt().catch(() => { + // Swallow errors — hard abort is the fallback + }); + // Schedule hard abort after grace period + hardTimer = setTimeout(hardAbort, config.interrupt_grace_ms); + } else { + // Query not yet created — fall through to hard abort + hardAbort(); + } + }, config.timeout_ms); + } + + const cleanup = (): void => { + if (softTimer !== undefined) { + clearTimeout(softTimer); + softTimer = undefined; + } + if (hardTimer !== undefined) { + clearTimeout(hardTimer); + hardTimer = undefined; + } + }; + + return { cleanup, interrupted, queryHolder }; +} diff --git a/src/stages/3-execution/transcript-builder.ts b/src/stages/3-execution/transcript-builder.ts index dea6023..8e1c31f 100644 --- a/src/stages/3-execution/transcript-builder.ts +++ b/src/stages/3-execution/transcript-builder.ts @@ -235,6 +235,17 @@ export function createErrorEvent( }; } +/** Create an error event for a graceful interrupt timeout. */ +export function createInterruptedError(message: string): TranscriptErrorEvent { + return { + type: "error", + error_type: "interrupted", + message, + timestamp: Date.now(), + recoverable: false, + }; +} + /** * Extract session ID from system init message. * diff --git a/src/stages/4-evaluation/metrics.ts b/src/stages/4-evaluation/metrics.ts index b29b380..38fce14 100644 --- a/src/stages/4-evaluation/metrics.ts +++ b/src/stages/4-evaluation/metrics.ts @@ -159,6 +159,7 @@ export function countErrorsByType( const counts: Record = { api_error: 0, timeout: 0, + interrupted: 0, permission_denied: 0, budget_exceeded: 0, }; diff --git a/src/types/config.ts b/src/types/config.ts index a35cffc..f37abce 100644 --- a/src/types/config.ts +++ b/src/types/config.ts @@ -71,6 +71,9 @@ export interface GenerationConfig { /** * Execution stage configuration. */ +/** Strategy for handling execution timeouts */ +export type TimeoutStrategy = "interrupt_first" | "abort_only"; + export interface ExecutionConfig { model: string; max_turns: number; @@ -107,6 +110,18 @@ export interface ExecutionConfig { * - 10000-30000: Complex reasoning or multi-step tasks */ max_thinking_tokens?: number; + /** + * Timeout strategy. + * - "interrupt_first": Try graceful interrupt before hard abort (default) + * - "abort_only": Immediately abort on timeout (legacy behavior) + */ + timeout_strategy?: TimeoutStrategy; + /** + * Grace period (ms) after interrupt before hard abort. + * Only applies when timeout_strategy is "interrupt_first". + * Default: 10000 (10 seconds) + */ + interrupt_grace_ms?: number; } /** diff --git a/src/types/index.ts b/src/types/index.ts index 06599a8..f63b84a 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -109,6 +109,7 @@ export type { ScopeConfig, ReasoningEffort, SessionStrategy, + TimeoutStrategy, GenerationConfig, ExecutionConfig, DetectionMode, diff --git a/src/types/transcript.ts b/src/types/transcript.ts index 437e88e..9ea2921 100644 --- a/src/types/transcript.ts +++ b/src/types/transcript.ts @@ -153,6 +153,7 @@ export interface ToolResultEvent { export type TranscriptErrorType = | "api_error" | "timeout" + | "interrupted" | "permission_denied" | "budget_exceeded"; @@ -189,7 +190,7 @@ export interface Transcript { * - "timeout": Forced termination via AbortController * - "error": Unexpected failure during execution */ -export type TerminationType = "clean" | "timeout" | "error"; +export type TerminationType = "clean" | "timeout" | "interrupted" | "error"; /** * Result of executing a scenario. diff --git a/tests/mocks/sdk-mock.ts b/tests/mocks/sdk-mock.ts index dc2af94..6256af3 100644 --- a/tests/mocks/sdk-mock.ts +++ b/tests/mocks/sdk-mock.ts @@ -60,6 +60,9 @@ export interface MockQueryConfig { /** Whether to simulate a timeout via AbortSignal */ shouldTimeout?: boolean; + /** Whether to simulate an interrupt (yields result message and stops early) */ + shouldInterrupt?: boolean; + /** Cost in USD to report */ costUsd?: number; @@ -225,6 +228,9 @@ export function createMockQueryFn(config: MockQueryConfig = {}): QueryFunction { const messages: SDKMessage[] = []; let toolCallCounter = 0; + // Track interrupt state — shared between interrupt() and the generator + let interruptRequested = false; + // Extract hooks for calling during tool use const preToolUseHooks: PreToolUseHookConfig[] = input.options?.hooks?.PreToolUse ?? []; @@ -416,6 +422,16 @@ export function createMockQueryFn(config: MockQueryConfig = {}): QueryFunction { } } + // Simulate interrupt: when interrupt is requested (or pre-configured), + // yield the result message and return early + if (interruptRequested || config.shouldInterrupt) { + if (msg.type === "result" || msg === messages[messages.length - 1]) { + // Yield the result message so the consumer gets partial results + yield resultMsg; + return; + } + } + // Call hooks before yielding assistant message with tool calls if (msg.type === "assistant" && !config.errorMessage) { for (const toolCall of toolCalls) { @@ -480,7 +496,7 @@ export function createMockQueryFn(config: MockQueryConfig = {}): QueryFunction { }, async interrupt(): Promise { - // No-op for mock + interruptRequested = true; }, async setPermissionMode(): Promise { @@ -546,6 +562,7 @@ export function createMockExecutionConfig( max_budget_usd: 1.0, session_isolation: true, permission_bypass: true, + disallowed_tools: ["Write", "Edit", "Bash"], num_reps: 1, additional_plugins: [], ...overrides, diff --git a/tests/unit/stages/3-execution/agent-executor.test.ts b/tests/unit/stages/3-execution/agent-executor.test.ts index 5c09c6a..57b0dea 100644 --- a/tests/unit/stages/3-execution/agent-executor.test.ts +++ b/tests/unit/stages/3-execution/agent-executor.test.ts @@ -325,4 +325,51 @@ describe("deriveTerminationType", () => { const errors: TranscriptErrorEvent[] = []; expect(deriveTerminationType(errors, false)).toBe("clean"); }); + + it("returns 'interrupted' when errors contain interrupted type", () => { + const errors: TranscriptErrorEvent[] = [ + { + type: "error", + error_type: "interrupted", + message: "Execution interrupted by soft timeout", + timestamp: Date.now(), + recoverable: false, + }, + ]; + expect(deriveTerminationType(errors, false)).toBe("interrupted"); + }); + + it("returns 'timeout' when both interrupted AND timeout errors exist", () => { + // timeout takes priority over interrupted + const errors: TranscriptErrorEvent[] = [ + { + type: "error", + error_type: "interrupted", + message: "Execution interrupted by soft timeout", + timestamp: Date.now(), + recoverable: false, + }, + { + type: "error", + error_type: "timeout", + message: "Operation timed out", + timestamp: Date.now(), + recoverable: false, + }, + ]; + expect(deriveTerminationType(errors, false)).toBe("timeout"); + }); + + it("returns 'clean' when stopReceived even with interrupted error", () => { + const errors: TranscriptErrorEvent[] = [ + { + type: "error", + error_type: "interrupted", + message: "Execution interrupted by soft timeout", + timestamp: Date.now(), + recoverable: false, + }, + ]; + expect(deriveTerminationType(errors, true)).toBe("clean"); + }); }); diff --git a/tests/unit/stages/3-execution/session-batching.test.ts b/tests/unit/stages/3-execution/session-batching.test.ts index 53a34cf..71d304e 100644 --- a/tests/unit/stages/3-execution/session-batching.test.ts +++ b/tests/unit/stages/3-execution/session-batching.test.ts @@ -577,4 +577,85 @@ describe("session-batching", () => { expect(results[0]?.subagent_captures).toBeUndefined(); }); }); + + describe("executeBatch timeout strategy", () => { + const createBatchScenario = ( + id: string, + componentRef: string, + ): TestScenario => ({ + id, + scenario_type: "direct", + component_type: "skill", + component_ref: componentRef, + user_prompt: `Test prompt for ${id}`, + expected_trigger: true, + expected_component: componentRef, + }); + + it("executes batch with abort_only strategy (legacy behavior)", async () => { + const scenarios = [createBatchScenario("scenario-1", "skill:test")]; + const mockQuery = createMockQueryFn({ + triggeredTools: [{ name: "Skill", input: { skill: "test" } }], + }); + + const results = await executeBatch({ + scenarios, + pluginPath: "/path/to/plugin", + pluginName: "test-plugin", + config: createMockExecutionConfig({ + timeout_strategy: "abort_only", + }), + queryFn: mockQuery, + }); + + expect(results).toHaveLength(1); + expect(results[0]?.termination_type).toBe("clean"); + expect(results[0]?.errors).toHaveLength(0); + }); + + it("executes batch with interrupt_first strategy (default)", async () => { + const scenarios = [createBatchScenario("scenario-1", "skill:test")]; + const mockQuery = createMockQueryFn({ + triggeredTools: [{ name: "Skill", input: { skill: "test" } }], + }); + + const results = await executeBatch({ + scenarios, + pluginPath: "/path/to/plugin", + pluginName: "test-plugin", + config: createMockExecutionConfig({ + timeout_strategy: "interrupt_first", + interrupt_grace_ms: 5000, + }), + queryFn: mockQuery, + }); + + expect(results).toHaveLength(1); + expect(results[0]?.termination_type).toBe("clean"); + expect(results[0]?.errors).toHaveLength(0); + }); + + it("defaults to interrupt_first when timeout_strategy is undefined", async () => { + const scenarios = [createBatchScenario("scenario-1", "skill:test")]; + const mockQuery = createMockQueryFn({ + triggeredTools: [{ name: "Skill", input: { skill: "test" } }], + }); + + // Config without explicit timeout_strategy + const config = createMockExecutionConfig(); + // Ensure it's undefined (not set) + expect(config.timeout_strategy).toBeUndefined(); + + const results = await executeBatch({ + scenarios, + pluginPath: "/path/to/plugin", + pluginName: "test-plugin", + config, + queryFn: mockQuery, + }); + + expect(results).toHaveLength(1); + expect(results[0]?.termination_type).toBe("clean"); + }); + }); }); diff --git a/tests/unit/stages/3-execution/timeout-strategy.test.ts b/tests/unit/stages/3-execution/timeout-strategy.test.ts new file mode 100644 index 0000000..e3bb8d2 --- /dev/null +++ b/tests/unit/stages/3-execution/timeout-strategy.test.ts @@ -0,0 +1,182 @@ +/** + * Unit tests for timeout-strategy.ts + */ + +import { afterEach, describe, expect, it, vi } from "vitest"; + +import { + createTimeout, + type QueryHolder, +} from "../../../../src/stages/3-execution/timeout-strategy.js"; + +describe("createTimeout", () => { + afterEach(() => { + vi.useRealTimers(); + }); + + describe("abort_only strategy", () => { + it("aborts controller after timeout_ms", () => { + vi.useFakeTimers(); + const controller = new AbortController(); + const queryHolder: QueryHolder = { query: undefined }; + + const timeout = createTimeout(controller, queryHolder, { + timeout_ms: 5000, + timeout_strategy: "abort_only", + interrupt_grace_ms: 10000, + }); + + expect(controller.signal.aborted).toBe(false); + + vi.advanceTimersByTime(5000); + expect(controller.signal.aborted).toBe(true); + expect(timeout.interrupted.value).toBe(false); + + timeout.cleanup(); + }); + + it("does not abort if cleaned up before timeout", () => { + vi.useFakeTimers(); + const controller = new AbortController(); + const queryHolder: QueryHolder = { query: undefined }; + + const timeout = createTimeout(controller, queryHolder, { + timeout_ms: 5000, + timeout_strategy: "abort_only", + interrupt_grace_ms: 10000, + }); + + timeout.cleanup(); + vi.advanceTimersByTime(10000); + expect(controller.signal.aborted).toBe(false); + }); + }); + + describe("interrupt_first strategy", () => { + it("calls interrupt on query at soft timeout", async () => { + vi.useFakeTimers(); + const controller = new AbortController(); + const mockQuery = { + interrupt: vi.fn().mockResolvedValue(undefined), + }; + const queryHolder: QueryHolder = { + query: mockQuery as unknown as QueryHolder["query"], + }; + + const timeout = createTimeout(controller, queryHolder, { + timeout_ms: 5000, + timeout_strategy: "interrupt_first", + interrupt_grace_ms: 3000, + }); + + vi.advanceTimersByTime(5000); + // Let the microtask queue flush for the async interrupt call + await vi.advanceTimersByTimeAsync(0); + + expect(mockQuery.interrupt).toHaveBeenCalledOnce(); + expect(timeout.interrupted.value).toBe(true); + expect(controller.signal.aborted).toBe(false); + + timeout.cleanup(); + }); + + it("hard aborts after grace period if query still running", async () => { + vi.useFakeTimers(); + const controller = new AbortController(); + const mockQuery = { + interrupt: vi.fn().mockResolvedValue(undefined), + }; + const queryHolder: QueryHolder = { + query: mockQuery as unknown as QueryHolder["query"], + }; + + const timeout = createTimeout(controller, queryHolder, { + timeout_ms: 5000, + timeout_strategy: "interrupt_first", + interrupt_grace_ms: 3000, + }); + + // Soft timeout fires + await vi.advanceTimersByTimeAsync(5000); + expect(controller.signal.aborted).toBe(false); + + // Grace period expires → hard abort + vi.advanceTimersByTime(3000); + expect(controller.signal.aborted).toBe(true); + + timeout.cleanup(); + }); + + it("falls through to hard abort if query not yet created", () => { + vi.useFakeTimers(); + const controller = new AbortController(); + const queryHolder: QueryHolder = { query: undefined }; + + const timeout = createTimeout(controller, queryHolder, { + timeout_ms: 5000, + timeout_strategy: "interrupt_first", + interrupt_grace_ms: 3000, + }); + + vi.advanceTimersByTime(5000); + // No query → immediate hard abort + expect(controller.signal.aborted).toBe(true); + expect(timeout.interrupted.value).toBe(false); + + timeout.cleanup(); + }); + + it("swallows interrupt() errors and still schedules hard abort", async () => { + vi.useFakeTimers(); + const controller = new AbortController(); + const mockQuery = { + interrupt: vi.fn().mockRejectedValue(new Error("interrupt failed")), + }; + const queryHolder: QueryHolder = { + query: mockQuery as unknown as QueryHolder["query"], + }; + + const timeout = createTimeout(controller, queryHolder, { + timeout_ms: 5000, + timeout_strategy: "interrupt_first", + interrupt_grace_ms: 3000, + }); + + // Soft timeout fires, interrupt rejects + await vi.advanceTimersByTimeAsync(5000); + expect(mockQuery.interrupt).toHaveBeenCalledOnce(); + + // Hard abort fires after grace period + vi.advanceTimersByTime(3000); + expect(controller.signal.aborted).toBe(true); + + timeout.cleanup(); + }); + + it("cleanup cancels both soft and hard timers", async () => { + vi.useFakeTimers(); + const controller = new AbortController(); + const mockQuery = { + interrupt: vi.fn().mockResolvedValue(undefined), + }; + const queryHolder: QueryHolder = { + query: mockQuery as unknown as QueryHolder["query"], + }; + + const timeout = createTimeout(controller, queryHolder, { + timeout_ms: 5000, + timeout_strategy: "interrupt_first", + interrupt_grace_ms: 3000, + }); + + // Trigger soft timeout (schedules hard timer) + await vi.advanceTimersByTimeAsync(5000); + expect(mockQuery.interrupt).toHaveBeenCalledOnce(); + + // Cleanup before hard timer fires + timeout.cleanup(); + vi.advanceTimersByTime(5000); + expect(controller.signal.aborted).toBe(false); + }); + }); +}); From 37ef25f29b1726d25e932cd0f7de177d4c16ca63 Mon Sep 17 00:00:00 2001 From: Steve Nims Date: Fri, 30 Jan 2026 22:10:26 -0500 Subject: [PATCH 2/2] refactor: address PR review feedback - Update JSDoc on deriveTerminationType to include "interrupted" priority - Update JSDoc on TerminationType to document "interrupted" variant - Extract addInterruptErrorIfNeeded helper to reduce duplication - Remove unused queryHolder field from TwoTierTimeout return interface Co-Authored-By: Claude Opus 4.5 --- src/stages/3-execution/agent-executor.ts | 27 +++++----------------- src/stages/3-execution/session-batching.ts | 20 ++++++---------- src/stages/3-execution/timeout-strategy.ts | 26 +++++++++++++++++---- src/types/transcript.ts | 1 + 4 files changed, 36 insertions(+), 38 deletions(-) diff --git a/src/stages/3-execution/agent-executor.ts b/src/stages/3-execution/agent-executor.ts index b68af70..216c074 100644 --- a/src/stages/3-execution/agent-executor.ts +++ b/src/stages/3-execution/agent-executor.ts @@ -32,6 +32,7 @@ import { type ModelUsage, } from "./sdk-client.js"; import { + addInterruptErrorIfNeeded, createTimeout, type QueryHolder, type TimeoutConfig, @@ -39,7 +40,6 @@ import { import { buildTranscript, createErrorEvent, - createInterruptedError, type TranscriptBuilderContext, } from "./transcript-builder.js"; @@ -354,8 +354,9 @@ interface BuildExecutionResultOptions { * Priority: * 1. If Stop hook fired → "clean" (agent completed normally) * 2. If errors contain an AbortError → "timeout" (forced termination) - * 3. If any errors exist → "error" (unexpected failure) - * 4. Otherwise → "clean" (no errors and no Stop hook, assume clean) + * 3. If errors contain an interrupted error → "interrupted" (graceful interrupt) + * 4. If any errors exist → "error" (unexpected failure) + * 5. Otherwise → "clean" (no errors and no Stop hook, assume clean) */ export function deriveTerminationType( errors: TranscriptErrorEvent[], @@ -595,15 +596,7 @@ export async function executeScenario( ctx.cleanup(); } - // If interrupted but no AbortError was recorded, add an interrupted error - if ( - ctx.interrupted.value && - !ctx.errors.some((e) => e.error_type === "timeout") - ) { - ctx.errors.push( - createInterruptedError("Execution interrupted by soft timeout"), - ); - } + addInterruptErrorIfNeeded(ctx.interrupted, ctx.errors); return finalizeExecution(ctx, scenario, pluginName, config.model); } @@ -703,15 +696,7 @@ export async function executeScenarioWithCheckpoint( ctx.cleanup(); } - // If interrupted but no AbortError was recorded, add an interrupted error - if ( - ctx.interrupted.value && - !ctx.errors.some((e) => e.error_type === "timeout") - ) { - ctx.errors.push( - createInterruptedError("Execution interrupted by soft timeout"), - ); - } + addInterruptErrorIfNeeded(ctx.interrupted, ctx.errors); return finalizeExecution(ctx, scenario, pluginName, config.model); } diff --git a/src/stages/3-execution/session-batching.ts b/src/stages/3-execution/session-batching.ts index bd5f8a7..883bbe5 100644 --- a/src/stages/3-execution/session-batching.ts +++ b/src/stages/3-execution/session-batching.ts @@ -28,10 +28,13 @@ import { type SDKMessage, type SettingSource, } from "./sdk-client.js"; -import { createTimeout, type QueryHolder } from "./timeout-strategy.js"; +import { + addInterruptErrorIfNeeded, + createTimeout, + type QueryHolder, +} from "./timeout-strategy.js"; import { buildTranscript, - createInterruptedError, type TranscriptBuilderContext, } from "./transcript-builder.js"; @@ -771,23 +774,14 @@ export async function executeBatch( queryHolder, }); - // If interrupted but no AbortError was recorded, add an interrupted error - const errors = executionResult.errors; - if ( - timeout.interrupted.value && - !errors.some((e) => e.error_type === "timeout") - ) { - errors.push( - createInterruptedError("Execution interrupted by soft timeout"), - ); - } + addInterruptErrorIfNeeded(timeout.interrupted, executionResult.errors); const result = buildScenarioResult({ scenario, messages: executionResult.messages, detectedTools: executionResult.detectedTools, hookResponses: executionResult.hookResponses, - errors, + errors: executionResult.errors, subagentCaptures: executionResult.subagentCaptures, pluginName, model: config.model, diff --git a/src/stages/3-execution/timeout-strategy.ts b/src/stages/3-execution/timeout-strategy.ts index 0181f70..1098fc7 100644 --- a/src/stages/3-execution/timeout-strategy.ts +++ b/src/stages/3-execution/timeout-strategy.ts @@ -6,8 +6,13 @@ * - "interrupt_first": Soft interrupt at timeout_ms, hard abort after grace period */ +import { createInterruptedError } from "./transcript-builder.js"; + import type { Query } from "./sdk-client.js"; -import type { TimeoutStrategy } from "../../types/index.js"; +import type { + TimeoutStrategy, + TranscriptErrorEvent, +} from "../../types/index.js"; /** Mutable holder for a Query reference, allowing timeout callbacks to access the query lazily. */ export interface QueryHolder { @@ -20,8 +25,6 @@ export interface TwoTierTimeout { cleanup: () => void; /** Whether an interrupt was fired (for error classification) */ interrupted: { value: boolean }; - /** Mutable reference to the query — set after query creation */ - queryHolder: QueryHolder; } export interface TimeoutConfig { @@ -82,5 +85,20 @@ export function createTimeout( } }; - return { cleanup, interrupted, queryHolder }; + return { cleanup, interrupted }; +} + +/** + * If an interrupt was fired but no hard abort (timeout) error was recorded, + * push an "interrupted" error event onto the errors array. + */ +export function addInterruptErrorIfNeeded( + interrupted: { value: boolean }, + errors: TranscriptErrorEvent[], +): void { + if (interrupted.value && !errors.some((e) => e.error_type === "timeout")) { + errors.push( + createInterruptedError("Execution interrupted by soft timeout"), + ); + } } diff --git a/src/types/transcript.ts b/src/types/transcript.ts index 9ea2921..e94c361 100644 --- a/src/types/transcript.ts +++ b/src/types/transcript.ts @@ -188,6 +188,7 @@ export interface Transcript { * Type of scenario termination. * - "clean": Agent finished normally (Stop hook received) * - "timeout": Forced termination via AbortController + * - "interrupted": Graceful interrupt via query.interrupt() before hard abort * - "error": Unexpected failure during execution */ export type TerminationType = "clean" | "timeout" | "interrupted" | "error";