Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@

| Tool | Use When |
| --------------------------------- | -------------------------------------- |
| Serena `find_symbol` | Know the symbol name - TRY FIRST |
| Serena `find_symbol` | Know the symbol name - TRY FIRST |
| Serena `find_referencing_symbols` | Find all usages of a symbol |
| Serena `get_symbols_overview` | Understand file structure |
| `rg "pattern"` | Regex/text patterns (not symbol-based) |
| Built-in `Grep` / `Glob` | Fallback when above tools insufficient |
| Built-in `Grep` / `Glob` | Fallback when above tools insufficient |

### Edit (prefer in order)

Expand Down
2 changes: 2 additions & 0 deletions src/config/defaults.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ export const DEFAULT_EXECUTION = {
disallowed_tools: ["Write", "Edit", "Bash"] as string[],
num_reps: 1,
additional_plugins: [] as string[],
timeout_strategy: "interrupt_first" as const,
interrupt_grace_ms: 10000,
};

/**
Expand Down
6 changes: 6 additions & 0 deletions src/config/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ export const SessionStrategySchema = z.enum([
"batched_by_component",
]);

const TimeoutStrategySchema = z.enum(["interrupt_first", "abort_only"]);

/**
* Generation configuration schema.
*/
Expand Down Expand Up @@ -160,6 +162,10 @@ export const ExecutionConfigSchema = z.object({
* - 10000-30000: Complex reasoning or multi-step tasks
*/
max_thinking_tokens: z.number().int().min(100).max(100000).optional(),
/** Timeout strategy: "interrupt_first" tries graceful interrupt before hard abort */
timeout_strategy: TimeoutStrategySchema.default("interrupt_first"),
/** Grace period (ms) after interrupt before hard abort. Only for "interrupt_first". */
interrupt_grace_ms: z.number().int().min(1000).max(60000).default(10000),
});

/**
Expand Down
61 changes: 50 additions & 11 deletions src/stages/3-execution/agent-executor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ import {
type SettingSource,
type ModelUsage,
} from "./sdk-client.js";
import {
addInterruptErrorIfNeeded,
createTimeout,
type QueryHolder,
type TimeoutConfig,
} from "./timeout-strategy.js";
import {
buildTranscript,
createErrorEvent,
Expand Down Expand Up @@ -213,12 +219,16 @@ interface ExecutionContext {
hookCollector: ReturnType<typeof createHookResponseCollector>;
/** Abort controller for timeout */
controller: AbortController;
/** Timeout ID (for cleanup) */
timeout: ReturnType<typeof setTimeout>;
/** Cleanup function for timeout timers */
cleanup: () => void;
/** Execution start timestamp */
startTime: number;
/** Whether the SDK Stop hook fired (clean completion) */
stopReceived: boolean;
/** Whether an interrupt was fired (for error classification) */
interrupted: { value: boolean };
/** Mutable query reference for timeout callbacks */
queryHolder: QueryHolder;
}

/**
Expand All @@ -227,14 +237,21 @@ interface ExecutionContext {
* @param timeoutMs - Timeout in milliseconds
* @returns Execution context for scenario execution
*/
function prepareExecutionContext(timeoutMs: number): ExecutionContext {
function prepareExecutionContext(
timeoutConfig: TimeoutConfig,
): ExecutionContext {
const messages: SDKMessage[] = [];
const detectedTools: ToolCapture[] = [];
const subagentCaptures: SubagentCapture[] = [];
const errors: TranscriptErrorEvent[] = [];
const hookCollector = createHookResponseCollector();
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), timeoutMs);
const queryHolder: QueryHolder = { query: undefined };
const { cleanup, interrupted } = createTimeout(
controller,
queryHolder,
timeoutConfig,
);
const startTime = Date.now();

return {
Expand All @@ -244,9 +261,11 @@ function prepareExecutionContext(timeoutMs: number): ExecutionContext {
errors,
hookCollector,
controller,
timeout,
cleanup,
startTime,
stopReceived: false,
interrupted,
queryHolder,
};
}

Expand Down Expand Up @@ -335,8 +354,9 @@ interface BuildExecutionResultOptions {
* Priority:
* 1. If Stop hook fired → "clean" (agent completed normally)
* 2. If errors contain an AbortError → "timeout" (forced termination)
* 3. If any errors exist → "error" (unexpected failure)
* 4. Otherwise → "clean" (no errors and no Stop hook, assume clean)
* 3. If errors contain an interrupted error → "interrupted" (graceful interrupt)
* 4. If any errors exist → "error" (unexpected failure)
* 5. Otherwise → "clean" (no errors and no Stop hook, assume clean)
*/
export function deriveTerminationType(
errors: TranscriptErrorEvent[],
Expand All @@ -353,6 +373,11 @@ export function deriveTerminationType(
return "timeout";
}

const hasInterrupt = errors.some((e) => e.error_type === "interrupted");
if (hasInterrupt) {
return "interrupted";
}

if (errors.length > 0) {
return "error";
}
Expand Down Expand Up @@ -504,7 +529,11 @@ export async function executeScenario(
} = options;

// Prepare execution context
const ctx = prepareExecutionContext(config.timeout_ms);
const ctx = prepareExecutionContext({
timeout_ms: config.timeout_ms,
timeout_strategy: config.timeout_strategy ?? "interrupt_first",
interrupt_grace_ms: config.interrupt_grace_ms ?? 10000,
});

// Set up capture infrastructure
const { plugins, hooksConfig } = setupCaptureInfrastructure(
Expand Down Expand Up @@ -537,6 +566,7 @@ export async function executeScenario(
// Use provided query function or real SDK
const q = queryFn ? queryFn(queryInput) : executeQuery(queryInput);
queryObject = q;
ctx.queryHolder.query = q;

try {
for await (const message of q) {
Expand All @@ -563,9 +593,11 @@ export async function executeScenario(
// Ensure cleanup if error occurred outside the retry wrapper
queryObject?.close();
} finally {
clearTimeout(ctx.timeout);
ctx.cleanup();
}

addInterruptErrorIfNeeded(ctx.interrupted, ctx.errors);

return finalizeExecution(ctx, scenario, pluginName, config.model);
}

Expand All @@ -591,7 +623,11 @@ export async function executeScenarioWithCheckpoint(
} = options;

// Prepare execution context
const ctx = prepareExecutionContext(config.timeout_ms);
const ctx = prepareExecutionContext({
timeout_ms: config.timeout_ms,
timeout_strategy: config.timeout_strategy ?? "interrupt_first",
interrupt_grace_ms: config.interrupt_grace_ms ?? 10000,
});
let userMessageId: string | undefined;

// Set up capture infrastructure
Expand Down Expand Up @@ -625,6 +661,7 @@ export async function executeScenarioWithCheckpoint(
await withRetry(async () => {
const q = queryFn ? queryFn(queryInput) : executeQuery(queryInput);
queryObject = q;
ctx.queryHolder.query = q;

try {
for await (const message of q) {
Expand Down Expand Up @@ -656,9 +693,11 @@ export async function executeScenarioWithCheckpoint(
// Ensure cleanup if error occurred outside the retry wrapper
queryObject?.close();
} finally {
clearTimeout(ctx.timeout);
ctx.cleanup();
}

addInterruptErrorIfNeeded(ctx.interrupted, ctx.errors);

return finalizeExecution(ctx, scenario, pluginName, config.model);
}

Expand Down
25 changes: 23 additions & 2 deletions src/stages/3-execution/session-batching.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ import {
type SDKMessage,
type SettingSource,
} from "./sdk-client.js";
import {
addInterruptErrorIfNeeded,
createTimeout,
type QueryHolder,
} from "./timeout-strategy.js";
import {
buildTranscript,
type TranscriptBuilderContext,
Expand Down Expand Up @@ -281,6 +286,8 @@ interface ExecuteScenarioWithRetryOptions {
sharedStatelessHooks?: StatelessHooks | undefined;
/** Optional rate limiter function */
rateLimiter?: (<T>(fn: () => Promise<T>) => Promise<T>) | undefined;
/** Mutable query holder for timeout interrupt access */
queryHolder?: QueryHolder | undefined;
}

/**
Expand Down Expand Up @@ -331,6 +338,7 @@ async function executeScenarioWithRetry(
sharedCaptureMaps,
sharedStatelessHooks,
rateLimiter,
queryHolder,
} = options;

const scenarioMessages: SDKMessage[] = [];
Expand Down Expand Up @@ -397,6 +405,11 @@ async function executeScenarioWithRetry(
await withRetry(async () => {
const q = queryFn ? queryFn(queryInput) : executeQuery(queryInput);

// Assign query to holder so timeout callback can access it
if (queryHolder) {
queryHolder.query = q;
}

try {
for await (const message of q) {
scenarioMessages.push(message);
Expand Down Expand Up @@ -734,7 +747,12 @@ export async function executeBatch(
sharedCaptureMaps.subagentCaptureMap.clear();

const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), config.timeout_ms);
const queryHolder: QueryHolder = { query: undefined };
const timeout = createTimeout(controller, queryHolder, {
timeout_ms: config.timeout_ms,
timeout_strategy: config.timeout_strategy ?? "interrupt_first",
interrupt_grace_ms: config.interrupt_grace_ms ?? 10000,
});

try {
const executionResult = await executeScenarioWithRetry({
Expand All @@ -753,8 +771,11 @@ export async function executeBatch(
sharedCaptureMaps,
sharedStatelessHooks,
rateLimiter: options.rateLimiter,
queryHolder,
});

addInterruptErrorIfNeeded(timeout.interrupted, executionResult.errors);

const result = buildScenarioResult({
scenario,
messages: executionResult.messages,
Expand Down Expand Up @@ -803,7 +824,7 @@ export async function executeBatch(
`Batch: scenario ${String(scenarioIndex + 1)} failed, continuing with batch: ${err instanceof Error ? err.message : String(err)}`,
);
} finally {
clearTimeout(timeout);
timeout.cleanup();
}
}

Expand Down
104 changes: 104 additions & 0 deletions src/stages/3-execution/timeout-strategy.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/**
* Two-tier timeout strategy for graceful query interruption.
*
* Supports two strategies:
* - "abort_only": Single hard abort after timeout_ms (legacy behavior)
* - "interrupt_first": Soft interrupt at timeout_ms, hard abort after grace period
*/

import { createInterruptedError } from "./transcript-builder.js";

import type { Query } from "./sdk-client.js";
import type {
TimeoutStrategy,
TranscriptErrorEvent,
} from "../../types/index.js";

/** Mutable holder for a Query reference, allowing timeout callbacks to access the query lazily. */
export interface QueryHolder {
query: Query | undefined;
}

/** Result of createTimeout with cleanup and state tracking. */
export interface TwoTierTimeout {
/** Call to clear all pending timers */
cleanup: () => void;
/** Whether an interrupt was fired (for error classification) */
interrupted: { value: boolean };
}

export interface TimeoutConfig {
timeout_ms: number;
timeout_strategy: TimeoutStrategy;
interrupt_grace_ms: number;
}

/**
* Create a timeout mechanism for scenario execution.
*
* - `abort_only`: Single `setTimeout(() => abort(), timeout_ms)`
* - `interrupt_first`: Soft timeout calls `query.interrupt()`, then schedules
* hard abort after `interrupt_grace_ms`. If query isn't created yet, falls
* through to hard abort immediately.
*/
export function createTimeout(
controller: AbortController,
queryHolder: QueryHolder,
config: TimeoutConfig,
): TwoTierTimeout {
const interrupted = { value: false };
let softTimer: ReturnType<typeof setTimeout> | undefined;
let hardTimer: ReturnType<typeof setTimeout> | undefined;

const hardAbort = (): void => {
controller.abort();
};

if (config.timeout_strategy === "abort_only") {
softTimer = setTimeout(hardAbort, config.timeout_ms);
} else {
// interrupt_first strategy
softTimer = setTimeout(() => {
const query = queryHolder.query;
if (query) {
interrupted.value = true;
query.interrupt().catch(() => {
// Swallow errors — hard abort is the fallback
});
// Schedule hard abort after grace period
hardTimer = setTimeout(hardAbort, config.interrupt_grace_ms);
} else {
// Query not yet created — fall through to hard abort
hardAbort();
}
}, config.timeout_ms);
}

const cleanup = (): void => {
if (softTimer !== undefined) {
clearTimeout(softTimer);
softTimer = undefined;
}
if (hardTimer !== undefined) {
clearTimeout(hardTimer);
hardTimer = undefined;
}
};

return { cleanup, interrupted };
}

/**
* If an interrupt was fired but no hard abort (timeout) error was recorded,
* push an "interrupted" error event onto the errors array.
*/
export function addInterruptErrorIfNeeded(
interrupted: { value: boolean },
errors: TranscriptErrorEvent[],
): void {
if (interrupted.value && !errors.some((e) => e.error_type === "timeout")) {
errors.push(
createInterruptedError("Execution interrupted by soft timeout"),
);
}
}
Loading
Loading