Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@

| Tool | Use When |
| --------------------------------- | -------------------------------------- |
| Serena `find_symbol` | Know the symbol name - TRY FIRST |
| Serena `find_symbol` | Know the symbol name - TRY FIRST |
| Serena `find_referencing_symbols` | Find all usages of a symbol |
| Serena `get_symbols_overview` | Understand file structure |
| `rg "pattern"` | Regex/text patterns (not symbol-based) |
| Built-in `Grep` / `Glob` | Fallback when above tools insufficient |
| Built-in `Grep` / `Glob` | Fallback when above tools insufficient |

### Edit (prefer in order)

Expand Down
2 changes: 2 additions & 0 deletions src/config/defaults.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ export const DEFAULT_EXECUTION = {
disallowed_tools: ["Write", "Edit", "Bash"] as string[],
num_reps: 1,
additional_plugins: [] as string[],
timeout_strategy: "interrupt_first" as const,
interrupt_grace_ms: 10000,
};

/**
Expand Down
6 changes: 6 additions & 0 deletions src/config/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ export const SessionStrategySchema = z.enum([
"batched_by_component",
]);

const TimeoutStrategySchema = z.enum(["interrupt_first", "abort_only"]);

/**
* Generation configuration schema.
*/
Expand Down Expand Up @@ -160,6 +162,10 @@ export const ExecutionConfigSchema = z.object({
* - 10000-30000: Complex reasoning or multi-step tasks
*/
max_thinking_tokens: z.number().int().min(100).max(100000).optional(),
/** Timeout strategy: "interrupt_first" tries graceful interrupt before hard abort */
timeout_strategy: TimeoutStrategySchema.default("interrupt_first"),
/** Grace period (ms) after interrupt before hard abort. Only for "interrupt_first". */
interrupt_grace_ms: z.number().int().min(1000).max(60000).default(10000),
});

/**
Expand Down
72 changes: 63 additions & 9 deletions src/stages/3-execution/agent-executor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,15 @@ import {
type SettingSource,
type ModelUsage,
} from "./sdk-client.js";
import {
createTimeout,
type QueryHolder,
type TimeoutConfig,
} from "./timeout-strategy.js";
import {
buildTranscript,
createErrorEvent,
createInterruptedError,
type TranscriptBuilderContext,
} from "./transcript-builder.js";

Expand Down Expand Up @@ -213,12 +219,16 @@ interface ExecutionContext {
hookCollector: ReturnType<typeof createHookResponseCollector>;
/** Abort controller for timeout */
controller: AbortController;
/** Timeout ID (for cleanup) */
timeout: ReturnType<typeof setTimeout>;
/** Cleanup function for timeout timers */
cleanup: () => void;
/** Execution start timestamp */
startTime: number;
/** Whether the SDK Stop hook fired (clean completion) */
stopReceived: boolean;
/** Whether an interrupt was fired (for error classification) */
interrupted: { value: boolean };
/** Mutable query reference for timeout callbacks */
queryHolder: QueryHolder;
}

/**
Expand All @@ -227,14 +237,21 @@ interface ExecutionContext {
* @param timeoutMs - Timeout in milliseconds
* @returns Execution context for scenario execution
*/
function prepareExecutionContext(timeoutMs: number): ExecutionContext {
function prepareExecutionContext(
timeoutConfig: TimeoutConfig,
): ExecutionContext {
const messages: SDKMessage[] = [];
const detectedTools: ToolCapture[] = [];
const subagentCaptures: SubagentCapture[] = [];
const errors: TranscriptErrorEvent[] = [];
const hookCollector = createHookResponseCollector();
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), timeoutMs);
const queryHolder: QueryHolder = { query: undefined };
const { cleanup, interrupted } = createTimeout(
controller,
queryHolder,
timeoutConfig,
);
const startTime = Date.now();

return {
Expand All @@ -244,9 +261,11 @@ function prepareExecutionContext(timeoutMs: number): ExecutionContext {
errors,
hookCollector,
controller,
timeout,
cleanup,
startTime,
stopReceived: false,
interrupted,
queryHolder,
};
}

Expand Down Expand Up @@ -353,6 +372,11 @@ export function deriveTerminationType(
return "timeout";
}

const hasInterrupt = errors.some((e) => e.error_type === "interrupted");
if (hasInterrupt) {
return "interrupted";
}

if (errors.length > 0) {
return "error";
}
Expand Down Expand Up @@ -504,7 +528,11 @@ export async function executeScenario(
} = options;

// Prepare execution context
const ctx = prepareExecutionContext(config.timeout_ms);
const ctx = prepareExecutionContext({
timeout_ms: config.timeout_ms,
timeout_strategy: config.timeout_strategy ?? "interrupt_first",
interrupt_grace_ms: config.interrupt_grace_ms ?? 10000,
});

// Set up capture infrastructure
const { plugins, hooksConfig } = setupCaptureInfrastructure(
Expand Down Expand Up @@ -537,6 +565,7 @@ export async function executeScenario(
// Use provided query function or real SDK
const q = queryFn ? queryFn(queryInput) : executeQuery(queryInput);
queryObject = q;
ctx.queryHolder.query = q;

try {
for await (const message of q) {
Expand All @@ -563,7 +592,17 @@ export async function executeScenario(
// Ensure cleanup if error occurred outside the retry wrapper
queryObject?.close();
} finally {
clearTimeout(ctx.timeout);
ctx.cleanup();
}

// If interrupted but no AbortError was recorded, add an interrupted error
if (
ctx.interrupted.value &&
!ctx.errors.some((e) => e.error_type === "timeout")
) {
ctx.errors.push(
createInterruptedError("Execution interrupted by soft timeout"),
);
}

return finalizeExecution(ctx, scenario, pluginName, config.model);
Expand Down Expand Up @@ -591,7 +630,11 @@ export async function executeScenarioWithCheckpoint(
} = options;

// Prepare execution context
const ctx = prepareExecutionContext(config.timeout_ms);
const ctx = prepareExecutionContext({
timeout_ms: config.timeout_ms,
timeout_strategy: config.timeout_strategy ?? "interrupt_first",
interrupt_grace_ms: config.interrupt_grace_ms ?? 10000,
});
let userMessageId: string | undefined;

// Set up capture infrastructure
Expand Down Expand Up @@ -625,6 +668,7 @@ export async function executeScenarioWithCheckpoint(
await withRetry(async () => {
const q = queryFn ? queryFn(queryInput) : executeQuery(queryInput);
queryObject = q;
ctx.queryHolder.query = q;

try {
for await (const message of q) {
Expand Down Expand Up @@ -656,7 +700,17 @@ export async function executeScenarioWithCheckpoint(
// Ensure cleanup if error occurred outside the retry wrapper
queryObject?.close();
} finally {
clearTimeout(ctx.timeout);
ctx.cleanup();
}

// If interrupted but no AbortError was recorded, add an interrupted error
if (
ctx.interrupted.value &&
!ctx.errors.some((e) => e.error_type === "timeout")
) {
ctx.errors.push(
createInterruptedError("Execution interrupted by soft timeout"),
);
}

return finalizeExecution(ctx, scenario, pluginName, config.model);
Expand Down
33 changes: 30 additions & 3 deletions src/stages/3-execution/session-batching.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@ import {
type SDKMessage,
type SettingSource,
} from "./sdk-client.js";
import { createTimeout, type QueryHolder } from "./timeout-strategy.js";
import {
buildTranscript,
createInterruptedError,
type TranscriptBuilderContext,
} from "./transcript-builder.js";

Expand Down Expand Up @@ -281,6 +283,8 @@ interface ExecuteScenarioWithRetryOptions {
sharedStatelessHooks?: StatelessHooks | undefined;
/** Optional rate limiter function */
rateLimiter?: (<T>(fn: () => Promise<T>) => Promise<T>) | undefined;
/** Mutable query holder for timeout interrupt access */
queryHolder?: QueryHolder | undefined;
}

/**
Expand Down Expand Up @@ -331,6 +335,7 @@ async function executeScenarioWithRetry(
sharedCaptureMaps,
sharedStatelessHooks,
rateLimiter,
queryHolder,
} = options;

const scenarioMessages: SDKMessage[] = [];
Expand Down Expand Up @@ -397,6 +402,11 @@ async function executeScenarioWithRetry(
await withRetry(async () => {
const q = queryFn ? queryFn(queryInput) : executeQuery(queryInput);

// Assign query to holder so timeout callback can access it
if (queryHolder) {
queryHolder.query = q;
}

try {
for await (const message of q) {
scenarioMessages.push(message);
Expand Down Expand Up @@ -734,7 +744,12 @@ export async function executeBatch(
sharedCaptureMaps.subagentCaptureMap.clear();

const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), config.timeout_ms);
const queryHolder: QueryHolder = { query: undefined };
const timeout = createTimeout(controller, queryHolder, {
timeout_ms: config.timeout_ms,
timeout_strategy: config.timeout_strategy ?? "interrupt_first",
interrupt_grace_ms: config.interrupt_grace_ms ?? 10000,
});

try {
const executionResult = await executeScenarioWithRetry({
Expand All @@ -753,14 +768,26 @@ export async function executeBatch(
sharedCaptureMaps,
sharedStatelessHooks,
rateLimiter: options.rateLimiter,
queryHolder,
});

// If interrupted but no AbortError was recorded, add an interrupted error
const errors = executionResult.errors;
if (
timeout.interrupted.value &&
!errors.some((e) => e.error_type === "timeout")
) {
errors.push(
createInterruptedError("Execution interrupted by soft timeout"),
);
}

const result = buildScenarioResult({
scenario,
messages: executionResult.messages,
detectedTools: executionResult.detectedTools,
hookResponses: executionResult.hookResponses,
errors: executionResult.errors,
errors,
subagentCaptures: executionResult.subagentCaptures,
pluginName,
model: config.model,
Expand Down Expand Up @@ -803,7 +830,7 @@ export async function executeBatch(
`Batch: scenario ${String(scenarioIndex + 1)} failed, continuing with batch: ${err instanceof Error ? err.message : String(err)}`,
);
} finally {
clearTimeout(timeout);
timeout.cleanup();
}
}

Expand Down
86 changes: 86 additions & 0 deletions src/stages/3-execution/timeout-strategy.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/**
* Two-tier timeout strategy for graceful query interruption.
*
* Supports two strategies:
* - "abort_only": Single hard abort after timeout_ms (legacy behavior)
* - "interrupt_first": Soft interrupt at timeout_ms, hard abort after grace period
*/

import type { Query } from "./sdk-client.js";
import type { TimeoutStrategy } from "../../types/index.js";

/** Mutable holder for a Query reference, allowing timeout callbacks to access the query lazily. */
export interface QueryHolder {
query: Query | undefined;
}

/** Result of createTimeout with cleanup and state tracking. */
export interface TwoTierTimeout {
/** Call to clear all pending timers */
cleanup: () => void;
/** Whether an interrupt was fired (for error classification) */
interrupted: { value: boolean };
/** Mutable reference to the query — set after query creation */
queryHolder: QueryHolder;
}

export interface TimeoutConfig {
timeout_ms: number;
timeout_strategy: TimeoutStrategy;
interrupt_grace_ms: number;
}

/**
* Create a timeout mechanism for scenario execution.
*
* - `abort_only`: Single `setTimeout(() => abort(), timeout_ms)`
* - `interrupt_first`: Soft timeout calls `query.interrupt()`, then schedules
* hard abort after `interrupt_grace_ms`. If query isn't created yet, falls
* through to hard abort immediately.
*/
export function createTimeout(
controller: AbortController,
queryHolder: QueryHolder,
config: TimeoutConfig,
): TwoTierTimeout {
const interrupted = { value: false };
let softTimer: ReturnType<typeof setTimeout> | undefined;
let hardTimer: ReturnType<typeof setTimeout> | undefined;

const hardAbort = (): void => {
controller.abort();
};

if (config.timeout_strategy === "abort_only") {
softTimer = setTimeout(hardAbort, config.timeout_ms);
} else {
// interrupt_first strategy
softTimer = setTimeout(() => {
const query = queryHolder.query;
if (query) {
interrupted.value = true;
query.interrupt().catch(() => {
// Swallow errors — hard abort is the fallback
});
// Schedule hard abort after grace period
hardTimer = setTimeout(hardAbort, config.interrupt_grace_ms);
} else {
// Query not yet created — fall through to hard abort
hardAbort();
}
}, config.timeout_ms);
}

const cleanup = (): void => {
if (softTimer !== undefined) {
clearTimeout(softTimer);
softTimer = undefined;
}
if (hardTimer !== undefined) {
clearTimeout(hardTimer);
hardTimer = undefined;
}
};

return { cleanup, interrupted, queryHolder };
}
Loading