16 changes: 8 additions & 8 deletions CLAUDE.md
@@ -6,14 +6,14 @@ This file provides guidance to Claude Code when working with this repository.

This project has Serena configured. **You MUST follow these rules:**

| Instead of... | USE THIS | Cost |
| -------------------------- | ------------------------------------ | -------- |
| Built-in `Grep`, `grep` | `rg "pattern"` | **FREE** |
| Built-in `Edit` tool | Morph `edit_file` | **FREE** |
| Reading entire files | Serena `get_symbols_overview` | **FREE** |
| Searching for symbols | Serena `find_symbol` | **FREE** |
| Finding usages | Serena `find_referencing_symbols` | **FREE** |
| Semantic/fuzzy search | `warpgrep_codebase_search` | **$$$** |
| Instead of... | USE THIS | Cost |
| ----------------------- | --------------------------------- | -------- |
| Built-in `Grep`, `grep` | `rg "pattern"` | **FREE** |
| Built-in `Edit` tool | Morph `edit_file` | **FREE** |
| Reading entire files | Serena `get_symbols_overview` | **FREE** |
| Searching for symbols | Serena `find_symbol` | **FREE** |
| Finding usages | Serena `find_referencing_symbols` | **FREE** |
| Semantic/fuzzy search | `warpgrep_codebase_search` | **$$$** |

> **FREE tools first. `warpgrep` costs real money - only use when `rg` and Serena cannot answer the question.**

8 changes: 4 additions & 4 deletions src/cli/commands/execute.ts
@@ -76,12 +76,12 @@ export function registerExecuteCommand(program: Command): void {
writeJson(`${resultsDir}/scenarios.json`, generation.scenarios);

// Stage 3: Execution
const execution = await runExecution(
const execution = await runExecution({
analysis,
generation.scenarios,
scenarios: generation.scenarios,
config,
consoleProgress,
);
progress: consoleProgress,
});
state = updateStateAfterExecution(state, execution.results);
await saveState(state);

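For orientation, here is a minimal sketch of the options object that `runExecution` now takes, inferred only from the call sites in this diff; the real interface is defined alongside `runExecution`, and the member types below are assumptions.

```ts
// Hypothetical shape inferred from the runExecution call sites in this PR.
// Placeholder aliases stand in for the project's real types so the sketch
// compiles on its own; only the property names come from the diff.
type PluginAnalysis = unknown;
type TestScenario = unknown;
type PipelineConfig = unknown;
type ProgressReporter = (message: string) => void;

interface RunExecutionOptions {
  analysis: PluginAnalysis;     // output of the analysis stage
  scenarios: TestScenario[];    // generation.scenarios at the call sites
  config: PipelineConfig;       // shared config passed through every stage
  progress?: ProgressReporter;  // consoleProgress in the CLI commands
}
```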
76 changes: 38 additions & 38 deletions src/cli/commands/resume.ts
@@ -58,20 +58,20 @@ async function resumeFromAnalysis(
const generation = await runGeneration(analysis, config);
writeJson(`${resultsDir}/scenarios.json`, generation.scenarios);

const execution = await runExecution(
const execution = await runExecution({
analysis,
generation.scenarios,
scenarios: generation.scenarios,
config,
consoleProgress,
);
progress: consoleProgress,
});

const evaluation = await runEvaluation(
analysis.plugin_name,
generation.scenarios,
execution.results,
const evaluation = await runEvaluation({
pluginName: analysis.plugin_name,
scenarios: generation.scenarios,
executions: execution.results,
config,
consoleProgress,
);
progress: consoleProgress,
});

// Chain state updates
let currentState = updateStateAfterAnalysis(initialState, analysis);
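Each resume path then threads the recovered data through the same chain of state updaters. A rough, self-contained sketch of that pattern follows; the updater names match the diff, but their signatures and the shape of the state are assumptions stubbed in for illustration.

```ts
// Hypothetical sketch of the state-chaining pattern used after each resume
// entry point. Updater names come from the diff; signatures and RunState are
// assumptions, stubbed so the example stands alone.
type RunState = Record<string, unknown>;

const updateStateAfterAnalysis = (s: RunState, analysis: unknown): RunState => ({ ...s, analysis });
const updateStateAfterGeneration = (s: RunState, scenarios: unknown): RunState => ({ ...s, scenarios });
const updateStateAfterExecution = (s: RunState, results: unknown): RunState => ({ ...s, executionResults: results });
const updateStateAfterEvaluation = (s: RunState, results: unknown): RunState => ({ ...s, evaluationResults: results });

// Each updater takes the previous state and returns a new one, so a resume
// command only starts the chain at the stage it recovered from.
let currentState: RunState = {};
currentState = updateStateAfterAnalysis(currentState, { plugin_name: "example-plugin" });
currentState = updateStateAfterGeneration(currentState, []);
currentState = updateStateAfterExecution(currentState, []);
currentState = updateStateAfterEvaluation(currentState, []);
```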
@@ -107,20 +107,20 @@ async function resumeFromGeneration(
const generation = await runGeneration(analysisData, config);
writeJson(`${resultsDir}/scenarios.json`, generation.scenarios);

const execution = await runExecution(
analysisData,
generation.scenarios,
const execution = await runExecution({
analysis: analysisData,
scenarios: generation.scenarios,
config,
consoleProgress,
);
progress: consoleProgress,
});

const evaluation = await runEvaluation(
analysisData.plugin_name,
generation.scenarios,
execution.results,
const evaluation = await runEvaluation({
pluginName: analysisData.plugin_name,
scenarios: generation.scenarios,
executions: execution.results,
config,
consoleProgress,
);
progress: consoleProgress,
});

// Chain state updates
let currentState = updateStateAfterGeneration(
@@ -158,20 +158,20 @@ async function resumeFromExecution(
);
}

const execution = await runExecution(
analysisData,
scenarioData,
const execution = await runExecution({
analysis: analysisData,
scenarios: scenarioData,
config,
consoleProgress,
);
progress: consoleProgress,
});

const evaluation = await runEvaluation(
analysisData.plugin_name,
scenarioData,
execution.results,
const evaluation = await runEvaluation({
pluginName: analysisData.plugin_name,
scenarios: scenarioData,
executions: execution.results,
config,
consoleProgress,
);
progress: consoleProgress,
});

// Chain state updates
let currentState = updateStateAfterExecution(initialState, execution.results);
@@ -206,13 +206,13 @@ async function resumeFromEvaluation(
);
}

const evaluation = await runEvaluation(
analysisData.plugin_name,
scenarioData,
executionData,
const evaluation = await runEvaluation({
pluginName: analysisData.plugin_name,
scenarios: scenarioData,
executions: executionData,
config,
consoleProgress,
);
progress: consoleProgress,
});

// Chain state updates
let currentState = updateStateAfterEvaluation(
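`runEvaluation` gets the same treatment across all four resume paths. Below is a hedged sketch of the options shape implied by those call sites; as above, the member types are placeholders, not the project's real definitions.

```ts
// Hypothetical shape implied by the runEvaluation call sites in this PR; only
// the property names are taken from the diff.
type TestScenario = unknown;
type ExecutionResult = unknown;
type PipelineConfig = unknown;
type ProgressReporter = (message: string) => void;

interface RunEvaluationOptions {
  pluginName: string;             // analysis.plugin_name at the call sites
  scenarios: TestScenario[];      // the scenarios that were executed
  executions: ExecutionResult[];  // execution.results from the previous stage
  config: PipelineConfig;         // shared config passed through every stage
  progress?: ProgressReporter;    // consoleProgress in the CLI commands
}
```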
20 changes: 10 additions & 10 deletions src/cli/commands/run.ts
@@ -145,25 +145,25 @@ export function registerRunCommand(program: Command): void {
}

// Stage 3: Execution
const execution = await runExecution(
const execution = await runExecution({
analysis,
scenariosToRun,
scenarios: scenariosToRun,
config,
consoleProgress,
);
progress: consoleProgress,
});
state = updateStateAfterExecution(state, execution.results);
await saveState(state);

writeExecutionMetadata(resultsDir, execution);

// Stage 4: Evaluation
const evaluation = await runEvaluation(
analysis.plugin_name,
scenariosToRun,
execution.results,
const evaluation = await runEvaluation({
pluginName: analysis.plugin_name,
scenarios: scenariosToRun,
executions: execution.results,
config,
consoleProgress,
);
progress: consoleProgress,
});
state = updateStateAfterEvaluation(state, evaluation.results);
await saveState(state);

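A side note on why the options-object form is worth the churn: with five positional arguments the call sites read ambiguously and the order has to match exactly. The sketch below is contrived, with made-up types, purely to contrast the two call styles.

```ts
// Contrived illustration, not code from this repository: with positional
// parameters the reader has to recall the argument order, while an options
// object labels every value at the call site and a misspelled key is a
// compile-time error thanks to excess-property checking.
interface Scenario { id: string }
interface ExecutionRecord { scenarioId: string; passed: boolean }

function summarizePositional(pluginName: string, scenarios: Scenario[], executions: ExecutionRecord[]): string {
  return `${pluginName}: ${executions.filter((e) => e.passed).length}/${scenarios.length} passed`;
}

function summarizeWithOptions(options: {
  pluginName: string;
  scenarios: Scenario[];
  executions: ExecutionRecord[];
}): string {
  const { pluginName, scenarios, executions } = options;
  return `${pluginName}: ${executions.filter((e) => e.passed).length}/${scenarios.length} passed`;
}

const scenarios: Scenario[] = [{ id: "s1" }];
const executions: ExecutionRecord[] = [{ scenarioId: "s1", passed: true }];

summarizePositional("demo-plugin", scenarios, executions);
summarizeWithOptions({ pluginName: "demo-plugin", scenarios, executions });
// summarizeWithOptions({ pluginName: "demo-plugin", scenario: scenarios, executions });
//   ^ would fail to compile: unknown property "scenario"
```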
30 changes: 20 additions & 10 deletions src/stages/2-generation/agent-scenario-generator.ts
@@ -306,25 +306,35 @@ export async function generateAgentScenarios(
return parseAgentScenarioResponse(response, agent);
}

/**
* Options for generateAllAgentScenarios.
*/
export interface GenerateAllAgentScenariosOptions {
/** Anthropic client */
client: Anthropic;
/** Array of agent components */
agents: AgentComponent[];
/** Generation config */
config: GenerationConfig;
/** Optional progress callback */
onProgress?: (completed: number, total: number, agent: string) => void;
/** Maximum concurrent LLM calls (defaults to 10) */
maxConcurrent?: number;
}

/**
* Generate scenarios for all agents.
*
* Uses parallel execution with optional rate limiting.
*
* @param client - Anthropic client
* @param agents - Array of agent components
* @param config - Generation config
* @param onProgress - Optional progress callback
* @param maxConcurrent - Maximum concurrent LLM calls (defaults to 10)
* @param options - Generate all agent scenarios options
* @returns Array of all test scenarios
*/
export async function generateAllAgentScenarios(
client: Anthropic,
agents: AgentComponent[],
config: GenerationConfig,
onProgress?: (completed: number, total: number, agent: string) => void,
maxConcurrent = 10,
options: GenerateAllAgentScenariosOptions,
): Promise<TestScenario[]> {
const { client, agents, config, onProgress, maxConcurrent = 10 } = options;

// Create rate limiter if configured
const rateLimiter = setupRateLimiter(config);

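A possible call site under the new signature, assuming `client`, `agents`, and `config` are already in scope (how they are constructed is outside this diff); only the option names come from the interface added above.

```ts
// Illustrative call using the options object added in this PR; the values for
// client, agents, and config are assumed to exist in the surrounding code.
const allScenarios = await generateAllAgentScenarios({
  client,
  agents,
  config,
  // Optional progress callback, invoked as each agent's scenarios complete.
  onProgress: (completed, total, agent) => {
    console.log(`[${completed}/${total}] generated scenarios for ${agent}`);
  },
  // Optional cap on concurrent LLM calls; defaults to 10 when omitted.
  maxConcurrent: 5,
});
```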
67 changes: 55 additions & 12 deletions src/stages/2-generation/cost-estimator.ts
Expand Up @@ -27,6 +27,40 @@ import type { TextBlockParam } from "@anthropic-ai/sdk/resources/messages/messag
*/
export type SystemPrompt = string | TextBlockParam[];

/**
* Options for countPromptTokens.
*/
export interface CountPromptTokensOptions {
/** Anthropic client */
client: Anthropic;
/** Model to use */
model: string;
/** Prompt text */
prompt: string;
/** Optional per-request timeout in milliseconds (default: 30000 = 30s) */
timeout?: number;
/** Optional system prompt (string or array of text blocks for caching) */
system?: SystemPrompt;
}

/**
* Options for estimateGenerationCost.
*/
export interface EstimateGenerationCostOptions {
/** Anthropic client */
client: Anthropic;
/** Prompts to estimate */
prompts: string[];
/** Model to use */
model: string;
/** Maximum concurrent token counting operations (default: 10) */
concurrency?: number;
/** Timeout for each token counting request in ms (default: 30000) */
tokenCountingTimeout?: number;
/** Optional system prompt to include in token count */
system?: SystemPrompt;
}

/**
* Default SDK timeout in milliseconds (2 minutes).
* This is a conservative default for client-level timeout.
@@ -74,12 +74,9 @@ export function createAnthropicClient(timeout?: number): Anthropic {
* @returns Token count
*/
export async function countPromptTokens(
client: Anthropic,
model: string,
prompt: string,
timeout?: number,
system?: SystemPrompt,
options: CountPromptTokensOptions,
): Promise<number> {
const { client, model, prompt, timeout, system } = options;
const result = await client.messages.countTokens(
{
model: resolveModelId(model),
@@ -111,20 +111,32 @@ const DEFAULT_TOKEN_COUNTING_CONCURRENCY = 10;
* @returns Token estimate
*/
export async function estimateGenerationCost(
client: Anthropic,
prompts: string[],
model: string,
concurrency: number = DEFAULT_TOKEN_COUNTING_CONCURRENCY,
tokenCountingTimeout?: number,
system?: SystemPrompt,
options: EstimateGenerationCostOptions,
): Promise<TokenEstimate> {
const {
client,
prompts,
model,
concurrency = DEFAULT_TOKEN_COUNTING_CONCURRENCY,
tokenCountingTimeout,
system,
} = options;

// Count tokens in parallel for improved performance
// Use continueOnError: false to fail fast - partial results would underestimate costs
const result = await parallel({
items: prompts,
concurrency,
fn: async (prompt) =>
countPromptTokens(client, model, prompt, tokenCountingTimeout, system),
countPromptTokens({
client,
model,
prompt,
...(tokenCountingTimeout !== undefined && {
timeout: tokenCountingTimeout,
}),
...(system !== undefined && { system }),
}),
continueOnError: false,
});
const totalInputTokens = result.results.reduce((a, b) => a + b, 0);
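The conditional spreads in that call, such as `...(tokenCountingTimeout !== undefined && { timeout: tokenCountingTimeout })`, omit optional properties entirely instead of passing an explicit `undefined`. The same idiom in isolation, with illustrative names:

```ts
// The conditional-spread idiom used above, shown in isolation. When the guard
// is false the spread contributes nothing, so the property is left out of the
// object rather than set to undefined; that distinction matters for `in`
// checks and under TypeScript's exactOptionalPropertyTypes flag.
function buildCountRequest(model: string, prompt: string, timeout?: number) {
  return {
    model,
    prompt,
    ...(timeout !== undefined && { timeout }),
  };
}

console.log(buildCountRequest("example-model", "hello"));
// -> { model: "example-model", prompt: "hello" }
console.log(buildCountRequest("example-model", "hello", 30_000));
// -> { model: "example-model", prompt: "hello", timeout: 30000 }
```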
30 changes: 20 additions & 10 deletions src/stages/2-generation/diversity-manager.ts
@@ -58,23 +58,33 @@ export function calculateScenarioDistribution(
};
}

/**
* Options for createBaseScenario.
*/
export interface CreateBaseScenarioOptions {
/** Component reference (name) */
componentRef: string;
/** Type of component */
componentType: ComponentType;
/** The triggering mechanism to preserve */
coreIntent: string;
/** Original prompt */
basePrompt: string;
/** Index for ID generation */
index: number;
}

/**
* Create a base scenario from a component.
*
* @param componentRef - Component reference (name)
* @param componentType - Type of component
* @param coreIntent - The triggering mechanism to preserve
* @param basePrompt - Original prompt
* @param index - Index for ID generation
* @param options - Create base scenario options
* @returns Base scenario
*/
export function createBaseScenario(
componentRef: string,
componentType: ComponentType,
coreIntent: string,
basePrompt: string,
index: number,
options: CreateBaseScenarioOptions,
): BaseScenario {
const { componentRef, componentType, coreIntent, basePrompt, index } =
options;
return {
id: `${componentRef}-base-${String(index)}`,
component_ref: componentRef,
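For completeness, a possible call under the new `createBaseScenario` signature. The property names come from `CreateBaseScenarioOptions`; the concrete values, and the `"agent"` member of `ComponentType`, are made up for illustration.

```ts
// Illustrative call using the options object added in this PR. The value
// "agent" is an assumption about the ComponentType union.
const base = createBaseScenario({
  componentRef: "code-reviewer",
  componentType: "agent",
  coreIntent: "trigger a review of a submitted pull request",
  basePrompt: "Review the attached diff and list any bugs you find.",
  index: 0,
});

// Given the id template shown in the implementation above, base.id would be
// "code-reviewer-base-0".
console.log(base.id);
```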