Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions .cursor/rules/file-structure.mdc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@ alwaysApply: false
We want thin files: each file should have a single responsibility and exactly one export.

Specific directories:
- hooks: for hooks
- components: for components
- pages: for pages
Special files:

## Utils
File name convention: *.utils.ts
Single export: XXXUtils
example:

export const RunnerUtils = {
runnerFunction1,
runnerFunction2,
}
73 changes: 5 additions & 68 deletions javascript/src/domain/scenarios/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { CoreMessage } from "ai";
import { ScenarioExecution, ScenarioExecutionState } from "../..";
import { AgentAdapter } from "../agents/index";
import { ScenarioExecutionStateLike, ScenarioResult } from "../core/execution";
import { ScenarioExecutionStateLike } from "../core/execution";
export * from "./script-commands";

/**
* Turn limit exported as a default value (10).
* NOTE(review): presumably applied when a scenario config omits its own
* max-turns setting — confirm at the point of use.
*/
export const DEFAULT_MAX_TURNS = 10;
/**
* Verbosity flag exported as a default value (off).
* NOTE(review): presumably applied when a scenario config omits its own
* verbose setting — confirm at the point of use.
*/
export const DEFAULT_VERBOSE = false;
Expand Down Expand Up @@ -86,75 +87,11 @@ export interface ScenarioConfigFinal
setId?: string;
}

/**
* The execution context handed to a scenario script.
* It exposes the conversation so far plus the operations a script uses to
* drive the scenario: adding messages, invoking the judge, proceeding
* automatically, and ending with a verdict.
*/
export interface ScenarioExecutionLike {
/**
* The history of messages in the conversation (read-only property).
*/
readonly messages: CoreMessage[];

/**
* The ID of the conversation thread (read-only property).
*/
readonly threadId: string;

/**
* Adds a message to the conversation.
* @param message The message to add.
* @returns A promise that resolves with no value.
*/
message(message: CoreMessage): Promise<void>;
/**
* Adds a user message to the conversation.
* If no content is provided, the user simulator will generate a message.
* @param content The content of the user message, as plain text or a full message object.
* @returns A promise that resolves with no value.
*/
user(content?: string | CoreMessage): Promise<void>;
/**
* Adds an agent message to the conversation.
* If no content is provided, the agent under test will generate a message.
* @param content The content of the agent message, as plain text or a full message object.
* @returns A promise that resolves with no value.
*/
agent(content?: string | CoreMessage): Promise<void>;
/**
* Invokes the judge agent to evaluate the current state.
* @param content Optional message to the judge, as plain text or a full message object.
* @returns The result of the scenario if the judge makes a final decision, otherwise `null`.
*/
judge(content?: string | CoreMessage): Promise<ScenarioResult | null>;
/**
* Proceeds with the scenario automatically for a number of turns.
* @param turns The number of turns to proceed. Defaults to running until the scenario ends.
* @param onTurn Optional callback executed at the end of each turn; may be synchronous or return a promise.
* @param onStep Optional callback executed after each agent interaction; may be synchronous or return a promise.
* @returns The result of the scenario if it ends, otherwise `null`.
*/
proceed(
turns?: number,
onTurn?: (state: ScenarioExecutionStateLike) => void | Promise<void>,
onStep?: (state: ScenarioExecutionStateLike) => void | Promise<void>
): Promise<ScenarioResult | null>;
/**
* Ends the scenario with a success.
* @param reasoning Optional reasoning for the success.
* @returns The final result of the scenario.
*/
succeed(reasoning?: string): Promise<ScenarioResult>;
/**
* Ends the scenario with a failure.
* @param reasoning Optional reasoning for the failure.
* @returns The final result of the scenario.
*/
fail(reasoning?: string): Promise<ScenarioResult>;
}

/**
* A step in a scenario script.
* This is a function that takes the current state and an executor, and performs an action.
*/
export type ScriptStep = (
state: ScenarioExecutionStateLike,
executor: ScenarioExecutionLike
state: ScenarioExecutionState,
executor: ScenarioExecution
) => Promise<void> | void;
80 changes: 80 additions & 0 deletions javascript/src/domain/scenarios/script-commands.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import { CoreMessage } from "ai";
import { ScenarioExecutionStateLike, ScenarioExecutionLike } from "../index";

/**
* Union of every typed script command, each describing one action to take
* during scenario execution. Members are discriminated by their literal
* `type` field ("message", "user", "agent", "judge", "proceed", "succeed",
* "fail"), so checking `type` narrows a value to the matching command.
*
* Commands are readonly data, but not every command is serializable as-is:
* `ProceedCommand` may carry `onTurn` / `onStep` callback functions, which
* do not survive JSON serialization.
*/
export type ScriptCommand =
| MessageCommand
| UserCommand
| AgentCommand
| JudgeCommand
| ProceedCommand
| SucceedCommand
| FailCommand;

/**
* Command to add a specific message directly to the conversation.
* Useful for simulating tool responses, system messages, or specific conversational states.
*/
export interface MessageCommand {
/** Discriminant identifying this command within `ScriptCommand`. */
readonly type: "message";
/** The message to add to the conversation; required. */
readonly message: CoreMessage;
}

/**
* Command to generate or specify a user message in the conversation.
* If content is not provided, the user simulator agent will generate content automatically.
*/
export interface UserCommand {
/** Discriminant identifying this command within `ScriptCommand`. */
readonly type: "user";
/** Optional user message, given either as plain text or as a full message object. */
readonly content?: string | CoreMessage;
}

/**
* Command to generate or specify an agent response in the conversation.
* If content is not provided, the agent under test will generate content automatically.
*/
export interface AgentCommand {
/** Discriminant identifying this command within `ScriptCommand`. */
readonly type: "agent";
/** Optional agent message, given either as plain text or as a full message object. */
readonly content?: string | CoreMessage;
}

/**
* Command to invoke the judge agent to evaluate the current conversation state.
* The judge will evaluate based on its configured criteria and may end the scenario.
*/
export interface JudgeCommand {
/** Discriminant identifying this command within `ScriptCommand`. */
readonly type: "judge";
/** Optional message to the judge, given either as plain text or as a full message object. */
readonly content?: string | CoreMessage;
}

/**
* Command to let the scenario proceed automatically for a specified number of turns.
* Agents will interact naturally according to their roles until the turn limit is reached
* or the judge decides to end the scenario.
*
* Because `onTurn` and `onStep` are functions, a command that sets them is not
* plain serializable data.
*/
export interface ProceedCommand {
/** Discriminant identifying this command within `ScriptCommand`. */
readonly type: "proceed";
/** Optional number of turns to proceed; when omitted, runs until the scenario ends. */
readonly turns?: number;
/** Optional callback executed at the end of each turn; may be synchronous or return a promise. */
readonly onTurn?: (state: ScenarioExecutionStateLike) => void | Promise<void>;
/** Optional callback executed after each agent interaction; may be synchronous or return a promise. */
readonly onStep?: (state: ScenarioExecutionStateLike) => void | Promise<void>;
}

/**
* Command to immediately end the scenario with a success verdict.
*/
export interface SucceedCommand {
/** Discriminant identifying this command within `ScriptCommand`. */
readonly type: "succeed";
/** Optional reasoning for the success. */
readonly reasoning?: string;
}

/**
* Command to immediately end the scenario with a failure verdict.
*/
export interface FailCommand {
/** Discriminant identifying this command within `ScriptCommand`. */
readonly type: "fail";
/** Optional reasoning for the failure. */
readonly reasoning?: string;
}
13 changes: 7 additions & 6 deletions javascript/src/execution/scenario-execution-state.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import {
CoreUserMessage,
} from "ai";
import { Observable, Subject } from "rxjs";
import { ScenarioExecutionStateLike, ScenarioConfig } from "../domain";
import { ScenarioConfig, ScenarioResult } from "../domain";
import { generateMessageId } from "../utils/ids";

// Generic enum - ready for extension
Expand All @@ -26,7 +26,7 @@ export type StateChangeEvent = {
* the internal logic for tracking conversation history, turns, results, and
* other related information.
*/
export class ScenarioExecutionState implements ScenarioExecutionStateLike {
export class ScenarioExecutionState {
private _messages: (CoreMessage & { id: string })[] = [];
private _currentTurn: number = 0;
private _threadId: string = "";
Expand All @@ -38,6 +38,7 @@ export class ScenarioExecutionState implements ScenarioExecutionStateLike {

description: string;
config: ScenarioConfig;
result?: ScenarioResult;

constructor(config: ScenarioConfig) {
this.config = config;
Expand All @@ -52,10 +53,6 @@ export class ScenarioExecutionState implements ScenarioExecutionStateLike {
return this._currentTurn;
}

/**
* Overwrites the private turn counter with the supplied value.
* The value is stored as given; no validation is performed here.
* @param turn The turn number to store.
*/
set currentTurn(turn: number) {
this._currentTurn = turn;
}

/**
* Returns the thread ID currently held in the private `_threadId` field.
*/
get threadId(): string {
return this._threadId;
}
Expand All @@ -64,6 +61,10 @@ export class ScenarioExecutionState implements ScenarioExecutionStateLike {
this._threadId = value;
}

/**
* Advances the private turn counter by exactly one.
*/
incrementTurn(): void {
  this._currentTurn += 1;
}

/**
* Adds a message to the conversation history.
*
Expand Down
Loading