langwatch · drewdrewthis · Nov 25, 2025 · Nov 25, 2025 · Nov 25, 2025 · Nov 25, 2025
diff --git a/.cursor/rules/file-structure.mdc b/.cursor/rules/file-structure.mdc
@@ -6,7 +6,14 @@ alwaysApply: false
 We want thin files that aim to have a single responsibility and a single export.
 We should have a single export per file.
 
-Specific directories:
-- hooks: for hooks
-- components: for components
-- pages: for pages
+Special files:
+
+## Utils
+File name convention: `*.utils.ts`
+Single export: `XXXUtils`, an object that groups the file's functions
+Example:
+
+export const RunnerUtils = {
+    runnerFunction1,
+    runnerFunction2,
+}
diff --git a/javascript/src/domain/scenarios/index.ts b/javascript/src/domain/scenarios/index.ts
@@ -1,6 +1,7 @@
-import { CoreMessage } from "ai";
+import type { ScenarioExecution, ScenarioExecutionState } from "../..";
 import { AgentAdapter } from "../agents/index";
-import { ScenarioExecutionStateLike, ScenarioResult } from "../core/execution";
+import { ScenarioExecutionStateLike } from "../core/execution";
+export * from "./script-commands";
 
 export const DEFAULT_MAX_TURNS = 10;
 export const DEFAULT_VERBOSE = false;
@@ -86,75 +87,11 @@ export interface ScenarioConfigFinal
   setId?: string;
 }
 
-/**
- * The execution context for a scenario script.
- * This provides the functions to control the flow of the scenario.
- */
-export interface ScenarioExecutionLike {
-  /**
-   * The history of messages in the conversation.
-   */
-  readonly messages: CoreMessage[];
-
-  /**
-   * The ID of the conversation thread.
-   */
-  readonly threadId: string;
-
-  /**
-   * Adds a message to the conversation.
-   * @param message The message to add.
-   */
-  message(message: CoreMessage): Promise<void>;
-  /**
-   * Adds a user message to the conversation.
-   * If no content is provided, the user simulator will generate a message.
-   * @param content The content of the user message.
-   */
-  user(content?: string | CoreMessage): Promise<void>;
-  /**
-   * Adds an agent message to the conversation.
-   * If no content is provided, the agent under test will generate a message.
-   * @param content The content of the agent message.
-   */
-  agent(content?: string | CoreMessage): Promise<void>;
-  /**
-   * Invokes the judge agent to evaluate the current state.
-   * @param content Optional message to the judge.
-   * @returns The result of the scenario if the judge makes a final decision.
-   */
-  judge(content?: string | CoreMessage): Promise<ScenarioResult | null>;
-  /**
-   * Proceeds with the scenario automatically for a number of turns.
-   * @param turns The number of turns to proceed. Defaults to running until the scenario ends.
-   * @param onTurn Optional callback executed at the end of each turn.
-   * @param onStep Optional callback executed after each agent interaction.
-   * @returns The result of the scenario if it ends.
-   */
-  proceed(
-    turns?: number,
-    onTurn?: (state: ScenarioExecutionStateLike) => void | Promise<void>,
-    onStep?: (state: ScenarioExecutionStateLike) => void | Promise<void>
-  ): Promise<ScenarioResult | null>;
-  /**
-   * Ends the scenario with a success.
-   * @param reasoning Optional reasoning for the success.
-   * @returns The final result of the scenario.
-   */
-  succeed(reasoning?: string): Promise<ScenarioResult>;
-  /**
-   * Ends the scenario with a failure.
-   * @param reasoning Optional reasoning for the failure.
-   * @returns The final result of the scenario.
-   */
-  fail(reasoning?: string): Promise<ScenarioResult>;
-}
-
 /**
  * A step in a scenario script.
  * This is a function that takes the current state and an executor, and performs an action.
  */
 export type ScriptStep = (
-  state: ScenarioExecutionStateLike,
-  executor: ScenarioExecutionLike
+  state: ScenarioExecutionState,
+  executor: ScenarioExecution
 ) => Promise<void> | void;
diff --git a/javascript/src/domain/scenarios/script-commands.ts b/javascript/src/domain/scenarios/script-commands.ts
@@ -0,0 +1,80 @@
+import { CoreMessage } from "ai";
+import { ScenarioExecutionStateLike } from "../core/execution";
+
+/**
+ * Represents a typed script command that describes a specific action to take
+ * during scenario execution. Commands are data structures discriminated by
+ * `type`; all but ProceedCommand (which holds callbacks) can be serialized.
+ */
+export type ScriptCommand =
+  | MessageCommand
+  | UserCommand
+  | AgentCommand
+  | JudgeCommand
+  | ProceedCommand
+  | SucceedCommand
+  | FailCommand;
+
+/**
+ * Command to add a specific message directly to the conversation.
+ * Useful for simulating tool responses, system messages, or specific conversational states.
+ */
+export interface MessageCommand {
+  readonly type: "message";
+  readonly message: CoreMessage;
+}
+
+/**
+ * Command to generate or specify a user message in the conversation.
+ * If content is not provided, the user simulator agent will generate content automatically.
+ */
+export interface UserCommand {
+  readonly type: "user";
+  readonly content?: string | CoreMessage;
+}
+
+/**
+ * Command to generate or specify an agent response in the conversation.
+ * If content is not provided, the agent under test will generate content automatically.
+ */
+export interface AgentCommand {
+  readonly type: "agent";
+  readonly content?: string | CoreMessage;
+}
+
+/**
+ * Command to invoke the judge agent to evaluate the current conversation state.
+ * The judge will evaluate based on its configured criteria and may end the scenario.
+ */
+export interface JudgeCommand {
+  readonly type: "judge";
+  readonly content?: string | CoreMessage;
+}
+
+/**
+ * Command to let the scenario proceed automatically for a specified number of turns.
+ * Agents will interact naturally according to their roles until the turn limit is reached
+ * or the judge decides to end the scenario.
+ */
+export interface ProceedCommand {
+  readonly type: "proceed";
+  readonly turns?: number;
+  readonly onTurn?: (state: ScenarioExecutionStateLike) => void | Promise<void>;
+  readonly onStep?: (state: ScenarioExecutionStateLike) => void | Promise<void>;
+}
+
+/**
+ * Command to immediately end the scenario with a success verdict.
+ */
+export interface SucceedCommand {
+  readonly type: "succeed";
+  readonly reasoning?: string;
+}
+
+/**
+ * Command to immediately end the scenario with a failure verdict.
+ */
+export interface FailCommand {
+  readonly type: "fail";
+  readonly reasoning?: string;
+}
diff --git a/javascript/src/execution/scenario-execution-state.ts b/javascript/src/execution/scenario-execution-state.ts
@@ -5,7 +5,7 @@ import {
   CoreUserMessage,
 } from "ai";
 import { Observable, Subject } from "rxjs";
-import { ScenarioExecutionStateLike, ScenarioConfig } from "../domain";
+import { ScenarioConfig, ScenarioResult } from "../domain";
 import { generateMessageId } from "../utils/ids";
 
 // Generic enum - ready for extension
@@ -26,7 +26,7 @@ export type StateChangeEvent = {
  * the internal logic for tracking conversation history, turns, results, and
  * other related information.
  */
-export class ScenarioExecutionState implements ScenarioExecutionStateLike {
+export class ScenarioExecutionState {
   private _messages: (CoreMessage & { id: string })[] = [];
   private _currentTurn: number = 0;
   private _threadId: string = "";
@@ -38,6 +38,7 @@ export class ScenarioExecutionState implements ScenarioExecutionStateLike {
 
   description: string;
   config: ScenarioConfig;
+  result?: ScenarioResult;
 
   constructor(config: ScenarioConfig) {
     this.config = config;
@@ -52,10 +53,6 @@ export class ScenarioExecutionState implements ScenarioExecutionStateLike {
     return this._currentTurn;
   }
 
-  set currentTurn(turn: number) {
-    this._currentTurn = turn;
-  }
-
   get threadId(): string {
     return this._threadId;
   }
@@ -64,6 +61,10 @@ export class ScenarioExecutionState implements ScenarioExecutionStateLike {
     this._threadId = value;
   }
 
+  incrementTurn(): void {
+    this._currentTurn++;
+  }
+
   /**
    * Adds a message to the conversation history.
    *