diff --git a/go/agent.go b/go/agent.go
new file mode 100644
index 00000000..73e94117
--- /dev/null
+++ b/go/agent.go
@@ -0,0 +1,71 @@
+package scenario
+
+import (
+ "context"
+
+ "github.com/openai/openai-go"
+)
+
+// AgentRole names the part an agent plays in a scenario (see the AgentRole* constants).
+type AgentRole string
+// AgentReturnKind tags which value field of an AgentReturn is populated.
+type AgentReturnKind int
+
+// Roles an agent can take in a scenario.
+const (
+	AgentRoleAgent AgentRole = "agent"
+	AgentRoleUser  AgentRole = "user"
+	AgentRoleJudge AgentRole = "judge"
+)
+
+// Kinds of value an AgentReturn can carry.
+//
+// These live in their own const block so that iota starts at 0: iota counts
+// every constant spec in the enclosing block, so sharing a block with the
+// three role constants silently started the numbering at 3.
+const (
+	AgentReturnString AgentReturnKind = iota
+	AgentReturnScenarioResult
+	AgentReturnMessages
+	AgentReturnMessage
+)
+
+// AgentConfig holds the settings shared by the LLM-backed agents in this
+// package; it is embedded by JudgeAgentConfig and UserSimulatorAgentConfig.
+type AgentConfig struct {
+ Name string
+
+ Model string // model identifier sent with each chat completion request
+ OpenAIClient *openai.Client // client used to issue chat completion requests
+
+ Temperature *float64 // sampling temperature; the built-in agents send 0.0 when nil
+ MaxTokens *int64 // when non-nil, sent as the request's MaxCompletionTokens
+}
+
+// AgentInput is the argument passed to AgentAdapter.Call.
+type AgentInput struct {
+ ThreadID string
+ Messages []openai.ChatCompletionMessageParamUnion // conversation history; the built-in agents send it after their system prompt
+ NewMessages []openai.ChatCompletionMessageParamUnion // NOTE(review): presumably the messages added since this agent's previous call — confirm
+ RequestedRole AgentRole
+ JudgmentRequest bool // when true, JudgeAgent returns a failed result immediately if it has no criteria
+ ScenarioState ExecutionState // JudgeAgent reads CurrentTurn from it
+ ScenarioConfig ScenarioConfig // the built-in agents read Description (and JudgeAgent MaxTurns) from it
+}
+
+// AgentAdapter is the interface implemented by the agents listed in
+// ScenarioConfig.Agents.
+type AgentAdapter interface {
+ // Role reports which AgentRole this adapter fills.
+ Role() AgentRole
+ // Call runs the agent once against input and returns its output.
+ Call(ctx context.Context, input AgentInput) (*AgentReturn, error)
+}
+
+// AgentReturn is a tagged union of the values an agent call can produce.
+// Kind identifies which of the value fields the constructor populated.
+type AgentReturn struct {
+ Kind AgentReturnKind
+
+ StringValue string // set by NewStringAgentReturn
+ ScenarioResultValue ScenarioResult // set by NewScenarioResultAgentReturn
+ MessagesValue []openai.ChatCompletionMessageParamUnion // set by NewMessagesAgentReturn and NewEmptyAgentReturn
+ MessageValue openai.ChatCompletionMessageParamUnion // set by NewMessageAgentReturn
+}
+
+// NewStringAgentReturn wraps s in an AgentReturn of kind AgentReturnString.
+func NewStringAgentReturn(s string) *AgentReturn {
+	ret := new(AgentReturn)
+	ret.Kind = AgentReturnString
+	ret.StringValue = s
+	return ret
+}
+
+// NewScenarioResultAgentReturn wraps r in an AgentReturn of kind
+// AgentReturnScenarioResult.
+func NewScenarioResultAgentReturn(r ScenarioResult) *AgentReturn {
+	ret := new(AgentReturn)
+	ret.Kind = AgentReturnScenarioResult
+	ret.ScenarioResultValue = r
+	return ret
+}
+
+// NewMessagesAgentReturn wraps msgs in an AgentReturn of kind
+// AgentReturnMessages. The slice is stored as given, not copied.
+func NewMessagesAgentReturn(msgs []openai.ChatCompletionMessageParamUnion) *AgentReturn {
+	ret := new(AgentReturn)
+	ret.Kind = AgentReturnMessages
+	ret.MessagesValue = msgs
+	return ret
+}
+
+// NewEmptyAgentReturn returns an AgentReturn of kind AgentReturnMessages
+// holding an empty, non-nil message slice.
+func NewEmptyAgentReturn() *AgentReturn {
+	ret := new(AgentReturn)
+	ret.Kind = AgentReturnMessages
+	ret.MessagesValue = make([]openai.ChatCompletionMessageParamUnion, 0)
+	return ret
+}
+
+// NewMessageAgentReturn wraps msg in an AgentReturn of kind
+// AgentReturnMessage.
+func NewMessageAgentReturn(msg openai.ChatCompletionMessageParamUnion) *AgentReturn {
+	ret := new(AgentReturn)
+	ret.Kind = AgentReturnMessage
+	ret.MessageValue = msg
+	return ret
+}
diff --git a/go/agent_judge.go b/go/agent_judge.go
new file mode 100644
index 00000000..7a84dce5
--- /dev/null
+++ b/go/agent_judge.go
@@ -0,0 +1,240 @@
+package scenario
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "strings"
+
+ "github.com/langwatch/scenario/go/internal"
+ "github.com/langwatch/scenario/go/internal/libraries/ptr"
+
+ "github.com/openai/openai-go"
+ "github.com/openai/openai-go/shared"
+)
+
+const (
+ // judgePrompt is the default system prompt for JudgeAgent. buildJudgePrompt
+ // fills in the {{.Description}} and {{.FormattedCriteriaList}} placeholders
+ // by plain string replacement.
+ judgePrompt = `
+
+You are an LLM as a judge watching a simulated conversation as it plays out live to determine if the agent under test meets the criteria or not.
+
+
+
+Your goal is to determine if you already have enough information to make a verdict of the scenario below, or if the conversation should continue for longer.
+If you do have enough information, use the finish_test tool to determine if all the criteria have been met, if not, use the continue_test tool to let the next step play out.
+
+
+
+{{.Description}}
+
+
+
+{{.FormattedCriteriaList}}
+
+
+
+- Be strict, do not let the conversation continue if the agent already broke one of the "do not" or "should not" criterias.
+- DO NOT make any judgment calls that are not explicitly listed in the success or failure criteria, withhold judgement if necessary
+
+`
+
+ // lastMessagePrompt is appended as a final user message by JudgeAgent.Call
+ // once the current turn has reached the scenario's MaxTurns.
+ lastMessagePrompt = `
+System:
+
+
+This is the last message, conversation has reached the maximum number of turns, give your final verdict,
+if you don't have enough information to make a verdict, say inconclusive with max turns reached.
+
+`
+)
+
+func buildJudgePrompt(criteria []string, description string) string {
+ formattedCriteriaList := ""
+ for i, criterion := range criteria {
+ formattedCriteriaList += fmt.Sprintf("%d. %s\n", i+1, criterion)
+ }
+
+ populatedPrompt := strings.ReplaceAll(judgePrompt, "{{.FormattedCriteriaList}}", formattedCriteriaList)
+ populatedPrompt = strings.ReplaceAll(populatedPrompt, "{{.Description}}", description)
+
+ return populatedPrompt
+}
+
+// JudgeAgentConfig configures a JudgeAgent.
+type JudgeAgentConfig struct {
+ AgentConfig
+
+ SystemPrompt *string // when non-nil, used instead of the prompt built from judgePrompt
+ Criteria []string // criteria the judge evaluates; also drives the finish_test tool schema
+}
+
+// JudgeAgent is the AgentAdapter for AgentRoleJudge: it asks a chat model,
+// via tool calls, whether the conversation should continue or be given a verdict.
+type JudgeAgent struct {
+ cfg JudgeAgentConfig
+}
+
+// NewJudgeAgent returns a JudgeAgent that uses cfg.
+func NewJudgeAgent(cfg JudgeAgentConfig) *JudgeAgent {
+	agent := new(JudgeAgent)
+	agent.cfg = cfg
+	return agent
+}
+
+// Role reports AgentRoleJudge.
+func (a *JudgeAgent) Role() AgentRole {
+ return AgentRoleJudge
+}
+
+// Call asks the judge model what to do with the conversation in input.
+//
+// It returns:
+//   - an empty messages return when the model calls continue_test;
+//   - a ScenarioResult return when the model calls finish_test, or when a
+//     judgment is requested but no criteria are configured;
+//   - an error when no client is configured, the request fails, or the
+//     response is not a recognised function tool call.
+//
+// Only the first tool call of the first choice is considered.
+func (a *JudgeAgent) Call(ctx context.Context, input AgentInput) (*AgentReturn, error) {
+	// A forced judgment without criteria can be answered without the model.
+	if input.JudgmentRequest && len(a.cfg.Criteria) == 0 {
+		return NewScenarioResultAgentReturn(ScenarioResult{
+			Success:       false,
+			Messages:      []openai.ChatCompletionMessageParamUnion{},
+			Reasoning:     ptr.Ptr("TestingAgent was called as a judge, but it has no criteria to judge against"),
+			MetCriteria:   []string{},
+			UnmetCriteria: []string{},
+		}), nil
+	}
+
+	// Fail with an error instead of a nil-pointer panic further down.
+	if a.cfg.OpenAIClient == nil {
+		return nil, errors.New("judge agent has no OpenAI client configured")
+	}
+
+	var systemPrompt string
+	if a.cfg.SystemPrompt != nil {
+		systemPrompt = *a.cfg.SystemPrompt
+	} else {
+		systemPrompt = buildJudgePrompt(a.cfg.Criteria, input.ScenarioConfig.Description)
+	}
+
+	messages := append(
+		[]openai.ChatCompletionMessageParamUnion{openai.SystemMessage(systemPrompt)},
+		input.Messages...,
+	)
+
+	// Once the turn limit is reached, ask for a final verdict. ScenarioState is
+	// an interface and may be left unset by the caller, so guard the call.
+	if input.ScenarioState != nil && input.ScenarioState.CurrentTurn() >= input.ScenarioConfig.MaxTurns {
+		messages = append(messages, openai.UserMessage(lastMessagePrompt))
+	}
+
+	params := openai.ChatCompletionNewParams{
+		Messages:    messages,
+		Model:       a.cfg.Model,
+		Temperature: openai.Opt(ptr.ValueOrDefault(a.cfg.Temperature, 0.0)),
+		Tools:       createJudgeAgentTools(a.cfg.Criteria),
+	}
+	if a.cfg.MaxTokens != nil {
+		params.MaxCompletionTokens = openai.Opt(*a.cfg.MaxTokens)
+	}
+
+	completion, err := a.cfg.OpenAIClient.Chat.Completions.New(ctx, params)
+	if err != nil {
+		return nil, fmt.Errorf("judge agent completion request: %w", err)
+	}
+
+	if len(completion.Choices) == 0 {
+		return nil, errors.New("judge agent had no response choices")
+	}
+
+	completionChoice := completion.Choices[0]
+	if len(completionChoice.Message.ToolCalls) == 0 {
+		return nil, errors.New("judge agent response has no tool calls")
+	}
+
+	toolCall := completionChoice.Message.ToolCalls[0]
+	if toolCall.Type != "function" {
+		return nil, errors.New("judge agent response tool call is of an unknown type")
+	}
+
+	switch toolCall.Function.Name {
+	case "continue_test":
+		return NewEmptyAgentReturn(), nil
+
+	case "finish_test":
+		toolArguments, err := internal.ParseJudgeAgentFinishTestToolArguments(toolCall.Function.Arguments)
+		if err != nil {
+			// Keep the cause; this used to be replaced by an empty error.
+			return nil, fmt.Errorf("judge agent finish_test arguments: %w", err)
+		}
+
+		passedCriteria := []string{}
+		failedCriteria := []string{}
+
+		// Walk the configured criteria rather than ranging over the decoded
+		// map, so the met/unmet lists have a stable order between runs. The
+		// lookup key is the same parameter name the tool schema was built with.
+		seen := make(map[string]struct{}, len(a.cfg.Criteria))
+		for _, criterion := range a.cfg.Criteria {
+			key := criterionNameToParamName(criterion)
+			if _, dup := seen[key]; dup {
+				continue
+			}
+			seen[key] = struct{}{}
+
+			met, ok := toolArguments.Criteria[key].(bool)
+			if !ok {
+				// Missing or non-boolean (e.g. "inconclusive"): neither met nor unmet.
+				continue
+			}
+
+			if met {
+				passedCriteria = append(passedCriteria, key)
+			} else {
+				failedCriteria = append(failedCriteria, key)
+			}
+		}
+
+		return NewScenarioResultAgentReturn(ScenarioResult{
+			Success:       toolArguments.Verdict == "success" && len(failedCriteria) == 0,
+			Messages:      messages,
+			Reasoning:     ptr.Ptr(toolArguments.Reasoning),
+			MetCriteria:   passedCriteria,
+			UnmetCriteria: failedCriteria,
+		}), nil
+
+	default:
+		return nil, errors.New("judge agent response tool call is not of a known name")
+	}
+}
+
+// createJudgeAgentTools builds the two function tools offered to the judge
+// model: continue_test (no arguments) and finish_test, whose "criteria"
+// object has one required property per criterion, keyed by
+// criterionNameToParamName and allowing true, false or "inconclusive".
+//
+// Every nested schema object is a map[string]any: JSON encoding rejects maps
+// with interface-typed keys, so map[any]any values would make the request
+// fail to serialise.
+func createJudgeAgentTools(criteria []string) []openai.ChatCompletionToolParam {
+	criteriaProps := make(map[string]any, len(criteria))
+	// Kept non-nil so "required" encodes as [] rather than null when there
+	// are no criteria.
+	criteriaNames := make([]string, 0, len(criteria))
+	for _, criterion := range criteria {
+		paramName := criterionNameToParamName(criterion)
+		criteriaNames = append(criteriaNames, paramName)
+		criteriaProps[paramName] = map[string]any{
+			"enum":        []any{true, false, "inconclusive"},
+			"description": criterion,
+		}
+	}
+
+	continueTool := openai.ChatCompletionToolParam{
+		Type: "function",
+		Function: shared.FunctionDefinitionParam{
+			Name:        "continue_test",
+			Description: openai.Opt("Continue the test with the next step"),
+			Strict:      openai.Opt(true),
+			Parameters: openai.FunctionParameters{
+				"type":                 "object",
+				"properties":           map[string]any{},
+				"required":             []any{},
+				"additionalProperties": false,
+			},
+		},
+	}
+
+	finishTool := openai.ChatCompletionToolParam{
+		Type: "function",
+		Function: shared.FunctionDefinitionParam{
+			Name:        "finish_test",
+			Description: openai.Opt("Complete the test with a final verdict"),
+			Strict:      openai.Opt(true),
+			Parameters: openai.FunctionParameters{
+				"type": "object",
+				"properties": map[string]any{
+					"criteria": map[string]any{
+						"type":                 "object",
+						"properties":           criteriaProps,
+						"required":             criteriaNames,
+						"additionalProperties": false,
+						"description":          "Strict verdict for each criterion",
+					},
+					"reasoning": map[string]any{
+						"type":        "string",
+						"description": "Explanation of what the final verdict should be",
+					},
+					"verdict": map[string]any{
+						"type":        "string",
+						"enum":        []any{"success", "failure", "inconclusive"},
+						"description": "The final verdict of the test",
+					},
+				},
+				"required":             []any{"criteria", "reasoning", "verdict"},
+				"additionalProperties": false,
+			},
+		},
+	}
+
+	return []openai.ChatCompletionToolParam{continueTool, finishTool}
+}
diff --git a/go/agent_user_simulator.go b/go/agent_user_simulator.go
new file mode 100644
index 00000000..11920890
--- /dev/null
+++ b/go/agent_user_simulator.go
@@ -0,0 +1,91 @@
+package scenario
+
+import (
+ "context"
+ "errors"
+ "strings"
+
+ "github.com/langwatch/scenario/go/internal/libraries/ptr"
+
+ "github.com/openai/openai-go"
+)
+
+// userSimulatorPrompt is the default system prompt for UserSimulatorAgent.
+// buildUserSimulatorPrompt replaces the {{.Description}} placeholder.
+const userSimulatorPrompt = `
+
+You are pretending to be a user, you are testing an AI Agent (shown as the user role) based on a scenario.
+Approach this naturally, as a human user would, with very short inputs, few words, all lowercase, imperative, not periods, like when they google or talk to chatgpt.
+
+
+
+Your goal (assistant) is to interact with the Agent Under Test (user) as if you were a human user to see if it can complete the scenario successfully.
+
+
+
+{{.Description}}
+
+
+
+- DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user
+
+`
+
+func buildUserSimulatorPrompt(description string) string {
+ // NOTE(afr): Change this to a template
+ return strings.ReplaceAll(userSimulatorPrompt, "{{.Description}}", description)
+}
+
+// UserSimulatorAgentConfig configures a UserSimulatorAgent.
+type UserSimulatorAgentConfig struct {
+ AgentConfig
+
+ SystemPrompt *string // when non-nil, used instead of the prompt built from userSimulatorPrompt
+}
+
+// UserSimulatorAgent is the AgentAdapter for AgentRoleUser: it asks a chat
+// model to produce the next message of the conversation.
+type UserSimulatorAgent struct {
+ cfg UserSimulatorAgentConfig
+}
+
+// NewUserSimulatorAgent returns a UserSimulatorAgent that uses cfg.
+func NewUserSimulatorAgent(cfg UserSimulatorAgentConfig) *UserSimulatorAgent {
+	agent := new(UserSimulatorAgent)
+	agent.cfg = cfg
+	return agent
+}
+
+// Role reports AgentRoleUser.
+func (a *UserSimulatorAgent) Role() AgentRole {
+ return AgentRoleUser
+}
+
+// Call sends the system prompt followed by input.Messages to the model and
+// returns the first choice as a single-message AgentReturn.
+//
+// It returns an error when no client is configured, the request fails, or
+// the response has no choices.
+//
+// NOTE(review): the history is sent with its roles unchanged and the reply is
+// returned as an assistant message, while the default prompt describes the
+// agent under test as the "user" role. Presumably the caller swaps roles
+// around this call — confirm.
+func (a *UserSimulatorAgent) Call(ctx context.Context, input AgentInput) (*AgentReturn, error) {
+	// Fail with an error instead of a nil-pointer panic further down.
+	if a.cfg.OpenAIClient == nil {
+		return nil, errors.New("user simulator agent has no OpenAI client configured")
+	}
+
+	var systemPrompt string
+	if a.cfg.SystemPrompt != nil {
+		systemPrompt = *a.cfg.SystemPrompt
+	} else {
+		systemPrompt = buildUserSimulatorPrompt(input.ScenarioConfig.Description)
+	}
+
+	messages := append(
+		[]openai.ChatCompletionMessageParamUnion{openai.SystemMessage(systemPrompt)},
+		input.Messages...,
+	)
+
+	params := openai.ChatCompletionNewParams{
+		Messages:    messages,
+		Model:       a.cfg.Model,
+		Temperature: openai.Opt(ptr.ValueOrDefault(a.cfg.Temperature, 0.0)),
+	}
+	if a.cfg.MaxTokens != nil {
+		params.MaxCompletionTokens = openai.Opt(*a.cfg.MaxTokens)
+	}
+
+	completion, err := a.cfg.OpenAIClient.Chat.Completions.New(ctx, params)
+	if err != nil {
+		return nil, err
+	}
+
+	if len(completion.Choices) == 0 {
+		return nil, errors.New("user simulator agent had no response choices")
+	}
+
+	return NewMessageAgentReturn(openai.ChatCompletionMessageParamUnion{
+		OfAssistant: ptr.Ptr(completion.Choices[0].Message.ToAssistantMessageParam()),
+	}), nil
+}
diff --git a/go/config.go b/go/config.go
new file mode 100644
index 00000000..78efc278
--- /dev/null
+++ b/go/config.go
@@ -0,0 +1,14 @@
+package scenario
+
+// ScenarioConfig describes a single scenario run.
+type ScenarioConfig struct {
+ ID string // reported as ScenarioID in RunStartedEvent
+ Name string // reported as ScenarioName in RunStartedEvent
+ Description string // substituted into the default judge and user-simulator prompts
+ Agents []AgentAdapter
+ Script []ScriptStep
+
+ MaxTurns int // JudgeAgent asks for a final verdict once the current turn reaches this value
+ ThreadID string // copied into the execution state
+ SetID string
+ BatchRunID string
+}
diff --git a/go/domain.go b/go/domain.go
new file mode 100644
index 00000000..b6227b92
--- /dev/null
+++ b/go/domain.go
@@ -0,0 +1,84 @@
+package scenario
+
+import (
+ "context"
+ "time"
+
+ "github.com/openai/openai-go"
+)
+
+// ScriptStep is one step of a scenario script. ScenarioExecution.Run calls
+// the steps in order; a non-nil result ends the run with that result, and a
+// non-nil error aborts the run.
+type ScriptStep func(
+ ctx context.Context,
+ execution Execution,
+ state ExecutionState,
+) (*ScenarioResult, error)
+
+// ProceedCallback is the callback type accepted by WithProceedOnTurn and
+// WithProceedOnStep.
+type ProceedCallback func(state ExecutionState) error
+
+// ExecutionState exposes the conversation state of a running scenario to
+// script steps and agents. It is implemented by *ScenarioExecutionState.
+type ExecutionState interface {
+ Config() ScenarioConfig
+ Description() string
+ Messages() []openai.ChatCompletionMessageParamUnion
+ ThreadID() string
+ CurrentTurn() int
+
+ // AddMessage appends message to the conversation history.
+ AddMessage(message openai.ChatCompletionMessageParamUnion)
+
+ // LastMessage and LastUserMessage return an error when no such message exists.
+ LastMessage() (*openai.ChatCompletionMessageParamUnion, error)
+ LastUserMessage() (*openai.ChatCompletionUserMessageParam, error)
+
+ // LastToolCall returns the most recent tool result message for toolName
+ // together with the assistant tool call it answers.
+ LastToolCall(toolName string) (*openai.ChatCompletionToolMessageParam, *openai.ChatCompletionMessageToolCallParam, error)
+ // HasToolCall reports whether LastToolCall would succeed for toolName.
+ HasToolCall(toolName string) bool
+}
+
+// Execution is the handle a ScriptStep uses to drive a scenario. It is
+// implemented by *ScenarioExecution.
+type Execution interface {
+ Messages() []openai.ChatCompletionMessageParamUnion
+ ThreadID() string
+
+ // Run executes the whole script and returns the scenario's result.
+ Run(ctx context.Context) *ScenarioResult
+
+ Message(ctx context.Context, message openai.ChatCompletionMessageParamUnion) error
+
+ UserString(ctx context.Context, content string) error
+ UserMessage(ctx context.Context, message openai.ChatCompletionUserMessageParam) error
+
+ AgentString(ctx context.Context, content string) error
+ AgentMessage(ctx context.Context, message openai.ChatCompletionAssistantMessageParam) error
+
+ JudgeString(ctx context.Context, content string) (*ScenarioResult, error)
+ JudgeMessage(ctx context.Context, message openai.ChatCompletionMessageParamUnion) (*ScenarioResult, error)
+
+ // Proceed accepts functional options; see ProceedOptions.
+ Proceed(ctx context.Context, opts ...ProceedOption) (*ScenarioResult, error)
+
+ Succeed(ctx context.Context, reasoning string) (*ScenarioResult, error)
+ Fail(ctx context.Context, reasoning string) (*ScenarioResult, error)
+}
+
+// ProceedOptions collects the settings applied by ProceedOption functions.
+type ProceedOptions struct {
+ Turns int // set by WithProceedTurns
+ OnTurn ProceedCallback // set by WithProceedOnTurn
+ OnStep ProceedCallback // set by WithProceedOnStep
+}
+
+// ProceedOption mutates a ProceedOptions; pass values to Execution.Proceed.
+type ProceedOption func(*ProceedOptions)
+
+// WithProceedTurns returns an option that sets ProceedOptions.Turns.
+func WithProceedTurns(turns int) ProceedOption {
+	return func(target *ProceedOptions) {
+		target.Turns = turns
+	}
+}
+
+// WithProceedOnTurn returns an option that sets ProceedOptions.OnTurn.
+func WithProceedOnTurn(onTurn ProceedCallback) ProceedOption {
+	return func(target *ProceedOptions) {
+		target.OnTurn = onTurn
+	}
+}
+
+// WithProceedOnStep returns an option that sets ProceedOptions.OnStep.
+func WithProceedOnStep(onStep ProceedCallback) ProceedOption {
+	return func(target *ProceedOptions) {
+		target.OnStep = onStep
+	}
+}
+
+// ScenarioResult is the outcome of a scenario run or of a judge verdict.
+type ScenarioResult struct {
+ Success bool
+ Messages []openai.ChatCompletionMessageParamUnion
+ Reasoning *string
+ MetCriteria []string // criteria the judge marked true
+ UnmetCriteria []string // criteria the judge marked false
+ TotalTime *time.Duration // ScenarioExecution.Run sets this on its fallback failure result
+ AgentTime *time.Duration
+ Error *string // set by ScenarioExecution.Run when no step produced a result
+}
diff --git a/go/execution.go b/go/execution.go
new file mode 100644
index 00000000..aa6ceb07
--- /dev/null
+++ b/go/execution.go
@@ -0,0 +1,132 @@
+package scenario
+
+import (
+ "context"
+ "errors"
+ "time"
+
+ "github.com/langwatch/scenario/go/internal/libraries/ptr"
+ "github.com/openai/openai-go"
+)
+
+// ScenarioExecution runs a scenario script against a single execution state,
+// publishing events to an optional EventBus.
+type ScenarioExecution struct {
+ cfg ScenarioConfig
+ state *ScenarioExecutionState
+ eventBus EventBus // may be nil, in which case events are dropped
+ script []ScriptStep
+}
+
+// NewScenarioExecution returns an execution for cfg with a fresh state. The
+// given script is the one Run executes; eventBus may be nil.
+func NewScenarioExecution(cfg ScenarioConfig, script []ScriptStep, eventBus EventBus) *ScenarioExecution {
+	exec := new(ScenarioExecution)
+	exec.cfg = cfg
+	exec.state = NewScenarioExecutionState(cfg)
+	exec.eventBus = eventBus
+	exec.script = script
+	return exec
+}
+
+// State returns the execution's state; this is the same pointer Run passes
+// to every script step.
+func (e *ScenarioExecution) State() *ScenarioExecutionState {
+ return e.state
+}
+
+// Execute runs the scenario from start to finish, emitting events as it goes.
+func (e *ScenarioExecution) Run(ctx context.Context) *ScenarioResult {
+ e.emitEvent(RunStartedEvent{
+ timestamp: time.Now(),
+ ScenarioID: e.cfg.ID,
+ ScenarioName: e.cfg.Name,
+ Description: e.cfg.Description,
+ })
+
+ for _, step := range e.script {
+ if ctx.Err() != nil {
+ e.emitEvent(ErrorEvent{timestamp: time.Now(), Error: ctx.Err(), Fatal: true})
+ break
+ }
+
+ result, err := step(ctx, e, e.state)
+ if err != nil {
+ e.emitEvent(ErrorEvent{timestamp: time.Now(), Error: err, Fatal: true})
+ break
+ }
+
+ e.emitEvent(MessageSnapshotEvent{timestamp: time.Now(), Messages: e.state.Messages()})
+
+ if result != nil {
+ e.emitEvent(RunFinishedEvent{timestamp: time.Now(), Result: result})
+ return result
+ }
+ }
+
+ // If no result, treat as failure
+ // TODO(afr): Proper error here
+ failResult := &ScenarioResult{
+ Success: false,
+ Messages: e.state.messages,
+ Reasoning: nil,
+ MetCriteria: []string{},
+ UnmetCriteria: []string{},
+ TotalTime: ptr.Ptr(time.Since(e.state.startedAt)),
+ Error: ptr.Ptr("no result was created"),
+ }
+ e.emitEvent(RunFinishedEvent{timestamp: time.Now(), Result: failResult})
+
+ return failResult
+}
+
+// Messages returns a copy of the state's message history.
+func (e *ScenarioExecution) Messages() []openai.ChatCompletionMessageParamUnion {
+ return e.state.Messages()
+}
+
+// ThreadID returns the state's thread ID.
+func (e *ScenarioExecution) ThreadID() string {
+ return e.state.ThreadID()
+}
+
+// Message is not implemented yet; it always returns an error.
+func (e *ScenarioExecution) Message(ctx context.Context, message openai.ChatCompletionMessageParamUnion) error {
+ return errors.New("execution Message not implemented")
+}
+
+// UserString is not implemented yet; it always returns an error.
+func (e *ScenarioExecution) UserString(ctx context.Context, content string) error {
+ return errors.New("execution UserString not implemented")
+}
+// UserMessage is not implemented yet; it always returns an error.
+func (e *ScenarioExecution) UserMessage(ctx context.Context, message openai.ChatCompletionUserMessageParam) error {
+ return errors.New("execution UserMessage not implemented")
+}
+
+// AgentString is not implemented yet; it always returns an error.
+func (e *ScenarioExecution) AgentString(ctx context.Context, content string) error {
+ return errors.New("execution AgentString not implemented")
+}
+// AgentMessage is not implemented yet; it always returns an error.
+func (e *ScenarioExecution) AgentMessage(ctx context.Context, message openai.ChatCompletionAssistantMessageParam) error {
+ return errors.New("execution AgentMessage not implemented")
+}
+
+// JudgeString is not implemented yet; it always returns a nil result and an error.
+func (e *ScenarioExecution) JudgeString(ctx context.Context, content string) (*ScenarioResult, error) {
+ return nil, errors.New("execution JudgeString not implemented")
+}
+// JudgeMessage is not implemented yet; it always returns a nil result and an error.
+func (e *ScenarioExecution) JudgeMessage(ctx context.Context, message openai.ChatCompletionMessageParamUnion) (*ScenarioResult, error) {
+ return nil, errors.New("execution JudgeMessage not implemented")
+}
+
+// Proceed applies opts to a fresh ProceedOptions but is otherwise not
+// implemented yet: the options are not used and it always returns an error.
+func (e *ScenarioExecution) Proceed(ctx context.Context, opts ...ProceedOption) (*ScenarioResult, error) {
+ // Build options from variadic
+ options := &ProceedOptions{}
+ for _, opt := range opts {
+ opt(options)
+ }
+ // Use options.Turns, options.OnTurn, options.OnStep as needed
+ return nil, errors.New("execution Proceed not implemented")
+}
+
+// Succeed ends the scenario with a successful result that carries reasoning,
+// a copy of the messages so far and the time elapsed since the state was
+// created. It never returns an error.
+func (e *ScenarioExecution) Succeed(ctx context.Context, reasoning string) (*ScenarioResult, error) {
+	// A zero ScenarioResult has Success == false, which would make a
+	// "succeeded" scenario read as a failure.
+	return &ScenarioResult{
+		Success:       true,
+		Messages:      e.state.Messages(),
+		Reasoning:     ptr.Ptr(reasoning),
+		MetCriteria:   []string{},
+		UnmetCriteria: []string{},
+		TotalTime:     ptr.Ptr(time.Since(e.state.startedAt)),
+	}, nil
+}
+// Fail is not implemented yet; it always returns a nil result and an error.
+func (e *ScenarioExecution) Fail(ctx context.Context, reasoning string) (*ScenarioResult, error) {
+ return nil, errors.New("execution Fail not implemented")
+}
+
+// emitEvent publishes event on the event bus; without a bus it does nothing.
+func (e *ScenarioExecution) emitEvent(event ScenarioEvent) {
+	if e.eventBus == nil {
+		return
+	}
+	e.eventBus.Publish(event)
+}
diff --git a/go/executionstate.go b/go/executionstate.go
new file mode 100644
index 00000000..91515c29
--- /dev/null
+++ b/go/executionstate.go
@@ -0,0 +1,124 @@
+package scenario
+
+import (
+ "errors"
+ "time"
+
+ "github.com/langwatch/scenario/go/internal/libraries/ptr"
+ "github.com/openai/openai-go"
+)
+
+// ScenarioExecutionState holds the state of a scenario execution: the message
+// history, the current turn and the configuration it was created from.
+// The message history is mutable: AddMessage appends to it in place.
+type ScenarioExecutionState struct {
+ messages []openai.ChatCompletionMessageParamUnion
+ currentTurn int
+ threadID string
+ description string
+ config ScenarioConfig
+ startedAt time.Time // set when the state is created; used to compute total run time
+}
+
+// NewScenarioExecutionState returns the initial state for cfg: no messages,
+// turn 0, thread ID and description taken from cfg, and the start time set
+// to now.
+func NewScenarioExecutionState(cfg ScenarioConfig) *ScenarioExecutionState {
+	state := new(ScenarioExecutionState)
+	state.messages = make([]openai.ChatCompletionMessageParamUnion, 0)
+	state.currentTurn = 0
+	state.threadID = cfg.ThreadID
+	state.description = cfg.Description
+	state.config = cfg
+	state.startedAt = time.Now()
+	return state
+}
+
+// AddMessage appends msg to the end of the message history.
+func (s *ScenarioExecutionState) AddMessage(msg openai.ChatCompletionMessageParamUnion) {
+ s.messages = append(s.messages, msg)
+}
+
+// LastMessage returns a pointer to the most recent message in the history,
+// or an error when the history is empty. The pointer refers to the stored
+// element, not to a copy.
+func (s *ScenarioExecutionState) LastMessage() (*openai.ChatCompletionMessageParamUnion, error) {
+	count := len(s.messages)
+	if count > 0 {
+		return &s.messages[count-1], nil
+	}
+
+	return nil, errors.New("no messages in execution state history")
+}
+
+// LastUserMessage scans the history from newest to oldest and returns the
+// user payload of the first message whose role is "user", or an error when
+// there is none.
+func (s *ScenarioExecutionState) LastUserMessage() (*openai.ChatCompletionUserMessageParam, error) {
+	idx := len(s.messages)
+	for idx > 0 {
+		idx--
+		candidate := s.messages[idx]
+
+		if role := ptr.ValueOrZero(candidate.GetRole()); role != "user" {
+			continue
+		}
+		return candidate.OfUser, nil
+	}
+
+	return nil, errors.New("no user messages in execution state history")
+}
+
+// LastToolCall returns the most recent tool result message that answers a
+// call to toolName, together with the assistant tool call it answers.
+//
+// It first indexes every assistant tool call named toolName by its call ID,
+// then scans the history from newest to oldest for a tool message whose
+// ToolCallID is in that index. It returns an error when there is no such
+// tool message, even if the assistant made the call.
+func (s *ScenarioExecutionState) LastToolCall(toolName string) (*openai.ChatCompletionToolMessageParam, *openai.ChatCompletionMessageToolCallParam, error) {
+ toolCallIDToParam := make(map[string]*openai.ChatCompletionMessageToolCallParam)
+
+ // collect tool call ids/params
+ // (iterating newest to oldest means that, for a repeated call ID, the
+ // oldest matching call is the one left in the map)
+ for i := len(s.messages) - 1; i >= 0; i-- {
+ msg := s.messages[i]
+ if ptr.ValueOrZero(msg.GetRole()) == "assistant" && msg.OfAssistant != nil {
+ for _, tc := range msg.OfAssistant.ToolCalls {
+ // Only store if matches toolName
+ if tc.Function.Name == toolName {
+ // Need pointer to tc for return
+ tcCopy := tc
+ toolCallIDToParam[tc.ID] = &tcCopy
+ }
+ }
+ }
+ }
+
+ // find the last tool message with a matching tool name
+ for i := len(s.messages) - 1; i >= 0; i-- {
+ msg := s.messages[i]
+ if ptr.ValueOrZero(msg.GetRole()) != "tool" || msg.OfTool == nil {
+ continue
+ }
+ toolMsg := msg.OfTool
+ toolCallID := toolMsg.ToolCallID
+ if tc, ok := toolCallIDToParam[toolCallID]; ok {
+ return toolMsg, tc, nil
+ }
+ }
+
+ return nil, nil, errors.New("no tool call result for tool '" + toolName + "' in execution state history")
+}
+
+// HasToolCall reports whether LastToolCall finds a tool result for toolName.
+func (s *ScenarioExecutionState) HasToolCall(toolName string) bool {
+	_, _, lookupErr := s.LastToolCall(toolName)
+	return lookupErr == nil
+}
+
+// Messages returns a copy of the message slice, so callers cannot change the
+// state's history through it. The result is nil when there are no messages.
+func (s *ScenarioExecutionState) Messages() []openai.ChatCompletionMessageParamUnion {
+ return append([]openai.ChatCompletionMessageParamUnion(nil), s.messages...)
+}
+
+// CurrentTurn returns the current turn number (0 for a new state).
+func (s *ScenarioExecutionState) CurrentTurn() int {
+ return s.currentTurn
+}
+
+// ThreadID returns the thread ID taken from the scenario config.
+func (s *ScenarioExecutionState) ThreadID() string {
+ return s.threadID
+}
+
+// Description returns the scenario description taken from the scenario config.
+func (s *ScenarioExecutionState) Description() string {
+ return s.description
+}
+
+// Config returns the scenario config the state was created with.
+func (s *ScenarioExecutionState) Config() ScenarioConfig {
+ return s.config
+}
diff --git a/go/go.mod b/go/go.mod
new file mode 100644
index 00000000..5728f790
--- /dev/null
+++ b/go/go.mod
@@ -0,0 +1,15 @@
+module github.com/langwatch/scenario/go
+
+go 1.24.2
+
+require (
+ github.com/jamescun/basex v0.0.0-20180407124237-e1bcb39ab18e // indirect
+ github.com/matryer/is v1.4.1 // indirect
+ github.com/openai/openai-go v1.8.2 // indirect
+ github.com/quasilyte/go-ruleguard/dsl v0.3.22 // indirect
+ github.com/tidwall/gjson v1.14.4 // indirect
+ github.com/tidwall/match v1.1.1 // indirect
+ github.com/tidwall/pretty v1.2.1 // indirect
+ github.com/tidwall/sjson v1.2.5 // indirect
+ github.com/wearemojo/mojo-public-go v0.0.0-20250626105007-9f59be427015 // indirect
+)
diff --git a/go/go.sum b/go/go.sum
new file mode 100644
index 00000000..c56f9302
--- /dev/null
+++ b/go/go.sum
@@ -0,0 +1,20 @@
+github.com/jamescun/basex v0.0.0-20180407124237-e1bcb39ab18e h1:hNqjOAKBqcxNss3cdPCnvrWlprJqMvXvfY0EMKqwZ4E=
+github.com/jamescun/basex v0.0.0-20180407124237-e1bcb39ab18e/go.mod h1:lnTW2aVT/MijZZXFVlMJOFaIk9ypxRMVOB9xtNJaHoQ=
+github.com/matryer/is v1.4.1 h1:55ehd8zaGABKLXQUe2awZ99BD/PTc2ls+KV/dXphgEQ=
+github.com/matryer/is v1.4.1/go.mod h1:8I/i5uYgLzgsgEloJE1U6xx5HkBQpAZvepWuujKwMRU=
+github.com/openai/openai-go v1.8.2 h1:UqSkJ1vCOPUpz9Ka5tS0324EJFEuOvMc+lA/EarJWP8=
+github.com/openai/openai-go v1.8.2/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y=
+github.com/quasilyte/go-ruleguard/dsl v0.3.22 h1:wd8zkOhSNr+I+8Qeciml08ivDt1pSXe60+5DqOpCjPE=
+github.com/quasilyte/go-ruleguard/dsl v0.3.22/go.mod h1:KeCP03KrjuSO0H1kTuZQCWlQPulDV6YMIXmpQss17rU=
+github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/gjson v1.14.4 h1:uo0p8EbA09J7RQaflQ1aBRffTR7xedD2bcIVSYxLnkM=
+github.com/tidwall/gjson v1.14.4/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
+github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
+github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
+github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
+github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
+github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
+github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
+github.com/wearemojo/mojo-public-go v0.0.0-20250626105007-9f59be427015 h1:fqqTDbjpf2Qy0lS6rFshhp6Hets3TowoCBa1xcsl47M=
+github.com/wearemojo/mojo-public-go v0.0.0-20250626105007-9f59be427015/go.mod h1:+KCKWpp9pBdmZVLwBXtj4wrvmXuMVq4SfJqDKENn1JU=
diff --git a/go/ids.go b/go/ids.go
new file mode 100644
index 00000000..290cae1d
--- /dev/null
+++ b/go/ids.go
@@ -0,0 +1,11 @@
+package scenario
+
+import (
+ "context"
+
+ "github.com/langwatch/scenario/go/internal/libraries/ksuid"
+)
+
+// generateThreadID returns the string form of a newly generated ksuid for
+// the "thread" resource.
+func generateThreadID(ctx context.Context) string {
+	id := ksuid.Generate(ctx, "thread")
+	return id.String()
+}
diff --git a/go/internal/judge_agent_tools.go b/go/internal/judge_agent_tools.go
new file mode 100644
index 00000000..aedbf221
--- /dev/null
+++ b/go/internal/judge_agent_tools.go
@@ -0,0 +1,31 @@
+package internal
+
+import (
+ "encoding/json"
+ "fmt"
+)
+
+// JudgeAgentFinishTestToolArguments is the decoded argument object of the
+// judge's finish_test tool call. The fields carry no JSON tags, so they are
+// matched against the JSON keys by name, case-insensitively.
+type JudgeAgentFinishTestToolArguments struct {
+ Verdict string
+ Reasoning string
+ Criteria map[string]interface{} // per-criterion verdicts as decoded from JSON
+}
+
+// ParseJudgeAgentFinishTestToolArguments decodes the JSON arguments of a
+// finish_test tool call and fills in defaults for missing fields: verdict
+// "inconclusive", reasoning "No reasoning provided" and an empty criteria
+// map. It returns an error when arguments is not valid JSON for the struct.
+func ParseJudgeAgentFinishTestToolArguments(arguments string) (*JudgeAgentFinishTestToolArguments, error) {
+	// Decode into a value rather than a nil pointer: a JSON `null` payload
+	// leaves a pointer target nil, and the defaulting below would then
+	// dereference it and panic.
+	var resp JudgeAgentFinishTestToolArguments
+	if err := json.Unmarshal([]byte(arguments), &resp); err != nil {
+		return nil, fmt.Errorf("failed to parse judge agent finish tool arguments: %w", err)
+	}
+
+	if resp.Verdict == "" {
+		resp.Verdict = "inconclusive"
+	}
+	if resp.Reasoning == "" {
+		resp.Reasoning = "No reasoning provided"
+	}
+	if resp.Criteria == nil {
+		resp.Criteria = map[string]any{}
+	}
+
+	return &resp, nil
+}
diff --git a/go/internal/libraries/ksuid/LICENSE b/go/internal/libraries/ksuid/LICENSE
new file mode 100644
index 00000000..c3dce1bb
--- /dev/null
+++ b/go/internal/libraries/ksuid/LICENSE
@@ -0,0 +1,21 @@
+Copyright (c) 2018-2023 Cuvva Limited
+Copyright (c) 2023-2025 Mojo Men Ltd
+Copyright (c) 2025 Reasoning Engine B.V.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/go/internal/libraries/ksuid/README.md b/go/internal/libraries/ksuid/README.md
new file mode 100644
index 00000000..80e9956b
--- /dev/null
+++ b/go/internal/libraries/ksuid/README.md
@@ -0,0 +1,74 @@
+# ksuid
+
+ksuid is a Go library that generates prefixed, k-sorted globally unique identifiers.
+
+Each KSUID has a resource type and optionally an environment prefix (no environment prefix is for prod use only). They are roughly sortable down to per-second resolution.
+
+Properties of a KSUID:
+
+- resource type and environment prefixing
+- lexicographically, time sortable
+- no startup co-ordination
+- guaranteed unique relative to process/machine
+
+## Usage
+
+### API
+
+ksuid is primarily a Go package to be consumed by backend services. Below are examples of its API usage.
+
+To generate a KSUID with a custom resource type and for the prod environment:
+
+```go
+id := ksuid.Generate(ctx, "user")
+/* => ID{
+ Environment: "prod",
+ Resource: "user",
+ Timestamp: time.Time{"2021-04-29T10:46:56Z"},
+ MachineID: net.HardwareAddr{"1e:00:a2:3e:53:90"},
+ ProcessID: 21124,
+ SequenceID: 0,
+} */
+```
+
+To parse a single given KSUID:
+
+```go
+id, err := ksuid.Parse("user_000000C8BhY47NRDD94E5VZxVX4bo")
+/*
+=> ID{
+ Environment: "prod",
+ Resource: "user",
+ Timestamp: time.Time{"2021-04-29T10:46:56Z"},
+ MachineID: net.HardwareAddr{"1e:00:a2:3e:53:90"},
+ ProcessID: 21124,
+ SequenceID: 0,
+}, nil
+*/
+```
+
+## Structure
+
+Excluding the resource & environment prefix parts, KSUIDs are 29 bytes long when Base62 encoded, consisting of 21 bytes decoded:
+
+- first 8 bytes: a 64-bit unix timestamp
+- next 9 bytes: a 64-bit instance ID, prefixed by an 8-bit scheme
+- next 4 bytes: a 32-bit incrementing counter, reset every second
+
+Optionally a KSUID has two, underscore delimited prefixes. The first prefix is optional, and is the environment in which the KSUID was generated (test, dev, git commit etc), omitting the environment identifies prod only. The second prefix is the resource type (user, profile, vehicle etc) and is required.
+
+### Instance IDs
+
+The instance ID is structured differently depending on the source environment - allowing the best choice for the given use-case.
+
+The first byte indicates which kind of instance ID, which then defines the structure of the remaining 8 bytes:
+
+- `0x44` (ASCII `D`): Docker
+ - 8 bytes: truncated Docker container ID
+- `0x48` (ASCII `H`): hardware
+ - first 6 bytes: the 48-bit MAC address
+ - next 2 bytes: the 16-bit process ID (truncated if necessary)
+- `0x52` (ASCII `R`): random
+ - 8 bytes: randomly generated bytes
+
+The random option should only be used if no reliable instance ID is available.
diff --git a/go/internal/libraries/ksuid/base62.go b/go/internal/libraries/ksuid/base62.go
new file mode 100644
index 00000000..3e90aab9
--- /dev/null
+++ b/go/internal/libraries/ksuid/base62.go
@@ -0,0 +1,91 @@
+package ksuid
+
+// Digit values at which each letter range starts: '0'-'9' map to 0-9,
+// 'A'-'Z' to 10-35 and 'a'-'z' to 36-61.
+const (
+ offsetUppercase = 10
+ offsetLowercase = 36
+)
+
+// base62Value converts a base62 digit into the number value that it
+// represents. It does not validate its input: any byte outside '0'-'9' and
+// 'A'-'Z' is treated as if it were a lowercase letter, so callers must make
+// sure digit is a base62 character.
+func base62Value(digit byte) byte {
+ switch {
+ case digit >= '0' && digit <= '9':
+ return digit - '0'
+ case digit >= 'A' && digit <= 'Z':
+ return offsetUppercase + (digit - 'A')
+ default:
+ return offsetLowercase + (digit - 'a')
+ }
+}
+
+// fastDecodeBase62 decodes the encodedLen base62 characters in src into dst
+// as a big-endian number, right-aligned and zero-padded on the left.
+//
+// It returns a *ParseError when src does not have exactly encodedLen bytes,
+// when src contains a byte that is not a base62 character, or when the
+// decoded value does not fit in dst.
+func fastDecodeBase62(dst []byte, src []byte) error {
+	const srcBase = 62
+	const dstBase = 4294967296
+
+	// Reject wrong-sized input up front instead of panicking on an
+	// out-of-range index.
+	if len(src) != encodedLen {
+		return &ParseError{"input has wrong length"}
+	}
+
+	// base62Value does not validate, so check every byte here; otherwise
+	// invalid characters would silently decode to garbage.
+	var parts [encodedLen]byte
+	for i, c := range src {
+		switch {
+		case c >= '0' && c <= '9', c >= 'A' && c <= 'Z', c >= 'a' && c <= 'z':
+			parts[i] = base62Value(c)
+		default:
+			return &ParseError{"invalid character"}
+		}
+	}
+
+	numDst := len(dst)
+	baseParts := parts[:]
+	baseQueue := [encodedLen]byte{}
+
+	// Repeated long division by 2^32: each pass yields the next
+	// least-significant 32-bit word as the remainder and leaves the quotient
+	// (still as base62 digits) for the following pass.
+	for len(baseParts) > 0 {
+		quotient := baseQueue[:0]
+		remainder := uint64(0)
+
+		for _, c := range baseParts {
+			value := uint64(c) + remainder*srcBase
+			digit := value / dstBase
+			remainder = value % dstBase
+
+			// Skip leading zeros so the quotient shrinks and the loop ends.
+			if len(quotient) != 0 || digit != 0 {
+				quotient = append(quotient, byte(digit))
+			}
+		}
+
+		if numDst < 4 {
+			return &ParseError{"output buffer too short"}
+		}
+
+		dst[numDst-4] = byte(remainder >> 24)
+		dst[numDst-3] = byte(remainder >> 16)
+		dst[numDst-2] = byte(remainder >> 8)
+		dst[numDst-1] = byte(remainder)
+		numDst -= 4
+		baseParts = quotient
+	}
+
+	// Zero whatever leading bytes the value did not reach.
+	clear(dst[:numDst])
+	return nil
+}
diff --git a/go/internal/libraries/ksuid/id.go b/go/internal/libraries/ksuid/id.go
new file mode 100644
index 00000000..a4fcf51c
--- /dev/null
+++ b/go/internal/libraries/ksuid/id.go
@@ -0,0 +1,223 @@
+package ksuid
+
+import (
+ "bytes"
+ "database/sql/driver"
+ "encoding/binary"
+ "encoding/json"
+
+ "github.com/jamescun/basex"
+)
+
+// ID is an optionally prefixed, k-sortable globally unique ID.
+type ID struct {
+ Environment string // environment prefix; Parse fills in Production when the input carries none
+ Resource string // resource prefix taken from the text before the final underscore; may be empty
+
+ Timestamp uint64 // first 8 bytes of the decoded payload, big-endian
+ InstanceID InstanceID // scheme byte followed by 8 bytes of instance data
+ SequenceID uint32 // last 4 bytes of the decoded payload, big-endian
+}
+
+const (
+ decodedLen = 21 // binary payload size: 8 (timestamp) + 1 (scheme) + 8 (instance data) + 4 (sequence)
+ encodedLen = 29 // number of base62 characters Parse requires after any prefix
+)
+
+// MustParse behaves like Parse but panics with the parse error instead of
+// returning it.
+func MustParse(src string) ID {
+	parsed, err := Parse(src)
+	if err == nil {
+		return parsed
+	}
+
+	panic(err)
+}
+
+// Parse decodes the textual form of an ID.
+//
+// Any prefix is split off with splitPrefixID; a missing environment defaults to
+// Production. The remaining payload must be exactly encodedLen base62
+// characters, otherwise a *ParseError ("ksuid too short" / "ksuid too long") is
+// returned. A decoding failure is reported as a *ParseError whose message
+// starts with "invalid base62: ". On error the returned ID still carries the
+// Environment and Resource that were split off.
+func Parse(str string) (id ID, err error) {
+	environment, resource, payload := splitPrefixID([]byte(str))
+	if environment == "" {
+		environment = Production
+	}
+	id.Environment, id.Resource = environment, resource
+
+	switch {
+	case len(payload) < encodedLen:
+		return id, &ParseError{"ksuid too short"}
+	case len(payload) > encodedLen:
+		return id, &ParseError{"ksuid too long"}
+	}
+
+	raw := make([]byte, decodedLen)
+	if decodeErr := fastDecodeBase62(raw, payload); decodeErr != nil {
+		return id, &ParseError{"invalid base62: " + decodeErr.Error()}
+	}
+
+	// Payload layout: timestamp (8) | scheme (1) | instance data (8) | sequence (4).
+	id.Timestamp = binary.BigEndian.Uint64(raw[:8])
+	id.InstanceID.SchemeData = raw[8]
+	copy(id.InstanceID.BytesData[:], raw[9:17])
+	id.SequenceID = binary.BigEndian.Uint32(raw[17:])
+
+	return id, nil
+}
+
+// splitPrefixID separates an optional "environment_resource_" or "resource_"
+// prefix from the ID payload. The payload is everything after the last
+// underscore; within the text before it, the first underscore (if any) divides
+// environment from resource. Input without an underscore is returned unchanged
+// as the payload, and the returned id always aliases input.
+func splitPrefixID(input []byte) (environment, resource string, id []byte) {
+	// The unprefixed form is the most common input, so it is checked first and
+	// returns without any further work.
+	sep := bytes.LastIndexByte(input, '_')
+	if sep < 0 {
+		return "", "", input
+	}
+
+	prefix := input[:sep]
+	id = input[sep+1:]
+
+	if envEnd := bytes.IndexByte(prefix, '_'); envEnd >= 0 {
+		environment = string(prefix[:envEnd])
+		resource = string(prefix[envEnd+1:])
+	} else {
+		resource = string(prefix)
+	}
+
+	return environment, resource, id
+}
+
+// IsZero reports whether id equals the zero value of ID, i.e. no field has
+// been set.
+func (id ID) IsZero() bool {
+	var zero ID
+	return id == zero
+}
+
+// Equal reports whether x and id are identical in every field, including Environment and Resource.
+func (id ID) Equal(x ID) bool {
+ return id == x
+}
+
+// Scan implements database/sql.Scanner. It accepts a string or []byte holding
+// the textual form of an ID and parses it with Parse; id is only overwritten
+// when parsing succeeds. Any other source type yields a *ParseError.
+func (id *ID) Scan(src any) error {
+	var text string
+
+	switch v := src.(type) {
+	case string:
+		text = v
+	case []byte:
+		text = string(v)
+	default:
+		return &ParseError{"unsupported scan, must be string or []byte"}
+	}
+
+	parsed, err := Parse(text)
+	if err != nil {
+		return err
+	}
+
+	*id = parsed
+	return nil
+}
+
+// Value implements database/sql/driver.Valuer to support marshaling to
+// standard database drivers. It yields the []byte produced by Bytes and never returns an error.
+func (id ID) Value() (driver.Value, error) {
+ return id.Bytes(), nil
+}
+
+// prefixLen returns the number of bytes Bytes emits in front of the encoded
+// payload: "resource_" when a resource is set, preceded by "environment_" when
+// the environment is set and is not Production. Without a resource there is no
+// prefix at all, whatever the environment.
+func (id ID) prefixLen() (n int) {
+	if id.Resource == "" {
+		return 0
+	}
+
+	n = len(id.Resource) + 1
+	if id.Environment != "" && id.Environment != Production {
+		n += len(id.Environment) + 1
+	}
+
+	return n
+}
+
+// MarshalJSON implements json.Marshaler by wrapping the output of Bytes in
+// double quotes. No escaping is applied and no error is ever returned.
+func (id ID) MarshalJSON() ([]byte, error) {
+	text := id.Bytes()
+
+	quoted := make([]byte, 0, len(text)+2)
+	quoted = append(quoted, '"')
+	quoted = append(quoted, text...)
+	quoted = append(quoted, '"')
+
+	return quoted, nil
+}
+
+// UnmarshalJSON implements json.Unmarshaler. The input must be a JSON string
+// holding the textual form of an ID; it is decoded with encoding/json and then
+// handed to Parse. id is left untouched when either step fails.
+func (id *ID) UnmarshalJSON(b []byte) (err error) {
+	var text string
+	if err = json.Unmarshal(b, &text); err != nil {
+		return err
+	}
+
+	parsed, err := Parse(text)
+	if err != nil {
+		return err
+	}
+
+	*id = parsed
+	return nil
+}
+
+// Bytes renders id in its textual form as a byte slice.
+//
+// The output is the optional prefix (see prefixLen) followed by encodedLen
+// payload characters. The first two payload characters are always '0'; the
+// base62 encoding of the 21-byte payload is copied in after them, so only its
+// first encodedLen-2 bytes fit.
+func (id ID) Bytes() []byte {
+	pfx := id.prefixLen()
+	out := make([]byte, pfx+encodedLen)
+
+	if id.Resource != "" {
+		pos := 0
+		if id.Environment != "" && id.Environment != Production {
+			pos = copy(out, id.Environment)
+			out[pos] = '_'
+			pos++
+		}
+
+		pos += copy(out[pos:], id.Resource)
+		out[pos] = '_'
+	}
+
+	// Payload layout: timestamp (8) | scheme (1) | instance data (8) | sequence (4).
+	instance := id.InstanceID.Bytes()
+	raw := make([]byte, decodedLen)
+	binary.BigEndian.PutUint64(raw, id.Timestamp)
+	raw[8] = id.InstanceID.Scheme()
+	copy(raw[9:], instance[:])
+	binary.BigEndian.PutUint32(raw[17:], id.SequenceID)
+
+	encoded := make([]byte, encodedLen)
+	basex.Base62.Encode(encoded, raw)
+	copy(out[pfx+2:], encoded)
+
+	// Written after the copy so the two leading characters are always '0'.
+	out[pfx], out[pfx+1] = '0', '0'
+
+	return out
+}
+
+// String returns the textual form of id; it is the output of Bytes converted to a string.
+func (id ID) String() string {
+ return string(id.Bytes())
+}
+
+// ParseError is returned when unexpected input is encountered when
+// parsing user input to an ID.
+type ParseError struct {
+ errorString string // message returned verbatim by Error
+}
+
+// Error implements the error interface and returns the stored message unchanged.
+func (pe ParseError) Error() string {
+ return pe.errorString
+}
diff --git a/go/internal/libraries/ksuid/id_test.go b/go/internal/libraries/ksuid/id_test.go
new file mode 100644
index 00000000..e3dc3984
--- /dev/null
+++ b/go/internal/libraries/ksuid/id_test.go
@@ -0,0 +1,344 @@
+//nolint:gosec // we're using fixed timestamps for tests
+package ksuid
+
+import (
+ "testing"
+ "time"
+
+ "github.com/matryer/is"
+)
+
+// TestSplitPrefixID checks how splitPrefixID divides inputs with zero, one or
+// two underscore-separated prefixes, including blank prefixes and blank IDs.
+func TestSplitPrefixID(t *testing.T) {
+	const bare = "000EoVtOLK4o4XykFcYe63Kw"
+
+	cases := []struct {
+		Name   string
+		Source []byte
+
+		Resource    string
+		Environment string
+		ID          []byte
+	}{
+		{Name: "Empty", Source: []byte(""), ID: []byte("")},
+		{Name: "Bare", Source: []byte(bare), ID: []byte(bare)},
+		{Name: "Resource", Source: []byte("user_" + bare), Resource: "user", ID: []byte(bare)},
+		{Name: "ResourceEnvironment", Source: []byte("test_user_" + bare), Resource: "user", Environment: "test", ID: []byte(bare)},
+		{Name: "BlankResource", Source: []byte("_" + bare), ID: []byte(bare)},
+		{Name: "BlankResourceEnvironment", Source: []byte("__" + bare), ID: []byte(bare)},
+		{Name: "BlankIDResource", Source: []byte("user_"), Resource: "user", ID: []byte("")},
+		{Name: "BlankIDResourceEnvironment", Source: []byte("test_user_"), Resource: "user", Environment: "test", ID: []byte("")},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.Name, func(t *testing.T) {
+			is := is.New(t)
+
+			gotEnvironment, gotResource, gotID := splitPrefixID(tc.Source)
+
+			is.Equal(tc.Environment, gotEnvironment)
+			is.Equal(tc.Resource, gotResource)
+			is.Equal(tc.ID, gotID)
+		})
+	}
+}
+
+// TestParse covers the length and base62 error paths of Parse and the three
+// accepted spellings (bare, resource-prefixed, environment+resource-prefixed)
+// of one fixed reference ID.
+func TestParse(t *testing.T) {
+	const encoded = "000000BPG6Lks9tQoAiJYrBRSXPX6"
+
+	timestamp := uint64(time.Date(2018, 4, 5, 16, 53, 42, 0, time.UTC).Unix())
+	instance := InstanceID{
+		SchemeData: 'H',
+		BytesData:  [8]byte{0x8c, 0x85, 0x90, 0x5f, 0x44, 0xca, 0x80, 0xd9},
+	}
+
+	cases := []struct {
+		Name   string
+		Source string
+
+		ID    ID
+		Error error
+	}{
+		{"Short", "", ID{}, &ParseError{"ksuid too short"}},
+		{"Long", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", ID{}, &ParseError{"ksuid too long"}},
+		{"InvalidBase62", "AAAAAAAAAAAAAAAAAAAAAAAAA//AA", ID{}, &ParseError{"invalid base62: output buffer too short"}},
+		{
+			"Bare", encoded,
+			ID{Environment: Production, Timestamp: timestamp, InstanceID: instance},
+			nil,
+		},
+		{
+			"Resource", "user_" + encoded,
+			ID{Environment: Production, Resource: "user", Timestamp: timestamp, InstanceID: instance},
+			nil,
+		},
+		{
+			"ResourceEnvironment", "test_user_" + encoded,
+			ID{Environment: "test", Resource: "user", Timestamp: timestamp, InstanceID: instance},
+			nil,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.Name, func(t *testing.T) {
+			is := is.New(t)
+
+			got, err := Parse(tc.Source)
+			if tc.Error != nil {
+				is.Equal(tc.Error, err)
+				return
+			}
+
+			is.NoErr(err)
+			is.Equal(tc.ID, got)
+		})
+	}
+}
+
+// BenchmarkParse measures Parse on a resource-prefixed ID. It uses b.Loop,
+// like BenchmarkGenerate in this package, so the call cannot be optimized away
+// and only the loop body is timed.
+func BenchmarkParse(b *testing.B) {
+	for b.Loop() {
+		_, _ = Parse("user_000000BPG6Lks9tQoAiJYrBRSXPX6")
+	}
+}
+
+// TestID exercises the database/sql, JSON and text conversions of ID against
+// one fixed reference identifier.
+func TestID(t *testing.T) {
+	const encoded = "000000BPG6Lks9tQoAiJYrBRSXPX6"
+
+	// reference builds the fixed ID used throughout, with the given
+	// environment and resource attached.
+	reference := func(environment, resource string) ID {
+		return ID{
+			Environment: environment,
+			Resource:    resource,
+			Timestamp:   uint64(time.Date(2018, 4, 5, 16, 53, 42, 0, time.UTC).Unix()),
+			InstanceID: InstanceID{
+				SchemeData: 'H',
+				BytesData:  [8]byte{0x8c, 0x85, 0x90, 0x5f, 0x44, 0xca, 0x80, 0xd9},
+			},
+			SequenceID: 0,
+		}
+	}
+
+	// quoted wraps s in double quotes, giving its JSON string form.
+	quoted := func(s string) []byte {
+		return []byte(`"` + s + `"`)
+	}
+
+	t.Run("Scan", func(t *testing.T) {
+		cases := []struct {
+			Name string
+			Src  any
+
+			ID    ID
+			Error error
+		}{
+			{"Bytes", []byte(encoded), reference(Production, ""), nil},
+			{"String", encoded, reference(Production, ""), nil},
+			{"Unknown", 1234, ID{}, &ParseError{"unsupported scan, must be string or []byte"}},
+		}
+
+		for _, tc := range cases {
+			t.Run(tc.Name, func(t *testing.T) {
+				is := is.New(t)
+
+				var got ID
+				err := got.Scan(tc.Src)
+				if tc.Error != nil {
+					is.Equal(tc.Error, err)
+					return
+				}
+
+				is.NoErr(err)
+				is.Equal(tc.ID, got)
+			})
+		}
+	})
+
+	t.Run("UnmarshalJSON", func(t *testing.T) {
+		cases := []struct {
+			Name   string
+			Source []byte
+
+			ID    ID
+			Error error
+		}{
+			{"Short", quoted(""), ID{}, &ParseError{"ksuid too short"}},
+			{"Long", quoted("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"), ID{}, &ParseError{"ksuid too long"}},
+			{"InvalidBase62", quoted("AAAAAAAAAAAAAAAAAAAAAAAAA//AA"), ID{}, &ParseError{"invalid base62: output buffer too short"}},
+			{"Bare", quoted(encoded), reference(Production, ""), nil},
+			{"Resource", quoted("user_" + encoded), reference(Production, "user"), nil},
+			{"ResourceEnvironment", quoted("test_user_" + encoded), reference("test", "user"), nil},
+		}
+
+		for _, tc := range cases {
+			t.Run(tc.Name, func(t *testing.T) {
+				is := is.New(t)
+
+				var got ID
+				err := got.UnmarshalJSON(tc.Source)
+				if tc.Error != nil {
+					is.Equal(tc.Error, err)
+					return
+				}
+
+				is.NoErr(err)
+				is.Equal(tc.ID, got)
+			})
+		}
+	})
+
+	t.Run("Bytes", func(t *testing.T) {
+		cases := []struct {
+			Name string
+			ID   ID
+
+			Text string
+		}{
+			{"Bare", reference("", ""), encoded},
+			{"BareEnvironment", reference("test", ""), encoded},
+			{"Resource", reference("", "user"), "user_" + encoded},
+			{"ResourceProduction", reference(Production, "user"), "user_" + encoded},
+			{"ResourceEnvironment", reference("test", "user"), "test_user_" + encoded},
+		}
+
+		for _, tc := range cases {
+			t.Run(tc.Name, func(t *testing.T) {
+				is := is.New(t)
+
+				wantBytes := []byte(tc.Text)
+
+				is.Equal(wantBytes, tc.ID.Bytes())
+				is.Equal(tc.Text, tc.ID.String())
+
+				value, err := tc.ID.Value()
+				is.NoErr(err)
+				is.Equal(wantBytes, value)
+
+				marshaled, err := tc.ID.MarshalJSON()
+				is.NoErr(err)
+				is.Equal(quoted(tc.Text), marshaled)
+			})
+		}
+	})
+}
+
+// TestComparable verifies that an ID survives a String/Parse round trip
+// unchanged under ==, and that two consecutively generated IDs differ.
+func TestComparable(t *testing.T) {
+	first := Generate(t.Context(), "compare")
+
+	roundTripped, err := Parse(first.String())
+	if err != nil {
+		t.Fatal(err)
+	}
+	if roundTripped != first {
+		t.Error("IDs are not equal")
+	}
+
+	if second := Generate(t.Context(), "compare"); second == first {
+		t.Error("IDs are equal!")
+	}
+}
diff --git a/go/internal/libraries/ksuid/instance_id.go b/go/internal/libraries/ksuid/instance_id.go
new file mode 100644
index 00000000..fda805e0
--- /dev/null
+++ b/go/internal/libraries/ksuid/instance_id.go
@@ -0,0 +1,117 @@
+package ksuid
+
+import (
+ "bytes"
+ "context"
+ "crypto/rand"
+ "encoding/binary"
+ "encoding/hex"
+ "errors"
+ "fmt"
+ "net"
+ "os"
+)
+
+var (
+ ErrNoHardwareAddress = errors.New("no hardware address") // wrapped by getHardwareAddr when no interface has an address of at least 6 bytes
+ ErrNotDockerized = errors.New("not dockerized") // wrapped by getDockerID when /proc/1/cpuset is missing or is not a docker cpuset path
+)
+
+// InstanceID identifies the generator of an ID: a one-byte scheme tag plus 8 bytes of scheme-specific data.
+type InstanceID struct {
+ SchemeData byte // scheme tag; the constructors in this file use 'H' (hardware), 'D' (docker) and 'R' (random)
+ BytesData [8]byte // scheme-specific identifying bytes
+}
+
+// Scheme returns the scheme tag byte stored in SchemeData.
+func (i InstanceID) Scheme() byte {
+ return i.SchemeData
+}
+
+// Bytes returns a copy of the 8 identifying bytes stored in BytesData.
+func (i InstanceID) Bytes() [8]byte {
+ return i.BytesData
+}
+
+// NewHardwareID returns an InstanceID with scheme 'H' for the current process.
+// The data starts with the hardware address reported by getHardwareAddr and
+// ends, in bytes 6-7, with the low 16 bits of the process ID in big-endian
+// order. Any error from getHardwareAddr is returned unchanged.
+func NewHardwareID(ctx context.Context) (InstanceID, error) {
+	hwAddr, err := getHardwareAddr(ctx)
+	if err != nil {
+		return InstanceID{}, err
+	}
+
+	id := InstanceID{SchemeData: 'H'}
+	copy(id.BytesData[:], hwAddr)
+
+	// The PID is written after the address so it always owns the last two bytes.
+	//nolint:gosec // we're intentionally truncating to 16 bits
+	pid := uint16(os.Getpid() & 0xFFFF)
+	binary.BigEndian.PutUint16(id.BytesData[6:], pid)
+
+	return id, nil
+}
+
+// getHardwareAddr returns the hardware address of the first network interface
+// whose address is at least 6 bytes long. ctx is currently unused.
+//
+// An error from net.Interfaces is returned unchanged. When no interface
+// qualifies, ErrNoHardwareAddress is returned as is: there is no underlying
+// error at that point, so nothing is wrapped into it.
+func getHardwareAddr(ctx context.Context) (net.HardwareAddr, error) {
+	ifaces, err := net.Interfaces()
+	if err != nil {
+		return nil, err
+	}
+
+	for _, iface := range ifaces {
+		// Skip interfaces without a MAC-sized address (e.g. loopback).
+		if len(iface.HardwareAddr) >= 6 {
+			return iface.HardwareAddr, nil
+		}
+	}
+
+	return nil, ErrNoHardwareAddress
+}
+
+// NewDockerID returns an InstanceID with scheme 'D' whose data is the first 8
+// bytes of the container ID reported by getDockerID. Any error from
+// getDockerID is returned unchanged.
+func NewDockerID(ctx context.Context) (InstanceID, error) {
+	containerID, err := getDockerID(ctx)
+	if err != nil {
+		return InstanceID{}, err
+	}
+
+	id := InstanceID{SchemeData: 'D'}
+	copy(id.BytesData[:], containerID)
+
+	return id, nil
+}
+
+// getDockerID reads /proc/1/cpuset and returns the 32 bytes decoded from the
+// 64 hex characters at the end of a "/docker..." path. ctx is currently unused.
+//
+// Errors:
+//   - the file does not exist: ErrNotDockerized wrapping the read error;
+//   - any other read failure: the read error itself;
+//   - the content is shorter than 64 bytes or lacks the "/docker" prefix:
+//     ErrNotDockerized;
+//   - the trailing 64 characters are not valid hex: the hex decoding error.
+func getDockerID(ctx context.Context) ([]byte, error) {
+	raw, err := os.ReadFile("/proc/1/cpuset")
+	if err != nil {
+		// Check the read error before inspecting the content: a failed read
+		// yields no data, which would otherwise always look "too short" and
+		// hide the real failure.
+		if os.IsNotExist(err) {
+			return nil, fmt.Errorf("%w: %w", ErrNotDockerized, err)
+		}
+		return nil, err
+	}
+
+	raw = bytes.TrimSpace(raw)
+	if len(raw) < 64 || !bytes.HasPrefix(raw, []byte("/docker")) {
+		// No underlying error exists here, so the sentinel is returned bare.
+		return nil, ErrNotDockerized
+	}
+
+	dst := make([]byte, 32)
+	if _, err := hex.Decode(dst, raw[len(raw)-64:]); err != nil {
+		return nil, err
+	}
+
+	return dst, nil
+}
+
+// NewRandomID returns an InstanceID with scheme 'R' whose 8 data bytes are
+// read from crypto/rand. It panics if the random source reports an error.
+func NewRandomID() InstanceID {
+	id := InstanceID{SchemeData: 'R'}
+	if _, err := rand.Read(id.BytesData[:]); err != nil {
+		panic(err)
+	}
+
+	return id
+}
diff --git a/go/internal/libraries/ksuid/node.go b/go/internal/libraries/ksuid/node.go
new file mode 100644
index 00000000..4b4c38a1
--- /dev/null
+++ b/go/internal/libraries/ksuid/node.go
@@ -0,0 +1,83 @@
+package ksuid
+
+import (
+ "context"
+ "fmt"
+ "strings"
+ "sync"
+ "time"
+)
+
+// Production is the internal name of the production environment. It is the
+// default environment of parsed and generated IDs and is omitted during marshaling.
+const Production = "prod"
+
+var exportedNode = makeNode(context.Background(), Production) // package-level node used by the package functions Generate and SetInstanceID
+
+// makeNode builds a Node using the first instance ID source that succeeds:
+// the docker container ID, then the hardware address, and finally random
+// bytes, which cannot fail.
+func makeNode(ctx context.Context, environment string) *Node {
+	iid, err := NewDockerID(ctx)
+	if err != nil {
+		iid, err = NewHardwareID(ctx)
+	}
+	if err != nil {
+		iid = NewRandomID()
+	}
+
+	return NewNode(environment, iid)
+}
+
+// Node contains metadata used for ksuid generation for a specific machine.
+type Node struct {
+ InstanceID InstanceID // copied into every ID this node generates
+
+ timestamp uint64 // Unix second of the most recent Generate call
+ sequence uint32 // counter within that second; reset to 0 whenever the second changes
+ sequenceMu sync.Mutex // guards timestamp and sequence
+}
+
+// NewNode returns an ID generator that stamps every ID with instanceID.
+// The environment argument is accepted but not stored or used here.
+func NewNode(environment string, instanceID InstanceID) *Node {
+	node := new(Node)
+	node.InstanceID = instanceID
+
+	return node
+}
+
+// Generate returns a new ID for resource, stamped with the node's instance ID,
+// the Production environment and the current Unix second. IDs generated within
+// the same second receive increasing sequence numbers starting at 0. It panics
+// if resource contains an underscore. ctx is currently unused.
+func (n *Node) Generate(ctx context.Context, resource string) (id ID) {
+	if strings.ContainsRune(resource, '_') {
+		panic(fmt.Errorf("ksuid resource contains underscore: %s", resource))
+	}
+
+	id = ID{
+		Environment: Production,
+		Resource:    resource,
+		InstanceID:  n.InstanceID,
+	}
+
+	n.sequenceMu.Lock()
+	defer n.sequenceMu.Unlock()
+
+	//nolint:gosec // this is a problem before 1970, but we live in the future
+	now := uint64(time.Now().UTC().Unix())
+
+	// Any change of the recorded second, in either direction, restarts the
+	// sequence; only a repeat of the same second increments it.
+	if now == n.timestamp {
+		n.sequence++
+	} else {
+		n.timestamp = now
+		n.sequence = 0
+	}
+
+	id.Timestamp, id.SequenceID = now, n.sequence
+
+	return id
+}
+
+// SetInstanceID overrides the default instance id in the exported node.
+// This will affect all subsequent invocations of the package-level Generate function.
+func SetInstanceID(instanceID InstanceID) {
+ exportedNode.InstanceID = instanceID // plain assignment; the node's mutex is not taken here
+}
+
+// Generate returns a new ID for resource from the package-level exported node; see Node.Generate.
+func Generate(ctx context.Context, resource string) ID {
+ return exportedNode.Generate(ctx, resource)
+}
diff --git a/go/internal/libraries/ksuid/node_test.go b/go/internal/libraries/ksuid/node_test.go
new file mode 100644
index 00000000..4f288738
--- /dev/null
+++ b/go/internal/libraries/ksuid/node_test.go
@@ -0,0 +1,11 @@
+package ksuid
+
+import (
+ "testing"
+)
+
+// BenchmarkGenerate measures the package-level Generate for the "user" resource.
+func BenchmarkGenerate(b *testing.B) {
+ for b.Loop() {
+ Generate(b.Context(), "user")
+ }
+}
diff --git a/go/internal/libraries/ptr/ptr.go b/go/internal/libraries/ptr/ptr.go
new file mode 100644
index 00000000..005e5c5d
--- /dev/null
+++ b/go/internal/libraries/ptr/ptr.go
@@ -0,0 +1,38 @@
+package ptr
+
+// Ptr returns a pointer to a fresh copy of v; changes made through the
+// pointer do not affect the caller's original value.
+func Ptr[T any](v T) *T {
+	p := new(T)
+	*p = v
+	return p
+}
+
+// ValueOrNil dereferences v, or returns the zero value of T when v is nil.
+// It behaves exactly like ValueOrZero.
+func ValueOrNil[T any](v *T) T {
+	if v != nil {
+		return *v
+	}
+
+	var zero T
+	return zero
+}
+
+// ValueOrZero dereferences v, or returns the zero value of T when v is nil.
+func ValueOrZero[T any](v *T) T {
+	var out T
+	if v != nil {
+		out = *v
+	}
+
+	return out
+}
+
+// ValueOrDefault dereferences v, or returns defaultValue when v is nil.
+func ValueOrDefault[T any](v *T, defaultValue T) T {
+	if v != nil {
+		return *v
+	}
+
+	return defaultValue
+}
diff --git a/go/internal/libraries/ptr/ptr_test.go b/go/internal/libraries/ptr/ptr_test.go
new file mode 100644
index 00000000..b91cf352
--- /dev/null
+++ b/go/internal/libraries/ptr/ptr_test.go
@@ -0,0 +1,109 @@
+package ptr
+
+import (
+ "testing"
+
+ "github.com/matryer/is"
+)
+
+// TestPtr checks that Ptr returns a non-nil pointer to a value equal to its
+// argument for int, string and struct types. Each subtest builds its own
+// assertion helper from its own *testing.T so failures are attributed to, and
+// stop, the subtest that produced them.
+func TestPtr(t *testing.T) {
+	t.Run("int", func(t *testing.T) {
+		is := is.New(t)
+		v := 42
+		p := Ptr(v)
+		is.True(p != nil)
+		is.Equal(*p, v)
+	})
+	t.Run("string", func(t *testing.T) {
+		is := is.New(t)
+		v := "hello"
+		p := Ptr(v)
+		is.True(p != nil)
+		is.Equal(*p, v)
+	})
+	t.Run("struct", func(t *testing.T) {
+		is := is.New(t)
+		type S struct{ X int }
+		v := S{X: 7}
+		p := Ptr(v)
+		is.True(p != nil)
+		is.Equal(*p, v)
+	})
+}
+
+// TestValueOrNil checks that ValueOrNil yields the zero value for a nil
+// pointer and the pointed-to value otherwise, for int, string and struct
+// types. Each subtest builds its own assertion helper from its own
+// *testing.T so failures are attributed to the subtest that produced them.
+func TestValueOrNil(t *testing.T) {
+	t.Run("int", func(t *testing.T) {
+		is := is.New(t)
+		var p *int
+		is.Equal(ValueOrNil(p), 0)
+		v := 5
+		p = &v
+		is.Equal(ValueOrNil(p), v)
+	})
+	t.Run("string", func(t *testing.T) {
+		is := is.New(t)
+		var p *string
+		is.Equal(ValueOrNil(p), "")
+		v := "foo"
+		p = &v
+		is.Equal(ValueOrNil(p), v)
+	})
+	t.Run("struct", func(t *testing.T) {
+		is := is.New(t)
+		type S struct{ X int }
+		var p *S
+		is.Equal(ValueOrNil(p), S{})
+		v := S{X: 9}
+		p = &v
+		is.Equal(ValueOrNil(p), v)
+	})
+}
+
+// TestValueOrZero checks that ValueOrZero yields the zero value for a nil
+// pointer and the pointed-to value otherwise, for int, string and struct
+// types. Each subtest builds its own assertion helper from its own
+// *testing.T so failures are attributed to the subtest that produced them.
+func TestValueOrZero(t *testing.T) {
+	t.Run("int", func(t *testing.T) {
+		is := is.New(t)
+		var p *int
+		is.Equal(ValueOrZero(p), 0)
+		v := 8
+		p = &v
+		is.Equal(ValueOrZero(p), v)
+	})
+	t.Run("string", func(t *testing.T) {
+		is := is.New(t)
+		var p *string
+		is.Equal(ValueOrZero(p), "")
+		v := "bar"
+		p = &v
+		is.Equal(ValueOrZero(p), v)
+	})
+	t.Run("struct", func(t *testing.T) {
+		is := is.New(t)
+		type S struct{ X int }
+		var p *S
+		is.Equal(ValueOrZero(p), S{})
+		v := S{X: 3}
+		p = &v
+		is.Equal(ValueOrZero(p), v)
+	})
+}
+
+// TestValueOrDefault checks that ValueOrDefault yields the supplied default
+// for a nil pointer and the pointed-to value otherwise, for int, string and
+// struct types. Non-zero defaults are included so the default is
+// distinguishable from the zero value. Each subtest builds its own assertion
+// helper from its own *testing.T so failures are attributed to the subtest
+// that produced them.
+func TestValueOrDefault(t *testing.T) {
+	t.Run("int", func(t *testing.T) {
+		is := is.New(t)
+		var p *int
+		is.Equal(ValueOrDefault(p, 0), 0)
+		is.Equal(ValueOrDefault(p, 7), 7)
+		v := 11
+		p = &v
+		is.Equal(ValueOrDefault(p, 0), v)
+		is.Equal(ValueOrDefault(p, 7), v)
+	})
+	t.Run("string", func(t *testing.T) {
+		is := is.New(t)
+		var p *string
+		is.Equal(ValueOrDefault(p, ""), "")
+		is.Equal(ValueOrDefault(p, "fallback"), "fallback")
+		v := "baz"
+		p = &v
+		is.Equal(ValueOrDefault(p, ""), v)
+		is.Equal(ValueOrDefault(p, "fallback"), v)
+	})
+	t.Run("struct", func(t *testing.T) {
+		is := is.New(t)
+		type S struct{ X int }
+		var p *S
+		is.Equal(ValueOrDefault(p, S{}), S{})
+		is.Equal(ValueOrDefault(p, S{X: 1}), S{X: 1})
+		v := S{X: 4}
+		p = &v
+		is.Equal(ValueOrDefault(p, S{}), v)
+	})
+}
diff --git a/go/runner.go b/go/runner.go
new file mode 100644
index 00000000..b4fedf4a
--- /dev/null
+++ b/go/runner.go
@@ -0,0 +1,72 @@
+package scenario
+
+import (
+ "context"
+ "errors"
+ "fmt"
+)
+
+var (
+ ErrScenarioNameRequired = errors.New("a scenario name is required") // Run: cfg.Name is empty
+ ErrScenarioDescriptionRequired = errors.New("a scenario description is required") // Run: cfg.Description is empty
+ ErrMaxTurnsMustBePositive = errors.New("the maximum number of turns must be positive") // Run: cfg.MaxTurns is negative (0 selects the default)
+ ErrNoAgentsSpecified = errors.New("no agents specified") // Run: cfg.Agents is empty
+ ErrNoAgentInAgentsWithAgentRole = errors.New("no agent was provided in the agents slice with the role agent") // validateAgentsSlice: no adapter has AgentRoleAgent
+ ErrAgentWithInvalidRole = errors.New("agent with invalid role") // validateAgentsSlice: an adapter reports an unknown role
+)
+
+// Run validates cfg and fills in its defaults.
+//
+// Validation happens in this order: name, description, non-negative MaxTurns,
+// at least one agent, then validateAgentsSlice; the first failure is returned.
+// Defaults applied to the local copy of cfg: MaxTurns 10, a generated
+// ThreadID, and a script consisting of a single Proceed step.
+//
+// In its current form the function stops there and returns (nil, nil) for a
+// valid configuration.
+func Run(ctx context.Context, cfg ScenarioConfig) (*ScenarioResult, error) {
+	switch {
+	case cfg.Name == "":
+		return nil, ErrScenarioNameRequired
+	case cfg.Description == "":
+		return nil, ErrScenarioDescriptionRequired
+	case cfg.MaxTurns < 0:
+		return nil, ErrMaxTurnsMustBePositive
+	case len(cfg.Agents) == 0:
+		return nil, ErrNoAgentsSpecified
+	}
+
+	if err := validateAgentsSlice(cfg.Agents); err != nil {
+		return nil, err
+	}
+
+	if cfg.MaxTurns == 0 {
+		cfg.MaxTurns = 10 // Default
+	}
+	if cfg.ThreadID == "" {
+		cfg.ThreadID = generateThreadID(ctx)
+	}
+	if len(cfg.Script) == 0 {
+		cfg.Script = []ScriptStep{Proceed()}
+	}
+
+	return nil, nil
+}
+
+// validateAgentsSlice checks that agents contains at least one adapter with
+// AgentRoleAgent and that every adapter reports a known role.
+//
+// It returns ErrNoAgentInAgentsWithAgentRole when no agent-role adapter is
+// present. Otherwise the first adapter with an unknown role produces an error
+// that wraps ErrAgentWithInvalidRole, so callers can match it with errors.Is,
+// and that names the offending index and role.
+func validateAgentsSlice(agents []AgentAdapter) error {
+	hasAgent := false
+	for _, a := range agents {
+		if a.Role() == AgentRoleAgent {
+			hasAgent = true
+			break
+		}
+	}
+	if !hasAgent {
+		return ErrNoAgentInAgentsWithAgentRole
+	}
+
+	for i, a := range agents {
+		switch role := a.Role(); role {
+		case AgentRoleAgent, AgentRoleJudge, AgentRoleUser:
+			// known role
+		default:
+			return fmt.Errorf("%w: index:%d given:%s", ErrAgentWithInvalidRole, i, role)
+		}
+	}
+
+	return nil
+}
diff --git a/go/script.go b/go/script.go
new file mode 100644
index 00000000..70fe9f9d
--- /dev/null
+++ b/go/script.go
@@ -0,0 +1,68 @@
+package scenario
+
+import (
+ "context"
+
+ "github.com/openai/openai-go"
+)
+
+// Message returns a ScriptStep that passes message to execution.Message and reports only its error (the result is always nil).
+func Message(message openai.ChatCompletionMessageParamUnion) ScriptStep {
+ return func(ctx context.Context, execution Execution, state ExecutionState) (*ScenarioResult, error) {
+ return nil, execution.Message(ctx, message)
+ }
+}
+
+// UserString returns a ScriptStep that passes content to execution.UserString and reports only its error (the result is always nil).
+func UserString(content string) ScriptStep {
+ return func(ctx context.Context, execution Execution, state ExecutionState) (*ScenarioResult, error) {
+ return nil, execution.UserString(ctx, content)
+ }
+}
+
+// UserMessage returns a ScriptStep that passes message to execution.UserMessage and reports only its error (the result is always nil).
+func UserMessage(message openai.ChatCompletionUserMessageParam) ScriptStep {
+ return func(ctx context.Context, execution Execution, state ExecutionState) (*ScenarioResult, error) {
+ return nil, execution.UserMessage(ctx, message)
+ }
+}
+
+// AgentString returns a ScriptStep that passes content to execution.AgentString and reports only its error (the result is always nil).
+func AgentString(content string) ScriptStep {
+ return func(ctx context.Context, execution Execution, state ExecutionState) (*ScenarioResult, error) {
+ return nil, execution.AgentString(ctx, content)
+ }
+}
+
+// AgentMessage returns a ScriptStep that passes message to execution.AgentMessage and reports only its error (the result is always nil).
+func AgentMessage(message openai.ChatCompletionAssistantMessageParam) ScriptStep {
+ return func(ctx context.Context, execution Execution, state ExecutionState) (*ScenarioResult, error) {
+ return nil, execution.AgentMessage(ctx, message)
+ }
+}
+
+// JudgeString returns a ScriptStep that passes content to execution.JudgeString and returns its result and error unchanged.
+func JudgeString(content string) ScriptStep {
+ return func(ctx context.Context, execution Execution, state ExecutionState) (*ScenarioResult, error) {
+ return execution.JudgeString(ctx, content)
+ }
+}
+
+// JudgeMessage returns a ScriptStep that passes message to execution.JudgeMessage and returns its result and error unchanged.
+func JudgeMessage(message openai.ChatCompletionMessageParamUnion) ScriptStep {
+ return func(ctx context.Context, execution Execution, state ExecutionState) (*ScenarioResult, error) {
+ return execution.JudgeMessage(ctx, message)
+ }
+}
+
+// Proceed returns a ScriptStep that forwards opts to execution.Proceed and returns its result and error unchanged. It uses an options pattern: Proceed(WithProceedTurns(n)), etc.
+func Proceed(opts ...ProceedOption) ScriptStep {
+ return func(ctx context.Context, execution Execution, state ExecutionState) (*ScenarioResult, error) {
+ return execution.Proceed(ctx, opts...)
+ }
+}
+
+// Succeed returns a ScriptStep that passes reasoning to execution.Succeed and returns its result and error unchanged.
+func Succeed(reasoning string) ScriptStep {
+ return func(ctx context.Context, execution Execution, state ExecutionState) (*ScenarioResult, error) {
+ return execution.Succeed(ctx, reasoning)
+ }
+}
+
+// Fail returns a ScriptStep that passes reasoning to execution.Fail and returns its result and error unchanged.
+func Fail(reasoning string) ScriptStep {
+ return func(ctx context.Context, execution Execution, state ExecutionState) (*ScenarioResult, error) {
+ return execution.Fail(ctx, reasoning)
+ }
+}
diff --git a/go/utils.go b/go/utils.go
new file mode 100644
index 00000000..19bfde9a
--- /dev/null
+++ b/go/utils.go
@@ -0,0 +1,29 @@
+package scenario
+
+import (
+ "strings"
+ "unicode"
+)
+
+// criterionNameToParamName turns a free-text criterion into a parameter name.
+//
+// Double quotes are dropped, every letter or digit is lowercased, and every
+// other character becomes an underscore. The result is limited to 70 bytes;
+// when it has to be shortened, the cut is made at a rune boundary so a
+// multi-byte character is never split and the result is always valid UTF-8.
+// For ASCII-only input this is a plain cut at 70 characters.
+func criterionNameToParamName(criterion string) string {
+	const maxLen = 70
+
+	// Remove all double quotes.
+	criterion = strings.ReplaceAll(criterion, "\"", "")
+
+	// Replace non-alphanumeric characters with underscores, lowercasing as we go.
+	var result strings.Builder
+	result.Grow(len(criterion))
+	for _, r := range criterion {
+		if unicode.IsLetter(r) || unicode.IsDigit(r) {
+			result.WriteRune(unicode.ToLower(r))
+		} else {
+			result.WriteRune('_')
+		}
+	}
+	param := result.String()
+
+	if len(param) <= maxLen {
+		return param
+	}
+
+	// Ranging over a string yields the byte offset at which each rune starts;
+	// keep the last such offset that does not exceed the limit.
+	cut := 0
+	for i := range param {
+		if i > maxLen {
+			break
+		}
+		cut = i
+	}
+
+	return param[:cut]
+}