From 718066c8b1fb2243574ddf8cabb522345687080a Mon Sep 17 00:00:00 2001 From: Andrew Truong Date: Wed, 12 Mar 2025 13:53:44 -0700 Subject: [PATCH 01/15] test --- .../openai_agents/openai_agents.py | 470 ++++++++++++++++++ 1 file changed, 470 insertions(+) create mode 100644 weave/integrations/openai_agents/openai_agents.py diff --git a/weave/integrations/openai_agents/openai_agents.py b/weave/integrations/openai_agents/openai_agents.py new file mode 100644 index 000000000000..31d51abf5f68 --- /dev/null +++ b/weave/integrations/openai_agents/openai_agents.py @@ -0,0 +1,470 @@ +""" +A Weave integration for OpenAI Agents. + +This module provides a TracingProcessor implementation that logs OpenAI +Agent traces and spans to Weave. +""" + +from __future__ import annotations + +from typing import Any, TypedDict + +from agents import tracing +from agents.tracing import add_trace_processor + +import weave +from weave.trace.context import call_context +from weave.trace.context.weave_client_context import ( + require_weave_client, +) + + +def _call_type(span: tracing.Span[Any]) -> str: + """Determine the appropriate call type for a given OpenAI Agent span.""" + return span.span_data.type or "task" + + +def _call_name(span: tracing.Span[Any]) -> str: + """Determine the name for a given OpenAI Agent span.""" + if name := getattr(span.span_data, "name", None): + return name + elif isinstance(span.span_data, tracing.GenerationSpanData): + return "Generation" + elif isinstance(span.span_data, tracing.ResponseSpanData): + return "Response" + elif isinstance(span.span_data, tracing.HandoffSpanData): + return "Handoff" + else: + return "Unknown" + + +class WeaveDataFormat(TypedDict): + inputs: dict[str, Any] + outputs: dict[str, Any] + metadata: dict[str, Any] + metrics: dict[str, Any] + error: dict[str, Any] | None + + +class WeaveTracingProcessor(tracing.TracingProcessor): + """ + A TracingProcessor implementation that logs OpenAI Agent traces and spans to Weave. + + This processor captures different types of spans from OpenAI Agents (agent execution, + function calls, LLM generations, etc.) and logs them to Weave as structured trace data. + Child spans are logged as separate calls but not redundantly included in the parent trace data. + + Args: + parent_call: Optional Weave call to use as the parent for all traces. + If None, the current call from the context will be used. 
+ """ + + def __init__(self, parent_call: call_context.Call | None = None): + self._parent_call = parent_call + self._trace_data: dict[str, dict[str, Any]] = {} + self._trace_calls: dict[str, call_context.Call] = {} + self._span_calls: dict[str, call_context.Call] = {} + self._ended_traces: set[str] = set() + self._span_parents: dict[str, str] = {} + + def on_trace_start(self, trace: tracing.Trace) -> None: + """Called when a trace starts.""" + # Set up basic trace data + self._trace_data[trace.trace_id] = { + "name": trace.name, + "type": "task", + "metrics": {}, + "metadata": {}, + } + + # Create a call for this trace + parent = self._parent_call or call_context.get_current_call() + wc = require_weave_client() + trace_call = wc.create_call( + op="openai_agent_trace", + inputs={"name": trace.name}, + parent=parent, + attributes={"type": "task", "agent_trace_id": trace.trace_id}, + display_name=trace.name, + ) + self._trace_calls[trace.trace_id] = trace_call + + def on_trace_end(self, trace: tracing.Trace) -> None: + """Called when a trace ends.""" + tid = trace.trace_id + + if tid not in self._trace_data: + return + + if tid not in self._trace_calls: + return + + trace_data = self._trace_data[tid] + self._ended_traces.add(tid) + + # Finish the trace call + output = { + "status": "completed", + "metrics": trace_data.get("metrics", {}), + "metadata": trace_data.get("metadata", {}), + } + wc = require_weave_client() + wc.finish_call(self._trace_calls[tid], output=output) + + def _agent_log_data( + self, span: tracing.Span[tracing.AgentSpanData] + ) -> WeaveDataFormat: + """Extract log data from an agent span.""" + return WeaveDataFormat( + inputs={}, + outputs={}, + metadata={ + "tools": span.span_data.tools, + "handoffs": span.span_data.handoffs, + "output_type": span.span_data.output_type, + }, + metrics={}, + error=None, + ) + + def _response_log_data( + self, span: tracing.Span[tracing.ResponseSpanData] + ) -> WeaveDataFormat: + """Extract log data from a response span.""" + inputs = {} + outputs = {} + metadata = {} + metrics = {} + + # Add input if available + if span.span_data.input is not None: + inputs["input"] = span.span_data.input + + # Extract output and other details from response + if span.span_data.response is not None: + # Just get the plain output value + outputs["output"] = span.span_data.response.output + + # All other data goes into metadata + metadata = span.span_data.response.metadata or {} + + # Add all other response fields to metadata + additional_fields = span.span_data.response.model_dump( + exclude={"input", "output", "metadata", "usage"} + ) + metadata.update(additional_fields) + + # Add usage data to metrics if available + if span.span_data.response.usage is not None: + usage = span.span_data.response.usage + metrics = { + "tokens": usage.total_tokens, + "prompt_tokens": usage.input_tokens, + "completion_tokens": usage.output_tokens, + } + + return WeaveDataFormat( + inputs=inputs, + outputs=outputs, + metadata=metadata, + metrics=metrics, + error=None, + ) + + def _function_log_data( + self, span: tracing.Span[tracing.FunctionSpanData] + ) -> WeaveDataFormat: + """Extract log data from a function span.""" + return WeaveDataFormat( + inputs={"input": span.span_data.input}, + outputs={"output": span.span_data.output}, + metadata={}, + metrics={}, + error=None, + ) + + def _handoff_log_data( + self, span: tracing.Span[tracing.HandoffSpanData] + ) -> WeaveDataFormat: + """Extract log data from a handoff span.""" + return WeaveDataFormat( + inputs={}, + outputs={}, 
+            metadata={
+                "from_agent": span.span_data.from_agent,
+                "to_agent": span.span_data.to_agent,
+            },
+            metrics={},
+            error=None,
+        )
+
+    def _guardrail_log_data(
+        self, span: tracing.Span[tracing.GuardrailSpanData]
+    ) -> WeaveDataFormat:
+        """Extract log data from a guardrail span."""
+        return WeaveDataFormat(
+            inputs={},
+            outputs={},
+            metadata={"triggered": span.span_data.triggered},
+            metrics={},
+            error=None,
+        )
+
+    def _generation_log_data(
+        self, span: tracing.Span[tracing.GenerationSpanData]
+    ) -> WeaveDataFormat:
+        """Extract log data from a generation span."""
+        return WeaveDataFormat(
+            inputs={"input": span.span_data.input},
+            outputs={"output": span.span_data.output},
+            metadata={
+                "model": span.span_data.model,
+                "model_config": span.span_data.model_config,
+            },
+            metrics={
+                # Usage lives on span_data (GenerationSpanData.usage) and may be None.
+                "tokens": (span.span_data.usage or {}).get("total_tokens"),
+                "prompt_tokens": (span.span_data.usage or {}).get("prompt_tokens"),
+                "completion_tokens": (span.span_data.usage or {}).get("completion_tokens"),
+            },
+            error=None,
+        )
+
+    def _custom_log_data(
+        self, span: tracing.Span[tracing.CustomSpanData]
+    ) -> WeaveDataFormat:
+        """Extract log data from a custom span."""
+        # Prepare fields
+        inputs = {}
+        outputs = {}
+        metadata = {}
+        metrics = {}
+
+        # Extract data from the custom span
+        custom_data = span.span_data.data
+
+        # Map custom data to the appropriate fields if possible
+        if "input" in custom_data:
+            inputs["input"] = custom_data["input"]
+
+        if "output" in custom_data:
+            outputs["output"] = custom_data["output"]
+
+        if "metadata" in custom_data:
+            metadata = custom_data["metadata"]
+
+        if "metrics" in custom_data:
+            metrics = custom_data["metrics"]
+
+        # Add any remaining fields to metadata
+        for key, value in custom_data.items():
+            if key not in ["input", "output", "metadata", "metrics"]:
+                metadata[key] = value
+
+        return WeaveDataFormat(
+            inputs=inputs,
+            outputs=outputs,
+            metadata=metadata,
+            metrics=metrics,
+            error=None,
+        )
+
+    def _log_data(self, span: tracing.Span[Any]) -> WeaveDataFormat:
+        """Extract the appropriate log data based on the span type."""
+        if isinstance(span.span_data, tracing.AgentSpanData):
+            return self._agent_log_data(span)
+        elif isinstance(span.span_data, tracing.ResponseSpanData):
+            return self._response_log_data(span)
+        elif isinstance(span.span_data, tracing.FunctionSpanData):
+            return self._function_log_data(span)
+        elif isinstance(span.span_data, tracing.HandoffSpanData):
+            return self._handoff_log_data(span)
+        elif isinstance(span.span_data, tracing.GuardrailSpanData):
+            return self._guardrail_log_data(span)
+        elif isinstance(span.span_data, tracing.GenerationSpanData):
+            return self._generation_log_data(span)
+        elif isinstance(span.span_data, tracing.CustomSpanData):
+            return self._custom_log_data(span)
+        else:
+            return WeaveDataFormat(
+                inputs={},
+                outputs={},
+                metadata={},
+                metrics={},
+                error=None,
+            )
+
+    def _get_parent_call(
+        self, span: tracing.Span[Any]
+    ) -> weave.trace.context.call_context.Call | None:
+        """Helper method to determine the parent call for a span."""
+        trace_id = span.trace_id
+        parent_span_id = getattr(span, "parent_id", None)
+
+        # Child span
+        if call := self._span_calls.get(parent_span_id):
+            return call
+
+        # Trace root
+        if call := self._trace_calls.get(trace_id):
+            return call
+
+        # Should not reach here, but kept for completeness
+        return None
+
+    def on_span_start(self, span: tracing.Span[Any]) -> None:
+        """Called when a span starts."""
+        # For Response spans, we'll defer call creation until on_span_end when we have input
data + if isinstance(span.span_data, tracing.ResponseSpanData): + return + + # Spans must have a parent (either another span or the trace root) + if not self._get_parent_call(span): + return + + # Spans must be part of a trace + tid = span.trace_id + if tid not in self._trace_data: + return + + span_name = _call_name(span) + span_type = _call_type(span) + parent_call = self._get_parent_call(span) + + wc = require_weave_client() + span_call = wc.create_call( + op=f"openai_agent_{span_type}", + inputs={"name": span_name}, + parent=parent_call, + attributes={ + "type": span_type, + "agent_span_id": span.span_id, + "agent_trace_id": tid, + "parent_span_id": getattr(span, "parent_id", None), + }, + display_name=span_name, + ) + self._span_calls[span.span_id] = span_call + + def on_span_end(self, span: tracing.Span[Any]) -> None: + """Called when a span ends.""" + trace_id = span.trace_id + span_name = _call_name(span) + span_type = _call_type(span) + log_data = self._log_data(span) + + # For Response spans, create the call here so we can include input data + if ( + isinstance(span.span_data, tracing.ResponseSpanData) + and span.span_id not in self._span_calls + and trace_id in self._trace_data + and (parent_call := self._get_parent_call(span)) + ): + # Create attributes + attributes = { + "type": span_type, + "agent_span_id": span.span_id, + "agent_trace_id": trace_id, + } + + # Add parent span ID if present + if pid := getattr(span, "parent_id", None): + attributes["parent_span_id"] = pid + + # Create inputs with both name and input data if available + inputs = { + "name": span_name, + "input": log_data["inputs"].get("input"), + } + + # Create the call now that we have the input data + wc = require_weave_client() + span_call = wc.create_call( + op=f"openai_agent_{span_type}", + inputs=inputs, + parent=parent_call, + attributes=attributes, + display_name=span_name, + ) + self._span_calls[span.span_id] = span_call + + # If this span has a call, finish it + if (span_call := self._span_calls.get(span.span_id)) is None: + return + + output = { + "status": "error" if span.error else "completed", + "type": span_type, + "name": span_name, + "parent_span_id": getattr(span, "parent_id", None), + "output": log_data["outputs"].get("output"), + "metrics": log_data["metrics"], + "metadata": log_data["metadata"], + "error": log_data["error"], + } + + # Add error if present + if span.error: + output["error"] = span.error + elif log_data["error"]: + output["error"] = log_data["error"] + + # Finish the call with the collected data + wc = require_weave_client() + wc.finish_call(span_call, output=output) + + def _finish_unfinished_calls(self, status: str) -> None: + """Helper method for finishing unfinished calls on shutdown or flush.""" + wc = require_weave_client() + # Finish any unfinished trace calls + for trace_id, trace_data in self._trace_data.items(): + if trace_id in self._trace_calls: + trace_call = self._trace_calls[trace_id] + + # Check if call is already finished + if not getattr(trace_call, "ended_at", None): + # Set status based on whether it ended normally + actual_status = ( + "completed" if trace_id in self._ended_traces else status + ) + + # Prepare output with the basic trace data + output = { + "status": actual_status, + "metrics": trace_data.get("metrics", {}), + "metadata": trace_data.get("metadata", {}), + } + wc.finish_call(trace_call, output=output) + + # Also finish any unfinished span calls + for span_call in self._span_calls.values(): + if not getattr(span_call, "ended_at", None): + 
wc.finish_call(span_call, output={"status": status}) + + def shutdown(self) -> None: + """Called when the application stops.""" + self._finish_unfinished_calls("interrupted") + + def force_flush(self) -> None: + """Forces an immediate flush of all queued traces.""" + self._finish_unfinished_calls("force_flushed") + + +def install( + parent_call: weave.trace.context.call_context.Call | None = None, +) -> WeaveTracingProcessor: + """ + Install the Weave tracing processor for OpenAI Agents. + + This function creates a WeaveTracingProcessor and registers it with + the OpenAI Agents tracing system. + + Args: + parent_call: Optional Weave call to use as the parent for all traces. + If None, the current call from the context will be used. + + Returns: + The installed WeaveTracingProcessor instance. + """ + processor = WeaveTracingProcessor(parent_call) + add_trace_processor(processor) + return processor From 50e7b9bb2d9706384cafb4d9365955b9912982b1 Mon Sep 17 00:00:00 2001 From: andrewtruong Date: Mon, 17 Mar 2025 10:51:19 -0400 Subject: [PATCH 02/15] test --- .../openai_agents/openai_agents.py | 81 +++++++++++++++---- weave/trace/autopatch.py | 11 +++ 2 files changed, 75 insertions(+), 17 deletions(-) diff --git a/weave/integrations/openai_agents/openai_agents.py b/weave/integrations/openai_agents/openai_agents.py index 31d51abf5f68..bb474aaec9c7 100644 --- a/weave/integrations/openai_agents/openai_agents.py +++ b/weave/integrations/openai_agents/openai_agents.py @@ -13,11 +13,15 @@ from agents.tracing import add_trace_processor import weave +from weave.integrations.patcher import NoOpPatcher, Patcher +from weave.trace.autopatch import IntegrationSettings from weave.trace.context import call_context from weave.trace.context.weave_client_context import ( require_weave_client, ) +_openai_agents_patcher: OpenAIAgentsPatcher | None = None + def _call_type(span: tracing.Span[Any]) -> str: """Determine the appropriate call type for a given OpenAI Agent span.""" @@ -133,8 +137,8 @@ def _response_log_data( """Extract log data from a response span.""" inputs = {} outputs = {} - metadata = {} - metrics = {} + metadata: dict[str, Any] = {} + metrics: dict[str, Any] = {} # Add input if available if span.span_data.input is not None: @@ -236,8 +240,8 @@ def _custom_log_data( # Prepare fields inputs = {} outputs = {} - metadata = {} - metrics = {} + metadata: dict[str, Any] = {} + metrics: dict[str, Any] = {} # Extract data from the custom span custom_data = span.span_data.data @@ -301,7 +305,9 @@ def _get_parent_call( parent_span_id = getattr(span, "parent_id", None) # Child span - if call := self._span_calls.get(parent_span_id): + if parent_span_id is not None and ( + call := self._span_calls.get(parent_span_id) + ): return call # Trace root @@ -449,22 +455,63 @@ def force_flush(self) -> None: self._finish_unfinished_calls("force_flushed") -def install( - parent_call: weave.trace.context.call_context.Call | None = None, -) -> WeaveTracingProcessor: +class OpenAIAgentsPatcher(Patcher): + """ + A patcher for OpenAI Agents that manages the lifecycle of a WeaveTracingProcessor. + + Unlike other patchers that modify function behavior, this patcher installs and + removes a processor from the OpenAI Agents tracing system. """ - Install the Weave tracing processor for OpenAI Agents. - This function creates a WeaveTracingProcessor and registers it with - the OpenAI Agents tracing system. 
+ def __init__(self, settings: IntegrationSettings) -> None: + self.settings = settings + self.processor: WeaveTracingProcessor | None = None + self.patched = False + self.parent_call: weave.trace.context.call_context.Call | None = None + + def attempt_patch(self) -> bool: + """Install a WeaveTracingProcessor in the OpenAI Agents tracing system.""" + if self.patched: + return True + + try: + self.processor = WeaveTracingProcessor(self.parent_call) + add_trace_processor(self.processor) + self.patched = True + except Exception as e: + self.processor = None + return False + else: + return True + + def undo_patch(self) -> bool: + # OpenAI Agents doesn't have a way to de-register a processor yet... + return True + + +def get_openai_agents_patcher( + settings: IntegrationSettings | None = None, +) -> OpenAIAgentsPatcher | NoOpPatcher: + """ + Get a patcher for OpenAI Agents integration. Args: - parent_call: Optional Weave call to use as the parent for all traces. - If None, the current call from the context will be used. + settings: Optional integration settings to configure the patcher. + If None, default settings will be used. Returns: - The installed WeaveTracingProcessor instance. + A patcher that can be used to patch and unpatch the OpenAI Agents integration. """ - processor = WeaveTracingProcessor(parent_call) - add_trace_processor(processor) - return processor + if settings is None: + settings = IntegrationSettings() + + if not settings.enabled: + return NoOpPatcher() + + global _openai_agents_patcher + if _openai_agents_patcher is not None: + return _openai_agents_patcher + + _openai_agents_patcher = OpenAIAgentsPatcher(settings) + + return _openai_agents_patcher diff --git a/weave/trace/autopatch.py b/weave/trace/autopatch.py index a3cf3f77e1ff..5137bd340c71 100644 --- a/weave/trace/autopatch.py +++ b/weave/trace/autopatch.py @@ -10,6 +10,8 @@ from weave.trace.weave_client import Call +print("HELLO") + class OpSettings(BaseModel): """Op settings for a specific integration. 
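For context, a minimal usage sketch of the patcher introduced above. All names come from
this patch; treating the default IntegrationSettings() as enabled is an assumption, and
nothing here is part of the committed diff:

    # Sketch only: drive the singleton patcher by hand instead of via autopatch().
    from weave.integrations.openai_agents.openai_agents import get_openai_agents_patcher
    from weave.trace.autopatch import IntegrationSettings

    patcher = get_openai_agents_patcher(IntegrationSettings())  # assumed enabled by default
    patcher.attempt_patch()   # registers a WeaveTracingProcessor via add_trace_processor
    patcher.attempt_patch()   # idempotent: already patched, so it returns True immediately
    assert get_openai_agents_patcher() is patcher  # module-level singleton is reused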
@@ -47,6 +49,7 @@ class AutopatchSettings(BaseModel): mistral: IntegrationSettings = Field(default_factory=IntegrationSettings) notdiamond: IntegrationSettings = Field(default_factory=IntegrationSettings) openai: IntegrationSettings = Field(default_factory=IntegrationSettings) + openai_agents: IntegrationSettings = Field(default_factory=IntegrationSettings) vertexai: IntegrationSettings = Field(default_factory=IntegrationSettings) chatnvidia: IntegrationSettings = Field(default_factory=IntegrationSettings) @@ -58,6 +61,8 @@ def autopatch(settings: Optional[AutopatchSettings] = None) -> None: if settings.disable_autopatch: return + print("inside autopatch") + from weave.integrations.anthropic.anthropic_sdk import get_anthropic_patcher from weave.integrations.cerebras.cerebras_sdk import get_cerebras_patcher from weave.integrations.cohere.cohere_sdk import get_cohere_patcher @@ -79,8 +84,11 @@ def autopatch(settings: Optional[AutopatchSettings] = None) -> None: from weave.integrations.mistral import get_mistral_patcher from weave.integrations.notdiamond.tracing import get_notdiamond_patcher from weave.integrations.openai.openai_sdk import get_openai_patcher + from weave.integrations.openai_agents.openai_agents import get_openai_agents_patcher from weave.integrations.vertexai.vertexai_sdk import get_vertexai_patcher + print("Hello from patcher") + get_openai_patcher(settings.openai).attempt_patch() get_mistral_patcher(settings.mistral).attempt_patch() get_litellm_patcher(settings.litellm).attempt_patch() @@ -95,6 +103,7 @@ def autopatch(settings: Optional[AutopatchSettings] = None) -> None: get_vertexai_patcher(settings.vertexai).attempt_patch() get_nvidia_ai_patcher(settings.chatnvidia).attempt_patch() get_huggingface_patcher(settings.huggingface).attempt_patch() + get_openai_agents_patcher(settings.openai_agents).attempt_patch() llamaindex_patcher.attempt_patch() langchain_patcher.attempt_patch() @@ -122,6 +131,7 @@ def reset_autopatch() -> None: from weave.integrations.mistral import get_mistral_patcher from weave.integrations.notdiamond.tracing import get_notdiamond_patcher from weave.integrations.openai.openai_sdk import get_openai_patcher + from weave.integrations.openai_agents.openai_agents import get_openai_agents_patcher from weave.integrations.vertexai.vertexai_sdk import get_vertexai_patcher get_openai_patcher().undo_patch() @@ -138,6 +148,7 @@ def reset_autopatch() -> None: get_vertexai_patcher().undo_patch() get_nvidia_ai_patcher().undo_patch() get_huggingface_patcher().undo_patch() + get_openai_agents_patcher().undo_patch() llamaindex_patcher.undo_patch() langchain_patcher.undo_patch() From b188b06e988c55075b934428cf4b7ffd8c10fe8f Mon Sep 17 00:00:00 2001 From: andrewtruong Date: Mon, 17 Mar 2025 10:54:46 -0400 Subject: [PATCH 03/15] test --- weave/trace/autopatch.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/weave/trace/autopatch.py b/weave/trace/autopatch.py index 5137bd340c71..88d1ac98daf7 100644 --- a/weave/trace/autopatch.py +++ b/weave/trace/autopatch.py @@ -10,8 +10,6 @@ from weave.trace.weave_client import Call -print("HELLO") - class OpSettings(BaseModel): """Op settings for a specific integration. 
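A companion sketch of the opt-out path through the new openai_agents settings field added
in this series (the enabled flag is the same one get_openai_agents_patcher checks before
returning a NoOpPatcher); again, not part of the committed diff:

    # Sketch only: disable the OpenAI Agents integration via autopatch settings.
    from weave.trace.autopatch import AutopatchSettings, IntegrationSettings, autopatch

    autopatch(
        AutopatchSettings(
            openai_agents=IntegrationSettings(enabled=False),
        )
    )
    # With enabled=False, get_openai_agents_patcher() returns a NoOpPatcher,
    # so no WeaveTracingProcessor is ever registered with the agents SDK.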
@@ -61,8 +59,6 @@ def autopatch(settings: Optional[AutopatchSettings] = None) -> None: if settings.disable_autopatch: return - print("inside autopatch") - from weave.integrations.anthropic.anthropic_sdk import get_anthropic_patcher from weave.integrations.cerebras.cerebras_sdk import get_cerebras_patcher from weave.integrations.cohere.cohere_sdk import get_cohere_patcher @@ -87,8 +83,6 @@ def autopatch(settings: Optional[AutopatchSettings] = None) -> None: from weave.integrations.openai_agents.openai_agents import get_openai_agents_patcher from weave.integrations.vertexai.vertexai_sdk import get_vertexai_patcher - print("Hello from patcher") - get_openai_patcher(settings.openai).attempt_patch() get_mistral_patcher(settings.mistral).attempt_patch() get_litellm_patcher(settings.litellm).attempt_patch() From 20097fbbedc205be8d6ef29c46d01cb4e5b25470 Mon Sep 17 00:00:00 2001 From: andrewtruong Date: Mon, 17 Mar 2025 11:07:55 -0400 Subject: [PATCH 04/15] test --- weave/integrations/openai_agents/openai_agents.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/weave/integrations/openai_agents/openai_agents.py b/weave/integrations/openai_agents/openai_agents.py index bb474aaec9c7..6ed112372662 100644 --- a/weave/integrations/openai_agents/openai_agents.py +++ b/weave/integrations/openai_agents/openai_agents.py @@ -63,8 +63,7 @@ class WeaveTracingProcessor(tracing.TracingProcessor): If None, the current call from the context will be used. """ - def __init__(self, parent_call: call_context.Call | None = None): - self._parent_call = parent_call + def __init__(self): self._trace_data: dict[str, dict[str, Any]] = {} self._trace_calls: dict[str, call_context.Call] = {} self._span_calls: dict[str, call_context.Call] = {} @@ -82,12 +81,11 @@ def on_trace_start(self, trace: tracing.Trace) -> None: } # Create a call for this trace - parent = self._parent_call or call_context.get_current_call() wc = require_weave_client() trace_call = wc.create_call( op="openai_agent_trace", inputs={"name": trace.name}, - parent=parent, + parent=call_context.get_current_call(), attributes={"type": "task", "agent_trace_id": trace.trace_id}, display_name=trace.name, ) @@ -465,9 +463,8 @@ class OpenAIAgentsPatcher(Patcher): def __init__(self, settings: IntegrationSettings) -> None: self.settings = settings - self.processor: WeaveTracingProcessor | None = None self.patched = False - self.parent_call: weave.trace.context.call_context.Call | None = None + self.processor: WeaveTracingProcessor | None = None def attempt_patch(self) -> bool: """Install a WeaveTracingProcessor in the OpenAI Agents tracing system.""" @@ -475,7 +472,7 @@ def attempt_patch(self) -> bool: return True try: - self.processor = WeaveTracingProcessor(self.parent_call) + self.processor = WeaveTracingProcessor() add_trace_processor(self.processor) self.patched = True except Exception as e: From 0fee87a6bf58a7509be4d86588ea8f23af488637 Mon Sep 17 00:00:00 2001 From: andrewtruong Date: Mon, 17 Mar 2025 11:20:00 -0400 Subject: [PATCH 05/15] test --- .../openai_agents/openai_agents.py | 50 ++++++++----------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/weave/integrations/openai_agents/openai_agents.py b/weave/integrations/openai_agents/openai_agents.py index 6ed112372662..abd749308991 100644 --- a/weave/integrations/openai_agents/openai_agents.py +++ b/weave/integrations/openai_agents/openai_agents.py @@ -12,13 +12,11 @@ from agents import tracing from agents.tracing import add_trace_processor -import weave from 
weave.integrations.patcher import NoOpPatcher, Patcher from weave.trace.autopatch import IntegrationSettings from weave.trace.context import call_context -from weave.trace.context.weave_client_context import ( - require_weave_client, -) +from weave.trace.context.weave_client_context import require_weave_client +from weave.trace.weave_client import Call _openai_agents_patcher: OpenAIAgentsPatcher | None = None @@ -42,7 +40,7 @@ def _call_name(span: tracing.Span[Any]) -> str: return "Unknown" -class WeaveDataFormat(TypedDict): +class WeaveDataDict(TypedDict): inputs: dict[str, Any] outputs: dict[str, Any] metadata: dict[str, Any] @@ -63,7 +61,7 @@ class WeaveTracingProcessor(tracing.TracingProcessor): If None, the current call from the context will be used. """ - def __init__(self): + def __init__(self) -> None: self._trace_data: dict[str, dict[str, Any]] = {} self._trace_calls: dict[str, call_context.Call] = {} self._span_calls: dict[str, call_context.Call] = {} @@ -115,9 +113,9 @@ def on_trace_end(self, trace: tracing.Trace) -> None: def _agent_log_data( self, span: tracing.Span[tracing.AgentSpanData] - ) -> WeaveDataFormat: + ) -> WeaveDataDict: """Extract log data from an agent span.""" - return WeaveDataFormat( + return WeaveDataDict( inputs={}, outputs={}, metadata={ @@ -131,7 +129,7 @@ def _agent_log_data( def _response_log_data( self, span: tracing.Span[tracing.ResponseSpanData] - ) -> WeaveDataFormat: + ) -> WeaveDataDict: """Extract log data from a response span.""" inputs = {} outputs = {} @@ -165,7 +163,7 @@ def _response_log_data( "completion_tokens": usage.output_tokens, } - return WeaveDataFormat( + return WeaveDataDict( inputs=inputs, outputs=outputs, metadata=metadata, @@ -175,9 +173,9 @@ def _response_log_data( def _function_log_data( self, span: tracing.Span[tracing.FunctionSpanData] - ) -> WeaveDataFormat: + ) -> WeaveDataDict: """Extract log data from a function span.""" - return WeaveDataFormat( + return WeaveDataDict( inputs={"input": span.span_data.input}, outputs={"output": span.span_data.output}, metadata={}, @@ -187,9 +185,9 @@ def _function_log_data( def _handoff_log_data( self, span: tracing.Span[tracing.HandoffSpanData] - ) -> WeaveDataFormat: + ) -> WeaveDataDict: """Extract log data from a handoff span.""" - return WeaveDataFormat( + return WeaveDataDict( inputs={}, outputs={}, metadata={ @@ -202,9 +200,9 @@ def _handoff_log_data( def _guardrail_log_data( self, span: tracing.Span[tracing.GuardrailSpanData] - ) -> WeaveDataFormat: + ) -> WeaveDataDict: """Extract log data from a guardrail span.""" - return WeaveDataFormat( + return WeaveDataDict( inputs={}, outputs={}, metadata={"triggered": span.span_data.triggered}, @@ -214,9 +212,9 @@ def _guardrail_log_data( def _generation_log_data( self, span: tracing.Span[tracing.GenerationSpanData] - ) -> WeaveDataFormat: + ) -> WeaveDataDict: """Extract log data from a generation span.""" - return WeaveDataFormat( + return WeaveDataDict( inputs={"input": span.span_data.input}, outputs={"output": span.span_data.output}, metadata={ @@ -233,7 +231,7 @@ def _generation_log_data( def _custom_log_data( self, span: tracing.Span[tracing.CustomSpanData] - ) -> WeaveDataFormat: + ) -> WeaveDataDict: """Extract log data from a custom span.""" # Prepare fields inputs = {} @@ -262,7 +260,7 @@ def _custom_log_data( if key not in ["input", "output", "metadata", "metrics"]: metadata[key] = value - return WeaveDataFormat( + return WeaveDataDict( inputs=inputs, outputs=outputs, metadata=metadata, @@ -270,7 +268,7 @@ def 
_custom_log_data( error=None, ) - def _log_data(self, span: tracing.Span[Any]) -> WeaveDataFormat: + def _log_data(self, span: tracing.Span[Any]) -> WeaveDataDict: """Extract the appropriate log data based on the span type.""" if isinstance(span.span_data, tracing.AgentSpanData): return self._agent_log_data(span) @@ -287,7 +285,7 @@ def _log_data(self, span: tracing.Span[Any]) -> WeaveDataFormat: elif isinstance(span.span_data, tracing.CustomSpanData): return self._custom_log_data(span) else: - return WeaveDataFormat( + return WeaveDataDict( inputs={}, outputs={}, metadata={}, @@ -295,9 +293,7 @@ def _log_data(self, span: tracing.Span[Any]) -> WeaveDataFormat: error=None, ) - def _get_parent_call( - self, span: tracing.Span[Any] - ) -> weave.trace.context.call_context.Call | None: + def _get_parent_call(self, span: tracing.Span[Any]) -> Call | None: """Helper method to determine the parent call for a span.""" trace_id = span.trace_id parent_span_id = getattr(span, "parent_id", None) @@ -396,10 +392,6 @@ def on_span_end(self, span: tracing.Span[Any]) -> None: return output = { - "status": "error" if span.error else "completed", - "type": span_type, - "name": span_name, - "parent_span_id": getattr(span, "parent_id", None), "output": log_data["outputs"].get("output"), "metrics": log_data["metrics"], "metadata": log_data["metadata"], From c3834ed903d8183ba785b52ec7c7b5ffa0236733 Mon Sep 17 00:00:00 2001 From: andrewtruong Date: Mon, 17 Mar 2025 11:27:28 -0400 Subject: [PATCH 06/15] test --- noxfile.py | 1 + pyproject.toml | 1 + tests/integrations/openai_agents/__init__.py | 0 .../openai_agents/openai_agents_test.py | 0 .../integrations/openai_agents/test_autopatch.py | 0 .../integrations/openai_agents/openai_agents.py | 16 ++++++++++++---- 6 files changed, 14 insertions(+), 4 deletions(-) create mode 100644 tests/integrations/openai_agents/__init__.py create mode 100644 tests/integrations/openai_agents/openai_agents_test.py create mode 100644 tests/integrations/openai_agents/test_autopatch.py diff --git a/noxfile.py b/noxfile.py index 06930b88d3a5..f366db8c15a1 100644 --- a/noxfile.py +++ b/noxfile.py @@ -50,6 +50,7 @@ def lint(session): "mistral1", "notdiamond", "openai", + "openai_agents", "vertexai", "bedrock", "scorers", diff --git a/pyproject.toml b/pyproject.toml index f713320ceef5..45aabb631857 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,6 +108,7 @@ scorers = [ ] notdiamond = ["notdiamond>=0.3.21", "litellm<=1.49.1"] openai = ["openai>=1.0.0"] +openai_agents = ["openai-agents>=0.0.4"] pandas-test = ["pandas>=2.2.3"] presidio = ["presidio-analyzer==2.2.357", "presidio-anonymizer==2.2.357"] modal = ["modal", "python-dotenv"] diff --git a/tests/integrations/openai_agents/__init__.py b/tests/integrations/openai_agents/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integrations/openai_agents/openai_agents_test.py b/tests/integrations/openai_agents/openai_agents_test.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integrations/openai_agents/test_autopatch.py b/tests/integrations/openai_agents/test_autopatch.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/weave/integrations/openai_agents/openai_agents.py b/weave/integrations/openai_agents/openai_agents.py index abd749308991..089017156a88 100644 --- a/weave/integrations/openai_agents/openai_agents.py +++ b/weave/integrations/openai_agents/openai_agents.py @@ -9,9 +9,6 @@ from typing import Any, TypedDict -from agents import tracing -from 
agents.tracing import add_trace_processor - from weave.integrations.patcher import NoOpPatcher, Patcher from weave.trace.autopatch import IntegrationSettings from weave.trace.context import call_context @@ -21,6 +18,15 @@ _openai_agents_patcher: OpenAIAgentsPatcher | None = None +try: + from agents import tracing + from agents.tracing import TracingProcessor +except ImportError: + + class TracingProcessor: # type: ignore + pass + + def _call_type(span: tracing.Span[Any]) -> str: """Determine the appropriate call type for a given OpenAI Agent span.""" return span.span_data.type or "task" @@ -48,7 +54,7 @@ class WeaveDataDict(TypedDict): error: dict[str, Any] | None -class WeaveTracingProcessor(tracing.TracingProcessor): +class WeaveTracingProcessor(TracingProcessor): """ A TracingProcessor implementation that logs OpenAI Agent traces and spans to Weave. @@ -464,6 +470,8 @@ def attempt_patch(self) -> bool: return True try: + from agents.tracing import add_trace_processor + self.processor = WeaveTracingProcessor() add_trace_processor(self.processor) self.patched = True From e67c2877903fdbf6ecea8fd54488443c9e368290 Mon Sep 17 00:00:00 2001 From: andrewtruong Date: Mon, 17 Mar 2025 11:47:38 -0400 Subject: [PATCH 07/15] test --- weave/integrations/openai_agents/openai_agents.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/weave/integrations/openai_agents/openai_agents.py b/weave/integrations/openai_agents/openai_agents.py index 089017156a88..ccb2cd4a71e8 100644 --- a/weave/integrations/openai_agents/openai_agents.py +++ b/weave/integrations/openai_agents/openai_agents.py @@ -23,8 +23,7 @@ from agents.tracing import TracingProcessor except ImportError: - class TracingProcessor: # type: ignore - pass + class TracingProcessor: ... # type: ignore[no-redef] def _call_type(span: tracing.Span[Any]) -> str: @@ -54,7 +53,7 @@ class WeaveDataDict(TypedDict): error: dict[str, Any] | None -class WeaveTracingProcessor(TracingProcessor): +class WeaveTracingProcessor(TracingProcessor): # pyright: ignore[reportGeneralTypeIssues] """ A TracingProcessor implementation that logs OpenAI Agent traces and spans to Weave. From afda46f2dffe31f297b36227a2c4337c37c9e0f8 Mon Sep 17 00:00:00 2001 From: andrewtruong Date: Mon, 17 Mar 2025 13:53:02 -0400 Subject: [PATCH 08/15] test --- weave/integrations/openai_agents/openai_agents.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/weave/integrations/openai_agents/openai_agents.py b/weave/integrations/openai_agents/openai_agents.py index ccb2cd4a71e8..38c8cd5e6440 100644 --- a/weave/integrations/openai_agents/openai_agents.py +++ b/weave/integrations/openai_agents/openai_agents.py @@ -60,10 +60,6 @@ class WeaveTracingProcessor(TracingProcessor): # pyright: ignore[reportGeneralT This processor captures different types of spans from OpenAI Agents (agent execution, function calls, LLM generations, etc.) and logs them to Weave as structured trace data. Child spans are logged as separate calls but not redundantly included in the parent trace data. - - Args: - parent_call: Optional Weave call to use as the parent for all traces. - If None, the current call from the context will be used. 
""" def __init__(self) -> None: @@ -97,10 +93,8 @@ def on_trace_start(self, trace: tracing.Trace) -> None: def on_trace_end(self, trace: tracing.Trace) -> None: """Called when a trace ends.""" tid = trace.trace_id - if tid not in self._trace_data: return - if tid not in self._trace_calls: return From 6bae11bc3944881ad6b3f590b88b45bae13d0dbb Mon Sep 17 00:00:00 2001 From: andrewtruong Date: Tue, 18 Mar 2025 00:04:18 -0400 Subject: [PATCH 09/15] test --- noxfile.py | 3 + .../test_openai_agents_quickstart.yaml | 94 ++++ ...est_openai_agents_quickstart_homework.yaml | 448 ++++++++++++++++++ .../openai_agents/openai_agents_test.py | 257 ++++++++++ 4 files changed, 802 insertions(+) create mode 100644 tests/integrations/openai_agents/cassettes/openai_agents_test/test_openai_agents_quickstart.yaml create mode 100644 tests/integrations/openai_agents/cassettes/openai_agents_test/test_openai_agents_quickstart_homework.yaml diff --git a/noxfile.py b/noxfile.py index f366db8c15a1..b77af2e033bb 100644 --- a/noxfile.py +++ b/noxfile.py @@ -93,6 +93,9 @@ def tests(session, shard): env["MISTRAL_API_KEY"] = session.env.get("MISTRAL_API_KEY") env["OPENAI_API_KEY"] = session.env.get("OPENAI_API_KEY") + if shard == "openai_agents": + env["OPENAI_API_KEY"] = session.env.get("OPENAI_API_KEY") + default_test_dirs = [f"integrations/{shard}/"] test_dirs_dict = { "custom": [], diff --git a/tests/integrations/openai_agents/cassettes/openai_agents_test/test_openai_agents_quickstart.yaml b/tests/integrations/openai_agents/cassettes/openai_agents_test/test_openai_agents_quickstart.yaml new file mode 100644 index 000000000000..380fc3dd0823 --- /dev/null +++ b/tests/integrations/openai_agents/cassettes/openai_agents_test/test_openai_agents_quickstart.yaml @@ -0,0 +1,94 @@ +interactions: +- request: + body: '{"input":[{"content":"Write a haiku about recursion in programming.","role":"user"}],"model":"gpt-4o","include":[],"instructions":"You + are a helpful assistant","stream":false,"tools":[]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '186' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - Agents/Python 0.0.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.9 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAA3RUwW7bMAy95ysInZvCdhzbyXWn3XYdusJgbDpVJ4uGRA0tivz7YNlx4q29BBGf + 3iP5KPpjA6B0q46gHPmhLsq2orZLduk+r9JDVp3yKj/sT1le7rNsVyZlUuT7MjkUHVGbkXoYBfj0 + So1cRdj6Od44QqG2xhFLyzzLisNhV0TMC0rwI6fhfjAk1E4kco6dOoINxsSAttcbdUuC2vg16sWF + RjTbqPaTA6AjQHghM3TBAHqvvaCVSb/Ht5qDDEFq4d9kV2o9t2RGmfMg25y3WZLl26TaJsXcaSSq + IzxtAAA+4i+AkveBRlpP3uN5aj8Ck7e9Py/W4h6r0dpTmaRpkVHRpdV+t6NPrY0aX1kVQccmZl53 + GaGGrZC9VXtf8arqqx30dmNPF8bIEdQ3bgkaNMaDMGjxZLoHgF/2u+201UJgmAcP2oLXhmxDEf2B + IuSsB+rJnQkaQ+ge1ynQWha8ju/pecEu878pMp6e4wgGdGgMmVqYTR2LUkcQF2iCHf3RHHx9fYp1 + HMEyYUfo2Wp7VsfZDEVdx07uLgGoM1lyKFT70Pfo3md0A3CZny87us8r1A8jI8Rw+pjM0WjgnKhj + 1+PtfDeBeG9pdEox9ffCupkGHITVAkxmzcehHu5zumCb6OhIa7XHk7kuV4ivcylI29Ua5LuH/+N3 + S7eU3WDzQu2NmEylz+x/tystPwM+012G85W0sKC5gUWyuBU8rb4aPQm2KDjKXzaXvwAAAP//AwBA + W3tF6QQAAA== + headers: + CF-RAY: + - 9221c6bccef336b3-YYZ + Connection: + - keep-alive + 
Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 18 Mar 2025 03:52:16 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=Tqnkbc7A4PgF5SM.gzWaijlDAAnDOk9Zieo6zRFmpd4-1742269936-1.0.1.1-9U42TycTKwZ6d4ISt9Qix_e2j6Latc3s8rPzobIIvSxaXqbrj.2XcpXF2GDI88LQyM0sRkP63Fw5_2JqEPAcJqt3OQPH5IESe3_.Iu0IofY; + path=/; expires=Tue, 18-Mar-25 04:22:16 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=KqFFy64fH2k1v0MoM0hYELHA_M66ltXaaJFWGlP2Kng-1742269936949-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - wandb + openai-processing-ms: + - '728' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-request-id: + - req_f9ec95d382e76a8e0ef8eb42aa207c3b + status: + code: 200 + message: OK +version: 1 diff --git a/tests/integrations/openai_agents/cassettes/openai_agents_test/test_openai_agents_quickstart_homework.yaml b/tests/integrations/openai_agents/cassettes/openai_agents_test/test_openai_agents_quickstart_homework.yaml new file mode 100644 index 000000000000..81e4e1ad1f69 --- /dev/null +++ b/tests/integrations/openai_agents/cassettes/openai_agents_test/test_openai_agents_quickstart_homework.yaml @@ -0,0 +1,448 @@ +interactions: +- request: + body: '{"input":[{"content":"who was the first president of the united states?","role":"user"}],"model":"gpt-4o","include":[],"instructions":"You + determine which agent to use based on the user''s homework question","stream":false,"tools":[{"name":"transfer_to_history_tutor","parameters":{"additionalProperties":false,"type":"object","properties":{},"required":[]},"strict":true,"type":"function","description":"Handoff + to the History Tutor agent to handle the request. Specialist agent for historical + questions"},{"name":"transfer_to_math_tutor","parameters":{"additionalProperties":false,"type":"object","properties":{},"required":[]},"strict":true,"type":"function","description":"Handoff + to the Math Tutor agent to handle the request. 
Specialist agent for math questions"}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '770' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - Agents/Python 0.0.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.9 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAA9RVsW7bMBDd/RWEli5JINuyrXgrumRp0aJBmjYIhBN5sthQpEoemwSG/70gJVty + 66JFOnUx5Ht3R/K9x+N2wlgiRbJmiUXXFsuVyKt0lWEFeT69nEE1W/EqF0vgPK+qNEVcCJHxMsfl + bH5ZJmehgSm/Iqd9E6MddnFuEQhFAQGbrrLZbJUu8jRijoC8CzXcNK1CQtEVobXGJmumvVIxIPU+ + oxBIIJU7Rh1Zz0kaHbt9Np4JJLSN1Mgea8lrBhvUxMgw75CV4FAwoxnVGAL2lWO1afDR2Af2zaML + rbqtNPBUGE+tp4LMA+qjhRsjUIUVNy2dZ+Z8ls6y8zQ/T5c9KbEwWbO7CWOMbeMvYwk9txjKKq/j + rgsOSsWSCHdiVHwvxeJSZGmQIl/Mc56nkIkyzcpsflKK2CI0LLo+8fP2y2u5fOc/XM8+bW7c7VO6 + dG9ub0ocKjQ0cU9kQbsKbUGmqKUjY58L8mTskAp24xvUFMne7gbglKAR200Yu4+UtGBBKVQFGaPi + uUM+WY8dbPG7NN4Vexd1hzgwbhGc0VJvknVPZ4JVZSyNkhhLNqjRAmHhfNOAfe7RCWO73nnG4nhd + wqYNFT6GpxdpH32iYaHK2AaG/yMdY97hoN0S3flqI3lMAU8mOQDuz54YWBXouJVtDK5ZcgVamKoK + Xg72veo0YtdBo8HmNWihMGZYjJa+YB9b5BKUdNTnVcayTmPJQR2c717kiqBsE26dG1EU3CKEDF1B + vbemRUsSQ0YFyuHZkLY/fz9IRkg7LtvuRkg4mbQYHHJ334d3IzdaGUdSUHkywv6Z87dA9YsJb0Lx + X1Idcv9Hng/XnUxbtOMbZb3msKdVSAel2k9972CDw3WT+mjozqers1+B0XOwHWYfr1EMlelY++Tn + YT7NTgGn+h5mz+9akyFQox3Pp4dpEB6Zo5cDCQQQRKYnux8AAAD//wMAmd7yCIQHAAA= + headers: + CF-RAY: + - 9221d67a5f217114-YYZ + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 18 Mar 2025 04:03:01 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=HDzaVYulwHR_R9HSxKqxbSczB6wMNNLM6kiuq33dtt0-1742270581-1.0.1.1-luMG9ImXVGW.gmxurLn8iP_IIglUjFh3UIHr2trvYsdhnUeWkLp1t1eESFACAQBvIFUpYZXUg1oQl.sRyTQV8.tjgJwvgRuFzzR5FAAHoE8; + path=/; expires=Tue, 18-Mar-25 04:33:01 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=w9nmghjfiG727DO5RnTtUqvu5A3RuwoUmSyGF7wO2Zk-1742270581745-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - wandb + openai-processing-ms: + - '783' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-request-id: + - req_cc16a1d38a539dd2700d7362d0c0e0a5 + status: + code: 200 + message: OK +- request: + body: '{"input":[{"content":"who was the first president of the united states?","role":"user"}],"model":"gpt-4o","include":[],"instructions":"Check + if the user is asking about homework.","stream":false,"text":{"format":{"type":"json_schema","name":"final_output","schema":{"properties":{"is_homework":{"title":"Is + Homework","type":"boolean"},"reasoning":{"title":"Reasoning","type":"string"}},"required":["is_homework","reasoning"],"title":"HomeworkOutput","type":"object","additionalProperties":false},"strict":true}},"tools":[]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '523' + content-type: + - application/json + host: + - api.openai.com 
+ user-agent: + - Agents/Python 0.0.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.9 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAA3RVTW/jOAy951cQOieFnaZJmutedk+7WMxtUhi0RduayJJHoncmKPLfF5I/Yqft + JYj5xA89PlLvKwChpDiBcOTbbH+QxzI57KhMkmP6us1L3B0PqXzJt2kid2lCeyl3+RGLXOYpkViH + ADb/QQWPQazxg71whEwyw4Clh912e0hejknEPCN3PvgUtmk1McneiZyzTpzAdFpHgzLjiUwSo9J+ + iXp2XcHKmhjtj5qKC6gSuCboPDlQHtBflKkAc9sx1LahX9Zdnvp8Df7ObMdtxxnbC5lF9MZK0iFs + 1fJmZzfbZLvbJMdNsh9uHh3FCb6vAADe4y+A4GtLwa0h77Hq6YhAz3Xjq5Hql32+PQaqj6+7522e + p/v08PwqsfyU6hjjK+oi6KyOmdF75RkN36HCGiZzr3Ze8aLqkQ76fffuDwTLCcT7WSifjUSexYld + R+uzcITeGmWqszidxbea4GdHPvQmdMGzQ1XVXFr3C50ENBJKLLhDvYbCNo01+hp6RRKUAZJdgcEX + NXhiVqbyoNWFoFaerbtCodF78k/wrQ7hu6oizx4Uh2zhpL5O3d440kGMT2dxW14KjbGMo4C+v03Y + bfjXW8LXW2x6iw61Jp2xtTorUEdFRgoi7Og/ZTufjcOQxaZPmppIEqeBfkFlaR3PDgGIigw5ZMp8 + 1zTorgO6ArgNA2QdzfMyNW3w6KI5fUoGa2zZkKi0rsH796znP7w1mS9qavCuGEm+cKoN3CyKAxAG + m+hXKoN6GKCZRPtAp5m+ROtsS44V+YU9DMVdSw9QKFBxr+i/PPw5nlo/nBkukVurCY2YobdFrz9y + /zHPv9OZL7J4dgGdJ1l9kk44+tkpR3IxcA/X/aK6yfo2izdVONLw95L1+QT3G3mGoJSqH6Z/5n0o + UXtaPdQebxgXehDXpP5ed73oa6uKfs90bMUE9BM0fLZZOxei60w/z8FNKo+5Hnd+F5fkpFJlFtv4 + db/+aJ+9BZOWCyxqknfHZDW7lnhc8s/7z4DP4k59+So0W0Z9B9Pn7URXeIIWLwoxSuQ4G7fV7X8A + AAD//wMASt5oTIEHAAA= + headers: + CF-RAY: + - 9221d67a5ea3ac31-YYZ + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 18 Mar 2025 04:03:02 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=PSuGBu.a5J.rjKmbVuliRYI3TELmf2jSqu1nuqZG_.Q-1742270582-1.0.1.1-vrKe0whVA4akOViYQBTIJU.T6K9_RF30CuKtcA1SPGW85gCy95WAHwy3_O9GzUbsJFFzafZk1OnNqPr0ypp02MQeN.3WqbWhcwd6yxh8AZA; + path=/; expires=Tue, 18-Mar-25 04:33:02 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=i.bMt1gdJwYlGjEG6z0JLaElyl9o9ZwznjjKjFqRNss-1742270582203-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - wandb + openai-processing-ms: + - '1256' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-request-id: + - req_9518e0f93eb24f595133401b3b4b5408 + status: + code: 200 + message: OK +- request: + body: '{"input":[{"content":"who was the first president of the united states?","role":"user"},{"id":"fc_67d8f0759d4081928538c80a4db04b430ee5dd4cb8e6239b","arguments":"{}","call_id":"call_XZAi6NuQT2WgVsXx06sCXVbe","name":"transfer_to_history_tutor","type":"function_call","status":"completed"},{"call_id":"call_XZAi6NuQT2WgVsXx06sCXVbe","output":"{''assistant'': + ''History Tutor''}","type":"function_call_output"}],"model":"gpt-4o","include":[],"instructions":"You + provide assistance with historical queries. 
Explain important events and context + clearly.","stream":false,"tools":[]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '571' + content-type: + - application/json + cookie: + - __cf_bm=PSuGBu.a5J.rjKmbVuliRYI3TELmf2jSqu1nuqZG_.Q-1742270582-1.0.1.1-vrKe0whVA4akOViYQBTIJU.T6K9_RF30CuKtcA1SPGW85gCy95WAHwy3_O9GzUbsJFFzafZk1OnNqPr0ypp02MQeN.3WqbWhcwd6yxh8AZA; + _cfuvid=i.bMt1gdJwYlGjEG6z0JLaElyl9o9ZwznjjKjFqRNss-1742270582203-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - Agents/Python 0.0.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.9 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAA3RVTY/jNgy9z68gfM4ETiYfTm7Fou0ei26LRbGzMBiJjtXKkktRmQkW898LyY6T + bGcvQcQnPj6SIv3tAaAwuthDwRT6erPVVVNuN2tcr6rFblk9LRpVVgdc73S5eqKSaK31Sh0q2iyf + dodilgj84W9SciHxLtBgV0wopGtM2GK7Wi635bpaZiwISgzJR/mutySkBydi9lzswUVrs8G4y41a + k6Cx4R4NwlGJ8S6z/eUj9OxPRhNgCCYIOkXwYqSF1gTxbBRa+DcSGwpz+Pm1t2gcmK73LOgE6ERO + AqDToLwTehVQlpDteT4o7PC19lH6KLX4f8jd6em8JpuEHHt5XPnHZblcPZbVY7kZa5Udiz18eQAA + +JZ/AQo595TcOgoBj0MBMzB0pwvHqTkHpCo157Be6WVZrajCHR0W+t3mZI4fFTuD7G2OfKmWXKGc + v7uqvVV8p/pSDnq9eg8XkmUPxR8tQWM4CPzGFIwmJ+AbkJbgT2eENHwSFArwggF+Jc9Hgs8YWuOO + 4t0cPhIE4hNpaNh3sNhWOxAPi+1umzvVWzyTBgTFURm0kLIC43KEnzpKXXfwO528jemtIJ/hM/Is + 45qxEeOOk6L5pzl88C6IkXx7lmMkiILgwZrQdmMGDjOdhaM/EbtsNlfPML9JA0wA3wg5YGqImXTK + AUNmfi5+QWmJE+lHE+CDj074/FyAjpTutSaAJdTEoTU96MhJcnIdNDzHZbnYBWg8dyjmRHAm5DDP + bP1YdXWGQAIdunOyKUqdCDMwTtmoL4QjhXdJDILCg3EkUxF6RiVGUUJTV3LpnD2DvHgQ4i7M718B + OucFLzP65euEvY3/Bks6fc1T0iOjtWRr8d7WCm0eeuFIA8x0Mj6G+rJv6jwl0xAyYfDOuGOxH99r + QU3jWW4uARRHcsQoVIfYdcjnEX0AeBt3lGe6jSvU9ckjZvNiXo7W/MbHQEPtpvPNkOR7U6JDiCG/ + 1hs1zGAUX0zAUKzx2Nf9bUyOTuWKJjdtAh7sZYPGvEAmQcbdbapqPfu//WazTrIVqpb01bEcpI/e + 3y/A3eY94D3eqTk/ohYvaK/golpM5YqB7r4NHQlqFEz8bw9v/wEAAP//AwCitwqwzwYAAA== + headers: + CF-RAY: + - 9221d6833e697114-YYZ + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 18 Mar 2025 04:03:04 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - wandb + openai-processing-ms: + - '2435' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-request-id: + - req_3d00a3ff13996886f3833e87caa35784 + status: + code: 200 + message: OK +- request: + body: '{"data":[{"object":"trace","id":"trace_1ab5bd2f3bd54272bd73fd8d8a7cd852","workflow_name":"Agent + 
workflow","group_id":null,"metadata":null},{"object":"trace.span","id":"span_1e44ab53f21c445b806ad924","trace_id":"trace_1ab5bd2f3bd54272bd73fd8d8a7cd852","parent_id":"span_7667e7c5ff7c4eaaa62c38dc","started_at":"2025-03-18T04:03:00.779649+00:00","ended_at":"2025-03-18T04:03:01.794891+00:00","span_data":{"type":"response","response_id":"resp_67d8f074efa88192af27cf8d6acc8ff00ee5dd4cb8e6239b"},"error":null},{"object":"trace.span","id":"span_b6a3c878a7e849f38627e59a","trace_id":"trace_1ab5bd2f3bd54272bd73fd8d8a7cd852","parent_id":"span_7667e7c5ff7c4eaaa62c38dc","started_at":"2025-03-18T04:03:01.810169+00:00","ended_at":"2025-03-18T04:03:01.812869+00:00","span_data":{"type":"handoff","from_agent":"Triage + Agent","to_agent":"History Tutor"},"error":null},{"object":"trace.span","id":"span_8e1c549154a748b099628ff2","trace_id":"trace_1ab5bd2f3bd54272bd73fd8d8a7cd852","parent_id":"span_b1984e593eda4692a3183ce0","started_at":"2025-03-18T04:03:00.781844+00:00","ended_at":"2025-03-18T04:03:02.231198+00:00","span_data":{"type":"response","response_id":"resp_67d8f074ef008192bfa4871d5b210d410e6dd4b8acbdb1ee"},"error":null},{"object":"trace.span","id":"span_b1984e593eda4692a3183ce0","trace_id":"trace_1ab5bd2f3bd54272bd73fd8d8a7cd852","parent_id":"span_171654fb373c488fb10a59c5","started_at":"2025-03-18T04:03:00.768830+00:00","ended_at":"2025-03-18T04:03:02.238774+00:00","span_data":{"type":"agent","name":"Guardrail + check","handoffs":[],"tools":[],"output_type":"HomeworkOutput"},"error":null},{"object":"trace.span","id":"span_171654fb373c488fb10a59c5","trace_id":"trace_1ab5bd2f3bd54272bd73fd8d8a7cd852","parent_id":"span_7667e7c5ff7c4eaaa62c38dc","started_at":"2025-03-18T04:03:00.766081+00:00","ended_at":"2025-03-18T04:03:02.239789+00:00","span_data":{"type":"guardrail","name":"homework_guardrail","triggered":false},"error":null},{"object":"trace.span","id":"span_7667e7c5ff7c4eaaa62c38dc","trace_id":"trace_1ab5bd2f3bd54272bd73fd8d8a7cd852","parent_id":null,"started_at":"2025-03-18T04:03:00.764209+00:00","ended_at":"2025-03-18T04:03:02.241161+00:00","span_data":{"type":"agent","name":"Triage + Agent","handoffs":["History Tutor","Math Tutor"],"tools":[],"output_type":"str"},"error":null},{"object":"trace.span","id":"span_95d013d25cd545fc939c1187","trace_id":"trace_1ab5bd2f3bd54272bd73fd8d8a7cd852","parent_id":"span_a8c36ecc40a54441998f4d51","started_at":"2025-03-18T04:03:02.243541+00:00","ended_at":"2025-03-18T04:03:04.846259+00:00","span_data":{"type":"response","response_id":"resp_67d8f0765a548192831fc08ba59d043e0ee5dd4cb8e6239b"},"error":null},{"object":"trace.span","id":"span_a8c36ecc40a54441998f4d51","trace_id":"trace_1ab5bd2f3bd54272bd73fd8d8a7cd852","parent_id":null,"started_at":"2025-03-18T04:03:02.242207+00:00","ended_at":"2025-03-18T04:03:04.859339+00:00","span_data":{"type":"agent","name":"History + Tutor","handoffs":[],"tools":[],"output_type":"str"},"error":null},{"object":"trace","id":"trace_2fa1b11ea411418584c0d2adf1615c15","workflow_name":"Agent + workflow","group_id":null,"metadata":null}]}' + headers: + accept: + - '*/*' + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '3130' + content-type: + - application/json + host: + - api.openai.com + openai-beta: + - traces=v1 + user-agent: + - python-httpx/0.28.1 + method: POST + uri: https://api.openai.com/v1/traces/ingest + response: + body: + string: '' + headers: + CF-RAY: + - 9221d698a946a223-YYZ + Connection: + - keep-alive + Date: + - Tue, 18 Mar 2025 04:03:05 GMT + Server: + - cloudflare + 
Set-Cookie: + - __cf_bm=TEkJxtu3qYrTgA5q8QshQkFRlLyo2E_VpjQl7atwEx4-1742270585-1.0.1.1-mHdcGT2H8f4eWxOQWIPclKW5IPa3ulHLqkzq8hVXn_J2O6wP_ACnTHICu5_44Wi.aHeQhg.8zxTEsxKNaHLe7WZrETi0dnya3.MCNDc1kAM; + path=/; expires=Tue, 18-Mar-25 04:33:05 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=pd_obuq4UhlYM54ZLS2Jb8wlbz9Gq0Fy38x6ATZVo9Q-1742270585987-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - wandb + openai-processing-ms: + - '167' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-request-id: + - req_0a49d9e7ef029ef4272318f57edfb4fa + status: + code: 204 + message: No Content +- request: + body: '{"input":[{"content":"what is life","role":"user"}],"model":"gpt-4o","include":[],"instructions":"Check + if the user is asking about homework.","stream":false,"text":{"format":{"type":"json_schema","name":"final_output","schema":{"properties":{"is_homework":{"title":"Is + Homework","type":"boolean"},"reasoning":{"title":"Reasoning","type":"string"}},"required":["is_homework","reasoning"],"title":"HomeworkOutput","type":"object","additionalProperties":false},"strict":true}},"tools":[]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '486' + content-type: + - application/json + cookie: + - __cf_bm=PSuGBu.a5J.rjKmbVuliRYI3TELmf2jSqu1nuqZG_.Q-1742270582-1.0.1.1-vrKe0whVA4akOViYQBTIJU.T6K9_RF30CuKtcA1SPGW85gCy95WAHwy3_O9GzUbsJFFzafZk1OnNqPr0ypp02MQeN.3WqbWhcwd6yxh8AZA; + _cfuvid=i.bMt1gdJwYlGjEG6z0JLaElyl9o9ZwznjjKjFqRNss-1742270582203-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - Agents/Python 0.0.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.66.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.9 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAA3RVyY7jOAy95ysIXfpSVYizOrn2ZeY0g8HcOgWDkehYHVlyS/R0Nwr594HkJXaq + 6mLYfOKix0f6bQEgtBJHEJ5CU+z2Ki+X+7w87Dd5dlhhhtlO7anE3Xa7krjcbOR2u84PWbbeyzOJ + pxjAnb+T5CGIs6G3S0/IpAqMWLbfrFb75TbfJCwwchuij3R1Y4hJdU7kvfPiCLY1Jhm0HU4Uihi1 + CXM0sG8la2dTtK8VySvoErgiaAN50AEwXLW9AJ5dy1C5mn46f33p8tX4q3AtNy0X7K5kZ9Frp8jE + sJeGnzfuebVcbZ6X+fNy1988OYojfFsAALylJ4Dg3w1Ft5pCwEtHRwI6rutwGag+7Na5jFSfs3Jd + 7kta036nSso/pDrF+Iy6BHpnUmYMQQdGy3dIOstk79VOK55VPdBBv+7e3YFoOYJ4OwkdioHIkziW + aAI9nYQnDM5qezmJ40n8WxH8aCnE5sCXnxVybIbRJX2JL02ljQuuqbREA9qCRW49AVoFylEA6xi0 + VVoiRyugREW1lsAYruD82EqIt73Ymiy/nMRtXjRa6xgHgXx7HbFb/9ZZ4tdramqDHo0hU7BzppBo + kuLYt9TBnv7Trg3FIPYiNXXUzMiBOPb0CipL53lyCEBcyJJHpiK0dY3+d48uAG79gDhP07xMdRM9 + 2mTOXpa9NbWkT1Q6X+P9e9LT78HZIsiKarwrQlGQXjeRm1lxAMJinfxKbdH0AzKRYBfoONGPaLxr + yLOmMLNH0d+18gDFAjV3iv0zwB/DqaeHM/0lzs4ZQism6G3W6/fcv8/zz3jmkyyBfUSnSRYfpBOe + frTak5oN1MN1P6lutL5O4o0VDjT8NWd9OqHdxp0gqJSOfUTz97QPaTAXD7WnG6aFHcU1qr/TXSf6 + ymnZ7ZGWnRiBboL6z6ZopkL0rZXYS0koHfBshp3epiU4qlTb2bbND0/v7ZNdP2pZoqxI3R2Xi8m1 + xOMSX28+Aj6KO/bls9DsGM0dzFbrka74i5n9MYhRIafZuC1u/wMAAP//AwDLdEeGYQcAAA== + headers: + CF-RAY: + - 9221d693bafcac31-YYZ + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - 
Tue, 18 Mar 2025 04:03:06 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - nosniff
      alt-svc:
      - h3=":443"; ma=86400
      cf-cache-status:
      - DYNAMIC
      openai-organization:
      - wandb
      openai-processing-ms:
      - '1011'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      x-request-id:
      - req_7a49232413dfe84b8d37cec36648f2b8
    status:
      code: 200
      message: OK
version: 1
diff --git a/tests/integrations/openai_agents/openai_agents_test.py b/tests/integrations/openai_agents/openai_agents_test.py
index e69de29bb2d1..0cdbc2dbe363 100644
--- a/tests/integrations/openai_agents/openai_agents_test.py
+++ b/tests/integrations/openai_agents/openai_agents_test.py
@@ -0,0 +1,257 @@
+import agents
+import pytest
+from agents import Agent, GuardrailFunctionOutput, InputGuardrail, Runner
+from pydantic import BaseModel
+
+from weave.trace.object_record import ObjectRecord
+from weave.trace.weave_client import WeaveClient
+
+
+@pytest.mark.skip_clickhouse_client
+@pytest.mark.vcr(
+    filter_headers=["authorization"],
+    allowed_hosts=["api.wandb.ai", "localhost"],
+)
+def test_openai_agents_quickstart(client: WeaveClient) -> None:
+    agent = Agent(name="Assistant", instructions="You are a helpful assistant")
+
+    result = Runner.run_sync(agent, "Write a haiku about recursion in programming.")
+    calls = client.get_calls()
+
+    assert len(calls) == 3
+
+    trace_root = calls[0]
+    assert trace_root.inputs["name"] == "Agent workflow"
+    assert trace_root.output["status"] == "completed"
+    assert trace_root.output["metrics"] == {}
+    assert trace_root.output["metadata"] == {}
+
+    agent_call = calls[1]
+    assert agent_call.inputs["name"] == "Assistant"
+    assert agent_call.output["output"] is None
+    assert agent_call.output["metrics"] == {}
+    assert agent_call.output["metadata"] == {"tools": [], "handoffs": [], "output_type": "str"}
+    assert agent_call.output["error"] is None
+
+    response_call = calls[2]
+    assert response_call.inputs["name"] == "Response"
+    assert response_call.inputs["input"] == [
+        {
+            "content": "Write a haiku about recursion in programming.",
+            "role": "user",
+        }
+    ]
+
+    # TODO
+    # assert response_call.output["output"][0]
+
+
+# @pytest.mark.skip(reason="Not clear why VCR fails for this test")
+@pytest.mark.skip_clickhouse_client
+@pytest.mark.vcr(
+    filter_headers=["authorization"],
+    allowed_hosts=["api.wandb.ai", "localhost"],
+)
+@pytest.mark.asyncio
+async def test_openai_agents_quickstart_homework(client: WeaveClient) -> None:
+    class HomeworkOutput(BaseModel):
+        is_homework: bool
+        reasoning: str
+
+    guardrail_agent = Agent(
+        name="Guardrail check",
+        instructions="Check if the user is asking about homework.",
+        output_type=HomeworkOutput,
+    )
+
+    math_tutor_agent = Agent(
+        name="Math Tutor",
+        handoff_description="Specialist agent for math questions",
+        instructions="You provide help with math problems. Explain your reasoning at each step and include examples",
+    )
+
+    history_tutor_agent = Agent(
+        name="History Tutor",
+        handoff_description="Specialist agent for historical questions",
+        instructions="You provide assistance with historical queries. 
Explain important events and context clearly.", + ) + + async def homework_guardrail(ctx, agent, input_data): + result = await Runner.run(guardrail_agent, input_data, context=ctx.context) + final_output = result.final_output_as(HomeworkOutput) + return GuardrailFunctionOutput( + output_info=final_output, + tripwire_triggered=not final_output.is_homework, + ) + + triage_agent = Agent( + name="Triage Agent", + instructions="You determine which agent to use based on the user's homework question", + handoffs=[history_tutor_agent, math_tutor_agent], + input_guardrails=[ + InputGuardrail(guardrail_function=homework_guardrail), + ], + ) + + result = await Runner.run( + triage_agent, "who was the first president of the united states?" + ) + with pytest.raises(agents.exceptions.InputGuardrailTripwireTriggered): + result = await Runner.run(triage_agent, "what is life") + + ##################### + ### Result1 Block ### + ##################### + + calls = client.get_calls() + assert len(calls) == 14 + + # ==================== + call0 = calls[0] + assert call0.inputs["name"] == "Agent workflow" + assert call0.output["status"] == "completed" + assert call0.output["metrics"] == {} + assert call0.output["metadata"] == {} + + # ==================== + call1 = calls[1] + assert call1.inputs["name"] == "Triage Agent" + assert call1.output["output"] is None + assert call1.output["metrics"] == {} + assert call1.output["metadata"]["tools"] == [] + assert call1.output["metadata"]["handoffs"] == ["History Tutor", "Math Tutor"] + assert call1.output["metadata"]["output_type"] == "str" + assert call1.output["error"] is None + + # ==================== + call2 = calls[2] + assert call2.inputs["name"] == "homework_guardrail" + assert call2.output["output"] is None + assert call2.output["metrics"] == {} + assert call2.output["metadata"] == {"triggered": False} + assert call2.output["error"] is None + + # ==================== + call3 = calls[3] + assert call3.inputs["name"] == "Guardrail check" + assert call3.output["output"] is None + assert call3.output["metrics"] == {} + assert call3.output["metadata"]["tools"] == [] + assert call3.output["metadata"]["handoffs"] == [] + assert call3.output["metadata"]["output_type"] == "HomeworkOutput" + assert call3.output["error"] is None + + # ==================== + call4 = calls[4] + assert call4.inputs["name"] == "Response" + assert ( + call4.inputs["input"][0]["content"] + == "who was the first president of the united states?" + ) + assert call4.inputs["input"][0]["role"] == "user" + + val4 = call4.output["output"][0]._val + assert isinstance(val4, ObjectRecord) + assert val4.name == "transfer_to_history_tutor" + assert val4.type == "function_call" + assert val4.status == "completed" + + # ==================== + call5 = calls[5] + assert call5.inputs["name"] == "Handoff" + assert call5.output["output"] is None + assert call5.output["metrics"] == {} + assert call5.output["metadata"]["from_agent"] == "Triage Agent" + assert call5.output["metadata"]["to_agent"] == "History Tutor" + assert call5.output["error"] is None + + # ==================== + call6 = calls[6] + assert call6.inputs["name"] == "Response" + assert ( + call6.inputs["input"][0]["content"] + == "who was the first president of the united states?" 
+ ) + assert call6.inputs["input"][0]["role"] == "user" + + val6 = call6.output["output"][0]._val + assert isinstance(val6, ObjectRecord) + assert val6.role == "assistant" + assert val6.type == "message" + assert val6.status == "completed" + + # ==================== + call7 = calls[7] + assert call7.inputs["name"] == "History Tutor" + assert call7.output["output"] is None + assert call7.output["metrics"] == {} + assert call7.output["metadata"]["tools"] == [] + assert call7.output["metadata"]["handoffs"] == [] + assert call7.output["metadata"]["output_type"] == "str" + assert call7.output["error"] is None + + # ==================== + call8 = calls[8] + assert call8.inputs["name"] == "Response" + assert ( + call8.inputs["input"][0]["content"] + == "who was the first president of the united states?" + ) + assert call8.inputs["input"][0]["role"] == "user" + assert call8.inputs["input"][1]["name"] == "transfer_to_history_tutor" + assert call8.inputs["input"][1]["type"] == "function_call" + assert call8.inputs["input"][1]["status"] == "completed" + + val8 = call8.output["output"][0]._val + assert isinstance(val8, ObjectRecord) + assert val8.role == "assistant" + assert val8.type == "message" + assert val8.status == "completed" + + ##################### + ### Result2 Block ### + ##################### + + call9 = calls[9] + assert call9.inputs["name"] == "Agent workflow" + assert call9.output["status"] == "completed" + assert call9.output["metrics"] == {} + assert call9.output["metadata"] == {} + + # ==================== + call10 = calls[10] + assert call10.inputs["name"] == "Triage Agent" + assert call10.output["output"] is None + assert call10.output["metrics"] == {} + assert call10.output["metadata"]["tools"] == [] + assert call10.output["metadata"]["handoffs"] == ["History Tutor", "Math Tutor"] + assert call10.output["metadata"]["output_type"] == "str" + + # ==================== + call11 = calls[11] + assert call11.inputs["name"] == "homework_guardrail" + assert call11.output["output"] is None + assert call11.output["metrics"] == {} + assert call11.output["metadata"]["triggered"] is True + assert call11.output["error"] is None + + # ==================== + call12 = calls[12] + assert call12.inputs["name"] == "Guardrail check" + assert call12.output["output"] is None + assert call12.output["metrics"] == {} + assert call12.output["metadata"]["tools"] == [] + assert call12.output["metadata"]["handoffs"] == [] + assert call12.output["metadata"]["output_type"] == "HomeworkOutput" + + # ==================== + call13 = calls[13] + assert call13.inputs["name"] == "Response" + assert call13.inputs["input"][0]["content"] == "what is life" + assert call13.inputs["input"][0]["role"] == "user" + + val13 = call13.output["output"][0]._val + assert isinstance(val13, ObjectRecord) + assert val13.role == "assistant" + assert val13.type == "message" + assert val13.status == "completed" From 4f83679c65b8fde4104e113db9c1a1422de40f1c Mon Sep 17 00:00:00 2001 From: andrewtruong Date: Tue, 18 Mar 2025 00:51:34 -0400 Subject: [PATCH 10/15] test --- .../openai_agents/openai_agents_test.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/integrations/openai_agents/openai_agents_test.py b/tests/integrations/openai_agents/openai_agents_test.py index 0cdbc2dbe363..d08f70856f0a 100644 --- a/tests/integrations/openai_agents/openai_agents_test.py +++ b/tests/integrations/openai_agents/openai_agents_test.py @@ -6,6 +6,8 @@ from weave.trace.object_record import ObjectRecord from 
weave.trace.weave_client import WeaveClient +# TODO: Responses should be updated once we have patching for the new Responses API + @pytest.mark.skip_clickhouse_client @pytest.mark.vcr( @@ -42,8 +44,15 @@ def test_openai_agents_quickstart(client: WeaveClient) -> None: } ] - # TODO - # assert response_call.output["output"][0] + val = response_call.output["output"][0]._val + assert isinstance(val, ObjectRecord) + assert val.role == "assistant" + assert val.type == "message" + assert val.status == "completed" + assert ( + val.content[0].text + == "Code calls to itself, \nInfinite loops in silence, \nPatterns emerge clear." + ) # @pytest.mark.skip(reason="Not clear why VCR fails for this test") From 8264effb4435312de53cc647bf53d9c46edc5d57 Mon Sep 17 00:00:00 2001 From: andrewtruong Date: Tue, 18 Mar 2025 10:58:06 -0400 Subject: [PATCH 11/15] test --- .../openai_agents/openai_agents_test.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/tests/integrations/openai_agents/openai_agents_test.py b/tests/integrations/openai_agents/openai_agents_test.py index d08f70856f0a..a4570e1b795c 100644 --- a/tests/integrations/openai_agents/openai_agents_test.py +++ b/tests/integrations/openai_agents/openai_agents_test.py @@ -3,7 +3,6 @@ from agents import Agent, GuardrailFunctionOutput, InputGuardrail, Runner from pydantic import BaseModel -from weave.trace.object_record import ObjectRecord from weave.trace.weave_client import WeaveClient # TODO: Responses should be updated once we have patching for the new Responses API @@ -44,8 +43,7 @@ def test_openai_agents_quickstart(client: WeaveClient) -> None: } ] - val = response_call.output["output"][0]._val - assert isinstance(val, ObjectRecord) + val = response_call.output["output"][0] assert val.role == "assistant" assert val.type == "message" assert val.status == "completed" @@ -159,8 +157,7 @@ async def homework_guardrail(ctx, agent, input_data): ) assert call4.inputs["input"][0]["role"] == "user" - val4 = call4.output["output"][0]._val - assert isinstance(val4, ObjectRecord) + val4 = call4.output["output"][0] assert val4.name == "transfer_to_history_tutor" assert val4.type == "function_call" assert val4.status == "completed" @@ -183,8 +180,7 @@ async def homework_guardrail(ctx, agent, input_data): ) assert call6.inputs["input"][0]["role"] == "user" - val6 = call6.output["output"][0]._val - assert isinstance(val6, ObjectRecord) + val6 = call6.output["output"][0] assert val6.role == "assistant" assert val6.type == "message" assert val6.status == "completed" @@ -211,8 +207,7 @@ async def homework_guardrail(ctx, agent, input_data): assert call8.inputs["input"][1]["type"] == "function_call" assert call8.inputs["input"][1]["status"] == "completed" - val8 = call8.output["output"][0]._val - assert isinstance(val8, ObjectRecord) + val8 = call8.output["output"][0] assert val8.role == "assistant" assert val8.type == "message" assert val8.status == "completed" @@ -259,8 +254,7 @@ async def homework_guardrail(ctx, agent, input_data): assert call13.inputs["input"][0]["content"] == "what is life" assert call13.inputs["input"][0]["role"] == "user" - val13 = call13.output["output"][0]._val - assert isinstance(val13, ObjectRecord) + val13 = call13.output["output"][0] assert val13.role == "assistant" assert val13.type == "message" assert val13.status == "completed" From 05dfde8acb5aac4046a53d3ecc3735fcabb6023c Mon Sep 17 00:00:00 2001 From: andrewtruong Date: Tue, 18 Mar 2025 15:47:39 -0400 Subject: [PATCH 12/15] test --- 
tests/integrations/openai_agents/openai_agents_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integrations/openai_agents/openai_agents_test.py b/tests/integrations/openai_agents/openai_agents_test.py index a4570e1b795c..e3d1609ac582 100644 --- a/tests/integrations/openai_agents/openai_agents_test.py +++ b/tests/integrations/openai_agents/openai_agents_test.py @@ -53,7 +53,6 @@ def test_openai_agents_quickstart(client: WeaveClient) -> None: ) -# @pytest.mark.skip(reason="Not clear why VCR fails for this test") @pytest.mark.skip_clickhouse_client @pytest.mark.vcr( filter_headers=["authorization"], From 7ae662939858d7e49eccd6c1666cf72ca48447b3 Mon Sep 17 00:00:00 2001 From: andrewtruong Date: Tue, 18 Mar 2025 17:33:57 -0400 Subject: [PATCH 13/15] test --- noxfile.py | 2 +- tests/integrations/openai_agents/openai_agents_test.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index b77af2e033bb..b3b0b37e350d 100644 --- a/noxfile.py +++ b/noxfile.py @@ -94,7 +94,7 @@ def tests(session, shard): env["OPENAI_API_KEY"] = session.env.get("OPENAI_API_KEY") if shard == "openai_agents": - env["OPENAI_API_KEY"] = session.env.get("OPENAI_API_KEY") + env["OPENAI_API_KEY"] = session.env.get("OPENAI_API_KEY", "DUMMY_API_KEY") default_test_dirs = [f"integrations/{shard}/"] test_dirs_dict = { diff --git a/tests/integrations/openai_agents/openai_agents_test.py b/tests/integrations/openai_agents/openai_agents_test.py index e3d1609ac582..2e4ce11ca3ed 100644 --- a/tests/integrations/openai_agents/openai_agents_test.py +++ b/tests/integrations/openai_agents/openai_agents_test.py @@ -53,6 +53,7 @@ def test_openai_agents_quickstart(client: WeaveClient) -> None: ) +@pytest.mark.skip(reason="This test works, but the order of requests to OpenAI can be mixed up (by the Agent framework). 
This causes the test to fail more than reasonable in CI.") @pytest.mark.skip_clickhouse_client @pytest.mark.vcr( filter_headers=["authorization"], From 4b1c850be4f320389d7c3f6f530503e5d2d23052 Mon Sep 17 00:00:00 2001 From: andrewtruong Date: Wed, 19 Mar 2025 15:07:29 -0400 Subject: [PATCH 14/15] test --- noxfile.py | 18 +++++++++------- .../openai_agents/openai_agents_test.py | 21 ++++++++++++++++--- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/noxfile.py b/noxfile.py index b3b0b37e350d..0ddde665573e 100644 --- a/noxfile.py +++ b/noxfile.py @@ -1,3 +1,5 @@ +import os + import nox nox.options.default_venv_backend = "uv" @@ -78,23 +80,23 @@ def tests(session, shard): } # Add the GOOGLE_API_KEY environment variable for the "google" shard if shard == "google_ai_studio": - env["GOOGLE_API_KEY"] = session.env.get("GOOGLE_API_KEY") + env["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY", "MISSING") # Add the NVIDIA_API_KEY environment variable for the "langchain_nvidia_ai_endpoints" shard if shard == "langchain_nvidia_ai_endpoints": - env["NVIDIA_API_KEY"] = session.env.get("NVIDIA_API_KEY") + env["NVIDIA_API_KEY"] = os.getenv("NVIDIA_API_KEY", "MISSING") # we are doing some integration test in test_llm_integrations.py that requires # setting some environment variables for the LLM providers if shard == "scorers": - env["GOOGLE_API_KEY"] = session.env.get("GOOGLE_API_KEY") - env["GEMINI_API_KEY"] = session.env.get("GEMINI_API_KEY") - env["ANTHROPIC_API_KEY"] = session.env.get("ANTHROPIC_API_KEY") - env["MISTRAL_API_KEY"] = session.env.get("MISTRAL_API_KEY") - env["OPENAI_API_KEY"] = session.env.get("OPENAI_API_KEY") + env["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY", "MISSING") + env["GEMINI_API_KEY"] = os.getenv("GEMINI_API_KEY", "MISSING") + env["ANTHROPIC_API_KEY"] = os.getenv("ANTHROPIC_API_KEY", "MISSING") + env["MISTRAL_API_KEY"] = os.getenv("MISTRAL_API_KEY", "MISSING") + env["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY", "MISSING") if shard == "openai_agents": - env["OPENAI_API_KEY"] = session.env.get("OPENAI_API_KEY", "DUMMY_API_KEY") + env["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY", "MISSING") default_test_dirs = [f"integrations/{shard}/"] test_dirs_dict = { diff --git a/tests/integrations/openai_agents/openai_agents_test.py b/tests/integrations/openai_agents/openai_agents_test.py index 2e4ce11ca3ed..b2bca56ba2f8 100644 --- a/tests/integrations/openai_agents/openai_agents_test.py +++ b/tests/integrations/openai_agents/openai_agents_test.py @@ -3,17 +3,28 @@ from agents import Agent, GuardrailFunctionOutput, InputGuardrail, Runner from pydantic import BaseModel +from weave.integrations.openai_agents.openai_agents import WeaveTracingProcessor from weave.trace.weave_client import WeaveClient # TODO: Responses should be updated once we have patching for the new Responses API +@pytest.fixture +def setup_tests(): + # This is required because OpenAI by default adds its own trace processor which causes issues in the test. + # We can't just add our trace processor with autopatching because it wont remove the OpenAI trace processor. + # Instead, we manually set the trace processors to just be ours. This simplifies testing. + # However, by default the autopatching keeps the default OpenAI trace processor, and additionally installs the Weave processor. 
+ + agents.set_trace_processors([WeaveTracingProcessor()]) + + @pytest.mark.skip_clickhouse_client @pytest.mark.vcr( filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"], ) -def test_openai_agents_quickstart(client: WeaveClient) -> None: +def test_openai_agents_quickstart(client: WeaveClient, setup_tests) -> None: agent = Agent(name="Assistant", instructions="You are a helpful assistant") result = Runner.run_sync(agent, "Write a haiku about recursion in programming.") @@ -53,14 +64,18 @@ def test_openai_agents_quickstart(client: WeaveClient) -> None: ) -@pytest.mark.skip(reason="This test works, but the order of requests to OpenAI can be mixed up (by the Agent framework). This causes the test to fail more than reasonable in CI.") +@pytest.mark.skip( + reason="This test works, but the order of requests to OpenAI can be mixed up (by the Agent framework). This causes the test to fail more than reasonable in CI." +) @pytest.mark.skip_clickhouse_client @pytest.mark.vcr( filter_headers=["authorization"], allowed_hosts=["api.wandb.ai", "localhost"], ) @pytest.mark.asyncio -async def test_openai_agents_quickstart_homework(client: WeaveClient) -> None: +async def test_openai_agents_quickstart_homework( + client: WeaveClient, setup_tests +) -> None: class HomeworkOutput(BaseModel): is_homework: bool reasoning: str From 8c9289a888b18bb8659a0399ac6ed333ddaaedc7 Mon Sep 17 00:00:00 2001 From: andrewtruong Date: Wed, 19 Mar 2025 23:49:46 -0400 Subject: [PATCH 15/15] test --- tests/integrations/openai_agents/test_autopatch.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/integrations/openai_agents/test_autopatch.py diff --git a/tests/integrations/openai_agents/test_autopatch.py b/tests/integrations/openai_agents/test_autopatch.py deleted file mode 100644 index e69de29bb2d1..000000000000
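
A minimal sketch of how the processor exercised by these tests might be wired up outside of pytest, using only the entry points that appear in this series (Agent, Runner.run_sync, agents.set_trace_processors, WeaveTracingProcessor). The weave.init call and the project name are illustrative assumptions, not something these patches add:

    import agents
    from agents import Agent, Runner

    import weave
    from weave.integrations.openai_agents.openai_agents import WeaveTracingProcessor

    # Hypothetical project name; weave.init starts the Weave client that
    # the tracing processor logs calls through.
    weave.init("openai-agents-demo")

    # Replace the default OpenAI trace processors with the Weave processor,
    # mirroring the setup_tests fixture above. Autopatching would instead keep
    # the default OpenAI processor and install the Weave processor alongside it.
    agents.set_trace_processors([WeaveTracingProcessor()])

    agent = Agent(name="Assistant", instructions="You are a helpful assistant")
    result = Runner.run_sync(agent, "Write a haiku about recursion in programming.")
    print(result.final_output)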