wandb · Mar 20, 2025
diff --git a/‎noxfile.py
+13-7 b/‎noxfile.py
+13-7
diff --git a/‎pyproject.toml
+1 b/‎pyproject.toml
+1
diff --git a/‎tests/integrations/openai_agents/__init__.py b/‎tests/integrations/openai_agents/__init__.py
diff --git a/‎tests/integrations/openai_agents/cassettes/openai_agents_test/test_openai_agents_quickstart.yaml
+94 b/‎tests/integrations/openai_agents/cassettes/openai_agents_test/test_openai_agents_quickstart.yaml
+94
diff --git a/‎tests/integrations/openai_agents/cassettes/openai_agents_test/test_openai_agents_quickstart_homework.yaml
+448 b/‎tests/integrations/openai_agents/cassettes/openai_agents_test/test_openai_agents_quickstart_homework.yaml
+448
diff --git a/‎tests/integrations/openai_agents/openai_agents_test.py
+275 b/‎tests/integrations/openai_agents/openai_agents_test.py
+275
diff --git a/‎weave/integrations/openai_agents/openai_agents.py
+507 b/‎weave/integrations/openai_agents/openai_agents.py
+507
diff --git a/‎weave/trace/autopatch.py
+5 b/‎weave/trace/autopatch.py
+5
@@ -1,3 +1,5 @@
+import os
+
 import nox
 
 nox.options.default_venv_backend = "uv"
@@ -50,6 +52,7 @@ def lint(session):
         "mistral1",
         "notdiamond",
         "openai",
+        "openai_agents",
         "vertexai",
         "bedrock",
         "scorers",
@@ -77,20 +80,23 @@ def tests(session, shard):
     }
     # Add the GOOGLE_API_KEY environment variable for the "google" shard
     if shard == "google_ai_studio":
-        env["GOOGLE_API_KEY"] = session.env.get("GOOGLE_API_KEY")
+        env["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY", "MISSING")
 
     # Add the NVIDIA_API_KEY environment variable for the "langchain_nvidia_ai_endpoints" shard
     if shard == "langchain_nvidia_ai_endpoints":
-        env["NVIDIA_API_KEY"] = session.env.get("NVIDIA_API_KEY")
+        env["NVIDIA_API_KEY"] = os.getenv("NVIDIA_API_KEY", "MISSING")
 
     # we are doing some integration test in test_llm_integrations.py that requires
     # setting some environment variables for the LLM providers
     if shard == "scorers":
-        env["GOOGLE_API_KEY"] = session.env.get("GOOGLE_API_KEY")
-        env["GEMINI_API_KEY"] = session.env.get("GEMINI_API_KEY")
-        env["ANTHROPIC_API_KEY"] = session.env.get("ANTHROPIC_API_KEY")
-        env["MISTRAL_API_KEY"] = session.env.get("MISTRAL_API_KEY")
-        env["OPENAI_API_KEY"] = session.env.get("OPENAI_API_KEY")
+        env["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY", "MISSING")
+        env["GEMINI_API_KEY"] = os.getenv("GEMINI_API_KEY", "MISSING")
+        env["ANTHROPIC_API_KEY"] = os.getenv("ANTHROPIC_API_KEY", "MISSING")
+        env["MISTRAL_API_KEY"] = os.getenv("MISTRAL_API_KEY", "MISSING")
+        env["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY", "MISSING")
+
+    if shard == "openai_agents":
+        env["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY", "MISSING")
 
     default_test_dirs = [f"integrations/{shard}/"]
     test_dirs_dict = {
 
@@ -108,6 +108,7 @@ scorers = [
 ]
 notdiamond = ["notdiamond>=0.3.21", "litellm<=1.49.1"]
 openai = ["openai>=1.0.0"]
+openai_agents = ["openai-agents>=0.0.4"]
 pandas-test = ["pandas>=2.2.3"]
 presidio = ["presidio-analyzer==2.2.357", "presidio-anonymizer==2.2.357"]
 modal = ["modal", "python-dotenv"]
 
@@ -0,0 +1,94 @@
+interactions:
+- request:
+    body: '{"input":[{"content":"Write a haiku about recursion in programming.","role":"user"}],"model":"gpt-4o","include":[],"instructions":"You
+      are a helpful assistant","stream":false,"tools":[]}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '186'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - Agents/Python 0.0.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - async:asyncio
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.66.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.9
+    method: POST
+    uri: https://api.openai.com/v1/responses
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA3RUwW7bMAy95ysInZvCdhzbyXWn3XYdusJgbDpVJ4uGRA0tivz7YNlx4q29BBGf
+        3iP5KPpjA6B0q46gHPmhLsq2orZLduk+r9JDVp3yKj/sT1le7rNsVyZlUuT7MjkUHVGbkXoYBfj0
+        So1cRdj6Od44QqG2xhFLyzzLisNhV0TMC0rwI6fhfjAk1E4kco6dOoINxsSAttcbdUuC2vg16sWF
+        RjTbqPaTA6AjQHghM3TBAHqvvaCVSb/Ht5qDDEFq4d9kV2o9t2RGmfMg25y3WZLl26TaJsXcaSSq
+        IzxtAAA+4i+AkveBRlpP3uN5aj8Ck7e9Py/W4h6r0dpTmaRpkVHRpdV+t6NPrY0aX1kVQccmZl53
+        GaGGrZC9VXtf8arqqx30dmNPF8bIEdQ3bgkaNMaDMGjxZLoHgF/2u+201UJgmAcP2oLXhmxDEf2B
+        IuSsB+rJnQkaQ+ge1ynQWha8ju/pecEu878pMp6e4wgGdGgMmVqYTR2LUkcQF2iCHf3RHHx9fYp1
+        HMEyYUfo2Wp7VsfZDEVdx07uLgGoM1lyKFT70Pfo3md0A3CZny87us8r1A8jI8Rw+pjM0WjgnKhj
+        1+PtfDeBeG9pdEox9ffCupkGHITVAkxmzcehHu5zumCb6OhIa7XHk7kuV4ivcylI29Ua5LuH/+N3
+        S7eU3WDzQu2NmEylz+x/tystPwM+012G85W0sKC5gUWyuBU8rb4aPQm2KDjKXzaXvwAAAP//AwBA
+        W3tF6QQAAA==
+    headers:
+      CF-RAY:
+      - 9221c6bccef336b3-YYZ
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Tue, 18 Mar 2025 03:52:16 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=Tqnkbc7A4PgF5SM.gzWaijlDAAnDOk9Zieo6zRFmpd4-1742269936-1.0.1.1-9U42TycTKwZ6d4ISt9Qix_e2j6Latc3s8rPzobIIvSxaXqbrj.2XcpXF2GDI88LQyM0sRkP63Fw5_2JqEPAcJqt3OQPH5IESe3_.Iu0IofY;
+        path=/; expires=Tue, 18-Mar-25 04:22:16 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=KqFFy64fH2k1v0MoM0hYELHA_M66ltXaaJFWGlP2Kng-1742269936949-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - wandb
+      openai-processing-ms:
+      - '728'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-request-id:
+      - req_f9ec95d382e76a8e0ef8eb42aa207c3b
+    status:
+      code: 200
+      message: OK
+version: 1
@@ -0,0 +1,275 @@
+import agents
+import pytest
+from agents import Agent, GuardrailFunctionOutput, InputGuardrail, Runner
+from pydantic import BaseModel
+
+from weave.integrations.openai_agents.openai_agents import WeaveTracingProcessor
+from weave.trace.weave_client import WeaveClient
+
+# TODO: Responses should be updated once we have patching for the new Responses API
+
+
+@pytest.fixture
+def setup_tests():
+    """Make the Weave processor the only trace processor registered with `agents`.
+
+    OpenAI installs its own trace processor by default, and that processor
+    causes issues in these tests.  Autopatching is not enough here: it adds the
+    Weave processor but leaves the default OpenAI processor in place.  Replacing
+    the whole processor list with just ours keeps the tests simple.
+    """
+    weave_processor = WeaveTracingProcessor()
+    agents.set_trace_processors([weave_processor])
+
+
+@pytest.mark.skip_clickhouse_client
+@pytest.mark.vcr(
+    filter_headers=["authorization"],
+    allowed_hosts=["api.wandb.ai", "localhost"],
+)
+def test_openai_agents_quickstart(client: WeaveClient, setup_tests) -> None:
+    """Run a single-agent haiku prompt and verify the three calls Weave records.
+
+    The calls are expected in this order: the workflow root, the agent span,
+    and the Responses API span.
+    """
+    agent = Agent(name="Assistant", instructions="You are a helpful assistant")
+
+    # Only the recorded calls are inspected, so the run result is not kept.
+    Runner.run_sync(agent, "Write a haiku about recursion in programming.")
+    calls = client.get_calls()
+
+    assert len(calls) == 3
+
+    # NOTE: these checks must be assertions.  Plain `=` assignments would
+    # overwrite the recorded values and verify nothing.
+    trace_root = calls[0]
+    assert trace_root.inputs["name"] == "Agent workflow"
+    assert trace_root.output["status"] == "completed"
+    assert trace_root.output["metrics"] == {}
+    assert trace_root.output["metadata"] == {}
+
+    # Metadata is checked key by key, matching the homework test below.
+    agent_call = calls[1]
+    assert agent_call.inputs["name"] == "Assistant"
+    assert agent_call.output["output"] is None
+    assert agent_call.output["metrics"] == {}
+    assert agent_call.output["metadata"]["tools"] == []
+    assert agent_call.output["metadata"]["handoffs"] == []
+    assert agent_call.output["metadata"]["output_type"] == "str"
+    assert agent_call.output["error"] is None
+
+    response_call = calls[2]
+    assert response_call.inputs["name"] == "Response"
+    assert (
+        response_call.inputs["input"][0]["content"]
+        == "Write a haiku about recursion in programming."
+    )
+    assert response_call.inputs["input"][0]["role"] == "user"
+
+    val = response_call.output["output"][0]
+    assert val.role == "assistant"
+    assert val.type == "message"
+    assert val.status == "completed"
+    assert (
+        val.content[0].text
+        == "Code calls to itself,  \nInfinite loops in silence,  \nPatterns emerge clear."
+    )
+
+
+@pytest.mark.skip(
+    reason="This test works, but the order of requests to OpenAI can be mixed up (by the Agent framework).  This causes the test to fail more than reasonable in CI."
+)
+@pytest.mark.skip_clickhouse_client
+@pytest.mark.vcr(
+    filter_headers=["authorization"],
+    allowed_hosts=["api.wandb.ai", "localhost"],
+)
+@pytest.mark.asyncio
+async def test_openai_agents_quickstart_homework(
+    client: WeaveClient, setup_tests
+) -> None:
+    """Run the triage/guardrail/handoff example twice and verify the 14 recorded calls.
+
+    The first run asks a history question and is expected to hand off to the
+    History Tutor (calls 0-8).  The second run is expected to raise
+    `InputGuardrailTripwireTriggered` (calls 9-13).
+    """
+    president_question = "who was the first president of the united states?"
+
+    class HomeworkOutput(BaseModel):
+        is_homework: bool
+        reasoning: str
+
+    # ----- local checkers for the span shapes that repeat below -----
+
+    def expect_workflow(call):
+        # Root span of one Runner.run invocation.
+        assert call.inputs["name"] == "Agent workflow"
+        assert call.output["status"] == "completed"
+        assert call.output["metrics"] == {}
+        assert call.output["metadata"] == {}
+
+    def expect_agent(call, name, handoffs, output_type, check_error=True):
+        # Agent span; `check_error=False` skips the `error is None` assertion.
+        assert call.inputs["name"] == name
+        assert call.output["output"] is None
+        assert call.output["metrics"] == {}
+        assert call.output["metadata"]["tools"] == []
+        assert call.output["metadata"]["handoffs"] == handoffs
+        assert call.output["metadata"]["output_type"] == output_type
+        if check_error:
+            assert call.output["error"] is None
+
+    def expect_response_input(call, question):
+        # Response span whose first input item is the user's question.
+        assert call.inputs["name"] == "Response"
+        assert call.inputs["input"][0]["content"] == question
+        assert call.inputs["input"][0]["role"] == "user"
+
+    def expect_assistant_message(call):
+        # First output item is a completed assistant message.
+        message = call.output["output"][0]
+        assert message.role == "assistant"
+        assert message.type == "message"
+        assert message.status == "completed"
+
+    # ----- agents under test -----
+
+    guardrail_agent = Agent(
+        name="Guardrail check",
+        instructions="Check if the user is asking about homework.",
+        output_type=HomeworkOutput,
+    )
+
+    math_tutor_agent = Agent(
+        name="Math Tutor",
+        handoff_description="Specialist agent for math questions",
+        instructions="You provide help with math problems. Explain your reasoning at each step and include examples",
+    )
+
+    history_tutor_agent = Agent(
+        name="History Tutor",
+        handoff_description="Specialist agent for historical questions",
+        instructions="You provide assistance with historical queries. Explain important events and context clearly.",
+    )
+
+    async def homework_guardrail(ctx, agent, input_data):
+        # The tripwire fires when the guardrail agent says this is NOT homework.
+        guardrail_run = await Runner.run(
+            guardrail_agent, input_data, context=ctx.context
+        )
+        verdict = guardrail_run.final_output_as(HomeworkOutput)
+        return GuardrailFunctionOutput(
+            output_info=verdict,
+            tripwire_triggered=not verdict.is_homework,
+        )
+
+    triage_agent = Agent(
+        name="Triage Agent",
+        instructions="You determine which agent to use based on the user's homework question",
+        handoffs=[history_tutor_agent, math_tutor_agent],
+        input_guardrails=[
+            InputGuardrail(guardrail_function=homework_guardrail),
+        ],
+    )
+
+    result = await Runner.run(triage_agent, president_question)
+    with pytest.raises(agents.exceptions.InputGuardrailTripwireTriggered):
+        result = await Runner.run(triage_agent, "what is life")
+
+    calls = client.get_calls()
+    assert len(calls) == 14
+
+    # ----- first run: calls 0-8 -----
+
+    expect_workflow(calls[0])
+    expect_agent(calls[1], "Triage Agent", ["History Tutor", "Math Tutor"], "str")
+
+    passed_guardrail = calls[2]
+    assert passed_guardrail.inputs["name"] == "homework_guardrail"
+    assert passed_guardrail.output["output"] is None
+    assert passed_guardrail.output["metrics"] == {}
+    assert passed_guardrail.output["metadata"] == {"triggered": False}
+    assert passed_guardrail.output["error"] is None
+
+    expect_agent(calls[3], "Guardrail check", [], "HomeworkOutput")
+
+    handoff_request = calls[4]
+    expect_response_input(handoff_request, president_question)
+    transfer = handoff_request.output["output"][0]
+    assert transfer.name == "transfer_to_history_tutor"
+    assert transfer.type == "function_call"
+    assert transfer.status == "completed"
+
+    handoff = calls[5]
+    assert handoff.inputs["name"] == "Handoff"
+    assert handoff.output["output"] is None
+    assert handoff.output["metrics"] == {}
+    assert handoff.output["metadata"]["from_agent"] == "Triage Agent"
+    assert handoff.output["metadata"]["to_agent"] == "History Tutor"
+    assert handoff.output["error"] is None
+
+    expect_response_input(calls[6], president_question)
+    expect_assistant_message(calls[6])
+
+    expect_agent(calls[7], "History Tutor", [], "str")
+
+    tutor_response = calls[8]
+    expect_response_input(tutor_response, president_question)
+    assert tutor_response.inputs["input"][1]["name"] == "transfer_to_history_tutor"
+    assert tutor_response.inputs["input"][1]["type"] == "function_call"
+    assert tutor_response.inputs["input"][1]["status"] == "completed"
+    expect_assistant_message(tutor_response)
+
+    # ----- second run: calls 9-13 -----
+
+    expect_workflow(calls[9])
+    expect_agent(
+        calls[10],
+        "Triage Agent",
+        ["History Tutor", "Math Tutor"],
+        "str",
+        check_error=False,
+    )
+
+    tripped_guardrail = calls[11]
+    assert tripped_guardrail.inputs["name"] == "homework_guardrail"
+    assert tripped_guardrail.output["output"] is None
+    assert tripped_guardrail.output["metrics"] == {}
+    assert tripped_guardrail.output["metadata"]["triggered"] is True
+    assert tripped_guardrail.output["error"] is None
+
+    expect_agent(calls[12], "Guardrail check", [], "HomeworkOutput", check_error=False)
+
+    expect_response_input(calls[13], "what is life")
+    expect_assistant_message(calls[13])
@@ -47,6 +47,7 @@ class AutopatchSettings(BaseModel):
     mistral: IntegrationSettings = Field(default_factory=IntegrationSettings)
     notdiamond: IntegrationSettings = Field(default_factory=IntegrationSettings)
     openai: IntegrationSettings = Field(default_factory=IntegrationSettings)
+    openai_agents: IntegrationSettings = Field(default_factory=IntegrationSettings)
     vertexai: IntegrationSettings = Field(default_factory=IntegrationSettings)
     chatnvidia: IntegrationSettings = Field(default_factory=IntegrationSettings)
 
@@ -79,6 +80,7 @@ def autopatch(settings: Optional[AutopatchSettings] = None) -> None:
     from weave.integrations.mistral import get_mistral_patcher
     from weave.integrations.notdiamond.tracing import get_notdiamond_patcher
     from weave.integrations.openai.openai_sdk import get_openai_patcher
+    from weave.integrations.openai_agents.openai_agents import get_openai_agents_patcher
     from weave.integrations.vertexai.vertexai_sdk import get_vertexai_patcher
 
     get_openai_patcher(settings.openai).attempt_patch()
@@ -95,6 +97,7 @@ def autopatch(settings: Optional[AutopatchSettings] = None) -> None:
     get_vertexai_patcher(settings.vertexai).attempt_patch()
     get_nvidia_ai_patcher(settings.chatnvidia).attempt_patch()
     get_huggingface_patcher(settings.huggingface).attempt_patch()
+    get_openai_agents_patcher(settings.openai_agents).attempt_patch()
 
     llamaindex_patcher.attempt_patch()
     langchain_patcher.attempt_patch()
@@ -122,6 +125,7 @@ def reset_autopatch() -> None:
     from weave.integrations.mistral import get_mistral_patcher
     from weave.integrations.notdiamond.tracing import get_notdiamond_patcher
     from weave.integrations.openai.openai_sdk import get_openai_patcher
+    from weave.integrations.openai_agents.openai_agents import get_openai_agents_patcher
     from weave.integrations.vertexai.vertexai_sdk import get_vertexai_patcher
 
     get_openai_patcher().undo_patch()
@@ -138,6 +142,7 @@ def reset_autopatch() -> None:
     get_vertexai_patcher().undo_patch()
     get_nvidia_ai_patcher().undo_patch()
     get_huggingface_patcher().undo_patch()
+    get_openai_agents_patcher().undo_patch()
 
     llamaindex_patcher.undo_patch()
     langchain_patcher.undo_patch()
Original file line number	Diff line number	Diff line change
`@@ -108,6 +108,7 @@ scorers = [`
`108`	`108`	`]`
`109`	`109`	`notdiamond = ["notdiamond>=0.3.21", "litellm<=1.49.1"]`
`110`	`110`	`openai = ["openai>=1.0.0"]`
	`111`	`+openai_agents = ["openai-agents>=0.0.4"]`
`111`	`112`	`pandas-test = ["pandas>=2.2.3"]`
`112`	`113`	`presidio = ["presidio-analyzer==2.2.357", "presidio-anonymizer==2.2.357"]`
`113`	`114`	`modal = ["modal", "python-dotenv"]`