Skip to content

Commit c3d1bed

Browse files
authoredMar 20, 2025··
feat(weave): OpenAI Agents SDK integration (#3882)
1 parent 7a6d583 commit c3d1bed

File tree

8 files changed

+1343
-7
lines changed

8 files changed

+1343
-7
lines changed
 

‎noxfile.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import os
2+
13
import nox
24

35
nox.options.default_venv_backend = "uv"
@@ -50,6 +52,7 @@ def lint(session):
5052
"mistral1",
5153
"notdiamond",
5254
"openai",
55+
"openai_agents",
5356
"vertexai",
5457
"bedrock",
5558
"scorers",
@@ -77,20 +80,23 @@ def tests(session, shard):
7780
}
7881
# Add the GOOGLE_API_KEY environment variable for the "google_ai_studio" shard
7982
if shard == "google_ai_studio":
80-
env["GOOGLE_API_KEY"] = session.env.get("GOOGLE_API_KEY")
83+
env["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY", "MISSING")
8184

8285
# Add the NVIDIA_API_KEY environment variable for the "langchain_nvidia_ai_endpoints" shard
8386
if shard == "langchain_nvidia_ai_endpoints":
84-
env["NVIDIA_API_KEY"] = session.env.get("NVIDIA_API_KEY")
87+
env["NVIDIA_API_KEY"] = os.getenv("NVIDIA_API_KEY", "MISSING")
8588

8689
# we run some integration tests in test_llm_integrations.py that require
8790
# setting some environment variables for the LLM providers
8891
if shard == "scorers":
89-
env["GOOGLE_API_KEY"] = session.env.get("GOOGLE_API_KEY")
90-
env["GEMINI_API_KEY"] = session.env.get("GEMINI_API_KEY")
91-
env["ANTHROPIC_API_KEY"] = session.env.get("ANTHROPIC_API_KEY")
92-
env["MISTRAL_API_KEY"] = session.env.get("MISTRAL_API_KEY")
93-
env["OPENAI_API_KEY"] = session.env.get("OPENAI_API_KEY")
92+
env["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY", "MISSING")
93+
env["GEMINI_API_KEY"] = os.getenv("GEMINI_API_KEY", "MISSING")
94+
env["ANTHROPIC_API_KEY"] = os.getenv("ANTHROPIC_API_KEY", "MISSING")
95+
env["MISTRAL_API_KEY"] = os.getenv("MISTRAL_API_KEY", "MISSING")
96+
env["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY", "MISSING")
97+
98+
if shard == "openai_agents":
99+
env["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY", "MISSING")
94100

95101
default_test_dirs = [f"integrations/{shard}/"]
96102
test_dirs_dict = {

‎pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ scorers = [
108108
]
109109
notdiamond = ["notdiamond>=0.3.21", "litellm<=1.49.1"]
110110
openai = ["openai>=1.0.0"]
111+
openai_agents = ["openai-agents>=0.0.4"]
111112
pandas-test = ["pandas>=2.2.3"]
112113
presidio = ["presidio-analyzer==2.2.357", "presidio-anonymizer==2.2.357"]
113114
modal = ["modal", "python-dotenv"]

‎tests/integrations/openai_agents/__init__.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
interactions:
2+
- request:
3+
body: '{"input":[{"content":"Write a haiku about recursion in programming.","role":"user"}],"model":"gpt-4o","include":[],"instructions":"You
4+
are a helpful assistant","stream":false,"tools":[]}'
5+
headers:
6+
accept:
7+
- application/json
8+
accept-encoding:
9+
- gzip, deflate, zstd
10+
connection:
11+
- keep-alive
12+
content-length:
13+
- '186'
14+
content-type:
15+
- application/json
16+
host:
17+
- api.openai.com
18+
user-agent:
19+
- Agents/Python 0.0.0
20+
x-stainless-arch:
21+
- arm64
22+
x-stainless-async:
23+
- async:asyncio
24+
x-stainless-lang:
25+
- python
26+
x-stainless-os:
27+
- MacOS
28+
x-stainless-package-version:
29+
- 1.66.3
30+
x-stainless-read-timeout:
31+
- '600'
32+
x-stainless-retry-count:
33+
- '0'
34+
x-stainless-runtime:
35+
- CPython
36+
x-stainless-runtime-version:
37+
- 3.12.9
38+
method: POST
39+
uri: https://api.openai.com/v1/responses
40+
response:
41+
body:
42+
string: !!binary |
43+
H4sIAAAAAAAAA3RUwW7bMAy95ysInZvCdhzbyXWn3XYdusJgbDpVJ4uGRA0tivz7YNlx4q29BBGf
44+
3iP5KPpjA6B0q46gHPmhLsq2orZLduk+r9JDVp3yKj/sT1le7rNsVyZlUuT7MjkUHVGbkXoYBfj0
45+
So1cRdj6Od44QqG2xhFLyzzLisNhV0TMC0rwI6fhfjAk1E4kco6dOoINxsSAttcbdUuC2vg16sWF
46+
RjTbqPaTA6AjQHghM3TBAHqvvaCVSb/Ht5qDDEFq4d9kV2o9t2RGmfMg25y3WZLl26TaJsXcaSSq
47+
IzxtAAA+4i+AkveBRlpP3uN5aj8Ck7e9Py/W4h6r0dpTmaRpkVHRpdV+t6NPrY0aX1kVQccmZl53
48+
GaGGrZC9VXtf8arqqx30dmNPF8bIEdQ3bgkaNMaDMGjxZLoHgF/2u+201UJgmAcP2oLXhmxDEf2B
49+
IuSsB+rJnQkaQ+ge1ynQWha8ju/pecEu878pMp6e4wgGdGgMmVqYTR2LUkcQF2iCHf3RHHx9fYp1
50+
HMEyYUfo2Wp7VsfZDEVdx07uLgGoM1lyKFT70Pfo3md0A3CZny87us8r1A8jI8Rw+pjM0WjgnKhj
51+
1+PtfDeBeG9pdEox9ffCupkGHITVAkxmzcehHu5zumCb6OhIa7XHk7kuV4ivcylI29Ua5LuH/+N3
52+
S7eU3WDzQu2NmEylz+x/tystPwM+012G85W0sKC5gUWyuBU8rb4aPQm2KDjKXzaXvwAAAP//AwBA
53+
W3tF6QQAAA==
54+
headers:
55+
CF-RAY:
56+
- 9221c6bccef336b3-YYZ
57+
Connection:
58+
- keep-alive
59+
Content-Encoding:
60+
- gzip
61+
Content-Type:
62+
- application/json
63+
Date:
64+
- Tue, 18 Mar 2025 03:52:16 GMT
65+
Server:
66+
- cloudflare
67+
Set-Cookie:
68+
- __cf_bm=Tqnkbc7A4PgF5SM.gzWaijlDAAnDOk9Zieo6zRFmpd4-1742269936-1.0.1.1-9U42TycTKwZ6d4ISt9Qix_e2j6Latc3s8rPzobIIvSxaXqbrj.2XcpXF2GDI88LQyM0sRkP63Fw5_2JqEPAcJqt3OQPH5IESe3_.Iu0IofY;
69+
path=/; expires=Tue, 18-Mar-25 04:22:16 GMT; domain=.api.openai.com; HttpOnly;
70+
Secure; SameSite=None
71+
- _cfuvid=KqFFy64fH2k1v0MoM0hYELHA_M66ltXaaJFWGlP2Kng-1742269936949-0.0.1.1-604800000;
72+
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
73+
Transfer-Encoding:
74+
- chunked
75+
X-Content-Type-Options:
76+
- nosniff
77+
alt-svc:
78+
- h3=":443"; ma=86400
79+
cf-cache-status:
80+
- DYNAMIC
81+
openai-organization:
82+
- wandb
83+
openai-processing-ms:
84+
- '728'
85+
openai-version:
86+
- '2020-10-01'
87+
strict-transport-security:
88+
- max-age=31536000; includeSubDomains; preload
89+
x-request-id:
90+
- req_f9ec95d382e76a8e0ef8eb42aa207c3b
91+
status:
92+
code: 200
93+
message: OK
94+
version: 1

‎tests/integrations/openai_agents/cassettes/openai_agents_test/test_openai_agents_quickstart_homework.yaml

+448
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
import agents
2+
import pytest
3+
from agents import Agent, GuardrailFunctionOutput, InputGuardrail, Runner
4+
from pydantic import BaseModel
5+
6+
from weave.integrations.openai_agents.openai_agents import WeaveTracingProcessor
7+
from weave.trace.weave_client import WeaveClient
8+
9+
# TODO: Responses should be updated once we have patching for the new Responses API
10+
11+
12+
@pytest.fixture
def setup_tests():
    """Install the Weave tracing processor as the only Agents trace processor.

    OpenAI registers its own trace processor by default, which causes issues
    in these tests. Adding ours through autopatching would not remove the
    OpenAI one (autopatching keeps the default processor and installs the
    Weave processor alongside it), so the processor list is replaced outright
    to keep the tests simple.
    """
    weave_processor = WeaveTracingProcessor()
    agents.set_trace_processors([weave_processor])
20+
21+
22+
@pytest.mark.skip_clickhouse_client
@pytest.mark.vcr(
    filter_headers=["authorization"],
    allowed_hosts=["api.wandb.ai", "localhost"],
)
def test_openai_agents_quickstart(client: WeaveClient, setup_tests) -> None:
    """Run a single-agent quickstart and verify the three calls it traces.

    The expected call order is: the workflow root, the agent span, and the
    Responses API span. Every expectation is a real ``assert``; the earlier
    version assigned the expected values onto the call objects instead, so
    nothing was checked for the first two calls.
    """
    prompt = "Write a haiku about recursion in programming."
    agent = Agent(name="Assistant", instructions="You are a helpful assistant")

    # Only the traced calls are inspected; the run result itself is not needed.
    Runner.run_sync(agent, prompt)
    calls = client.get_calls()

    assert len(calls) == 3

    # Root span for the whole workflow.
    trace_root = calls[0]
    assert trace_root.inputs["name"] == "Agent workflow"
    assert trace_root.output["status"] == "completed"
    assert trace_root.output["metrics"] == {}
    assert trace_root.output["metadata"] == {}

    # Agent span; metadata is checked key by key, like the homework test.
    agent_call = calls[1]
    assert agent_call.inputs["name"] == "Assistant"
    assert agent_call.output["output"] is None
    assert agent_call.output["metrics"] == {}
    assert agent_call.output["metadata"]["tools"] == []
    assert agent_call.output["metadata"]["handoffs"] == []
    assert agent_call.output["metadata"]["output_type"] == "str"
    assert agent_call.output["error"] is None

    # Responses API span carrying the user prompt and the model message.
    response_call = calls[2]
    assert response_call.inputs["name"] == "Response"
    assert response_call.inputs["input"][0]["content"] == prompt
    assert response_call.inputs["input"][0]["role"] == "user"

    val = response_call.output["output"][0]
    assert val.role == "assistant"
    assert val.type == "message"
    assert val.status == "completed"
    assert (
        val.content[0].text
        == "Code calls to itself,  \nInfinite loops in silence,  \nPatterns emerge clear."
    )
65+
66+
67+
@pytest.mark.skip(
    reason="This test works, but the order of requests to OpenAI can be mixed up (by the Agent framework). This causes the test to fail more than reasonable in CI."
)
@pytest.mark.skip_clickhouse_client
@pytest.mark.vcr(
    filter_headers=["authorization"],
    allowed_hosts=["api.wandb.ai", "localhost"],
)
@pytest.mark.asyncio
async def test_openai_agents_quickstart_homework(
    client: WeaveClient, setup_tests
) -> None:
    """Trace a triage workflow with a guardrail and two hand-off agents.

    Two runs are made: a history question that passes the guardrail and is
    handed off to the history tutor, and an off-topic question that trips the
    guardrail. The 14 resulting calls are then checked in order.
    """
    history_question = "who was the first president of the united states?"
    tutor_names = ["History Tutor", "Math Tutor"]

    class HomeworkOutput(BaseModel):
        is_homework: bool
        reasoning: str

    guardrail_agent = Agent(
        name="Guardrail check",
        instructions="Check if the user is asking about homework.",
        output_type=HomeworkOutput,
    )

    math_tutor_agent = Agent(
        name="Math Tutor",
        handoff_description="Specialist agent for math questions",
        instructions="You provide help with math problems. Explain your reasoning at each step and include examples",
    )

    history_tutor_agent = Agent(
        name="History Tutor",
        handoff_description="Specialist agent for historical questions",
        instructions="You provide assistance with historical queries. Explain important events and context clearly.",
    )

    async def homework_guardrail(ctx, agent, input_data):
        # Ask the guardrail agent for a verdict; trip the wire when the
        # input is not homework.
        guardrail_run = await Runner.run(
            guardrail_agent, input_data, context=ctx.context
        )
        verdict = guardrail_run.final_output_as(HomeworkOutput)
        return GuardrailFunctionOutput(
            output_info=verdict,
            tripwire_triggered=not verdict.is_homework,
        )

    triage_agent = Agent(
        name="Triage Agent",
        instructions="You determine which agent to use based on the user's homework question",
        handoffs=[history_tutor_agent, math_tutor_agent],
        input_guardrails=[
            InputGuardrail(guardrail_function=homework_guardrail),
        ],
    )

    # First run passes the guardrail; the second must trip it.
    await Runner.run(triage_agent, history_question)
    with pytest.raises(agents.exceptions.InputGuardrailTripwireTriggered):
        await Runner.run(triage_agent, "what is life")

    calls = client.get_calls()
    assert len(calls) == 14

    # ---------------- first run: history question ----------------

    # calls[0]: workflow root
    workflow_1 = calls[0]
    assert workflow_1.inputs["name"] == "Agent workflow"
    assert workflow_1.output["status"] == "completed"
    assert workflow_1.output["metrics"] == {}
    assert workflow_1.output["metadata"] == {}

    # calls[1]: triage agent
    triage_1 = calls[1]
    assert triage_1.inputs["name"] == "Triage Agent"
    assert triage_1.output["output"] is None
    assert triage_1.output["metrics"] == {}
    triage_1_meta = triage_1.output["metadata"]
    assert triage_1_meta["tools"] == []
    assert triage_1_meta["handoffs"] == tutor_names
    assert triage_1_meta["output_type"] == "str"
    assert triage_1.output["error"] is None

    # calls[2]: guardrail function, not triggered
    guard_fn_1 = calls[2]
    assert guard_fn_1.inputs["name"] == "homework_guardrail"
    assert guard_fn_1.output["output"] is None
    assert guard_fn_1.output["metrics"] == {}
    assert guard_fn_1.output["metadata"] == {"triggered": False}
    assert guard_fn_1.output["error"] is None

    # calls[3]: guardrail agent
    guard_agent_1 = calls[3]
    assert guard_agent_1.inputs["name"] == "Guardrail check"
    assert guard_agent_1.output["output"] is None
    assert guard_agent_1.output["metrics"] == {}
    guard_agent_1_meta = guard_agent_1.output["metadata"]
    assert guard_agent_1_meta["tools"] == []
    assert guard_agent_1_meta["handoffs"] == []
    assert guard_agent_1_meta["output_type"] == "HomeworkOutput"
    assert guard_agent_1.output["error"] is None

    # calls[4]: response that requests the hand-off
    handoff_response = calls[4]
    assert handoff_response.inputs["name"] == "Response"
    assert handoff_response.inputs["input"][0]["content"] == history_question
    assert handoff_response.inputs["input"][0]["role"] == "user"

    handoff_item = handoff_response.output["output"][0]
    assert handoff_item.name == "transfer_to_history_tutor"
    assert handoff_item.type == "function_call"
    assert handoff_item.status == "completed"

    # calls[5]: the hand-off itself
    handoff = calls[5]
    assert handoff.inputs["name"] == "Handoff"
    assert handoff.output["output"] is None
    assert handoff.output["metrics"] == {}
    handoff_meta = handoff.output["metadata"]
    assert handoff_meta["from_agent"] == "Triage Agent"
    assert handoff_meta["to_agent"] == "History Tutor"
    assert handoff.output["error"] is None

    # calls[6]: response with an assistant message
    message_response = calls[6]
    assert message_response.inputs["name"] == "Response"
    assert message_response.inputs["input"][0]["content"] == history_question
    assert message_response.inputs["input"][0]["role"] == "user"

    message_item = message_response.output["output"][0]
    assert message_item.role == "assistant"
    assert message_item.type == "message"
    assert message_item.status == "completed"

    # calls[7]: history tutor agent
    tutor = calls[7]
    assert tutor.inputs["name"] == "History Tutor"
    assert tutor.output["output"] is None
    assert tutor.output["metrics"] == {}
    tutor_meta = tutor.output["metadata"]
    assert tutor_meta["tools"] == []
    assert tutor_meta["handoffs"] == []
    assert tutor_meta["output_type"] == "str"
    assert tutor.output["error"] is None

    # calls[8]: response whose input also carries the hand-off call
    tutor_response = calls[8]
    assert tutor_response.inputs["name"] == "Response"
    tutor_inputs = tutor_response.inputs["input"]
    assert tutor_inputs[0]["content"] == history_question
    assert tutor_inputs[0]["role"] == "user"
    assert tutor_inputs[1]["name"] == "transfer_to_history_tutor"
    assert tutor_inputs[1]["type"] == "function_call"
    assert tutor_inputs[1]["status"] == "completed"

    tutor_item = tutor_response.output["output"][0]
    assert tutor_item.role == "assistant"
    assert tutor_item.type == "message"
    assert tutor_item.status == "completed"

    # ---------------- second run: guardrail trips ----------------

    # calls[9]: workflow root
    workflow_2 = calls[9]
    assert workflow_2.inputs["name"] == "Agent workflow"
    assert workflow_2.output["status"] == "completed"
    assert workflow_2.output["metrics"] == {}
    assert workflow_2.output["metadata"] == {}

    # calls[10]: triage agent
    triage_2 = calls[10]
    assert triage_2.inputs["name"] == "Triage Agent"
    assert triage_2.output["output"] is None
    assert triage_2.output["metrics"] == {}
    triage_2_meta = triage_2.output["metadata"]
    assert triage_2_meta["tools"] == []
    assert triage_2_meta["handoffs"] == tutor_names
    assert triage_2_meta["output_type"] == "str"

    # calls[11]: guardrail function, triggered this time
    guard_fn_2 = calls[11]
    assert guard_fn_2.inputs["name"] == "homework_guardrail"
    assert guard_fn_2.output["output"] is None
    assert guard_fn_2.output["metrics"] == {}
    assert guard_fn_2.output["metadata"]["triggered"] is True
    assert guard_fn_2.output["error"] is None

    # calls[12]: guardrail agent
    guard_agent_2 = calls[12]
    assert guard_agent_2.inputs["name"] == "Guardrail check"
    assert guard_agent_2.output["output"] is None
    assert guard_agent_2.output["metrics"] == {}
    guard_agent_2_meta = guard_agent_2.output["metadata"]
    assert guard_agent_2_meta["tools"] == []
    assert guard_agent_2_meta["handoffs"] == []
    assert guard_agent_2_meta["output_type"] == "HomeworkOutput"

    # calls[13]: response for the off-topic question
    offtopic_response = calls[13]
    assert offtopic_response.inputs["name"] == "Response"
    assert offtopic_response.inputs["input"][0]["content"] == "what is life"
    assert offtopic_response.inputs["input"][0]["role"] == "user"

    offtopic_item = offtopic_response.output["output"][0]
    assert offtopic_item.role == "assistant"
    assert offtopic_item.type == "message"
    assert offtopic_item.status == "completed"

‎weave/integrations/openai_agents/openai_agents.py

+507
Large diffs are not rendered by default.

‎weave/trace/autopatch.py

+5
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ class AutopatchSettings(BaseModel):
4747
mistral: IntegrationSettings = Field(default_factory=IntegrationSettings)
4848
notdiamond: IntegrationSettings = Field(default_factory=IntegrationSettings)
4949
openai: IntegrationSettings = Field(default_factory=IntegrationSettings)
50+
openai_agents: IntegrationSettings = Field(default_factory=IntegrationSettings)
5051
vertexai: IntegrationSettings = Field(default_factory=IntegrationSettings)
5152
chatnvidia: IntegrationSettings = Field(default_factory=IntegrationSettings)
5253

@@ -79,6 +80,7 @@ def autopatch(settings: Optional[AutopatchSettings] = None) -> None:
7980
from weave.integrations.mistral import get_mistral_patcher
8081
from weave.integrations.notdiamond.tracing import get_notdiamond_patcher
8182
from weave.integrations.openai.openai_sdk import get_openai_patcher
83+
from weave.integrations.openai_agents.openai_agents import get_openai_agents_patcher
8284
from weave.integrations.vertexai.vertexai_sdk import get_vertexai_patcher
8385

8486
get_openai_patcher(settings.openai).attempt_patch()
@@ -95,6 +97,7 @@ def autopatch(settings: Optional[AutopatchSettings] = None) -> None:
9597
get_vertexai_patcher(settings.vertexai).attempt_patch()
9698
get_nvidia_ai_patcher(settings.chatnvidia).attempt_patch()
9799
get_huggingface_patcher(settings.huggingface).attempt_patch()
100+
get_openai_agents_patcher(settings.openai_agents).attempt_patch()
98101

99102
llamaindex_patcher.attempt_patch()
100103
langchain_patcher.attempt_patch()
@@ -122,6 +125,7 @@ def reset_autopatch() -> None:
122125
from weave.integrations.mistral import get_mistral_patcher
123126
from weave.integrations.notdiamond.tracing import get_notdiamond_patcher
124127
from weave.integrations.openai.openai_sdk import get_openai_patcher
128+
from weave.integrations.openai_agents.openai_agents import get_openai_agents_patcher
125129
from weave.integrations.vertexai.vertexai_sdk import get_vertexai_patcher
126130

127131
get_openai_patcher().undo_patch()
@@ -138,6 +142,7 @@ def reset_autopatch() -> None:
138142
get_vertexai_patcher().undo_patch()
139143
get_nvidia_ai_patcher().undo_patch()
140144
get_huggingface_patcher().undo_patch()
145+
get_openai_agents_patcher().undo_patch()
141146

142147
llamaindex_patcher.undo_patch()
143148
langchain_patcher.undo_patch()

0 commit comments

Comments
 (0)
Please sign in to comment.