Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 27 additions & 3 deletions src/agents/_run_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,8 @@ class SingleStepResult:
"""Items generated before the current step."""

new_step_items: list[RunItem]
"""Items generated during this current step."""
"""Items generated during this current step. May be filtered during handoffs to avoid
duplication in model input."""

next_step: NextStepHandoff | NextStepFinalOutput | NextStepRunAgain
"""The next step to take."""
Expand All @@ -254,11 +255,18 @@ class SingleStepResult:
tool_output_guardrail_results: list[ToolOutputGuardrailResult]
"""Tool output guardrail results from this step."""

session_step_items: list[RunItem] | None = None
"""Full unfiltered items for session history. When set, these are used instead of
new_step_items for session saving and generated_items property."""

@property
def generated_items(self) -> list[RunItem]:
"""Items generated during the agent run (i.e. everything generated after
`original_input`)."""
return self.pre_step_items + self.new_step_items
items = (
self.session_step_items if self.session_step_items is not None else self.new_step_items
)
return self.pre_step_items + items


def get_model_tracing_impl(
Expand Down Expand Up @@ -1285,6 +1293,12 @@ async def execute_handoffs(
)
pre_step_items = list(filtered.pre_handoff_items)
new_step_items = list(filtered.new_items)
# For custom input filters, use input_items if available, otherwise new_items
if filtered.input_items is not None:
session_step_items = list(filtered.new_items)
new_step_items = list(filtered.input_items)
else:
session_step_items = None
elif should_nest_history and handoff_input_data is not None:
nested = nest_handoff_history(
handoff_input_data,
Expand All @@ -1296,7 +1310,16 @@ async def execute_handoffs(
else list(nested.input_history)
)
pre_step_items = list(nested.pre_handoff_items)
new_step_items = list(nested.new_items)
# Keep full new_items for session history.
session_step_items = list(nested.new_items)
# Use input_items (filtered) for model input if available.
if nested.input_items is not None:
new_step_items = list(nested.input_items)
else:
new_step_items = session_step_items
else:
# No filtering or nesting - session_step_items not needed
session_step_items = None

return SingleStepResult(
original_input=original_input,
Expand All @@ -1306,6 +1329,7 @@ async def execute_handoffs(
next_step=NextStepHandoff(new_agent),
tool_input_guardrail_results=[],
tool_output_guardrail_results=[],
session_step_items=session_step_items,
)

@classmethod
Expand Down
7 changes: 7 additions & 0 deletions src/agents/handoffs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ class HandoffInputData:
later on, it is optional for backwards compatibility.
"""

input_items: tuple[RunItem, ...] | None = None
"""
Items to include in the next agent's input. When set, these items are used instead of
new_items for building the input to the next agent. This allows filtering duplicates
from agent input while preserving all items in new_items for session history.
"""

def clone(self, **kwargs: Any) -> HandoffInputData:
"""
Make a copy of the handoff input data, with the given arguments changed. For example, you
Expand Down
58 changes: 45 additions & 13 deletions src/agents/handoffs/history.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@
_conversation_history_start = _DEFAULT_CONVERSATION_HISTORY_START
_conversation_history_end = _DEFAULT_CONVERSATION_HISTORY_END

# Item types that are summarized in the conversation history.
# They should not be forwarded verbatim to the next agent to avoid duplication.
_SUMMARY_ONLY_INPUT_TYPES = {
"function_call",
"function_call_output",
}


def set_conversation_history_wrappers(
*,
Expand Down Expand Up @@ -67,23 +74,34 @@ def nest_handoff_history(

normalized_history = _normalize_input_history(handoff_input_data.input_history)
flattened_history = _flatten_nested_history_messages(normalized_history)
pre_items_as_inputs = [
_run_item_to_plain_input(item) for item in handoff_input_data.pre_handoff_items
]
new_items_as_inputs = [_run_item_to_plain_input(item) for item in handoff_input_data.new_items]

# Convert items to plain inputs for the transcript summary.
pre_items_as_inputs: list[TResponseInputItem] = []
filtered_pre_items: list[RunItem] = []
for run_item in handoff_input_data.pre_handoff_items:
plain_input = _run_item_to_plain_input(run_item)
pre_items_as_inputs.append(plain_input)
if _should_forward_pre_item(plain_input):
filtered_pre_items.append(run_item)

new_items_as_inputs: list[TResponseInputItem] = []
filtered_input_items: list[RunItem] = []
for run_item in handoff_input_data.new_items:
plain_input = _run_item_to_plain_input(run_item)
new_items_as_inputs.append(plain_input)
if _should_forward_new_item(plain_input):
filtered_input_items.append(run_item)

transcript = flattened_history + pre_items_as_inputs + new_items_as_inputs

mapper = history_mapper or default_handoff_history_mapper
history_items = mapper(transcript)
filtered_pre_items = tuple(
item
for item in handoff_input_data.pre_handoff_items
if _get_run_item_role(item) != "assistant"
)

return handoff_input_data.clone(
input_history=tuple(deepcopy(item) for item in history_items),
pre_handoff_items=filtered_pre_items,
pre_handoff_items=tuple(filtered_pre_items),
# new_items stays unchanged for session history.
input_items=tuple(filtered_input_items),
)


Expand Down Expand Up @@ -231,6 +249,20 @@ def _split_role_and_name(role_text: str) -> tuple[str, str | None]:
return (role_text or "developer", None)


def _get_run_item_role(run_item: RunItem) -> str | None:
role_candidate = run_item.to_input_item().get("role")
return role_candidate if isinstance(role_candidate, str) else None
def _should_forward_pre_item(input_item: TResponseInputItem) -> bool:
"""Return False when the previous transcript item is represented in the summary."""
role_candidate = input_item.get("role")
if isinstance(role_candidate, str) and role_candidate == "assistant":
return False
type_candidate = input_item.get("type")
return not (isinstance(type_candidate, str) and type_candidate in _SUMMARY_ONLY_INPUT_TYPES)


def _should_forward_new_item(input_item: TResponseInputItem) -> bool:
"""Return False for tool or side-effect items that the summary already covers."""
# Items with a role should always be forwarded.
role_candidate = input_item.get("role")
if isinstance(role_candidate, str) and role_candidate:
return True
type_candidate = input_item.get("type")
return not (isinstance(type_candidate, str) and type_candidate in _SUMMARY_ONLY_INPUT_TYPES)
11 changes: 8 additions & 3 deletions src/agents/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -1740,10 +1740,15 @@ async def _get_single_step_result_from_streamed_response(
context_wrapper=context_wrapper,
run_config=run_config,
)
# Use session_step_items (unfiltered) if available for streaming observability,
# otherwise fall back to new_step_items.
streaming_items = (
single_step_result.session_step_items
if single_step_result.session_step_items is not None
else single_step_result.new_step_items
)
new_step_items = [
item
for item in single_step_result.new_step_items
if item not in new_items_processed_response
item for item in streaming_items if item not in new_items_processed_response
]
RunImpl.stream_step_items_to_queue(new_step_items, event_queue)

Expand Down
11 changes: 4 additions & 7 deletions tests/test_agent_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,9 @@ async def test_handoffs():

assert result.final_output == "done"
assert len(result.raw_responses) == 3, "should have three model responses"
assert len(result.to_input_list()) == 7, (
"should have 7 inputs: summary message, tool call, tool result, message, handoff, "
"handoff result, and done message"
assert len(result.to_input_list()) == 5, (
"should have 5 session items: summary message (contains pre_handoff tool call/result), "
"message, handoff call, handoff output, and done message"
)
assert result.last_agent == agent_1, "should have handed off to agent_1"

Expand Down Expand Up @@ -226,10 +226,7 @@ async def test_structured_output():

assert result.final_output == Foo(bar="baz")
assert len(result.raw_responses) == 4, "should have four model responses"
assert len(result.to_input_list()) == 10, (
"should have input: conversation summary, function call, function call result, message, "
"handoff, handoff output, preamble message, tool call, tool call result, final output"
)
assert len(result.to_input_list()) == 8, "should have 8 inputs including summary"

assert result.last_agent == agent_1, "should have handed off to agent_1"
assert result.final_output == Foo(bar="baz"), "should have structured output"
Expand Down
20 changes: 9 additions & 11 deletions tests/test_agent_runner_streamed.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,9 @@ async def test_handoffs():

assert result.final_output == "done"
assert len(result.raw_responses) == 3, "should have three model responses"
assert len(result.to_input_list()) == 7, (
"should have 7 inputs: summary message, tool call, tool result, message, handoff, "
"handoff result, and done message"
assert len(result.to_input_list()) == 5, (
"should have 5 session items: summary message (contains pre_handoff tool call/result), "
"message, handoff call, handoff output, and done message"
)
assert result.last_agent == agent_1, "should have handed off to agent_1"

Expand Down Expand Up @@ -231,10 +231,7 @@ async def test_structured_output():

assert result.final_output == Foo(bar="baz")
assert len(result.raw_responses) == 4, "should have four model responses"
assert len(result.to_input_list()) == 10, (
"should have input: conversation summary, function call, function call result, message, "
"handoff, handoff output, preamble message, tool call, tool call result, final output"
)
assert len(result.to_input_list()) == 8, "should have 8 inputs including summary"

assert result.last_agent == agent_1, "should have handed off to agent_1"
assert result.final_output == Foo(bar="baz"), "should have structured output"
Expand Down Expand Up @@ -717,9 +714,9 @@ async def test_streaming_events():

assert result.final_output == Foo(bar="baz")
assert len(result.raw_responses) == 4, "should have four model responses"
assert len(result.to_input_list()) == 9, (
"should have input: conversation summary, function call, function call result, message, "
"handoff, handoff output, tool call, tool call result, final output"
assert len(result.to_input_list()) == 7, (
"should have input: conversation summary (contains pre-handoff tool calls), "
"message, handoff, handoff output, tool call, tool call result, final output"
)

assert result.last_agent == agent_1, "should have handed off to agent_1"
Expand All @@ -737,7 +734,8 @@ async def test_streaming_events():
"tool_call_output": 2,
"message": 2, # get_text_message("a_message") + get_final_output_message(...)
"handoff": 1, # get_handoff_tool_call(agent_1)
"handoff_output": 1, # handoff_output_item
# handoff_output is summarized in conversation history, not duplicated as raw item
"handoff_output": 0,
}

total_expected_item_count = sum(expected_item_type_map.values())
Expand Down
Loading