openai · AnkanMisra · Jan 2, 2026 · Jan 2, 2026 · Jan 3, 2026 · Jan 3, 2026
diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py
@@ -243,7 +243,8 @@ class SingleStepResult:
     """Items generated before the current step."""
 
     new_step_items: list[RunItem]
-    """Items generated during this current step."""
+    """Items generated during this current step. May be filtered during handoffs to avoid
+    duplication in model input."""
 
     next_step: NextStepHandoff | NextStepFinalOutput | NextStepRunAgain
     """The next step to take."""
@@ -254,11 +255,18 @@ class SingleStepResult:
     tool_output_guardrail_results: list[ToolOutputGuardrailResult]
     """Tool output guardrail results from this step."""
 
+    session_step_items: list[RunItem] | None = None
+    """Full unfiltered items for session history. When set, these are used instead of
+    new_step_items for session saving and generated_items property."""
+
     @property
     def generated_items(self) -> list[RunItem]:
         """Items generated during the agent run (i.e. everything generated after
         `original_input`)."""
-        return self.pre_step_items + self.new_step_items
+        items = (
+            self.session_step_items if self.session_step_items is not None else self.new_step_items
+        )
+        return self.pre_step_items + items
 
 
 def get_model_tracing_impl(
@@ -1285,6 +1293,12 @@ async def execute_handoffs(
                 )
                 pre_step_items = list(filtered.pre_handoff_items)
                 new_step_items = list(filtered.new_items)
+                # For custom input filters, use input_items if available, otherwise new_items
+                if filtered.input_items is not None:
+                    session_step_items = list(filtered.new_items)
+                    new_step_items = list(filtered.input_items)
+                else:
+                    session_step_items = None
             elif should_nest_history and handoff_input_data is not None:
                 nested = nest_handoff_history(
                     handoff_input_data,
@@ -1296,7 +1310,16 @@ async def execute_handoffs(
                     else list(nested.input_history)
                 )
                 pre_step_items = list(nested.pre_handoff_items)
-                new_step_items = list(nested.new_items)
+                # Keep full new_items for session history.
+                session_step_items = list(nested.new_items)
+                # Use input_items (filtered) for model input if available.
+                if nested.input_items is not None:
+                    new_step_items = list(nested.input_items)
+                else:
+                    new_step_items = session_step_items
+            else:
+                # No filtering or nesting - session_step_items not needed
+                session_step_items = None
 
         return SingleStepResult(
             original_input=original_input,
@@ -1306,6 +1329,7 @@ async def execute_handoffs(
             next_step=NextStepHandoff(new_agent),
             tool_input_guardrail_results=[],
             tool_output_guardrail_results=[],
+            session_step_items=session_step_items,
         )
 
     @classmethod

diff --git a/src/agents/handoffs/__init__.py b/src/agents/handoffs/__init__.py
@@ -62,6 +62,13 @@ class HandoffInputData:
     later on, it is optional for backwards compatibility.
     """
 
+    input_items: tuple[RunItem, ...] | None = None
+    """
+    Items to include in the next agent's input. When set, these items are used instead of
+    new_items for building the input to the next agent. This allows filtering duplicates
+    from agent input while preserving all items in new_items for session history.
+    """
+
     def clone(self, **kwargs: Any) -> HandoffInputData:
         """
         Make a copy of the handoff input data, with the given arguments changed. For example, you

diff --git a/src/agents/handoffs/history.py b/src/agents/handoffs/history.py
@@ -26,6 +26,13 @@
 _conversation_history_start = _DEFAULT_CONVERSATION_HISTORY_START
 _conversation_history_end = _DEFAULT_CONVERSATION_HISTORY_END
 
+# Item types that are summarized in the conversation history.
+# They should not be forwarded verbatim to the next agent to avoid duplication.
+_SUMMARY_ONLY_INPUT_TYPES = {
+    "function_call",
+    "function_call_output",
+}
+
 
 def set_conversation_history_wrappers(
     *,
@@ -67,23 +74,34 @@ def nest_handoff_history(
 
     normalized_history = _normalize_input_history(handoff_input_data.input_history)
     flattened_history = _flatten_nested_history_messages(normalized_history)
-    pre_items_as_inputs = [
-        _run_item_to_plain_input(item) for item in handoff_input_data.pre_handoff_items
-    ]
-    new_items_as_inputs = [_run_item_to_plain_input(item) for item in handoff_input_data.new_items]
+
+    # Convert items to plain inputs for the transcript summary.
+    pre_items_as_inputs: list[TResponseInputItem] = []
+    filtered_pre_items: list[RunItem] = []
+    for run_item in handoff_input_data.pre_handoff_items:
+        plain_input = _run_item_to_plain_input(run_item)
+        pre_items_as_inputs.append(plain_input)
+        if _should_forward_pre_item(plain_input):
+            filtered_pre_items.append(run_item)
+
+    new_items_as_inputs: list[TResponseInputItem] = []
+    filtered_input_items: list[RunItem] = []
+    for run_item in handoff_input_data.new_items:
+        plain_input = _run_item_to_plain_input(run_item)
+        new_items_as_inputs.append(plain_input)
+        if _should_forward_new_item(plain_input):
+            filtered_input_items.append(run_item)
+
     transcript = flattened_history + pre_items_as_inputs + new_items_as_inputs
 
     mapper = history_mapper or default_handoff_history_mapper
     history_items = mapper(transcript)
-    filtered_pre_items = tuple(
-        item
-        for item in handoff_input_data.pre_handoff_items
-        if _get_run_item_role(item) != "assistant"
-    )
 
     return handoff_input_data.clone(
         input_history=tuple(deepcopy(item) for item in history_items),
-        pre_handoff_items=filtered_pre_items,
+        pre_handoff_items=tuple(filtered_pre_items),
+        # new_items stays unchanged for session history.
+        input_items=tuple(filtered_input_items),
     )
 
 
@@ -231,6 +249,20 @@ def _split_role_and_name(role_text: str) -> tuple[str, str | None]:
     return (role_text or "developer", None)
 
 
-def _get_run_item_role(run_item: RunItem) -> str | None:
-    role_candidate = run_item.to_input_item().get("role")
-    return role_candidate if isinstance(role_candidate, str) else None
+def _should_forward_pre_item(input_item: TResponseInputItem) -> bool:
+    """Return False when the previous transcript item is represented in the summary."""
+    role_candidate = input_item.get("role")
+    if isinstance(role_candidate, str) and role_candidate == "assistant":
+        return False
+    type_candidate = input_item.get("type")
+    return not (isinstance(type_candidate, str) and type_candidate in _SUMMARY_ONLY_INPUT_TYPES)
+
+
+def _should_forward_new_item(input_item: TResponseInputItem) -> bool:
+    """Return False for tool or side-effect items that the summary already covers."""
+    # Items with a role should always be forwarded.
+    role_candidate = input_item.get("role")
+    if isinstance(role_candidate, str) and role_candidate:
+        return True
+    type_candidate = input_item.get("type")
+    return not (isinstance(type_candidate, str) and type_candidate in _SUMMARY_ONLY_INPUT_TYPES)
diff --git a/src/agents/run.py b/src/agents/run.py
@@ -1740,10 +1740,15 @@ async def _get_single_step_result_from_streamed_response(
             context_wrapper=context_wrapper,
             run_config=run_config,
         )
+        # Use session_step_items (unfiltered) if available for streaming observability,
+        # otherwise fall back to new_step_items.
+        streaming_items = (
+            single_step_result.session_step_items
+            if single_step_result.session_step_items is not None
+            else single_step_result.new_step_items
+        )
         new_step_items = [
-            item
-            for item in single_step_result.new_step_items
-            if item not in new_items_processed_response
+            item for item in streaming_items if item not in new_items_processed_response
         ]
         RunImpl.stream_step_items_to_queue(new_step_items, event_queue)
 

diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py
@@ -172,9 +172,9 @@ async def test_handoffs():
 
     assert result.final_output == "done"
     assert len(result.raw_responses) == 3, "should have three model responses"
-    assert len(result.to_input_list()) == 7, (
-        "should have 7 inputs: summary message, tool call, tool result, message, handoff, "
-        "handoff result, and done message"
+    assert len(result.to_input_list()) == 5, (
+        "should have 5 session items: summary message (contains pre_handoff tool call/result), "
+        "message, handoff call, handoff output, and done message"
     )
     assert result.last_agent == agent_1, "should have handed off to agent_1"
 
@@ -226,10 +226,7 @@ async def test_structured_output():
 
     assert result.final_output == Foo(bar="baz")
     assert len(result.raw_responses) == 4, "should have four model responses"
-    assert len(result.to_input_list()) == 10, (
-        "should have input: conversation summary, function call, function call result, message, "
-        "handoff, handoff output, preamble message, tool call, tool call result, final output"
-    )
+    assert len(result.to_input_list()) == 8, "should have 8 inputs including summary"
 
     assert result.last_agent == agent_1, "should have handed off to agent_1"
     assert result.final_output == Foo(bar="baz"), "should have structured output"

diff --git a/tests/test_agent_runner_streamed.py b/tests/test_agent_runner_streamed.py
@@ -175,9 +175,9 @@ async def test_handoffs():
 
     assert result.final_output == "done"
     assert len(result.raw_responses) == 3, "should have three model responses"
-    assert len(result.to_input_list()) == 7, (
-        "should have 7 inputs: summary message, tool call, tool result, message, handoff, "
-        "handoff result, and done message"
+    assert len(result.to_input_list()) == 5, (
+        "should have 5 session items: summary message (contains pre_handoff tool call/result), "
+        "message, handoff call, handoff output, and done message"
     )
     assert result.last_agent == agent_1, "should have handed off to agent_1"
 
@@ -231,10 +231,7 @@ async def test_structured_output():
 
     assert result.final_output == Foo(bar="baz")
     assert len(result.raw_responses) == 4, "should have four model responses"
-    assert len(result.to_input_list()) == 10, (
-        "should have input: conversation summary, function call, function call result, message, "
-        "handoff, handoff output, preamble message, tool call, tool call result, final output"
-    )
+    assert len(result.to_input_list()) == 8, "should have 8 inputs including summary"
 
     assert result.last_agent == agent_1, "should have handed off to agent_1"
     assert result.final_output == Foo(bar="baz"), "should have structured output"
@@ -717,9 +714,9 @@ async def test_streaming_events():
 
     assert result.final_output == Foo(bar="baz")
     assert len(result.raw_responses) == 4, "should have four model responses"
-    assert len(result.to_input_list()) == 9, (
-        "should have input: conversation summary, function call, function call result, message, "
-        "handoff, handoff output, tool call, tool call result, final output"
+    assert len(result.to_input_list()) == 7, (
+        "should have input: conversation summary (contains pre-handoff tool calls), "
+        "message, handoff, handoff output, tool call, tool call result, final output"
     )
 
     assert result.last_agent == agent_1, "should have handed off to agent_1"
@@ -737,7 +734,8 @@ async def test_streaming_events():
         "tool_call_output": 2,
         "message": 2,  # get_text_message("a_message") + get_final_output_message(...)
         "handoff": 1,  # get_handoff_tool_call(agent_1)
-        "handoff_output": 1,  # handoff_output_item
+        # handoff_output is summarized in conversation history, not duplicated as raw item
+        "handoff_output": 0,
     }
 
     total_expected_item_count = sum(expected_item_type_map.values())