From fe70594e003f748eacccfd1a2320b8d1fd15ab01 Mon Sep 17 00:00:00 2001
From: Marko Rosenmueller <5467316+dr75@users.noreply.github.com>
Date: Wed, 5 Nov 2025 11:32:10 +0000
Subject: [PATCH 1/3] [Bugfix]: missing partial content when gpt-oss tool
 calling is enabled

Signed-off-by: Marko Rosenmueller <5467316+dr75@users.noreply.github.com>
---
 vllm/tool_parsers/openai_tool_parser.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py
index db92ea8982d7..9f7c5502429d 100644
--- a/vllm/tool_parsers/openai_tool_parser.py
+++ b/vllm/tool_parsers/openai_tool_parser.py
@@ -79,6 +79,16 @@ def extract_tool_calls(
                 elif msg.channel == "commentary" and not msg.recipient:
                     commentary_content = msg_text
 
+            # Check for partial responses:
+            # current content without recipient and final channel
+            if (
+                parser.current_content
+                and final_content is None
+                and parser.current_recipient is None
+                and parser.current_channel in [None, "final"]
+            ):
+                final_content = parser.current_content
+
         return ExtractedToolCallInformation(
             tools_called=len(tool_calls) > 0,
             tool_calls=tool_calls,

From 59dd664169e8d4b6d1eab0620487bf48fd720470 Mon Sep 17 00:00:00 2001
From: Marko Rosenmueller <5467316+dr75@users.noreply.github.com>
Date: Wed, 5 Nov 2025 11:32:37 +0000
Subject: [PATCH 2/3] test

Signed-off-by: Marko Rosenmueller <5467316+dr75@users.noreply.github.com>
---
 tests/tool_parsers/test_openai_tool_parser.py | 89 +++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py
index 44b8c92745e9..6e984936b0d2 100644
--- a/tests/tool_parsers/test_openai_tool_parser.py
+++ b/tests/tool_parsers/test_openai_tool_parser.py
@@ -9,6 +9,7 @@
     DeveloperContent,
     HarmonyEncodingName,
     Message,
+    RenderConversationConfig,
     Role,
     SystemContent,
     load_harmony_encoding,
@@ -261,3 +262,91 @@ def test_extract_tool_calls_with_content(
     ]
     assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
     assert extracted_info.content == final_content
+
+
+def test_extract_partial_response_no_tools(openai_tool_parser, harmony_encoding):
+    """Truncated final message without tool calls is returned as partial content."""
+    final_content = "This is a partial response."
+    convo = Conversation.from_messages(
+        [
+            Message.from_role_and_content(
+                Role.USER, "What is the weather in Tokyo based on where I'm at?"
+            ),
+            Message.from_role_and_content(
+                Role.ASSISTANT,
+                'User asks: "What is the weather in Tokyo?" based on their location. We need to use get_current_weather tool and get_user_location tool.',  #  noqa: E501
+            ).with_channel("analysis"),
+            Message.from_role_and_content(Role.ASSISTANT, final_content).with_channel(
+                "final"
+            ),
+        ]
+    )
+    token_ids = harmony_encoding.render_conversation_for_completion(
+        convo, Role.ASSISTANT, config=RenderConversationConfig(auto_drop_analysis=False)
+    )
+    token_ids = token_ids[:-5]  # Simulate cut-off by removing last 5 tokens
+    extracted_info = openai_tool_parser.extract_tool_calls(
+        "",
+        request=None,
+        token_ids=token_ids,
+    )
+    assert not extracted_info.tools_called
+    assert extracted_info.tool_calls == []
+
+    # The truncated final message must surface as a non-empty strict prefix.
+    partial = extracted_info.content
+    assert partial
+    assert len(partial) < len(final_content)
+    assert final_content.startswith(partial)
+
+
+def test_extract_partial_response_with_tool_call(
+    openai_tool_parser,
+    harmony_encoding,
+):
+    """Truncated final message after a tool call is returned as partial content."""
+    final_content = "Let me check the weather."
+    convo = Conversation.from_messages(
+        [
+            Message.from_role_and_content(
+                Role.USER, "What is the weather in Tokyo based on where I'm at?"
+            ),
+            Message.from_role_and_content(
+                Role.ASSISTANT,
+                'User asks: "What is the weather in Tokyo?" based on their location. We need to use get_current_weather tool and get_user_location tool.',  #  noqa: E501
+            ).with_channel("analysis"),
+            Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}')
+            .with_channel("commentary")
+            .with_recipient("functions.get_current_weather")
+            .with_content_type("json"),
+            Message.from_role_and_content(Role.ASSISTANT, final_content).with_channel(
+                "final"
+            ),
+        ]
+    )
+    token_ids = harmony_encoding.render_conversation_for_completion(
+        convo, Role.ASSISTANT, config=RenderConversationConfig(auto_drop_analysis=False)
+    )
+    token_ids = token_ids[:-5]  # Simulate cut-off by removing last 5 tokens
+
+    extracted_info = openai_tool_parser.extract_tool_calls(
+        "",
+        request=None,
+        token_ids=token_ids,
+    )
+    assert extracted_info.tools_called
+    expected_tool_calls = [
+        ToolCall(
+            function=FunctionCall(
+                name="get_current_weather",
+                arguments=json.dumps({"location": "Tokyo"}),
+            )
+        ),
+    ]
+    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
+
+    # The truncated final message must surface as a non-empty strict prefix.
+    partial = extracted_info.content
+    assert partial
+    assert len(partial) < len(final_content)
+    assert final_content.startswith(partial)

From 006b02433fbcd51997b2c0b707bfbe0a8c108b91 Mon Sep 17 00:00:00 2001
From: Marko Rosenmueller <5467316+dr75@users.noreply.github.com>
Date: Fri, 28 Nov 2025 11:06:51 +0000
Subject: [PATCH 3/3] typo

Signed-off-by: Marko Rosenmueller <5467316+dr75@users.noreply.github.com>
---
 vllm/tool_parsers/openai_tool_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py
index 9f7c5502429d..0ae5511eefd8 100644
--- a/vllm/tool_parsers/openai_tool_parser.py
+++ b/vllm/tool_parsers/openai_tool_parser.py
@@ -80,7 +80,7 @@ def extract_tool_calls(
                     commentary_content = msg_text
 
             # Check for partial responses:
-            # current content without recipient and final channel
+            # current content in final channel without recipient and final content.
             if (
                 parser.current_content
                 and final_content is None