From fe70594e003f748eacccfd1a2320b8d1fd15ab01 Mon Sep 17 00:00:00 2001 From: Marko Rosenmueller <5467316+dr75@users.noreply.github.com> Date: Wed, 5 Nov 2025 11:32:10 +0000 Subject: [PATCH 1/3] [Bugfix]: missing partial content when gpt-oss tool calling is enabled Signed-off-by: Marko Rosenmueller <5467316+dr75@users.noreply.github.com> --- vllm/tool_parsers/openai_tool_parser.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py index db92ea8982d7..9f7c5502429d 100644 --- a/vllm/tool_parsers/openai_tool_parser.py +++ b/vllm/tool_parsers/openai_tool_parser.py @@ -79,6 +79,16 @@ def extract_tool_calls( elif msg.channel == "commentary" and not msg.recipient: commentary_content = msg_text + # Check for partial responses: + # current content without recipient and final channel + if ( + parser.current_content + and final_content is None + and parser.current_recipient is None + and parser.current_channel in [None, "final"] + ): + final_content = parser.current_content + return ExtractedToolCallInformation( tools_called=len(tool_calls) > 0, tool_calls=tool_calls, From 59dd664169e8d4b6d1eab0620487bf48fd720470 Mon Sep 17 00:00:00 2001 From: Marko Rosenmueller <5467316+dr75@users.noreply.github.com> Date: Wed, 5 Nov 2025 11:32:37 +0000 Subject: [PATCH 2/3] test Signed-off-by: Marko Rosenmueller <5467316+dr75@users.noreply.github.com> --- tests/tool_parsers/test_openai_tool_parser.py | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/tests/tool_parsers/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py index 44b8c92745e9..6e984936b0d2 100644 --- a/tests/tool_parsers/test_openai_tool_parser.py +++ b/tests/tool_parsers/test_openai_tool_parser.py @@ -9,6 +9,7 @@ DeveloperContent, HarmonyEncodingName, Message, + RenderConversationConfig, Role, SystemContent, load_harmony_encoding, @@ -261,3 +262,91 @@ def test_extract_tool_calls_with_content( ] 
assert_tool_calls(extracted_info.tool_calls, expected_tool_calls) assert extracted_info.content == final_content + + +def test_extract_partial_response_no_tools(openai_tool_parser, harmony_encoding): + """Test partial response without tool calls where final_content is cut off.""" + final_content = "This is a partial response." + convo = Conversation.from_messages( + [ + Message.from_role_and_content( + Role.USER, "What is the weather in Tokyo based on where I'm at?" + ), + Message.from_role_and_content( + Role.ASSISTANT, + 'User asks: "What is the weather in Tokyo?" based on their location. We need to use get_current_weather tool and get_user_location tool.', # noqa: E501 + ).with_channel("analysis"), + Message.from_role_and_content(Role.ASSISTANT, final_content).with_channel( + "final" + ), + ] + ) + token_ids = harmony_encoding.render_conversation_for_completion( + convo, Role.ASSISTANT, config=RenderConversationConfig(auto_drop_analysis=False) + ) + token_ids = token_ids[:-5] # Simulate cut-off by removing last 5 tokens + extracted_info = openai_tool_parser.extract_tool_calls( + "", + request=None, + token_ids=token_ids, + ) + assert not extracted_info.tools_called + assert extracted_info.tool_calls == [] + assert extracted_info.content + assert len(extracted_info.content) > 0 + + print(extracted_info.content) + assert len(extracted_info.content) < len(final_content) + assert extracted_info.content == final_content[: len(extracted_info.content)] + + +def test_extract_partial_response_with_tool_call( + openai_tool_parser, + harmony_encoding, +): + """Test partial response with tool call where final_content is cut off.""" + final_content = "Let me check the weather." + convo = Conversation.from_messages( + [ + Message.from_role_and_content( + Role.USER, "What is the weather in Tokyo based on where I'm at?" + ), + Message.from_role_and_content( + Role.ASSISTANT, + 'User asks: "What is the weather in Tokyo?" based on their location. 
We need to use get_current_weather tool and get_user_location tool.', # noqa: E501 + ).with_channel("analysis"), + Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}') + .with_channel("commentary") + .with_recipient("functions.get_current_weather") + .with_content_type("json"), + Message.from_role_and_content(Role.ASSISTANT, final_content).with_channel( + "final" + ), + ] + ) + token_ids = harmony_encoding.render_conversation_for_completion( + convo, Role.ASSISTANT, config=RenderConversationConfig(auto_drop_analysis=False) + ) + token_ids = token_ids[:-5] # Simulate cut-off by removing last 5 tokens + + extracted_info = openai_tool_parser.extract_tool_calls( + "", + request=None, + token_ids=token_ids, + ) + assert extracted_info.tools_called + expected_tool_calls = [ + ToolCall( + function=FunctionCall( + name="get_current_weather", + arguments=json.dumps({"location": "Tokyo"}), + ) + ), + ] + assert_tool_calls(extracted_info.tool_calls, expected_tool_calls) + assert extracted_info.content + assert len(extracted_info.content) > 0 + + print(extracted_info.content) + assert len(extracted_info.content) < len(final_content) + assert extracted_info.content == final_content[: len(extracted_info.content)] From 006b02433fbcd51997b2c0b707bfbe0a8c108b91 Mon Sep 17 00:00:00 2001 From: Marko Rosenmueller <5467316+dr75@users.noreply.github.com> Date: Fri, 28 Nov 2025 11:06:51 +0000 Subject: [PATCH 3/3] typo Signed-off-by: Marko Rosenmueller <5467316+dr75@users.noreply.github.com> --- vllm/tool_parsers/openai_tool_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py index 9f7c5502429d..0ae5511eefd8 100644 --- a/vllm/tool_parsers/openai_tool_parser.py +++ b/vllm/tool_parsers/openai_tool_parser.py @@ -80,7 +80,7 @@ def extract_tool_calls( commentary_content = msg_text # Check for partial responses: - # current content without recipient and final channel + # 
current content in the final channel (or with no channel set), with no recipient and no final content extracted yet. if ( parser.current_content and final_content is None