Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions tests/entrypoints/openai/test_serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ async def test_gpt_oss_multi_turn_chat(gptoss_client: OpenAI,
assert tc.function is not None and tc.function.name == "get_current_weather"
args1 = tc.function.arguments
assert args1 is not None and len(args1) > 0
assert not first_msg.content

messages.append({"role": "assistant", "content": args1})
messages.append({
Expand Down
76 changes: 74 additions & 2 deletions tests/tool_use/test_openai_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,12 @@ def test_extract_tool_calls_no_tools(openai_tool_parser, harmony_encoding):
assert extracted_info.content == "This is a test"


def test_extract_tool_calls_single_tool(openai_tool_parser, harmony_encoding):
@pytest.mark.parametrize("tool_args", [
'{"location": "Tokyo"}',
'{\n"location": "Tokyo"\n}',
])
def test_extract_tool_calls_single_tool(openai_tool_parser, harmony_encoding,
tool_args):
convo = Conversation.from_messages([
Message.from_role_and_content(Role.USER,
"What is the weather in Tokyo?"),
Expand All @@ -80,7 +85,7 @@ def test_extract_tool_calls_single_tool(openai_tool_parser, harmony_encoding):
).with_channel("analysis"),
Message.from_role_and_content(
Role.ASSISTANT,
'{"location": "Tokyo"}').with_channel("commentary").with_recipient(
tool_args).with_channel("commentary").with_recipient(
"functions.get_current_weather").with_content_type("json"),
])
token_ids = harmony_encoding.render_conversation_for_completion(
Expand Down Expand Up @@ -121,6 +126,17 @@ def test_extract_tool_calls_multiple_tools(
Role.ASSISTANT,
'{"location": "Tokyo"}').with_channel("commentary").with_recipient(
"functions.get_user_location").with_content_type("json"),
Message.from_role_and_content(
Role.ASSISTANT, '{"location": "Tokyo"}').with_channel(
"commentary").with_recipient("functions.no_content_type"),
Message.from_role_and_content(Role.ASSISTANT, "foo").with_channel(
"commentary").with_recipient("functions.not_json_no_content_type"),
Message.from_role_and_content(
Role.ASSISTANT, '{}').with_channel("commentary").with_recipient(
"functions.empty_args").with_content_type("json"),
Message.from_role_and_content(
Role.ASSISTANT, '').with_channel("commentary").with_recipient(
"functions.no_args").with_content_type("json"),
])
token_ids = harmony_encoding.render_conversation_for_completion(
convo,
Expand All @@ -141,7 +157,63 @@ def test_extract_tool_calls_multiple_tools(
ToolCall(function=FunctionCall(
name="get_user_location",
arguments=json.dumps({"location": "Tokyo"}),
)),
ToolCall(function=FunctionCall(
name="no_content_type",
arguments=json.dumps({"location": "Tokyo"}),
)),
ToolCall(function=FunctionCall(
name="not_json_no_content_type",
arguments="foo",
)),
ToolCall(function=FunctionCall(
name="empty_args",
arguments=json.dumps({}),
)),
ToolCall(function=FunctionCall(
name="no_args",
arguments="",
))
]
assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
assert extracted_info.content is None


def test_extract_tool_calls_with_content(
openai_tool_parser,
harmony_encoding,
):
final_content = "This tool call will get the weather."
convo = Conversation.from_messages([
Message.from_role_and_content(
Role.USER, "What is the weather in Tokyo based on where I'm at?"),
Message.from_role_and_content(
Role.ASSISTANT,
'User asks: "What is the weather in Tokyo?" based on their location. We need to use get_current_weather tool and get_user_location tool.', # noqa: E501
).with_channel("analysis"),
Message.from_role_and_content(
Role.ASSISTANT,
'{"location": "Tokyo"}').with_channel("commentary").with_recipient(
"functions.get_current_weather").with_content_type("json"),
Message.from_role_and_content(Role.ASSISTANT,
final_content).with_channel("final"),
])
token_ids = harmony_encoding.render_conversation_for_completion(
convo,
Role.ASSISTANT,
)

extracted_info = openai_tool_parser.extract_tool_calls(
"",
request=None,
token_ids=token_ids,
)
assert extracted_info.tools_called
expected_tool_calls = [
ToolCall(function=FunctionCall(
name="get_current_weather",
arguments=json.dumps({"location": "Tokyo"}),
)),
]
assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
assert extracted_info.content == final_content
13 changes: 5 additions & 8 deletions vllm/entrypoints/openai/serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1186,6 +1186,10 @@ async def chat_completion_full_generator(
logprobs = None

if self.use_harmony:
reasoning_content, content, _ = parse_chat_output(token_ids)
if not request.include_reasoning:
reasoning_content = None

if self.tool_parser is not None:
tool_parser = self.tool_parser(tokenizer)
# NOTE: We use token_ids for openai tool parser
Expand All @@ -1194,21 +1198,14 @@ async def chat_completion_full_generator(
request=request,
token_ids=token_ids, # type: ignore
)
reasoning_content, content = None, tool_call_info.content
if request.include_reasoning:
reasoning_content, content, _ = parse_chat_output(
token_ids)
content = tool_call_info.content
message = ChatMessage(
role=role,
reasoning_content=reasoning_content,
content=content,
tool_calls=tool_call_info.tool_calls,
)
else:
reasoning_content, content, _ = parse_chat_output(
token_ids)
if not request.include_reasoning:
reasoning_content = None
message = ChatMessage(
role=role,
reasoning_content=reasoning_content,
Expand Down
24 changes: 22 additions & 2 deletions vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from __future__ import annotations

import json
from collections.abc import Sequence
from typing import TYPE_CHECKING

Expand All @@ -12,10 +13,13 @@
FunctionCall, ToolCall)
from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
ToolParser, ToolParserManager)
from vllm.logger import init_logger

if TYPE_CHECKING:
from vllm.transformers_utils.tokenizer import AnyTokenizer

logger = init_logger(__name__)


@ToolParserManager.register_module("openai")
class OpenAIToolParser(ToolParser):
Expand All @@ -40,17 +44,33 @@ def extract_tool_calls(

if len(parser.messages) > 0:
for msg in parser.messages:
if len(msg.content) < 1:
continue
msg_text = msg.content[0].text
if msg.recipient and msg.recipient.startswith("functions."):
# If no content-type is given assume JSON, as that's the
# most common case with gpt-oss models.
if not msg.content_type or "json" in msg.content_type:
# load and dump the JSON text to check validity and
# remove any extra newlines or other odd formatting
try:
tool_args = json.dumps(json.loads(msg_text))
except json.JSONDecodeError:
logger.exception(
"Error decoding JSON tool call from response.")
tool_args = msg_text
else:
tool_args = msg_text
tool_calls.append(
ToolCall(
type="function",
function=FunctionCall(
name=msg.recipient.split("functions.")[1],
arguments=msg.content[0].text,
arguments=tool_args,
),
))
elif msg.channel == "final":
final_content = msg.content[0].text
final_content = msg_text

return ExtractedToolCallInformation(
tools_called=len(tool_calls) > 0,
Expand Down