Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 40 additions & 10 deletions vllm/entrypoints/harmony_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
from openai.types.responses.response_reasoning_item import (
Content as ResponseReasoningTextContent)
from openai.types.responses.tool import Tool
from openai_harmony import (Author, ChannelConfig, Conversation,
DeveloperContent, HarmonyEncodingName, Message,
ReasoningEffort, Role, StreamableParser,
from openai_harmony import (Author, ChannelConfig, DeveloperContent,
HarmonyEncodingName, Message, ReasoningEffort,
RenderOptions, Role, StreamableParser,
SystemContent, TextContent, ToolDescription,
load_harmony_encoding)

Expand Down Expand Up @@ -213,14 +213,18 @@ def parse_chat_input(chat_msg) -> list[Message]:
tool_calls = chat_msg.get("tool_calls")
if role == "assistant" and tool_calls:
msgs: list[Message] = []
content = chat_msg.get("content") or ""
analysis_msg = Message.from_role_and_content(Role.ASSISTANT, content)
analysis_msg = analysis_msg.with_channel("analysis")
msgs.append(analysis_msg)

for call in tool_calls:
func = call.get("function", {})
name = func.get("name", "")
arguments = func.get("arguments", "") or ""
msg = Message.from_role_and_content(Role.ASSISTANT, arguments)
msg = msg.with_channel("commentary")
msg = msg.with_recipient(f"functions.{name}")
msg = msg.with_content_type("json")
msg = msg.with_channel(f"commentary to=functions.{name}")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't seem to be a valid channel?

Copy link
Contributor Author

@levunet levunet Sep 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The harmony library function failed to add data properly, so it was temporarily added to the channel. If not added, the data would be passed to gpt-oss in the 'before' data format, and as this content accumulates, errors occur in the output structure.

use:
with_recipient(f"functions.{name}")

data:
<|start|>assistant to=functions.{name}<|channel|>

Copy link
Contributor Author

@levunet levunet Sep 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As errors accumulate, the following data will be output:

gpt-oss fail e.g.
"<|start|>assistant<|channel|>commentary to=functions.name>{}<|call|>"
"<|start|>assistant<|channel|>analysis to=functions.file_read <|constrain|>json<|message|>{}<|call|>"

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this sounds more like an issue in harmony library. could you raise an issue there?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you add a unit test for the encoding result?

msg.with_content_type("json")
msgs.append(msg)
return msgs

Expand All @@ -230,7 +234,7 @@ def parse_chat_input(chat_msg) -> list[Message]:
content = chat_msg.get("content", "") or ""
msg = Message.from_author_and_content(
Author.new(Role.TOOL, f"functions.{name}"),
content).with_channel("commentary")
content).with_channel("commentary").with_recipient("assistant")
return [msg]

# Default: user/assistant/system messages with content
Expand All @@ -245,9 +249,35 @@ def parse_chat_input(chat_msg) -> list[Message]:


def render_for_completion(messages: list[Message]) -> list[int]:
    """Render a Harmony conversation into token ids ready for completion.

    Renders each message individually (rather than via
    ``render_conversation_for_completion``) so that historical assistant
    tool-call messages can be re-terminated with ``<|call|>`` instead of
    ``<|end|>``, matching the token stream the model originally produced.

    Args:
        messages: Ordered Harmony ``Message`` objects for the conversation.

    Returns:
        Token ids for the rendered conversation, ending with the
        ``<|start|>assistant`` priming tokens. Empty list for no messages.
    """
    if not messages:
        return []

    encoding = get_encoding()
    token_ids: list[int] = []
    end_token_ids = encoding.encode("<|end|>", allowed_special={"<|end|>"})
    call_token_ids = encoding.encode("<|call|>", allowed_special={"<|call|>"})

    def _declares_function_tools(msg: Message) -> bool:
        # A developer message whose first content item carries a non-empty
        # "functions" tool namespace means the conversation defines custom
        # tools; the renderer then adds the commentary-channel instruction
        # to the system prompt.
        if msg.author.role != Role.DEVELOPER or not msg.content:
            # Guard empty content: indexing msg.content[0] unguarded would
            # raise IndexError.
            return False
        tools = getattr(msg.content[0], "tools", None)
        # Use .get: a tools mapping without a "functions" key must not
        # raise KeyError, it simply means no custom function tools.
        return tools is not None and tools.get("functions") is not None

    has_function_tools = any(_declares_function_tools(m) for m in messages)

    for i, msg in enumerate(messages):
        msg_tokens = encoding.render(
            msg,
            RenderOptions(conversation_has_function_tools=has_function_tools))
        # Assistant messages routed to a functions.* recipient are tool
        # calls; any such message that is not the last one should end with
        # <|call|> (as originally emitted) rather than the <|end|> the
        # renderer appends, so patch the final token in place.
        is_tool_call = (msg.author.role == Role.ASSISTANT and msg.channel
                        and "functions." in msg.channel)
        if (i < len(messages) - 1 and is_tool_call and end_token_ids
                and call_token_ids and msg_tokens
                and msg_tokens[-1] == end_token_ids[0]):
            msg_tokens[-1] = call_token_ids[0]
        token_ids.extend(msg_tokens)

    # Prime the model to continue generating as the assistant.
    start_assistant_tokens = encoding.encode("<|start|>assistant",
                                             allowed_special={"<|start|>"})
    token_ids.extend(start_assistant_tokens)

    return token_ids


Expand Down
4 changes: 3 additions & 1 deletion vllm/entrypoints/openai/serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1569,7 +1569,9 @@ def _make_request_with_harmony(
sys_msg = get_system_message(
reasoning_effort=request.reasoning_effort,
browser_description=None,
python_description=None)
python_description=None,
with_custom_tools=request.tools is not None
)
messages.append(sys_msg)

# Add developer message.
Expand Down
Loading