Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 39 additions & 8 deletions amplifier_module_provider_github_copilot/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,29 @@ def convert_messages_to_prompt(messages: list[dict[str, Any]]) -> str:
assistant_text = content
tool_calls = msg.get("tool_calls", [])

# If tool_calls key is missing/empty but content blocks contain
# tool_call/tool_use entries, extract them so conversation history
# is correctly serialized (prevents lost tool context on replay).
if not tool_calls and isinstance(msg.get("content"), list):
for block in msg["content"]:
if isinstance(block, dict) and block.get("type") in (
"tool_call",
"tool_use",
):
tool_calls.append(
{
"name": block.get("name", "unknown"),
"arguments": block.get(
"input", block.get("arguments", {})
),
"id": block.get("id", ""),
}
)

if tool_calls:
# Include tool call information
# Include tool call history using XML tags that are clearly marked as
# past actions. Do NOT use [Tool Call: ...] format — the model mimics it
# and writes fake tool calls as text instead of using structured calling.
tool_parts = []
for tc in tool_calls:
tool_name = tc.get("name", tc.get("function", {}).get("name", "unknown"))
Expand All @@ -104,7 +125,9 @@ def convert_messages_to_prompt(messages: list[dict[str, Any]]) -> str:
tool_args = json.loads(tool_args)
except json.JSONDecodeError:
pass
tool_parts.append(f"[Tool Call: {tool_name}({json.dumps(tool_args)})]")
tool_parts.append(
f"<tool_used name=\"{tool_name}\">{json.dumps(tool_args)}</tool_used>"
)
if assistant_text:
parts.append(f"Assistant: {assistant_text}\n" + "\n".join(tool_parts))
else:
Expand All @@ -114,12 +137,12 @@ def convert_messages_to_prompt(messages: list[dict[str, Any]]) -> str:
elif role == "tool":
# Tool results from Amplifier's tool execution
tool_name = msg.get("tool_name", msg.get("name", "tool"))
parts.append(f"Tool Result ({tool_name}): {content}")
parts.append(f"<tool_result name=\"{tool_name}\">{content}</tool_result>")
elif role == "function":
# Legacy function role (deprecated by OpenAI in favor of 'tool')
# Handle as alias for tool result for backward compatibility
func_name = msg.get("name", "function")
parts.append(f"Tool Result ({func_name}): {content}")
parts.append(f"<tool_result name=\"{func_name}\">{content}</tool_result>")
else:
# Unknown role, treat as user
logger.warning(f"[CONVERTER] Unknown message role: {role}")
Expand Down Expand Up @@ -155,13 +178,21 @@ def _extract_content(msg: dict[str, Any]) -> str:
if isinstance(block, str):
text_parts.append(block)
elif isinstance(block, dict):
if block.get("type") == "text":
block_type = block.get("type", "")
if block_type in ("tool_call", "tool_use", "tool_result"):
# Skip tool call/result blocks — they are not text content
# and must not leak into the serialized prompt
continue
if block_type == "text":
text_parts.append(block.get("text", ""))
elif block.get("type") == "image_url":
elif block_type == "image_url":
text_parts.append("[Image]")
elif block_type == "thinking":
continue # thinking blocks are not user-visible text
else:
# Unknown block type
text_parts.append(str(block.get("text", block.get("content", ""))))
text_val = block.get("text", block.get("content", ""))
if text_val:
text_parts.append(str(text_val))
return "\n".join(text_parts)

# Fallback
Expand Down
69 changes: 69 additions & 0 deletions amplifier_module_provider_github_copilot/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

import asyncio
import logging
import re
import time
from collections import OrderedDict
from typing import Any
Expand Down Expand Up @@ -1010,6 +1011,74 @@ async def _on_retry(attempt: int, delay: float, error: KernelLLMError) -> None:
response = await retry_with_backoff(_do_complete, self._retry_config, on_retry=_on_retry)
elapsed_ms = int((time.time() - outer_start) * 1000)

# ── Fix 2: Defensive detection of fake tool calls ──────────────
# When the LLM writes tool calls as plain text instead of issuing
# structured tool_requests, the orchestrator would display fake
# results that were never actually executed. Detect this and
# retry with a correction message (up to 2 times).
_FAKE_TOOL_CALL_RE = re.compile(
r"\[Tool Call:\s*\w+\(" # [Tool Call: name(
r"|Tool Result \(\w+\):" # Tool Result (name):
r"|<tool_used\s+name=" # <tool_used name= (XML format mimicked)
)
_MAX_FAKE_TC_RETRIES = 2

if request_tools and not response.tool_calls:
# Extract all text from content blocks
response_text = ""
for block in response.content or []:
if hasattr(block, "text"):
response_text += block.text

fake_retry = 0
while (
_FAKE_TOOL_CALL_RE.search(response_text)
and fake_retry < _MAX_FAKE_TC_RETRIES
):
fake_retry += 1
logger.warning(
f"[PROVIDER] Detected fake tool call text in response "
f"(retry {fake_retry}/{_MAX_FAKE_TC_RETRIES}). "
f"Re-prompting LLM to use structured tool calls."
)
await self._emit_event(
"provider:fake_tool_retry",
{
"provider": self.name,
"model": model,
"retry": fake_retry,
},
)

# Append a correction hint to the messages and re-complete
correction_msg = {
"role": "user",
"content": (
"IMPORTANT: You just wrote tool calls as plain text "
"instead of invoking them. That text was discarded. "
"You MUST use the structured tool calling mechanism "
"provided by the system — do NOT write tool names, "
"arguments, or results as text. Retry now using real "
"tool calls."
),
}
messages.append(correction_msg)
prompt = convert_messages_to_prompt(messages)

response = await retry_with_backoff(
_do_complete, self._retry_config, on_retry=_on_retry
)

# Re-check text
if response.tool_calls:
break
response_text = ""
for block in response.content or []:
if hasattr(block, "text"):
response_text += block.text

elapsed_ms = int((time.time() - outer_start) * 1000)

if self._debug:
content_preview = self._truncate(str(response.content))
logger.debug(f"[PROVIDER] Response content: {content_preview}")
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/test_multi_model_saturation.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ async def run_scenario(model: str, turns: int, prompt: str, tag: str) -> dict[st

# Check how many tool call patterns exist in the serialized prompt
serialized_prompt = convert_messages_to_prompt(messages)
tc_text_count = len(re.findall(r'\[Tool Call:', serialized_prompt))
tc_text_count = len(re.findall(r'<tool_used\s+name=', serialized_prompt))

request = Mock()
request.messages = messages
Expand Down
28 changes: 14 additions & 14 deletions tests/test_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def test_tool_result_message(self):
}
]
result = convert_messages_to_prompt(messages)
assert "Tool Result (read_file): file contents here" in result
assert '<tool_result name="read_file">file contents here</tool_result>' in result

def test_assistant_with_tool_calls(self):
"""Should include tool call information in assistant message."""
Expand All @@ -98,7 +98,7 @@ def test_assistant_with_tool_calls(self):

assert "Assistant:" in result
assert "Let me check that." in result
assert "[Tool Call: read_file" in result
assert '<tool_used name="read_file">' in result

def test_list_content_blocks(self):
"""Should handle OpenAI-style list content blocks."""
Expand Down Expand Up @@ -286,7 +286,7 @@ def test_tool_call_arguments_as_json_string(self):
]
result = convert_messages_to_prompt(messages)

assert "[Tool Call: search" in result
assert '<tool_used name="search">' in result
# Arguments should be parsed and re-serialized
assert "python" in result

Expand All @@ -308,7 +308,7 @@ def test_tool_call_arguments_invalid_json_string(self):
result = convert_messages_to_prompt(messages)

# Should not crash, include the tool call somehow
assert "[Tool Call: custom" in result
assert '<tool_used name="custom">' in result

def test_assistant_with_tool_calls_no_content(self):
"""Should handle assistant message with tool calls but no text."""
Expand All @@ -328,7 +328,7 @@ def test_assistant_with_tool_calls_no_content(self):
result = convert_messages_to_prompt(messages)

assert "Assistant:" in result
assert "[Tool Call: read_file" in result
assert '<tool_used name="read_file">' in result

def test_unknown_role_message(self):
"""Should handle unknown message roles gracefully."""
Expand Down Expand Up @@ -381,7 +381,7 @@ def test_tool_name_fallback(self):
]
result = convert_messages_to_prompt(messages)

assert "Tool Result" in result
assert "<tool_result" in result
assert "result content" in result

def test_tool_call_with_function_format(self):
Expand All @@ -403,7 +403,7 @@ def test_tool_call_with_function_format(self):
]
result = convert_messages_to_prompt(messages)

assert "[Tool Call: get_weather" in result
assert '<tool_used name="get_weather">' in result
assert "Seattle" in result


Expand Down Expand Up @@ -553,7 +553,7 @@ def test_function_message_formatted_as_tool_result(self):
result = convert_messages_to_prompt(messages)

# Should be formatted as tool result for consistency
assert "Tool Result (get_weather):" in result
assert '<tool_result name="get_weather">' in result
assert "72 degrees, sunny" in result

def test_function_message_no_name_fallback(self):
Expand All @@ -562,7 +562,7 @@ def test_function_message_no_name_fallback(self):
result = convert_messages_to_prompt(messages)

# Should use 'function' as fallback name
assert "Tool Result (function):" in result
assert '<tool_result name="function">' in result
assert "result content" in result

def test_mixed_tool_and_function_roles(self):
Expand All @@ -574,8 +574,8 @@ def test_mixed_tool_and_function_roles(self):
result = convert_messages_to_prompt(messages)

# Both should be serialized as tool_result XML elements
assert "Tool Result (read_file): file A contents" in result
assert "Tool Result (write_file): file B written" in result
assert '<tool_result name="read_file">file A contents</tool_result>' in result
assert '<tool_result name="write_file">file B written</tool_result>' in result


class TestAllSixRolesIntegration:
Expand Down Expand Up @@ -612,10 +612,10 @@ def test_all_six_roles_in_conversation(self):

# Assistant with tool call
assert "Assistant:" in result
assert "[Tool Call: get_time" in result
assert '<tool_used name="get_time">' in result

# Tool result
assert "Tool Result (get_time): 10:30 AM" in result
assert '<tool_result name="get_time">10:30 AM</tool_result>' in result

# Function as legacy tool result
assert "Tool Result (legacy_func): legacy result" in result
assert '<tool_result name="legacy_func">legacy result</tool_result>' in result