microsoft · mowree · Mar 2, 2026 · Feb 28, 2026 · Feb 28, 2026 · Feb 28, 2026
diff --git a/amplifier_module_provider_github_copilot/converters.py b/amplifier_module_provider_github_copilot/converters.py
@@ -93,6 +93,25 @@ def convert_messages_to_prompt(messages: list[dict[str, Any]]) -> str:
             assistant_text = content
             tool_calls = msg.get("tool_calls", [])
 
+            # If tool_calls key is missing/empty but content blocks contain
+            # tool_call/tool_use entries, extract them so conversation history
+            # is correctly serialized (prevents lost tool context on replay).
+            if not tool_calls and isinstance(msg.get("content"), list):
+                # Build a fresh list rather than appending: leaves the caller's
+                # msg["tool_calls"] untouched and tolerates an explicit None.
+                tool_calls = [
+                    {
+                        "name": block.get("name", "unknown"),
+                        "arguments": block.get(
+                            "input", block.get("arguments", {})
+                        ),
+                        "id": block.get("id", ""),
+                    }
+                    for block in msg["content"]
+                    if isinstance(block, dict)
+                    and block.get("type") in ("tool_call", "tool_use")
+                ]
+
             if tool_calls:
                 # Include tool call information
                 tool_parts = []
@@ -155,9 +174,14 @@ def _extract_content(msg: dict[str, Any]) -> str:
             if isinstance(block, str):
                 text_parts.append(block)
             elif isinstance(block, dict):
-                if block.get("type") == "text":
+                block_type = block.get("type", "")
+                if block_type in ("tool_call", "tool_use"):
+                    # Skip tool call blocks — they are not text content
+                    # and must not leak into the serialized prompt
+                    continue
+                if block_type == "text":
                     text_parts.append(block.get("text", ""))
-                elif block.get("type") == "image_url":
+                elif block_type == "image_url":
                     text_parts.append("[Image]")
                 else:
                     # Unknown block type

diff --git a/amplifier_module_provider_github_copilot/provider.py b/amplifier_module_provider_github_copilot/provider.py
@@ -32,6 +32,7 @@
 
 import asyncio
 import logging
+import re
 import time
 from collections import OrderedDict
 from typing import Any
@@ -1010,6 +1011,68 @@ async def _on_retry(attempt: int, delay: float, error: KernelLLMError) -> None:
         response = await retry_with_backoff(_do_complete, self._retry_config, on_retry=_on_retry)
         elapsed_ms = int((time.time() - outer_start) * 1000)
 
+        # ── Fix 2: Defensive detection of fake tool calls ──────────────
+        # When the LLM writes "[Tool Call: ...]" as plain text instead of
+        # issuing structured tool_requests, the orchestrator would display
+        # fake results that were never actually executed.  Detect this and
+        # retry with a correction message (up to 2 times).
+        _FAKE_TOOL_CALL_RE = re.compile(r"\[Tool Call:")
+        _MAX_FAKE_TC_RETRIES = 2
+
+        if request_tools and not response.tool_calls:
+            # Extract all text from content blocks
+            response_text = ""
+            for block in response.content or []:
+                if hasattr(block, "text"):
+                    response_text += block.text
+
+            fake_retry = 0
+            while (
+                _FAKE_TOOL_CALL_RE.search(response_text)
+                and fake_retry < _MAX_FAKE_TC_RETRIES
+            ):
+                fake_retry += 1
+                logger.warning(
+                    f"[PROVIDER] Detected fake tool call text in response "
+                    f"(retry {fake_retry}/{_MAX_FAKE_TC_RETRIES}). "
+                    f"Re-prompting LLM to use structured tool calls."
+                )
+                await self._emit_event(
+                    "provider:fake_tool_retry",
+                    {
+                        "provider": self.name,
+                        "model": model,
+                        "retry": fake_retry,
+                    },
+                )
+
+                # Append a correction hint to the messages and re-complete
+                correction_msg = {
+                    "role": "user",
+                    "content": (
+                        "You wrote tool calls as plain text instead of using "
+                        "the actual tool calling mechanism. Do NOT write "
+                        "'[Tool Call: ...]' as text. Use the structured tool "
+                        "calling API to invoke tools."
+                    ),
+                }
+                messages.append(correction_msg)
+                prompt = convert_messages_to_prompt(messages)
+
+                response = await retry_with_backoff(
+                    _do_complete, self._retry_config, on_retry=_on_retry
+                )
+
+                # Re-check text
+                if response.tool_calls:
+                    break
+                response_text = ""
+                for block in response.content or []:
+                    if hasattr(block, "text"):
+                        response_text += block.text
+
+            elapsed_ms = int((time.time() - outer_start) * 1000)
+
         if self._debug:
             content_preview = self._truncate(str(response.content))
             logger.debug(f"[PROVIDER] Response content: {content_preview}")