63 changes: 49 additions & 14 deletions src/copaw/local_models/tag_parser.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 """Parse special tags from model-generated text.
 
-Handles ``<think>...</think>`` (reasoning) and
+Handles ``<think>...</think>`` / ``<thought>...</thought>`` (reasoning) and
 ``<tool_call>...</tool_call>`` (function calling) tags that local models
 like Qwen3-Instruct embed in their raw text output.
 """
@@ -23,6 +23,9 @@
 THINK_START = "<think>"
 THINK_END = "</think>"
 
+THOUGHT_START = "<thought>"
+THOUGHT_END = "</thought>"
+
 TOOL_CALL_START = "<tool_call>"
 TOOL_CALL_END = "</tool_call>"
 
@@ -32,6 +35,12 @@
     re.DOTALL,
 )
 
+# Regex to find a complete <thought>...</thought> block (non-greedy).
+_THOUGHT_RE = re.compile(
+    r"<thought>(.*?)</thought>",
+    re.DOTALL,
+)
+
 # Regex to find complete <tool_call>...</tool_call> blocks (non-greedy).
 _TOOL_CALL_RE = re.compile(
     r"<tool_call>\s*(.*?)\s*</tool_call>",
@@ -269,19 +278,26 @@ def _parse_single_tool_call(raw_text: str) -> ParsedToolCall | None:
 
 
 def text_contains_think_tag(text: str) -> bool:
-    """Fast substring check for a ``<think>`` tag."""
-    return THINK_START in text
+    """Fast substring check for a ``<think>`` or ``<thought>`` tag."""
+    return THINK_START in text or THOUGHT_START in text
 
 
 def extract_thinking_from_text(text: str) -> TextWithThinking:
-    """Extract ``<think>...</think>`` content from *text*.
+    """Extract ``<think>...</think>`` or ``<thought>...</thought>`` content
+    from *text*.
+
+    Both tag variants are supported: ``<think>`` (used by models such as
+    Qwen3-Instruct) and ``<thought>`` (used by some other providers).
 
     Returns a :class:`TextWithThinking` with:
 
     * ``thinking`` – the reasoning content (empty if none found)
-    * ``remaining_text`` – everything outside the think tags
-    * ``has_open_tag`` – ``True`` if ``<think>`` opened but not closed yet
+    * ``remaining_text`` – everything outside the think/thought tags
+    * ``has_open_tag`` – ``True`` if an opening tag was found but the
+      matching closing tag has not yet been seen
+      (streaming scenario)
     """
+    # Try <think>...</think> first.
     match = _THINK_RE.search(text)
     if match:
         thinking = match.group(1).strip()
@@ -291,18 +307,37 @@ def extract_thinking_from_text(text: str) -> TextWithThinking:
             remaining_text=remaining,
         )
 
-    # No complete block — check for an unclosed <think>.
-    open_idx = text.find(THINK_START)
-    if open_idx != -1:
-        remaining = text[:open_idx].strip()
-        partial = text[open_idx + len(THINK_START) :]
+    # Try <thought>...</thought>.
+    match = _THOUGHT_RE.search(text)
+    if match:
+        thinking = match.group(1).strip()
+        remaining = (text[: match.start()] + text[match.end() :]).strip()
         return TextWithThinking(
-            thinking=partial.strip(),
+            thinking=thinking,
             remaining_text=remaining,
-            has_open_tag=True,
         )
 
-    return TextWithThinking(remaining_text=text)
+    # No complete block — check for an unclosed <think> or <thought>.
+    think_idx = text.find(THINK_START)
+    thought_idx = text.find(THOUGHT_START)
+
+    # Pick whichever open tag appears first (if both present).
+    if think_idx != -1 and (thought_idx == -1 or think_idx <= thought_idx):
+        open_idx = think_idx
+        open_tag_len = len(THINK_START)
+    elif thought_idx != -1:
+        open_idx = thought_idx
+        open_tag_len = len(THOUGHT_START)
+    else:
+        return TextWithThinking(remaining_text=text)
+
+    remaining = text[:open_idx].strip()
+    partial = text[open_idx + open_tag_len :]
+    return TextWithThinking(
+        thinking=partial.strip(),
+        remaining_text=remaining,
+        has_open_tag=True,
+    )
 
 
 def text_contains_tool_call_tag(text: str) -> bool:
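For reviewers, a minimal usage sketch of the new dual-tag behavior. It assumes only what the diff shows, plus one assumption: that `TextWithThinking` defaults `has_open_tag` to False when it is not passed explicitly.

    from copaw.local_models.tag_parser import extract_thinking_from_text

    # Complete <thought> block: reasoning is extracted, tags are stripped.
    result = extract_thinking_from_text(
        "<thought>Check the units first.</thought>The answer is 42.",
    )
    assert result.thinking == "Check the units first."
    assert result.remaining_text == "The answer is 42."
    assert result.has_open_tag is False

    # Streaming chunk with an unclosed <think>: the partial reasoning is
    # captured and has_open_tag signals that more content is coming.
    partial = extract_thinking_from_text("Intro text <think>step one, then")
    assert partial.thinking == "step one, then"
    assert partial.remaining_text == "Intro text"
    assert partial.has_open_tag is True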
84 changes: 44 additions & 40 deletions src/copaw/providers/capability_baseline.py
@@ -268,16 +268,17 @@ def _load_baseline(self) -> None:
                 note="GLM text/code models are text-only",
             ),
         )
-        self._register(
-            ExpectedCapability(
-                provider_id="zhipu-cn",
-                model_id="glm-5v-turbo",
-                expected_image=True,
-                expected_video=False,
-                doc_url=_zhipu_cn_doc,
-                note="GLM vision model supports image input",
-            ),
-        )
+        for mid in ("glm-5v-turbo", "glm-4v", "glm-4v-plus", "glm-4v-flash", "glm-4.6v-flash"):
+            self._register(
+                ExpectedCapability(
+                    provider_id="zhipu-cn",
+                    model_id=mid,
+                    expected_image=True,
+                    expected_video=False,
+                    doc_url=_zhipu_cn_doc,
+                    note="GLM vision model supports image input",
+                ),
+            )
 
         # ---------------------------------------------------------------
         # Zhipu Coding Plan (BigModel)
@@ -294,16 +295,17 @@ def _load_baseline(self) -> None:
                 note="GLM text/code models are text-only",
             ),
         )
-        self._register(
-            ExpectedCapability(
-                provider_id="zhipu-cn-codingplan",
-                model_id="glm-5v-turbo",
-                expected_image=True,
-                expected_video=False,
-                doc_url=_zhipu_cn_cp_doc,
-                note="GLM vision model supports image input",
-            ),
-        )
+        for mid in ("glm-5v-turbo", "glm-4v", "glm-4v-plus", "glm-4v-flash", "glm-4.6v-flash"):
+            self._register(
+                ExpectedCapability(
+                    provider_id="zhipu-cn-codingplan",
+                    model_id=mid,
+                    expected_image=True,
+                    expected_video=False,
+                    doc_url=_zhipu_cn_cp_doc,
+                    note="GLM vision model supports image input",
+                ),
+            )
 
         # ---------------------------------------------------------------
         # Zhipu (Z.AI)
@@ -320,16 +322,17 @@ def _load_baseline(self) -> None:
                 note="GLM text/code models are text-only",
             ),
         )
-        self._register(
-            ExpectedCapability(
-                provider_id="zhipu-intl",
-                model_id="glm-5v-turbo",
-                expected_image=True,
-                expected_video=False,
-                doc_url=_zhipu_intl_doc,
-                note="GLM vision model supports image input",
-            ),
-        )
+        for mid in ("glm-5v-turbo", "glm-4v", "glm-4v-plus", "glm-4v-flash", "glm-4.6v-flash"):
+            self._register(
+                ExpectedCapability(
+                    provider_id="zhipu-intl",
+                    model_id=mid,
+                    expected_image=True,
+                    expected_video=False,
+                    doc_url=_zhipu_intl_doc,
+                    note="GLM vision model supports image input",
+                ),
+            )
 
         # ---------------------------------------------------------------
         # Zhipu Coding Plan (Z.AI)
@@ -346,16 +349,17 @@ def _load_baseline(self) -> None:
                 note="GLM text/code models are text-only",
             ),
         )
-        self._register(
-            ExpectedCapability(
-                provider_id="zhipu-intl-codingplan",
-                model_id="glm-5v-turbo",
-                expected_image=True,
-                expected_video=False,
-                doc_url=_zhipu_intl_cp_doc,
-                note="GLM vision model supports image input",
-            ),
-        )
+        for mid in ("glm-5v-turbo", "glm-4v", "glm-4v-plus", "glm-4v-flash", "glm-4.6v-flash"):
+            self._register(
+                ExpectedCapability(
+                    provider_id="zhipu-intl-codingplan",
+                    model_id=mid,
+                    expected_image=True,
+                    expected_video=False,
+                    doc_url=_zhipu_intl_cp_doc,
+                    note="GLM vision model supports image input",
+                ),
+            )
 
         # ---------------------------------------------------------------
         # 4. OpenAI
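One design note: the same five vision model IDs are now repeated verbatim across four provider sections. A hedged sketch of how a follow-up could hoist them into shared constants — the `_GLM_VISION_*` names are hypothetical, not part of this PR, and the interleaved text-only registrations are omitted for brevity:

    # Hypothetical refactor: one shared tuple instead of four inline
    # copies of the same model IDs.
    _GLM_VISION_MODEL_IDS = (
        "glm-5v-turbo",
        "glm-4v",
        "glm-4v-plus",
        "glm-4v-flash",
        "glm-4.6v-flash",
    )

    _GLM_VISION_PROVIDERS = (
        ("zhipu-cn", _zhipu_cn_doc),
        ("zhipu-cn-codingplan", _zhipu_cn_cp_doc),
        ("zhipu-intl", _zhipu_intl_doc),
        ("zhipu-intl-codingplan", _zhipu_intl_cp_doc),
    )

    for provider_id, doc_url in _GLM_VISION_PROVIDERS:
        for mid in _GLM_VISION_MODEL_IDS:
            self._register(
                ExpectedCapability(
                    provider_id=provider_id,
                    model_id=mid,
                    expected_image=True,
                    expected_video=False,
                    doc_url=doc_url,
                    note="GLM vision model supports image input",
                ),
            )

This would keep the four provider sections from drifting apart when the next vision model is added.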
28 changes: 28 additions & 0 deletions src/copaw/providers/openai_chat_model_compat.py
@@ -13,7 +13,9 @@
 from pydantic import BaseModel
 
 from copaw.local_models.tag_parser import (
+    extract_thinking_from_text,
     parse_tool_calls_from_text,
+    text_contains_think_tag,
     text_contains_tool_call_tag,
 )
 
@@ -268,11 +270,29 @@ async def _parse_openai_stream_response(
         # --- 2. Scan text/content blocks ---
         # Some models emit <tool_call> tags directly in their
         # response text instead of (or in addition to) thinking.
+        # Others embed reasoning inside <think>/<thought> tags
+        # in the text rather than via reasoning_content.
         new_content: list | None = None
+        injected_thinking_blocks: list = []
         for i, block in enumerate(parsed.content):
             if block.get("type") != "text":
                 continue
             text = block.get("text") or ""
+
+            # --- 2a. Extract <think>/<thought> tags from text ---
+            if text_contains_think_tag(text):
+                think_result = extract_thinking_from_text(text)
+                if think_result.thinking or think_result.has_open_tag:
+                    injected_thinking_blocks.append(
+                        {
+                            "type": "thinking",
+                            "thinking": think_result.thinking,
+                        },
+                    )
+                    text = think_result.remaining_text
+                    block["text"] = text
+
+            # --- 2b. Extract <tool_call> tags from text ---
             if not text_contains_tool_call_tag(text):
                 continue
 
@@ -300,6 +320,14 @@ async def _parse_openai_stream_response(
             new_content = list(parsed.content)
             new_content[i] = None  # type: ignore[index]
 
+        if injected_thinking_blocks:
+            # Prepend extracted thinking blocks before existing content.
+            parsed.content = injected_thinking_blocks + list(
+                parsed.content,
+            )
+            # Rebuild new_content index offsets after prepending.
+            new_content = None
+
         if new_content is not None:
             parsed.content = [b for b in new_content if b is not None]
Review comment (Contributor, severity: medium) on lines +323 to 332:

The current logic for prepending injected_thinking_blocks resets new_content to None (line 329), which effectively bypasses the filtering of empty text blocks that were marked for removal earlier in the loop. If a text block becomes empty after extracting thinking or tool-call tags, it should be removed from the final content list.

The filtering should be applied to parsed.content before prepending the new blocks to ensure the response remains clean.

Suggested change
-        if injected_thinking_blocks:
-            # Prepend extracted thinking blocks before existing content.
-            parsed.content = injected_thinking_blocks + list(
-                parsed.content,
-            )
-            # Rebuild new_content index offsets after prepending.
-            new_content = None
-
-        if new_content is not None:
-            parsed.content = [b for b in new_content if b is not None]
+        if new_content is not None:
+            parsed.content = [b for b in new_content if b is not None]
+
+        if injected_thinking_blocks:
+            # Prepend extracted thinking blocks before existing content.
+            parsed.content = injected_thinking_blocks + list(parsed.content)


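To make the ordering issue concrete, a minimal self-contained illustration. The block values are hypothetical, and it assumes `parsed.content` is a plain list of dicts, as the surrounding code suggests:

    # State after the scan loop: block 0's text became empty once its
    # tags were extracted, so it was marked for removal via new_content.
    parsed_content = [{"type": "text", "text": ""}]
    new_content = [None]  # position 0 marked for removal
    injected = [{"type": "thinking", "thinking": "step one"}]

    # Order in the diff: prepend, then reset new_content. The marked
    # empty text block is never filtered and leaks into the output.
    buggy = injected + list(parsed_content)
    assert buggy == [
        {"type": "thinking", "thinking": "step one"},
        {"type": "text", "text": ""},  # stale empty block survives
    ]

    # Suggested order: filter the marked blocks first, then prepend.
    filtered = [b for b in new_content if b is not None]
    fixed = injected + filtered
    assert fixed == [{"type": "thinking", "thinking": "step one"}]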
28 changes: 28 additions & 0 deletions src/copaw/providers/provider_manager.py
@@ -172,6 +172,34 @@
         supports_video=False,
         probe_source="documentation",
     ),
+    ModelInfo(
+        id="glm-4v",
+        name="glm-4v",
+        supports_image=True,
+        supports_video=False,
+        probe_source="documentation",
+    ),
+    ModelInfo(
+        id="glm-4v-plus",
+        name="glm-4v-plus",
+        supports_image=True,
+        supports_video=False,
+        probe_source="documentation",
+    ),
+    ModelInfo(
+        id="glm-4v-flash",
+        name="glm-4v-flash",
+        supports_image=True,
+        supports_video=False,
+        probe_source="documentation",
+    ),
+    ModelInfo(
+        id="glm-4.6v-flash",
+        name="glm-4.6v-flash",
+        supports_image=True,
+        supports_video=False,
+        probe_source="documentation",
+    ),
 ]
 
 OPENAI_MODELS: List[ModelInfo] = [
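The same five vision IDs now live in the capability baseline, this model list, and the test below. A hedged sketch of a guard test a follow-up could add — `ModelInfo` fields are taken from the diff above, while the `ZHIPU_MODELS` list name is an assumption inferred from the `OPENAI_MODELS` naming:

    # Hypothetical guard test keeping the model list and the capability
    # baseline from drifting apart.
    GLM_VISION_IDS = {
        "glm-5v-turbo",
        "glm-4v",
        "glm-4v-plus",
        "glm-4v-flash",
        "glm-4.6v-flash",
    }

    def test_glm_vision_models_marked_as_image_capable() -> None:
        from copaw.providers.provider_manager import ZHIPU_MODELS
        listed = {m.id for m in ZHIPU_MODELS if m.supports_image}
        assert GLM_VISION_IDS <= listed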
4 changes: 4 additions & 0 deletions tests/unit/providers/test_provider_manager.py
@@ -126,6 +126,10 @@ def test_builtin_zhipu_providers_registered(isolated_secret_dir) -> None:
         "glm-5.1",
         "glm-5-turbo",
         "glm-5v-turbo",
+        "glm-4v",
+        "glm-4v-plus",
+        "glm-4v-flash",
+        "glm-4.6v-flash",
     ]
 
 