Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 51 additions & 14 deletions backend/app/utils/llm_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def chat(
temperature: float = 0.7,
max_tokens: int = 4096,
response_format: Optional[Dict] = None
) -> str:
) -> Optional[str]:
"""
发送聊天请求

Expand All @@ -49,7 +49,7 @@ def chat(
response_format: 响应格式(如JSON模式)

Returns:
模型响应文本
模型响应文本,若模型返回空内容则返回 None
"""
kwargs = {
"model": self.model,
Expand All @@ -62,10 +62,24 @@ def chat(
kwargs["response_format"] = response_format

response = self.client.chat.completions.create(**kwargs)
content = response.choices[0].message.content
# 部分模型(如MiniMax M2.5)会在content中包含<think>思考内容,需要移除
message = response.choices[0].message
content = message.content

# 部分推理模型(如 Qwen3-thinking、DeepSeek-R1)在思维链模式下
# 会把思考过程放入 reasoning_content,content 字段可能为 None 或空字符串。
# 此时尝试从 reasoning_content 兜底提取实际回答。
if not content:
reasoning = getattr(message, 'reasoning_content', None)
if reasoning:
# reasoning_content 是思考过程,不是最终答案;
# 说明模型在思考阶段被截断,无有效 content,返回 None
pass
return None

# 移除部分模型在 content 中内联的 <think>...</think> 思考块
# (如 MiniMax M2.5 / 部分 Qwen3 非流式模式)
content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
return content
return content if content else None

def chat_json(
self,
Expand All @@ -76,6 +90,10 @@ def chat_json(
"""
发送聊天请求并返回JSON

不使用 response_format=json_object,以兼容思维链推理模型
(Qwen3-thinking、DeepSeek-R1 等不支持该参数)。
改为从模型的文本输出中手动提取 JSON。

Args:
messages: 消息列表
temperature: 温度参数
Expand All @@ -88,16 +106,35 @@ def chat_json(
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
response_format={"type": "json_object"}
# 不传 response_format,避免思维链模型返回 content=None
)
# 清理markdown代码块标记
cleaned_response = response.strip()
cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE)
cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response)
cleaned_response = cleaned_response.strip()
if response is None:
raise ValueError(
"LLM 返回内容为空(content=None)。"
"可能原因:模型为推理/思维链模型且内容被截断,或 max_tokens 不足。"
f"当前模型: {self.model}"
)

# 从文本中提取 JSON(兼容 markdown 代码块和裸 JSON 两种格式)
cleaned = response.strip()

# 优先尝试提取 ```json ... ``` 代码块
json_block = re.search(r'```(?:json)?\s*\n?([\s\S]*?)\n?```', cleaned, re.IGNORECASE)
if json_block:
cleaned = json_block.group(1).strip()
else:
# 去除首尾的 markdown 围栏(无语言标记的情况)
cleaned = re.sub(r'^```\s*\n?', '', cleaned, flags=re.IGNORECASE)
cleaned = re.sub(r'\n?```\s*$', '', cleaned)
cleaned = cleaned.strip()

# 尝试从文本中定位第一个 { 到最后一个 },提取 JSON 主体
start = cleaned.find('{')
end = cleaned.rfind('}')
if start != -1 and end != -1 and end > start:
cleaned = cleaned[start:end + 1]

try:
return json.loads(cleaned_response)
return json.loads(cleaned)
except json.JSONDecodeError:
raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}")

raise ValueError(f"LLM返回的JSON格式无效: {cleaned[:500]}")