From 3c7394ba2da0ddc16f8765c00450373b1b4022c7 Mon Sep 17 00:00:00 2001
From: zhangshizhao
Date: Mon, 30 Mar 2026 14:30:42 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=8A=A5=E5=91=8A?=
 =?UTF-8?q?=E7=94=9F=E6=88=90=E6=97=B6=E7=9A=84=E6=8A=A5=E9=94=99=EF=BC=8C?=
 =?UTF-8?q?=E5=A4=84=E7=90=86=E6=A8=A1=E5=9E=8B=E8=BF=94=E5=9B=9E=20None?=
 =?UTF-8?q?=20=E7=9A=84=E6=83=85=E5=86=B5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- chat() 方法:当模型返回的 content 为 None 时,直接返回 None,避免 re.sub() 接收 None 崩溃
- chat_json() 方法:去掉 response_format 参数,改为从模型文本输出中手动提取 JSON,兼容 markdown 代码块和裸 JSON 两种格式
---
 backend/app/utils/llm_client.py | 65 ++++++++++++++++++++++++++-------
 1 file changed, 51 insertions(+), 14 deletions(-)

diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py
index 6c1a81f49..79a5cc02e 100644
--- a/backend/app/utils/llm_client.py
+++ b/backend/app/utils/llm_client.py
@@ -38,7 +38,7 @@ def chat(
         temperature: float = 0.7,
         max_tokens: int = 4096,
         response_format: Optional[Dict] = None
-    ) -> str:
+    ) -> Optional[str]:
         """
         发送聊天请求
 
@@ -49,7 +49,7 @@
             response_format: 响应格式(如JSON模式)
 
         Returns:
-            模型响应文本
+            模型响应文本,若模型返回空内容则返回 None
         """
         kwargs = {
             "model": self.model,
@@ -62,10 +62,24 @@
             kwargs["response_format"] = response_format
 
         response = self.client.chat.completions.create(**kwargs)
-        content = response.choices[0].message.content
-        # 部分模型(如MiniMax M2.5)会在content中包含思考内容,需要移除
+        message = response.choices[0].message
+        content = message.content
+
+        # 部分推理模型(如 Qwen3-thinking、DeepSeek-R1)在思维链模式下
+        # 会把思考过程放入 reasoning_content,content 字段可能为 None 或空字符串。
+        # 此时尝试从 reasoning_content 兜底提取实际回答。
+        if not content:
+            reasoning = getattr(message, 'reasoning_content', None)
+            if reasoning:
+                # reasoning_content 是思考过程,不是最终答案;
+                # 说明模型在思考阶段被截断,无有效 content,返回 None
+                pass
+            return None
+
+        # 移除部分模型在 content 中内联的 <think>...</think> 思考块
+        # (如 MiniMax M2.5 / 部分 Qwen3 非流式模式)
         content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
-        return content
+        return content if content else None
 
     def chat_json(
         self,
@@ -76,6 +90,10 @@
         """
         发送聊天请求并返回JSON
 
+        不使用 response_format=json_object,以兼容思维链推理模型
+        (Qwen3-thinking、DeepSeek-R1 等不支持该参数)。
+        改为从模型的文本输出中手动提取 JSON。
+
         Args:
             messages: 消息列表
             temperature: 温度参数
@@ -88,16 +106,35 @@
             messages=messages,
             temperature=temperature,
             max_tokens=max_tokens,
-            response_format={"type": "json_object"}
+            # 不传 response_format,避免思维链模型返回 content=None
         )
-        # 清理markdown代码块标记
-        cleaned_response = response.strip()
-        cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE)
-        cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response)
-        cleaned_response = cleaned_response.strip()
+        if response is None:
+            raise ValueError(
+                "LLM 返回内容为空(content=None)。"
+                "可能原因:模型为推理/思维链模型且内容被截断,或 max_tokens 不足。"
+                f"当前模型: {self.model}"
+            )
+
+        # 从文本中提取 JSON(兼容 markdown 代码块和裸 JSON 两种格式)
+        cleaned = response.strip()
+
+        # 优先尝试提取 ```json ... ``` 代码块
+        json_block = re.search(r'```(?:json)?\s*\n?([\s\S]*?)\n?```', cleaned, re.IGNORECASE)
+        if json_block:
+            cleaned = json_block.group(1).strip()
+        else:
+            # 去除首尾的 markdown 围栏(无语言标记的情况)
+            cleaned = re.sub(r'^```\s*\n?', '', cleaned, flags=re.IGNORECASE)
+            cleaned = re.sub(r'\n?```\s*$', '', cleaned)
+            cleaned = cleaned.strip()
+
+        # 尝试从文本中定位第一个 { 到最后一个 },提取 JSON 主体
+        start = cleaned.find('{')
+        end = cleaned.rfind('}')
+        if start != -1 and end != -1 and end > start:
+            cleaned = cleaned[start:end + 1]
 
         try:
-            return json.loads(cleaned_response)
+            return json.loads(cleaned)
         except json.JSONDecodeError:
-            raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}")
-
+            raise ValueError(f"LLM返回的JSON格式无效: {cleaned[:500]}")