From 3c7394ba2da0ddc16f8765c00450373b1b4022c7 Mon Sep 17 00:00:00 2001
From: zhangshizhao
Date: Mon, 30 Mar 2026 14:30:42 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=8A=A5=E5=91=8A?=
 =?UTF-8?q?=E7=94=9F=E6=88=90=E6=97=B6=E7=9A=84=E6=8A=A5=E9=94=99=EF=BC=8C?=
 =?UTF-8?q?=E5=A4=84=E7=90=86=E6=A8=A1=E5=9E=8B=E8=BF=94=E5=9B=9E=20None?=
 =?UTF-8?q?=20=E7=9A=84=E6=83=85=E5=86=B5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- chat() 方法:当模型返回的 content 为 None 时,直接返回 None,避免 re.sub() 接收 None 崩溃
- chat_json() 方法:去掉 response_format 参数,改为从模型文本输出中手动提取 JSON,兼容 markdown 代码块和裸 JSON 两种格式
---
 backend/app/utils/llm_client.py | 65 ++++++++++++++++++++++++++-------
 1 file changed, 51 insertions(+), 14 deletions(-)

diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py
index 6c1a81f49..79a5cc02e 100644
--- a/backend/app/utils/llm_client.py
+++ b/backend/app/utils/llm_client.py
@@ -38,7 +38,7 @@ def chat(
         temperature: float = 0.7,
         max_tokens: int = 4096,
         response_format: Optional[Dict] = None
-    ) -> str:
+    ) -> Optional[str]:
         """
         发送聊天请求
 
@@ -49,7 +49,7 @@
             response_format: 响应格式(如JSON模式)
 
         Returns:
-            模型响应文本
+            模型响应文本,若模型返回空内容则返回 None
         """
         kwargs = {
             "model": self.model,
@@ -62,10 +62,24 @@
             kwargs["response_format"] = response_format
 
         response = self.client.chat.completions.create(**kwargs)
-        content = response.choices[0].message.content
-        # 部分模型(如MiniMax M2.5)会在content中包含思考内容,需要移除
+        message = response.choices[0].message
+        content = message.content
+
+        # 部分推理模型(如 Qwen3-thinking、DeepSeek-R1)在思维链模式下
+        # 会把思考过程放入 reasoning_content,content 字段可能为 None 或空字符串。
+        # 此时尝试从 reasoning_content 兜底提取实际回答。
+        if not content:
+            reasoning = getattr(message, 'reasoning_content', None)
+            if reasoning:
+                # reasoning_content 是思考过程,不是最终答案;
+                # 说明模型在思考阶段被截断,无有效 content,返回 None
+                pass
+            return None
+
+        # 移除部分模型在 content 中内联的 <think>...</think> 思考块
+        # (如 MiniMax M2.5 / 部分 Qwen3 非流式模式)
         content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
-        return content
+        return content if content else None
 
     def chat_json(
         self,
@@ -76,6 +90,10 @@
         """
         发送聊天请求并返回JSON
 
+        不使用 response_format=json_object,以兼容思维链推理模型
+        (Qwen3-thinking、DeepSeek-R1 等不支持该参数)。
+        改为从模型的文本输出中手动提取 JSON。
+
         Args:
             messages: 消息列表
             temperature: 温度参数
@@ -88,16 +106,35 @@
             messages=messages,
             temperature=temperature,
             max_tokens=max_tokens,
-            response_format={"type": "json_object"}
+            # 不传 response_format,避免思维链模型返回 content=None
         )
-        # 清理markdown代码块标记
-        cleaned_response = response.strip()
-        cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE)
-        cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response)
-        cleaned_response = cleaned_response.strip()
+        if response is None:
+            raise ValueError(
+                "LLM 返回内容为空(content=None)。"
+                "可能原因:模型为推理/思维链模型且内容被截断,或 max_tokens 不足。"
+                f"当前模型: {self.model}"
+            )
+
+        # 从文本中提取 JSON(兼容 markdown 代码块和裸 JSON 两种格式)
+        cleaned = response.strip()
+
+        # 优先尝试提取 ```json ... ``` 代码块
+        json_block = re.search(r'```(?:json)?\s*\n?([\s\S]*?)\n?```', cleaned, re.IGNORECASE)
+        if json_block:
+            cleaned = json_block.group(1).strip()
+        else:
+            # 去除首尾的 markdown 围栏(无语言标记的情况)
+            cleaned = re.sub(r'^```\s*\n?', '', cleaned, flags=re.IGNORECASE)
+            cleaned = re.sub(r'\n?```\s*$', '', cleaned)
+            cleaned = cleaned.strip()
+
+        # 尝试从文本中定位第一个 { 到最后一个 },提取 JSON 主体
+        start = cleaned.find('{')
+        end = cleaned.rfind('}')
+        if start != -1 and end != -1 and end > start:
+            cleaned = cleaned[start:end + 1]
 
         try:
-            return json.loads(cleaned_response)
+            return json.loads(cleaned)
         except json.JSONDecodeError:
-            raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}")
-
+            raise ValueError(f"LLM返回的JSON格式无效: {cleaned[:500]}")