@@ -77,22 +77,8 @@ def extract_reasoning_content_streaming(
                 # reasoning content continues
                 return DeltaMessage(reasoning_content=delta_text)
         else:
-            # No <think> in previous or delta, also need to check for </think>.
-            # Because the model may have generated </think> without <think>
-            # Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
-            if self.think_end_token in delta_text:
-                # </think> in delta with more tokens,
-                # extract reasoning content and content
-                end_index = delta_text.find(self.think_end_token)
-                reasoning_content = delta_text[:end_index]
-                content = delta_text[end_index + len(self.think_end_token):]
-                return DeltaMessage(reasoning_content=reasoning_content, content=content if content else None)
-            elif self.think_end_token in previous_text:
-                # </think> in previous, thinking content ends
-                return DeltaMessage(content=delta_text)
-            else:
-                # no </think> in previous or delta, reasoning content continues
-                return DeltaMessage(reasoning_content=delta_text)
+            # no <think> in previous or delta, all content
+            return DeltaMessage(content=delta_text)

     def extract_reasoning_content(self, model_output: str, request: ChatCompletionRequest,
                                   **kwargs) -> Tuple[Optional[str], Optional[str]]:
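In the streaming path, the removed else branch still scanned for a bare </think> (per the DeepSeek-R1 behaviour referenced in the deleted comment); after this change, a delta with no <think> seen in either the previous text or the delta is emitted as plain content. Below is a minimal, self-contained sketch of just that branch; DeltaMessage is reduced to a plain dataclass stand-in and the helper name is hypothetical, not part of the actual parser.

from dataclasses import dataclass
from typing import Optional


@dataclass
class DeltaMessage:
    """Stand-in for the project's DeltaMessage; only the two fields used here."""
    reasoning_content: Optional[str] = None
    content: Optional[str] = None


def handle_no_think_branch(previous_text: str, delta_text: str) -> DeltaMessage:
    """Mirror of the simplified else branch: reached only when '<think>'
    appears in neither previous_text nor delta_text."""
    # Old behaviour: still look for a bare '</think>' and split the delta.
    # New behaviour: the whole delta is treated as regular content.
    return DeltaMessage(content=delta_text)


# e.g. handle_no_think_branch('The answer', ' is 4.')
# -> DeltaMessage(reasoning_content=None, content=' is 4.')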
@@ -109,26 +95,35 @@ def extract_reasoning_content(self, model_output: str, request: ChatCompletionRe
             reasoning_content (str | None): The reasoning content.
             final_output (str | None): The content.
         """
-        # DeepSeek R1 doesn't generate <think> now.
+        start_index = model_output.find(self.think_start_token)
+        end_index = model_output.find(self.think_end_token)
         # Thus we assume the reasoning content is always at the start.
-        # Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
-        if self.think_end_token not in model_output:
+        if end_index < 0:
             # for qwen3 model, the reasoning content is wrapped by <think> </think> xml tags
-            return None, model_output
-        # Add a start token if it's missing to keep compatibility.
-        if self.think_start_token not in model_output:
-            model_output = f'{self.think_start_token}{model_output}'
-        # Use a regex to find the reasoning content
-        reasoning_content = self.reasoning_regex.findall(model_output)[0]
-
-        end_index = len(f'{self.think_start_token}{reasoning_content}{self.think_end_token}')
-        final_output = model_output[end_index:]
-        if reasoning_content.startswith('\n'):
-            reasoning_content = reasoning_content[1:]
-        if reasoning_content.endswith('\n'):
-            reasoning_content = reasoning_content[:-1]
+            if start_index < 0:
+                return None, model_output
+            reasoning_content = model_output[start_index + len(self.think_start_token):]
+            reasoning_content = self._trim_newlines(reasoning_content)
+            return reasoning_content, None
+
+        if start_index >= 0 and start_index < end_index:
+            reasoning_content = model_output[start_index + len(self.think_start_token):end_index]
+        else:
+            reasoning_content = model_output[:end_index]
+        reasoning_content = self._trim_newlines(reasoning_content)
+
+        final_output = model_output[end_index + len(self.think_end_token):]
+        final_output = self._trim_newlines(final_output)

         if len(final_output) == 0:
             return reasoning_content, None
-
         return reasoning_content, final_output
+
+    @classmethod
+    def _trim_newlines(cls, text: str):
+        """Trim newlines from the start and end of a string."""
+        while text.startswith('\n'):
+            text = text[1:]
+        while text.endswith('\n'):
+            text = text[:-1]
+        return text
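The rewritten extract_reasoning_content locates both tags by index instead of regex matching, so it handles a missing <think>, a missing </think>, or both. The following is a standalone sketch of the same logic under assumptions: a module-level function with hypothetical names, literal tag strings in place of self.think_start_token / self.think_end_token, and strip('\n') standing in for _trim_newlines.

from typing import Optional, Tuple

THINK_START, THINK_END = '<think>', '</think>'


def trim_newlines(text: str) -> str:
    """Strip leading/trailing newline characters, as _trim_newlines does."""
    return text.strip('\n')


def split_reasoning(model_output: str) -> Tuple[Optional[str], Optional[str]]:
    """Standalone mirror of the new extract_reasoning_content branches."""
    start_index = model_output.find(THINK_START)
    end_index = model_output.find(THINK_END)
    if end_index < 0:
        # No closing tag: either plain content or still-open reasoning.
        if start_index < 0:
            return None, model_output
        return trim_newlines(model_output[start_index + len(THINK_START):]), None
    # Closing tag present: reasoning is everything before it
    # (after <think> when that tag exists and precedes </think>).
    if 0 <= start_index < end_index:
        reasoning = model_output[start_index + len(THINK_START):end_index]
    else:
        reasoning = model_output[:end_index]
    reasoning = trim_newlines(reasoning)
    final = trim_newlines(model_output[end_index + len(THINK_END):])
    return reasoning, (final if final else None)


# Expected behaviour, derived from the branches above:
# split_reasoning('<think>\nLet me think.\n</think>\nFour.')  -> ('Let me think.', 'Four.')
# split_reasoning('No start tag.</think>Answer.')             -> ('No start tag.', 'Answer.')
# split_reasoning('<think>\nstill thinking')                  -> ('still thinking', None)
# split_reasoning('Just an answer.')                          -> (None, 'Just an answer.')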