Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 14 additions & 10 deletions models.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,25 +105,30 @@ def __init__(self, chunk: ChatChunk|None = None):
self.add_chunk(chunk)

def add_chunk(self, chunk: ChatChunk) -> ChatChunk:
    """Ingest one streaming chunk, accumulate reasoning/response text, and return the processed chunk.

    Deltas may arrive as None from some providers; they are normalized to
    empty strings up front to prevent a TypeError on string concatenation.
    """
    # Normalize None deltas to empty strings to prevent TypeError on concatenation
    reasoning_delta = chunk["reasoning_delta"] or ""
    response_delta = chunk["response_delta"] or ""

    # Any non-empty reasoning delta means the provider streams reasoning natively
    if reasoning_delta:
        self.native_reasoning = True

    # if native reasoning detection works, there's no need to worry about thinking tags
    if self.native_reasoning:
        processed_chunk = ChatChunk(response_delta=response_delta, reasoning_delta=reasoning_delta)
    else:
        # if the model outputs thinking tags, we need to parse them manually as reasoning
        processed_chunk = self._process_thinking_chunk(ChatChunk(response_delta=response_delta, reasoning_delta=reasoning_delta))

    # Normalize processed chunk values as well — the tag parser may also yield None
    self.reasoning += processed_chunk["reasoning_delta"] or ""
    self.response += processed_chunk["response_delta"] or ""

    return processed_chunk

def _process_thinking_chunk(self, chunk: ChatChunk) -> ChatChunk:
    """Prepend any buffered unparsed text to this chunk's response and parse thinking tags.

    Deltas may be None; coalesce to "" before concatenation so the buffer
    join and the tag parser never receive None.
    """
    response_delta = self.unprocessed + (chunk["response_delta"] or "")
    # Buffer is consumed once it has been prepended; the tag parser may refill it
    self.unprocessed = ""
    return self._process_thinking_tags(response_delta, chunk["reasoning_delta"] or "")

def _process_thinking_tags(self, response: str, reasoning: str) -> ChatChunk:
if self.thinking:
Expand Down Expand Up @@ -561,7 +566,6 @@ async def unified_call(
attempt += 1
await asyncio.sleep(retry_delay_s)


class AsyncAIChatReplacement:
class _Completions:
def __init__(self, wrapper):
Expand Down Expand Up @@ -657,6 +661,7 @@ async def _acall(

return resp


class LiteLLMEmbeddingWrapper(Embeddings):
model_name: str
kwargs: dict = {}
Expand Down Expand Up @@ -827,7 +832,6 @@ def _parse_chunk(chunk: Any) -> ChatChunk:
return ChatChunk(reasoning_delta=reasoning_delta, response_delta=response_delta)



def _adjust_call_args(provider_name: str, model_name: str, kwargs: dict):
# for openrouter add app reference
if provider_name == "openrouter":
Expand Down