Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion litellm/llms/ollama/completion/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from httpx._models import Headers, Response

import litellm
from litellm._logging import verbose_proxy_logger
from litellm.litellm_core_utils.prompt_templates.common_utils import (
get_str_from_messages,
)
Expand Down Expand Up @@ -577,6 +578,18 @@ def chunk_parser(
]
)
else:
raise Exception(f"Unable to parse ollama chunk - {chunk}")
# In this case, 'thinking' is not present in the chunk, chunk["done"] is false,
# and chunk["response"] is falsy (None or empty string),
# but Ollama is just starting to stream, so it should be processed as a normal dict
return ModelResponseStream(
choices=[
StreamingChoices(
index=0,
delta=Delta(reasoning_content=""),
)
]
)
# raise Exception(f"Unable to parse ollama chunk - {chunk}")
except Exception as e:
verbose_proxy_logger.error(f"Unable to parse ollama chunk - {chunk}")
raise e
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,28 @@ def test_chunk_parser_normal_response(self):
assert result.choices and result.choices[0].delta is not None
assert result.choices[0].delta.content == "Hello world"
assert getattr(result.choices[0].delta, "reasoning_content", None) is None

def test_chunk_parser_empty_response_without_thinking(self):
    """Test that empty response chunks without thinking still work.

    At the very start of a stream, Ollama may emit a chunk where
    ``response`` is empty, ``thinking`` is absent, and ``done`` is False.
    The parser should treat this as a normal streaming chunk (returning a
    ``ModelResponseStream`` with an empty ``reasoning_content`` delta)
    rather than raising an "unable to parse" exception.
    """
    iterator = OllamaTextCompletionResponseIterator(
        streaming_response=iter([]), sync_stream=True, json_mode=False
    )

    # Empty-response chunk with no 'thinking' key and done == False.
    empty_response_chunk = {
        "model": "qwen3:4b",
        "created_at": "2025-10-16T11:27:14.82881Z",
        "response": "",
        "done": False,
    }

    result = iterator.chunk_parser(empty_response_chunk)

    # chunk_parser now returns a ModelResponseStream for this case:
    # no content, and reasoning_content initialized to the empty string.
    assert isinstance(result, ModelResponseStream)
    assert result.choices and result.choices[0].delta is not None
    # Fixed: use `is None` for the None check (PEP 8), not `== None`.
    assert result.choices[0].delta.content is None
    # Fixed: compare string values with `==`; `is ""` is an identity
    # comparison against a literal and raises SyntaxWarning on CPython 3.8+.
    assert getattr(result.choices[0].delta, "reasoning_content", None) == ""

def test_chunk_parser_done_chunk(self):
"""Test that done chunks work correctly."""
Expand Down
Loading