Skip to content

Commit b0ccc35

Browse files
authored
fix(ollama): Enhance chunk parsing for empty responses without 'thinking' and improve error logging (#13333) (#15717)
1 parent e1cb928 commit b0ccc35

File tree

2 files changed

+36
-1
lines changed

2 files changed

+36
-1
lines changed

litellm/llms/ollama/completion/transformation.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from httpx._models import Headers, Response
77

88
import litellm
9+
from litellm._logging import verbose_proxy_logger
910
from litellm.litellm_core_utils.prompt_templates.common_utils import (
1011
get_str_from_messages,
1112
)
@@ -577,6 +578,18 @@ def chunk_parser(
577578
]
578579
)
579580
else:
580-
raise Exception(f"Unable to parse ollama chunk - {chunk}")
581+
# In this case, 'thinking' is not present in the chunk, chunk["done"] is false,
582+
# and chunk["response"] is falsy (None or empty string),
583+
# but Ollama is just starting to stream, so it should be processed as a normal dict
584+
return ModelResponseStream(
585+
choices=[
586+
StreamingChoices(
587+
index=0,
588+
delta=Delta(reasoning_content=""),
589+
)
590+
]
591+
)
592+
# raise Exception(f"Unable to parse ollama chunk - {chunk}")
581593
except Exception as e:
594+
verbose_proxy_logger.error(f"Unable to parse ollama chunk - {chunk}")
582595
raise e

tests/test_litellm/llms/ollama/test_ollama_completion_transformation.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,28 @@ def test_chunk_parser_normal_response(self):
459459
assert result.choices and result.choices[0].delta is not None
460460
assert result.choices[0].delta.content == "Hello world"
461461
assert getattr(result.choices[0].delta, "reasoning_content", None) is None
462+
463+
def test_chunk_parser_empty_response_without_thinking(self):
    """Empty-response chunks without 'thinking' parse as a normal stream chunk.

    Regression test: when Ollama is just starting to stream, the chunk has
    no 'thinking' key, chunk["done"] is False, and chunk["response"] is
    falsy (None or ""). chunk_parser must return a ModelResponseStream
    carrying an empty reasoning_content delta instead of raising.
    """
    iterator = OllamaTextCompletionResponseIterator(
        streaming_response=iter([]), sync_stream=True, json_mode=False
    )

    # Chunk shape as emitted by Ollama at stream start: empty 'response',
    # not done, and no 'thinking' field present.
    empty_response_chunk = {
        "model": "qwen3:4b",
        "created_at": "2025-10-16T11:27:14.82881Z",
        "response": "",
        "done": False,
    }

    result = iterator.chunk_parser(empty_response_chunk)

    # The parser returns a ModelResponseStream (not an exception) for this case.
    assert isinstance(result, ModelResponseStream)
    assert result.choices and result.choices[0].delta is not None
    # Use `is None` for None checks (identity, PEP 8) and `==` for string
    # comparison — `is ""` compares identity, depends on interning, and
    # raises SyntaxWarning on CPython >= 3.8.
    assert result.choices[0].delta.content is None
    assert getattr(result.choices[0].delta, "reasoning_content", None) == ""
462484

463485
def test_chunk_parser_done_chunk(self):
464486
"""Test that done chunks work correctly."""

0 commit comments

Comments
 (0)