diff --git a/litellm/llms/ollama/completion/transformation.py b/litellm/llms/ollama/completion/transformation.py
index b476e5c8a631..cacb05c91b07 100644
--- a/litellm/llms/ollama/completion/transformation.py
+++ b/litellm/llms/ollama/completion/transformation.py
@@ -6,6 +6,7 @@
 from httpx._models import Headers, Response
 
 import litellm
+from litellm._logging import verbose_proxy_logger
 from litellm.litellm_core_utils.prompt_templates.common_utils import (
     get_str_from_messages,
 )
@@ -577,6 +578,17 @@ def chunk_parser(
                     ]
                 )
             else:
-                raise Exception(f"Unable to parse ollama chunk - {chunk}")
+                # 'thinking' is absent, chunk["done"] is false, and chunk["response"]
+                # is falsy (None or empty string): Ollama is just starting to stream,
+                # so emit an empty delta instead of failing the whole stream.
+                return ModelResponseStream(
+                    choices=[
+                        StreamingChoices(
+                            index=0,
+                            delta=Delta(reasoning_content=""),
+                        )
+                    ]
+                )
         except Exception as e:
+            verbose_proxy_logger.error(f"Unable to parse ollama chunk - {chunk}")
             raise e
diff --git a/tests/test_litellm/llms/ollama/test_ollama_completion_transformation.py b/tests/test_litellm/llms/ollama/test_ollama_completion_transformation.py
index 39386fe4fd25..5f448e06ab06 100644
--- a/tests/test_litellm/llms/ollama/test_ollama_completion_transformation.py
+++ b/tests/test_litellm/llms/ollama/test_ollama_completion_transformation.py
@@ -459,6 +459,28 @@ def test_chunk_parser_normal_response(self):
         assert result.choices and result.choices[0].delta is not None
         assert result.choices[0].delta.content == "Hello world"
         assert getattr(result.choices[0].delta, "reasoning_content", None) is None
+
+    def test_chunk_parser_empty_response_without_thinking(self):
+        """Empty, not-done chunks without 'thinking' parse to an empty delta."""
+        iterator = OllamaTextCompletionResponseIterator(
+            streaming_response=iter([]), sync_stream=True, json_mode=False
+        )
+
+        # Empty response chunk without thinking: the stream is just starting.
+        empty_response_chunk = {
+            "model": "qwen3:4b",
+            "created_at": "2025-10-16T11:27:14.82881Z",
+            "response": "",
+            "done": False,
+        }
+
+        result = iterator.chunk_parser(empty_response_chunk)
+
+        # The parser returns a ModelResponseStream with an empty reasoning delta.
+        assert isinstance(result, ModelResponseStream)
+        assert result.choices and result.choices[0].delta is not None
+        assert result.choices[0].delta.content is None
+        assert getattr(result.choices[0].delta, "reasoning_content", None) == ""
 
     def test_chunk_parser_done_chunk(self):
         """Test that done chunks work correctly."""