Skip to content

Commit b0ccc35

Browse files
authored
fix(ollama): Enhance chunk parsing for empty responses without 'thinking' and improve error logging (#13333) (#15717)
1 parent e1cb928 commit b0ccc35

File tree

2 files changed

+36
-1
lines changed

2 files changed

+36
-1
lines changed

litellm/llms/ollama/completion/transformation.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from httpx._models import Headers, Response
77

88
import litellm
9+
from litellm._logging import verbose_proxy_logger
910
from litellm.litellm_core_utils.prompt_templates.common_utils import (
1011
get_str_from_messages,
1112
)
@@ -577,6 +578,18 @@ def chunk_parser(
577578
]
578579
)
579580
else:
580-
raise Exception(f"Unable to parse ollama chunk - {chunk}")
581+
# In this case, 'thinking' is not present in the chunk, chunk["done"] is false,
582+
# and chunk["response"] is falsy (None or empty string),
583+
# but Ollama is just starting to stream, so it should be processed as a normal dict
584+
return ModelResponseStream(
585+
choices=[
586+
StreamingChoices(
587+
index=0,
588+
delta=Delta(reasoning_content=""),
589+
)
590+
]
591+
)
592+
# raise Exception(f"Unable to parse ollama chunk - {chunk}")
581593
except Exception as e:
594+
verbose_proxy_logger.error(f"Unable to parse ollama chunk - {chunk}")
582595
raise e

tests/test_litellm/llms/ollama/test_ollama_completion_transformation.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,28 @@ def test_chunk_parser_normal_response(self):
459459
assert result.choices and result.choices[0].delta is not None
460460
assert result.choices[0].delta.content == "Hello world"
461461
assert getattr(result.choices[0].delta, "reasoning_content", None) is None
462+
463+
def test_chunk_parser_empty_response_without_thinking(self):
    """Empty-response chunks without 'thinking' parse as a normal stream chunk.

    Regression test: when Ollama is just starting to stream, the chunk has
    no 'thinking' key, chunk["done"] is False, and chunk["response"] is
    falsy (None or ""). chunk_parser must return a ModelResponseStream
    carrying an empty reasoning_content delta instead of raising.
    """
    iterator = OllamaTextCompletionResponseIterator(
        streaming_response=iter([]), sync_stream=True, json_mode=False
    )

    # Chunk shape as emitted by Ollama at stream start: empty 'response',
    # not done, and no 'thinking' field present.
    empty_response_chunk = {
        "model": "qwen3:4b",
        "created_at": "2025-10-16T11:27:14.82881Z",
        "response": "",
        "done": False,
    }

    result = iterator.chunk_parser(empty_response_chunk)

    # The parser returns a ModelResponseStream (not an exception) for this case.
    assert isinstance(result, ModelResponseStream)
    assert result.choices and result.choices[0].delta is not None
    # Use `is None` for None checks (identity, PEP 8) and `==` for string
    # comparison — `is ""` compares identity, depends on interning, and
    # raises SyntaxWarning on CPython >= 3.8.
    assert result.choices[0].delta.content is None
    assert getattr(result.choices[0].delta, "reasoning_content", None) == ""
462484

463485
def test_chunk_parser_done_chunk(self):
464486
"""Test that done chunks work correctly."""

0 commit comments

Comments
 (0)