Commit d135ca5

remove litellm and openai changes for now
1 parent e9fe30b commit d135ca5

File tree

6 files changed: +12 -234 lines changed


src/strands/models/bedrock.py

Lines changed: 1 addition & 1 deletion

@@ -222,7 +222,7 @@ def _format_request(
                 UserWarning,
                 stacklevel=3
             )
-            system_blocks.append({"cachePoint": {"type": self.config["cache_prompt"]}})  # only default is valid here
+            system_blocks.append({"cachePoint": {"type": self.config["cache_prompt"]}})
 
         return {
             "modelId": self.config["model_id"],

src/strands/models/litellm.py

Lines changed: 2 additions & 116 deletions

@@ -14,7 +14,7 @@
 from typing_extensions import Unpack, override
 
 from ..tools import convert_pydantic_to_tool_spec
-from ..types.content import ContentBlock, Messages, SystemContentBlock
+from ..types.content import ContentBlock, Messages
 from ..types.exceptions import ContextWindowOverflowException
 from ..types.streaming import StreamEvent
 from ..types.tools import ToolChoice, ToolSpec
@@ -131,119 +131,6 @@ def _stream_switch_content(self, data_type: str, prev_data_type: str | None) ->
 
         return chunks, data_type
 
-    @override
-    @classmethod
-    def format_request_messages(
-        cls,
-        messages: Messages,
-        system_prompt: Optional[str] = None,
-        *,
-        system_prompt_content: Optional[list[SystemContentBlock]] = None,
-        **kwargs: Any,
-    ) -> list[dict[str, Any]]:
-        """Format a LiteLLM compatible messages array with cache point support.
-
-        Args:
-            messages: List of message objects to be processed by the model.
-            system_prompt: System prompt to provide context to the model (for legacy compatibility).
-            system_prompt_content: System prompt content blocks to provide context to the model.
-            **kwargs: Additional keyword arguments for future extensibility.
-
-        Returns:
-            A LiteLLM compatible messages array.
-        """
-        formatted_messages: list[dict[str, Any]] = []
-
-        # Handle system prompt content blocks (preferred) or fallback to system_prompt
-        if system_prompt_content:
-            # For LiteLLM with Bedrock, we can support cache points
-            system_content = []
-
-            for block in system_prompt_content:
-                if "text" in block:
-                    system_content.append({"type": "text", "text": block["text"]})
-                elif "cachePoint" in block and block["cachePoint"].get("type") == "default":
-                    # Apply cache control to the immediately preceding content block
-                    # for LiteLLM/Anthropic compatibility
-                    if system_content:
-                        system_content[-1]["cache_control"] = {"type": "ephemeral"}
-
-            # Create single system message with content array
-            if system_content:
-                formatted_messages.append({"role": "system", "content": system_content})
-        elif system_prompt:
-            # Fallback to simple string system prompt for legacy compatibility
-            formatted_messages.append({"role": "system", "content": system_prompt})
-
-        # Process regular messages
-        for message in messages:
-            contents = message["content"]
-
-            formatted_contents = [
-                cls.format_request_message_content(content)
-                for content in contents
-                if not any(block_type in content for block_type in ["toolResult", "toolUse"])
-            ]
-            formatted_tool_calls = [
-                cls.format_request_message_tool_call(content["toolUse"]) for content in contents if "toolUse" in content
-            ]
-            formatted_tool_messages = [
-                cls.format_request_tool_message(content["toolResult"])
-                for content in contents
-                if "toolResult" in content
-            ]
-
-            formatted_message = {
-                "role": message["role"],
-                "content": formatted_contents,
-                **({"tool_calls": formatted_tool_calls} if formatted_tool_calls else {}),
-            }
-            formatted_messages.append(formatted_message)
-            formatted_messages.extend(formatted_tool_messages)
-
-        return [message for message in formatted_messages if message["content"] or "tool_calls" in message]
-
-    @override
-    def format_chunk(self, event: dict[str, Any]) -> StreamEvent:
-        """Format a LiteLLM response event into a standardized message chunk.
-
-        Args:
-            event: A response event from the LiteLLM model.
-
-        Returns:
-            The formatted chunk.
-
-        Raises:
-            RuntimeError: If chunk_type is not recognized.
-        """
-        # Handle metadata case with prompt caching support
-        if event["chunk_type"] == "metadata":
-            usage_data = {
-                "inputTokens": event["data"].prompt_tokens,
-                "outputTokens": event["data"].completion_tokens,
-                "totalTokens": event["data"].total_tokens,
-            }
-
-            # Add prompt caching support for LiteLLM
-            if hasattr(event["data"], "prompt_tokens_details") and event["data"].prompt_tokens_details:
-                if hasattr(event["data"].prompt_tokens_details, "cached_tokens"):
-                    usage_data["cacheReadInputTokens"] = event["data"].prompt_tokens_details.cached_tokens
-
-            if hasattr(event["data"], "cache_creation_input_tokens") and event["data"].cache_creation_input_tokens:
-                usage_data["cacheWriteInputTokens"] = event["data"].cache_creation_input_tokens
-
-            return {
-                "metadata": {
-                    "usage": usage_data,
-                    "metrics": {
-                        "latencyMs": 0,  # TODO
-                    },
-                },
-            }
-
-        # For all other cases, use the parent implementation
-        return super().format_chunk(event)
-
     @override
     async def stream(
         self,
@@ -252,7 +139,6 @@ async def stream(
         system_prompt: Optional[str] = None,
         *,
         tool_choice: ToolChoice | None = None,
-        system_prompt_content: Optional[list[SystemContentBlock]] = None,
         **kwargs: Any,
     ) -> AsyncGenerator[StreamEvent, None]:
         """Stream conversation with the LiteLLM model.
@@ -268,7 +154,7 @@
            Formatted message chunks from the model.
        """
        logger.debug("formatting request")
-       request = self.format_request(messages, tool_specs, system_prompt_content, tool_choice)
+       request = self.format_request(messages, tool_specs, system_prompt, tool_choice)
        logger.debug("request=<%s>", request)
 
        logger.debug("invoking model")

src/strands/models/openai.py

Lines changed: 6 additions & 55 deletions

@@ -14,7 +14,7 @@
 from pydantic import BaseModel
 from typing_extensions import Unpack, override
 
-from ..types.content import ContentBlock, Messages, SystemContentBlock
+from ..types.content import ContentBlock, Messages
 from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException
 from ..types.streaming import StreamEvent
 from ..types.tools import ToolChoice, ToolResult, ToolSpec, ToolUse
@@ -198,21 +198,12 @@ def _format_request_tool_choice(cls, tool_choice: ToolChoice | None) -> dict[str
         return {"tool_choice": "auto"}
 
     @classmethod
-    def format_request_messages(
-        cls,
-        messages: Messages,
-        system_prompt: Optional[str] = None,
-        *,
-        system_prompt_content: Optional[list[SystemContentBlock]] = None,
-        **kwargs: Any,
-    ) -> list[dict[str, Any]]:
+    def format_request_messages(cls, messages: Messages, system_prompt: Optional[str] = None) -> list[dict[str, Any]]:
         """Format an OpenAI compatible messages array.
 
         Args:
             messages: List of message objects to be processed by the model.
             system_prompt: System prompt to provide context to the model.
-            system_prompt_content: Structured system prompt content blocks (for advanced use cases).
-            **kwargs: Additional keyword arguments for future extensibility.
 
         Returns:
             An OpenAI compatible messages array.
@@ -253,17 +244,14 @@ def format_request(
         tool_specs: Optional[list[ToolSpec]] = None,
         system_prompt: Optional[str] = None,
         tool_choice: ToolChoice | None = None,
-        system_prompt_content: Optional[list[SystemContentBlock]] = None,
     ) -> dict[str, Any]:
         """Format an OpenAI compatible chat streaming request.
 
         Args:
             messages: List of message objects to be processed by the model.
             tool_specs: List of tool specifications to make available to the model.
-            system_prompt: System prompt to provide context to the model. When system_prompt_content
-                is provided, this should contain the flattened text for legacy subclass compatibility.
+            system_prompt: System prompt to provide context to the model.
             tool_choice: Selection strategy for tool invocation.
-            system_prompt_content: Structured system prompt content blocks.
 
         Returns:
             An OpenAI compatible chat streaming request.
@@ -272,27 +260,8 @@
             TypeError: If a message contains a content block type that cannot be converted to an OpenAI-compatible
                 format.
         """
-        # Handle system prompt content with backwards compatibility
-        # LEGACY COMPATIBILITY: The try/except approach is needed because:
-        # 1. Some subclasses may override format_request_messages() with the old signature:
-        #    format_request_messages(cls, messages: Messages, system_prompt: Optional[str] = None)
-        # 2. Calling with system_prompt_content kwarg would fail on legacy overrides
-        # 3. This provides graceful fallback for existing subclass implementations
-        if system_prompt_content:
-            try:
-                # Try new signature with system_prompt_content parameter
-                messages_formatted = self.format_request_messages(
-                    messages, system_prompt, system_prompt_content=system_prompt_content
-                )
-            except TypeError:
-                # Fallback for legacy subclass overrides that don't support system_prompt_content
-                # Use system_prompt which should be populated for legacy compatibility
-                messages_formatted = self.format_request_messages(messages, system_prompt)
-        else:
-            messages_formatted = self.format_request_messages(messages, system_prompt)
-
         return {
-            "messages": messages_formatted,
+            "messages": self.format_request_messages(messages, system_prompt),
             "model": self.config["model_id"],
             "stream": True,
             "stream_options": {"include_usage": True},
@@ -391,7 +360,6 @@ async def stream(
         system_prompt: Optional[str] = None,
         *,
         tool_choice: ToolChoice | None = None,
-        system_prompt_content: Optional[list[SystemContentBlock]] = None,
         **kwargs: Any,
     ) -> AsyncGenerator[StreamEvent, None]:
         """Stream conversation with the OpenAI model.
@@ -411,18 +379,7 @@
             ModelThrottledException: If the request is throttled by OpenAI (rate limits).
         """
         logger.debug("formatting request")
-        # TODO This logic si wrong
-        # Use system_prompt_content if provided, otherwise fall back to system_prompt
-        if system_prompt_content:
-            # Extract text from first block if it's a simple text block
-            if len(system_prompt_content) == 1 and "text" in system_prompt_content[0]:
-                system_prompt_str = system_prompt_content[0]["text"]
-            else:
-                system_prompt_str = None  # OpenAI doesn't support complex system content blocks
-        else:
-            system_prompt_str = system_prompt
-
-        request = self.format_request(messages, tool_specs, system_prompt_str)
+        request = self.format_request(messages, tool_specs, system_prompt, tool_choice)
         logger.debug("formatted request=<%s>", request)
 
         logger.debug("invoking model")
@@ -501,13 +458,7 @@
 
     @override
     async def structured_output(
-        self,
-        output_model: Type[T],
-        prompt: Messages,
-        system_prompt: Optional[str] = None,
-        *,
-        system_prompt_content: Optional[list[SystemContentBlock]] = None,
-        **kwargs: Any,
+        self, output_model: Type[T], prompt: Messages, system_prompt: Optional[str] = None, **kwargs: Any
     ) -> AsyncGenerator[dict[str, Union[T, Any]], None]:
         """Get structured output from the model.
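With the override removed, openai.py is back to the string-only system prompt path: format_request builds messages directly from format_request_messages(messages, system_prompt), and stream forwards system_prompt (and now tool_choice) unchanged. A hedged usage sketch, assuming model is an OpenAIModel instance and using illustrative arguments:

# Illustrative: the reverted, string-only request formatting path.
request = model.format_request(
    messages=[{"role": "user", "content": [{"text": "hello"}]}],
    tool_specs=None,
    system_prompt="You are a helpful assistant.",  # plain string; no content blocks
    tool_choice=None,
)
# request["messages"] comes straight from format_request_messages(messages, system_prompt).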

src/strands/types/content.py

Lines changed: 2 additions & 4 deletions

@@ -6,7 +6,7 @@
 - Bedrock docs: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_Types_Amazon_Bedrock_Runtime.html
 """
 
-from typing import Dict, List, Literal, Optional, Union
+from typing import Dict, List, Literal, Optional
 
 from typing_extensions import TypedDict
 
@@ -68,7 +68,7 @@ class CachePoint(TypedDict):
         type: The type of cache point, typically "default".
     """
 
-    type: str  # Can we change this to default without breaking
+    type: str
 
 
 class ContentBlock(TypedDict, total=False):
@@ -103,12 +103,10 @@ class SystemContentBlock(TypedDict, total=False):
     """Contains configurations for instructions to provide the model for how to handle input.
 
     Attributes:
-        cachePoint: A cache point configuration to optimize conversation history.
         guardContent: A content block to assess with the guardrail.
         text: A system prompt for the model.
     """
 
-    cachePoint: CachePoint
     guardContent: GuardContent
     text: str
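After the revert, SystemContentBlock documents only guardContent and text; a cache point can no longer be expressed at the system prompt level, although the CachePoint type itself stays defined. A small sketch of what remains valid (the import path is assumed from the module location):

from strands.types.content import CachePoint, SystemContentBlock

# Still a valid SystemContentBlock after this commit:
system_block: SystemContentBlock = {"text": "You are a helpful assistant."}

# CachePoint remains available, but no longer as a SystemContentBlock field:
cache_point: CachePoint = {"type": "default"}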

tests/strands/agent/test_agent.py

Lines changed: 1 addition & 56 deletions

@@ -1795,7 +1795,6 @@ def test_agent_tool_record_direct_tool_call_disabled_with_non_serializable(agent
     assert len(agent.messages) == 0
 
 
-# TODO: dedup
 def test_agent_empty_invoke():
     model = MockedModelProvider([{"role": "assistant", "content": [{"text": "hello!"}]}])
     agent = Agent(model=model, messages=[{"role": "user", "content": [{"text": "hello!"}]}])
@@ -2211,7 +2210,7 @@ def test_agent_none_system_prompt():
     agent = Agent(system_prompt=None)
 
     assert agent.system_prompt is None
-    assert agent._system_prompt_content == None
+    assert agent._system_prompt_content is None
 
 
 def test_agent_empty_list_system_prompt():
@@ -2228,7 +2227,6 @@ def test_agent_backwards_compatibility_string_access():
     agent = Agent(system_prompt=system_prompt)
 
     # Should be able to access as string for backwards compatibility
-    assert isinstance(agent.system_prompt, str)
     assert agent.system_prompt == system_prompt
 
 
@@ -2239,60 +2237,7 @@ def test_agent_backwards_compatibility_single_text_block():
     agent = Agent(system_prompt=system_prompt_content)
 
     # Should extract text for backwards compatibility
-    assert isinstance(agent.system_prompt, str)
     assert agent.system_prompt == text
 
 
-def test_agent_initialize_system_prompt_string_input():
-    """Test _initialize_system_prompt with string input."""
-    agent = Agent()
-    result = agent._initialize_system_prompt("Test prompt")
-
-    assert result == ("Test prompt", [{"text": "Test prompt"}])
-
-
-def test_agent_initialize_system_prompt_single_text_block_input():
-    """Test _initialize_system_prompt with single text block."""
-    agent = Agent()
-    input_blocks = [{"text": "Test prompt"}]
-    result = agent._initialize_system_prompt(input_blocks)
-
-    assert result == ("Test prompt", input_blocks)
-
-
-def test_agent_initialize_system_prompt_multiple_blocks_input():
-    """Test _initialize_system_prompt with multiple blocks."""
-    agent = Agent()
-    input_blocks = [
-        {"text": "First block"},
-        {"cachePoint": {"type": "default"}},
-        {"text": "Second block"}
-    ]
-    result = agent._initialize_system_prompt(input_blocks)
-
-    assert result == ("First block\nSecond block", input_blocks)
 
-
-def test_agent_initialize_system_prompt_single_non_text_block_input():
-    """Test _initialize_system_prompt with single non-text block."""
-    agent = Agent()
-    input_blocks = [{"cachePoint": {"type": "default"}}]
-    result = agent._initialize_system_prompt(input_blocks)
-
-    assert result == (None, input_blocks)
-
-
-def test_agent_initialize_system_prompt_none_input():
-    """Test _initialize_system_prompt with None input."""
-    agent = Agent()
-    result = agent._initialize_system_prompt(None)
-
-    assert result == (None, None)
-
-
-def test_agent_initialize_system_prompt_empty_list_input():
-    """Test _initialize_system_prompt with empty list."""
-    agent = Agent()
-    result = agent._initialize_system_prompt([])
-
-    assert result == (None, [])
0 commit comments
