From cf4d9e366294af14317a3ef82a2d3c929a659ce8 Mon Sep 17 00:00:00 2001
From: Sameer Kankute
Date: Wed, 15 Oct 2025 13:31:38 +0530
Subject: [PATCH 1/3] Update perplexity cost tracking

---
 litellm/llms/perplexity/chat/__init__.py      |   1 +
 .../llms/perplexity/chat/transformation.py    | 145 +++++++++------
 litellm/main.py                               |  27 ++-
 .../test_perplexity_chat_transformation.py    | 150 +++++++++++++++-
 .../test_perplexity_cost_calculator.py        | 114 +++++++++++-
 .../perplexity/test_perplexity_integration.py | 167 +++++++++++++++++-
 6 files changed, 548 insertions(+), 56 deletions(-)
 create mode 100644 litellm/llms/perplexity/chat/__init__.py

diff --git a/litellm/llms/perplexity/chat/__init__.py b/litellm/llms/perplexity/chat/__init__.py
new file mode 100644
index 000000000000..f4f9edf38e52
--- /dev/null
+++ b/litellm/llms/perplexity/chat/__init__.py
@@ -0,0 +1 @@
+"""Perplexity chat completion transformations."""
diff --git a/litellm/llms/perplexity/chat/transformation.py b/litellm/llms/perplexity/chat/transformation.py
index 27e6415ff8b2..50fc3f657897 100644
--- a/litellm/llms/perplexity/chat/transformation.py
+++ b/litellm/llms/perplexity/chat/transformation.py
@@ -1,31 +1,39 @@
-"""
-Translate from OpenAI's `/v1/chat/completions` to Perplexity's `/v1/chat/completions`
-"""
+"""Translate from OpenAI's `/v1/chat/completions` to Perplexity's `/v1/chat/completions`."""
 
-from typing import Any, List, Optional, Tuple
+from __future__ import annotations
+
+import re
+from typing import TYPE_CHECKING, Any
 
-import httpx
 import litellm
 from litellm._logging import verbose_logger
-from litellm.secret_managers.main import get_secret_str
-from litellm.types.llms.openai import AllMessageValues
-from litellm.types.utils import Usage, PromptTokensDetailsWrapper
-from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
-from litellm.types.utils import ModelResponse
-from litellm.types.llms.openai import ChatCompletionAnnotation
-from litellm.types.llms.openai import ChatCompletionAnnotationURLCitation
+from litellm.secret_managers.main import get_secret_str
+from litellm.types.utils import ModelResponse, PromptTokensDetailsWrapper, Usage
+
+if TYPE_CHECKING:
+    import httpx
+
+    from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+    from litellm.types.llms.openai import (
+        AllMessageValues,
+        ChatCompletionAnnotation,
+        ChatCompletionAnnotationURLCitation,
+    )
 
 
 class PerplexityChatConfig(OpenAIGPTConfig):
+    """Configuration for Perplexity chat completions."""
+
     @property
-    def custom_llm_provider(self) -> Optional[str]:
+    def custom_llm_provider(self) -> str | None:
+        """Return the custom LLM provider name."""
         return "perplexity"
 
     def _get_openai_compatible_provider_info(
-        self, api_base: Optional[str], api_key: Optional[str]
-    ) -> Tuple[Optional[str], Optional[str]]:
-        api_base = api_base or get_secret_str("PERPLEXITY_API_BASE") or "https://api.perplexity.ai"  # type: ignore
+        self, api_base: str | None, api_key: str | None,
+    ) -> tuple[str | None, str | None]:
+        api_base = api_base or get_secret_str("PERPLEXITY_API_BASE") or "https://api.perplexity.ai"  # type: ignore[assignment]
         dynamic_api_key = (
             api_key
             or get_secret_str("PERPLEXITYAI_API_KEY")
@@ -33,9 +41,10 @@ def _get_openai_compatible_provider_info(
         )
         return api_base, dynamic_api_key
 
-    def get_supported_openai_params(self, model: str) -> list:
-        """
-        Perplexity supports a subset of OpenAI params
+    def get_supported_openai_params(self, model: str) -> list[str]:
+        """Get supported OpenAI parameters for Perplexity.
+
+        Perplexity supports a subset of OpenAI params.
 
         Ref: https://docs.perplexity.ai/api-reference/chat-completions
 
@@ -56,36 +65,37 @@ def get_supported_openai_params(self, model: str) -> list:
 
         try:
             if litellm.supports_reasoning(
-                model=model, custom_llm_provider=self.custom_llm_provider
+                model=model, custom_llm_provider=self.custom_llm_provider,
             ):
                 base_openai_params.append("reasoning_effort")
-        except Exception as e:
+        except (ValueError, TypeError) as e:
             verbose_logger.debug(f"Error checking if model supports reasoning: {e}")
-        
+
         try:
             if litellm.supports_web_search(
-                model=model, custom_llm_provider=self.custom_llm_provider
+                model=model, custom_llm_provider=self.custom_llm_provider,
            ):
                 base_openai_params.append("web_search_options")
-        except Exception as e:
+        except (ValueError, TypeError) as e:
             verbose_logger.debug(f"Error checking if model supports web search: {e}")
-        
+
         return base_openai_params
 
-    def transform_response(
+    def transform_response(  # noqa: PLR0913
         self,
         model: str,
         raw_response: httpx.Response,
         model_response: ModelResponse,
         logging_obj: LiteLLMLoggingObj,
         request_data: dict,
-        messages: List[AllMessageValues],
+        messages: list[AllMessageValues],
         optional_params: dict,
         litellm_params: dict,
-        encoding: Any,
-        api_key: Optional[str] = None,
-        json_mode: Optional[bool] = None,
+        encoding: Any,  # noqa: ANN401
+        api_key: str | None = None,
+        json_mode: bool | None = None,  # noqa: FBT001
     ) -> ModelResponse:
+        """Transform Perplexity response to standard format."""
         # Call the parent transform_response first to handle the standard transformation
         model_response = super().transform_response(
             model=model,
@@ -104,28 +114,29 @@ def transform_response(
         # Extract and enhance usage with Perplexity-specific fields
         try:
             raw_response_json = raw_response.json()
+            self.add_cost_to_usage(model_response, raw_response_json)
             self._enhance_usage_with_perplexity_fields(
-                model_response, raw_response_json
+                model_response, raw_response_json,
             )
             self._add_citations_as_annotations(model_response, raw_response_json)
-        except Exception as e:
+        except (ValueError, TypeError, KeyError) as e:
             verbose_logger.debug(f"Error extracting Perplexity-specific usage fields: {e}")
 
         return model_response
 
-    def _enhance_usage_with_perplexity_fields(
-        self, model_response: ModelResponse, raw_response_json: dict
+    def _enhance_usage_with_perplexity_fields(  # noqa: C901
+        self, model_response: ModelResponse, raw_response_json: dict,
     ) -> None:
-        """
-        Extract citation tokens and search queries from Perplexity API response
-        and add them to the usage object using standard LiteLLM fields.
+        """Extract citation tokens and search queries from Perplexity API response.
+
+        Add them to the usage object using standard LiteLLM fields.
         """
         if not hasattr(model_response, "usage") or model_response.usage is None:
             # Create a usage object if it doesn't exist (when usage was None)
             model_response.usage = Usage(  # type: ignore[attr-defined]
                 prompt_tokens=0,
                 completion_tokens=0,
-                total_tokens=0
+                total_tokens=0,
             )
 
         usage = model_response.usage  # type: ignore[attr-defined]
@@ -146,7 +157,7 @@ def _enhance_usage_with_perplexity_fields(
         # Extract search queries count from usage or response metadata
         # Perplexity might include this in the usage object or as separate metadata
         perplexity_usage = raw_response_json.get("usage", {})
-        
+
         # Try to extract search queries from usage field first, then root level
         num_search_queries = perplexity_usage.get("num_search_queries")
         if num_search_queries is None:
@@ -155,28 +166,28 @@ def _enhance_usage_with_perplexity_fields(
             num_search_queries = perplexity_usage.get("search_queries")
         if num_search_queries is None:
             num_search_queries = raw_response_json.get("search_queries")
-        
+
         # Create or update prompt_tokens_details to include web search requests and citation tokens
         if citation_tokens > 0 or (
             num_search_queries is not None and num_search_queries > 0
         ):
             if usage.prompt_tokens_details is None:
                 usage.prompt_tokens_details = PromptTokensDetailsWrapper()
-        
+
         # Store citation tokens count for cost calculation
         if citation_tokens > 0:
-            setattr(usage, "citation_tokens", citation_tokens)
-        
+            usage.citation_tokens = citation_tokens
+
         # Store search queries count in the standard web_search_requests field
         if num_search_queries is not None and num_search_queries > 0:
             usage.prompt_tokens_details.web_search_requests = num_search_queries
 
-    def _add_citations_as_annotations(
-        self, model_response: ModelResponse, raw_response_json: dict
+    def _add_citations_as_annotations(  # noqa: C901, PLR0912
+        self, model_response: ModelResponse, raw_response_json: dict,
     ) -> None:
-        """
-        Extract citations and search_results from Perplexity API response
-        and add them as ChatCompletionAnnotation objects to the message.
+        """Extract citations and search_results from Perplexity API response.
+
+        Add them as ChatCompletionAnnotation objects to the message.
         """
         if not model_response.choices:
             return
@@ -205,8 +216,6 @@ def _add_citations_as_annotations(
             return
 
         # Find all citation markers like [1], [2], [3], [4] in the text
-        import re
-
         citation_pattern = r"\[(\d+)\]"
         citation_matches = list(re.finditer(citation_pattern, content))
 
@@ -246,6 +255,38 @@ def _add_citations_as_annotations(
 
         # Also add the raw citations and search_results as attributes for backward compatibility
         if citations:
-            setattr(model_response, "citations", citations)
+            model_response.citations = citations  # type: ignore[attr-defined]
         if search_results:
-            setattr(model_response, "search_results", search_results)
\ No newline at end of file
+            model_response.search_results = search_results  # type: ignore[attr-defined]
+
+
+    def add_cost_to_usage(self, model_response: ModelResponse, raw_response_json: dict) -> None:
+        """Add the cost to the usage object."""
+        try:
+            usage_data = raw_response_json.get("usage")
+            if usage_data:
+                # Try different possible cost field locations
+                response_cost = None
+
+                # Check if cost is directly in usage (flat structure)
+                if "total_cost" in usage_data:
+                    response_cost = usage_data["total_cost"]
+                # Check if cost is nested (cost.total_cost structure)
+                elif "cost" in usage_data and isinstance(usage_data["cost"], dict):
+                    response_cost = usage_data["cost"].get("total_cost")
+                # Check if cost is a simple value
+                elif "cost" in usage_data:
+                    response_cost = usage_data["cost"]
+
+                if response_cost is not None:
+                    # Store cost in hidden params for the cost calculator to use
+                    if not hasattr(model_response, "_hidden_params"):
+                        model_response._hidden_params = {}  # noqa: SLF001
+                    if "additional_headers" not in model_response._hidden_params:  # noqa: SLF001
+                        model_response._hidden_params["additional_headers"] = {}  # noqa: SLF001
+                    model_response._hidden_params["additional_headers"][  # noqa: SLF001
+                        "llm_provider-x-litellm-response-cost"
+                    ] = float(response_cost)
+        except (ValueError, TypeError, KeyError) as e:
+            verbose_logger.debug(f"Error adding cost to usage: {e}")
+            # If we can't extract cost, continue without it - don't fail the response
diff --git a/litellm/main.py b/litellm/main.py
index 5000de7a694c..95be8448968d 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -2017,11 +2017,36 @@ def completion(  # type: ignore # noqa: PLR0915
                 logging.post_call(
                     input=messages, api_key=api_key, original_response=response
                 )
+        elif custom_llm_provider == "perplexity":
+            response = base_llm_http_handler.completion(
+                model=model,
+                messages=messages,
+                headers=headers,
+                model_response=model_response,
+                api_key=api_key,
+                api_base=api_base,
+                acompletion=acompletion,
+                logging_obj=logging,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+                shared_session=shared_session,
+                timeout=timeout,
+                client=client,
+                custom_llm_provider=custom_llm_provider,
+                encoding=encoding,
+                stream=stream,
+                provider_config=provider_config,
+            )
+
+            ## LOGGING - Call after response has been processed by transform_response
+            logging.post_call(
+                input=messages, api_key=api_key, original_response=response
+            )
+
         elif (
             model in litellm.open_ai_chat_completion_models
             or custom_llm_provider == "custom_openai"
             or custom_llm_provider == "deepinfra"
-            or custom_llm_provider == "perplexity"
             or custom_llm_provider == "nvidia_nim"
             or custom_llm_provider == "cerebras"
             or custom_llm_provider == "baseten"
diff --git a/tests/test_litellm/llms/perplexity/chat/test_perplexity_chat_transformation.py b/tests/test_litellm/llms/perplexity/chat/test_perplexity_chat_transformation.py
index 784e6f6fe63d..6e9dc4263037 100644
--- a/tests/test_litellm/llms/perplexity/chat/test_perplexity_chat_transformation.py
+++ b/tests/test_litellm/llms/perplexity/chat/test_perplexity_chat_transformation.py
@@ -707,4 +707,152 @@ def test_add_citations_as_annotations_no_message(self):
         # Check that no annotations were created (message content is None)
         assert choice.message.content is None
         # No annotations should be created since content is None
-        assert not hasattr(choice.message, 'annotations') or choice.message.annotations is None
\ No newline at end of file
+        assert not hasattr(choice.message, 'annotations') or choice.message.annotations is None
+
+    # Tests for cost extraction functionality
+    def test_add_cost_to_usage_flat_structure(self):
+        """Test cost extraction from flat usage structure."""
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+
+        # Mock raw response with flat cost structure
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "total_cost": 0.00015
+            }
+        }
+
+        # Test cost extraction
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Check that cost was stored in hidden params
+        assert hasattr(model_response, "_hidden_params")
+        assert "additional_headers" in model_response._hidden_params
+        assert "llm_provider-x-litellm-response-cost" in model_response._hidden_params["additional_headers"]
+
+        cost = model_response._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
+        assert cost == 0.00015
+
+    def test_add_cost_to_usage_nested_structure(self):
+        """Test cost extraction from nested usage structure."""
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+
+        # Mock raw response with nested cost structure
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "cost": {
+                    "total_cost": 0.00025
+                }
+            }
+        }
+
+        # Test cost extraction
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Check that cost was stored in hidden params
+        assert hasattr(model_response, "_hidden_params")
+        assert "additional_headers" in model_response._hidden_params
+        assert "llm_provider-x-litellm-response-cost" in model_response._hidden_params["additional_headers"]
+
+        cost = model_response._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
+        assert cost == 0.00025
+
+    def test_add_cost_to_usage_no_cost_data(self):
+        """Test handling when no cost data is present."""
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+
+        # Mock raw response without cost
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150
+            }
+        }
+
+        # Test cost extraction - should not raise error
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Should not have cost in hidden params
+        if hasattr(model_response, "_hidden_params"):
+            assert "llm_provider-x-litellm-response-cost" not in model_response._hidden_params.get("additional_headers", {})
+
+    def test_transform_response_includes_cost_extraction(self):
+        """Test that transform_response includes cost extraction."""
+        config = PerplexityChatConfig()
+
+        # Mock raw response
+        mock_response = Mock()
+        mock_response.json.return_value = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "total_cost": 0.00015
+            }
+        }
+        mock_response.headers = {}
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+        model_response.model = "perplexity/sonar-pro"
+
+        # Mock the parent transform_response to return our model_response
+        with patch.object(config.__class__.__bases__[0], 'transform_response', return_value=model_response):
+            result = config.transform_response(
+                model="perplexity/sonar-pro",
+                raw_response=mock_response,
+                model_response=model_response,
+                logging_obj=Mock(),
+                request_data={},
+                messages=[{"role": "user", "content": "Test"}],
+                optional_params={},
+                litellm_params={},
+                encoding=None,
+            )
+
+        # Check that cost was extracted and stored
+        assert hasattr(result, "_hidden_params")
+        assert "additional_headers" in result._hidden_params
+        assert "llm_provider-x-litellm-response-cost" in result._hidden_params["additional_headers"]
+
+        cost = result._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
+        assert cost == 0.00015
\ No newline at end of file
diff --git a/tests/test_litellm/llms/perplexity/test_perplexity_cost_calculator.py b/tests/test_litellm/llms/perplexity/test_perplexity_cost_calculator.py
index f9a521000701..db3a9254d3f5 100644
--- a/tests/test_litellm/llms/perplexity/test_perplexity_cost_calculator.py
+++ b/tests/test_litellm/llms/perplexity/test_perplexity_cost_calculator.py
@@ -370,4 +370,116 @@ def test_cost_calculation_combinations(self, citation_tokens, search_queries, re
 
         # Ensure costs are non-negative
         assert prompt_cost >= 0
-        assert completion_cost >= 0
\ No newline at end of file
+        assert completion_cost >= 0
+
+    def test_cost_extraction_priority_over_calculation(self):
+        """Test that extracted cost from API response takes priority over calculated cost."""
+        from litellm.cost_calculator import response_cost_calculator
+        from litellm.llms.perplexity.chat.transformation import PerplexityChatConfig
+
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse with extracted cost
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+        model_response.model = "perplexity/sonar-pro"
+
+        # Mock raw response with cost
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "total_cost": 0.00015  # This should be used instead of calculated cost
+            }
+        }
+
+        # Extract cost from API response
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Test response cost calculator - should use extracted cost
+        cost = response_cost_calculator(
+            response_object=model_response,
+            model="perplexity/sonar-pro",
+            custom_llm_provider="perplexity",
+            call_type="completion",
+            optional_params={}
+        )
+
+        # Should return the extracted cost, not calculated cost
+        assert cost == 0.00015
+
+    def test_cost_extraction_from_nested_structure(self):
+        """Test cost extraction from nested usage structure."""
+        from litellm.llms.perplexity.chat.transformation import PerplexityChatConfig
+
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+
+        # Mock raw response with nested cost structure
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "cost": {
+                    "total_cost": 0.00025
+                }
+            }
+        }
+
+        # Test cost extraction
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Check that cost was stored in hidden params
+        assert hasattr(model_response, "_hidden_params")
+        assert "additional_headers" in model_response._hidden_params
+        assert "llm_provider-x-litellm-response-cost" in model_response._hidden_params["additional_headers"]
+
+        cost = model_response._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
+        assert cost == 0.00025
+
+    def test_cost_extraction_error_handling(self):
+        """Test error handling during cost extraction."""
+        from litellm.llms.perplexity.chat.transformation import PerplexityChatConfig
+
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+
+        # Mock raw response with invalid cost data
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "cost": "invalid_cost"  # Invalid cost type
+            }
+        }
+
+        # Test cost extraction - should not raise error
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Should not have cost in hidden params due to error
+        if hasattr(model_response, "_hidden_params"):
+            assert "llm_provider-x-litellm-response-cost" not in model_response._hidden_params.get("additional_headers", {})
\ No newline at end of file
diff --git a/tests/test_litellm/llms/perplexity/test_perplexity_integration.py b/tests/test_litellm/llms/perplexity/test_perplexity_integration.py
index ae72b8a9625e..c8eb3793213f 100644
--- a/tests/test_litellm/llms/perplexity/test_perplexity_integration.py
+++ b/tests/test_litellm/llms/perplexity/test_perplexity_integration.py
@@ -316,4 +316,169 @@ def test_case_insensitive_provider_matching(self, provider_name):
         expected_completion_cost = (50 * 8e-6) + (1 / 1000 * 0.005)
 
         assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-6)
-        assert math.isclose(completion_cost_val, expected_completion_cost, rel_tol=1e-6)
\ No newline at end of file
+        assert math.isclose(completion_cost_val, expected_completion_cost, rel_tol=1e-6)
+
+    def test_cost_extraction_from_api_response(self):
+        """Test cost extraction from Perplexity API response."""
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+        model_response.model = "perplexity/sonar-pro"
+
+        # Mock raw response with cost
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "total_cost": 0.00015
+            }
+        }
+
+        # Test cost extraction
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Check that cost was stored in hidden params
+        assert hasattr(model_response, "_hidden_params")
+        assert "additional_headers" in model_response._hidden_params
+        assert "llm_provider-x-litellm-response-cost" in model_response._hidden_params["additional_headers"]
+
+        cost = model_response._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
+        assert cost == 0.00015
+
+    def test_cost_extraction_integration_with_main_calculator(self):
+        """Test that extracted cost takes priority over calculated cost."""
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+        model_response.model = "perplexity/sonar-pro"
+
+        # Mock raw response with cost
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "total_cost": 0.00015
+            }
+        }
+
+        # Extract cost
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Test main cost calculator - should use extracted cost
+        from litellm.cost_calculator import response_cost_calculator
+        cost = response_cost_calculator(
+            response_object=model_response,
+            model="perplexity/sonar-pro",
+            custom_llm_provider="perplexity",
+            call_type="completion",
+            optional_params={}
+        )
+
+        # Should return the extracted cost, not calculated cost
+        assert cost == 0.00015
+
+    def test_cost_extraction_nested_structure(self):
+        """Test cost extraction from nested usage structure."""
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+
+        # Mock raw response with nested cost structure
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "cost": {
+                    "total_cost": 0.00025
+                }
+            }
+        }
+
+        # Test cost extraction
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Check that cost was stored in hidden params
+        assert hasattr(model_response, "_hidden_params")
+        assert "additional_headers" in model_response._hidden_params
+        assert "llm_provider-x-litellm-response-cost" in model_response._hidden_params["additional_headers"]
+
+        cost = model_response._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
+        assert cost == 0.00025
+
+    def test_cost_extraction_error_handling(self):
+        """Test error handling during cost extraction."""
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+
+        # Mock raw response with invalid cost data
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "cost": "invalid_cost"  # Invalid cost type
+            }
+        }
+
+        # Test cost extraction - should not raise error
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Should not have cost in hidden params due to error
+        if hasattr(model_response, "_hidden_params"):
+            assert "llm_provider-x-litellm-response-cost" not in model_response._hidden_params.get("additional_headers", {})
+
+    def test_cost_extraction_no_usage_data(self):
+        """Test handling when no usage data is present."""
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+
+        # Mock raw response without usage
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}]
+        }
+
+        # Test cost extraction - should not raise error
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Should not have cost in hidden params
+        if hasattr(model_response, "_hidden_params"):
+            assert "llm_provider-x-litellm-response-cost" not in model_response._hidden_params.get("additional_headers", {})
\ No newline at end of file

From b033e33b197fb3021eaa02fff2ddf8443b35298b Mon Sep 17 00:00:00 2001
From: Sameer Kankute
Date: Thu, 16 Oct 2025 00:30:04 +0530
Subject: [PATCH 2/3] fix lint errors

---
 .../llms/perplexity/chat/transformation.py    | 60 +++++++++----------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/litellm/llms/perplexity/chat/transformation.py b/litellm/llms/perplexity/chat/transformation.py
index 50fc3f657897..a0ec9f325a8f 100644
--- a/litellm/llms/perplexity/chat/transformation.py
+++ b/litellm/llms/perplexity/chat/transformation.py
@@ -2,8 +2,7 @@
 
 from __future__ import annotations
 
-import re
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Optional, Tuple
 
 import litellm
 from litellm._logging import verbose_logger
@@ -31,9 +30,9 @@ def custom_llm_provider(self) -> str | None:
         return "perplexity"
 
     def _get_openai_compatible_provider_info(
-        self, api_base: str | None, api_key: str | None,
-    ) -> tuple[str | None, str | None]:
-        api_base = api_base or get_secret_str("PERPLEXITY_API_BASE") or "https://api.perplexity.ai"  # type: ignore[assignment]
+        self, api_base: Optional[str], api_key: Optional[str]
+    ) -> Tuple[Optional[str], Optional[str]]:
+        api_base = api_base or get_secret_str("PERPLEXITY_API_BASE") or "https://api.perplexity.ai"  # type: ignore
         dynamic_api_key = (
             api_key
             or get_secret_str("PERPLEXITYAI_API_KEY")
@@ -41,10 +40,9 @@ def _get_openai_compatible_provider_info(
         )
         return api_base, dynamic_api_key
 
-    def get_supported_openai_params(self, model: str) -> list[str]:
-        """Get supported OpenAI parameters for Perplexity.
-
-        Perplexity supports a subset of OpenAI params.
+    def get_supported_openai_params(self, model: str) -> list:
+        """
+        Perplexity supports a subset of OpenAI params
 
         Ref: https://docs.perplexity.ai/api-reference/chat-completions
 
@@ -65,22 +63,23 @@ def get_supported_openai_params(self, model: str) -> list[str]:
 
         try:
             if litellm.supports_reasoning(
-                model=model, custom_llm_provider=self.custom_llm_provider,
+                model=model, custom_llm_provider=self.custom_llm_provider
             ):
                 base_openai_params.append("reasoning_effort")
-        except (ValueError, TypeError) as e:
+        except Exception as e:
             verbose_logger.debug(f"Error checking if model supports reasoning: {e}")
-
+        
         try:
             if litellm.supports_web_search(
-                model=model, custom_llm_provider=self.custom_llm_provider,
+                model=model, custom_llm_provider=self.custom_llm_provider
             ):
                 base_openai_params.append("web_search_options")
-        except (ValueError, TypeError) as e:
+        except Exception as e:
             verbose_logger.debug(f"Error checking if model supports web search: {e}")
-
+        
         return base_openai_params
 
+
     def transform_response(  # noqa: PLR0913
         self,
         model: str,
@@ -91,9 +90,9 @@ def transform_response(  # noqa: PLR0913
         messages: list[AllMessageValues],
         optional_params: dict,
         litellm_params: dict,
-        encoding: Any,  # noqa: ANN401
+        encoding: Any,
         api_key: str | None = None,
-        json_mode: bool | None = None,  # noqa: FBT001
+        json_mode: bool | None = None,
     ) -> ModelResponse:
         """Transform Perplexity response to standard format."""
         # Call the parent transform_response first to handle the standard transformation
@@ -124,7 +123,7 @@ def transform_response(  # noqa: PLR0913
 
         return model_response
 
-    def _enhance_usage_with_perplexity_fields(  # noqa: C901
+    def _enhance_usage_with_perplexity_fields(
         self, model_response: ModelResponse, raw_response_json: dict,
     ) -> None:
         """Extract citation tokens and search queries from Perplexity API response.
@@ -182,12 +181,12 @@ def _enhance_usage_with_perplexity_fields(
         if num_search_queries is not None and num_search_queries > 0:
             usage.prompt_tokens_details.web_search_requests = num_search_queries
 
-    def _add_citations_as_annotations(  # noqa: C901, PLR0912
-        self, model_response: ModelResponse, raw_response_json: dict,
+    def _add_citations_as_annotations(
+        self, model_response: ModelResponse, raw_response_json: dict
     ) -> None:
-        """Extract citations and search_results from Perplexity API response.
-
-        Add them as ChatCompletionAnnotation objects to the message.
+        """
+        Extract citations and search_results from Perplexity API response
+        and add them as ChatCompletionAnnotation objects to the message.
         """
         if not model_response.choices:
             return
@@ -216,6 +215,8 @@ def _add_citations_as_annotations(
             return
 
         # Find all citation markers like [1], [2], [3], [4] in the text
+        import re
+
         citation_pattern = r"\[(\d+)\]"
         citation_matches = list(re.finditer(citation_pattern, content))
 
@@ -255,10 +256,9 @@ def _add_citations_as_annotations(
 
         # Also add the raw citations and search_results as attributes for backward compatibility
         if citations:
-            model_response.citations = citations  # type: ignore[attr-defined]
+            setattr(model_response, "citations", citations)
         if search_results:
-            model_response.search_results = search_results  # type: ignore[attr-defined]
-
+            setattr(model_response, "search_results", search_results)
 
     def add_cost_to_usage(self, model_response: ModelResponse, raw_response_json: dict) -> None:
         """Add the cost to the usage object."""
@@ -281,10 +281,10 @@ def add_cost_to_usage(self, model_response: ModelResponse, raw_response_json: di
                 if response_cost is not None:
                     # Store cost in hidden params for the cost calculator to use
                     if not hasattr(model_response, "_hidden_params"):
-                        model_response._hidden_params = {}  # noqa: SLF001
-                    if "additional_headers" not in model_response._hidden_params:  # noqa: SLF001
-                        model_response._hidden_params["additional_headers"] = {}  # noqa: SLF001
-                    model_response._hidden_params["additional_headers"][  # noqa: SLF001
+                        model_response._hidden_params = {}
+                    if "additional_headers" not in model_response._hidden_params:
+                        model_response._hidden_params["additional_headers"] = {}
+                    model_response._hidden_params["additional_headers"][
                         "llm_provider-x-litellm-response-cost"
                     ] = float(response_cost)
         except (ValueError, TypeError, KeyError) as e:

From e109ba4cf23ccee4c9654eef3218ee6b248bbfeb Mon Sep 17 00:00:00 2001
From: Sameer Kankute
Date: Fri, 17 Oct 2025 08:55:21 +0530
Subject: [PATCH 3/3] fix code

---
 litellm/llms/perplexity/chat/transformation.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/litellm/llms/perplexity/chat/transformation.py b/litellm/llms/perplexity/chat/transformation.py
index a0ec9f325a8f..8c536e395b38 100644
--- a/litellm/llms/perplexity/chat/transformation.py
+++ b/litellm/llms/perplexity/chat/transformation.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, Optional, Tuple
+from typing import TYPE_CHECKING, Any, List, Optional, Tuple
 
 import litellm
 from litellm._logging import verbose_logger
@@ -87,12 +87,12 @@ def transform_response(  # noqa: PLR0913
         model_response: ModelResponse,
         logging_obj: LiteLLMLoggingObj,
         request_data: dict,
-        messages: list[AllMessageValues],
+        messages: List[AllMessageValues],
         optional_params: dict,
         litellm_params: dict,
         encoding: Any,
-        api_key: str | None = None,
-        json_mode: bool | None = None,
+        api_key: Optional[str] = None,
+        json_mode: Optional[bool] = None,
     ) -> ModelResponse:
         """Transform Perplexity response to standard format."""
         # Call the parent transform_response first to handle the standard transformation