From cf4d9e366294af14317a3ef82a2d3c929a659ce8 Mon Sep 17 00:00:00 2001
From: Sameer Kankute
Date: Wed, 15 Oct 2025 13:31:38 +0530
Subject: [PATCH 1/3] Update perplexity cost tracking

---
 litellm/llms/perplexity/chat/__init__.py      |   1 +
 .../llms/perplexity/chat/transformation.py    | 145 +++++++++------
 litellm/main.py                               |  27 ++-
 .../test_perplexity_chat_transformation.py    | 150 +++++++++++++++-
 .../test_perplexity_cost_calculator.py        | 114 +++++++++++-
 .../perplexity/test_perplexity_integration.py | 167 +++++++++++++++++-
 6 files changed, 548 insertions(+), 56 deletions(-)
 create mode 100644 litellm/llms/perplexity/chat/__init__.py

diff --git a/litellm/llms/perplexity/chat/__init__.py b/litellm/llms/perplexity/chat/__init__.py
new file mode 100644
index 000000000000..f4f9edf38e52
--- /dev/null
+++ b/litellm/llms/perplexity/chat/__init__.py
@@ -0,0 +1 @@
+"""Perplexity chat completion transformations."""
diff --git a/litellm/llms/perplexity/chat/transformation.py b/litellm/llms/perplexity/chat/transformation.py
index 27e6415ff8b2..50fc3f657897 100644
--- a/litellm/llms/perplexity/chat/transformation.py
+++ b/litellm/llms/perplexity/chat/transformation.py
@@ -1,31 +1,39 @@
-"""
-Translate from OpenAI's `/v1/chat/completions` to Perplexity's `/v1/chat/completions`
-"""
+"""Translate from OpenAI's `/v1/chat/completions` to Perplexity's `/v1/chat/completions`."""
 
-from typing import Any, List, Optional, Tuple
+from __future__ import annotations
+
+import re
+from typing import TYPE_CHECKING, Any
 
-import httpx
 import litellm
 from litellm._logging import verbose_logger
-from litellm.secret_managers.main import get_secret_str
-from litellm.types.llms.openai import AllMessageValues
-from litellm.types.utils import Usage, PromptTokensDetailsWrapper
-from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
-from litellm.types.utils import ModelResponse
-from litellm.types.llms.openai import ChatCompletionAnnotation
-from litellm.types.llms.openai import ChatCompletionAnnotationURLCitation
+from litellm.secret_managers.main import get_secret_str
+from litellm.types.utils import ModelResponse, PromptTokensDetailsWrapper, Usage
+
+if TYPE_CHECKING:
+    import httpx
+
+    from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+    from litellm.types.llms.openai import (
+        AllMessageValues,
+        ChatCompletionAnnotation,
+        ChatCompletionAnnotationURLCitation,
+    )
 
 
 class PerplexityChatConfig(OpenAIGPTConfig):
+    """Configuration for Perplexity chat completions."""
+
     @property
-    def custom_llm_provider(self) -> Optional[str]:
+    def custom_llm_provider(self) -> str | None:
+        """Return the custom LLM provider name."""
         return "perplexity"
 
     def _get_openai_compatible_provider_info(
-        self, api_base: Optional[str], api_key: Optional[str]
-    ) -> Tuple[Optional[str], Optional[str]]:
-        api_base = api_base or get_secret_str("PERPLEXITY_API_BASE") or "https://api.perplexity.ai"  # type: ignore
+        self, api_base: str | None, api_key: str | None,
+    ) -> tuple[str | None, str | None]:
+        api_base = api_base or get_secret_str("PERPLEXITY_API_BASE") or "https://api.perplexity.ai"  # type: ignore[assignment]
         dynamic_api_key = (
             api_key
             or get_secret_str("PERPLEXITYAI_API_KEY")
@@ -33,9 +41,10 @@ def _get_openai_compatible_provider_info(
         )
         return api_base, dynamic_api_key
 
-    def get_supported_openai_params(self, model: str) -> list:
-        """
-        Perplexity supports a subset of OpenAI params
+    def get_supported_openai_params(self, model: str) -> list[str]:
+        """Get supported OpenAI parameters for Perplexity.
+
+        Perplexity supports a subset of OpenAI params.
 
         Ref: https://docs.perplexity.ai/api-reference/chat-completions
 
@@ -56,36 +65,37 @@ def get_supported_openai_params(self, model: str) -> list:
 
         try:
             if litellm.supports_reasoning(
-                model=model, custom_llm_provider=self.custom_llm_provider
+                model=model, custom_llm_provider=self.custom_llm_provider,
             ):
                 base_openai_params.append("reasoning_effort")
-        except Exception as e:
+        except (ValueError, TypeError) as e:
             verbose_logger.debug(f"Error checking if model supports reasoning: {e}")
-        
+
         try:
             if litellm.supports_web_search(
-                model=model, custom_llm_provider=self.custom_llm_provider
+                model=model, custom_llm_provider=self.custom_llm_provider,
            ):
                 base_openai_params.append("web_search_options")
-        except Exception as e:
+        except (ValueError, TypeError) as e:
             verbose_logger.debug(f"Error checking if model supports web search: {e}")
-        
+
         return base_openai_params
 
-    def transform_response(
+    def transform_response(  # noqa: PLR0913
         self,
         model: str,
         raw_response: httpx.Response,
         model_response: ModelResponse,
         logging_obj: LiteLLMLoggingObj,
         request_data: dict,
-        messages: List[AllMessageValues],
+        messages: list[AllMessageValues],
         optional_params: dict,
         litellm_params: dict,
-        encoding: Any,
-        api_key: Optional[str] = None,
-        json_mode: Optional[bool] = None,
+        encoding: Any,  # noqa: ANN401
+        api_key: str | None = None,
+        json_mode: bool | None = None,  # noqa: FBT001
     ) -> ModelResponse:
+        """Transform Perplexity response to standard format."""
         # Call the parent transform_response first to handle the standard transformation
         model_response = super().transform_response(
             model=model,
@@ -104,28 +114,29 @@ def transform_response(
         # Extract and enhance usage with Perplexity-specific fields
         try:
             raw_response_json = raw_response.json()
+            self.add_cost_to_usage(model_response, raw_response_json)
             self._enhance_usage_with_perplexity_fields(
-                model_response, raw_response_json
+                model_response, raw_response_json,
             )
             self._add_citations_as_annotations(model_response, raw_response_json)
-        except Exception as e:
+        except (ValueError, TypeError, KeyError) as e:
             verbose_logger.debug(f"Error extracting Perplexity-specific usage fields: {e}")
 
         return model_response
 
-    def _enhance_usage_with_perplexity_fields(
-        self, model_response: ModelResponse, raw_response_json: dict
+    def _enhance_usage_with_perplexity_fields(  # noqa: C901
+        self, model_response: ModelResponse, raw_response_json: dict,
     ) -> None:
-        """
-        Extract citation tokens and search queries from Perplexity API response
-        and add them to the usage object using standard LiteLLM fields.
+        """Extract citation tokens and search queries from Perplexity API response.
+
+        Add them to the usage object using standard LiteLLM fields.
         """
         if not hasattr(model_response, "usage") or model_response.usage is None:
             # Create a usage object if it doesn't exist (when usage was None)
             model_response.usage = Usage(  # type: ignore[attr-defined]
                 prompt_tokens=0,
                 completion_tokens=0,
-                total_tokens=0
+                total_tokens=0,
             )
 
         usage = model_response.usage  # type: ignore[attr-defined]
@@ -146,7 +157,7 @@ def _enhance_usage_with_perplexity_fields(
         # Extract search queries count from usage or response metadata
         # Perplexity might include this in the usage object or as separate metadata
         perplexity_usage = raw_response_json.get("usage", {})
-        
+
         # Try to extract search queries from usage field first, then root level
         num_search_queries = perplexity_usage.get("num_search_queries")
         if num_search_queries is None:
@@ -155,28 +166,28 @@ def _enhance_usage_with_perplexity_fields(
             num_search_queries = perplexity_usage.get("search_queries")
         if num_search_queries is None:
             num_search_queries = raw_response_json.get("search_queries")
-        
+
         # Create or update prompt_tokens_details to include web search requests and citation tokens
         if citation_tokens > 0 or (
             num_search_queries is not None and num_search_queries > 0
         ):
             if usage.prompt_tokens_details is None:
                 usage.prompt_tokens_details = PromptTokensDetailsWrapper()
-        
+
         # Store citation tokens count for cost calculation
         if citation_tokens > 0:
-            setattr(usage, "citation_tokens", citation_tokens)
-        
+            usage.citation_tokens = citation_tokens
+
         # Store search queries count in the standard web_search_requests field
         if num_search_queries is not None and num_search_queries > 0:
             usage.prompt_tokens_details.web_search_requests = num_search_queries
 
-    def _add_citations_as_annotations(
-        self, model_response: ModelResponse, raw_response_json: dict
+    def _add_citations_as_annotations(  # noqa: C901, PLR0912
+        self, model_response: ModelResponse, raw_response_json: dict,
     ) -> None:
-        """
-        Extract citations and search_results from Perplexity API response
-        and add them as ChatCompletionAnnotation objects to the message.
+        """Extract citations and search_results from Perplexity API response.
+
+        Add them as ChatCompletionAnnotation objects to the message.
         """
         if not model_response.choices:
             return
@@ -205,8 +216,6 @@ def _add_citations_as_annotations(
             return
 
         # Find all citation markers like [1], [2], [3], [4] in the text
-        import re
-
         citation_pattern = r"\[(\d+)\]"
         citation_matches = list(re.finditer(citation_pattern, content))
 
@@ -246,6 +255,38 @@ def _add_citations_as_annotations(
 
         # Also add the raw citations and search_results as attributes for backward compatibility
         if citations:
-            setattr(model_response, "citations", citations)
+            model_response.citations = citations  # type: ignore[attr-defined]
         if search_results:
-            setattr(model_response, "search_results", search_results)
\ No newline at end of file
+            model_response.search_results = search_results  # type: ignore[attr-defined]
+
+
+    def add_cost_to_usage(self, model_response: ModelResponse, raw_response_json: dict) -> None:
+        """Add the cost to the usage object."""
+        try:
+            usage_data = raw_response_json.get("usage")
+            if usage_data:
+                # Try different possible cost field locations
+                response_cost = None
+
+                # Check if cost is directly in usage (flat structure)
+                if "total_cost" in usage_data:
+                    response_cost = usage_data["total_cost"]
+                # Check if cost is nested (cost.total_cost structure)
+                elif "cost" in usage_data and isinstance(usage_data["cost"], dict):
+                    response_cost = usage_data["cost"].get("total_cost")
+                # Check if cost is a simple value
+                elif "cost" in usage_data:
+                    response_cost = usage_data["cost"]
+
+                if response_cost is not None:
+                    # Store cost in hidden params for the cost calculator to use
+                    if not hasattr(model_response, "_hidden_params"):
+                        model_response._hidden_params = {}  # noqa: SLF001
+                    if "additional_headers" not in model_response._hidden_params:  # noqa: SLF001
+                        model_response._hidden_params["additional_headers"] = {}  # noqa: SLF001
+                    model_response._hidden_params["additional_headers"][  # noqa: SLF001
+                        "llm_provider-x-litellm-response-cost"
+                    ] = float(response_cost)
+        except (ValueError, TypeError, KeyError) as e:
+            verbose_logger.debug(f"Error adding cost to usage: {e}")
+            # If we can't extract cost, continue without it - don't fail the response
diff --git a/litellm/main.py b/litellm/main.py
index 5000de7a694c..95be8448968d 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -2017,11 +2017,36 @@ def completion(  # type: ignore # noqa: PLR0915
                 logging.post_call(
                     input=messages, api_key=api_key, original_response=response
                 )
+        elif custom_llm_provider == "perplexity":
+            response = base_llm_http_handler.completion(
+                model=model,
+                messages=messages,
+                headers=headers,
+                model_response=model_response,
+                api_key=api_key,
+                api_base=api_base,
+                acompletion=acompletion,
+                logging_obj=logging,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+                shared_session=shared_session,
+                timeout=timeout,
+                client=client,
+                custom_llm_provider=custom_llm_provider,
+                encoding=encoding,
+                stream=stream,
+                provider_config=provider_config,
+            )
+
+            ## LOGGING - Call after response has been processed by transform_response
+            logging.post_call(
+                input=messages, api_key=api_key, original_response=response
+            )
+
         elif (
             model in litellm.open_ai_chat_completion_models
             or custom_llm_provider == "custom_openai"
             or custom_llm_provider == "deepinfra"
-            or custom_llm_provider == "perplexity"
             or custom_llm_provider == "nvidia_nim"
             or custom_llm_provider == "cerebras"
             or custom_llm_provider == "baseten"
diff --git a/tests/test_litellm/llms/perplexity/chat/test_perplexity_chat_transformation.py b/tests/test_litellm/llms/perplexity/chat/test_perplexity_chat_transformation.py
index 784e6f6fe63d..6e9dc4263037 100644
--- a/tests/test_litellm/llms/perplexity/chat/test_perplexity_chat_transformation.py
+++ b/tests/test_litellm/llms/perplexity/chat/test_perplexity_chat_transformation.py
@@ -707,4 +707,152 @@ def test_add_citations_as_annotations_no_message(self):
         # Check that no annotations were created (message content is None)
         assert choice.message.content is None
         # No annotations should be created since content is None
-        assert not hasattr(choice.message, 'annotations') or choice.message.annotations is None
\ No newline at end of file
+        assert not hasattr(choice.message, 'annotations') or choice.message.annotations is None
+
+    # Tests for cost extraction functionality
+    def test_add_cost_to_usage_flat_structure(self):
+        """Test cost extraction from flat usage structure."""
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+
+        # Mock raw response with flat cost structure
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "total_cost": 0.00015
+            }
+        }
+
+        # Test cost extraction
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Check that cost was stored in hidden params
+        assert hasattr(model_response, "_hidden_params")
+        assert "additional_headers" in model_response._hidden_params
+        assert "llm_provider-x-litellm-response-cost" in model_response._hidden_params["additional_headers"]
+
+        cost = model_response._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
+        assert cost == 0.00015
+
+    def test_add_cost_to_usage_nested_structure(self):
+        """Test cost extraction from nested usage structure."""
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+
+        # Mock raw response with nested cost structure
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "cost": {
+                    "total_cost": 0.00025
+                }
+            }
+        }
+
+        # Test cost extraction
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Check that cost was stored in hidden params
+        assert hasattr(model_response, "_hidden_params")
+        assert "additional_headers" in model_response._hidden_params
+        assert "llm_provider-x-litellm-response-cost" in model_response._hidden_params["additional_headers"]
+
+        cost = model_response._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
+        assert cost == 0.00025
+
+    def test_add_cost_to_usage_no_cost_data(self):
+        """Test handling when no cost data is present."""
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+
+        # Mock raw response without cost
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150
+            }
+        }
+
+        # Test cost extraction - should not raise error
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Should not have cost in hidden params
+        if hasattr(model_response, "_hidden_params"):
+            assert "llm_provider-x-litellm-response-cost" not in model_response._hidden_params.get("additional_headers", {})
+
+    def test_transform_response_includes_cost_extraction(self):
+        """Test that transform_response includes cost extraction."""
+        config = PerplexityChatConfig()
+
+        # Mock raw response
+        mock_response = Mock()
+        mock_response.json.return_value = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "total_cost": 0.00015
+            }
+        }
+        mock_response.headers = {}
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+        model_response.model = "perplexity/sonar-pro"
+
+        # Mock the parent transform_response to return our model_response
+        with patch.object(config.__class__.__bases__[0], 'transform_response', return_value=model_response):
+            result = config.transform_response(
+                model="perplexity/sonar-pro",
+                raw_response=mock_response,
+                model_response=model_response,
+                logging_obj=Mock(),
+                request_data={},
+                messages=[{"role": "user", "content": "Test"}],
+                optional_params={},
+                litellm_params={},
+                encoding=None,
+            )
+
+        # Check that cost was extracted and stored
+        assert hasattr(result, "_hidden_params")
+        assert "additional_headers" in result._hidden_params
+        assert "llm_provider-x-litellm-response-cost" in result._hidden_params["additional_headers"]
+
+        cost = result._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
+        assert cost == 0.00015
\ No newline at end of file
diff --git a/tests/test_litellm/llms/perplexity/test_perplexity_cost_calculator.py b/tests/test_litellm/llms/perplexity/test_perplexity_cost_calculator.py
index f9a521000701..db3a9254d3f5 100644
--- a/tests/test_litellm/llms/perplexity/test_perplexity_cost_calculator.py
+++ b/tests/test_litellm/llms/perplexity/test_perplexity_cost_calculator.py
@@ -370,4 +370,116 @@ def test_cost_calculation_combinations(self, citation_tokens, search_queries, re
 
         # Ensure costs are non-negative
         assert prompt_cost >= 0
-        assert completion_cost >= 0
\ No newline at end of file
+        assert completion_cost >= 0
+
+    def test_cost_extraction_priority_over_calculation(self):
+        """Test that extracted cost from API response takes priority over calculated cost."""
+        from litellm.cost_calculator import response_cost_calculator
+        from litellm.llms.perplexity.chat.transformation import PerplexityChatConfig
+
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse with extracted cost
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+        model_response.model = "perplexity/sonar-pro"
+
+        # Mock raw response with cost
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "total_cost": 0.00015  # This should be used instead of calculated cost
+            }
+        }
+
+        # Extract cost from API response
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Test response cost calculator - should use extracted cost
+        cost = response_cost_calculator(
+            response_object=model_response,
+            model="perplexity/sonar-pro",
+            custom_llm_provider="perplexity",
+            call_type="completion",
+            optional_params={}
+        )
+
+        # Should return the extracted cost, not calculated cost
+        assert cost == 0.00015
+
+    def test_cost_extraction_from_nested_structure(self):
+        """Test cost extraction from nested usage structure."""
+        from litellm.llms.perplexity.chat.transformation import PerplexityChatConfig
+
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+
+        # Mock raw response with nested cost structure
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "cost": {
+                    "total_cost": 0.00025
+                }
+            }
+        }
+
+        # Test cost extraction
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Check that cost was stored in hidden params
+        assert hasattr(model_response, "_hidden_params")
+        assert "additional_headers" in model_response._hidden_params
+        assert "llm_provider-x-litellm-response-cost" in model_response._hidden_params["additional_headers"]
+
+        cost = model_response._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
+        assert cost == 0.00025
+
+    def test_cost_extraction_error_handling(self):
+        """Test error handling during cost extraction."""
+        from litellm.llms.perplexity.chat.transformation import PerplexityChatConfig
+
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+
+        # Mock raw response with invalid cost data
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "cost": "invalid_cost"  # Invalid cost type
+            }
+        }
+
+        # Test cost extraction - should not raise error
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Should not have cost in hidden params due to error
+        if hasattr(model_response, "_hidden_params"):
+            assert "llm_provider-x-litellm-response-cost" not in model_response._hidden_params.get("additional_headers", {})
\ No newline at end of file
diff --git a/tests/test_litellm/llms/perplexity/test_perplexity_integration.py b/tests/test_litellm/llms/perplexity/test_perplexity_integration.py
index ae72b8a9625e..c8eb3793213f 100644
--- a/tests/test_litellm/llms/perplexity/test_perplexity_integration.py
+++ b/tests/test_litellm/llms/perplexity/test_perplexity_integration.py
@@ -316,4 +316,169 @@ def test_case_insensitive_provider_matching(self, provider_name):
         expected_completion_cost = (50 * 8e-6) + (1 / 1000 * 0.005)
 
         assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-6)
-        assert math.isclose(completion_cost_val, expected_completion_cost, rel_tol=1e-6)
\ No newline at end of file
+        assert math.isclose(completion_cost_val, expected_completion_cost, rel_tol=1e-6)
+
+    def test_cost_extraction_from_api_response(self):
+        """Test cost extraction from Perplexity API response."""
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+        model_response.model = "perplexity/sonar-pro"
+
+        # Mock raw response with cost
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "total_cost": 0.00015
+            }
+        }
+
+        # Test cost extraction
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Check that cost was stored in hidden params
+        assert hasattr(model_response, "_hidden_params")
+        assert "additional_headers" in model_response._hidden_params
+        assert "llm_provider-x-litellm-response-cost" in model_response._hidden_params["additional_headers"]
+
+        cost = model_response._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
+        assert cost == 0.00015
+
+    def test_cost_extraction_integration_with_main_calculator(self):
+        """Test that extracted cost takes priority over calculated cost."""
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+        model_response.model = "perplexity/sonar-pro"
+
+        # Mock raw response with cost
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "total_cost": 0.00015
+            }
+        }
+
+        # Extract cost
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Test main cost calculator - should use extracted cost
+        from litellm.cost_calculator import response_cost_calculator
+        cost = response_cost_calculator(
+            response_object=model_response,
+            model="perplexity/sonar-pro",
+            custom_llm_provider="perplexity",
+            call_type="completion",
+            optional_params={}
+        )
+
+        # Should return the extracted cost, not calculated cost
+        assert cost == 0.00015
+
+    def test_cost_extraction_nested_structure(self):
+        """Test cost extraction from nested usage structure."""
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+
+        # Mock raw response with nested cost structure
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "cost": {
+                    "total_cost": 0.00025
+                }
+            }
+        }
+
+        # Test cost extraction
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Check that cost was stored in hidden params
+        assert hasattr(model_response, "_hidden_params")
+        assert "additional_headers" in model_response._hidden_params
+        assert "llm_provider-x-litellm-response-cost" in model_response._hidden_params["additional_headers"]
+
+        cost = model_response._hidden_params["additional_headers"]["llm_provider-x-litellm-response-cost"]
+        assert cost == 0.00025
+
+    def test_cost_extraction_error_handling(self):
+        """Test error handling during cost extraction."""
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+
+        # Mock raw response with invalid cost data
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "cost": "invalid_cost"  # Invalid cost type
+            }
+        }
+
+        # Test cost extraction - should not raise error
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Should not have cost in hidden params due to error
+        if hasattr(model_response, "_hidden_params"):
+            assert "llm_provider-x-litellm-response-cost" not in model_response._hidden_params.get("additional_headers", {})
+
+    def test_cost_extraction_no_usage_data(self):
+        """Test handling when no usage data is present."""
+        config = PerplexityChatConfig()
+
+        # Create a ModelResponse
+        model_response = ModelResponse()
+        model_response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150
+        )
+
+        # Mock raw response without usage
+        raw_response_json = {
+            "choices": [{"message": {"content": "Test response"}}]
+        }
+
+        # Test cost extraction - should not raise error
+        config.add_cost_to_usage(model_response, raw_response_json)
+
+        # Should not have cost in hidden params
+        if hasattr(model_response, "_hidden_params"):
+            assert "llm_provider-x-litellm-response-cost" not in model_response._hidden_params.get("additional_headers", {})
\ No newline at end of file

From b033e33b197fb3021eaa02fff2ddf8443b35298b Mon Sep 17 00:00:00 2001
From: Sameer Kankute
Date: Thu, 16 Oct 2025 00:30:04 +0530
Subject: [PATCH 2/3] fix lint errors

---
 .../llms/perplexity/chat/transformation.py    | 60 +++++++++----------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/litellm/llms/perplexity/chat/transformation.py b/litellm/llms/perplexity/chat/transformation.py
index 50fc3f657897..a0ec9f325a8f 100644
--- a/litellm/llms/perplexity/chat/transformation.py
+++ b/litellm/llms/perplexity/chat/transformation.py
@@ -2,8 +2,7 @@
 
 from __future__ import annotations
 
-import re
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Optional, Tuple
 
 import litellm
 from litellm._logging import verbose_logger
@@ -31,9 +30,9 @@ def custom_llm_provider(self) -> str | None:
         return "perplexity"
 
     def _get_openai_compatible_provider_info(
-        self, api_base: str | None, api_key: str | None,
-    ) -> tuple[str | None, str | None]:
-        api_base = api_base or get_secret_str("PERPLEXITY_API_BASE") or "https://api.perplexity.ai"  # type: ignore[assignment]
+        self, api_base: Optional[str], api_key: Optional[str]
+    ) -> Tuple[Optional[str], Optional[str]]:
+        api_base = api_base or get_secret_str("PERPLEXITY_API_BASE") or "https://api.perplexity.ai"  # type: ignore
         dynamic_api_key = (
             api_key
             or get_secret_str("PERPLEXITYAI_API_KEY")
@@ -41,10 +40,9 @@ def _get_openai_compatible_provider_info(
         )
         return api_base, dynamic_api_key
 
-    def get_supported_openai_params(self, model: str) -> list[str]:
-        """Get supported OpenAI parameters for Perplexity.
-
-        Perplexity supports a subset of OpenAI params.
+    def get_supported_openai_params(self, model: str) -> list:
+        """
+        Perplexity supports a subset of OpenAI params
 
         Ref: https://docs.perplexity.ai/api-reference/chat-completions
 
@@ -65,22 +63,23 @@ def get_supported_openai_params(self, model: str) -> list[str]:
 
         try:
             if litellm.supports_reasoning(
-                model=model, custom_llm_provider=self.custom_llm_provider,
+                model=model, custom_llm_provider=self.custom_llm_provider
             ):
                 base_openai_params.append("reasoning_effort")
-        except (ValueError, TypeError) as e:
+        except Exception as e:
             verbose_logger.debug(f"Error checking if model supports reasoning: {e}")
-
+        
         try:
             if litellm.supports_web_search(
-                model=model, custom_llm_provider=self.custom_llm_provider,
+                model=model, custom_llm_provider=self.custom_llm_provider
             ):
                 base_openai_params.append("web_search_options")
-        except (ValueError, TypeError) as e:
+        except Exception as e:
             verbose_logger.debug(f"Error checking if model supports web search: {e}")
-
+        
         return base_openai_params
 
+
     def transform_response(  # noqa: PLR0913
         self,
         model: str,
@@ -91,9 +90,9 @@ def transform_response(  # noqa: PLR0913
         messages: list[AllMessageValues],
         optional_params: dict,
         litellm_params: dict,
-        encoding: Any,  # noqa: ANN401
+        encoding: Any,
         api_key: str | None = None,
-        json_mode: bool | None = None,  # noqa: FBT001
+        json_mode: bool | None = None,
     ) -> ModelResponse:
         """Transform Perplexity response to standard format."""
         # Call the parent transform_response first to handle the standard transformation
@@ -124,7 +123,7 @@ def transform_response(  # noqa: PLR0913
 
         return model_response
 
-    def _enhance_usage_with_perplexity_fields(  # noqa: C901
+    def _enhance_usage_with_perplexity_fields(
         self, model_response: ModelResponse, raw_response_json: dict,
     ) -> None:
         """Extract citation tokens and search queries from Perplexity API response.
@@ -182,12 +181,12 @@ def _enhance_usage_with_perplexity_fields(
         if num_search_queries is not None and num_search_queries > 0:
             usage.prompt_tokens_details.web_search_requests = num_search_queries
 
-    def _add_citations_as_annotations(  # noqa: C901, PLR0912
-        self, model_response: ModelResponse, raw_response_json: dict,
+    def _add_citations_as_annotations(
+        self, model_response: ModelResponse, raw_response_json: dict
     ) -> None:
-        """Extract citations and search_results from Perplexity API response.
-
-        Add them as ChatCompletionAnnotation objects to the message.
+        """
+        Extract citations and search_results from Perplexity API response
+        and add them as ChatCompletionAnnotation objects to the message.
         """
         if not model_response.choices:
             return
@@ -216,6 +215,8 @@ def _add_citations_as_annotations(
             return
 
         # Find all citation markers like [1], [2], [3], [4] in the text
+        import re
+
         citation_pattern = r"\[(\d+)\]"
         citation_matches = list(re.finditer(citation_pattern, content))
 
@@ -255,10 +256,9 @@ def _add_citations_as_annotations(
 
         # Also add the raw citations and search_results as attributes for backward compatibility
         if citations:
-            model_response.citations = citations  # type: ignore[attr-defined]
+            setattr(model_response, "citations", citations)
         if search_results:
-            model_response.search_results = search_results  # type: ignore[attr-defined]
-
+            setattr(model_response, "search_results", search_results)
 
     def add_cost_to_usage(self, model_response: ModelResponse, raw_response_json: dict) -> None:
         """Add the cost to the usage object."""
@@ -281,10 +281,10 @@ def add_cost_to_usage(self, model_response: ModelResponse, raw_response_json: di
                 if response_cost is not None:
                     # Store cost in hidden params for the cost calculator to use
                     if not hasattr(model_response, "_hidden_params"):
-                        model_response._hidden_params = {}  # noqa: SLF001
-                    if "additional_headers" not in model_response._hidden_params:  # noqa: SLF001
-                        model_response._hidden_params["additional_headers"] = {}  # noqa: SLF001
-                    model_response._hidden_params["additional_headers"][  # noqa: SLF001
+                        model_response._hidden_params = {}
+                    if "additional_headers" not in model_response._hidden_params:
+                        model_response._hidden_params["additional_headers"] = {}
+                    model_response._hidden_params["additional_headers"][
                         "llm_provider-x-litellm-response-cost"
                     ] = float(response_cost)
         except (ValueError, TypeError, KeyError) as e:

From e109ba4cf23ccee4c9654eef3218ee6b248bbfeb Mon Sep 17 00:00:00 2001
From: Sameer Kankute
Date: Fri, 17 Oct 2025 08:55:21 +0530
Subject: [PATCH 3/3] fix code

---
 litellm/llms/perplexity/chat/transformation.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/litellm/llms/perplexity/chat/transformation.py b/litellm/llms/perplexity/chat/transformation.py
index a0ec9f325a8f..8c536e395b38 100644
--- a/litellm/llms/perplexity/chat/transformation.py
+++ b/litellm/llms/perplexity/chat/transformation.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, Optional, Tuple
+from typing import TYPE_CHECKING, Any, List, Optional, Tuple
 
 import litellm
 from litellm._logging import verbose_logger
@@ -87,12 +87,12 @@ def transform_response(  # noqa: PLR0913
         model_response: ModelResponse,
         logging_obj: LiteLLMLoggingObj,
         request_data: dict,
-        messages: list[AllMessageValues],
+        messages: List[AllMessageValues],
         optional_params: dict,
         litellm_params: dict,
         encoding: Any,
-        api_key: str | None = None,
-        json_mode: bool | None = None,
+        api_key: Optional[str] = None,
+        json_mode: Optional[bool] = None,
     ) -> ModelResponse:
         """Transform Perplexity response to standard format."""
         # Call the parent transform_response first to handle the standard transformation