1 change: 1 addition & 0 deletions sentry_sdk/integrations/__init__.py
@@ -97,6 +97,7 @@ def iter_default_integrations(with_auto_enabling_integrations):
"sentry_sdk.integrations.langchain.LangchainIntegration",
"sentry_sdk.integrations.langgraph.LanggraphIntegration",
"sentry_sdk.integrations.litestar.LitestarIntegration",
"sentry_sdk.integrations.litellm.LiteLLMIntegration",
Contributor: I'd prefer we don't enable the integration by default from the start, for several reasons:
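
For reference, if the integration is not auto-enabled, users would opt in explicitly. A minimal sketch of that setup (the DSN is a placeholder):

```python
import sentry_sdk
from sentry_sdk.integrations.litellm import LiteLLMIntegration

sentry_sdk.init(
    dsn="your-dsn",  # placeholder
    integrations=[LiteLLMIntegration()],
)
```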

"sentry_sdk.integrations.loguru.LoguruIntegration",
"sentry_sdk.integrations.openai.OpenAIIntegration",
"sentry_sdk.integrations.pymongo.PyMongoIntegration",
281 changes: 281 additions & 0 deletions sentry_sdk/integrations/litellm.py
@@ -0,0 +1,281 @@
from typing import TYPE_CHECKING

import sentry_sdk
from sentry_sdk import consts
from sentry_sdk.ai.monitoring import record_token_usage
from sentry_sdk.ai.utils import get_start_span_function, set_data_normalized
from sentry_sdk.consts import SPANDATA
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.utils import event_from_exception

if TYPE_CHECKING:
from typing import Any, Dict
from datetime import datetime

try:
import litellm
except ImportError:
raise DidNotEnable("LiteLLM not installed")


def _get_provider_from_model(model):
# type: (str) -> str
"""Extract provider name from model string using LiteLLM's logic"""
if not model:
return "unknown"

# Common provider prefixes/patterns
if model.startswith("gpt-") or model.startswith("o1-") or "openai/" in model:
return "openai"
elif model.startswith("claude-") or "anthropic/" in model:
return "anthropic"
elif (
model.startswith("gemini-")
or "google/" in model
or model.startswith("vertex_ai/")
):
return "google"
elif "cohere/" in model or model.startswith("command-"):
return "cohere"
elif "azure/" in model:
return "azure"
elif "bedrock/" in model:
return "bedrock"
elif "ollama/" in model:
return "ollama"
else:
# Try to use LiteLLM's internal provider detection if available
try:
if hasattr(litellm, "get_llm_provider"):
provider_info = litellm.get_llm_provider(model)
Comment on lines +50 to +51
Contributor: I was trying to find out whether calling this function could have unintended side effects (like making a network call, I suppose, since it might, for some reason, return a BadRequestError), and I quickly got overwhelmed by the spaghettiness of the code; it's almost impossible to parse. Hopefully using this is ok side-effect-wise, since we only provide it with a model (and not an API key/base etc.).

if isinstance(provider_info, tuple) and len(provider_info) > 1:
return provider_info[1] or "unknown"
return "unknown"
except Exception:
return "unknown"


def _input_callback(
kwargs, # type: Dict[str, Any]
):
# type: (...) -> None
"""Handle the start of a request."""
integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)

if integration is None:
return

# Get key parameters
model = kwargs.get("model", "")
messages = kwargs.get("messages", [])
operation = "chat" if messages else "embeddings"

# Start a new span/transaction
span = get_start_span_function()(
op=(
consts.OP.GEN_AI_CHAT
if operation == "chat"
else consts.OP.GEN_AI_EMBEDDINGS
),
name=f"{operation} {model}",
origin=LiteLLMIntegration.origin,
)
span.__enter__()
Comment on lines +75 to +84
Contributor Author: We start a transaction if we don't have one ready yet.
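
For context, a rough sketch of what get_start_span_function is assumed to do here (an approximation, not the actual SDK source): return start_span when a transaction is already active, and start_transaction otherwise, so the LLM call is never recorded without a parent.

```python
import sentry_sdk

def get_start_span_function():
    # Approximation: if a transaction is already running, attach a child span;
    # otherwise start a new transaction so the LLM call still shows up in tracing.
    current_span = sentry_sdk.get_current_span()
    transaction_exists = (
        current_span is not None and current_span.containing_transaction is not None
    )
    return sentry_sdk.start_span if transaction_exists else sentry_sdk.start_transaction
```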


# Store span for later
kwargs["_sentry_span"] = span
Contributor: I'm wondering if we can store this in litellm_params.metadata instead? At least on this page it says that is supposed to be "your custom metadata", which could be less intrusive than adding a kwarg.
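
A sketch of what that could look like, assuming litellm_params and its metadata dict are plain dicts when present and survive the round trip to the success/failure callbacks (to be verified against LiteLLM's callback contract; the _sentry_span key stays the same):

```python
# In _input_callback: stash the span in LiteLLM's user metadata instead of a
# top-level kwarg (assumption: this dict is passed through to the callbacks unchanged).
metadata = kwargs.setdefault("litellm_params", {}).setdefault("metadata", {})
metadata["_sentry_span"] = span

# In _success_callback / _failure_callback: read it back defensively.
span = ((kwargs.get("litellm_params") or {}).get("metadata") or {}).get("_sentry_span")
```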


# Set basic data
set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "litellm")
Contributor Author: Should this be litellm, or the actual provider being used (anthropic, openai, ...)?

Contributor: The actual provider, as far as I understand. OTel says: "The Generative AI provider as identified by the client or server instrumentation." The attribute itself is deprecated and was renamed to gen_ai.provider.name. I'm not sure what the plan is for keeping up with OTel's semantic conventions; you probably know better whether we want to use the old or the new name.
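
If the integration goes with the actual provider, a rough sketch (whether to also emit the newer gen_ai.provider.name key is an open question; the plain string key is an assumption, not an existing SPANDATA constant):

```python
# Sketch: report the resolved provider (e.g. "anthropic", "openai") instead of
# the literal "litellm".
provider = _get_provider_from_model(model)
set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
# Optionally mirror it onto the newer OTel attribute name as well.
set_data_normalized(span, "gen_ai.provider.name", provider)
```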

set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)
set_data_normalized(
span, "gen_ai.litellm.provider", _get_provider_from_model(model)
)

# Record messages if allowed
if messages and should_send_default_pii() and integration.include_prompts:
set_data_normalized(
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False
)

# Record other parameters
params = {
"model": SPANDATA.GEN_AI_REQUEST_MODEL,
"stream": SPANDATA.GEN_AI_RESPONSE_STREAMING,
"max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS,
"presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY,
"frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,
"temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE,
"top_p": SPANDATA.GEN_AI_REQUEST_TOP_P,
}
Comment on lines +103 to +111
Contributor Author: It is not clear where these parameters are actually supposed to go in the arguments to completion.

Contributor: I don't understand this comment, can you elaborate? What do the params have to do with completion?

for key, attribute in params.items():
value = kwargs.get(key)
if value is not None:
set_data_normalized(span, attribute, value)

# Record LiteLLM-specific parameters
litellm_params = {
"api_base": kwargs.get("api_base"),
"api_version": kwargs.get("api_version"),
"custom_llm_provider": kwargs.get("custom_llm_provider"),
}
for key, value in litellm_params.items():
if value is not None:
set_data_normalized(span, f"gen_ai.litellm.{key}", value)

Comment on lines +59 to +126
Potential bug: the LiteLLM integration callbacks lack capture_internal_exceptions() protection, so internal SDK errors could propagate into the user's application and crash it.
  • Description: The integration registers callbacks (_input_callback, _success_callback, _failure_callback) that execute during a LiteLLM API call. Unlike other Sentry AI integrations, these callbacks are not wrapped in capture_internal_exceptions(). An exception raised inside them, for example while calling litellm.get_llm_provider() or parsing the response object, will not be caught and will propagate up to the user's application, potentially crashing it. This deviates from the established SDK pattern of isolating internal exceptions from user code.
  • Suggested fix: Wrap the entire body of _input_callback, _success_callback, and _failure_callback in the capture_internal_exceptions() context manager so that any exceptions are caught and logged as internal SDK errors rather than crashing the user's application.
    severity: 0.75, confidence: 0.9
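
A sketch of that suggested fix, assuming the capture_internal_exceptions context manager from sentry_sdk.utils (used by other integrations) is the intended mechanism; the callback body is elided:

```python
from sentry_sdk.utils import capture_internal_exceptions

def _input_callback(kwargs):
    # type: (Dict[str, Any]) -> None
    # Handle the start of a request without letting SDK errors reach user code.
    with capture_internal_exceptions():
        # ... existing body unchanged: resolve the integration, start the span,
        # and record the request data ...
        ...
```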


def _success_callback(
kwargs, # type: Dict[str, Any]
completion_response, # type: Any
start_time, # type: datetime
end_time, # type: datetime
):
# type: (...) -> None
"""Handle successful completion."""

span = kwargs.get("_sentry_span")
if span is None:
return

integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
if integration is None:
return

try:
# Record model information
if hasattr(completion_response, "model"):
set_data_normalized(
span, SPANDATA.GEN_AI_RESPONSE_MODEL, completion_response.model
)

# Record response content if allowed
if should_send_default_pii() and integration.include_prompts:
if hasattr(completion_response, "choices"):
response_messages = []
for choice in completion_response.choices:
if hasattr(choice, "message"):
if hasattr(choice.message, "model_dump"):
response_messages.append(choice.message.model_dump())
elif hasattr(choice.message, "dict"):
response_messages.append(choice.message.dict())
else:
# Fallback for basic message objects
msg = {}
if hasattr(choice.message, "role"):
msg["role"] = choice.message.role
if hasattr(choice.message, "content"):
msg["content"] = choice.message.content
if hasattr(choice.message, "tool_calls"):
msg["tool_calls"] = choice.message.tool_calls
response_messages.append(msg)

if response_messages:
set_data_normalized(
span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_messages
)

# Record token usage
if hasattr(completion_response, "usage"):
usage = completion_response.usage
record_token_usage(
span,
input_tokens=getattr(usage, "prompt_tokens", None),
output_tokens=getattr(usage, "completion_tokens", None),
total_tokens=getattr(usage, "total_tokens", None),
)

finally:
# Always finish the span and clean up
span.__exit__(None, None, None)


def _failure_callback(
kwargs, # type: Dict[str, Any]
exception, # type: Exception
start_time, # type: datetime
end_time, # type: datetime
):
# type: (...) -> None
"""Handle request failure."""
span = kwargs.get("_sentry_span")

try:
# Capture the exception
event, hint = event_from_exception(
exception,
client_options=sentry_sdk.get_client().options,
mechanism={"type": "litellm", "handled": False},
)
sentry_sdk.capture_event(event, hint=hint)
finally:
# Always finish the span and clean up
span.__exit__(None, None, None)
Contributor: The __exit__ here should be given the exception so that we set the span status correctly to failed.
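
Something along these lines (a sketch; the exact status mapping is up to the span implementation):

```python
# Hand the real exception to the span context manager so the span status is
# marked as errored instead of ok.
span.__exit__(type(exception), exception, exception.__traceback__)
```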


Bug: Race condition in LiteLLM callbacks

The LiteLLM integration has a race condition: spans started in _input_callback are finished in separate threads by _success_callback or _failure_callback. These callbacks may complete after the parent transaction has finished, causing the spans to be dropped from traces. Additionally, _failure_callback incorrectly calls span.__exit__ with None arguments, failing to record the failure state.


class LiteLLMIntegration(Integration):
"""
LiteLLM integration for Sentry.
This integration automatically captures LiteLLM API calls and sends them to Sentry
for monitoring and error tracking. It supports all 100+ LLM providers that LiteLLM
supports, including OpenAI, Anthropic, Google, Cohere, and many others.
Features:
- Automatic exception capture for all LiteLLM calls
- Token usage tracking across all providers
- Provider detection and attribution
- Input/output message capture (configurable)
- Streaming response support
- Cost tracking integration
Usage:
```python
import litellm
import sentry_sdk
# Initialize Sentry with the LiteLLM integration
sentry_sdk.init(
dsn="your-dsn",
Contributor: I'd also include send_default_pii=True in the example here so that folks get the most out of the box by default, maybe with a small comment on what it actually controls in this integration, like you did for include_prompts.
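
A sketch of what the amended example could look like (reviewer suggestion, not the committed docstring; the DSN is a placeholder):

```python
import sentry_sdk
from sentry_sdk.integrations.litellm import LiteLLMIntegration

sentry_sdk.init(
    dsn="your-dsn",
    # Prompts and responses are only attached when send_default_pii is enabled;
    # include_prompts additionally has to be left at its default of True.
    send_default_pii=True,
    integrations=[LiteLLMIntegration(include_prompts=True)],
)
```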

integrations=[
sentry_sdk.integrations.LiteLLMIntegration(
include_prompts=True # Set to False to exclude message content
)
]
)
# All LiteLLM calls will now be monitored
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hello!"}]
)
```
Configuration:
- include_prompts (bool): Whether to include prompts and responses in spans.
Defaults to True. Set to False to exclude potentially sensitive data.
"""

identifier = "litellm"
origin = f"auto.ai.{identifier}"

def __init__(self, include_prompts=True):
# type: (LiteLLMIntegration, bool) -> None
self.include_prompts = include_prompts

@staticmethod
def setup_once():
# type: () -> None
"""Set up LiteLLM callbacks for monitoring."""
litellm.input_callback = litellm.input_callback or []
if _input_callback not in litellm.input_callback:
litellm.input_callback.append(_input_callback)

litellm.success_callback = litellm.success_callback or []
if _success_callback not in litellm.success_callback:
litellm.success_callback.append(_success_callback)

litellm.failure_callback = litellm.failure_callback or []
if _failure_callback not in litellm.failure_callback:
litellm.failure_callback.append(_failure_callback)
Comment on lines +277 to +283
Contributor Author: It seems that both success_callback and failure_callback are run in a thread, which might finish after completion returns. Since the span is closed in either callback, it may end up being finished after the surrounding transaction, resulting in it being missing from the trace entirely. This should definitely be pointed out somewhere.

Contributor: There is definitely the potential for a timing issue, but I don't see a way around it at the moment, since the LiteLLM integration might not be in control of the overarching transaction.

From your testing when developing this, was this a real issue when something like a web framework was managing the transaction?
