feat(integrations): add litellm integration #4864
base: master
Changes from 1 commit
@@ -0,0 +1,281 @@
from typing import TYPE_CHECKING

import sentry_sdk
from sentry_sdk import consts
from sentry_sdk.ai.monitoring import record_token_usage
from sentry_sdk.ai.utils import get_start_span_function, set_data_normalized
from sentry_sdk.consts import SPANDATA
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.utils import event_from_exception

if TYPE_CHECKING:
    from typing import Any, Dict
    from datetime import datetime

try:
    import litellm
except ImportError:
    raise DidNotEnable("LiteLLM not installed")


def _get_provider_from_model(model):
    # type: (str) -> str
    """Extract provider name from model string using LiteLLM's logic"""
    if not model:
        return "unknown"

    # Common provider prefixes/patterns
    if model.startswith("gpt-") or model.startswith("o1-") or "openai/" in model:
        return "openai"
    elif model.startswith("claude-") or "anthropic/" in model:
        return "anthropic"
    elif (
        model.startswith("gemini-")
        or "google/" in model
        or model.startswith("vertex_ai/")
    ):
        return "google"
    elif "cohere/" in model or model.startswith("command-"):
        return "cohere"
    elif "azure/" in model:
        return "azure"
    elif "bedrock/" in model:
        return "bedrock"
    elif "ollama/" in model:
        return "ollama"
    else:
        # Try to use LiteLLM's internal provider detection if available
        try:
            if hasattr(litellm, "get_llm_provider"):
                provider_info = litellm.get_llm_provider(model)
Comment on lines +50 to +51: I was trying to find out if calling this function could potentially have unintended side-effects (like making a network call, I suppose, since it might, for some reason, return a …). Hopefully using this is ok side-effect-wise, since we only provide it with a model (and not api key/base etc.).
                if isinstance(provider_info, tuple) and len(provider_info) > 1:
                    return provider_info[1] or "unknown"
            return "unknown"
        except Exception:
            return "unknown"
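A quick usage sketch of the helper above, based only on the prefix checks visible in this diff (the model strings are hypothetical examples):

```python
# Sketch: expected outputs of _get_provider_from_model for a few model strings,
# derived from the prefix/substring checks shown in the diff above.
assert _get_provider_from_model("gpt-4o") == "openai"
assert _get_provider_from_model("claude-3-opus-20240229") == "anthropic"
assert _get_provider_from_model("vertex_ai/gemini-pro") == "google"
assert _get_provider_from_model("") == "unknown"
```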
def _input_callback(
    kwargs,  # type: Dict[str, Any]
):
    # type: (...) -> None
    """Handle the start of a request."""
    integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)

    if integration is None:
        return

    # Get key parameters
    model = kwargs.get("model", "")
    messages = kwargs.get("messages", [])
    operation = "chat" if messages else "embeddings"

    # Start a new span/transaction
    span = get_start_span_function()(
        op=(
            consts.OP.GEN_AI_CHAT
            if operation == "chat"
            else consts.OP.GEN_AI_EMBEDDINGS
        ),
        name=f"{operation} {model}",
        origin=LiteLLMIntegration.origin,
    )
    span.__enter__()
Comment on lines +75 to +84: We start a transaction if we don't have one ready yet.
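For context on the comment above, get_start_span_function (imported at the top of this file) is assumed to pick between starting a child span and starting a new transaction. A rough sketch of that assumed behaviour, not the actual SDK implementation:

```python
# Sketch of the assumed behaviour of get_start_span_function (illustrative only):
# if a transaction is already active, start a child span for the LLM call;
# otherwise start a new transaction so the call is still captured.
def _assumed_get_start_span_function():
    current_span = sentry_sdk.get_current_span()
    if current_span is not None and current_span.containing_transaction is not None:
        return sentry_sdk.start_span
    return sentry_sdk.start_transaction
```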
    # Store span for later
    kwargs["_sentry_span"] = span
Comment: I'm wondering if we can store this in …
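The comment above is cut off; one hypothetical alternative to stashing the span in the caller's kwargs would be a contextvars-based store. This sketch is not part of the PR and the names are invented:

```python
import contextvars

# Hypothetical alternative: keep the active LiteLLM span in a context variable
# instead of writing it into the user's kwargs dict. Note this only helps if
# the success/failure callbacks run in the same context as the input callback.
_litellm_span = contextvars.ContextVar("litellm_sentry_span", default=None)


def _remember_span(span):
    # type: (Any) -> None
    _litellm_span.set(span)


def _pop_span():
    # type: () -> Any
    span = _litellm_span.get()
    _litellm_span.set(None)
    return span
```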
    # Set basic data
    set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "litellm")
Comment: Should this be …
Reply: The actual provider, as far as I understand. OTel says: …
The attribute itself is deprecated and was renamed to …
    set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)
    set_data_normalized(
        span, "gen_ai.litellm.provider", _get_provider_from_model(model)
    )
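If the reviewer's point above (reporting the actual upstream provider rather than the literal "litellm") were adopted, the line setting GEN_AI_SYSTEM might instead look like this sketch inside _input_callback, reusing the helper from this diff:

```python
# Sketch only: report the detected upstream provider (openai, anthropic, ...)
# instead of the hard-coded "litellm" value.
set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, _get_provider_from_model(model))
```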
    # Record messages if allowed
    if messages and should_send_default_pii() and integration.include_prompts:
        set_data_normalized(
            span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False
        )

    # Record other parameters
    params = {
        "model": SPANDATA.GEN_AI_REQUEST_MODEL,
        "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING,
        "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS,
        "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY,
        "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,
        "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE,
        "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P,
    }
Comment on lines +103 to +111: It is not clear where to actually put these parameters in the arguments to …
Reply: Don't understand this comment, can you elaborate? What do the params have to do with …
    for key, attribute in params.items():
        value = kwargs.get(key)
        if value is not None:
            set_data_normalized(span, attribute, value)
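To make the mapping concrete, for a hypothetical call such as litellm.completion(model="gpt-4o", temperature=0.2, max_tokens=128, ...), the loop above would record roughly the following span data (illustrative values only):

```python
# Illustrative only: span data written by the loop above for the hypothetical
# call mentioned in the text. Parameters that were not passed (stream, top_p,
# the penalties) are skipped because kwargs.get(key) returns None for them.
expected = {
    SPANDATA.GEN_AI_REQUEST_MODEL: "gpt-4o",
    SPANDATA.GEN_AI_REQUEST_TEMPERATURE: 0.2,
    SPANDATA.GEN_AI_REQUEST_MAX_TOKENS: 128,
}
```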
    # Record LiteLLM-specific parameters
    litellm_params = {
        "api_base": kwargs.get("api_base"),
        "api_version": kwargs.get("api_version"),
        "custom_llm_provider": kwargs.get("custom_llm_provider"),
    }
    for key, value in litellm_params.items():
        if value is not None:
            set_data_normalized(span, f"gen_ai.litellm.{key}", value)
Comment on lines +59 to +126: Potential bug: The LiteLLM integration callbacks lack …
def _success_callback(
    kwargs,  # type: Dict[str, Any]
    completion_response,  # type: Any
    start_time,  # type: datetime
    end_time,  # type: datetime
):
    # type: (...) -> None
    """Handle successful completion."""

    span = kwargs.get("_sentry_span")
    if span is None:
        return

    integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
    if integration is None:
        return

    try:
        # Record model information
        if hasattr(completion_response, "model"):
            set_data_normalized(
                span, SPANDATA.GEN_AI_RESPONSE_MODEL, completion_response.model
            )

        # Record response content if allowed
        if should_send_default_pii() and integration.include_prompts:
            if hasattr(completion_response, "choices"):
                response_messages = []
                for choice in completion_response.choices:
                    if hasattr(choice, "message"):
                        if hasattr(choice.message, "model_dump"):
                            response_messages.append(choice.message.model_dump())
                        elif hasattr(choice.message, "dict"):
                            response_messages.append(choice.message.dict())
                        else:
                            # Fallback for basic message objects
                            msg = {}
                            if hasattr(choice.message, "role"):
                                msg["role"] = choice.message.role
                            if hasattr(choice.message, "content"):
                                msg["content"] = choice.message.content
                            if hasattr(choice.message, "tool_calls"):
                                msg["tool_calls"] = choice.message.tool_calls
                            response_messages.append(msg)

                if response_messages:
                    set_data_normalized(
                        span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_messages
                    )

        # Record token usage
        if hasattr(completion_response, "usage"):
            usage = completion_response.usage
            record_token_usage(
                span,
                input_tokens=getattr(usage, "prompt_tokens", None),
                output_tokens=getattr(usage, "completion_tokens", None),
                total_tokens=getattr(usage, "total_tokens", None),
            )

    finally:
        # Always finish the span and clean up
        span.__exit__(None, None, None)


def _failure_callback(
    kwargs,  # type: Dict[str, Any]
    exception,  # type: Exception
    start_time,  # type: datetime
    end_time,  # type: datetime
):
    # type: (...) -> None
    """Handle request failure."""
    span = kwargs.get("_sentry_span")

    try:
        # Capture the exception
        event, hint = event_from_exception(
            exception,
            client_options=sentry_sdk.get_client().options,
            mechanism={"type": "litellm", "handled": False},
        )
        sentry_sdk.capture_event(event, hint=hint)
    finally:
        # Always finish the span and clean up
        span.__exit__(None, None, None)
cursor[bot] marked this conversation as resolved.
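The review thread here is truncated, but one concern is visible from the code itself: unlike _success_callback, _failure_callback does not check whether span is None before calling span.__exit__, which would raise if the input callback never ran. A guarded variant might look like this sketch (not part of the PR):

```python
# Sketch only: same failure handler, but tolerating a missing span.
def _failure_callback_guarded(kwargs, exception, start_time, end_time):
    # type: (Dict[str, Any], Exception, datetime, datetime) -> None
    span = kwargs.get("_sentry_span")
    try:
        event, hint = event_from_exception(
            exception,
            client_options=sentry_sdk.get_client().options,
            mechanism={"type": "litellm", "handled": False},
        )
        sentry_sdk.capture_event(event, hint=hint)
    finally:
        if span is not None:
            span.__exit__(None, None, None)
```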
Comment: Bug: Race Condition in LiteLLM Callbacks
The LiteLLM integration has a race condition where spans started in …
class LiteLLMIntegration(Integration):
    """
    LiteLLM integration for Sentry.

    This integration automatically captures LiteLLM API calls and sends them to Sentry
    for monitoring and error tracking. It supports all 100+ LLM providers that LiteLLM
    supports, including OpenAI, Anthropic, Google, Cohere, and many others.

    Features:
    - Automatic exception capture for all LiteLLM calls
    - Token usage tracking across all providers
    - Provider detection and attribution
    - Input/output message capture (configurable)
    - Streaming response support
    - Cost tracking integration

    Usage:
    ```python
    import litellm
    import sentry_sdk

    # Initialize Sentry with the LiteLLM integration
    sentry_sdk.init(
        dsn="your-dsn",
Comment: I'd also include …
        integrations=[
            sentry_sdk.integrations.LiteLLMIntegration(
                include_prompts=True  # Set to False to exclude message content
            )
        ]
    )

    # All LiteLLM calls will now be monitored
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}]
    )
    ```

    Configuration:
    - include_prompts (bool): Whether to include prompts and responses in spans.
      Defaults to True. Set to False to exclude potentially sensitive data.
    """
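The truncated review note above ("I'd also include …") most likely refers to an extra sentry_sdk.init option; a plausible candidate is send_default_pii=True, since the prompt and response capture in this integration is gated on should_send_default_pii(). A sketch of that assumed suggestion (the import path is assumed from where this module would live):

```python
import sentry_sdk
from sentry_sdk.integrations.litellm import LiteLLMIntegration  # assumed path

# Sketch (assumed suggestion, not confirmed by the truncated comment):
# without send_default_pii=True, messages are not attached to spans even when
# include_prompts=True, because capture is gated on should_send_default_pii().
sentry_sdk.init(
    dsn="your-dsn",  # placeholder DSN
    send_default_pii=True,
    integrations=[LiteLLMIntegration(include_prompts=True)],
)
```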
    identifier = "litellm"
    origin = f"auto.ai.{identifier}"

    def __init__(self, include_prompts=True):
        # type: (LiteLLMIntegration, bool) -> None
        self.include_prompts = include_prompts

    @staticmethod
    def setup_once():
        # type: () -> None
        """Set up LiteLLM callbacks for monitoring."""
        litellm.input_callback = litellm.input_callback or []
        if _input_callback not in litellm.input_callback:
            litellm.input_callback.append(_input_callback)

        litellm.success_callback = litellm.success_callback or []
        if _success_callback not in litellm.success_callback:
            litellm.success_callback.append(_success_callback)

        litellm.failure_callback = litellm.failure_callback or []
        if _failure_callback not in litellm.failure_callback:
            litellm.failure_callback.append(_failure_callback)
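A small sanity check of the registration logic above, written as a hypothetical test; it relies only on the module-level callback lists exactly as they are used in this diff:

```python
# Hypothetical test: setup_once should be idempotent and never register the
# same callback twice, even if called repeatedly.
def test_setup_once_registers_callbacks_once():
    LiteLLMIntegration.setup_once()
    LiteLLMIntegration.setup_once()

    assert litellm.input_callback.count(_input_callback) == 1
    assert litellm.success_callback.count(_success_callback) == 1
    assert litellm.failure_callback.count(_failure_callback) == 1
```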
Comment on lines +277 to +283: It seems as if both …
Reply: There is definitely the potential for a timing issue, but I don't see a way around it at the moment, since the LiteLLM integration might not be in control of the overarching transaction. From your testing when developing this, was this a real issue when something like a web framework was managing the transaction?
Comment: I'd prefer we don't enable the integration by default from the start, for several reasons: