Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(integrations): Add integration for qdrant #3623

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions sentry_sdk/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,8 @@ class OP:
COHERE_CHAT_COMPLETIONS_CREATE = "ai.chat_completions.create.cohere"
COHERE_EMBEDDINGS_CREATE = "ai.embeddings.create.cohere"
DB = "db"
DB_QDRANT_GRPC = "db.qdrant.grpc"
DB_QDRANT_REST = "db.qdrant.rest"
DB_REDIS = "db.redis"
EVENT_DJANGO = "event.django"
FUNCTION = "function"
Expand Down
40 changes: 40 additions & 0 deletions sentry_sdk/integrations/qdrant/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from sentry_sdk.integrations import DidNotEnable

try:
from qdrant_client.http import ApiClient, AsyncApiClient
import grpc
except ImportError:
raise DidNotEnable("Qdrant client not installed")

from sentry_sdk.integrations import Integration
from sentry_sdk.integrations.qdrant.consts import _IDENTIFIER
from sentry_sdk.integrations.qdrant.qdrant import (
_sync_api_client_send_inner,
_async_api_client_send_inner,
_wrap_channel_sync,
_wrap_channel_async,
)


class QdrantIntegration(Integration):
    """
    Sentry integration that instruments the Qdrant client.

    ``setup_once`` monkeypatches the REST transport (``ApiClient`` and
    ``AsyncApiClient``) and the gRPC channel factories so that calls go through
    the wrappers imported from ``sentry_sdk.integrations.qdrant.qdrant``.
    """

    identifier = _IDENTIFIER

    def __init__(self, mute_children_http_spans=True):
        # type: (bool) -> None
        """
        :param mute_children_http_spans: Stored on the instance as-is. The code
            that reads this flag is not part of this module.
        """
        self.mute_children_http_spans = mute_children_http_spans

    @staticmethod
    def setup_once():
        # type: () -> None
        """Install the REST and gRPC hooks by replacing the client entry points."""

        # hooks for the REST client: (class owning ``send_inner``, wrapper factory)
        rest_hooks = (
            (ApiClient, _sync_api_client_send_inner),
            (AsyncApiClient, _async_api_client_send_inner),
        )
        for client_cls, wrap in rest_hooks:
            client_cls.send_inner = wrap(client_cls.send_inner)

        # hooks for the gRPC client: sync factories live on ``grpc``, async
        # factories on ``grpc.aio``; both expose the same two factory names.
        grpc_hooks = (
            (grpc, _wrap_channel_sync),
            (grpc.aio, _wrap_channel_async),
        )
        for module, wrap in grpc_hooks:
            for factory_name in ("secure_channel", "insecure_channel"):
                setattr(module, factory_name, wrap(getattr(module, factory_name)))
121 changes: 121 additions & 0 deletions sentry_sdk/integrations/qdrant/consts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
from sentry_sdk.integrations.qdrant.path_matching import PathTrie

# Span origin string for this integration. It is not referenced in the visible
# part of this module -- presumably attached to spans elsewhere in the
# integration; TODO confirm against qdrant.py.
SPAN_ORIGIN = "auto.db.qdrant"

# Maps each REST path template to a dict of {http_method: operation_id}.
# Created from https://github.com/qdrant/qdrant/blob/master/docs/redoc/v1.11.x/openapi.json
# Only used for Qdrant's REST API; gRPC uses other identifiers.
_PATH_TO_OPERATION_ID = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure it is a good idea to hardcode this dictionary based on something from Qdrant that could change in future Qdrant versions. It would be better to obtain this information from Qdrant at runtime, to maintain compatibility with future versions.

"/collections/{collection_name}/shards": {"put": "create_shard_key"},
"/collections/{collection_name}/shards/delete": {"post": "delete_shard_key"},
"/": {"get": "root"},
"/telemetry": {"get": "telemetry"},
"/metrics": {"get": "metrics"},
"/locks": {"post": "post_locks", "get": "get_locks"},
"/healthz": {"get": "healthz"},
"/livez": {"get": "livez"},
"/readyz": {"get": "readyz"},
"/issues": {"get": "get_issues", "delete": "clear_issues"},
"/cluster": {"get": "cluster_status"},
"/cluster/recover": {"post": "recover_current_peer"},
"/cluster/peer/{peer_id}": {"delete": "remove_peer"},
"/collections": {"get": "get_collections"},
"/collections/{collection_name}": {
"get": "get_collection",
"put": "create_collection",
"patch": "update_collection",
"delete": "delete_collection",
},
"/collections/aliases": {"post": "update_aliases"},
"/collections/{collection_name}/index": {"put": "create_field_index"},
"/collections/{collection_name}/exists": {"get": "collection_exists"},
"/collections/{collection_name}/index/{field_name}": {
"delete": "delete_field_index"
},
"/collections/{collection_name}/cluster": {
"get": "collection_cluster_info",
"post": "update_collection_cluster",
},
"/collections/{collection_name}/aliases": {"get": "get_collection_aliases"},
"/aliases": {"get": "get_collections_aliases"},
"/collections/{collection_name}/snapshots/upload": {
"post": "recover_from_uploaded_snapshot"
},
"/collections/{collection_name}/snapshots/recover": {
"put": "recover_from_snapshot"
},
"/collections/{collection_name}/snapshots": {
"get": "list_snapshots",
"post": "create_snapshot",
},
"/collections/{collection_name}/snapshots/{snapshot_name}": {
"delete": "delete_snapshot",
"get": "get_snapshot",
},
"/snapshots": {"get": "list_full_snapshots", "post": "create_full_snapshot"},
"/snapshots/{snapshot_name}": {
"delete": "delete_full_snapshot",
"get": "get_full_snapshot",
},
"/collections/{collection_name}/shards/{shard_id}/snapshots/upload": {
"post": "recover_shard_from_uploaded_snapshot"
},
"/collections/{collection_name}/shards/{shard_id}/snapshots/recover": {
"put": "recover_shard_from_snapshot"
},
"/collections/{collection_name}/shards/{shard_id}/snapshots": {
"get": "list_shard_snapshots",
"post": "create_shard_snapshot",
},
"/collections/{collection_name}/shards/{shard_id}/snapshots/{snapshot_name}": {
"delete": "delete_shard_snapshot",
"get": "get_shard_snapshot",
},
"/collections/{collection_name}/points/{id}": {"get": "get_point"},
"/collections/{collection_name}/points": {
"post": "get_points",
"put": "upsert_points",
},
"/collections/{collection_name}/points/delete": {"post": "delete_points"},
"/collections/{collection_name}/points/vectors": {"put": "update_vectors"},
"/collections/{collection_name}/points/vectors/delete": {"post": "delete_vectors"},
"/collections/{collection_name}/points/payload": {
"post": "set_payload",
"put": "overwrite_payload",
},
"/collections/{collection_name}/points/payload/delete": {"post": "delete_payload"},
"/collections/{collection_name}/points/payload/clear": {"post": "clear_payload"},
"/collections/{collection_name}/points/batch": {"post": "batch_update"},
"/collections/{collection_name}/points/scroll": {"post": "scroll_points"},
"/collections/{collection_name}/points/search": {"post": "search_points"},
"/collections/{collection_name}/points/search/batch": {
"post": "search_batch_points"
},
"/collections/{collection_name}/points/search/groups": {
"post": "search_point_groups"
},
"/collections/{collection_name}/points/recommend": {"post": "recommend_points"},
"/collections/{collection_name}/points/recommend/batch": {
"post": "recommend_batch_points"
},
"/collections/{collection_name}/points/recommend/groups": {
"post": "recommend_point_groups"
},
"/collections/{collection_name}/points/discover": {"post": "discover_points"},
"/collections/{collection_name}/points/discover/batch": {
"post": "discover_batch_points"
},
"/collections/{collection_name}/points/count": {"post": "count_points"},
"/collections/{collection_name}/points/query": {"post": "query_points"},
"/collections/{collection_name}/points/query/batch": {"post": "query_batch_points"},
"/collections/{collection_name}/points/query/groups": {
"post": "query_points_groups"
},
}

# Field names singled out for gRPC (protobuf) messages. The consumer of this set
# is not visible here -- presumably these fields are excluded from recorded span
# data; TODO confirm against qdrant.py.
_DISALLOWED_PROTO_FIELDS = {"data", "keyword"}

# REST counterpart of the set above; same caveat -- usage is not visible here.
_DISALLOWED_REST_FIELDS = {"nearest", "value"}

# Integration identifier; QdrantIntegration.identifier is set to this value.
_IDENTIFIER = "qdrant"

# Trie built once at import time from the REST path table above, used to look up
# an operation ID from a (path, HTTP method) pair via PathTrie.match.
_qdrant_trie = PathTrie(_PATH_TO_OPERATION_ID)
144 changes: 144 additions & 0 deletions sentry_sdk/integrations/qdrant/path_matching.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
from typing import Any, Dict, Optional, List


class TrieNode:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to define a custom data structure here? Is there no way to do this with one of the APIs exposed by Qdrant or with one of the built-in data structures?

def __init__(self, is_placeholder=False):
"""
Initializes a TrieNode.

:param is_placeholder: Indicates if this node represents a placeholder (wildcard).
"""
self.children = {} # type: Dict[str, 'TrieNode']
self.operation_ids = {} # type: Dict[str, str]
self.is_placeholder = is_placeholder # type: bool

@classmethod
def from_dict(cls, data, parent_path=""):
# type: (Dict[str, Any], str) -> 'TrieNode'
"""
Recursively constructs a TrieNode from a nested dictionary.

:param data: Nested dictionary mapping path segments to either nested dictionaries
or dictionaries of HTTP methods to operation IDs.
:param parent_path: The accumulated path from the root to the current node.
:return: Root TrieNode of the constructed trie.
"""
node = cls()
for path, methods in data.items():
segments = PathTrie.split_path(path)
current = node
for segment in segments:
is_placeholder = segment.startswith("{") and segment.endswith("}")
key = "*" if is_placeholder else segment

if key not in current.children:
current.children[key] = TrieNode(is_placeholder=is_placeholder)
current = current.children[key]

if isinstance(methods, dict):
for method, operation_id in methods.items():
current.operation_ids[method.lower()] = operation_id

return node

def to_dict(self, current_path=""):
# type: (str) -> Dict[str, Any]
"""
Serializes the TrieNode and its children back to a nested dictionary.

:param current_path: The accumulated path from the root to the current node.
:return: Nested dictionary representing the trie.
"""
result = {} # type: Dict[str, Any]
if self.operation_ids:
path_key = current_path or "/"
result[path_key] = self.operation_ids.copy()

for segment, child in self.children.items():
# replace wildcard '*' back to placeholder format if necessary.
# allows for TrieNode.from_dict(TrieNode.to_dict()) to be idempotent.
display_segment = "{placeholder}" if child.is_placeholder else segment
new_path = (
f"{current_path}/{display_segment}"
if current_path
else f"/{display_segment}"
)
child_dict = child.to_dict(new_path)
result.update(child_dict)

return result


class PathTrie:
    """
    Trie over URL path segments that maps (path, HTTP method) to an operation ID.

    Template segments written as ``{name}`` are stored under ``WILDCARD`` and
    match any single concrete segment. Literal segments take precedence over
    the wildcard, but the wildcard branch is still tried when the literal
    branch does not lead to a match.
    """

    WILDCARD = "*"  # type: str

    def __init__(self, data=None):
        # type: (Optional[Dict[str, Any]]) -> None
        """
        Initializes the PathTrie with optional initial data.

        :param data: Optional mapping of path template to a dict of
            HTTP method -> operation ID. ``None`` creates an empty trie.
        """
        self.root = TrieNode.from_dict(data or {})  # type: TrieNode

    def insert(self, path, method, operation_id):
        # type: (str, str, str) -> None
        """
        Inserts a path into the trie with its corresponding HTTP method and operation ID.

        :param path: The API path (e.g., '/users/{user_id}/posts').
        :param method: HTTP method (e.g., 'GET', 'POST'); stored lower-cased.
        :param operation_id: The operation identifier associated with the path and method.
        """
        current = self.root

        for segment in self.split_path(path):
            is_placeholder = self._is_placeholder(segment)
            key = self.WILDCARD if is_placeholder else segment

            if key not in current.children:
                current.children[key] = TrieNode(is_placeholder=is_placeholder)
            current = current.children[key]

        current.operation_ids[method.lower()] = operation_id

    def match(self, path, method):
        # type: (str, str) -> Optional[str]
        """
        Matches a given path and HTTP method to its corresponding operation ID.

        At every segment the literal child is explored first. If that branch
        does not end in a node that has an operation for ``method``, the
        wildcard child is explored as a fallback. This keeps a concrete value
        that happens to equal a literal route segment (for example a
        collection named ``aliases``) from hiding the templated route.

        :param path: The API path to match.
        :param method: HTTP method to match (case-insensitive).
        :return: The operation ID if a match is found; otherwise, None.
        """
        segments = self.split_path(path)
        wanted = method.lower()
        depth_limit = len(segments)

        # Explicit depth-first search. Entries are (node, number of segments
        # consumed). The literal child is pushed last so it is popped first.
        pending = [(self.root, 0)]
        while pending:
            node, depth = pending.pop()

            if depth == depth_limit:
                operation_id = node.operation_ids.get(wanted)
                if operation_id is not None:
                    return operation_id
                continue

            wildcard = node.children.get(self.WILDCARD)
            if wildcard is not None:
                pending.append((wildcard, depth + 1))

            literal = node.children.get(segments[depth])
            # A segment that is literally "*" resolves to the wildcard node;
            # do not queue the same node twice.
            if literal is not None and literal is not wildcard:
                pending.append((literal, depth + 1))

        return None

    def to_dict(self):
        # type: () -> Dict[str, Any]
        """Returns the trie flattened to ``{path: {method: operation_id}}``."""
        return self.root.to_dict()

    @staticmethod
    def split_path(path):
        # type: (str) -> List[str]
        """Splits a path on '/' and drops empty segments (leading, trailing, doubled slashes)."""
        return [segment for segment in path.strip("/").split("/") if segment]

    @staticmethod
    def _is_placeholder(segment):
        # type: (str) -> bool
        """Returns True for template segments of the form ``{...}``."""
        return segment.startswith("{") and segment.endswith("}")

    def __repr__(self):
        # type: () -> str
        return f"PathTrie({self.to_dict()})"
Loading
Loading