Skip to content

Commit

Permalink
feat: remove hardcoded eap_spans references in table and timeseries RPCs
Browse files Browse the repository at this point in the history
  • Loading branch information
colin-sentry committed Dec 30, 2024
1 parent 16e3c49 commit d11082b
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 91 deletions.
103 changes: 30 additions & 73 deletions snuba/web/rpc/common/common.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import datetime, timedelta
from typing import Final, Mapping, Sequence, Set
from typing import Sequence

from sentry_protos.snuba.v1.request_common_pb2 import RequestMeta
from sentry_protos.snuba.v1.request_common_pb2 import RequestMeta, TraceItemName
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import (
AttributeKey,
VirtualColumnContext,
Expand All @@ -11,8 +11,11 @@
TraceItemFilter,
)

from snuba.datasets.entities.entity_key import EntityKey
from snuba.datasets.entities.factory import get_entity
from snuba.query import Query
from snuba.query.conditions import combine_and_conditions, combine_or_conditions
from snuba.query.data_source.simple import Entity
from snuba.query.dsl import Functions as f
from snuba.query.dsl import (
and_cond,
Expand Down Expand Up @@ -72,86 +75,25 @@ def transform(exp: Expression) -> Expression:
query.transform_expressions(transform)


# These are the columns which aren't stored in attr_str_ nor attr_num_ in clickhouse
NORMALIZED_COLUMNS: Final[Mapping[str, AttributeKey.Type.ValueType]] = {
"sentry.organization_id": AttributeKey.Type.TYPE_INT,
"sentry.project_id": AttributeKey.Type.TYPE_INT,
"sentry.service": AttributeKey.Type.TYPE_STRING,
"sentry.span_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage
"sentry.parent_span_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage
"sentry.segment_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage
"sentry.segment_name": AttributeKey.Type.TYPE_STRING,
"sentry.is_segment": AttributeKey.Type.TYPE_BOOLEAN,
"sentry.duration_ms": AttributeKey.Type.TYPE_FLOAT,
"sentry.exclusive_time_ms": AttributeKey.Type.TYPE_FLOAT,
"sentry.retention_days": AttributeKey.Type.TYPE_INT,
"sentry.name": AttributeKey.Type.TYPE_STRING,
"sentry.sampling_weight": AttributeKey.Type.TYPE_FLOAT,
"sentry.sampling_factor": AttributeKey.Type.TYPE_FLOAT,
"sentry.timestamp": AttributeKey.Type.TYPE_UNSPECIFIED,
"sentry.start_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED,
"sentry.end_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED,
}

TIMESTAMP_COLUMNS: Final[Set[str]] = {
"sentry.timestamp",
"sentry.start_timestamp",
"sentry.end_timestamp",
}


def attribute_key_to_expression(attr_key: AttributeKey) -> Expression:
def _build_label_mapping_key(attr_key: AttributeKey) -> str:
return attr_key.name + "_" + AttributeKey.Type.Name(attr_key.type)

if attr_key.type == AttributeKey.Type.TYPE_UNSPECIFIED:
raise BadSnubaRPCRequestException(
f"attribute key {attr_key.name} must have a type specified"
)
alias = _build_label_mapping_key(attr_key)

if attr_key.name == "sentry.trace_id":
if attr_key.type == AttributeKey.Type.TYPE_STRING:
return f.CAST(column("trace_id"), "String", alias=alias)
raise BadSnubaRPCRequestException(
f"Attribute {attr_key.name} must be requested as a string, got {attr_key.type}"
)

if attr_key.name in TIMESTAMP_COLUMNS:
if attr_key.type == AttributeKey.Type.TYPE_STRING:
return f.CAST(
column(attr_key.name[len("sentry.") :]), "String", alias=alias
)
if attr_key.type == AttributeKey.Type.TYPE_INT:
return f.CAST(column(attr_key.name[len("sentry.") :]), "Int64", alias=alias)
if attr_key.type == AttributeKey.Type.TYPE_FLOAT:
return f.CAST(
column(attr_key.name[len("sentry.") :]), "Float64", alias=alias
)
raise BadSnubaRPCRequestException(
f"Attribute {attr_key.name} must be requested as a string, float, or integer, got {attr_key.type}"
)
alias = attr_key.name + "_" + AttributeKey.Type.Name(attr_key.type)

if attr_key.name in NORMALIZED_COLUMNS:
if NORMALIZED_COLUMNS[attr_key.name] == attr_key.type:
return column(attr_key.name[len("sentry.") :], alias=attr_key.name)
raise BadSnubaRPCRequestException(
f"Attribute {attr_key.name} must be requested as {NORMALIZED_COLUMNS[attr_key.name]}, got {attr_key.type}"
)

# End of special handling, just send to the appropriate bucket
if attr_key.type == AttributeKey.Type.TYPE_STRING:
return SubscriptableReference(
alias=alias, column=column("attr_str"), key=literal(attr_key.name)
)
if attr_key.type == AttributeKey.Type.TYPE_FLOAT:
return SubscriptableReference(
alias=alias, column=column("attr_num"), key=literal(attr_key.name)
alias=alias, column=column("attr_f64"), key=literal(attr_key.name)
)
if attr_key.type == AttributeKey.Type.TYPE_INT:
return f.CAST(
SubscriptableReference(
alias=None, column=column("attr_num"), key=literal(attr_key.name)
alias=None, column=column("attr_i64"), key=literal(attr_key.name)
),
"Int64",
alias=alias,
Expand All @@ -160,7 +102,7 @@ def _build_label_mapping_key(attr_key: AttributeKey) -> str:
return f.CAST(
SubscriptableReference(
alias=None,
column=column("attr_num"),
column=column("attr_f64"),
key=literal(attr_key.name),
),
"Boolean",
Expand Down Expand Up @@ -209,20 +151,19 @@ def apply_virtual_columns(
mapped_column_to_context = {c.to_column_name: c for c in virtual_column_contexts}

def transform_expressions(expression: Expression) -> Expression:
# virtual columns will show up as `attr_str[virtual_column_name]` or `attr_num[virtual_column_name]`
# virtual columns will show up as `attr_str[virtual_column_name]`
if not isinstance(expression, SubscriptableReference):
return expression

if expression.column.column_name != "attr_str":
return expression

context = mapped_column_to_context.get(str(expression.key.value))
if context:
attribute_expression = attribute_key_to_expression(
AttributeKey(
name=context.from_column_name,
type=NORMALIZED_COLUMNS.get(
context.from_column_name, AttributeKey.TYPE_STRING
),
type=AttributeKey.TYPE_STRING,
)
)
return f.transform(
Expand Down Expand Up @@ -340,8 +281,6 @@ def trace_item_filters_to_expression(item_filter: TraceItemFilter) -> Expression

if item_filter.HasField("exists_filter"):
k = item_filter.exists_filter.key
if k.name in NORMALIZED_COLUMNS.keys():
return f.isNotNull(column(k.name))
if k.type == AttributeKey.Type.TYPE_STRING:
return f.mapContains(column("attr_str"), literal(k.name))
else:
Expand All @@ -363,6 +302,24 @@ def project_id_and_org_conditions(meta: RequestMeta) -> Expression:
)


def entity_key_from_trace_item_name(name: TraceItemName.ValueType) -> EntityKey:
    """Resolve the storage EntityKey for a given trace item type.

    TRACE_ITEM_NAME_UNSPECIFIED is tolerated and mapped to the EAP spans
    entity because some callers do not set the type yet.
    TODO: once all callers specify a type, reject UNSPECIFIED here.

    Raises:
        BadSnubaRPCRequestException: if the trace item type is unknown.
    """
    if name in (
        TraceItemName.TRACE_ITEM_NAME_UNSPECIFIED,
        TraceItemName.TRACE_ITEM_NAME_EAP_SPANS,
    ):
        return EntityKey("eap_spans_rpc")
    # Fixed: the f-string previously read "${name}", which rendered a
    # literal "$" before the value (a JS template-literal slip).
    raise BadSnubaRPCRequestException(f"unknown trace item type: {name}")


def entity_from_trace_item_name(name: TraceItemName.ValueType) -> Entity:
    """Build the query ``Entity`` corresponding to a trace item type.

    Looks up the entity key for *name*, then attaches that entity's data
    model as the schema. Sampling is never applied here (``sample=None``).
    """
    key = entity_key_from_trace_item_name(name)
    data_model = get_entity(key).get_data_model()
    return Entity(key=key, schema=data_model, sample=None)


def timestamp_in_range_condition(start_ts: int, end_ts: int) -> Expression:
return and_cond(
f.less(
Expand Down
11 changes: 2 additions & 9 deletions snuba/web/rpc/v1/endpoint_time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,8 @@

from snuba.attribution.appid import AppID
from snuba.attribution.attribution_info import AttributionInfo
from snuba.datasets.entities.entity_key import EntityKey
from snuba.datasets.entities.factory import get_entity
from snuba.datasets.pluggable_dataset import PluggableDataset
from snuba.query import OrderBy, OrderByDirection, SelectedExpression
from snuba.query.data_source.simple import Entity
from snuba.query.dsl import Functions as f
from snuba.query.dsl import column
from snuba.query.logical import Query
Expand All @@ -38,6 +35,7 @@
from snuba.web.rpc.common.common import (
attribute_key_to_expression,
base_conditions_and,
entity_from_trace_item_name,
trace_item_filters_to_expression,
treeify_or_and_conditions,
)
Expand Down Expand Up @@ -188,12 +186,7 @@ def _convert_result_timeseries(


def _build_query(request: TimeSeriesRequest) -> Query:
# TODO: This is hardcoded still
entity = Entity(
key=EntityKey("eap_spans"),
schema=get_entity(EntityKey("eap_spans")).get_data_model(),
sample=None,
)
entity = entity_from_trace_item_name(request.meta.trace_item_name)

aggregation_columns = [
SelectedExpression(
Expand Down
11 changes: 2 additions & 9 deletions snuba/web/rpc/v1/endpoint_trace_item_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,8 @@

from snuba.attribution.appid import AppID
from snuba.attribution.attribution_info import AttributionInfo
from snuba.datasets.entities.entity_key import EntityKey
from snuba.datasets.entities.factory import get_entity
from snuba.datasets.pluggable_dataset import PluggableDataset
from snuba.query import OrderBy, OrderByDirection, SelectedExpression
from snuba.query.data_source.simple import Entity
from snuba.query.logical import Query
from snuba.query.query_settings import HTTPQuerySettings
from snuba.request import Request as SnubaRequest
Expand All @@ -41,6 +38,7 @@
apply_virtual_columns,
attribute_key_to_expression,
base_conditions_and,
entity_from_trace_item_name,
trace_item_filters_to_expression,
treeify_or_and_conditions,
)
Expand Down Expand Up @@ -79,12 +77,7 @@ def _convert_order_by(


def _build_query(request: TraceItemTableRequest) -> Query:
# TODO: This is hardcoded still
entity = Entity(
key=EntityKey("eap_spans"),
schema=get_entity(EntityKey("eap_spans")).get_data_model(),
sample=None,
)
entity = entity_from_trace_item_name(request.meta.trace_item_name)

selected_columns = []
for column in request.columns:
Expand Down

0 comments on commit d11082b

Please sign in to comment.