Skip to content

Commit

Permalink
feat: remove hardcoded eap_spans references in table and timeseries RPCs
Browse files Browse the repository at this point in the history
  • Loading branch information
colin-sentry committed Dec 30, 2024
1 parent 16e3c49 commit d11082b
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 91 deletions.
103 changes: 30 additions & 73 deletions snuba/web/rpc/common/common.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import datetime, timedelta
from typing import Final, Mapping, Sequence, Set
from typing import Sequence

from sentry_protos.snuba.v1.request_common_pb2 import RequestMeta
from sentry_protos.snuba.v1.request_common_pb2 import RequestMeta, TraceItemName
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import (
AttributeKey,
VirtualColumnContext,
Expand All @@ -11,8 +11,11 @@
TraceItemFilter,
)

from snuba.datasets.entities.entity_key import EntityKey
from snuba.datasets.entities.factory import get_entity
from snuba.query import Query
from snuba.query.conditions import combine_and_conditions, combine_or_conditions
from snuba.query.data_source.simple import Entity
from snuba.query.dsl import Functions as f
from snuba.query.dsl import (
and_cond,
Expand Down Expand Up @@ -72,86 +75,25 @@ def transform(exp: Expression) -> Expression:
query.transform_expressions(transform)


# These are the columns which aren't stored in attr_str_ nor attr_num_ in clickhouse
NORMALIZED_COLUMNS: Final[Mapping[str, AttributeKey.Type.ValueType]] = {
"sentry.organization_id": AttributeKey.Type.TYPE_INT,
"sentry.project_id": AttributeKey.Type.TYPE_INT,
"sentry.service": AttributeKey.Type.TYPE_STRING,
"sentry.span_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage
"sentry.parent_span_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage
"sentry.segment_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage
"sentry.segment_name": AttributeKey.Type.TYPE_STRING,
"sentry.is_segment": AttributeKey.Type.TYPE_BOOLEAN,
"sentry.duration_ms": AttributeKey.Type.TYPE_FLOAT,
"sentry.exclusive_time_ms": AttributeKey.Type.TYPE_FLOAT,
"sentry.retention_days": AttributeKey.Type.TYPE_INT,
"sentry.name": AttributeKey.Type.TYPE_STRING,
"sentry.sampling_weight": AttributeKey.Type.TYPE_FLOAT,
"sentry.sampling_factor": AttributeKey.Type.TYPE_FLOAT,
"sentry.timestamp": AttributeKey.Type.TYPE_UNSPECIFIED,
"sentry.start_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED,
"sentry.end_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED,
}

TIMESTAMP_COLUMNS: Final[Set[str]] = {
"sentry.timestamp",
"sentry.start_timestamp",
"sentry.end_timestamp",
}


def attribute_key_to_expression(attr_key: AttributeKey) -> Expression:
def _build_label_mapping_key(attr_key: AttributeKey) -> str:
return attr_key.name + "_" + AttributeKey.Type.Name(attr_key.type)

if attr_key.type == AttributeKey.Type.TYPE_UNSPECIFIED:
raise BadSnubaRPCRequestException(
f"attribute key {attr_key.name} must have a type specified"
)
alias = _build_label_mapping_key(attr_key)

if attr_key.name == "sentry.trace_id":
if attr_key.type == AttributeKey.Type.TYPE_STRING:
return f.CAST(column("trace_id"), "String", alias=alias)
raise BadSnubaRPCRequestException(
f"Attribute {attr_key.name} must be requested as a string, got {attr_key.type}"
)

if attr_key.name in TIMESTAMP_COLUMNS:
if attr_key.type == AttributeKey.Type.TYPE_STRING:
return f.CAST(
column(attr_key.name[len("sentry.") :]), "String", alias=alias
)
if attr_key.type == AttributeKey.Type.TYPE_INT:
return f.CAST(column(attr_key.name[len("sentry.") :]), "Int64", alias=alias)
if attr_key.type == AttributeKey.Type.TYPE_FLOAT:
return f.CAST(
column(attr_key.name[len("sentry.") :]), "Float64", alias=alias
)
raise BadSnubaRPCRequestException(
f"Attribute {attr_key.name} must be requested as a string, float, or integer, got {attr_key.type}"
)
alias = attr_key.name + "_" + AttributeKey.Type.Name(attr_key.type)

if attr_key.name in NORMALIZED_COLUMNS:
if NORMALIZED_COLUMNS[attr_key.name] == attr_key.type:
return column(attr_key.name[len("sentry.") :], alias=attr_key.name)
raise BadSnubaRPCRequestException(
f"Attribute {attr_key.name} must be requested as {NORMALIZED_COLUMNS[attr_key.name]}, got {attr_key.type}"
)

# End of special handling, just send to the appropriate bucket
if attr_key.type == AttributeKey.Type.TYPE_STRING:
return SubscriptableReference(
alias=alias, column=column("attr_str"), key=literal(attr_key.name)
)
if attr_key.type == AttributeKey.Type.TYPE_FLOAT:
return SubscriptableReference(
alias=alias, column=column("attr_num"), key=literal(attr_key.name)
alias=alias, column=column("attr_f64"), key=literal(attr_key.name)
)
if attr_key.type == AttributeKey.Type.TYPE_INT:
return f.CAST(
SubscriptableReference(
alias=None, column=column("attr_num"), key=literal(attr_key.name)
alias=None, column=column("attr_i64"), key=literal(attr_key.name)
),
"Int64",
alias=alias,
Expand All @@ -160,7 +102,7 @@ def _build_label_mapping_key(attr_key: AttributeKey) -> str:
return f.CAST(
SubscriptableReference(
alias=None,
column=column("attr_num"),
column=column("attr_f64"),
key=literal(attr_key.name),
),
"Boolean",
Expand Down Expand Up @@ -209,20 +151,19 @@ def apply_virtual_columns(
mapped_column_to_context = {c.to_column_name: c for c in virtual_column_contexts}

def transform_expressions(expression: Expression) -> Expression:
# virtual columns will show up as `attr_str[virtual_column_name]` or `attr_num[virtual_column_name]`
# virtual columns will show up as `attr_str[virtual_column_name]`
if not isinstance(expression, SubscriptableReference):
return expression

if expression.column.column_name != "attr_str":
return expression

context = mapped_column_to_context.get(str(expression.key.value))
if context:
attribute_expression = attribute_key_to_expression(
AttributeKey(
name=context.from_column_name,
type=NORMALIZED_COLUMNS.get(
context.from_column_name, AttributeKey.TYPE_STRING
),
type=AttributeKey.TYPE_STRING,
)
)
return f.transform(
Expand Down Expand Up @@ -340,8 +281,6 @@ def trace_item_filters_to_expression(item_filter: TraceItemFilter) -> Expression

if item_filter.HasField("exists_filter"):
k = item_filter.exists_filter.key
if k.name in NORMALIZED_COLUMNS.keys():
return f.isNotNull(column(k.name))
if k.type == AttributeKey.Type.TYPE_STRING:
return f.mapContains(column("attr_str"), literal(k.name))
else:
Expand All @@ -363,6 +302,24 @@ def project_id_and_org_conditions(meta: RequestMeta) -> Expression:
)


def entity_key_from_trace_item_name(name: TraceItemName.ValueType) -> EntityKey:
    """Resolve the storage EntityKey for a given trace item type.

    TRACE_ITEM_NAME_UNSPECIFIED is tolerated and mapped to the EAP spans
    entity because some callers do not set the type yet.
    TODO: once all callers specify a type, reject UNSPECIFIED here.

    Raises:
        BadSnubaRPCRequestException: if the trace item type is unknown.
    """
    if name in (
        TraceItemName.TRACE_ITEM_NAME_UNSPECIFIED,
        TraceItemName.TRACE_ITEM_NAME_EAP_SPANS,
    ):
        return EntityKey("eap_spans_rpc")
    # Fixed: the f-string previously read "${name}", which rendered a
    # literal "$" before the value (a JS template-literal slip).
    raise BadSnubaRPCRequestException(f"unknown trace item type: {name}")


def entity_from_trace_item_name(name: TraceItemName.ValueType) -> Entity:
    """Build the query ``Entity`` corresponding to a trace item type.

    Looks up the entity key for *name*, then attaches that entity's data
    model as the schema. Sampling is never applied here (``sample=None``).
    """
    key = entity_key_from_trace_item_name(name)
    data_model = get_entity(key).get_data_model()
    return Entity(key=key, schema=data_model, sample=None)


def timestamp_in_range_condition(start_ts: int, end_ts: int) -> Expression:
return and_cond(
f.less(
Expand Down
11 changes: 2 additions & 9 deletions snuba/web/rpc/v1/endpoint_time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,8 @@

from snuba.attribution.appid import AppID
from snuba.attribution.attribution_info import AttributionInfo
from snuba.datasets.entities.entity_key import EntityKey
from snuba.datasets.entities.factory import get_entity
from snuba.datasets.pluggable_dataset import PluggableDataset
from snuba.query import OrderBy, OrderByDirection, SelectedExpression
from snuba.query.data_source.simple import Entity
from snuba.query.dsl import Functions as f
from snuba.query.dsl import column
from snuba.query.logical import Query
Expand All @@ -38,6 +35,7 @@
from snuba.web.rpc.common.common import (
attribute_key_to_expression,
base_conditions_and,
entity_from_trace_item_name,
trace_item_filters_to_expression,
treeify_or_and_conditions,
)
Expand Down Expand Up @@ -188,12 +186,7 @@ def _convert_result_timeseries(


def _build_query(request: TimeSeriesRequest) -> Query:
# TODO: This is hardcoded still
entity = Entity(
key=EntityKey("eap_spans"),
schema=get_entity(EntityKey("eap_spans")).get_data_model(),
sample=None,
)
entity = entity_from_trace_item_name(request.meta.trace_item_name)

aggregation_columns = [
SelectedExpression(
Expand Down
11 changes: 2 additions & 9 deletions snuba/web/rpc/v1/endpoint_trace_item_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,8 @@

from snuba.attribution.appid import AppID
from snuba.attribution.attribution_info import AttributionInfo
from snuba.datasets.entities.entity_key import EntityKey
from snuba.datasets.entities.factory import get_entity
from snuba.datasets.pluggable_dataset import PluggableDataset
from snuba.query import OrderBy, OrderByDirection, SelectedExpression
from snuba.query.data_source.simple import Entity
from snuba.query.logical import Query
from snuba.query.query_settings import HTTPQuerySettings
from snuba.request import Request as SnubaRequest
Expand All @@ -41,6 +38,7 @@
apply_virtual_columns,
attribute_key_to_expression,
base_conditions_and,
entity_from_trace_item_name,
trace_item_filters_to_expression,
treeify_or_and_conditions,
)
Expand Down Expand Up @@ -79,12 +77,7 @@ def _convert_order_by(


def _build_query(request: TraceItemTableRequest) -> Query:
# TODO: This is hardcoded still
entity = Entity(
key=EntityKey("eap_spans"),
schema=get_entity(EntityKey("eap_spans")).get_data_model(),
sample=None,
)
entity = entity_from_trace_item_name(request.meta.trace_item_name)

selected_columns = []
for column in request.columns:
Expand Down

0 comments on commit d11082b

Please sign in to comment.