Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement filter offset for attribute values API #6667

Merged
merged 6 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions snuba/web/rpc/common/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,3 +390,18 @@ def base_conditions_and(meta: RequestMeta, *other_exprs: Expression) -> Expressi
),
*other_exprs,
)


def convert_filter_offset(filter_offset: TraceItemFilter) -> Expression:
if not filter_offset.HasField("comparison_filter"):
raise TypeError("filter_offset needs to be a comparison filter")
if filter_offset.comparison_filter.op != ComparisonFilter.OP_GREATER_THAN:
raise TypeError("filter_offset must use the greater than comparison")

k_expression = column(filter_offset.comparison_filter.key.name)
v = filter_offset.comparison_filter.value
value_type = v.WhichOneof("value")
if value_type != "val_str":
raise BadSnubaRPCRequestException("please provide a string for filter offset")

return f.greater(k_expression, literal(v.val_str))
28 changes: 7 additions & 21 deletions snuba/web/rpc/v1/endpoint_trace_item_attribute_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,40 +21,26 @@
from snuba.query import OrderBy, OrderByDirection, SelectedExpression
from snuba.query.data_source.simple import Entity
from snuba.query.dsl import Functions as f
from snuba.query.dsl import column, literal
from snuba.query.expressions import Expression
from snuba.query.dsl import column
from snuba.query.logical import Query
from snuba.query.query_settings import HTTPQuerySettings
from snuba.reader import Row
from snuba.request import Request as SnubaRequest
from snuba.web import QueryResult
from snuba.web.query import run_query
from snuba.web.rpc import RPCEndpoint
from snuba.web.rpc.common.common import base_conditions_and, treeify_or_and_conditions
from snuba.web.rpc.common.common import (
base_conditions_and,
convert_filter_offset,
treeify_or_and_conditions,
)
from snuba.web.rpc.common.debug_info import extract_response_meta
from snuba.web.rpc.common.exceptions import BadSnubaRPCRequestException

# max value the user can provide for 'limit' in their request
MAX_REQUEST_LIMIT = 1000


def _convert_filter_offset(filter_offset: TraceItemFilter) -> Expression:
if not filter_offset.HasField("comparison_filter"):
raise TypeError("filter_offset needs to be a comparison filter")
if filter_offset.comparison_filter.op != ComparisonFilter.OP_GREATER_THAN:
raise TypeError("filter_offset must use the greater than comparison")

k_expression = column(filter_offset.comparison_filter.key.name)
v = filter_offset.comparison_filter.value
value_type = v.WhichOneof("value")
if value_type != "val_str":
raise BadSnubaRPCRequestException(
"keys are strings, so please provide a string filter"
)

return f.greater(k_expression, literal(v.val_str))


def convert_to_snuba_request(req: TraceItemAttributeNamesRequest) -> SnubaRequest:
if req.limit > MAX_REQUEST_LIMIT:
raise BadSnubaRPCRequestException(
Expand Down Expand Up @@ -86,7 +72,7 @@ def convert_to_snuba_request(req: TraceItemAttributeNamesRequest) -> SnubaReques
base_conditions_and(
req.meta,
f.like(column("attr_key"), f"%{req.value_substring_match}%"),
_convert_filter_offset(req.page_token.filter_offset),
convert_filter_offset(req.page_token.filter_offset),
)
if req.page_token.HasField("filter_offset")
else base_conditions_and(
Expand Down
94 changes: 68 additions & 26 deletions snuba/web/rpc/v1/trace_item_attribute_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
TraceItemAttributeValuesResponse,
)
from sentry_protos.snuba.v1.request_common_pb2 import PageToken
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeKey, AttributeValue
from sentry_protos.snuba.v1.trace_item_filter_pb2 import (
ComparisonFilter,
TraceItemFilter,
)

from snuba.attribution.appid import AppID
from snuba.attribution.attribution_info import AttributionInfo
Expand All @@ -17,19 +22,60 @@
from snuba.query.data_source.simple import Entity
from snuba.query.dsl import Functions as f
from snuba.query.dsl import column, literal, literals_array
from snuba.query.expressions import Expression
from snuba.query.logical import Query
from snuba.query.query_settings import HTTPQuerySettings
from snuba.request import Request as SnubaRequest
from snuba.web.query import run_query
from snuba.web.rpc import RPCEndpoint
from snuba.web.rpc.common.common import (
base_conditions_and,
convert_filter_offset,
treeify_or_and_conditions,
truncate_request_meta_to_day,
)
from snuba.web.rpc.common.exceptions import BadSnubaRPCRequestException


def _build_base_conditions_and(request: TraceItemAttributeValuesRequest) -> Expression:
if request.value_substring_match is not None:
return (
base_conditions_and(
request.meta,
f.equals(column("attr_key"), literal(request.key.name)),
# multiSearchAny has special treatment with ngram bloom filters
# https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#functions-support
f.multiSearchAny(
column("attr_value"),
literals_array(None, [literal(request.value_substring_match)]),
),
convert_filter_offset(request.page_token.filter_offset),
)
if request.page_token.HasField("filter_offset")
else base_conditions_and(
request.meta,
f.equals(column("attr_key"), literal(request.key.name)),
f.multiSearchAny(
column("attr_value"),
literals_array(None, [literal(request.value_substring_match)]),
),
)
)
else:
return (
base_conditions_and(
request.meta,
f.equals(column("attr_key"), literal(request.key.name)),
convert_filter_offset(request.page_token.filter_offset),
)
if request.page_token.HasField("filter_offset")
else base_conditions_and(
request.meta,
f.equals(column("attr_key"), literal(request.key.name)),
)
)


def _build_query(request: TraceItemAttributeValuesRequest) -> Query:
if request.limit > 1000:
raise BadSnubaRPCRequestException("Limit can be at most 1000")
Expand All @@ -50,26 +96,8 @@ def _build_query(request: TraceItemAttributeValuesRequest) -> Query:
expression=f.distinct(column("attr_value", alias="attr_value")),
),
],
condition=base_conditions_and(
request.meta,
f.equals(column("attr_key"), literal(request.key.name)),
# multiSearchAny has special treatment with ngram bloom filters
# https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#functions-support
f.multiSearchAny(
column("attr_value"),
literals_array(None, [literal(request.value_substring_match)]),
),
)
if request.value_substring_match is not None
else base_conditions_and(
request.meta,
f.equals(column("attr_key"), literal(request.key.name)),
),
condition=_build_base_conditions_and(request),
order_by=[
OrderBy(
xurui-c marked this conversation as resolved.
Show resolved Hide resolved
direction=OrderByDirection.ASC, expression=column("organization_id")
),
OrderBy(direction=OrderByDirection.ASC, expression=column("attr_key")),
OrderBy(direction=OrderByDirection.ASC, expression=column("attr_value")),
],
limit=request.limit,
Expand Down Expand Up @@ -115,20 +143,34 @@ def request_class(cls) -> Type[TraceItemAttributeValuesRequest]:
def _execute(
self, in_msg: TraceItemAttributeValuesRequest
) -> TraceItemAttributeValuesResponse:
if not in_msg.HasField("page_token"):
in_msg.page_token.offset = 0
if in_msg.page_token.HasField("filter_offset"):
raise NotImplementedError(
"TraceItemAttributeValues does not currently support page_token.filter_offset, please use page_token.offset instead."
)
snuba_request = _build_snuba_request(in_msg)
res = run_query(
dataset=PluggableDataset(name="eap", all_entities=[]),
request=snuba_request,
timer=self._timer,
)
values = [r["attr_value"] for r in res.result.get("data", [])]
if len(values) == 0:
return TraceItemAttributeValuesResponse(
values=values,
page_token=None,
)

return TraceItemAttributeValuesResponse(
values=values,
page_token=PageToken(offset=in_msg.page_token.offset + len(values)),
page_token=(
PageToken(offset=in_msg.page_token.offset + len(values))
if in_msg.page_token.HasField("offset") or len(values) == 0
else PageToken(
filter_offset=TraceItemFilter(
comparison_filter=ComparisonFilter(
key=AttributeKey(
type=AttributeKey.TYPE_STRING, name="attr_value"
),
op=ComparisonFilter.OP_GREATER_THAN,
value=AttributeValue(val_str=values[-1]),
)
)
)
),
)
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ def test_page_token_offset_filter(self) -> None:
)
res = EndpointTraceItemAttributeNames().execute(req)
page_token = res.page_token
assert res.page_token.WhichOneof("value") == "filter_offset"
assert res.attributes == expected_attributes[:at_a_time]
expected_attributes = expected_attributes[at_a_time:]
done += at_a_time
Expand Down
62 changes: 50 additions & 12 deletions tests/web/rpc/v1/test_trace_item_attribute_values_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@
from sentry_protos.snuba.v1.endpoint_trace_item_attributes_pb2 import (
TraceItemAttributeValuesRequest,
)
from sentry_protos.snuba.v1.request_common_pb2 import PageToken, RequestMeta
from sentry_protos.snuba.v1.request_common_pb2 import RequestMeta
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeKey
from sentry_protos.snuba.v1.trace_item_filter_pb2 import TraceItemFilter

from snuba.datasets.storages.factory import get_storage
from snuba.datasets.storages.storage_key import StorageKey
Expand Down Expand Up @@ -146,16 +145,36 @@ def test_page_token(self, setup_teardown: Any) -> None:
done += at_a_time
assert expected_values == []

def test_filter_offset_is_not_implemented_right_now(self) -> None:
req = TraceItemAttributeValuesRequest(
meta=COMMON_META,
limit=6,
key=AttributeKey(name="tag1", type=AttributeKey.TYPE_STRING),
value_substring_match="",
page_token=PageToken(filter_offset=TraceItemFilter()),
)
with pytest.raises(NotImplementedError):
AttributeValuesRequest().execute(req)
def test_page_token_filter_offset(self) -> None:
expected_values = [
"blah",
"derpderp",
"durp",
"herp",
"herpderp",
"some_last_value",
]
total_number_of_values = len(expected_values)

# grab 2 at a time until we get them all
done = 0
page_token = None
at_a_time = 2
while done < total_number_of_values:
req = TraceItemAttributeValuesRequest(
meta=COMMON_META,
limit=at_a_time,
key=AttributeKey(name="tag1", type=AttributeKey.TYPE_STRING),
value_substring_match="",
page_token=page_token,
)
res = AttributeValuesRequest().execute(req)
page_token = res.page_token
xurui-c marked this conversation as resolved.
Show resolved Hide resolved
assert res.page_token.WhichOneof("value") == "filter_offset"
assert res.values == expected_values[:at_a_time]
expected_values = expected_values[at_a_time:]
done += at_a_time
assert expected_values == []

def test_with_value_substring_match(self, setup_teardown: Any) -> None:
message = TraceItemAttributeValuesRequest(
Expand All @@ -166,3 +185,22 @@ def test_with_value_substring_match(self, setup_teardown: Any) -> None:
)
response = AttributeValuesRequest().execute(message)
assert response.values == ["derpderp", "herp", "herpderp"]

def test_empty_results(self) -> None:
req = TraceItemAttributeValuesRequest(
meta=RequestMeta(
project_ids=[1, 2, 3],
organization_id=1,
cogs_category="something",
referrer="something",
start_timestamp=Timestamp(
seconds=int((BASE_TIME - timedelta(days=1)).timestamp())
),
end_timestamp=Timestamp(
seconds=int((BASE_TIME + timedelta(days=1)).timestamp())
),
),
value_substring_match="this_definitely_doesnt_exist_93710",
)
res = AttributeValuesRequest().execute(req)
assert res.values == []
Loading