feat(eap): Start decoupling EAP entities at the entity layer #6701
base: master
Changes from all commits: 2076df7, ceae449, 46f9d6c, cc646b9, cc41bc3
New file: entity configuration for `eap_spans_rpc` (@@ -0,0 +1,124 @@)

```yaml
version: v1
kind: entity
name: eap_spans_rpc

schema:
  [
    { name: service, type: String },
    { name: trace_id, type: UUID },
```

> Review comment (on `trace_id`): This should be a required field given the API we have.
```yaml
    { name: span_id, type: UInt, args: { size: 64 } },
```

> Review comment (on `span_id`): We should have some sort of a …
```yaml
    { name: parent_span_id, type: UInt, args: { size: 64 } },
    { name: segment_id, type: UInt, args: { size: 64 } },
    { name: segment_name, type: String },
    { name: is_segment, type: UInt, args: { size: 8 } },
    { name: start_timestamp, type: DateTime64, args: { precision: 6 } },
    { name: end_timestamp, type: DateTime64, args: { precision: 6 } },
    { name: duration_ms, type: Float, args: { size: 64 } },
    { name: exclusive_time_ms, type: Float, args: { size: 64 } },
    { name: name, type: String },

    # these are the required columns for an 'RPC entity' that can be used by EAP RPCs
    { name: organization_id, type: UInt, args: { size: 64 } },
    { name: project_id, type: UInt, args: { size: 64 } },
    { name: time, type: DateTime },      # used by TimeSeriesProcessor
    { name: timestamp, type: DateTime }, # mapped to _sort_timestamp
```

> Review comment (on lines +23 to +24, the `time`/`timestamp` columns): Is there a way we do not have this duplication? I think one timestamp is enough for the required fields.
```yaml
    { name: retention_days, type: UInt, args: { size: 16 } },
    { name: sampling_factor, type: Float, args: { size: 64 } },
    { name: sampling_weight, type: UInt, args: { size: 64 } },
    { name: sign, type: Int, args: { size: 8 } },
```

> Review comment (on `sign`): I don't think this should be a required column as we're not using …
```yaml
    { name: attr_str, type: Map, args: { key: { type: String }, value: { type: String } } },
    { name: attr_f64, type: Map, args: { key: { type: String }, value: { type: Float, args: { size: 64 } } } },
    { name: attr_i64, type: Map, args: { key: { type: String }, value: { type: Int, args: { size: 64 } } } },
  ]
```

> Review comment (on `attr_i64`): Let's leave things that don't exist yet out of it.
```yaml
storages:
  - storage: eap_spans
    is_writable: true
    translation_mappers:
      columns:
        - mapper: ColumnToColumn
          args:
            from_table_name: null
            from_col_name: timestamp
            to_table_name: null
            to_col_name: _sort_timestamp

      subscriptables:
        - mapper: SubscriptableHashBucketMapper
          args:
            from_column_table: null
            from_column_name: attr_str
            to_col_table: null
            to_col_name: attr_str
            data_type: String
            normalized_columns:
              sentry.name: name
              sentry.service: service
              sentry.span_id: span_id
              sentry.parent_span_id: parent_span_id
              sentry.segment_id: segment_id
              sentry.segment_name: segment_name
              sentry.start_timestamp: start_timestamp
              sentry.end_timestamp: end_timestamp
              sentry.timestamp: _sort_timestamp
        - mapper: SubscriptableHashBucketMapper
          args:
            from_column_table: null
            from_column_name: attr_f64
            to_col_table: null
            to_col_name: attr_num
            data_type: Float64
            normalized_columns:
              sentry.exclusive_time_micro: exclusive_time_micro
              sentry.duration_micro: duration_micro
        - mapper: SubscriptableHashBucketMapper
          args:
            from_column_table: null
            from_column_name: attr_i64
            to_col_table: null
            to_col_name: attr_num
            data_type: Int64
            normalized_columns:
              sentry.organization_id: organization_id
              sentry.project_id: project_id

storage_selector:
  selector: DefaultQueryStorageSelector

query_processors:
  - processor: TimeSeriesProcessor
    args:
      time_group_columns:
        time: timestamp
      time_parse_columns:
        - start_timestamp
        - end_timestamp
  - processor: HashBucketFunctionTransformer
    args:
      hash_bucket_name_mapping:
        attr_str: attr_str
        attr_f64: attr_num
        attr_i64: attr_num
  - processor: OptionalAttributeAggregationTransformer
    args:
      attribute_column_names:
        - attr_num
```
> Review comment (on lines +104 to +105): This might be my ignorance speaking but should this be …
```yaml
      aggregation_names:
        - sum
        - count
        - avg
        - avgWeighted
        - max
        - min
        - uniq
      curried_aggregation_names:
        - quantile
        - quantileTDigestWeighted

# in order to reference aliased columns, we shouldn't validate columns purely based on the entity schema
validate_data_model: do_nothing
validators:
  - validator: EntityRequiredColumnValidator
    args:
      required_filter_columns: [organization_id]

required_time_column: timestamp
```
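For context on the `SubscriptableHashBucketMapper` and `HashBucketFunctionTransformer` entries above: attribute map keys are spread across a fixed set of physical bucket columns, and a key is routed to its bucket by hashing. Below is a minimal sketch of that routing rule, assuming Snuba's `fnv_1a` is the standard 32-bit FNV-1a and using an illustrative bucket count in place of the real `constants.ATTRIBUTE_BUCKETS`:

```python
# Illustrative sketch of hash-bucket routing; the bucket count and the exact
# fnv_1a implementation in snuba are assumptions, not copied from the codebase.

ATTRIBUTE_BUCKETS = 20  # hypothetical value standing in for constants.ATTRIBUTE_BUCKETS


def fnv_1a(data: bytes) -> int:
    """Standard 32-bit FNV-1a hash."""
    prime = 16777619
    hash_ = 2166136261  # FNV-1a 32-bit offset basis
    for byte in data:
        hash_ ^= byte
        hash_ = (hash_ * prime) & 0xFFFFFFFF  # keep the value within 32 bits
    return hash_


def bucket_column(logical_column: str, key: str) -> str:
    """Route a map key to its physical column: attr_str['z'] lives in
    attr_str_<n> where n = fnv_1a(b'z') % ATTRIBUTE_BUCKETS."""
    return f"{logical_column}_{fnv_1a(key.encode('utf-8')) % ATTRIBUTE_BUCKETS}"
```

The new test below asserts exactly this shape: a lookup on `tags_str[z]` is rewritten to an `arrayElement` call against `tags_float_{fnv_1a(b'z') % constants.ATTRIBUTE_BUCKETS}`.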
Second file: new test appended after `test_hash_bucket_tag_translation` (@@ -146,6 +146,59 @@)

> Review comment (on `test_hash_bucket_normalized`): Can you also add unit tests for when: …

```python
def test_hash_bucket_normalized() -> None:
    mapper = SubscriptableHashBucketMapper(
        from_column_table=None,
        from_column_name="tags_str",
        to_col_table=None,
        to_col_name="tags_float",
        data_type="String",
        normalized_columns={"derp.hello": "some_column"},
    )

    non_normalized_mapped = mapper.attempt_map(
        SubscriptableReference(
            "tags_str[z]", Column(None, None, "tags_str"), Literal(None, "z")
        ),
        SnubaClickhouseMappingTranslator(TranslationMappers()),
    )

    normalized_mapped = mapper.attempt_map(
        SubscriptableReference(
            "tags_str[derp.hello]",
            Column(None, None, "tags_str"),
            Literal(None, "derp.hello"),
        ),
        SnubaClickhouseMappingTranslator(TranslationMappers()),
    )

    assert non_normalized_mapped == FunctionCall(
        "tags_str[z]",
        "CAST",
        (
            FunctionCall(
                None,
                "arrayElement",
                (
                    Column(
                        None,
                        None,
                        f"tags_float_{fnv_1a(b'z') % constants.ATTRIBUTE_BUCKETS}",
                    ),
                    Literal(None, "z"),
                ),
            ),
            Literal(None, "String"),
        ),
    )

    assert normalized_mapped == FunctionCall(
        "tags_str[derp.hello]",
        "CAST",
        (Column(None, None, "some_column"), Literal(None, "String")),
    )


# unchanged context following the new test:
def _get_nullable_expr(alias: str) -> FunctionCall:
    return FunctionCall(
        alias,
```
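One case the request above likely includes, given the review comment just below, is `normalized_columns` set while `data_type` is omitted. A hedged sketch of such a test follows; it assumes `data_type` is an optional argument on the mapper and that the current behavior is the plain `arrayElement` fallback described below (if validation is added instead, the assertion would become a raised error):

```python
# Hedged sketch only: assumes data_type is optional and that the mapper
# currently ignores normalized_columns when it is missing, falling back to
# the hash-bucket lookup (see the review comment below).
def test_hash_bucket_normalized_without_data_type() -> None:
    mapper = SubscriptableHashBucketMapper(
        from_column_table=None,
        from_column_name="tags_str",
        to_col_table=None,
        to_col_name="tags_float",
        normalized_columns={"derp.hello": "some_column"},
    )

    mapped = mapper.attempt_map(
        SubscriptableReference(
            "tags_str[derp.hello]",
            Column(None, None, "tags_str"),
            Literal(None, "derp.hello"),
        ),
        SnubaClickhouseMappingTranslator(TranslationMappers()),
    )

    # The normalized column is silently skipped; the key is routed to a bucket.
    assert mapped == FunctionCall(
        "tags_str[derp.hello]",
        "arrayElement",
        (
            Column(
                None,
                None,
                f"tags_float_{fnv_1a(b'derp.hello') % constants.ATTRIBUTE_BUCKETS}",
            ),
            Literal(None, "derp.hello"),
        ),
    )
```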
> Review comment: It appears that with the current implementation, if `normalized_columns` is specified but `data_type` is not specified, then the code silently ignores it and falls into the `else` block, returning `arrayElement(...)`. If the expectation is that `data_type` must be specified along with `normalized_columns`, then should we enforce that and return an appropriate error so that the caller does not get unexpected behavior?
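If that enforcement is wanted, here is a minimal sketch of what it could look like, with field names mirroring the YAML `args` above (the actual mapper class in snuba may be structured differently):

```python
# Hedged sketch of the suggested validation: fail fast when normalized_columns
# is configured without data_type, instead of silently falling back. The field
# names mirror the YAML args above; the real class definition may differ.
from dataclasses import dataclass, field
from typing import Mapping, Optional


@dataclass(frozen=True)
class SubscriptableHashBucketMapperSketch:
    from_column_table: Optional[str]
    from_column_name: str
    to_col_table: Optional[str]
    to_col_name: str
    data_type: Optional[str] = None
    normalized_columns: Mapping[str, str] = field(default_factory=dict)

    def __post_init__(self) -> None:
        # Reject the inconsistent configuration at construction time so the
        # error surfaces when the entity YAML is loaded, not at query time.
        if self.normalized_columns and self.data_type is None:
            raise ValueError(
                "normalized_columns requires data_type so that normalized "
                "lookups can be CAST to the expected type"
            )
```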