-
Notifications
You must be signed in to change notification settings - Fork 100
feat(query-engine): add hash functions fnv, murmur3, md5, sha1, sha512, xxh3, xxh128 #2887
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 7 commits
c7189ac
4825ba5
582c3a0
ac1934c
3cc8c6f
ff4d511
99b1b1a
0a15f0f
fbb6e66
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4805,6 +4805,306 @@ mod test { | |
| test_update_attr_to_hash_function_call_result_all_supported_types::<KqlParser>().await | ||
| } | ||
|
|
||
| async fn test_update_attr_to_md5_function_call_result<P: Parser>() { | ||
| let logs_data = to_logs_data(vec![ | ||
| LogRecord::build() | ||
| .attributes(vec![ | ||
| KeyValue::new("str_attr", AnyValue::new_string("y")), | ||
| KeyValue::new("binary_attr", AnyValue::new_bytes(b"418")), | ||
| ]) | ||
| .finish(), | ||
| ]); | ||
| let query = r#"logs | extend | ||
| attributes["str_attr"] = encode(md5(attributes["str_attr"]), "hex"), | ||
| attributes["binary_attr"] = encode(md5(attributes["binary_attr"]), "hex") | ||
| "#; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Related to the
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is correct - but you will need to add the change from this comment for this test to pass
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, I changed the return type to |
||
| let pipeline_expr = P::parse_with_options(query, default_parser_options()) | ||
| .unwrap() | ||
| .pipeline; | ||
| let mut pipeline = Pipeline::new(pipeline_expr); | ||
| let input = otlp_to_otap(&OtlpProtoMessage::Logs(logs_data)); | ||
| let result = pipeline.execute(input).await.unwrap(); | ||
| let OtlpProtoMessage::Logs(result_logs_data) = otap_to_otlp(&result) else { | ||
| panic!("invalid signal type"); | ||
| }; | ||
| let log_0 = &result_logs_data.resource_logs[0].scope_logs[0].log_records[0]; | ||
| assert_eq!( | ||
| log_0.attributes, | ||
| vec![ | ||
| KeyValue::new( | ||
| "str_attr", | ||
| AnyValue::new_string("a6105c0a611b41b08f1209506350279e") | ||
| ), | ||
| KeyValue::new( | ||
| "binary_attr", | ||
| AnyValue::new_string("0ffe9bcd5a3d234d4e99e9a1fb9a5d2c") | ||
| ) | ||
| ] | ||
| ); | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn test_update_attr_to_md5_function_call_result_opl_parser() { | ||
| test_update_attr_to_md5_function_call_result::<OplParser>().await | ||
| } | ||
| #[tokio::test] | ||
| async fn test_update_attr_to_md5_function_call_result_kql_parser() { | ||
| test_update_attr_to_md5_function_call_result::<KqlParser>().await | ||
| } | ||
|
|
||
| async fn test_update_attr_to_fnv_hash_function_call_result<P: Parser>() { | ||
| let logs_data = to_logs_data(vec![ | ||
| LogRecord::build() | ||
| .attributes(vec![KeyValue::new( | ||
| "str_attr", | ||
| AnyValue::new_string("hello"), | ||
| )]) | ||
| .finish(), | ||
| ]); | ||
|
|
||
| // fnv returns an Int64 directly - no encode() wrapper needed | ||
| let query = r#"logs | extend attributes["str_attr"] = fnv(attributes["str_attr"])"#; | ||
| let pipeline_expr = P::parse_with_options(query, default_parser_options()) | ||
| .unwrap() | ||
| .pipeline; | ||
| let mut pipeline = Pipeline::new(pipeline_expr); | ||
|
|
||
| let input = otlp_to_otap(&OtlpProtoMessage::Logs(logs_data)); | ||
| let result = pipeline.execute(input).await.unwrap(); | ||
| let OtlpProtoMessage::Logs(result_logs_data) = otap_to_otlp(&result) else { | ||
| panic!("invalid signal type"); | ||
| }; | ||
| let log_0 = &result_logs_data.resource_logs[0].scope_logs[0].log_records[0]; | ||
| assert_eq!( | ||
| log_0.attributes, | ||
| vec![KeyValue::new( | ||
| "str_attr", | ||
| // FNV-1a 64-bit of "hello" interpreted as i64 | ||
| AnyValue::new_int(-6615550055289275125_i64) | ||
| )] | ||
| ); | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn test_update_attr_to_fnv_hash_function_call_result_opl_parser() { | ||
| test_update_attr_to_fnv_hash_function_call_result::<OplParser>().await | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn test_update_attr_to_fnv_hash_function_call_result_kql_parser() { | ||
| test_update_attr_to_fnv_hash_function_call_result::<KqlParser>().await | ||
| } | ||
|
|
||
| async fn test_update_attr_to_murmur3_hash_function_call_result<P: Parser>() { | ||
| let logs_data = to_logs_data(vec![ | ||
| LogRecord::build() | ||
| .attributes(vec![KeyValue::new( | ||
| "str_attr", | ||
| AnyValue::new_string("hello"), | ||
| )]) | ||
| .finish(), | ||
| ]); | ||
|
|
||
| // murmur3 returns an Int64 directly - no encode() wrapper needed | ||
| let query = r#"logs | extend attributes["str_attr"] = murmur3(attributes["str_attr"])"#; | ||
| let pipeline_expr = P::parse_with_options(query, default_parser_options()) | ||
| .unwrap() | ||
| .pipeline; | ||
| let mut pipeline = Pipeline::new(pipeline_expr); | ||
|
|
||
| let input = otlp_to_otap(&OtlpProtoMessage::Logs(logs_data)); | ||
| let result = pipeline.execute(input).await.unwrap(); | ||
| let OtlpProtoMessage::Logs(result_logs_data) = otap_to_otlp(&result) else { | ||
| panic!("invalid signal type"); | ||
| }; | ||
| let log_0 = &result_logs_data.resource_logs[0].scope_logs[0].log_records[0]; | ||
| assert_eq!( | ||
| log_0.attributes, | ||
| vec![KeyValue::new( | ||
| "str_attr", | ||
| // MurmurHash3 32-bit of "hello" with seed=0 | ||
| AnyValue::new_int(613_153_351_i64) | ||
| )] | ||
| ); | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn test_update_attr_to_murmur3_hash_function_call_result_opl_parser() { | ||
| test_update_attr_to_murmur3_hash_function_call_result::<OplParser>().await | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn test_update_attr_to_murmur3_hash_function_call_result_kql_parser() { | ||
| test_update_attr_to_murmur3_hash_function_call_result::<KqlParser>().await | ||
| } | ||
|
|
||
| async fn test_update_attr_to_sha1_hash_function_call_result<P: Parser>() { | ||
| let logs_data = to_logs_data(vec![ | ||
| LogRecord::build() | ||
| .attributes(vec![KeyValue::new( | ||
| "str_attr", | ||
| AnyValue::new_string("hello"), | ||
| )]) | ||
| .finish(), | ||
| ]); | ||
|
|
||
| let query = | ||
| r#"logs | extend attributes["str_attr"] = encode(sha1(attributes["str_attr"]), "hex")"#; | ||
| let pipeline_expr = P::parse_with_options(query, default_parser_options()) | ||
| .unwrap() | ||
| .pipeline; | ||
| let mut pipeline = Pipeline::new(pipeline_expr); | ||
|
|
||
| let input = otlp_to_otap(&OtlpProtoMessage::Logs(logs_data)); | ||
| let result = pipeline.execute(input).await.unwrap(); | ||
| let OtlpProtoMessage::Logs(result_logs_data) = otap_to_otlp(&result) else { | ||
| panic!("invalid signal type"); | ||
| }; | ||
| let log_0 = &result_logs_data.resource_logs[0].scope_logs[0].log_records[0]; | ||
| assert_eq!( | ||
| log_0.attributes, | ||
| vec![KeyValue::new( | ||
| "str_attr", | ||
| AnyValue::new_string("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d") | ||
| )] | ||
| ); | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn test_update_attr_to_sha1_hash_function_call_result_opl_parser() { | ||
| test_update_attr_to_sha1_hash_function_call_result::<OplParser>().await | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn test_update_attr_to_sha1_hash_function_call_result_kql_parser() { | ||
| test_update_attr_to_sha1_hash_function_call_result::<KqlParser>().await | ||
| } | ||
|
|
||
| async fn test_update_attr_to_sha512_hash_function_call_result<P: Parser>() { | ||
| let logs_data = to_logs_data(vec![ | ||
| LogRecord::build() | ||
| .attributes(vec![KeyValue::new( | ||
| "str_attr", | ||
| AnyValue::new_string("hello"), | ||
| )]) | ||
| .finish(), | ||
| ]); | ||
|
|
||
| let query = r#"logs | extend attributes["str_attr"] = encode(sha512(attributes["str_attr"]), "hex")"#; | ||
| let pipeline_expr = P::parse_with_options(query, default_parser_options()) | ||
| .unwrap() | ||
| .pipeline; | ||
| let mut pipeline = Pipeline::new(pipeline_expr); | ||
|
|
||
| let input = otlp_to_otap(&OtlpProtoMessage::Logs(logs_data)); | ||
| let result = pipeline.execute(input).await.unwrap(); | ||
| let OtlpProtoMessage::Logs(result_logs_data) = otap_to_otlp(&result) else { | ||
| panic!("invalid signal type"); | ||
| }; | ||
| let log_0 = &result_logs_data.resource_logs[0].scope_logs[0].log_records[0]; | ||
| assert_eq!( | ||
| log_0.attributes, | ||
| vec![KeyValue::new( | ||
| "str_attr", | ||
| AnyValue::new_string( | ||
| "9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043" | ||
| ) | ||
| )] | ||
| ); | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn test_update_attr_to_sha512_hash_function_call_result_opl_parser() { | ||
| test_update_attr_to_sha512_hash_function_call_result::<OplParser>().await | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn test_update_attr_to_sha512_hash_function_call_result_kql_parser() { | ||
| test_update_attr_to_sha512_hash_function_call_result::<KqlParser>().await | ||
| } | ||
|
|
||
| async fn test_update_attr_to_xxh3_hash_function_call_result<P: Parser>() { | ||
| let logs_data = to_logs_data(vec![ | ||
| LogRecord::build() | ||
| .attributes(vec![KeyValue::new( | ||
| "str_attr", | ||
| AnyValue::new_string("hello"), | ||
| )]) | ||
| .finish(), | ||
| ]); | ||
|
|
||
| let query = r#"logs | extend attributes["str_attr"] = xxh3(attributes["str_attr"])"#; | ||
| let pipeline_expr = P::parse_with_options(query, default_parser_options()) | ||
| .unwrap() | ||
| .pipeline; | ||
| let mut pipeline = Pipeline::new(pipeline_expr); | ||
|
|
||
| let input = otlp_to_otap(&OtlpProtoMessage::Logs(logs_data)); | ||
| let result = pipeline.execute(input).await.unwrap(); | ||
| let OtlpProtoMessage::Logs(result_logs_data) = otap_to_otlp(&result) else { | ||
| panic!("invalid signal type"); | ||
| }; | ||
| let log_0 = &result_logs_data.resource_logs[0].scope_logs[0].log_records[0]; | ||
| assert_eq!( | ||
| log_0.attributes, | ||
| vec![KeyValue::new( | ||
| "str_attr", | ||
| AnyValue::new_int(-7685981735718036227_i64) | ||
| )] | ||
| ); | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn test_update_attr_to_xxh3_hash_function_call_result_opl_parser() { | ||
| test_update_attr_to_xxh3_hash_function_call_result::<OplParser>().await | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn test_update_attr_to_xxh3_hash_function_call_result_kql_parser() { | ||
| test_update_attr_to_xxh3_hash_function_call_result::<KqlParser>().await | ||
| } | ||
|
|
||
| async fn test_update_attr_to_xxh128_hash_function_call_result<P: Parser>() { | ||
| let logs_data = to_logs_data(vec![ | ||
| LogRecord::build() | ||
| .attributes(vec![KeyValue::new( | ||
| "str_attr", | ||
| AnyValue::new_string("hello"), | ||
| )]) | ||
| .finish(), | ||
| ]); | ||
|
|
||
| let query = r#"logs | extend attributes["str_attr"] = encode(xxh128(attributes["str_attr"]), "hex")"#; | ||
| let pipeline_expr = P::parse_with_options(query, default_parser_options()) | ||
| .unwrap() | ||
| .pipeline; | ||
| let mut pipeline = Pipeline::new(pipeline_expr); | ||
|
|
||
| let input = otlp_to_otap(&OtlpProtoMessage::Logs(logs_data)); | ||
| let result = pipeline.execute(input).await.unwrap(); | ||
| let OtlpProtoMessage::Logs(result_logs_data) = otap_to_otlp(&result) else { | ||
| panic!("invalid signal type"); | ||
| }; | ||
| let log_0 = &result_logs_data.resource_logs[0].scope_logs[0].log_records[0]; | ||
| assert_eq!( | ||
| log_0.attributes, | ||
| vec![KeyValue::new( | ||
| "str_attr", | ||
| AnyValue::new_string("b5e9c1ad071b3e7fc779cfaa5e523818") | ||
| )] | ||
| ); | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn test_update_attr_to_xxh128_hash_function_call_result_opl_parser() { | ||
| test_update_attr_to_xxh128_hash_function_call_result::<OplParser>().await | ||
| } | ||
|
|
||
| #[tokio::test] | ||
| async fn test_update_attr_to_xxh128_hash_function_call_result_kql_parser() { | ||
| test_update_attr_to_xxh128_hash_function_call_result::<KqlParser>().await | ||
| } | ||
|
|
||
| async fn test_update_attr_to_substring_function_call_result<P: Parser>() { | ||
| let logs_data = to_logs_data(vec![ | ||
| LogRecord::build() | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we avoid making
`sha1` a default dependency here, or gate just the `sha1()` function behind a feature? `otap-df-query-engine` is pulled into the normal `df_engine` build through core-nodes/transform-processor, so this makes SHA-1 part of the default engine dependency graph. Also, the workspace `sha1` dependency enables `oid`, which does not look used by this implementation.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
for reference, the concerns on sha-1 - #2827
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I gated
`sha1` behind an `sha1-hash` cargo feature. The module, UDF registration, parser, and tests are all behind `#[cfg(feature = "sha1-hash")]`Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@SzymonIwaniuk @lalitb That could be done in a future PR but I’m slightly concerned by the growing number of SHA-1 related features. While the current usages are valid (e.g. WebSocket protocol compatibility or non-cryptographic hash functions in OPL), multiplying fine-grained features could make the build matrix and dependency story harder to reason about over time.
I’d recommend converging toward a single high-level compatibility feature (e.g. sha1-compat) controlling all SHA-1 usage globally, instead of component-specific flags. Internally, all SHA-1 usage should go through a shared utility module with explicit documentation clarifying that it is used for protocol compatibility/non-security purposes only.
Please open a GH issue to track this if not integrated in this PR. Thanks.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@lquerel @lalitb I agree with the consolidation of SHA-1 related feature flags. I think this would be better addressed in a follow-up issue rather than in this PR, as it involves changes across multiple components. Feel free to assign it to me once the issue is created after this PR. @lalitb I'd like to know what you think about this approach, would that work for you?