diff --git a/CHANGELOG.md b/CHANGELOG.md index aa45b9358..1b2c01b96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,8 @@ - `intelmq.bots.parsers.dataplane.parser`: Use ` | ` as field delimiter, fix parsing of AS names including `|` (PR#2488 by DigitalTrustCenter). #### Experts +- `intelmq.bots.experts.sieve.expert`: + - For `:contains`, `=~` and `!~`, convert the value to string before matching, avoiding an exception. If the value is a dict, convert the value to JSON (PR#2500 by Sebastian Wagner). #### Outputs - `intelmq.bots.outputs.misp.output_feed`: handle failures if saved current event wasn't saved or is incorrect (PR by Kamil Mankowski). diff --git a/docs/user/bots.md b/docs/user/bots.md index 2c83de069..459d4d584 100644 --- a/docs/user/bots.md +++ b/docs/user/bots.md @@ -3582,10 +3582,12 @@ if :exists source.fqdn { ... } if feed.name != 'acme-security' || feed.accuracy == 100 || extra.false_positive == false { ... } ``` -- `:contains` matches on substrings. +- `:contains` matches on substrings ([`str.find`](https://docs.python.org/3/library/stdtypes.html#str.find)). - `=~` matches strings based on the given regular expression. `!~` is the inverse regular expression match. +- For `:contains`, `=~` and `!~`, the value is converted to string before matching. If the value is a dict, it is converted to JSON. + - Numerical comparisons are evaluated with `<`, `<=`, `>`, `>=`. 
- `<<` matches if an IP address is contained in the specified network range: diff --git a/intelmq/bots/experts/sieve/expert.py b/intelmq/bots/experts/sieve/expert.py index af7cbaf1c..9bd723e7a 100644 --- a/intelmq/bots/experts/sieve/expert.py +++ b/intelmq/bots/experts/sieve/expert.py @@ -16,6 +16,7 @@ import operator from datetime import datetime, timedelta, timezone +from json import dumps from typing import Callable, Dict, Optional, Union from enum import Enum, auto @@ -272,7 +273,14 @@ def process_single_string_match(self, key, op, value, event) -> bool: if key not in event: return op in {'!=', '!~'} - return self._string_op_map[op](event[key], value.value) + lhs = event[key] + if not isinstance(lhs, str) and op not in ('==', '!='): + if isinstance(lhs, dict): + lhs = dumps(lhs) + else: + lhs = str(lhs) + + return self._string_op_map[op](lhs, value.value) def process_multi_string_match(self, key, op, value, event) -> bool: if key not in event: diff --git a/intelmq/tests/bots/experts/sieve/test_expert.py b/intelmq/tests/bots/experts/sieve/test_expert.py index 6241aa60e..caf23b03c 100644 --- a/intelmq/tests/bots/experts/sieve/test_expert.py +++ b/intelmq/tests/bots/experts/sieve/test_expert.py @@ -1738,6 +1738,14 @@ def test_empty_list(self): self.run_bot() self.assertMessageEqual(0, expected) + def test_extra_dict(self): + self.sysconfig['file'] = os.path.join(os.path.dirname(__file__), 'test_sieve_files/test_extra_dict.sieve') + event = EXAMPLE_INPUT.copy() + event['extra.some_dict'] = {'key': []} + self.input_message = event + self.run_bot() + self.assertOutputQueueLen(0) + if __name__ == '__main__': # pragma: no cover unittest.main() diff --git a/intelmq/tests/bots/experts/sieve/test_sieve_files/test_extra_dict.sieve b/intelmq/tests/bots/experts/sieve/test_sieve_files/test_extra_dict.sieve new file mode 100644 index 000000000..5eb50e8ac --- /dev/null +++ b/intelmq/tests/bots/experts/sieve/test_sieve_files/test_extra_dict.sieve @@ -0,0 +1,11 @@ +// 
'{"extra.some_dict": { "key": [] }}' + +if :notexists extra.some_dict { + drop +} +if extra.some_dict !~ '"key": ' { + drop +} +if extra.some_dict =~ '"key": \[\]' { + drop +}