diff --git a/logprep/abc/processor.py b/logprep/abc/processor.py index 832e5d104..df199ac4b 100644 --- a/logprep/abc/processor.py +++ b/logprep/abc/processor.py @@ -12,7 +12,6 @@ from logprep.framework.rule_tree.rule_tree import RuleTree, RuleTreeType from logprep.metrics.metrics import Metric from logprep.processor.base.exceptions import ( - FieldExistsWarning, ProcessingCriticalError, ProcessingError, ProcessingWarning, @@ -386,7 +385,7 @@ def _write_target_field(self, event: dict, rule: "Rule", result: any) -> None: target_field=rule.target_field, content=result, extends_lists=rule.extend_target_list, - overwrite_output_field=rule.overwrite_target, + overwrite_target_field=rule.overwrite_target, ) def setup(self): diff --git a/logprep/processor/clusterer/processor.py b/logprep/processor/clusterer/processor.py index 914a8c1a5..d6c985d8d 100644 --- a/logprep/processor/clusterer/processor.py +++ b/logprep/processor/clusterer/processor.py @@ -143,7 +143,7 @@ def _cluster(self, event: dict, rule: ClustererRule): self._config.output_field_name, cluster_signature, extends_lists=rule.extend_target_list, - overwrite_output_field=rule.overwrite_target, + overwrite_target_field=rule.overwrite_target, ) self._last_non_extracted_signature = sig_text diff --git a/logprep/processor/dissector/processor.py b/logprep/processor/dissector/processor.py index 1da24a239..0b4b7bc1d 100644 --- a/logprep/processor/dissector/processor.py +++ b/logprep/processor/dissector/processor.py @@ -90,6 +90,6 @@ def _apply_convert_datatype(self, event, rule): for target_field, converter in rule.convert_actions: try: target_value = converter(get_dotted_field_value(event, target_field)) - add_field_to(event, target_field, target_value, overwrite_output_field=True) + add_field_to(event, target_field, target_value, overwrite_target_field=True) except ValueError as error: self._handle_warning_error(event, rule, error) diff --git a/logprep/processor/domain_label_extractor/processor.py b/logprep/processor/domain_label_extractor/processor.py index 342d74e4b..c4883035d 100644 --- a/logprep/processor/domain_label_extractor/processor.py +++ b/logprep/processor/domain_label_extractor/processor.py @@ -49,7 +49,7 @@ from logprep.processor.domain_label_extractor.rule import DomainLabelExtractorRule from logprep.processor.field_manager.processor import FieldManager from logprep.util.getter import GetterFactory -from logprep.util.helper import add_and_overwrite, get_dotted_field_value, add_batch_to +from logprep.util.helper import add_and_overwrite, add_batch_to, get_dotted_field_value from logprep.util.validators import list_of_urls_validator logger = logging.getLogger("DomainLabelExtractor") @@ -141,7 +141,7 @@ def _apply_rules(self, event, rule: DomainLabelExtractorRule): f"{rule.target_field}.subdomain", ] contents = [f"{labels.domain}.{labels.suffix}", labels.suffix, labels.subdomain] - add_batch_to(event, targets, contents, overwrite_output_field=rule.overwrite_target) + add_batch_to(event, targets, contents, overwrite_target_field=rule.overwrite_target) else: tagging_field.append(f"invalid_domain_in_{rule.source_fields[0].replace('.', '_')}") add_and_overwrite(event, self._config.tagging_field_name, tagging_field) diff --git a/logprep/processor/domain_resolver/processor.py b/logprep/processor/domain_resolver/processor.py index 5f3231d2b..f9b918d45 100644 --- a/logprep/processor/domain_resolver/processor.py +++ b/logprep/processor/domain_resolver/processor.py @@ -225,4 +225,4 @@ def _store_debug_infos(self, event, requires_storing): "obtained_from_cache": not requires_storing, "cache_size": len(self._domain_ip_map.keys()), } - add_field_to(event, "resolved_ip_debug", event_dbg, overwrite_output_field=True) + add_field_to(event, "resolved_ip_debug", event_dbg, overwrite_target_field=True) diff --git a/logprep/processor/generic_resolver/processor.py b/logprep/processor/generic_resolver/processor.py index 803639b12..c0b4eccb3 100644 --- a/logprep/processor/generic_resolver/processor.py +++ b/logprep/processor/generic_resolver/processor.py @@ -30,7 +30,7 @@ from logprep.processor.base.exceptions import FieldExistsWarning from logprep.processor.field_manager.processor import FieldManager from logprep.processor.generic_resolver.rule import GenericResolverRule -from logprep.util.helper import get_dotted_field_value, add_field_to +from logprep.util.helper import add_field_to, get_dotted_field_value class GenericResolver(FieldManager): @@ -64,7 +64,7 @@ def _apply_rules(self, event, rule): target_field, content, extends_lists=rule.extend_target_list, - overwrite_output_field=rule.overwrite_target, + overwrite_target_field=rule.overwrite_target, ) except FieldExistsWarning as error: conflicting_fields.extend(error.skipped_fields) diff --git a/logprep/processor/geoip_enricher/processor.py b/logprep/processor/geoip_enricher/processor.py index b83791218..e6afce2e4 100644 --- a/logprep/processor/geoip_enricher/processor.py +++ b/logprep/processor/geoip_enricher/processor.py @@ -41,7 +41,7 @@ from logprep.processor.field_manager.processor import FieldManager from logprep.processor.geoip_enricher.rule import GEOIP_DATA_STUBS, GeoipEnricherRule from logprep.util.getter import GetterFactory -from logprep.util.helper import get_dotted_field_value, add_batch_to +from logprep.util.helper import add_batch_to, get_dotted_field_value logger = logging.getLogger("GeoipEnricher") @@ -139,5 +139,5 @@ def _apply_rules(self, event, rule): targets, contents, extends_lists=False, - overwrite_output_field=rule.overwrite_target, + overwrite_target_field=rule.overwrite_target, ) diff --git a/logprep/processor/grokker/processor.py b/logprep/processor/grokker/processor.py index ad8872a2b..a54a5ab23 100644 --- a/logprep/processor/grokker/processor.py +++ b/logprep/processor/grokker/processor.py @@ -38,15 +38,11 @@ from attrs import define, field, validators -from logprep.processor.base.exceptions import ( - FieldExistsWarning, - ProcessingError, - ProcessingWarning, -) +from logprep.processor.base.exceptions import ProcessingError, ProcessingWarning from logprep.processor.field_manager.processor import FieldManager from logprep.processor.grokker.rule import GrokkerRule from logprep.util.getter import GetterFactory -from logprep.util.helper import add_field_to, get_dotted_field_value, add_batch_to +from logprep.util.helper import add_batch_to, get_dotted_field_value logger = logging.getLogger("Grokker") @@ -96,7 +92,7 @@ def _apply_rules(self, event: dict, rule: GrokkerRule): targets, contents, extends_lists=rule.extend_target_list, - overwrite_output_field=rule.overwrite_target, + overwrite_target_field=rule.overwrite_target, ) if self._handle_missing_fields(event, rule, rule.actions.keys(), source_values): return diff --git a/logprep/processor/hyperscan_resolver/processor.py b/logprep/processor/hyperscan_resolver/processor.py index 57eccb890..81d050952 100644 --- a/logprep/processor/hyperscan_resolver/processor.py +++ b/logprep/processor/hyperscan_resolver/processor.py @@ -39,11 +39,11 @@ from logprep.processor.base.exceptions import ( FieldExistsWarning, - SkipImportError, ProcessingCriticalError, + SkipImportError, ) from logprep.processor.field_manager.processor import FieldManager -from logprep.util.helper import get_dotted_field_value, add_field_to +from logprep.util.helper import add_field_to, get_dotted_field_value from logprep.util.validators import directory_validator # pylint: disable=no-name-in-module @@ -57,7 +57,6 @@ # pylint: disable=ungrouped-imports from logprep.processor.hyperscan_resolver.rule import HyperscanResolverRule - # pylint: enable=ungrouped-imports @@ -125,7 +124,7 @@ def _apply_rules(self, event: dict, rule: HyperscanResolverRule): resolve_target, dest_val, extends_lists=rule.extend_target_list, - overwrite_output_field=rule.overwrite_target, + overwrite_target_field=rule.overwrite_target, ) except FieldExistsWarning as error: conflicting_fields.extend(error.skipped_fields) diff --git a/logprep/processor/labeler/processor.py b/logprep/processor/labeler/processor.py index 452d3711a..d5b52bd77 100644 --- a/logprep/processor/labeler/processor.py +++ b/logprep/processor/labeler/processor.py @@ -33,10 +33,7 @@ from logprep.abc.processor import Processor from logprep.processor.labeler.labeling_schema import LabelingSchema from logprep.processor.labeler.rule import LabelerRule -from logprep.util.helper import ( - get_dotted_field_value, - add_batch_to, -) +from logprep.util.helper import add_batch_to, get_dotted_field_value class Labeler(Processor): @@ -81,4 +78,4 @@ def _apply_rules(self, event, rule): add_batch_to(event, targets, contents, extends_lists=True) # convert sets into sorted lists contents = [sorted(set(get_dotted_field_value(event, target))) for target in targets] - add_batch_to(event, targets, contents, overwrite_output_field=True) + add_batch_to(event, targets, contents, overwrite_target_field=True) diff --git a/logprep/processor/pseudonymizer/processor.py b/logprep/processor/pseudonymizer/processor.py index b4ec2159a..a117e83c7 100644 --- a/logprep/processor/pseudonymizer/processor.py +++ b/logprep/processor/pseudonymizer/processor.py @@ -264,7 +264,7 @@ def _apply_rules(self, event: dict, rule: PseudonymizerRule): ] else: field_value = self._pseudonymize_field(rule, dotted_field, regex, field_value) - _ = add_field_to(event, dotted_field, field_value, overwrite_output_field=True) + _ = add_field_to(event, dotted_field, field_value, overwrite_target_field=True) if "@timestamp" in event: for pseudonym, _ in self.result.data: pseudonym["@timestamp"] = event["@timestamp"] diff --git a/logprep/processor/template_replacer/processor.py b/logprep/processor/template_replacer/processor.py index 538d635c1..94fa18ac2 100644 --- a/logprep/processor/template_replacer/processor.py +++ b/logprep/processor/template_replacer/processor.py @@ -38,7 +38,6 @@ from attr import define, field, validators -from logprep.processor.base.exceptions import FieldExistsWarning from logprep.processor.field_manager.processor import FieldManager from logprep.processor.template_replacer.rule import TemplateReplacerRule from logprep.util.getter import GetterFactory @@ -115,7 +114,7 @@ def _perform_replacement(self, event: dict, replacement: str, rule: TemplateRepl Therefore, they wouldn't be replaced, and we can overwrite the existing target field. """ overwrite = get_dotted_field_value(event, self._target_field) is not None - add_field_to(event, self._target_field, replacement, overwrite_output_field=overwrite) + add_field_to(event, self._target_field, replacement, overwrite_target_field=overwrite) def setup(self): super().setup() diff --git a/logprep/util/helper.py b/logprep/util/helper.py index 2db41d418..8158ba98f 100644 --- a/logprep/util/helper.py +++ b/logprep/util/helper.py @@ -64,10 +64,10 @@ def add_field_to( target_field, content, extends_lists=False, - overwrite_output_field=False, + overwrite_target_field=False, ) -> None: """ - Add content to the output_field in the given event. Output_field can be a dotted subfield. + Add content to the target_field in the given event. target_field can be a dotted subfield. In case of missing fields, all intermediate fields will be created. Parameters ---------- @@ -76,25 +76,25 @@ def add_field_to( target_field: str Dotted subfield string indicating the target of the output value, e.g. destination.ip content: str, float, int, list, dict - Value that should be written into the output_field, can be a str, list, or dict object + Value that should be written into the target_field extends_lists: bool - Flag that determines whether output_field lists should be extended - overwrite_output_field: bool - Flag that determines whether the output_field should be overwritten + Flag that determines whether target_field lists should be extended + overwrite_target_field: bool + Flag that determines whether the target_field should be overwritten Raises ------ ValueError - If both extends_lists and overwrite_output_field are set to True. + If both extends_lists and overwrite_target_field are set to True. FieldExistsWarning - If the output field already exists and overwrite_output_field is False, or if extends_lists is True but + If the target_field already exists and overwrite_target_field is False, or if extends_lists is True but the existing field is not a list. """ - if extends_lists and overwrite_output_field: + if extends_lists and overwrite_target_field: raise ValueError("An output field can't be overwritten and extended at the same time") field_path = [event, *get_dotted_field_list(target_field)] target_key = field_path.pop() - if overwrite_output_field: + if overwrite_target_field: target_parent = reduce(_add_and_overwrite_key, field_path) target_parent[target_key] = content return @@ -138,7 +138,7 @@ def _add_field_to_silent_fail(*args, **kwargs) -> None | str: def add_batch_to( - event, targets, contents, extends_lists=False, overwrite_output_field=False + event, targets, contents, extends_lists=False, overwrite_target_field=False ) -> None: """ Handles the batch addition operation while raising a FieldExistsWarning with all unsuccessful targets. @@ -152,7 +152,7 @@ def add_batch_to( A list of contents corresponding to each target field. extends_lists: bool A boolean indicating whether to extend lists if the target field already exists. - overwrite_output_field: bool + overwrite_target_field: bool A boolean indicating whether to overwrite the target field if it already exists. Raises: @@ -165,7 +165,7 @@ def add_batch_to( targets, contents, itertools.repeat(extends_lists, len(targets)), - itertools.repeat(overwrite_output_field, len(targets)), + itertools.repeat(overwrite_target_field, len(targets)), ) unsuccessful_targets = [item for item in unsuccessful_targets if item is not None] if unsuccessful_targets: @@ -343,7 +343,7 @@ def snake_to_camel(snake: str) -> str: def add_and_overwrite(event, target_field, content, *_): """wrapper for add_field_to""" - add_field_to(event, target_field, content, overwrite_output_field=True) + add_field_to(event, target_field, content, overwrite_target_field=True) def append(event, target_field, content, separator): diff --git a/tests/unit/util/test_helper_add_field.py b/tests/unit/util/test_helper_add_field.py index d622fdd37..b8dd67aec 100644 --- a/tests/unit/util/test_helper_add_field.py +++ b/tests/unit/util/test_helper_add_field.py @@ -72,13 +72,13 @@ def test_provoke_dict_duplicate_in_dotted_subfield(self): def test_add_field_to_overwrites_output_field_in_root_level(self): document = {"some": "field", "output_field": "has already content"} - add_field_to(document, "output_field", {"dict": "content"}, overwrite_output_field=True) + add_field_to(document, "output_field", {"dict": "content"}, overwrite_target_field=True) assert document.get("output_field") == {"dict": "content"} def test_add_field_to_overwrites_output_field_in_nested_level(self): document = {"some": "field", "nested": {"output": {"field": "has already content"}}} add_field_to( - document, "nested.output.field", {"dict": "content"}, overwrite_output_field=True + document, "nested.output.field", {"dict": "content"}, overwrite_target_field=True ) assert document.get("nested", {}).get("output", {}).get("field") == {"dict": "content"} @@ -100,7 +100,7 @@ def test_add_field_to_raises_if_list_should_be_extended_and_overwritten_at_the_s "some_list", ["first", "second"], extends_lists=True, - overwrite_output_field=True, + overwrite_target_field=True, ) def test_returns_false_if_dotted_field_value_key_exists(self):