Skip to content

Commit

Permalink
Refactor overwrite argument for field addition functions
Browse files Browse the repository at this point in the history
- Renames `overwrite_output_field` to `overwrite_target_field` in multiple files.
- Ensures consistency in method signatures
  • Loading branch information
dtrai2 committed Nov 12, 2024
1 parent 5a0c5a2 commit 032a95c
Show file tree
Hide file tree
Showing 14 changed files with 37 additions and 47 deletions.
3 changes: 1 addition & 2 deletions logprep/abc/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from logprep.framework.rule_tree.rule_tree import RuleTree, RuleTreeType
from logprep.metrics.metrics import Metric
from logprep.processor.base.exceptions import (
FieldExistsWarning,
ProcessingCriticalError,
ProcessingError,
ProcessingWarning,
Expand Down Expand Up @@ -386,7 +385,7 @@ def _write_target_field(self, event: dict, rule: "Rule", result: any) -> None:
target_field=rule.target_field,
content=result,
extends_lists=rule.extend_target_list,
overwrite_output_field=rule.overwrite_target,
overwrite_target_field=rule.overwrite_target,
)

def setup(self):
Expand Down
2 changes: 1 addition & 1 deletion logprep/processor/clusterer/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def _cluster(self, event: dict, rule: ClustererRule):
self._config.output_field_name,
cluster_signature,
extends_lists=rule.extend_target_list,
overwrite_output_field=rule.overwrite_target,
overwrite_target_field=rule.overwrite_target,
)
self._last_non_extracted_signature = sig_text

Expand Down
2 changes: 1 addition & 1 deletion logprep/processor/dissector/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,6 @@ def _apply_convert_datatype(self, event, rule):
for target_field, converter in rule.convert_actions:
try:
target_value = converter(get_dotted_field_value(event, target_field))
add_field_to(event, target_field, target_value, overwrite_output_field=True)
add_field_to(event, target_field, target_value, overwrite_target_field=True)
except ValueError as error:
self._handle_warning_error(event, rule, error)
4 changes: 2 additions & 2 deletions logprep/processor/domain_label_extractor/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
from logprep.processor.domain_label_extractor.rule import DomainLabelExtractorRule
from logprep.processor.field_manager.processor import FieldManager
from logprep.util.getter import GetterFactory
from logprep.util.helper import add_and_overwrite, get_dotted_field_value, add_batch_to
from logprep.util.helper import add_and_overwrite, add_batch_to, get_dotted_field_value
from logprep.util.validators import list_of_urls_validator

logger = logging.getLogger("DomainLabelExtractor")
Expand Down Expand Up @@ -141,7 +141,7 @@ def _apply_rules(self, event, rule: DomainLabelExtractorRule):
f"{rule.target_field}.subdomain",
]
contents = [f"{labels.domain}.{labels.suffix}", labels.suffix, labels.subdomain]
add_batch_to(event, targets, contents, overwrite_output_field=rule.overwrite_target)
add_batch_to(event, targets, contents, overwrite_target_field=rule.overwrite_target)
else:
tagging_field.append(f"invalid_domain_in_{rule.source_fields[0].replace('.', '_')}")
add_and_overwrite(event, self._config.tagging_field_name, tagging_field)
Expand Down
2 changes: 1 addition & 1 deletion logprep/processor/domain_resolver/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,4 +225,4 @@ def _store_debug_infos(self, event, requires_storing):
"obtained_from_cache": not requires_storing,
"cache_size": len(self._domain_ip_map.keys()),
}
add_field_to(event, "resolved_ip_debug", event_dbg, overwrite_output_field=True)
add_field_to(event, "resolved_ip_debug", event_dbg, overwrite_target_field=True)
4 changes: 2 additions & 2 deletions logprep/processor/generic_resolver/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from logprep.processor.base.exceptions import FieldExistsWarning
from logprep.processor.field_manager.processor import FieldManager
from logprep.processor.generic_resolver.rule import GenericResolverRule
from logprep.util.helper import get_dotted_field_value, add_field_to
from logprep.util.helper import add_field_to, get_dotted_field_value


class GenericResolver(FieldManager):
Expand Down Expand Up @@ -64,7 +64,7 @@ def _apply_rules(self, event, rule):
target_field,
content,
extends_lists=rule.extend_target_list,
overwrite_output_field=rule.overwrite_target,
overwrite_target_field=rule.overwrite_target,
)
except FieldExistsWarning as error:
conflicting_fields.extend(error.skipped_fields)
Expand Down
4 changes: 2 additions & 2 deletions logprep/processor/geoip_enricher/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
from logprep.processor.field_manager.processor import FieldManager
from logprep.processor.geoip_enricher.rule import GEOIP_DATA_STUBS, GeoipEnricherRule
from logprep.util.getter import GetterFactory
from logprep.util.helper import get_dotted_field_value, add_batch_to
from logprep.util.helper import add_batch_to, get_dotted_field_value

logger = logging.getLogger("GeoipEnricher")

Expand Down Expand Up @@ -139,5 +139,5 @@ def _apply_rules(self, event, rule):
targets,
contents,
extends_lists=False,
overwrite_output_field=rule.overwrite_target,
overwrite_target_field=rule.overwrite_target,
)
10 changes: 3 additions & 7 deletions logprep/processor/grokker/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,11 @@

from attrs import define, field, validators

from logprep.processor.base.exceptions import (
FieldExistsWarning,
ProcessingError,
ProcessingWarning,
)
from logprep.processor.base.exceptions import ProcessingError, ProcessingWarning
from logprep.processor.field_manager.processor import FieldManager
from logprep.processor.grokker.rule import GrokkerRule
from logprep.util.getter import GetterFactory
from logprep.util.helper import add_field_to, get_dotted_field_value, add_batch_to
from logprep.util.helper import add_batch_to, get_dotted_field_value

logger = logging.getLogger("Grokker")

Expand Down Expand Up @@ -96,7 +92,7 @@ def _apply_rules(self, event: dict, rule: GrokkerRule):
targets,
contents,
extends_lists=rule.extend_target_list,
overwrite_output_field=rule.overwrite_target,
overwrite_target_field=rule.overwrite_target,
)
if self._handle_missing_fields(event, rule, rule.actions.keys(), source_values):
return
Expand Down
7 changes: 3 additions & 4 deletions logprep/processor/hyperscan_resolver/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@

from logprep.processor.base.exceptions import (
FieldExistsWarning,
SkipImportError,
ProcessingCriticalError,
SkipImportError,
)
from logprep.processor.field_manager.processor import FieldManager
from logprep.util.helper import get_dotted_field_value, add_field_to
from logprep.util.helper import add_field_to, get_dotted_field_value
from logprep.util.validators import directory_validator

# pylint: disable=no-name-in-module
Expand All @@ -57,7 +57,6 @@
# pylint: disable=ungrouped-imports
from logprep.processor.hyperscan_resolver.rule import HyperscanResolverRule


# pylint: enable=ungrouped-imports


Expand Down Expand Up @@ -125,7 +124,7 @@ def _apply_rules(self, event: dict, rule: HyperscanResolverRule):
resolve_target,
dest_val,
extends_lists=rule.extend_target_list,
overwrite_output_field=rule.overwrite_target,
overwrite_target_field=rule.overwrite_target,
)
except FieldExistsWarning as error:
conflicting_fields.extend(error.skipped_fields)
Expand Down
7 changes: 2 additions & 5 deletions logprep/processor/labeler/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,7 @@
from logprep.abc.processor import Processor
from logprep.processor.labeler.labeling_schema import LabelingSchema
from logprep.processor.labeler.rule import LabelerRule
from logprep.util.helper import (
get_dotted_field_value,
add_batch_to,
)
from logprep.util.helper import add_batch_to, get_dotted_field_value


class Labeler(Processor):
Expand Down Expand Up @@ -81,4 +78,4 @@ def _apply_rules(self, event, rule):
add_batch_to(event, targets, contents, extends_lists=True)
# convert sets into sorted lists
contents = [sorted(set(get_dotted_field_value(event, target))) for target in targets]
add_batch_to(event, targets, contents, overwrite_output_field=True)
add_batch_to(event, targets, contents, overwrite_target_field=True)
2 changes: 1 addition & 1 deletion logprep/processor/pseudonymizer/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def _apply_rules(self, event: dict, rule: PseudonymizerRule):
]
else:
field_value = self._pseudonymize_field(rule, dotted_field, regex, field_value)
_ = add_field_to(event, dotted_field, field_value, overwrite_output_field=True)
_ = add_field_to(event, dotted_field, field_value, overwrite_target_field=True)
if "@timestamp" in event:
for pseudonym, _ in self.result.data:
pseudonym["@timestamp"] = event["@timestamp"]
Expand Down
3 changes: 1 addition & 2 deletions logprep/processor/template_replacer/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@

from attr import define, field, validators

from logprep.processor.base.exceptions import FieldExistsWarning
from logprep.processor.field_manager.processor import FieldManager
from logprep.processor.template_replacer.rule import TemplateReplacerRule
from logprep.util.getter import GetterFactory
Expand Down Expand Up @@ -115,7 +114,7 @@ def _perform_replacement(self, event: dict, replacement: str, rule: TemplateRepl
Therefore, they wouldn't be replaced, and we can overwrite the existing target field.
"""
overwrite = get_dotted_field_value(event, self._target_field) is not None
add_field_to(event, self._target_field, replacement, overwrite_output_field=overwrite)
add_field_to(event, self._target_field, replacement, overwrite_target_field=overwrite)

def setup(self):
super().setup()
Expand Down
28 changes: 14 additions & 14 deletions logprep/util/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ def add_field_to(
target_field,
content,
extends_lists=False,
overwrite_output_field=False,
overwrite_target_field=False,
) -> None:
"""
Add content to the output_field in the given event. Output_field can be a dotted subfield.
Add content to the target_field in the given event. target_field can be a dotted subfield.
In case of missing fields, all intermediate fields will be created.
Parameters
----------
Expand All @@ -76,25 +76,25 @@ def add_field_to(
target_field: str
Dotted subfield string indicating the target of the output value, e.g. destination.ip
content: str, float, int, list, dict
Value that should be written into the output_field, can be a str, list, or dict object
Value that should be written into the target_field
extends_lists: bool
Flag that determines whether output_field lists should be extended
overwrite_output_field: bool
Flag that determines whether the output_field should be overwritten
Flag that determines whether target_field lists should be extended
overwrite_target_field: bool
Flag that determines whether the target_field should be overwritten
Raises
------
ValueError
If both extends_lists and overwrite_output_field are set to True.
If both extends_lists and overwrite_target_field are set to True.
FieldExistsWarning
If the output field already exists and overwrite_output_field is False, or if extends_lists is True but
If the target_field already exists and overwrite_target_field is False, or if extends_lists is True but
the existing field is not a list.
"""
if extends_lists and overwrite_output_field:
if extends_lists and overwrite_target_field:
raise ValueError("An output field can't be overwritten and extended at the same time")
field_path = [event, *get_dotted_field_list(target_field)]
target_key = field_path.pop()

if overwrite_output_field:
if overwrite_target_field:
target_parent = reduce(_add_and_overwrite_key, field_path)
target_parent[target_key] = content
return
Expand Down Expand Up @@ -138,7 +138,7 @@ def _add_field_to_silent_fail(*args, **kwargs) -> None | str:


def add_batch_to(
event, targets, contents, extends_lists=False, overwrite_output_field=False
event, targets, contents, extends_lists=False, overwrite_target_field=False
) -> None:
"""
Handles the batch addition operation while raising a FieldExistsWarning with all unsuccessful targets.
Expand All @@ -152,7 +152,7 @@ def add_batch_to(
A list of contents corresponding to each target field.
extends_lists: bool
A boolean indicating whether to extend lists if the target field already exists.
overwrite_output_field: bool
overwrite_target_field: bool
A boolean indicating whether to overwrite the target field if it already exists.
Raises:
Expand All @@ -165,7 +165,7 @@ def add_batch_to(
targets,
contents,
itertools.repeat(extends_lists, len(targets)),
itertools.repeat(overwrite_output_field, len(targets)),
itertools.repeat(overwrite_target_field, len(targets)),
)
unsuccessful_targets = [item for item in unsuccessful_targets if item is not None]
if unsuccessful_targets:
Expand Down Expand Up @@ -343,7 +343,7 @@ def snake_to_camel(snake: str) -> str:

def add_and_overwrite(event, target_field, content, *_):
"""wrapper for add_field_to"""
add_field_to(event, target_field, content, overwrite_output_field=True)
add_field_to(event, target_field, content, overwrite_target_field=True)


def append(event, target_field, content, separator):
Expand Down
6 changes: 3 additions & 3 deletions tests/unit/util/test_helper_add_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,13 @@ def test_provoke_dict_duplicate_in_dotted_subfield(self):

def test_add_field_to_overwrites_output_field_in_root_level(self):
document = {"some": "field", "output_field": "has already content"}
add_field_to(document, "output_field", {"dict": "content"}, overwrite_output_field=True)
add_field_to(document, "output_field", {"dict": "content"}, overwrite_target_field=True)
assert document.get("output_field") == {"dict": "content"}

def test_add_field_to_overwrites_output_field_in_nested_level(self):
document = {"some": "field", "nested": {"output": {"field": "has already content"}}}
add_field_to(
document, "nested.output.field", {"dict": "content"}, overwrite_output_field=True
document, "nested.output.field", {"dict": "content"}, overwrite_target_field=True
)
assert document.get("nested", {}).get("output", {}).get("field") == {"dict": "content"}

Expand All @@ -100,7 +100,7 @@ def test_add_field_to_raises_if_list_should_be_extended_and_overwritten_at_the_s
"some_list",
["first", "second"],
extends_lists=True,
overwrite_output_field=True,
overwrite_target_field=True,
)

def test_returns_false_if_dotted_field_value_key_exists(self):
Expand Down

0 comments on commit 032a95c

Please sign in to comment.