Skip to content

Commit 205045e

Browse files
authored
update add_field_to function for improved error handling (#696)
1 parent 5c45241 commit 205045e

File tree

45 files changed

+563
-539
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+563
-539
lines changed

CHANGELOG.md

+9
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,20 @@
22

33
## next release
44
### Breaking
5+
6+
* `CriticalInputError` is raised when the input preprocessor values can't be set; this was so far only true
7+
for the hmac preprocessor, but is now also applied to all other preprocessors.
8+
* fix `delimiter` typo in `StringSplitterRule` configuration
9+
510
### Features
611
### Improvements
712

813
* replace `BaseException` with `Exception` for custom errors
914
* refactor `generic_resolver` to validate rules on startup instead of application of each rule
15+
* rewrite the helper method `add_field_to` such that it always raises a `FieldExistsWarning` instead of returning a bool.
16+
* add new helper method `add_fields_to` to directly add multiple fields to one event
17+
* refactor some processors to make use of the new helper methods
18+
1019

1120
### Bugfix
1221

logprep/abc/input.py

+37-34
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
from logprep.abc.connector import Connector
1818
from logprep.abc.exceptions import LogprepException
1919
from logprep.metrics.metrics import Metric
20-
from logprep.util.helper import add_field_to, get_dotted_field_value
20+
from logprep.processor.base.exceptions import FieldExistsWarning
21+
from logprep.util.helper import add_fields_to, get_dotted_field_value
2122
from logprep.util.time import UTC, TimeParser
2223
from logprep.util.validators import dict_structure_validator
2324

@@ -280,16 +281,19 @@ def get_next(self, timeout: float) -> dict | None:
280281
self.metrics.number_of_processed_events += 1
281282
if not isinstance(event, dict):
282283
raise CriticalInputError(self, "not a dict", event)
283-
if self._add_hmac:
284-
event = self._add_hmac_to(event, raw_event)
285-
if self._add_version_info:
286-
self._add_version_information_to_event(event)
287-
if self._add_log_arrival_time_information:
288-
self._add_arrival_time_information_to_event(event)
289-
if self._add_log_arrival_timedelta_information:
290-
self._add_arrival_timedelta_information_to_event(event)
291-
if self._add_env_enrichment:
292-
self._add_env_enrichment_to_event(event)
284+
try:
285+
if self._add_hmac:
286+
event = self._add_hmac_to(event, raw_event)
287+
if self._add_version_info:
288+
self._add_version_information_to_event(event)
289+
if self._add_log_arrival_time_information:
290+
self._add_arrival_time_information_to_event(event)
291+
if self._add_log_arrival_timedelta_information:
292+
self._add_arrival_timedelta_information_to_event(event)
293+
if self._add_env_enrichment:
294+
self._add_env_enrichment_to_event(event)
295+
except FieldExistsWarning as error:
296+
raise CriticalInputError(self, error.args[0], event) from error
293297
return event
294298

295299
def batch_finished_callback(self):
@@ -300,13 +304,19 @@ def _add_env_enrichment_to_event(self, event: dict):
300304
enrichments = self._config.preprocessing.get("enrich_by_env_variables")
301305
if not enrichments:
302306
return
303-
for target_field, variable_name in enrichments.items():
304-
add_field_to(event, target_field, os.environ.get(variable_name, ""))
307+
fields = {
308+
target: os.environ.get(variable_name, "")
309+
for target, variable_name in enrichments.items()
310+
}
311+
add_fields_to(event, fields)
305312

306313
def _add_arrival_time_information_to_event(self, event: dict):
307-
now = TimeParser.now()
308-
target_field = self._config.preprocessing.get("log_arrival_time_target_field")
309-
add_field_to(event, target_field, now.isoformat())
314+
new_field = {
315+
self._config.preprocessing.get(
316+
"log_arrival_time_target_field"
317+
): TimeParser.now().isoformat()
318+
}
319+
add_fields_to(event, new_field)
310320

311321
def _add_arrival_timedelta_information_to_event(self, event: dict):
312322
log_arrival_timedelta_config = self._config.preprocessing.get("log_arrival_timedelta")
@@ -322,16 +332,16 @@ def _add_arrival_timedelta_information_to_event(self, event: dict):
322332
TimeParser.from_string(log_arrival_time).astimezone(UTC)
323333
- TimeParser.from_string(time_reference).astimezone(UTC)
324334
).total_seconds()
325-
add_field_to(event, target_field, delta_time_sec)
335+
add_fields_to(event, fields={target_field: delta_time_sec})
326336

327337
def _add_version_information_to_event(self, event: dict):
328338
"""Add the version information to the event"""
329339
target_field = self._config.preprocessing.get("version_info_target_field")
330340
# pylint: disable=protected-access
331-
add_field_to(event, target_field, self._config._version_information)
341+
add_fields_to(event, fields={target_field: self._config._version_information})
332342
# pylint: enable=protected-access
333343

334-
def _add_hmac_to(self, event_dict, raw_event) -> Tuple[dict, str]:
344+
def _add_hmac_to(self, event_dict, raw_event) -> dict:
335345
"""
336346
Calculates an HMAC (Hash-based message authentication code) based on a given target field
337347
and adds it to the given event. If the target field has the value '<RAW_MSG>' the full raw
@@ -357,7 +367,7 @@ def _add_hmac_to(self, event_dict, raw_event) -> Tuple[dict, str]:
357367
------
358368
CriticalInputError
359369
If the hmac could not be added to the event because the desired output field already
360-
exists or cant't be found.
370+
exists or can't be found.
361371
"""
362372
hmac_options = self._config.preprocessing.get("hmac", {})
363373
hmac_target_field_name = hmac_options.get("target")
@@ -381,18 +391,11 @@ def _add_hmac_to(self, event_dict, raw_event) -> Tuple[dict, str]:
381391
digestmod=hashlib.sha256,
382392
).hexdigest()
383393
compressed = zlib.compress(received_orig_message, level=-1)
384-
hmac_output = {"hmac": hmac, "compressed_base64": base64.b64encode(compressed).decode()}
385-
add_was_successful = add_field_to(
386-
event_dict,
387-
hmac_options.get("output_field"),
388-
hmac_output,
389-
)
390-
if not add_was_successful:
391-
raise CriticalInputError(
392-
self,
393-
f"Couldn't add the hmac to the input event as the desired "
394-
f"output field '{hmac_options.get('output_field')}' already "
395-
f"exist.",
396-
event_dict,
397-
)
394+
new_field = {
395+
hmac_options.get("output_field"): {
396+
"hmac": hmac,
397+
"compressed_base64": base64.b64encode(compressed).decode(),
398+
}
399+
}
400+
add_fields_to(event_dict, new_field)
398401
return event_dict

logprep/abc/processor.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,14 @@
1212
from logprep.framework.rule_tree.rule_tree import RuleTree, RuleTreeType
1313
from logprep.metrics.metrics import Metric
1414
from logprep.processor.base.exceptions import (
15-
FieldExistsWarning,
1615
ProcessingCriticalError,
1716
ProcessingError,
1817
ProcessingWarning,
1918
)
2019
from logprep.util import getter
2120
from logprep.util.helper import (
2221
add_and_overwrite,
23-
add_field_to,
22+
add_fields_to,
2423
get_dotted_field_value,
2524
pop_dotted_field_value,
2625
)
@@ -357,13 +356,15 @@ def _handle_warning_error(self, event, rule, error, failure_tags=None):
357356
if failure_tags is None:
358357
failure_tags = rule.failure_tags
359358
if tags is None:
360-
add_and_overwrite(event, "tags", sorted(list({*failure_tags})))
359+
new_field = {"tags": sorted(list({*failure_tags}))}
361360
else:
362-
add_and_overwrite(event, "tags", sorted(list({*tags, *failure_tags})))
361+
new_field = {"tags": sorted(list({*tags, *failure_tags}))}
362+
add_and_overwrite(event, new_field, rule)
363363
if isinstance(error, ProcessingWarning):
364364
if error.tags:
365365
tags = tags if tags else []
366-
add_and_overwrite(event, "tags", sorted(list({*error.tags, *tags, *failure_tags})))
366+
new_field = {"tags": sorted(list({*error.tags, *tags, *failure_tags}))}
367+
add_and_overwrite(event, new_field, rule)
367368
self.result.warnings.append(error)
368369
else:
369370
self.result.warnings.append(ProcessingWarning(str(error), rule, event))
@@ -381,15 +382,12 @@ def _has_missing_values(self, event, rule, source_field_dict):
381382
return False
382383

383384
def _write_target_field(self, event: dict, rule: "Rule", result: any) -> None:
384-
add_successful = add_field_to(
385+
add_fields_to(
385386
event,
386-
output_field=rule.target_field,
387-
content=result,
387+
fields={rule.target_field: result},
388388
extends_lists=rule.extend_target_list,
389-
overwrite_output_field=rule.overwrite_target,
389+
overwrite_target_field=rule.overwrite_target,
390390
)
391-
if not add_successful:
392-
raise FieldExistsWarning(rule, event, [rule.target_field])
393391

394392
def setup(self):
395393
super().setup()

logprep/metrics/metrics.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@
124124
from attrs import define, field, validators
125125
from prometheus_client import CollectorRegistry, Counter, Gauge, Histogram
126126

127-
from logprep.util.helper import add_field_to
127+
from logprep.util.helper import add_fields_to
128128

129129

130130
@define(kw_only=True, slots=False)
@@ -222,12 +222,14 @@ def inner(self, *args, **kwargs): # nosemgrep
222222
if hasattr(self, "rule_type"):
223223
event = args[0]
224224
if event:
225-
add_field_to(event, f"processing_times.{self.rule_type}", duration)
225+
add_fields_to(
226+
event, fields={f"processing_times.{self.rule_type}": duration}
227+
)
226228
if hasattr(self, "_logprep_config"): # attribute of the Pipeline class
227229
event = args[0]
228230
if event:
229-
add_field_to(event, "processing_times.pipeline", duration)
230-
add_field_to(event, "processing_times.hostname", gethostname())
231+
add_fields_to(event, fields={"processing_times.pipeline": duration})
232+
add_fields_to(event, fields={"processing_times.hostname": gethostname()})
231233
return result
232234

233235
return inner

logprep/processor/base/exceptions.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -70,17 +70,20 @@ def __init__(self, message: str, rule: "Rule"):
7070
class ProcessingWarning(Warning):
7171
"""A warning occurred - log the warning, but continue processing the event."""
7272

73-
def __init__(self, message: str, rule: "Rule", event: dict, tags: List[str] = None):
73+
def __init__(self, message: str, rule: "Rule | None", event: dict, tags: List[str] = None):
7474
self.tags = tags if tags else []
75-
rule.metrics.number_of_warnings += 1
76-
message = f"{message}, {rule.id=}, {rule.description=}, {event=}"
75+
if rule:
76+
rule.metrics.number_of_warnings += 1
77+
message += f", {rule.id=}, {rule.description=}"
78+
message += f", {event=}"
7779
super().__init__(f"{self.__class__.__name__}: {message}")
7880

7981

8082
class FieldExistsWarning(ProcessingWarning):
8183
"""Raised if field already exists."""
8284

83-
def __init__(self, rule: "Rule", event: dict, skipped_fields: List[str]):
85+
def __init__(self, rule: "Rule | None", event: dict, skipped_fields: List[str]):
86+
self.skipped_fields = skipped_fields
8487
message = (
8588
"The following fields could not be written, because "
8689
"one or more subfields existed and could not be extended: "

logprep/processor/clusterer/processor.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
SignaturePhaseStreaming,
5454
)
5555
from logprep.processor.field_manager.processor import FieldManager
56-
from logprep.util.helper import add_field_to, get_dotted_field_value
56+
from logprep.util.helper import add_fields_to, get_dotted_field_value
5757

5858

5959
class Clusterer(FieldManager):
@@ -138,12 +138,11 @@ def _cluster(self, event: dict, rule: ClustererRule):
138138
)
139139
else:
140140
cluster_signature = cluster_signature_based_on_message
141-
add_field_to(
141+
add_fields_to(
142142
event,
143-
self._config.output_field_name,
144-
cluster_signature,
143+
fields={self._config.output_field_name: cluster_signature},
145144
extends_lists=rule.extend_target_list,
146-
overwrite_output_field=rule.overwrite_target,
145+
overwrite_target_field=rule.overwrite_target,
147146
)
148147
self._last_non_extracted_signature = sig_text
149148

logprep/processor/dissector/processor.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,14 @@
2828
.. automodule:: logprep.processor.dissector.rule
2929
"""
3030

31-
from typing import Callable, List, Tuple
31+
from typing import TYPE_CHECKING, Callable, List, Tuple
3232

3333
from logprep.processor.dissector.rule import DissectorRule
3434
from logprep.processor.field_manager.processor import FieldManager
35-
from logprep.util.helper import add_field_to, get_dotted_field_value
35+
from logprep.util.helper import add_fields_to, get_dotted_field_value
36+
37+
if TYPE_CHECKING:
38+
from logprep.processor.base.rule import Rule
3639

3740

3841
class Dissector(FieldManager):
@@ -51,7 +54,7 @@ def _apply_mapping(self, event, rule):
5154
for action, *args, _ in action_mappings_sorted_by_position:
5255
action(*args)
5356

54-
def _get_mappings(self, event, rule) -> List[Tuple[Callable, dict, str, str, str, int]]:
57+
def _get_mappings(self, event, rule) -> List[Tuple[Callable, dict, dict, str, "Rule", int]]:
5558
current_field = None
5659
target_field_mapping = {}
5760
for rule_action in rule.actions:
@@ -84,12 +87,15 @@ def _get_mappings(self, event, rule) -> List[Tuple[Callable, dict, str, str, str
8487
target_field = target_field_mapping.get(target_field.lstrip("&"))
8588
if strip_char:
8689
content = content.strip(strip_char)
87-
yield rule_action, event, target_field, content, separator, position
90+
field = {target_field: content}
91+
yield rule_action, event, field, separator, rule, position
8892

8993
def _apply_convert_datatype(self, event, rule):
9094
for target_field, converter in rule.convert_actions:
9195
try:
9296
target_value = converter(get_dotted_field_value(event, target_field))
93-
add_field_to(event, target_field, target_value, overwrite_output_field=True)
97+
add_fields_to(
98+
event, {target_field: target_value}, rule, overwrite_target_field=True
99+
)
94100
except ValueError as error:
95101
self._handle_warning_error(event, rule, error)

logprep/processor/domain_label_extractor/processor.py

+12-16
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,10 @@
4646
from filelock import FileLock
4747
from tldextract import TLDExtract
4848

49-
from logprep.processor.base.exceptions import FieldExistsWarning
5049
from logprep.processor.domain_label_extractor.rule import DomainLabelExtractorRule
5150
from logprep.processor.field_manager.processor import FieldManager
5251
from logprep.util.getter import GetterFactory
53-
from logprep.util.helper import add_and_overwrite, add_field_to, get_dotted_field_value
52+
from logprep.util.helper import add_and_overwrite, add_fields_to, get_dotted_field_value
5453
from logprep.util.validators import list_of_urls_validator
5554

5655
logger = logging.getLogger("DomainLabelExtractor")
@@ -131,27 +130,24 @@ def _apply_rules(self, event, rule: DomainLabelExtractorRule):
131130

132131
if self._is_valid_ip(domain):
133132
tagging_field.append(f"ip_in_{rule.source_fields[0].replace('.', '_')}")
134-
add_and_overwrite(event, self._config.tagging_field_name, tagging_field)
133+
add_and_overwrite(
134+
event, fields={self._config.tagging_field_name: tagging_field}, rule=rule
135+
)
135136
return
136137

137138
labels = self._tld_extractor(domain)
138139
if labels.suffix != "":
139-
labels_dict = {
140-
"registered_domain": labels.domain + "." + labels.suffix,
141-
"top_level_domain": labels.suffix,
142-
"subdomain": labels.subdomain,
140+
fields = {
141+
f"{rule.target_field}.registered_domain": f"{labels.domain}.{labels.suffix}",
142+
f"{rule.target_field}.top_level_domain": labels.suffix,
143+
f"{rule.target_field}.subdomain": labels.subdomain,
143144
}
144-
for label, value in labels_dict.items():
145-
output_field = f"{rule.target_field}.{label}"
146-
add_successful = add_field_to(
147-
event, output_field, value, overwrite_output_field=rule.overwrite_target
148-
)
149-
150-
if not add_successful:
151-
raise FieldExistsWarning(rule, event, [output_field])
145+
add_fields_to(event, fields, rule, overwrite_target_field=rule.overwrite_target)
152146
else:
153147
tagging_field.append(f"invalid_domain_in_{rule.source_fields[0].replace('.', '_')}")
154-
add_and_overwrite(event, self._config.tagging_field_name, tagging_field)
148+
add_and_overwrite(
149+
event, fields={self._config.tagging_field_name: tagging_field}, rule=rule
150+
)
155151

156152
@staticmethod
157153
def _is_valid_ip(domain):

logprep/processor/domain_resolver/processor.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
from logprep.util.cache import Cache
5454
from logprep.util.getter import GetterFactory
5555
from logprep.util.hasher import SHA256Hasher
56-
from logprep.util.helper import add_field_to, get_dotted_field_value
56+
from logprep.util.helper import add_fields_to, get_dotted_field_value
5757
from logprep.util.validators import list_of_urls_validator
5858

5959
logger = logging.getLogger("DomainResolver")
@@ -222,7 +222,9 @@ def _resolve_ip(self, domain, hash_string=None):
222222

223223
def _store_debug_infos(self, event, requires_storing):
224224
event_dbg = {
225-
"obtained_from_cache": not requires_storing,
226-
"cache_size": len(self._domain_ip_map.keys()),
225+
"resolved_ip_debug": {
226+
"obtained_from_cache": not requires_storing,
227+
"cache_size": len(self._domain_ip_map.keys()),
228+
}
227229
}
228-
add_field_to(event, "resolved_ip_debug", event_dbg, overwrite_output_field=True)
230+
add_fields_to(event, event_dbg, overwrite_target_field=True)

0 commit comments

Comments
 (0)