
Commit d825a1c

remove process_strategy and mermaid (#451)
* removed ProcessStrategy, as it added complexity without there being multiple strategies
* removed mermaid from the documentation because it does not work in airgapped environments
1 parent 02448cd commit d825a1c

10 files changed: +79 -340 lines

doc/requirements.txt (-1)

@@ -1,6 +1,5 @@
 sphinx
 sphinx_rtd_theme
-sphinxcontrib-mermaid
 sphinxcontrib.datatemplates
 sphinx-copybutton
 nbsphinx

doc/source/conf.py (-1)

@@ -56,7 +56,6 @@ def setup(app):
 # ones.
 extensions = [
     "sphinx.ext.napoleon",
-    "sphinxcontrib.mermaid",
     "sphinx.ext.autosummary",
     "sphinxcontrib.datatemplates",
     "nbsphinx",

doc/source/development/index.rst (-116)

@@ -2,122 +2,6 @@
 Development
 ===========
 
-.. mermaid::
-
-   classDiagram
-      Component <-- Processor
-      Component <-- Connector
-      Connector <-- Input : implements
-      Connector <-- Output : implements
-      Processor <-- Normalizer : implements
-      Processor <-- Pseudonymizer : implements
-      Input <-- ConfluentKafkaInput : implements
-      Output <-- ConfluentKafkaOutput : implements
-      ProcessorConfiguration
-      Rule <-- NormalizerRule : inherit
-      Rule <-- PseudonymizerRule : inherit
-      BaseProcessorTestCase <-- NormalizerTestCase : implements
-      BaseProcessorTestCase <-- PseudonymizerTestCase : implements
-      class Component{
-        +Config
-        +str name
-        +Logger _logger
-        +Config _config
-        +String describe()
-        +None setup()
-        +None shut_down()
-
-      }
-      class Processor{
-        <<interface>>
-        +rule_class
-        +Config
-        +load_rules()
-        +process()
-        +apply_rules()*
-      }
-      class Normalizer{
-        +Config
-        +rule_class = NormalizerRule
-        +_config: Normalizer.Config
-        +apply_rules()
-      }
-
-      class Pseudonymizer{
-        +Config
-        +rule_class = PseudonymizerRule
-        +_config: Pseudonymizer.Config
-        +apply_rules()
-      }
-      class Connector{
-        <<interface>>
-        +Config
-      }
-      class Input{
-        <<interface>>
-        +Config
-        +_config: Input.Config
-        -Dict _get_event()*
-        -None _get_raw_event()
-        +tuple[dict, error|None] get_next()
-      }
-      class Output{
-        <<interface>>
-        +Config
-        +_config: Output.Config
-        +None store()*
-        +None store_custom()*
-        +None store_failed()*
-      }
-      class ConfluentKafkaInput{
-        +Config
-        +_config: ConfluentKafkaInput.Config
-        +tuple _get_event()
-        +bytearray _get_raw_event()
-      }
-      class ConfluentKafkaOutput{
-        +Config
-        +_config: ConfluentKafkaInput.Config
-        +None store()
-        +None store_custom()
-        +None store_failed()
-      }
-
-      class Configuration{
-        <<adapter>>
-        +create
-      }
-      class Registry{
-        +mapping : dict
-      }
-
-      class Factory{
-        +create()
-      }
-
-
-      class TestFactory{
-        +test_check()
-        +test_create_normalizer()
-        +test_create_pseudonymizer()
-      }
-
-      class BaseProcessorTestCase{
-        +test_describe()
-        +test_load_rules()
-        +test_process()
-        +test_apply_rules()*
-      }
-
-      class NormalizerTestCase{
-        +test_apply_rules()
-      }
-
-      class PseudonymizerTestCase{
-        +test_apply_rules()
-      }
-
-
 .. toctree::
    :maxdepth: 2
 
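Since the rendered diagram disappears from the docs with this change, a rough Python sketch of the hierarchy it described may help; the names and methods are taken from the diagram above, not verified against logprep's current source:

from abc import ABC, abstractmethod
from logging import Logger


class Component(ABC):
    """Common base of processors and connectors, per the removed diagram."""

    def __init__(self, name: str, config, logger: Logger):
        self.name = name
        self._config = config
        self._logger = logger

    def describe(self) -> str:
        return f"{type(self).__name__} ({self.name})"

    def setup(self) -> None:
        """Optional start-up hook."""

    def shut_down(self) -> None:
        """Optional tear-down hook."""


class Processor(Component):
    """Interface: concrete processors like Normalizer and Pseudonymizer
    set rule_class and implement apply_rules()."""

    rule_class = None

    def load_rules(self) -> None: ...

    def process(self, event: dict) -> None: ...

    @abstractmethod
    def apply_rules(self, event: dict, rule) -> None: ...


class Connector(Component):
    """Interface implemented by Input (get_next) and Output (store) connectors."""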

doc/source/user_manual/introduction.rst (-21)

@@ -36,27 +36,6 @@ Multiple instances of pipelines are created and run in parallel by different pro
 Only one event at a time is processed by each processor.
 Therefore, results of a processor should not depend on other events.
 
-.. mermaid::
-
-   flowchart LR
-   A[Input\nConnector] --> B
-   A[Input\nConnector] --> C
-   A[Input\nConnector] --> D
-   subgraph Pipeline 1
-   B[Normalizer] --> E[Geo-IP Enricher]
-   E --> F[Dropper]
-   end
-   subgraph Pipeline 2
-   C[Normalizer] --> G[Geo-IP Enricher]
-   G --> H[Dropper]
-   end
-   subgraph Pipeline n
-   D[Normalizer] --> I[Geo-IP Enricher]
-   I --> J[Dropper]
-   end
-   F --> K[Output\nConnector]
-   H --> K[Output\nConnector]
-   J --> K[Output\nConnector]
 
 Processors
 ==========

logprep/abc/processor.py (+27 -11)

@@ -1,6 +1,8 @@
 """Abstract module for processors"""
 import copy
+import time
 from abc import abstractmethod
+from functools import reduce
 from logging import DEBUG, Logger
 from multiprocessing import current_process
 from pathlib import Path
@@ -16,7 +18,6 @@
     ProcessingCriticalError,
     ProcessingWarning,
 )
-from logprep.processor.processor_strategy import SpecificGenericProcessStrategy
 from logprep.util import getter
 from logprep.util.helper import (
     add_and_overwrite,
@@ -122,7 +123,6 @@ def update_mean_processing_time_per_event(self, new_sample):
 
     def __init__(self, name: str, configuration: "Processor.Config", logger: Logger):
         super().__init__(name, configuration, logger)
-        self._strategy = SpecificGenericProcessStrategy(self._config.apply_multiple_times)
         self.metric_labels, specific_tree_labels, generic_tree_labels = self._create_metric_labels()
         self._specific_tree = RuleTree(
             config_path=self._config.tree_config, metric_labels=specific_tree_labels
@@ -192,15 +192,31 @@ def process(self, event: dict):
         A dictionary representing a log event.
 
         """
-        if self._logger.isEnabledFor(DEBUG):  # pragma: no cover
-            self._logger.debug(f"{self.describe()} processing event {event}")
-        self._strategy.process(
-            event,
-            generic_tree=self._generic_tree,
-            specific_tree=self._specific_tree,
-            callback=self._apply_rules_wrapper,
-            processor_metrics=self.metrics,
-        )
+        self._logger.debug(f"{self.describe()} processing event {event}")
+        self.metrics.number_of_processed_events += 1
+        self._process_rule_tree(event, self._specific_tree)
+        self._process_rule_tree(event, self._generic_tree)
+
+    def _process_rule_tree(self, event: dict, tree: "RuleTree"):
+        applied_rules = set()
+
+        def _process_rule(event, rule):
+            begin = time.time()
+            self._apply_rules_wrapper(event, rule)
+            processing_time = time.time() - begin
+            rule.metrics._number_of_matches += 1
+            rule.metrics.update_mean_processing_time(processing_time)
+            self.metrics.update_mean_processing_time_per_event(processing_time)
+            applied_rules.add(rule)
+            return event
+
+        if self._config.apply_multiple_times:
+            matching_rules = tree.get_matching_rules(event)
+            while matching_rules:
+                reduce(_process_rule, (event, *matching_rules))
+                matching_rules = set(tree.get_matching_rules(event)).difference(applied_rules)
+        else:
+            reduce(_process_rule, (event, *tree.get_matching_rules(event)))
 
     def _apply_rules_wrapper(self, event, rule):
         try:
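The interesting part of this replacement is the apply_multiple_times branch: the tree is re-queried after each pass, and processing stops once every rule that still matches has already been applied, which lets a rule fire on fields that an earlier rule created. A self-contained sketch of that control flow; FakeRule, FakeTree, and process_rule_tree are hypothetical stand-ins for illustration, not logprep classes, though the loop itself mirrors the diff above:

from functools import reduce


class FakeRule:
    """Hypothetical stand-in for a logprep rule."""

    def __init__(self, name, matches, transform):
        self.name = name
        self.matches = matches      # predicate: does the rule match the event?
        self.transform = transform  # mutation the rule applies to the event


class FakeTree:
    """Hypothetical stand-in for logprep's RuleTree."""

    def __init__(self, rules):
        self._rules = rules

    def get_matching_rules(self, event):
        return [rule for rule in self._rules if rule.matches(event)]


def process_rule_tree(event, tree, apply_multiple_times=False):
    """Mirror of the loop in the diff: with apply_multiple_times, the
    tree is re-queried until only already-applied rules match."""
    applied_rules = set()

    def _process_rule(event, rule):
        rule.transform(event)
        applied_rules.add(rule)
        return event

    if apply_multiple_times:
        matching_rules = tree.get_matching_rules(event)
        while matching_rules:
            reduce(_process_rule, (event, *matching_rules))
            matching_rules = set(tree.get_matching_rules(event)).difference(applied_rules)
    else:
        reduce(_process_rule, (event, *tree.get_matching_rules(event)))


# The second rule only matches after the first one has added a field,
# so it fires only because the tree is queried a second time:
tag = FakeRule("tag", lambda e: "msg" in e, lambda e: e.update(tag="seen"))
follow_up = FakeRule("follow_up", lambda e: e.get("tag") == "seen", lambda e: e.update(done=True))
event = {"msg": "hello"}
process_rule_tree(event, FakeTree([tag, follow_up]), apply_multiple_times=True)
print(event)  # {'msg': 'hello', 'tag': 'seen', 'done': True}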

logprep/processor/processor_strategy.py (-91)
This file was deleted.

logprep/run_logprep.py (+7 -16)

@@ -6,7 +6,6 @@
 import sys
 import warnings
 from argparse import ArgumentParser
-from logging import ERROR, Logger, getLogger
 from os.path import basename
 from pathlib import Path
 
@@ -24,21 +23,13 @@
 from logprep.util.schema_and_rule_checker import SchemaAndRuleChecker
 from logprep.util.time_measurement import TimeMeasurement
 
-from logging import (
-    getLogger,
-    basicConfig,
-    Logger,
-)
-from logging.handlers import SysLogHandler
-
-
 warnings.simplefilter("always", DeprecationWarning)
 logging.captureWarnings(True)
 
 DEFAULT_LOCATION_CONFIG = "file:///etc/logprep/pipeline.yml"
-getLogger("filelock").setLevel(ERROR)
-getLogger("urllib3.connectionpool").setLevel(ERROR)
-getLogger("elasticsearch").setLevel(ERROR)
+logging.getLogger("filelock").setLevel(logging.ERROR)
+logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)
+logging.getLogger("elasticsearch").setLevel(logging.ERROR)
 
 
 def _parse_arguments():
@@ -98,7 +89,7 @@ def _parse_arguments():
     return arguments
 
 
-def _run_logprep(arguments, logger: Logger):
+def _run_logprep(arguments, logger: logging.Logger):
     runner = None
     try:
         runner = Runner.get_runner()
@@ -148,15 +139,15 @@ def _setup_logger(args, config: Configuration):
     try:
         log_config = config.get("logger", {})
         log_level = log_config.get("level", "INFO")
-        basicConfig(
+        logging.basicConfig(
             level=log_level, format="%(asctime)-15s %(name)-5s %(levelname)-8s: %(message)s"
         )
         logger = logging.getLogger("Logprep")
        logger.info(f"Log level set to '{log_level}'")
         for version in get_versions_string(args).split("\n"):
             logger.info(version)
     except BaseException as error:  # pylint: disable=broad-except
-        getLogger("Logprep").exception(error)
+        logging.getLogger("Logprep").exception(error)
         sys.exit(1)
     return logger
 
@@ -187,7 +178,7 @@ def _setup_metrics_and_time_measurement(args, config, logger):
     logger.debug(f"Config path: {args.config}")
 
 
-def _validate_rules(args, config: Configuration, logger: Logger):
+def _validate_rules(args, config: Configuration, logger: logging.Logger):
     try:
         config.verify_pipeline_only(logger)
     except InvalidConfigurationError as error:
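The net effect is a single import style: one module-level import logging plus qualified access, instead of a second from logging import ... block that duplicated getLogger and Logger and pulled in the unused SysLogHandler. A minimal sketch of the resulting pattern; the logger names and format string come from the diff, while the standalone function name and its default level are assumptions for illustration:

import logging

# Quiet noisy third-party loggers once, at import time.
logging.getLogger("filelock").setLevel(logging.ERROR)
logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)
logging.getLogger("elasticsearch").setLevel(logging.ERROR)


def setup_logger(log_level: str = "INFO") -> logging.Logger:
    """Configure the root handler and return the application logger."""
    logging.basicConfig(
        level=log_level, format="%(asctime)-15s %(name)-5s %(levelname)-8s: %(message)s"
    )
    logger = logging.getLogger("Logprep")
    logger.info(f"Log level set to '{log_level}'")
    return logger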
