Skip to content

Commit dce2b6c

Browse files
authored
fix pseudonymizer cache metrics (#703)
* fix pseudonymizer cache metrics
1 parent 7e95536 commit dce2b6c

File tree

3 files changed

+45
-10
lines changed

3 files changed

+45
-10
lines changed

CHANGELOG.md

+1
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99

1010
- fix `confluent_kafka.store_offsets` if `last_valid_record` is `None`, which can happen if a rebalancing happens
1111
before the first message was pulled.
12+
- fix pseudonymizer cache metrics not being updated
1213

1314
## 14.0.0
1415
### Breaking

logprep/processor/pseudonymizer/processor.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -344,9 +344,9 @@ def _wrap_hash(self, hash_string: str) -> str:
344344
def _update_cache_metrics(self):
345345
cache_info_pseudonyms = self._get_pseudonym_dict_cached.cache_info()
346346
cache_info_urls = self._pseudonymize_url_cached.cache_info()
347-
self.metrics.new_results = cache_info_pseudonyms.misses + cache_info_urls.misses
348-
self.metrics.cached_results = cache_info_pseudonyms.hits + cache_info_urls.hits
349-
self.metrics.num_cache_entries = cache_info_pseudonyms.currsize + cache_info_urls.currsize
350-
self.metrics.cache_load = (cache_info_pseudonyms.currsize + cache_info_urls.currsize) / (
347+
self.metrics.new_results += cache_info_pseudonyms.misses + cache_info_urls.misses
348+
self.metrics.cached_results += cache_info_pseudonyms.hits + cache_info_urls.hits
349+
self.metrics.num_cache_entries += cache_info_pseudonyms.currsize + cache_info_urls.currsize
350+
self.metrics.cache_load += (cache_info_pseudonyms.currsize + cache_info_urls.currsize) / (
351351
cache_info_pseudonyms.maxsize + cache_info_urls.maxsize
352352
)

tests/unit/processor/pseudonymizer/test_pseudonymizer.py

+40-6
Original file line number | Diff line number | Diff line change
@@ -821,9 +821,6 @@ def test_pseudonymize_string_adds_pseudonyms(self):
821821
assert len(self.object.result.data) == 1
822822

823823
def test_resolve_from_cache_pseudonym(self):
824-
self.object.metrics.new_results = 0
825-
self.object.metrics.cached_results = 0
826-
self.object.metrics.num_cache_entries = 0
827824
rule_dict = {
828825
"filter": "winlog.event_id: 1234 AND winlog.provider_name: Test456",
829826
"pseudonymizer": {
@@ -844,15 +841,15 @@ def test_resolve_from_cache_pseudonym(self):
844841
}
845842
}
846843
self._load_specific_rule(rule_dict)
844+
self.object.metrics.new_results = 0
845+
self.object.metrics.cached_results = 0
846+
self.object.metrics.num_cache_entries = 0
847847
self.object.process(event)
848848
assert self.object.metrics.new_results == 1
849849
assert self.object.metrics.cached_results == 1
850850
assert self.object.metrics.num_cache_entries == 1
851851

852852
def test_resolve_from_cache_pseudonymize_urls(self):
853-
self.object.metrics.new_results = 0
854-
self.object.metrics.cached_results = 0
855-
self.object.metrics.num_cache_entries = 0
856853
rule_dict = {
857854
"filter": "filter_this: does_not_matter",
858855
"pseudonymizer": {
@@ -869,6 +866,9 @@ def test_resolve_from_cache_pseudonymize_urls(self):
869866
"and_pseudo_this": "https://www.pseudo.this.de",
870867
}
871868
self._load_specific_rule(rule_dict)
869+
self.object.metrics.new_results = 0
870+
self.object.metrics.cached_results = 0
871+
self.object.metrics.num_cache_entries = 0
872872
self.object.process(event)
873873
# 1 subdomains -> pseudonym_cache, 1 url -> url_cache
874874
assert self.object.metrics.new_results == 2
@@ -1089,3 +1089,37 @@ def test_setup_raises_invalid_configuration_on_missing_regex_mapping(self):
10891089
)
10901090
with pytest.raises(InvalidConfigurationError, match=error_message):
10911091
self.object.setup()
1092+
1093+
def test_cache_metrics_updated(self):
1094+
rule_dict = {
1095+
"filter": "winlog.event_id: 1234 AND winlog.provider_name: Test456",
1096+
"pseudonymizer": {
1097+
"mapping": {
1098+
"winlog.event_data.param1": "RE_WHOLE_FIELD",
1099+
}
1100+
},
1101+
}
1102+
event = {
1103+
"@timestamp": "custom timestamp",
1104+
"winlog": {
1105+
"event_id": 1234,
1106+
"provider_name": "Test456",
1107+
"event_data": {
1108+
"param1": "Pseudonymize me - appears twice!",
1109+
},
1110+
},
1111+
}
1112+
self._load_specific_rule(rule_dict)
1113+
1114+
self.object.metrics.new_results = 0
1115+
self.object.metrics.cached_results = 0
1116+
self.object.metrics.num_cache_entries = 0
1117+
1118+
self.object.process(deepcopy(event))
1119+
self.object.process(deepcopy(event))
1120+
self.object.process(event)
1121+
# because the event is the same, the result is cached
1122+
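# metrics are mocked by integers and incremented by the cumulative cache_info results on each update (presumably once per process() call: misses 1+1+1, hits 0+1+2, currsize 1+1+1)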
1123+
assert self.object.metrics.new_results == 3
1124+
assert self.object.metrics.cached_results == 3
1125+
assert self.object.metrics.num_cache_entries == 3

0 commit comments

Comments
 (0)