Skip to content
This repository was archived by the owner on Sep 11, 2024. It is now read-only.

Commit b334fd8

Browse files
committed
sensitive queries must respect word boundaries
1 parent 57a62b5 commit b334fd8

File tree

2 files changed

+5 -2 lines changed

2 files changed

+5 -2 lines changed

src/cpr_data_access/utils.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import csv
2+
import re
23
from pathlib import Path
34
from typing import Any, Union
45

@@ -21,7 +22,9 @@ def is_sensitive_query(text: str, sensitive_terms: set) -> bool:
2122
2223
"""
2324
sensitive_terms_in_query = [
24-
term for term in sensitive_terms if term in text.lower().split()
25+
term
26+
for term in sensitive_terms
27+
if re.findall(r"\b" + re.escape(term) + r"\b", text.lower())
2528
]
2629

2730
if sensitive_terms_in_query:

tests/test_utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
[False, "word but outnumbered"],
2323
[False, "word another phrase example but with many other items"],
2424
[True, "word"],
25-
[True, "wordle"],
25+
[False, "wordle"],
2626
[True, "test term"],
2727
[True, "test term word"],
2828
[True, "test term and"],

0 commit comments

Comments
 (0)