Skip to content
This repository has been archived by the owner on Sep 11, 2024. It is now read-only.

Commit

Permalink
Merge pull request #167 from climatepolicyradar/fix/sensitive-queries…
Browse files Browse the repository at this point in the history
…-word-boundaries

Fix/sensitive queries word boundaries
  • Loading branch information
kdutia authored Apr 2, 2024
2 parents aa80fde + b334fd8 commit ce74ebb
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
5 changes: 4 additions & 1 deletion src/cpr_data_access/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import csv
+import re
from pathlib import Path
from typing import Any, Union

Expand All @@ -21,7 +22,9 @@ def is_sensitive_query(text: str, sensitive_terms: set) -> bool:
"""
sensitive_terms_in_query = [
-term for term in sensitive_terms if term in text.lower()
+term
+for term in sensitive_terms
+if re.findall(r"\b" + re.escape(term) + r"\b", text.lower())
]

if sensitive_terms_in_query:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
[False, "word but outnumbered"],
[False, "word another phrase example but with many other items"],
[True, "word"],
-[True, "wordle"],
+[False, "wordle"],
[True, "test term"],
[True, "test term word"],
[True, "test term and"],
Expand Down

0 comments on commit ce74ebb

Please sign in to comment.