Replace deprecated levenshtein method in NEL benchmark

explosion · May 2, 2023 · 18a7784 · 18a7784
1 parent 3a1cf48
commit 18a7784
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 2 deletions.
diff --git a/benchmarks/nel/requirements.txt b/benchmarks/nel/requirements.txt
@@ -3,6 +3,7 @@ tqdm
 prettytable
 scikit-learn
 fuzzyset2
+rapidfuzz>=2.0.0
 spacyfishing
 virtualenv
 pysqlite3-binary
diff --git a/benchmarks/nel/scripts/candidate_generation/embeddings.py b/benchmarks/nel/scripts/candidate_generation/embeddings.py
@@ -7,7 +7,7 @@
 from spacy.tokens import Span
 from .base import NearestNeighborCandidateSelector
 from compat import KnowledgeBase
-from rapidfuzz.string_metric import normalized_levenshtein
+from rapidfuzz.distance.Levenshtein import normalized_similarity
 
 
 class EmbeddingCandidateSelector(NearestNeighborCandidateSelector):
@@ -39,7 +39,7 @@ def _fetch_candidates(
         candidate_entity_ids: Set[str] = set()
         for nne in nn_entities:
             for name in nn_entities[nne].aliases:
-                if normalized_levenshtein(name.lower(), span.text.lower()) / 100 >= lexical_similarity_cutoff:
+                if normalized_similarity(name.lower(), span.text.lower()) >= lexical_similarity_cutoff:  # already in [0, 1]; no /100 rescale
                     candidate_entity_ids.add(nne)
                     break