Skip to content

Commit

Permalink
Replace deprecated levenshtein method in NEL benchmark
Browse files (browse the repository at this point in the history)
  • Loading branch information
adrianeboyd committed May 2, 2023
1 parent 3a1cf48 commit 18a7784
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
1 change: 1 addition & 0 deletions benchmarks/nel/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ tqdm
prettytable
scikit-learn
fuzzyset2
+ rapidfuzz>=2.0.0
spacyfishing
virtualenv
pysqlite3-binary
4 changes: 2 additions & 2 deletions benchmarks/nel/scripts/candidate_generation/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from spacy.tokens import Span
from .base import NearestNeighborCandidateSelector
from compat import KnowledgeBase
- from rapidfuzz.string_metric import normalized_levenshtein
+ from rapidfuzz.distance.Levenshtein import normalized_similarity


class EmbeddingCandidateSelector(NearestNeighborCandidateSelector):
Expand Down Expand Up @@ -39,7 +39,7 @@ def _fetch_candidates(
candidate_entity_ids: Set[str] = set()
for nne in nn_entities:
for name in nn_entities[nne].aliases:
- if normalized_levenshtein(name.lower(), span.text.lower()) / 100 >= lexical_similarity_cutoff:
+ if normalized_similarity(name.lower(), span.text.lower()) / 100 >= lexical_similarity_cutoff:
candidate_entity_ids.add(nne)
break

Expand Down

0 comments on commit 18a7784

Please sign in to comment.