Skip to content

Commit

Permalink
Keywords - temporary solution for list of stopwords
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Aug 31, 2023
1 parent 3cee57f commit 00240df
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
6 changes: 4 additions & 2 deletions orangecontrib/text/keywords/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@
 from orangecontrib.text import Corpus
 from orangecontrib.text.keywords.mbert import mbert_keywords
 from orangecontrib.text.keywords.rake import Rake
+from orangecontrib.text.language import ISO2LANG
 from orangecontrib.text.preprocess import StopwordsFilter

 # all available languages for RAKE
 from orangecontrib.text.vectorization import BowVectorizer

-# todo
-RAKE_LANGUAGES = StopwordsFilter.supported_languages()
+# todo: this is a temporary solution since supported_languages now returns
+# languages as ISO codes - refactor with keywords language refactoring
+RAKE_LANGUAGES = [ISO2LANG[la] for la in StopwordsFilter.supported_languages()]
 # all available languages for YAKE!
 YAKE_LANGUAGE_MAPPING = {
     "Arabic": "ar",
Expand Down
2 changes: 1 addition & 1 deletion orangecontrib/text/widgets/tests/test_owkeywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def test_run_with_words(self):
         self.assertEqual(len(results.scores), 42)

     def test_run_normalize_words(self):
-        normalizer = LemmagenLemmatizer()
+        normalizer = LemmagenLemmatizer(language="en")
         corpus = normalizer(self.corpus)

         words = ["minor", "tree"]
Expand Down

0 comments on commit 00240df

Please sign in to comment.