From 6cd35a14621449facb94b94f1b21c2f69b1dc0ce Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Thu, 1 Jun 2023 08:40:27 +0200
Subject: [PATCH] Keywords - temporary solution for list of stopwords
---
orangecontrib/text/keywords/__init__.py | 6 ++++--
orangecontrib/text/widgets/tests/test_owkeywords.py | 2 +-
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/orangecontrib/text/keywords/__init__.py b/orangecontrib/text/keywords/__init__.py
index bdfb44bdd..8dc84f6c4 100644
--- a/orangecontrib/text/keywords/__init__.py
+++ b/orangecontrib/text/keywords/__init__.py
@@ -15,13 +15,15 @@
from orangecontrib.text import Corpus
from orangecontrib.text.keywords.mbert import mbert_keywords
from orangecontrib.text.keywords.rake import Rake
+from orangecontrib.text.language import ISO2LANG
from orangecontrib.text.preprocess import StopwordsFilter
# all available languages for RAKE
from orangecontrib.text.vectorization import BowVectorizer
-# todo
-RAKE_LANGUAGES = StopwordsFilter.supported_languages()
+# TODO: temporary solution - supported_languages() now returns ISO codes, so
+# map them through ISO2LANG; revisit in the keywords language refactoring
+RAKE_LANGUAGES = [ISO2LANG[la] for la in StopwordsFilter.supported_languages()]
# all available languages for YAKE!
YAKE_LANGUAGE_MAPPING = {
"Arabic": "ar",
diff --git a/orangecontrib/text/widgets/tests/test_owkeywords.py b/orangecontrib/text/widgets/tests/test_owkeywords.py
index 200e77246..18e4deb63 100644
--- a/orangecontrib/text/widgets/tests/test_owkeywords.py
+++ b/orangecontrib/text/widgets/tests/test_owkeywords.py
@@ -83,7 +83,7 @@ def test_run_with_words(self):
self.assertEqual(len(results.scores), 42)
def test_run_normalize_words(self):
- normalizer = LemmagenLemmatizer()
+ normalizer = LemmagenLemmatizer(language="en")
corpus = normalizer(self.corpus)
words = ["minor", "tree"]