diff --git a/orangecontrib/text/corpus.py b/orangecontrib/text/corpus.py index b0660a830..8de6b753e 100644 --- a/orangecontrib/text/corpus.py +++ b/orangecontrib/text/corpus.py @@ -1,6 +1,6 @@ import os from collections import Counter, defaultdict -from copy import copy, deepcopy +from copy import copy from numbers import Integral from itertools import chain from typing import Union, Optional, List, Tuple, Dict @@ -19,15 +19,6 @@ ) from Orange.preprocess.transformation import Identity from Orange.data.util import get_unique_names - -# Gensim is 4.3.2 is incompatible with scipy 1.3, where they removed triu/ -# thus hack what it is missing here it. -# Remove this section after we depend on newer gensim -import scipy.linalg -if "triu" not in scipy.linalg.__dict__: - scipy.linalg.triu = np.triu - -from gensim import corpora from orangewidget.utils.signals import summarize, PartialSummary import scipy.sparse as sp diff --git a/orangecontrib/text/misc/nltk_data_download.py b/orangecontrib/text/misc/nltk_data_download.py index 781c5b8e9..6cf49ec24 100644 --- a/orangecontrib/text/misc/nltk_data_download.py +++ b/orangecontrib/text/misc/nltk_data_download.py @@ -22,8 +22,8 @@ 'punkt', 'opinion_lexicon', 'vader_lexicon', - 'averaged_perceptron_tagger', - 'maxent_treebank_pos_tagger', + 'averaged_perceptron_tagger_eng', + 'maxent_treebank_pos_tagger_tab', 'omw-1.4', ] diff --git a/orangecontrib/text/tests/test_corpus.py b/orangecontrib/text/tests/test_corpus.py index aa20b5f4f..02c56f2ec 100644 --- a/orangecontrib/text/tests/test_corpus.py +++ b/orangecontrib/text/tests/test_corpus.py @@ -1,7 +1,6 @@ import os import pickle import unittest -from datetime import datetime import numpy as np from numpy.testing import assert_array_equal @@ -16,7 +15,6 @@ from orangewidget.utils.signals import summarize from scipy.sparse import csr_matrix, issparse -import orangecontrib from orangecontrib.text import preprocess from orangecontrib.text.corpus import Corpus from orangecontrib.text.preprocess import ( @@ -27,18 +25,6 @@ from orangecontrib.text.tag import AveragedPerceptronTagger -class ImportHack(unittest.TestCase): - - def test_perhaps_remove_gensim_hack(self): - now = datetime.now() - if (now.year, now.month) >= (2024, 7): - self.fail( - "Check if gensim newer than 4.3.2 is available; if so, add it " - "to requirements, remove the scipy monkey-patch in corpus.py " - "and this test." - ) - - class CorpusTests(unittest.TestCase): def setUp(self): self.pos_tagger = AveragedPerceptronTagger() diff --git a/requirements.txt b/requirements.txt index 36c489f1d..e6fadc0b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,11 +3,11 @@ beautifulsoup4 biopython # Enables Pubmed widget. conllu docx2txt>=0.6 -gensim>=4.3.0,!=4.3.1 # gensim 4.3.1 is build on numpy 1.24, causing error on older numpys +gensim>=4.3.3 httpx!=0.23.1 # temporary fix - semantic search fail (but only in tests) langdetect lemmagen3 -nltk>=3.0.5 # TweetTokenizer introduced in 3.0.5 +nltk>=3.9.1 numpy odfpy>=1.3.5 Orange3 >=3.35.0 diff --git a/tox.ini b/tox.ini index 1dc69c880..7758b2842 100644 --- a/tox.ini +++ b/tox.ini @@ -29,6 +29,7 @@ deps = oldest: orange-canvas-core==0.1.30 oldest: orange-widget-base==4.20.0 oldest: pandas==1.4.0 + oldest: nltk==3.9.1 latest: https://github.com/biolab/orange3/archive/refs/heads/master.zip#egg=orange3 latest: https://github.com/biolab/orange3-network/archive/refs/heads/master.zip#egg=orange3-network latest: https://github.com/biolab/orange-canvas-core/archive/refs/heads/master.zip#egg=orange-canvas-core