Skip to content

Commit

Permalink
New version of spaCy requires a bigger model (en_core_web_sm -> en_core_web_md)
Browse files Browse the repository at this point in the history
  • Loading branch information
rafelafrance committed Oct 3, 2024
1 parent 2877b3e commit 4030ce3
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 5 deletions.
3 changes: 1 addition & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ VENV=.venv
PY_VER=python3.11
PYTHON=./$(VENV)/bin/$(PY_VER)
PIP_INSTALL=$(PYTHON) -m pip install
-SPACY_MODEL=$(PYTHON) -m spacy download en_core_web_sm
+SPACY_MODEL=$(PYTHON) -m spacy download en_core_web_md

test: activate
$(PYTHON) -m unittest discover
Expand All @@ -19,7 +19,6 @@ dev: venv activate base
$(PIP_INSTALL) -e ../../misc/common_utils
$(PIP_INSTALL) -e .[dev]
$(SPACY_MODEL)
pre-commit install

activate:
. $(VENV)/bin/activate
Expand Down
2 changes: 1 addition & 1 deletion tests/rules/test_sentence.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def test_sentencizer_04(self):
def test_sentencizer_05(self):
text = shorten(
"""
Plants perennial (rhizomatous), usually glabrous, sometimes sparsely hairy.
Plants perennial (rhizomatous), usually glabrous, sometimes sparsely hairy.
Stems [10–]30–70[–100] cm. Leaves: stipules lanceolate to oblong.
""",
)
Expand Down
2 changes: 1 addition & 1 deletion traiter/pylib/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
def build():
extensions.add_extensions()

-nlp = spacy.load("en_core_web_sm", exclude=["ner", "lemmatizer", "tok2vec"])
+nlp = spacy.load("en_core_web_md", exclude=["ner", "lemmatizer", "tok2vec"])

tokenizer.setup_tokenizer(nlp)

Expand Down
2 changes: 1 addition & 1 deletion traiter/pylib/pipes/sentence.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

@Language.factory(SENTENCES)
class Sentence:
-def __init__(self, nlp: Language, name: str, base_model: str = "en_core_web_sm"):
+def __init__(self, nlp: Language, name: str, base_model: str = "en_core_web_md"):
self.nlp = nlp
self.name = name

Expand Down

0 comments on commit 4030ce3

Please sign in to comment.