Skip to content

Commit d7ae84f

Browse files
Spelling correction is removed because it created more problems than it solved. Another spelling module will be implemented in the future.
1 parent 08f2f28 commit d7ae84f

File tree

1 file changed

+6
-22
lines changed

1 file changed

+6
-22
lines changed

vnlp/normalizer/normalizer.py

Lines changed: 6 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,10 @@
11
from typing import List
22
from pathlib import Path
3-
import os
4-
5-
import jamspell
63

74
from ._deasciifier import Deasciifier
85
from ..stemmer_morph_analyzer import StemmerAnalyzer
9-
from ..utils import check_and_download
106

117
RESOURCES_PATH = str(Path(__file__).parent.parent / "resources")
12-
SPELL_CORRECTION_MODEL_PATH = RESOURCES_PATH + "/spell_correction_model.bin"
13-
SPELL_CORRECTION_MODEL_LINK = "https://vnlp-model-weights.s3.eu-west-1.amazonaws.com/spell_correction_model.bin"
14-
158

169
class Normalizer:
1710
"""
@@ -181,21 +174,12 @@ def correct_typos(self, text: str) -> str:
181174
182175
>> 'kasıtlı yazım hatası ekliyorum'
183176
"""
184-
# Lazy load the model.
185-
if not hasattr(self, "corrector") or self.corrector is None:
186-
corrector = jamspell.TSpellCorrector()
187-
if not os.path.isfile(SPELL_CORRECTION_MODEL_PATH):
188-
check_and_download(
189-
SPELL_CORRECTION_MODEL_PATH, SPELL_CORRECTION_MODEL_LINK
190-
)
191-
loaded = corrector.LoadLangModel(SPELL_CORRECTION_MODEL_PATH)
192-
if not loaded:
193-
raise FileNotFoundError(
194-
f"Spell correction model could not be loaded in '{SPELL_CORRECTION_MODEL_PATH}'. Please check the file path."
195-
)
196-
self.corrector = corrector
197-
198-
return self.corrector.FixFragment(text)
177+
# 27.11.24: spelling is removed for now as the dependencies we relied on
178+
# for spelling, e.g. JamSpell and SWIG,
179+
# created more trouble than the problems they solved.
180+
# We will implement a better solution in the future.
181+
pass
182+
199183

200184
def convert_numbers_to_words(
201185
self,

0 commit comments

Comments
 (0)