From 97b60453b936eed61ba8b6c2bcc597e6aa154b9e Mon Sep 17 00:00:00 2001 From: sai-prasanna Date: Sat, 25 Jan 2020 22:03:25 +0530 Subject: [PATCH 1/3] Fix a bug in loading symspell dictionary. --- lmproof/candidate_generators.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lmproof/candidate_generators.py b/lmproof/candidate_generators.py index ef529c5..e4ef20d 100644 --- a/lmproof/candidate_generators.py +++ b/lmproof/candidate_generators.py @@ -95,7 +95,7 @@ def load(cls, language: str) -> "SpellCorrectGenerator": / "resources" / "frequency_dictionary_en_82_765.txt" ) - sym_spell.create_dictionary(str(dict_path)) + symspell.load_dictionary(str(dict_path), term_index=0, count_index=1) spacy_model = spacy.load("en_core_web_sm", disable=["parser", "ner"]) else: raise RuntimeError(f"The language {language} is currently not language.") diff --git a/pyproject.toml b/pyproject.toml index ce9d0a6..5dd5ae4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "lmproof" -version = "0.2.0" +version = "0.2.1a" description = "Language model powered proof reader for correcting contextual errors in natural language." 
authors = ["sai-prasanna "] readme = "README.md" From 9df09ad4a05b9559e326d3ac3939041a93953871 Mon Sep 17 00:00:00 2001 From: P R Gurunath Date: Sun, 26 Jan 2020 17:32:59 +0530 Subject: [PATCH 2/3] Fix index-out-of-range error in candidate_generator When the sentence length is exactly 1, there is an indexing error while creating the span (in tokenized_sentence[1]) --- lmproof/candidate_generators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lmproof/candidate_generators.py b/lmproof/candidate_generators.py index e4ef20d..9962242 100644 --- a/lmproof/candidate_generators.py +++ b/lmproof/candidate_generators.py @@ -127,7 +127,7 @@ def _edits( elif replaced_token.is_upper: substitute = substitute.upper() - if token_idx == 0 and substitute == "" and len(tokenized_sentence) >= 1: + if token_idx == 0 and substitute == "" and len(tokenized_sentence) > 1: candidate = Edit( Span(replaced_token.idx, tokenized_sentence[1].idx + 1), tokenized_sentence[1].text[0].upper(), From dcd1555507c8cbec4efbd82536377cff4e13b961 Mon Sep 17 00:00:00 2001 From: P R Gurunath Date: Mon, 27 Jan 2020 12:50:45 +0530 Subject: [PATCH 3/3] Fix typo --- lmproof/candidate_generators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lmproof/candidate_generators.py b/lmproof/candidate_generators.py index 9962242..544d43d 100644 --- a/lmproof/candidate_generators.py +++ b/lmproof/candidate_generators.py @@ -95,7 +95,7 @@ def load(cls, language: str) -> "SpellCorrectGenerator": / "resources" / "frequency_dictionary_en_82_765.txt" ) - symspell.load_dictionary(str(dict_path), term_index=0, count_index=1) + sym_spell.load_dictionary(str(dict_path), term_index=0, count_index=1) spacy_model = spacy.load("en_core_web_sm", disable=["parser", "ner"]) else: raise RuntimeError(f"The language {language} is currently not language.")