Skip to content

Commit

Permalink
Fix regex logic
Browse files (browse the repository at this point in the history)
  • Loading branch information
patrickgold committed Feb 23, 2021
1 parent db4a468 commit 247aa37
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
6 changes: 3 additions & 3 deletions clb.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
import gzip
import msgpack
import io
import re
import regex
import time

STR_FAIL_REGEX = "[.,]+"
STR_VALIDATE_REGEX = r"^((\p{L}\p{M}*)|\'|\-)+$"

def __validate_str(str):
return re.search(STR_FAIL_REGEX, str) == None and len(str) != 0
return regex.search(STR_VALIDATE_REGEX, str) != None and len(str) != 0

def __freq_for_index(index, len_list):
"""
Expand Down
6 changes: 3 additions & 3 deletions flict.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,9 @@ def insertNgram(self, ngram: Ngram):


def clb_to_flict(src_path: str, dst_path: str):
word_regex = regex.compile(r"^\sword=(\p{L}\p{M}*)+,f=[0-9]+$")
bigram_regex = regex.compile(r"^\s\sbigram=(\p{L}\p{M}*)+,f=[0-9]+$")
trigram_rgeex = regex.compile(r"^\s\s\strigram=(\p{L}\p{M}*)+,f=[0-9]+$")
word_regex = regex.compile(r"^\sword=((\p{L}\p{M}*)|\'|\-)+,f=[0-9]+$")
bigram_regex = regex.compile(r"^\s\sbigram=((\p{L}\p{M}*)|\'|\-)+,f=[0-9]+$")
trigram_rgeex = regex.compile(r"^\s\s\strigram=((\p{L}\p{M}*)|\'|\-)+,f=[0-9]+$")
try:
ptree = FlictRootNode()
with io.open(src_path, "r", encoding="utf-8") as f_src:
Expand Down

0 comments on commit 247aa37

Please sign in to comment.