forked from mehedihasanbijoy/DPCSpell
-
Notifications
You must be signed in to change notification settings - Fork 0
/
process.py
79 lines (65 loc) · 1.77 KB
/
process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import pandas as pd
from utils import word2char
from tqdm import tqdm
def check_from_left(word, error):
    """Flag the characters of ``error`` that differ from ``word``, aligned left.

    Element ``i`` of ``error`` is compared with element ``i`` of ``word``.

    Args:
        word: Indexable sequence of characters of the reference word.
        error: Indexable sequence of characters of the word being checked.

    Returns:
        A list with one entry per element of ``error``: ``0`` where the two
        characters are equal, ``1`` where they differ or where ``word`` has
        no character at that position.
    """
    word_len = len(word)
    # When ``error`` is longer than ``word`` the surplus positions have
    # nothing to match against; flag them instead of raising IndexError.
    return [
        0 if i < word_len and char == word[i] else 1
        for i, char in enumerate(error)
    ]
def check_from_right(word, error):
    """Flag the characters of ``error`` that differ from ``word``, aligned right.

    The last element of ``error`` is compared with the last element of
    ``word``, the second-to-last with the second-to-last, and so on.

    Args:
        word: Indexable sequence of characters of the reference word.
        error: Indexable sequence of characters of the word being checked.

    Returns:
        A list with one entry per element of ``error``, in the same
        left-to-right order as ``error``: ``0`` where the two characters are
        equal, ``1`` where they differ or where ``word`` has no character at
        that distance from its end.
    """
    word_len = len(word)
    # Negative indexing walks both sequences from the end without reversing
    # them, so the caller's ``word`` and ``error`` are left untouched.
    # ``back`` counts down so the flags come out in left-to-right order.
    # Distances beyond the start of ``word`` are flagged rather than raising
    # IndexError.
    return [
        0 if back <= word_len and error[-back] == word[-back] else 1
        for back in range(len(error), 0, -1)
    ]
def check_from_both(word, error):
    """Flag mismatches in ``error``: first half aligned left, second half aligned right.

    With ``half = ceil(len(error) / 2)``, the first ``half`` elements of
    ``error`` are compared with ``word`` position by position from the
    start, and the last ``half`` elements are compared position by position
    from the end.

    Args:
        word: Indexable sequence of characters of the reference word.
        error: Indexable sequence of characters of the word being checked.

    Returns:
        The left-aligned flags followed by the right-aligned flags, each
        ``0`` for equal characters and ``1`` for differing characters or
        positions that ``word`` does not have. The result has ``2 * half``
        entries, so for an odd-length ``error`` the middle element is
        checked from both sides and the result is one longer than ``error``.
    """
    # NOTE(review): the duplicated middle entry for odd lengths is kept as-is
    # so the output shape is unchanged -- confirm whether it is intended.
    half = (len(error) + 1) // 2
    word_len = len(word)

    # Positions that ``word`` is too short to provide are flagged as
    # mismatches instead of raising IndexError.
    left = [
        0 if i < word_len and error[i] == word[i] else 1
        for i in range(half)
    ]
    # ``back`` counts down so the right-hand flags are already in
    # left-to-right order.
    right = [
        0 if back <= word_len and error[-back] == word[-back] else 1
        for back in range(half, 0, -1)
    ]
    return left + right
if __name__ == '__main__':
    # Debug driver: loads the dataset, converts the 'Word' and 'Error'
    # columns with word2char, then prints the three mismatch encodings for a
    # single sample and stops.
    path = './Dataset/sec_dataset_III_v3.csv'
    df = pd.read_csv(path)
    df['Word'] = df['Word'].apply(word2char)
    df['Error'] = df['Error'].apply(word2char)

    for idx in tqdm(range(len(df))):
        # Presumably columns 0 and 1 are 'Word' and 'Error' -- verify against
        # the CSV layout.
        word = df.iloc[idx, 0].split()
        error = df.iloc[idx, 1].split()

        # NOTE(review): the row values read above are immediately replaced by
        # this hard-coded sample, and the loop exits after one pass. Remove
        # the override and the `break` to process the real dataset.
        word = ['ব', 'া', 'ং', 'ল', 'া']
        error = ['ব', 'ং', 'ল', 'া']
        print(len(word), len(error))
        print(f'{word}\n{error}')

        # checking from left
        left = check_from_left(word, error)
        print(left)

        # Pass copies so the next check still sees the characters in their
        # original order, whatever the callee does to its arguments.
        right = check_from_right(list(word), list(error))
        print(right)

        both = check_from_both(word, error)
        print(both)
        break