-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert.py
40 lines (28 loc) · 1.16 KB
/
convert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import json
import os
output = "r_vocab.txt"


def parse_row(raw: str):
    """Split one raw line of the vocabulary table into its tab-separated fields.

    Args:
        raw: A line as read from the input file, with or without its
            trailing newline.

    Returns:
        The list of fields for a data row, or ``None`` when the line should
        be skipped: a blank or very short line (at most two characters
        including the newline), the table header (starts with ``Kanji``), or
        a row with fewer than three tab-separated fields (e.g. a table title),
        which cannot be mapped to japanese/romaji/english.
    """
    if len(raw) <= 2:
        return None
    # Strip only the newline: slicing off the last character unconditionally
    # would eat real data when the final line of the file has no newline.
    line = raw.rstrip("\n")
    if line.startswith("Kanji"):
        return None
    fields = line.split("\t")
    if len(fields) < 3:
        return None
    return fields


def format_entry(fields) -> str:
    """Render the fields of one row as a JSON object followed by ``",\\n"``.

    Rows with more than three fields are treated as
    kanji/japanese/romaji/english (extra fields are ignored); rows with
    exactly three fields as japanese/romaji/english.

    Example result: ``{"japanese": "こんにちは", "romaji": "konnichiwa", "english": "Hello"},``

    Args:
        fields: Sequence of at least three strings.

    Returns:
        The serialized entry, terminated by a comma and a newline.

    Raises:
        ValueError: If fewer than three fields are given.
    """
    if len(fields) < 3:
        raise ValueError(f"expected at least 3 fields, got {len(fields)}")
    if len(fields) > 3:
        keys = ("kanji", "japanese", "romaji", "english")
    else:
        keys = ("japanese", "romaji", "english")
    # json.dumps escapes quotes and backslashes inside the values, which a
    # hand-built f-string does not; ensure_ascii=False keeps the Japanese
    # text readable instead of \uXXXX escapes.
    return json.dumps(dict(zip(keys, fields)), ensure_ascii=False) + ",\n"


def convert(src_path: str = "vocab.txt", dst_path: str = output) -> None:
    """Convert the tab-separated table at ``src_path`` into entries in ``dst_path``.

    Each data row is written as one JSON object per line; the parsed fields
    and the resulting entry are also echoed to stdout.

    Args:
        src_path: Path of the UTF-8, tab-separated input file.
        dst_path: Path of the UTF-8 output file; it is overwritten.
    """
    with open(dst_path, "w", encoding="utf-8") as dst, \
            open(src_path, "r", encoding="utf-8") as src:
        # Iterate the file lazily instead of loading it with readlines().
        for raw in src:
            fields = parse_row(raw)
            if fields is None:
                continue
            print(fields)
            entry = format_entry(fields)
            dst.write(entry)
            print(entry)


def main() -> None:
    """Remove any previous output file, then convert ``vocab.txt``."""
    if os.path.exists(output):
        os.remove(output)
    convert("vocab.txt", output)


if __name__ == "__main__":
    main()