-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
62 lines (51 loc) · 1.66 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
__author__ = 'cltanuki'
import sys
import codecs
import re
from os import listdir
from os.path import isfile, join, split
import xlsxwriter
if hasattr(sys, 'frozen'):
basis = sys.executable
else:
basis = sys.argv[0]
base_folder = split(basis)[0]
type_path = join(base_folder, "types")
input_path = join(base_folder, "input")
type_files = [f for f in listdir(type_path) if isfile(join(type_path, f))]
input_files = [f for f in listdir(input_path) if isfile(join(input_path, f))]
type_dict = {}
input_dict = {}
counter_dict = {}
for file in type_files:
type_dict[file] = [x.rstrip().lower() for x in codecs.open(join(type_path, file), 'r', encoding='cp1251').readlines()]
for file in input_files:
data = codecs.open(join(input_path, file), 'r', encoding='cp1251').read()
input_dict[file] = [i.lower() for i in re.sub("[^\w]", " ", data).split()]
counter_dict[file] = 0
for i in input_dict:
word_list = input_dict.get(i)
cat_data = {}
for k, v in type_dict.items():
word_data = {}
type_list = v
for word in word_list:
if word in type_list:
word_data[word] = word_data.get(word, 0) + 1
cat_data[k] = word_data
counter_dict[i] = cat_data
workbook = xlsxwriter.Workbook('result.xlsx')
for k, v in counter_dict.items():
worksheet = workbook.add_worksheet(k)
col = 0
for i, t in v.items():
s_col = col + 1
line = 0
worksheet.write(line, col, i)
for a, z in t.items():
line += 1
print(col, s_col, line, a, z)
worksheet.write(line, col, a)
worksheet.write(line, s_col, z)
col += 2
workbook.close()