-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathUseful_functions.py
executable file
·129 lines (116 loc) · 4.23 KB
/
Useful_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# -*- coding: utf-8 -*-
import codecs
import os
import json
import re
def open_file(filename):
    """
    Read a UTF-8 encoded file and return its text with a few characters blanked out.

    Every "^", backslash and "&" in the text is replaced by a single space,
    and a newline followed by "." is appended to the end of the result.

    :param filename: name of the file to read, including its extension
    :return: a string holding the complete processed text
    """
    # The context manager closes the handle even if reading/decoding fails.
    with codecs.open(filename, 'r', encoding='utf-8') as fh:
        text = fh.read()
    for char in ("^", "\\", "&"):
        text = text.replace(char, " ")
    return text + '\n.'
# ----------- SET THE PATH TO THE DICTIONARIES IN THE path VARIABLE -----------
def open_dictionary(filename):
    """
    Load a dictionary file (UTF-8) and return its lines, stripped and sorted.

    The file ``<filename>.txt`` is looked up in ``./dictionaries`` relative to
    the current working directory, or in ``../dictionaries`` when the current
    working directory path contains the substring ``testset``.

    :param filename: dictionary file name without the ``.txt`` extension
    :return: sorted list of the file's lines with surrounding whitespace removed
    :raises FileNotFoundError: if the dictionary directory or file is missing
    """
    path = "./dictionaries"
    if 'testset' in os.getcwd():
        path = "../dictionaries"
    # Build the full path instead of chdir-ing into the directory: the working
    # directory is process-wide state and must not be left changed on failure.
    with codecs.open(os.path.join(path, filename + ".txt"), 'r', encoding='utf-8') as fh:
        return sorted(line.strip() for line in fh)
def write_in_file(text, filename, separator=' ', path="./raw"):
    """
    Write a sequence of tokens to ``<filename>.txt``, one token per line.

    Each token is iterated for its keys; the values ``token[key]`` are
    converted with ``str`` and joined with ``separator`` to form the line.

    :param text: iterable of tokens (mappings from key to value)
    :param filename: output file name without the ``.txt`` extension
    :param separator: string placed between the values of one token
    :param path: directory that is made the working directory while writing
    :return: None
    """
    # NOTE(review): the output file is opened BEFORE the chdir below, so it is
    # created relative to the directory that is current on entry, not inside
    # `path`. That behaviour is kept for existing callers; confirm whether
    # writing into `path` was the real intent.
    cwd = os.getcwd()
    with open(filename + '.txt', 'w', encoding='utf-8') as f:
        os.chdir(path)
        try:
            for token in text:
                f.write(separator.join(str(token[key]) for key in token))
                f.write('\n')
        finally:
            # Restore the working directory even when a token cannot be written.
            os.chdir(cwd)
# print('-----------------------------------')
# print('Content is in file: ' + filename + '.txt.')
# print('-----------------------------------')
# def write_as_json(text, filename, path='/Users/ulyanasidorova/Documents/UPD — копия 2'):
# pwd = os.getcwd()
# os.chdir(path)
# f = open('j_' + filename + ".json", 'w', encoding='utf-8')
# text = json.dumps(text)
# f.write(text)
# # print("===================")
# # print("Content is in file: " + filename + ".json.")
# # print("===================")
# os.chdir(pwd)
# return True
def write_as_object(text, filename, path):
    """
    Write every item of ``text`` on its own line to ``filename`` inside ``path``.

    After writing, a short report naming the file and the directory is printed
    to standard output.

    :param text: iterable of strings; each one becomes a line of the file
    :param filename: name of the output file (used as given, no extension added)
    :param path: directory in which the file is created
    :return: True once the file has been written
    """
    # Joining the path avoids changing the process-wide working directory, and
    # the context manager guarantees the data is flushed and the handle closed.
    with open(os.path.join(path, filename), 'w', encoding='utf-8') as f:
        for line in text:
            f.write(line + '\n')
    print("===================")
    print("Извлеченные сущности находятся в файле: " + filename)
    print("Файл находится в папке: " + path + ".")
    print("===================")
    return True
def open_json(filename, path):
    """
    Load and return the JSON document stored in ``j_<filename>.json`` under ``path``.

    :param filename: base name of the file, without the ``j_`` prefix and the
        ``.json`` extension
    :param path: directory that contains the file
    :return: the object decoded from the file by ``json.load``
    :raises FileNotFoundError: if the directory or the file does not exist
    """
    # Build the full path rather than chdir-ing, so a failed open or parse
    # cannot leave the process in a different working directory. The file is
    # decoded explicitly as UTF-8 instead of the platform default encoding.
    with open(os.path.join(path, 'j_' + filename + '.json'), 'r', encoding='utf-8') as f:
        return json.load(f)
#
# def write_in_file_result(result, filename,separator=' '):
# f = open(filename + '.txt', 'w', encoding = 'utf-8')
# array = []
# for line in result:
# x = []
# if 'PERSON' in line or 'ORG_I' in line or 'ORG_B' in line or 'LOC' in line:
# x.append(line[1], str(line[0]), str(line[2]))
# array.append(x)
# for item in array:
# f.write(separator.join(array))
# f.write('\n')
# f.close()
# # print ('-----------------------------------')
# # print('Content is in file: ' + filename + '.txt.')
# # print ('-----------------------------------')
#
#
# def write_in_file_result(result, filename,separator=' '):
# f = open(filename + '.objects.txt', 'w', encoding = 'utf-8')
# x = []
# for line in result:
# if 'PERSON' or 'ORG_I' in line or 'ORG_B' in line or 'LOC' in line:
# x.append(line)
# for item in x:
# item = str(item)
# f.write(separator.join(item))
# f.write('\n')
# f.close()
# # print ('-----------------------------------')
# # print('Content is in file: ' + filename + '.objects.txt.')
# # print ('-----------------------------------')