# flow3_lazy.py
import os
from tqdm.contrib.concurrent import process_map
import cv2
import numpy as np
from utils import *
from lxml import etree
# PATH_TO_IMAGE_FOLDER = r'C:\Users\starc\PycharmProjects\TableBank-Recognition-Processing\images'
# PATH_TO_ORIGINAL_ANNOTATIONS = r'C:\Users\starc\PycharmProjects\TableBank-Recognition-Processing\original_annotations'
# PATH_TO_DESTINATION_ANNOTATIONS = r'C:\Users\starc\PycharmProjects\TableBank-Recognition-Processing\annotations'
PATH_TO_IMAGE_FOLDER = r'E:\TableBank-Recognition\Recognition\images'
PATH_TO_ORIGINAL_ANNOTATIONS = r'E:\TableBank-Recognition\Recognition\annotations_original'
PATH_TO_DESTINATION_ANNOTATIONS = r'E:\TableBank-Recognition\Recognition\flow3'
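# NOTE (assumption, inferred from usage below): `utils` is this repo's own
# helper module and is expected to provide at least get_file_name(),
# advanced_table_check(), count_cells() and draw_border(); none of these are
# third-party APIs.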
def trim_bbox(img, bbox):
    top, left, bottom, right = bbox
    if len(np.unique(img[top: bottom + 1, left: right + 1])) == 1:
        return None  # Uniform (empty) cell: nothing to trim, just return None
    # Clear any undetected white border lines
    while top < bottom and np.all(img[top, left: right + 1] == 255):
        top += 1
    while bottom > top and np.all(img[bottom, left: right + 1] == 255):
        bottom -= 1
    while left < right and np.all(img[top: bottom + 1, left] == 255):
        left += 1
    while right > left and np.all(img[top: bottom + 1, right] == 255):
        right -= 1
    # Trim the bbox: peel off all-black (background) rows and columns
    while top < bottom and np.all(img[top, left: right + 1] == 0):
        top += 1
    while bottom > top and np.all(img[bottom, left: right + 1] == 0):
        bottom -= 1
    while left < right and np.all(img[top: bottom + 1, left] == 0):
        left += 1
    while right > left and np.all(img[top: bottom + 1, right] == 0):
        right -= 1
    return [top, left, bottom, right]
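# A minimal worked example of trim_bbox (hypothetical toy input, not from the
# dataset): on a 10x10 inverted-binary patch whose only content is a white
# block at rows/cols 3..6, the surrounding black margin is trimmed away:
#   patch = np.zeros((10, 10), dtype=np.uint8)
#   patch[3:7, 3:7] = 255
#   trim_bbox(patch, [0, 0, 9, 9])  # -> [3, 3, 6, 6]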
def flow3(file):
    file = get_file_name(file)
    # Set up paths
    image_path = os.path.join(PATH_TO_IMAGE_FOLDER, file + '.png')
    original_annotation_xml = os.path.join(PATH_TO_ORIGINAL_ANNOTATIONS, file + '.txt')
    destination_annotation_xml = os.path.join(PATH_TO_DESTINATION_ANNOTATIONS, file + '.xml')
    with open(original_annotation_xml) as f:
        xml = f.read()
    # Skip advanced tables; they are saved for a later pass
    if advanced_table_check(xml):
        return
    # Read original annotations
    total_cells, total_cells_non_empty, rows_ann = count_cells(original_annotation_xml)
    max_columns = -1
    max_visible_columns = -1
    for row in rows_ann:
        if len(row) > max_columns:
            max_columns = len(row)
        if row.count('tdy') > max_visible_columns:
            max_visible_columns = row.count('tdy')
    # Read and threshold image
    img = cv2.imread(image_path, 0)  # Read image as grayscale
    # _, binary = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    _, binary = cv2.threshold(img, 192, 255, cv2.THRESH_BINARY)
    # Invert the image so text and ruling lines become white on black
    inverted_bin_img = 255 - binary
    draw_border(inverted_bin_img)
    # Set kernel sizes for erosion/dilation, derived from the expected
    # column/row counts of the annotation
    horizontal_kernel_length = img.shape[1] // max_columns // 4
    vertical_kernel_length = img.shape[0] // len(rows_ann) // 4 * 3
    if horizontal_kernel_length < 1 or vertical_kernel_length < 1:
        return
    # Horizontal kernel for detecting horizontal lines
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (horizontal_kernel_length, 1))
    # Vertical kernel for detecting vertical lines
    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, vertical_kernel_length))
    # Detect vertical lines
    image_1 = cv2.erode(inverted_bin_img, vertical_kernel, iterations=3)
    vertical_lines = cv2.dilate(image_1, vertical_kernel, iterations=3)
    # Detect horizontal lines
    image_2 = cv2.erode(inverted_bin_img, horizontal_kernel, iterations=3)
    horizontal_lines = cv2.dilate(image_2, horizontal_kernel, iterations=3)
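    # Why erode-then-dilate works here: eroding with a 1xN (or Nx1) rectangular
    # kernel wipes out any white run shorter than the kernel, so only long
    # horizontal (or vertical) strokes - i.e. ruling lines - survive; dilating
    # with the same kernel then restores the survivors to roughly their
    # original extent. A tiny sketch of the idea (hypothetical values):
    #   strip = np.zeros((1, 30), np.uint8)
    #   strip[0, 2:28] = 255                  # one long run = a "line"
    #   k = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 1))
    #   cv2.dilate(cv2.erode(strip, k), k)    # the long run survives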
    rows_coordinates = []
    # True means the whole image row is white (part of a horizontal ruling line)
    rows_vertical_projection = [False] * len(horizontal_lines)
    for i in range(len(horizontal_lines)):
        if np.unique(horizontal_lines[i])[0] == 255:
            rows_vertical_projection[i] = True
    row_idx = 0
    while row_idx < len(horizontal_lines):
        reached_end = False
        while rows_vertical_projection[row_idx]:
            row_idx += 1
            if row_idx == len(horizontal_lines) - 1:
                reached_end = True
                break
        if reached_end:
            break
        row_start = row_idx
        while not rows_vertical_projection[row_idx]:
            row_idx += 1
        row_end = row_idx - 1
        rows_coordinates.append([row_start, row_end])
        if row_idx >= len(horizontal_lines) - 1:
            break
    # Discard bands thinner than 5 pixels (noise between ruling lines)
    rows_coordinates = [row_coordinates for row_coordinates in rows_coordinates
                        if row_coordinates[1] - row_coordinates[0] > 4]
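    # The scan above is a run-length pass over a projection profile: rows that
    # are entirely white in `horizontal_lines` are ruling lines, and each
    # maximal run of non-white rows between them becomes one row band
    # [row_start, row_end]. The column scan below mirrors it exactly, reading
    # the profile from `vertical_lines` column by column.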
    cols_coordinates = []
    # True means the whole image column is white (part of a vertical ruling line)
    cols_horizontal_projection = [False] * len(vertical_lines[0])
    for i in range(len(vertical_lines[0])):
        if np.unique(vertical_lines[:, i])[0] == 255:
            cols_horizontal_projection[i] = True
    col_idx = 0
    while col_idx < len(vertical_lines[0]):
        reached_end = False
        while cols_horizontal_projection[col_idx]:
            col_idx += 1
            if col_idx == len(vertical_lines[0]) - 1:
                reached_end = True
                break
        if reached_end:
            break
        col_start = col_idx
        while not cols_horizontal_projection[col_idx]:
            col_idx += 1
        col_end = col_idx - 1
        cols_coordinates.append([col_start, col_end])
        if col_idx >= len(vertical_lines[0]) - 1:
            break
    # Discard bands thinner than 5 pixels
    cols_coordinates = [col_coordinates for col_coordinates in cols_coordinates
                        if col_coordinates[1] - col_coordinates[0] > 4]
    # Build the raw cell grid: one bbox [top, left, bottom, right] per
    # (row band, column band) pair
    cells = []
    for row in rows_coordinates:
        row_temp = []
        for col in cols_coordinates:
            row_temp.append([row[0], col[0], row[1], col[1]])
        cells.append(row_temp)
    # Trim every cell; empty cells come back as None and are dropped
    new_cells = []
    for row in cells:
        new_row = []
        for cell in row:
            new_bbox = trim_bbox(inverted_bin_img, cell)
            if new_bbox is not None:
                new_row.append(new_bbox)
        new_cells.append(new_row)
    # Check that the rows/columns in the annotation file match the detected grid
    matched = True
    if len(rows_ann) == len(new_cells):  # Check that the numbers of rows are equal
        for row in range(len(rows_ann)):
            # Count the visible ('tdy') cells in the current annotation row
            visible_col_count = rows_ann[row].count('tdy')
            if not (visible_col_count == len(new_cells[row]) or len(new_cells[row]) == len(rows_ann[row])):
                matched = False
                break
    else:
        matched = False
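    # Acceptance criterion: for every row, the number of detected cells must
    # equal either the row's visible-cell count or its total cell count. In the
    # annotation, 'tdy' appears to mark a cell with visible content and 'tdn'
    # an empty/hidden one (an assumption inferred from how count_cells and
    # max_visible_columns are used above), so both "empty cells dropped" and
    # "empty cells kept" grids are accepted.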
    if matched:
        # Write to XML
        root = etree.Element('annotation')
        # folder tag
        folder = etree.SubElement(root, 'folder')
        folder.text = 'images'
        # filename tag
        filename = etree.SubElement(root, 'filename')
        filename.text = file
        # path tag
        path = etree.SubElement(root, 'path')
        path.text = PATH_TO_IMAGE_FOLDER
        # source tag
        source = etree.SubElement(root, 'source')
        # database tag
        database = etree.SubElement(source, 'database')
        database.text = 'Unknown'
        # size tag
        size = etree.SubElement(root, 'size')
        _image = cv2.imread(image_path)
        width = etree.SubElement(size, 'width')
        width.text = str(_image.shape[1])
        height = etree.SubElement(size, 'height')
        height.text = str(_image.shape[0])
        depth = etree.SubElement(size, 'depth')
        depth.text = str(_image.shape[2])
        # segmented tag
        segmented = etree.SubElement(root, 'segmented')
        segmented.text = str(0)
        for row in new_cells:
            for cell in row:
                obj = etree.SubElement(root, 'object')
                # name
                name = etree.SubElement(obj, 'name')
                name.text = 'table_cell'
                # pose
                pose = etree.SubElement(obj, 'pose')
                pose.text = 'Unspecified'
                # truncated
                truncated = etree.SubElement(obj, 'truncated')
                truncated.text = '0'
                # difficult
                difficult = etree.SubElement(obj, 'difficult')
                difficult.text = '0'
                # bndbox: cell is [top, left, bottom, right]
                bndbox = etree.SubElement(obj, 'bndbox')
                # xmin
                _xmin = etree.SubElement(bndbox, 'xmin')
                _xmin.text = str(cell[1])
                # ymin
                _ymin = etree.SubElement(bndbox, 'ymin')
                _ymin.text = str(cell[0])
                # xmax
                _xmax = etree.SubElement(bndbox, 'xmax')
                _xmax.text = str(cell[3])
                # ymax
                _ymax = etree.SubElement(bndbox, 'ymax')
                _ymax.text = str(cell[2])
        et = etree.ElementTree(root)
        et.write(destination_annotation_xml, pretty_print=True)
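# For reference, each emitted file follows the PASCAL VOC detection layout,
# e.g. (values illustrative only):
#   <annotation>
#     <filename>some_table</filename>
#     <size><width>800</width><height>600</height><depth>3</depth></size>
#     <object>
#       <name>table_cell</name>
#       <bndbox><xmin>34</xmin><ymin>12</ymin><xmax>210</xmax><ymax>58</ymax></bndbox>
#     </object>
#   </annotation>
# so the output can be consumed by tooling that reads VOC-style annotations.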
if __name__ == '__main__':
    # a_pool = multiprocessing.Pool()
    # total = 0
    # for root, dirs, files in os.walk(PATH_TO_IMAGE_FOLDER):
    #     start = time.time()
    #     a_pool.map(flow3, files)
    #     end = time.time()
    #     total = end - start
    # print('Time taken: ' + str(total) + ' (s)')
    for root, dirs, files in os.walk(PATH_TO_IMAGE_FOLDER):
        process_map(flow3, files, max_workers=12, chunksize=10)
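    # process_map (from tqdm.contrib.concurrent) wraps concurrent.futures'
    # ProcessPoolExecutor.map with a progress bar; chunksize=10 batches tasks
    # to reduce inter-process overhead. max_workers=12 is presumably tuned to
    # the author's machine; adjust it to your CPU count.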
    # for root, dirs, files in os.walk(PATH_TO_IMAGE_FOLDER):
    #     for file in tqdm(files):
    #         try:
    #             flow3(get_file_name(file))
    #         except:
    #             print(get_file_name(file))
    # print('Advanced files: ' + str(advance_counter))
    # print('Correct files: ' + str(correct_counter))