forked from arnavdutta/Table-Detection-Extraction
-
Notifications
You must be signed in to change notification settings - Fork 2
/
table_detection.py
44 lines (34 loc) · 1.99 KB
/
table_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import cv2
import numpy as np
import os
def table_detection(img_path):
img = cv2.imread(img_path)
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
(thresh, img_bin) = cv2.threshold(img_gray, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
img_bin = cv2.bitwise_not(img_bin)
kernel_length_v = (np.array(img_gray).shape[1])//120
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length_v))
im_temp1 = cv2.erode(img_bin, vertical_kernel, iterations=3)
vertical_lines_img = cv2.dilate(im_temp1, vertical_kernel, iterations=3)
kernel_length_h = (np.array(img_gray).shape[1])//40
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length_h, 1))
im_temp2 = cv2.erode(img_bin, horizontal_kernel, iterations=3)
horizontal_lines_img = cv2.dilate(im_temp2, horizontal_kernel, iterations=3)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
table_segment = cv2.addWeighted(vertical_lines_img, 0.5, horizontal_lines_img, 0.5, 0.0)
table_segment = cv2.erode(cv2.bitwise_not(table_segment), kernel, iterations=2)
thresh, table_segment = cv2.threshold(table_segment, 0, 255, cv2.THRESH_OTSU)
_, contours, hierarchy = cv2.findContours(table_segment, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
count = 0
for c in contours:
x, y, w, h = cv2.boundingRect(c)
if (w > 80 and h > 20) and w > 3 * h:
count += 1
cropped = img[y:y + h, x:x + w]
if True:
cv2.imwrite("./results/cropped/crop_" + str(count) + "__" + img_path.split('/')[-1], cropped)
cv2.rectangle(img,(x, y),(x + w, y + h),(0, 255, 0), 2)
if True:
cv2.imwrite("./results/table_detect/table_detect__" + img_path.split('/')[-1], table_segment)
cv2.imwrite("./results/bb/bb__" + img_path.split('/')[-1], img)
{table_detection('./forms/' + i) for i in os.listdir('./forms/') if i.endswith('.png') or i.endswith('.PNG') or i.endswith('.jpg') or i.endswith('.JPG')}