diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d1e1573 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/weights +detect_text/ocr.py +/output \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..0173fe6 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,34 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Single Test", + "type": "python", + "request": "launch", + "program": "run_single.py", + "console": "integratedTerminal", + "justMyCode": false + }, + { + "name": "Python: Tune Test", + "type": "python", + "request": "launch", + "program": "run_testing.py", + "console": "integratedTerminal", + "justMyCode": false + }, + { + "name": "Python: Batch Test", + "type": "python", + "request": "launch", + "program": "run_batch.py", + "console": "integratedTerminal", + "justMyCode": false, + "args": [ + "--cat","poster"] + } + ] +} \ No newline at end of file diff --git a/cnn/CNN.py b/cnn/CNN.py index fb365ff..8c1dfee 100644 --- a/cnn/CNN.py +++ b/cnn/CNN.py @@ -1,5 +1,6 @@ import keras -from keras.applications.resnet50 import ResNet50 +# from keras.applications.resnet50 import ResNet50 +from tensorflow.keras.applications.resnet50 import ResNet50 from keras.models import Model,load_model from keras.layers import Dense, Activation, Flatten, Dropout from sklearn.metrics import confusion_matrix @@ -87,7 +88,8 @@ def predict(self, imgs, compos, load=False, show=False): return for i in range(len(imgs)): X = self.preprocess_img(imgs[i]) - Y = self.class_map[np.argmax(self.model.predict(X))] + # verbose=0: for no log output for keras model. + Y = self.class_map[np.argmax(self.model.predict(X,verbose=0))] compos[i].category = Y if show: print(Y) diff --git a/cnn/__pycache__/CNN.cpython-35.pyc b/cnn/__pycache__/CNN.cpython-35.pyc index 00b467d..f6dd328 100644 Binary files a/cnn/__pycache__/CNN.cpython-35.pyc and b/cnn/__pycache__/CNN.cpython-35.pyc differ diff --git a/cnn/__pycache__/CNN.cpython-37.pyc b/cnn/__pycache__/CNN.cpython-37.pyc new file mode 100644 index 0000000..70a6091 Binary files /dev/null and b/cnn/__pycache__/CNN.cpython-37.pyc differ diff --git a/cnn/__pycache__/CNN.cpython-39.pyc b/cnn/__pycache__/CNN.cpython-39.pyc new file mode 100644 index 0000000..1fb5456 Binary files /dev/null and b/cnn/__pycache__/CNN.cpython-39.pyc differ diff --git a/config/CONFIG.py b/config/CONFIG.py index 0a2af94..f48ae7d 100644 --- a/config/CONFIG.py +++ b/config/CONFIG.py @@ -1,7 +1,7 @@ from os.path import join as pjoin import os - +ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) class Config: def __init__(self): @@ -9,14 +9,14 @@ def __init__(self): self.image_shape = (64, 64, 3) # self.MODEL_PATH = 'E:\\Mulong\\Model\\UI2CODE\\cnn6_icon.h5' # self.class_map = ['button', 'input', 'icon', 'img', 'text'] - self.CNN_PATH = 'E:/Mulong/Model/rico_compos/cnn-rico-1.h5' + self.CNN_PATH = ROOT + '/weights/cnn-rico-1.h5' self.element_class = ['Button', 'CheckBox', 'Chronometer', 'EditText', 'ImageButton', 'ImageView', 'ProgressBar', 'RadioButton', 'RatingBar', 'SeekBar', 'Spinner', 'Switch', 'ToggleButton', 'VideoView', 'TextView'] self.class_number = len(self.element_class) # setting EAST (ocr) model - self.EAST_PATH = 'E:/Mulong/Model/East/east_icdar2015_resnet_v1_50_rbox' + self.EAST_PATH = ROOT + '/weights/east_icdar2015_resnet_v1_50_rbox' self.COLOR = {'Button': (0, 255, 0), 'CheckBox': (0, 0, 255), 'Chronometer': (255, 166, 166), 'EditText': (255, 166, 0), @@ -25,7 +25,7 @@ def __init__(self): 'RatingBar': (0, 166, 255), 'SeekBar': (0, 166, 10), 'Spinner': (50, 21, 255), 'Switch': (80, 166, 66), 'ToggleButton': (0, 66, 80), 'VideoView': (88, 66, 0), 'TextView': (169, 255, 0), 'NonText': (0,0,255), - 'Compo':(0, 0, 255), 'Text':(169, 255, 0), 'Block':(80, 166, 66)} + 'Compo':(0, 0, 255), 'Text':(169, 140, 0), 'Block':(80, 166, 66)} def build_output_folders(self): # setting data flow paths diff --git a/config/__pycache__/CONFIG.cpython-35.pyc b/config/__pycache__/CONFIG.cpython-35.pyc index d3cf612..1664168 100644 Binary files a/config/__pycache__/CONFIG.cpython-35.pyc and b/config/__pycache__/CONFIG.cpython-35.pyc differ diff --git a/config/__pycache__/CONFIG.cpython-37.pyc b/config/__pycache__/CONFIG.cpython-37.pyc index 31fea94..55b1af0 100644 Binary files a/config/__pycache__/CONFIG.cpython-37.pyc and b/config/__pycache__/CONFIG.cpython-37.pyc differ diff --git a/config/__pycache__/CONFIG.cpython-39.pyc b/config/__pycache__/CONFIG.cpython-39.pyc new file mode 100644 index 0000000..a026c9d Binary files /dev/null and b/config/__pycache__/CONFIG.cpython-39.pyc differ diff --git a/config/__pycache__/CONFIG_UIED.cpython-35.pyc b/config/__pycache__/CONFIG_UIED.cpython-35.pyc index 0675fb2..fe0ee6e 100644 Binary files a/config/__pycache__/CONFIG_UIED.cpython-35.pyc and b/config/__pycache__/CONFIG_UIED.cpython-35.pyc differ diff --git a/config/__pycache__/CONFIG_UIED.cpython-37.pyc b/config/__pycache__/CONFIG_UIED.cpython-37.pyc index ee8516a..9e16121 100644 Binary files a/config/__pycache__/CONFIG_UIED.cpython-37.pyc and b/config/__pycache__/CONFIG_UIED.cpython-37.pyc differ diff --git a/config/__pycache__/CONFIG_UIED.cpython-39.pyc b/config/__pycache__/CONFIG_UIED.cpython-39.pyc new file mode 100644 index 0000000..f2f1095 Binary files /dev/null and b/config/__pycache__/CONFIG_UIED.cpython-39.pyc differ diff --git a/data/output/ip/30800.jpg b/data/output/ip/30800.jpg new file mode 100644 index 0000000..7422883 Binary files /dev/null and b/data/output/ip/30800.jpg differ diff --git a/data/output/ip/30800.json b/data/output/ip/30800.json new file mode 100644 index 0000000..5e7c8fc --- /dev/null +++ b/data/output/ip/30800.json @@ -0,0 +1,279 @@ +{ + "img_shape": [ + 800, + 494, + 3 + ], + "compos": [ + { + "id": 1, + "class": "ImageView", + "column_min": 18, + "row_min": 25, + "column_max": 79, + "row_max": 47, + "width": 61, + "height": 22 + }, + { + "id": 2, + "class": "SeekBar", + "column_min": 403, + "row_min": 23, + "column_max": 429, + "row_max": 50, + "width": 26, + "height": 27 + }, + { + "id": 3, + "class": "ImageView", + "column_min": 84, + "row_min": 26, + "column_max": 142, + "row_max": 51, + "width": 58, + "height": 25 + }, + { + "id": 4, + "class": "ImageView", + "column_min": 464, + "row_min": 25, + "column_max": 471, + "row_max": 47, + "width": 7, + "height": 22 + }, + { + "id": 5, + "class": "ImageButton", + "column_min": 446, + "row_min": 73, + "column_max": 489, + "row_max": 117, + "width": 43, + "height": 44 + }, + { + "id": 6, + "class": "ImageView", + "column_min": 6, + "row_min": 81, + "column_max": 43, + "row_max": 110, + "width": 37, + "height": 29 + }, + { + "id": 7, + "class": "ImageView", + "column_min": 98, + "row_min": 88, + "column_max": 181, + "row_max": 106, + "width": 83, + "height": 18 + }, + { + "id": 8, + "class": "ImageView", + "column_min": 187, + "row_min": 88, + "column_max": 218, + "row_max": 106, + "width": 31, + "height": 18 + }, + { + "id": 9, + "class": "ImageView", + "column_min": 219, + "row_min": 88, + "column_max": 261, + "row_max": 109, + "width": 42, + "height": 21 + }, + { + "id": 10, + "class": "TextView", + "column_min": 266, + "row_min": 88, + "column_max": 309, + "row_max": 106, + "width": 43, + "height": 18 + }, + { + "id": 11, + "class": "ImageView", + "column_min": 314, + "row_min": 88, + "column_max": 394, + "row_max": 106, + "width": 80, + "height": 18 + }, + { + "id": 12, + "class": "ImageView", + "column_min": 216, + "row_min": 190, + "column_max": 276, + "row_max": 249, + "width": 60, + "height": 59 + }, + { + "id": 13, + "class": "ImageView", + "column_min": 0, + "row_min": 352, + "column_max": 493, + "row_max": 592, + "width": 493, + "height": 240 + }, + { + "id": 14, + "class": "ImageView", + "column_min": 9, + "row_min": 618, + "column_max": 14, + "row_max": 659, + "width": 5, + "height": 41 + }, + { + "id": 15, + "class": "ImageView", + "column_min": 29, + "row_min": 620, + "column_max": 50, + "row_max": 654, + "width": 21, + "height": 34 + }, + { + "id": 16, + "class": "ImageView", + "column_min": 83, + "row_min": 619, + "column_max": 420, + "row_max": 655, + "width": 337, + "height": 36 + }, + { + "id": 17, + "class": "ImageView", + "column_min": 453, + "row_min": 631, + "column_max": 464, + "row_max": 644, + "width": 11, + "height": 13 + }, + { + "id": 18, + "class": "ImageView", + "column_min": 203, + "row_min": 664, + "column_max": 218, + "row_max": 671, + "width": 15, + "height": 7 + }, + { + "id": 19, + "class": "ImageView", + "column_min": 25, + "row_min": 682, + "column_max": 74, + "row_max": 729, + "width": 49, + "height": 47 + }, + { + "id": 20, + "class": "ImageView", + "column_min": 123, + "row_min": 682, + "column_max": 171, + "row_max": 732, + "width": 48, + "height": 50 + }, + { + "id": 21, + "class": "ImageView", + "column_min": 222, + "row_min": 681, + "column_max": 270, + "row_max": 730, + "width": 48, + "height": 49 + }, + { + "id": 22, + "class": "ImageView", + "column_min": 321, + "row_min": 682, + "column_max": 369, + "row_max": 732, + "width": 48, + "height": 50 + }, + { + "id": 23, + "class": "SeekBar", + "column_min": 420, + "row_min": 682, + "column_max": 467, + "row_max": 729, + "width": 47, + "height": 47 + }, + { + "id": 24, + "class": "TextView", + "column_min": 0, + "row_min": 737, + "column_max": 493, + "row_max": 795, + "width": 493, + "height": 58 + }, + { + "id": 25, + "class": "ImageView", + "column_min": 26, + "row_min": 441, + "column_max": 56, + "row_max": 475, + "width": 30, + "height": 34 + }, + { + "id": 26, + "class": "ImageView", + "column_min": 72, + "row_min": 441, + "column_max": 97, + "row_max": 476, + "width": 25, + "height": 35 + }, + { + "id": 27, + "class": "TextView", + "column_min": 0, + "row_min": 737, + "column_max": 190, + "row_max": 793, + "width": 190, + "height": 56 + } + ] +} \ No newline at end of file diff --git a/data/output/ip/result.jpg b/data/output/ip/result.jpg new file mode 100644 index 0000000..ece4cf1 Binary files /dev/null and b/data/output/ip/result.jpg differ diff --git a/data/output/merge/30800.jpg b/data/output/merge/30800.jpg new file mode 100644 index 0000000..eec24dd Binary files /dev/null and b/data/output/merge/30800.jpg differ diff --git a/data/output/merge/30800.json b/data/output/merge/30800.json new file mode 100644 index 0000000..996ff3b --- /dev/null +++ b/data/output/merge/30800.json @@ -0,0 +1,274 @@ +{ + "compos": [ + { + "id": 0, + "class": "ImageButton", + "height": 44, + "width": 43, + "position": { + "column_min": 446, + "row_min": 73, + "column_max": 489, + "row_max": 117 + } + }, + { + "id": 1, + "class": "ImageView", + "height": 29, + "width": 37, + "position": { + "column_min": 6, + "row_min": 81, + "column_max": 43, + "row_max": 110 + } + }, + { + "id": 2, + "class": "ImageView", + "height": 59, + "width": 60, + "position": { + "column_min": 216, + "row_min": 190, + "column_max": 276, + "row_max": 249 + } + }, + { + "id": 3, + "class": "ImageView", + "height": 240, + "width": 493, + "position": { + "column_min": 0, + "row_min": 352, + "column_max": 493, + "row_max": 592 + }, + "children": [ + 15, + 16 + ] + }, + { + "id": 4, + "class": "ImageView", + "height": 41, + "width": 5, + "position": { + "column_min": 9, + "row_min": 618, + "column_max": 14, + "row_max": 659 + } + }, + { + "id": 5, + "class": "ImageView", + "height": 34, + "width": 21, + "position": { + "column_min": 29, + "row_min": 620, + "column_max": 50, + "row_max": 654 + } + }, + { + "id": 6, + "class": "ImageView", + "height": 36, + "width": 337, + "position": { + "column_min": 83, + "row_min": 619, + "column_max": 420, + "row_max": 655 + } + }, + { + "id": 7, + "class": "ImageView", + "height": 13, + "width": 11, + "position": { + "column_min": 453, + "row_min": 631, + "column_max": 464, + "row_max": 644 + } + }, + { + "id": 8, + "class": "ImageView", + "height": 7, + "width": 15, + "position": { + "column_min": 203, + "row_min": 664, + "column_max": 218, + "row_max": 671 + } + }, + { + "id": 9, + "class": "ImageView", + "height": 47, + "width": 49, + "position": { + "column_min": 25, + "row_min": 682, + "column_max": 74, + "row_max": 729 + } + }, + { + "id": 10, + "class": "ImageView", + "height": 50, + "width": 48, + "position": { + "column_min": 123, + "row_min": 682, + "column_max": 171, + "row_max": 732 + } + }, + { + "id": 11, + "class": "ImageView", + "height": 49, + "width": 48, + "position": { + "column_min": 222, + "row_min": 681, + "column_max": 270, + "row_max": 730 + } + }, + { + "id": 12, + "class": "ImageView", + "height": 50, + "width": 48, + "position": { + "column_min": 321, + "row_min": 682, + "column_max": 369, + "row_max": 732 + } + }, + { + "id": 13, + "class": "SeekBar", + "height": 47, + "width": 47, + "position": { + "column_min": 420, + "row_min": 682, + "column_max": 467, + "row_max": 729 + } + }, + { + "id": 14, + "class": "TextView", + "height": 58, + "width": 493, + "position": { + "column_min": 0, + "row_min": 737, + "column_max": 493, + "row_max": 795 + }, + "children": [ + 17 + ] + }, + { + "id": 15, + "class": "ImageView", + "height": 34, + "width": 30, + "position": { + "column_min": 26, + "row_min": 441, + "column_max": 56, + "row_max": 475 + }, + "parent": 3 + }, + { + "id": 16, + "class": "ImageView", + "height": 35, + "width": 25, + "position": { + "column_min": 72, + "row_min": 441, + "column_max": 97, + "row_max": 476 + }, + "parent": 3 + }, + { + "id": 17, + "class": "TextView", + "height": 56, + "width": 190, + "position": { + "column_min": 0, + "row_min": 737, + "column_max": 190, + "row_max": 793 + }, + "parent": 14 + }, + { + "id": 18, + "class": "Text", + "height": 24, + "width": 123, + "position": { + "column_min": 20, + "row_min": 25, + "column_max": 143, + "row_max": 49 + }, + "text_content": "Relax Night" + }, + { + "id": 19, + "class": "Text", + "height": 20, + "width": 296, + "position": { + "column_min": 99, + "row_min": 87, + "column_max": 395, + "row_max": 107 + }, + "text_content": "Brahms Lullaby and crickets" + }, + { + "id": 20, + "class": "Text", + "height": 26, + "width": 68, + "position": { + "column_min": 403, + "row_min": 22, + "column_max": 471, + "row_max": 48 + }, + "text_content": "+ :" + } + ], + "img_shape": [ + 800, + 494, + 3 + ] +} \ No newline at end of file diff --git a/data/output/ocr/30800.json b/data/output/ocr/30800.json new file mode 100644 index 0000000..11f5fe0 --- /dev/null +++ b/data/output/ocr/30800.json @@ -0,0 +1,89 @@ +{ + "img_shape": [ + 1747, + 1080, + 3 + ], + "texts": [ + { + "row_max": 108, + "column_min": 44, + "content": "Relax Night", + "height": 52, + "width": 269, + "row_min": 56, + "column_max": 313, + "id": 0 + }, + { + "row_max": 234, + "column_min": 217, + "content": "Brahms Lullaby and crickets", + "height": 42, + "width": 647, + "row_min": 192, + "column_max": 864, + "id": 1 + }, + { + "row_max": 1670, + "column_min": 31, + "content": "Dog House On", + "height": 45, + "width": 302, + "row_min": 1625, + "column_max": 333, + "id": 2 + }, + { + "row_max": 1710, + "column_min": 63, + "content": "The Prairie .", + "height": 31, + "width": 241, + "row_min": 1679, + "column_max": 304, + "id": 3 + }, + { + "row_max": 106, + "column_min": 882, + "content": "+ :", + "height": 56, + "width": 148, + "row_min": 50, + "column_max": 1030, + "id": 4 + }, + { + "row_max": 1653, + "column_min": 499, + "content": "Dog boarding and Dog training . YX", + "height": 42, + "width": 577, + "row_min": 1611, + "column_max": 1076, + "id": 5 + }, + { + "row_max": 1686, + "column_min": 557, + "content": "dog's home away from home .", + "height": 33, + "width": 426, + "row_min": 1653, + "column_max": 983, + "id": 6 + }, + { + "row_max": 1711, + "column_min": 683, + "content": "facebook.com", + "height": 20, + "width": 171, + "row_min": 1691, + "column_max": 854, + "id": 7 + } + ] +} \ No newline at end of file diff --git a/data/output/ocr/30800.png b/data/output/ocr/30800.png new file mode 100644 index 0000000..d8b7379 Binary files /dev/null and b/data/output/ocr/30800.png differ diff --git a/data/output/result.jpg b/data/output/result.jpg new file mode 100644 index 0000000..ece4cf1 Binary files /dev/null and b/data/output/result.jpg differ diff --git a/detect_compo/__pycache__/ip_region_proposal.cpython-35.pyc b/detect_compo/__pycache__/ip_region_proposal.cpython-35.pyc index 7cdd1bd..6de9688 100644 Binary files a/detect_compo/__pycache__/ip_region_proposal.cpython-35.pyc and b/detect_compo/__pycache__/ip_region_proposal.cpython-35.pyc differ diff --git a/detect_compo/__pycache__/ip_region_proposal.cpython-37.pyc b/detect_compo/__pycache__/ip_region_proposal.cpython-37.pyc new file mode 100644 index 0000000..915ff20 Binary files /dev/null and b/detect_compo/__pycache__/ip_region_proposal.cpython-37.pyc differ diff --git a/detect_compo/__pycache__/ip_region_proposal.cpython-39.pyc b/detect_compo/__pycache__/ip_region_proposal.cpython-39.pyc new file mode 100644 index 0000000..034d7a0 Binary files /dev/null and b/detect_compo/__pycache__/ip_region_proposal.cpython-39.pyc differ diff --git a/detect_compo/ip_region_proposal.py b/detect_compo/ip_region_proposal.py index 5f57bd2..ec990de 100644 --- a/detect_compo/ip_region_proposal.py +++ b/detect_compo/ip_region_proposal.py @@ -1,9 +1,6 @@ -import cv2 from os.path import join as pjoin import time -import json -import numpy as np - +import os import detect_compo.lib_ip.ip_preprocessing as pre import detect_compo.lib_ip.ip_draw as draw import detect_compo.lib_ip.ip_detection as det @@ -40,10 +37,10 @@ def nesting_inspection(org, grey, compos, ffl_block): def compo_detection(input_img_path, output_root, uied_params, resize_by_height=800, classifier=None, show=False, wai_key=0): - start = time.clock() + # start = time.clock() name = input_img_path.split('/')[-1][:-4] if '/' in input_img_path else input_img_path.split('\\')[-1][:-4] ip_root = file.build_directory(pjoin(output_root, "ip")) - + os.makedirs(ip_root,exist_ok=True) # *** Step 1 *** pre-processing: read img -> get binary map org, grey = pre.read_img(input_img_path, resize_by_height) binary = pre.binarization(org, grad_min=int(uied_params['min-grad'])) @@ -80,12 +77,12 @@ def compo_detection(input_img_path, output_root, uied_params, # uicompos = det.rm_noise_compos(uicompos) # *** Step 6 *** element classification: all category classification - # if classifier is not None: - # classifier['Elements'].predict([compo.compo_clipping(org) for compo in uicompos], uicompos) - # draw.draw_bounding_box_class(org, uicompos, show=show, name='cls', write_path=pjoin(ip_root, 'result.jpg')) - # draw.draw_bounding_box_class(org, uicompos, write_path=pjoin(output_root, 'result.jpg')) + if classifier is not None: + classifier['Elements'].predict([compo.compo_clipping(org) for compo in uicompos], uicompos) + draw.draw_bounding_box_class(org, uicompos, show=show, name='cls', write_path=pjoin(ip_root, 'result.jpg')) + draw.draw_bounding_box_class(org, uicompos, write_path=pjoin(output_root, 'result.jpg')) # *** Step 7 *** save detection result Compo.compos_update(uicompos, org.shape) file.save_corners_json(pjoin(ip_root, name + '.json'), uicompos) - print("[Compo Detection Completed in %.3f s] Input: %s Output: %s" % (time.clock() - start, input_img_path, pjoin(ip_root, name + '.json'))) + # print("[Compo Detection Completed in %.3f s] Input: %s Output: %s" % (time.clock() - start, input_img_path, pjoin(ip_root, name + '.json'))) diff --git a/detect_compo/lib_ip/__pycache__/Bbox.cpython-35.pyc b/detect_compo/lib_ip/__pycache__/Bbox.cpython-35.pyc index 55b519e..5c66f42 100644 Binary files a/detect_compo/lib_ip/__pycache__/Bbox.cpython-35.pyc and b/detect_compo/lib_ip/__pycache__/Bbox.cpython-35.pyc differ diff --git a/detect_compo/lib_ip/__pycache__/Bbox.cpython-37.pyc b/detect_compo/lib_ip/__pycache__/Bbox.cpython-37.pyc new file mode 100644 index 0000000..a49bfc2 Binary files /dev/null and b/detect_compo/lib_ip/__pycache__/Bbox.cpython-37.pyc differ diff --git a/detect_compo/lib_ip/__pycache__/Bbox.cpython-39.pyc b/detect_compo/lib_ip/__pycache__/Bbox.cpython-39.pyc new file mode 100644 index 0000000..293d07e Binary files /dev/null and b/detect_compo/lib_ip/__pycache__/Bbox.cpython-39.pyc differ diff --git a/detect_compo/lib_ip/__pycache__/Component.cpython-35.pyc b/detect_compo/lib_ip/__pycache__/Component.cpython-35.pyc index 57bed21..74cf56b 100644 Binary files a/detect_compo/lib_ip/__pycache__/Component.cpython-35.pyc and b/detect_compo/lib_ip/__pycache__/Component.cpython-35.pyc differ diff --git a/detect_compo/lib_ip/__pycache__/Component.cpython-37.pyc b/detect_compo/lib_ip/__pycache__/Component.cpython-37.pyc new file mode 100644 index 0000000..9c97778 Binary files /dev/null and b/detect_compo/lib_ip/__pycache__/Component.cpython-37.pyc differ diff --git a/detect_compo/lib_ip/__pycache__/Component.cpython-39.pyc b/detect_compo/lib_ip/__pycache__/Component.cpython-39.pyc new file mode 100644 index 0000000..00e9bbf Binary files /dev/null and b/detect_compo/lib_ip/__pycache__/Component.cpython-39.pyc differ diff --git a/detect_compo/lib_ip/__pycache__/file_utils.cpython-35.pyc b/detect_compo/lib_ip/__pycache__/file_utils.cpython-35.pyc index b0c64cb..8bcd412 100644 Binary files a/detect_compo/lib_ip/__pycache__/file_utils.cpython-35.pyc and b/detect_compo/lib_ip/__pycache__/file_utils.cpython-35.pyc differ diff --git a/detect_compo/lib_ip/__pycache__/file_utils.cpython-37.pyc b/detect_compo/lib_ip/__pycache__/file_utils.cpython-37.pyc new file mode 100644 index 0000000..9eb0fac Binary files /dev/null and b/detect_compo/lib_ip/__pycache__/file_utils.cpython-37.pyc differ diff --git a/detect_compo/lib_ip/__pycache__/file_utils.cpython-39.pyc b/detect_compo/lib_ip/__pycache__/file_utils.cpython-39.pyc new file mode 100644 index 0000000..055a19e Binary files /dev/null and b/detect_compo/lib_ip/__pycache__/file_utils.cpython-39.pyc differ diff --git a/detect_compo/lib_ip/__pycache__/ip_detection.cpython-35.pyc b/detect_compo/lib_ip/__pycache__/ip_detection.cpython-35.pyc index 807ce2f..8bc71d7 100644 Binary files a/detect_compo/lib_ip/__pycache__/ip_detection.cpython-35.pyc and b/detect_compo/lib_ip/__pycache__/ip_detection.cpython-35.pyc differ diff --git a/detect_compo/lib_ip/__pycache__/ip_detection.cpython-37.pyc b/detect_compo/lib_ip/__pycache__/ip_detection.cpython-37.pyc new file mode 100644 index 0000000..477ffc6 Binary files /dev/null and b/detect_compo/lib_ip/__pycache__/ip_detection.cpython-37.pyc differ diff --git a/detect_compo/lib_ip/__pycache__/ip_detection.cpython-39.pyc b/detect_compo/lib_ip/__pycache__/ip_detection.cpython-39.pyc new file mode 100644 index 0000000..d9248c2 Binary files /dev/null and b/detect_compo/lib_ip/__pycache__/ip_detection.cpython-39.pyc differ diff --git a/detect_compo/lib_ip/__pycache__/ip_draw.cpython-35.pyc b/detect_compo/lib_ip/__pycache__/ip_draw.cpython-35.pyc index fe08f64..2ac302a 100644 Binary files a/detect_compo/lib_ip/__pycache__/ip_draw.cpython-35.pyc and b/detect_compo/lib_ip/__pycache__/ip_draw.cpython-35.pyc differ diff --git a/detect_compo/lib_ip/__pycache__/ip_draw.cpython-37.pyc b/detect_compo/lib_ip/__pycache__/ip_draw.cpython-37.pyc new file mode 100644 index 0000000..33a59df Binary files /dev/null and b/detect_compo/lib_ip/__pycache__/ip_draw.cpython-37.pyc differ diff --git a/detect_compo/lib_ip/__pycache__/ip_draw.cpython-39.pyc b/detect_compo/lib_ip/__pycache__/ip_draw.cpython-39.pyc new file mode 100644 index 0000000..eee6227 Binary files /dev/null and b/detect_compo/lib_ip/__pycache__/ip_draw.cpython-39.pyc differ diff --git a/detect_compo/lib_ip/__pycache__/ip_preprocessing.cpython-35.pyc b/detect_compo/lib_ip/__pycache__/ip_preprocessing.cpython-35.pyc index 82957d6..4afe1f5 100644 Binary files a/detect_compo/lib_ip/__pycache__/ip_preprocessing.cpython-35.pyc and b/detect_compo/lib_ip/__pycache__/ip_preprocessing.cpython-35.pyc differ diff --git a/detect_compo/lib_ip/__pycache__/ip_preprocessing.cpython-37.pyc b/detect_compo/lib_ip/__pycache__/ip_preprocessing.cpython-37.pyc new file mode 100644 index 0000000..51ca133 Binary files /dev/null and b/detect_compo/lib_ip/__pycache__/ip_preprocessing.cpython-37.pyc differ diff --git a/detect_compo/lib_ip/__pycache__/ip_preprocessing.cpython-39.pyc b/detect_compo/lib_ip/__pycache__/ip_preprocessing.cpython-39.pyc new file mode 100644 index 0000000..5118d0a Binary files /dev/null and b/detect_compo/lib_ip/__pycache__/ip_preprocessing.cpython-39.pyc differ diff --git a/detect_compo/lib_ip/file_utils.py b/detect_compo/lib_ip/file_utils.py index 1bf1541..48a0442 100644 --- a/detect_compo/lib_ip/file_utils.py +++ b/detect_compo/lib_ip/file_utils.py @@ -26,6 +26,11 @@ def save_corners(file_path, corners, compo_name, clear=True): def save_corners_json(file_path, compos): + if compos is None or len(compos) == 0: + output = {} + with open(file_path, 'w') as f_out: + json.dump(output, f_out, indent=4) + return img_shape = compos[0].image_shape output = {'img_shape': img_shape, 'compos': []} f_out = open(file_path, 'w') diff --git a/detect_merge/__pycache__/Element.cpython-35.pyc b/detect_merge/__pycache__/Element.cpython-35.pyc index 1b93806..c52b8e1 100644 Binary files a/detect_merge/__pycache__/Element.cpython-35.pyc and b/detect_merge/__pycache__/Element.cpython-35.pyc differ diff --git a/detect_merge/__pycache__/Element.cpython-37.pyc b/detect_merge/__pycache__/Element.cpython-37.pyc new file mode 100644 index 0000000..31a4f44 Binary files /dev/null and b/detect_merge/__pycache__/Element.cpython-37.pyc differ diff --git a/detect_merge/__pycache__/Element.cpython-39.pyc b/detect_merge/__pycache__/Element.cpython-39.pyc new file mode 100644 index 0000000..e5244da Binary files /dev/null and b/detect_merge/__pycache__/Element.cpython-39.pyc differ diff --git a/detect_merge/__pycache__/merge.cpython-35.pyc b/detect_merge/__pycache__/merge.cpython-35.pyc index ceaeaf9..c588cb1 100644 Binary files a/detect_merge/__pycache__/merge.cpython-35.pyc and b/detect_merge/__pycache__/merge.cpython-35.pyc differ diff --git a/detect_merge/__pycache__/merge.cpython-37.pyc b/detect_merge/__pycache__/merge.cpython-37.pyc new file mode 100644 index 0000000..1687bd2 Binary files /dev/null and b/detect_merge/__pycache__/merge.cpython-37.pyc differ diff --git a/detect_merge/__pycache__/merge.cpython-39.pyc b/detect_merge/__pycache__/merge.cpython-39.pyc new file mode 100644 index 0000000..98c3c58 Binary files /dev/null and b/detect_merge/__pycache__/merge.cpython-39.pyc differ diff --git a/detect_merge/merge.py b/detect_merge/merge.py index b70acc6..294a68e 100644 --- a/detect_merge/merge.py +++ b/detect_merge/merge.py @@ -7,10 +7,14 @@ import shutil from detect_merge.Element import Element +from config.CONFIG import Config + +cfg = Config() def show_elements(org_img, eles, show=False, win_name='element', wait_key=0, shown_resize=None, line=2): - color_map = {'Text':(0, 0, 255), 'Compo':(0, 255, 0), 'Block':(0, 255, 0), 'Text Content':(255, 0, 255)} + # color_map = {'Text':(0, 0, 255), 'Compo':(0, 255, 0), 'Block':(0, 255, 0), 'Text Content':(255, 0, 255)} + color_map = cfg.COLOR img = org_img.copy() for ele in eles: color = color_map[ele.category] @@ -45,7 +49,8 @@ def refine_texts(texts, img_shape): refined_texts = [] for text in texts: # remove potential noise - if len(text.text_content) > 1 and text.height / img_shape[0] < 0.075: + # if len(text.text_content) > 1 and text.height / img_shape[0] < 0.075: + if len(text.text_content) > 1: refined_texts.append(text) return refined_texts @@ -196,6 +201,8 @@ def merge(img_path, compo_path, text_path, merge_root=None, is_paragraph=False, # load text and non-text compo ele_id = 0 compos = [] + if len(compo_json) == 0: + return None, None for compo in compo_json['compos']: element = Element(ele_id, (compo['column_min'], compo['row_min'], compo['column_max'], compo['row_max']), compo['class']) compos.append(element) @@ -231,5 +238,5 @@ def merge(img_path, compo_path, text_path, merge_root=None, is_paragraph=False, name = img_path.replace('\\', '/').split('/')[-1][:-4] components = save_elements(pjoin(merge_root, name + '.json'), elements, img_resize.shape) cv2.imwrite(pjoin(merge_root, name + '.jpg'), board) - print('[Merge Completed] Input: %s Output: %s' % (img_path, pjoin(merge_root, name + '.jpg'))) + # print('[Merge Completed] Input: %s Output: %s' % (img_path, pjoin(merge_root, name + '.jpg'))) return board, components diff --git a/detect_text/__pycache__/Text.cpython-35.pyc b/detect_text/__pycache__/Text.cpython-35.pyc index 1167045..f9cffc6 100644 Binary files a/detect_text/__pycache__/Text.cpython-35.pyc and b/detect_text/__pycache__/Text.cpython-35.pyc differ diff --git a/detect_text/__pycache__/Text.cpython-37.pyc b/detect_text/__pycache__/Text.cpython-37.pyc new file mode 100644 index 0000000..7394e6a Binary files /dev/null and b/detect_text/__pycache__/Text.cpython-37.pyc differ diff --git a/detect_text/__pycache__/Text.cpython-39.pyc b/detect_text/__pycache__/Text.cpython-39.pyc new file mode 100644 index 0000000..c279bd1 Binary files /dev/null and b/detect_text/__pycache__/Text.cpython-39.pyc differ diff --git a/detect_text/__pycache__/ocr.cpython-35.pyc b/detect_text/__pycache__/ocr.cpython-35.pyc index a4bf88c..54c31d4 100644 Binary files a/detect_text/__pycache__/ocr.cpython-35.pyc and b/detect_text/__pycache__/ocr.cpython-35.pyc differ diff --git a/detect_text/__pycache__/ocr.cpython-37.pyc b/detect_text/__pycache__/ocr.cpython-37.pyc new file mode 100644 index 0000000..8ab7126 Binary files /dev/null and b/detect_text/__pycache__/ocr.cpython-37.pyc differ diff --git a/detect_text/__pycache__/ocr.cpython-39.pyc b/detect_text/__pycache__/ocr.cpython-39.pyc new file mode 100644 index 0000000..959e739 Binary files /dev/null and b/detect_text/__pycache__/ocr.cpython-39.pyc differ diff --git a/detect_text/__pycache__/text_detection.cpython-35.pyc b/detect_text/__pycache__/text_detection.cpython-35.pyc index 6dfd484..bc12c2c 100644 Binary files a/detect_text/__pycache__/text_detection.cpython-35.pyc and b/detect_text/__pycache__/text_detection.cpython-35.pyc differ diff --git a/detect_text/__pycache__/text_detection.cpython-37.pyc b/detect_text/__pycache__/text_detection.cpython-37.pyc new file mode 100644 index 0000000..33c9977 Binary files /dev/null and b/detect_text/__pycache__/text_detection.cpython-37.pyc differ diff --git a/detect_text/__pycache__/text_detection.cpython-39.pyc b/detect_text/__pycache__/text_detection.cpython-39.pyc new file mode 100644 index 0000000..adf3275 Binary files /dev/null and b/detect_text/__pycache__/text_detection.cpython-39.pyc differ diff --git a/detect_text/ocr.py b/detect_text/ocr.py index 3c017d5..e28ef01 100644 --- a/detect_text/ocr.py +++ b/detect_text/ocr.py @@ -25,14 +25,14 @@ def Google_OCR_makeImageData(imgpath): def ocr_detection_google(imgpath): start = time.clock() url = 'https://vision.googleapis.com/v1/images:annotate' - api_key = 'AIzaSyDUc4iOUASJQYkVwSomIArTKhE2C6bHK8U' # *** Replace with your own Key *** + api_key = '' # *** Replace with your own Key *** imgdata = Google_OCR_makeImageData(imgpath) response = requests.post(url, data=imgdata, params={'key': api_key}, headers={'Content_Type': 'application/json'}) # print('*** Text Detection Time Taken:%.3fs ***' % (time.clock() - start)) - print("*** Please replace the Google OCR key at detect_text/ocr.py line 28 with your own (apply in https://cloud.google.com/vision) ***") + # print("*** Please replace the Google OCR key at detect_text/ocr.py line 28 with your own (apply in https://cloud.google.com/vision) ***") if 'responses' not in response.json(): raise Exception(response.json()) if response.json()['responses'] == [{}]: diff --git a/detect_text/text_detection.py b/detect_text/text_detection.py index 30ac52c..389453b 100644 --- a/detect_text/text_detection.py +++ b/detect_text/text_detection.py @@ -109,12 +109,13 @@ def text_cvt_orc_format(ocr_result): def text_cvt_orc_format_paddle(paddle_result): texts = [] - for i, line in enumerate(paddle_result): - points = np.array(line[0]) - location = {'left': int(min(points[:, 0])), 'top': int(min(points[:, 1])), 'right': int(max(points[:, 0])), - 'bottom': int(max(points[:, 1]))} - content = line[1][0] - texts.append(Text(i, content, location)) + if paddle_result[0] is not None: + for i, line in enumerate(paddle_result[0]): + points = np.array(line[0]) + location = {'left': int(min(points[:, 0])), 'top': int(min(points[:, 1])), 'right': int(max(points[:, 0])), + 'bottom': int(max(points[:, 1]))} + content = line[1][0] + texts.append(Text(i, content, location)) return texts @@ -132,13 +133,14 @@ def text_detection(input_file='../data/input/30800.jpg', output_file='../data/ou :param method: google or paddle :param paddle_model: the preload paddle model for paddle ocr ''' - start = time.clock() + # start = time.clock() name = input_file.split('/')[-1][:-4] ocr_root = pjoin(output_file, 'ocr') + os.makedirs(ocr_root,exist_ok=True) img = cv2.imread(input_file) if method == 'google': - print('*** Detect Text through Google OCR ***') + # print('*** Detect Text through Google OCR ***') ocr_result = ocr.ocr_detection_google(input_file) texts = text_cvt_orc_format(ocr_result) texts = merge_intersected_texts(texts) @@ -147,17 +149,17 @@ def text_detection(input_file='../data/input/30800.jpg', output_file='../data/ou elif method == 'paddle': # The import of the paddle ocr can be separate to the beginning of the program if you decide to use this method from paddleocr import PaddleOCR - print('*** Detect Text through Paddle OCR ***') + # print('*** Detect Text through Paddle OCR ***') if paddle_model is None: paddle_model = PaddleOCR(use_angle_cls=True, lang="ch") result = paddle_model.ocr(input_file, cls=True) texts = text_cvt_orc_format_paddle(result) else: raise ValueError('Method has to be "google" or "paddle"') - + visualize_texts(img, texts, shown_resize_height=800, show=show, write_path=pjoin(ocr_root, name+'.png')) save_detection_json(pjoin(ocr_root, name+'.json'), texts, img.shape) - print("[Text Detection Completed in %.3f s] Input: %s Output: %s" % (time.clock() - start, input_file, pjoin(ocr_root, name+'.json'))) + # print("[Text Detection Completed in %.3f s] Input: %s Output: %s" % (time.clock() - start, input_file, pjoin(ocr_root, name+'.json'))) # text_detection() diff --git a/run_batch.py b/run_batch.py index 14ea1fb..ff64831 100644 --- a/run_batch.py +++ b/run_batch.py @@ -1,12 +1,13 @@ -import multiprocessing -import glob +import os import time import json +import argparse from tqdm import tqdm -from os.path import join as pjoin, exists +from os.path import join as pjoin import cv2 - +import requests import detect_compo.ip_region_proposal as ip +from paddleocr import PaddleOCR def resize_height_by_longest_edge(img_path, resize_length=800): @@ -20,26 +21,91 @@ def resize_height_by_longest_edge(img_path, resize_length=800): if __name__ == '__main__': # initialization - input_img_root = "E:/Mulong/Datasets/rico/combined" - output_root = "E:/Mulong/Result/rico/rico_uied/rico_new_uied_v3" - data = json.load(open('E:/Mulong/Datasets/rico/instances_test.json', 'r')) + parser = argparse.ArgumentParser(description='UIED batch processing') + parser.add_argument('--cat', default='web', type=str, help='category of images') + args = parser.parse_args() + input_img_root = os.path.join( + "/public/dataset/UEyes/UEyes_dataset/images_per_cat/", args.cat + ) + categroy_list = ['mobile', 'web', 'desktop', 'poster'] + assert args.cat in categroy_list, 'category should be in {}'.format(categroy_list) + output_root = os.path.join( + "/public/dataset/UEyes/UEyes_dataset/anno_uied", args.cat + ) + data = json.load(open('/public/dataset/UEyes/UEyes_dataset/images.json', 'r')) + + # input_imgs = [pjoin(input_img_root, img['file_name'].split('/')[-1]) for img in data['images']] + # input_imgs = sorted(input_imgs, key=lambda x: int(x.split('/')[-1][:-4])) # sorted by index + input_imgs = [ + img['file_name'] for img in data['images'] if img['Category'] == args.cat + ] - input_imgs = [pjoin(input_img_root, img['file_name'].split('/')[-1]) for img in data['images']] - input_imgs = sorted(input_imgs, key=lambda x: int(x.split('/')[-1][:-4])) # sorted by index + ''' + ele:min-grad: gradient threshold to produce binary map + ele:ffl-block: fill-flood threshold + ele:min-ele-area: minimum area for selected elements + ele:merge-contained-ele: if True, merge elements contained in others + text:max-word-inline-gap: words with smaller distance than the gap are counted as a line + text:max-line-gap: lines with smaller distance than the gap are counted as a paragraph - key_params = {'min-grad': 10, 'ffl-block': 5, 'min-ele-area': 50, 'merge-contained-ele': True, - 'max-word-inline-gap': 10, 'max-line-ingraph-gap': 4, 'remove-top-bar': True} + Tips: + 1. Larger *min-grad* produces fine-grained binary-map while prone to over-segment element to small pieces + 2. Smaller *min-ele-area* leaves tiny elements while prone to produce noises + 3. If not *merge-contained-ele*, the elements inside others will be recognized, while prone to produce noises + 4. The *max-word-inline-gap* and *max-line-gap* should be dependent on the input image size and resolution - is_ip = False - is_clf = False + mobile: {'min-grad':4, 'ffl-block':5, 'min-ele-area':50, 'max-word-inline-gap':6, 'max-line-gap':1} + web : {'min-grad':3, 'ffl-block':5, 'min-ele-area':25, 'max-word-inline-gap':4, 'max-line-gap':4} + ''' + key_params_dic = { + 'mobile': { + 'min-grad': 4, + 'ffl-block': 5, + 'min-ele-area': 50, + 'max-word-inline-gap': 6, + 'max-line-gap': 1, + 'merge-contained-ele': True, + 'merge-line-to-paragraph': False, + 'remove-bar': False, + }, + 'web': { + 'min-grad': 3, + 'ffl-block': 5, + 'min-ele-area': 25, + 'max-word-inline-gap': 4, + 'max-line-gap': 4, + 'merge-contained-ele': True, + 'merge-line-to-paragraph': False, + 'remove-bar': False, + }, + 'poster': { + 'min-grad': 5, + 'ffl-block': 3, + 'min-ele-area': 75, + 'merge-contained-ele': True, + 'merge-line-to-paragraph': False, + 'remove-bar': False, + }, + 'desktop': { + 'min-grad': 5, + 'ffl-block': 5, + 'min-ele-area': 50, + 'merge-contained-ele': True, + 'merge-line-to-paragraph': False, + 'remove-bar': True, + }, + } + key_params = key_params_dic[args.cat] + is_ip = True + is_clf = True is_ocr = False is_merge = True - # Load deep learning models in advance compo_classifier = None if is_ip and is_clf: compo_classifier = {} from cnn.CNN import CNN + # compo_classifier['Image'] = CNN('Image') compo_classifier['Elements'] = CNN('Elements') # compo_classifier['Noise'] = CNN('Noise') @@ -48,27 +114,71 @@ def resize_height_by_longest_edge(img_path, resize_length=800): import detect_text.text_detection as text # set the range of target inputs' indices - num = 0 - start_index = 30800 # 61728 - end_index = 100000 - for input_img in input_imgs: + # num = 0 + # start_index = 30800 # 61728 + # end_index = 100000 + # for input_img in input_imgs: + # resized_height = resize_height_by_longest_edge(input_img) + # index = input_img.split('/')[-1][:-4] + # if int(index) < start_index: + # continue + # if int(index) > end_index: + # break + max_retries = 5 + retry_wait_seconds = 5 + paddle_model = PaddleOCR(use_angle_cls=True, lang="en", show_log=False) + for input_img in tqdm(input_imgs): resized_height = resize_height_by_longest_edge(input_img) - index = input_img.split('/')[-1][:-4] - if int(index) < start_index: - continue - if int(index) > end_index: - break - if is_ocr: - text.text_detection(input_img, output_root, show=False) + for i in range(max_retries): + try: + text.text_detection( + input_img, + output_root, + show=False, + method='paddle', + paddle_model=paddle_model, + ) + break + except requests.exceptions.ProxyError: + if i < max_retries - 1: + print( + "Request failed due to proxy error. Retrying in {} seconds...".format( + retry_wait_seconds + ) + ) + time.sleep(retry_wait_seconds) + else: + print("Request failed after maximum retries. Exiting.") + raise if is_ip: - ip.compo_detection(input_img, output_root, key_params, classifier=compo_classifier, resize_by_height=resized_height, show=False) + ip.compo_detection( + input_img, + output_root, + key_params, + classifier=compo_classifier, + resize_by_height=resized_height, + show=False, + ) if is_merge: - import merge - compo_path = pjoin(output_root, 'ip', str(index) + '.json') - ocr_path = pjoin(output_root, 'ocr', str(index) + '.json') - merge.merge(input_img, compo_path, ocr_path, output_root, is_remove_top=key_params['remove-top-bar'], show=True) + import detect_merge.merge as merge - num += 1 + name = input_img.split('/')[-1][:-4] + compo_path = pjoin(output_root, 'ip', str(name) + '.json') + ocr_path = pjoin(output_root, 'ocr', str(name) + '.json') + merge_path = pjoin(output_root, 'merge') + os.makedirs(merge_path, exist_ok=True) + # merge.merge(input_img, compo_path, ocr_path, output_root, + # is_remove_top=key_params['remove-top-bar'], + # show=True) + merge.merge( + input_img, + compo_path, + ocr_path, + merge_path, + is_remove_bar=key_params['remove-bar'], + is_paragraph=key_params['merge-line-to-paragraph'], + show=False, + ) diff --git a/run_single.py b/run_single.py index a22a50f..29e7b09 100644 --- a/run_single.py +++ b/run_single.py @@ -2,7 +2,7 @@ import cv2 import os import numpy as np - +from paddleocr import PaddleOCR def resize_height_by_longest_edge(img_path, resize_length=800): org = cv2.imread(img_path) @@ -14,7 +14,12 @@ def resize_height_by_longest_edge(img_path, resize_length=800): def color_tips(): - color_map = {'Text': (0, 0, 255), 'Compo': (0, 255, 0), 'Block': (0, 255, 255), 'Text Content': (255, 0, 255)} + color_map = { + 'Text': (0, 0, 255), + 'Compo': (0, 255, 0), + 'Block': (0, 255, 255), + 'Text Content': (255, 0, 255), + } board = np.zeros((200, 200, 3), dtype=np.uint8) board[:50, :, :] = (0, 0, 255) @@ -22,70 +27,126 @@ def color_tips(): board[100:150, :, :] = (255, 0, 255) board[150:200, :, :] = (0, 255, 255) cv2.putText(board, 'Text', (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2) - cv2.putText(board, 'Non-text Compo', (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2) - cv2.putText(board, "Compo's Text Content", (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2) + cv2.putText( + board, 'Non-text Compo', (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2 + ) + cv2.putText( + board, + "Compo's Text Content", + (10, 120), + cv2.FONT_HERSHEY_SIMPLEX, + 0.5, + (0, 0, 0), + 2, + ) cv2.putText(board, "Block", (10, 170), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2) cv2.imshow('colors', board) if __name__ == '__main__': - ''' - ele:min-grad: gradient threshold to produce binary map - ele:ffl-block: fill-flood threshold - ele:min-ele-area: minimum area for selected elements - ele:merge-contained-ele: if True, merge elements contained in others - text:max-word-inline-gap: words with smaller distance than the gap are counted as a line - text:max-line-gap: lines with smaller distance than the gap are counted as a paragraph - - Tips: - 1. Larger *min-grad* produces fine-grained binary-map while prone to over-segment element to small pieces - 2. Smaller *min-ele-area* leaves tiny elements while prone to produce noises - 3. If not *merge-contained-ele*, the elements inside others will be recognized, while prone to produce noises - 4. The *max-word-inline-gap* and *max-line-gap* should be dependent on the input image size and resolution - - mobile: {'min-grad':4, 'ffl-block':5, 'min-ele-area':50, 'max-word-inline-gap':6, 'max-line-gap':1} - web : {'min-grad':3, 'ffl-block':5, 'min-ele-area':25, 'max-word-inline-gap':4, 'max-line-gap':4} + ele:min-grad: gradient threshold to produce binary map + ele:ffl-block: fill-flood threshold + ele:min-ele-area: minimum area for selected elements + ele:merge-contained-ele: if True, merge elements contained in others + text:max-word-inline-gap: words with smaller distance than the gap are counted as a line + text:max-line-gap: lines with smaller distance than the gap are counted as a paragraph + + Tips: + 1. Larger *min-grad* produces fine-grained binary-map while prone to over-segment element to small pieces + 2. Smaller *min-ele-area* leaves tiny elements while prone to produce noises + 3. If not *merge-contained-ele*, the elements inside others will be recognized, while prone to produce noises + 4. The *max-word-inline-gap* and *max-line-gap* should be dependent on the input image size and resolution + + mobile: {'min-grad':4, 'ffl-block':5, 'min-ele-area':50, 'max-word-inline-gap':6, 'max-line-gap':1} + web : {'min-grad':3, 'ffl-block':5, 'min-ele-area':25, 'max-word-inline-gap':4, 'max-line-gap':4} ''' - key_params = {'min-grad':10, 'ffl-block':5, 'min-ele-area':50, - 'merge-contained-ele':True, 'merge-line-to-paragraph':False, 'remove-bar':True} + key_params_dic = { + 'mobile': { + 'min-grad': 4, + 'ffl-block': 5, + 'min-ele-area': 50, + 'max-word-inline-gap': 6, + 'max-line-gap': 1, + }, + 'web': { + 'min-grad': 3, + 'ffl-block': 5, + 'min-ele-area': 25, + 'max-word-inline-gap': 4, + 'max-line-gap': 4, + }, + 'poster': { + 'min-grad': 5, + 'ffl-block': 5, + 'min-ele-area': 50, + 'merge-contained-ele': True, + 'merge-line-to-paragraph': False, + 'remove-bar': False, + }, + 'desktop': { + 'min-grad': 5, + 'ffl-block': 5, + 'min-ele-area': 50, + 'merge-contained-ele': True, + 'merge-line-to-paragraph': False, + 'remove-bar': True, + }, + } # set input image path - input_path_img = 'data/input/497.jpg' - output_root = 'data/output' - + input_path_img = '/public/dataset/UEyes/UEyes_dataset/images/0bce2b.png' + output_root = './output' + key_params = key_params_dic['poster'] resized_height = resize_height_by_longest_edge(input_path_img, resize_length=800) - color_tips() + # color_tips() is_ip = True - is_clf = False + is_clf = True is_ocr = True is_merge = True if is_ocr: import detect_text.text_detection as text + paddle_model = PaddleOCR(use_angle_cls=True, lang="en", show_log=False) os.makedirs(pjoin(output_root, 'ocr'), exist_ok=True) - text.text_detection(input_path_img, output_root, show=True, method='google') + text.text_detection(input_path_img, output_root, show=False, method='paddle',paddle_model=paddle_model) if is_ip: import detect_compo.ip_region_proposal as ip + os.makedirs(pjoin(output_root, 'ip'), exist_ok=True) # switch of the classification func classifier = None if is_clf: classifier = {} from cnn.CNN import CNN + # classifier['Image'] = CNN('Image') classifier['Elements'] = CNN('Elements') # classifier['Noise'] = CNN('Noise') - ip.compo_detection(input_path_img, output_root, key_params, - classifier=classifier, resize_by_height=resized_height, show=False) + ip.compo_detection( + input_path_img, + output_root, + key_params, + classifier=classifier, + resize_by_height=resized_height, + show=False, + ) if is_merge: import detect_merge.merge as merge + os.makedirs(pjoin(output_root, 'merge'), exist_ok=True) name = input_path_img.split('/')[-1][:-4] compo_path = pjoin(output_root, 'ip', str(name) + '.json') ocr_path = pjoin(output_root, 'ocr', str(name) + '.json') - merge.merge(input_path_img, compo_path, ocr_path, pjoin(output_root, 'merge'), - is_remove_bar=key_params['remove-bar'], is_paragraph=key_params['merge-line-to-paragraph'], show=True) + merge.merge( + input_path_img, + compo_path, + ocr_path, + pjoin(output_root, 'merge'), + is_remove_bar=key_params['remove-bar'], + is_paragraph=key_params['merge-line-to-paragraph'], + show=False, + ) diff --git a/run_testing(Used for Adjusting).py b/run_testing.py similarity index 88% rename from run_testing(Used for Adjusting).py rename to run_testing.py index f97d51e..476e5c8 100644 --- a/run_testing(Used for Adjusting).py +++ b/run_testing.py @@ -39,8 +39,8 @@ def nothing(x): 'max-word-inline-gap':10, 'max-line-gap':4, 'remove-top-bar':True} # set input image path - input_path_img = 'data/input/4.jpg' - output_root = 'data/output' + input_path_img = '/public/dataset/UEyes/UEyes_dataset/images/1b4c8d.png' + output_root = './output' resized_height = resize_height_by_longest_edge(input_path_img) is_clf = False @@ -56,10 +56,10 @@ def nothing(x): testing_ip = True testing_merge = False - cv2.namedWindow('parameters') + # cv2.namedWindow('parameters') if testing_ip: - cv2.createTrackbar('min-grad', 'parameters', 4, 20, nothing) - cv2.createTrackbar('min-ele-area', 'parameters', 20, 200, nothing) + # cv2.createTrackbar('min-grad', 'parameters', 4, 20, nothing) + # cv2.createTrackbar('min-ele-area', 'parameters', 20, 200, nothing) while(1): key_params['min-grad'] = cv2.getTrackbarPos('min-grad', 'parameters') key_params['min-ele-area'] = cv2.getTrackbarPos('min-ele-area', 'parameters') @@ -77,8 +77,8 @@ def nothing(x): classifier=classifier, resize_by_height=resized_height, show=True, wai_key=10) if testing_merge: - cv2.createTrackbar('max-word-inline-gap', 'parameters', 4, 20, nothing) - cv2.createTrackbar('max-line-gap', 'parameters', 20, 200, nothing) + # cv2.createTrackbar('max-word-inline-gap', 'parameters', 4, 20, nothing) + # cv2.createTrackbar('max-line-gap', 'parameters', 20, 200, nothing) while(1): key_params['max-word-inline-gap'] = cv2.getTrackbarPos('max-word-inline-gap', 'parameters') key_params['max-line-gap'] = cv2.getTrackbarPos('max-line-gap', 'parameters') diff --git a/uied_requirements.txt b/uied_requirements.txt new file mode 100644 index 0000000..a14b6e2 --- /dev/null +++ b/uied_requirements.txt @@ -0,0 +1,118 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +_libgcc_mutex=0.1=main +_openmp_mutex=5.1=1_gnu +absl-py=2.0.0=pypi_0 +astor=0.8.1=pypi_0 +astunparse=1.6.3=pypi_0 +blas=1.0=mkl +bottleneck=1.3.5=py37h7deecbd_0 +bzip2=1.0.8=h7b6447c_0 +ca-certificates=2023.08.22=h06a4308_0 +cachetools=4.2.4=pypi_0 +cairo=1.16.0=hb05425b_5 +certifi=2022.12.7=py37h06a4308_0 +charset-normalizer=3.3.1=pypi_0 +expat=2.5.0=h6a678d5_0 +ffmpeg=4.0=hcdf2ecd_0 +flatbuffers=23.5.26=pypi_0 +fontconfig=2.14.1=h4c34cd2_2 +freeglut=3.0.0=hf484d3e_5 +freetype=2.12.1=h4a9f257_0 +gast=0.2.2=pypi_0 +glib=2.69.1=he621ea3_2 +google-auth=1.35.0=pypi_0 +google-auth-oauthlib=0.4.6=pypi_0 +google-pasta=0.2.0=pypi_0 +graphite2=1.3.14=h295c915_1 +grpcio=1.59.0=pypi_0 +h5py=3.8.0=pypi_0 +harfbuzz=1.8.8=hffaf4a1_0 +hdf5=1.10.2=hba1933b_1 +icu=58.2=he6710b0_3 +idna=3.4=pypi_0 +importlib-metadata=6.7.0=pypi_0 +install=1.3.5=pypi_0 +intel-openmp=2022.1.0=h9e868ea_3769 +jasper=2.0.14=hd8c5072_2 +joblib=1.3.2=pypi_0 +jpeg=9e=h5eee18b_1 +keras=2.11.0=pypi_0 +keras-applications=1.0.8=pypi_0 +keras-preprocessing=1.1.2=pypi_0 +ld_impl_linux-64=2.38=h1181459_1 +lerc=3.0=h295c915_0 +libclang=16.0.6=pypi_0 +libdeflate=1.17=h5eee18b_1 +libffi=3.4.4=h6a678d5_0 +libgcc-ng=11.2.0=h1234567_1 +libgfortran-ng=7.5.0=ha8ba4b0_17 +libgfortran4=7.5.0=ha8ba4b0_17 +libglu=9.0.0=hf484d3e_1 +libgomp=11.2.0=h1234567_1 +libopencv=3.4.2=hb342d67_1 +libopus=1.3.1=h7b6447c_0 +libpng=1.6.39=h5eee18b_0 +libstdcxx-ng=11.2.0=h1234567_1 +libtiff=4.5.1=h6a678d5_0 +libuuid=1.41.5=h5eee18b_0 +libvpx=1.7.0=h439df22_0 +libwebp-base=1.3.2=h5eee18b_0 +libxcb=1.15=h7f8727e_0 +libxml2=2.10.4=hcbfbd50_0 +lz4-c=1.9.4=h6a678d5_0 +markdown=3.4.4=pypi_0 +markupsafe=2.1.3=pypi_0 +mkl=2020.2=256 +mkl-service=2.3.0=py37he8ac12f_0 +mkl_fft=1.3.0=py37h54f3939_0 +mkl_random=1.1.1=py37h0573a6f_0 +ncurses=6.4=h6a678d5_0 +numexpr=2.7.3=py37hb2eb853_0 +numpy=1.21.6=pypi_0 +oauthlib=3.2.2=pypi_0 +opencv=3.4.2=py37h6fd60c2_1 +openssl=1.1.1w=h7f8727e_0 +opt-einsum=3.3.0=pypi_0 +packaging=23.2=pypi_0 +pandas=1.3.5=py37h8c16a72_0 +pcre=8.45=h295c915_0 +pip=23.3.1=pypi_0 +pixman=0.40.0=h7f8727e_1 +protobuf=3.19.6=pypi_0 +py-opencv=3.4.2=py37hb342d67_1 +pyasn1=0.5.0=pypi_0 +pyasn1-modules=0.3.0=pypi_0 +python=3.7.16=h7a1cb2a_0 +python-dateutil=2.8.2=pyhd3eb1b0_0 +pytz=2021.3=pyhd3eb1b0_0 +pyyaml=6.0.1=pypi_0 +readline=8.2=h5eee18b_0 +requests=2.31.0=pypi_0 +requests-oauthlib=1.3.1=pypi_0 +rsa=4.9=pypi_0 +scikit-learn=1.0.2=pypi_0 +scipy=1.7.3=pypi_0 +setuptools=65.6.3=py37h06a4308_0 +six=1.16.0=pyhd3eb1b0_1 +sqlite=3.41.2=h5eee18b_0 +tensorboard=2.11.2=pypi_0 +tensorboard-data-server=0.6.1=pypi_0 +tensorboard-plugin-wit=1.8.1=pypi_0 +tensorflow=2.11.0=pypi_0 +tensorflow-estimator=2.11.0=pypi_0 +tensorflow-io-gcs-filesystem=0.34.0=pypi_0 +termcolor=2.3.0=pypi_0 +threadpoolctl=3.1.0=pypi_0 +tk=8.6.12=h1ccaba5_0 +tqdm=4.66.1=pypi_0 +typing-extensions=4.7.1=pypi_0 +urllib3=2.0.7=pypi_0 +werkzeug=2.2.3=pypi_0 +wheel=0.37.1=pyhd3eb1b0_0 +wrapt=1.15.0=pypi_0 +xz=5.4.2=h5eee18b_0 +zipp=3.15.0=pypi_0 +zlib=1.2.13=h5eee18b_0 +zstd=1.5.5=hc292b87_0 diff --git a/uied_requirements_pip_py3.7.txt b/uied_requirements_pip_py3.7.txt new file mode 100644 index 0000000..c3465f9 --- /dev/null +++ b/uied_requirements_pip_py3.7.txt @@ -0,0 +1,106 @@ +absl-py==2.0.0 +anyio==3.7.1 +astor==0.8.1 +astunparse==1.6.3 +attrdict==2.0.1 +babel==2.13.1 +bce-python-sdk==0.8.92 +beautifulsoup4==4.12.2 +cachetools==4.2.4 +certifi==2022.12.7 +charset-normalizer==3.3.1 +click==8.1.7 +cssselect==1.2.0 +cssutils==2.7.1 +cycler==0.11.0 +cython==3.0.5 +decorator==5.1.1 +et-xmlfile==1.1.0 +exceptiongroup==1.1.3 +# expat==2.5.0 +fire==0.5.0 +flask==2.2.5 +flask-babel==3.1.0 +flatbuffers==23.5.26 +fonttools==4.38.0 +future==0.18.3 +gast==0.2.2 +google-auth==1.35.0 +google-auth-oauthlib==0.4.6 +google-pasta==0.2.0 +grpcio==1.59.0 +h11==0.14.0 +h5py==3.8.0 +httpcore==0.17.3 +httpx==0.24.1 +idna==3.4 +imageio==2.31.2 +imgaug==0.4.0 +importlib-metadata==6.7.0 +install==1.3.5 +itsdangerous==2.1.2 +jinja2==3.1.2 +joblib==1.3.2 +keras==2.11.0 +keras-applications==1.0.8 +keras-preprocessing==1.1.2 +kiwisolver==1.4.5 +libclang==16.0.6 +lmdb==1.4.1 +lxml==4.9.3 +markdown==3.4.4 +markupsafe==2.1.3 +matplotlib==3.5.3 +networkx==2.6.3 +numexpr==2.7.3 +numpy==1.21.6 +oauthlib==3.2.2 +opencv-contrib-python==4.6.0.66 +opencv-python==4.6.0.66 +openpyxl==3.1.2 +opt-einsum==3.3.0 +packaging==23.2 +paddleocr==2.7.0.2 +paddlepaddle-gpu==2.5.2 +pandas==1.3.5 +pdf2docx==0.5.6 +pillow==9.5.0 +pip==23.3.1 +premailer==3.10.0 +protobuf==3.19.6 +psutil==5.9.6 +pyasn1==0.5.0 +pyasn1-modules==0.3.0 +pyclipper==1.3.0.post5 +pycryptodome==3.19.0 +pymupdf==1.20.2 +pyparsing==3.1.1 +pytz==2023.3.post1 +pywavelets==1.3.0 +pyyaml==6.0.1 +rapidfuzz==3.4.0 +rarfile==4.1 +requests==2.31.0 +requests-oauthlib==1.3.1 +rsa==4.9 +scikit-image==0.19.3 +scikit-learn==1.0.2 +scipy==1.7.3 +shapely==2.0.2 +soupsieve==2.4.1 +tensorboard==2.11.2 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorflow==2.11.0 +tensorflow-estimator==2.11.0 +tensorflow-io-gcs-filesystem==0.34.0 +termcolor==2.3.0 +threadpoolctl==3.1.0 +tifffile==2021.11.2 +tqdm==4.66.1 +typing-extensions==4.7.1 +urllib3==2.0.7 +visualdl==2.4.0 +werkzeug==2.2.3 +wrapt==1.15.0 +zipp==3.15.0 diff --git a/uied_requirements_py3.7.txt b/uied_requirements_py3.7.txt new file mode 100644 index 0000000..e94a399 --- /dev/null +++ b/uied_requirements_py3.7.txt @@ -0,0 +1,67 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +_libgcc_mutex=0.1=main +_openmp_mutex=5.1=1_gnu +blas=1.0=mkl +bottleneck=1.3.5=py37h7deecbd_0 +bzip2=1.0.8=h7b6447c_0 +ca-certificates=2023.08.22=h06a4308_0 +cairo=1.16.0=hb05425b_5 +certifi=2022.12.7=py37h06a4308_0 +expat=2.5.0=h6a678d5_0 +ffmpeg=4.0=hcdf2ecd_0 +fontconfig=2.14.1=h4c34cd2_2 +freeglut=3.0.0=hf484d3e_5 +freetype=2.12.1=h4a9f257_0 +glib=2.69.1=he621ea3_2 +graphite2=1.3.14=h295c915_1 +harfbuzz=1.8.8=hffaf4a1_0 +hdf5=1.10.2=hba1933b_1 +icu=58.2=he6710b0_3 +intel-openmp=2022.1.0=h9e868ea_3769 +jasper=2.0.14=hd8c5072_2 +jpeg=9e=h5eee18b_1 +ld_impl_linux-64=2.38=h1181459_1 +lerc=3.0=h295c915_0 +libdeflate=1.17=h5eee18b_1 +libffi=3.4.4=h6a678d5_0 +libgcc-ng=11.2.0=h1234567_1 +libgfortran-ng=7.5.0=ha8ba4b0_17 +libgfortran4=7.5.0=ha8ba4b0_17 +libglu=9.0.0=hf484d3e_1 +libgomp=11.2.0=h1234567_1 +libopencv=3.4.2=hb342d67_1 +libopus=1.3.1=h7b6447c_0 +libpng=1.6.39=h5eee18b_0 +libstdcxx-ng=11.2.0=h1234567_1 +libtiff=4.5.1=h6a678d5_0 +libuuid=1.41.5=h5eee18b_0 +libvpx=1.7.0=h439df22_0 +libwebp-base=1.3.2=h5eee18b_0 +libxcb=1.15=h7f8727e_0 +libxml2=2.10.4=hcbfbd50_0 +lz4-c=1.9.4=h6a678d5_0 +mkl=2020.2=256 +mkl-service=2.3.0=py37he8ac12f_0 +mkl_fft=1.3.0=py37h54f3939_0 +mkl_random=1.1.1=py37h0573a6f_0 +ncurses=6.4=h6a678d5_0 +numexpr=2.7.3=py37hb2eb853_0 +opencv=3.4.2=py37h6fd60c2_1 +openssl=1.1.1w=h7f8727e_0 +pandas=1.3.5=py37h8c16a72_0 +pcre=8.45=h295c915_0 +pixman=0.40.0=h7f8727e_1 +py-opencv=3.4.2=py37hb342d67_1 +# python=3.7.16=h7a1cb2a_0 +python-dateutil=2.8.2=pyhd3eb1b0_0 +readline=8.2=h5eee18b_0 +setuptools=65.6.3=py37h06a4308_0 +six=1.16.0=pyhd3eb1b0_1 +sqlite=3.41.2=h5eee18b_0 +tk=8.6.12=h1ccaba5_0 +wheel=0.37.1=pyhd3eb1b0_0 +xz=5.4.2=h5eee18b_0 +zlib=1.2.13=h5eee18b_0 +zstd=1.5.5=hc292b87_0