Skip to content

Commit

Permalink
Remove segment line residue left by thresholding text-containing boxes
Browse files Browse the repository at this point in the history
GUI screendumps have to be made as similar as possible to regular
OCR-compatible documents containing purely text. While thresholding a
screendump for text detection is one step closer in this direction,
GUI elements containing text can turn into boxes of vertical and
horizontal lines within the binarized image after thresholding, which
would then lower the text detection quality. To deal with this, let's
optionally perform some morphological operations to remove segment
lines with thickness up to a maximal segment line length.
  • Loading branch information
pevogam committed Apr 29, 2024
1 parent 79e1936 commit 49e7092
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions guibot/finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -1869,6 +1869,7 @@ def __configure_backend(self, backend=None, category="text", reset=False):
self.params[category]["psmode"] = CVParameter(3, 0, 13, enumerated=True)
self.params[category]["extra_configs"] = CVParameter("")
self.params[category]["binarize_detection"] = CVParameter(False)
self.params[category]["segment_line_max"] = CVParameter(1, 1, None, 1.0)
self.params[category]["recursion_height"] = CVParameter(0.3, 0.0, 1.0, 0.01)
self.params[category]["recursion_width"] = CVParameter(0.3, 0.0, 1.0, 0.01)
elif backend == "east":
Expand Down Expand Up @@ -2285,6 +2286,17 @@ def _detect_text_boxes(self, haystack):
detection_img = numpy.array(haystack.pil_image)
if self.params["tdetect"]["binarize_detection"].value:
detection_img = self._binarize_image(detection_img)

# remove segment line residue from thresholding text containing boxes (GUI elements)
max_segment = self.params["tdetect"]["segment_line_max"].value
for i in range(1, max_segment):
hline = cv2.getStructuringElement(cv2.MORPH_RECT, (max_segment, i))
hlopened = cv2.morphologyEx(detection_img, cv2.MORPH_OPEN, hline, iterations=1)
vline = cv2.getStructuringElement(cv2.MORPH_RECT, (i, max_segment))
vlopened = cv2.morphologyEx(detection_img, cv2.MORPH_OPEN, vline, iterations=1)
detection_img -= hlopened
detection_img -= vlopened

else:
detection_img = cv2.cvtColor(detection_img, cv2.COLOR_RGB2GRAY)
detection_width = int(self.params["tdetect"]["recursion_width"].value * haystack.width)
Expand Down

0 comments on commit 49e7092

Please sign in to comment.