From 49e70920915cae4342ccb8824f8f4dc71a5c688a Mon Sep 17 00:00:00 2001 From: Plamen Dimitrov Date: Mon, 29 Apr 2024 13:22:56 +0800 Subject: [PATCH] Remove segment line residue from thresholding text containing boxes GUI screendumps have to be made as similar as possible to regular OCR-compatible documents containing pure text. While thresholding a screendump for text detection is one step in this direction, GUI elements containing text can turn into boxes of vertical and horizontal lines within the binarized image after thresholding is applied, which then lowers the text detection quality. To deal with this, let's optionally perform some morphological operations to remove segment lines with thickness up to a maximal segment line length. --- guibot/finder.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/guibot/finder.py b/guibot/finder.py index e44e0eb9..47b0c699 100644 --- a/guibot/finder.py +++ b/guibot/finder.py @@ -1869,6 +1869,7 @@ def __configure_backend(self, backend=None, category="text", reset=False): self.params[category]["psmode"] = CVParameter(3, 0, 13, enumerated=True) self.params[category]["extra_configs"] = CVParameter("") self.params[category]["binarize_detection"] = CVParameter(False) + self.params[category]["segment_line_max"] = CVParameter(1, 1, None, 1.0) self.params[category]["recursion_height"] = CVParameter(0.3, 0.0, 1.0, 0.01) self.params[category]["recursion_width"] = CVParameter(0.3, 0.0, 1.0, 0.01) elif backend == "east": @@ -2285,6 +2286,17 @@ def _detect_text_boxes(self, haystack): detection_img = numpy.array(haystack.pil_image) if self.params["tdetect"]["binarize_detection"].value: detection_img = self._binarize_image(detection_img) + + # remove segment line residue from thresholding text containing boxes (GUI elements) + max_segment = self.params["tdetect"]["segment_line_max"].value + for i in range(1, max_segment): + hline = cv2.getStructuringElement(cv2.MORPH_RECT, (max_segment, i)) + hlopened = 
cv2.morphologyEx(detection_img, cv2.MORPH_OPEN, hline, iterations=1) + vline = cv2.getStructuringElement(cv2.MORPH_RECT, (i, max_segment)) + vlopened = cv2.morphologyEx(detection_img, cv2.MORPH_OPEN, vline, iterations=1) + detection_img -= hlopened + detection_img -= vlopened + else: detection_img = cv2.cvtColor(detection_img, cv2.COLOR_RGB2GRAY) detection_width = int(self.params["tdetect"]["recursion_width"].value * haystack.width)