From 49e70920915cae4342ccb8824f8f4dc71a5c688a Mon Sep 17 00:00:00 2001
From: Plamen Dimitrov <plamen.dimitrov@intra2net.com>
Date: Mon, 29 Apr 2024 13:22:56 +0800
Subject: [PATCH] Remove segment line residue from thresholding text-containing
 boxes

GUI screendumps have to be made as similar as possible to regular
OCR-compatible documents containing purely text. While thresholding a
screendump for text detection is one step closer in this direction,
GUI elements containing text can turn into boxes of vertical and
horizontal lines within the binarized image after thresholding, which
would then lower the text detection quality. To deal with this, let's
optionally perform some morphological operations to remove segment
lines with thickness up to a maximal segment line length.
---
 guibot/finder.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/guibot/finder.py b/guibot/finder.py
index e44e0eb9..47b0c699 100644
--- a/guibot/finder.py
+++ b/guibot/finder.py
@@ -1869,6 +1869,7 @@ def __configure_backend(self, backend=None, category="text", reset=False):
                 self.params[category]["psmode"] = CVParameter(3, 0, 13, enumerated=True)
                 self.params[category]["extra_configs"] = CVParameter("")
                 self.params[category]["binarize_detection"] = CVParameter(False)
+                self.params[category]["segment_line_max"] = CVParameter(1, 1, None, 1.0)
                 self.params[category]["recursion_height"] = CVParameter(0.3, 0.0, 1.0, 0.01)
                 self.params[category]["recursion_width"] = CVParameter(0.3, 0.0, 1.0, 0.01)
             elif backend == "east":
@@ -2285,6 +2286,17 @@ def _detect_text_boxes(self, haystack):
         detection_img = numpy.array(haystack.pil_image)
         if self.params["tdetect"]["binarize_detection"].value:
             detection_img = self._binarize_image(detection_img)
+
+            # remove segment line residue from thresholding text containing boxes (GUI elements)
+            max_segment = self.params["tdetect"]["segment_line_max"].value
+            for i in range(1, max_segment):
+                hline = cv2.getStructuringElement(cv2.MORPH_RECT, (max_segment, i))
+                hlopened = cv2.morphologyEx(detection_img, cv2.MORPH_OPEN, hline, iterations=1)
+                vline = cv2.getStructuringElement(cv2.MORPH_RECT, (i, max_segment))
+                vlopened = cv2.morphologyEx(detection_img, cv2.MORPH_OPEN, vline, iterations=1)
+                detection_img -= hlopened
+                detection_img -= vlopened
+
         else:
             detection_img = cv2.cvtColor(detection_img, cv2.COLOR_RGB2GRAY)
         detection_width = int(self.params["tdetect"]["recursion_width"].value * haystack.width)