Merge pull request #12 from pevogam/newer-ocr-backends

Newer OCR backends: pytesseract and tesserocr for text recognition, and EAST for text detection
intra2net · Oct 2, 2020 · e86ed79 · e86ed79
2 parents 5f3f49a + 9c67e64
commit e86ed79
Show file tree

Hide file tree

Showing 10 changed files with 257 additions and 73 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -7,7 +7,7 @@ python:
 - 3.8
 - pypy3.5
 env:
-- INSTALL_VARIANT=pip DISABLE_OCR=1 DISABLE_PYTORCH=1
+- INSTALL_VARIANT=pip DISABLE_PYTORCH=1
 matrix:
   include:
   - python: 3.7
@@ -33,6 +33,10 @@ addons:
     sources:
     - ubuntu-toolchain-r-test
     packages:
+    # ocr
+    - pkg-config
+    - tesseract-ocr
+    - libtesseract-dev
     # virtual screen
     - libx11-dev
     - libxtst-dev
@@ -51,6 +55,7 @@ before_script:
 - "/sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile
   --background --exec /usr/bin/Xvfb -- :99 -ac -screen 0 1024x768x24"
 - sleep 3  # give xvfb some time to start
+- if [[ $TRAVIS_PYTHON_VERSION == '3.5' ]]; then export DISABLE_OCR=1; fi
 - if [[ $TRAVIS_PYTHON_VERSION == 'pypy3.5' ]]; then export DISABLE_AUTOPY=1; fi
 - if [[ $TRAVIS_PYTHON_VERSION == 'pypy3.5' ]]; then export DISABLE_PYQT=1; fi
 - if [[ $TRAVIS_PYTHON_VERSION == 'pypy3.5' ]]; then export DISABLE_OPENCV=1; fi

diff --git a/guibot/calibrator.py b/guibot/calibrator.py
@@ -26,6 +26,12 @@
 log = logging.getLogger('guibot.calibrator')
 
 
+#: explicit blacklist of backend combinations to skip for benchmarking
+benchmark_blacklist = [("mixed", "normal", "mixed", "east", "hmm", "adaptive", "adaptive"),
+                       ("mixed", "adaptive", "mixed", "east", "hmm", "adaptive", "adaptive"),
+                       ("mixed", "canny", "mixed", "east", "hmm", "adaptive", "adaptive")]
+
+
 class Calibrator(object):
     """
     Provides a group of methods to facilitate and automate the selection
@@ -112,12 +118,19 @@ def backend_tuples(category_list, finder):
                     for z in backend_tuples(category_list[1:], finder):
                         yield (backend,) + z
         for backend_tuple in backend_tuples(ordered_categories, finder):
+            if backend_tuple in benchmark_blacklist:
+                log.warning("Skipping blacklisted benchmarked backend combination")
+                continue
             method = "+".join(backend_tuple)
             log.info("Benchmark testing with %s", method)
 
             for backend, category in zip(backend_tuple, ordered_categories):
                 finder.configure_backend(backend=backend, category=category, reset=False)
                 finder.can_calibrate(category, calibration)
+                try:
+                    finder.synchronize_backend(backend=backend, category=category, reset=False)
+                except UnsupportedBackendError as error:
+                    log.debug("Skipping synchronization for %s/backend=%s", category, backend)
 
             if random_starts > 0:
                 self.search(finder, random_starts=random_starts, uniform=uniform,
@@ -489,6 +502,10 @@ def _handle_restricted_values(self, finder):
                 params["blurKernelSize"].value += 1
             if params["backend"] == "adaptive" and params["blockSize"].value % 2 == 0:
                 params["blockSize"].value += 1
+        if "tdetect" in finder.params:
+            params = finder.params["tdetect"]
+            if params["backend"] == "east" and params["input_res_x"].value != params["input_res_y"].value:
+                params["input_res_x"].value = params["input_res_y"].value
         if "ocr" in finder.params:
             params = finder.params["ocr"]
             if params["dt_mask_size"].value not in [0, 3, 5]:

diff --git a/guibot/config.py b/guibot/config.py
@@ -59,8 +59,8 @@ class GlobalConfig(type):
     _feature_detect_backend = "ORB"
     _feature_extract_backend = "ORB"
     _feature_match_backend = "BruteForce-Hamming"
-    _text_detect_backend = "erstat"
-    _text_ocr_backend = "tesseract"
+    _text_detect_backend = "contours"
+    _text_ocr_backend = "pytesseract"
     _hybrid_match_backend = "template"
 
     def toggle_delay(self, value=None):
@@ -348,8 +348,8 @@ def find_backend(self, value=None):
             * feature - matching using a mixture of feature detection,
                         extraction and matching algorithms
             * cascade - matching using OpenCV pretrained Haar cascades
-            * text - text matching using ERStat or custom text detection,
-                     followed by tesseract or Hidden Markov Model OCR
+            * text - text matching using EAST, ERStat, or custom text detection,
+                     followed by Tesseract or Hidden Markov Model OCR
             * tempfeat - a mixture of template and feature matching where the
                        first is used as necessary and the second as sufficient stage
             * deep - deep learning matching using convolutional neural network but
@@ -450,7 +450,7 @@ def text_detect_backend(self, value=None):
 
         :param value: name of the text detection backend
 
-        Supported backends: erstat, contours, components.
+        Supported backends: east, erstat, contours, components.
         """
         if value is None:
             return GlobalConfig._text_detect_backend
@@ -465,7 +465,7 @@ def text_ocr_backend(self, value=None):
 
         :param value: name of the optical character recognition backend
 
-        Supported backends: tesseract, hmm, beamSearch.
+        Supported backends: pytesseract, tesserocr, tesseract (OpenCV), hmm, beamSearch.
         """
         if value is None:
             return GlobalConfig._text_ocr_backend