diff --git a/guibot/finder.py b/guibot/finder.py index 411f6e1c..bda51ad8 100644 --- a/guibot/finder.py +++ b/guibot/finder.py @@ -2002,19 +2002,22 @@ def __synchronize_backend(self, backend=None, category="text", reset=False): import cv2 datapath = self.params["text"]["datapath"].value + tessdata_path = os.path.join(datapath, "tessdata") + if not os.path.exists(tessdata_path): + tessdata_path = os.environ.get("TESSDATA_PREFIX", "./tessdata") + if not os.path.exists(tessdata_path): + tessdata_path = None + if category == "text" or category in ["contour", "threshold", "threshold2"]: # nothing to sync return elif category == "tdetect" and backend == "pytesseract": - tessdata_path = os.path.join(datapath, "tessdata") - if not os.path.exists(tessdata_path): - tessdata_path = os.environ.get("TESSDATA_PREFIX", ".") - import pytesseract self.tbox = pytesseract - self.tbox_config = r"--tessdata-dir %s --oem %s --psm %s " - self.tbox_config %= (tessdata_path, + tessdata_dir = "--tessdata-dir '" + tessdata_path + "'" if tessdata_path else "" + self.tbox_config = r"%s --oem %s --psm %s " + self.tbox_config %= (tessdata_dir, self.params["tdetect"]["oem"].value, self.params["tdetect"]["psmode"].value) self.tbox_config += r"-c tessedit_char_whitelist='%s' %s batch.nochop wordstrbox" @@ -2038,15 +2041,12 @@ def __synchronize_backend(self, backend=None, category="text", reset=False): return elif category == "ocr": - tessdata_path = os.path.join(datapath, "tessdata") - if not os.path.exists(tessdata_path): - tessdata_path = os.environ.get("TESSDATA_PREFIX", ".") - if backend == "pytesseract": import pytesseract self.ocr = pytesseract - self.ocr_config = r"--tessdata-dir '%s' --oem %s --psm %s " - self.ocr_config %= (tessdata_path, + tessdata_dir = "--tessdata-dir '" + tessdata_path + "'" if tessdata_path else "" + self.ocr_config = r"%s --oem %s --psm %s " + self.ocr_config %= (tessdata_dir, self.params["ocr"]["oem"].value, self.params["ocr"]["psmode"].value) self.ocr_config += r"-c tessedit_char_whitelist='%s' %s" @@ -2054,17 +2054,23 @@ def __synchronize_backend(self, backend=None, category="text", reset=False): self.params["ocr"]["extra_configs"].value) elif backend == "tesserocr": from tesserocr import PyTessBaseAPI - self.ocr = PyTessBaseAPI(path=tessdata_path, - lang=self.params["ocr"]["language"].value, - oem=self.params["ocr"]["oem"].value, - psm=self.params["ocr"]["psmode"].value) + kwargs = {"lang": self.params["ocr"]["language"].value, + "oem": self.params["ocr"]["oem"].value, + "psm": self.params["ocr"]["psmode"].value} + if tessdata_path: + self.ocr = PyTessBaseAPI(path=tessdata_path, **kwargs) + else: + self.ocr = PyTessBaseAPI(**kwargs) self.ocr.SetVariable("tessedit_char_whitelist", self.params["ocr"]["char_whitelist"].value) elif backend == "tesseract": - self.ocr = cv2.text.OCRTesseract_create(tessdata_path, - language=self.params["ocr"]["language"].value, - char_whitelist=self.params["ocr"]["char_whitelist"].value, - oem=self.params["ocr"]["oem"].value, - psmode=self.params["ocr"]["psmode"].value) + kwargs = {"language": self.params["ocr"]["language"].value, + "char_whitelist": self.params["ocr"]["char_whitelist"].value, + "oem": self.params["ocr"]["oem"].value, + "psmode": self.params["ocr"]["psmode"].value} + if tessdata_path: + self.ocr = cv2.text.OCRTesseract_create(datapath, **kwargs) + else: + self.ocr = cv2.text.OCRTesseract_create(**kwargs) elif backend in ["hmm", "beamSearch"]: import numpy diff --git a/misc/tessdata/deu.traineddata b/misc/tessdata/deu.traineddata deleted file mode 100644 index 6dd65488..00000000 Binary files a/misc/tessdata/deu.traineddata and /dev/null differ diff --git a/misc/tessdata/eng.traineddata b/misc/tessdata/eng.traineddata deleted file mode 100644 index 176dc322..00000000 Binary files a/misc/tessdata/eng.traineddata and /dev/null differ diff --git a/packaging/guibot.spec b/packaging/guibot.spec index dd1d74b9..3ffe02aa 100644 --- a/packaging/guibot.spec +++ b/packaging/guibot.spec @@ -61,7 +61,7 @@ pushd packaging %{__python3} setup.py install --root %{buildroot} popd %{__install} -d %{buildroot}%{python3_sitelib}/guibot/tests/images -%{__install} -d %{buildroot}%{python3_sitelib}/guibot/misc/tessdata +%{__install} -d %{buildroot}%{python3_sitelib}/guibot/misc %{__cp} -a tests/* %{buildroot}%{python3_sitelib}/guibot/tests %{__cp} -a misc/* %{buildroot}%{python3_sitelib}/guibot/misc