diff --git a/.Rbuildignore b/.Rbuildignore index 0be63bd..a3ea027 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -14,3 +14,4 @@ vignettes/.*\.png$ ^configure.log$ ^\.github$ +^\.vscode$ diff --git a/.gitignore b/.gitignore index 08d7203..d5b2fcf 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ inst/tessdata windows src/Makevars configure.log +\.vscode diff --git a/src/tesseract.cpp b/src/tesseract.cpp index f9e30cb..6ab5883 100644 --- a/src/tesseract.cpp +++ b/src/tesseract.cpp @@ -17,9 +17,26 @@ using namespace cpp11; +/* libtesseract 4.0 insisted that the engine is initialized in the 'C' locale. + * We save LC_ALL, switch to "C" around engine setup and restore it afterwards, following the libc manual example: + * https://www.gnu.org/software/libc/manual/html_node/Setting-the-Locale.html + * Full discussion: https://github.com/tesseract-ocr/tesseract/issues/1670 + */ +#if TESSERACT_MAJOR_VERSION == 4 && TESSERACT_MINOR_VERSION == 0 +#define TESSERACT40 +#endif + static tesseract::TessBaseAPI *make_analyze_api() { +#ifdef TESSERACT40 + char *old_ctype = strdup(setlocale(LC_ALL, NULL)); + setlocale(LC_ALL, "C"); +#endif tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI(); api->InitForAnalysePage(); +#ifdef TESSERACT40 + setlocale(LC_ALL, old_ctype); + free(old_ctype); +#endif return api; } @@ -57,8 +74,12 @@ static tesseract::TessBaseAPI *make_analyze_api() { configs.push_back(&config_strings.back()[0]); } - tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI(); +#ifdef TESSERACT40 + char *old_ctype = strdup(setlocale(LC_ALL, NULL)); + setlocale(LC_ALL, "C"); +#endif + tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI(); GenericVector<STRING> params, values; for (int i = 0; i < opt_names.size(); i++) { @@ -68,6 +89,11 @@ static tesseract::TessBaseAPI *make_analyze_api() { int err = api->Init(path, lang, tesseract::OEM_DEFAULT, configs.data(), configs.size(), &params, &values, false); +#ifdef TESSERACT40 + setlocale(LC_ALL, old_ctype); + free(old_ctype); +#endif + if (err) { delete api; throw
std::runtime_error(