Skip to content

Commit

Permalink
re-add C locale for tesseract 4.0
Browse files Browse the repository at this point in the history
  • Loading branch information
pachadotdev committed Aug 8, 2024
1 parent 3c19c85 commit 73006b5
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 1 deletion.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@
vignettes/.*\.png$
^configure.log$
^\.github$
^\.vscode$
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ inst/tessdata
windows
src/Makevars
configure.log
\.vscode
28 changes: 27 additions & 1 deletion src/tesseract.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,26 @@

using namespace cpp11;

/* libtesseract 4.0 insists that the engine be initialized in the 'C' locale.
* We switch locales temporarily, following the example code in the libc manual:
* https://www.gnu.org/software/libc/manual/html_node/Setting-the-Locale.html
* Full discussion: https://github.com/tesseract-ocr/tesseract/issues/1670
*
* TESSERACT40 is defined only when the version macros report major 4, minor 0;
* the engine-creation code in this file uses it to guard the save / switch /
* restore of the locale around initialization.
*/
#if TESSERACT_MAJOR_VERSION == 4 && TESSERACT_MINOR_VERSION == 0
#define TESSERACT40
#endif

/* Allocates a TessBaseAPI with `new` and prepares it via InitForAnalysePage().
 *
 * When TESSERACT40 is defined, the process locale (LC_ALL) is switched to "C"
 * for the duration of construction/initialization and restored afterwards.
 *
 * Returns the newly allocated API object as a raw pointer.
 * NOTE(review): presumably the caller is responsible for deleting it - confirm
 * against the call sites.
 */
static tesseract::TessBaseAPI *make_analyze_api() {
#ifdef TESSERACT40
  /* setlocale(LC_ALL, NULL) only queries the current locale, but it may return
   * NULL; strdup(NULL) is undefined, so copy the name only when it is valid.
   * The copy is needed because later setlocale() calls may overwrite the
   * returned string. */
  const char *current_locale = setlocale(LC_ALL, NULL);
  char *saved_locale = current_locale ? strdup(current_locale) : NULL;
  setlocale(LC_ALL, "C");
#endif
  tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
  api->InitForAnalysePage();
#ifdef TESSERACT40
  /* Restore only if the previous locale was captured successfully; passing a
   * NULL name to setlocale() would merely query and leave "C" in place anyway. */
  if (saved_locale) {
    setlocale(LC_ALL, saved_locale);
    free(saved_locale);
  }
#endif
  return api;
}

Expand Down Expand Up @@ -57,8 +74,12 @@ static tesseract::TessBaseAPI *make_analyze_api() {
configs.push_back(&config_strings.back()[0]);
}

tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
#ifdef TESSERACT40
char *old_ctype = strdup(setlocale(LC_ALL, NULL));
setlocale(LC_ALL, "C");
#endif

tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();

GenericVector<STRING> params, values;
for (int i = 0; i < opt_names.size(); i++) {
Expand All @@ -68,6 +89,11 @@ static tesseract::TessBaseAPI *make_analyze_api() {
int err = api->Init(path, lang, tesseract::OEM_DEFAULT, configs.data(),
configs.size(), &params, &values, false);

#ifdef TESSERACT40
setlocale(LC_ALL, old_ctype);
free(old_ctype);
#endif

if (err) {
delete api;
throw std::runtime_error(
Expand Down

0 comments on commit 73006b5

Please sign in to comment.