-
Notifications
You must be signed in to change notification settings - Fork 12
/
Dockerfile-tess5.1.0
72 lines (60 loc) · 3.74 KB
/
Dockerfile-tess5.1.0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
## Builds binaries for Amazonlinux 2
# Build image: compiles Leptonica and Tesseract from source and gathers the
# resulting binary, shared libraries and trained data under ${DIST}.
FROM lambci/lambda-base-2:build
#Proxy setup if exists
# (uncomment and fill in the two lines below when building behind a proxy)
#ENV http_proxy 'http://ip:port'
#ENV https_proxy 'https://ip:port'
# Leptonica release downloaded from the DanBloomberg/leptonica GitHub releases.
ARG LEPTONICA_VERSION=1.79.0
# Tesseract tag whose source archive is downloaded from tesseract-ocr/tesseract.
ARG TESSERACT_VERSION=5.1.0
# autoconf-archive release downloaded from ftp.gnu.org.
ARG AUTOCONF_ARCHIVE_VERSION=2021.02.19
# Parent directory of the per-project build directories (<name>-build).
ARG TMP_BUILD=/tmp
# Install prefix passed to Tesseract's ./configure.
ARG TESSERACT=/opt/tesseract
# Install prefix passed to Leptonica's ./configure.
ARG LEPTONICA=/opt/leptonica
# Directory in which the distributable files (bin/, lib/, tessdata) are collected.
ARG DIST=/opt/build-dist
# change OCR_LANG to enable the layer for different languages
# (its traineddata file is downloaded in addition to "osd" and "eng")
ARG OCR_LANG=fra
# change TESSERACT_DATA_SUFFIX to use different datafiles (options: "_best", "_fast" and "")
# (the suffix is appended to "tessdata" to select the GitHub repository)
ARG TESSERACT_DATA_SUFFIX=_best
# Git ref of the tessdata repository the traineddata files are fetched from.
ARG TESSERACT_DATA_VERSION=4.1.0
# Bring the base image's packages up to date and make sure the standard build
# toolchain group is installed.
# Every step is chained with `&&` so that a failing yum command aborts the
# build; previously the mix of `;` and `&&` meant the exit status of the whole
# step was always that of the final `yum clean all`.
#  - `yum upgrade` is run once; it already covers what `yum update` does.
#  - yum-plugin-ovl is the yum plugin for overlayfs-backed containers.
#  - The yum cache is cleaned a single time, in this same layer, so it does not
#    end up stored in the image.
RUN yum -y upgrade && \
    yum -y install yum-plugin-ovl && \
    yum -y groupinstall "Development Tools" && \
    yum clean all
# Compiler toolchain and development headers made available to the Leptonica
# and Tesseract source builds that follow.
# `aclocal` is intentionally not listed: it is not a package of its own, the
# tool is shipped by the automake package.
# Packages are listed one per line in alphabetical order to keep diffs small,
# and the yum cache is removed in the same layer only after a successful
# install (`&&` instead of `;`, which used to hide install failures).
RUN yum -y install \
        autoconf \
        automake \
        cairo-devel \
        freetype-devel \
        gcc \
        gcc-c++ \
        lcms2-devel \
        libicu-devel \
        libjpeg-devel \
        libpng-devel \
        libtiff-devel \
        libtool \
        libwebp-devel \
        libzip-devel \
        make \
        pango-devel \
        tcl-devel \
        tk-devel \
        zlib-devel \
    && yum clean all
# Build Leptonica ${LEPTONICA_VERSION} from its release tarball and install it
# under the ${LEPTONICA} prefix.
WORKDIR ${TMP_BUILD}/leptonica-build
# The tarball is saved to disk before extraction instead of being piped into
# tar: with `curl -f` an HTTP error (e.g. a wrong version number) fails the
# step immediately, which a pipe under the default /bin/sh does not guarantee.
# The built .libs directory is additionally copied to /opt/liblept.
RUN curl -fsSL -o leptonica.tar.gz https://github.com/DanBloomberg/leptonica/releases/download/${LEPTONICA_VERSION}/leptonica-${LEPTONICA_VERSION}.tar.gz && \
    tar xzf leptonica.tar.gz && \
    rm leptonica.tar.gz && \
    cd leptonica-${LEPTONICA_VERSION} && \
    ./configure --prefix=${LEPTONICA} && \
    make && \
    make install && \
    cp -r ./src/.libs /opt/liblept
# Register the Leptonica library directory with the dynamic linker. The path is
# derived from ${LEPTONICA} so that overriding the build argument keeps the
# linker configuration in sync with the install prefix.
RUN echo "${LEPTONICA}/lib" > /etc/ld.so.conf.d/leptonica.conf && ldconfig
# Build and install GNU autoconf-archive ${AUTOCONF_ARCHIVE_VERSION}, then copy
# its m4 macros into /usr/share/aclocal as well.
WORKDIR ${TMP_BUILD}/autoconf-build
# `curl -f` turns HTTP errors into a failed step and `-L` follows redirects;
# the archive is written to disk first so a broken download can never be fed
# to tar through a pipe whose failure the shell would ignore.
RUN curl -fsSL -o autoconf-archive.tar.xz https://ftp.gnu.org/gnu/autoconf-archive/autoconf-archive-${AUTOCONF_ARCHIVE_VERSION}.tar.xz && \
    tar xJf autoconf-archive.tar.xz && \
    rm autoconf-archive.tar.xz && \
    cd autoconf-archive-${AUTOCONF_ARCHIVE_VERSION} && \
    ./configure && \
    make && \
    make install && \
    cp ./m4/* /usr/share/aclocal/
# Build Tesseract ${TESSERACT_VERSION} from the GitHub source archive against
# the Leptonica installation in ${LEPTONICA} and install it under ${TESSERACT}.
WORKDIR ${TMP_BUILD}/tesseract-build
# All Leptonica paths (pkg-config directory, headers, libraries) are derived
# from ${LEPTONICA} rather than a hard-coded /opt/leptonica, so overriding that
# build argument no longer breaks this step. The archive is downloaded with
# `curl -f` to a file so that an HTTP error fails the build right away.
RUN curl -fsSL -o tesseract.tar.gz https://github.com/tesseract-ocr/tesseract/archive/${TESSERACT_VERSION}.tar.gz && \
    tar xzf tesseract.tar.gz && \
    rm tesseract.tar.gz && \
    cd tesseract-${TESSERACT_VERSION} && \
    ./autogen.sh && \
    PKG_CONFIG_PATH=${LEPTONICA}/lib/pkgconfig LIBLEPT_HEADERSDIR=${LEPTONICA}/include \
    ./configure --prefix=${TESSERACT} --with-extra-includes=${LEPTONICA}/include --with-extra-libraries=${LEPTONICA}/lib && \
    make && \
    make install
# Assemble the distributable tree in ${DIST}:
#   bin/                 the tesseract executable
#   lib/                 libtesseract, liblept and the system shared libraries
#                        copied from /usr/lib64
#   TESSERACT-README.md  versions and languages this build was made with
# Finally every shared object in lib/ is stripped of its symbols.
WORKDIR /opt
RUN mkdir -p ${DIST}/lib ${DIST}/bin && \
    cp ${TESSERACT}/bin/tesseract ${DIST}/bin/ && \
    cp ${TESSERACT}/lib/libtesseract.so.5 \
       ${LEPTONICA}/lib/liblept.so.5 \
       /usr/lib64/libgomp.so.1 \
       /usr/lib64/libwebp.so.4 \
       /usr/lib64/libpng15.so.15 \
       /usr/lib64/libjpeg.so.62 \
       /usr/lib64/libtiff.so.5 \
       /usr/lib64/libjbig.so.2.0 \
       ${DIST}/lib/ && \
    echo -e "LEPTONICA_VERSION=${LEPTONICA_VERSION}\nTESSERACT_VERSION=${TESSERACT_VERSION}\nTESSERACT_DATA_FILES=tessdata${TESSERACT_DATA_SUFFIX}/${TESSERACT_DATA_VERSION}\nTESSERACT_DATA_LANGUAGES=osd,eng,${OCR_LANG}" > ${DIST}/TESSERACT-README.md && \
    find ${DIST}/lib -name '*.so*' | xargs strip -s
# Download the trained data files for "osd", "eng" and ${OCR_LANG} from the
# tessdata${TESSERACT_DATA_SUFFIX} repository at ref ${TESSERACT_DATA_VERSION}
# into the tessdata directory of the distribution.
WORKDIR ${DIST}/tesseract/share/tessdata
# `curl -f` is essential here: without it an HTTP error (unknown language code,
# wrong version) is not an error for curl and the error page would be saved
# as a *.traineddata file, producing a broken layer without failing the build.
# `|| exit 1` stops at the first failed download.
RUN for lang in osd eng "${OCR_LANG}"; do \
        curl -fsSL -o "${lang}.traineddata" \
            "https://github.com/tesseract-ocr/tessdata${TESSERACT_DATA_SUFFIX}/raw/${TESSERACT_DATA_VERSION}/${lang}.traineddata" \
        || exit 1; \
    done
# Copy the whole build context into the tessdata directory (presumably to pick
# up additional traineddata files kept next to this Dockerfile; confirm).
COPY . ${DIST}/tesseract/share/tessdata/
# Remove the repository housekeeping files that were copied along. The working
# directory is still the tessdata directory set by the preceding WORKDIR.
# A single `rm -rf` replaces five separate `RUN rm ...; exit 0` layers: with
# `-f`, paths that do not exist are ignored, so no exit-status masking is
# needed. Note that files deleted here remain stored in the COPY layer above;
# excluding them through a .dockerignore file would keep them out entirely.
RUN rm -rf Dockerfile* READM* build_* *.zip example
# Leave the image in /var/task.
WORKDIR /var/task
#COPY requirements.txt .
#RUN pip install -r requirements.txt -t python/lib/python3.8/site-packages/