Patch summary (free-form text ahead of the first "diff --git" header):
  * Dockerfile: add a "build" stage on debian:bullseye that clones pdfalto,
    checks out 8bb209c0c21476ee904a and compiles it; the final
    python:3.11-slim stage now copies the binary from that stage instead of
    from the checked-in deps/pdfalto.
  * Dockerfile: the three per-path COPY instructions are replaced by a single
    "COPY . /opt/app".
  * deps/: the prebuilt binary and the readme describing how it was built are
    deleted.
  * requirements.txt: boto3 1.20.33 -> 1.29.2, botocore 1.23.33 -> 1.32.2,
    s3transfer 0.5.0 -> 0.7.0.

Review notes:
  * Line breaks were restored so that each hunk matches its "@@" line counts.
    Indentation of the "&&" continuation lines and the positions of blank
    lines are reconstructed - verify against the repository before applying.
  * Relative to the recorded change, the stage alias keyword is uppercased
    ("AS build") and the Dockerfile keeps its final newline, so the CMD line
    is plain context. The post-image hash on the Dockerfile "index" line
    predates these two tweaks.
  * NOTE(review): the maintainer LABEL value looks truncated (trailing space
    inside the quotes) - confirm against the repository.
  * NOTE(review): "COPY . /opt/app" copies the whole build context; confirm a
    .dockerignore exists.

diff --git a/Dockerfile b/Dockerfile
index dc53808..e5f42a8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,20 +1,30 @@
+FROM debian:bullseye AS build
+
+# avoid issue with packages requiring interaction (e.g. tzdata)
+ARG DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && apt-get install -y wget cmake clang git autoconf pkg-config
+
+# Change submodule to https as we're cloning only. Avoids issues with ssh
+# 8bb209c0c21476ee904a is 0.4 with some bugfixes
+RUN mkdir /home/pdfalto && cd /home/pdfalto \
+    && git clone https://github.com/kermitt2/pdfalto.git && cd pdfalto && git checkout 8bb209c0c21476ee904a && ./install_deps.sh \
+    && git submodule set-url xpdf-4.03 https://github.com/kermitt2/xpdf-4.03.git && git submodule update --init --recursive \
+    && cmake ./ && make
+
 FROM python:3.11-slim
 
 LABEL maintainer="Donald Gray "
 LABEL org.opencontainers.image.source=https://github.com/dlcs/pdf-to-alto
 LABEL org.opencontainers.image.description="Extract ALTO from PDF"
 
-COPY /deps/pdfalto /usr/bin/pdfalto
+COPY --from=build /home/pdfalto/pdfalto/pdfalto /usr/bin/pdfalto
 
 COPY requirements.txt /opt/app/requirements.txt
 
 WORKDIR /opt/app
 RUN pip install --no-cache-dir -r requirements.txt
-
-COPY app /opt/app/app
-COPY monitor.py /opt/app/monitor.py
-COPY wait-for-localstack.sh /opt/app/wait-for-localstack.sh
+COPY . /opt/app
 
 RUN chmod +x wait-for-localstack.sh
 
 CMD ["python3", "/opt/app/monitor.py"]
diff --git a/deps/pdfalto b/deps/pdfalto
deleted file mode 100644
index 9c9b386..0000000
Binary files a/deps/pdfalto and /dev/null differ
diff --git a/deps/readme.md b/deps/readme.md
deleted file mode 100644
index ecb4d82..0000000
--- a/deps/readme.md
+++ /dev/null
@@ -1,18 +0,0 @@
-# Dependencies
-
-pdfalto is a required binary from https://github.com/kermitt2/pdfalto.git. Built using:
-
-```dockerfile
-FROM debian:bullseye as build
-
-# avoid issue with packages requiring interaction (e.g. tzdata)
-ARG DEBIAN_FRONTEND=noninteractive
-RUN apt-get update && apt-get install -y wget cmake clang git autoconf pkg-config
-
-# Change submodule to https as we're cloning only. Avoids issues with ssh
-# 8bb209c0c21476ee904a is 0.4 with some bugfixes
-RUN mkdir /home/pdfalto && cd /home/pdfalto \
-    && git clone https://github.com/kermitt2/pdfalto.git && cd pdfalto && git checkout 8bb209c0c21476ee904a && ./install_deps.sh \
-    && git submodule set-url xpdf-4.03 https://github.com/kermitt2/xpdf-4.03.git && git submodule update --init --recursive \
-    && cmake ./ && make
-```
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 4a84b25..a385a68 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
-boto3==1.20.33
-botocore==1.23.33
+boto3==1.29.2
+botocore==1.32.2
 certifi==2021.10.8
 charset-normalizer==2.0.10
 colorama==0.4.4
@@ -11,6 +11,6 @@ pycryptodome==3.12.0
 PyMuPDF==1.22.5
 python-dateutil==2.8.2
 requests==2.27.1
-s3transfer==0.5.0
+s3transfer==0.7.0
 six==1.16.0
 urllib3==1.26.8