Skip to content

Commit

Permalink
Merge pull request #5 from dlcs/feature/update_deps
Browse files Browse the repository at this point in the history
Update aws pip packages + multi stage build
  • Loading branch information
donaldgray authored Nov 17, 2023
2 parents 52c92b2 + fa6ad38 commit e9efbbd
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 27 deletions.
22 changes: 16 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,20 +1,30 @@
# Stage 1 ("build"): compile the pdfalto binary from source.
# Only the resulting executable (/home/pdfalto/pdfalto/pdfalto) is meant to be
# copied out of this stage; the toolchain installed here never reaches the
# final image.
FROM debian:bullseye AS build

# avoid issue with packages requiring interaction (e.g. tzdata)
ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y \
      autoconf \
      clang \
      cmake \
      git \
      pkg-config \
      wget

# WORKDIR creates the directory if it is missing, so no mkdir/cd is needed.
WORKDIR /home/pdfalto
RUN git clone https://github.com/kermitt2/pdfalto.git

WORKDIR /home/pdfalto/pdfalto
# 8bb209c0c21476ee904a is 0.4 with some bugfixes
# Change submodule to https as we're cloning only. Avoids issues with ssh
RUN git checkout 8bb209c0c21476ee904a \
 && ./install_deps.sh \
 && git submodule set-url xpdf-4.03 https://github.com/kermitt2/xpdf-4.03.git \
 && git submodule update --init --recursive \
 && cmake ./ \
 && make

# Stage 2 (runtime): slim Python image that runs monitor.py and shells out to
# the pdfalto binary produced by the "build" stage.
FROM python:3.11-slim

LABEL maintainer="Donald Gray <[email protected]>"
LABEL org.opencontainers.image.source=https://github.com/dlcs/pdf-to-alto
LABEL org.opencontainers.image.description="Extract ALTO from PDF"

# pdfalto comes from the build stage; the vendored deps/pdfalto binary has been
# removed from the repository, so it must not be copied from the build context.
COPY --from=build /home/pdfalto/pdfalto/pdfalto /usr/bin/pdfalto

# Copy the dependency manifest on its own so the pip layer below stays cached
# until requirements.txt itself changes.
COPY requirements.txt /opt/app/requirements.txt

WORKDIR /opt/app
RUN pip install --no-cache-dir -r requirements.txt

# Application source (includes app/, monitor.py and wait-for-localstack.sh).
COPY . /opt/app

# Relative to WORKDIR (/opt/app).
RUN chmod +x wait-for-localstack.sh

# Exec form so python runs as PID 1 and receives stop signals directly.
CMD ["python3", "/opt/app/monitor.py"]
Binary file removed deps/pdfalto
Binary file not shown.
18 changes: 0 additions & 18 deletions deps/readme.md

This file was deleted.

6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# boto3 and botocore are pinned as a pair; bump them together.
boto3==1.29.2
botocore==1.32.2
certifi==2021.10.8
charset-normalizer==2.0.10
colorama==0.4.4
Expand All @@ -11,6 +11,6 @@ pycryptodome==3.12.0
PyMuPDF==1.22.5
python-dateutil==2.8.2
requests==2.27.1
# s3transfer is pulled in by boto3; keep this pin inside the range the pinned boto3 accepts.
s3transfer==0.7.0
six==1.16.0
urllib3==1.26.8

0 comments on commit e9efbbd

Please sign in to comment.