From 5df3f2b4e939c54a822e8dbaba0b99f8c92e619d Mon Sep 17 00:00:00 2001 From: Nigel Banks Date: Wed, 22 Feb 2023 23:54:09 +0000 Subject: [PATCH] Tesseract can now extract text from jp2 images. --- README.md | 17 +++++++++-------- docker-bake.hcl | 10 ++++++++-- hypercube/Dockerfile | 5 +++++ 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index d7c94721..4b2c44b3 100644 --- a/README.md +++ b/README.md @@ -404,9 +404,9 @@ Many are intermediate images used to build other images in the list, for example what settings, and ports, are exposed and what functionality it provides, as well as how to update it to the latest releases. -Additionally this repository consumes [imagemagick] image produced by a separate -repository. Since it is a standalone image that rarely changes and takes a while -to build, due to building it under emulation. +Additionally this repository consumes [imagemagick] & [leptonica] images +produced by separate repositories, since they are standalone images that rarely +change and take a while to build, due to building them under emulation. ### Updating Dependencies @@ -485,7 +485,7 @@ All of the images build by this project are derived from the The image is only `5MB` in size and has access to a package repository. It has been chosen for its small size, and ease of generating custom packages (as is -done in the [imagemagick] image). +done in the [imagemagick] & [leptonica] images). The [base] image includes two tools essential to the functioning of all the images. 
@@ -667,8 +667,8 @@ are arranged in a hierarchy, that roughly follows below: └── nginx ├── crayfish │ ├── homarus - │ ├── houdini (consumes "imagemagick" as well during its build stage) - │ ├── hypercube + │ ├── houdini (consumes [imagemagick] as well during its build stage) + │ ├── hypercube (consumes [leptonica] as well during its build stage) │ ├── milliner │ └── riprap ├── crayfits @@ -677,8 +677,8 @@ are arranged in a hierarchy, that roughly follows below: └── matomo ``` -[imagemagick] stand outside of the hierarchy as they are use only to build -packages that are consumed by other images during their build stage. +[imagemagick] & [leptonica] stand outside of the hierarchy as they are used only +to build packages that are consumed by other images during their build stage. ### Folder Layout @@ -865,6 +865,7 @@ adding the following, and restarting `Docker`: [islandora-starter-site]: https://github.com/Islandora/islandora-starter-site [isle-dc]: https://github.com/Islandora-Devops/isle-dc [isle-site-template]: https://github.com/Islandora-Devops/isle-site-template +[leptonica]: https://github.com/Islandora-Devops/isle-leptonica [musl libc]: https://musl.libc.org/ [official documentation]: https://islandora.github.io/documentation/ [Overlay2]: https://docs.docker.com/storage/storagedriver/overlayfs-driver#configure-docker-with-the-overlay-or-overlay2-storage-driver diff --git a/docker-bake.hcl b/docker-bake.hcl index dc2ee9f0..6516e401 100644 --- a/docker-bake.hcl +++ b/docker-bake.hcl @@ -295,7 +295,9 @@ target "base-common" { # The digest (sha256 hash) is not platform specific but the digest for the manifest of all platforms. # It will be the digest printed when you do: docker pull alpine:3.17.1 # Not the one displayed on DockerHub. - # N.B. This should match the value used in + # N.B. 
This should match the value used in: + # - + # - alpine = "docker-image://alpine:3.17.1@sha256:f271e74b17ced29b915d351685fd4644785c6d1559dd1f2d4189a5e851ef753a" } } @@ -355,13 +357,17 @@ target "houdini-common" { context = "houdini" contexts = { # Produced by this repository . - imagemagick = "docker-image://islandora/imagemagick:7.1.0.16@sha256:c9a9c5a7a6f49f38e5ddb4046b15ce149276ee08ab8d1d47a25bfa01a8530cab" + imagemagick = "docker-image://islandora/imagemagick:7.1.0.16@sha256:6183916c95e5207033022ba92035ae9723df86b1ddfd90a737d953d4a829d7f3" } } target "hypercube-common" { inherits = ["common"] context = "hypercube" + contexts = { + # Produced by this repository . + leptonica = "docker-image://islandora/leptonica:1.82.0@sha256:142c4fe8215625e2c4eca0ad990b36b87791e4e319aa2dab34d5ef9f4eae8bd0" + } } target "java-common" { diff --git a/hypercube/Dockerfile b/hypercube/Dockerfile index e37f62e9..6eaf16d3 100644 --- a/hypercube/Dockerfile +++ b/hypercube/Dockerfile @@ -1,4 +1,5 @@ # syntax=docker/dockerfile:1.5.1 +FROM leptonica FROM crayfish AS hypercube ARG TARGETARCH @@ -14,8 +15,12 @@ RUN --mount=type=cache,id=hypercube-composer-${TARGETARCH},sharing=locked,target cleanup.sh # Platform specific does require arch specific identifier. +# Though platform information is included via the FROM leptonica. RUN --mount=type=cache,id=hypercube-apk-${TARGETARCH},sharing=locked,target=/var/cache/apk \ + --mount=type=bind,from=leptonica,source=/packages,target=/packages \ + --mount=type=bind,from=leptonica,source=/etc/apk/keys,target=/etc/apk/keys \ apk add \ + /packages/leptonica-*.apk \ poppler-utils \ tesseract-ocr \ tesseract-ocr-data-fra \