forked from ocrmypdf/OCRmyPDF
-
Notifications
You must be signed in to change notification settings - Fork 0
/
snapcraft.yaml
121 lines (102 loc) · 2.93 KB
/
snapcraft.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# SPDX-FileCopyrightText: 2022 Alexander Langanke
# SPDX-FileCopyrightText: 2022 James R. Barlow
# SPDX-FileCopyrightText: 2023 林博仁(Buo-ren, Lin) <[email protected]>
# SPDX-License-Identifier: MPL-2.0
# Snap identity and descriptive metadata.
name: ocrmypdf-digidigital
title: OCRmyPDF used by OCRthyPDF
summary: OCRmyPDF adds a searchable text layer to scanned PDF files
description: OCRmyPDF packaged for snap
icon: docs/images/logo-square-256.svg
license: MPL-2.0
version: git

# Base, release grade, confinement level and target architecture.
base: core22
grade: stable
confinement: strict
architectures:
  - amd64
# added: extra apt repository (a PPA) declared for this snap
package-repositories:
  - ppa: alex-p/tesseract-ocr5
    type: apt
# Environment variables defined for the snap; every path is rooted at $SNAP.
environment:
  # Library directory for the amd64 multiarch triplet.
  LD_LIBRARY_PATH: $SNAP/usr/lib/x86_64-linux-gnu
  # Ghostscript resource directories.
  # NOTE(review): the hard-coded 9.55 directory must match the layout of the
  # staged ghostscript package — confirm.
  GS_LIB: $SNAP/usr/share/ghostscript/9.55/Resource/Init
  GS_FONTPATH: $SNAP/usr/share/ghostscript/9.55/Resource/Font
  # Tesseract model directory. Earlier, version-specific value kept for
  # reference:
  # TESSDATA_PREFIX: $SNAP/usr/share/tesseract-ocr/4.00/tessdata
  TESSDATA_PREFIX: $SNAP/usr/share/tesseract-ocr/tessdata
# Applications exposed by the snap.
apps:
  ocrmypdf:
    # Interfaces requested by the app.
    plugs:
      - desktop
      - desktop-legacy
      - wayland
      - x11
      - home
      - removable-media
    # Runs the ocrmypdf Python module through the snapcraft-preload wrapper.
    command: usr/bin/snapcraft-preload python3 -m ocrmypdf
# Build recipe: each entry below is one part of the snap.
parts:
  # snapcraft-preload wrapper; the app command is launched through it.
  snapcraft-preload:
    plugin: cmake
    source: https://github.com/sergiusens/snapcraft-preload.git
    cmake-parameters:
      # One flag per list item.
      - -DCMAKE_INSTALL_PREFIX=/usr
      - -DLIBPATH=/usr/lib
    build-packages:
      - on amd64:
          - gcc-multilib
          - g++-multilib
    stage-packages:
      - lib32stdc++6

  # jbig2enc built from the upstream 0.29 tag with autotools.
  jbig2enc:
    plugin: autotools
    source: https://github.com/agl/jbig2enc.git
    source-tag: "0.29"
    build-packages:
      - libleptonica-dev

  # Stages the Tesseract language packs, then at prime time adds a
  # version-independent tessdata link and downloads extra "best" models.
  tesseract:
    plugin: nil
    stage-packages:
      - tesseract-ocr-all
    build-packages:
      - curl
    override-prime: |
      craftctl default
      # Locate the real tessdata directory. -type d keeps a re-run from also
      # matching the symlink created below; -quit stops at the first hit.
      tessdatapath=$(find ./ -type d -iname tessdata -print -quit)
      if [ -z "${tessdatapath}" ]; then
        echo "tessdata directory not found under $(pwd)" >&2
        exit 1
      fi
      # Create <tesseract-ocr>/tessdata pointing at the versioned directory.
      # -n replaces an existing link instead of creating a link inside the
      # directory it points to.
      ln -s -f -n "${tessdatapath/usr\/share\/tesseract-ocr\//}" "${tessdatapath}/../../tessdata"
      # --fail/--fail-early make curl return a non-zero status on an HTTP
      # error instead of saving the error page as a .traineddata file;
      # --location follows redirects. "#1" expands to the matched language.
      curl --fail --fail-early --location \
        --output "${tessdatapath}/best-#1.traineddata" \
        "https://raw.githubusercontent.com/tesseract-ocr/tessdata_best/main/{deu,eng,fra,spa,por,ita,rus,ukr,jpn,jpn_vert,ara,hin,pol}.traineddata"
      # debug
      ls -l "${tessdatapath}"

  # OCRmyPDF itself, built from this repository with the python plugin.
  ocrmypdf:
    plugin: python
    source: .
    build-packages:
      - python3-pip
    stage-packages:
      - ghostscript
      - icc-profiles-free
      - liblept5
      - libxml2
      - pngquant
      # moved to tesseract part
      # - tesseract-ocr-all
      - unpaper
      - qpdf
      - zlib1g
    python-packages:
      - cffi
      - pdfminer.six
      - pikepdf
      - Pillow
      - pluggy
      - reportlab
      - setuptools
      - tqdm
      # NOTE(review): `pipe` may have been intended as `pip` — confirm.
      - pipe
      - wheel
    override-build: |
      # Generate setup.py from pyproject.toml before the default build step.
      # The requirement is quoted so the shell cannot glob-expand "[full]".
      pip3 install --user 'dephell[full]'
      "$HOME/.local/bin/dephell" deps convert \
        --from-path pyproject.toml \
        --from-format pyproject \
        --to-path setup.py \
        --to-format setuppy
      craftctl default
      # Expose the preload library under lib/ via a relative link.
      ln -sf ../usr/lib/libsnapcraft-preload.so "$CRAFT_PART_INSTALL/lib/libsnapcraft-preload.so"