-
-
Notifications
You must be signed in to change notification settings - Fork 6.2k
Expand file tree
/
Copy pathDockerfile.transcribe.gpu
More file actions
91 lines (75 loc) · 3.13 KB
/
Copy pathDockerfile.transcribe.gpu
File metadata and controls
91 lines (75 loc) · 3.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# GPU-enabled variant of the Transcribe server image (NVIDIA / CUDA).
#
# This Dockerfile is the same as Dockerfile.transcribe, but:
# - The base image is nvidia/cuda (so the CUDA runtime libs are present).
# - Node.js 24 is installed on top of the CUDA base image.
# - The CUDA build of llama-mtmd-cli is copied from the official llama.cpp
# CUDA image (ghcr.io/ggml-org/llama.cpp:full-cuda-b5449). We use a
# multi-stage COPY here because llama.cpp does not publish a prebuilt
# Linux/CUDA zip on its release page — only Windows CUDA zips and the
# official Docker images.
#
# To run with GPU:
# docker run --gpus all -e HTR_CLI_GPU_LAYERS=9999 \
# --rm --env-file .env-transcribe -p 4567:4567 \
# -v ./data:/data joplin/transcribe:gpu-latest
#
# Requires the NVIDIA Container Toolkit on the host.
# Apple Silicon / Metal is not supported through Docker (Docker Desktop on
# macOS cannot expose the GPU to containers) — see packages/transcribe/README.md
# for the native-run instructions.
# Image tags are exposed as build arguments so the llama.cpp build and the
# CUDA runtime can be bumped with `--build-arg` without editing this file.
# The defaults reproduce the tags that were previously hard-coded.
# (ARGs declared before the first FROM are only visible in FROM lines.)
ARG LLAMA_CPP_TAG=full-cuda-b5449
ARG CUDA_RUNTIME_TAG=13.1.1-cudnn-runtime-ubuntu22.04

# Stage 1: pull the CUDA-built llama.cpp binaries from the official image.
# This stage is only a source for COPY --from; it is not the final image.
FROM ghcr.io/ggml-org/llama.cpp:${LLAMA_CPP_TAG} AS llama-cuda

# Stage 2: our runtime image.
# NOTE(review): the binaries copied from stage 1 are linked against whatever
# CUDA version that image was built with. Confirm that the CUDA major version
# of this runtime base matches it, otherwise the copied CUDA libraries may
# fail to resolve their dependencies at run time.
# NOTE(review): the "-cudnn-" variant is used, but nothing in this file
# references cuDNN — confirm whether the plain "-runtime-" variant suffices.
FROM nvidia/cuda:${CUDA_RUNTIME_TAG}
# Install the OS-level dependencies and Node.js 24 from the NodeSource apt
# repository, all in one layer.
# - The NodeSource setup script is downloaded to a file and then executed
#   instead of being piped into bash. RUN uses /bin/sh without pipefail, so
#   with `curl ... | bash -` a failed download would still exit 0 and the
#   following `apt-get install nodejs` could silently install whatever nodejs
#   the distribution archive offers instead of Node.js 24.
# - --no-install-recommends keeps optional packages out of the image.
# - DEBIAN_FRONTEND is set inline so it does not leak into the runtime env.
# - The apt package lists are removed in the same layer that created them.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        gnupg \
        python3 \
        tini \
        wget \
    && curl -fsSL https://deb.nodesource.com/setup_24.x -o /tmp/nodesource_setup.sh \
    && bash /tmp/nodesource_setup.sh \
    && rm -f /tmp/nodesource_setup.sh \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nodejs \
    && rm -rf /var/lib/apt/lists/*

ENV NODE_ENV=production

# Enable corepack; presumably this is what makes the `yarn` command used by
# the later build and start steps available — confirm against the repo setup.
RUN corepack enable
# Copy the CUDA-built llama-mtmd-cli binary and its runtime shared libraries
# from the official llama.cpp CUDA image (stage "llama-cuda"). The /app
# directory of that image contains the built binaries and their .so
# dependencies.
# A separate `RUN mkdir -p` layer is not needed: COPY creates the destination
# directory, including missing parents. The trailing slash on the destination
# is required because more than one source is given.
COPY --from=llama-cuda /app/llama-mtmd-cli /app/*.so /opt/llama/build/bin/

# Ensure the binary is executable regardless of the mode it had in the
# source image.
RUN chmod +x /opt/llama/build/bin/llama-mtmd-cli
# Dedicated unprivileged system account (group + user with a home directory);
# the image switches to it with USER before the application is started.
RUN groupadd --system transcribe \
    && useradd --system --gid transcribe --create-home transcribe
WORKDIR /app

# Yarn release and patches that are checked into the repository.
COPY .yarn/releases ./.yarn/releases
COPY .yarn/patches ./.yarn/patches

# Root-level manifests and build configuration, copied in a single layer.
COPY package.json .yarnrc.yml yarn.lock gulpfile.js tsconfig.json ./

# Workspace packages included in this image.
COPY packages/htmlpack ./packages/htmlpack
COPY packages/lib ./packages/lib
COPY packages/renderer ./packages/renderer
COPY packages/tools ./packages/tools
COPY packages/transcribe ./packages/transcribe
COPY packages/utils ./packages/utils
# onenote-converter is not used by the server, so every line mentioning it is
# deleted from the lib manifest before installing, to avoid building it.
RUN sed -i '/onenote-converter/d' packages/lib/package.json

# Install the workspace dependencies, then clean the yarn cache and remove
# .yarn/berry in the same layer so they do not end up in the image.
# NOTE(review): BUILD_SEQUENCIAL is spelled exactly as in the original;
# presumably that is the name the build scripts read — confirm before
# "correcting" it.
RUN BUILD_SEQUENCIAL=1 yarn install --inline-builds && \
    yarn cache clean && \
    rm -rf .yarn/berry
# Data directory (with an images/ subdirectory), handed over to the
# transcribe account so the unprivileged process can write to it.
RUN mkdir --parents /data/images && \
    chown --recursive transcribe:transcribe /data

WORKDIR /app/packages/transcribe

# Everything from here on, including the container's main process, runs as
# the unprivileged user.
USER transcribe
# Runtime configuration, grouped in a single ENV instruction.
# - HTR_CLI_BINARY_PATH: location of the llama-mtmd-cli binary inside the image.
# - LD_LIBRARY_PATH: the directory holding llama.cpp's shared libraries is
#   *prepended* to any value inherited from the base image instead of
#   replacing it, so library paths the base image defines are not discarded.
#   The ${VAR:+...} form avoids a trailing ':' (an empty entry, which the
#   dynamic loader treats as the current directory) when nothing is inherited.
# - DATA_DIR: the data directory inside the container.
# - QUEUE_DRIVER: queue backend selection (sqlite).
ENV HTR_CLI_BINARY_PATH=/opt/llama/build/bin/llama-mtmd-cli \
    LD_LIBRARY_PATH=/opt/llama/build/bin${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} \
    DATA_DIR=/data \
    QUEUE_DRIVER=sqlite
# Start the Node.js application.
# tini is the container's entrypoint (PID 1) and launches the CMD as its
# child; both use exec (JSON-array) form, so no intermediate shell is involved.
# `yarn start` is only the default command and can be overridden at
# `docker run` time while tini stays in place.
ENTRYPOINT ["/usr/bin/tini", "--"]
CMD ["yarn", "start"]