[minor] OpenAI Whisper (#38)

lehigh-university-libraries · Sep 19, 2024 · cf7bb0a · cf7bb0a
1 parent 7360229
commit cf7bb0a
Show file tree

Hide file tree

Showing 7 changed files with 99 additions and 2 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -25,10 +25,10 @@ RUN adduser -S -G nobody scyllaridae
 
 RUN apk update && \
     apk add --no-cache \
-      curl==8.9.1-r0 \
+      curl==8.9.1-r2 \
       bash==5.2.26-r0 \
       ca-certificates==20240705-r0 \
-      openssl==3.3.1-r3
+      openssl==3.3.2-r0
 
 COPY . ./
 

diff --git a/ci/k8s/ingress.yaml b/ci/k8s/ingress.yaml
@@ -50,3 +50,10 @@ spec:
                 name: islandora-tesseract
                 port:
                   number: 8080
+          - path: /whisper(/|$)(.*)
+            pathType: Prefix
+            backend:
+              service:
+                name: islandora-whisper
+                port:
+                  number: 8080
diff --git a/ci/k8s/whisper.yaml b/ci/k8s/whisper.yaml
@@ -0,0 +1,45 @@
+---
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the whisper pods defined below
+metadata:
+  name: islandora-whisper
+spec:
+  selector:
+    app: islandora-whisper  # matches the pod label set in the Deployment template
+  ports:
+    - protocol: TCP
+      port: 8080  # no targetPort given, so it defaults to this port (the containerPort below)
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: islandora-whisper
+spec:
+  replicas: 3  # each replica is limited to one GPU below, so three GPUs are needed in total
+  selector:
+    matchLabels:
+      app: islandora-whisper
+  template:
+    metadata:
+      labels:
+        app: islandora-whisper
+    spec:
+      containers:
+        - name: scyllaridae-whisper
+          image: __DOCKER_REPOSITORY__/scyllaridae-whisper:main  # placeholder is presumably substituted at deploy time -- confirm
+          imagePullPolicy: IfNotPresent  # NOTE(review): with the mutable "main" tag, nodes holding a cached image will not pull newer builds
+          resources:
+            requests:
+              memory: "256Mi"
+              cpu: "500m"
+            limits:
+              memory: "1Gi"
+              nvidia.com/gpu: 1  # extended resource; presumably requires the NVIDIA device plugin on the node -- confirm
+          ports:
+            - containerPort: 8080
+          readinessProbe:
+            httpGet:
+              path: /healthcheck  # presumably served by the scyllaridae process -- confirm
+              port: 8080
+            initialDelaySeconds: 5
+            periodSeconds: 10
diff --git a/examples/whisper/Dockerfile b/examples/whisper/Dockerfile
@@ -0,0 +1,22 @@
+# whisper.cpp (CUDA build) image with the scyllaridae service layered on top.
+ARG TAG=main
+ARG DOCKER_REPOSITORY=local
+FROM ${DOCKER_REPOSITORY}/scyllaridae:${TAG} AS scyllaridae
+FROM ghcr.io/ggerganov/whisper.cpp:main-cuda
+
+# Fetch the base.en model; cmd.sh expects it at /app/models/ggml-base.en.bin.
+RUN bash ./models/download-ggml-model.sh base.en
+
+COPY --from=scyllaridae /app /app
+# scyllaridae.yml dispatches to /app/cmd.sh, so the script must ship in the image too.
+COPY scyllaridae.yml cmd.sh /app/
+
+# NOTE(review): gosu and the scyllaridae user are presumably used by
+# docker-entrypoint.sh to drop root privileges -- confirm against that script.
+RUN apt-get update && apt-get install -y gosu=1.14-1 --no-install-recommends \
+  && rm -rf /var/lib/apt/lists/* \
+  && groupadd -r nobody \
+  && useradd -r -g nobody scyllaridae \
+  && chmod +x /app/docker-entrypoint.sh /app/cmd.sh
+
+ENTRYPOINT ["/app/docker-entrypoint.sh"]
diff --git a/examples/whisper/README.md b/examples/whisper/README.md
@@ -0,0 +1,3 @@
+# Whisper
+
+OpenAI Whisper as a microservice, with NVIDIA GPU support
diff --git a/examples/whisper/cmd.sh b/examples/whisper/cmd.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+
+# Transcribe audio/video read from stdin and print the WebVTT result to stdout.
+
+set -eou pipefail
+
+input_temp=$(mktemp /tmp/whisper-input-XXXXXX)
+
+# Remove both temp files on every exit path, including failures under `set -e`.
+trap 'rm -f "$input_temp" "$input_temp.vtt"' EXIT
+
+cat > "$input_temp"
+
+# whisper.cpp appends the format extension to --output-file itself, so pass
+# the base path only; its own output is silenced so only the VTT reaches stdout.
+/app/main \
+  -m /app/models/ggml-base.en.bin \
+  --output-vtt \
+  -f "$input_temp" \
+  --output-file "$input_temp" > /dev/null 2>&1
+
+cat "$input_temp.vtt"
diff --git a/examples/whisper/scyllaridae.yml b/examples/whisper/scyllaridae.yml
@@ -0,0 +1,6 @@
+allowedMimeTypes:  # NOTE(review): presumably the request MIME types scyllaridae accepts -- confirm
+  - "audio/*"
+  - "video/*"
+cmdByMimeType:
+  default:
+    cmd: /app/cmd.sh  # script that reads the upload on stdin and writes WebVTT to stdout