Skip to content

Commit 1b65406

Browse files
ci: add Docker build workflow for CPU/GPU images with version tagging and release automation
1 parent 33904e4 commit 1b65406

File tree

1 file changed

+314
-0
lines changed

1 file changed

+314
-0
lines changed
Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
# Workflow: build the CPU and GPU Docker images and publish them to the registry.
name: Build and Publish Docker Images

# Runs only for pushed version tags or when started by hand.
on:
  workflow_dispatch:  # manual trigger
  push:
    tags: ['v*.*.*']  # version tags such as v0.6.1 or v1.2.3

# Values shared by the jobs below to compose the image reference.
env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
15+
# Job (entry under `jobs:`): placeholder that runs before the build.
# It removes nothing; its single step only prints a log line.
clean-cache:
  runs-on: ubuntu-latest
  steps:
    - name: Clear GitHub Actions Cache
      # Log-only step: no cache entries are deleted here.
      run: echo "🧹 Clearing potentially corrupted cache..."
24+
25+
# Job (entry under `jobs:`): builds one image per matrix variant (cpu, gpu)
# from the same Dockerfile and pushes it to the registry.
build-and-push:
  needs: clean-cache
  runs-on: ubuntu-latest

  # Permissions granted to GITHUB_TOKEN for this job.
  # NOTE(review): no step below appears to use `attestations` or `id-token`
  # (provenance and SBOM are both disabled) - consider dropping them.
  permissions:
    contents: read
    packages: write
    attestations: write
    id-token: write

  strategy:
    fail-fast: false
    matrix:
      # NOTE(review): `cache-scope` is not referenced by any step in this job;
      # the cache scope below is derived from `variant` and the ref name.
      include:
        - variant: cpu
          dockerfile: docker/dockerfile
          platforms: linux/amd64
          cache-scope: cpu
        - variant: gpu
          dockerfile: docker/dockerfile
          platforms: linux/amd64
          cache-scope: gpu

  steps:
    # Step 1: Checkout repository
    - name: Checkout repository
      uses: actions/checkout@v4

    # Step 2: Set up Docker Buildx for advanced features
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
      with:
        driver-opts: |
          network=host
          image=moby/buildkit:latest

    # Step 3: Log in to GitHub Container Registry
    - name: Log in to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    # Step 4: Prepare lowercase image name for Docker compatibility
    - name: Prepare lowercase image name
      id: image-name
      run: |
        # Convert to lowercase to ensure Docker compatibility
        LOWERCASE_IMAGE_NAME=$(echo "${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}" | tr '[:upper:]' '[:lower:]')
        echo "IMAGE_NAME_LOWER=${LOWERCASE_IMAGE_NAME}" >> "$GITHUB_OUTPUT"
        echo "📝 Using lowercase image name: ${LOWERCASE_IMAGE_NAME}"

    # Step 5: Extract metadata for Docker (ONLY version tags)
    - name: Extract Docker metadata
      id: meta
      uses: docker/metadata-action@v5
      with:
        images: ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}
        flavor: |
          latest=false
        tags: |
          # Specific version tag (e.g. v0.6.1-cpu, v0.6.1-gpu)
          type=ref,event=tag,suffix=-${{ matrix.variant }}
          # Latest tag for each variant (latest-cpu, latest-gpu)
          type=raw,value=latest-${{ matrix.variant }}

    # Step 6: Generate build args and the extra label based on variant
    - name: Set build arguments
      id: build-args
      run: |
        if [ "${{ matrix.variant }}" = "gpu" ]; then
          echo "BUILD_ARGS=ENABLE_GPU=true" >> "$GITHUB_OUTPUT"
          echo "LABELS=gpu.cuda=12.1.0" >> "$GITHUB_OUTPUT"
        else
          echo "BUILD_ARGS=ENABLE_GPU=false" >> "$GITHUB_OUTPUT"
          echo "LABELS=gpu.cuda=none" >> "$GITHUB_OUTPUT"
        fi

    # Step 7: Build and push Docker image (first attempt, with cache)
    - name: Build and push Docker image (${{ matrix.variant }})
      id: build
      uses: docker/build-push-action@v6
      with:
        context: .
        file: ${{ matrix.dockerfile }}
        platforms: ${{ matrix.platforms }}
        push: true
        tags: ${{ steps.meta.outputs.tags }}
        labels: |
          ${{ steps.meta.outputs.labels }}
          org.opencontainers.image.variant=${{ matrix.variant }}
          ${{ steps.build-args.outputs.LABELS }}
        # Cache scoped per variant and ref name
        cache-from: type=gha,scope=v2-${{ matrix.variant }}-${{ github.ref_name }}
        cache-to: type=gha,mode=max,scope=v2-${{ matrix.variant }}-${{ github.ref_name }}
        # Disable features that can cause conflicts
        provenance: false
        sbom: false
        outputs: type=registry
        build-args: |
          ${{ steps.build-args.outputs.BUILD_ARGS }}
      # A failed GPU build must not fail the job yet: the retry step below
      # gets a chance first.
      continue-on-error: ${{ matrix.variant == 'gpu' }}

    # Extra step: retry the GPU build without cache if the first attempt failed.
    # The condition reads `outcome` (the result before continue-on-error is
    # applied). `failure()` would never be true here because the step above
    # concludes as success when continue-on-error is set.
    - name: Retry GPU build (if needed)
      id: retry
      if: steps.build.outcome == 'failure' && matrix.variant == 'gpu'
      uses: docker/build-push-action@v6
      with:
        context: .
        file: ${{ matrix.dockerfile }}
        platforms: ${{ matrix.platforms }}
        push: true
        tags: ${{ steps.meta.outputs.tags }}
        labels: |
          ${{ steps.meta.outputs.labels }}
          org.opencontainers.image.variant=${{ matrix.variant }}
          ${{ steps.build-args.outputs.LABELS }}
        # No cache for retry
        no-cache: true
        provenance: false
        sbom: false
        outputs: type=registry
        build-args: |
          ${{ steps.build-args.outputs.BUILD_ARGS }}

    # Step 8: Output image details
    - name: Image details
      run: |
        echo "✅ Successfully built and pushed ${{ matrix.variant }} image"
        echo "📦 Image: ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}"
        echo "🏷️ Tags: ${{ steps.meta.outputs.tags }}"
        # Digest comes from whichever build attempt produced the image.
        echo "🔖 Digest: ${{ steps.build.outputs.digest || steps.retry.outputs.digest }}"
        echo "📋 Version: ${{ github.ref_name }}"
162+
163+
# Job (entry under `jobs:`): best-effort removal of container package versions
# that have no tags, or only tags beginning with "sha256".
cleanup-unwanted-images:
  needs: build-and-push
  runs-on: ubuntu-latest
  if: success()

  permissions:
    packages: write

  steps:
    - name: Clean up unwanted SHA images
      env:
        # Token is passed through the environment and read as ${GH_TOKEN}
        # in the script instead of being interpolated into the command text.
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        echo "🧹 Looking for unwanted SHA-only images to cleanup..."

        # Get package name (repository name converted to lowercase)
        PACKAGE_NAME=$(echo "${{ github.event.repository.name }}" | tr '[:upper:]' '[:lower:]')

        # NOTE(review): the /user/... endpoints address packages of the
        # authenticated user; confirm they work with GITHUB_TOKEN and for
        # organization-owned repositories (/orgs/{org}/... may be required).
        VERSIONS_URL="https://api.github.com/user/packages/container/${PACKAGE_NAME}/versions"

        # List package versions. A failed listing is surfaced as a warning and
        # the step still succeeds, so cleanup never blocks the jobs that need it.
        if ! versions_json=$(curl -fsS -H "Authorization: Bearer ${GH_TOKEN}" "${VERSIONS_URL}"); then
          echo "::warning::Could not list package versions; skipping cleanup"
          exit 0
        fi

        # Select versions with no tags or only sha256-prefixed tags.
        # The test is parenthesised so that `length` and `all(...)` both run
        # against the tags array; without the grouping the right-hand side of
        # `or` indexes `.metadata` on the array and jq aborts on the first
        # tagged version.
        jq -r '.[] | select((.metadata.container.tags // []) | (length == 0 or all(startswith("sha256")))) | .id' <<< "${versions_json}" \
          | while read -r version_id; do
              if [ -n "$version_id" ]; then
                echo "Deleting unwanted image version: $version_id"
                # Deletion stays best-effort: a failed delete does not fail the job.
                curl -X DELETE -H "Authorization: Bearer ${GH_TOKEN}" \
                  "${VERSIONS_URL}/${version_id}" || true
              fi
            done

        echo "✅ Cleanup completed"
195+
196+
# Job (entry under `jobs:`): pulls the pushed tags back from the registry to
# confirm that they exist.
verify-images:
  needs: [build-and-push, cleanup-unwanted-images]
  runs-on: ubuntu-latest

  permissions:
    packages: read

  steps:
    # For tag refs this is the tag itself (e.g. v0.6.1); for a manual run
    # from a branch it is the branch name.
    - name: Extract version
      id: version
      run: echo "VERSION=${GITHUB_REF_NAME}" >> "$GITHUB_OUTPUT"

    - name: Prepare lowercase image name
      id: image-name
      run: |
        # Convert to lowercase for Docker compatibility
        LOWERCASE_IMAGE_NAME=$(echo "${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}" | tr '[:upper:]' '[:lower:]')
        echo "IMAGE_NAME_LOWER=${LOWERCASE_IMAGE_NAME}" >> "$GITHUB_OUTPUT"

    # Authenticate so the pulls below also work when the package is not public.
    - name: Log in to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Verify CPU image
      run: |
        echo "Verifying CPU image for version ${{ steps.version.outputs.VERSION }}..."
        # The versioned tag is only generated for tag refs (the build job's
        # metadata rule is `type=ref,event=tag`), so only pull it then.
        if [ "${{ github.ref_type }}" = "tag" ]; then
          docker pull ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:${{ steps.version.outputs.VERSION }}-cpu
        fi
        docker pull ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:latest-cpu

    - name: Verify GPU image
      run: |
        echo "Verifying GPU image for version ${{ steps.version.outputs.VERSION }}..."
        # Same rule as for the CPU image: versioned tag exists only for tag refs.
        if [ "${{ github.ref_type }}" = "tag" ]; then
          docker pull ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:${{ steps.version.outputs.VERSION }}-gpu
        fi
        docker pull ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:latest-gpu

    - name: List available tags
      run: |
        echo "📋 Available image tags for this release:"
        if [ "${{ github.ref_type }}" = "tag" ]; then
          echo "- ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:${{ steps.version.outputs.VERSION }}-cpu"
          echo "- ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:${{ steps.version.outputs.VERSION }}-gpu"
        fi
        echo "- ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:latest-cpu"
        echo "- ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:latest-gpu"
235+
236+
# Job (entry under `jobs:`): publishes a GitHub release whose notes list the
# pull/run commands for the images pushed by this workflow.
create-release:
  needs: [build-and-push, cleanup-unwanted-images]
  runs-on: ubuntu-latest
  # A release is tied to a tag; skip manual runs started from a branch, where
  # the extracted "version" would be a refs/heads/... string.
  if: github.ref_type == 'tag'

  permissions:
    contents: write

  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Extract version
      id: version
      run: echo "VERSION=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"

    # Images are pushed under the lowercased name, so the commands printed in
    # the release notes must use the same lowercased reference.
    - name: Prepare lowercase image name
      id: image-name
      run: |
        LOWERCASE_IMAGE_NAME=$(echo "${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}" | tr '[:upper:]' '[:lower:]')
        echo "IMAGE_NAME_LOWER=${LOWERCASE_IMAGE_NAME}" >> "$GITHUB_OUTPUT"

    - name: Create Release
      uses: softprops/action-gh-release@v2
      with:
        tag_name: ${{ steps.version.outputs.VERSION }}
        name: Release ${{ steps.version.outputs.VERSION }}
        body: |
          ## 🚀 Progressive Summarizer RAPTOR - Docker Images Published

          This release includes Docker images for both CPU and GPU variants of the Progressive Summarizer using RAPTOR methodology:

          ### CPU Image
          ```bash
          docker pull ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:${{ steps.version.outputs.VERSION }}-cpu
          docker pull ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:latest-cpu
          ```

          ### GPU Image (CUDA 12.1)
          ```bash
          docker pull ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:${{ steps.version.outputs.VERSION }}-gpu
          docker pull ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:latest-gpu
          ```

          ### Docker Compose
          ```bash
          # CPU deployment
          cd docker
          docker compose --profile cpu up -d

          # GPU deployment (recommended for large documents)
          cd docker
          docker compose --profile gpu up -d
          ```

          ### Quick Start
          ```bash
          # Run CPU version
          docker run -p 8080:8080 ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:${{ steps.version.outputs.VERSION }}-cpu

          # Run GPU version (requires nvidia-docker)
          docker run --gpus all -p 8080:8080 ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:${{ steps.version.outputs.VERSION }}-gpu
          ```

          ### Features
          - Progressive document summarization using RAPTOR (Recursive Abstractive Processing for Tree-Organized Retrieval)
          - Hierarchical clustering and summarization for long documents
          - Multi-level abstraction with configurable depth
          - GPU acceleration for transformer models and embedding generation
          - Support for various document formats (PDF, TXT, DOCX, etc.)
          - RESTful API with streaming responses
          - Configurable chunk sizes and overlap strategies
          - Memory-efficient processing for large documents

          ### RAPTOR Methodology
          This implementation leverages the RAPTOR approach for:
          - Building hierarchical summaries through recursive clustering
          - Creating tree-structured representations of document content
          - Enabling multi-scale information retrieval and summarization
          - Optimizing context-aware summarization for different abstraction levels

          For more information, see the [README](https://github.com/${{ github.repository }}/blob/main/README.md).
        draft: false
        prerelease: false
        generate_release_notes: true

0 commit comments

Comments
 (0)