name: Build and Publish Docker Images

# Runs when a version tag is pushed, or when started manually from the
# Actions tab (workflow_dispatch).
on:
  push:
    tags:
      # Version tags only (e.g. v0.6.1, v1.2.3). The pattern must not contain
      # leading whitespace, otherwise it can never match a tag ref.
      - 'v*.*.*'
  workflow_dispatch:  # Allow manual trigger

# Values shared by every job in this workflow.
env:
  # Container registry the images are pushed to.
  REGISTRY: ghcr.io
  # "<owner>/<repo>"; jobs lowercase it before using it as an image name.
  IMAGE_NAME: ${{ github.repository }}

jobs:
# Job: clean-cache -- placeholder that runs ahead of the build.
  # It performs no cache operation itself; its single step only writes a log
  # line. It is kept because build-and-push lists it under `needs`.
  clean-cache:
    runs-on: ubuntu-latest
    steps:
      - name: Clear GitHub Actions Cache
        run: |
          echo "🧹 Clearing potentially corrupted cache..."
          # GitHub Actions cache cleanup is done automatically
          # This step is just for logging
24+
# Job: build-and-push -- builds the CPU and GPU images and pushes them to the registry.
  # One matrix leg per variant; both legs use the same Dockerfile and differ
  # only in the ENABLE_GPU build argument and the labels/tags they receive.
  build-and-push:
    needs: clean-cache
    runs-on: ubuntu-latest

    # Permissions granted to GITHUB_TOKEN for this job.
    # NOTE(review): no step in this job uses `attestations` or `id-token`
    # (provenance and sbom are disabled below) -- confirm they are still needed.
    permissions:
      contents: read
      packages: write
      attestations: write
      id-token: write

    strategy:
      # Let the other variant finish even when one leg fails.
      fail-fast: false
      matrix:
        include:
          # NOTE(review): `cache-scope` is not referenced by any step below;
          # the cache scope is derived from `variant` and the ref name.
          - variant: cpu
            dockerfile: docker/dockerfile
            platforms: linux/amd64
            cache-scope: cpu
          - variant: gpu
            dockerfile: docker/dockerfile
            platforms: linux/amd64
            cache-scope: gpu

    steps:
      # Step 1: Checkout repository
      - name: Checkout repository
        uses: actions/checkout@v4

      # Step 2: Set up Docker Buildx for advanced features
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          driver-opts: |
            network=host
            image=moby/buildkit:latest

      # Step 3: Log in to GitHub Container Registry
      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Step 4: Prepare lowercase image name for Docker compatibility.
      # The value is handed to the script through `env` instead of being
      # interpolated into the script text.
      - name: Prepare lowercase image name
        id: image-name
        env:
          IMAGE_REF: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
        run: |
          # Convert to lowercase to ensure Docker compatibility
          LOWERCASE_IMAGE_NAME=$(echo "${IMAGE_REF}" | tr '[:upper:]' '[:lower:]')
          echo "IMAGE_NAME_LOWER=${LOWERCASE_IMAGE_NAME}" >> "$GITHUB_OUTPUT"
          echo "📝 Using lowercase image name: ${LOWERCASE_IMAGE_NAME}"

      # Step 5: Extract metadata for Docker.
      # `latest=false` turns off the automatic plain `latest` tag; the
      # variant-specific `latest-<variant>` tag is added explicitly instead.
      - name: Extract Docker metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}
          flavor: |
            latest=false
          tags: |
            # Specific version tag (e.g. v0.6.1-cpu, v0.6.1-gpu)
            type=ref,event=tag,suffix=-${{ matrix.variant }}
            # Latest tag for each version (latest-cpu, latest-gpu)
            type=raw,value=latest-${{ matrix.variant }}

      # Step 6: Generate build args and the extra label based on variant
      - name: Set build arguments
        id: build-args
        env:
          VARIANT: ${{ matrix.variant }}
        run: |
          if [ "${VARIANT}" = "gpu" ]; then
            echo "BUILD_ARGS=ENABLE_GPU=true" >> "$GITHUB_OUTPUT"
            echo "LABELS=gpu.cuda=12.1.0" >> "$GITHUB_OUTPUT"
          else
            echo "BUILD_ARGS=ENABLE_GPU=false" >> "$GITHUB_OUTPUT"
            echo "LABELS=gpu.cuda=none" >> "$GITHUB_OUTPUT"
          fi

      # Step 7: Build and push Docker image.
      # For the gpu leg a failure here does not fail the job straight away
      # (continue-on-error); the retry step below gets a second attempt.
      - name: Build and push Docker image (${{ matrix.variant }})
        id: build
        uses: docker/build-push-action@v6
        continue-on-error: ${{ matrix.variant == 'gpu' }}
        with:
          context: .
          file: ${{ matrix.dockerfile }}
          platforms: ${{ matrix.platforms }}
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: |
            ${{ steps.meta.outputs.labels }}
            org.opencontainers.image.variant=${{ matrix.variant }}
            ${{ steps.build-args.outputs.LABELS }}
          build-args: |
            ${{ steps.build-args.outputs.BUILD_ARGS }}
          # Cache scoped per variant and per ref
          cache-from: type=gha,scope=v2-${{ matrix.variant }}-${{ github.ref_name }}
          cache-to: type=gha,mode=max,scope=v2-${{ matrix.variant }}-${{ github.ref_name }}
          # Disable features that can cause conflicts
          provenance: false
          sbom: false
          # Output only digest, no extra metadata
          outputs: type=registry

      # Extra step: retry the GPU build without cache if the first attempt failed.
      # `failure()` cannot be used here: because of continue-on-error the job
      # status stays "success", so the condition checks the step's raw
      # `outcome` instead. If this retry fails too, the job fails.
      - name: Retry GPU build (if needed)
        id: build-retry
        if: matrix.variant == 'gpu' && steps.build.outcome == 'failure'
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ${{ matrix.dockerfile }}
          platforms: ${{ matrix.platforms }}
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: |
            ${{ steps.meta.outputs.labels }}
            org.opencontainers.image.variant=${{ matrix.variant }}
            ${{ steps.build-args.outputs.LABELS }}
          build-args: |
            ${{ steps.build-args.outputs.BUILD_ARGS }}
          # No cache for retry
          no-cache: true
          provenance: false
          sbom: false
          outputs: type=registry

      # Step 8: Output image details.
      # The digest comes from whichever build attempt produced the image.
      - name: Image details
        env:
          VARIANT: ${{ matrix.variant }}
          IMAGE: ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}
          TAGS: ${{ steps.meta.outputs.tags }}
          DIGEST: ${{ steps.build.outputs.digest || steps.build-retry.outputs.digest }}
          VERSION: ${{ github.ref_name }}
        run: |
          echo "✅ Successfully built and pushed ${VARIANT} image"
          echo "📦 Image: ${IMAGE}"
          echo "🏷️ Tags: ${TAGS}"
          echo "🔖 Digest: ${DIGEST}"
          echo "📋 Version: ${VERSION}"
162+
# Job: cleanup-unwanted-images -- best-effort removal of untagged / digest-only package versions.
  # A failure to list or delete versions is reported as a warning and never
  # fails the job, so the jobs that depend on it still run.
  cleanup-unwanted-images:
    needs: build-and-push
    runs-on: ubuntu-latest
    if: success()

    permissions:
      packages: write

    steps:
      - name: Clean up unwanted SHA images
        env:
          # The token is read from the environment rather than being
          # interpolated into the script text.
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          OWNER: ${{ github.repository_owner }}
          # NOTE(review): assumes the event payload carries the owner type
          # ("Organization" or "User"); an empty value falls back to the
          # user-scoped endpoint -- confirm for this repository.
          OWNER_TYPE: ${{ github.event.repository.owner.type }}
          REPO_NAME: ${{ github.event.repository.name }}
        run: |
          echo "🧹 Looking for unwanted SHA-only images to cleanup..."

          # Get package name (convert to lowercase)
          PACKAGE_NAME=$(echo "${REPO_NAME}" | tr '[:upper:]' '[:lower:]')

          # Use the owner-scoped endpoints: GITHUB_TOKEN does not represent a
          # user account, which is what the /user/packages/... routes address.
          if [ "${OWNER_TYPE}" = "Organization" ]; then
            API_BASE="https://api.github.com/orgs/${OWNER}/packages/container/${PACKAGE_NAME}/versions"
          else
            API_BASE="https://api.github.com/users/${OWNER}/packages/container/${PACKAGE_NAME}/versions"
          fi

          # List package versions. -f makes curl fail on HTTP errors so an
          # error document is never fed to jq. Only the first 100 versions
          # are inspected per run.
          if ! VERSIONS_JSON=$(curl -fsSL \
              -H "Authorization: Bearer ${GH_TOKEN}" \
              -H "Accept: application/vnd.github+json" \
              "${API_BASE}?per_page=100"); then
            echo "::warning::Could not list package versions; skipping cleanup"
            exit 0
          fi

          # Select versions with no tags, or whose tags all start with
          # "sha256". The tag list is bound to $tags first so that both sides
          # of `or` look at the same array.
          echo "${VERSIONS_JSON}" \
            | jq -r '.[] | (.metadata.container.tags // []) as $tags | select(($tags | length) == 0 or ($tags | all(startswith("sha256")))) | .id' \
            | while IFS= read -r version_id; do
                if [ -n "$version_id" ]; then
                  echo "Deleting unwanted image version: $version_id"
                  if ! curl -fsS -X DELETE \
                      -H "Authorization: Bearer ${GH_TOKEN}" \
                      -H "Accept: application/vnd.github+json" \
                      "${API_BASE}/${version_id}"; then
                    echo "::warning::Failed to delete image version ${version_id}"
                  fi
                fi
              done

          echo "✅ Cleanup completed"
195+
# Job: verify-images -- pulls the freshly pushed tags to prove they exist in the registry.
  # The versioned tags are only checked for tag builds; a manual run from a
  # branch produces no version tag, so only `latest-*` is verified there.
  verify-images:
    needs: [build-and-push, cleanup-unwanted-images]
    runs-on: ubuntu-latest

    permissions:
      packages: read

    steps:
      # Authenticate so the pulls also succeed when the package is private.
      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # For a tag build this is the tag name (e.g. v0.6.1).
      - name: Extract version
        id: version
        run: |
          echo "VERSION=${GITHUB_REF_NAME}" >> "$GITHUB_OUTPUT"

      - name: Prepare lowercase image name
        id: image-name
        env:
          IMAGE_REF: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
        run: |
          # Convert to lowercase for Docker compatibility
          LOWERCASE_IMAGE_NAME=$(echo "${IMAGE_REF}" | tr '[:upper:]' '[:lower:]')
          echo "IMAGE_NAME_LOWER=${LOWERCASE_IMAGE_NAME}" >> "$GITHUB_OUTPUT"

      - name: Verify CPU image
        env:
          IMAGE: ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}
          VERSION: ${{ steps.version.outputs.VERSION }}
          REF_TYPE: ${{ github.ref_type }}
        run: |
          echo "Verifying CPU image for version ${VERSION}..."
          if [ "${REF_TYPE}" = "tag" ]; then
            docker pull "${IMAGE}:${VERSION}-cpu"
          else
            echo "Not a tag build; skipping the versioned tag"
          fi
          docker pull "${IMAGE}:latest-cpu"

      - name: Verify GPU image
        env:
          IMAGE: ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}
          VERSION: ${{ steps.version.outputs.VERSION }}
          REF_TYPE: ${{ github.ref_type }}
        run: |
          echo "Verifying GPU image for version ${VERSION}..."
          if [ "${REF_TYPE}" = "tag" ]; then
            docker pull "${IMAGE}:${VERSION}-gpu"
          else
            echo "Not a tag build; skipping the versioned tag"
          fi
          docker pull "${IMAGE}:latest-gpu"

      - name: List available tags
        env:
          IMAGE: ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}
          VERSION: ${{ steps.version.outputs.VERSION }}
          REF_TYPE: ${{ github.ref_type }}
        run: |
          echo "📋 Available image tags for this release:"
          if [ "${REF_TYPE}" = "tag" ]; then
            echo "- ${IMAGE}:${VERSION}-cpu"
            echo "- ${IMAGE}:${VERSION}-gpu"
          fi
          echo "- ${IMAGE}:latest-cpu"
          echo "- ${IMAGE}:latest-gpu"
235+
# Job: create-release -- publishes the GitHub Release for the pushed version tag.
  # Skipped for manual runs started from a branch: there is no tag to attach a
  # release to, and the ref would otherwise be used verbatim as the tag name.
  create-release:
    needs: [build-and-push, cleanup-unwanted-images]
    runs-on: ubuntu-latest
    if: github.ref_type == 'tag'

    permissions:
      contents: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # The tag name that triggered the run (e.g. v0.6.1).
      - name: Extract version
        id: version
        run: |
          echo "VERSION=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"

      # Image references must be lowercase, so the pull commands printed in
      # the release body use the same lowercased name the build job pushed.
      - name: Prepare lowercase image name
        id: image-name
        env:
          IMAGE_REF: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
        run: |
          LOWERCASE_IMAGE_NAME=$(echo "${IMAGE_REF}" | tr '[:upper:]' '[:lower:]')
          echo "IMAGE_NAME_LOWER=${LOWERCASE_IMAGE_NAME}" >> "$GITHUB_OUTPUT"

      - name: Create Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.version.outputs.VERSION }}
          name: Release ${{ steps.version.outputs.VERSION }}
          draft: false
          prerelease: false
          generate_release_notes: true
          body: |
            ## 🚀 Progressive Summarizer RAPTOR - Docker Images Published

            This release includes Docker images for both CPU and GPU variants of the Progressive Summarizer using RAPTOR methodology:

            ### CPU Image
            ```bash
            docker pull ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:${{ steps.version.outputs.VERSION }}-cpu
            docker pull ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:latest-cpu
            ```

            ### GPU Image (CUDA 12.1)
            ```bash
            docker pull ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:${{ steps.version.outputs.VERSION }}-gpu
            docker pull ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:latest-gpu
            ```

            ### Docker Compose
            ```bash
            # CPU deployment
            cd docker
            docker compose --profile cpu up -d

            # GPU deployment (recommended for large documents)
            cd docker
            docker compose --profile gpu up -d
            ```

            ### Quick Start
            ```bash
            # Run CPU version
            docker run -p 8080:8080 ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:${{ steps.version.outputs.VERSION }}-cpu

            # Run GPU version (requires nvidia-docker)
            docker run --gpus all -p 8080:8080 ${{ steps.image-name.outputs.IMAGE_NAME_LOWER }}:${{ steps.version.outputs.VERSION }}-gpu
            ```

            ### Features
            - Progressive document summarization using RAPTOR (Recursive Abstractive Processing for Tree-Organized Retrieval)
            - Hierarchical clustering and summarization for long documents
            - Multi-level abstraction with configurable depth
            - GPU acceleration for transformer models and embedding generation
            - Support for various document formats (PDF, TXT, DOCX, etc.)
            - RESTful API with streaming responses
            - Configurable chunk sizes and overlap strategies
            - Memory-efficient processing for large documents

            ### RAPTOR Methodology
            This implementation leverages the RAPTOR approach for:
            - Building hierarchical summaries through recursive clustering
            - Creating tree-structured representations of document content
            - Enabling multi-scale information retrieval and summarization
            - Optimizing context-aware summarization for different abstraction levels

            For more information, see the [README](https://github.com/${{ github.repository }}/blob/main/README.md).
0 commit comments