Test Brev Tutorial Docker Images #417
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow display name.
name: Test Brev Tutorial Docker Images

# Runs only when dispatched explicitly (`workflow_dispatch`); all three inputs
# are required strings.
on:
  workflow_dispatch:
    inputs:
      # Selects which tutorial's Docker Compose artifact the job tests.
      tutorial:
        type: string
        required: true
        description: 'Tutorial name to test'
      # Commit that receives the success/failure commit status.
      git_sha:
        type: string
        required: true
        description: 'Git commit SHA to update status for'
      # Run of the build workflow whose artifact is downloaded.
      workflow_run_id:
        type: string
        required: true
        description: 'Workflow run ID to download artifacts from'
# One job: download the Docker Compose file that the build workflow produced
# for the requested tutorial, run the repository's test script against it, and
# report the outcome as a commit status on the requested SHA.
#
# Security: the `tutorial` and `git_sha` dispatch inputs are never expanded
# into shell script text with `${{ }}`. They are handed to each step through
# `env:` and read as quoted shell variables, so their content cannot be parsed
# as shell syntax.
jobs:
  test-tutorial:
    name: test-tutorial (${{ inputs.tutorial }})
    runs-on: linux-amd64-gpu-t4-latest-1
    defaults:
      run:
        working-directory: ${{ github.workspace }}
    # NOTE(review): when a `permissions` block is present, every scope not
    # listed is set to `none`. Confirm that checkout, the artifact download and
    # the GHCR login/pull still work with only `statuses: write` (e.g. if the
    # repository or its packages are ever made private).
    permissions:
      statuses: write
    steps:
      - name: Show runner info
        run: |
          echo "Runner name: ${{ runner.name }}"
          echo "Runner OS: ${{ runner.os }}"
          echo "Runner arch: ${{ runner.arch }}"
          echo "Runner uname: $(uname -a)"

      - name: Checkout repository
        uses: actions/checkout@v4

      # Exports GIT_BRANCH_NAME, DOCKER_TAG_BRANCH, GIT_SHA and GIT_SHORT_SHA
      # to later steps through GITHUB_ENV.
      - name: Set Git branch variables
        env:
          # Untrusted dispatch input: passed via the environment, not inlined.
          INPUT_GIT_SHA: ${{ inputs.git_sha }}
        run: |
          GIT_BRANCH_NAME=${GITHUB_REF#refs/heads/}
          # Sanitize branch name for Docker tags (replace invalid characters with hyphens and convert to lowercase)
          DOCKER_TAG_BRANCH=$(echo "${GIT_BRANCH_NAME}" | sed 's/[^a-zA-Z0-9._-]/-/g' | tr '[:upper:]' '[:lower:]')
          GIT_SHA="${INPUT_GIT_SHA}"
          # Only a hexadecimal SHA may be written to GITHUB_ENV; this rejects
          # multi-line values that would otherwise define extra variables.
          if [[ ! "${GIT_SHA}" =~ ^[0-9a-fA-F]{7,64}$ ]]; then
            echo "::error::git_sha must be a 7-64 character hexadecimal commit SHA"
            exit 1
          fi
          GIT_SHORT_SHA=${GIT_SHA:0:7}
          {
            echo "GIT_BRANCH_NAME=${GIT_BRANCH_NAME}"
            echo "DOCKER_TAG_BRANCH=${DOCKER_TAG_BRANCH}"
            echo "GIT_SHA=${GIT_SHA}"
            echo "GIT_SHORT_SHA=${GIT_SHORT_SHA}"
          } >> "${GITHUB_ENV}"

      # Artifact name is built from the tutorial, the sanitized branch name and
      # the 7-character short SHA exported by the previous step.
      - name: Download commit-specific Docker Compose artifact
        uses: dawidd6/action-download-artifact@v6
        with:
          workflow: build-brev-tutorial-docker-images.yml
          run_id: ${{ inputs.workflow_run_id }}
          name: docker-compose-${{ inputs.tutorial }}-${{ env.DOCKER_TAG_BRANCH }}-git-${{ env.GIT_SHORT_SHA }}
          path: artifacts/commit-specific/${{ inputs.tutorial }}/

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Every command is best-effort: a missing service or process only prints
      # a message and never fails the step.
      - name: Stop DCGM to allow NCU profiling
        run: |
          # DCGM (Data Center GPU Manager) locks the GPU and prevents NCU from profiling.
          # Stop it before running the container tests.
          echo "Stopping DCGM services..."
          sudo systemctl stop nvidia-dcgm || echo "nvidia-dcgm service not found or already stopped"
          sudo systemctl stop dcgm || echo "dcgm service not found or already stopped"
          # Also try nv-hostengine which DCGM uses
          sudo systemctl stop nv-hostengine || echo "nv-hostengine service not found or already stopped"
          # Kill any remaining dcgm processes
          sudo pkill -9 nv-hostengine || echo "No nv-hostengine processes found"
          sudo pkill -9 dcgm || echo "No dcgm processes found"
          echo "DCGM services stopped."

      - name: Test Docker Compose
        id: test
        env:
          # Untrusted dispatch input: passed via the environment, not inlined.
          TUTORIAL: ${{ inputs.tutorial }}
        run: |
          ./brev/test-docker-compose.bash "artifacts/commit-specific/${TUTORIAL}/brev/docker-compose.yml"

      # The two status steps take the SHA and tutorial name directly from the
      # dispatch inputs, so the status can still be posted when an earlier
      # step failed before GIT_SHA was exported.
      - name: Update commit status to success
        if: success()
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GIT_SHA: ${{ inputs.git_sha }}
          TUTORIAL: ${{ inputs.tutorial }}
        run: |
          gh api \
            --method POST \
            -H "Accept: application/vnd.github+json" \
            "/repos/${{ github.repository }}/statuses/${GIT_SHA}" \
            -f state='success' \
            -f target_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \
            -f description='Tests passed' \
            -f context="test / ${TUTORIAL}"

      - name: Update commit status to failure
        if: failure()
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GIT_SHA: ${{ inputs.git_sha }}
          TUTORIAL: ${{ inputs.tutorial }}
        run: |
          gh api \
            --method POST \
            -H "Accept: application/vnd.github+json" \
            "/repos/${{ github.repository }}/statuses/${GIT_SHA}" \
            -f state='failure' \
            -f target_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \
            -f description='Tests failed' \
            -f context="test / ${TUTORIAL}"