File tree Expand file tree Collapse file tree 1 file changed +14
-0
lines changed
Expand file tree Collapse file tree 1 file changed +14
-0
lines changed Original file line number Diff line number Diff line change 6363 username : ${{ github.actor }}
6464 password : ${{ secrets.GITHUB_TOKEN }}
6565
- name: Stop DCGM to allow NCU profiling
  run: |
    # DCGM (Data Center GPU Manager) locks the GPU and prevents NCU from
    # profiling, so it is stopped before the container tests run.
    echo "Stopping DCGM services..."

    # Best-effort stop of every unit name DCGM may be installed under
    # (nv-hostengine is the host engine DCGM uses). A unit that does not
    # exist on this runner is reported and skipped, never treated as fatal.
    for svc in nvidia-dcgm dcgm nv-hostengine; do
      sudo systemctl stop "$svc" || echo "$svc service not found or already stopped"
    done

    # Kill any remaining DCGM processes that were not managed by systemd.
    for proc in nv-hostengine dcgm; do
      sudo pkill -9 "$proc" || echo "No $proc processes found"
    done

    # SIGKILL delivery is asynchronous; give the kernel a moment to reap the
    # processes before checking, so the check below does not race with it.
    sleep 1

    # Only claim success when nothing DCGM-related is left running. A
    # survivor is surfaced as a workflow warning instead of failing the job,
    # keeping this step best-effort like the commands above.
    if pgrep -l 'nv-hostengine|dcgm'; then
      echo "::warning::DCGM processes are still running; NCU profiling may fail"
    else
      echo "DCGM services stopped."
    fi
79+
6680 - name : Test Docker Compose
6781 id : test
6882 run : |
You can’t perform that action at this time.
0 commit comments