Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions .github/workflows/build-test-package-python-cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
matrix:
python3-minor-version: ${{ github.event_name == 'pull_request' && fromJSON('["11"]') || fromJSON('["9","10","11"]') }}
manylinux-platform: ["_2_28-x64","2014-x64"]
cuda-version: ["116","121","124"]
cuda-version: ["118","128","130"]

steps:
- uses: actions/checkout@v4
Expand All @@ -46,6 +46,9 @@ jobs:
CMAKE_OPTIONS="${CMAKE_OPTIONS} -DCUDAToolkit_ROOT=/usr/lib64/cuda${CUDA_VERSION}"
CMAKE_OPTIONS="${CMAKE_OPTIONS} -DCMAKE_CUDA_COMPILER=/usr/lib64/cuda${CUDA_VERSION}/bin/nvcc"
CMAKE_OPTIONS="${CMAKE_OPTIONS} --config-setting=build.tool-args=-j16"
# For CUDA major version 13 or later, explicitly request compute capability 7.5
# by appending -DCMAKE_CUDA_ARCHITECTURES=75 to the CMake options.
# NOTE(review): presumably needed because CUDA 13 no longer supports the older
# default architecture - confirm against the CUDA 13 release notes.
if test ${CUDA_VERSION_MAJOR} -ge 13; then
CMAKE_OPTIONS="${CMAKE_OPTIONS} -DCMAKE_CUDA_ARCHITECTURES=75"
fi

# The first two are not library paths, but are included so they get mounted into the
# Docker container by dockcross-manylinux-build-module-wheels.sh
Expand Down Expand Up @@ -102,7 +105,7 @@ jobs:
max-parallel: 2
matrix:
python3-minor-version: ${{ github.event_name == 'pull_request' && fromJSON('["11"]') || fromJSON('["9","10","11"]') }}
cuda-version: ["124"]
cuda-version: ["128","130"]

steps:
- uses: actions/checkout@v4
Expand Down Expand Up @@ -153,7 +156,14 @@ jobs:
$LIBCUDART= (Get-Item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}\bin\cudart64*dll" ).Name
& nvcc --version
((Get-Content -Path pyproject.toml) -replace "itk-cudacommon","itk-cudacommon-cuda${CUDA_VERSION}") | Set-Content -Path pyproject.toml
./windows-download-cache-and-build-module-wheels.ps1 "${{ matrix.python3-minor-version }}" -setup_options "--lib-paths ""C:/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}/bin"" --exclude-libs ""nvcuda.dll;${LIBCUDART}""" -cmake_options """-DCUDACOMMON_CUDA_VERSION=${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}"" ""-DCMAKE_CUDA_COMPILER=$env:CUDACXX"""
# Select the value later passed to CMake as -DCMAKE_CUDA_ARCHITECTURES:
# "75" when the CUDA major version is 13 or higher, "52" otherwise.
# NOTE(review): presumably 5.2 is kept for older toolkits to preserve support
# for older GPUs - confirm.
if(${CUDA_VERSION_MAJOR} -ge 13) {
$CUDA_ARCHITECTURES = "75"
}
else {
$CUDA_ARCHITECTURES = "52"
}

./windows-download-cache-and-build-module-wheels.ps1 "${{ matrix.python3-minor-version }}" -setup_options "--lib-paths ""C:/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}/bin"" --exclude-libs ""nvcuda.dll;${LIBCUDART}""" -cmake_options """-DCUDACOMMON_CUDA_VERSION=${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}"" ""-DCMAKE_CUDA_COMPILER=$env:CUDACXX"" ""-DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHITECTURES}"""

mkdir -p '${{ github.workspace }}\dist'
cp 'dist\*.whl' '${{ github.workspace }}\dist'
Expand Down Expand Up @@ -223,8 +233,8 @@ jobs:

- name: Test python wheel
run: |
# Find the CUDA 124 wheel for Python 3.11 dynamically
wheel=$(find wheels -name "*cuda124*cp311*manylinux_2_28_x86_64.whl" -type f | head -1)
# Find the CUDA 128 wheel for Python 3.11 dynamically
wheel=$(find wheels -name "*cuda128*cp311*manylinux_2_28_x86_64.whl" -type f | head -1)
pip uninstall -y $(pip freeze | sed -E 's/(==.*|[[:space:]]+@.*)$//' | grep -E '^itk-') || true
echo "Installing wheel: $wheel"
pip install $wheel
Expand Down
9 changes: 7 additions & 2 deletions src/itkCudaUtil.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ CudaGetMaxFlopsDev()
int max_flops_device = 0;
for (int i = 0; i < numAvailableDevices; ++i)
{
int flops = devices[i].multiProcessorCount * devices[i].clockRate;
int clockRate;
cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, i);
int flops = devices[i].multiProcessorCount * clockRate;
if (flops > max_flops)
{
max_flops = flops;
Expand Down Expand Up @@ -121,9 +123,12 @@ CudaPrintDeviceInfo(int device, bool verbose)
return;
}

int clockRate;
cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, device);

std::cout << prop.name << std::endl;
std::cout << "Compute capability: " << prop.major << "." << prop.minor << std::endl;
std::cout << "Clockrate: " << prop.clockRate << std::endl;
std::cout << "Clockrate: " << clockRate << std::endl;
std::cout << "Global memory: " << prop.totalGlobalMem << std::endl;
std::cout << "Constant memory: " << prop.totalConstMem << std::endl;
std::cout << "Number of Multi Processors: " << prop.multiProcessorCount << std::endl;
Expand Down