diff --git a/.github/workflows/build-test-package-python-cuda.yml b/.github/workflows/build-test-package-python-cuda.yml index 5468a1e..acee1ac 100644 --- a/.github/workflows/build-test-package-python-cuda.yml +++ b/.github/workflows/build-test-package-python-cuda.yml @@ -237,10 +237,9 @@ jobs: # Run tests with warning detection. # SWIG < 4.4 triggers the Py_LIMITED_API "builtin type swig…" warning when # Python runs with -Walways; pytest enables that behavior, so we ignore it. - stderr_log=$(mktemp) - pytest $GITHUB_WORKSPACE/test/*.py -vv -s -W "ignore:builtin type swig" 2> "$stderr_log" - if grep -q "Warning" "$stderr_log"; then - echo "Warnings found in stderr, failing the build" - cat "$stderr_log" + test_log=$(mktemp) + pytest $GITHUB_WORKSPACE/test/*.py -vv -s -W error -W "ignore:builtin type swig" 2>&1 | tee "$test_log" + if grep -q "Warning" "$test_log"; then + echo "Warnings detected in test output, failing the build" exit 1 fi diff --git a/include/itkCudaSquareImage.hcu b/include/itkCudaSquareImage.hcu index 0af2660..74baffe 100644 --- a/include/itkCudaSquareImage.hcu +++ b/include/itkCudaSquareImage.hcu @@ -25,6 +25,10 @@ template void CudaSquareImage3D(int imSize[3], PixelType * in, PixelType * out); +template +void +CudaSquareImage2D(int imSize[2], PixelType * in, PixelType * out); + } // end namespace itk #endif diff --git a/include/itkCudaSquareImageFilter.hxx b/include/itkCudaSquareImageFilter.hxx index 4ec5085..5913396 100644 --- a/include/itkCudaSquareImageFilter.hxx +++ b/include/itkCudaSquareImageFilter.hxx @@ -26,18 +26,29 @@ template void CudaSquareImageFilter::GPUGenerateData() { - int size[3] = { 1, 1, 1 }; - for (unsigned int i = 0; i < ImageDimension; i++) - { - size[i] = this->GetInput()->GetBufferedRegion().GetSize()[i]; - } - typename ImageType::PixelType * pin0 = (typename ImageType::PixelType *)(this->GetInput(0)->GetCudaDataManager()->GetGPUBufferPointer()); typename ImageType::PixelType * pout = (typename ImageType::PixelType *)(this->GetOutput()->GetCudaDataManager()->GetGPUBufferPointer()); - CudaSquareImage3D(size, pin0, pout); + if constexpr (ImageDimension == 2) + { + int size[2]; + for (unsigned int i = 0; i < 2; ++i) + { + size[i] = this->GetInput()->GetBufferedRegion().GetSize()[i]; + } + CudaSquareImage2D(size, pin0, pout); + } + if constexpr (ImageDimension == 3) + { + int size[3] = { 1, 1, 1 }; + for (unsigned int i = 0; i < ImageDimension; ++i) + { + size[i] = this->GetInput()->GetBufferedRegion().GetSize()[i]; + } + CudaSquareImage3D(size, pin0, pout); + } } } // end namespace itk diff --git a/src/itkCudaSquareImage.cu b/src/itkCudaSquareImage.cu index 3f9ece1..8a1246c 100644 --- a/src/itkCudaSquareImage.cu +++ b/src/itkCudaSquareImage.cu @@ -21,6 +21,42 @@ namespace itk { +template +__global__ void +CudaSquareImage2D_kernel(int2 imSize, PixelType * in, PixelType * out) +{ + unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int j = blockIdx.y * blockDim.y + threadIdx.y; + + if (i >= imSize.x || j >= imSize.y) + { + return; + } + unsigned int gidx = i + j * imSize.x; + + out[gidx] = in[gidx] * in[gidx]; +} + +template +void +CudaSquareImage2D(int imSize[2], PixelType * in, PixelType * out) +{ + // Thread Block Dimensions + constexpr int tBlock_x = 16; + constexpr int tBlock_y = 16; + + unsigned int blocksInX = (imSize[0] - 1) / tBlock_x + 1; + unsigned int blocksInY = (imSize[1] - 1) / tBlock_y + 1; + + // Compute block and grid sizes + dim3 dimGrid = dim3(blocksInX, blocksInY); + dim3 dimBlock = dim3(tBlock_x, tBlock_y); + + int2 imageSize = make_int2(imSize[0], imSize[1]); + + CudaSquareImage2D_kernel<<>>(imageSize, in, out); +} + template __global__ void CudaSquareImage3D_kernel(int3 imSize, PixelType * in, PixelType * out) @@ -60,9 +96,19 @@ CudaSquareImage3D(int imSize[3], PixelType * in, PixelType * out) CudaSquareImage3D_kernel<<>>(imageSize, in, out); } -template void CudaCommon_EXPORT -CudaSquareImage3D(int imSize[3], float * in, float * out); -template void CudaCommon_EXPORT -CudaSquareImage3D(int imSize[3], double * in, double * out); +#define ITK_CUDA_SQUARE_IMAGE_INSTANTIATE(T) \ + template void CudaCommon_EXPORT CudaSquareImage3D(int imSize[3], T * in, T * out); \ + template void CudaCommon_EXPORT CudaSquareImage2D(int imSize[2], T * in, T * out) + +ITK_CUDA_SQUARE_IMAGE_INSTANTIATE(float); +ITK_CUDA_SQUARE_IMAGE_INSTANTIATE(double); +ITK_CUDA_SQUARE_IMAGE_INSTANTIATE(int); +ITK_CUDA_SQUARE_IMAGE_INSTANTIATE(unsigned int); +ITK_CUDA_SQUARE_IMAGE_INSTANTIATE(unsigned long); +ITK_CUDA_SQUARE_IMAGE_INSTANTIATE(short); +ITK_CUDA_SQUARE_IMAGE_INSTANTIATE(unsigned short); +ITK_CUDA_SQUARE_IMAGE_INSTANTIATE(unsigned char); + +#undef ITK_CUDA_SQUARE_IMAGE_INSTANTIATE } // end namespace itk diff --git a/wrapping/CMakeLists.txt b/wrapping/CMakeLists.txt index a76d2d0..98a5583 100644 --- a/wrapping/CMakeLists.txt +++ b/wrapping/CMakeLists.txt @@ -1,4 +1,19 @@ itk_wrap_module(CudaCommon) + +set( + WRAPPER_SUBMODULE_ORDER + itkCudaDataManager + itkCudaImage + itkCudaImageDataManager + itkImageToImageFilterCudaCommon + itkInPlaceImageFilterCudaCommon + itkImageSourceCudaCommon + itkCudaImageToImageFilter + itkCudaInPlaceImageFilter + itkCudaImageFromImageFilter + itkCudaSquareImageFilter +) + itk_auto_load_submodules() itk_end_wrap_module() diff --git a/wrapping/itkCudaImage.wrap b/wrapping/itkCudaImage.wrap index e24e37d..833e8e5 100644 --- a/wrapping/itkCudaImage.wrap +++ b/wrapping/itkCudaImage.wrap @@ -1,7 +1,7 @@ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/CudaImage.i.init" "${CMAKE_CURRENT_BINARY_DIR}/CudaImage.i" @ONLY) itk_wrap_class("itk::CudaImage" POINTER_WITH_CONST_POINTER) - UNIQUE(types "UC;UL;${ITKM_IT};${WRAP_ITK_SCALAR}") + UNIQUE(types "UC;UL;${WRAP_ITK_SCALAR}") UNIQUE(vector_universe "${WRAP_ITK_VECTOR_REAL};${WRAP_ITK_COV_VECTOR_REAL}") foreach(t ${types}) set(PixelType ${t}) diff --git a/wrapping/itkCudaImageDataManager.wrap b/wrapping/itkCudaImageDataManager.wrap index 7d61f28..6c0388f 100644 --- a/wrapping/itkCudaImageDataManager.wrap +++ b/wrapping/itkCudaImageDataManager.wrap @@ -2,7 +2,7 @@ itk_wrap_include(itkCudaImage.h) itk_wrap_class("itk::CudaImageDataManager" POINTER) - UNIQUE(types "UC;UL;${ITKM_IT};${WRAP_ITK_SCALAR}") + UNIQUE(types "UC;UL;${WRAP_ITK_SCALAR}") foreach(d ${ITK_WRAP_IMAGE_DIMS}) foreach(t ${types}) itk_wrap_template("CI${ITKM_${t}}${d}" "itk::CudaImage<${ITKT_${t}}, ${d}>") diff --git a/wrapping/itkCudaImageFromImageFilter.wrap b/wrapping/itkCudaImageFromImageFilter.wrap index 9cff53c..cd86325 100644 --- a/wrapping/itkCudaImageFromImageFilter.wrap +++ b/wrapping/itkCudaImageFromImageFilter.wrap @@ -2,7 +2,7 @@ itk_wrap_include(itkImage.h) itk_wrap_class("itk::CudaImageFromImageFilter" POINTER) - UNIQUE(types "UC;UL;${ITKM_IT};${WRAP_ITK_SCALAR}") + UNIQUE(types "UC;UL;${WRAP_ITK_SCALAR}") foreach(d ${ITK_WRAP_IMAGE_DIMS}) foreach(t ${types}) itk_wrap_template("${t}${d}" "itk::Image<${ITKT_${t}}, ${d}>") diff --git a/wrapping/itkCudaImageToImageFilter.wrap b/wrapping/itkCudaImageToImageFilter.wrap new file mode 100644 index 0000000..0667faf --- /dev/null +++ b/wrapping/itkCudaImageToImageFilter.wrap @@ -0,0 +1,43 @@ +itk_wrap_include(itkCudaImage.h) +itk_wrap_include(itkCudaImageToImageFilter.h) +itk_wrap_include(itkInPlaceImageFilter.h) + +itk_wrap_class("itk::CudaImageToImageFilter" POINTER) + + # Match CudaImage scalar instantiations + UNIQUE(types "UC;UL;${WRAP_ITK_SCALAR}") + foreach(d ${ITK_WRAP_IMAGE_DIMS}) + foreach(t ${types}) + itk_wrap_template("CI${ITKM_${t}}${d}CI${ITKM_${t}}${d}" "itk::CudaImage<${ITKT_${t}}, ${d}>, itk::CudaImage<${ITKT_${t}}, ${d}>") + endforeach() + endforeach() + + # Match CudaImage vector/covariant instantiations + UNIQUE(vector_types "${WRAP_ITK_VECTOR_REAL};${WRAP_ITK_COV_VECTOR_REAL}") + foreach(c ${ITK_WRAP_VECTOR_COMPONENTS}) + foreach(d ${ITK_WRAP_IMAGE_DIMS}) + foreach(vt ${vector_types}) + itk_wrap_template("CI${ITKM_${vt}${c}}${d}CI${ITKM_${vt}${c}}${d}" "itk::CudaImage<${ITKT_${vt}${c}}, ${d}>, itk::CudaImage<${ITKT_${vt}${c}}, ${d}>") + endforeach() + endforeach() + endforeach() + + # Instantiate with InPlaceImageFilter as parent for in-place CUDA filters + UNIQUE(types "UC;UL;${WRAP_ITK_SCALAR}") + foreach(d ${ITK_WRAP_IMAGE_DIMS}) + foreach(t ${types}) + itk_wrap_template("CI${ITKM_${t}}${d}CI${ITKM_${t}}${d}IPF" "itk::CudaImage<${ITKT_${t}}, ${d}>, itk::CudaImage<${ITKT_${t}}, ${d}>, itk::InPlaceImageFilter, itk::CudaImage<${ITKT_${t}}, ${d}> >") + endforeach() + endforeach() + + UNIQUE(vector_types "${WRAP_ITK_VECTOR_REAL};${WRAP_ITK_COV_VECTOR_REAL}") + foreach(c ${ITK_WRAP_VECTOR_COMPONENTS}) + foreach(d ${ITK_WRAP_IMAGE_DIMS}) + foreach(vt ${vector_types}) + itk_wrap_template("CI${ITKM_${vt}${c}}${d}CI${ITKM_${vt}${c}}${d}IPF" "itk::CudaImage<${ITKT_${vt}${c}}, ${d}>, itk::CudaImage<${ITKT_${vt}${c}}, ${d}>, itk::InPlaceImageFilter, itk::CudaImage<${ITKT_${vt}${c}}, ${d}> >") + endforeach() + endforeach() + endforeach() + + +itk_end_wrap_class() diff --git a/wrapping/itkCudaInPlaceImageFilter.wrap b/wrapping/itkCudaInPlaceImageFilter.wrap new file mode 100644 index 0000000..ac97abe --- /dev/null +++ b/wrapping/itkCudaInPlaceImageFilter.wrap @@ -0,0 +1,23 @@ +itk_wrap_include(itkCudaImage.h) + +itk_wrap_class("itk::CudaInPlaceImageFilter" POINTER) + + UNIQUE(types "UC;UL;${WRAP_ITK_SCALAR}") + foreach(d ${ITK_WRAP_IMAGE_DIMS}) + foreach(t ${types}) + itk_wrap_template("CI${ITKM_${t}}${d}" "itk::CudaImage<${ITKT_${t}}, ${d}>") + itk_wrap_template("CI${ITKM_${t}}${d}CI${ITKM_${t}}${d}" "itk::CudaImage<${ITKT_${t}}, ${d}>, itk::CudaImage<${ITKT_${t}}, ${d}>") + endforeach() + endforeach() + + UNIQUE(vector_types "${WRAP_ITK_VECTOR_REAL};${WRAP_ITK_COV_VECTOR_REAL}") + foreach(c ${ITK_WRAP_VECTOR_COMPONENTS}) + foreach(d ${ITK_WRAP_IMAGE_DIMS}) + foreach(vt ${vector_types}) + itk_wrap_template("CI${ITKM_${vt}${c}}${d}" "itk::CudaImage<${ITKT_${vt}${c}}, ${d}>") + itk_wrap_template("CI${ITKM_${vt}${c}}${d}CI${ITKM_${vt}${c}}${d}" "itk::CudaImage<${ITKT_${vt}${c}}, ${d}>, itk::CudaImage<${ITKT_${vt}${c}}, ${d}>") + endforeach() + endforeach() + endforeach() + +itk_end_wrap_class() diff --git a/wrapping/itkCudaSquareImageFilter.wrap b/wrapping/itkCudaSquareImageFilter.wrap new file mode 100644 index 0000000..b2af1f6 --- /dev/null +++ b/wrapping/itkCudaSquareImageFilter.wrap @@ -0,0 +1,14 @@ +itk_wrap_include(itkCudaImage.h) + +itk_wrap_class("itk::CudaSquareImageFilter" POINTER) + + UNIQUE(types "UC;UL;${WRAP_ITK_SCALAR}") + foreach(d ${ITK_WRAP_IMAGE_DIMS}) + if(d EQUAL 2 OR d EQUAL 3) + foreach(t ${types}) + itk_wrap_template("CI${ITKM_${t}}${d}" "itk::CudaImage<${ITKT_${t}},${d}>") + endforeach() + endif() + endforeach() + +itk_end_wrap_class() diff --git a/wrapping/itkImageSourceCudaCommon.wrap b/wrapping/itkImageSourceCudaCommon.wrap index 47d0aa1..ab12ee5 100644 --- a/wrapping/itkImageSourceCudaCommon.wrap +++ b/wrapping/itkImageSourceCudaCommon.wrap @@ -2,7 +2,7 @@ itk_wrap_include(itkCudaImage.h) itk_wrap_class("itk::ImageSource" POINTER) - UNIQUE(types "UC;UL;${ITKM_IT};${WRAP_ITK_SCALAR}") + UNIQUE(types "UC;UL;${WRAP_ITK_SCALAR}") foreach(d ${ITK_WRAP_IMAGE_DIMS}) foreach(t ${types}) itk_wrap_template("CI${ITKM_${t}}${d}" "itk::CudaImage<${ITKT_${t}}, ${d}>") diff --git a/wrapping/itkImageToImageFilterCudaCommon.wrap b/wrapping/itkImageToImageFilterCudaCommon.wrap index a31c86f..84c8f8a 100644 --- a/wrapping/itkImageToImageFilterCudaCommon.wrap +++ b/wrapping/itkImageToImageFilterCudaCommon.wrap @@ -3,20 +3,26 @@ itk_wrap_include(itkImage.h) itk_wrap_class("itk::ImageToImageFilter" POINTER) - UNIQUE(types "UC;UL;${ITKM_IT};${WRAP_ITK_SCALAR}") + UNIQUE(types "UC;UL;${WRAP_ITK_SCALAR}") + UNIQUE(vector_types "${WRAP_ITK_VECTOR_REAL};${WRAP_ITK_COV_VECTOR_REAL}") + foreach(d ${ITK_WRAP_IMAGE_DIMS}) foreach(t ${types}) - itk_wrap_template("CI${ITKM_${t}}${d}CI${ITKM_${t}}${d}" "itk::CudaImage<${ITKT_${t}}, ${d}>, itk::CudaImage<${ITKT_${t}}, ${d}>") - itk_wrap_template("I${ITKM_${t}}${d}CI${ITKM_${t}}${d}" "itk::Image<${ITKT_${t}}, ${d}>, itk::CudaImage<${ITKT_${t}}, ${d}>") + itk_wrap_template("I${ITKM_${t}}${d}CI${ITKM_${t}}${d}" + "itk::Image< ${ITKT_${t}}, ${d} >, itk::CudaImage< ${ITKT_${t}}, ${d} >") + itk_wrap_template("CI${ITKM_${t}}${d}I${ITKM_${t}}${d}" + "itk::CudaImage< ${ITKT_${t}}, ${d} >, itk::Image< ${ITKT_${t}}, ${d} >") + itk_wrap_template("CI${ITKM_${t}}${d}CI${ITKM_${t}}${d}" + "itk::CudaImage< ${ITKT_${t}}, ${d} >, itk::CudaImage< ${ITKT_${t}}, ${d} >") endforeach() - endforeach() - - UNIQUE(vector_types "${WRAP_ITK_VECTOR_REAL};${WRAP_ITK_COV_VECTOR_REAL}") - foreach(c ${ITK_WRAP_VECTOR_COMPONENTS}) - foreach(d ${ITK_WRAP_IMAGE_DIMS}) + foreach(c ${ITK_WRAP_VECTOR_COMPONENTS}) foreach(vt ${vector_types}) - itk_wrap_template("CI${ITKM_${vt}${c}}${d}CI${ITKM_${vt}${c}}${d}" "itk::CudaImage<${ITKT_${vt}${c}}, ${d}>, itk::CudaImage<${ITKT_${vt}${c}}, ${d}>") - itk_wrap_template("I${ITKM_${vt}${c}}${d}CI${ITKM_${vt}${c}}${d}" "itk::Image<${ITKT_${vt}${c}}, ${d}>, itk::CudaImage<${ITKT_${vt}${c}}, ${d}>") + itk_wrap_template("I${ITKM_${vt}${c}}${d}CI${ITKM_${vt}${c}}${d}" + "itk::Image< ${ITKT_${vt}${c}}, ${d} >, itk::CudaImage< ${ITKT_${vt}${c}}, ${d} >") + itk_wrap_template("CI${ITKM_${vt}${c}}${d}I${ITKM_${vt}${c}}${d}" + "itk::CudaImage< ${ITKT_${vt}${c}}, ${d} >, itk::Image< ${ITKT_${vt}${c}}, ${d} >") + itk_wrap_template("CI${ITKM_${vt}${c}}${d}CI${ITKM_${vt}${c}}${d}" + "itk::CudaImage< ${ITKT_${vt}${c}}, ${d} >, itk::CudaImage< ${ITKT_${vt}${c}}, ${d} >") endforeach() endforeach() endforeach() diff --git a/wrapping/itkInPlaceImageFilterCudaCommon.wrap b/wrapping/itkInPlaceImageFilterCudaCommon.wrap new file mode 100644 index 0000000..378c574 --- /dev/null +++ b/wrapping/itkInPlaceImageFilterCudaCommon.wrap @@ -0,0 +1,30 @@ +itk_wrap_include(itkCudaImage.h) +itk_wrap_include(itkInPlaceImageFilter.h) + +itk_wrap_class("itk::InPlaceImageFilter" POINTER) + + UNIQUE(types "UC;UL;${WRAP_ITK_SCALAR}") + UNIQUE(vector_types "${WRAP_ITK_VECTOR_REAL};${WRAP_ITK_COV_VECTOR_REAL}") + + foreach(d ${ITK_WRAP_IMAGE_DIMS}) + foreach(t ${types}) + itk_wrap_template("I${ITKM_${t}}${d}CI${ITKM_${t}}${d}" + "itk::Image< ${ITKT_${t}}, ${d} >, itk::CudaImage< ${ITKT_${t}}, ${d} >") + itk_wrap_template("CI${ITKM_${t}}${d}I${ITKM_${t}}${d}" + "itk::CudaImage< ${ITKT_${t}}, ${d} >, itk::Image< ${ITKT_${t}}, ${d} >") + itk_wrap_template("CI${ITKM_${t}}${d}CI${ITKM_${t}}${d}" + "itk::CudaImage< ${ITKT_${t}}, ${d} >, itk::CudaImage< ${ITKT_${t}}, ${d} >") + endforeach() + foreach(c ${ITK_WRAP_VECTOR_COMPONENTS}) + foreach(vt ${vector_types}) + itk_wrap_template("I${ITKM_${vt}${c}}${d}CI${ITKM_${vt}${c}}${d}" + "itk::Image< ${ITKT_${vt}${c}}, ${d} >, itk::CudaImage< ${ITKT_${vt}${c}}, ${d} >") + itk_wrap_template("CI${ITKM_${vt}${c}}${d}I${ITKM_${vt}${c}}${d}" + "itk::CudaImage< ${ITKT_${vt}${c}}, ${d} >, itk::Image< ${ITKT_${vt}${c}}, ${d} >") + itk_wrap_template("CI${ITKM_${vt}${c}}${d}CI${ITKM_${vt}${c}}${d}" + "itk::CudaImage< ${ITKT_${vt}${c}}, ${d} >, itk::CudaImage< ${ITKT_${vt}${c}}, ${d} >") + endforeach() + endforeach() + endforeach() + +itk_end_wrap_class()