From 03171f2ad85c6fe5fa0b086e2ff1680e53a7582e Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Wed, 29 Oct 2025 21:19:55 +0100 Subject: [PATCH 1/2] Prefix CUB kernel headers with `kernel_` This makes the filenames more consistent with the rest of CUB --- c/parallel/src/for/for_op_helper.cpp | 2 +- c/parallel/src/histogram.cu | 2 +- c/parallel/src/merge_sort.cu | 2 +- c/parallel/src/radix_sort.cu | 2 +- c/parallel/src/reduce.cu | 2 +- c/parallel/src/scan.cu | 2 +- c/parallel/src/segmented_reduce.cu | 2 +- c/parallel/src/three_way_partition.cu | 4 ++-- c/parallel/src/transform.cu | 2 +- c/parallel/src/unique_by_key.cu | 4 ++-- cub/cub/device/dispatch/dispatch_for.cuh | 2 +- .../device/dispatch/dispatch_histogram.cuh | 2 +- .../device/dispatch/dispatch_merge_sort.cuh | 2 +- .../device/dispatch/dispatch_radix_sort.cuh | 2 +- cub/cub/device/dispatch/dispatch_reduce.cuh | 4 ++-- .../dispatch_reduce_nondeterministic.cuh | 2 +- cub/cub/device/dispatch/dispatch_scan.cuh | 2 +- .../dispatch/dispatch_segmented_sort.cuh | 2 +- .../dispatch/dispatch_three_way_partition.cuh | 2 +- .../device/dispatch/dispatch_transform.cuh | 2 +- .../dispatch/dispatch_unique_by_key.cuh | 4 ++-- .../{for_each.cuh => kernel_for_each.cuh} | 0 .../{histogram.cuh => kernel_histogram.cuh} | 0 .../{merge_sort.cuh => kernel_merge_sort.cuh} | 0 .../{radix_sort.cuh => kernel_radix_sort.cuh} | 0 .../kernels/{reduce.cuh => kernel_reduce.cuh} | 0 .../kernels/{scan.cuh => kernel_scan.cuh} | 0 ...reduce.cuh => kernel_segmented_reduce.cuh} | 2 +- ...ted_sort.cuh => kernel_segmented_sort.cuh} | 0 ...ion.cuh => kernel_three_way_partition.cuh} | 0 .../{transform.cuh => kernel_transform.cuh} | 0 ...ue_by_key.cuh => kernel_unique_by_key.cuh} | 0 cub/test/catch2_test_nvrtc.cu | 20 +++++++++---------- 33 files changed, 36 insertions(+), 36 deletions(-) rename cub/cub/device/dispatch/kernels/{for_each.cuh => kernel_for_each.cuh} (100%) rename cub/cub/device/dispatch/kernels/{histogram.cuh => 
kernel_histogram.cuh} (100%) rename cub/cub/device/dispatch/kernels/{merge_sort.cuh => kernel_merge_sort.cuh} (100%) rename cub/cub/device/dispatch/kernels/{radix_sort.cuh => kernel_radix_sort.cuh} (100%) rename cub/cub/device/dispatch/kernels/{reduce.cuh => kernel_reduce.cuh} (100%) rename cub/cub/device/dispatch/kernels/{scan.cuh => kernel_scan.cuh} (100%) rename cub/cub/device/dispatch/kernels/{segmented_reduce.cuh => kernel_segmented_reduce.cuh} (99%) rename cub/cub/device/dispatch/kernels/{segmented_sort.cuh => kernel_segmented_sort.cuh} (100%) rename cub/cub/device/dispatch/kernels/{three_way_partition.cuh => kernel_three_way_partition.cuh} (100%) rename cub/cub/device/dispatch/kernels/{transform.cuh => kernel_transform.cuh} (100%) rename cub/cub/device/dispatch/kernels/{unique_by_key.cuh => kernel_unique_by_key.cuh} (100%) diff --git a/c/parallel/src/for/for_op_helper.cpp b/c/parallel/src/for/for_op_helper.cpp index ca6ed8a0286..5f36736c8b3 100644 --- a/c/parallel/src/for/for_op_helper.cpp +++ b/c/parallel/src/for/for_op_helper.cpp @@ -129,7 +129,7 @@ std::string get_for_kernel(cccl_op_t user_op, cccl_iterator_t iter) R"XXX( #include #include -#include +#include struct __align__({2}) storage_t {{ char data[{3}]; diff --git a/c/parallel/src/histogram.cu b/c/parallel/src/histogram.cu index 1fc20ac6d50..380f08b25f1 100644 --- a/c/parallel/src/histogram.cu +++ b/c/parallel/src/histogram.cu @@ -212,7 +212,7 @@ CUresult cccl_device_histogram_build_ex( constexpr std::string_view src_template = R"XXX( #include #include -#include +#include struct __align__({1}) storage_t {{ char data[{0}]; diff --git a/c/parallel/src/merge_sort.cu b/c/parallel/src/merge_sort.cu index b72e0a796b0..992351775a3 100644 --- a/c/parallel/src/merge_sort.cu +++ b/c/parallel/src/merge_sort.cu @@ -286,7 +286,7 @@ CUresult cccl_device_merge_sort_build_ex( std::string final_src = std::format( R"XXX( #include -#include +#include #include // needed for cub::NullType struct __align__({1}) storage_t 
{{ char data[{0}]; diff --git a/c/parallel/src/radix_sort.cu b/c/parallel/src/radix_sort.cu index 27545c9c3f7..987f9c354c1 100644 --- a/c/parallel/src/radix_sort.cu +++ b/c/parallel/src/radix_sort.cu @@ -311,7 +311,7 @@ CUresult cccl_device_radix_sort_build_ex( const std::string final_src = std::format( R"XXX( #include -#include +#include #include struct __align__({1}) storage_t {{ diff --git a/c/parallel/src/reduce.cu b/c/parallel/src/reduce.cu index eb9c45c9421..268b2b0a7b7 100644 --- a/c/parallel/src/reduce.cu +++ b/c/parallel/src/reduce.cu @@ -198,7 +198,7 @@ CUresult cccl_device_reduce_build_ex( std::string final_src = std::format( R"XXX( #include -#include +#include {0} struct __align__({2}) storage_t {{ char data[{1}]; diff --git a/c/parallel/src/scan.cu b/c/parallel/src/scan.cu index aedd7bc050a..a2b47159d2d 100644 --- a/c/parallel/src/scan.cu +++ b/c/parallel/src/scan.cu @@ -260,7 +260,7 @@ CUresult cccl_device_scan_build_ex( R"XXX( #include #include -#include +#include #include struct __align__({1}) storage_t {{ char data[{0}]; diff --git a/c/parallel/src/segmented_reduce.cu b/c/parallel/src/segmented_reduce.cu index 71837b30542..29edec1f7d5 100644 --- a/c/parallel/src/segmented_reduce.cu +++ b/c/parallel/src/segmented_reduce.cu @@ -187,7 +187,7 @@ CUresult cccl_device_segmented_reduce_build_ex( R"XXX( #include #include -#include +#include {0} struct __align__({2}) storage_t {{ char data[{1}]; diff --git a/c/parallel/src/three_way_partition.cu b/c/parallel/src/three_way_partition.cu index 1077ded5279..8f7467af167 100644 --- a/c/parallel/src/three_way_partition.cu +++ b/c/parallel/src/three_way_partition.cu @@ -12,7 +12,7 @@ #include // cub::detail::CudaDriverLauncherFactory #include #include // cub::DispatchThreeWayPartitionIf -#include // DeviceThreeWayPartition kernels +#include // DeviceThreeWayPartition kernels #include // policy_hub #include @@ -195,7 +195,7 @@ CUresult cccl_device_three_way_partition_build_ex( std::string final_src = std::format( 
R"XXX( #include -#include +#include {0} struct __align__({2}) storage_t {{ char data[{1}]; diff --git a/c/parallel/src/transform.cu b/c/parallel/src/transform.cu index 449d4a5ded5..7466c4dadfb 100644 --- a/c/parallel/src/transform.cu +++ b/c/parallel/src/transform.cu @@ -507,7 +507,7 @@ CUresult cccl_device_binary_transform_build_ex( std::string final_src = std::format( R"XXX( -#include +#include struct __align__({1}) input1_storage_t {{ char data[{0}]; }}; diff --git a/c/parallel/src/unique_by_key.cu b/c/parallel/src/unique_by_key.cu index da33e2ddeac..d727525764c 100644 --- a/c/parallel/src/unique_by_key.cu +++ b/c/parallel/src/unique_by_key.cu @@ -259,8 +259,8 @@ CUresult cccl_device_unique_by_key_build_ex( std::string final_src = std::format( R"XXX( #include -#include -#include +#include +#include #include struct __align__({1}) storage_t {{ char data[{0}]; diff --git a/cub/cub/device/dispatch/dispatch_for.cuh b/cub/cub/device/dispatch/dispatch_for.cuh index a39bffd68e4..4e586dc6491 100644 --- a/cub/cub/device/dispatch/dispatch_for.cuh +++ b/cub/cub/device/dispatch/dispatch_for.cuh @@ -14,7 +14,7 @@ #endif // no system header #include -#include +#include #include #include #include diff --git a/cub/cub/device/dispatch/dispatch_histogram.cuh b/cub/cub/device/dispatch/dispatch_histogram.cuh index d3fc11e29e6..0f22c3514b1 100644 --- a/cub/cub/device/dispatch/dispatch_histogram.cuh +++ b/cub/cub/device/dispatch/dispatch_histogram.cuh @@ -23,7 +23,7 @@ #endif // no system header #include -#include +#include #include #include #include diff --git a/cub/cub/device/dispatch/dispatch_merge_sort.cuh b/cub/cub/device/dispatch/dispatch_merge_sort.cuh index 511ea96e3eb..8a47117d877 100644 --- a/cub/cub/device/dispatch/dispatch_merge_sort.cuh +++ b/cub/cub/device/dispatch/dispatch_merge_sort.cuh @@ -14,7 +14,7 @@ #endif // no system header #include -#include +#include #include #include #include diff --git a/cub/cub/device/dispatch/dispatch_radix_sort.cuh 
b/cub/cub/device/dispatch/dispatch_radix_sort.cuh index aa2db3d2c47..cc3d5b192d1 100644 --- a/cub/cub/device/dispatch/dispatch_radix_sort.cuh +++ b/cub/cub/device/dispatch/dispatch_radix_sort.cuh @@ -20,7 +20,7 @@ # pragma system_header #endif // no system header -#include +#include #include #include #include diff --git a/cub/cub/device/dispatch/dispatch_reduce.cuh b/cub/cub/device/dispatch/dispatch_reduce.cuh index dddef207a44..bf1291c7221 100644 --- a/cub/cub/device/dispatch/dispatch_reduce.cuh +++ b/cub/cub/device/dispatch/dispatch_reduce.cuh @@ -22,8 +22,8 @@ #include #include // for cub::detail::invoke_result_t -#include -#include +#include +#include #include #include #include diff --git a/cub/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh b/cub/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh index 88230a89977..d1800b60758 100644 --- a/cub/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +++ b/cub/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh @@ -20,7 +20,7 @@ #include #include // for cub::detail::invoke_result_t -#include +#include #include #include #include diff --git a/cub/cub/device/dispatch/dispatch_scan.cuh b/cub/cub/device/dispatch/dispatch_scan.cuh index 505a16ae1b5..e9b11f1aef4 100644 --- a/cub/cub/device/dispatch/dispatch_scan.cuh +++ b/cub/cub/device/dispatch/dispatch_scan.cuh @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include diff --git a/cub/cub/device/dispatch/dispatch_segmented_sort.cuh b/cub/cub/device/dispatch/dispatch_segmented_sort.cuh index 148281ee219..eafab4cea49 100644 --- a/cub/cub/device/dispatch/dispatch_segmented_sort.cuh +++ b/cub/cub/device/dispatch/dispatch_segmented_sort.cuh @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cub/cub/device/dispatch/dispatch_three_way_partition.cuh b/cub/cub/device/dispatch/dispatch_three_way_partition.cuh index 4b1305dea2e..41675038f13 100644 --- 
a/cub/cub/device/dispatch/dispatch_three_way_partition.cuh +++ b/cub/cub/device/dispatch/dispatch_three_way_partition.cuh @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include #include diff --git a/cub/cub/device/dispatch/dispatch_transform.cuh b/cub/cub/device/dispatch/dispatch_transform.cuh index f54376b8c42..f2d3020b1b6 100644 --- a/cub/cub/device/dispatch/dispatch_transform.cuh +++ b/cub/cub/device/dispatch/dispatch_transform.cuh @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cub/cub/device/dispatch/dispatch_unique_by_key.cuh b/cub/cub/device/dispatch/dispatch_unique_by_key.cuh index 1c6bd232a03..a3c6cebd22f 100644 --- a/cub/cub/device/dispatch/dispatch_unique_by_key.cuh +++ b/cub/cub/device/dispatch/dispatch_unique_by_key.cuh @@ -21,8 +21,8 @@ #endif // no system header #include -#include -#include +#include +#include #include #include #include diff --git a/cub/cub/device/dispatch/kernels/for_each.cuh b/cub/cub/device/dispatch/kernels/kernel_for_each.cuh similarity index 100% rename from cub/cub/device/dispatch/kernels/for_each.cuh rename to cub/cub/device/dispatch/kernels/kernel_for_each.cuh diff --git a/cub/cub/device/dispatch/kernels/histogram.cuh b/cub/cub/device/dispatch/kernels/kernel_histogram.cuh similarity index 100% rename from cub/cub/device/dispatch/kernels/histogram.cuh rename to cub/cub/device/dispatch/kernels/kernel_histogram.cuh diff --git a/cub/cub/device/dispatch/kernels/merge_sort.cuh b/cub/cub/device/dispatch/kernels/kernel_merge_sort.cuh similarity index 100% rename from cub/cub/device/dispatch/kernels/merge_sort.cuh rename to cub/cub/device/dispatch/kernels/kernel_merge_sort.cuh diff --git a/cub/cub/device/dispatch/kernels/radix_sort.cuh b/cub/cub/device/dispatch/kernels/kernel_radix_sort.cuh similarity index 100% rename from cub/cub/device/dispatch/kernels/radix_sort.cuh rename to cub/cub/device/dispatch/kernels/kernel_radix_sort.cuh diff --git 
a/cub/cub/device/dispatch/kernels/reduce.cuh b/cub/cub/device/dispatch/kernels/kernel_reduce.cuh similarity index 100% rename from cub/cub/device/dispatch/kernels/reduce.cuh rename to cub/cub/device/dispatch/kernels/kernel_reduce.cuh diff --git a/cub/cub/device/dispatch/kernels/scan.cuh b/cub/cub/device/dispatch/kernels/kernel_scan.cuh similarity index 100% rename from cub/cub/device/dispatch/kernels/scan.cuh rename to cub/cub/device/dispatch/kernels/kernel_scan.cuh diff --git a/cub/cub/device/dispatch/kernels/segmented_reduce.cuh b/cub/cub/device/dispatch/kernels/kernel_segmented_reduce.cuh similarity index 99% rename from cub/cub/device/dispatch/kernels/segmented_reduce.cuh rename to cub/cub/device/dispatch/kernels/kernel_segmented_reduce.cuh index 0e4777db3a7..cc8182707c0 100644 --- a/cub/cub/device/dispatch/kernels/segmented_reduce.cuh +++ b/cub/cub/device/dispatch/kernels/kernel_segmented_reduce.cuh @@ -14,7 +14,7 @@ #endif // no system header #include -#include // finalize_and_store_aggregate +#include // finalize_and_store_aggregate #include CUB_NAMESPACE_BEGIN diff --git a/cub/cub/device/dispatch/kernels/segmented_sort.cuh b/cub/cub/device/dispatch/kernels/kernel_segmented_sort.cuh similarity index 100% rename from cub/cub/device/dispatch/kernels/segmented_sort.cuh rename to cub/cub/device/dispatch/kernels/kernel_segmented_sort.cuh diff --git a/cub/cub/device/dispatch/kernels/three_way_partition.cuh b/cub/cub/device/dispatch/kernels/kernel_three_way_partition.cuh similarity index 100% rename from cub/cub/device/dispatch/kernels/three_way_partition.cuh rename to cub/cub/device/dispatch/kernels/kernel_three_way_partition.cuh diff --git a/cub/cub/device/dispatch/kernels/transform.cuh b/cub/cub/device/dispatch/kernels/kernel_transform.cuh similarity index 100% rename from cub/cub/device/dispatch/kernels/transform.cuh rename to cub/cub/device/dispatch/kernels/kernel_transform.cuh diff --git a/cub/cub/device/dispatch/kernels/unique_by_key.cuh 
b/cub/cub/device/dispatch/kernels/kernel_unique_by_key.cuh similarity index 100% rename from cub/cub/device/dispatch/kernels/unique_by_key.cuh rename to cub/cub/device/dispatch/kernels/kernel_unique_by_key.cuh diff --git a/cub/test/catch2_test_nvrtc.cu b/cub/test/catch2_test_nvrtc.cu index 04e2a3c457f..fbc9f7c668a 100644 --- a/cub/test/catch2_test_nvrtc.cu +++ b/cub/test/catch2_test_nvrtc.cu @@ -51,16 +51,16 @@ TEST_CASE("Test nvrtc", "[test][nvrtc]") #include #include #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include #include #include #include From 311eb9ebddedc46b28626fa15615e4fd04d26cbf Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Fri, 31 Oct 2025 20:04:25 +0100 Subject: [PATCH 2/2] fix --- c/parallel/src/transform.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/parallel/src/transform.cu b/c/parallel/src/transform.cu index 7466c4dadfb..bae8bdbb171 100644 --- a/c/parallel/src/transform.cu +++ b/c/parallel/src/transform.cu @@ -285,7 +285,7 @@ CUresult cccl_device_unary_transform_build_ex( std::string final_src = std::format( R"XXX( #include -#include +#include struct __align__({1}) input_storage_t {{ char data[{0}]; }};