Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion c/parallel/src/for/for_op_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ std::string get_for_kernel(cccl_op_t user_op, cccl_iterator_t iter)
R"XXX(
#include <cuda/std/iterator>
#include <cub/agent/agent_for.cuh>
#include <cub/device/dispatch/kernels/for_each.cuh>
#include <cub/device/dispatch/kernels/kernel_for_each.cuh>

struct __align__({2}) storage_t {{
char data[{3}];
Expand Down
2 changes: 1 addition & 1 deletion c/parallel/src/histogram.cu
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ CUresult cccl_device_histogram_build_ex(
constexpr std::string_view src_template = R"XXX(
#include <cub/agent/agent_histogram.cuh>
#include <cub/block/block_load.cuh>
#include <cub/device/dispatch/kernels/histogram.cuh>
#include <cub/device/dispatch/kernels/kernel_histogram.cuh>

struct __align__({1}) storage_t {{
char data[{0}];
Expand Down
2 changes: 1 addition & 1 deletion c/parallel/src/merge_sort.cu
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ CUresult cccl_device_merge_sort_build_ex(
std::string final_src = std::format(
R"XXX(
#include <cub/device/dispatch/tuning/tuning_merge_sort.cuh>
#include <cub/device/dispatch/kernels/merge_sort.cuh>
#include <cub/device/dispatch/kernels/kernel_merge_sort.cuh>
#include <cub/util_type.cuh> // needed for cub::NullType
struct __align__({1}) storage_t {{
char data[{0}];
Expand Down
2 changes: 1 addition & 1 deletion c/parallel/src/radix_sort.cu
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ CUresult cccl_device_radix_sort_build_ex(
const std::string final_src = std::format(
R"XXX(
#include <cub/device/dispatch/tuning/tuning_radix_sort.cuh>
#include <cub/device/dispatch/kernels/radix_sort.cuh>
#include <cub/device/dispatch/kernels/kernel_radix_sort.cuh>
#include <cub/agent/single_pass_scan_operators.cuh>

struct __align__({1}) storage_t {{
Expand Down
2 changes: 1 addition & 1 deletion c/parallel/src/reduce.cu
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ CUresult cccl_device_reduce_build_ex(
std::string final_src = std::format(
R"XXX(
#include <cub/device/dispatch/tuning/tuning_reduce.cuh>
#include <cub/device/dispatch/kernels/reduce.cuh>
#include <cub/device/dispatch/kernels/kernel_reduce.cuh>
{0}
struct __align__({2}) storage_t {{
char data[{1}];
Expand Down
2 changes: 1 addition & 1 deletion c/parallel/src/scan.cu
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ CUresult cccl_device_scan_build_ex(
R"XXX(
#include <cub/device/dispatch/tuning/tuning_scan.cuh>
#include <cub/block/block_scan.cuh>
#include <cub/device/dispatch/kernels/scan.cuh>
#include <cub/device/dispatch/kernels/kernel_scan.cuh>
#include <cub/agent/single_pass_scan_operators.cuh>
struct __align__({1}) storage_t {{
char data[{0}];
Expand Down
2 changes: 1 addition & 1 deletion c/parallel/src/segmented_reduce.cu
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ CUresult cccl_device_segmented_reduce_build_ex(
R"XXX(
#include <cub/block/block_reduce.cuh>
#include <cub/device/dispatch/tuning/tuning_reduce.cuh>
#include <cub/device/dispatch/kernels/segmented_reduce.cuh>
#include <cub/device/dispatch/kernels/kernel_segmented_reduce.cuh>
{0}
struct __align__({2}) storage_t {{
char data[{1}];
Expand Down
4 changes: 2 additions & 2 deletions c/parallel/src/three_way_partition.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <cub/detail/launcher/cuda_driver.cuh> // cub::detail::CudaDriverLauncherFactory
#include <cub/detail/ptx-json-parser.h>
#include <cub/device/dispatch/dispatch_three_way_partition.cuh> // cub::DispatchThreeWayPartitionIf
#include <cub/device/dispatch/kernels/three_way_partition.cuh> // DeviceThreeWayPartition kernels
#include <cub/device/dispatch/kernels/kernel_three_way_partition.cuh> // DeviceThreeWayPartition kernels
#include <cub/device/dispatch/tuning/tuning_three_way_partition.cuh> // policy_hub

#include <exception>
Expand Down Expand Up @@ -195,7 +195,7 @@ CUresult cccl_device_three_way_partition_build_ex(
std::string final_src = std::format(
R"XXX(
#include <cub/device/dispatch/tuning/tuning_three_way_partition.cuh>
#include <cub/device/dispatch/kernels/three_way_partition.cuh>
#include <cub/device/dispatch/kernels/kernel_three_way_partition.cuh>
{0}
struct __align__({2}) storage_t {{
char data[{1}];
Expand Down
4 changes: 2 additions & 2 deletions c/parallel/src/transform.cu
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ CUresult cccl_device_unary_transform_build_ex(
std::string final_src = std::format(
R"XXX(
#include <cub/device/dispatch/tuning/tuning_transform.cuh>
#include <cub/device/dispatch/kernels/transform.cuh>
#include <cub/device/dispatch/kernels/kernel_transform.cuh>
struct __align__({1}) input_storage_t {{
char data[{0}];
}};
Expand Down Expand Up @@ -507,7 +507,7 @@ CUresult cccl_device_binary_transform_build_ex(

std::string final_src = std::format(
R"XXX(
#include <cub/device/dispatch/kernels/transform.cuh>
#include <cub/device/dispatch/kernels/kernel_transform.cuh>
struct __align__({1}) input1_storage_t {{
char data[{0}];
}};
Expand Down
4 changes: 2 additions & 2 deletions c/parallel/src/unique_by_key.cu
Original file line number Diff line number Diff line change
Expand Up @@ -259,8 +259,8 @@ CUresult cccl_device_unique_by_key_build_ex(
std::string final_src = std::format(
R"XXX(
#include <cub/device/dispatch/tuning/tuning_unique_by_key.cuh>
#include <cub/device/dispatch/kernels/scan.cuh>
#include <cub/device/dispatch/kernels/unique_by_key.cuh>
#include <cub/device/dispatch/kernels/kernel_scan.cuh>
#include <cub/device/dispatch/kernels/kernel_unique_by_key.cuh>
#include <cub/agent/single_pass_scan_operators.cuh>
struct __align__({1}) storage_t {{
char data[{0}];
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/device/dispatch/dispatch_for.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#endif // no system header

#include <cub/agent/agent_for.cuh>
#include <cub/device/dispatch/kernels/for_each.cuh>
#include <cub/device/dispatch/kernels/kernel_for_each.cuh>
#include <cub/device/dispatch/tuning/tuning_for.cuh>
#include <cub/thread/thread_load.cuh>
#include <cub/util_device.cuh>
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/device/dispatch/dispatch_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#endif // no system header

#include <cub/agent/agent_histogram.cuh>
#include <cub/device/dispatch/kernels/histogram.cuh>
#include <cub/device/dispatch/kernels/kernel_histogram.cuh>
#include <cub/device/dispatch/tuning/tuning_histogram.cuh>
#include <cub/grid/grid_queue.cuh>
#include <cub/thread/thread_search.cuh>
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/device/dispatch/dispatch_merge_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#endif // no system header

#include <cub/agent/agent_merge_sort.cuh>
#include <cub/device/dispatch/kernels/merge_sort.cuh>
#include <cub/device/dispatch/kernels/kernel_merge_sort.cuh>
#include <cub/device/dispatch/tuning/tuning_merge_sort.cuh>
#include <cub/util_device.cuh>
#include <cub/util_math.cuh>
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/device/dispatch/dispatch_radix_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# pragma system_header
#endif // no system header

#include <cub/device/dispatch/kernels/radix_sort.cuh>
#include <cub/device/dispatch/kernels/kernel_radix_sort.cuh>
#include <cub/device/dispatch/tuning/tuning_radix_sort.cuh>
#include <cub/util_debug.cuh>
#include <cub/util_device.cuh>
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/dispatch_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@

#include <cub/detail/launcher/cuda_runtime.cuh>
#include <cub/detail/type_traits.cuh> // for cub::detail::invoke_result_t
#include <cub/device/dispatch/kernels/reduce.cuh>
#include <cub/device/dispatch/kernels/segmented_reduce.cuh>
#include <cub/device/dispatch/kernels/kernel_reduce.cuh>
#include <cub/device/dispatch/kernels/kernel_segmented_reduce.cuh>
#include <cub/device/dispatch/tuning/tuning_reduce.cuh>
#include <cub/grid/grid_even_share.cuh>
#include <cub/thread/thread_operators.cuh>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

#include <cub/detail/launcher/cuda_runtime.cuh>
#include <cub/detail/type_traits.cuh> // for cub::detail::invoke_result_t
#include <cub/device/dispatch/kernels/reduce.cuh>
#include <cub/device/dispatch/kernels/kernel_reduce.cuh>
#include <cub/device/dispatch/tuning/tuning_reduce.cuh>
#include <cub/grid/grid_even_share.cuh>
#include <cub/thread/thread_operators.cuh>
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/device/dispatch/dispatch_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

#include <cub/agent/agent_scan.cuh>
#include <cub/detail/launcher/cuda_runtime.cuh>
#include <cub/device/dispatch/kernels/scan.cuh>
#include <cub/device/dispatch/kernels/kernel_scan.cuh>
#include <cub/device/dispatch/tuning/tuning_scan.cuh>
#include <cub/thread/thread_operators.cuh>
#include <cub/util_debug.cuh>
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/device/dispatch/dispatch_segmented_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include <cub/detail/device_double_buffer.cuh>
#include <cub/detail/temporary_storage.cuh>
#include <cub/device/device_partition.cuh>
#include <cub/device/dispatch/kernels/segmented_sort.cuh>
#include <cub/device/dispatch/kernels/kernel_segmented_sort.cuh>
#include <cub/device/dispatch/tuning/tuning_segmented_sort.cuh>
#include <cub/util_debug.cuh>
#include <cub/util_device.cuh>
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/device/dispatch/dispatch_three_way_partition.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

#include <cub/agent/agent_three_way_partition.cuh>
#include <cub/device/dispatch/dispatch_scan.cuh>
#include <cub/device/dispatch/kernels/three_way_partition.cuh>
#include <cub/device/dispatch/kernels/kernel_three_way_partition.cuh>
#include <cub/device/dispatch/tuning/tuning_three_way_partition.cuh>
#include <cub/util_device.cuh>
#include <cub/util_math.cuh>
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/device/dispatch/dispatch_transform.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include <cub/detail/detect_cuda_runtime.cuh>
#include <cub/detail/launcher/cuda_runtime.cuh>
#include <cub/detail/uninitialized_copy.cuh>
#include <cub/device/dispatch/kernels/transform.cuh>
#include <cub/device/dispatch/kernels/kernel_transform.cuh>
#include <cub/util_arch.cuh>
#include <cub/util_device.cuh>
#include <cub/util_math.cuh>
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/dispatch_unique_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
#endif // no system header

#include <cub/device/dispatch/dispatch_scan.cuh>
#include <cub/device/dispatch/kernels/scan.cuh>
#include <cub/device/dispatch/kernels/unique_by_key.cuh>
#include <cub/device/dispatch/kernels/kernel_scan.cuh>
#include <cub/device/dispatch/kernels/kernel_unique_by_key.cuh>
#include <cub/device/dispatch/tuning/tuning_unique_by_key.cuh>
#include <cub/util_device.cuh>
#include <cub/util_math.cuh>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#endif // no system header

#include <cub/agent/agent_reduce.cuh>
#include <cub/device/dispatch/kernels/reduce.cuh> // finalize_and_store_aggregate
#include <cub/device/dispatch/kernels/kernel_reduce.cuh> // finalize_and_store_aggregate
#include <cub/iterator/arg_index_input_iterator.cuh>

CUB_NAMESPACE_BEGIN
Expand Down
20 changes: 10 additions & 10 deletions cub/test/catch2_test_nvrtc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,16 @@ TEST_CASE("Test nvrtc", "[test][nvrtc]")
#include <cub/block/block_shuffle.cuh>
#include <cub/block/block_store.cuh>
#include <cub/block/radix_rank_sort_operations.cuh>
#include <cub/device/dispatch/kernels/reduce.cuh>
#include <cub/device/dispatch/kernels/for_each.cuh>
#include <cub/device/dispatch/kernels/scan.cuh>
#include <cub/device/dispatch/kernels/merge_sort.cuh>
#include <cub/device/dispatch/kernels/segmented_reduce.cuh>
#include <cub/device/dispatch/kernels/radix_sort.cuh>
#include <cub/device/dispatch/kernels/unique_by_key.cuh>
#include <cub/device/dispatch/kernels/transform.cuh>
#include <cub/device/dispatch/kernels/histogram.cuh>
#include <cub/device/dispatch/kernels/segmented_sort.cuh>
#include <cub/device/dispatch/kernels/kernel_reduce.cuh>
#include <cub/device/dispatch/kernels/kernel_for_each.cuh>
#include <cub/device/dispatch/kernels/kernel_scan.cuh>
#include <cub/device/dispatch/kernels/kernel_merge_sort.cuh>
#include <cub/device/dispatch/kernels/kernel_segmented_reduce.cuh>
#include <cub/device/dispatch/kernels/kernel_radix_sort.cuh>
#include <cub/device/dispatch/kernels/kernel_unique_by_key.cuh>
#include <cub/device/dispatch/kernels/kernel_transform.cuh>
#include <cub/device/dispatch/kernels/kernel_histogram.cuh>
#include <cub/device/dispatch/kernels/kernel_segmented_sort.cuh>
#include <cub/iterator/arg_index_input_iterator.cuh>
#include <cub/iterator/cache_modified_input_iterator.cuh>
#include <cub/iterator/cache_modified_output_iterator.cuh>
Expand Down