diff --git a/benchmarks/bench/merge_sort/keys.cu b/benchmarks/bench/merge_sort/keys.cu index 76eb1d26e..4a6a62868 100644 --- a/benchmarks/bench/merge_sort/keys.cu +++ b/benchmarks/bench/merge_sort/keys.cu @@ -9,7 +9,7 @@ #ifndef TUNE_BASE #define TUNE_THREADS_PER_BLOCK (1 << TUNE_THREADS_PER_BLOCK_POW2) -#endif +#endif // TUNE_BASE using value_t = cub::NullType; @@ -18,18 +18,18 @@ using value_t = cub::NullType; #if TUNE_TRANSPOSE == 0 #define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_DIRECT #define TUNE_STORE_ALGORITHM cub::BLOCK_STORE_DIRECT -#else +#else // TUNE_TRANSPOSE == 1 #define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_WARP_TRANSPOSE #define TUNE_STORE_ALGORITHM cub::BLOCK_STORE_WARP_TRANSPOSE -#endif +#endif // TUNE_TRANSPOSE #if TUNE_LOAD == 0 #define TUNE_LOAD_MODIFIER cub::LOAD_DEFAULT #elif TUNE_LOAD == 1 #define TUNE_LOAD_MODIFIER cub::LOAD_LDG -#else +#else // TUNE_LOAD == 2 #define TUNE_LOAD_MODIFIER cub::LOAD_CA -#endif +#endif // TUNE_LOAD template struct policy_hub_t @@ -46,7 +46,7 @@ struct policy_hub_t using MaxPolicy = policy_t; }; -#endif +#endif // !TUNE_BASE template void merge_sort_keys(nvbench::state &state, nvbench::type_list) @@ -69,10 +69,10 @@ void merge_sort_keys(nvbench::state &state, nvbench::type_list) offset_t, compare_op_t, policy_t>; -#else +#else // TUNE_BASE using dispatch_t = cub:: DispatchMergeSort; -#endif +#endif // TUNE_BASE // Retrieve axis parameters const auto elements = static_cast(state.get_int64("Elements{io}")); diff --git a/benchmarks/bench/merge_sort/pairs.cu b/benchmarks/bench/merge_sort/pairs.cu index 8a7c01151..d878fe99b 100644 --- a/benchmarks/bench/merge_sort/pairs.cu +++ b/benchmarks/bench/merge_sort/pairs.cu @@ -15,18 +15,18 @@ #if TUNE_TRANSPOSE == 0 #define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_DIRECT #define TUNE_STORE_ALGORITHM cub::BLOCK_STORE_DIRECT -#else +#else // TUNE_TRANSPOSE == 1 #define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_WARP_TRANSPOSE #define TUNE_STORE_ALGORITHM cub::BLOCK_STORE_WARP_TRANSPOSE -#endif 
+#endif // TUNE_TRANSPOSE #if TUNE_LOAD == 0 #define TUNE_LOAD_MODIFIER cub::LOAD_DEFAULT #elif TUNE_LOAD == 1 #define TUNE_LOAD_MODIFIER cub::LOAD_LDG -#else +#else // TUNE_LOAD == 2 #define TUNE_LOAD_MODIFIER cub::LOAD_CA -#endif +#endif // TUNE_LOAD template struct policy_hub_t @@ -43,7 +43,7 @@ struct policy_hub_t using MaxPolicy = policy_t; }; -#endif +#endif // !TUNE_BASE template void merge_sort_keys(nvbench::state &state, nvbench::type_list) @@ -66,10 +66,10 @@ void merge_sort_keys(nvbench::state &state, nvbench::type_list; -#else +#else // TUNE_BASE using dispatch_t = cub:: DispatchMergeSort; -#endif +#endif // TUNE_BASE // Retrieve axis parameters const auto elements = static_cast(state.get_int64("Elements{io}")); @@ -124,15 +124,15 @@ void merge_sort_keys(nvbench::state &state, nvbench::type_list; -#else +#else // !defined(TUNE_KeyT) using key_types = all_types; -#endif +#endif // TUNE_KeyT #ifdef TUNE_ValueT using value_types = nvbench::type_list; -#else +#else // !defined(TUNE_ValueT) using value_types = nvbench::type_list; -#endif +#endif // TUNE_ValueT NVBENCH_BENCH_TYPES(merge_sort_keys, NVBENCH_TYPE_AXES(key_types, value_types, offset_types)) .set_name("cub::DeviceMergeSort::SortPairs") diff --git a/benchmarks/bench/partition/flagged.cu b/benchmarks/bench/partition/flagged.cu index 6f59df8a9..395ddb620 100644 --- a/benchmarks/bench/partition/flagged.cu +++ b/benchmarks/bench/partition/flagged.cu @@ -14,15 +14,15 @@ constexpr bool may_alias = false; #if !TUNE_BASE #if TUNE_TRANSPOSE == 0 #define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_DIRECT -#else +#else // TUNE_TRANSPOSE == 1 #define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_WARP_TRANSPOSE -#endif +#endif // TUNE_TRANSPOSE #if TUNE_LOAD == 0 #define TUNE_LOAD_MODIFIER cub::LOAD_DEFAULT -#else +#else // TUNE_LOAD == 1 #define TUNE_LOAD_MODIFIER cub::LOAD_CA -#endif +#endif // TUNE_LOAD template struct policy_hub_t @@ -44,7 +44,7 @@ struct policy_hub_t using MaxPolicy = policy_t; }; -#endif +#endif // 
!TUNE_BASE template void partition(nvbench::state &state, nvbench::type_list) @@ -57,7 +57,7 @@ void partition(nvbench::state &state, nvbench::type_list) using equality_op_t = cub::NullType; using offset_t = OffsetT; -#if !TUNE_BASE + #if !TUNE_BASE using policy_t = policy_hub_t; using dispatch_t = cub::DispatchSelectIf) keep_rejects, may_alias, policy_t>; -#else + #else // TUNE_BASE using dispatch_t = cub::DispatchSelectIf) offset_t, keep_rejects, may_alias>; -#endif + #endif // TUNE_BASE // Retrieve axis parameters const auto elements = static_cast(state.get_int64("Elements{io}")); diff --git a/benchmarks/bench/partition/if.cu b/benchmarks/bench/partition/if.cu index 8a2b9fae7..3e9f79e26 100644 --- a/benchmarks/bench/partition/if.cu +++ b/benchmarks/bench/partition/if.cu @@ -14,15 +14,15 @@ constexpr bool may_alias = false; #if !TUNE_BASE #if TUNE_TRANSPOSE == 0 #define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_DIRECT -#else +#else // TUNE_TRANSPOSE == 1 #define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_WARP_TRANSPOSE -#endif +#endif // TUNE_TRANSPOSE #if TUNE_LOAD == 0 #define TUNE_LOAD_MODIFIER cub::LOAD_DEFAULT -#else +#else // TUNE_LOAD == 1 #define TUNE_LOAD_MODIFIER cub::LOAD_CA -#endif +#endif // TUNE_LOAD template struct policy_hub_t @@ -44,7 +44,7 @@ struct policy_hub_t using MaxPolicy = policy_t; }; -#endif +#endif // !TUNE_BASE template struct less_then_t @@ -79,7 +79,7 @@ void partition(nvbench::state &state, nvbench::type_list) using equality_op_t = cub::NullType; using offset_t = OffsetT; -#if !TUNE_BASE + #if !TUNE_BASE using policy_t = policy_hub_t; using dispatch_t = cub::DispatchSelectIf) keep_rejects, may_alias, policy_t>; -#else + #else // TUNE_BASE using dispatch_t = cub::DispatchSelectIf) offset_t, keep_rejects, may_alias>; -#endif + #endif // TUNE_BASE // Retrieve axis parameters const auto elements = static_cast(state.get_int64("Elements{io}")); diff --git a/benchmarks/bench/radix_sort/keys.cu b/benchmarks/bench/radix_sort/keys.cu index 
7c34e1ffd..9c085e297 100644 --- a/benchmarks/bench/radix_sort/keys.cu +++ b/benchmarks/bench/radix_sort/keys.cu @@ -95,13 +95,13 @@ constexpr bool fits_in_default_shared_memory() { return max_temp_storage_size() < 48 * 1024; } -#else +#else // TUNE_BASE template constexpr bool fits_in_default_shared_memory() { return true; } -#endif +#endif // TUNE_BASE template void radix_sort_keys(std::integral_constant, @@ -114,9 +114,9 @@ void radix_sort_keys(std::integral_constant, #if !TUNE_BASE using policy_t = policy_hub_t; using dispatch_t = cub::DispatchRadixSort; -#else +#else // TUNE_BASE using dispatch_t = cub::DispatchRadixSort; -#endif +#endif // TUNE_BASE const int begin_bit = 0; const int end_bit = sizeof(key_t) * 8; diff --git a/benchmarks/bench/radix_sort/pairs.cu b/benchmarks/bench/radix_sort/pairs.cu index 8aab0b459..36832d2d0 100644 --- a/benchmarks/bench/radix_sort/pairs.cu +++ b/benchmarks/bench/radix_sort/pairs.cu @@ -93,13 +93,13 @@ constexpr bool fits_in_default_shared_memory() { return max_temp_storage_size() < 48 * 1024; } -#else +#else // TUNE_BASE template constexpr bool fits_in_default_shared_memory() { return true; } -#endif +#endif // TUNE_BASE template void radix_sort_values(std::integral_constant, @@ -113,9 +113,9 @@ void radix_sort_values(std::integral_constant, #if !TUNE_BASE using policy_t = policy_hub_t; using dispatch_t = cub::DispatchRadixSort; -#else +#else // TUNE_BASE using dispatch_t = cub::DispatchRadixSort; -#endif +#endif // TUNE_BASE const int begin_bit = 0; const int end_bit = sizeof(key_t) * 8; @@ -201,15 +201,15 @@ void radix_sort_values(nvbench::state &state, nvbench::type_list; -#else +#else // !defined(TUNE_KeyT) using key_types = fundamental_types; -#endif +#endif // TUNE_KeyT #ifdef TUNE_ValueT using value_types = nvbench::type_list; -#else +#else // !defined(TUNE_ValueT) using value_types = nvbench::type_list; -#endif +#endif // TUNE_ValueT NVBENCH_BENCH_TYPES(radix_sort_values, NVBENCH_TYPE_AXES(key_types, value_types, 
offset_types)) .set_name("cub::DeviceRadixSort::SortPairs") diff --git a/benchmarks/bench/reduce/base.cuh b/benchmarks/bench/reduce/base.cuh index 97e66b095..98f6f3264 100644 --- a/benchmarks/bench/reduce/base.cuh +++ b/benchmarks/bench/reduce/base.cuh @@ -30,7 +30,7 @@ struct policy_hub_t using MaxPolicy = policy_t; }; -#endif +#endif // !TUNE_BASE template void reduce(nvbench::state &state, nvbench::type_list) @@ -45,9 +45,9 @@ void reduce(nvbench::state &state, nvbench::type_list) using policy_t = policy_hub_t; using dispatch_t = cub::DispatchReduce; -#else +#else // TUNE_BASE using dispatch_t = cub::DispatchReduce; -#endif +#endif // TUNE_BASE // Retrieve axis parameters const auto elements = static_cast(state.get_int64("Elements{io}")); diff --git a/benchmarks/bench/scan/exclusive/base.cuh b/benchmarks/bench/scan/exclusive/base.cuh index 43eab99fd..7823b3e97 100644 --- a/benchmarks/bench/scan/exclusive/base.cuh +++ b/benchmarks/bench/scan/exclusive/base.cuh @@ -47,13 +47,13 @@ constexpr bool fits_in_default_shared_memory() { return max_temp_storage_size() < 48 * 1024; } -#else +#else // TUNE_BASE template constexpr bool fits_in_default_shared_memory() { return true; } -#endif +#endif // TUNE_BASE template static void basic(std::integral_constant, diff --git a/benchmarks/bench/scan/exclusive/by_key.cu b/benchmarks/bench/scan/exclusive/by_key.cu index bd05e84d4..9aa12be6f 100644 --- a/benchmarks/bench/scan/exclusive/by_key.cu +++ b/benchmarks/bench/scan/exclusive/by_key.cu @@ -25,7 +25,7 @@ struct policy_hub_t using MaxPolicy = policy_t; }; -#endif +#endif // !TUNE_BASE template static void scan(nvbench::state &state, nvbench::type_list) @@ -39,7 +39,7 @@ static void scan(nvbench::state &state, nvbench::type_list; -#else + #else // TUNE_BASE using dispatch_t = cub::DispatchScanByKey; -#endif + #endif // TUNE_BASE const auto elements = static_cast(state.get_int64("Elements{io}")); @@ -112,15 +112,15 @@ using some_offset_types = nvbench::type_list; #ifdef 
TUNE_KeyT using key_types = nvbench::type_list; -#else +#else // !defined(TUNE_KeyT) using key_types = all_types; -#endif +#endif // TUNE_KeyT #ifdef TUNE_ValueT using value_types = nvbench::type_list; -#else +#else // !defined(TUNE_ValueT) using value_types = nvbench::type_list; -#endif +#endif // TUNE_ValueT NVBENCH_BENCH_TYPES(scan, NVBENCH_TYPE_AXES(key_types, value_types, some_offset_types)) .set_name("cub::DeviceScan::ExclusiveSumByKey") diff --git a/benchmarks/bench/select/flagged.cu b/benchmarks/bench/select/flagged.cu index a8e59f603..7a9e3af47 100644 --- a/benchmarks/bench/select/flagged.cu +++ b/benchmarks/bench/select/flagged.cu @@ -15,15 +15,15 @@ constexpr bool may_alias = false; #if !TUNE_BASE #if TUNE_TRANSPOSE == 0 #define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_DIRECT -#else +#else // TUNE_TRANSPOSE == 1 #define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_WARP_TRANSPOSE -#endif +#endif // TUNE_TRANSPOSE #if TUNE_LOAD == 0 #define TUNE_LOAD_MODIFIER cub::LOAD_DEFAULT -#else +#else // TUNE_LOAD == 1 #define TUNE_LOAD_MODIFIER cub::LOAD_CA -#endif +#endif // TUNE_LOAD template struct policy_hub_t @@ -45,7 +45,7 @@ struct policy_hub_t using MaxPolicy = policy_t; }; -#endif +#endif // !TUNE_BASE template void select(nvbench::state &state, nvbench::type_list) @@ -58,7 +58,7 @@ void select(nvbench::state &state, nvbench::type_list) using equality_op_t = cub::NullType; using offset_t = OffsetT; -#if !TUNE_BASE + #if !TUNE_BASE using policy_t = policy_hub_t; using dispatch_t = cub::DispatchSelectIf) keep_rejects, may_alias, policy_t>; -#else + #else // TUNE_BASE using dispatch_t = cub::DispatchSelectIf) offset_t, keep_rejects, may_alias>; -#endif + #endif // TUNE_BASE // Retrieve axis parameters const auto elements = static_cast(state.get_int64("Elements{io}")); diff --git a/benchmarks/bench/select/if.cu b/benchmarks/bench/select/if.cu index 5deac7d47..110ceb6fa 100644 --- a/benchmarks/bench/select/if.cu +++ b/benchmarks/bench/select/if.cu @@ -16,15 +16,15 @@ 
constexpr bool may_alias = false; #if !TUNE_BASE #if TUNE_TRANSPOSE == 0 #define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_DIRECT -#else +#else // TUNE_TRANSPOSE == 1 #define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_WARP_TRANSPOSE -#endif +#endif // TUNE_TRANSPOSE #if TUNE_LOAD == 0 #define TUNE_LOAD_MODIFIER cub::LOAD_DEFAULT -#else +#else // TUNE_LOAD == 1 #define TUNE_LOAD_MODIFIER cub::LOAD_CA -#endif +#endif // TUNE_LOAD template struct policy_hub_t @@ -46,7 +46,7 @@ struct policy_hub_t using MaxPolicy = policy_t; }; -#endif +#endif // !TUNE_BASE template struct less_then_t @@ -81,7 +81,7 @@ void select(nvbench::state &state, nvbench::type_list) using equality_op_t = cub::NullType; using offset_t = OffsetT; -#if !TUNE_BASE + #if !TUNE_BASE using policy_t = policy_hub_t; using dispatch_t = cub::DispatchSelectIf) keep_rejects, may_alias, policy_t>; -#else + #else // TUNE_BASE using dispatch_t = cub::DispatchSelectIf) offset_t, keep_rejects, may_alias>; -#endif + #endif // TUNE_BASE // Retrieve axis parameters const auto elements = static_cast(state.get_int64("Elements{io}"));