diff --git a/benchmarks/bench/select/if.cu b/benchmarks/bench/select/if.cu new file mode 100644 index 000000000..06b8818d3 --- /dev/null +++ b/benchmarks/bench/select/if.cu @@ -0,0 +1,170 @@ +#include +#include +#include +#include + +// %RANGE% TUNE_TRANSPOSE trp 0:1:1 +// %RANGE% TUNE_LOAD ld 0:1:1 +// %RANGE% TUNE_ITEMS_PER_THREAD ipt 7:24:1 +// %RANGE% TUNE_THREADS_PER_BLOCK tpb 128:1024:32 +// %RANGE% CUB_DETAIL_L2_BACKOFF_NS l2b 0:1200:5 +// %RANGE% CUB_DETAIL_L2_WRITE_LATENCY_NS l2w 0:1200:5 + +constexpr bool keep_rejects = false; +constexpr bool may_alias = false; + +#if !TUNE_BASE +#if TUNE_TRANSPOSE == 0 +#define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_DIRECT +#else +#define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_WARP_TRANSPOSE +#endif + +#if TUNE_LOAD == 0 +#define TUNE_LOAD_MODIFIER cub::LOAD_DEFAULT +#else +#define TUNE_LOAD_MODIFIER cub::LOAD_CA +#endif + +template +struct policy_hub_t +{ + struct policy_t : cub::ChainedPolicy<300, policy_t, policy_t> + { + static constexpr int NOMINAL_4B_ITEMS_PER_THREAD = TUNE_ITEMS_PER_THREAD; + + static constexpr int ITEMS_PER_THREAD = + CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, + CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(InputT)))); + + using SelectIfPolicyT = cub::AgentSelectIfPolicy; + }; + + using MaxPolicy = policy_t; +}; +#endif + +template +struct less_then_t +{ + T m_val; + + __device__ bool operator()(const T &val) const { return val < m_val; } +}; + +template +T value_from_entropy(double percentage) +{ + if (percentage == 1) + { + return std::numeric_limits::max(); + } + + const auto max_val = static_cast(std::numeric_limits::max()); + const auto min_val = static_cast(std::numeric_limits::lowest()); + const auto result = min_val + percentage * max_val - percentage * min_val; + return static_cast(result); +} + +template +void select(nvbench::state &state, nvbench::type_list) +{ + using input_it_t = const T*; + using flag_it_t = cub::NullType*; + using output_it_t = T*; + using num_selected_it_t = OffsetT*; + using select_op_t = less_then_t; + using equality_op_t = cub::NullType; + using offset_t = OffsetT; + +#if !TUNE_BASE + using policy_t = policy_hub_t; + using dispatch_t = cub::DispatchSelectIf; +#else + using dispatch_t = cub::DispatchSelectIf; +#endif + + // Retrieve axis parameters + const auto elements = static_cast(state.get_int64("Elements{io}")); + const bit_entropy entropy = str_to_entropy(state.get_string("Entropy")); + + T min_val = std::numeric_limits::lowest(); + T max_val = std::numeric_limits::max(); + T val = value_from_entropy(entropy_to_probability(entropy)); + std::cout << val << std::endl; + select_op_t select_op{val}; + + thrust::device_vector in(elements); + thrust::device_vector num_selected(1); + + gen(seed_t{}, in); + + // TODO Extract into helper TU + const auto selected_elements = thrust::count_if(in.cbegin(), in.cend(), select_op); + thrust::device_vector out(selected_elements); + + input_it_t d_in = thrust::raw_pointer_cast(in.data()); + flag_it_t d_flags = nullptr; + output_it_t d_out = thrust::raw_pointer_cast(out.data()); + num_selected_it_t d_num_selected = thrust::raw_pointer_cast(num_selected.data()); + + state.add_element_count(elements); + state.add_global_memory_reads(elements); + state.add_global_memory_writes(selected_elements); + state.add_global_memory_writes(1); + + std::size_t temp_size{}; + dispatch_t::Dispatch(nullptr, + temp_size, + d_in, + d_flags, + d_out, + d_num_selected, + select_op, + equality_op_t{}, + elements, + 0); + + thrust::device_vector temp(temp_size); + auto *temp_storage = thrust::raw_pointer_cast(temp.data()); + + state.exec([&](nvbench::launch &launch) { + dispatch_t::Dispatch(temp_storage, + temp_size, + d_in, + d_flags, + d_out, + d_num_selected, + select_op, + equality_op_t{}, + elements, + launch.get_stream()); + }); +} + +NVBENCH_BENCH_TYPES(select, NVBENCH_TYPE_AXES(fundamental_types, offset_types)) + .set_name("cub::DeviceSelect::If") + .set_type_axes_names({"T{ct}", "OffsetT{ct}"}) + .add_int64_power_of_two_axis("Elements{io}", nvbench::range(16, 28, 4)) + .add_string_axis("Entropy", {"1.000", "0.811", "0.544", "0.337", "0.201", "0.000"}); diff --git a/benchmarks/nvbench_helper/nvbench_helper.cu b/benchmarks/nvbench_helper/nvbench_helper.cu index 665e58f71..acd854b1e 100644 --- a/benchmarks/nvbench_helper/nvbench_helper.cu +++ b/benchmarks/nvbench_helper/nvbench_helper.cu @@ -265,20 +265,6 @@ void generator_t::operator()(seed_t seed, }; } -double entropy_to_probability(bit_entropy entropy) -{ - switch (entropy) - { - case bit_entropy::_0_000: return 0.0; - case bit_entropy::_0_811: return 0.811; - case bit_entropy::_0_544: return 0.544; - case bit_entropy::_0_337: return 0.337; - case bit_entropy::_0_201: return 0.201; - case bit_entropy::_1_000: return 1.0; - default: return 0.0; - } -} - struct random_to_probability_t { double m_probability; diff --git a/benchmarks/nvbench_helper/nvbench_helper.cuh b/benchmarks/nvbench_helper/nvbench_helper.cuh index 8bcad1bb8..6323ca019 100644 --- a/benchmarks/nvbench_helper/nvbench_helper.cuh +++ b/benchmarks/nvbench_helper/nvbench_helper.cuh @@ -49,7 +49,6 @@ using fundamental_types = nvbench::type_list; @@ -103,6 +102,21 @@ enum class bit_entropy }; NVBENCH_DECLARE_TYPE_STRINGS(bit_entropy, "BE", "bit entropy"); +[[nodiscard]] +inline double entropy_to_probability(bit_entropy entropy) +{ + switch (entropy) + { + case bit_entropy::_0_000: return 0.0; + case bit_entropy::_0_811: return 0.811; + case bit_entropy::_0_544: return 0.544; + case bit_entropy::_0_337: return 0.337; + case bit_entropy::_0_201: return 0.201; + case bit_entropy::_1_000: return 1.0; + default: return 0.0; + } +} + [[nodiscard]] bit_entropy str_to_entropy(std::string str) { if (str == "1.000")