diff --git a/benchmarks/bench/histogram/even.cu b/benchmarks/bench/histogram/even.cu index c5a2c9f57..aba0997aa 100644 --- a/benchmarks/bench/histogram/even.cu +++ b/benchmarks/bench/histogram/even.cu @@ -1,5 +1,4 @@ -#include - +#include "histogram_common.cuh" #include // %RANGE% TUNE_ITEMS ipt 7:24:1 @@ -9,72 +8,6 @@ // %RANGE% TUNE_MEM_PREFERENCE mem 0:2:1 // %RANGE% TUNE_LOAD ld 0:2:1 -#if !TUNE_BASE - -#if TUNE_LOAD == 0 -#define TUNE_LOAD_MODIFIER cub::LOAD_DEFAULT -#elif TUNE_LOAD == 1 -#define TUNE_LOAD_MODIFIER cub::LOAD_LDG -#else // TUNE_LOAD == 2 -#define TUNE_LOAD_MODIFIER cub::LOAD_CA -#endif // TUNE_LOAD - -#if TUNE_MEM_PREFERENCE == 0 -constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::GMEM; -#elif TUNE_MEM_PREFERENCE == 1 -constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::SMEM; -#else // TUNE_MEM_PREFERENCE == 2 -constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::BLEND; -#endif // TUNE_MEM_PREFERENCE - - -template -struct policy_hub_t -{ - template - struct TScale - { - enum - { - V_SCALE = (sizeof(SampleT) + sizeof(int) - 1) / sizeof(int), - VALUE = CUB_MAX((NOMINAL_ITEMS_PER_THREAD / NUM_ACTIVE_CHANNELS / V_SCALE), 1) - }; - }; - - struct policy_t : cub::ChainedPolicy<350, policy_t, policy_t> - { - using AgentHistogramPolicyT = cub::AgentHistogramPolicy::VALUE, - cub::BLOCK_LOAD_DIRECT, - TUNE_LOAD_MODIFIER, - TUNE_RLE_COMPRESS, - MEM_PREFERENCE, - TUNE_WORK_STEALING>; - }; - - using MaxPolicy = policy_t; -}; -#endif // !TUNE_BASE - -template -SampleT get_upper_level(OffsetT bins, OffsetT elements) -{ - if constexpr (cuda::std::is_integral_v) - { - if constexpr (sizeof(SampleT) < sizeof(OffsetT)) - { - const SampleT max_key = std::numeric_limits::max(); - return static_cast(std::min(bins, static_cast(max_key))); - } - else - { - return static_cast(bins); - } - } - - return static_cast(elements); -} - template static void histogram(nvbench::state &state, nvbench::type_list) { diff --git a/benchmarks/bench/histogram/histogram_common.cuh b/benchmarks/bench/histogram/histogram_common.cuh new file mode 100644 index 000000000..63fc1872d --- /dev/null +++ b/benchmarks/bench/histogram/histogram_common.cuh @@ -0,0 +1,69 @@ +#pragma once + +#include + +#if !TUNE_BASE + +#if TUNE_LOAD == 0 +#define TUNE_LOAD_MODIFIER cub::LOAD_DEFAULT +#elif TUNE_LOAD == 1 +#define TUNE_LOAD_MODIFIER cub::LOAD_LDG +#else // TUNE_LOAD == 2 +#define TUNE_LOAD_MODIFIER cub::LOAD_CA +#endif // TUNE_LOAD + +#if TUNE_MEM_PREFERENCE == 0 +constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::GMEM; +#elif TUNE_MEM_PREFERENCE == 1 +constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::SMEM; +#else // TUNE_MEM_PREFERENCE == 2 +constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::BLEND; +#endif // TUNE_MEM_PREFERENCE + + +template +struct policy_hub_t +{ + template + struct TScale + { + enum + { + V_SCALE = (sizeof(SampleT) + sizeof(int) - 1) / sizeof(int), + VALUE = CUB_MAX((NOMINAL_ITEMS_PER_THREAD / NUM_ACTIVE_CHANNELS / V_SCALE), 1) + }; + }; + + struct policy_t : cub::ChainedPolicy<350, policy_t, policy_t> + { + using AgentHistogramPolicyT = cub::AgentHistogramPolicy::VALUE, + cub::BLOCK_LOAD_DIRECT, + TUNE_LOAD_MODIFIER, + TUNE_RLE_COMPRESS, + MEM_PREFERENCE, + TUNE_WORK_STEALING>; + }; + + using MaxPolicy = policy_t; +}; +#endif // !TUNE_BASE + +template +SampleT get_upper_level(OffsetT bins, OffsetT elements) +{ + if constexpr (cuda::std::is_integral_v) + { + if constexpr (sizeof(SampleT) < sizeof(OffsetT)) + { + const SampleT max_key = std::numeric_limits::max(); + return static_cast(std::min(bins, static_cast(max_key))); + } + else + { + return static_cast(bins); + } + } + + return static_cast(elements); +} diff --git a/benchmarks/bench/histogram/multi/even.cu b/benchmarks/bench/histogram/multi/even.cu index ee7d69499..b39c596fa 100644 --- a/benchmarks/bench/histogram/multi/even.cu +++ b/benchmarks/bench/histogram/multi/even.cu @@ -1,5 +1,4 @@ -#include - +#include "../histogram_common.cuh" #include // %RANGE% TUNE_ITEMS ipt 7:24:1 @@ -9,71 +8,6 @@ // %RANGE% TUNE_MEM_PREFERENCE mem 0:2:1 // %RANGE% TUNE_LOAD ld 0:2:1 -#if !TUNE_BASE - -#if TUNE_LOAD == 0 -#define TUNE_LOAD_MODIFIER cub::LOAD_DEFAULT -#elif TUNE_LOAD == 1 -#define TUNE_LOAD_MODIFIER cub::LOAD_LDG -#else // TUNE_LOAD == 2 -#define TUNE_LOAD_MODIFIER cub::LOAD_CA -#endif // TUNE_LOAD - -#if TUNE_MEM_PREFERENCE == 0 -constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::GMEM; -#elif TUNE_MEM_PREFERENCE == 1 -constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::SMEM; -#else // TUNE_MEM_PREFERENCE == 2 -constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::BLEND; -#endif // TUNE_MEM_PREFERENCE - -template -struct policy_hub_t -{ - template - struct TScale - { - enum - { - V_SCALE = (sizeof(SampleT) + sizeof(int) - 1) / sizeof(int), - VALUE = CUB_MAX((NOMINAL_ITEMS_PER_THREAD / NUM_ACTIVE_CHANNELS / V_SCALE), 1) - }; - }; - - struct policy_t : cub::ChainedPolicy<350, policy_t, policy_t> - { - using AgentHistogramPolicyT = cub::AgentHistogramPolicy::VALUE, - cub::BLOCK_LOAD_DIRECT, - TUNE_LOAD_MODIFIER, - TUNE_RLE_COMPRESS, - MEM_PREFERENCE, - TUNE_WORK_STEALING>; - }; - - using MaxPolicy = policy_t; -}; -#endif // !TUNE_BASE - -template -SampleT get_upper_level(OffsetT bins, OffsetT elements) -{ - if constexpr (cuda::std::is_integral_v) - { - if constexpr (sizeof(SampleT) < sizeof(OffsetT)) - { - const SampleT max_key = std::numeric_limits::max(); - return static_cast(std::min(bins, static_cast(max_key))); - } - else - { - return static_cast(bins); - } - } - - return static_cast(elements); -} - template static void histogram(nvbench::state &state, nvbench::type_list) { diff --git a/benchmarks/bench/histogram/multi/range.cu b/benchmarks/bench/histogram/multi/range.cu index a7bb64784..f13b632b0 100644 --- a/benchmarks/bench/histogram/multi/range.cu +++ b/benchmarks/bench/histogram/multi/range.cu @@ -1,7 +1,6 @@ -#include - #include +#include "../histogram_common.cuh" #include // %RANGE% TUNE_ITEMS ipt 7:24:1 @@ -11,71 +10,6 @@ // %RANGE% TUNE_MEM_PREFERENCE mem 0:2:1 // %RANGE% TUNE_LOAD ld 0:2:1 -#if !TUNE_BASE - -#if TUNE_LOAD == 0 -#define TUNE_LOAD_MODIFIER cub::LOAD_DEFAULT -#elif TUNE_LOAD == 1 -#define TUNE_LOAD_MODIFIER cub::LOAD_LDG -#else // TUNE_LOAD == 2 -#define TUNE_LOAD_MODIFIER cub::LOAD_CA -#endif // TUNE_LOAD - -#if TUNE_MEM_PREFERENCE == 0 -constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::GMEM; -#elif TUNE_MEM_PREFERENCE == 1 -constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::SMEM; -#else // TUNE_MEM_PREFERENCE == 2 -constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::BLEND; -#endif // TUNE_MEM_PREFERENCE - -template -struct policy_hub_t -{ - template - struct TScale - { - enum - { - V_SCALE = (sizeof(SampleT) + sizeof(int) - 1) / sizeof(int), - VALUE = CUB_MAX((NOMINAL_ITEMS_PER_THREAD / NUM_ACTIVE_CHANNELS / V_SCALE), 1) - }; - }; - - struct policy_t : cub::ChainedPolicy<350, policy_t, policy_t> - { - using AgentHistogramPolicyT = cub::AgentHistogramPolicy::VALUE, - cub::BLOCK_LOAD_DIRECT, - TUNE_LOAD_MODIFIER, - TUNE_RLE_COMPRESS, - MEM_PREFERENCE, - TUNE_WORK_STEALING>; - }; - - using MaxPolicy = policy_t; -}; -#endif // !TUNE_BASE - -template -SampleT get_upper_level(OffsetT bins, OffsetT elements) -{ - if constexpr (cuda::std::is_integral_v) - { - if constexpr (sizeof(SampleT) < sizeof(OffsetT)) - { - const SampleT max_key = std::numeric_limits::max(); - return static_cast(std::min(bins, static_cast(max_key))); - } - else - { - return static_cast(bins); - } - } - - return static_cast(elements); -} - template static void histogram(nvbench::state &state, nvbench::type_list) { diff --git a/benchmarks/bench/histogram/range.cu b/benchmarks/bench/histogram/range.cu index c48af56ea..810d51151 100644 --- a/benchmarks/bench/histogram/range.cu +++ b/benchmarks/bench/histogram/range.cu @@ -1,7 +1,5 @@ -#include - +#include "histogram_common.cuh" #include - #include // %RANGE% TUNE_ITEMS ipt 7:24:1 @@ -11,72 +9,6 @@ // %RANGE% TUNE_MEM_PREFERENCE mem 0:2:1 // %RANGE% TUNE_LOAD ld 0:2:1 -#if !TUNE_BASE - -#if TUNE_LOAD == 0 -#define TUNE_LOAD_MODIFIER cub::LOAD_DEFAULT -#elif TUNE_LOAD == 1 -#define TUNE_LOAD_MODIFIER cub::LOAD_LDG -#else // TUNE_LOAD == 2 -#define TUNE_LOAD_MODIFIER cub::LOAD_CA -#endif // TUNE_LOAD - -#if TUNE_MEM_PREFERENCE == 0 -constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::GMEM; -#elif TUNE_MEM_PREFERENCE == 1 -constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::SMEM; -#else // TUNE_MEM_PREFERENCE == 2 -constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::BLEND; -#endif // TUNE_MEM_PREFERENCE - - -template -struct policy_hub_t -{ - template - struct TScale - { - enum - { - V_SCALE = (sizeof(SampleT) + sizeof(int) - 1) / sizeof(int), - VALUE = CUB_MAX((NOMINAL_ITEMS_PER_THREAD / NUM_ACTIVE_CHANNELS / V_SCALE), 1) - }; - }; - - struct policy_t : cub::ChainedPolicy<350, policy_t, policy_t> - { - using AgentHistogramPolicyT = cub::AgentHistogramPolicy::VALUE, - cub::BLOCK_LOAD_DIRECT, - TUNE_LOAD_MODIFIER, - TUNE_RLE_COMPRESS, - MEM_PREFERENCE, - TUNE_WORK_STEALING>; - }; - - using MaxPolicy = policy_t; -}; -#endif // !TUNE_BASE - -template -SampleT get_upper_level(OffsetT bins, OffsetT elements) -{ - if constexpr (cuda::std::is_integral_v) - { - if constexpr (sizeof(SampleT) < sizeof(OffsetT)) - { - const SampleT max_key = std::numeric_limits::max(); - return static_cast(std::min(bins, static_cast(max_key))); - } - else - { - return static_cast(bins); - } - } - - return static_cast(elements); -} - template static void histogram(nvbench::state &state, nvbench::type_list) {