Skip to content

Commit

Permalink
Common header for histogram bench
Browse files Browse the repository at this point in the history
  • Loading branch information
gevtushenko committed May 26, 2023
1 parent 7cf8819 commit ec0b724
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 271 deletions.
69 changes: 1 addition & 68 deletions benchmarks/bench/histogram/even.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include <cub/device/device_histogram.cuh>

#include "histogram_common.cuh"
#include <nvbench_helper.cuh>

// %RANGE% TUNE_ITEMS ipt 7:24:1
Expand All @@ -9,72 +8,6 @@
// %RANGE% TUNE_MEM_PREFERENCE mem 0:2:1
// %RANGE% TUNE_LOAD ld 0:2:1

#if !TUNE_BASE

#if TUNE_LOAD == 0
#define TUNE_LOAD_MODIFIER cub::LOAD_DEFAULT
#elif TUNE_LOAD == 1
#define TUNE_LOAD_MODIFIER cub::LOAD_LDG
#else // TUNE_LOAD == 2
#define TUNE_LOAD_MODIFIER cub::LOAD_CA
#endif // TUNE_LOAD

#if TUNE_MEM_PREFERENCE == 0
constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::GMEM;
#elif TUNE_MEM_PREFERENCE == 1
constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::SMEM;
#else // TUNE_MEM_PREFERENCE == 2
constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::BLEND;
#endif // TUNE_MEM_PREFERENCE


template <typename SampleT, int NUM_ACTIVE_CHANNELS>
struct policy_hub_t
{
template <int NOMINAL_ITEMS_PER_THREAD>
struct TScale
{
enum
{
V_SCALE = (sizeof(SampleT) + sizeof(int) - 1) / sizeof(int),
VALUE = CUB_MAX((NOMINAL_ITEMS_PER_THREAD / NUM_ACTIVE_CHANNELS / V_SCALE), 1)
};
};

struct policy_t : cub::ChainedPolicy<350, policy_t, policy_t>
{
using AgentHistogramPolicyT = cub::AgentHistogramPolicy<TUNE_THREADS,
TScale<TUNE_ITEMS>::VALUE,
cub::BLOCK_LOAD_DIRECT,
TUNE_LOAD_MODIFIER,
TUNE_RLE_COMPRESS,
MEM_PREFERENCE,
TUNE_WORK_STEALING>;
};

using MaxPolicy = policy_t;
};
#endif // !TUNE_BASE

template <class SampleT, class OffsetT>
SampleT get_upper_level(OffsetT bins, OffsetT elements)
{
if constexpr (cuda::std::is_integral_v<SampleT>)
{
if constexpr (sizeof(SampleT) < sizeof(OffsetT))
{
const SampleT max_key = std::numeric_limits<SampleT>::max();
return static_cast<SampleT>(std::min(bins, static_cast<OffsetT>(max_key)));
}
else
{
return static_cast<SampleT>(bins);
}
}

return static_cast<SampleT>(elements);
}

template <typename SampleT, typename CounterT, typename OffsetT>
static void histogram(nvbench::state &state, nvbench::type_list<SampleT, CounterT, OffsetT>)
{
Expand Down
69 changes: 69 additions & 0 deletions benchmarks/bench/histogram/histogram_common.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#pragma once

#include <cub/device/device_histogram.cuh>

#if !TUNE_BASE

#if TUNE_LOAD == 0
#define TUNE_LOAD_MODIFIER cub::LOAD_DEFAULT
#elif TUNE_LOAD == 1
#define TUNE_LOAD_MODIFIER cub::LOAD_LDG
#else // TUNE_LOAD == 2
#define TUNE_LOAD_MODIFIER cub::LOAD_CA
#endif // TUNE_LOAD

#if TUNE_MEM_PREFERENCE == 0
constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::GMEM;
#elif TUNE_MEM_PREFERENCE == 1
constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::SMEM;
#else // TUNE_MEM_PREFERENCE == 2
constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::BLEND;
#endif // TUNE_MEM_PREFERENCE


template <typename SampleT, int NUM_ACTIVE_CHANNELS>
struct policy_hub_t
{
template <int NOMINAL_ITEMS_PER_THREAD>
struct TScale
{
enum
{
V_SCALE = (sizeof(SampleT) + sizeof(int) - 1) / sizeof(int),
VALUE = CUB_MAX((NOMINAL_ITEMS_PER_THREAD / NUM_ACTIVE_CHANNELS / V_SCALE), 1)
};
};

struct policy_t : cub::ChainedPolicy<350, policy_t, policy_t>
{
using AgentHistogramPolicyT = cub::AgentHistogramPolicy<TUNE_THREADS,
TScale<TUNE_ITEMS>::VALUE,
cub::BLOCK_LOAD_DIRECT,
TUNE_LOAD_MODIFIER,
TUNE_RLE_COMPRESS,
MEM_PREFERENCE,
TUNE_WORK_STEALING>;
};

using MaxPolicy = policy_t;
};
#endif // !TUNE_BASE

template <class SampleT, class OffsetT>
SampleT get_upper_level(OffsetT bins, OffsetT elements)
{
if constexpr (cuda::std::is_integral_v<SampleT>)
{
if constexpr (sizeof(SampleT) < sizeof(OffsetT))
{
const SampleT max_key = std::numeric_limits<SampleT>::max();
return static_cast<SampleT>(std::min(bins, static_cast<OffsetT>(max_key)));
}
else
{
return static_cast<SampleT>(bins);
}
}

return static_cast<SampleT>(elements);
}
68 changes: 1 addition & 67 deletions benchmarks/bench/histogram/multi/even.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include <cub/device/device_histogram.cuh>

#include "../histogram_common.cuh"
#include <nvbench_helper.cuh>

// %RANGE% TUNE_ITEMS ipt 7:24:1
Expand All @@ -9,71 +8,6 @@
// %RANGE% TUNE_MEM_PREFERENCE mem 0:2:1
// %RANGE% TUNE_LOAD ld 0:2:1

#if !TUNE_BASE

#if TUNE_LOAD == 0
#define TUNE_LOAD_MODIFIER cub::LOAD_DEFAULT
#elif TUNE_LOAD == 1
#define TUNE_LOAD_MODIFIER cub::LOAD_LDG
#else // TUNE_LOAD == 2
#define TUNE_LOAD_MODIFIER cub::LOAD_CA
#endif // TUNE_LOAD

#if TUNE_MEM_PREFERENCE == 0
constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::GMEM;
#elif TUNE_MEM_PREFERENCE == 1
constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::SMEM;
#else // TUNE_MEM_PREFERENCE == 2
constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::BLEND;
#endif // TUNE_MEM_PREFERENCE

template <typename SampleT, int NUM_ACTIVE_CHANNELS>
struct policy_hub_t
{
template <int NOMINAL_ITEMS_PER_THREAD>
struct TScale
{
enum
{
V_SCALE = (sizeof(SampleT) + sizeof(int) - 1) / sizeof(int),
VALUE = CUB_MAX((NOMINAL_ITEMS_PER_THREAD / NUM_ACTIVE_CHANNELS / V_SCALE), 1)
};
};

struct policy_t : cub::ChainedPolicy<350, policy_t, policy_t>
{
using AgentHistogramPolicyT = cub::AgentHistogramPolicy<TUNE_THREADS,
TScale<TUNE_ITEMS>::VALUE,
cub::BLOCK_LOAD_DIRECT,
TUNE_LOAD_MODIFIER,
TUNE_RLE_COMPRESS,
MEM_PREFERENCE,
TUNE_WORK_STEALING>;
};

using MaxPolicy = policy_t;
};
#endif // !TUNE_BASE

template <class SampleT, class OffsetT>
SampleT get_upper_level(OffsetT bins, OffsetT elements)
{
if constexpr (cuda::std::is_integral_v<SampleT>)
{
if constexpr (sizeof(SampleT) < sizeof(OffsetT))
{
const SampleT max_key = std::numeric_limits<SampleT>::max();
return static_cast<SampleT>(std::min(bins, static_cast<OffsetT>(max_key)));
}
else
{
return static_cast<SampleT>(bins);
}
}

return static_cast<SampleT>(elements);
}

template <typename SampleT, typename CounterT, typename OffsetT>
static void histogram(nvbench::state &state, nvbench::type_list<SampleT, CounterT, OffsetT>)
{
Expand Down
68 changes: 1 addition & 67 deletions benchmarks/bench/histogram/multi/range.cu
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#include <cub/device/device_histogram.cuh>

#include <thrust/sequence.h>

#include "../histogram_common.cuh"
#include <nvbench_helper.cuh>

// %RANGE% TUNE_ITEMS ipt 7:24:1
Expand All @@ -11,71 +10,6 @@
// %RANGE% TUNE_MEM_PREFERENCE mem 0:2:1
// %RANGE% TUNE_LOAD ld 0:2:1

#if !TUNE_BASE

#if TUNE_LOAD == 0
#define TUNE_LOAD_MODIFIER cub::LOAD_DEFAULT
#elif TUNE_LOAD == 1
#define TUNE_LOAD_MODIFIER cub::LOAD_LDG
#else // TUNE_LOAD == 2
#define TUNE_LOAD_MODIFIER cub::LOAD_CA
#endif // TUNE_LOAD

#if TUNE_MEM_PREFERENCE == 0
constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::GMEM;
#elif TUNE_MEM_PREFERENCE == 1
constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::SMEM;
#else // TUNE_MEM_PREFERENCE == 2
constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::BLEND;
#endif // TUNE_MEM_PREFERENCE

template <typename SampleT, int NUM_ACTIVE_CHANNELS>
struct policy_hub_t
{
template <int NOMINAL_ITEMS_PER_THREAD>
struct TScale
{
enum
{
V_SCALE = (sizeof(SampleT) + sizeof(int) - 1) / sizeof(int),
VALUE = CUB_MAX((NOMINAL_ITEMS_PER_THREAD / NUM_ACTIVE_CHANNELS / V_SCALE), 1)
};
};

struct policy_t : cub::ChainedPolicy<350, policy_t, policy_t>
{
using AgentHistogramPolicyT = cub::AgentHistogramPolicy<TUNE_THREADS,
TScale<TUNE_ITEMS>::VALUE,
cub::BLOCK_LOAD_DIRECT,
TUNE_LOAD_MODIFIER,
TUNE_RLE_COMPRESS,
MEM_PREFERENCE,
TUNE_WORK_STEALING>;
};

using MaxPolicy = policy_t;
};
#endif // !TUNE_BASE

template <class SampleT, class OffsetT>
SampleT get_upper_level(OffsetT bins, OffsetT elements)
{
if constexpr (cuda::std::is_integral_v<SampleT>)
{
if constexpr (sizeof(SampleT) < sizeof(OffsetT))
{
const SampleT max_key = std::numeric_limits<SampleT>::max();
return static_cast<SampleT>(std::min(bins, static_cast<OffsetT>(max_key)));
}
else
{
return static_cast<SampleT>(bins);
}
}

return static_cast<SampleT>(elements);
}

template <typename SampleT, typename CounterT, typename OffsetT>
static void histogram(nvbench::state &state, nvbench::type_list<SampleT, CounterT, OffsetT>)
{
Expand Down
70 changes: 1 addition & 69 deletions benchmarks/bench/histogram/range.cu
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#include <cub/device/device_histogram.cuh>

#include "histogram_common.cuh"
#include <thrust/sequence.h>

#include <nvbench_helper.cuh>

// %RANGE% TUNE_ITEMS ipt 7:24:1
Expand All @@ -11,72 +9,6 @@
// %RANGE% TUNE_MEM_PREFERENCE mem 0:2:1
// %RANGE% TUNE_LOAD ld 0:2:1

#if !TUNE_BASE

#if TUNE_LOAD == 0
#define TUNE_LOAD_MODIFIER cub::LOAD_DEFAULT
#elif TUNE_LOAD == 1
#define TUNE_LOAD_MODIFIER cub::LOAD_LDG
#else // TUNE_LOAD == 2
#define TUNE_LOAD_MODIFIER cub::LOAD_CA
#endif // TUNE_LOAD

#if TUNE_MEM_PREFERENCE == 0
constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::GMEM;
#elif TUNE_MEM_PREFERENCE == 1
constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::SMEM;
#else // TUNE_MEM_PREFERENCE == 2
constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::BLEND;
#endif // TUNE_MEM_PREFERENCE


template <typename SampleT, int NUM_ACTIVE_CHANNELS>
struct policy_hub_t
{
template <int NOMINAL_ITEMS_PER_THREAD>
struct TScale
{
enum
{
V_SCALE = (sizeof(SampleT) + sizeof(int) - 1) / sizeof(int),
VALUE = CUB_MAX((NOMINAL_ITEMS_PER_THREAD / NUM_ACTIVE_CHANNELS / V_SCALE), 1)
};
};

struct policy_t : cub::ChainedPolicy<350, policy_t, policy_t>
{
using AgentHistogramPolicyT = cub::AgentHistogramPolicy<TUNE_THREADS,
TScale<TUNE_ITEMS>::VALUE,
cub::BLOCK_LOAD_DIRECT,
TUNE_LOAD_MODIFIER,
TUNE_RLE_COMPRESS,
MEM_PREFERENCE,
TUNE_WORK_STEALING>;
};

using MaxPolicy = policy_t;
};
#endif // !TUNE_BASE

template <class SampleT, class OffsetT>
SampleT get_upper_level(OffsetT bins, OffsetT elements)
{
if constexpr (cuda::std::is_integral_v<SampleT>)
{
if constexpr (sizeof(SampleT) < sizeof(OffsetT))
{
const SampleT max_key = std::numeric_limits<SampleT>::max();
return static_cast<SampleT>(std::min(bins, static_cast<OffsetT>(max_key)));
}
else
{
return static_cast<SampleT>(bins);
}
}

return static_cast<SampleT>(elements);
}

template <typename SampleT, typename CounterT, typename OffsetT>
static void histogram(nvbench::state &state, nvbench::type_list<SampleT, CounterT, OffsetT>)
{
Expand Down

0 comments on commit ec0b724

Please sign in to comment.