Skip to content

Commit

Permalink
Adjust benches
Browse files Browse the repository at this point in the history
  • Loading branch information
gevtushenko committed May 23, 2023
1 parent 3e40ecd commit bcbb6d1
Show file tree
Hide file tree
Showing 12 changed files with 28 additions and 15 deletions.
4 changes: 3 additions & 1 deletion benchmarks/bench/partition/flagged.cu
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
// %RANGE% TUNE_LOAD ld 0:1:1
// %RANGE% TUNE_ITEMS_PER_THREAD ipt 7:24:1
// %RANGE% TUNE_THREADS_PER_BLOCK tpb 128:1024:32
-// %RANGE% CUB_DETAIL_L2_BACKOFF_NS l2b 0:1200:5
+// %RANGE% CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2 l2b 3:8:1
// %RANGE% CUB_DETAIL_L2_WRITE_LATENCY_NS l2w 0:1200:5

constexpr bool keep_rejects = true;
constexpr bool may_alias = false;

#if !TUNE_BASE
+#define CUB_DETAIL_INITIAL_L2_BACKOFF_NS (1 << CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2)
+
#if TUNE_TRANSPOSE == 0
#define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_DIRECT
#else // TUNE_TRANSPOSE == 1
Expand Down
4 changes: 3 additions & 1 deletion benchmarks/bench/partition/if.cu
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
// %RANGE% TUNE_LOAD ld 0:1:1
// %RANGE% TUNE_ITEMS_PER_THREAD ipt 7:24:1
// %RANGE% TUNE_THREADS_PER_BLOCK tpb 128:1024:32
-// %RANGE% CUB_DETAIL_L2_BACKOFF_NS l2b 0:1200:5
+// %RANGE% CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2 l2b 3:8:1
// %RANGE% CUB_DETAIL_L2_WRITE_LATENCY_NS l2w 0:1200:5

constexpr bool keep_rejects = true;
constexpr bool may_alias = false;

#if !TUNE_BASE
+#define CUB_DETAIL_INITIAL_L2_BACKOFF_NS (1 << CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2)
+
#if TUNE_TRANSPOSE == 0
#define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_DIRECT
#else // TUNE_TRANSPOSE == 1
Expand Down
4 changes: 3 additions & 1 deletion benchmarks/bench/reduce/by_key.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
// %RANGE% TUNE_THREADS tpb 128:1024:32
// %RANGE% TUNE_TRANSPOSE trp 0:1:1
// %RANGE% TUNE_LOAD ld 0:1:1
-// %RANGE% CUB_DETAIL_L2_BACKOFF_NS l2b 0:1200:5
+// %RANGE% CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2 l2b 3:8:1
// %RANGE% CUB_DETAIL_L2_WRITE_LATENCY_NS l2w 0:1200:5

#if !TUNE_BASE
+#define CUB_DETAIL_INITIAL_L2_BACKOFF_NS (1 << CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2)
+
#if TUNE_TRANSPOSE == 0
#define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_DIRECT
#else // TUNE_TRANSPOSE == 1
Expand Down
4 changes: 3 additions & 1 deletion benchmarks/bench/run_length_encode/encode.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
// %RANGE% TUNE_THREADS tpb 128:1024:32
// %RANGE% TUNE_TRANSPOSE trp 0:1:1
// %RANGE% TUNE_LOAD ld 0:1:1
-// %RANGE% CUB_DETAIL_L2_BACKOFF_NS l2b 0:1200:5
+// %RANGE% CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2 l2b 3:8:1
// %RANGE% CUB_DETAIL_L2_WRITE_LATENCY_NS l2w 0:1200:5

#if !TUNE_BASE
+#define CUB_DETAIL_INITIAL_L2_BACKOFF_NS (1 << CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2)
+
#if TUNE_TRANSPOSE == 0
#define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_DIRECT
#else // TUNE_TRANSPOSE == 1
Expand Down
4 changes: 3 additions & 1 deletion benchmarks/bench/run_length_encode/non_trivial_runs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
// %RANGE% TUNE_TRANSPOSE trp 0:1:1
// %RANGE% TUNE_TIME_SLICING ts 0:1:1
// %RANGE% TUNE_LOAD ld 0:1:1
-// %RANGE% CUB_DETAIL_L2_BACKOFF_NS l2b 0:1200:5
+// %RANGE% CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2 l2b 3:8:1
// %RANGE% CUB_DETAIL_L2_WRITE_LATENCY_NS l2w 0:1200:5

#include <cub/device/device_run_length_encode.cuh>

#if !TUNE_BASE
+#define CUB_DETAIL_INITIAL_L2_BACKOFF_NS (1 << CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2)
+
#if TUNE_TRANSPOSE == 0
#define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_DIRECT
#else // TUNE_TRANSPOSE == 1
Expand Down
2 changes: 2 additions & 0 deletions benchmarks/bench/scan/exclusive/base.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include <type_traits>

#if !TUNE_BASE
+#define CUB_DETAIL_INITIAL_L2_BACKOFF_NS (1 << CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2)
+
template <typename AccumT>
struct policy_hub_t
{
Expand Down
4 changes: 3 additions & 1 deletion benchmarks/bench/scan/exclusive/by_key.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@

// %RANGE% TUNE_ITEMS ipt 7:24:1
// %RANGE% TUNE_THREADS tpb 128:1024:32
-// %RANGE% CUB_DETAIL_L2_BACKOFF_NS l2b 0:1200:5
+// %RANGE% CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2 l2b 3:8:1
// %RANGE% CUB_DETAIL_L2_WRITE_LATENCY_NS l2w 0:1200:5

#include <cub/device/device_scan.cuh>

#include <type_traits>

#if !TUNE_BASE
+#define CUB_DETAIL_INITIAL_L2_BACKOFF_NS (1 << CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2)
+
struct policy_hub_t
{
struct policy_t : cub::ChainedPolicy<300, policy_t, policy_t>
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bench/scan/exclusive/max.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

// %RANGE% TUNE_ITEMS ipt 7:24:1
// %RANGE% TUNE_THREADS tpb 128:1024:32
-// %RANGE% CUB_DETAIL_L2_BACKOFF_NS l2b 0:1200:5
+// %RANGE% CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2 l2b 3:8:1
// %RANGE% CUB_DETAIL_L2_WRITE_LATENCY_NS l2w 0:1200:5

using op_t = max_t;
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bench/scan/exclusive/sum.cu
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// %RANGE% TUNE_ITEMS ipt 7:24:1
// %RANGE% TUNE_THREADS tpb 128:1024:32
-// %RANGE% CUB_DETAIL_L2_BACKOFF_NS l2b 0:1200:5
+// %RANGE% CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2 l2b 3:8:1
// %RANGE% CUB_DETAIL_L2_WRITE_LATENCY_NS l2w 0:1200:5

#include <nvbench_helper.cuh>
Expand Down
4 changes: 3 additions & 1 deletion benchmarks/bench/select/flagged.cu
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
// %RANGE% TUNE_LOAD ld 0:1:1
// %RANGE% TUNE_ITEMS_PER_THREAD ipt 7:24:1
// %RANGE% TUNE_THREADS_PER_BLOCK tpb 128:1024:32
-// %RANGE% CUB_DETAIL_L2_BACKOFF_NS l2b 0:1200:5
+// %RANGE% CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2 l2b 3:8:1
// %RANGE% CUB_DETAIL_L2_WRITE_LATENCY_NS l2w 0:1200:5

constexpr bool keep_rejects = false;
constexpr bool may_alias = false;

#if !TUNE_BASE
+#define CUB_DETAIL_INITIAL_L2_BACKOFF_NS (1 << CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2)
+
#if TUNE_TRANSPOSE == 0
#define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_DIRECT
#else // TUNE_TRANSPOSE == 1
Expand Down
4 changes: 3 additions & 1 deletion benchmarks/bench/select/if.cu
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
// %RANGE% TUNE_LOAD ld 0:1:1
// %RANGE% TUNE_ITEMS_PER_THREAD ipt 7:24:1
// %RANGE% TUNE_THREADS_PER_BLOCK tpb 128:1024:32
-// %RANGE% CUB_DETAIL_L2_BACKOFF_NS l2b 0:1200:5
+// %RANGE% CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2 l2b 3:8:1
// %RANGE% CUB_DETAIL_L2_WRITE_LATENCY_NS l2w 0:1200:5

constexpr bool keep_rejects = false;
constexpr bool may_alias = false;

#if !TUNE_BASE
+#define CUB_DETAIL_INITIAL_L2_BACKOFF_NS (1 << CUB_DETAIL_INITIAL_L2_BACKOFF_NS_POW2)
+
#if TUNE_TRANSPOSE == 0
#define TUNE_LOAD_ALGORITHM cub::BLOCK_LOAD_DIRECT
#else // TUNE_TRANSPOSE == 1
Expand Down
5 changes: 0 additions & 5 deletions cub/agent/single_pass_scan_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,8 @@ CUB_NAMESPACE_BEGIN
#define CUB_DETAIL_INITIAL_L2_BACKOFF_NS 8
#endif

-#define CUB_DETAIL_DEFAULT_L2_BACKOFF_NS 350
#define CUB_DETAIL_DEFAULT_L2_WRITE_LATENCY_NS 450

-#ifndef CUB_DETAIL_L2_BACKOFF_NS
-#define CUB_DETAIL_L2_BACKOFF_NS CUB_DETAIL_DEFAULT_L2_BACKOFF_NS
-#endif
-
#ifndef CUB_DETAIL_L2_WRITE_LATENCY_NS
#define CUB_DETAIL_L2_WRITE_LATENCY_NS CUB_DETAIL_DEFAULT_L2_WRITE_LATENCY_NS
#endif
Expand Down

0 comments on commit bcbb6d1

Please sign in to comment.