Skip to content

Commit

Permalink
Build one ct workload at a time
Browse files: browse the repository at this point in the history
  • Loading branch information
gevtushenko committed May 2, 2023
1 parent d14b2cd commit 8f92f51
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 36 deletions.
2 changes: 1 addition & 1 deletion benchmarks/bench/reduce/max.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

// %RANGE% TUNE_ITEMS_PER_THREAD ipt 7:24:1
// %RANGE% TUNE_THREADS_PER_BLOCK tpb 128:1024:32
// %RANGE% TUNE_ITEMS_PER_VEC_LOAD_POW2 ipv 1:3:1
// %RANGE% TUNE_ITEMS_PER_VEC_LOAD_POW2 ipv 1:2:1

using op_t = max_t;
#include "base.cuh"
2 changes: 1 addition & 1 deletion benchmarks/bench/reduce/sum.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

// %RANGE% TUNE_ITEMS_PER_THREAD ipt 7:24:1
// %RANGE% TUNE_THREADS_PER_BLOCK tpb 128:1024:32
// %RANGE% TUNE_ITEMS_PER_VEC_LOAD_POW2 ipv 1:3:1
// %RANGE% TUNE_ITEMS_PER_VEC_LOAD_POW2 ipv 1:2:1

using op_t = cub::Sum;
#include "base.cuh"
22 changes: 11 additions & 11 deletions benchmarks/nvbench_helper/nvbench_helper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -280,17 +280,17 @@ void gen(seed_t seed, thrust::device_vector<T> &data, bit_entropy entropy, T min

#define INSTANTIATE(TYPE) INSTANTIATE_RND(TYPE);

INSTANTIATE(std::uint8_t);
INSTANTIATE(std::uint16_t);
INSTANTIATE(std::uint32_t);
INSTANTIATE(std::uint64_t);
INSTANTIATE(__uint128_t);

INSTANTIATE(std::int8_t);
INSTANTIATE(std::int16_t);
INSTANTIATE(std::int32_t);
INSTANTIATE(std::int64_t);
INSTANTIATE(__int128_t);
INSTANTIATE(uint8_t);
INSTANTIATE(uint16_t);
INSTANTIATE(uint32_t);
INSTANTIATE(uint64_t);
INSTANTIATE(uint128_t);

INSTANTIATE(int8_t);
INSTANTIATE(int16_t);
INSTANTIATE(int32_t);
INSTANTIATE(int64_t);
INSTANTIATE(int128_t);

INSTANTIATE(float);
INSTANTIATE(double);
Expand Down
39 changes: 21 additions & 18 deletions benchmarks/nvbench_helper/nvbench_helper.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@
#include <nvbench/nvbench.cuh>

using complex = cuda::std::complex<float>;
using int128_t = __int128_t;
using uint128_t = __uint128_t;

NVBENCH_DECLARE_TYPE_STRINGS(__int128_t, "I128", "int128_t");
NVBENCH_DECLARE_TYPE_STRINGS(__uint128_t, "U128", "uint128_t");
NVBENCH_DECLARE_TYPE_STRINGS(complex, "C64", "complex<float>");
NVBENCH_DECLARE_TYPE_STRINGS(int128_t, "I128", "int128_t");
NVBENCH_DECLARE_TYPE_STRINGS(uint128_t, "U128", "uint128_t");
NVBENCH_DECLARE_TYPE_STRINGS(complex, "C64", "complex");

namespace detail
{
Expand All @@ -33,28 +35,29 @@ struct push_back<T, nvbench::type_list<As...>>
template <class T, class List>
using push_back_t = typename detail::push_back<T, List>::type;

#ifdef TUNE_OFFSET_TYPE
using offset_types = nvbench::type_list<TUNE_OFFSET_TYPE>;
#ifdef TUNE_OffsetT
using offset_types = nvbench::type_list<TUNE_OffsetT>;
#else
using offset_types = nvbench::type_list<nvbench::int32_t, nvbench::int64_t>;
using offset_types = nvbench::type_list<int32_t, int64_t>;
#endif

#ifdef TUNE_TYPE
using all_value_types = nvbench::type_list<TUNE_TYPE>;
#ifdef TUNE_T
using fundamental_types = nvbench::type_list<TUNE_T>;
using all_types = nvbench::type_list<TUNE_T>;
#else
using fundamental_types = nvbench::type_list<nvbench::int8_t,
nvbench::int16_t,
nvbench::int32_t,
nvbench::int64_t,
__int128_t,
using fundamental_types = nvbench::type_list<int8_t,
int16_t,
int32_t,
int64_t,
int128_t,
float,
double>;

using all_types = nvbench::type_list<nvbench::int8_t,
nvbench::int16_t,
nvbench::int32_t,
nvbench::int64_t,
__int128_t,
using all_types = nvbench::type_list<int8_t,
int16_t,
int32_t,
int64_t,
int128_t,
float,
double,
complex>;
Expand Down
36 changes: 34 additions & 2 deletions benchmarks/scripts/cub/bench/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,9 +244,10 @@ def pull_elapsed(self, bench, workload_point):


class Bench:
def __init__(self, algorithm_name, variant):
def __init__(self, algorithm_name, variant, ct_workload):
self.algname = algorithm_name
self.variant = variant
self.ct_workload = ct_workload

def label(self):
return self.algname + '.' + self.variant.label()
Expand Down Expand Up @@ -315,6 +316,24 @@ def axes_values(self, sub_space, ct):

return space

def axes_value_descriptions(self):
    """Build a lookup of value descriptions for every axis of the benchmark.

    The axes are read from ``json_benches(self.algname)``. The result maps an
    axis key to a dict of ``input_string -> description`` for that axis'
    values. An axis whose ``flags`` entry is truthy is keyed as
    ``name[flags]``; otherwise the plain axis name is used.

    Raises:
        Exception: if the result does not hold exactly one benchmark.
    """
    benchmarks = json_benches(self.algname)["benchmarks"]
    if len(benchmarks) != 1:
        raise Exception("Executable should contain exactly one benchmark")

    table = {}
    for axis in benchmarks[0]["axes"]:
        key = axis["name"]
        flags = axis["flags"]
        if flags:
            key = key + "[{}]".format(flags)
        table[key] = {
            value["input_string"]: value["description"]
            for value in axis["values"]
        }

    return table


def axis_values(self, axis_name):
result = json_benches(self.algname)

Expand All @@ -341,6 +360,19 @@ def axis_values(self, axis_name):
def build(self):
    """Build this benchmark via ``CMake().build(self)``.

    Returns:
        True when the resulting build object's ``code`` equals 0.
    """
    return CMake().build(self).code == 0

def definitions(self):
    """Return the text of the tuning definitions for this benchmark.

    The text starts with ``self.variant.tuning()`` followed by a newline, then
    one ``#define TUNE_<axis> <description>`` line per entry of
    ``self.ct_workload``. Each entry has the form ``<axis>=<value>``; the
    description is looked up in ``self.axes_value_descriptions()`` under the
    unmodified axis name, and the ``{ct}`` marker is removed from the axis
    name before it is used in the macro name.

    Raises:
        KeyError: if an axis or value is absent from the descriptions.
        ValueError: if a workload entry contains no ``=``.
    """
    pieces = [self.variant.tuning(), "\n"]

    descriptions = self.axes_value_descriptions()
    for ct_component in self.ct_workload:
        # Split on the first '=' only, so a value that itself contains '='
        # stays whole instead of breaking the two-way unpacking.
        ct_axis_name, ct_value = ct_component.split('=', 1)
        description = descriptions[ct_axis_name][ct_value]
        macro_suffix = ct_axis_name.replace('{ct}', '')
        pieces.append("#define TUNE_{} {}\n".format(macro_suffix, description))

    return "".join(pieces)

def do_run(self, point, timeout):
try:
Expand Down Expand Up @@ -483,4 +515,4 @@ def score(self, ct_workload, rt_workload_space, base_estimator, variant_estimato

class BaseBench(Bench):
def __init__(self, algname):
super().__init__(algname, BasePoint())
super().__init__(algname, BasePoint(), [])
3 changes: 1 addition & 2 deletions benchmarks/scripts/cub/bench/cmake.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,8 @@ def __init__(self):
def do_build(self, bench, timeout):
try:
if not bench.is_base():
# TODO populate tuning file
with open(bench.exe_name() + ".h", "w") as f:
f.writelines(bench.variant.tuning())
f.writelines(bench.definitions())

cmd = ["cmake", "--build", ".", "--target", bench.exe_name()]

Expand Down
2 changes: 1 addition & 1 deletion benchmarks/scripts/cub/bench/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def __call__(self, algname, ct_workload_space, rt_workload_space):

for ct_workload in ct_workload_space:
for variant in variants:
bench = Bench(algname, variant)
bench = Bench(algname, variant, list(ct_workload))
score = bench.score(ct_workload,
rt_workload_space,
self.base_center_estimator,
Expand Down

0 comments on commit 8f92f51

Please sign in to comment.