Skip to content

Commit

Permalink
hip kernel launch and hipDeviceSynchronize
Browse files Browse the repository at this point in the history
  • Loading branch information
cwpearson committed Jul 10, 2023
1 parent 2c53e32 commit 884b98d
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 5 deletions.
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ add_subdirectory(cudaMemcpyPeerAsync)
add_subdirectory(cudaMemcpyPeerAsync-duplex)
add_subdirectory(demand)
add_subdirectory(demand-duplex)
add_subdirectory(hip)
add_subdirectory(hipMemcpy)
add_subdirectory(hipMemcpyAsync)
add_subdirectory(hipMemcpyAsync-duplex)
Expand Down
6 changes: 6 additions & 0 deletions src/hip/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
if (SCOPE_USE_HIP)
set(comm_SOURCES ${comm_SOURCES}
${CMAKE_CURRENT_LIST_DIR}/hipDeviceSynchronize.cpp
${CMAKE_CURRENT_LIST_DIR}/kernel.cpp
PARENT_SCOPE)
endif()
50 changes: 50 additions & 0 deletions src/hip/hipDeviceSynchronize.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*! \file sync.cpp Measure the runtime cost of hipDeviceSynchronize
*/

#include "scope/scope.hpp"

#define NAME "Comm_hipDeviceSynchronize"

auto Comm_hipDeviceSynchronize = [](benchmark::State &state, const int gpu,
const int numaId) {
numa::ScopedBind binder(numaId);

if (0 == state.thread_index()) {
OR_SKIP_AND_RETURN(scope::hip_reset_device(gpu),
"failed to reset HIP device");
}

OR_SKIP_AND_RETURN(hipSetDevice(gpu), "");
OR_SKIP_AND_RETURN(hipFree(0), "failed to init");

hipError_t err = hipSuccess;
for (auto _ : state) {
err = hipDeviceSynchronize();
}

OR_SKIP_AND_RETURN(err, "failed to lsync");

state.SetItemsProcessed(state.iterations());
state.counters["gpu"] = gpu;
};

static void registerer() {
std::string name;
const std::vector<Device> hips = scope::system::hip_devices();
for (size_t i = 0; i < hips.size(); ++i) {
for (int numaId : numa::mems()) {
for (size_t numThreads = 1;
numThreads <= numa::cpus_in_node(numaId).size(); numThreads *= 2) {
int gpu = hips[i];
name = std::string(NAME) + "/" + std::to_string(numaId) + "/" +
std::to_string(gpu);
benchmark::RegisterBenchmark(name.c_str(), Comm_hipDeviceSynchronize,
gpu, numaId)
->Threads(numThreads)
->UseRealTime();
}
}
}
}

SCOPE_AFTER_INIT(registerer, NAME);
99 changes: 99 additions & 0 deletions src/hip/kernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/* Measure the runtime cost of a kernel launch
*/

#include "scope/scope.hpp"

#define NAME "Comm_hip_kernel"

// helper for passing some number of bytes by value
template <unsigned N> struct S {
char bytes[N];
};
template <> struct S<0> {};

template <unsigned N> __global__ void Comm_hip_kernel_kernel(S<N> s) {
(void)s;
}

auto Comm_hip_kernel = [](benchmark::State &state, const int gpu,
const int numaId) {
numa::ScopedBind binder(numaId);

if (0 == state.thread_index()) {
OR_SKIP_AND_RETURN(scope::hip_reset_device(gpu),
"failed to reset HIP device");
}

OR_SKIP_AND_RETURN(hipSetDevice(gpu), "");
OR_SKIP_AND_RETURN(hipFree(0), "failed to init");

const size_t nArgs = state.range(0);

#define LAUNCH(n) \
case n: { \
Comm_hip_kernel_kernel<(n)><<<1, 1>>>(S<(n)>()); \
break; \
}

for (auto _ : state) {
// Start copy
switch (nArgs) {
LAUNCH(0)
LAUNCH(1)
LAUNCH(4)
LAUNCH(8)
LAUNCH(32)
LAUNCH(64)
LAUNCH(96)
LAUNCH(128)
LAUNCH(256)
LAUNCH(512)
LAUNCH(1024)
LAUNCH(2048)
LAUNCH(4096)
default: {
state.SkipWithError("unexpected number of params");
break;
}
}
}
#undef LAUNCH
OR_SKIP_AND_RETURN(hipGetLastError(), "failed to launch kernel");

state.SetItemsProcessed(state.iterations());
state.counters["gpu"] = gpu;
OR_SKIP_AND_RETURN(hipDeviceSynchronize(), "failed to synchronize");
};

static void registerer() {
std::string name;
const std::vector<Device> hips = scope::system::hip_devices();
for (size_t i = 0; i < hips.size(); ++i) {
for (int numaId : numa::mems()) {
for (size_t numThreads = 1;
numThreads <= numa::cpus_in_node(numaId).size(); numThreads *= 2) {
int gpu = hips[i];
name = std::string(NAME) + "/" + std::to_string(numaId) + "/" +
std::to_string(gpu);
benchmark::RegisterBenchmark(name.c_str(), Comm_hip_kernel, gpu, numaId)
->Arg(0)
->Arg(1)
->Arg(4)
->Arg(8)
->Arg(32)
->Arg(64)
->Arg(96)
->Arg(128)
->Arg(256)
->Arg(512)
->Arg(1024)
->Arg(2048)
->Arg(4096)
->Threads(numThreads)
->UseRealTime();
}
}
}
}

SCOPE_AFTER_INIT(registerer, NAME);
11 changes: 6 additions & 5 deletions src/libcudart/cudaDeviceSynchronize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@

#define NAME "Comm_cudaDeviceSynchronize"

auto Comm_cudart_kernel = [](benchmark::State &state, const int gpu,
const int numaId) {
auto Comm_cudaDeviceSynchronize = [](benchmark::State &state, const int gpu,
const int numaId) {
numa::ScopedBind binder(numaId);

if (0 == state.thread_index()) {
OR_SKIP_AND_RETURN(scope::cuda_reset_device(gpu), "failed to reset CUDA device");
OR_SKIP_AND_RETURN(scope::cuda_reset_device(gpu),
"failed to reset CUDA device");
}

OR_SKIP_AND_RETURN(cudaSetDevice(gpu), "");
Expand All @@ -37,8 +38,8 @@ static void registerer() {
int gpu = cudas[i];
name = std::string(NAME) + "/" + std::to_string(numaId) + "/" +
std::to_string(gpu);
benchmark::RegisterBenchmark(name.c_str(), Comm_cudart_kernel, gpu,
numaId)
benchmark::RegisterBenchmark(name.c_str(), Comm_cudaDeviceSynchronize,
gpu, numaId)
->Threads(numThreads)
->UseRealTime();
}
Expand Down

0 comments on commit 884b98d

Please sign in to comment.