Skip to content

Commit

Permalink
add cudaDeviceSynchronize.cpp
Browse files Browse the repository at this point in the history
  • Loading branch information
cwpearson committed Jul 10, 2023
1 parent baa5a55 commit 12c101a
Showing 1 changed file with 49 additions and 0 deletions.
49 changes: 49 additions & 0 deletions src/libcudart/cudaDeviceSynchronize.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*! \file cudaDeviceSynchronize.cpp Measure the runtime cost of cudaDeviceSynchronize
*/

#include "scope/scope.hpp"

// Base name of this benchmark family: it prefixes every benchmark name built
// in registerer() and is also passed to SCOPE_AFTER_INIT.
#define NAME "Comm_cudaDeviceSynchronize"

auto Comm_cudart_kernel = [](benchmark::State &state, const int gpu,
const int numaId) {
numa::ScopedBind binder(numaId);

if (0 == state.thread_index()) {
OR_SKIP_AND_RETURN(scope::cuda_reset_device(gpu), "failed to reset CUDA device");
}

OR_SKIP_AND_RETURN(cudaSetDevice(gpu), "");
OR_SKIP_AND_RETURN(cudaFree(0), "failed to init");

cudaError_t err = cudaSuccess;
for (auto _ : state) {
err = cudaDeviceSynchronize();
}

OR_SKIP_AND_RETURN(err, "failed to lsync");

state.SetItemsProcessed(state.iterations());
state.counters["gpu"] = gpu;
};

/*! Register one benchmark per (GPU, NUMA node, thread count) combination.
 *
 * GPUs come from scope::system::cuda_devices() and NUMA nodes from
 * numa::mems(). For each pair, thread counts are the powers of two starting
 * at 1 that do not exceed the number of CPUs numa::cpus_in_node() reports
 * for the node. Each benchmark is registered under the name
 * NAME/<numaId>/<gpu> and measured in real (wall-clock) time.
 */
static void registerer() {
  const std::vector<Device> cudas = scope::system::cuda_devices();
  for (size_t i = 0; i < cudas.size(); ++i) {
    const int gpu = cudas[i];
    for (int numaId : numa::mems()) {
      // Neither the CPU count nor the name depends on the thread count, so
      // compute both once per (gpu, numaId) pair instead of once per pass.
      const size_t maxThreads = numa::cpus_in_node(numaId).size();
      const std::string name = std::string(NAME) + "/" +
                               std::to_string(numaId) + "/" +
                               std::to_string(gpu);
      for (size_t numThreads = 1; numThreads <= maxThreads; numThreads *= 2) {
        benchmark::RegisterBenchmark(name.c_str(), Comm_cudart_kernel, gpu,
                                     numaId)
            ->Threads(static_cast<int>(numThreads))
            ->UseRealTime();
      }
    }
  }
}

// Hand registerer() to the scope framework under NAME; presumably it is
// invoked once scope has finished initializing (confirm in scope/scope.hpp).
SCOPE_AFTER_INIT(registerer, NAME);

0 comments on commit 12c101a

Please sign in to comment.