Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes cudaErrorInvalidValue when running on nvbench-created cuda stream #113

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
31 changes: 26 additions & 5 deletions nvbench/cuda_stream.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,13 @@
#pragma once

#include <nvbench/cuda_call.cuh>
#include <nvbench/detail/device_scope.cuh>
#include <nvbench/device_info.cuh>

#include <cuda_runtime_api.h>

#include <memory>
#include <optional>

namespace nvbench
{
Expand All @@ -39,18 +42,36 @@ namespace nvbench
struct cuda_stream
{
/**
* Constructs a cuda_stream that owns a new stream, created with
* `cudaStreamCreate`.
* Constructs a cuda_stream that owns a new stream, created with `cudaStreamCreate`.
*
* @param device The device that this stream should be associated with. If no device is provided,
* the stream will be associated with the device that is active at the call time.
*/
cuda_stream()
: m_stream{[]() {
explicit cuda_stream(std::optional<nvbench::device_info> device)
: m_stream{[device]() {
cudaStream_t s;
NVBENCH_CUDA_CALL(cudaStreamCreate(&s));
if (device.has_value())
{
nvbench::detail::device_scope scope_guard{device.value().get_id()};
NVBENCH_CUDA_CALL(cudaStreamCreate(&s));
}
else
{
NVBENCH_CUDA_CALL(cudaStreamCreate(&s));
}
return s;
}(),
stream_deleter{true}}
{}

/**
 * @brief Constructs a new cuda_stream that is associated with the device that is active at the
 * call time.
 */
cuda_stream()
: cuda_stream(std::nullopt)
{}

/**
* Constructs a `cuda_stream` from an explicit cudaStream_t.
*
Expand Down
3 changes: 2 additions & 1 deletion nvbench/state.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,6 @@ private:
std::optional<nvbench::device_info> device,
std::size_t type_config_index);

nvbench::cuda_stream m_cuda_stream;
std::reference_wrapper<const nvbench::benchmark_base> m_benchmark;
nvbench::named_values m_axis_values;
std::optional<nvbench::device_info> m_device;
Expand All @@ -277,6 +276,8 @@ private:
nvbench::float64_t m_skip_time;
nvbench::float64_t m_timeout;

nvbench::cuda_stream m_cuda_stream;

// Deadlock protection. See blocking_kernel's class doc for details.
nvbench::float64_t m_blocking_kernel_timeout{30.0};

Expand Down
2 changes: 2 additions & 0 deletions nvbench/state.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ state::state(const benchmark_base &bench)
, m_max_noise{bench.get_max_noise()}
, m_skip_time{bench.get_skip_time()}
, m_timeout{bench.get_timeout()}
, m_cuda_stream{std::nullopt}
{}

state::state(const benchmark_base &bench,
Expand All @@ -58,6 +59,7 @@ state::state(const benchmark_base &bench,
, m_max_noise{bench.get_max_noise()}
, m_skip_time{bench.get_skip_time()}
, m_timeout{bench.get_timeout()}
, m_cuda_stream{m_device}
{}

nvbench::int64_t state::get_int64(const std::string &axis_name) const
Expand Down
1 change: 1 addition & 0 deletions testing/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ set(test_srcs
benchmark.cu
create.cu
cuda_timer.cu
cuda_stream.cu
cpu_timer.cu
enum_type_list.cu
float64_axis.cu
Expand Down
72 changes: 72 additions & 0 deletions testing/cuda_stream.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright 2023 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 with the LLVM exception
* (the "License"); you may not use this file except in compliance with
* the License.
*
* You may obtain a copy of the License at
*
* http://llvm.org/foundation/relicensing/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <nvbench/cuda_stream.cuh>
#include <nvbench/device_manager.cuh>
#include <nvbench/types.cuh>

#include "test_asserts.cuh"

#include <fmt/format.h>

namespace
{
/**
 * @brief Queries and returns the device id that the given \p cuda_stream is associated with
 *
 * @param cuda_stream The stream to get the device id for
 * @return The device id that \p cuda_stream is associated with
 */
int get_device_of_stream(cudaStream_t cuda_stream)
{
  // Look up the driver context the stream belongs to; a stream's context
  // determines which device it is associated with.
  CUcontext ctx;
  NVBENCH_DRIVER_API_CALL(cuStreamGetCtx(CUstream{cuda_stream}, &ctx));
  // cuCtxGetDevice reports the device of the *current* context, so temporarily
  // push the stream's context onto this thread's context stack, query, then
  // restore the previous context.
  NVBENCH_DRIVER_API_CALL(cuCtxPushCurrent(ctx));
  CUdevice device_id{};
  NVBENCH_DRIVER_API_CALL(cuCtxGetDevice(&device_id));
  NVBENCH_DRIVER_API_CALL(cuCtxPopCurrent(&ctx));
  return static_cast<int>(device_id);
}
} // namespace

void test_basic()
{
// Get devices
auto devices = nvbench::device_manager::get().get_devices();

// Iterate over devices
for (auto const &device_info : devices)
{
// Create stream on the device before it becomes the active device
nvbench::cuda_stream device_stream(device_info);

// Verify cuda stream is associated with the correct cuda device
ASSERT(get_device_of_stream(device_stream.get_stream()) == device_info.get_id());

// Set the device as active device
device_info.set_active();

// Create the stream (implicitly) on the device that is currently active
nvbench::cuda_stream current_device_stream{};

// Verify the cuda stream was in fact associated with the currently active device
ASSERT(get_device_of_stream(current_device_stream.get_stream()) == device_info.get_id());
}
}

int main() { test_basic(); }