Skip to content

Commit

Permalink
Ensure that cuda_memory_resource allocates memory on the proper dev…
Browse files Browse the repository at this point in the history
…ice (#2073)

* Ensure that `cuda_memory_resource` allocates memory on the proper device

* Move `__ensure_current_device` to own header
  • Loading branch information
miscco authored Aug 1, 2024
1 parent 2600135 commit 39b926a
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 38 deletions.
32 changes: 4 additions & 28 deletions cub/cub/util_device.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
// for backward compatibility
#include <cub/util_temporary_storage.cuh>

#include <cuda/std/__cuda/ensure_current_device.h>
#include <cuda/std/type_traits>
#include <cuda/std/utility>

Expand Down Expand Up @@ -105,36 +106,11 @@ CUB_RUNTIME_FUNCTION inline int CurrentDevice()
}

#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
/**
* \brief RAII helper which saves the current device and switches to the
* specified device on construction and switches to the saved device on
* destruction.
*/
struct SwitchDevice
{
private:
int const old_device;
bool const needs_reset;

public:
_CCCL_HOST inline SwitchDevice(int new_device)
: old_device(CurrentDevice())
, needs_reset(old_device != new_device)
{
if (needs_reset)
{
CubDebug(cudaSetDevice(new_device));
}
}
//! @brief RAII helper which saves the current device and switches to the specified device on construction and switches
//! to the saved device on destruction.
using SwitchDevice = ::cuda::__ensure_current_device;

_CCCL_HOST inline ~SwitchDevice()
{
if (needs_reset)
{
CubDebug(cudaSetDevice(old_device));
}
}
};
#endif // DOXYGEN_SHOULD_SKIP_THIS

/**
Expand Down
34 changes: 27 additions & 7 deletions libcudacxx/include/cuda/__memory_resource/cuda_memory_resource.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,30 @@
# include <cuda/__memory_resource/resource.h>
# include <cuda/__memory_resource/resource_ref.h>
# include <cuda/std/__cuda/api_wrapper.h>
# include <cuda/std/__cuda/ensure_current_device.h>
# include <cuda/std/__new/bad_alloc.h>

# if _CCCL_STD_VER >= 2014

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_MR

//! @brief cuda_memory_resource uses `cudaMalloc` / `cudaFree` for allocation / deallocation.
struct cuda_memory_resource
//! By default uses device 0 to allocate memory
class cuda_memory_resource
{
private:
int __device_id_{0};

public:
//! @brief default constructs a cuda_memory_resource allocating memory on device 0
cuda_memory_resource() = default;

//! @brief Constructs a cuda_memory_resource allocating memory on device \p __device_id
//! @param __device_id The id of the device we are allocating memory on
constexpr cuda_memory_resource(const int __device_id) noexcept
: __device_id_(__device_id)
{}

//! @brief Allocate device memory of size at least \p __bytes.
//! @param __bytes The size in bytes of the allocation.
//! @param __alignment The requested alignment of the allocation.
Expand All @@ -54,6 +69,9 @@ struct cuda_memory_resource
_CUDA_VSTD::__throw_bad_alloc();
}

// We need to ensure that we allocate on the right device as `cudaMalloc` always uses the current device
__ensure_current_device __device_wrapper{__device_id_};

void* __ptr{nullptr};
_CCCL_TRY_CUDA_API(::cudaMalloc, "Failed to allocate memory with cudaMalloc.", &__ptr, __bytes);
return __ptr;
Expand All @@ -73,17 +91,19 @@ struct cuda_memory_resource
}

//! @brief Equality comparison with another \c cuda_memory_resource
//! @return true
_CCCL_NODISCARD constexpr bool operator==(cuda_memory_resource const&) const noexcept
//! @param __other The other \c cuda_memory_resource
//! @return true, if both resources hold the same device id
_CCCL_NODISCARD constexpr bool operator==(cuda_memory_resource const& __other) const noexcept
{
return true;
return __device_id_ == __other.__device_id_;
}
# if _CCCL_STD_VER <= 2017
//! @brief Inequality comparison with another \c cuda_memory_resource
//! @return false
_CCCL_NODISCARD constexpr bool operator!=(cuda_memory_resource const&) const noexcept
//! @param __other The other \c cuda_memory_resource
//! @return true, if both resources hold different device ids
_CCCL_NODISCARD constexpr bool operator!=(cuda_memory_resource const& __other) const noexcept
{
return false;
return __device_id_ != __other.__device_id_;
}
# endif // _CCCL_STD_VER <= 2017

Expand Down
65 changes: 65 additions & 0 deletions libcudacxx/include/cuda/std/__cuda/ensure_current_device.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA__STD__CUDA_ENSURE_CURRENT_DEVICE_H
#define _CUDA__STD__CUDA_ENSURE_CURRENT_DEVICE_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#if !defined(_CCCL_CUDA_COMPILER_NVCC) && !defined(_CCCL_CUDA_COMPILER_NVHPC)
# include <cuda_runtime_api.h>
#endif // !_CCCL_CUDA_COMPILER_NVCC && !_CCCL_CUDA_COMPILER_NVHPC

#include <cuda/std/__cuda/api_wrapper.h>
#include <cuda/std/__exception/cuda_error.h>

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA

//! @brief `__ensure_current_device` is a simple helper that ensures that the current device is set to the right one
//! for as long as the object is alive. Only changes the current device if the target device is not the current one.
//!
//! NOTE(review): the type is implicitly copyable; destroying a copy would restore the original device a second time.
//! It is meant to be used as a scoped local object only.
struct __ensure_current_device
{
  //! The device that has to be current while this object is alive
  int __target_device_ = 0;
  //! The device that was current when this object was constructed
  int __original_device_ = 0;

  //! @brief Queries the current device and if that is different than \p __target_device sets the current device to
  //! \p __target_device
  //! @param __target_device The id of the device that should become the current device
  //! NOTE(review): failures are reported through `_CCCL_TRY_CUDA_API`, presumably by throwing a `cuda_error` --
  //! confirm against api_wrapper.h
  __ensure_current_device(const int __target_device)
      : __target_device_(__target_device)
  {
    _CCCL_TRY_CUDA_API(::cudaGetDevice, "Failed to query current device", &__original_device_);
    if (__original_device_ != __target_device_)
    {
      _CCCL_TRY_CUDA_API(::cudaSetDevice, "Failed to set device", __target_device_);
    }
  }

  //! @brief If the \p __original_device was not equal to \p __target_device sets the current device back to
  //! \p __original_device
  //!
  //! Destructors are implicitly `noexcept`, so any exception leaving this destructor would terminate the program.
  //! The restore uses the same error reporting as the constructor, hence the destructor is explicitly marked
  //! `noexcept(false)` so that a failed restore can reach the caller instead.
  ~__ensure_current_device() noexcept(false)
  {
    // Nothing to restore if the constructor did not have to switch devices
    if (__original_device_ != __target_device_)
    {
      _CCCL_TRY_CUDA_API(::cudaSetDevice, "Failed to set device", __original_device_);
    }
  }
};

_LIBCUDACXX_END_NAMESPACE_CUDA

#endif //_CUDA__STD__CUDA_ENSURE_CURRENT_DEVICE_H
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@
#include <cuda/std/type_traits>

using resource = cuda::mr::cuda_memory_resource;
static_assert(cuda::std::is_trivial<resource>::value, "");
static_assert(cuda::std::is_trivially_default_constructible<resource>::value, "");
static_assert(!cuda::std::is_trivial<resource>::value, "");
static_assert(!cuda::std::is_trivially_default_constructible<resource>::value, "");
static_assert(cuda::std::is_trivially_copy_constructible<resource>::value, "");
static_assert(cuda::std::is_trivially_move_constructible<resource>::value, "");
static_assert(cuda::std::is_trivially_copy_assignable<resource>::value, "");
static_assert(cuda::std::is_trivially_move_assignable<resource>::value, "");
static_assert(cuda::std::is_trivially_destructible<resource>::value, "");
static_assert(cuda::std::is_empty<resource>::value, "");
static_assert(!cuda::std::is_empty<resource>::value, "");

int main(int, char**)
{
Expand Down

0 comments on commit 39b926a

Please sign in to comment.