Merge branch 'branch-24.04' into test-cuda-12.2
jakirkham authored Feb 9, 2024
2 parents f4da95c + 49f7058 commit bdb9c06
Showing 19 changed files with 147 additions and 100 deletions.
ci/test_cpp.sh: 4 changes (2 additions, 2 deletions)
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.

set -euo pipefail

@@ -40,7 +40,7 @@ rapids-logger "Run gtests"

cd $CONDA_PREFIX/bin/gtests/librmm/
export GTEST_OUTPUT=xml:${RAPIDS_TESTS_DIR}/
-ctest -j20 --output-on-failure
+ctest -j20 --output-on-failure --no-tests=error

rapids-logger "Test script exiting with value: $EXITCODE"
exit ${EXITCODE}
include/rmm/device_buffer.hpp: 18 changes (8 additions, 10 deletions)

@@ -18,8 +18,8 @@
#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/error.hpp>
-#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
+#include <rmm/resource_ref.hpp>

#include <cuda_runtime_api.h>

@@ -40,7 +40,7 @@ namespace rmm {
* @brief RAII construct for device memory allocation
*
* This class allocates untyped and *uninitialized* device memory using a
- * `device_memory_resource`. If not explicitly specified, the memory resource
+ * `device_async_resource_ref`. If not explicitly specified, the memory resource
* returned from `get_current_device_resource()` is used.
*
* @note Unlike `std::vector` or `thrust::device_vector`, the device memory
@@ -82,8 +82,6 @@ namespace rmm {
*```
*/
class device_buffer {
-  using async_resource_ref = cuda::mr::async_resource_ref<cuda::mr::device_accessible>;
-
public:
// The copy constructor and copy assignment operator without a stream are deleted because they
// provide no way to specify an explicit stream
@@ -111,7 +109,7 @@ class device_buffer {
*/
explicit device_buffer(std::size_t size,
cuda_stream_view stream,
-                         async_resource_ref mr = mr::get_current_device_resource())
+                         device_async_resource_ref mr = mr::get_current_device_resource())
: _stream{stream}, _mr{mr}
{
cuda_set_device_raii dev{_device};
@@ -140,7 +138,7 @@ class device_buffer {
device_buffer(void const* source_data,
std::size_t size,
cuda_stream_view stream,
-                async_resource_ref mr = rmm::mr::get_current_device_resource())
+                device_async_resource_ref mr = mr::get_current_device_resource())
: _stream{stream}, _mr{mr}
{
cuda_set_device_raii dev{_device};
@@ -171,7 +169,7 @@ class device_buffer {
*/
device_buffer(device_buffer const& other,
cuda_stream_view stream,
-                async_resource_ref mr = rmm::mr::get_current_device_resource())
+                device_async_resource_ref mr = mr::get_current_device_resource())
: device_buffer{other.data(), other.size(), stream, mr}
{
}
@@ -410,17 +408,17 @@ class device_buffer {
void set_stream(cuda_stream_view stream) noexcept { _stream = stream; }

/**
-   * @briefreturn{The async_resource_ref used to allocate and deallocate}
+   * @briefreturn{The resource used to allocate and deallocate}
*/
-  [[nodiscard]] async_resource_ref memory_resource() const noexcept { return _mr; }
+  [[nodiscard]] rmm::device_async_resource_ref memory_resource() const noexcept { return _mr; }

private:
void* _data{nullptr}; ///< Pointer to device memory allocation
std::size_t _size{}; ///< Requested size of the device memory allocation
std::size_t _capacity{}; ///< The actual size of the device memory allocation
cuda_stream_view _stream{}; ///< Stream to use for device memory deallocation

-  async_resource_ref _mr{
+  rmm::device_async_resource_ref _mr{
rmm::mr::get_current_device_resource()}; ///< The memory resource used to
///< allocate/deallocate device memory
cuda_device_id _device{get_current_cuda_device()};
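A minimal usage sketch of the updated `device_buffer` interface (not part of this commit; the `cuda_memory_resource`, the 1024-byte size, and the default stream below are illustrative assumptions):

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

int main()
{
  rmm::mr::cuda_memory_resource cuda_mr;  // concrete resource; a pointer to it converts to a resource ref

  // 1024 uninitialized bytes, stream-ordered on the default stream. The third
  // argument is now taken as rmm::device_async_resource_ref rather than as a
  // device_memory_resource*.
  rmm::device_buffer buf{1024, rmm::cuda_stream_default, &cuda_mr};

  // The accessor likewise returns the type-erased resource reference.
  rmm::device_async_resource_ref ref = buf.memory_resource();
  (void)ref;
  return 0;
}
```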
include/rmm/device_scalar.hpp: 18 changes (8 additions, 10 deletions)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,8 +18,8 @@

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
-#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
+#include <rmm/resource_ref.hpp>

#include <type_traits>

@@ -92,9 +92,8 @@ class device_scalar {
* @param stream Stream on which to perform asynchronous allocation.
* @param mr Optional, resource with which to allocate.
*/
-  explicit device_scalar(
-    cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+  explicit device_scalar(cuda_stream_view stream,
+                         device_async_resource_ref mr = mr::get_current_device_resource())
: _storage{1, stream, mr}
{
}
@@ -115,10 +114,9 @@ class device_scalar {
* @param stream Optional, stream on which to perform allocation and copy.
* @param mr Optional, resource with which to allocate.
*/
-  explicit device_scalar(
-    value_type const& initial_value,
-    cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+  explicit device_scalar(value_type const& initial_value,
+                         cuda_stream_view stream,
+                         device_async_resource_ref mr = mr::get_current_device_resource())
: _storage{1, stream, mr}
{
set_value_async(initial_value, stream);
@@ -138,7 +136,7 @@
*/
device_scalar(device_scalar const& other,
cuda_stream_view stream,
-                rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+                device_async_resource_ref mr = mr::get_current_device_resource())
: _storage{other._storage, stream, mr}
{
}
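A hedged sketch of the reworked `device_scalar` constructors; the explicit `get_current_device_resource()` argument is shown only for illustration, since it is also the default:

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/device_scalar.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

void scalar_example()
{
  rmm::cuda_stream stream;  // owning stream wrapper, convertible to cuda_stream_view

  // A single device-side int initialized to 42 on `stream`. A raw
  // device_memory_resource* still works here because it converts implicitly
  // to device_async_resource_ref.
  rmm::device_scalar<int> scalar{42, stream, rmm::mr::get_current_device_resource()};

  int host_value = scalar.value(stream);  // copy the value back to the host, ordered on `stream`
  (void)host_value;
}
```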
include/rmm/device_uvector.hpp: 12 changes (6 additions, 6 deletions)

@@ -20,8 +20,8 @@
#include <rmm/detail/error.hpp>
#include <rmm/detail/exec_check_disable.hpp>
#include <rmm/device_buffer.hpp>
-#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
+#include <rmm/resource_ref.hpp>

#include <cstddef>
#include <vector>
@@ -74,7 +74,6 @@ namespace rmm {
*/
template <typename T>
class device_uvector {
-  using async_resource_ref = cuda::mr::async_resource_ref<cuda::mr::device_accessible>;
static_assert(std::is_trivially_copyable<T>::value,
"device_uvector only supports types that are trivially copyable.");

@@ -126,7 +125,7 @@ class device_uvector {
*/
explicit device_uvector(std::size_t size,
cuda_stream_view stream,
-                          async_resource_ref mr = rmm::mr::get_current_device_resource())
+                          device_async_resource_ref mr = mr::get_current_device_resource())
: _storage{elements_to_bytes(size), stream, mr}
{
}
@@ -142,7 +141,7 @@ class device_uvector {
*/
explicit device_uvector(device_uvector const& other,
cuda_stream_view stream,
-                          async_resource_ref mr = rmm::mr::get_current_device_resource())
+                          device_async_resource_ref mr = mr::get_current_device_resource())
: _storage{other._storage, stream, mr}
{
}
@@ -525,9 +524,10 @@ class device_uvector {
[[nodiscard]] bool is_empty() const noexcept { return size() == 0; }

/**
-   * @briefreturn{The async_resource_ref used to allocate and deallocate the device storage}
+   * @briefreturn{The resource used to allocate and deallocate the device
+   * storage}
*/
-  [[nodiscard]] async_resource_ref memory_resource() const noexcept
+  [[nodiscard]] rmm::device_async_resource_ref memory_resource() const noexcept
{
return _storage.memory_resource();
}
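And a short `device_uvector` sketch under the same assumptions (element count, value, and resource chosen only for illustration):

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

void uvector_example()
{
  rmm::cuda_stream stream;
  rmm::mr::cuda_memory_resource mr;

  // 1000 uninitialized floats allocated from `mr`, stream-ordered on `stream`.
  rmm::device_uvector<float> vec(1000, stream, &mr);
  vec.set_element(0, 1.0F, stream);  // write element 0 on `stream`

  // memory_resource() now returns rmm::device_async_resource_ref instead of
  // the removed class-local async_resource_ref alias.
  rmm::device_async_resource_ref ref = vec.memory_resource();
  (void)ref;
}
```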
include/rmm/exec_policy.hpp: 20 changes (10 additions, 10 deletions)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -23,6 +23,7 @@

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/thrust_allocator_adaptor.hpp>
+#include <rmm/resource_ref.hpp>

#include <rmm/detail/thrust_namespace.h>
#include <thrust/system/cuda/execution_policy.h>
@@ -39,7 +40,7 @@ namespace rmm {
* @brief Synchronous execution policy for allocations using thrust
*/
using thrust_exec_policy_t =
-  thrust::detail::execute_with_allocator<rmm::mr::thrust_allocator<char>,
+  thrust::detail::execute_with_allocator<mr::thrust_allocator<char>,
thrust::cuda_cub::execute_on_stream_base>;

/**
@@ -54,10 +55,10 @@ class exec_policy : public thrust_exec_policy_t {
* @param stream The stream on which to allocate temporary memory
* @param mr The resource to use for allocating temporary memory
*/
-  explicit exec_policy(cuda_stream_view stream = cuda_stream_default,
-                       rmm::mr::device_memory_resource* mr = mr::get_current_device_resource())
+  explicit exec_policy(cuda_stream_view stream = cuda_stream_default,
+                       device_async_resource_ref mr = mr::get_current_device_resource())
: thrust_exec_policy_t(
-      thrust::cuda::par(rmm::mr::thrust_allocator<char>(stream, mr)).on(stream.value()))
+      thrust::cuda::par(mr::thrust_allocator<char>(stream, mr)).on(stream.value()))
{
}
};
@@ -68,7 +69,7 @@ class exec_policy : public thrust_exec_policy_t {
* @brief Asynchronous execution policy for allocations using thrust
*/
using thrust_exec_policy_nosync_t =
-  thrust::detail::execute_with_allocator<rmm::mr::thrust_allocator<char>,
+  thrust::detail::execute_with_allocator<mr::thrust_allocator<char>,
thrust::cuda_cub::execute_on_stream_nosync_base>;
/**
* @brief Helper class usable as a Thrust CUDA execution policy
@@ -78,11 +79,10 @@ using thrust_exec_policy_nosync_t =
*/
class exec_policy_nosync : public thrust_exec_policy_nosync_t {
public:
-  explicit exec_policy_nosync(
-    cuda_stream_view stream = cuda_stream_default,
-    rmm::mr::device_memory_resource* mr = mr::get_current_device_resource())
+  explicit exec_policy_nosync(cuda_stream_view stream = cuda_stream_default,
+                              device_async_resource_ref mr = mr::get_current_device_resource())
: thrust_exec_policy_nosync_t(
-      thrust::cuda::par_nosync(rmm::mr::thrust_allocator<char>(stream, mr)).on(stream.value()))
+      thrust::cuda::par_nosync(mr::thrust_allocator<char>(stream, mr)).on(stream.value()))
{
}
};
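For context, a sketch of how these policies are typically passed to Thrust algorithms (assumes Thrust algorithm headers are available; not part of this diff):

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/fill.h>

void exec_policy_example()
{
  rmm::cuda_stream stream;
  rmm::device_uvector<int> vec(1 << 20, stream);  // uses get_current_device_resource()

  // Temporary allocations made by Thrust go through the policy's
  // thrust_allocator, here backed by the current device resource.
  thrust::fill(rmm::exec_policy(stream), vec.begin(), vec.end(), 7);

  // The nosync variant can skip the stream synchronization some algorithms
  // perform before returning control to the host.
  thrust::fill(rmm::exec_policy_nosync(stream), vec.begin(), vec.end(), 9);
}
```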
include/rmm/mr/device/binning_memory_resource.hpp: 2 changes (0 additions, 2 deletions)

@@ -169,8 +169,6 @@ class binning_memory_resource final : public device_memory_resource {
/**
* @brief Deallocate memory pointed to by \p p.
*
-   * @throws nothing
-   *
* @param ptr Pointer to be deallocated
* @param bytes The size in bytes of the allocation. This must be equal to the
* value of `bytes` that was passed to the `allocate` call that returned `p`.
@@ -226,8 +226,6 @@ class stream_ordered_memory_resource : public crtp<PoolResource>, public device_
/**
* @brief Deallocate memory pointed to by `p`.
*
-   * @throws nothing
-   *
* @param p Pointer to be deallocated
* @param size The size in bytes of the allocation to deallocate
* @param stream The stream in which to order this deallocation
include/rmm/mr/device/device_memory_resource.hpp: 9 changes (8 additions, 1 deletion)

@@ -297,9 +297,16 @@ class device_memory_resource {
* @brief Query whether the resource supports use of non-null CUDA streams for
* allocation/deallocation.
*
+   * @deprecated Functionality removed in favor of cuda::mr::async_memory_resource.
+   *
* @returns bool true if the resource supports non-null CUDA streams.
*/
-  [[nodiscard]] virtual bool supports_streams() const noexcept { return false; }
+  [[deprecated("Functionality removed in favor of cuda::mr::async_memory_resource.")]]  //
+  [[nodiscard]] virtual bool
+  supports_streams() const noexcept
+  {
+    return false;
+  }

/**
* @brief Query whether the resource supports the get_mem_info API.
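As an illustration of what the attribute means for callers (hypothetical caller code, not in the diff): existing calls keep compiling but now produce a deprecation diagnostic.

```cpp
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

bool query_streams_support()
{
  rmm::mr::cuda_memory_resource cuda_mr;
  rmm::mr::device_memory_resource* mr = &cuda_mr;

  // Compiles as before, but the [[deprecated]] attribute added above makes
  // compilers emit a warning along the lines of: "'supports_streams' is
  // deprecated: Functionality removed in favor of cuda::mr::async_memory_resource."
  return mr->supports_streams();
}
```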
include/rmm/mr/device/thrust_allocator_adaptor.hpp: 19 changes (10 additions, 9 deletions)

@@ -16,8 +16,8 @@

#pragma once

-#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
+#include <rmm/resource_ref.hpp>

#include <rmm/detail/thrust_namespace.h>
#include <thrust/device_malloc_allocator.h>
@@ -34,18 +34,16 @@ namespace rmm::mr {
*/
/**
* @brief An `allocator` compatible with Thrust containers and algorithms using
- * a `device_memory_resource` for memory (de)allocation.
+ * a `device_async_resource_ref` for memory (de)allocation.
*
- * Unlike a `device_memory_resource`, `thrust_allocator` is typed and bound to
+ * Unlike a `device_async_resource_ref`, `thrust_allocator` is typed and bound to
* allocate objects of a specific type `T`, but can be freely rebound to other
* types.
*
* @tparam T The type of the objects that will be allocated by this allocator
*/
template <typename T>
class thrust_allocator : public thrust::device_malloc_allocator<T> {
-  using async_resource_ref = cuda::mr::async_resource_ref<cuda::mr::device_accessible>;
-
public:
using Base = thrust::device_malloc_allocator<T>; ///< The base type of this allocator
using pointer = typename Base::pointer; ///< The pointer type
@@ -83,7 +81,10 @@ class thrust_allocator : public thrust::device_malloc_allocator<T> {
* @param mr The resource to be used for device memory allocation
* @param stream The stream to be used for device memory (de)allocation
*/
-  thrust_allocator(cuda_stream_view stream, async_resource_ref mr) : _stream{stream}, _mr(mr) {}
+  thrust_allocator(cuda_stream_view stream, rmm::device_async_resource_ref mr)
+    : _stream{stream}, _mr(mr)
+  {
+  }

/**
* @brief Copy constructor. Copies the resource pointer and stream.
@@ -121,9 +122,9 @@ class thrust_allocator : public thrust::device_malloc_allocator<T> {
}

/**
-   * @briefreturn{The async_resource_ref used to allocate and deallocate}
+   * @briefreturn{The resource used to allocate and deallocate}
*/
-  [[nodiscard]] async_resource_ref memory_resource() const noexcept { return _mr; }
+  [[nodiscard]] rmm::device_async_resource_ref memory_resource() const noexcept { return _mr; }

/**
* @briefreturn{The stream used by this allocator}
@@ -139,7 +140,7 @@

private:
cuda_stream_view _stream{};
-  async_resource_ref _mr{rmm::mr::get_current_device_resource()};
+  rmm::device_async_resource_ref _mr{rmm::mr::get_current_device_resource()};
};
/** @} */ // end of group
} // namespace rmm::mr
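Finally, a usage sketch for the updated `thrust_allocator` constructor (illustrative only; it assumes a Thrust version whose `device_vector` provides an allocator-taking constructor):

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/thrust_allocator_adaptor.hpp>

#include <thrust/device_vector.h>

void thrust_allocator_example()
{
  rmm::cuda_stream stream;
  rmm::mr::cuda_memory_resource mr;

  // The constructor now takes rmm::device_async_resource_ref; a raw resource
  // pointer converts implicitly.
  rmm::mr::thrust_allocator<int> alloc(stream, &mr);

  // Hand the allocator to a Thrust container so its storage comes from `mr`.
  thrust::device_vector<int, rmm::mr::thrust_allocator<int>> vec(100, alloc);
}
```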