From c6c7c889496fbf0d09e6b375e829d80d7a687161 Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Wed, 6 Nov 2024 11:38:34 +0100 Subject: [PATCH] Improve `uninitialized_{async_}buffer` API This implements some improvements to the buffers. * We want to be able to convert them if the properties match. * Add some improvements regarding symbol visibility * Add a way to grow a vector * Ensure we can simply swap them --- .../uninitialized_async_buffer.cuh | 101 ++++++++++----- .../__container/uninitialized_buffer.cuh | 117 ++++++++++++------ .../containers/uninitialized_async_buffer.cu | 39 ++++++ cudax/test/containers/uninitialized_buffer.cu | 34 +++++ docs/repo.toml | 1 + libcudacxx/include/cuda/std/__utility/swap.h | 2 + 6 files changed, 220 insertions(+), 74 deletions(-) diff --git a/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh b/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh index d9679c41575..5bad25bd46e 100644 --- a/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh +++ b/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh @@ -1,6 +1,7 @@ //===----------------------------------------------------------------------===// // -// Part of the CUDA Toolkit, under the Apache License v2.0 with LLVM Exceptions. +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. @@ -24,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -76,20 +78,32 @@ private: "execution space property!"); using __async_resource = ::cuda::experimental::mr::any_async_resource<_Properties...>; + __async_resource __mr_; ::cuda::stream_ref __stream_ = {}; size_t __count_ = 0; void* __buf_ = nullptr; + template + friend class uninitialized_async_buffer; + + //! @brief Helper to check whether a different buffer still statisfies all properties of this one + template + static constexpr bool __properties_match = + !_CCCL_TRAIT(_CUDA_VSTD::is_same, + _CUDA_VSTD::__make_type_set<_Properties...>, + _CUDA_VSTD::__make_type_set<_OtherProperties...>) + && _CUDA_VSTD::__type_set_contains_v<_CUDA_VSTD::__make_type_set<_OtherProperties...>, _Properties...>; + //! @brief Determines the allocation size given the alignment and size of `T` - _CCCL_NODISCARD static constexpr size_t __get_allocation_size(const size_t __count) noexcept + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI static constexpr size_t __get_allocation_size(const size_t __count) noexcept { constexpr size_t __alignment = alignof(_Tp); return (__count * sizeof(_Tp) + (__alignment - 1)) & ~(__alignment - 1); } //! @brief Determines the properly aligned start of the buffer given the alignment and size of `T` - _CCCL_NODISCARD constexpr _Tp* __get_data() const noexcept + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr _Tp* __get_data() const noexcept { constexpr size_t __alignment = alignof(_Tp); size_t __space = __get_allocation_size(__count_); @@ -101,7 +115,8 @@ private: //! @brief Causes the buffer to be treated as a span when passed to cudax::launch. //! @pre The buffer must have the cuda::mr::device_accessible property. template - _CCCL_NODISCARD_FRIEND auto __cudax_launch_transform(::cuda::stream_ref, uninitialized_async_buffer& __self) noexcept + _CCCL_NODISCARD_FRIEND _CCCL_HIDE_FROM_ABI auto + __cudax_launch_transform(::cuda::stream_ref, uninitialized_async_buffer& __self) noexcept _LIBCUDACXX_TRAILING_REQUIRES(_CUDA_VSTD::span<_Tp>)( _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<_CUDA_VMR::device_accessible, _Properties...>) { @@ -112,7 +127,7 @@ private: //! @brief Causes the buffer to be treated as a span when passed to cudax::launch //! @pre The buffer must have the cuda::mr::device_accessible property. template - _CCCL_NODISCARD_FRIEND auto + _CCCL_NODISCARD_FRIEND _CCCL_HIDE_FROM_ABI auto __cudax_launch_transform(::cuda::stream_ref, const uninitialized_async_buffer& __self) noexcept _LIBCUDACXX_TRAILING_REQUIRES(_CUDA_VSTD::span)( _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<_CUDA_VMR::device_accessible, _Properties...>) @@ -127,13 +142,14 @@ public: using pointer = _Tp*; using size_type = size_t; - //! @brief Constructs an \c uninitialized_async_buffer, allocating sufficient storage for \p __count elements using + //! @brief Constructs an \c uninitialized_async_buffer, allocating sufficient storage for \p __count elements through //! \p __mr //! @param __mr The async memory resource to allocate the buffer with. //! @param __stream The CUDA stream used for stream-ordered allocation. //! @param __count The desired size of the buffer. //! @note Depending on the alignment requirements of `T` the size of the underlying allocation might be larger //! than `count * sizeof(T)`. Only allocates memory when \p __count > 0 + _CCCL_HIDE_FROM_ABI uninitialized_async_buffer(__async_resource __mr, const ::cuda::stream_ref __stream, const size_t __count) : __mr_(_CUDA_VSTD::move(__mr)) , __stream_(__stream) @@ -141,21 +157,35 @@ public: , __buf_(__count_ == 0 ? nullptr : __mr_.allocate_async(__get_allocation_size(__count_), __stream_)) {} - uninitialized_async_buffer(const uninitialized_async_buffer&) = delete; - uninitialized_async_buffer& operator=(const uninitialized_async_buffer&) = delete; + _CCCL_HIDE_FROM_ABI uninitialized_async_buffer(const uninitialized_async_buffer&) = delete; + _CCCL_HIDE_FROM_ABI uninitialized_async_buffer& operator=(const uninitialized_async_buffer&) = delete; - //! @brief Move construction + //! @brief Move-constructs a \c uninitialized_async_buffer from \p __other //! @param __other Another \c uninitialized_async_buffer - uninitialized_async_buffer(uninitialized_async_buffer&& __other) noexcept + //! Takes ownership of the allocation in \p __other and resets it + _CCCL_HIDE_FROM_ABI uninitialized_async_buffer(uninitialized_async_buffer&& __other) noexcept + : __mr_(_CUDA_VSTD::move(__other.__mr_)) + , __stream_(_CUDA_VSTD::exchange(__other.__stream_, {})) + , __count_(_CUDA_VSTD::exchange(__other.__count_, 0)) + , __buf_(_CUDA_VSTD::exchange(__other.__buf_, nullptr)) + {} + + //! @brief Move-constructs a \c uninitialized_async_buffer from \p __other + //! @param __other Another \c uninitialized_async_buffer with matching properties + //! Takes ownership of the allocation in \p __other and resets it + _LIBCUDACXX_TEMPLATE(class... _OtherProperties) + _LIBCUDACXX_REQUIRES(__properties_match<_OtherProperties...>) + _CCCL_HIDE_FROM_ABI uninitialized_async_buffer(uninitialized_async_buffer<_Tp, _OtherProperties...>&& __other) noexcept : __mr_(_CUDA_VSTD::move(__other.__mr_)) , __stream_(_CUDA_VSTD::exchange(__other.__stream_, {})) , __count_(_CUDA_VSTD::exchange(__other.__count_, 0)) , __buf_(_CUDA_VSTD::exchange(__other.__buf_, nullptr)) {} - //! @brief Move assignment + //! @brief Move-assings a \c uninitialized_async_buffer from \p __other //! @param __other Another \c uninitialized_async_buffer - uninitialized_async_buffer& operator=(uninitialized_async_buffer&& __other) noexcept + //! Deallocates the current allocation and then takes ownership of the allocation in \p __other and resets it + _CCCL_HIDE_FROM_ABI uninitialized_async_buffer& operator=(uninitialized_async_buffer&& __other) noexcept { if (this == _CUDA_VSTD::addressof(__other)) { @@ -172,11 +202,12 @@ public: __buf_ = _CUDA_VSTD::exchange(__other.__buf_, nullptr); return *this; } + //! @brief Destroys an \c uninitialized_async_buffer and deallocates the buffer in stream order on the stream that was //! used to create the buffer. //! @warning The destructor does not destroy any objects that may or may not reside within the buffer. It is the //! user's responsibility to ensure that all objects within the buffer have been properly destroyed. - ~uninitialized_async_buffer() + _CCCL_HIDE_FROM_ABI ~uninitialized_async_buffer() { if (__buf_) { @@ -184,33 +215,33 @@ public: } } - //! @brief Returns an aligned pointer to the buffer - _CCCL_NODISCARD constexpr pointer begin() const noexcept + //! @brief Returns an aligned pointer to the first element in the buffer + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr pointer begin() const noexcept { return __get_data(); } //! @brief Returns an aligned pointer to the element following the last element of the buffer. //! This element acts as a placeholder; attempting to access it results in undefined behavior. - _CCCL_NODISCARD constexpr pointer end() const noexcept + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr pointer end() const noexcept { return __get_data() + __count_; } - //! @brief Returns an aligned pointer to the buffer - _CCCL_NODISCARD constexpr pointer data() const noexcept + //! @brief Returns an aligned pointer to the first element in the buffer + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr pointer data() const noexcept { return __get_data(); } //! @brief Returns the size of the buffer - _CCCL_NODISCARD constexpr size_type size() const noexcept + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr size_type size() const noexcept { return __count_; } //! @brief Returns the size of the buffer in bytes - _CCCL_NODISCARD constexpr size_type size_bytes() const noexcept + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr size_type size_bytes() const noexcept { return __count_ * sizeof(_Tp); } @@ -219,13 +250,13 @@ public: //! Returns a \c const reference to the :ref:`any_async_resource ` //! that holds the memory resource used to allocate the buffer //! @endrst - _CCCL_NODISCARD const __async_resource& get_resource() const noexcept + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI const __async_resource& get_resource() const noexcept { return __mr_; } //! @brief Returns the stored stream - _CCCL_NODISCARD constexpr ::cuda::stream_ref get_stream() const noexcept + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr ::cuda::stream_ref get_stream() const noexcept { return __stream_; } @@ -233,7 +264,7 @@ public: //! @brief Replaces the stored stream //! @param __new_stream the new stream //! @note Always synchronizes with the old stream - constexpr void change_stream(::cuda::stream_ref __new_stream) + _CCCL_HIDE_FROM_ABI constexpr void change_stream(::cuda::stream_ref __new_stream) { if (__new_stream != __stream_) { @@ -242,22 +273,26 @@ public: __stream_ = __new_stream; } - //! @brief Swaps the contents with those of another \c uninitialized_async_buffer - //! @param __other The other \c uninitialized_async_buffer. - constexpr void swap(uninitialized_async_buffer& __other) noexcept - { - _CUDA_VSTD::swap(__mr_, __other.__mr_); - _CUDA_VSTD::swap(__count_, __other.__count_); - _CUDA_VSTD::swap(__buf_, __other.__buf_); - } - # ifndef DOXYGEN_SHOULD_SKIP_THIS // friend functions are currently broken //! @brief Forwards the passed properties _LIBCUDACXX_TEMPLATE(class _Property) _LIBCUDACXX_REQUIRES( (!property_with_value<_Property>) _LIBCUDACXX_AND _CUDA_VSTD::__is_included_in_v<_Property, _Properties...>) - friend constexpr void get_property(const uninitialized_async_buffer&, _Property) noexcept {} + _CCCL_HIDE_FROM_ABI friend constexpr void get_property(const uninitialized_async_buffer&, _Property) noexcept {} # endif // DOXYGEN_SHOULD_SKIP_THIS + + //! @brief Internal method to grow the allocation to a new size \p __count. + //! @param __count The new size of the allocation. + //! @return An \c uninitialized_async_buffer that holds the previous allocation + //! @warning This buffer must outlive the returned buffer + _CCCL_HIDE_FROM_ABI uninitialized_async_buffer __replace_allocation(const size_t __count) + { + // Create a new buffer with a reference to the stored memory resource and swap allocation information + uninitialized_async_buffer __ret{_CUDA_VMR::async_resource_ref<_Properties...>{__mr_}, __stream_, __count}; + _CUDA_VSTD::swap(__count_, __ret.__count_); + _CUDA_VSTD::swap(__buf_, __ret.__buf_); + return __ret; + } }; template diff --git a/cudax/include/cuda/experimental/__container/uninitialized_buffer.cuh b/cudax/include/cuda/experimental/__container/uninitialized_buffer.cuh index c57e4bc5eac..0388074514e 100644 --- a/cudax/include/cuda/experimental/__container/uninitialized_buffer.cuh +++ b/cudax/include/cuda/experimental/__container/uninitialized_buffer.cuh @@ -1,6 +1,7 @@ //===----------------------------------------------------------------------===// // -// Part of the CUDA Toolkit, under the Apache License v2.0 with LLVM Exceptions. +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. @@ -24,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -42,14 +44,14 @@ namespace cuda::experimental //! @rst //! .. _cudax-containers-uninitialized-buffer: //! -//! Uninitialized type safe memory storage +//! Uninitialized type-safe memory storage //! --------------------------------------- //! //! ``uninitialized_buffer`` provides a typed buffer allocated from a given :ref:`memory resource //! `. It handles alignment and release of the allocation. //! The memory is uninitialized, so that a user needs to ensure elements are properly constructed. //! -//! In addition to being type safe, ``uninitialized_buffer`` also takes a set of :ref:`properties +//! In addition to being type-safe, ``uninitialized_buffer`` also takes a set of :ref:`properties //! ` to ensure that e.g. execution space constraints are checked //! at compile time. However, we can only forward stateless properties. If a user wants to use a stateful one, then they //! need to implement :ref:`get_property(const device_buffer&, Property) @@ -63,23 +65,35 @@ class uninitialized_buffer { private: static_assert(_CUDA_VMR::__contains_execution_space_property<_Properties...>, - "The properties of cuda::experimental::mr::uninitialized_buffer must contain at least one execution " - "space property!"); + "The properties of cuda::experimental::uninitialized_buffer must contain at least one execution space " + "property!"); using __resource = ::cuda::experimental::mr::any_resource<_Properties...>; + __resource __mr_; size_t __count_ = 0; void* __buf_ = nullptr; + template + friend class uninitialized_buffer; + + //! @brief Helper to check whether a different buffer still statisfies all properties of this one + template + static constexpr bool __properties_match = + !_CCCL_TRAIT(_CUDA_VSTD::is_same, + _CUDA_VSTD::__make_type_set<_Properties...>, + _CUDA_VSTD::__make_type_set<_OtherProperties...>) + && _CUDA_VSTD::__type_set_contains_v<_CUDA_VSTD::__make_type_set<_OtherProperties...>, _Properties...>; + //! @brief Determines the allocation size given the alignment and size of `T` - _CCCL_NODISCARD _CCCL_HOST_DEVICE static constexpr size_t __get_allocation_size(const size_t __count) noexcept + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI static constexpr size_t __get_allocation_size(const size_t __count) noexcept { constexpr size_t __alignment = alignof(_Tp); return (__count * sizeof(_Tp) + (__alignment - 1)) & ~(__alignment - 1); } //! @brief Determines the properly aligned start of the buffer given the alignment and size of `T` - _CCCL_NODISCARD _CCCL_HOST_DEVICE _Tp* __get_data() const noexcept + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI _Tp* __get_data() const noexcept { constexpr size_t __alignment = alignof(_Tp); size_t __space = __get_allocation_size(__count_); @@ -91,7 +105,8 @@ private: //! @brief Causes the buffer to be treated as a span when passed to cudax::launch. //! @pre The buffer must have the cuda::mr::device_accessible property. template - _CCCL_NODISCARD_FRIEND auto __cudax_launch_transform(::cuda::stream_ref, uninitialized_buffer& __self) noexcept + _CCCL_NODISCARD_FRIEND _CCCL_HIDE_FROM_ABI auto + __cudax_launch_transform(::cuda::stream_ref, uninitialized_buffer& __self) noexcept _LIBCUDACXX_TRAILING_REQUIRES(_CUDA_VSTD::span<_Tp>)( _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<_CUDA_VMR::device_accessible, _Properties...>) { @@ -101,7 +116,8 @@ private: //! @brief Causes the buffer to be treated as a span when passed to cudax::launch //! @pre The buffer must have the cuda::mr::device_accessible property. template - _CCCL_NODISCARD_FRIEND auto __cudax_launch_transform(::cuda::stream_ref, const uninitialized_buffer& __self) noexcept + _CCCL_NODISCARD_FRIEND _CCCL_HIDE_FROM_ABI auto + __cudax_launch_transform(::cuda::stream_ref, const uninitialized_buffer& __self) noexcept _LIBCUDACXX_TRAILING_REQUIRES(_CUDA_VSTD::span)( _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<_CUDA_VMR::device_accessible, _Properties...>) { @@ -114,32 +130,46 @@ public: using pointer = _Tp*; using size_type = size_t; - //! @brief Constructs a \c uninitialized_buffer, allocating sufficient storage for \p __count elements through \p __mr + //! @brief Constructs an \c uninitialized_buffer and allocates sufficient storage for \p __count elements through + //! \p __mr //! @param __mr The memory resource to allocate the buffer with. //! @param __count The desired size of the buffer. //! @note Depending on the alignment requirements of `T` the size of the underlying allocation might be larger //! than `count * sizeof(T)`. //! @note Only allocates memory when \p __count > 0 - uninitialized_buffer(__resource __mr, const size_t __count) + _CCCL_HIDE_FROM_ABI uninitialized_buffer(__resource __mr, const size_t __count) : __mr_(_CUDA_VSTD::move(__mr)) , __count_(__count) , __buf_(__count_ == 0 ? nullptr : __mr_.allocate(__get_allocation_size(__count_))) {} - uninitialized_buffer(const uninitialized_buffer&) = delete; - uninitialized_buffer& operator=(const uninitialized_buffer&) = delete; + _CCCL_HIDE_FROM_ABI uninitialized_buffer(const uninitialized_buffer&) = delete; + _CCCL_HIDE_FROM_ABI uninitialized_buffer& operator=(const uninitialized_buffer&) = delete; + + //! @brief Move-constructs a \c uninitialized_buffer from \p __other + //! @param __other Another \c uninitialized_buffer + //! Takes ownership of the allocation in \p __other and resets it + _CCCL_HIDE_FROM_ABI uninitialized_buffer(uninitialized_buffer&& __other) noexcept + : __mr_(_CUDA_VSTD::move(__other.__mr_)) + , __count_(_CUDA_VSTD::exchange(__other.__count_, 0)) + , __buf_(_CUDA_VSTD::exchange(__other.__buf_, nullptr)) + {} - //! @brief Move construction + //! @brief Move-constructs a \c uninitialized_buffer from another \c uninitialized_buffer with matching properties //! @param __other Another \c uninitialized_buffer - uninitialized_buffer(uninitialized_buffer&& __other) noexcept + //! Takes ownership of the allocation in \p __other and resets it + _LIBCUDACXX_TEMPLATE(class... _OtherProperties) + _LIBCUDACXX_REQUIRES(__properties_match<_OtherProperties...>) + _CCCL_HIDE_FROM_ABI uninitialized_buffer(uninitialized_buffer<_Tp, _OtherProperties...>&& __other) noexcept : __mr_(_CUDA_VSTD::move(__other.__mr_)) , __count_(_CUDA_VSTD::exchange(__other.__count_, 0)) , __buf_(_CUDA_VSTD::exchange(__other.__buf_, nullptr)) {} - //! @brief Move assignment + //! @brief Move-assings a \c uninitialized_buffer from \p __other //! @param __other Another \c uninitialized_buffer - uninitialized_buffer& operator=(uninitialized_buffer&& __other) noexcept + //! Deallocates the current allocation and then takes ownership of the allocation in \p __other and resets it + _CCCL_HIDE_FROM_ABI uninitialized_buffer& operator=(uninitialized_buffer&& __other) noexcept { if (this == _CUDA_VSTD::addressof(__other)) { @@ -150,16 +180,17 @@ public: { __mr_.deallocate(__buf_, __get_allocation_size(__count_)); } + __mr_ = _CUDA_VSTD::move(__other.__mr_); __count_ = _CUDA_VSTD::exchange(__other.__count_, 0); __buf_ = _CUDA_VSTD::exchange(__other.__buf_, nullptr); return *this; } - //! @brief Destroys an \c uninitialized_buffer deallocating the buffer + //! @brief Destroys an \c uninitialized_buffer and deallocates the buffer //! @warning The destructor does not destroy any objects that may or may not reside within the buffer. It is the //! user's responsibility to ensure that all objects within the buffer have been properly destroyed. - ~uninitialized_buffer() + _CCCL_HIDE_FROM_ABI ~uninitialized_buffer() { if (__buf_) { @@ -167,32 +198,33 @@ public: } } - //! @brief Returns an aligned pointer to the buffer - _CCCL_NODISCARD _CCCL_HOST_DEVICE pointer begin() const noexcept + //! @brief Returns an aligned pointer to the first element in the buffer + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI pointer begin() const noexcept { return __get_data(); } - //! @brief Returns an aligned pointer to end of the buffer - _CCCL_NODISCARD _CCCL_HOST_DEVICE pointer end() const noexcept + //! @brief Returns an aligned pointer to the element following the last element of the buffer. + //! This element acts as a placeholder; attempting to access it results in undefined behavior. + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI pointer end() const noexcept { return __get_data() + __count_; } - //! @brief Returns an aligned pointer to the buffer - _CCCL_NODISCARD _CCCL_HOST_DEVICE pointer data() const noexcept + //! @brief Returns an aligned pointer to the first element in the buffer + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI pointer data() const noexcept { return __get_data(); } - //! @brief Returns the size of the buffer - _CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr size_type size() const noexcept + //! @brief Returns the size of the allocation + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr size_type size() const noexcept { return __count_; } //! @brief Returns the size of the buffer in bytes - _CCCL_NODISCARD constexpr size_type size_bytes() const noexcept + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr size_type size_bytes() const noexcept { return __count_ * sizeof(_Tp); } @@ -201,28 +233,31 @@ public: //! Returns a \c const reference to the :ref:`any_resource ` //! that holds the memory resource used to allocate the buffer //! @endrst - _CCCL_EXEC_CHECK_DISABLE - _CCCL_NODISCARD _CCCL_HOST_DEVICE const __resource& get_resource() const noexcept + _CCCL_NODISCARD _CCCL_HIDE_FROM_ABI const __resource& get_resource() const noexcept { return __mr_; } - //! @brief Swaps the contents with those of another \c uninitialized_buffer - //! @param __other The other \c uninitialized_buffer. - _CCCL_HOST_DEVICE constexpr void swap(uninitialized_buffer& __other) noexcept - { - __mr_.swap(__other.__mr_); - _CUDA_VSTD::swap(__count_, __other.__count_); - _CUDA_VSTD::swap(__buf_, __other.__buf_); - } - -# ifndef DOXYGEN_SHOULD_SKIP_THIS // friend functions are currently brocken +# ifndef DOXYGEN_SHOULD_SKIP_THIS // friend functions are currently broken //! @brief Forwards the passed Properties _LIBCUDACXX_TEMPLATE(class _Property) _LIBCUDACXX_REQUIRES( (!property_with_value<_Property>) _LIBCUDACXX_AND _CUDA_VSTD::__is_included_in_v<_Property, _Properties...>) - friend constexpr void get_property(const uninitialized_buffer&, _Property) noexcept {} + _CCCL_HIDE_FROM_ABI friend constexpr void get_property(const uninitialized_buffer&, _Property) noexcept {} # endif // DOXYGEN_SHOULD_SKIP_THIS + + //! @brief Internal method to grow the allocation to a new size \p __count. + //! @param __count The new size of the allocation. + //! @return An \c uninitialized_buffer that holds the previous allocation + //! @warning This buffer must outlive the returned buffer + _CCCL_HIDE_FROM_ABI uninitialized_buffer __replace_allocation(const size_t __count) + { + // Create a new buffer with a reference to the stored memory resource and swap allocation information + uninitialized_buffer __ret{_CUDA_VMR::resource_ref<_Properties...>{__mr_}, __count}; + _CUDA_VSTD::swap(__count_, __ret.__count_); + _CUDA_VSTD::swap(__buf_, __ret.__buf_); + return __ret; + } }; template diff --git a/cudax/test/containers/uninitialized_async_buffer.cu b/cudax/test/containers/uninitialized_async_buffer.cu index 83a3c8515d0..87fc7fc5ece 100644 --- a/cudax/test/containers/uninitialized_async_buffer.cu +++ b/cudax/test/containers/uninitialized_async_buffer.cu @@ -42,6 +42,10 @@ constexpr int get_property( { return 42; } +constexpr int get_property(const cuda::experimental::mr::async_memory_resource&, my_property) +{ + return 42; +} TEMPLATE_TEST_CASE( "uninitialized_async_buffer", "[container]", char, short, int, long, long long, float, double, do_not_construct) @@ -79,6 +83,23 @@ TEMPLATE_TEST_CASE( } } + SECTION("conversion") + { + cuda::experimental::uninitialized_async_buffer input{ + resource, stream, 42}; + const TestType* ptr = input.data(); + + uninitialized_async_buffer from_rvalue{cuda::std::move(input)}; + CUDAX_CHECK(from_rvalue.data() == ptr); + CUDAX_CHECK(from_rvalue.size() == 42); + CUDAX_CHECK(from_rvalue.get_stream() == stream); + + // Ensure that we properly reset the input buffer + CUDAX_CHECK(input.data() == nullptr); + CUDAX_CHECK(input.size() == 0); + CUDAX_CHECK(input.get_stream() == cuda::stream_ref{}); + } + SECTION("assignment") { static_assert(!cuda::std::is_copy_assignable::value, ""); @@ -164,6 +185,24 @@ TEMPLATE_TEST_CASE( CUDAX_CHECK(res == TestType{84}); } } + + SECTION("Replace allocation of current buffer") + { + uninitialized_async_buffer buf{resource, stream, 42}; + const TestType* old_ptr = buf.data(); + const size_t old_size = buf.size(); + + { + const uninitialized_async_buffer old_buf = buf.__replace_allocation(1337); + CUDAX_CHECK(buf.data() != old_ptr); + CUDAX_CHECK(buf.size() == 1337); + + CUDAX_CHECK(old_buf.data() == old_ptr); + CUDAX_CHECK(old_buf.size() == old_size); + + CUDAX_CHECK(buf.get_stream() == old_buf.get_stream()); + } + } } // A test resource that keeps track of the number of resources are diff --git a/cudax/test/containers/uninitialized_buffer.cu b/cudax/test/containers/uninitialized_buffer.cu index e19b9c7ac68..22fe1ef473c 100644 --- a/cudax/test/containers/uninitialized_buffer.cu +++ b/cudax/test/containers/uninitialized_buffer.cu @@ -56,6 +56,10 @@ constexpr int get_property( { return 42; } +constexpr int get_property(const cuda::mr::device_memory_resource&, my_property) +{ + return 42; +} TEMPLATE_TEST_CASE( "uninitialized_buffer", "[container]", char, short, int, long, long long, float, double, do_not_construct) @@ -89,6 +93,20 @@ TEMPLATE_TEST_CASE( } } + SECTION("conversion") + { + cuda::experimental::uninitialized_buffer input{resource, 42}; + const TestType* ptr = input.data(); + + uninitialized_buffer from_rvalue{cuda::std::move(input)}; + CUDAX_CHECK(from_rvalue.data() == ptr); + CUDAX_CHECK(from_rvalue.size() == 42); + + // Ensure that we properly reset the input buffer + CUDAX_CHECK(input.data() == nullptr); + CUDAX_CHECK(input.size() == 0); + } + SECTION("assignment") { static_assert(!cuda::std::is_copy_assignable::value, ""); @@ -168,6 +186,22 @@ TEMPLATE_TEST_CASE( CUDAX_CHECK(res == TestType{84}); } } + + SECTION("Replace allocation of current buffer") + { + uninitialized_buffer buf{resource, 42}; + const TestType* old_ptr = buf.data(); + const size_t old_size = buf.size(); + + { + const uninitialized_buffer old_buf = buf.__replace_allocation(1337); + CUDAX_CHECK(buf.data() != old_ptr); + CUDAX_CHECK(buf.size() == 1337); + + CUDAX_CHECK(old_buf.data() == old_ptr); + CUDAX_CHECK(old_buf.size() == old_size); + } + } } __global__ void kernel(_CUDA_VSTD::span data) diff --git a/docs/repo.toml b/docs/repo.toml index e745a6dc14d..2bc5748922d 100644 --- a/docs/repo.toml +++ b/docs/repo.toml @@ -406,6 +406,7 @@ doxygen_predefined = [ "_CCCL_EXEC_CHECK_DISABLE=", "_CCCL_FORCEINLINE=", "_CCCL_GLOBAL_CONSTANT=constexpr", + "_CCCL_HIDE_FROM_ABI=", "_CCCL_HOST=", "_CCCL_HOST_DEVICE=", "_CCCL_IF_CONSTEXPR=if constexpr", diff --git a/libcudacxx/include/cuda/std/__utility/swap.h b/libcudacxx/include/cuda/std/__utility/swap.h index 8531d68f2a1..b2c95b96872 100644 --- a/libcudacxx/include/cuda/std/__utility/swap.h +++ b/libcudacxx/include/cuda/std/__utility/swap.h @@ -31,6 +31,7 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD +_CCCL_EXEC_CHECK_DISABLE template _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 __swap_result_t<_Tp> swap(_Tp& __x, _Tp& __y) noexcept( _CCCL_TRAIT(is_nothrow_move_constructible, _Tp) && _CCCL_TRAIT(is_nothrow_move_assignable, _Tp)) @@ -40,6 +41,7 @@ _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 __swap_result_t<_Tp> swap(_Tp& _ __y = _CUDA_VSTD::move(__t); } +_CCCL_EXEC_CHECK_DISABLE template _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 __enable_if_t<__detect_adl_swap::__has_no_adl_swap_array<_Tp, _Np>::value && __is_swappable<_Tp>::value>