Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve uninitialized_{async_}buffer API #2713

Merged
merged 1 commit into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//===----------------------------------------------------------------------===//
//
// Part of the CUDA Toolkit, under the Apache License v2.0 with LLVM Exceptions.
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
Expand All @@ -24,6 +25,7 @@
#include <cuda/__memory_resource/resource_ref.h>
#include <cuda/std/__memory/align.h>
#include <cuda/std/__new/launder.h>
#include <cuda/std/__type_traits/type_set.h>
#include <cuda/std/__utility/exchange.h>
#include <cuda/std/__utility/move.h>
#include <cuda/std/__utility/swap.h>
Expand Down Expand Up @@ -76,20 +78,32 @@ private:
"execution space property!");

using __async_resource = ::cuda::experimental::mr::any_async_resource<_Properties...>;

__async_resource __mr_;
::cuda::stream_ref __stream_ = {};
size_t __count_ = 0;
void* __buf_ = nullptr;

template <class, class...>
friend class uninitialized_async_buffer;

//! @brief Helper to check whether a different buffer still statisfies all properties of this one
template <class... _OtherProperties>
static constexpr bool __properties_match =
!_CCCL_TRAIT(_CUDA_VSTD::is_same,
_CUDA_VSTD::__make_type_set<_Properties...>,
_CUDA_VSTD::__make_type_set<_OtherProperties...>)
&& _CUDA_VSTD::__type_set_contains_v<_CUDA_VSTD::__make_type_set<_OtherProperties...>, _Properties...>;

//! @brief Determines the allocation size given the alignment and size of `T`
_CCCL_NODISCARD static constexpr size_t __get_allocation_size(const size_t __count) noexcept
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI static constexpr size_t __get_allocation_size(const size_t __count) noexcept
{
constexpr size_t __alignment = alignof(_Tp);
return (__count * sizeof(_Tp) + (__alignment - 1)) & ~(__alignment - 1);
}

//! @brief Determines the properly aligned start of the buffer given the alignment and size of `T`
_CCCL_NODISCARD constexpr _Tp* __get_data() const noexcept
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr _Tp* __get_data() const noexcept
{
constexpr size_t __alignment = alignof(_Tp);
size_t __space = __get_allocation_size(__count_);
Expand All @@ -101,7 +115,8 @@ private:
//! @brief Causes the buffer to be treated as a span when passed to cudax::launch.
//! @pre The buffer must have the cuda::mr::device_accessible property.
template <class _Tp2 = _Tp>
_CCCL_NODISCARD_FRIEND auto __cudax_launch_transform(::cuda::stream_ref, uninitialized_async_buffer& __self) noexcept
_CCCL_NODISCARD_FRIEND _CCCL_HIDE_FROM_ABI auto
__cudax_launch_transform(::cuda::stream_ref, uninitialized_async_buffer& __self) noexcept
_LIBCUDACXX_TRAILING_REQUIRES(_CUDA_VSTD::span<_Tp>)(
_CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<_CUDA_VMR::device_accessible, _Properties...>)
{
Expand All @@ -112,7 +127,7 @@ private:
//! @brief Causes the buffer to be treated as a span when passed to cudax::launch
//! @pre The buffer must have the cuda::mr::device_accessible property.
template <class _Tp2 = _Tp>
_CCCL_NODISCARD_FRIEND auto
_CCCL_NODISCARD_FRIEND _CCCL_HIDE_FROM_ABI auto
__cudax_launch_transform(::cuda::stream_ref, const uninitialized_async_buffer& __self) noexcept
_LIBCUDACXX_TRAILING_REQUIRES(_CUDA_VSTD::span<const _Tp>)(
_CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<_CUDA_VMR::device_accessible, _Properties...>)
Expand All @@ -127,35 +142,50 @@ public:
using pointer = _Tp*;
using size_type = size_t;

//! @brief Constructs an \c uninitialized_async_buffer, allocating sufficient storage for \p __count elements using
//! @brief Constructs an \c uninitialized_async_buffer, allocating sufficient storage for \p __count elements through
//! \p __mr
//! @param __mr The async memory resource to allocate the buffer with.
//! @param __stream The CUDA stream used for stream-ordered allocation.
//! @param __count The desired size of the buffer.
//! @note Depending on the alignment requirements of `T` the size of the underlying allocation might be larger
//! than `count * sizeof(T)`. Only allocates memory when \p __count > 0
_CCCL_HIDE_FROM_ABI
uninitialized_async_buffer(__async_resource __mr, const ::cuda::stream_ref __stream, const size_t __count)
: __mr_(_CUDA_VSTD::move(__mr))
, __stream_(__stream)
, __count_(__count)
, __buf_(__count_ == 0 ? nullptr : __mr_.allocate_async(__get_allocation_size(__count_), __stream_))
{}

uninitialized_async_buffer(const uninitialized_async_buffer&) = delete;
uninitialized_async_buffer& operator=(const uninitialized_async_buffer&) = delete;
_CCCL_HIDE_FROM_ABI uninitialized_async_buffer(const uninitialized_async_buffer&) = delete;
_CCCL_HIDE_FROM_ABI uninitialized_async_buffer& operator=(const uninitialized_async_buffer&) = delete;

//! @brief Move construction
//! @brief Move-constructs a \c uninitialized_async_buffer from \p __other
//! @param __other Another \c uninitialized_async_buffer
uninitialized_async_buffer(uninitialized_async_buffer&& __other) noexcept
//! Takes ownership of the allocation in \p __other and resets it
_CCCL_HIDE_FROM_ABI uninitialized_async_buffer(uninitialized_async_buffer&& __other) noexcept
: __mr_(_CUDA_VSTD::move(__other.__mr_))
, __stream_(_CUDA_VSTD::exchange(__other.__stream_, {}))
, __count_(_CUDA_VSTD::exchange(__other.__count_, 0))
, __buf_(_CUDA_VSTD::exchange(__other.__buf_, nullptr))
{}

//! @brief Move-constructs a \c uninitialized_async_buffer from \p __other
//! @param __other Another \c uninitialized_async_buffer with matching properties
//! Takes ownership of the allocation in \p __other and resets it
_LIBCUDACXX_TEMPLATE(class... _OtherProperties)
_LIBCUDACXX_REQUIRES(__properties_match<_OtherProperties...>)
_CCCL_HIDE_FROM_ABI uninitialized_async_buffer(uninitialized_async_buffer<_Tp, _OtherProperties...>&& __other) noexcept
: __mr_(_CUDA_VSTD::move(__other.__mr_))
, __stream_(_CUDA_VSTD::exchange(__other.__stream_, {}))
, __count_(_CUDA_VSTD::exchange(__other.__count_, 0))
, __buf_(_CUDA_VSTD::exchange(__other.__buf_, nullptr))
{}

//! @brief Move assignment
//! @brief Move-assings a \c uninitialized_async_buffer from \p __other
//! @param __other Another \c uninitialized_async_buffer
uninitialized_async_buffer& operator=(uninitialized_async_buffer&& __other) noexcept
//! Deallocates the current allocation and then takes ownership of the allocation in \p __other and resets it
_CCCL_HIDE_FROM_ABI uninitialized_async_buffer& operator=(uninitialized_async_buffer&& __other) noexcept
{
if (this == _CUDA_VSTD::addressof(__other))
{
Expand All @@ -172,45 +202,46 @@ public:
__buf_ = _CUDA_VSTD::exchange(__other.__buf_, nullptr);
return *this;
}

//! @brief Destroys an \c uninitialized_async_buffer and deallocates the buffer in stream order on the stream that was
//! used to create the buffer.
//! @warning The destructor does not destroy any objects that may or may not reside within the buffer. It is the
//! user's responsibility to ensure that all objects within the buffer have been properly destroyed.
~uninitialized_async_buffer()
_CCCL_HIDE_FROM_ABI ~uninitialized_async_buffer()
{
if (__buf_)
{
__mr_.deallocate_async(__buf_, __get_allocation_size(__count_), __stream_);
}
}

//! @brief Returns an aligned pointer to the buffer
_CCCL_NODISCARD constexpr pointer begin() const noexcept
//! @brief Returns an aligned pointer to the first element in the buffer
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr pointer begin() const noexcept
{
return __get_data();
}

//! @brief Returns an aligned pointer to the element following the last element of the buffer.
//! This element acts as a placeholder; attempting to access it results in undefined behavior.
_CCCL_NODISCARD constexpr pointer end() const noexcept
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr pointer end() const noexcept
{
return __get_data() + __count_;
}

//! @brief Returns an aligned pointer to the buffer
_CCCL_NODISCARD constexpr pointer data() const noexcept
//! @brief Returns an aligned pointer to the first element in the buffer
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr pointer data() const noexcept
{
return __get_data();
}

//! @brief Returns the size of the buffer
_CCCL_NODISCARD constexpr size_type size() const noexcept
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr size_type size() const noexcept
{
return __count_;
}

//! @brief Returns the size of the buffer in bytes
_CCCL_NODISCARD constexpr size_type size_bytes() const noexcept
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr size_type size_bytes() const noexcept
{
return __count_ * sizeof(_Tp);
}
Expand All @@ -219,21 +250,21 @@ public:
//! Returns a \c const reference to the :ref:`any_async_resource <cudax-memory-resource-any-async-resource>`
//! that holds the memory resource used to allocate the buffer
//! @endrst
_CCCL_NODISCARD const __async_resource& get_resource() const noexcept
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI const __async_resource& get_resource() const noexcept
{
return __mr_;
}

//! @brief Returns the stored stream
_CCCL_NODISCARD constexpr ::cuda::stream_ref get_stream() const noexcept
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI constexpr ::cuda::stream_ref get_stream() const noexcept
{
return __stream_;
}

//! @brief Replaces the stored stream
//! @param __new_stream the new stream
//! @note Always synchronizes with the old stream
constexpr void change_stream(::cuda::stream_ref __new_stream)
_CCCL_HIDE_FROM_ABI constexpr void change_stream(::cuda::stream_ref __new_stream)
{
if (__new_stream != __stream_)
{
Expand All @@ -242,22 +273,26 @@ public:
__stream_ = __new_stream;
}

//! @brief Swaps the contents with those of another \c uninitialized_async_buffer
//! @param __other The other \c uninitialized_async_buffer.
constexpr void swap(uninitialized_async_buffer& __other) noexcept
{
_CUDA_VSTD::swap(__mr_, __other.__mr_);
_CUDA_VSTD::swap(__count_, __other.__count_);
_CUDA_VSTD::swap(__buf_, __other.__buf_);
}

# ifndef DOXYGEN_SHOULD_SKIP_THIS // friend functions are currently broken
//! @brief Forwards the passed properties
_LIBCUDACXX_TEMPLATE(class _Property)
_LIBCUDACXX_REQUIRES(
(!property_with_value<_Property>) _LIBCUDACXX_AND _CUDA_VSTD::__is_included_in_v<_Property, _Properties...>)
friend constexpr void get_property(const uninitialized_async_buffer&, _Property) noexcept {}
_CCCL_HIDE_FROM_ABI friend constexpr void get_property(const uninitialized_async_buffer&, _Property) noexcept {}
# endif // DOXYGEN_SHOULD_SKIP_THIS

//! @brief Internal method to grow the allocation to a new size \p __count.
//! @param __count The new size of the allocation.
//! @return An \c uninitialized_async_buffer that holds the previous allocation
//! @warning This buffer must outlive the returned buffer
_CCCL_HIDE_FROM_ABI uninitialized_async_buffer __replace_allocation(const size_t __count)
{
// Create a new buffer with a reference to the stored memory resource and swap allocation information
uninitialized_async_buffer __ret{_CUDA_VMR::async_resource_ref<_Properties...>{__mr_}, __stream_, __count};
_CUDA_VSTD::swap(__count_, __ret.__count_);
_CUDA_VSTD::swap(__buf_, __ret.__buf_);
return __ret;
}
};

template <class _Tp>
Expand Down
Loading
Loading