Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions cudax/include/cuda/experimental/__execution/policy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
# pragma system_header
#endif // no system header

#include <cuda/__stream/get_stream.h>
#include <cuda/__stream/stream_ref.h>
#include <cuda/std/__concepts/concept_macros.h>
#include <cuda/std/__execution/env.h>
#include <cuda/std/__execution/policy.h>
Expand All @@ -45,8 +47,10 @@ struct any_execution_policy
_CCCL_HIDE_FROM_ABI any_execution_policy() = default;

template <uint32_t _Policy>
_CCCL_HOST_API constexpr any_execution_policy(::cuda::std::execution::__execution_policy_base<_Policy>) noexcept
_CCCL_HOST_API constexpr any_execution_policy(
const ::cuda::std::execution::__execution_policy_base<_Policy>& __pol) noexcept
: value(value_type{_Policy})
, stream(::cuda::std::execution::__query_or(__pol, ::cuda::get_stream, ::cuda::stream_ref{cudaStreamPerThread}))
{}

_CCCL_HOST_API constexpr operator __execution_policy() const noexcept
Expand Down Expand Up @@ -89,7 +93,15 @@ struct any_execution_policy
}
#endif // _CCCL_STD_VER <= 2017

__execution_policy value = __execution_policy::__invalid_execution_policy;
#if _CCCL_HAS_CTK() && !_CCCL_COMPILER(NVRTC)
[[nodiscard]] _CCCL_HOST_API ::cuda::stream_ref get_stream() const noexcept
{
return stream;
}
#endif // _CCCL_HAS_CTK() && !_CCCL_COMPILER(NVRTC)

__execution_policy value = __execution_policy::__invalid_execution_policy;
::cuda::stream_ref stream = ::cuda::stream_ref{cudaStreamPerThread};
};

struct get_execution_policy_t;
Expand Down
189 changes: 188 additions & 1 deletion libcudacxx/include/cuda/__execution/policy.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,204 @@
#if _CCCL_HAS_BACKEND_CUDA()

# include <cuda/__fwd/execution_policy.h>
# include <cuda/__memory_resource/device_memory_pool.h>
# include <cuda/__memory_resource/get_memory_resource.h>
# include <cuda/__memory_resource/resource.h>
# include <cuda/__stream/get_stream.h>
# include <cuda/__stream/stream_ref.h>
# include <cuda/std/__execution/policy.h>
# include <cuda/std/__type_traits/is_execution_policy.h>
# include <cuda/std/__utility/forward.h>

# include <cuda/std/__cccl/prologue.h>

_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION

template <bool _HasStream>
struct __policy_stream_holder
{
::cuda::stream_ref __stream_;

_CCCL_HOST_API constexpr __policy_stream_holder(::cuda::stream_ref __stream) noexcept
: __stream_(__stream)
{}
};

template <>
struct __policy_stream_holder<false>
{
_CCCL_HIDE_FROM_ABI __policy_stream_holder() = default;

//! @brief Dummy constructor to simplify implementation of the cuda policy
_CCCL_HOST_API constexpr __policy_stream_holder(::cuda::stream_ref) noexcept {}
};

template <bool _HasResource>
struct __policy_memory_resource_holder
{
using __resource_t = ::cuda::mr::any_resource<::cuda::mr::device_accessible>;

__resource_t __resource_;

_CCCL_TEMPLATE(class _Resource)
_CCCL_REQUIRES(::cuda::mr::resource_with<_Resource, ::cuda::mr::device_accessible>)
_CCCL_HOST_API constexpr __policy_memory_resource_holder(_Resource&& __resource) noexcept
: __resource_(::cuda::std::forward<_Resource>(__resource))
{}
};

template <>
struct __policy_memory_resource_holder<false>
{
_CCCL_HIDE_FROM_ABI __policy_memory_resource_holder() = default;

//! @brief Dummy constructor to simplify implementation of the cuda policy
_CCCL_TEMPLATE(class _Resource)
_CCCL_REQUIRES(::cuda::mr::resource_with<_Resource, ::cuda::mr::device_accessible>)
_CCCL_HOST_API constexpr __policy_memory_resource_holder(_Resource&&) noexcept {}
};

template <uint32_t _Policy>
struct _CCCL_DECLSPEC_EMPTY_BASES __execution_policy_base<_Policy, __execution_backend::__cuda>
: __execution_policy_base<_Policy, __execution_backend::__none>
{};
, protected __policy_stream_holder<__cuda_policy_with_stream<_Policy>>
, protected __policy_memory_resource_holder<__cuda_policy_with_memory_resource<_Policy>>
{
private:
template <uint32_t, __execution_backend>
friend struct __execution_policy_base;

using __stream_holder = __policy_stream_holder<__cuda_policy_with_stream<_Policy>>;
using __resource_holder = __policy_memory_resource_holder<__cuda_policy_with_memory_resource<_Policy>>;

template <uint32_t _OtherPolicy>
_CCCL_HOST_API constexpr __execution_policy_base(
const __execution_policy_base<_OtherPolicy, __execution_backend::__cuda>& __policy) noexcept
: __stream_holder(__policy.query(::cuda::get_stream))
, __resource_holder(__policy.query(::cuda::mr::get_memory_resource))
{}

template <uint32_t _OtherPolicy>
_CCCL_HOST_API constexpr __execution_policy_base(
const __execution_policy_base<_OtherPolicy, __execution_backend::__cuda>& __policy,
::cuda::stream_ref __stream) noexcept
: __stream_holder(__stream)
, __resource_holder(__policy.query(::cuda::mr::get_memory_resource))
{}

template <uint32_t _OtherPolicy, class _Resource>
_CCCL_HOST_API constexpr __execution_policy_base(
const __execution_policy_base<_OtherPolicy, __execution_backend::__cuda>& __policy, _Resource&& __resource) noexcept
: __stream_holder(__policy.query(::cuda::get_stream))
, __resource_holder(::cuda::std::forward<_Resource>(__resource))
{}

public:
_CCCL_HIDE_FROM_ABI constexpr __execution_policy_base() noexcept = default;

//! @brief Convert to a policy that holds a stream
//! @note This cannot be merged with the other case where we already have a stream as this needs to be const qualified
_CCCL_TEMPLATE(bool _WithStream = __cuda_policy_with_stream<_Policy>)
_CCCL_REQUIRES((!_WithStream))
[[nodiscard]] _CCCL_HOST_API auto set_stream(::cuda::stream_ref __stream) const noexcept
{
constexpr uint32_t __new_policy = __set_cuda_backend_option<_Policy, __cuda_backend_options::__with_stream>;
return __execution_policy_base<__new_policy>{*this, __stream};
}

//! @brief Set the current stream
_CCCL_TEMPLATE(bool _WithStream = __cuda_policy_with_stream<_Policy>)
_CCCL_REQUIRES(_WithStream)
[[nodiscard]] _CCCL_HOST_API __execution_policy_base& set_stream(::cuda::stream_ref __stream) noexcept
{
this->__stream_ = __stream;
return *this;
}

//! @brief Return the stream stored in the holder or a default stream
[[nodiscard]] _CCCL_HOST_API ::cuda::stream_ref query(const ::cuda::get_stream_t&) const noexcept
{
if constexpr (__cuda_policy_with_stream<_Policy>)
{
return this->__stream_;
}
else
{
return ::cuda::stream_ref{cudaStreamPerThread};
}
}

//! @brief Set the current memory resource
_CCCL_TEMPLATE(class _Resource, bool _WithResource = __cuda_policy_with_memory_resource<_Policy>)
_CCCL_REQUIRES(::cuda::mr::resource_with<_Resource, ::cuda::mr::device_accessible> _CCCL_AND _WithResource)
[[nodiscard]] _CCCL_HOST_API __execution_policy_base& set_memory_resource(_Resource&& __resource) noexcept
{
this->__resource_ = __resource;
return *this;
}

//! @brief Convert to a policy that holds a memory resource
_CCCL_TEMPLATE(class _Resource, bool _WithResource = __cuda_policy_with_memory_resource<_Policy>)
_CCCL_REQUIRES(::cuda::mr::resource_with<_Resource, ::cuda::mr::device_accessible> _CCCL_AND(!_WithResource))
[[nodiscard]] _CCCL_HOST_API auto set_memory_resource(_Resource&& __resource) const noexcept
{
constexpr uint32_t __new_policy =
__set_cuda_backend_option<_Policy, __cuda_backend_options::__with_memory_resource>;
return __execution_policy_base<__new_policy>{*this, __resource};
}

//! @brief Return either a stored or a default memory resource
//! @note We cannot put that into the __policy_memory_resource_holder because we need a stream for the device
[[nodiscard]] _CCCL_HOST_API auto query(const ::cuda::mr::get_memory_resource_t&) const noexcept
{
if constexpr (__cuda_policy_with_memory_resource<_Policy>)
{
return this->__resource_;
}
else
{
::cuda::stream_ref __stream = this->query(::cuda::get_stream);
return ::cuda::device_default_memory_pool(__stream.device());
}
}

template <uint32_t _OtherPolicy, __execution_backend _OtherBackend>
[[nodiscard]] _CCCL_API friend constexpr bool operator==(
const __execution_policy_base& __lhs, const __execution_policy_base<_OtherPolicy, _OtherBackend>& __rhs) noexcept
{
if constexpr (_Policy != _OtherPolicy)
{
return false;
}

if constexpr (__cuda_policy_with_stream<_Policy>)
{
if (__lhs.query(::cuda::get_stream) != __rhs.query(::cuda::get_stream))
{
return false;
}
}

if constexpr (__cuda_policy_with_memory_resource<_Policy>)
{
if (__lhs.query(::cuda::mr::get_memory_resource) != __rhs.query(::cuda::mr::get_memory_resource))
{
return false;
}
}

return true;
}

# if _CCCL_STD_VER <= 2017
template <uint32_t _OtherPolicy, __execution_backend _OtherBackend>
[[nodiscard]] _CCCL_API friend constexpr bool operator!=(
const __execution_policy_base& __lhs, const __execution_policy_base<_OtherPolicy, _OtherBackend>& __rhs) noexcept
{
return !(__lhs == __rhs);
}
# endif // _CCCL_STD_VER <= 2017
};

_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION

Expand Down
26 changes: 26 additions & 0 deletions libcudacxx/include/cuda/__fwd/execution_policy.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@

_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION

enum __cuda_backend_options : uint16_t
{
__with_stream = 1 << 0, ///> Determines whether the policy holds a stream
__with_memory_resource = 1 << 1, ///> Determines whether the policy holds a memory resource
};

//! @brief Sets the execution backend to cuda
template <uint32_t _Policy>
[[nodiscard]] _CCCL_API constexpr uint32_t __with_cuda_backend() noexcept
Expand All @@ -38,6 +44,26 @@ template <uint32_t _Policy>
return __new_policy;
}

//! @brief Backend specific options of the CUDA backend
template <uint32_t _Policy>
inline constexpr __cuda_backend_options __policy_to_cuda_backend_options =
static_cast<__cuda_backend_options>((_Policy & uint32_t{0xFFFF0000}) >> 16);

//! @brief Sets a backend specific option
template <uint32_t _Policy, __cuda_backend_options __option>
inline constexpr uint32_t __set_cuda_backend_option =
_Policy | static_cast<uint32_t>(static_cast<uint32_t>(__option) << 16);

//! @brief Detects whether a given policy holds a user provided stream
template <uint32_t _Policy>
inline constexpr bool __cuda_policy_with_stream =
__policy_to_cuda_backend_options<_Policy> & __cuda_backend_options::__with_stream;

//! @brief Detects whether a given policy holds a user provided memory resource
template <uint32_t _Policy>
inline constexpr bool __cuda_policy_with_memory_resource =
__policy_to_cuda_backend_options<_Policy> & __cuda_backend_options::__with_memory_resource;

_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION

# include <cuda/std/__cccl/epilogue.h>
Expand Down
1 change: 1 addition & 0 deletions libcudacxx/include/cuda/std/__numeric/reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

_CCCL_BEGIN_NAMESPACE_CUDA_STD

_CCCL_EXEC_CHECK_DISABLE
template <class _InputIterator, class _Tp, class _BinaryOp>
[[nodiscard]] _CCCL_API constexpr _Tp reduce(_InputIterator __first, _InputIterator __last, _Tp __init, _BinaryOp __b)
{
Expand Down
5 changes: 4 additions & 1 deletion libcudacxx/include/cuda/std/__pstl/cuda/for_each_n.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@

# include <cuda/__execution/policy.h>
# include <cuda/__runtime/api_wrapper.h>
# include <cuda/__stream/get_stream.h>
# include <cuda/__stream/stream_ref.h>
# include <cuda/std/__algorithm/for_each_n.h>
# include <cuda/std/__exception/cuda_error.h>
# include <cuda/std/__exception/terminate.h>
# include <cuda/std/__execution/env.h>
# include <cuda/std/__execution/policy.h>
# include <cuda/std/__iterator/iterator_traits.h>
# include <cuda/std/__pstl/dispatch.h>
Expand All @@ -54,7 +56,8 @@ struct __pstl_dispatch<__pstl_algorithm::__for_each_n, __execution_backend::__cu
__par_impl([[maybe_unused]] _Policy __policy, _Iter __first, _Size __orig_n, _Fn __func) noexcept
{
const auto __count = ::cuda::std::__convert_to_integral(__orig_n);
::cuda::stream_ref __stream{cudaStreamPerThread};

auto __stream = __policy.query(::cuda::get_stream);

_CCCL_TRY_CUDA_API(
::cub::DeviceFor::ForEachN,
Expand Down
Loading