diff --git a/docs/libcudacxx/extended_api/mdspan.rst b/docs/libcudacxx/extended_api/mdspan.rst
index 028ac918aec..f0b29f3d1d0 100644
--- a/docs/libcudacxx/extended_api/mdspan.rst
+++ b/docs/libcudacxx/extended_api/mdspan.rst
@@ -10,6 +10,8 @@ Mdspan
    mdspan/host_device_accessor
    mdspan/restrict_accessor
    mdspan/shared_memory_accessor
+   mdspan/mdspan_to_dlpack
+   mdspan/dlpack_to_mdspan

 .. list-table::
    :widths: 25 45 30 30
@@ -34,3 +36,13 @@ Mdspan
      - ``mdspan`` and accessor for CUDA shared memory
      - CCCL 3.2.0
      - CUDA 13.2
+
+   * - :ref:`mdspan to dlpack <libcudacxx-extended-api-mdspan-mdspan-to-dlpack>`
+     - Convert an ``mdspan`` to a ``DLTensor``
+     - CCCL 3.2.0
+     - CUDA 13.2
+
+   * - :ref:`dlpack to mdspan <libcudacxx-extended-api-mdspan-dlpack-to-mdspan>`
+     - Convert a ``DLTensor`` to an ``mdspan``
+     - CCCL 3.2.0
+     - CUDA 13.2
diff --git a/docs/libcudacxx/extended_api/mdspan/dlpack_to_mdspan.rst b/docs/libcudacxx/extended_api/mdspan/dlpack_to_mdspan.rst
new file mode 100644
index 00000000000..61497f6621d
--- /dev/null
+++ b/docs/libcudacxx/extended_api/mdspan/dlpack_to_mdspan.rst
@@ -0,0 +1,129 @@
+.. _libcudacxx-extended-api-mdspan-dlpack-to-mdspan:
+
+DLPack to ``mdspan``
+====================
+
+This functionality provides a conversion from a `DLPack `__ ``DLTensor`` to ``cuda::host_mdspan``, ``cuda::device_mdspan``, and ``cuda::managed_mdspan``.
+
+Defined in the ``<cuda/mdspan>`` header.
+
+Conversion functions
+--------------------
+
+.. code:: cuda
+
+   namespace cuda {
+
+   template <typename ElementType, cuda::std::size_t Rank, typename LayoutPolicy = cuda::std::layout_stride>
+   [[nodiscard]] cuda::host_mdspan<ElementType, cuda::std::dims<Rank, cuda::std::int64_t>, LayoutPolicy>
+   to_host_mdspan(const DLTensor& tensor);
+
+   template <typename ElementType, cuda::std::size_t Rank, typename LayoutPolicy = cuda::std::layout_stride>
+   [[nodiscard]] cuda::device_mdspan<ElementType, cuda::std::dims<Rank, cuda::std::int64_t>, LayoutPolicy>
+   to_device_mdspan(const DLTensor& tensor);
+
+   template <typename ElementType, cuda::std::size_t Rank, typename LayoutPolicy = cuda::std::layout_stride>
+   [[nodiscard]] cuda::managed_mdspan<ElementType, cuda::std::dims<Rank, cuda::std::int64_t>, LayoutPolicy>
+   to_managed_mdspan(const DLTensor& tensor);
+
+   } // namespace cuda
+
+Template parameters
+-------------------
+
+- ``ElementType``: The element type of the resulting ``mdspan``. Must match ``DLTensor::dtype``.
+- ``Rank``: The number of dimensions. Must match ``DLTensor::ndim``.
+- ``LayoutPolicy``: The layout policy of the resulting ``mdspan``. Defaults to ``cuda::std::layout_stride``. Supported layouts are:
+
+  - ``cuda::std::layout_right`` (C-contiguous, row-major)
+  - ``cuda::std::layout_left`` (Fortran-contiguous, column-major)
+  - ``cuda::std::layout_stride`` (general strided layout)
+
+Semantics
+---------
+
+The conversion produces a non-owning ``mdspan`` view of the ``DLTensor`` data:
+
+- The ``mdspan`` data pointer is ``tensor.data`` advanced by ``tensor.byte_offset`` bytes.
+- For ``rank > 0``, ``mdspan.extent(i)`` is ``tensor.shape[i]``.
+- For ``layout_stride``, ``mdspan.stride(i)`` is ``tensor.strides[i]`` (or computed as row-major if ``strides`` is ``nullptr``, DLPack < v1.2).
+- The device type is validated:
+
+  - ``kDLCPU`` for ``to_host_mdspan``
+  - ``kDLCUDA`` for ``to_device_mdspan``
+  - ``kDLCUDAManaged`` for ``to_managed_mdspan``
+
+Constraints
+-----------
+
+- ``LayoutPolicy`` must be one of ``cuda::std::layout_right``, ``cuda::std::layout_left``, or ``cuda::std::layout_stride``.
+- For ``layout_right`` and ``layout_left``, the ``DLTensor`` strides must be compatible with the layout.
+
+Runtime errors
+--------------
+
+The conversion throws ``std::invalid_argument`` in the following cases:
+
+- ``DLTensor::ndim`` does not match the specified ``Rank``.
+- ``DLTensor::dtype`` does not match ``ElementType``.
+- ``DLTensor::data`` is ``nullptr``.
+- ``DLTensor::shape`` is ``nullptr`` (for rank > 0).
+- Any ``DLTensor::shape[i]`` is negative.
+- ``DLTensor::strides`` is ``nullptr`` for DLPack v1.2 or later.
+- ``DLTensor::strides`` is ``nullptr`` for ``layout_left`` with rank > 1 (DLPack < v1.2).
+- ``DLTensor::strides[i]`` is not positive for ``layout_stride``.
+- ``DLTensor::strides`` are not compatible with the requested ``layout_right`` or ``layout_left``.
+- ``DLTensor::device.device_type`` does not match the target ``mdspan`` type.
+- The data pointer is not properly aligned for the element type.
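+
+For illustration, a rank mismatch might be handled as follows (a minimal sketch; the exact error
+message is an implementation detail, and ``tensor`` is assumed to be the rank-2 ``DLTensor`` built
+in the example below):
+
+.. code:: cuda
+
+   try {
+     // tensor.ndim == 2, but Rank == 1 is requested
+     auto md = cuda::to_host_mdspan<int, 1>(tensor);
+   } catch (const std::invalid_argument& e) {
+     // e.what() describes which check failed
+   }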
+
+Availability notes
+------------------
+
+- This API is available only when the DLPack header is present, namely ``<dlpack/dlpack.h>`` is found in the include path.
+- Requires DLPack major version 1.
+
+References
+----------
+
+- `DLPack C API `__ documentation.
+
+Example
+-------
+
+.. code:: cuda
+
+   #include <cuda/mdspan>
+
+   #include <dlpack/dlpack.h>
+
+   #include <cassert>
+   #include <cstdint>
+
+   int main() {
+     int data[6] = {0, 1, 2, 3, 4, 5};
+
+     // Create a DLTensor manually for demonstration
+     int64_t shape[2]   = {2, 3};
+     int64_t strides[2] = {3, 1}; // row-major strides
+
+     DLTensor tensor{};
+     tensor.data        = data;
+     tensor.device      = {kDLCPU, 0};
+     tensor.ndim        = 2;
+     tensor.dtype       = {kDLInt, 32, 1};
+     tensor.shape       = shape;
+     tensor.strides     = strides;
+     tensor.byte_offset = 0;
+
+     // Convert to host_mdspan
+     auto md = cuda::to_host_mdspan<int, 2>(tensor);
+
+     assert(md.rank() == 2);
+     assert(md.extent(0) == 2 && md.extent(1) == 3);
+     assert(md.stride(0) == 3 && md.stride(1) == 1);
+     assert(md.data_handle() == data);
+     assert(md(0, 0) == 0 && md(1, 2) == 5);
+   }
+
+See also
+--------
+
+- :ref:`libcudacxx-extended-api-mdspan-mdspan-to-dlpack` for the reverse conversion.
diff --git a/docs/libcudacxx/extended_api/mdspan/mdspan_to_dlpack.rst b/docs/libcudacxx/extended_api/mdspan/mdspan_to_dlpack.rst
new file mode 100644
index 00000000000..bb04aae5002
--- /dev/null
+++ b/docs/libcudacxx/extended_api/mdspan/mdspan_to_dlpack.rst
@@ -0,0 +1,137 @@
+.. _libcudacxx-extended-api-mdspan-mdspan-to-dlpack:
+
+``mdspan`` to DLPack
+====================
+
+This functionality provides a conversion from ``cuda::host_mdspan``, ``cuda::device_mdspan``, and ``cuda::managed_mdspan`` to a `DLPack `__ ``DLTensor`` view.
+
+Defined in the ``<cuda/mdspan>`` header.
+
+Conversion functions
+--------------------
+
+.. code:: cuda
+
+   namespace cuda {
+
+   template <typename ElementType, typename Extents, typename Layout, typename Accessor>
+   [[nodiscard]] __dlpack_tensor<Extents::rank()>
+   to_dlpack(const cuda::host_mdspan<ElementType, Extents, Layout, Accessor>& mdspan);
+
+   template <typename ElementType, typename Extents, typename Layout, typename Accessor>
+   [[nodiscard]] __dlpack_tensor<Extents::rank()>
+   to_dlpack(const cuda::device_mdspan<ElementType, Extents, Layout, Accessor>& mdspan,
+             cuda::device_ref device = cuda::device_ref{0});
+
+   template <typename ElementType, typename Extents, typename Layout, typename Accessor>
+   [[nodiscard]] __dlpack_tensor<Extents::rank()>
+   to_dlpack(const cuda::managed_mdspan<ElementType, Extents, Layout, Accessor>& mdspan);
+
+   } // namespace cuda
+
+Types
+-----
+
+``cuda::__dlpack_tensor`` is an internal class that stores a ``DLTensor`` and owns the backing storage for its ``shape`` and ``strides`` pointers. The class does not use any heap allocation.
+
+.. code:: cuda
+
+   namespace cuda {
+
+   template <cuda::std::size_t Rank>
+   class __dlpack_tensor {
+   public:
+     __dlpack_tensor();
+     __dlpack_tensor(const __dlpack_tensor&) noexcept;
+     __dlpack_tensor(__dlpack_tensor&&) noexcept;
+     __dlpack_tensor& operator=(const __dlpack_tensor&) noexcept;
+     __dlpack_tensor& operator=(__dlpack_tensor&&) noexcept;
+     ~__dlpack_tensor() noexcept = default;
+
+     DLTensor& get() noexcept;
+     const DLTensor& get() const noexcept;
+   };
+
+   } // namespace cuda
+
+.. note:: **Lifetime**
+
+   The ``DLTensor`` returned by ``get()`` must not be used after the ``cuda::__dlpack_tensor`` wrapper is destroyed: once the wrapper goes away, the ``DLTensor::shape`` and ``DLTensor::strides`` pointers dangle.
+
+.. note:: **Const-correctness**
+
+   ``DLTensor::data`` points at ``mdspan.data_handle()`` (or is ``nullptr`` if ``mdspan.size() == 0``). If the element type is ``const``, the ``const`` is cast away because ``DLTensor::data`` is not const-qualified.
+
+Semantics
+---------
+
+The conversion produces a non-owning DLPack view of the ``mdspan`` data and metadata:
+
+- ``DLTensor::ndim`` is ``mdspan.rank()``.
+- For rank > 0, ``DLTensor::shape[i]`` is ``mdspan.extent(i)``.
+- For rank > 0, ``DLTensor::strides[i]`` is ``mdspan.stride(i)``.
+- ``DLTensor::byte_offset`` is always ``0``.
+- ``DLTensor::device`` is:
+
+  - ``{kDLCPU, 0}`` for ``cuda::host_mdspan``
+  - ``{kDLCUDA, device.get()}`` for ``cuda::device_mdspan``
+  - ``{kDLCUDAManaged, 0}`` for ``cuda::managed_mdspan``
+
+Element types are mapped to ``DLDataType`` according to the DLPack conventions (see the examples after this list), including:
+
+- ``bool``.
+- Signed and unsigned integers.
+- IEEE-754 floating-point and extended floating-point types, including ``__half``, ``__nv_bfloat16``, ``__float128``, and FP8, FP6, FP4 when available.
+- Complex: ``cuda::std::complex<__half>``, ``cuda::std::complex<float>``, and ``cuda::std::complex<double>``.
+- `CUDA built-in vector types `__, such as ``int2``, ``float4``, etc.
+- Vector types for extended floating-point, such as ``__half2``, ``__nv_fp8x4_e4m3``, etc.
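+
+A few representative mappings (a minimal sketch derived from the rules above, not an exhaustive list):
+
+.. code:: cuda
+
+   // float                      -> DLDataType{kDLFloat,   32, 1}
+   // cuda::std::complex<float>  -> DLDataType{kDLComplex, 64, 1}   // bits span the full complex value
+   // float4                     -> DLDataType{kDLFloat,   32, 4}   // 4 lanes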
+
+Constraints
+-----------
+
+- The accessor ``data_handle_type`` must be a pointer type.
+
+Runtime errors
+--------------
+
+- If any ``extent(i)`` or ``stride(i)`` cannot be represented in ``int64_t``, the conversion throws ``std::invalid_argument``.
+
+Availability notes
+------------------
+
+- This API is available only when the DLPack header is present, namely ``<dlpack/dlpack.h>`` is found in the include path.
+
+References
+----------
+
+- `DLPack C API `__ documentation.
+
+Example
+-------
+
+.. code:: cuda
+
+   #include <cuda/mdspan>
+
+   #include <dlpack/dlpack.h>
+
+   #include <cassert>
+   #include <cstddef>
+
+   int main() {
+     using extents_t = cuda::std::extents<size_t, 2, 3>;
+
+     int data[6] = {0, 1, 2, 3, 4, 5};
+     cuda::host_mdspan<int, extents_t> md{data, extents_t{}};
+
+     auto dl = cuda::to_dlpack(md);
+     const auto& dltensor = dl.get();
+     // Do not call get() on a temporary, e.g. `const auto& t = cuda::to_dlpack(md).get();`:
+     // the wrapper would be destroyed at the end of the statement and `t` would dangle.
+
+     // `dl` owns the shape/stride storage; `dltensor.data` is a non-owning pointer to `data`.
+     assert(dltensor.device.device_type == kDLCPU);
+     assert(dltensor.ndim == 2);
+     assert(dltensor.shape[0] == 2 && dltensor.shape[1] == 3);
+     assert(dltensor.strides[0] == 3 && dltensor.strides[1] == 1);
+     assert(dltensor.data == data);
+   }
diff --git a/libcudacxx/include/cuda/__mdspan/dlpack_to_mdspan.h b/libcudacxx/include/cuda/__mdspan/dlpack_to_mdspan.h
new file mode 100644
index 00000000000..b2dc3640c6b
--- /dev/null
+++ b/libcudacxx/include/cuda/__mdspan/dlpack_to_mdspan.h
@@ -0,0 +1,261 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA___MDSPAN_DLPACK_TO_MDSPAN_H +#define _CUDA___MDSPAN_DLPACK_TO_MDSPAN_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#if _CCCL_HAS_DLPACK() + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +# include + +# include +// +# include + +_CCCL_BEGIN_NAMESPACE_CUDA + +template +[[nodiscard]] _CCCL_HOST_API inline bool __validate_dlpack_data_type(const ::DLDataType& __dtype) noexcept +{ + const auto __expected = ::cuda::__data_type_to_dlpack<_ElementType>(); + return __dtype.code == __expected.code && __dtype.bits == __expected.bits && __dtype.lanes == __expected.lanes; +} + +[[nodiscard]] +_CCCL_HOST_API inline ::cuda::std::int64_t __layout_right_stride( + const ::cuda::std::int64_t* __shapes, ::cuda::std::size_t __pos, ::cuda::std::size_t __rank) noexcept +{ + ::cuda::std::int64_t __stride = 1; + for (auto __i = __pos + 1; __i < __rank; ++__i) + { + __stride *= __shapes[__i]; // TODO: check for overflow + } + return __stride; +} + +[[nodiscard]] +_CCCL_HOST_API inline ::cuda::std::int64_t +__layout_left_stride(const ::cuda::std::int64_t* __shapes, ::cuda::std::size_t __pos) noexcept +{ + ::cuda::std::int64_t __stride = 1; + for (::cuda::std::size_t __i = 0; __i < __pos; ++__i) + { + __stride *= __shapes[__i]; // TODO: check for overflow + } + return __stride; +} + +template +_CCCL_HOST_API void __validate_dlpack_strides(const ::DLTensor& __tensor, [[maybe_unused]] ::cuda::std::size_t __rank) +{ + [[maybe_unused]] constexpr bool __is_layout_right = ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_right>; + [[maybe_unused]] constexpr bool __is_layout_left = ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_left>; + [[maybe_unused]] constexpr bool __is_layout_stride = + ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_stride>; + const auto __strides_ptr = __tensor.strides; + if (__strides_ptr == nullptr) + { +# if DLPACK_MAJOR_VERSION > 1 || (DLPACK_MAJOR_VERSION == 1 && DLPACK_MINOR_VERSION >= 2) + _CCCL_THROW(::std::invalid_argument{"strides=nullptr is not supported for DLPack v1.2 and later"}); +# else + // strides == nullptr means row-major (C-contiguous) layout + if (__is_layout_left && __rank > 1) + { + _CCCL_THROW(::std::invalid_argument{"strides must be non-null for layout_left"}); + } + else + { + return; + } +# endif // DLPACK_MAJOR_VERSION > 1 || (DLPACK_MAJOR_VERSION == 1 && DLPACK_MINOR_VERSION >= 2) + } + for (::cuda::std::size_t __pos = 0; __pos < __rank; ++__pos) + { + if constexpr (__is_layout_right) + { + if (__strides_ptr[__pos] != ::cuda::__layout_right_stride(__tensor.shape, __pos, __rank)) + { + _CCCL_THROW(::std::invalid_argument{"DLTensor strides are not compatible with layout_right"}); + } + } + else if constexpr (__is_layout_left) + { + if (__strides_ptr[__pos] != ::cuda::__layout_left_stride(__tensor.shape, __pos)) + { + _CCCL_THROW(::std::invalid_argument{"DLTensor strides are not compatible with layout_left"}); + } + } + else if constexpr (__is_layout_stride) + { + if (__strides_ptr[__pos] <= 0) + { + _CCCL_THROW(::std::invalid_argument{"mdspan strides must be positive"}); + } + } + } +} + +template +[[nodiscard]] +_CCCL_HOST_API 
::cuda::std::mdspan<_ElementType, ::cuda::std::dims<_Rank, ::cuda::std::int64_t>, _LayoutPolicy> +__to_mdspan(const ::DLTensor& __tensor) +{ + using __extents_type = ::cuda::std::dims<_Rank, ::cuda::std::int64_t>; + using __mdspan_type = ::cuda::std::mdspan<_ElementType, __extents_type, _LayoutPolicy>; + using __mapping_type = typename _LayoutPolicy::template mapping<__extents_type>; + using __element_type = typename __mdspan_type::element_type; + constexpr bool __is_layout_right = ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_right>; + constexpr bool __is_layout_left = ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_left>; + constexpr bool __is_layout_stride = ::cuda::std::is_same_v<_LayoutPolicy, ::cuda::std::layout_stride>; + // TODO: add support for layout_right_padded and layout_left_padded + if constexpr (!__is_layout_right && !__is_layout_left && !__is_layout_stride) + { + static_assert(::cuda::std::__always_false_v<_LayoutPolicy>, "Unsupported layout policy"); + _CCCL_UNREACHABLE(); + return __mdspan_type{}; + } + else + { + if (cuda::std::cmp_not_equal(__tensor.ndim, _Rank)) + { + _CCCL_THROW(::std::invalid_argument{"DLTensor rank does not match expected rank"}); + } + if (!::cuda::__validate_dlpack_data_type<__element_type>(__tensor.dtype)) + { + _CCCL_THROW(::std::invalid_argument{"DLTensor data type does not match expected type"}); + } + if (__tensor.data == nullptr) + { + _CCCL_THROW(::std::invalid_argument{"DLTensor data must be non-null"}); + } + auto __base_data = static_cast(__tensor.data) + __tensor.byte_offset; + auto __data = reinterpret_cast<__element_type*>(__base_data); + const auto __datatype_size = __tensor.dtype.bits * __tensor.dtype.lanes / 8; + // this is not the exact solution because data type size != data type alignment. + // However, it always works for the supported data types. + if (__datatype_size > 0 && !::cuda::is_aligned(__data, __datatype_size)) + { + _CCCL_THROW(::std::invalid_argument{"DLTensor data must be aligned to the data type"}); + } + if constexpr (_Rank == 0) + { + return __mdspan_type{__data, __mapping_type{}}; + } + else // Rank > 0 + { + if (__tensor.shape == nullptr) + { + _CCCL_THROW(::std::invalid_argument{"DLTensor shape must be non-null"}); + } + using ::cuda::std::int64_t; + using ::cuda::std::size_t; + ::cuda::std::array __extents_array{}; + for (size_t __i = 0; __i < _Rank; ++__i) + { + if (__tensor.shape[__i] < 0) + { + _CCCL_THROW(::std::invalid_argument{"DLTensor shape must be positive"}); + } + __extents_array[__i] = __tensor.shape[__i]; + } + ::cuda::__validate_dlpack_strides<_LayoutPolicy>(__tensor, _Rank); + if constexpr (__is_layout_stride) + { + ::cuda::std::array __strides_array{}; + for (size_t __i = 0; __i < _Rank; ++__i) + { + const bool __has_strides = __tensor.strides != nullptr; + __strides_array[__i] = + __has_strides ? 
__tensor.strides[__i] : ::cuda::__layout_right_stride(__tensor.shape, __i, _Rank); + } + return __mdspan_type{__data, __mapping_type{__extents_array, __strides_array}}; + } + else + { + __extents_type __extents{__extents_array}; + return __mdspan_type{__data, __extents}; + } + } + } +} + +/*********************************************************************************************************************** + * Public API + **********************************************************************************************************************/ + +template +[[nodiscard]] +_CCCL_HOST_API ::cuda::host_mdspan<_ElementType, ::cuda::std::dims<_Rank, ::cuda::std::int64_t>, _LayoutPolicy> +to_host_mdspan(const ::DLTensor& __tensor) +{ + if (__tensor.device.device_type != ::kDLCPU) + { + _CCCL_THROW(::std::invalid_argument{"DLTensor device type must be kDLCPU for host_mdspan"}); + } + using __extents_type = ::cuda::std::dims<_Rank, ::cuda::std::int64_t>; + using __mdspan_type = ::cuda::host_mdspan<_ElementType, __extents_type, _LayoutPolicy>; + return __mdspan_type{::cuda::__to_mdspan<_ElementType, _Rank, _LayoutPolicy>(__tensor)}; +} + +template +[[nodiscard]] +_CCCL_HOST_API ::cuda::device_mdspan<_ElementType, ::cuda::std::dims<_Rank, ::cuda::std::int64_t>, _LayoutPolicy> +to_device_mdspan(const ::DLTensor& __tensor) +{ + if (__tensor.device.device_type != ::kDLCUDA) + { + _CCCL_THROW(::std::invalid_argument{"DLTensor device type must be kDLCUDA for device_mdspan"}); + } + using __extents_type = ::cuda::std::dims<_Rank, ::cuda::std::int64_t>; + using __mdspan_type = ::cuda::device_mdspan<_ElementType, __extents_type, _LayoutPolicy>; + return __mdspan_type{::cuda::__to_mdspan<_ElementType, _Rank, _LayoutPolicy>(__tensor)}; +} + +template +[[nodiscard]] +_CCCL_HOST_API ::cuda::managed_mdspan<_ElementType, ::cuda::std::dims<_Rank, ::cuda::std::int64_t>, _LayoutPolicy> +to_managed_mdspan(const ::DLTensor& __tensor) +{ + if (__tensor.device.device_type != ::kDLCUDAManaged) + { + _CCCL_THROW(::std::invalid_argument{"DLTensor device type must be kDLCUDAManaged for managed_mdspan"}); + } + using __extents_type = ::cuda::std::dims<_Rank, ::cuda::std::int64_t>; + using __mdspan_type = ::cuda::managed_mdspan<_ElementType, __extents_type, _LayoutPolicy>; + return __mdspan_type{::cuda::__to_mdspan<_ElementType, _Rank, _LayoutPolicy>(__tensor)}; +} + +_CCCL_END_NAMESPACE_CUDA + +# include + +#endif // __CCCL_HAS_DLPACK() +#endif // _CUDA___MDSPAN_DLPACK_TO_MDSPAN_H diff --git a/libcudacxx/include/cuda/__mdspan/mdspan_to_dlpack.h b/libcudacxx/include/cuda/__mdspan/mdspan_to_dlpack.h new file mode 100644 index 00000000000..b5bdba1c80a --- /dev/null +++ b/libcudacxx/include/cuda/__mdspan/mdspan_to_dlpack.h @@ -0,0 +1,303 @@ +//===----------------------------------------------------------------------===// +// +// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA___MDSPAN_MDSPAN_TO_DLPACK_H +#define _CUDA___MDSPAN_MDSPAN_TO_DLPACK_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#if !_CCCL_COMPILER(NVRTC) && _CCCL_HAS_INCLUDE() + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +# include + +# include +// +# include + +_CCCL_BEGIN_NAMESPACE_CUDA + +static_assert(DLPACK_MAJOR_VERSION == 1, "DLPACK_MAJOR_VERSION must be 1"); + +template +[[nodiscard]] _CCCL_HOST_API inline ::DLDataType __data_type_to_dlpack() noexcept +{ + if constexpr (::cuda::std::is_same_v<_ElementType, bool>) + { + return ::DLDataType{::kDLBool, 8, 1}; + } + //-------------------------------------------------------------------------------------------------------------------- + // Signed integer types + else if constexpr (::cuda::std::__cccl_is_integer_v<_ElementType>) + { + return ::DLDataType{ + (::cuda::std::is_signed_v<_ElementType>) ? ::kDLInt : ::kDLUInt, ::cuda::std::__num_bits_v<_ElementType>, 1}; + } + //-------------------------------------------------------------------------------------------------------------------- + // bfloat16 (must come before general floating-point) +# if _CCCL_HAS_NVBF16() + else if constexpr (::cuda::std::is_same_v<_ElementType, ::__nv_bfloat16>) + { + return ::DLDataType{::kDLBfloat, 16, 1}; + } +# endif // _CCCL_HAS_NVBF16() + //-------------------------------------------------------------------------------------------------------------------- + // Low-precision Floating-point types (must come before general floating-point) +# if _CCCL_HAS_NVFP8_E4M3() + else if constexpr (::cuda::std::is_same_v<_ElementType, ::__nv_fp8_e4m3>) + { + return ::DLDataType{::kDLFloat8_e4m3fn, 8, 1}; + } +# endif // _CCCL_HAS_NVFP8_E4M3() +# if _CCCL_HAS_NVFP8_E5M2() + else if constexpr (::cuda::std::is_same_v<_ElementType, ::__nv_fp8_e5m2>) + { + return ::DLDataType{::kDLFloat8_e5m2, 8, 1}; + } +# endif // _CCCL_HAS_NVFP8_E5M2() +# if _CCCL_HAS_NVFP8_E8M0() + else if constexpr (::cuda::std::is_same_v<_ElementType, ::__nv_fp8_e8m0>) + { + return ::DLDataType{::kDLFloat8_e8m0fnu, 8, 1}; + } +# endif // _CCCL_HAS_NVFP8_E8M0() +# if _CCCL_HAS_NVFP6_E2M3() + else if constexpr (::cuda::std::is_same_v<_ElementType, ::__nv_fp6_e2m3>) + { + return ::DLDataType{::kDLFloat6_e2m3fn, 6, 1}; + } +# endif // _CCCL_HAS_NVFP6_E2M3() +# if _CCCL_HAS_NVFP6_E3M2() + else if constexpr (::cuda::std::is_same_v<_ElementType, ::__nv_fp6_e3m2>) + { + return ::DLDataType{::kDLFloat6_e3m2fn, 6, 1}; + } +# endif // _CCCL_HAS_NVFP6_E3M2() +# if _CCCL_HAS_NVFP4_E2M1() + else if constexpr (::cuda::std::is_same_v<_ElementType, ::__nv_fp4_e2m1>) + { + return ::DLDataType{::kDLFloat4_e2m1fn, 4, 1}; + } +# endif // _CCCL_HAS_NVFP4_E2M1() + //-------------------------------------------------------------------------------------------------------------------- + // Floating-point types (after specific types) + else if constexpr (::cuda::is_floating_point_v<_ElementType>) + { + return ::DLDataType{::kDLFloat, ::cuda::std::__num_bits_v<_ElementType>, 1}; + } + 
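  // For example, float maps to {kDLFloat, 32, 1} and double to {kDLFloat, 64, 1}.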
//-------------------------------------------------------------------------------------------------------------------- + // Complex types + // 256-bit data types are not supported in DLPack, e.g. cuda::std::complex<__float128> + else if constexpr (::cuda::std::__is_cuda_std_complex_v<_ElementType> && sizeof(_ElementType) <= sizeof(double) * 2) + { + // DLPack encodes complex numbers as a compact struct of two scalar values, and `bits` stores + // the size of the full complex number (e.g. std::complex => bits=64). + return ::DLDataType{::kDLComplex, sizeof(_ElementType) * CHAR_BIT, 1}; + } + //-------------------------------------------------------------------------------------------------------------------- + // CUDA built-in vector types +# if _CCCL_HAS_CTK() + else if constexpr (::cuda::__is_vector_type_v<_ElementType> || ::cuda::__is_extended_fp_vector_type_v<_ElementType>) + { + constexpr ::cuda::std::uint16_t __lanes = ::cuda::std::tuple_size_v<_ElementType>; + if constexpr (__lanes == 2 || __lanes == 4) + { + using __scalar_t = ::cuda::std::remove_cv_t<::cuda::std::tuple_element_t<0, _ElementType>>; + auto __scalar = ::cuda::__data_type_to_dlpack<__scalar_t>(); + __scalar.lanes = __lanes; + return __scalar; + } + else + { + static_assert(::cuda::std::__always_false_v<_ElementType>, "Unsupported vector type"); + return ::DLDataType{}; + } + } +# endif // _CCCL_HAS_CTK() + //-------------------------------------------------------------------------------------------------------------------- + // Unsupported types + else + { + static_assert(::cuda::std::__always_false_v<_ElementType>, "Unsupported type"); + return ::DLDataType{}; + } +} + +template <::cuda::std::size_t _Rank> +class __dlpack_tensor +{ + ::cuda::std::array<::cuda::std::int64_t, _Rank> __shape{}; + ::cuda::std::array<::cuda::std::int64_t, _Rank> __strides{}; + ::DLTensor __tensor{}; + + _CCCL_HOST_API void __update_tensor() noexcept + { + __tensor.shape = _Rank > 0 ? __shape.data() : nullptr; + __tensor.strides = _Rank > 0 ? 
__strides.data() : nullptr; + } + +public: + _CCCL_HOST_API explicit __dlpack_tensor() noexcept + { + __update_tensor(); + } + + _CCCL_HOST_API __dlpack_tensor(const __dlpack_tensor& __other) noexcept + : __shape{__other.__shape} + , __strides{__other.__strides} + , __tensor{__other.__tensor} + { + __update_tensor(); + } + + _CCCL_HOST_API __dlpack_tensor(__dlpack_tensor&& __other) noexcept + : __shape{::cuda::std::move(__other.__shape)} + , __strides{::cuda::std::move(__other.__strides)} + , __tensor{__other.__tensor} + { + __other.__tensor = ::DLTensor{}; + __update_tensor(); + } + + _CCCL_HOST_API __dlpack_tensor& operator=(const __dlpack_tensor& __other) noexcept + { + if (this == &__other) + { + return *this; + } + __shape = __other.__shape; + __strides = __other.__strides; + __tensor = __other.__tensor; + __update_tensor(); + return *this; + } + + _CCCL_HOST_API __dlpack_tensor& operator=(__dlpack_tensor&& __other) noexcept + { + if (this == &__other) + { + return *this; + } + __shape = ::cuda::std::move(__other.__shape); + __strides = ::cuda::std::move(__other.__strides); + __tensor = __other.__tensor; + __other.__tensor = ::DLTensor{}; + __update_tensor(); + return *this; + } + + _CCCL_HIDE_FROM_ABI ~__dlpack_tensor() noexcept = default; + + [[nodiscard]] _CCCL_HOST_API ::DLTensor& get() noexcept + { + return __tensor; + } + + [[nodiscard]] _CCCL_HOST_API const ::DLTensor& get() const noexcept + { + return __tensor; + } +}; + +template +[[nodiscard]] _CCCL_HOST_API __dlpack_tensor<_Extents::rank()> +__to_dlpack(const ::cuda::std::mdspan<_ElementType, _Extents, _Layout, _Accessor>& __mdspan, + ::DLDeviceType __device_type, + int __device_id) +{ + static_assert(::cuda::std::is_pointer_v, "data_handle_type must be a pointer"); + using __element_type = ::cuda::std::remove_cv_t<_ElementType>; + __dlpack_tensor<_Extents::rank()> __wrapper{}; + auto& __tensor = __wrapper.get(); + __tensor.data = __mdspan.size() > 0 ? 
const_cast<__element_type*>(__mdspan.data_handle()) : nullptr; + __tensor.device = ::DLDevice{__device_type, __device_id}; + __tensor.ndim = static_cast(__mdspan.rank()); + __tensor.dtype = ::cuda::__data_type_to_dlpack<::cuda::std::remove_cv_t<_ElementType>>(); + if constexpr (_Extents::rank() > 0) + { + constexpr auto __max_extent = ::cuda::std::numeric_limits<::cuda::std::int64_t>::max(); + for (::cuda::std::size_t __i = 0; __i < __mdspan.rank(); ++__i) + { + if (::cuda::std::cmp_greater(__mdspan.extent(__i), __max_extent)) + { + _CCCL_THROW(::std::invalid_argument{"Extent is too large"}); + } + if (::cuda::std::cmp_greater(__mdspan.stride(__i), __max_extent)) + { + _CCCL_THROW(::std::invalid_argument{"Stride is too large"}); + } + __tensor.shape[__i] = static_cast<::cuda::std::int64_t>(__mdspan.extent(__i)); + __tensor.strides[__i] = static_cast<::cuda::std::int64_t>(__mdspan.stride(__i)); + } + } + __tensor.byte_offset = 0; + return __wrapper; +} + +/*********************************************************************************************************************** + * Public API + **********************************************************************************************************************/ + +template +[[nodiscard]] _CCCL_HOST_API __dlpack_tensor<_Extents::rank()> +to_dlpack(const ::cuda::host_mdspan<_ElementType, _Extents, _Layout, _Accessor>& __mdspan) +{ + using __mdspan_type = ::cuda::std::mdspan<_ElementType, _Extents, _Layout, _Accessor>; + return ::cuda::__to_dlpack(__mdspan_type{__mdspan}, ::kDLCPU, 0); +} + +template +[[nodiscard]] _CCCL_HOST_API __dlpack_tensor<_Extents::rank()> +to_dlpack(const ::cuda::device_mdspan<_ElementType, _Extents, _Layout, _Accessor>& __mdspan, + ::cuda::device_ref __device = ::cuda::device_ref{0}) +{ + using __mdspan_type = ::cuda::std::mdspan<_ElementType, _Extents, _Layout, _Accessor>; + return ::cuda::__to_dlpack(__mdspan_type{__mdspan}, ::kDLCUDA, __device.get()); +} + +template +[[nodiscard]] _CCCL_HOST_API __dlpack_tensor<_Extents::rank()> +to_dlpack(const ::cuda::managed_mdspan<_ElementType, _Extents, _Layout, _Accessor>& __mdspan) +{ + using __mdspan_type = ::cuda::std::mdspan<_ElementType, _Extents, _Layout, _Accessor>; + return ::cuda::__to_dlpack(__mdspan_type{__mdspan}, ::kDLCUDAManaged, 0); +} + +_CCCL_END_NAMESPACE_CUDA + +# include + +#endif // !_CCCL_COMPILER(NVRTC) && _CCCL_HAS_INCLUDE() +#endif // _CUDA___MDSPAN_MDSPAN_TO_DLPACK_H diff --git a/libcudacxx/include/cuda/__type_traits/vector_type.h b/libcudacxx/include/cuda/__type_traits/vector_type.h index 7e21b8a806e..4e88c9efa65 100644 --- a/libcudacxx/include/cuda/__type_traits/vector_type.h +++ b/libcudacxx/include/cuda/__type_traits/vector_type.h @@ -24,7 +24,6 @@ #if _CCCL_HAS_CTK() # include -# include # include # if !_CCCL_CUDA_COMPILATION() @@ -346,10 +345,195 @@ using __vector_type_t = decltype(::cuda::__cccl_vector_type_t_impl<_Tp, _Size>() template inline constexpr bool __has_vector_type_v = !::cuda::std::is_same_v<__vector_type_t<_Tp, _Size>, void>; +template +inline constexpr bool __is_vector_type_v = false; + +template <> +inline constexpr bool __is_vector_type_v<::char1> = true; +template <> +inline constexpr bool __is_vector_type_v<::char2> = true; +template <> +inline constexpr bool __is_vector_type_v<::char3> = true; +template <> +inline constexpr bool __is_vector_type_v<::char4> = true; + +template <> +inline constexpr bool __is_vector_type_v<::uchar1> = true; +template <> +inline constexpr bool __is_vector_type_v<::uchar2> = true; +template <> 
+inline constexpr bool __is_vector_type_v<::uchar3> = true; +template <> +inline constexpr bool __is_vector_type_v<::uchar4> = true; + +template <> +inline constexpr bool __is_vector_type_v<::short1> = true; +template <> +inline constexpr bool __is_vector_type_v<::short2> = true; +template <> +inline constexpr bool __is_vector_type_v<::short3> = true; +template <> +inline constexpr bool __is_vector_type_v<::short4> = true; + +template <> +inline constexpr bool __is_vector_type_v<::ushort1> = true; +template <> +inline constexpr bool __is_vector_type_v<::ushort2> = true; +template <> +inline constexpr bool __is_vector_type_v<::ushort3> = true; +template <> +inline constexpr bool __is_vector_type_v<::ushort4> = true; + +template <> +inline constexpr bool __is_vector_type_v<::int1> = true; +template <> +inline constexpr bool __is_vector_type_v<::int2> = true; +template <> +inline constexpr bool __is_vector_type_v<::int3> = true; +template <> +inline constexpr bool __is_vector_type_v<::int4> = true; + +template <> +inline constexpr bool __is_vector_type_v<::uint1> = true; +template <> +inline constexpr bool __is_vector_type_v<::uint2> = true; +template <> +inline constexpr bool __is_vector_type_v<::uint3> = true; +template <> +inline constexpr bool __is_vector_type_v<::uint4> = true; + +template <> +inline constexpr bool __is_vector_type_v<::long1> = true; +template <> +inline constexpr bool __is_vector_type_v<::long2> = true; +template <> +inline constexpr bool __is_vector_type_v<::long3> = true; +# if _CCCL_CTK_AT_LEAST(13, 0) +template <> +inline constexpr bool __is_vector_type_v<::long4_16a> = true; +template <> +inline constexpr bool __is_vector_type_v<::long4_32a> = true; +# endif // ^^^ _CCCL_CTK_AT_LEAST(13, 0) +template <> +inline constexpr bool __is_vector_type_v<::long4> = true; + +template <> +inline constexpr bool __is_vector_type_v<::ulong1> = true; +template <> +inline constexpr bool __is_vector_type_v<::ulong2> = true; +template <> +inline constexpr bool __is_vector_type_v<::ulong3> = true; +# if _CCCL_CTK_AT_LEAST(13, 0) +template <> +inline constexpr bool __is_vector_type_v<::ulong4_16a> = true; +template <> +inline constexpr bool __is_vector_type_v<::ulong4_32a> = true; +# endif // ^^^ _CCCL_CTK_AT_LEAST(13, 0) +template <> +inline constexpr bool __is_vector_type_v<::ulong4> = true; + +template <> +inline constexpr bool __is_vector_type_v<::longlong1> = true; +template <> +inline constexpr bool __is_vector_type_v<::longlong2> = true; +template <> +inline constexpr bool __is_vector_type_v<::longlong3> = true; +# if _CCCL_CTK_AT_LEAST(13, 0) +template <> +inline constexpr bool __is_vector_type_v<::longlong4_16a> = true; +template <> +inline constexpr bool __is_vector_type_v<::longlong4_32a> = true; +# endif // ^^^ _CCCL_CTK_AT_LEAST(13, 0) +template <> +inline constexpr bool __is_vector_type_v<::longlong4> = true; + +template <> +inline constexpr bool __is_vector_type_v<::ulonglong1> = true; +template <> +inline constexpr bool __is_vector_type_v<::ulonglong2> = true; +template <> +inline constexpr bool __is_vector_type_v<::ulonglong3> = true; +# if _CCCL_CTK_AT_LEAST(13, 0) +template <> +inline constexpr bool __is_vector_type_v<::ulonglong4_16a> = true; +template <> +inline constexpr bool __is_vector_type_v<::ulonglong4_32a> = true; +# endif // ^^^ _CCCL_CTK_AT_LEAST(13, 0) +template <> +inline constexpr bool __is_vector_type_v<::ulonglong4> = true; + +template <> +inline constexpr bool __is_vector_type_v<::float1> = true; +template <> +inline constexpr bool 
__is_vector_type_v<::float2> = true; +template <> +inline constexpr bool __is_vector_type_v<::float3> = true; +template <> +inline constexpr bool __is_vector_type_v<::float4> = true; + +template <> +inline constexpr bool __is_vector_type_v<::double1> = true; +template <> +inline constexpr bool __is_vector_type_v<::double2> = true; +template <> +inline constexpr bool __is_vector_type_v<::double3> = true; +# if _CCCL_CTK_AT_LEAST(13, 0) +template <> +inline constexpr bool __is_vector_type_v<::double4_16a> = true; +template <> +inline constexpr bool __is_vector_type_v<::double4_32a> = true; +# endif // ^^^ _CCCL_CTK_AT_LEAST(13, 0) +template <> +inline constexpr bool __is_vector_type_v<::double4> = true; + +template <> +inline constexpr bool __is_vector_type_v<::dim3> = true; + +template +inline constexpr bool __is_extended_fp_vector_type_v = false; + +# if _CCCL_HAS_NVFP8() +template <> +inline constexpr bool __is_extended_fp_vector_type_v<::__nv_bfloat162> = true; +template <> +inline constexpr bool __is_extended_fp_vector_type_v<::__half2> = true; +template <> +inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp8x2_e4m3> = true; +template <> +inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp8x2_e5m2> = true; +template <> +inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp8x4_e4m3> = true; +template <> +inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp8x4_e5m2> = true; +# if _CCCL_CTK_AT_LEAST(12, 8) +template <> +inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp8x2_e8m0> = true; +template <> +inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp8x4_e8m0> = true; +# endif // _CCCL_CTK_AT_LEAST(12, 8) +# endif // _CCCL_HAS_NVFP8() + +# if _CCCL_HAS_NVFP6() +template <> +inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp6x2_e2m3> = true; +template <> +inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp6x2_e3m2> = true; +template <> +inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp6x4_e2m3> = true; +template <> +inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp6x4_e3m2> = true; +# endif // _CCCL_HAS_NVFP6() + +# if _CCCL_HAS_NVFP4() +template <> +inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp4x2_e2m1> = true; +template <> +inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp4x4_e2m1> = true; +# endif // _CCCL_HAS_NVFP4() + _CCCL_END_NAMESPACE_CUDA # include #endif // !_CCCL_HAS_CTK() - #endif // _CUDA__TYPE_TRAITS_VECTOR_TYPE_H diff --git a/libcudacxx/include/cuda/mdspan b/libcudacxx/include/cuda/mdspan index ae81a30219a..f8e36e75e43 100644 --- a/libcudacxx/include/cuda/mdspan +++ b/libcudacxx/include/cuda/mdspan @@ -21,7 +21,9 @@ # pragma system_header #endif // no system header +#include #include +#include #include #include #include diff --git a/libcudacxx/include/cuda/std/__tuple_dir/vector_types.h b/libcudacxx/include/cuda/std/__tuple_dir/vector_types.h index a44443ed091..125b6b87492 100644 --- a/libcudacxx/include/cuda/std/__tuple_dir/vector_types.h +++ b/libcudacxx/include/cuda/std/__tuple_dir/vector_types.h @@ -121,6 +121,26 @@ _LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__half, __half, 2) # if _CCCL_HAS_NVBF16() _LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_bfloat16, __nv_bfloat16, 2) # endif // _CCCL_HAS_NVBF16() +# if _CCCL_HAS_NVFP8() +_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp8x, __nv_fp8_e5m2, 2, _e5m2) +_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp8x, __nv_fp8_e5m2, 4, _e5m2) +_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp8x, 
__nv_fp8_e4m3, 2, _e4m3) +_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp8x, __nv_fp8_e4m3, 4, _e4m3) +# if _CCCL_CTK_AT_LEAST(12, 8) +_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp8x, __nv_fp8_e8m0, 2, _e8m0) +_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp8x, __nv_fp8_e8m0, 4, _e8m0) +# endif // _CCCL_CTK_AT_LEAST(12, 8) +# endif // _CCCL_HAS_NVFP8() +# if _CCCL_HAS_NVFP6() +_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp6x, __nv_fp6_e3m2, 2, _e3m2) +_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp6x, __nv_fp6_e3m2, 4, _e3m2) +_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp6x, __nv_fp6_e2m3, 2, _e2m3) +_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp6x, __nv_fp6_e2m3, 4, _e2m3) +# endif // _CCCL_HAS_NVFP6() +# if _CCCL_HAS_NVFP4() +_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp4x, __nv_fp4_e2m1, 2, _e2m1) +_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp4x, __nv_fp4_e2m1, 4, _e2m1) +# endif // _CCCL_HAS_NVFP4() template struct __get_element; @@ -273,6 +293,26 @@ _LIBCUDACXX_SPECIALIZE_GET(__half2, __half) # if _CCCL_HAS_NVBF16() _LIBCUDACXX_SPECIALIZE_GET(__nv_bfloat162, __nv_bfloat16) # endif // _CCCL_HAS_NVBF16() +# if _CCCL_HAS_NVFP8() +_LIBCUDACXX_SPECIALIZE_GET(__nv_fp8x2_e5m2, __nv_fp8_e5m2) +_LIBCUDACXX_SPECIALIZE_GET(__nv_fp8x4_e5m2, __nv_fp8_e5m2) +_LIBCUDACXX_SPECIALIZE_GET(__nv_fp8x2_e4m3, __nv_fp8_e4m3) +_LIBCUDACXX_SPECIALIZE_GET(__nv_fp8x4_e4m3, __nv_fp8_e4m3) +# if _CCCL_CTK_AT_LEAST(12, 8) +_LIBCUDACXX_SPECIALIZE_GET(__nv_fp8x2_e8m0, __nv_fp8_e8m0) +_LIBCUDACXX_SPECIALIZE_GET(__nv_fp8x4_e8m0, __nv_fp8_e8m0) +# endif // _CCCL_CTK_AT_LEAST(12, 8) +# endif // _CCCL_HAS_NVFP8() +# if _CCCL_HAS_NVFP6() +_LIBCUDACXX_SPECIALIZE_GET(__nv_fp6x2_e3m2, __nv_fp6_e3m2) +_LIBCUDACXX_SPECIALIZE_GET(__nv_fp6x4_e3m2, __nv_fp6_e3m2) +_LIBCUDACXX_SPECIALIZE_GET(__nv_fp6x2_e2m3, __nv_fp6_e2m3) +_LIBCUDACXX_SPECIALIZE_GET(__nv_fp6x4_e2m3, __nv_fp6_e2m3) +# endif // _CCCL_HAS_NVFP6() +# if _CCCL_HAS_NVFP4() +_LIBCUDACXX_SPECIALIZE_GET(__nv_fp4x2_e2m1, __nv_fp4_e2m1) +_LIBCUDACXX_SPECIALIZE_GET(__nv_fp4x4_e2m1, __nv_fp4_e2m1) +# endif // _CCCL_HAS_NVFP4() _CCCL_END_NAMESPACE_CUDA_STD diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/dlpack_to_mdspan/dlpack_to_mdspan.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/dlpack_to_mdspan/dlpack_to_mdspan.pass.cpp new file mode 100644 index 00000000000..c393b99fcab --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/dlpack_to_mdspan/dlpack_to_mdspan.pass.cpp @@ -0,0 +1,679 @@ +//===----------------------------------------------------------------------===// +// +// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: nvrtc + +#include +#include +#include +#include +#include + +#include + +#include "test_macros.h" +#include + +template +using dlpack_array = cuda::std::array; + +//============================================================================== +// Test: Rank-0 mdspan conversion +//============================================================================== + +bool test_rank0() +{ + float data = 42.0f; + DLTensor tensor{}; + tensor.data = &data; + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 0; + tensor.dtype = DLDataType{DLDataTypeCode::kDLFloat, 32, 1}; + + auto host_mdspan = cuda::to_host_mdspan(tensor); + + assert(host_mdspan.rank() == 0); + assert(host_mdspan.size() == 1); + assert(host_mdspan.data_handle() == &data); + assert(host_mdspan() == 42.0f); + return true; +} + +//============================================================================== +// Test: Empty tensor (zero in one dimension) +//============================================================================== + +bool test_empty_tensor() +{ + int dummy = 0; // Non-null but won't be accessed + dlpack_array<2> shape = {0, 5}; + dlpack_array<2> strides = {5, 1}; // row-major + DLTensor tensor{}; + tensor.data = &dummy; + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 2; + tensor.dtype = DLDataType{DLDataTypeCode::kDLInt, 32, 1}; + tensor.shape = shape.data(); + tensor.strides = strides.data(); + + auto host_mdspan = cuda::to_host_mdspan(tensor); + + assert(host_mdspan.extent(0) == 0); + assert(host_mdspan.extent(1) == 5); + assert(host_mdspan.size() == 0); + assert(host_mdspan.empty()); + return true; +} + +//============================================================================== +// Test: Rank-1 mdspan with layout_right (row-major) +//============================================================================== + +bool test_rank1() +{ + cuda::std::array data = {1, 2, 3, 4, 5}; + dlpack_array<1> shape = {5}; + dlpack_array<1> strides = {1}; + DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 1; + tensor.dtype = ::DLDataType{::kDLInt, 32, 1}; + tensor.shape = shape.data(); + tensor.strides = strides.data(); + + auto host_mdspan_right = cuda::to_host_mdspan(tensor); + auto host_mdspan_left = cuda::to_host_mdspan(tensor); + auto host_mdspan_stride = cuda::to_host_mdspan(tensor); + + assert(host_mdspan_right.rank() == 1); + assert(host_mdspan_right.extent(0) == 5); + assert(host_mdspan_right.stride(0) == 1); + for (int i = 0; i < 5; ++i) + { + assert(host_mdspan_right(i) == data[i]); + } + assert(host_mdspan_left.rank() == 1); + assert(host_mdspan_left.extent(0) == 5); + assert(host_mdspan_left.stride(0) == 1); + for (int i = 0; i < 5; ++i) + { + assert(host_mdspan_left(i) == data[i]); + } + assert(host_mdspan_stride.rank() == 1); + assert(host_mdspan_stride.extent(0) == 5); + assert(host_mdspan_stride.stride(0) == 1); + for (int i = 0; i < 5; ++i) + { + assert(host_mdspan_stride(i) == data[i]); + } + return true; +} + +//============================================================================== +// Test: Rank-2 mdspan with layout_right (row-major) +//============================================================================== + +bool test_rank2_layout_right() +{ + // 2x3 matrix in row-major order + cuda::std::array data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + dlpack_array<2> shape = {2, 3}; + dlpack_array<2> strides = {3, 1}; // row-major 
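+  // For shape {2, 3} in row-major order the strides are {3, 1}: advancing one row skips 3 elements.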
+ DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 2; + tensor.dtype = cuda::__data_type_to_dlpack(); + tensor.shape = shape.data(); + tensor.strides = strides.data(); + + auto host_mdspan = cuda::to_host_mdspan(tensor); + + assert(host_mdspan.rank() == 2); + assert(host_mdspan.extent(0) == 2); + assert(host_mdspan.extent(1) == 3); + assert(host_mdspan.stride(0) == 3); // row stride + assert(host_mdspan.stride(1) == 1); // column stride + + // Check values: row-major layout + assert(host_mdspan(0, 0) == 1.0f); + assert(host_mdspan(0, 1) == 2.0f); + assert(host_mdspan(0, 2) == 3.0f); + assert(host_mdspan(1, 0) == 4.0f); + assert(host_mdspan(1, 1) == 5.0f); + assert(host_mdspan(1, 2) == 6.0f); + return true; +} + +//============================================================================== +// Test: Rank-2 mdspan with layout_left (column-major) +//============================================================================== + +bool test_rank2_layout_left() +{ + // 2x3 matrix in column-major order + cuda::std::array data = {1.0f, 4.0f, 2.0f, 5.0f, 3.0f, 6.0f}; + dlpack_array<2> shape = {2, 3}; + dlpack_array<2> strides = {1, 2}; // column-major + DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 2; + tensor.dtype = cuda::__data_type_to_dlpack(); + tensor.shape = shape.data(); + tensor.strides = strides.data(); + + auto host_mdspan = cuda::to_host_mdspan(tensor); + + assert(host_mdspan.rank() == 2); + assert(host_mdspan.extent(0) == 2); + assert(host_mdspan.extent(1) == 3); + assert(host_mdspan.stride(0) == 1); // row stride + assert(host_mdspan.stride(1) == 2); // column stride + + // Check values: column-major layout + assert(host_mdspan(0, 0) == 1.0f); + assert(host_mdspan(0, 1) == 2.0f); + assert(host_mdspan(0, 2) == 3.0f); + assert(host_mdspan(1, 0) == 4.0f); + assert(host_mdspan(1, 1) == 5.0f); + assert(host_mdspan(1, 2) == 6.0f); + return true; +} + +//============================================================================== +// Test: Rank-2 mdspan with layout_stride (arbitrary strides) +//============================================================================== + +bool test_rank2_layout_stride() +{ + // 2x3 matrix with custom strides (e.g., padded) + cuda::std::array data = {1, 2, 3, 0, 4, 5, 6, 0}; // Each row padded to 4 elements + dlpack_array<2> shape = {2, 3}; + dlpack_array<2> strides = {4, 1}; // Row stride = 4 (padded), col stride = 1 + DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 2; + tensor.dtype = cuda::__data_type_to_dlpack(); + tensor.shape = shape.data(); + tensor.strides = strides.data(); + + auto host_mdspan = cuda::to_host_mdspan(tensor); + + assert(host_mdspan.rank() == 2); + assert(host_mdspan.extent(0) == 2); + assert(host_mdspan.extent(1) == 3); + assert(host_mdspan.stride(0) == 4); + assert(host_mdspan.stride(1) == 1); + + assert(host_mdspan(0, 0) == 1); + assert(host_mdspan(0, 1) == 2); + assert(host_mdspan(0, 2) == 3); + assert(host_mdspan(1, 0) == 4); + assert(host_mdspan(1, 1) == 5); + assert(host_mdspan(1, 2) == 6); + return true; +} + +//============================================================================== +// Test: layout_stride with default (layout_right) strides when strides is nullptr +// Note: This tests the fallback behavior for DLPack < 1.2 +//============================================================================== + +#if !(DLPACK_MAJOR_VERSION == 1 && 
DLPACK_MINOR_VERSION >= 2) + +bool test_layout_stride_null_strides() +{ + cuda::std::array data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + dlpack_array<2> shape = {2, 3}; + DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 2; + tensor.dtype = cuda::__data_type_to_dlpack(); + tensor.shape = shape.data(); + tensor.strides = nullptr; // null strides + + auto host_mdspan = cuda::to_host_mdspan(tensor); + + // Should use row-major strides by default + assert(host_mdspan.stride(0) == 3); + assert(host_mdspan.stride(1) == 1); + return true; +} + +#endif // !(DLPACK_MAJOR_VERSION == 1 && DLPACK_MINOR_VERSION >= 2) + +//============================================================================== +// Test: byte_offset support +//============================================================================== + +bool test_byte_offset() +{ + cuda::std::array data = {0, 0, 1, 2, 3, 4, 5, 6}; + // Skip first 2 ints (8 bytes) + dlpack_array<1> shape = {6}; + dlpack_array<1> strides = {1}; + DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 1; + tensor.dtype = DLDataType{DLDataTypeCode::kDLInt, 32, 1}; + tensor.shape = shape.data(); + tensor.strides = strides.data(); + tensor.byte_offset = sizeof(int) * 2; + + auto host_mdspan = cuda::to_host_mdspan(tensor); + + assert(host_mdspan.extent(0) == 6); + assert(host_mdspan(0) == 1); + assert(host_mdspan(5) == 6); + return true; +} + +//============================================================================== +// Exception tests +//============================================================================== + +void test_exception_wrong_rank() +{ + cuda::std::array data{}; + dlpack_array<2> shape = {2, 3}; + dlpack_array<2> strides = {3, 1}; // row-major + DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 2; + tensor.dtype = DLDataType{DLDataTypeCode::kDLInt, 32, 1}; + tensor.shape = shape.data(); + tensor.strides = strides.data(); + + bool caught = false; + try + { + // Try to convert rank-2 tensor to rank-1 mdspan + unused(cuda::to_host_mdspan(tensor)); + } + catch (const std::invalid_argument&) + { + caught = true; + } + assert(caught); +} + +void test_exception_wrong_dtype() +{ + cuda::std::array data{}; + dlpack_array<1> shape = {4}; + dlpack_array<1> strides = {1}; + DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 1; + tensor.dtype = DLDataType{DLDataTypeCode::kDLInt, 32, 1}; // dtype is int + tensor.shape = shape.data(); + tensor.strides = strides.data(); + + bool caught = false; + try + { + // Try to convert int tensor to float mdspan + unused(cuda::to_host_mdspan(tensor)); + } + catch (const std::invalid_argument&) + { + caught = true; + } + assert(caught); +} + +void test_exception_null_data() +{ + dlpack_array<1> shape = {4}; + dlpack_array<1> strides = {1}; + DLTensor tensor{}; + tensor.data = nullptr; + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 1; + tensor.dtype = DLDataType{DLDataTypeCode::kDLInt, 32, 1}; + tensor.shape = shape.data(); + tensor.strides = strides.data(); + + bool caught = false; + try + { + unused(cuda::to_host_mdspan(tensor)); + } + catch (const std::invalid_argument&) + { + caught = true; + } + assert(caught); +} + +void test_exception_null_shape() +{ + cuda::std::array data{}; + DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 1; + tensor.dtype = 
DLDataType{DLDataTypeCode::kDLInt, 32, 1}; + tensor.shape = nullptr; // null shape + + bool caught = false; + try + { + unused(cuda::to_host_mdspan(tensor)); + } + catch (const std::invalid_argument&) + { + caught = true; + } + assert(caught); +} + +void test_exception_negative_shape() +{ + cuda::std::array data{}; + dlpack_array<1> shape = {-3}; // negative shape + dlpack_array<1> strides = {1}; + DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 1; + tensor.dtype = DLDataType{DLDataTypeCode::kDLInt, 32, 1}; + tensor.shape = shape.data(); + tensor.strides = strides.data(); + + bool caught = false; + try + { + unused(cuda::to_host_mdspan(tensor)); + } + catch (const std::invalid_argument&) + { + caught = true; + } + assert(caught); +} + +void test_exception_wrong_device_type_host() +{ + cuda::std::array data{}; + dlpack_array<1> shape = {4}; + dlpack_array<1> strides = {1}; + DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{::kDLCUDA, 0}; // CUDA device, not CPU + tensor.ndim = 1; + tensor.dtype = DLDataType{DLDataTypeCode::kDLInt, 32, 1}; + tensor.shape = shape.data(); + tensor.strides = strides.data(); + + bool caught = false; + try + { + unused(cuda::to_host_mdspan(tensor)); + } + catch (const std::invalid_argument&) + { + caught = true; + } + assert(caught); +} + +void test_exception_wrong_device_type_device() +{ + cuda::std::array data{}; + dlpack_array<1> shape = {4}; + dlpack_array<1> strides = {1}; + DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{kDLCPU, 0}; // CPU device, not CUDA + tensor.ndim = 1; + tensor.dtype = DLDataType{DLDataTypeCode::kDLInt, 32, 1}; + tensor.shape = shape.data(); + tensor.strides = strides.data(); + + bool caught = false; + try + { + unused(cuda::to_device_mdspan(tensor)); + } + catch (const std::invalid_argument&) + { + caught = true; + } + assert(caught); +} + +void test_exception_wrong_device_type_managed() +{ + cuda::std::array data{}; + dlpack_array<1> shape = {4}; + dlpack_array<1> strides = {1}; + DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{kDLCPU, 0}; // CPU device, not CUDA managed + tensor.ndim = 1; + tensor.dtype = DLDataType{DLDataTypeCode::kDLInt, 32, 1}; + tensor.shape = shape.data(); + tensor.strides = strides.data(); + + bool caught = false; + try + { + unused(cuda::to_managed_mdspan(tensor)); + } + catch (const std::invalid_argument&) + { + caught = true; + } + assert(caught); +} + +void test_exception_stride_mismatch_layout_right() +{ + cuda::std::array data{}; + dlpack_array<2> shape = {2, 3}; + dlpack_array<2> strides = {1, 2}; // Column-major, not row-major + DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 2; + tensor.dtype = DLDataType{DLDataTypeCode::kDLFloat, 32, 1}; + tensor.shape = shape.data(); + tensor.strides = strides.data(); + + bool caught = false; + try + { + unused(cuda::to_host_mdspan(tensor)); + } + catch (const std::invalid_argument&) + { + caught = true; + } + assert(caught); +} + +void test_exception_stride_mismatch_layout_left() +{ + cuda::std::array data{}; + dlpack_array<2> shape = {2, 3}; + dlpack_array<2> strides = {3, 1}; // Row-major, not column-major + DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 2; + tensor.dtype = DLDataType{DLDataTypeCode::kDLFloat, 32, 1}; + tensor.shape = shape.data(); + tensor.strides = strides.data(); + + bool caught = false; + try + { + 
unused(cuda::to_host_mdspan(tensor)); + } + catch (const std::invalid_argument&) + { + caught = true; + } + assert(caught); +} + +void test_exception_zero_stride_layout_stride() +{ + cuda::std::array data{}; + dlpack_array<2> shape = {2, 3}; + dlpack_array<2> strides = {0, 1}; // Zero stride is invalid + DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 2; + tensor.dtype = DLDataType{DLDataTypeCode::kDLInt, 32, 1}; + tensor.shape = shape.data(); + tensor.strides = strides.data(); + + bool caught = false; + try + { + unused(cuda::to_host_mdspan(tensor)); + } + catch (const std::invalid_argument&) + { + caught = true; + } + assert(caught); +} + +void test_exception_null_strides_dlpack_v12() +{ + cuda::std::array data{}; + dlpack_array<2> shape = {2, 3}; + DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 2; + tensor.dtype = DLDataType{DLDataTypeCode::kDLFloat, 32, 1}; + tensor.shape = shape.data(); + tensor.strides = nullptr; // null strides not allowed in DLPack v1.2+ + + bool caught = false; + try + { + unused(cuda::to_host_mdspan(tensor)); + } + catch (const std::invalid_argument&) + { + caught = true; + } + assert(caught); +} + +void test_exception_misaligned_data() +{ + // Create a buffer that allows us to get a misaligned pointer + alignas(16) cuda::std::array buffer{}; + // Get a pointer that's 1 byte into the buffer (misaligned for int) + auto misaligned_ptr = reinterpret_cast(buffer.data() + 1); + dlpack_array<1> shape = {3}; + dlpack_array<1> strides = {1}; + DLTensor tensor{}; + tensor.data = misaligned_ptr; + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 1; + tensor.dtype = DLDataType{DLDataTypeCode::kDLInt, 32, 1}; + tensor.shape = shape.data(); + tensor.strides = strides.data(); + + bool caught = false; + try + { + unused(cuda::to_host_mdspan(tensor)); + } + catch (const std::invalid_argument&) + { + caught = true; + } + assert(caught); +} + +bool test_exceptions() +{ + test_exception_wrong_rank(); + test_exception_wrong_dtype(); + test_exception_null_data(); + test_exception_null_shape(); + test_exception_negative_shape(); + test_exception_wrong_device_type_host(); + test_exception_wrong_device_type_device(); + test_exception_wrong_device_type_managed(); + test_exception_stride_mismatch_layout_right(); + test_exception_stride_mismatch_layout_left(); + test_exception_zero_stride_layout_stride(); +#if DLPACK_MAJOR_VERSION > 1 || (DLPACK_MAJOR_VERSION == 1 && DLPACK_MINOR_VERSION >= 2) + test_exception_null_strides_dlpack_v12(); +#endif + test_exception_misaligned_data(); + return true; +} + +//============================================================================== +// Test: Return type checking +//============================================================================== + +bool test_return_types() +{ + cuda::std::array data{}; + dlpack_array<1> shape = {4}; + dlpack_array<1> strides = {1}; + DLTensor tensor{}; + tensor.data = data.data(); + tensor.device = DLDevice{kDLCPU, 0}; + tensor.ndim = 1; + tensor.dtype = cuda::__data_type_to_dlpack(); + tensor.shape = shape.data(); + tensor.strides = strides.data(); + + // Check return type of to_host_mdspan + auto host_ms = cuda::to_host_mdspan(tensor); + static_assert( + cuda::std::is_same_v, cuda::std::layout_stride>>); + assert(host_ms.extent(0) == 4); + + auto host_ms_right = cuda::to_host_mdspan(tensor); + static_assert( + cuda::std::is_same_v, cuda::std::layout_right>>); + assert(host_ms_right.extent(0) == 
4); + return true; +} + +int main(int, char**) +{ + NV_IF_TARGET( + NV_IS_HOST, + (assert(test_rank0()); // + assert(test_empty_tensor()); + assert(test_rank1()); + assert(test_rank2_layout_right()); + assert(test_rank2_layout_left()); + assert(test_rank2_layout_stride()); + assert(test_byte_offset()); + assert(test_return_types()); + assert(test_exceptions());)) +#if !(DLPACK_MAJOR_VERSION > 1 || (DLPACK_MAJOR_VERSION == 1 && DLPACK_MINOR_VERSION >= 2)) + NV_IF_TARGET(NV_IS_HOST, (assert(test_layout_stride_null_strides());)) +#endif + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/mdspan_to_dlpack/mdspan_to_dlpack.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/mdspan_to_dlpack/mdspan_to_dlpack.pass.cpp new file mode 100644 index 00000000000..b066a9b2569 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/mdspan_to_dlpack/mdspan_to_dlpack.pass.cpp @@ -0,0 +1,527 @@ +//===----------------------------------------------------------------------===// +// +// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: nvrtc + +#include +#include +#include +#include +#include +#include + +#include "test_macros.h" + +void check_datatype(const DLDataType& dt, uint8_t code, uint8_t bits, uint16_t lanes) +{ + assert(dt.code == code); + assert(dt.bits == bits); + assert(dt.lanes == lanes); +} + +bool test_mdspan_to_dlpack_host_layout_right() +{ + using extents_t = cuda::std::extents; + int data[6] = {0, 1, 2, 3, 4, 5}; + cuda::host_mdspan md{data, extents_t{}}; + auto dlpack_wrapper = cuda::to_dlpack(md); + const auto& dltensor = dlpack_wrapper.get(); + + assert(dltensor.device.device_type == kDLCPU); + assert(dltensor.device.device_id == 0); + assert(dltensor.ndim == 2); + check_datatype(dltensor.dtype, kDLInt, 32, 1); + assert(dltensor.shape != nullptr); + assert(dltensor.strides != nullptr); + assert(dltensor.shape[0] == 2); + assert(dltensor.shape[1] == 3); + assert(dltensor.strides[0] == 3); + assert(dltensor.strides[1] == 1); + assert(dltensor.byte_offset == 0); + assert(dltensor.data == data); + return true; +} + +bool test_mdspan_to_dlpack_host_layout_left() +{ + using extents_t = cuda::std::extents; + int data[6] = {0, 1, 2, 3, 4, 5}; + cuda::host_mdspan md{data, extents_t{}}; + auto dlpack_wrapper = cuda::to_dlpack(md); + const auto& dltensor = dlpack_wrapper.get(); + + assert(dltensor.device.device_type == kDLCPU); + assert(dltensor.device.device_id == 0); + check_datatype(dltensor.dtype, kDLInt, 32, 1); + assert(dltensor.ndim == 2); + assert(dltensor.shape != nullptr); + assert(dltensor.strides != nullptr); + assert(dltensor.shape[0] == 2); + assert(dltensor.shape[1] == 3); + assert(dltensor.strides[0] == 1); + assert(dltensor.strides[1] == 2); + assert(dltensor.byte_offset == 0); + assert(dltensor.data == data); + return true; +} + +bool test_mdspan_to_dlpack_empty_size() +{ + using extents_t = cuda::std::dims<2>; + int data[1] = {42}; + cuda::host_mdspan m{data, extents_t{0, 3}}; + auto dlpack_wrapper = cuda::to_dlpack(m); + const auto& dltensor = dlpack_wrapper.get(); + + assert(dltensor.device.device_type == kDLCPU); + assert(dltensor.device.device_id == 0); + 
check_datatype(dltensor.dtype, kDLInt, 32, 1); + assert(dltensor.ndim == 2); + assert(dltensor.shape[0] == 0); + assert(dltensor.shape[1] == 3); + assert(dltensor.strides[0] == 3); + assert(dltensor.strides[1] == 1); + assert(dltensor.byte_offset == 0); + assert(dltensor.data == nullptr); // size() == 0 => nullptr + return true; +} + +bool test_mdspan_to_dlpack_rank_0() +{ + using extents_t = cuda::std::extents; + int data[1] = {7}; + cuda::host_mdspan md{data, extents_t{}}; + auto dlpack_wrapper = cuda::to_dlpack(md); + const auto& dltensor = dlpack_wrapper.get(); + + assert(dltensor.device.device_type == kDLCPU); + assert(dltensor.device.device_id == 0); + check_datatype(dltensor.dtype, kDLInt, 32, 1); + assert(dltensor.ndim == 0); + assert(dltensor.shape == nullptr); + assert(dltensor.strides == nullptr); + assert(dltensor.byte_offset == 0); + assert(dltensor.data == data); // rank-0 mdspan has size() == 1 + return true; +} + +bool test_mdspan_to_dlpack_const_pointer() +{ + using extents_t = cuda::std::dims<3>; + const int data[24] = {0, 1, 2, 3, 4, 5}; + cuda::host_mdspan md{data, extents_t{2, 3, 4}}; + auto dlpack_wrapper = cuda::to_dlpack(md); + const auto& dltensor = dlpack_wrapper.get(); + + assert(dltensor.device.device_type == kDLCPU); + assert(dltensor.device.device_id == 0); + check_datatype(dltensor.dtype, kDLInt, 32, 1); + assert(dltensor.ndim == 3); + assert(dltensor.shape[0] == 2); + assert(dltensor.shape[1] == 3); + assert(dltensor.shape[2] == 4); + assert(dltensor.strides[0] == 12); + assert(dltensor.strides[1] == 4); + assert(dltensor.strides[2] == 1); + assert(dltensor.byte_offset == 0); + assert(dltensor.data == data); // const data pointer is preserved + return true; +} + +bool test_mdspan_to_dlpack_device() +{ + using extents_t = cuda::std::extents; + float* data = nullptr; + assert(cudaMalloc(&data, 6 * sizeof(float)) == cudaSuccess); + cuda::device_mdspan md{data, extents_t{}}; + auto dlpack_wrapper = cuda::to_dlpack(md, cuda::device_ref{0}); + const auto& dltensor = dlpack_wrapper.get(); + + assert(dltensor.device.device_type == kDLCUDA); + assert(dltensor.device.device_id == 0); + assert(dltensor.ndim == 2); + check_datatype(dltensor.dtype, kDLFloat, 32, 1); + assert(dltensor.shape[0] == 2); + assert(dltensor.shape[1] == 3); + assert(dltensor.strides[0] == 3); + assert(dltensor.strides[1] == 1); + assert(dltensor.byte_offset == 0); + assert(dltensor.data == data); + assert(cudaFree(data) == cudaSuccess); + return true; +} + +bool test_mdspan_to_dlpack_managed() +{ + using extents_t = cuda::std::extents; + float* data = nullptr; + assert(cudaMallocManaged(&data, 6 * sizeof(float)) == cudaSuccess); + cuda::managed_mdspan md{data, extents_t{}}; + auto dlpack_wrapper = cuda::to_dlpack(md); + const auto& dltensor = dlpack_wrapper.get(); + + assert(dltensor.device.device_type == kDLCUDAManaged); + assert(dltensor.device.device_id == 0); + assert(dltensor.ndim == 2); + check_datatype(dltensor.dtype, kDLFloat, 32, 1); + assert(dltensor.shape[0] == 2); + assert(dltensor.shape[1] == 3); + assert(dltensor.strides[0] == 3); + assert(dltensor.strides[1] == 1); + assert(dltensor.byte_offset == 0); + assert(dltensor.data == data); + assert(cudaFree(data) == cudaSuccess); + return true; +} + +template +struct test_mdspan_to_dlpack_types_fn +{ + using list_t = ListT; + + cuda::std::array expected_types; + + template + void call_impl() const + { + using T = cuda::std::__type_at_c; + using extents_t = cuda::std::extents; + T* data = nullptr; + cuda::host_mdspan md{data, extents_t{}}; + auto dlpack_wrapper = cuda::to_dlpack(md); + const auto& dltensor =
dlpack_wrapper.get(); + + auto type = expected_types[index]; + check_datatype(dltensor.dtype, type.code, type.bits, type.lanes); + } + + template + void call(cuda::std::index_sequence) const + { + (call_impl(), ...); + } +}; + +bool test_mdspan_to_dlpack_basic_types() +{ + using list_t = cuda::std::__type_list< + bool, + signed char, + short, + int, + long, + long long, +#if _CCCL_HAS_INT128() + __int128_t, +#endif + // Unsigned integer types + unsigned char, + unsigned short, + unsigned int, + unsigned long, + unsigned long long, +#if _CCCL_HAS_INT128() + __uint128_t, +#endif + // Floating-point types + float, + double +#if _CCCL_HAS_FLOAT128() + , + __float128 +#endif + >; + cuda::std::array expected_types = { + DLDataType{kDLBool, 8, 1}, + // Signed integer types + DLDataType{kDLInt, 8, 1}, + DLDataType{kDLInt, 16, 1}, + DLDataType{kDLInt, 32, 1}, + DLDataType{kDLInt, sizeof(long) * 8, 1}, + DLDataType{kDLInt, 64, 1}, +#if _CCCL_HAS_INT128() + DLDataType{kDLInt, 128, 1}, +#endif + // Unsigned integer types + DLDataType{kDLUInt, 8, 1}, + DLDataType{kDLUInt, 16, 1}, + DLDataType{kDLUInt, 32, 1}, + DLDataType{kDLUInt, sizeof(unsigned long) * 8, 1}, + DLDataType{kDLUInt, 64, 1}, +#if _CCCL_HAS_INT128() + DLDataType{kDLUInt, 128, 1}, +#endif + // Floating-point types + DLDataType{kDLFloat, 32, 1}, + DLDataType{kDLFloat, 64, 1}, +#if _CCCL_HAS_FLOAT128() + DLDataType{kDLFloat, 128, 1}, +#endif + }; + test_mdspan_to_dlpack_types_fn test_fn{expected_types}; + test_fn.call(cuda::std::make_index_sequence{}); + return true; +} + +bool test_mdspan_to_dlpack_extended_fp_and_complex_types() +{ + using list_t = cuda::std::__type_list< +#if _CCCL_HAS_NVFP16() + ::__half, +#endif +#if _CCCL_HAS_NVBF16() + ::__nv_bfloat16, +#endif +#if _CCCL_HAS_FLOAT128() + __float128, +#endif + // Low-precision floating-point types +#if _CCCL_HAS_NVFP8_E4M3() + ::__nv_fp8_e4m3, +#endif +#if _CCCL_HAS_NVFP8_E5M2() + ::__nv_fp8_e5m2, +#endif +#if _CCCL_HAS_NVFP8_E8M0() + ::__nv_fp8_e8m0, +#endif +#if _CCCL_HAS_NVFP6_E2M3() + ::__nv_fp6_e2m3, +#endif +#if _CCCL_HAS_NVFP6_E3M2() + ::__nv_fp6_e3m2, +#endif +#if _CCCL_HAS_NVFP4_E2M1() + ::__nv_fp4_e2m1, +#endif + // Complex types +#if _CCCL_HAS_NVFP16() + cuda::std::complex<::__half>, +#endif + cuda::std::complex, + cuda::std::complex>; + cuda::std::array expected_types = { +#if _CCCL_HAS_NVFP16() + DLDataType{kDLFloat, 16, 1}, +#endif +#if _CCCL_HAS_NVBF16() + DLDataType{kDLBfloat, 16, 1}, +#endif +#if _CCCL_HAS_FLOAT128() + DLDataType{kDLFloat, 128, 1}, +#endif + // Low-precision floating-point types +#if _CCCL_HAS_NVFP8_E4M3() + DLDataType{kDLFloat8_e4m3fn, 8, 1}, +#endif +#if _CCCL_HAS_NVFP8_E5M2() + DLDataType{kDLFloat8_e5m2, 8, 1}, +#endif +#if _CCCL_HAS_NVFP8_E8M0() + DLDataType{kDLFloat8_e8m0fnu, 8, 1}, +#endif +#if _CCCL_HAS_NVFP6_E2M3() + DLDataType{kDLFloat6_e2m3fn, 6, 1}, +#endif +#if _CCCL_HAS_NVFP6_E3M2() + DLDataType{kDLFloat6_e3m2fn, 6, 1}, +#endif +#if _CCCL_HAS_NVFP4_E2M1() + DLDataType{kDLFloat4_e2m1fn, 4, 1}, +#endif + // Complex types +#if _CCCL_HAS_NVFP16() + DLDataType{kDLComplex, 32, 1}, +#endif + DLDataType{kDLComplex, 64, 1}, + DLDataType{kDLComplex, 128, 1}}; + test_mdspan_to_dlpack_types_fn test_fn{expected_types}; + test_fn.call(cuda::std::make_index_sequence{}); + return true; +} + +#if _CCCL_HAS_CTK() +bool test_mdspan_to_dlpack_vector_types() +{ + using list_t = cuda::std::__type_list< + ::char2, + ::char4, + ::uchar2, + ::uchar4, + ::short2, + ::short4, + ::ushort2, + ::ushort4, + ::int2, + ::int4, + ::uint2, + ::uint4, + ::long2 +# if 
_CCCL_CTK_AT_LEAST(13, 0) + , + ::long4_32a +# else + , + ::long4 +# endif + , + ::ulong2 +# if _CCCL_CTK_AT_LEAST(13, 0) + , + ::ulong4_32a +# else + , + ::ulong4 +# endif + , + ::longlong2 +# if _CCCL_CTK_AT_LEAST(13, 0) + , + ::longlong4_32a +# else + , + ::longlong4 +# endif + , + ::ulonglong2 +# if _CCCL_CTK_AT_LEAST(13, 0) + , + ::ulonglong4_32a +# else + , + ::ulonglong4 +# endif + , + ::float2, + ::float4, + ::double2 +# if _CCCL_CTK_AT_LEAST(13, 0) + , + ::double4_32a +# else + , + ::double4 +# endif + >; + cuda::std::array expected_types = { + DLDataType{kDLInt, 8, 2}, + DLDataType{kDLInt, 8, 4}, + DLDataType{kDLUInt, 8, 2}, + DLDataType{kDLUInt, 8, 4}, + DLDataType{kDLInt, 16, 2}, + DLDataType{kDLInt, 16, 4}, + DLDataType{kDLUInt, 16, 2}, + DLDataType{kDLUInt, 16, 4}, + DLDataType{kDLInt, 32, 2}, + DLDataType{kDLInt, 32, 4}, + DLDataType{kDLUInt, 32, 2}, + DLDataType{kDLUInt, 32, 4}, + DLDataType{kDLInt, sizeof(long) * 8, 2}, + DLDataType{kDLInt, sizeof(long) * 8, 4}, + DLDataType{kDLUInt, sizeof(unsigned long) * 8, 2}, + DLDataType{kDLUInt, sizeof(unsigned long) * 8, 4}, + DLDataType{kDLInt, 64, 2}, + DLDataType{kDLInt, 64, 4}, + DLDataType{kDLUInt, 64, 2}, + DLDataType{kDLUInt, 64, 4}, + DLDataType{kDLFloat, 32, 2}, + DLDataType{kDLFloat, 32, 4}, + DLDataType{kDLFloat, 64, 2}, + DLDataType{kDLFloat, 64, 4}}; + test_mdspan_to_dlpack_types_fn test_fn{expected_types}; + test_fn.call(cuda::std::make_index_sequence{}); + return true; +} +#endif // _CCCL_HAS_CTK() + +bool test_mdspan_to_dlpack_extended_fp_vector_types() +{ + using list_t = cuda::std::__type_list< +#if _CCCL_HAS_NVFP16() + ::__half2, +#endif +#if _CCCL_HAS_NVBF16() + ::__nv_bfloat162, +#endif +#if _CCCL_HAS_NVFP8_E4M3() + ::__nv_fp8x2_e4m3, + ::__nv_fp8x4_e4m3, +#endif +#if _CCCL_HAS_NVFP8_E5M2() + ::__nv_fp8x2_e5m2, + ::__nv_fp8x4_e5m2, +#endif +#if _CCCL_HAS_NVFP8_E8M0() + ::__nv_fp8x2_e8m0, + ::__nv_fp8x4_e8m0, +#endif +#if _CCCL_HAS_NVFP6_E2M3() + ::__nv_fp6x2_e2m3, + ::__nv_fp6x4_e2m3, +#endif +#if _CCCL_HAS_NVFP6_E3M2() + ::__nv_fp6x2_e3m2, + ::__nv_fp6x4_e3m2, +#endif +#if _CCCL_HAS_NVFP4_E2M1() + ::__nv_fp4x2_e2m1, + ::__nv_fp4x4_e2m1, +#endif + void* /* dummy to allow trailing commas */>; + cuda::std::array expected_types = { +#if _CCCL_HAS_NVFP16() + DLDataType{kDLFloat, 16, 2}, +#endif +#if _CCCL_HAS_NVBF16() + DLDataType{kDLBfloat, 16, 2}, +#endif +#if _CCCL_HAS_NVFP8_E4M3() + DLDataType{kDLFloat8_e4m3fn, 8, 2}, + DLDataType{kDLFloat8_e4m3fn, 8, 4}, +#endif +#if _CCCL_HAS_NVFP8_E5M2() + DLDataType{kDLFloat8_e5m2, 8, 2}, + DLDataType{kDLFloat8_e5m2, 8, 4}, +#endif +#if _CCCL_HAS_NVFP8_E8M0() + DLDataType{kDLFloat8_e8m0fnu, 8, 2}, + DLDataType{kDLFloat8_e8m0fnu, 8, 4}, +#endif +#if _CCCL_HAS_NVFP6_E2M3() + DLDataType{kDLFloat6_e2m3fn, 6, 2}, + DLDataType{kDLFloat6_e2m3fn, 6, 4}, +#endif +#if _CCCL_HAS_NVFP6_E3M2() + DLDataType{kDLFloat6_e3m2fn, 6, 2}, + DLDataType{kDLFloat6_e3m2fn, 6, 4}, +#endif +#if _CCCL_HAS_NVFP4_E2M1() + DLDataType{kDLFloat4_e2m1fn, 4, 2}, + DLDataType{kDLFloat4_e2m1fn, 4, 4}, +#endif + }; + test_mdspan_to_dlpack_types_fn test_fn{expected_types}; + test_fn.call(cuda::std::make_index_sequence{}); + return true; +} + +int main(int, char**) +{ + NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_host_layout_right());)) + NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_host_layout_left());)) + NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_empty_size());)) + NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_rank_0());)) + NV_IF_TARGET(NV_IS_HOST, 
(assert(test_mdspan_to_dlpack_const_pointer());)) + NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_device());)) + NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_managed());)) + NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_basic_types());)) + NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_extended_fp_and_complex_types());)) +#if _CCCL_HAS_CTK() + NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_vector_types());)) +#endif // _CCCL_HAS_CTK() + NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_extended_fp_vector_types());)) + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/mdspan_to_dlpack/mdspan_to_dlpack.wrapper.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/mdspan_to_dlpack/mdspan_to_dlpack.wrapper.pass.cpp new file mode 100644 index 00000000000..e54cf6c93b6 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/mdspan_to_dlpack/mdspan_to_dlpack.wrapper.pass.cpp @@ -0,0 +1,214 @@ +//===----------------------------------------------------------------------===// +// +// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: nvrtc + +#include +#include +#include +#include +#include +#include +#include + +#include "test_macros.h" + +void check_datatype(const DLDataType& dt, uint8_t code, uint8_t bits, uint16_t lanes) +{ + assert(dt.code == code); + assert(dt.bits == bits); + assert(dt.lanes == lanes); +} + +bool test_mdspan_to_dlpack_wrapper_default_ctor() +{ + cuda::__dlpack_tensor<3> dlpack_wrapper{}; + DLDataType default_dtype = {}; + DLDevice default_device = {}; + auto& tensor = dlpack_wrapper.get(); + assert(tensor.device.device_type == default_device.device_type); + assert(tensor.device.device_id == default_device.device_id); + check_datatype(tensor.dtype, default_dtype.code, default_dtype.bits, default_dtype.lanes); + assert(tensor.shape != nullptr); + assert(tensor.strides != nullptr); + return true; +} + +bool test_dlpack_wrapper_copy_ctor() +{ + using extents_t = cuda::std::extents; + int data[6] = {0, 1, 2, 3, 4, 5}; + cuda::host_mdspan md{data, extents_t{}}; + auto w = cuda::to_dlpack(md); + auto& t = w.get(); + auto* shape_ptr = t.shape; + auto* strides_ptr = t.strides; + + auto w2 = w; // copy construct + // Copy must not alias the source wrapper's shape/stride storage. + auto& t2 = w2.get(); + assert(t2.shape != nullptr); + assert(t2.strides != nullptr); + assert(t2.shape != shape_ptr); + assert(t2.strides != strides_ptr); + + // Source wrapper must remain intact. + assert(t.shape == shape_ptr); + assert(t.strides == strides_ptr); + + // Sanity-check copied tensor metadata and values. 
+ assert(t2.device.device_type == kDLCPU); + assert(t2.device.device_id == 0); + assert(t2.ndim == 2); + check_datatype(t2.dtype, kDLInt, 32, 1); + assert(t2.shape[0] == 2); + assert(t2.shape[1] == 3); + assert(t2.strides[0] == 3); + assert(t2.strides[1] == 1); + assert(t2.byte_offset == 0); + assert(t2.data == data); + return true; +} + +bool test_dlpack_wrapper_move_ctor() +{ + using extents_t = cuda::std::extents; + int data[6] = {0, 1, 2, 3, 4, 5}; + cuda::host_mdspan md{data, extents_t{}}; + auto w = cuda::to_dlpack(md); + auto& t = w.get(); + auto* shape_ptr = t.shape; + auto* strides_ptr = t.strides; + auto moved = cuda::std::move(w); // move construct + + // Moved-to wrapper must not keep pointers to moved-from storage. + auto& tm = moved.get(); + assert(tm.shape != nullptr); + assert(tm.strides != nullptr); + assert(tm.shape != shape_ptr); + assert(tm.strides != strides_ptr); + + // Moved-from wrapper is explicitly reset to a default/empty DLTensor. + assert(t.shape == nullptr); + assert(t.strides == nullptr); + assert(t.data == nullptr); + assert(t.ndim == 0); + + // Sanity-check moved-to tensor metadata and values. + assert(tm.device.device_type == kDLCPU); + assert(tm.device.device_id == 0); + assert(tm.ndim == 2); + check_datatype(tm.dtype, kDLInt, 32, 1); + assert(tm.shape[0] == 2); + assert(tm.shape[1] == 3); + assert(tm.strides[0] == 3); + assert(tm.strides[1] == 1); + assert(tm.byte_offset == 0); + assert(tm.data == data); + return true; +} + +bool test_dlpack_wrapper_copy_assignment() +{ + using extents_t = cuda::std::extents; + int data_a[6] = {0, 1, 2, 3, 4, 5}; + int data_b[6] = {6, 7, 8, 9, 10, 11}; + cuda::host_mdspan md_a{data_a, extents_t{}}; + cuda::host_mdspan md_b{data_b, extents_t{}}; + auto a = cuda::to_dlpack(md_a); + auto b = cuda::to_dlpack(md_b); + auto& ta = a.get(); + auto& tb = b.get(); + auto* b_shape_ptr = tb.shape; + auto* b_strides_ptr = tb.strides; + + b = a; // copy assign + // Destination must keep pointing to its own member arrays (not to `a`). + assert(tb.shape == b_shape_ptr); + assert(tb.strides == b_strides_ptr); + assert(tb.shape != ta.shape); + assert(tb.strides != ta.strides); + + // Values must be copied correctly. + assert(tb.data == data_a); + assert(tb.ndim == 2); + assert(tb.shape[0] == 2); + assert(tb.shape[1] == 3); + assert(tb.strides[0] == 3); + assert(tb.strides[1] == 1); + return true; +} + +bool test_dlpack_wrapper_move_assignment() +{ + using extents_t = cuda::std::extents; + int data_a[6] = {0, 1, 2, 3, 4, 5}; + int data_b[6] = {6, 7, 8, 9, 10, 11}; + cuda::host_mdspan md_a{data_a, extents_t{}}; + cuda::host_mdspan md_b{data_b, extents_t{}}; + auto a = cuda::to_dlpack(md_a); + auto b = cuda::to_dlpack(md_b); + auto& ta = a.get(); + auto& tb = b.get(); + auto* a_shape_ptr = ta.shape; + auto* a_strides_ptr = ta.strides; + auto* b_shape_ptr = tb.shape; + auto* b_strides_ptr = tb.strides; + + b = cuda::std::move(a); // move assign + // Destination must keep pointing to its own member arrays, not the source's. + assert(tb.shape == b_shape_ptr); + assert(tb.strides == b_strides_ptr); + assert(tb.shape != a_shape_ptr); + assert(tb.strides != a_strides_ptr); + + // Source must be reset. + assert(ta.shape == nullptr); + assert(ta.strides == nullptr); + assert(ta.data == nullptr); + assert(ta.ndim == 0); + + // Values must be moved correctly. 
+ assert(tb.data == data_a); + assert(tb.ndim == 2); + assert(tb.shape[0] == 2); + assert(tb.shape[1] == 3); + assert(tb.strides[0] == 3); + assert(tb.strides[1] == 1); + return true; +} + +bool test_dlpack_wrapper_get() +{ + using wrapper_t = cuda::__dlpack_tensor<2>; + static_assert(cuda::std::is_same_v().get()), ::DLTensor&>); + static_assert(cuda::std::is_same_v().get()), const ::DLTensor&>); + + wrapper_t w{}; + // Mutating through the reference returned by `get()` must be observable. + auto& t = w.get(); + t.ndim = 123; + assert(w.get().ndim == 123); + + // Const overload should also alias the same underlying object. + const wrapper_t& cw = w; + assert(&cw.get() == &w.get()); + return true; +} + +int main(int, char**) +{ + NV_IF_TARGET( + NV_IS_HOST, + (assert(test_mdspan_to_dlpack_wrapper_default_ctor()); assert(test_dlpack_wrapper_copy_ctor()); + assert(test_dlpack_wrapper_move_ctor()); + assert(test_dlpack_wrapper_copy_assignment()); + assert(test_dlpack_wrapper_move_assignment()); + assert(test_dlpack_wrapper_get());)) + return 0; +} diff --git a/libcudacxx/test/libcudacxx/libcxx/type_traits/vector_type.compile.pass.cpp b/libcudacxx/test/libcudacxx/libcxx/type_traits/vector_type.compile.pass.cpp index 406b6157cbf..1561857be02 100644 --- a/libcudacxx/test/libcudacxx/libcxx/type_traits/vector_type.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/libcxx/type_traits/vector_type.compile.pass.cpp @@ -29,31 +29,61 @@ __host__ __device__ void test() test(); test(); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + test(); test(); test(); test(); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + test(); test(); test(); test(); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + test(); test(); test(); test(); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + test(); test(); test(); test(); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + test(); test(); test(); test(); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + test(); test(); test(); @@ -63,6 +93,16 @@ __host__ __device__ void test() test(); #endif // ^^^ _CCCL_CTK_BELOW(13, 0) ^^^ + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); +#if _CCCL_CTK_AT_LEAST(13, 0) + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); +#else // ^^^ _CCCL_CTK_AT_LEAST(13, 0) ^^^ / vvv _CCCL_CTK_BELOW(13, 0) vvv + static_assert(cuda::__is_vector_type_v); +#endif // _CCCL_CTK_AT_LEAST(13, 0) + test(); test(); test(); @@ -72,6 +112,16 @@ __host__ __device__ void test() test(); #endif // ^^^ _CCCL_CTK_BELOW(13, 0) ^^^ + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); +#if _CCCL_CTK_AT_LEAST(13, 0) + static_assert(cuda::__is_vector_type_v); 
+ static_assert(cuda::__is_vector_type_v); +#else // ^^^ _CCCL_CTK_AT_LEAST(13, 0) ^^^ / vvv _CCCL_CTK_BELOW(13, 0) vvv + static_assert(cuda::__is_vector_type_v); +#endif // _CCCL_CTK_AT_LEAST(13, 0) + test(); test(); test(); @@ -81,6 +131,16 @@ __host__ __device__ void test() test(); #endif // ^^^ _CCCL_CTK_BELOW(13, 0) ^^^ + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); +#if _CCCL_CTK_AT_LEAST(13, 0) + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); +#else // ^^^ _CCCL_CTK_AT_LEAST(13, 0) ^^^ / vvv _CCCL_CTK_BELOW(13, 0) vvv + static_assert(cuda::__is_vector_type_v); +#endif // _CCCL_CTK_AT_LEAST(13, 0) + test(); test(); test(); @@ -90,11 +150,26 @@ __host__ __device__ void test() test(); #endif // ^^^ _CCCL_CTK_BELOW(13, 0) ^^^ + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); +#if _CCCL_CTK_AT_LEAST(13, 0) + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); +#else // ^^^ _CCCL_CTK_AT_LEAST(13, 0) ^^^ / vvv _CCCL_CTK_BELOW(13, 0) vvv + static_assert(cuda::__is_vector_type_v); +#endif // _CCCL_CTK_AT_LEAST(13, 0) + test(); test(); test(); test(); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + test(); test(); test(); @@ -104,11 +179,26 @@ __host__ __device__ void test() test(); #endif // ^^^ _CCCL_CTK_BELOW(13, 0) ^^^ + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); +#if _CCCL_CTK_AT_LEAST(13, 0) + static_assert(cuda::__is_vector_type_v); + static_assert(cuda::__is_vector_type_v); +#else // ^^^ _CCCL_CTK_AT_LEAST(13, 0) ^^^ / vvv _CCCL_CTK_BELOW(13, 0) vvv + static_assert(cuda::__is_vector_type_v); +#endif // _CCCL_CTK_AT_LEAST(13, 0) + + static_assert(cuda::__is_vector_type_v); + // 2. Test invalid combinations test(); test(); test(); + + static_assert(!cuda::__is_vector_type_v); + static_assert(!cuda::__is_vector_type_v); } int main(int, char**)