diff --git a/docs/libcudacxx/extended_api/mdspan.rst b/docs/libcudacxx/extended_api/mdspan.rst
index 028ac918aec..ca0582fa0d5 100644
--- a/docs/libcudacxx/extended_api/mdspan.rst
+++ b/docs/libcudacxx/extended_api/mdspan.rst
@@ -10,6 +10,7 @@ Mdspan
    mdspan/host_device_accessor
    mdspan/restrict_accessor
    mdspan/shared_memory_accessor
+   mdspan/mdspan_to_dlpack
 
 .. list-table::
    :widths: 25 45 30 30
@@ -34,3 +35,8 @@ Mdspan
      - ``mdspan`` and accessor for CUDA shared memory
      - CCCL 3.2.0
      - CUDA 13.2
+
+   * - :ref:`mdspan to dlpack <libcudacxx-extended-api-mdspan-mdspan-to-dlpack>`
+     - Convert an ``mdspan`` to a ``DLTensor``
+     - CCCL 3.2.0
+     - CUDA 13.2
diff --git a/docs/libcudacxx/extended_api/mdspan/mdspan_to_dlpack.rst b/docs/libcudacxx/extended_api/mdspan/mdspan_to_dlpack.rst
new file mode 100644
index 00000000000..1cf91c50ef8
--- /dev/null
+++ b/docs/libcudacxx/extended_api/mdspan/mdspan_to_dlpack.rst
@@ -0,0 +1,140 @@
+.. _libcudacxx-extended-api-mdspan-mdspan-to-dlpack:
+
+``mdspan`` to DLPack
+====================
+
+This functionality converts a ``cuda::host_mdspan``, ``cuda::device_mdspan``, or ``cuda::managed_mdspan`` into a non-owning `DLPack <https://github.com/dmlc/dlpack>`__ ``DLTensor`` view.
+
+Defined in the ``<cuda/mdspan>`` header.
+
+Conversion functions
+--------------------
+
+.. code:: cuda
+
+   namespace cuda {
+
+   template <class T, class Extents, class Layout, class Accessor>
+   [[nodiscard]] __dlpack_tensor<Extents::rank()>
+   to_dlpack_tensor(const cuda::host_mdspan<T, Extents, Layout, Accessor>& mdspan);
+
+   template <class T, class Extents, class Layout, class Accessor>
+   [[nodiscard]] __dlpack_tensor<Extents::rank()>
+   to_dlpack_tensor(const cuda::device_mdspan<T, Extents, Layout, Accessor>& mdspan);
+
+   template <class T, class Extents, class Layout, class Accessor>
+   [[nodiscard]] __dlpack_tensor<Extents::rank()>
+   to_dlpack_tensor(const cuda::managed_mdspan<T, Extents, Layout, Accessor>& mdspan);
+
+   } // namespace cuda
+
+Types
+-----
+
+``__dlpack_tensor`` is an internal class that stores a ``DLTensor`` and owns the backing storage for its ``shape`` and ``strides`` pointers. The class does not use any heap allocation.
+
+.. code:: cuda
+
+   namespace cuda {
+
+   template <size_t Rank>
+   class __dlpack_tensor {
+   public:
+     __dlpack_tensor() noexcept;
+     __dlpack_tensor(const __dlpack_tensor&) noexcept;
+     __dlpack_tensor(__dlpack_tensor&&) noexcept;
+     __dlpack_tensor& operator=(const __dlpack_tensor&) noexcept;
+     __dlpack_tensor& operator=(__dlpack_tensor&&) noexcept;
+     ~__dlpack_tensor() noexcept = default;
+
+     DLTensor& get() & noexcept [[lifetimebound]];
+     const DLTensor& get() const & noexcept [[lifetimebound]];
+
+     DLTensor& get() && noexcept = delete;
+     const DLTensor& get() const && noexcept = delete;
+   };
+
+   } // namespace cuda
+
+.. note:: **Lifetime**
+
+   The ``DLTensor`` returned by ``get()`` must not outlive the ``cuda::__dlpack_tensor`` wrapper. If the wrapper is destroyed or moved from, the ``DLTensor::shape`` and ``DLTensor::strides`` pointers dangle.
+
+.. note:: **Const-correctness**
+
+   ``DLTensor::data`` points at ``mdspan.data_handle()`` (or is ``nullptr`` if ``mdspan.size() == 0``). If ``T`` is ``const``, the pointer is ``const_cast``'d because ``DLTensor::data`` is unqualified.
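+A minimal sketch of the lifetime rule above (``md`` is any of the supported ``mdspan`` types):
+
+.. code:: cuda
+
+   // The rvalue-qualified get() overloads are deleted, so a temporary wrapper
+   // cannot leak a DLTensor whose shape/strides would dangle:
+   // auto& t = cuda::to_dlpack_tensor(md).get(); // ill-formed by design
+
+   // Keep the wrapper alive for as long as the DLTensor is used:
+   auto wrapper = cuda::to_dlpack_tensor(md);
+   DLTensor& t = wrapper.get();
+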
+Semantics
+---------
+
+The conversion produces a non-owning DLPack view of the ``mdspan`` data and metadata:
+
+- ``DLTensor::ndim`` is ``mdspan.rank()``.
+- For rank > 0, ``DLTensor::shape[i]`` is ``mdspan.extent(i)``.
+- For rank > 0, ``DLTensor::strides[i]`` is ``mdspan.stride(i)``.
+- ``DLTensor::byte_offset`` is always ``0``.
+- ``DLTensor::device`` is:
+
+  - ``{kDLCPU, 0}`` for ``cuda::host_mdspan``
+  - ``{kDLCUDA, /*device_id*/}`` for ``cuda::device_mdspan``
+  - ``{kDLCUDAManaged, 0}`` for ``cuda::managed_mdspan``
+
+Element types are mapped to ``DLDataType`` according to the DLPack conventions, including:
+
+- ``bool``.
+- Signed and unsigned integers.
+- IEEE-754 floating-point and extended-precision floating-point, including ``__half``, ``__nv_bfloat16``, ``__float128``, and FP8, FP6, and FP4 when available.
+- Complex: ``cuda::std::complex<__half>``, ``cuda::std::complex<float>``, and ``cuda::std::complex<double>``.
+- `CUDA built-in vector types <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#built-in-vector-types>`__, such as ``int2``, ``float4``, etc.
+- Vector types for extended floating-point, such as ``__half2``, ``__nv_fp8x4_e4m3``, etc.
+
+Constraints
+-----------
+
+- The accessor ``data_handle_type`` must be a pointer type.
+
+Runtime errors
+--------------
+
+- If any ``extent(i)`` or ``stride(i)`` cannot be represented in ``int64_t``, the conversion throws a ``std::invalid_argument`` exception.
+
+Availability notes
+------------------
+
+- This API is available only when the DLPack header is present, namely when ``dlpack/dlpack.h`` is found in the include path.
+- This API can be disabled by defining ``CCCL_DISABLE_DLPACK`` before including any library headers. In this case, ``dlpack/dlpack.h`` will not be included.
+
+References
+----------
+
+- `DLPack C API <https://dmlc.github.io/dlpack/latest/c_api.html>`__ documentation.
+
+Example
+-------
+
+.. code:: cuda
+
+   #include <cassert>
+   #include <cuda/mdspan>
+   #include <cuda/std/mdspan>
+   #include <dlpack/dlpack.h>
+
+   int main() {
+     using extents_t = cuda::std::extents<size_t, 2, 3>;
+
+     int data[6] = {0, 1, 2, 3, 4, 5};
+     cuda::host_mdspan<int, extents_t> md{data, extents_t{}};
+
+     auto dl = cuda::to_dlpack_tensor(md);
+     const auto& dltensor = dl.get();
+     // Note: copying the result (`auto dltensor = dl.get();`) copies the
+     // DLTensor, but the copy's `shape`/`strides` still point into `dl`, so the
+     // wrapper must stay alive either way.
+
+     // `dl` owns the shape/stride storage; `dltensor.data` is a non-owning pointer to `data`.
+     assert(dltensor.device.device_type == kDLCPU);
+     assert(dltensor.ndim == 2);
+     assert(dltensor.shape[0] == 2 && dltensor.shape[1] == 3);
+     assert(dltensor.strides[0] == 3 && dltensor.strides[1] == 1);
+     assert(dltensor.data == data);
+   }
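+A second sketch, converting a ``cuda::device_mdspan``; the allocation size and extents are illustrative, and ``cudaMalloc`` error handling is omitted:
+
+.. code:: cuda
+
+   #include <cuda/mdspan>
+   #include <cuda/std/mdspan>
+   #include <dlpack/dlpack.h>
+
+   int main() {
+     using extents_t = cuda::std::dims<2>;
+
+     float* data = nullptr;
+     cudaMalloc(&data, 6 * sizeof(float));
+     cuda::device_mdspan<float, extents_t> md{data, extents_t{2, 3}};
+
+     auto dl = cuda::to_dlpack_tensor(md);
+     // The device ordinal is queried from the pointer itself.
+     return dl.get().device.device_type == kDLCUDA ? 0 : 1;
+   }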
diff --git a/libcudacxx/include/cuda/__mdspan/mdspan_to_dlpack.h b/libcudacxx/include/cuda/__mdspan/mdspan_to_dlpack.h
new file mode 100644
index 00000000000..b6ed93f4b53
--- /dev/null
+++ b/libcudacxx/include/cuda/__mdspan/mdspan_to_dlpack.h
@@ -0,0 +1,311 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA___MDSPAN_MDSPAN_TO_DLPACK_H
+#define _CUDA___MDSPAN_MDSPAN_TO_DLPACK_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#if _CCCL_HAS_DLPACK()
+
+#  include
+#  include
+#  include
+#  include
+#  include
+#  include
+#  include
+#  include
+#  include
+#  include
+#  include
+#  include
+#  include
+#  include
+#  include
+#  include
+#  include
+#  include
+#  include
+
+#  include <dlpack/dlpack.h>
+
+#  include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA
+
+template <class _ElementType>
+[[nodiscard]] _CCCL_HOST_API inline ::DLDataType __data_type_to_dlpack() noexcept
+{
+  if constexpr (::cuda::std::is_same_v<_ElementType, bool>)
+  {
+    return ::DLDataType{::kDLBool, 8, 1};
+  }
+  //--------------------------------------------------------------------------------------------------------------------
+  // Integer types (signed and unsigned)
+  else if constexpr (::cuda::std::__cccl_is_integer_v<_ElementType>)
+  {
+    return ::DLDataType{
+      (::cuda::std::is_signed_v<_ElementType>) ? ::kDLInt : ::kDLUInt, ::cuda::std::__num_bits_v<_ElementType>, 1};
+  }
+  //--------------------------------------------------------------------------------------------------------------------
+  // bfloat16 (must come before general floating-point)
+#  if _CCCL_HAS_NVBF16()
+  else if constexpr (::cuda::std::is_same_v<_ElementType, ::__nv_bfloat16>)
+  {
+    return ::DLDataType{::kDLBfloat, 16, 1};
+  }
+#  endif // _CCCL_HAS_NVBF16()
+  //--------------------------------------------------------------------------------------------------------------------
+  // Low-precision floating-point types (must come before general floating-point)
+#  if _CCCL_HAS_NVFP8_E4M3()
+  else if constexpr (::cuda::std::is_same_v<_ElementType, ::__nv_fp8_e4m3>)
+  {
+    return ::DLDataType{::kDLFloat8_e4m3fn, 8, 1};
+  }
+#  endif // _CCCL_HAS_NVFP8_E4M3()
+#  if _CCCL_HAS_NVFP8_E5M2()
+  else if constexpr (::cuda::std::is_same_v<_ElementType, ::__nv_fp8_e5m2>)
+  {
+    return ::DLDataType{::kDLFloat8_e5m2, 8, 1};
+  }
+#  endif // _CCCL_HAS_NVFP8_E5M2()
+#  if _CCCL_HAS_NVFP8_E8M0()
+  else if constexpr (::cuda::std::is_same_v<_ElementType, ::__nv_fp8_e8m0>)
+  {
+    return ::DLDataType{::kDLFloat8_e8m0fnu, 8, 1};
+  }
+#  endif // _CCCL_HAS_NVFP8_E8M0()
+#  if _CCCL_HAS_NVFP6_E2M3()
+  else if constexpr (::cuda::std::is_same_v<_ElementType, ::__nv_fp6_e2m3>)
+  {
+    return ::DLDataType{::kDLFloat6_e2m3fn, 6, 1};
+  }
+#  endif // _CCCL_HAS_NVFP6_E2M3()
+#  if _CCCL_HAS_NVFP6_E3M2()
+  else if constexpr (::cuda::std::is_same_v<_ElementType, ::__nv_fp6_e3m2>)
+  {
+    return ::DLDataType{::kDLFloat6_e3m2fn, 6, 1};
+  }
+#  endif // _CCCL_HAS_NVFP6_E3M2()
+#  if _CCCL_HAS_NVFP4_E2M1()
+  else if constexpr (::cuda::std::is_same_v<_ElementType, ::__nv_fp4_e2m1>)
+  {
+    return ::DLDataType{::kDLFloat4_e2m1fn, 4, 1};
+  }
+#  endif // _CCCL_HAS_NVFP4_E2M1()
+  //--------------------------------------------------------------------------------------------------------------------
+  // Floating-point types (after the more specific types above)
+  else if constexpr (::cuda::is_floating_point_v<_ElementType>)
+  {
+    return ::DLDataType{::kDLFloat, ::cuda::std::__num_bits_v<_ElementType>, 1};
+  }
+  //--------------------------------------------------------------------------------------------------------------------
+  // Complex types
+  // 256-bit data types are not supported in DLPack, e.g. cuda::std::complex<__float128>
+  else if constexpr (::cuda::std::__is_cuda_std_complex_v<_ElementType> && sizeof(_ElementType) <= sizeof(double) * 2)
+  {
+    // DLPack encodes complex numbers as a compact struct of two scalar values, and `bits` stores
+    // the size of the full complex number (e.g. std::complex<float> => bits=64).
+    return ::DLDataType{::kDLComplex, sizeof(_ElementType) * CHAR_BIT, 1};
+  }
+  //--------------------------------------------------------------------------------------------------------------------
+  // CUDA built-in vector types
+#  if _CCCL_HAS_CTK()
+  else if constexpr (::cuda::__is_vector_type_v<_ElementType> || ::cuda::__is_extended_fp_vector_type_v<_ElementType>)
+  {
+    constexpr ::cuda::std::uint16_t __lanes = ::cuda::std::tuple_size_v<_ElementType>;
+    if constexpr (__lanes == 2 || __lanes == 4)
+    {
+      using __scalar_t = ::cuda::std::remove_cv_t<::cuda::std::tuple_element_t<0, _ElementType>>;
+      auto __scalar    = ::cuda::__data_type_to_dlpack<__scalar_t>();
+      __scalar.lanes   = __lanes;
+      return __scalar;
+    }
+    else
+    {
+      static_assert(::cuda::std::__always_false_v<_ElementType>, "Unsupported vector type");
+      return ::DLDataType{};
+    }
+  }
+#  endif // _CCCL_HAS_CTK()
+  //--------------------------------------------------------------------------------------------------------------------
+  // Unsupported types
+  else
+  {
+    static_assert(::cuda::std::__always_false_v<_ElementType>, "Unsupported type");
+    return ::DLDataType{};
+  }
+}
+
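+// Illustration (not part of the original header): mappings produced by the
+// helper above, assuming 32-bit int and the CUDA built-in float4:
+//   __data_type_to_dlpack<int>()    -> DLDataType{kDLInt,     32, 1}
+//   __data_type_to_dlpack<float4>() -> DLDataType{kDLFloat,   32, 4}
+//   __data_type_to_dlpack<::cuda::std::complex<float>>()
+//                                   -> DLDataType{kDLComplex, 64, 1}
+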
+template <::cuda::std::size_t _Rank>
+class __dlpack_tensor
+{
+  ::cuda::std::array<::cuda::std::int64_t, _Rank> __shape{};
+  ::cuda::std::array<::cuda::std::int64_t, _Rank> __strides{};
+  ::DLTensor __tensor{};
+
+  _CCCL_HOST_API void __update_tensor() noexcept
+  {
+    __tensor.shape   = _Rank > 0 ? __shape.data() : nullptr;
+    __tensor.strides = _Rank > 0 ? __strides.data() : nullptr;
+  }
+
+public:
+  _CCCL_HOST_API explicit __dlpack_tensor() noexcept
+  {
+    __update_tensor();
+  }
+
+  _CCCL_HOST_API __dlpack_tensor(const __dlpack_tensor& __other) noexcept
+      : __shape{__other.__shape}
+      , __strides{__other.__strides}
+      , __tensor{__other.__tensor}
+  {
+    __update_tensor();
+  }
+
+  _CCCL_HOST_API __dlpack_tensor(__dlpack_tensor&& __other) noexcept
+      : __shape{::cuda::std::move(__other.__shape)}
+      , __strides{::cuda::std::move(__other.__strides)}
+      , __tensor{__other.__tensor}
+  {
+    __other.__tensor = ::DLTensor{};
+    __update_tensor();
+  }
+
+  _CCCL_HOST_API __dlpack_tensor& operator=(const __dlpack_tensor& __other) noexcept
+  {
+    if (this == &__other)
+    {
+      return *this;
+    }
+    __shape   = __other.__shape;
+    __strides = __other.__strides;
+    __tensor  = __other.__tensor;
+    __update_tensor();
+    return *this;
+  }
+
+  _CCCL_HOST_API __dlpack_tensor& operator=(__dlpack_tensor&& __other) noexcept
+  {
+    if (this == &__other)
+    {
+      return *this;
+    }
+    __shape          = ::cuda::std::move(__other.__shape);
+    __strides        = ::cuda::std::move(__other.__strides);
+    __tensor         = __other.__tensor;
+    __other.__tensor = ::DLTensor{};
+    __update_tensor();
+    return *this;
+  }
+
+  _CCCL_HIDE_FROM_ABI ~__dlpack_tensor() noexcept = default;
+
+  [[nodiscard]] _CCCL_HOST_API ::DLTensor& get() & noexcept _CCCL_LIFETIMEBOUND
+  {
+    return __tensor;
+  }
+
+  [[nodiscard]] _CCCL_HOST_API ::DLTensor& get() && noexcept = delete;
+
+  [[nodiscard]] _CCCL_HOST_API const ::DLTensor& get() const& noexcept _CCCL_LIFETIMEBOUND
+  {
+    return __tensor;
+  }
+
+  [[nodiscard]] _CCCL_HOST_API const ::DLTensor& get() const&& noexcept = delete;
+};
+
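+// `__tensor.shape`/`__tensor.strides` point into the wrapper's own member
+// arrays, which is why every constructor and assignment operator above ends by
+// re-running `__update_tensor()`: after `__dlpack_tensor<2> __b = __a;`, the
+// copy `__b` refers to its own storage and never aliases `__a`'s.
+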
+template <class _ElementType, class _Extents, class _Layout, class _Accessor>
+[[nodiscard]] _CCCL_HOST_API __dlpack_tensor<_Extents::rank()>
+__to_dlpack(const ::cuda::std::mdspan<_ElementType, _Extents, _Layout, _Accessor>& __mdspan,
+            ::DLDeviceType __device_type,
+            int __device_id)
+{
+  static_assert(::cuda::std::is_pointer_v<typename _Accessor::data_handle_type>, "data_handle_type must be a pointer");
+  using __element_type = ::cuda::std::remove_cv_t<_ElementType>;
+  __dlpack_tensor<_Extents::rank()> __wrapper{};
+  auto& __tensor  = __wrapper.get();
+  __tensor.data   = __mdspan.size() > 0 ? const_cast<__element_type*>(__mdspan.data_handle()) : nullptr;
+  __tensor.device = ::DLDevice{__device_type, __device_id};
+  __tensor.ndim   = static_cast<::cuda::std::int32_t>(__mdspan.rank());
+  __tensor.dtype  = ::cuda::__data_type_to_dlpack<::cuda::std::remove_cv_t<_ElementType>>();
+  if constexpr (_Extents::rank() > 0)
+  {
+    constexpr auto __max_extent = ::cuda::std::numeric_limits<::cuda::std::int64_t>::max();
+    for (::cuda::std::size_t __i = 0; __i < __mdspan.rank(); ++__i)
+    {
+      if (::cuda::std::cmp_greater(__mdspan.extent(__i), __max_extent))
+      {
+        _CCCL_THROW(::std::invalid_argument{"Extent is too large"});
+      }
+      if (::cuda::std::cmp_greater(__mdspan.stride(__i), __max_extent))
+      {
+        _CCCL_THROW(::std::invalid_argument{"Stride is too large"});
+      }
+      __tensor.shape[__i]   = static_cast<::cuda::std::int64_t>(__mdspan.extent(__i));
+      __tensor.strides[__i] = static_cast<::cuda::std::int64_t>(__mdspan.stride(__i));
+    }
+  }
+  __tensor.byte_offset = 0;
+  return __wrapper;
+}
+
+/***********************************************************************************************************************
+ * Public API
+ **********************************************************************************************************************/
+
+template <class _ElementType, class _Extents, class _Layout, class _Accessor>
+[[nodiscard]] _CCCL_HOST_API __dlpack_tensor<_Extents::rank()>
+to_dlpack_tensor(const ::cuda::host_mdspan<_ElementType, _Extents, _Layout, _Accessor>& __mdspan)
+{
+  using __mdspan_type = ::cuda::std::mdspan<_ElementType, _Extents, _Layout, _Accessor>;
+  return ::cuda::__to_dlpack(__mdspan_type{__mdspan}, ::kDLCPU, 0);
+}
+
+template <class _ElementType, class _Extents, class _Layout, class _Accessor>
+[[nodiscard]] _CCCL_HOST_API __dlpack_tensor<_Extents::rank()>
+to_dlpack_tensor(const ::cuda::device_mdspan<_ElementType, _Extents, _Layout, _Accessor>& __mdspan)
+{
+  using __mdspan_type = ::cuda::std::mdspan<_ElementType, _Extents, _Layout, _Accessor>;
+  ::CUpointer_attribute __attrs[1] = {::CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL};
+  int __ptr_dev_id                 = 0;
+  void* __results[1]               = {&__ptr_dev_id};
+  const auto __status = ::cuda::__driver::__pointerGetAttributesNoThrow(__attrs, __results, __mdspan.data_handle());
+  if (__status != ::cudaSuccess)
+  {
+    _CCCL_THROW(::std::invalid_argument{"Failed to get device ordinal of a pointer"});
+  }
+  return ::cuda::__to_dlpack(__mdspan_type{__mdspan}, ::kDLCUDA, __ptr_dev_id);
+}
+
+template <class _ElementType, class _Extents, class _Layout, class _Accessor>
+[[nodiscard]] _CCCL_HOST_API __dlpack_tensor<_Extents::rank()>
+to_dlpack_tensor(const ::cuda::managed_mdspan<_ElementType, _Extents, _Layout, _Accessor>& __mdspan)
+{
+  using __mdspan_type = ::cuda::std::mdspan<_ElementType, _Extents, _Layout, _Accessor>;
+  return ::cuda::__to_dlpack(__mdspan_type{__mdspan}, ::kDLCUDAManaged, 0);
+}
+
+_CCCL_END_NAMESPACE_CUDA
+
+#  include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CCCL_HAS_DLPACK()
+#endif // _CUDA___MDSPAN_MDSPAN_TO_DLPACK_H
diff --git a/libcudacxx/include/cuda/__type_traits/vector_type.h b/libcudacxx/include/cuda/__type_traits/vector_type.h
index 7e21b8a806e..59badd8162c 100644
--- a/libcudacxx/include/cuda/__type_traits/vector_type.h
+++ b/libcudacxx/include/cuda/__type_traits/vector_type.h
@@ -24,7 +24,6 @@
 #if _CCCL_HAS_CTK()
 #  include
-#  include
 #  include
 
 #  if !_CCCL_CUDA_COMPILATION()
@@ -340,16 +339,204 @@ template
 }
 }
 
+_CCCL_SUPPRESS_DEPRECATED_PUSH
+
 template <class _Tp, ::cuda::std::size_t _Size>
 using __vector_type_t = decltype(::cuda::__cccl_vector_type_t_impl<_Tp, _Size>());
 
 template <class _Tp, ::cuda::std::size_t _Size>
 inline constexpr bool __has_vector_type_v = !::cuda::std::is_same_v<__vector_type_t<_Tp, _Size>, void>;
 
+template <class _Tp>
+inline constexpr bool __is_vector_type_v = false;
+
+template <>
+inline constexpr bool __is_vector_type_v<::char1> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::char2> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::char3> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::char4> = true;
+
+template <>
+inline constexpr bool __is_vector_type_v<::uchar1> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::uchar2> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::uchar3> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::uchar4> = true;
+
+template <>
+inline constexpr bool __is_vector_type_v<::short1> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::short2> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::short3> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::short4> = true;
+
+template <>
+inline constexpr bool __is_vector_type_v<::ushort1> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::ushort2> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::ushort3> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::ushort4> = true;
+
+template <>
+inline constexpr bool __is_vector_type_v<::int1> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::int2> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::int3> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::int4> = true;
+
+template <>
+inline constexpr bool __is_vector_type_v<::uint1> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::uint2> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::uint3> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::uint4> = true;
+
+template <>
+inline constexpr bool __is_vector_type_v<::long1> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::long2> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::long3> = true;
+#  if _CCCL_CTK_AT_LEAST(13, 0)
+template <>
+inline constexpr bool __is_vector_type_v<::long4_16a> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::long4_32a> = true;
+#  endif // ^^^ _CCCL_CTK_AT_LEAST(13, 0)
+template <>
+inline constexpr bool __is_vector_type_v<::long4> = true;
+
+template <>
+inline constexpr bool __is_vector_type_v<::ulong1> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::ulong2> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::ulong3> = true;
+#  if _CCCL_CTK_AT_LEAST(13, 0)
+template <>
+inline constexpr bool __is_vector_type_v<::ulong4_16a> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::ulong4_32a> = true;
+#  endif // ^^^ _CCCL_CTK_AT_LEAST(13, 0)
+template <>
+inline constexpr bool __is_vector_type_v<::ulong4> = true;
+
+template <>
+inline constexpr bool __is_vector_type_v<::longlong1> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::longlong2> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::longlong3> = true;
+#  if _CCCL_CTK_AT_LEAST(13, 0)
+template <>
+inline constexpr bool __is_vector_type_v<::longlong4_16a> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::longlong4_32a> = true;
+#  endif // ^^^ _CCCL_CTK_AT_LEAST(13, 0)
+template <>
+inline constexpr bool __is_vector_type_v<::longlong4> = true;
+
+template <>
+inline constexpr bool __is_vector_type_v<::ulonglong1> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::ulonglong2> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::ulonglong3> = true;
+#  if _CCCL_CTK_AT_LEAST(13, 0)
+template <>
+inline constexpr
bool __is_vector_type_v<::ulonglong4_16a> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::ulonglong4_32a> = true;
+#  endif // ^^^ _CCCL_CTK_AT_LEAST(13, 0)
+template <>
+inline constexpr bool __is_vector_type_v<::ulonglong4> = true;
+
+template <>
+inline constexpr bool __is_vector_type_v<::float1> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::float2> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::float3> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::float4> = true;
+
+template <>
+inline constexpr bool __is_vector_type_v<::double1> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::double2> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::double3> = true;
+#  if _CCCL_CTK_AT_LEAST(13, 0)
+template <>
+inline constexpr bool __is_vector_type_v<::double4_16a> = true;
+template <>
+inline constexpr bool __is_vector_type_v<::double4_32a> = true;
+#  endif // ^^^ _CCCL_CTK_AT_LEAST(13, 0)
+template <>
+inline constexpr bool __is_vector_type_v<::double4> = true;
+
+template <>
+inline constexpr bool __is_vector_type_v<::dim3> = true;
+
+template <class _Tp>
+inline constexpr bool __is_extended_fp_vector_type_v = false;
+
+#  if _CCCL_HAS_NVFP16()
+template <>
+inline constexpr bool __is_extended_fp_vector_type_v<::__half2> = true;
+#  endif // _CCCL_HAS_NVFP16()
+
+#  if _CCCL_HAS_NVBF16()
+template <>
+inline constexpr bool __is_extended_fp_vector_type_v<::__nv_bfloat162> = true;
+#  endif // _CCCL_HAS_NVBF16()
+
+#  if _CCCL_HAS_NVFP8()
+template <>
+inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp8x2_e4m3> = true;
+template <>
+inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp8x2_e5m2> = true;
+template <>
+inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp8x4_e4m3> = true;
+template <>
+inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp8x4_e5m2> = true;
+#    if _CCCL_CTK_AT_LEAST(12, 8)
+template <>
+inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp8x2_e8m0> = true;
+template <>
+inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp8x4_e8m0> = true;
+#    endif // _CCCL_CTK_AT_LEAST(12, 8)
+#  endif // _CCCL_HAS_NVFP8()
+
+#  if _CCCL_HAS_NVFP6()
+template <>
+inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp6x2_e2m3> = true;
+template <>
+inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp6x2_e3m2> = true;
+template <>
+inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp6x4_e2m3> = true;
+template <>
+inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp6x4_e3m2> = true;
+#  endif // _CCCL_HAS_NVFP6()
+
+#  if _CCCL_HAS_NVFP4()
+template <>
+inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp4x2_e2m1> = true;
+template <>
+inline constexpr bool __is_extended_fp_vector_type_v<::__nv_fp4x4_e2m1> = true;
+#  endif // _CCCL_HAS_NVFP4()
+
+_CCCL_SUPPRESS_DEPRECATED_POP
+
 _CCCL_END_NAMESPACE_CUDA
 
 #  include
 
 #endif // !_CCCL_HAS_CTK()
-
 #endif // _CUDA__TYPE_TRAITS_VECTOR_TYPE_H
diff --git a/libcudacxx/include/cuda/mdspan b/libcudacxx/include/cuda/mdspan
index ae81a30219a..3129198d02a 100644
--- a/libcudacxx/include/cuda/mdspan
+++ b/libcudacxx/include/cuda/mdspan
@@ -22,6 +22,7 @@
 #endif // no system header
 
 #include
+#include <cuda/__mdspan/mdspan_to_dlpack.h>
 #include
 #include
 #include
diff --git a/libcudacxx/include/cuda/std/__tuple_dir/vector_types.h b/libcudacxx/include/cuda/std/__tuple_dir/vector_types.h
index a44443ed091..125b6b87492 100644
--- a/libcudacxx/include/cuda/std/__tuple_dir/vector_types.h
+++ b/libcudacxx/include/cuda/std/__tuple_dir/vector_types.h
@@ -121,6 +121,26 @@ _LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__half, __half, 2)
 #  if
 _CCCL_HAS_NVBF16()
 _LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_bfloat16, __nv_bfloat16, 2)
 #  endif // _CCCL_HAS_NVBF16()
+#  if _CCCL_HAS_NVFP8()
+_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp8x, __nv_fp8_e5m2, 2, _e5m2)
+_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp8x, __nv_fp8_e5m2, 4, _e5m2)
+_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp8x, __nv_fp8_e4m3, 2, _e4m3)
+_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp8x, __nv_fp8_e4m3, 4, _e4m3)
+#    if _CCCL_CTK_AT_LEAST(12, 8)
+_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp8x, __nv_fp8_e8m0, 2, _e8m0)
+_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp8x, __nv_fp8_e8m0, 4, _e8m0)
+#    endif // _CCCL_CTK_AT_LEAST(12, 8)
+#  endif // _CCCL_HAS_NVFP8()
+#  if _CCCL_HAS_NVFP6()
+_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp6x, __nv_fp6_e3m2, 2, _e3m2)
+_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp6x, __nv_fp6_e3m2, 4, _e3m2)
+_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp6x, __nv_fp6_e2m3, 2, _e2m3)
+_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp6x, __nv_fp6_e2m3, 4, _e2m3)
+#  endif // _CCCL_HAS_NVFP6()
+#  if _CCCL_HAS_NVFP4()
+_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp4x, __nv_fp4_e2m1, 2, _e2m1)
+_LIBCUDACXX_SPECIALIZE_TUPLE_INTERFACE(__nv_fp4x, __nv_fp4_e2m1, 4, _e2m1)
+#  endif // _CCCL_HAS_NVFP4()
 
 template
 struct __get_element;
@@ -273,6 +293,26 @@ _LIBCUDACXX_SPECIALIZE_GET(__half2, __half)
 #  if _CCCL_HAS_NVBF16()
 _LIBCUDACXX_SPECIALIZE_GET(__nv_bfloat162, __nv_bfloat16)
 #  endif // _CCCL_HAS_NVBF16()
+#  if _CCCL_HAS_NVFP8()
+_LIBCUDACXX_SPECIALIZE_GET(__nv_fp8x2_e5m2, __nv_fp8_e5m2)
+_LIBCUDACXX_SPECIALIZE_GET(__nv_fp8x4_e5m2, __nv_fp8_e5m2)
+_LIBCUDACXX_SPECIALIZE_GET(__nv_fp8x2_e4m3, __nv_fp8_e4m3)
+_LIBCUDACXX_SPECIALIZE_GET(__nv_fp8x4_e4m3, __nv_fp8_e4m3)
+#    if _CCCL_CTK_AT_LEAST(12, 8)
+_LIBCUDACXX_SPECIALIZE_GET(__nv_fp8x2_e8m0, __nv_fp8_e8m0)
+_LIBCUDACXX_SPECIALIZE_GET(__nv_fp8x4_e8m0, __nv_fp8_e8m0)
+#    endif // _CCCL_CTK_AT_LEAST(12, 8)
+#  endif // _CCCL_HAS_NVFP8()
+#  if _CCCL_HAS_NVFP6()
+_LIBCUDACXX_SPECIALIZE_GET(__nv_fp6x2_e3m2, __nv_fp6_e3m2)
+_LIBCUDACXX_SPECIALIZE_GET(__nv_fp6x4_e3m2, __nv_fp6_e3m2)
+_LIBCUDACXX_SPECIALIZE_GET(__nv_fp6x2_e2m3, __nv_fp6_e2m3)
+_LIBCUDACXX_SPECIALIZE_GET(__nv_fp6x4_e2m3, __nv_fp6_e2m3)
+#  endif // _CCCL_HAS_NVFP6()
+#  if _CCCL_HAS_NVFP4()
+_LIBCUDACXX_SPECIALIZE_GET(__nv_fp4x2_e2m1, __nv_fp4_e2m1)
+_LIBCUDACXX_SPECIALIZE_GET(__nv_fp4x4_e2m1, __nv_fp4_e2m1)
+#  endif // _CCCL_HAS_NVFP4()
 
 _CCCL_END_NAMESPACE_CUDA_STD
diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/mdspan_to_dlpack/mdspan_to_dlpack.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/mdspan_to_dlpack/mdspan_to_dlpack.pass.cpp
new file mode 100644
index 00000000000..955a75c85ab
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/mdspan_to_dlpack/mdspan_to_dlpack.pass.cpp
@@ -0,0 +1,527 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+// UNSUPPORTED: nvrtc
+
+#include <cstdint>
+#include <cuda/mdspan>
+#include <cuda/std/array>
+#include <cuda/std/cassert>
+#include <cuda/std/complex>
+#include <cuda/std/utility>
+
+#include "test_macros.h"
+
+void check_datatype(const DLDataType& dt, uint8_t code, uint8_t bits, uint16_t lanes)
+{
+  assert(dt.code == code);
+  assert(dt.bits == bits);
+  assert(dt.lanes == lanes);
+}
+
+bool test_mdspan_to_dlpack_host_layout_right()
+{
+  using extents_t = cuda::std::extents<cuda::std::size_t, 2, 3>;
+  int data[6]     = {0, 1, 2, 3, 4, 5};
+  cuda::host_mdspan<int, extents_t> md{data, extents_t{}};
+  auto dlpack_wrapper  = cuda::to_dlpack_tensor(md);
+  const auto& dltensor = dlpack_wrapper.get();
+
+  assert(dltensor.device.device_type == kDLCPU);
+  assert(dltensor.device.device_id == 0);
+  assert(dltensor.ndim == 2);
+  check_datatype(dltensor.dtype, kDLInt, 32, 1);
+  assert(dltensor.shape != nullptr);
+  assert(dltensor.strides != nullptr);
+  assert(dltensor.shape[0] == 2);
+  assert(dltensor.shape[1] == 3);
+  assert(dltensor.strides[0] == 3);
+  assert(dltensor.strides[1] == 1);
+  assert(dltensor.byte_offset == 0);
+  assert(dltensor.data == data);
+  return true;
+}
+
+bool test_mdspan_to_dlpack_host_layout_left()
+{
+  using extents_t = cuda::std::extents<cuda::std::size_t, 2, 3>;
+  int data[6]     = {0, 1, 2, 3, 4, 5};
+  cuda::host_mdspan<int, extents_t, cuda::std::layout_left> md{data, extents_t{}};
+  auto dlpack_wrapper  = cuda::to_dlpack_tensor(md);
+  const auto& dltensor = dlpack_wrapper.get();
+
+  assert(dltensor.device.device_type == kDLCPU);
+  assert(dltensor.device.device_id == 0);
+  check_datatype(dltensor.dtype, kDLInt, 32, 1);
+  assert(dltensor.ndim == 2);
+  assert(dltensor.shape != nullptr);
+  assert(dltensor.strides != nullptr);
+  assert(dltensor.shape[0] == 2);
+  assert(dltensor.shape[1] == 3);
+  assert(dltensor.strides[0] == 1);
+  assert(dltensor.strides[1] == 2);
+  assert(dltensor.byte_offset == 0);
+  assert(dltensor.data == data);
+  return true;
+}
+
+bool test_mdspan_to_dlpack_empty_size()
+{
+  using extents_t = cuda::std::dims<2>;
+  int data[1] = {42};
+  cuda::host_mdspan<int, extents_t> m{data, extents_t{0, 3}};
+  auto dlpack_wrapper  = cuda::to_dlpack_tensor(m);
+  const auto& dltensor = dlpack_wrapper.get();
+
+  assert(dltensor.device.device_type == kDLCPU);
+  assert(dltensor.device.device_id == 0);
+  check_datatype(dltensor.dtype, kDLInt, 32, 1);
+  assert(dltensor.ndim == 2);
+  assert(dltensor.shape[0] == 0);
+  assert(dltensor.shape[1] == 3);
+  assert(dltensor.strides[0] == 3);
+  assert(dltensor.strides[1] == 1);
+  assert(dltensor.byte_offset == 0);
+  assert(dltensor.data == nullptr); // size() == 0 => nullptr
+  return true;
+}
+
+bool test_mdspan_to_dlpack_rank_0()
+{
+  using extents_t = cuda::std::extents<cuda::std::size_t>;
+  int data[1] = {7};
+  cuda::host_mdspan<int, extents_t> md{data, extents_t{}};
+  auto dlpack_wrapper  = cuda::to_dlpack_tensor(md);
+  const auto& dltensor = dlpack_wrapper.get();
+
+  assert(dltensor.device.device_type == kDLCPU);
+  assert(dltensor.device.device_id == 0);
+  check_datatype(dltensor.dtype, kDLInt, 32, 1);
+  assert(dltensor.ndim == 0);
+  assert(dltensor.shape == nullptr);
+  assert(dltensor.strides == nullptr);
+  assert(dltensor.byte_offset == 0);
+  assert(dltensor.data == data); // rank-0 mdspan has size() == 1
+  return true;
+}
+
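+// Illustration (not part of the original test set): how a DLPack consumer might
+// recover the element count from a converted tensor; this also holds for the
+// rank-0 case above, matching mdspan::size().
+inline int64_t dltensor_num_elements(const DLTensor& t)
+{
+  int64_t n = 1;
+  for (int32_t i = 0; i < t.ndim; ++i)
+  {
+    n *= t.shape[i]; // shape is non-null whenever ndim > 0
+  }
+  return n;
+}
+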
+bool test_mdspan_to_dlpack_const_pointer()
+{
+  using extents_t = cuda::std::dims<3>;
+  // 2 * 3 * 4 = 24 elements so the view does not exceed the backing array.
+  const int data[24] = {0, 1, 2, 3, 4, 5};
+  cuda::host_mdspan<const int, extents_t> md{data, extents_t{2, 3, 4}};
+  auto dlpack_wrapper  = cuda::to_dlpack_tensor(md);
+  const auto& dltensor = dlpack_wrapper.get();
+
+  assert(dltensor.device.device_type == kDLCPU);
+  assert(dltensor.device.device_id == 0);
+  check_datatype(dltensor.dtype, kDLInt, 32, 1);
+  assert(dltensor.ndim == 3);
+  assert(dltensor.shape[0] == 2);
+  assert(dltensor.shape[1] == 3);
+  assert(dltensor.shape[2] == 4);
+  assert(dltensor.strides[0] == 12);
+  assert(dltensor.strides[1] == 4);
+  assert(dltensor.strides[2] == 1);
+  assert(dltensor.byte_offset == 0);
+  assert(dltensor.data == data); // const element type: DLTensor::data is const_cast'd
+  return true;
+}
+
+bool test_mdspan_to_dlpack_device()
+{
+  using extents_t = cuda::std::extents<cuda::std::size_t, 2, 3>;
+  float* data = nullptr;
+  assert(cudaMalloc(&data, 6 * sizeof(float)) == cudaSuccess);
+  cuda::device_mdspan<float, extents_t> md{data, extents_t{}};
+  auto dlpack_wrapper  = cuda::to_dlpack_tensor(md);
+  const auto& dltensor = dlpack_wrapper.get();
+
+  assert(dltensor.device.device_type == kDLCUDA);
+  assert(dltensor.device.device_id == 0);
+  assert(dltensor.ndim == 2);
+  check_datatype(dltensor.dtype, kDLFloat, 32, 1);
+  assert(dltensor.shape[0] == 2);
+  assert(dltensor.shape[1] == 3);
+  assert(dltensor.strides[0] == 3);
+  assert(dltensor.strides[1] == 1);
+  assert(dltensor.byte_offset == 0);
+  assert(dltensor.data == data);
+  return true;
+}
+
+bool test_mdspan_to_dlpack_managed()
+{
+  using extents_t = cuda::std::extents<cuda::std::size_t, 2, 3>;
+  float* data = nullptr;
+  assert(cudaMallocManaged(&data, 6 * sizeof(float)) == cudaSuccess);
+  cuda::managed_mdspan<float, extents_t> md{data, extents_t{}};
+  auto dlpack_wrapper  = cuda::to_dlpack_tensor(md);
+  const auto& dltensor = dlpack_wrapper.get();
+
+  assert(dltensor.device.device_type == kDLCUDAManaged);
+  assert(dltensor.device.device_id == 0);
+  assert(dltensor.ndim == 2);
+  check_datatype(dltensor.dtype, kDLFloat, 32, 1);
+  assert(dltensor.shape[0] == 2);
+  assert(dltensor.shape[1] == 3);
+  assert(dltensor.strides[0] == 3);
+  assert(dltensor.strides[1] == 1);
+  assert(dltensor.byte_offset == 0);
+  assert(dltensor.data == data);
+  return true;
+}
+
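+// Sketch (not part of the original patch): exercising the std::invalid_argument
+// path for extents that do not fit in int64_t. Assumes exceptions are enabled
+// (TEST_HAS_NO_EXCEPTIONS comes from test_macros.h) and that <stdexcept> is
+// reachable; it would be invoked from main() alongside the tests above.
+#ifndef TEST_HAS_NO_EXCEPTIONS
+bool test_mdspan_to_dlpack_extent_overflow()
+{
+  using extents_t = cuda::std::extents<cuda::std::uint64_t, cuda::std::dynamic_extent>;
+  int dummy[1] = {};
+  // An extent of 2^63 is representable in uint64_t but not in DLTensor::shape's int64_t.
+  cuda::host_mdspan<int, extents_t> md{dummy, extents_t{cuda::std::uint64_t{1} << 63}};
+  try
+  {
+    auto dl = cuda::to_dlpack_tensor(md);
+    (void) dl;
+    assert(false);
+  }
+  catch (const std::invalid_argument&)
+  {
+  }
+  return true;
+}
+#endif // TEST_HAS_NO_EXCEPTIONS
+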
+template <class ListT>
+struct test_mdspan_to_dlpack_types_fn;
+
+// The expected-type array is sized by the type list so that each list entry has
+// a matching expected DLDataType.
+template <class... Ts>
+struct test_mdspan_to_dlpack_types_fn<cuda::std::__type_list<Ts...>>
+{
+  using list_t = cuda::std::__type_list<Ts...>;
+
+  static constexpr cuda::std::size_t num_types = sizeof...(Ts);
+
+  cuda::std::array<DLDataType, num_types> expected_types;
+
+  template <cuda::std::size_t index>
+  void call_impl() const
+  {
+    using T         = cuda::std::__type_at_c<index, list_t>;
+    using extents_t = cuda::std::extents<cuda::std::size_t, 2, 3>;
+    T data[6] = {};
+    cuda::host_mdspan<T, extents_t> md{data, extents_t{}};
+    auto dlpack_wrapper  = cuda::to_dlpack_tensor(md);
+    const auto& dltensor = dlpack_wrapper.get();
+
+    auto type = expected_types[index];
+    check_datatype(dltensor.dtype, type.code, type.bits, type.lanes);
+  }
+
+  template <cuda::std::size_t... indices>
+  void call(cuda::std::index_sequence<indices...>) const
+  {
+    (call_impl<indices>(), ...);
+  }
+};
+
+bool test_mdspan_to_dlpack_basic_types()
+{
+  using list_t = cuda::std::__type_list<
+    bool,
+    // Signed integer types
+    signed char,
+    short,
+    int,
+    long,
+    long long,
+#if _CCCL_HAS_INT128()
+    __int128_t,
+#endif
+    // Unsigned integer types
+    unsigned char,
+    unsigned short,
+    unsigned int,
+    unsigned long,
+    unsigned long long,
+#if _CCCL_HAS_INT128()
+    __uint128_t,
+#endif
+    // Floating-point types
+    float,
+    double
+#if _CCCL_HAS_FLOAT128()
+    ,
+    __float128
+#endif
+    >;
+  cuda::std::array expected_types = {
+    DLDataType{kDLBool, 8, 1},
+    // Signed integer types
+    DLDataType{kDLInt, 8, 1},
+    DLDataType{kDLInt, 16, 1},
+    DLDataType{kDLInt, 32, 1},
+    DLDataType{kDLInt, sizeof(long) * 8, 1},
+    DLDataType{kDLInt, 64, 1},
+#if _CCCL_HAS_INT128()
+    DLDataType{kDLInt, 128, 1},
+#endif
+    // Unsigned integer types
+    DLDataType{kDLUInt, 8, 1},
+    DLDataType{kDLUInt, 16, 1},
+    DLDataType{kDLUInt, 32, 1},
+    DLDataType{kDLUInt, sizeof(unsigned long) * 8, 1},
+    DLDataType{kDLUInt, 64, 1},
+#if _CCCL_HAS_INT128()
+    DLDataType{kDLUInt, 128, 1},
+#endif
+    // Floating-point types
+    DLDataType{kDLFloat, 32, 1},
+    DLDataType{kDLFloat, 64, 1},
+#if _CCCL_HAS_FLOAT128()
+    DLDataType{kDLFloat, 128, 1},
+#endif
+  };
+  test_mdspan_to_dlpack_types_fn<list_t> test_fn{expected_types};
+  test_fn.call(cuda::std::make_index_sequence<decltype(test_fn)::num_types>{});
+  return true;
+}
+
+bool test_mdspan_to_dlpack_extended_fp_and_complex_types()
+{
+  using list_t = cuda::std::__type_list<
+#if _CCCL_HAS_NVFP16()
+    ::__half,
+#endif
+#if _CCCL_HAS_NVBF16()
+    ::__nv_bfloat16,
+#endif
+#if _CCCL_HAS_FLOAT128()
+    __float128,
+#endif
+    // Low-precision floating-point types
+#if _CCCL_HAS_NVFP8_E4M3()
+    ::__nv_fp8_e4m3,
+#endif
+#if _CCCL_HAS_NVFP8_E5M2()
+    ::__nv_fp8_e5m2,
+#endif
+#if _CCCL_HAS_NVFP8_E8M0()
+    ::__nv_fp8_e8m0,
+#endif
+#if _CCCL_HAS_NVFP6_E2M3()
+    ::__nv_fp6_e2m3,
+#endif
+#if _CCCL_HAS_NVFP6_E3M2()
+    ::__nv_fp6_e3m2,
+#endif
+#if _CCCL_HAS_NVFP4_E2M1()
+    ::__nv_fp4_e2m1,
+#endif
+    // Complex types
+#if _CCCL_HAS_NVFP16()
+    cuda::std::complex<::__half>,
+#endif
+    cuda::std::complex<float>,
+    cuda::std::complex<double>>;
+  cuda::std::array expected_types = {
+#if _CCCL_HAS_NVFP16()
+    DLDataType{kDLFloat, 16, 1},
+#endif
+#if _CCCL_HAS_NVBF16()
+    DLDataType{kDLBfloat, 16, 1},
+#endif
+#if _CCCL_HAS_FLOAT128()
+    DLDataType{kDLFloat, 128, 1},
+#endif
+    // Low-precision floating-point types
+#if _CCCL_HAS_NVFP8_E4M3()
+    DLDataType{kDLFloat8_e4m3fn, 8, 1},
+#endif
+#if _CCCL_HAS_NVFP8_E5M2()
+    DLDataType{kDLFloat8_e5m2, 8, 1},
+#endif
+#if _CCCL_HAS_NVFP8_E8M0()
+    DLDataType{kDLFloat8_e8m0fnu, 8, 1},
+#endif
+#if _CCCL_HAS_NVFP6_E2M3()
+    DLDataType{kDLFloat6_e2m3fn, 6, 1},
+#endif
+#if _CCCL_HAS_NVFP6_E3M2()
+    DLDataType{kDLFloat6_e3m2fn, 6, 1},
+#endif
+#if _CCCL_HAS_NVFP4_E2M1()
+    DLDataType{kDLFloat4_e2m1fn, 4, 1},
+#endif
+    // Complex types
+#if _CCCL_HAS_NVFP16()
+    DLDataType{kDLComplex, 32, 1},
+#endif
+    DLDataType{kDLComplex, 64, 1},
+    DLDataType{kDLComplex, 128, 1}};
+  test_mdspan_to_dlpack_types_fn<list_t> test_fn{expected_types};
+  test_fn.call(cuda::std::make_index_sequence<decltype(test_fn)::num_types>{});
+  return true;
+}
+
+#if _CCCL_HAS_CTK()
+bool test_mdspan_to_dlpack_vector_types()
+{
+  using list_t = cuda::std::__type_list<
+    ::char2,
+    ::char4,
+    ::uchar2,
+    ::uchar4,
+    ::short2,
+    ::short4,
+    ::ushort2,
+    ::ushort4,
+    ::int2,
+    ::int4,
+    ::uint2,
+    ::uint4,
+    ::long2
+#  if _CCCL_CTK_AT_LEAST(13, 0)
+    ,
+    ::long4_32a
+#  else
+    ,
+    ::long4
+#  endif
+    ,
+    ::ulong2
+#  if _CCCL_CTK_AT_LEAST(13, 0)
+    ,
+    ::ulong4_32a
+#  else
+    ,
+    ::ulong4
+#  endif
+    ,
+    ::longlong2
+#  if _CCCL_CTK_AT_LEAST(13, 0)
+    ,
+    ::longlong4_32a
+#  else
+    ,
+    ::longlong4
+#  endif
+    ,
+    ::ulonglong2
+#  if _CCCL_CTK_AT_LEAST(13, 0)
+    ,
+    ::ulonglong4_32a
+#  else
+    ,
+    ::ulonglong4
+#  endif
+    ,
+    ::float2,
+    ::float4,
+    ::double2
+#  if _CCCL_CTK_AT_LEAST(13, 0)
+    ,
+    ::double4_32a
+#  else
+    ,
+    ::double4
+#  endif
+    >;
+  cuda::std::array expected_types = {
+    DLDataType{kDLInt, 8, 2},
+    DLDataType{kDLInt, 8, 4},
+    DLDataType{kDLUInt, 8, 2},
+    DLDataType{kDLUInt, 8, 4},
+    DLDataType{kDLInt, 16, 2},
+    DLDataType{kDLInt, 16, 4},
+    DLDataType{kDLUInt, 16, 2},
+    DLDataType{kDLUInt, 16, 4},
+    DLDataType{kDLInt, 32, 2},
+    DLDataType{kDLInt, 32, 4},
+    DLDataType{kDLUInt, 32, 2},
+    DLDataType{kDLUInt, 32, 4},
+    DLDataType{kDLInt, sizeof(long) * 8, 2},
+    DLDataType{kDLInt, sizeof(long) * 8, 4},
+    DLDataType{kDLUInt, sizeof(unsigned long) * 8, 2},
+    DLDataType{kDLUInt, sizeof(unsigned long) * 8, 4},
+    DLDataType{kDLInt, 64, 2},
+    DLDataType{kDLInt, 64, 4},
+    DLDataType{kDLUInt, 64, 2},
+    DLDataType{kDLUInt, 64, 4},
+    DLDataType{kDLFloat, 32, 2},
+    DLDataType{kDLFloat, 32, 4},
+    DLDataType{kDLFloat, 64, 2},
+    DLDataType{kDLFloat, 64, 4}};
+  test_mdspan_to_dlpack_types_fn<list_t> test_fn{expected_types};
+  test_fn.call(cuda::std::make_index_sequence<decltype(test_fn)::num_types>{});
+  return true;
+}
+#endif // _CCCL_HAS_CTK()
+
+bool test_mdspan_to_dlpack_extended_fp_vector_types()
+{
+  using list_t = cuda::std::__type_list<
+#if _CCCL_HAS_NVFP16()
+    ::__half2,
+#endif
+#if _CCCL_HAS_NVBF16()
+    ::__nv_bfloat162,
+#endif
+#if _CCCL_HAS_NVFP8_E4M3()
+    ::__nv_fp8x2_e4m3,
+    ::__nv_fp8x4_e4m3,
+#endif
+#if _CCCL_HAS_NVFP8_E5M2()
+    ::__nv_fp8x2_e5m2,
+    ::__nv_fp8x4_e5m2,
+#endif
+#if _CCCL_HAS_NVFP8_E8M0()
+    ::__nv_fp8x2_e8m0,
+    ::__nv_fp8x4_e8m0,
+#endif
+#if _CCCL_HAS_NVFP6_E2M3()
+    ::__nv_fp6x2_e2m3,
+    ::__nv_fp6x4_e2m3,
+#endif
+#if _CCCL_HAS_NVFP6_E3M2()
+    ::__nv_fp6x2_e3m2,
+    ::__nv_fp6x4_e3m2,
+#endif
+#if _CCCL_HAS_NVFP4_E2M1()
+    ::__nv_fp4x2_e2m1,
+    ::__nv_fp4x4_e2m1,
+#endif
+    void* /* dummy to allow trailing commas */>;
+  cuda::std::array expected_types = {
+#if _CCCL_HAS_NVFP16()
+    DLDataType{kDLFloat, 16, 2},
+#endif
+#if _CCCL_HAS_NVBF16()
+    DLDataType{kDLBfloat, 16, 2},
+#endif
+#if _CCCL_HAS_NVFP8_E4M3()
+    DLDataType{kDLFloat8_e4m3fn, 8, 2},
+    DLDataType{kDLFloat8_e4m3fn, 8, 4},
+#endif
+#if _CCCL_HAS_NVFP8_E5M2()
+    DLDataType{kDLFloat8_e5m2, 8, 2},
+    DLDataType{kDLFloat8_e5m2, 8, 4},
+#endif
+#if _CCCL_HAS_NVFP8_E8M0()
+    DLDataType{kDLFloat8_e8m0fnu, 8, 2},
+    DLDataType{kDLFloat8_e8m0fnu, 8, 4},
+#endif
+#if _CCCL_HAS_NVFP6_E2M3()
+    DLDataType{kDLFloat6_e2m3fn, 6, 2},
+    DLDataType{kDLFloat6_e2m3fn, 6, 4},
+#endif
+#if _CCCL_HAS_NVFP6_E3M2()
+    DLDataType{kDLFloat6_e3m2fn, 6, 2},
+    DLDataType{kDLFloat6_e3m2fn, 6, 4},
+#endif
+#if _CCCL_HAS_NVFP4_E2M1()
+    DLDataType{kDLFloat4_e2m1fn, 4, 2},
+    DLDataType{kDLFloat4_e2m1fn, 4, 4},
+#endif
+    // Placeholder entry matching the trailing void* dummy; never checked below.
+    DLDataType{}};
+  test_mdspan_to_dlpack_types_fn<list_t> test_fn{expected_types};
+  // Skip the trailing void* dummy entry.
+  test_fn.call(cuda::std::make_index_sequence<decltype(test_fn)::num_types - 1>{});
+  return true;
+}
+
+int main(int, char**)
+{
+  NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_host_layout_right());))
+  NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_host_layout_left());))
+  NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_empty_size());))
+  NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_rank_0());))
+  NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_const_pointer());))
+  NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_device());))
+  NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_managed());))
+  NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_basic_types());))
+  NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_extended_fp_and_complex_types());))
+#if _CCCL_HAS_CTK()
+  NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_vector_types());))
+#endif // _CCCL_HAS_CTK()
+  NV_IF_TARGET(NV_IS_HOST, (assert(test_mdspan_to_dlpack_extended_fp_vector_types());))
+  return 0;
+}
diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/mdspan_to_dlpack/mdspan_to_dlpack.wrapper.fail.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/mdspan_to_dlpack/mdspan_to_dlpack.wrapper.fail.cpp
new file mode 100644
index 00000000000..562d3757d07
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/mdspan_to_dlpack/mdspan_to_dlpack.wrapper.fail.cpp
@@ -0,0 +1,22 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+#include <cuda/mdspan>
+#include <cuda/std/cassert>
+
+bool test_mdspan_to_dlpack_wrapper_get_lvalue()
+{
+  auto tensor = cuda::__dlpack_tensor<3>{}.get(); // expected to fail: rvalue get() is deleted
+  return true;
+}
+
+int main(int, char**)
+{
+  assert(test_mdspan_to_dlpack_wrapper_get_lvalue());
+  return 0;
+}
diff --git a/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/mdspan_to_dlpack/mdspan_to_dlpack.wrapper.pass.cpp b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/mdspan_to_dlpack/mdspan_to_dlpack.wrapper.pass.cpp
new file mode 100644
index 00000000000..ee7016f9556
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/cuda/containers/views/mdspan/mdspan_to_dlpack/mdspan_to_dlpack.wrapper.pass.cpp
@@ -0,0 +1,214 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+// UNSUPPORTED: nvrtc
+
+#include <cstdint>
+#include <cuda/mdspan>
+#include <cuda/std/cassert>
+#include <cuda/std/mdspan>
+#include <cuda/std/type_traits>
+#include <cuda/std/utility>
+#include <dlpack/dlpack.h>
+
+#include "test_macros.h"
+
+void check_datatype(const DLDataType& dt, uint8_t code, uint8_t bits, uint16_t lanes)
+{
+  assert(dt.code == code);
+  assert(dt.bits == bits);
+  assert(dt.lanes == lanes);
+}
+
+bool test_mdspan_to_dlpack_wrapper_default_ctor()
+{
+  cuda::__dlpack_tensor<3> dlpack_wrapper{};
+  DLDataType default_dtype = {};
+  DLDevice default_device  = {};
+  auto& tensor             = dlpack_wrapper.get();
+  assert(tensor.device.device_type == default_device.device_type);
+  assert(tensor.device.device_id == default_device.device_id);
+  check_datatype(tensor.dtype, default_dtype.code, default_dtype.bits, default_dtype.lanes);
+  assert(tensor.shape != nullptr);
+  assert(tensor.strides != nullptr);
+  return true;
+}
+
+bool test_dlpack_wrapper_copy_ctor()
+{
+  using extents_t = cuda::std::extents<cuda::std::size_t, 2, 3>;
+  int data[6]     = {0, 1, 2, 3, 4, 5};
+  cuda::host_mdspan<int, extents_t> md{data, extents_t{}};
+  auto w           = cuda::to_dlpack_tensor(md);
+  auto& t          = w.get();
+  auto* shape_ptr   = t.shape;
+  auto* strides_ptr = t.strides;
+
+  auto w2 = w; // copy construct
+  // Copy must not alias the source wrapper's shape/stride storage.
+  auto& t2 = w2.get();
+  assert(t2.shape != nullptr);
+  assert(t2.strides != nullptr);
+  assert(t2.shape != shape_ptr);
+  assert(t2.strides != strides_ptr);
+
+  // Source wrapper must remain intact.
+  assert(t.shape == shape_ptr);
+  assert(t.strides == strides_ptr);
+
+  // Sanity-check copied tensor metadata and values.
+  assert(t2.device.device_type == kDLCPU);
+  assert(t2.device.device_id == 0);
+  assert(t2.ndim == 2);
+  check_datatype(t2.dtype, kDLInt, 32, 1);
+  assert(t2.shape[0] == 2);
+  assert(t2.shape[1] == 3);
+  assert(t2.strides[0] == 3);
+  assert(t2.strides[1] == 1);
+  assert(t2.byte_offset == 0);
+  assert(t2.data == data);
+  return true;
+}
+
+bool test_dlpack_wrapper_move_ctor()
+{
+  using extents_t = cuda::std::extents<cuda::std::size_t, 2, 3>;
+  int data[6]     = {0, 1, 2, 3, 4, 5};
+  cuda::host_mdspan<int, extents_t> md{data, extents_t{}};
+  auto w            = cuda::to_dlpack_tensor(md);
+  auto& t           = w.get();
+  auto* shape_ptr   = t.shape;
+  auto* strides_ptr = t.strides;
+  auto moved        = cuda::std::move(w); // move construct
+
+  // Moved-to wrapper must not keep pointers to moved-from storage.
+  auto& tm = moved.get();
+  assert(tm.shape != nullptr);
+  assert(tm.strides != nullptr);
+  assert(tm.shape != shape_ptr);
+  assert(tm.strides != strides_ptr);
+
+  // Moved-from wrapper is explicitly reset to a default/empty DLTensor.
+  assert(t.shape == nullptr);
+  assert(t.strides == nullptr);
+  assert(t.data == nullptr);
+  assert(t.ndim == 0);
+
+  // Sanity-check moved-to tensor metadata and values.
+  assert(tm.device.device_type == kDLCPU);
+  assert(tm.device.device_id == 0);
+  assert(tm.ndim == 2);
+  check_datatype(tm.dtype, kDLInt, 32, 1);
+  assert(tm.shape[0] == 2);
+  assert(tm.shape[1] == 3);
+  assert(tm.strides[0] == 3);
+  assert(tm.strides[1] == 1);
+  assert(tm.byte_offset == 0);
+  assert(tm.data == data);
+  return true;
+}
+
+bool test_dlpack_wrapper_copy_assignment()
+{
+  using extents_t = cuda::std::extents<cuda::std::size_t, 2, 3>;
+  int data_a[6]   = {0, 1, 2, 3, 4, 5};
+  int data_b[6]   = {6, 7, 8, 9, 10, 11};
+  cuda::host_mdspan<int, extents_t> md_a{data_a, extents_t{}};
+  cuda::host_mdspan<int, extents_t> md_b{data_b, extents_t{}};
+  auto a              = cuda::to_dlpack_tensor(md_a);
+  auto b              = cuda::to_dlpack_tensor(md_b);
+  auto& ta            = a.get();
+  auto& tb            = b.get();
+  auto* b_shape_ptr   = tb.shape;
+  auto* b_strides_ptr = tb.strides;
+
+  b = a; // copy assign
+  // Destination must keep pointing to its own member arrays (not to `a`'s).
+  assert(tb.shape == b_shape_ptr);
+  assert(tb.strides == b_strides_ptr);
+  assert(tb.shape != ta.shape);
+  assert(tb.strides != ta.strides);
+
+  // Values must be copied correctly.
+  assert(tb.data == data_a);
+  assert(tb.ndim == 2);
+  assert(tb.shape[0] == 2);
+  assert(tb.shape[1] == 3);
+  assert(tb.strides[0] == 3);
+  assert(tb.strides[1] == 1);
+  return true;
+}
+
+bool test_dlpack_wrapper_move_assignment()
+{
+  using extents_t = cuda::std::extents<cuda::std::size_t, 2, 3>;
+  int data_a[6]   = {0, 1, 2, 3, 4, 5};
+  int data_b[6]   = {6, 7, 8, 9, 10, 11};
+  cuda::host_mdspan<int, extents_t> md_a{data_a, extents_t{}};
+  cuda::host_mdspan<int, extents_t> md_b{data_b, extents_t{}};
+  auto a              = cuda::to_dlpack_tensor(md_a);
+  auto b              = cuda::to_dlpack_tensor(md_b);
+  auto& ta            = a.get();
+  auto& tb            = b.get();
+  auto* a_shape_ptr   = ta.shape;
+  auto* a_strides_ptr = ta.strides;
+  auto* b_shape_ptr   = tb.shape;
+  auto* b_strides_ptr = tb.strides;
+
+  b = cuda::std::move(a); // move assign
+  // Destination must keep pointing to its own member arrays, not the source's.
+  assert(tb.shape == b_shape_ptr);
+  assert(tb.strides == b_strides_ptr);
+  assert(tb.shape != a_shape_ptr);
+  assert(tb.strides != a_strides_ptr);
+
+  // Source must be reset.
+  assert(ta.shape == nullptr);
+  assert(ta.strides == nullptr);
+  assert(ta.data == nullptr);
+  assert(ta.ndim == 0);
+
+  // Values must be moved correctly.
+  assert(tb.data == data_a);
+  assert(tb.ndim == 2);
+  assert(tb.shape[0] == 2);
+  assert(tb.shape[1] == 3);
+  assert(tb.strides[0] == 3);
+  assert(tb.strides[1] == 1);
+  return true;
+}
+
+bool test_dlpack_wrapper_get()
+{
+  using wrapper_t = cuda::__dlpack_tensor<2>;
+  static_assert(cuda::std::is_same_v<decltype(cuda::std::declval<wrapper_t&>().get()), ::DLTensor&>);
+  static_assert(cuda::std::is_same_v<decltype(cuda::std::declval<const wrapper_t&>().get()), const ::DLTensor&>);
+
+  wrapper_t w{};
+  // Mutating through the reference returned by `get()` must be observable.
+  auto& t = w.get();
+  t.ndim  = 123;
+  assert(w.get().ndim == 123);
+
+  // Const overload should also alias the same underlying object.
+  const wrapper_t& cw = w;
+  assert(&cw.get() == &w.get());
+  return true;
+}
+
+int main(int, char**)
+{
+  NV_IF_TARGET(
+    NV_IS_HOST,
+    (assert(test_mdspan_to_dlpack_wrapper_default_ctor()); assert(test_dlpack_wrapper_copy_ctor());
+     assert(test_dlpack_wrapper_move_ctor());
+     assert(test_dlpack_wrapper_copy_assignment());
+     assert(test_dlpack_wrapper_move_assignment());
+     assert(test_dlpack_wrapper_get());))
+  return 0;
+}
diff --git a/libcudacxx/test/libcudacxx/libcxx/type_traits/vector_type.compile.pass.cpp b/libcudacxx/test/libcudacxx/libcxx/type_traits/vector_type.compile.pass.cpp
index 406b6157cbf..1561857be02 100644
--- a/libcudacxx/test/libcudacxx/libcxx/type_traits/vector_type.compile.pass.cpp
+++ b/libcudacxx/test/libcudacxx/libcxx/type_traits/vector_type.compile.pass.cpp
@@ -29,31 +29,61 @@ __host__ __device__ void test()
   test();
   test();
 
+  static_assert(cuda::__is_vector_type_v<char1>);
+  static_assert(cuda::__is_vector_type_v<char2>);
+  static_assert(cuda::__is_vector_type_v<char3>);
+  static_assert(cuda::__is_vector_type_v<char4>);
+
   test();
   test();
   test();
   test();
 
+  static_assert(cuda::__is_vector_type_v<uchar1>);
+  static_assert(cuda::__is_vector_type_v<uchar2>);
+  static_assert(cuda::__is_vector_type_v<uchar3>);
+  static_assert(cuda::__is_vector_type_v<uchar4>);
+
   test();
   test();
   test();
   test();
 
+  static_assert(cuda::__is_vector_type_v<short1>);
+  static_assert(cuda::__is_vector_type_v<short2>);
+  static_assert(cuda::__is_vector_type_v<short3>);
+  static_assert(cuda::__is_vector_type_v<short4>);
+
   test();
   test();
   test();
   test();
 
+  static_assert(cuda::__is_vector_type_v<ushort1>);
+  static_assert(cuda::__is_vector_type_v<ushort2>);
+  static_assert(cuda::__is_vector_type_v<ushort3>);
+  static_assert(cuda::__is_vector_type_v<ushort4>);
+
   test();
   test();
   test();
   test();
 
+  static_assert(cuda::__is_vector_type_v<int1>);
+  static_assert(cuda::__is_vector_type_v<int2>);
+  static_assert(cuda::__is_vector_type_v<int3>);
+  static_assert(cuda::__is_vector_type_v<int4>);
+
   test();
   test();
   test();
   test();
 
+  static_assert(cuda::__is_vector_type_v<uint1>);
+  static_assert(cuda::__is_vector_type_v<uint2>);
+  static_assert(cuda::__is_vector_type_v<uint3>);
+  static_assert(cuda::__is_vector_type_v<uint4>);
+
   test();
   test();
   test();
@@ -63,6 +93,16 @@ __host__ __device__ void test()
   test();
 #endif // ^^^ _CCCL_CTK_BELOW(13, 0) ^^^
 
+  static_assert(cuda::__is_vector_type_v<long1>);
+  static_assert(cuda::__is_vector_type_v<long2>);
+  static_assert(cuda::__is_vector_type_v<long3>);
+#if _CCCL_CTK_AT_LEAST(13, 0)
+  static_assert(cuda::__is_vector_type_v<long4_16a>);
+  static_assert(cuda::__is_vector_type_v<long4_32a>);
+#else // ^^^ _CCCL_CTK_AT_LEAST(13, 0) ^^^ / vvv _CCCL_CTK_BELOW(13, 0) vvv
+  static_assert(cuda::__is_vector_type_v<long4>);
+#endif // _CCCL_CTK_AT_LEAST(13, 0)
+
   test();
   test();
   test();
@@ -72,6 +112,16 @@ __host__ __device__ void test()
   test();
 #endif // ^^^ _CCCL_CTK_BELOW(13, 0) ^^^
 
+  static_assert(cuda::__is_vector_type_v<ulong1>);
+  static_assert(cuda::__is_vector_type_v<ulong2>);
+  static_assert(cuda::__is_vector_type_v<ulong3>);
+#if _CCCL_CTK_AT_LEAST(13, 0)
+  static_assert(cuda::__is_vector_type_v<ulong4_16a>);
+  static_assert(cuda::__is_vector_type_v<ulong4_32a>);
+#else // ^^^ _CCCL_CTK_AT_LEAST(13, 0) ^^^ / vvv _CCCL_CTK_BELOW(13, 0) vvv
+  static_assert(cuda::__is_vector_type_v<ulong4>);
+#endif // _CCCL_CTK_AT_LEAST(13, 0)
+
   test();
   test();
   test();
@@ -81,6 +131,16 @@ __host__ __device__ void test()
   test();
 #endif // ^^^ _CCCL_CTK_BELOW(13, 0) ^^^
 
+  static_assert(cuda::__is_vector_type_v<longlong1>);
+  static_assert(cuda::__is_vector_type_v<longlong2>);
+  static_assert(cuda::__is_vector_type_v<longlong3>);
+#if _CCCL_CTK_AT_LEAST(13, 0)
+  static_assert(cuda::__is_vector_type_v<longlong4_16a>);
+  static_assert(cuda::__is_vector_type_v<longlong4_32a>);
+#else // ^^^ _CCCL_CTK_AT_LEAST(13, 0) ^^^ / vvv _CCCL_CTK_BELOW(13, 0) vvv
+  static_assert(cuda::__is_vector_type_v<longlong4>);
+#endif // _CCCL_CTK_AT_LEAST(13, 0)
+
   test();
   test();
   test();
@@ -90,11 +150,26 @@ __host__ __device__ void test()
   test();
 #endif // ^^^ _CCCL_CTK_BELOW(13, 0) ^^^
 
+  static_assert(cuda::__is_vector_type_v<ulonglong1>);
+  static_assert(cuda::__is_vector_type_v<ulonglong2>);
+  static_assert(cuda::__is_vector_type_v<ulonglong3>);
+#if _CCCL_CTK_AT_LEAST(13, 0)
+  static_assert(cuda::__is_vector_type_v<ulonglong4_16a>);
+  static_assert(cuda::__is_vector_type_v<ulonglong4_32a>);
+#else // ^^^ _CCCL_CTK_AT_LEAST(13, 0) ^^^ / vvv _CCCL_CTK_BELOW(13, 0) vvv
+  static_assert(cuda::__is_vector_type_v<ulonglong4>);
+#endif // _CCCL_CTK_AT_LEAST(13, 0)
+
   test();
   test();
   test();
   test();
 
+  static_assert(cuda::__is_vector_type_v<float1>);
+  static_assert(cuda::__is_vector_type_v<float2>);
+  static_assert(cuda::__is_vector_type_v<float3>);
+  static_assert(cuda::__is_vector_type_v<float4>);
+
   test();
   test();
   test();
@@ -104,11 +179,26 @@ __host__ __device__ void test()
   test();
 #endif // ^^^ _CCCL_CTK_BELOW(13, 0) ^^^
 
+  static_assert(cuda::__is_vector_type_v<double1>);
+  static_assert(cuda::__is_vector_type_v<double2>);
+  static_assert(cuda::__is_vector_type_v<double3>);
+#if _CCCL_CTK_AT_LEAST(13, 0)
+  static_assert(cuda::__is_vector_type_v<double4_16a>);
+  static_assert(cuda::__is_vector_type_v<double4_32a>);
+#else // ^^^ _CCCL_CTK_AT_LEAST(13, 0) ^^^ / vvv _CCCL_CTK_BELOW(13, 0) vvv
+  static_assert(cuda::__is_vector_type_v<double4>);
+#endif // _CCCL_CTK_AT_LEAST(13, 0)
+
+  static_assert(cuda::__is_vector_type_v<dim3>);
+
   // 2. Test invalid combinations
   test();
   test();
   test();
+
+  static_assert(!cuda::__is_vector_type_v<int>);
+  static_assert(!cuda::__is_vector_type_v<bool>);
 }
 
 int main(int, char**)