Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dimensions description functionality to CUDA Experimental library #1743

Merged
merged 17 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions ci/matrix.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ctk_11_1: &ctk_11_1 '11.1'

Check notice on line 1 in ci/matrix.yaml

View workflow job for this annotation

GitHub Actions / Build workflow from matrix

Removing standalone job '[C++17 clang14] Build(amd64)' as it appears as a producer in 'cudax nvcc clang CTK12.0'

Check notice on line 1 in ci/matrix.yaml

View workflow job for this annotation

GitHub Actions / Build workflow from matrix

Removing standalone job '[C++20 clang14] Build(amd64)' as it appears as a producer in 'cudax nvcc clang CTK12.0'
ctk_11_8: &ctk_11_8 '11.8'
ctk_12_0: &ctk_12_0 '12.0'
ctk_curr: &ctk_curr '12.4'
Expand Down Expand Up @@ -71,18 +71,18 @@
# verify-codegen:
- {jobs: ['verify_codegen'], project: 'libcudacxx'}
# cudax has different CTK reqs:
# - {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0, *ctk_curr], std: 'all', cxx: [*gcc9, *gcc10, *gcc11]}
# - {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0, *ctk_curr], std: 'all', cxx: [*llvm9, *llvm10, *llvm11, *llvm12, *llvm13, *llvm14]}
# - {jobs: ['build'], project: 'cudax', ctk: [ *ctk_curr], std: 'all', cxx: [*llvm15]}
# - {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0, ], std: 'all', cxx: [*msvc2022_1436]}
# - {jobs: ['build'], project: 'cudax', ctk: [ *ctk_curr], std: 'all', cxx: [*msvc2022]}
# - {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0 ], std: 17, cxx: [*gcc12], sm: "90"}
# - {jobs: ['build'], project: 'cudax', ctk: [ *ctk_curr], std: 17, cxx: [*gcc12], sm: "90a"}
# - {jobs: ['build'], project: 'cudax', ctk: [ *ctk_curr], std: 'all', cxx: [*gcc12, *llvm16], cpu: 'arm64'}
# - {jobs: ['build'], project: 'cudax', ctk: [ *ctk_curr], std: 17, cxx: [*oneapi]}
# - {jobs: ['test'], project: 'cudax', ctk: [*ctk_12_0, *ctk_curr], std: 'all', cxx: [*gcc12]}
# - {jobs: ['test'], project: 'cudax', ctk: [*ctk_12_0 ], std: 'all', cxx: [*llvm14]}
# - {jobs: ['test'], project: 'cudax', ctk: [ *ctk_curr], std: 'all', cxx: [*llvm16]}
- {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0, *ctk_curr], std: 'all', cxx: [*gcc9, *gcc10, *gcc11]}
- {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0, *ctk_curr], std: 'all', cxx: [*llvm9, *llvm10, *llvm11, *llvm12, *llvm13, *llvm14]}
- {jobs: ['build'], project: 'cudax', ctk: [ *ctk_curr], std: 'all', cxx: [*llvm15]}
Comment on lines +74 to +76
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that still valid? I would guess that std: 'all' will also test C++11 / C++14

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree this seems wrong, but then why are there only c++17/20 jobs running for this PR?

- {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0, ], std: 20, cxx: [*msvc2022_1436]}
- {jobs: ['build'], project: 'cudax', ctk: [ *ctk_curr], std: 20, cxx: [*msvc2022]}
- {jobs: ['build'], project: 'cudax', ctk: [*ctk_12_0 ], std: 17, cxx: [*gcc12], sm: "90"}
- {jobs: ['build'], project: 'cudax', ctk: [ *ctk_curr], std: 17, cxx: [*gcc12], sm: "90a"}
- {jobs: ['build'], project: 'cudax', ctk: [ *ctk_curr], std: 'all', cxx: [*gcc12, *llvm16], cpu: 'arm64'}
- {jobs: ['build'], project: 'cudax', ctk: [ *ctk_curr], std: 17, cxx: [*oneapi]}
- {jobs: ['test'], project: 'cudax', ctk: [*ctk_12_0, *ctk_curr], std: 'all', cxx: [*gcc12]}
- {jobs: ['test'], project: 'cudax', ctk: [*ctk_12_0 ], std: 'all', cxx: [*llvm14]}
- {jobs: ['test'], project: 'cudax', ctk: [ *ctk_curr], std: 'all', cxx: [*llvm16]}
# cccl-infra:
- {jobs: ['infra'], project: 'cccl', ctk: *ctk_11_1, cxx: [*gcc-oldest, *llvm-oldest]}
- {jobs: ['infra'], project: 'cccl', ctk: *ctk_curr, cxx: [*gcc-newest, *llvm-newest]}
Expand Down
145 changes: 145 additions & 0 deletions cudax/include/cuda/experimental/__hierarchy/dimensions.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
//===----------------------------------------------------------------------===//
//
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDAX__HIERARCHY_DIMENSIONS
#define _CUDAX__HIERARCHY_DIMENSIONS

#include <cuda/std/mdspan>

#if _CCCL_STD_VER >= 2017
namespace cuda::experimental
{

template <typename T, size_t... Extents>
using dimensions = ::cuda::std::extents<T, Extents...>;

// not unsigned because of a bug in ::cuda::std::extents
using dimensions_index_type = int;

/**
* @brief Type representing a result of a multi-dimensional hierarchy query.
*
* Returned from extents and index queries.
*
* @par Snippet
* @code
* #include <cudax/hierarchy_dimensions.cuh>
*
* template <typename Dimensions>
* __global__ void kernel(Dimensions dims)
* {
* auto ext = dims.extents();
*
* // Can be accessed like cuda::std::extents or like dim3
* assert(ext.extent(0) == expected);
* assert(ext.x == expected);
*
* // Can be converted to dim3
* dim3 dimensions = ext;
* }
* @endcode
* @par
*
* @tparam T
* Type of the result for each dimension
*
* @tparam Extents
* Extents of the result
*/
template <typename T, size_t... Extents>
struct hierarchy_query_result : public dimensions<T, Extents...>
{
using Dims = dimensions<T, Extents...>;
using Dims::Dims;
_CCCL_HOST_DEVICE explicit constexpr hierarchy_query_result(const Dims& dims)
: Dims(dims)
{}
static_assert(Dims::rank() > 0 && Dims::rank() <= 3);

const T x = Dims::extent(0);
const T y = Dims::rank() > 1 ? Dims::extent(1) : 1;
const T z = Dims::rank() > 2 ? Dims::extent(2) : 1;

_CCCL_HOST_DEVICE constexpr operator dim3() const
{
return dim3(static_cast<uint32_t>(x), static_cast<uint32_t>(y), static_cast<uint32_t>(z));
}
};

namespace detail
{
template <typename OpType>
_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr size_t merge_extents(size_t e1, size_t e2)
{
if (e1 == ::cuda::std::dynamic_extent || e2 == ::cuda::std::dynamic_extent)
{
return ::cuda::std::dynamic_extent;
}
else
{
OpType op;
return op(e1, e2);
}
}

template <typename DstType, typename OpType, typename T1, size_t... Extents1, typename T2, size_t... Extents2>
_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto
dims_op(const OpType& op, const dimensions<T1, Extents1...>& h1, const dimensions<T2, Extents2...>& h2) noexcept
{
// For now target only 3 dim extents
static_assert(sizeof...(Extents1) == sizeof...(Extents2));
static_assert(sizeof...(Extents1) == 3);

return dimensions<DstType, merge_extents<OpType>(Extents1, Extents2)...>(
op(static_cast<DstType>(h1.extent(0)), h2.extent(0)),
op(static_cast<DstType>(h1.extent(1)), h2.extent(1)),
op(static_cast<DstType>(h1.extent(2)), h2.extent(2)));
}

template <typename DstType, typename T1, size_t... Extents1, typename T2, size_t... Extents2>
_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto
dims_product(const dimensions<T1, Extents1...>& h1, const dimensions<T2, Extents2...>& h2) noexcept
{
return dims_op<DstType>(::cuda::std::multiplies(), h1, h2);
}

template <typename DstType, typename T1, size_t... Extents1, typename T2, size_t... Extents2>
_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto
dims_sum(const dimensions<T1, Extents1...>& h1, const dimensions<T2, Extents2...>& h2) noexcept
{
return dims_op<DstType>(::cuda::std::plus(), h1, h2);
}

template <typename T, size_t... Extents>
_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto convert_to_query_result(const dimensions<T, Extents...>& result)
{
return hierarchy_query_result<T, Extents...>(result);
}

_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto dim3_to_dims(const dim3& dims)
{
return dimensions<dimensions_index_type,
::cuda::std::dynamic_extent,
::cuda::std::dynamic_extent,
::cuda::std::dynamic_extent>(dims.x, dims.y, dims.z);
}

template <typename TyTrunc, typename Index, typename Dims>
_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto index_to_linear(const Index& index, const Dims& dims)
{
static_assert(Dims::rank() == 3);

return (static_cast<TyTrunc>(index.extent(2)) * dims.extent(1) + index.extent(1)) * dims.extent(0) + index.extent(0);
}

} // namespace detail
} // namespace cuda::experimental
#endif // _CCCL_STD_VER >= 2017
#endif // _CUDAX__HIERARCHY_DIMENSIONS
Loading
Loading