Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure serial algorithms can be used inside parallel algorithms #174

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions include/ddc/ddc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
#include "ddc/uniform_point_sampling.hpp"

// Algorithms
#include "ddc/deepcopy.hpp"
#include "ddc/fill.hpp"
#include "ddc/for_each.hpp"
#include "ddc/mirror.hpp"
#include "ddc/parallel_deepcopy.hpp"
Expand Down
50 changes: 50 additions & 0 deletions include/ddc/deepcopy.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Copyright (C) The DDC development team, see COPYRIGHT.md file
//
// SPDX-License-Identifier: MIT

#pragma once

#include <type_traits>

#include <Kokkos_Core.hpp>

#include "ddc/chunk_common.hpp"
#include "ddc/chunk_span.hpp"
#include "ddc/detail/macros.hpp"
#include "ddc/for_each.hpp"

namespace ddc {

/** Element-wise, serial copy of one chunk span into another
 *
 * Every discrete element of `dst.domain()` is visited through `for_each` and the value held by
 * `src` at that element is assigned to `dst`.
 *
 * Checks performed before copying:
 * - at compile time, a reference to an element of `dst` must be assignable from a reference to an
 *   element of `src`;
 * - through `KOKKOS_ASSERT`, both domains must have equal extents and the memory spaces of `src`
 *   and `dst` must be accessible from `DDC_CURRENT_KOKKOS_SPACE`.
 *
 * @param[out] dst the chunk span receiving the values
 * @param[in] src the chunk span providing the values
 * @return a copy of the `dst` chunk span
 */
template <
        class ElementTypeDst,
        class SupportDst,
        class LayoutDst,
        class MemorySpaceDst,
        class ElementTypeSrc,
        class SupportSrc,
        class LayoutSrc,
        class MemorySpaceSrc>
KOKKOS_FUNCTION auto deepcopy(
        ChunkSpan<ElementTypeDst, SupportDst, LayoutDst, MemorySpaceDst> const& dst,
        ChunkSpan<ElementTypeSrc, SupportSrc, LayoutSrc, MemorySpaceSrc> const& src) noexcept
{
    using DstSpan = ChunkSpan<ElementTypeDst, SupportDst, LayoutDst, MemorySpaceDst>;
    using SrcSpan = ChunkSpan<ElementTypeSrc, SupportSrc, LayoutSrc, MemorySpaceSrc>;
    using DstElement = typename SupportDst::discrete_element_type;

    static_assert(
            std::is_assignable_v<chunk_reference_t<DstSpan>, chunk_reference_t<SrcSpan>>,
            "Not assignable");

    // Runtime preconditions: same shape, and both buffers reachable from the current space.
    KOKKOS_ASSERT(dst.domain().extents() == src.domain().extents());
    KOKKOS_ASSERT(
            (Kokkos::SpaceAccessibility<DDC_CURRENT_KOKKOS_SPACE, MemorySpaceSrc>::accessible));
    KOKKOS_ASSERT(
            (Kokkos::SpaceAccessibility<DDC_CURRENT_KOKKOS_SPACE, MemorySpaceDst>::accessible));

    // The iteration is driven by the destination domain; `src` is indexed with the same element.
    for_each(dst.domain(), [&dst, &src](DstElement const& idx) { dst(idx) = src(idx); });

    return dst;
}

} // namespace ddc
39 changes: 39 additions & 0 deletions include/ddc/fill.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright (C) The DDC development team, see COPYRIGHT.md file
//
// SPDX-License-Identifier: MIT

#pragma once

#include <type_traits>

#include <Kokkos_Core.hpp>

#include "ddc/chunk_span.hpp"
#include "ddc/detail/macros.hpp"
#include "ddc/for_each.hpp"

namespace ddc {

/** Serially assign one value to every element of a chunk span
 *
 * Every discrete element of `dst.domain()` is visited through `for_each` and receives `value`.
 * It is checked at compile time that a reference to an element of `dst` is assignable from `T`,
 * and through `KOKKOS_ASSERT` that the memory space of `dst` is accessible from
 * `DDC_CURRENT_KOKKOS_SPACE`.
 *
 * @param[out] dst the chunk span to fill
 * @param[in] value the value assigned to each element of `dst`
 * @return a copy of the `dst` chunk span
 */
template <class ElementType, class Support, class Layout, class MemorySpace, class T>
KOKKOS_FUNCTION auto fill(
        ChunkSpan<ElementType, Support, Layout, MemorySpace> const& dst,
        T const& value) noexcept
{
    using DstSpan = ChunkSpan<ElementType, Support, Layout, MemorySpace>;
    using DstElement = typename Support::discrete_element_type;

    static_assert(std::is_assignable_v<chunk_reference_t<DstSpan>, T>, "Not assignable");

    // Runtime precondition: the destination buffer is reachable from the current space.
    KOKKOS_ASSERT((Kokkos::SpaceAccessibility<DDC_CURRENT_KOKKOS_SPACE, MemorySpace>::accessible));

    for_each(dst.domain(), [&dst, &value](DstElement const& idx) { dst(idx) = value; });

    return dst;
}

} // namespace ddc
37 changes: 35 additions & 2 deletions include/ddc/for_each.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ namespace ddc {
namespace detail {

template <class RetType, class Element, std::size_t N, class Functor, class... Is>
void for_each_serial(
KOKKOS_FUNCTION void for_each_serial(
tpadioleau marked this conversation as resolved.
Show resolved Hide resolved
std::array<Element, N> const& begin,
std::array<Element, N> const& end,
Functor const& f,
Expand All @@ -38,14 +38,47 @@ void for_each_serial(
}
}

/** Recursive helper running N nested serial loops, without any Kokkos annotation
 *
 * Each recursion level handles the dimension whose rank equals the number of indices already
 * fixed, looping over the half-open range [begin[rank], end[rank]). Once N indices are fixed,
 * `f` is called with a `RetType` built from them; the first dimension is the outermost loop.
 *
 * @tparam RetType the type built from the N indices and handed to `f`
 * @param[in] begin the first index of each dimension
 * @param[in] end the past-the-end index of each dimension
 * @param[in] f the functor called once per visited position
 * @param[in] is the indices already fixed by the enclosing recursion levels
 */
template <class RetType, class Element, std::size_t N, class Functor, class... Is>
void host_for_each_serial(
        std::array<Element, N> const& begin,
        std::array<Element, N> const& end,
        Functor const& f,
        Is const&... is) noexcept
{
    constexpr std::size_t rank = sizeof...(Is);
    if constexpr (rank != N) {
        // Still dimensions left to loop over: fix the current one and recurse.
        for (Element idx = begin[rank]; idx < end[rank]; ++idx) {
            host_for_each_serial<RetType>(begin, end, f, is..., idx);
        }
    } else {
        // All N indices are fixed: hand the assembled position to the functor.
        f(RetType(is...));
    }
}

} // namespace detail

/** iterates over a nD domain in serial
* This version must only be called from a host-device function.
tpadioleau marked this conversation as resolved.
Show resolved Hide resolved
* @param[in] domain the domain over which to iterate
* @param[in] f a functor taking an index as parameter
*/
template <class... DDims, class Functor>
KOKKOS_FUNCTION void for_each(DiscreteDomain<DDims...> const& domain, Functor&& f) noexcept
{
    using Element = DiscreteElement<DDims...>;

    // Half-open iteration range: [front, front + extents)
    Element const first = domain.front();
    Element const past_the_end = domain.front() + domain.extents();

    // The recursive helper works on plain per-dimension index arrays
    std::array const lower = detail::array(first);
    std::array const upper = detail::array(past_the_end);

    detail::for_each_serial<Element>(lower, upper, std::forward<Functor>(f));
}

/** iterates over a nD domain in serial.
* This version must only be called from the host.
* @param[in] domain the domain over which to iterate
* @param[in] f a functor taking an index as parameter
*/
template <class... DDims, class Functor>
void for_each(DiscreteDomain<DDims...> const& domain, Functor&& f) noexcept
void host_for_each(DiscreteDomain<DDims...> const& domain, Functor&& f) noexcept
{
DiscreteElement<DDims...> const ddc_begin = domain.front();
DiscreteElement<DDims...> const ddc_end = domain.front() + domain.extents();
Expand Down
72 changes: 70 additions & 2 deletions include/ddc/transform_reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ template <
class BinaryReductionOp,
class UnaryTransformOp,
class... DCoords>
T transform_reduce_serial(
KOKKOS_FUNCTION T transform_reduce_serial(
DiscreteDomain<DDims...> const& domain,
[[maybe_unused]] T const neutral,
BinaryReductionOp const& reduce,
Expand All @@ -58,9 +58,54 @@ T transform_reduce_serial(
DDC_IF_NVCC_THEN_POP
}

/** A serial reduction over a nD domain
* @param[in] domain the range over which to apply the algorithm
* @param[in] neutral the neutral element of the reduction operation
* @param[in] reduce a binary FunctionObject that will be applied in unspecified order to the
* results of transform, the results of other reduce and neutral.
* @param[in] transform a unary FunctionObject that will be applied to each element of the input
* range. The return type must be acceptable as input to reduce
* @param[in] dcoords discrete elements from dimensions already in a loop
*/
template <
class... DDims,
class T,
class BinaryReductionOp,
class UnaryTransformOp,
class... DCoords>
KOKKOS_FUNCTION T host_transform_reduce_serial(
DiscreteDomain<DDims...> const& domain,
[[maybe_unused]] T const neutral,
BinaryReductionOp const& reduce,
UnaryTransformOp const& transform,
DCoords const&... dcoords) noexcept
{
DDC_IF_NVCC_THEN_PUSH_AND_SUPPRESS(implicit_return_from_non_void_function)
if constexpr (sizeof...(DCoords) == sizeof...(DDims)) {
return transform(DiscreteElement<DDims...>(dcoords...));
} else {
using CurrentDDim = type_seq_element_t<sizeof...(DCoords), detail::TypeSeq<DDims...>>;
T result = neutral;
for (DiscreteElement<CurrentDDim> const ii : select<CurrentDDim>(domain)) {
result = reduce(
result,
host_transform_reduce_serial(
domain,
neutral,
reduce,
transform,
dcoords...,
ii));
}
return result;
}
DDC_IF_NVCC_THEN_POP
}

} // namespace detail

/** A reduction over a nD domain in serial
* This version must only be called from a host-device function.
tpadioleau marked this conversation as resolved.
Show resolved Hide resolved
* @param[in] domain the range over which to apply the algorithm
* @param[in] neutral the neutral element of the reduction operation
* @param[in] reduce a binary FunctionObject that will be applied in unspecified order to the
Expand All @@ -69,7 +114,7 @@ T transform_reduce_serial(
* range. The return type must be acceptable as input to reduce
*/
template <class... DDims, class T, class BinaryReductionOp, class UnaryTransformOp>
T transform_reduce(
KOKKOS_FUNCTION T transform_reduce(
DiscreteDomain<DDims...> const& domain,
T neutral,
BinaryReductionOp&& reduce,
Expand All @@ -82,4 +127,27 @@ T transform_reduce(
std::forward<UnaryTransformOp>(transform));
}

/** A reduction over a nD domain in serial
 * This version must only be called from the host.
 *
 * Thin entry point: forwards its arguments to detail::host_transform_reduce_serial without any
 * already-fixed coordinate, so the helper starts looping at the first dimension.
 *
 * @param[in] domain the range over which to apply the algorithm
 * @param[in] neutral the neutral element of the reduction operation
 * @param[in] reduce a binary FunctionObject that will be applied in unspecified order to the
 * results of transform, the results of other reduce and neutral.
 * @param[in] transform a unary FunctionObject that will be applied to each element of the input
 * range. The return type must be acceptable as input to reduce
 * @return the value returned by detail::host_transform_reduce_serial for the whole domain
 */
template <class... DDims, class T, class BinaryReductionOp, class UnaryTransformOp>
T host_transform_reduce(
DiscreteDomain<DDims...> const& domain,
T neutral,
BinaryReductionOp&& reduce,
UnaryTransformOp&& transform) noexcept
{
// The helper takes both operations by const reference; they are simply forwarded here.
return detail::host_transform_reduce_serial(
domain,
neutral,
std::forward<BinaryReductionOp>(reduce),
std::forward<UnaryTransformOp>(transform));
}

} // namespace ddc
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ add_executable(ddc_tests
parallel_for_each.cpp
parallel_deepcopy.cpp
parallel_transform_reduce.cpp
nested_algorithms.cpp
)
target_compile_features(ddc_tests PUBLIC cxx_std_17)
target_link_libraries(ddc_tests
Expand Down
6 changes: 3 additions & 3 deletions tests/fft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ static void test_fft()
return x * x;
};

double const criterion = Kokkos::sqrt(ddc::transform_reduce(
double const criterion = Kokkos::sqrt(ddc::host_transform_reduce(
Ff_host.domain(),
0.,
ddc::reducer::sum<double>(),
Expand All @@ -103,7 +103,7 @@ static void test_fft()
return pow2(diff) / denom;
}));

double const criterion2 = Kokkos::sqrt(ddc::transform_reduce(
double const criterion2 = Kokkos::sqrt(ddc::host_transform_reduce(
FFf_host.domain(),
0.,
ddc::reducer::sum<double>(),
Expand Down Expand Up @@ -154,7 +154,7 @@ static void test_fft_norm(ddc::FFT_Normalization const norm)
ddc::ChunkSpan const FFf = FFf_alloc.span_view();
ddc::ifft(exec_space, FFf, Ff_bis, {norm});

double const f_sum = ddc::transform_reduce(f.domain(), 0., ddc::reducer::sum<double>(), f);
double const f_sum = ddc::host_transform_reduce(f.domain(), 0., ddc::reducer::sum<double>(), f);

double Ff0_expected;
double FFf_expected;
Expand Down
8 changes: 4 additions & 4 deletions tests/for_each.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ TEST(ForEachSerialHost, Empty)
DDomX const dom(lbound_x, DVectX(0));
std::vector<int> storage(dom.size(), 0);
ddc::ChunkSpan<int, DDomX> const view(storage.data(), dom);
ddc::for_each(dom, [=](DElemX const ix) { view(ix) += 1; });
ddc::host_for_each(dom, [=](DElemX const ix) { view(ix) += 1; });
EXPECT_EQ(std::count(storage.begin(), storage.end(), 1), dom.size())
<< std::count(storage.begin(), storage.end(), 1) << std::endl;
}
Expand All @@ -57,7 +57,7 @@ TEST(ForEachSerialHost, ZeroDimension)
DDom0D const dom;
int storage = 0;
ddc::ChunkSpan<int, DDom0D> const view(&storage, dom);
ddc::for_each(dom, [=](DElem0D const ii) { view(ii) += 1; });
ddc::host_for_each(dom, [=](DElem0D const ii) { view(ii) += 1; });
EXPECT_EQ(storage, 1) << storage << std::endl;
}

Expand All @@ -66,7 +66,7 @@ TEST(ForEachSerialHost, OneDimension)
DDomX const dom(lbound_x, nelems_x);
std::vector<int> storage(dom.size(), 0);
ddc::ChunkSpan<int, DDomX> const view(storage.data(), dom);
ddc::for_each(dom, [=](DElemX const ix) { view(ix) += 1; });
ddc::host_for_each(dom, [=](DElemX const ix) { view(ix) += 1; });
EXPECT_EQ(std::count(storage.begin(), storage.end(), 1), dom.size());
}

Expand All @@ -75,6 +75,6 @@ TEST(ForEachSerialHost, TwoDimensions)
DDomXY const dom(lbound_x_y, nelems_x_y);
std::vector<int> storage(dom.size(), 0);
ddc::ChunkSpan<int, DDomXY> const view(storage.data(), dom);
ddc::for_each(dom, [=](DElemXY const ixy) { view(ixy) += 1; });
ddc::host_for_each(dom, [=](DElemXY const ixy) { view(ixy) += 1; });
EXPECT_EQ(std::count(storage.begin(), storage.end(), 1), dom.size());
}
Loading