diff --git a/examples/characteristics_advection.cpp b/examples/characteristics_advection.cpp index 9798e5211..f9ce8fe54 100644 --- a/examples/characteristics_advection.cpp +++ b/examples/characteristics_advection.cpp @@ -192,7 +192,10 @@ int main(int argc, char** argv) //! [initial output] // display the initial data - ddc::deepcopy(host_density_alloc, last_density_alloc); + ddc::deepcopy( + Kokkos::DefaultExecutionSpace(), + host_density_alloc.span_view(), + last_density_alloc.span_cview()); display(ddc::coordinate(time_domain.front()), host_density_alloc[x_domain][y_domain]); // time of the iteration where the last output happened @@ -279,7 +282,10 @@ int main(int argc, char** argv) //! [output] if (iter - last_output >= t_output_period) { last_output = iter; - ddc::deepcopy(host_density_alloc, last_density_alloc); + ddc::deepcopy( + Kokkos::DefaultExecutionSpace(), + host_density_alloc.span_view(), + last_density_alloc.span_cview()); display(ddc::coordinate(iter), host_density_alloc[x_domain][y_domain]); } @@ -293,7 +299,10 @@ int main(int argc, char** argv) //! 
[final output] if (last_output < time_domain.back()) { - ddc::deepcopy(host_density_alloc, last_density_alloc); + ddc::deepcopy( + Kokkos::DefaultExecutionSpace(), + host_density_alloc.span_view(), + last_density_alloc.span_cview()); display(ddc::coordinate(time_domain.back()), host_density_alloc[x_domain][y_domain]); } diff --git a/examples/game_of_life.cpp b/examples/game_of_life.cpp index a733fa940..b5622e002 100644 --- a/examples/game_of_life.cpp +++ b/examples/game_of_life.cpp @@ -92,7 +92,9 @@ int main() std::size_t iter = 0; for (; iter < nt; ++iter) { - ddc::deepcopy(cells_in_host_alloc, cells_in); + ddc::deepcopy( + cells_in_host_alloc.span_view(), + cells_in.span_cview()); print_2DChunk(std::cout, cells_in_host_alloc.span_cview()) << "\n"; ddc::parallel_for_each( @@ -125,7 +127,9 @@ int main() }); ddc::deepcopy(cells_in, cells_out); } - ddc::deepcopy(cells_in_host_alloc, cells_in); + ddc::deepcopy( + cells_in_host_alloc.span_view(), + cells_in.span_cview()); print_2DChunk(std::cout, cells_in_host_alloc.span_cview()) << "\n"; return 0; diff --git a/examples/heat_equation.cpp b/examples/heat_equation.cpp index 2ac2fd20a..819fec35d 100644 --- a/examples/heat_equation.cpp +++ b/examples/heat_equation.cpp @@ -258,7 +258,10 @@ int main(int argc, char** argv) //! [initial output] // display the initial data - ddc::deepcopy(ghosted_temp, ghosted_last_temp); + ddc::deepcopy( + Kokkos::DefaultExecutionSpace(), + ghosted_temp, + ghosted_last_temp); display(ddc::coordinate(time_domain.front()), ghosted_temp[x_domain][y_domain]); // time of the iteration where the last output happened @@ -273,15 +276,19 @@ int main(int argc, char** argv) //! 
[boundary conditions] // Periodic boundary conditions ddc::deepcopy( + Kokkos::DefaultExecutionSpace(), ghosted_last_temp[x_pre_ghost][y_domain], ghosted_last_temp[y_domain][x_domain_end]); ddc::deepcopy( + Kokkos::DefaultExecutionSpace(), ghosted_last_temp[y_domain][x_post_ghost], ghosted_last_temp[y_domain][x_domain_begin]); ddc::deepcopy( + Kokkos::DefaultExecutionSpace(), ghosted_last_temp[x_domain][y_pre_ghost], ghosted_last_temp[x_domain][y_domain_end]); ddc::deepcopy( + Kokkos::DefaultExecutionSpace(), ghosted_last_temp[x_domain][y_post_ghost], ghosted_last_temp[x_domain][y_domain_begin]); //! [boundary conditions] @@ -331,7 +338,10 @@ int main(int argc, char** argv) //! [output] if (iter - last_output_iter >= t_output_period) { last_output_iter = iter; - ddc::deepcopy(ghosted_temp, ghosted_last_temp); + ddc::deepcopy( + Kokkos::DefaultExecutionSpace(), + ghosted_temp, + ghosted_last_temp); display(ddc::coordinate(iter), ghosted_temp[x_domain][y_domain]); } @@ -345,7 +355,10 @@ int main(int argc, char** argv) //! [final output] if (last_output_iter < time_domain.back()) { - ddc::deepcopy(ghosted_temp, ghosted_last_temp); + ddc::deepcopy( + Kokkos::DefaultExecutionSpace(), + ghosted_temp, + ghosted_last_temp); display(ddc::coordinate(time_domain.back()), ghosted_temp[x_domain][y_domain]); } diff --git a/examples/heat_equation_spectral.cpp b/examples/heat_equation_spectral.cpp index e252b9367..b64de0b65 100644 --- a/examples/heat_equation_spectral.cpp +++ b/examples/heat_equation_spectral.cpp @@ -201,7 +201,10 @@ int main(int argc, char** argv) //! [initial output] // display the initial data - ddc::deepcopy(_host_temp, _last_temp); + ddc::deepcopy( + Kokkos::DefaultExecutionSpace(), + _host_temp, + _last_temp); display(ddc::coordinate(time_domain.front()), _host_temp[x_domain][y_domain]); // time of the iteration where the last output happened @@ -274,7 +277,10 @@ int main(int argc, char** argv) //! 
[output] if (iter - last_output >= t_output_period) { last_output = iter; - ddc::deepcopy(_host_temp, _last_temp); + ddc::deepcopy( + Kokkos::DefaultExecutionSpace(), + _host_temp, + _last_temp); display(ddc::coordinate(iter), _host_temp[x_domain][y_domain]); } @@ -288,7 +294,10 @@ int main(int argc, char** argv) //! [final output] if (last_output < time_domain.back()) { - ddc::deepcopy(_host_temp, _last_temp); + ddc::deepcopy( + Kokkos::DefaultExecutionSpace(), + _host_temp, + _last_temp); display(ddc::coordinate(time_domain.back()), _host_temp[x_domain][y_domain]); } diff --git a/include/ddc/deepcopy.hpp b/include/ddc/deepcopy.hpp index b05780774..17bdf06bb 100644 --- a/include/ddc/deepcopy.hpp +++ b/include/ddc/deepcopy.hpp @@ -7,16 +7,49 @@ #include #include "ddc/chunk_common.hpp" +#include "ddc/chunk_span.hpp" namespace ddc { +/** Copy the content of a borrowed chunk into another with Kokkos::deep_copy, passing `exec_space` as its execution space + * @param[out] dst the borrowed chunk in which to copy + * @param[in] src the borrowed chunk from which to copy, asserted to have the same extents as `dst` + * @return dst as a ChunkSpan +*/ +template +auto parallel_deepcopy(ExecutionSpace&& exec_space, ChunkDst&& dst, ChunkSrc&& src) +{ + static_assert(is_borrowed_chunk_v); + static_assert(is_borrowed_chunk_v); + static_assert( + std::is_assignable_v, chunk_reference_t>, + "Not assignable"); + assert(dst.domain().extents() == src.domain().extents()); + Kokkos::deep_copy( + std::forward(exec_space), + dst.allocation_kokkos_view(), + src.allocation_kokkos_view()); + return dst.span_view(); +} + /** Copy the content of a borrowed chunk into another * @param[out] dst the borrowed chunk in which to copy * @param[in] src the borrowed chunk from which to copy * @return dst as a ChunkSpan */ template -auto deepcopy(ChunkDst&& dst, ChunkSrc&& src) +auto parallel_deepcopy(ChunkDst&& dst, ChunkSrc&& src) +{ + return parallel_deepcopy(Kokkos::DefaultExecutionSpace(), std::forward(dst), std::forward(src)); +} + +/** Copy the content of a borrowed chunk into another + * @param[out] dst the borrowed chunk in which to copy 
+ * @param[in] src the borrowed chunk from which to copy, asserted to have the same extents as `dst` + * @return dst as a ChunkSpan +*/ +template +auto deepcopy(ExecutionSpace&& exec_space, ChunkDst&& dst, ChunkSrc&& src) { static_assert(is_borrowed_chunk_v); static_assert(is_borrowed_chunk_v); @@ -24,8 +57,43 @@ auto deepcopy(ChunkDst&& dst, ChunkSrc&& src) std::is_assignable_v, chunk_reference_t>, "Not assignable"); assert(dst.domain().extents() == src.domain().extents()); - Kokkos::deep_copy(dst.allocation_kokkos_view(), src.allocation_kokkos_view()); + Kokkos::deep_copy( + std::forward(exec_space), + dst.allocation_kokkos_view(), + src.allocation_kokkos_view()); return dst.span_view(); } +/** Copy the content of a chunk span into another, one element at a time through for_each (declared KOKKOS_FUNCTION) + * @param[out] dst the chunk span in which to copy, asserted to have the same extents as `src` + * @param[in] src the chunk span from which to copy + * @return dst; NOTE(review): for_each is called below but this patch adds no ddc/for_each.hpp include to this header, confirm it is reachable +*/ +template < + class ElementTypeDst, + class SupportDst, + class LayoutDst, + class MemorySpaceDst, + class ElementTypeSrc, + class SupportSrc, + class LayoutSrc, + class MemorySpaceSrc> +KOKKOS_FUNCTION auto deepcopy( + ChunkSpan const& dst, + ChunkSpan const& src) +{ + using ChunkDst = ChunkSpan; + using ChunkSrc = ChunkSpan; + static_assert( + std::is_assignable_v, chunk_reference_t>, + "Not assignable"); + assert(dst.domain().extents() == src.domain().extents()); + KOKKOS_ASSERT( + (Kokkos::SpaceAccessibility::accessible)); + KOKKOS_ASSERT( + (Kokkos::SpaceAccessibility::accessible)); + for_each(dst.domain(), [&](auto elem) { dst(elem) = src(elem); }); + return dst; +} + } // namespace ddc diff --git a/include/ddc/fill.hpp b/include/ddc/fill.hpp index f2d025e57..53abe3a23 100644 --- a/include/ddc/fill.hpp +++ b/include/ddc/fill.hpp @@ -3,10 +3,14 @@ #pragma once #include +#include +#include #include -#include "ddc/chunk_common.hpp" +#include "ddc/chunk_span.hpp" +#include "ddc/detail/macros.hpp" +#include "ddc/for_each.hpp" namespace ddc { @@ -15,13 +19,47 @@ namespace ddc { * @param[in] value the value to fill `dst` * 
@return dst as a ChunkSpan */ -template -auto fill(ChunkDst&& dst, T const& value) +template +auto parallel_fill(ExecutionSpace&& exec_space, ChunkDst&& dst, T const& value) { static_assert(is_borrowed_chunk_v); static_assert(std::is_assignable_v, T>, "Not assignable"); - Kokkos::deep_copy(dst.allocation_kokkos_view(), value); + Kokkos::deep_copy( + std::forward(exec_space), + dst.allocation_kokkos_view(), + value); return dst.span_view(); } +/** Fill a borrowed chunk with a given value, using Kokkos::DefaultExecutionSpace as the execution space + * @param[out] dst the borrowed chunk to fill + * @param[in] value the value to fill `dst` + * @return dst as a ChunkSpan + */ +template +auto parallel_fill(ChunkDst&& dst, T const& value) +{ + return parallel_fill(Kokkos::DefaultExecutionSpace(), std::forward(dst), value); +} + +/** Fill a chunk span with a given value, one element at a time through for_each (declared KOKKOS_FUNCTION) + * @param[out] dst the chunk span to fill + * @param[in] value the value assigned to every element of `dst` + * @return dst + */ +template +KOKKOS_FUNCTION auto fill( + ChunkSpan const& dst, + T const& value) +{ + static_assert( + std::is_assignable_v< + chunk_reference_t>, + T>, + "Not assignable"); + KOKKOS_ASSERT((Kokkos::SpaceAccessibility::accessible)); + for_each(dst.domain(), [&](auto elem) { dst(elem) = value; }); + return dst; +} + } // namespace ddc diff --git a/include/ddc/for_each.hpp b/include/ddc/for_each.hpp index de46addb5..8d3347737 100644 --- a/include/ddc/for_each.hpp +++ b/include/ddc/for_each.hpp @@ -28,7 +28,7 @@ class ForEachKokkosLambdaAdapter F m_f; public: - ForEachKokkosLambdaAdapter(F const& f) : m_f(f) {} + explicit ForEachKokkosLambdaAdapter(F const& f) : m_f(f) {} template = true> KOKKOS_IMPL_FORCEINLINE void operator()([[maybe_unused]] index_type unused_id) const @@ -145,13 +145,13 @@ inline void for_each_kokkos( } template -inline void for_each_serial( +KOKKOS_FUNCTION void for_each_serial( std::array const& begin, std::array const& end, Functor const& f, Is const&... 
is) noexcept { - static constexpr std::size_t I = sizeof...(Is); + constexpr std::size_t I = sizeof...(Is); if constexpr (I == N) { f(RetType(is...)); } else { @@ -168,7 +168,7 @@ inline void for_each_serial( * @param[in] f a functor taking an index as parameter */ template -inline void for_each(DiscreteDomain const& domain, Functor&& f) noexcept +KOKKOS_FUNCTION void for_each(DiscreteDomain const& domain, Functor&& f) noexcept { DiscreteElement const ddc_begin = domain.front(); DiscreteElement const ddc_end = domain.front() + domain.extents(); diff --git a/include/ddc/mirror.hpp b/include/ddc/mirror.hpp index 9f0232698..77af4dfea 100644 --- a/include/ddc/mirror.hpp +++ b/include/ddc/mirror.hpp @@ -41,7 +41,7 @@ auto create_mirror_and_copy( Support, KokkosAllocator, typename Space::memory_space>> chunk(src.domain()); - deepcopy(chunk, src); + deepcopy(chunk.span_view(), src); return chunk; } diff --git a/include/ddc/transform_reduce.hpp b/include/ddc/transform_reduce.hpp index 1bab4efa6..2052d99f3 100644 --- a/include/ddc/transform_reduce.hpp +++ b/include/ddc/transform_reduce.hpp @@ -101,7 +101,7 @@ template < class BinaryReductionOp, class UnaryTransformOp, class... 
DCoords> -inline T transform_reduce_serial( +KOKKOS_FUNCTION T transform_reduce_serial( DiscreteDomain const& domain, [[maybe_unused]] T const neutral, BinaryReductionOp const& reduce, @@ -186,6 +186,7 @@ class TransformReducerKokkosLambdaAdapter */ template inline T transform_reduce_kokkos( + ExecSpace const&, [[maybe_unused]] DiscreteDomain<> const& domain, T neutral, BinaryReductionOp const& reduce, @@ -220,6 +221,7 @@ inline T transform_reduce_kokkos( */ template inline T transform_reduce_kokkos( + ExecSpace const& exec_space, DiscreteDomain const& domain, T neutral, BinaryReductionOp const& reduce, @@ -265,6 +267,7 @@ template < class BinaryReductionOp, class UnaryTransformOp> inline T transform_reduce_kokkos( + ExecSpace const&, DiscreteDomain const& domain, T neutral, BinaryReductionOp const& reduce, @@ -318,8 +321,7 @@ inline T transform_reduce_kokkos( * range. The return type must be acceptable as input to reduce */ template -inline T transform_reduce( - [[maybe_unused]] serial_host_policy policy, +KOKKOS_FUNCTION T transform_reduce( DiscreteDomain const& domain, T neutral, BinaryReductionOp&& reduce, @@ -341,62 +343,21 @@ inline T transform_reduce( * @param[in] transform a unary FunctionObject that will be applied to each element of the input * range. 
The return type must be acceptable as input to reduce */ -template -inline T transform_reduce( - [[maybe_unused]] parallel_host_policy policy, - DiscreteDomain const& domain, - T neutral, - BinaryReductionOp&& reduce, - UnaryTransformOp&& transform) noexcept -{ - return detail::transform_reduce_kokkos( - domain, - neutral, - std::forward(reduce), - std::forward(transform)); -} - -/** A reduction over a nD domain using the Kokkos execution policy - * @param[in] policy the execution policy to use - * @param[in] domain the range over which to apply the algorithm - * @param[in] neutral the neutral element of the reduction operation - * @param[in] reduce a binary FunctionObject that will be applied in unspecified order to the - * results of transform, the results of other reduce and neutral. - * @param[in] transform a unary FunctionObject that will be applied to each element of the input - * range. The return type must be acceptable as input to reduce - */ -template -inline T transform_reduce( - [[maybe_unused]] parallel_device_policy policy, - DiscreteDomain const& domain, - T neutral, - BinaryReductionOp&& reduce, - UnaryTransformOp&& transform) noexcept -{ - return detail::transform_reduce_kokkos( - domain, - neutral, - std::forward(reduce), - std::forward(transform)); -} - -/** A reduction over a nD domain using the default execution policy - * @param[in] domain the range over which to apply the algorithm - * @param[in] neutral the neutral element of the reduction operation - * @param[in] reduce a binary FunctionObject that will be applied in unspecified order to the - * results of transform, the results of other reduce and neutral. - * @param[in] transform a unary FunctionObject that will be applied to each element of the input - * range. The return type must be acceptable as input to reduce - */ -template -inline T transform_reduce( +template < + class ExecutionSpace, + class... 
DDims, + class T, + class BinaryReductionOp, + class UnaryTransformOp> +T parallel_transform_reduce( + ExecutionSpace&& exec_space, DiscreteDomain const& domain, T neutral, BinaryReductionOp&& reduce, UnaryTransformOp&& transform) noexcept { - return transform_reduce( - default_policy(), + return detail::transform_reduce_kokkos( + exec_space, domain, neutral, std::forward(reduce), diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5605553ea..b98f83ad8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -23,6 +23,7 @@ add_executable(ddc_tests discrete_element.cpp discrete_vector.cpp discrete_space.cpp + nested_algorithms.cpp ) target_compile_features(ddc_tests PUBLIC cxx_std_17) target_link_libraries(ddc_tests diff --git a/tests/chunk.cpp b/tests/chunk.cpp index a10370622..7eef36991 100644 --- a/tests/chunk.cpp +++ b/tests/chunk.cpp @@ -396,7 +396,7 @@ TEST(Chunk1DTest, Deepcopy) chunk(ix) = 1.001 * ix.uid(); } ChunkX chunk2(chunk.domain()); - ddc::deepcopy(chunk2, chunk); + ddc::deepcopy(chunk2.span_view(), chunk.span_cview()); for (auto&& ix : chunk.domain()) { // we expect exact equality, not EXPECT_DOUBLE_EQ: these are copy EXPECT_EQ(chunk2(ix), chunk(ix)); @@ -583,7 +583,7 @@ TEST(Chunk2DTest, Deepcopy) } } ChunkXY chunk2(chunk.domain()); - ddc::deepcopy(chunk2, chunk); + ddc::deepcopy(chunk2.span_view(), chunk.span_cview()); for (auto&& ix : chunk.domain()) { for (auto&& iy : chunk.domain()) { // we expect complete equality, not EXPECT_DOUBLE_EQ: these are copy @@ -603,7 +603,7 @@ TEST(Chunk2DTest, DeepcopyReordered) ChunkYX chunk2(ddc::select(chunk.domain())); ddc::ChunkSpan const chunk2_view(chunk2.data_handle(), chunk.domain()); - ddc::deepcopy(chunk2_view, chunk); + ddc::deepcopy(chunk2_view.span_view(), chunk.span_cview()); for (auto&& ix : chunk.domain()) { for (auto&& iy : chunk.domain()) { // we expect complete equality, not EXPECT_DOUBLE_EQ: these are copy @@ -635,7 +635,7 @@ TEST(Chunk3DTest, AccessFromDiscreteElements) 
TEST(Chunk2DTest, Mirror) { ChunkXY chunk(dom_x_y); - ddc::fill(chunk, 1.4); + ddc::fill(chunk.span_view(), 1.4); auto const chunk2 = ddc::create_mirror_and_copy(chunk.span_cview()); for (auto&& ix : chunk.domain()) { for (auto&& iy : chunk.domain()) { diff --git a/tests/fft.cpp b/tests/fft.cpp index 5dafc561a..b5b4e0592 100644 --- a/tests/fft.cpp +++ b/tests/fft.cpp @@ -55,7 +55,7 @@ static void test_fft() }); ddc::Chunk f_bis_alloc(f.domain(), ddc::KokkosAllocator()); ddc::ChunkSpan const f_bis = f_bis_alloc.span_view(); - ddc::deepcopy(f_bis, f); + ddc::deepcopy(exec_space, f_bis, f); ddc::Chunk Ff_alloc(k_mesh, ddc::KokkosAllocator()); ddc::ChunkSpan const Ff = Ff_alloc.span_view(); @@ -65,7 +65,7 @@ static void test_fft() // deepcopy of Ff because FFT C2R overwrites the input ddc::Chunk Ff_bis_alloc(Ff.domain(), ddc::KokkosAllocator()); ddc::ChunkSpan const Ff_bis = Ff_bis_alloc.span_view(); - ddc::deepcopy(Ff_bis, Ff); + ddc::deepcopy(exec_space, Ff_bis, Ff); ddc::Chunk FFf_alloc(f.domain(), ddc::KokkosAllocator()); ddc::ChunkSpan const FFf = FFf_alloc.span_view(); @@ -73,15 +73,15 @@ static void test_fft() ddc::Chunk f_host_alloc(f.domain(), ddc::HostAllocator()); ddc::ChunkSpan const f_host = f_host_alloc.span_view(); - ddc::deepcopy(f_host, f); + ddc::deepcopy(exec_space, f_host, f); ddc::Chunk Ff_host_alloc(Ff.domain(), ddc::HostAllocator()); ddc::ChunkSpan const Ff_host = Ff_host_alloc.span_view(); - ddc::deepcopy(Ff_host, Ff); + ddc::deepcopy(exec_space, Ff_host, Ff); ddc::Chunk FFf_host_alloc(FFf.domain(), ddc::HostAllocator()); ddc::ChunkSpan const FFf_host = FFf_host_alloc.span_view(); - ddc::deepcopy(FFf_host, FFf); + ddc::deepcopy(exec_space, FFf_host, FFf); auto const pow2 = KOKKOS_LAMBDA(double x) { @@ -132,11 +132,11 @@ static void test_fft_norm(ddc::FFT_Normalization const norm) ddc::Chunk f_alloc = ddc::Chunk(x_mesh, ddc::KokkosAllocator()); ddc::ChunkSpan const f = f_alloc.span_view(); - ddc::fill(f, Tin(1)); + ddc::parallel_fill(exec_space, 
f, Tin(1)); ddc::Chunk f_bis_alloc(f.domain(), ddc::KokkosAllocator()); ddc::ChunkSpan const f_bis = f_bis_alloc.span_view(); - ddc::deepcopy(f_bis, f); + ddc::deepcopy(exec_space, f_bis, f); ddc::Chunk Ff_alloc(k_mesh, ddc::KokkosAllocator()); ddc::ChunkSpan const Ff = Ff_alloc.span_view(); @@ -146,7 +146,7 @@ static void test_fft_norm(ddc::FFT_Normalization const norm) // deepcopy of Ff because FFT C2R overwrites the input ddc::Chunk Ff_bis_alloc(Ff.domain(), ddc::KokkosAllocator()); ddc::ChunkSpan const Ff_bis = Ff_bis_alloc.span_view(); - ddc::deepcopy(Ff_bis, Ff); + ddc::deepcopy(exec_space, Ff_bis, Ff); ddc::Chunk FFf_alloc(x_mesh, ddc::KokkosAllocator()); ddc::ChunkSpan const FFf = FFf_alloc.span_view(); diff --git a/tests/nested_algorithms.cpp b/tests/nested_algorithms.cpp new file mode 100644 index 000000000..061815215 --- /dev/null +++ b/tests/nested_algorithms.cpp @@ -0,0 +1,118 @@ +#include + +#include + +namespace { + +struct DDimX; +using DElemX = ddc::DiscreteElement; +using DVectX = ddc::DiscreteVector; +using DDomX = ddc::DiscreteDomain; + +static DElemX constexpr lbound_x(0); +static DVectX constexpr nelems_x(10); + +} // namespace + +void test_nested_algorithms_for_each() +{ + DDomX ddom(lbound_x, nelems_x); + ddc::Chunk chk(ddom, ddc::DeviceAllocator()); + ddc::ChunkSpan chks = chk.span_view(); + ddc::parallel_for_each( + Kokkos::DefaultExecutionSpace(), + ddom.take_first(DVectX(1)), + KOKKOS_LAMBDA(DElemX) { + ddc::for_each(chks.domain(), [&](DElemX elem) { chks(elem) = 10; }); + }); + int res = ddc::parallel_transform_reduce( + Kokkos::DefaultExecutionSpace(), + ddom, + 0, + ddc::reducer::sum(), + chks); + EXPECT_EQ(res, 10 * ddom.size()); +} + +TEST(NestedAlgorithms, ForEach) +{ + test_nested_algorithms_for_each(); +} + +void test_nested_algorithms_transform_reduce() +{ + DDomX ddom(lbound_x, nelems_x); + ddc::Chunk chk(ddom, ddc::DeviceAllocator()); + ddc::ChunkSpan chks = chk.span_view(); + ddc::parallel_for_each( + 
Kokkos::DefaultExecutionSpace(), + ddom, + KOKKOS_LAMBDA(DElemX elem) { + chks(elem) = ddc::transform_reduce( + DDomX(lbound_x, DVectX(10)), + 0, + ddc::reducer::sum(), + [&](DElemX) { return 1; }); + }); + int res = ddc::parallel_transform_reduce( + Kokkos::DefaultExecutionSpace(), + ddom, + 0, + ddc::reducer::sum(), + chks); + EXPECT_EQ(res, 10 * ddom.size()); +} + +TEST(NestedAlgorithms, TransformReduce) +{ + test_nested_algorithms_transform_reduce(); +} + +void test_nested_algorithms_fill() +{ + DDomX ddom(lbound_x, nelems_x); + ddc::Chunk chk(ddom, ddc::DeviceAllocator()); + ddc::ChunkSpan chks = chk.span_view(); + ddc::parallel_for_each( + Kokkos::DefaultExecutionSpace(), + ddom.take_first(DVectX(1)), + KOKKOS_LAMBDA(DElemX) { ddc::fill(chks, 10); }); + int res = ddc::parallel_transform_reduce( + Kokkos::DefaultExecutionSpace(), + ddom, + 0, + ddc::reducer::sum(), + chks); + EXPECT_EQ(res, 10 * ddom.size()); +} + +TEST(NestedAlgorithms, Fill) +{ + test_nested_algorithms_fill(); +} + +void test_nested_algorithms_deepcopy() +{ + DDomX ddom(lbound_x, nelems_x); + ddc::Chunk chk(ddom, ddc::DeviceAllocator()); + ddc::ChunkSpan chks = chk.span_view(); + ddc::parallel_fill(Kokkos::DefaultExecutionSpace(), chks, 10); + ddc::Chunk chk2(ddom, ddc::DeviceAllocator()); + ddc::ChunkSpan chk2s = chk2.span_view(); + ddc::parallel_for_each( + Kokkos::DefaultExecutionSpace(), + ddom.take_first(DVectX(1)), + KOKKOS_LAMBDA(DElemX) { ddc::deepcopy(chk2s, chks); }); + int res = ddc::parallel_transform_reduce( + Kokkos::DefaultExecutionSpace(), + ddom, + 0, + ddc::reducer::sum(), + chk2s); + EXPECT_EQ(res, 10 * ddom.size()); +} + +TEST(NestedAlgorithms, Deepcopy) +{ + test_nested_algorithms_deepcopy(); +} diff --git a/tests/relocatable_device_code.cpp b/tests/relocatable_device_code.cpp index bd1dc55d9..a763843c7 100644 --- a/tests/relocatable_device_code.cpp +++ b/tests/relocatable_device_code.cpp @@ -20,7 +20,7 @@ std::pair, ddc::Coordinate> read_from_devi 
dom_x.take_last(rdc::DVectX(1)), KOKKOS_LAMBDA(rdc::DElemX const ix) { array(ix) = ddc::step(); }); ddc::Chunk allocation_h(dom_x, ddc::HostAllocator()); - ddc::deepcopy(allocation_h, allocation_d); + ddc::deepcopy(Kokkos::DefaultExecutionSpace(), allocation_h, allocation_d); return std::pair< ddc::Coordinate, ddc::Coordinate>(allocation_h(rdc::DElemX(0)), allocation_h(rdc::DElemX(1))); diff --git a/tests/splines/batched_2d_spline_builder.cpp b/tests/splines/batched_2d_spline_builder.cpp index d5be12260..ff3fabd7e 100644 --- a/tests/splines/batched_2d_spline_builder.cpp +++ b/tests/splines/batched_2d_spline_builder.cpp @@ -593,16 +593,16 @@ static void Batched2dSplineTest() I2>(spline_eval_deriv12, coords_eval.span_cview(), coef.span_cview()); // Checking errors (we recover the initial values) - double max_norm_error = ddc::transform_reduce( - ddc::policies::policy(exec_space), + double max_norm_error = ddc::parallel_transform_reduce( + exec_space, spline_eval.domain(), 0., ddc::reducer::max(), KOKKOS_LAMBDA(Index...> const e) { return Kokkos::abs(spline_eval(e) - vals(e)); }); - double max_norm_error_diff1 = ddc::transform_reduce( - ddc::policies::policy(exec_space), + double max_norm_error_diff1 = ddc::parallel_transform_reduce( + exec_space, spline_eval_deriv1.domain(), 0., ddc::reducer::max(), @@ -611,8 +611,8 @@ static void Batched2dSplineTest() Coord const y = ddc::coordinate(ddc::select>(e)); return Kokkos::abs(spline_eval_deriv1(e) - evaluator.deriv(x, y, 1, 0)); }); - double max_norm_error_diff2 = ddc::transform_reduce( - ddc::policies::policy(exec_space), + double max_norm_error_diff2 = ddc::parallel_transform_reduce( + exec_space, spline_eval_deriv2.domain(), 0., ddc::reducer::max(), @@ -621,8 +621,8 @@ static void Batched2dSplineTest() Coord const y = ddc::coordinate(ddc::select>(e)); return Kokkos::abs(spline_eval_deriv2(e) - evaluator.deriv(x, y, 0, 1)); }); - double max_norm_error_diff12 = ddc::transform_reduce( - ddc::policies::policy(exec_space), + 
double max_norm_error_diff12 = ddc::parallel_transform_reduce( + exec_space, spline_eval_deriv1.domain(), 0., ddc::reducer::max(), diff --git a/tests/splines/batched_spline_builder.cpp b/tests/splines/batched_spline_builder.cpp index 6ee7c03d8..c659bf504 100644 --- a/tests/splines/batched_spline_builder.cpp +++ b/tests/splines/batched_spline_builder.cpp @@ -352,8 +352,8 @@ static void BatchedSplineTest() spline_evaluator_batched.integrate(spline_eval_integrals, coef.span_cview()); // Checking errors (we recover the initial values) - double max_norm_error = ddc::transform_reduce( - ddc::policies::policy(exec_space), + double max_norm_error = ddc::parallel_transform_reduce( + exec_space, spline_eval.domain(), 0., ddc::reducer::max(), @@ -361,8 +361,8 @@ static void BatchedSplineTest() return Kokkos::abs(spline_eval(e) - vals(e)); }); - double max_norm_error_diff = ddc::transform_reduce( - ddc::policies::policy(exec_space), + double max_norm_error_diff = ddc::parallel_transform_reduce( + exec_space, spline_eval_deriv.domain(), 0., ddc::reducer::max(), @@ -370,8 +370,8 @@ static void BatchedSplineTest() Coord const x = ddc::coordinate(ddc::select>(e)); return Kokkos::abs(spline_eval_deriv(e) - evaluator.deriv(x, 1)); }); - double max_norm_error_integ = ddc::transform_reduce( - ddc::policies::policy(exec_space), + double max_norm_error_integ = ddc::parallel_transform_reduce( + exec_space, spline_eval_integrals.domain(), 0., ddc::reducer::max(), diff --git a/tests/splines/extrapolation_rule.cpp b/tests/splines/extrapolation_rule.cpp index d584e8970..ee221ae91 100644 --- a/tests/splines/extrapolation_rule.cpp +++ b/tests/splines/extrapolation_rule.cpp @@ -341,8 +341,8 @@ static void ExtrapolationRuleSplineTest() spline_evaluator_batched(spline_eval, coords_eval.span_cview(), coef.span_cview()); // Checking errors (we recover the initial values) - double max_norm_error = ddc::transform_reduce( - ddc::policies::policy(exec_space), + double max_norm_error = 
ddc::parallel_transform_reduce( + exec_space, spline_eval.domain(), 0., ddc::reducer::max(), diff --git a/tests/splines/periodicity_spline_builder.cpp b/tests/splines/periodicity_spline_builder.cpp index cf6a424d4..11d5c4ce2 100644 --- a/tests/splines/periodicity_spline_builder.cpp +++ b/tests/splines/periodicity_spline_builder.cpp @@ -188,8 +188,8 @@ static void PeriodicitySplineBuilderTest() spline_evaluator(spline_eval, coords_eval.span_cview(), coef.span_cview()); // Checking errors (we recover the initial values) - double max_norm_error = ddc::transform_reduce( - ddc::policies::policy(exec_space), + double max_norm_error = ddc::parallel_transform_reduce( + exec_space, spline_eval.domain(), 0., ddc::reducer::max(), diff --git a/tests/transform_reduce.cpp b/tests/transform_reduce.cpp index 736b485e7..fbf603364 100644 --- a/tests/transform_reduce.cpp +++ b/tests/transform_reduce.cpp @@ -43,12 +43,7 @@ TEST(TransformReduceSerialHost, ZeroDimension) int count = 0; ddc::for_each(dom, [&](DElem0D const i) { chunk(i) = count++; }); EXPECT_EQ( - ddc::transform_reduce( - ddc::policies::serial_host, - dom, - 0, - ddc::reducer::sum(), - chunk), + ddc::transform_reduce(dom, 0, ddc::reducer::sum(), chunk), dom.size() * (dom.size() - 1) / 2); } @@ -60,12 +55,7 @@ TEST(TransformReduceSerialHost, OneDimension) int count = 0; ddc::for_each(dom, [&](DElemX const ix) { chunk(ix) = count++; }); EXPECT_EQ( - ddc::transform_reduce( - ddc::policies::serial_host, - dom, - 0, - ddc::reducer::sum(), - chunk), + ddc::transform_reduce(dom, 0, ddc::reducer::sum(), chunk), dom.size() * (dom.size() - 1) / 2); } @@ -77,12 +67,7 @@ TEST(TransformReduceSerialHost, TwoDimensions) int count = 0; ddc::for_each(dom, [&](DElemXY const ixy) { chunk(ixy) = count++; }); EXPECT_EQ( - ddc::transform_reduce( - ddc::policies::serial_host, - dom, - 0, - ddc::reducer::sum(), - chunk), + ddc::transform_reduce(dom, 0, ddc::reducer::sum(), chunk), dom.size() * (dom.size() - 1) / 2); } 
TEST(TransformReduceParallelHost, ZeroDimension) @@ -93,8 +78,8 @@ TEST(TransformReduceParallelHost, ZeroDimension) int count = 0; ddc::for_each(dom, [&](DElem0D const i) { chunk(i) = count++; }); EXPECT_EQ( - ddc::transform_reduce( - ddc::policies::parallel_host, + ddc::parallel_transform_reduce( + Kokkos::DefaultHostExecutionSpace(), dom, 0, ddc::reducer::sum(), @@ -109,8 +94,8 @@ TEST(TransformReduceParallelHost, OneDimension) int count = 0; ddc::for_each(dom, [&](DElemX const ix) { chunk(ix) = count++; }); EXPECT_EQ( - ddc::transform_reduce( - ddc::policies::parallel_host, + ddc::parallel_transform_reduce( + Kokkos::DefaultHostExecutionSpace(), dom, 0, ddc::reducer::sum(), @@ -126,8 +111,8 @@ TEST(TransformReduceParallelHost, TwoDimensions) int count = 0; ddc::for_each(dom, [&](DElemXY const ixy) { chunk(ixy) = count++; }); EXPECT_EQ( - ddc::transform_reduce( - ddc::policies::parallel_host, + ddc::parallel_transform_reduce( + Kokkos::DefaultHostExecutionSpace(), dom, 0, ddc::reducer::sum(), @@ -146,8 +131,8 @@ static void TestTransformReduceParallelDeviceZeroDimension() dom, KOKKOS_LAMBDA(DElem0D const i) { chunk(i) = Kokkos::atomic_fetch_add(&count(), 1); }); EXPECT_EQ( - ddc::transform_reduce( - ddc::policies::parallel_device, + ddc::parallel_transform_reduce( + Kokkos::DefaultExecutionSpace(), dom, 0, ddc::reducer::sum(), @@ -171,8 +156,8 @@ static void TestTransformReduceParallelDeviceOneDimension() dom, KOKKOS_LAMBDA(DElemX const ix) { chunk(ix) = Kokkos::atomic_fetch_add(&count(), 1); }); EXPECT_EQ( - ddc::transform_reduce( - ddc::policies::parallel_device, + ddc::parallel_transform_reduce( + Kokkos::DefaultExecutionSpace(), dom, 0, ddc::reducer::sum(), @@ -198,8 +183,8 @@ static void TestTransformReduceParallelDeviceTwoDimensions() chunk(ixy) = Kokkos::atomic_fetch_add(&count(), 1); }); EXPECT_EQ( - ddc::transform_reduce( - ddc::policies::parallel_device, + ddc::parallel_transform_reduce( + Kokkos::DefaultExecutionSpace(), dom, 0, ddc::reducer::sum(),