diff --git a/examples/characteristics_advection.cpp b/examples/characteristics_advection.cpp index b25c54c71..5564faef8 100644 --- a/examples/characteristics_advection.cpp +++ b/examples/characteristics_advection.cpp @@ -69,7 +69,7 @@ void display(double time, ChunkType density) std::cout << " * density[y:" << ddc::get_domain(density).size() / 2 << "] = {"; ddc::for_each( - ddc::policies::serial_host, + ddc::policies::serial, ddc::get_domain(density), [=](ddc::DiscreteElement const ix) { std::cout << std::setw(6) << density_slice(ix) << " "; @@ -194,7 +194,9 @@ int main(int argc, char** argv) //! [initial output] // display the initial data - ddc::deepcopy(host_density_alloc, last_density_alloc); + ddc::deepcopy( + host_density_alloc.span_view(), + last_density_alloc.span_cview()); display(ddc::coordinate(time_domain.front()), host_density_alloc[x_domain][y_domain]); // time of the iteration where the last output happened @@ -280,7 +282,9 @@ int main(int argc, char** argv) //! [output] if (iter - last_output >= t_output_period) { last_output = iter; - ddc::deepcopy(host_density_alloc, last_density_alloc); + ddc::deepcopy( + host_density_alloc.span_view(), + last_density_alloc.span_cview()); display(ddc::coordinate(iter), host_density_alloc[x_domain][y_domain]); } @@ -294,7 +298,9 @@ int main(int argc, char** argv) //! [final output] if (last_output < time_domain.back()) { - ddc::deepcopy(host_density_alloc, last_density_alloc); + ddc::deepcopy( + host_density_alloc.span_view(), + last_density_alloc.span_cview()); display(ddc::coordinate(time_domain.back()), host_density_alloc[x_domain][y_domain]); } diff --git a/examples/game_of_life.cpp b/examples/game_of_life.cpp index 438cdaecc..875006e0f 100644 --- a/examples/game_of_life.cpp +++ b/examples/game_of_life.cpp @@ -93,7 +93,9 @@ int main() std::size_t iter = 0; for (; iter < nt; ++iter) { - ddc::deepcopy(cells_in_host_alloc, cells_in); + ddc::deepcopy( + cells_in_host_alloc.span_view(), + cells_in.span_cview()); print_2DChunk(std::cout, cells_in_host_alloc.span_cview()) << "\n"; ddc::for_each( @@ -127,7 +129,9 @@ int main() }); ddc::deepcopy(cells_in, cells_out); } - ddc::deepcopy(cells_in_host_alloc, cells_in); + ddc::deepcopy( + cells_in_host_alloc.span_view(), + cells_in.span_cview()); print_2DChunk(std::cout, cells_in_host_alloc.span_cview()) << "\n"; return 0; diff --git a/examples/heat_equation.cpp b/examples/heat_equation.cpp index 8aa8b7960..300f8ed6a 100644 --- a/examples/heat_equation.cpp +++ b/examples/heat_equation.cpp @@ -59,7 +59,7 @@ void display(double time, ChunkType temp) std::cout << " * temperature[y:" << ddc::get_domain(temp).size() / 2 << "] = {"; ddc::for_each( - ddc::policies::serial_host, + ddc::policies::serial, ddc::get_domain(temp), [=](ddc::DiscreteElement const ix) { std::cout << std::setw(6) << temp_slice(ix); @@ -239,7 +239,10 @@ int main(int argc, char** argv) //! [initial output] // display the initial data - ddc::deepcopy(ghosted_temp, ghosted_last_temp); + ddc::deepcopy( + ddc::policies::parallel_device, + ghosted_temp, + ghosted_last_temp); display(ddc::coordinate(time_domain.front()), ghosted_temp[x_domain][y_domain]); // time of the iteration where the last output happened @@ -254,15 +257,19 @@ int main(int argc, char** argv) //! [boundary conditions] // Periodic boundary conditions ddc::deepcopy( + ddc::policies::parallel_device, ghosted_last_temp[x_pre_ghost][y_domain], ghosted_last_temp[y_domain][x_domain_end]); ddc::deepcopy( + ddc::policies::parallel_device, ghosted_last_temp[y_domain][x_post_ghost], ghosted_last_temp[y_domain][x_domain_begin]); ddc::deepcopy( + ddc::policies::parallel_device, ghosted_last_temp[x_domain][y_pre_ghost], ghosted_last_temp[x_domain][y_domain_end]); ddc::deepcopy( + ddc::policies::parallel_device, ghosted_last_temp[x_domain][y_post_ghost], ghosted_last_temp[x_domain][y_domain_begin]); //! [boundary conditions] @@ -312,7 +319,10 @@ int main(int argc, char** argv) //! [output] if (iter - last_output >= t_output_period) { last_output = iter; - ddc::deepcopy(ghosted_temp, ghosted_last_temp); + ddc::deepcopy( + ddc::policies::parallel_device, + ghosted_temp, + ghosted_last_temp); display(ddc::coordinate(iter), ghosted_temp[x_domain][y_domain]); } @@ -326,7 +336,10 @@ int main(int argc, char** argv) //! [final output] if (last_output < time_domain.back()) { - ddc::deepcopy(ghosted_temp, ghosted_last_temp); + ddc::deepcopy( + ddc::policies::parallel_device, + ghosted_temp, + ghosted_last_temp); display(ddc::coordinate(time_domain.back()), ghosted_temp[x_domain][y_domain]); } diff --git a/examples/heat_equation_spectral.cpp b/examples/heat_equation_spectral.cpp index eb0517e66..c3398228d 100644 --- a/examples/heat_equation_spectral.cpp +++ b/examples/heat_equation_spectral.cpp @@ -60,7 +60,7 @@ void display(double time, ChunkType temp) std::cout << " * temperature[y:" << ddc::get_domain(temp).size() / 2 << "] = {"; ddc::for_each( - ddc::policies::serial_host, + ddc::policies::serial, ddc::get_domain(temp), [=](ddc::DiscreteElement const ix) { std::cout << std::setw(6) << temp_slice(ix); @@ -203,7 +203,10 @@ int main(int argc, char** argv) //! [initial output] // display the initial data - ddc::deepcopy(_host_temp, _last_temp); + ddc::deepcopy( + ddc::policies::parallel_device, + _host_temp, + _last_temp); display(ddc::coordinate(time_domain.front()), _host_temp[x_domain][y_domain]); // time of the iteration where the last output happened @@ -277,7 +280,10 @@ int main(int argc, char** argv) //! [output] if (iter - last_output >= t_output_period) { last_output = iter; - ddc::deepcopy(_host_temp, _last_temp); + ddc::deepcopy( + ddc::policies::parallel_device, + _host_temp, + _last_temp); display(ddc::coordinate(iter), _host_temp[x_domain][y_domain]); } @@ -291,7 +297,10 @@ int main(int argc, char** argv) //! [final output] if (last_output < time_domain.back()) { - ddc::deepcopy(_host_temp, _last_temp); + ddc::deepcopy( + ddc::policies::parallel_device, + _host_temp, + _last_temp); display(ddc::coordinate(time_domain.back()), _host_temp[x_domain][y_domain]); } diff --git a/include/ddc/deepcopy.hpp b/include/ddc/deepcopy.hpp index d4e468d22..0d44256e2 100644 --- a/include/ddc/deepcopy.hpp +++ b/include/ddc/deepcopy.hpp @@ -7,6 +7,7 @@ #include #include "ddc/chunk_span.hpp" +#include "ddc/for_each.hpp" namespace ddc { @@ -16,7 +17,7 @@ namespace ddc { * @return dst as a ChunkSpan */ template -auto deepcopy(ChunkDst&& dst, ChunkSrc&& src) +auto deepcopy(parallel_host_policy, ChunkDst&& dst, ChunkSrc&& src) { static_assert(is_borrowed_chunk_v); static_assert(is_borrowed_chunk_v); @@ -28,4 +29,66 @@ auto deepcopy(ChunkDst&& dst, ChunkSrc&& src) return dst.span_view(); } +/** Copy the content of a borrowed chunk into another + * @param[out] dst the borrowed chunk in which to copy + * @param[in] src the borrowed chunk from which to copy + * @return dst as a ChunkSpan +*/ +template +auto deepcopy(parallel_device_policy, ChunkDst&& dst, ChunkSrc&& src) +{ + return deepcopy( + policies::parallel_host, + std::forward(dst), + std::forward(src)); +} + +/** Copy the content of a borrowed chunk into another + * @param[out] dst the borrowed chunk in which to copy + * @param[in] src the borrowed chunk from which to copy + * @return dst as a ChunkSpan +*/ +template < + class ElementTypeDst, + class SupportDst, + class LayoutDst, + class MemorySpaceDst, + class ElementTypeSrc, + class SupportSrc, + class LayoutSrc, + class MemorySpaceSrc> +KOKKOS_FUNCTION auto deepcopy( + serial_policy, + ChunkSpan const& dst, + ChunkSpan const& src) +{ + using ChunkDst = ChunkSpan; + using ChunkSrc = ChunkSpan; + static_assert( + std::is_assignable_v, chunk_reference_t>, + "Not assignable"); + assert(dst.domain().extents() == src.domain().extents()); + KOKKOS_ENSURES((Kokkos::SpaceAccessibility< + DDC_CURRENT_KOKKOS_SPACE, + typename std::remove_cv_t>::memory_space>:: + accessible)); + KOKKOS_ENSURES((Kokkos::SpaceAccessibility< + DDC_CURRENT_KOKKOS_SPACE, + typename std::remove_cv_t>::memory_space>:: + accessible)); + for_each(policies::serial, dst.domain(), [&](auto elem) { dst(elem) = src(elem); }); + return dst; +} + +/** Copy the content of a borrowed chunk into another + * @param[out] dst the borrowed chunk in which to copy + * @param[in] src the borrowed chunk from which to copy + * @return dst as a ChunkSpan +*/ +template +KOKKOS_FUNCTION auto deepcopy(ChunkDst&& dst, ChunkSrc&& src) +{ + return deepcopy(policies::serial, std::forward(dst), std::forward(src)); +} + } // namespace ddc diff --git a/include/ddc/fill.hpp b/include/ddc/fill.hpp index 513392d28..2533da481 100644 --- a/include/ddc/fill.hpp +++ b/include/ddc/fill.hpp @@ -3,10 +3,14 @@ #pragma once #include +#include +#include #include #include "ddc/chunk_span.hpp" +#include "ddc/detail/macros.hpp" +#include "ddc/for_each.hpp" namespace ddc { @@ -16,7 +20,7 @@ namespace ddc { * @return dst as a ChunkSpan */ template -auto fill(ChunkDst&& dst, T const& value) +auto fill(parallel_host_policy, ChunkDst&& dst, T const& value) { static_assert(is_borrowed_chunk_v); static_assert(std::is_assignable_v, T>, "Not assignable"); @@ -24,4 +28,47 @@ auto fill(ChunkDst&& dst, T const& value) return dst.span_view(); } +/** Fill a borrowed chunk with a given value + * @param[out] dst the borrowed chunk in which to copy + * @param[in] value the value to fill `dst` + * @return dst as a ChunkSpan + */ +template +auto fill(parallel_device_policy, ChunkDst&& dst, T const& value) +{ + return fill(ddc::policies::parallel_host, std::forward(dst), value); +} + +/** Fill a borrowed chunk with a given value + * @param[out] dst the borrowed chunk in which to copy + * @param[in] value the value to fill `dst` + * @return dst as a ChunkSpan + */ +template +KOKKOS_FUNCTION auto fill( + serial_policy policy, + ChunkSpan const& dst, + T const& value) +{ + static_assert( + std::is_assignable_v< + chunk_reference_t>, + T>, + "Not assignable"); + KOKKOS_ENSURES((Kokkos::SpaceAccessibility::accessible)); + for_each(policy, dst.domain(), [&](auto elem) { dst(elem) = value; }); + return dst; +} + +/** Fill a borrowed chunk with a given value + * @param[out] dst the borrowed chunk in which to copy + * @param[in] value the value to fill `dst` + * @return dst as a ChunkSpan + */ +template +KOKKOS_FUNCTION auto fill(ChunkDst&& dst, T const& value) +{ + return fill(policies::serial, std::forward(dst), value); +} + } // namespace ddc diff --git a/include/ddc/for_each.hpp b/include/ddc/for_each.hpp index cf27f4352..e10cf4e6c 100644 --- a/include/ddc/for_each.hpp +++ b/include/ddc/for_each.hpp @@ -28,7 +28,7 @@ class ForEachKokkosLambdaAdapter F m_f; public: - ForEachKokkosLambdaAdapter(F const& f) : m_f(f) {} + explicit ForEachKokkosLambdaAdapter(F const& f) : m_f(f) {} template = true> KOKKOS_IMPL_FORCEINLINE void operator()([[maybe_unused]] index_type unused_id) const @@ -75,7 +75,7 @@ inline void for_each_kokkos( } template -inline void for_each_kokkos(DiscreteDomain const& domain, Functor const& f) noexcept +void for_each_kokkos(DiscreteDomain const& domain, Functor const& f) noexcept { DiscreteElement const ddc_begin = domain.front(); DiscreteElement const ddc_end = domain.front() + domain.extents(); @@ -93,9 +93,7 @@ inline void for_each_kokkos(DiscreteDomain const& domain, Functor const& } template -inline void for_each_kokkos( - DiscreteDomain const& domain, - Functor&& f) noexcept +void for_each_kokkos(DiscreteDomain const& domain, Functor&& f) noexcept { DiscreteElement const ddc_begin = domain.front(); DiscreteElement const ddc_end = domain.front() + domain.extents(); @@ -128,13 +126,13 @@ inline void for_each_kokkos( } template -inline void for_each_serial( +KOKKOS_FUNCTION void for_each_serial( std::array const& begin, std::array const& end, Functor const& f, Is const&... is) noexcept { - static constexpr std::size_t I = sizeof...(Is); + constexpr std::size_t I = sizeof...(Is); if constexpr (I == N) { f(RetType(is...)); } else { @@ -147,7 +145,7 @@ inline void for_each_serial( } // namespace detail /// Serial execution on the host -struct serial_host_policy +struct serial_policy { }; @@ -156,8 +154,8 @@ struct serial_host_policy * @param[in] f a functor taking an index as parameter */ template -inline void for_each( - serial_host_policy, +KOKKOS_FUNCTION void for_each( + serial_policy, DiscreteDomain const& domain, Functor&& f) noexcept { @@ -173,8 +171,8 @@ inline void for_each( * @param[in] f a functor taking an index as parameter */ template -inline void for_each_n( - serial_host_policy, +KOKKOS_FUNCTION void for_each_n( + serial_policy, DiscreteVector const& extent, Functor&& f) noexcept { @@ -195,10 +193,7 @@ struct parallel_host_policy * @param[in] f a functor taking an index as parameter */ template -inline void for_each( - parallel_host_policy, - DiscreteDomain const& domain, - Functor&& f) noexcept +void for_each(parallel_host_policy, DiscreteDomain const& domain, Functor&& f) noexcept { detail::for_each_kokkos(domain, std::forward(f)); } @@ -213,19 +208,16 @@ struct parallel_device_policy * @param[in] f a functor taking an index as parameter */ template -inline void for_each( - parallel_device_policy, - DiscreteDomain const& domain, - Functor&& f) noexcept +void for_each(parallel_device_policy, DiscreteDomain const& domain, Functor&& f) noexcept { detail::for_each_kokkos(domain, std::forward(f)); } -using default_policy = serial_host_policy; +using default_policy = serial_policy; namespace policies { -inline constexpr serial_host_policy serial_host; +inline constexpr serial_policy serial; inline constexpr parallel_host_policy parallel_host; inline constexpr parallel_device_policy parallel_device; @@ -233,7 +225,7 @@ template constexpr auto policy([[maybe_unused]] ExecSpace exec_space) { if constexpr (std::is_same_v) { - return ddc::policies::serial_host; + return ddc::policies::serial; #ifdef KOKKOS_ENABLE_OPENMP } else if constexpr (std::is_same_v) { return ddc::policies::parallel_host; @@ -252,9 +244,9 @@ constexpr auto policy([[maybe_unused]] ExecSpace exec_space) * @param[in] f a functor taking an index as parameter */ template -inline void for_each(DiscreteDomain const& domain, Functor&& f) noexcept +KOKKOS_FUNCTION void for_each(DiscreteDomain const& domain, Functor&& f) noexcept { - for_each(default_policy(), domain, std::forward(f)); + for_each(policies::serial, domain, std::forward(f)); } /** iterates over a nD extent using the default execution policy @@ -262,9 +254,9 @@ inline void for_each(DiscreteDomain const& domain, Functor&& f) noexce * @param[in] f a functor taking an index as parameter */ template -inline void for_each_n(DiscreteVector const& extent, Functor&& f) noexcept +KOKKOS_FUNCTION void for_each_n(DiscreteVector const& extent, Functor&& f) noexcept { - for_each_n(default_policy(), extent, std::forward(f)); + for_each_n(policies::serial, extent, std::forward(f)); } template < @@ -272,18 +264,19 @@ template < class ElementType, class... DDims, class LayoutPolicy, + class MemorySpace, class Functor> -inline void for_each_elem( - ExecutionPolicy&& policy, - ChunkSpan, LayoutPolicy> chunk_span, +KOKKOS_FUNCTION void for_each_elem( + ExecutionPolicy policy, + ChunkSpan, LayoutPolicy, MemorySpace> chunk_span, Functor&& f) noexcept { - for_each(std::forward(policy), chunk_span.domain(), std::forward(f)); + for_each(policy, chunk_span.domain(), std::forward(f)); } -template -inline void for_each_elem( - ChunkSpan, LayoutPolicy> chunk_span, +template +KOKKOS_FUNCTION void for_each_elem( + ChunkSpan, LayoutPolicy, MemorySpace> chunk_span, Functor&& f) noexcept { for_each(chunk_span.domain(), std::forward(f)); diff --git a/include/ddc/mirror.hpp b/include/ddc/mirror.hpp index 9f0232698..77af4dfea 100644 --- a/include/ddc/mirror.hpp +++ b/include/ddc/mirror.hpp @@ -41,7 +41,7 @@ auto create_mirror_and_copy( Support, KokkosAllocator, typename Space::memory_space>> chunk(src.domain()); - deepcopy(chunk, src); + deepcopy(chunk.span_view(), src); return chunk; } diff --git a/include/ddc/transform_reduce.hpp b/include/ddc/transform_reduce.hpp index 3b0d358cd..c4004a061 100644 --- a/include/ddc/transform_reduce.hpp +++ b/include/ddc/transform_reduce.hpp @@ -99,7 +99,7 @@ template < class BinaryReductionOp, class UnaryTransformOp, class... DCoords> -inline T transform_reduce_serial( +KOKKOS_FUNCTION T transform_reduce_serial( DiscreteDomain const& domain, [[maybe_unused]] T const neutral, BinaryReductionOp const& reduce, @@ -316,8 +316,8 @@ inline T transform_reduce_kokkos( * range. The return type must be acceptable as input to reduce */ template -inline T transform_reduce( - [[maybe_unused]] serial_host_policy policy, +KOKKOS_FUNCTION T transform_reduce( + [[maybe_unused]] serial_policy policy, DiscreteDomain const& domain, T neutral, BinaryReductionOp&& reduce, @@ -340,7 +340,7 @@ inline T transform_reduce( * range. The return type must be acceptable as input to reduce */ template -inline T transform_reduce( +T transform_reduce( [[maybe_unused]] parallel_host_policy policy, DiscreteDomain const& domain, T neutral, @@ -364,7 +364,7 @@ inline T transform_reduce( * range. The return type must be acceptable as input to reduce */ template -inline T transform_reduce( +T transform_reduce( [[maybe_unused]] parallel_device_policy policy, DiscreteDomain const& domain, T neutral, @@ -387,14 +387,14 @@ inline T transform_reduce( * range. The return type must be acceptable as input to reduce */ template -inline T transform_reduce( +KOKKOS_FUNCTION T transform_reduce( DiscreteDomain const& domain, T neutral, BinaryReductionOp&& reduce, UnaryTransformOp&& transform) noexcept { return transform_reduce( - default_policy(), + policies::serial, domain, neutral, std::forward(reduce), diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index be6fb2e72..54cf25b34 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -22,6 +22,7 @@ add_executable(ddc_tests fill.cpp discrete_element.cpp discrete_vector.cpp + nested_algorithms.cpp ) target_compile_features(ddc_tests PUBLIC cxx_std_17) target_link_libraries(ddc_tests diff --git a/tests/chunk.cpp b/tests/chunk.cpp index 978d9ab28..bb7580195 100644 --- a/tests/chunk.cpp +++ b/tests/chunk.cpp @@ -394,7 +394,7 @@ TEST(Chunk1DTest, Deepcopy) chunk(ix) = 1.001 * ix.uid(); } ChunkX chunk2(chunk.domain()); - ddc::deepcopy(chunk2, chunk); + ddc::deepcopy(chunk2.span_view(), chunk.span_cview()); for (auto&& ix : chunk.domain()) { // we expect exact equality, not EXPECT_DOUBLE_EQ: these are copy EXPECT_EQ(chunk2(ix), chunk(ix)); @@ -581,7 +581,7 @@ TEST(Chunk2DTest, Deepcopy) } } ChunkXY chunk2(chunk.domain()); - ddc::deepcopy(chunk2, chunk); + ddc::deepcopy(chunk2.span_view(), chunk.span_cview()); for (auto&& ix : chunk.domain()) { for (auto&& iy : chunk.domain()) { // we expect complete equality, not EXPECT_DOUBLE_EQ: these are copy @@ -601,7 +601,7 @@ TEST(Chunk2DTest, DeepcopyReordered) ChunkYX chunk2(ddc::select(chunk.domain())); ddc::ChunkSpan chunk2_view(chunk2.data_handle(), chunk.domain()); - ddc::deepcopy(chunk2_view, chunk); + ddc::deepcopy(chunk2_view.span_view(), chunk.span_cview()); for (auto&& ix : chunk.domain()) { for (auto&& iy : chunk.domain()) { // we expect complete equality, not EXPECT_DOUBLE_EQ: these are copy @@ -633,7 +633,7 @@ TEST(Chunk3DTest, AccessFromDiscreteElements) TEST(Chunk2DTest, Mirror) { ChunkXY chunk(dom_x_y); - ddc::fill(chunk, 1.4); + ddc::fill(chunk.span_view(), 1.4); auto const chunk2 = ddc::create_mirror_and_copy(chunk.span_cview()); for (auto&& ix : chunk.domain()) { for (auto&& iy : chunk.domain()) { diff --git a/tests/fft.cpp b/tests/fft.cpp index ddbe44811..1de6b2e4b 100644 --- a/tests/fft.cpp +++ b/tests/fft.cpp @@ -52,7 +52,7 @@ static void test_fft() }); ddc::Chunk f_bis_alloc(f.domain(), ddc::KokkosAllocator()); ddc::ChunkSpan f_bis = f_bis_alloc.span_view(); - ddc::deepcopy(f_bis, f); + ddc::deepcopy(ddc::policies::policy(exec_space), f_bis, f); ddc::Chunk Ff_alloc(k_mesh, ddc::KokkosAllocator()); ddc::ChunkSpan Ff = Ff_alloc.span_view(); @@ -62,7 +62,7 @@ static void test_fft() // deepcopy of Ff because FFT C2R overwrites the input ddc::Chunk Ff_bis_alloc(Ff.domain(), ddc::KokkosAllocator()); ddc::ChunkSpan Ff_bis = Ff_bis_alloc.span_view(); - ddc::deepcopy(Ff_bis, Ff); + ddc::deepcopy(ddc::policies::policy(exec_space), Ff_bis, Ff); ddc::Chunk FFf_alloc(f.domain(), ddc::KokkosAllocator()); ddc::ChunkSpan FFf = FFf_alloc.span_view(); @@ -70,15 +70,15 @@ static void test_fft() ddc::Chunk f_host_alloc(f.domain(), ddc::HostAllocator()); ddc::ChunkSpan f_host = f_host_alloc.span_view(); - ddc::deepcopy(f_host, f); + ddc::deepcopy(ddc::policies::policy(exec_space), f_host, f); ddc::Chunk Ff_host_alloc(Ff.domain(), ddc::HostAllocator()); ddc::ChunkSpan Ff_host = Ff_host_alloc.span_view(); - ddc::deepcopy(Ff_host, Ff); + ddc::deepcopy(ddc::policies::policy(exec_space), Ff_host, Ff); ddc::Chunk FFf_host_alloc(FFf.domain(), ddc::HostAllocator()); ddc::ChunkSpan FFf_host = FFf_host_alloc.span_view(); - ddc::deepcopy(FFf_host, FFf); + ddc::deepcopy(ddc::policies::policy(exec_space), FFf_host, FFf); auto const pow2 = DDC_LAMBDA(double x) { @@ -137,7 +137,7 @@ static void test_fft_norm(ddc::FFT_Normalization const norm) ddc::Chunk f_bis_alloc = ddc::Chunk(f.domain(), ddc::KokkosAllocator()); ddc::ChunkSpan f_bis = f_bis_alloc.span_view(); - ddc::deepcopy(f_bis, f); + ddc::deepcopy(ddc::policies::policy(exec_space), f_bis, f); ddc::Chunk Ff_alloc = ddc::Chunk(k_mesh, ddc::KokkosAllocator()); ddc::ChunkSpan Ff = Ff_alloc.span_view(); @@ -147,7 +147,7 @@ static void test_fft_norm(ddc::FFT_Normalization const norm) // deepcopy of Ff because FFT C2R overwrites the input ddc::Chunk Ff_bis_alloc = ddc::Chunk(Ff.domain(), ddc::KokkosAllocator()); ddc::ChunkSpan Ff_bis = Ff_bis_alloc.span_view(); - ddc::deepcopy(Ff_bis, Ff); + ddc::deepcopy(ddc::policies::policy(exec_space), Ff_bis, Ff); ddc::Chunk FFf_alloc = ddc::Chunk(x_mesh, ddc::KokkosAllocator()); ddc::ChunkSpan FFf = FFf_alloc.span_view(); diff --git a/tests/for_each.cpp b/tests/for_each.cpp index 7056576c9..1e4c41ad6 100644 --- a/tests/for_each.cpp +++ b/tests/for_each.cpp @@ -40,7 +40,7 @@ TEST(ForEachSerialHost, Empty) DDomX const dom(lbound_x, DVectX(0)); std::vector storage(dom.size(), 0); ddc::ChunkSpan view(storage.data(), dom); - ddc::for_each(ddc::policies::serial_host, dom, [=](DElemX const ix) { view(ix) += 1; }); + ddc::for_each(ddc::policies::serial, dom, [=](DElemX const ix) { view(ix) += 1; }); EXPECT_EQ(std::count(storage.begin(), storage.end(), 1), dom.size()) << std::count(storage.begin(), storage.end(), 1) << std::endl; } @@ -50,7 +50,7 @@ TEST(ForEachSerialHost, ZeroDimension) DDom0D const dom; int storage = 0; ddc::ChunkSpan view(&storage, dom); - ddc::for_each(ddc::policies::serial_host, dom, [=](DElem0D const ii) { view(ii) += 1; }); + ddc::for_each(ddc::policies::serial, dom, [=](DElem0D const ii) { view(ii) += 1; }); EXPECT_EQ(storage, 1) << storage << std::endl; } @@ -59,7 +59,7 @@ TEST(ForEachSerialHost, OneDimension) DDomX const dom(lbound_x, nelems_x); std::vector storage(dom.size(), 0); ddc::ChunkSpan view(storage.data(), dom); - ddc::for_each(ddc::policies::serial_host, dom, [=](DElemX const ix) { view(ix) += 1; }); + ddc::for_each(ddc::policies::serial, dom, [=](DElemX const ix) { view(ix) += 1; }); EXPECT_EQ(std::count(storage.begin(), storage.end(), 1), dom.size()); } @@ -68,7 +68,7 @@ TEST(ForEachSerialHost, TwoDimensions) DDomXY const dom(lbound_x_y, nelems_x_y); std::vector storage(dom.size(), 0); ddc::ChunkSpan view(storage.data(), dom); - ddc::for_each(ddc::policies::serial_host, dom, [=](DElemXY const ixy) { view(ixy) += 1; }); + ddc::for_each(ddc::policies::serial, dom, [=](DElemXY const ixy) { view(ixy) += 1; }); EXPECT_EQ(std::count(storage.begin(), storage.end(), 1), dom.size()); } diff --git a/tests/nested_algorithms.cpp b/tests/nested_algorithms.cpp new file mode 100644 index 000000000..f0793d36c --- /dev/null +++ b/tests/nested_algorithms.cpp @@ -0,0 +1,175 @@ +#include + +#include + +namespace { + +struct DDimX; +using DElemX = ddc::DiscreteElement; +using DVectX = ddc::DiscreteVector; +using DDomX = ddc::DiscreteDomain; + +static DElemX constexpr lbound_x(0); +static DVectX constexpr nelems_x(10); + +} // namespace + +void test_nested_algorithms_for_each() +{ + DDomX ddom(lbound_x, nelems_x); + ddc::Chunk chk(ddom, ddc::DeviceAllocator()); + ddc::ChunkSpan chks = chk.span_view(); + ddc::for_each( + ddc::policies::parallel_device, + ddom.take_first(DVectX(1)), + DDC_LAMBDA(DElemX) { + ddc::for_each(ddc::policies::serial, chks.domain(), [&](DElemX elem) { + chks(elem) = 10; + }); + }); + int res = ddc::transform_reduce( + ddc::policies::parallel_device, + ddom, + 0, + ddc::reducer::sum(), + chks); + EXPECT_EQ(res, 10 * ddom.size()); +} + +TEST(NestedAlgorithms, ForEach) +{ + test_nested_algorithms_for_each(); +} + +void test_nested_algorithms_for_each_n() +{ + DDomX ddom(lbound_x, nelems_x); + ddc::Chunk chk(ddom, ddc::DeviceAllocator()); + ddc::ChunkSpan chks = chk.span_view(); + ddc::for_each( + ddc::policies::parallel_device, + ddom.take_first(DVectX(1)), + DDC_LAMBDA(DElemX elem) { + ddc::for_each_n(ddc::policies::serial, ddom.extents(), [&](DVectX vect_x) { + chks(elem + vect_x) = 10; + }); + }); + int res = ddc::transform_reduce( + ddc::policies::parallel_device, + ddom, + 0, + ddc::reducer::sum(), + chks); + EXPECT_EQ(res, 10 * ddom.size()); +} + +TEST(NestedAlgorithms, ForEachN) +{ + test_nested_algorithms_for_each_n(); +} + +void test_nested_algorithms_for_each_elem() +{ + DDomX ddom(lbound_x, nelems_x); + ddc::Chunk chk(ddom, ddc::DeviceAllocator()); + ddc::ChunkSpan chks = chk.span_view(); + ddc::for_each( + ddc::policies::parallel_device, + ddom.take_first(DVectX(1)), + DDC_LAMBDA(DElemX) { + ddc::for_each_elem(ddc::policies::serial, chks, [&](DElemX elem) { + chks(elem) = 10; + }); + }); + int res = ddc::transform_reduce( + ddc::policies::parallel_device, + ddom, + 0, + ddc::reducer::sum(), + chks); + EXPECT_EQ(res, 10 * ddom.size()); +} + +TEST(NestedAlgorithms, ForEachElem) +{ + test_nested_algorithms_for_each_elem(); +} + +void test_nested_algorithms_transform_reduce() +{ + DDomX ddom(lbound_x, nelems_x); + ddc::Chunk chk(ddom, ddc::DeviceAllocator()); + ddc::ChunkSpan chks = chk.span_view(); + ddc::for_each( + ddc::policies::parallel_device, + ddom, + DDC_LAMBDA(DElemX elem) { + chks(elem) = ddc::transform_reduce( + ddc::policies::serial, + DDomX(lbound_x, DVectX(10)), + 0, + ddc::reducer::sum(), + [&](DElemX) { return 1; }); + }); + int res = ddc::transform_reduce( + ddc::policies::parallel_device, + ddom, + 0, + ddc::reducer::sum(), + chks); + EXPECT_EQ(res, 10 * ddom.size()); +} + +TEST(NestedAlgorithms, TransformReduce) +{ + test_nested_algorithms_transform_reduce(); +} + +void test_nested_algorithms_fill() +{ + DDomX ddom(lbound_x, nelems_x); + ddc::Chunk chk(ddom, ddc::DeviceAllocator()); + ddc::ChunkSpan chks = chk.span_view(); + ddc::for_each( + ddc::policies::parallel_device, + ddom.take_first(DVectX(1)), + DDC_LAMBDA(DElemX) { ddc::fill(ddc::policies::serial, chks, 10); }); + int res = ddc::transform_reduce( + ddc::policies::parallel_device, + ddom, + 0, + ddc::reducer::sum(), + chks); + EXPECT_EQ(res, 10 * ddom.size()); +} + +TEST(NestedAlgorithms, Fill) +{ + test_nested_algorithms_fill(); +} + +void test_nested_algorithms_deepcopy() +{ + DDomX ddom(lbound_x, nelems_x); + ddc::Chunk chk(ddom, ddc::DeviceAllocator()); + ddc::ChunkSpan chks = chk.span_view(); + ddc::fill(ddc::policies::parallel_device, chks, 10); + ddc::Chunk chk2(ddom, ddc::DeviceAllocator()); + ddc::ChunkSpan chk2s = chk2.span_view(); + ddc::for_each( + ddc::policies::parallel_device, + ddom.take_first(DVectX(1)), + DDC_LAMBDA(DElemX) { ddc::deepcopy(ddc::policies::serial, chk2s, chks); }); + int res = ddc::transform_reduce( + ddc::policies::parallel_device, + ddom, + 0, + ddc::reducer::sum(), + chk2s); + EXPECT_EQ(res, 10 * ddom.size()); +} + +TEST(NestedAlgorithms, Deepcopy) +{ + test_nested_algorithms_deepcopy(); +} diff --git a/tests/relocatable_device_code.cpp b/tests/relocatable_device_code.cpp index d3e13110e..dc67ef88f 100644 --- a/tests/relocatable_device_code.cpp +++ b/tests/relocatable_device_code.cpp @@ -19,7 +19,7 @@ std::pair, ddc::Coordinate> read_from_devi dom_x.take_last(rdc::DVectX(1)), DDC_LAMBDA(rdc::DElemX const ix) { array(ix) = ddc::step(); }); ddc::Chunk allocation_h(dom_x, ddc::HostAllocator()); - ddc::deepcopy(allocation_h, allocation_d); + ddc::deepcopy(ddc::policies::parallel_device, allocation_h, allocation_d); return std::pair< ddc::Coordinate, ddc::Coordinate>(allocation_h(rdc::DElemX(0)), allocation_h(rdc::DElemX(1))); diff --git a/tests/transform_reduce.cpp b/tests/transform_reduce.cpp index 900f6d4e0..aa5315393 100644 --- a/tests/transform_reduce.cpp +++ b/tests/transform_reduce.cpp @@ -42,7 +42,7 @@ TEST(TransformReduceSerialHost, ZeroDimension) ddc::for_each(dom, [&](DElem0D const i) { chunk(i) = count++; }); EXPECT_EQ( ddc::transform_reduce( - ddc::policies::serial_host, + ddc::policies::serial, dom, 0, ddc::reducer::sum(), @@ -59,7 +59,7 @@ TEST(TransformReduceSerialHost, OneDimension) ddc::for_each(dom, [&](DElemX const ix) { chunk(ix) = count++; }); EXPECT_EQ( ddc::transform_reduce( - ddc::policies::serial_host, + ddc::policies::serial, dom, 0, ddc::reducer::sum(), @@ -76,7 +76,7 @@ TEST(TransformReduceSerialHost, TwoDimensions) ddc::for_each(dom, [&](DElemXY const ixy) { chunk(ixy) = count++; }); EXPECT_EQ( ddc::transform_reduce( - ddc::policies::serial_host, + ddc::policies::serial, dom, 0, ddc::reducer::sum(),