From dfdafa26527ba607d8fd6048a2aff8ccce622e9c Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Tue, 26 Sep 2023 17:00:41 +0900 Subject: [PATCH] More executor based implementation of heat3d-mpi --- mini-apps/heat3d-mpi/executors/heat3D.hpp | 86 ++++-- mini-apps/heat3d-mpi/executors/mpi_comm.hpp | 304 ++++++++------------ 2 files changed, 187 insertions(+), 203 deletions(-) diff --git a/mini-apps/heat3d-mpi/executors/heat3D.hpp b/mini-apps/heat3d-mpi/executors/heat3D.hpp index 923a982..129ca74 100644 --- a/mini-apps/heat3d-mpi/executors/heat3D.hpp +++ b/mini-apps/heat3d-mpi/executors/heat3D.hpp @@ -62,53 +62,105 @@ void solve(const Config& conf, // Overlapping for(std::size_t i=0; ibegin(); + + auto _pack_all = + pack_all_sender(stdexec::just(), + std::forward(scheduler), + comm, + u); + timers[HaloPack]->begin(); - comm.pack(scheduler, u); + stdexec::sync_wait( std::move(_pack_all) ); timers[HaloPack]->end(); - auto inner_update = stdexec::when_all( - stdexec::just() | exec::on( scheduler, stdexec::bulk(n, heat3d_functor(conf, x_mask, y_mask, z_mask, u, un)) ), - stdexec::just() | stdexec::then( [&]{ timers[HaloComm]->begin(); - comm.commP2P(); - timers[HaloComm]->end(); - } ) + auto _inner_update = + stdexec::when_all( + stdexec::just() | exec::on( scheduler, stdexec::bulk(n, heat3d_functor(conf, x_mask, y_mask, z_mask, u, un)) ), + stdexec::just() | stdexec::then( [&]{ timers[HaloComm]->begin(); + comm.commP2P(); + timers[HaloComm]->end(); + } ) ); timers[Heat]->begin(); - stdexec::sync_wait( std::move(inner_update) ); + stdexec::sync_wait( std::move(_inner_update) ); timers[Heat]->end(); timers[HaloUnpack]->begin(); - comm.boundaryUpdate(conf, scheduler, un); + auto _boundaryUpdate_all = + boundaryUpdate_all_sender(stdexec::just(), scheduler, conf, comm, un) + | stdexec::then( [&]{ std::swap(u, un); } ); + + stdexec::sync_wait( std::move(_boundaryUpdate_all) ); timers[HaloUnpack]->end(); - std::swap(u, un); timers[MainLoop]->end(); } } else { for(std::size_t i=0; ibegin(); + auto _pack_all = + pack_all_sender(stdexec::just(), + std::forward(scheduler), + comm, + u); + timers[HaloPack]->begin(); - comm.pack(scheduler, u); + stdexec::sync_wait( std::move(_pack_all) ); timers[HaloPack]->end(); timers[HaloComm]->begin(); comm.commP2P(); timers[HaloComm]->end(); + auto _unpack_all = + unpack_all_sender(stdexec::just(), + std::forward(scheduler), + comm, + u); + timers[HaloUnpack]->begin(); - comm.unpack(scheduler, u); + stdexec::sync_wait( std::move(_unpack_all) ); timers[HaloUnpack]->end(); - auto update = stdexec::just() - | exec::on( scheduler, stdexec::bulk(n, heat3d_functor(conf, x_mask, y_mask, z_mask, u, un)) ) - | stdexec::then( [&]{ std::swap(u, un); } ); + auto _update = stdexec::just() + | exec::on( scheduler, stdexec::bulk(n, heat3d_functor(conf, x_mask, y_mask, z_mask, u, un)) ) + | stdexec::then( [&]{ std::swap(u, un); } ); timers[Heat]->begin(); - stdexec::sync_wait( std::move(update) ); + stdexec::sync_wait( std::move(_update) ); timers[Heat]->end(); + /* The following also works + auto _pack_all = + pack_all_sender(stdexec::just(), + std::forward(scheduler), + comm, + u); + + auto _comm = _pack_all + | stdexec::then([&]{ + timers[HaloComm]->begin(); + comm.commP2P(); + timers[HaloComm]->end(); + }); + + stdexec::sync_wait( std::move(_comm) ); + + auto _unpack_all = + unpack_all_sender(stdexec::just(), + std::forward(scheduler), + comm, + u); + + auto _update = _unpack_all + | exec::on( scheduler, stdexec::bulk(n, heat3d_functor(conf, x_mask, y_mask, z_mask, u, un)) ) + | stdexec::then( [&]{ std::swap(u, un); } ); + + stdexec::sync_wait( std::move(_update) ); + */ + timers[MainLoop]->end(); } } @@ -131,7 +183,7 @@ void finalize(const Config& conf, auto un = variables.un(); auto analytical_solution = stdexec::just() - | exec::on( scheduler, stdexec::bulk(n, analytical_solution_functor(conf, time, x, y, z, un)) ); + | exec::on( scheduler, stdexec::bulk(n, analytical_solution_functor(conf, time, x, y, z, un)) ); stdexec::sync_wait( std::move(analytical_solution) ); // Check errors diff --git a/mini-apps/heat3d-mpi/executors/mpi_comm.hpp b/mini-apps/heat3d-mpi/executors/mpi_comm.hpp index 39f0cb9..8f4538d 100644 --- a/mini-apps/heat3d-mpi/executors/mpi_comm.hpp +++ b/mini-apps/heat3d-mpi/executors/mpi_comm.hpp @@ -225,164 +225,12 @@ class Comm { } public: - template - void pack(Scheduler&& schdeuler, View& u) { - // Define submdspans for halo regions - const std::pair inner_x(1, u.extent(0) - 1); - const std::pair inner_y(1, u.extent(1) - 1); - const std::pair inner_z(1, u.extent(2) - 1); - - // Exchange in x direction - { - int i = 0; - auto ux_send_left = stdex::submdspan(u, 1, inner_y, inner_z); - auto ux_send_right = stdex::submdspan(u, u.extent(0) - 2, inner_y, inner_z); - - pack_(schdeuler, send_buffer(i), ux_send_left, ux_send_right); - } - - // Exchange in y direction - { - int i = 1; - auto uy_send_left = stdex::submdspan(u, inner_x, 1, inner_z); - auto uy_send_right = stdex::submdspan(u, inner_x, u.extent(1) - 2, inner_z); - - pack_(schdeuler, send_buffer(i), uy_send_left, uy_send_right); - } - - // Exchange in z direction - { - int i = 2; - auto uz_send_left = stdex::submdspan(u, inner_x, inner_y, 1); - auto uz_send_right = stdex::submdspan(u, inner_x, inner_y, u.extent(2) - 2); - - pack_(schdeuler, send_buffer(i), uz_send_left, uz_send_right); - } - } - - template - void unpack(Scheduler&& schdeuler, View& u) { - // Define submdspans for halo regions - const std::pair inner_x(1, u.extent(0) - 1); - const std::pair inner_y(1, u.extent(1) - 1); - const std::pair inner_z(1, u.extent(2) - 1); - - // Exchange in x direction - { - int i = 0; - auto ux_recv_left = stdex::submdspan(u, 0, inner_y, inner_z); - auto ux_recv_right = stdex::submdspan(u, u.extent(0) - 1, inner_y, inner_z); - - unpack_(schdeuler, ux_recv_left, ux_recv_right, recv_buffer(i)); - } - - // Exchange in y direction - { - int i = 1; - auto uy_recv_left = stdex::submdspan(u, inner_x, 0, inner_z); - auto uy_recv_right = stdex::submdspan(u, inner_x, u.extent(1) - 1, inner_z); - - unpack_(schdeuler, uy_recv_left, uy_recv_right, recv_buffer(i)); - } - - // Exchange in z direction - { - int i = 2; - auto uz_recv_left = stdex::submdspan(u, inner_x, inner_y, 0); - auto uz_recv_right = stdex::submdspan(u, inner_x, inner_y, u.extent(2) - 1); - - unpack_(schdeuler, uz_recv_left, uz_recv_right, recv_buffer(i)); - } - } - - template - void boundaryUpdate(const Config& conf, Scheduler&& schdeuler, View& u) { - // Define submdspans for halo regions - const std::pair inner_x(1, u.extent(0) - 1); - const std::pair inner_y(1, u.extent(1) - 1); - const std::pair inner_z(1, u.extent(2) - 1); - - // Exchange in x direction - { - int i = 0; - auto ux_recv_left = stdex::submdspan(u, 1, inner_y, inner_z); - auto ux_recv_right = stdex::submdspan(u, u.extent(0) - 2, inner_y, inner_z); - - boundaryUpdate_(conf, schdeuler, ux_recv_left, ux_recv_right, recv_buffer(i)); - } - - // Exchange in y direction - { - int i = 1; - auto uy_recv_left = stdex::submdspan(u, inner_x, 1, inner_z); - auto uy_recv_right = stdex::submdspan(u, inner_x, u.extent(1) - 2, inner_z); - - boundaryUpdate_(conf, schdeuler, uy_recv_left, uy_recv_right, recv_buffer(i)); - } - - // Exchange in z direction - { - int i = 2; - auto uz_recv_left = stdex::submdspan(u, inner_x, inner_y, 1); - auto uz_recv_right = stdex::submdspan(u, inner_x, inner_y, u.extent(2) - 2); - - boundaryUpdate_(conf, schdeuler, uz_recv_left, uz_recv_right, recv_buffer(i)); - } - } - void commP2P() { for(std::size_t i=0; i - void exchangeHalos(Scheduler&& schdeuler, View& u) { - // Define submdspans for halo regions - const std::pair inner_x(1, u.extent(0) - 1); - const std::pair inner_y(1, u.extent(1) - 1); - const std::pair inner_z(1, u.extent(2) - 1); - - // Exchange in x direction - { - int i = 0; - auto ux_send_left = stdex::submdspan(u, 1, inner_y, inner_z); - auto ux_send_right = stdex::submdspan(u, u.extent(0) - 2, inner_y, inner_z); - auto ux_recv_left = stdex::submdspan(u, 0, inner_y, inner_z); - auto ux_recv_right = stdex::submdspan(u, u.extent(0) - 1, inner_y, inner_z); - - pack_(schdeuler, send_buffer(i), ux_send_left, ux_send_right); - commP2P_(recv_buffer(i), send_buffer(i)); - unpack_(schdeuler, ux_recv_left, ux_recv_right, recv_buffer(i)); - } - - // Exchange in y direction - { - int i = 1; - auto uy_send_left = stdex::submdspan(u, inner_x, 1, inner_z); - auto uy_send_right = stdex::submdspan(u, inner_x, u.extent(1) - 2, inner_z); - auto uy_recv_left = stdex::submdspan(u, inner_x, 0, inner_z); - auto uy_recv_right = stdex::submdspan(u, inner_x, u.extent(1) - 1, inner_z); - - pack_(schdeuler, send_buffer(i), uy_send_left, uy_send_right); - commP2P_(recv_buffer(i), send_buffer(i)); - unpack_(schdeuler, uy_recv_left, uy_recv_right, recv_buffer(i)); - } - - // Exchange in z direction - { - int i = 2; - auto uz_send_left = stdex::submdspan(u, inner_x, inner_y, 1); - auto uz_send_right = stdex::submdspan(u, inner_x, inner_y, u.extent(2) - 2); - auto uz_recv_left = stdex::submdspan(u, inner_x, inner_y, 0); - auto uz_recv_right = stdex::submdspan(u, inner_x, inner_y, u.extent(2) - 1); - - pack_(schdeuler, send_buffer(i), uz_send_left, uz_send_right); - commP2P_(recv_buffer(i), send_buffer(i)); - unpack_(schdeuler, uz_recv_left, uz_recv_right, recv_buffer(i)); - } - } - private: template void commP2P_(HaloType& recv, HaloType& send) { @@ -407,48 +255,132 @@ class Comm { thrust::swap( send_right_vector, recv_left_vector ); } } +}; - template - void pack_(Scheduler&& scheduler, HaloType& send, const View& left, const View& right) { - auto left_buffer = send.left(); - auto right_buffer = send.right(); - const std::size_t n = left.size(); +/* Senders for boundary updates */ +template +stdexec::sender auto pack_sender(Sender&& sender, Scheduler&& scheduler, HaloType& send, const View& left, const View& right) { + auto left_buffer = send.left(); + auto right_buffer = send.right(); + const std::size_t n = left.size(); - assert( left.extents() == right.extents() ); - assert( left.extents() == left_buffer.extents() ); - assert( left.extents() == right_buffer.extents() ); + assert( left.extents() == right.extents() ); + assert( left.extents() == left_buffer.extents() ); + assert( left.extents() == right_buffer.extents() ); - auto pack_task = stdexec::just() | exec::on( scheduler, stdexec::bulk(n, copy_functor(left, right, left_buffer, right_buffer) ) ); - stdexec::sync_wait( std::move(pack_task) ); - } + return sender | exec::on( scheduler, stdexec::bulk(n, copy_functor(left, right, left_buffer, right_buffer) ) ); +} - template - void unpack_(Scheduler&& scheduler, View& left, View& right, HaloType& recv) { - const auto left_buffer = recv.left(); - const auto right_buffer = recv.right(); - const std::size_t n = left.size(); +template +stdexec::sender auto unpack_sender(Sender&& sender, Scheduler&& scheduler, View& left, View& right, HaloType& recv) { + const auto left_buffer = recv.left(); + const auto right_buffer = recv.right(); + const std::size_t n = left.size(); - assert( left.extents() == right.extents() ); - assert( left.extents() == left_buffer.extents() ); - assert( left.extents() == right_buffer.extents() ); + assert( left.extents() == right.extents() ); + assert( left.extents() == left_buffer.extents() ); + assert( left.extents() == right_buffer.extents() ); - auto unpack_task = stdexec::just() | exec::on( scheduler, stdexec::bulk(n, copy_functor(left_buffer, right_buffer, left, right) ) ); - stdexec::sync_wait( std::move(unpack_task) ); - } + return sender | exec::on( scheduler, stdexec::bulk(n, copy_functor(left_buffer, right_buffer, left, right) ) ); +} - template - void boundaryUpdate_(const Config& conf, Scheduler&& scheduler, View& left, View& right, HaloType& recv) { - const auto left_buffer = recv.left(); - const auto right_buffer = recv.right(); - const std::size_t n = left.size(); +template +stdexec::sender auto boundaryUpdate_sender(Sender&& sender, Scheduler&& scheduler, const Config& conf, View& left, View& right, HaloType& recv) { + const auto left_buffer = recv.left(); + const auto right_buffer = recv.right(); + const std::size_t n = left.size(); - assert( left.extents() == right.extents() ); - assert( left.extents() == left_buffer.extents() ); - assert( left.extents() == right_buffer.extents() ); + assert( left.extents() == right.extents() ); + assert( left.extents() == left_buffer.extents() ); + assert( left.extents() == right_buffer.extents() ); - auto update_task = stdexec::just() | exec::on( scheduler, stdexec::bulk(n, heat3d_boundary_functor(conf, left_buffer, right_buffer, left, right) ) ); - stdexec::sync_wait( std::move(update_task) ); - } -}; + return sender | exec::on( scheduler, stdexec::bulk(n, heat3d_boundary_functor(conf, left_buffer, right_buffer, left, right) ) ); +} + +template +stdexec::sender auto pack_all_sender(Sender&& sender, Scheduler&& scheduler, Comm& comm, View& u) { + // Define submdspans for halo regions + const std::pair inner_x(1, u.extent(0) - 1); + const std::pair inner_y(1, u.extent(1) - 1); + const std::pair inner_z(1, u.extent(2) - 1); + + int i = 0; + auto ux_send_left = stdex::submdspan(u, 1, inner_y, inner_z); + auto ux_send_right = stdex::submdspan(u, u.extent(0) - 2, inner_y, inner_z); + auto _pack_x_sender = pack_sender(sender, scheduler, comm.send_buffer(i), ux_send_left, ux_send_right); + + i = 1; + auto uy_send_left = stdex::submdspan(u, inner_x, 1, inner_z); + auto uy_send_right = stdex::submdspan(u, inner_x, u.extent(1) - 2, inner_z); + auto _pack_y_sender = pack_sender(sender, scheduler, comm.send_buffer(i), uy_send_left, uy_send_right); + + i = 2; + auto uz_send_left = stdex::submdspan(u, inner_x, inner_y, 1); + auto uz_send_right = stdex::submdspan(u, inner_x, inner_y, u.extent(2) - 2); + auto _pack_z_sender = pack_sender(sender, scheduler, comm.send_buffer(i), uz_send_left, uz_send_right); + + return stdexec::when_all( + std::move(_pack_x_sender), + std::move(_pack_y_sender), + std::move(_pack_z_sender) + ); +} + +template +stdexec::sender auto unpack_all_sender(Sender&& sender, Scheduler&& scheduler, Comm& comm, View& u) { + // Define submdspans for halo regions + const std::pair inner_x(1, u.extent(0) - 1); + const std::pair inner_y(1, u.extent(1) - 1); + const std::pair inner_z(1, u.extent(2) - 1); + + int i = 0; + auto ux_recv_left = stdex::submdspan(u, 0, inner_y, inner_z); + auto ux_recv_right = stdex::submdspan(u, u.extent(0) - 1, inner_y, inner_z); + auto _unpack_x_sender = unpack_sender(sender, scheduler, ux_recv_left, ux_recv_right, comm.recv_buffer(i)); + + i = 1; + auto uy_recv_left = stdex::submdspan(u, inner_x, 0, inner_z); + auto uy_recv_right = stdex::submdspan(u, inner_x, u.extent(1) - 1, inner_z); + auto _unpack_y_sender = unpack_sender(sender, scheduler, uy_recv_left, uy_recv_right, comm.recv_buffer(i)); + + i = 2; + auto uz_recv_left = stdex::submdspan(u, inner_x, inner_y, 0); + auto uz_recv_right = stdex::submdspan(u, inner_x, inner_y, u.extent(2) - 1); + auto _unpack_z_sender = unpack_sender(sender, scheduler, uz_recv_left, uz_recv_right, comm.recv_buffer(i)); + + return stdexec::when_all( + std::move(_unpack_x_sender), + std::move(_unpack_y_sender), + std::move(_unpack_z_sender) + ); +} + +template +stdexec::sender auto boundaryUpdate_all_sender(Sender&& sender, Scheduler&& schdeuler, const Config& conf, Comm& comm, View& u) { + // [Note] These operations must be done sequential, not with when_all + // Define submdspans for halo regions + const std::pair inner_x(1, u.extent(0) - 1); + const std::pair inner_y(1, u.extent(1) - 1); + const std::pair inner_z(1, u.extent(2) - 1); + + int i = 0; + auto ux_recv_left = stdex::submdspan(u, 1, inner_y, inner_z); + auto ux_recv_right = stdex::submdspan(u, u.extent(0) - 2, inner_y, inner_z); + auto _boundary_update_x_sender = boundaryUpdate_sender(sender, schdeuler, conf, ux_recv_left, ux_recv_right, comm.recv_buffer(i)); + + // Exchange in y direction + i = 1; + auto uy_recv_left = stdex::submdspan(u, inner_x, 1, inner_z); + auto uy_recv_right = stdex::submdspan(u, inner_x, u.extent(1) - 2, inner_z); + auto _boundary_update_y_sender = boundaryUpdate_sender(_boundary_update_x_sender, schdeuler, conf, uy_recv_left, uy_recv_right, comm.recv_buffer(i)); + + // Exchange in z direction + i = 2; + auto uz_recv_left = stdex::submdspan(u, inner_x, inner_y, 1); + auto uz_recv_right = stdex::submdspan(u, inner_x, inner_y, u.extent(2) - 2); + auto _boundary_update_z_sender = boundaryUpdate_sender(_boundary_update_y_sender, schdeuler, conf, uz_recv_left, uz_recv_right, comm.recv_buffer(i)); + + return _boundary_update_z_sender; +} #endif