From 9dac20f19e52f91aafbc91fd76d6f886bc87d791 Mon Sep 17 00:00:00 2001
From: Cyrus Harrison
Date: Tue, 11 Jun 2024 08:23:25 -0700
Subject: [PATCH] mpi world comm audit and fixes (#1307)

* audit and remove use of mpi world comm

* changelog
---
 CHANGELOG.md                                  |  3 ++
 src/libs/apcomp/compositor.cpp                |  6 ++--
 .../apcomp_diy_partial_redistribute.hpp       | 11 ++++---
 .../diy/include/diy/mpi/communicator.hpp      | 13 +++++++-
 .../ascent_runtime_rover_filters.cpp          |  4 +++
 .../ascent_runtime_vtkh_filters.cpp           |  2 +-
 src/libs/rover/utils/rover_logging.cpp        | 30 +++++++++++++++++--
 src/libs/rover/utils/rover_logging.hpp        | 11 +++++--
 src/libs/rover/utils/vtk_dataset_reader.cpp   |  9 +++---
 src/libs/vtkh/StatisticsDB.hpp                | 19 ++++++------
 src/libs/vtkh/compositing/Compositor.cpp      |  6 ++--
 .../vtkh/compositing/PayloadCompositor.cpp    |  3 +-
 .../diy/include/diy/mpi/communicator.hpp      |  2 +-
 .../vtkh_diy_partial_redistribute.hpp         | 11 ++++---
 src/libs/vtkh/filters/Slice.cpp               |  6 ++--
 src/libs/vtkh/filters/UniformGrid.cpp         | 14 ++++-----
 src/libs/vtkh/filters/avtParICAlgorithm.hpp   |  1 +
 src/libs/vtkh/filters/avtParICAlgorithm.hxx   | 16 +++++-----
 18 files changed, 111 insertions(+), 56 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 707dcf502..4e892bf9f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,9 @@ and this project aspires to adhere to [Semantic Versioning](https://semver.org/s
 - Added a `topologies` option to the relay extract. This allows you to select which topologies are saved. This option can be used with the existing `fields` option, the result is the union of the selected topologies and fields.
 - Added `near_plane` and `far_plane` to the camera details provided in Ascent::info()

+### Fixed
+- Resolved a few cases where MPI_COMM_WORLD was used instead of the selected MPI communicator.
+
 ## [0.9.3] - Released 2024-05-11
 ### Preferred dependency versions for ascent@0.9.3
 - conduit@0.9.1
diff --git a/src/libs/apcomp/compositor.cpp b/src/libs/apcomp/compositor.cpp
index 205b0ca36..039feb42c 100644
--- a/src/libs/apcomp/compositor.cpp
+++ b/src/libs/apcomp/compositor.cpp
@@ -222,8 +222,7 @@ Compositor::CompositeZBufferSurface()
 // nothing to do here in serial. Images were composited as
 // they were added to the compositor
 #ifdef APCOMP_PARALLEL
-  apcompdiy::mpi::communicator diy_comm;
-  diy_comm = apcompdiy::mpi::communicator(MPI_Comm_f2c(mpi_comm()));
+  apcompdiy::mpi::communicator diy_comm(MPI_Comm_f2c(mpi_comm()));
   assert(m_images.size() == 1);
   RadixKCompositor compositor;
@@ -243,8 +242,7 @@ Compositor::CompositeVisOrder()
 {
 #ifdef APCOMP_PARALLEL
-  apcompdiy::mpi::communicator diy_comm;
-  diy_comm = apcompdiy::mpi::communicator(MPI_Comm_f2c(mpi_comm()));
+  apcompdiy::mpi::communicator diy_comm(MPI_Comm_f2c(mpi_comm()));
   assert(m_images.size() != 0);
   DirectSendCompositor compositor;
diff --git a/src/libs/apcomp/internal/apcomp_diy_partial_redistribute.hpp b/src/libs/apcomp/internal/apcomp_diy_partial_redistribute.hpp
index e72136c2d..1e9df7179 100644
--- a/src/libs/apcomp/internal/apcomp_diy_partial_redistribute.hpp
+++ b/src/libs/apcomp/internal/apcomp_diy_partial_redistribute.hpp
@@ -26,9 +26,12 @@ template
 struct Redistribute
 {
   const apcompdiy::RegularDecomposer &m_decomposer;
+  MPI_Comm &m_comm;
-  Redistribute(const apcompdiy::RegularDecomposer &decomposer)
-    : m_decomposer(decomposer)
+  Redistribute(const apcompdiy::RegularDecomposer &decomposer,
+               MPI_Comm &comm)
+    : m_decomposer(decomposer),
+      m_comm(comm)
   {}
   void operator()(void *v_block, const apcompdiy::ReduceProxy &proxy) const
@@ -81,7 +84,7 @@ struct Redistribute
     } // for
   } // else
-  MPI_Barrier(MPI_COMM_WORLD); //HACK
+  MPI_Barrier(m_comm); //HACK
 } // operator
 };
@@ -113,7 +116,7 @@ void redistribute_detail(std::vector &partia
   apcompdiy::RegularDecomposer decomposer(dims, global_bounds, num_blocks);
   decomposer.decompose(world.rank(), assigner, create);
-  apcompdiy::all_to_all(master, assigner, Redistribute(decomposer), magic_k);
+  apcompdiy::all_to_all(master, assigner, Redistribute(decomposer,comm), magic_k);
 }
 //
diff --git a/src/libs/apcomp/internal/diy/include/diy/mpi/communicator.hpp b/src/libs/apcomp/internal/diy/include/diy/mpi/communicator.hpp
index c1999e314..ee76959e1 100644
--- a/src/libs/apcomp/internal/diy/include/diy/mpi/communicator.hpp
+++ b/src/libs/apcomp/internal/diy/include/diy/mpi/communicator.hpp
@@ -8,8 +8,11 @@ namespace mpi
   class communicator
   {
     public:
+      inline
+      communicator();
+
       inline
-      communicator(MPI_Comm comm = MPI_COMM_WORLD, bool owner = false);
+      communicator(MPI_Comm comm, bool owner = false);
       ~communicator()  { destroy(); }
@@ -97,6 +100,14 @@ namespace mpi
   }
 }
+apcompdiy::mpi::communicator::
+communicator():
+  comm_(MPI_COMM_NULL), rank_(0), size_(1), owner_(false)
+{
+  // empty
+}
+
+
 apcompdiy::mpi::communicator::
 communicator(MPI_Comm comm, bool owner):
   comm_(comm), rank_(0), size_(1), owner_(owner)
diff --git a/src/libs/ascent/runtimes/flow_filters/ascent_runtime_rover_filters.cpp b/src/libs/ascent/runtimes/flow_filters/ascent_runtime_rover_filters.cpp
index 94f90c42e..0d91b68a6 100644
--- a/src/libs/ascent/runtimes/flow_filters/ascent_runtime_rover_filters.cpp
+++ b/src/libs/ascent/runtimes/flow_filters/ascent_runtime_rover_filters.cpp
@@ -40,6 +40,7 @@
 #if defined(ASCENT_VTKM_ENABLED)
 #include
+#include
 #include
 #include
 #include
@@ -212,6 +213,9 @@ RoverXRay::execute()
   Rover tracer;
 #ifdef ASCENT_MPI_ENABLED
   int comm_id = flow::Workspace::default_mpi_comm();
+  rover::Logger::get_instance()->set_mpi_comm_id(comm_id);
+  /// these use different styles of naming functions ....
+  rover::DataLogger::GetInstance()->set_mpi_comm_id(comm_id);
   tracer.set_mpi_comm_handle(comm_id);
 #endif
diff --git a/src/libs/ascent/runtimes/flow_filters/ascent_runtime_vtkh_filters.cpp b/src/libs/ascent/runtimes/flow_filters/ascent_runtime_vtkh_filters.cpp
index cf45984a5..affd30249 100644
--- a/src/libs/ascent/runtimes/flow_filters/ascent_runtime_vtkh_filters.cpp
+++ b/src/libs/ascent/runtimes/flow_filters/ascent_runtime_vtkh_filters.cpp
@@ -5178,7 +5178,7 @@ VTKHVTKFileExtract::execute()
     Node n_recv;
     conduit::relay::mpi::all_gather_using_schema(n_local_domain_ids, n_recv,
-                                                 MPI_COMM_WORLD);
+                                                 mpi_comm);
     n_global_domain_ids.set(DataType::index_t(num_global_domains));
     n_global_domain_ids.print();
     index_t_array global_vals = n_global_domain_ids.value();
diff --git a/src/libs/rover/utils/rover_logging.cpp b/src/libs/rover/utils/rover_logging.cpp
index f189b8556..11c4d5673 100644
--- a/src/libs/rover/utils/rover_logging.cpp
+++ b/src/libs/rover/utils/rover_logging.cpp
@@ -23,7 +23,7 @@ Logger::Logger()
   log_name<<"rover";
 #ifdef ROVER_PARALLEL
   int rank;
-  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_rank(MPI_Comm_f2c(get_mpi_comm_id()), &rank);
   log_name<<"_"<
 #include
 #include
+#include
 namespace rover {
-class Logger
+class ROVER_API Logger
 {
 public:
   ~Logger();
   static Logger *get_instance();
+  void set_mpi_comm_id(int comm_id);
+  int get_mpi_comm_id();
   void write(const int level, const std::string &message, const char *file, int line);
   std::ofstream & get_stream();
 protected:
   Logger();
   Logger(Logger const &);
   std::ofstream m_stream;
+  int m_mpi_comm_id;
   static class Logger* m_instance;
 };
-class DataLogger
+class ROVER_API DataLogger
 {
 public:
   ~DataLogger();
   static DataLogger *GetInstance();
+  void set_mpi_comm_id(int comm_id);
+  int get_mpi_comm_id();
   void OpenLogEntry(const std::string &entryName);
   void CloseLogEntry(const double &entryTime);
@@ -49,6 +55,7 @@ class DataLogger
   std::stringstream Stream;
   static class DataLogger* Instance;
   std::stack Entries;
+  int m_mpi_comm_id;
 };
 #ifdef ROVER_ENABLE_LOGGING
diff --git a/src/libs/rover/utils/vtk_dataset_reader.cpp b/src/libs/rover/utils/vtk_dataset_reader.cpp
index 032645ce6..07792b1af 100644
--- a/src/libs/rover/utils/vtk_dataset_reader.cpp
+++ b/src/libs/rover/utils/vtk_dataset_reader.cpp
@@ -4,6 +4,7 @@
 // other details. No copyright assignment is required to contribute to Ascent.
 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
+#include
 #include
 #include
 #include
@@ -98,10 +99,10 @@ MultiDomainVTKReader::read_file(const std::string &directory, const std::string
   //
   // figure out which data sets to read
   //
-  int rank;
-  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-  int num_ranks;
-  MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);
+  int rank, num_ranks;
+  MPI_Comm comm = MPI_Comm_f2c(Logger::get_instance()->get_mpi_comm_id());
+  MPI_Comm_rank(comm, &rank);
+  MPI_Comm_size(comm, &num_ranks);
   if(rank == 0)
   {
     std::cout<<"Num ranks "<tmp(nProcs,0.0);
     tmp[rank] = vals[i];
-    MPI_Reduce(&tmp[0], &res[0], nProcs, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
+    MPI_Reduce(&tmp[0], &res[0], nProcs, MPI_FLOAT, MPI_SUM, 0, mpi_comm);
   }
   if (rank == 0)
     timerStats[it->first] = statValue(res);
@@ -300,7 +301,7 @@ class VTKH_API StatisticsDB
   {
     std::vector tmp(nProcs,0);
     tmp[rank] = vals[i];
-    MPI_Reduce(&tmp[0], &res[0], nProcs, MPI_UNSIGNED_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
+    MPI_Reduce(&tmp[0], &res[0], nProcs, MPI_UNSIGNED_LONG, MPI_SUM, 0, mpi_comm);
   }
   if (rank == 0)
     counterStats[it->first] = statValue(res);
@@ -334,9 +335,9 @@ class VTKH_API StatisticsDB
   float allMin, allMax;
   int allMaxSz;
-  MPI_Allreduce(&myMin, &allMin, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD);
-  MPI_Allreduce(&myMax, &allMax, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
-  MPI_Allreduce(&myMaxSz, &allMaxSz, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
+  MPI_Allreduce(&myMin, &allMin, 1, MPI_FLOAT, MPI_MIN, mpi_comm);
+  MPI_Allreduce(&myMax, &allMax, 1, MPI_FLOAT, MPI_MAX, mpi_comm);
+  MPI_Allreduce(&myMaxSz, &allMaxSz, 1, MPI_INT, MPI_MAX, mpi_comm);
   int buffSz = allMaxSz*2 + 1, tag = 0;
   for (it = events.begin(); it != events.end(); it++)
@@ -354,7 +355,7 @@ class VTKH_API StatisticsDB
   for (int i = 1; i < nProcs; i++)
   {
     MPI_Status status;
-    MPI_Recv(&buff[0], buffSz, MPI_FLOAT, i, tag, MPI_COMM_WORLD, &status);
+    MPI_Recv(&buff[0], buffSz, MPI_FLOAT, i, tag, mpi_comm, &status);
     int n = int(buff[0]);
     for (int j = 0; j < n; j+=2)
       eventStats[i][it->first].history.push_back(std::make_pair(buff[1+j], buff[1+j+1]));
@@ -373,7 +374,7 @@ class VTKH_API StatisticsDB
       evData[1+j*2+0] = it->second.history[j].first;
       evData[1+j*2+1] = it->second.history[j].second;
     }
-    MPI_Send(&evData[0], buffSz, MPI_FLOAT, 0, tag, MPI_COMM_WORLD);
+    MPI_Send(&evData[0], buffSz, MPI_FLOAT, 0, tag, mpi_comm);
   }
 }
 }
diff --git a/src/libs/vtkh/compositing/Compositor.cpp b/src/libs/vtkh/compositing/Compositor.cpp
index 93971f705..a750b574c 100644
--- a/src/libs/vtkh/compositing/Compositor.cpp
+++ b/src/libs/vtkh/compositing/Compositor.cpp
@@ -202,8 +202,7 @@ Compositor::CompositeZBufferSurface()
 // nothing to do here in serial. Images were composited as
 // they were added to the compositor
 #ifdef VTKH_PARALLEL
-  vtkhdiy::mpi::communicator diy_comm;
-  diy_comm = vtkhdiy::mpi::communicator(MPI_Comm_f2c(GetMPICommHandle()));
+  vtkhdiy::mpi::communicator diy_comm(MPI_Comm_f2c(GetMPICommHandle()));
   assert(m_images.size() == 1);
   RadixKCompositor compositor;
@@ -223,8 +222,7 @@ Compositor::CompositeVisOrder()
 {
 #ifdef VTKH_PARALLEL
-  vtkhdiy::mpi::communicator diy_comm;
-  diy_comm = vtkhdiy::mpi::communicator(MPI_Comm_f2c(GetMPICommHandle()));
+  vtkhdiy::mpi::communicator diy_comm(MPI_Comm_f2c(GetMPICommHandle()));
   assert(m_images.size() != 0);
   DirectSendCompositor compositor;
diff --git a/src/libs/vtkh/compositing/PayloadCompositor.cpp b/src/libs/vtkh/compositing/PayloadCompositor.cpp
index 38a3ac60b..3f7a341ee 100644
--- a/src/libs/vtkh/compositing/PayloadCompositor.cpp
+++ b/src/libs/vtkh/compositing/PayloadCompositor.cpp
@@ -51,8 +51,7 @@ PayloadCompositor::Composite()
 // nothing to do here in serial. Images were composited as
 // they were added to the compositor
 #ifdef VTKH_PARALLEL
-  vtkhdiy::mpi::communicator diy_comm;
-  diy_comm = vtkhdiy::mpi::communicator(MPI_Comm_f2c(GetMPICommHandle()));
+  vtkhdiy::mpi::communicator diy_comm(MPI_Comm_f2c(GetMPICommHandle()));
   assert(m_images.size() == 1);
   RadixKCompositor compositor;
diff --git a/src/libs/vtkh/compositing/internal/diy/include/diy/mpi/communicator.hpp b/src/libs/vtkh/compositing/internal/diy/include/diy/mpi/communicator.hpp
index f1d9a1e19..a2ad9be08 100644
--- a/src/libs/vtkh/compositing/internal/diy/include/diy/mpi/communicator.hpp
+++ b/src/libs/vtkh/compositing/internal/diy/include/diy/mpi/communicator.hpp
@@ -8,7 +8,7 @@ namespace mpi
   class communicator
   {
     public:
-      communicator(MPI_Comm comm = MPI_COMM_WORLD):
+      communicator(MPI_Comm comm):
        comm_(comm)  { MPI_Comm_rank(comm_, &rank_); MPI_Comm_size(comm_, &size_); }
       int rank() const  { return rank_; }
diff --git a/src/libs/vtkh/compositing/vtkh_diy_partial_redistribute.hpp b/src/libs/vtkh/compositing/vtkh_diy_partial_redistribute.hpp
index fc4614ff1..a0a73623d 100644
--- a/src/libs/vtkh/compositing/vtkh_diy_partial_redistribute.hpp
+++ b/src/libs/vtkh/compositing/vtkh_diy_partial_redistribute.hpp
@@ -59,9 +59,12 @@ template
 struct Redistribute
 {
   const vtkhdiy::RegularDecomposer &m_decomposer;
+  MPI_Comm &m_comm;
-  Redistribute(const vtkhdiy::RegularDecomposer &decomposer)
-    : m_decomposer(decomposer)
+  Redistribute(const vtkhdiy::RegularDecomposer &decomposer,
+               MPI_Comm &comm)
+    : m_decomposer(decomposer),
+      m_comm(comm)
   {}
   void operator()(void *v_block, const vtkhdiy::ReduceProxy &proxy) const
@@ -113,7 +116,7 @@ struct Redistribute
     } // for
   } // else
-  MPI_Barrier(MPI_COMM_WORLD); //HACK
+  MPI_Barrier(m_comm); //HACK
 } // operator
 };
@@ -145,7 +148,7 @@ void redistribute_detail(std::vector &partia
   const int dims = 1;
   vtkhdiy::RegularDecomposer decomposer(dims, global_bounds, num_blocks);
   decomposer.decompose(world.rank(), assigner, create);
-  vtkhdiy::all_to_all(master, assigner, Redistribute(decomposer), magic_k);
+  vtkhdiy::all_to_all(master, assigner, Redistribute(decomposer,comm), magic_k);
 }
 //
diff --git a/src/libs/vtkh/filters/Slice.cpp b/src/libs/vtkh/filters/Slice.cpp
index 97c545209..93be63004 100644
--- a/src/libs/vtkh/filters/Slice.cpp
+++ b/src/libs/vtkh/filters/Slice.cpp
@@ -644,7 +644,7 @@ AutoSliceLevels::DoExecute()
   float datafield_min = 0.;
 #if ASCENT_MPI_ENABLED
-
+  MPI_Comm mpi_comm = MPI_Comm_f2c(vtkh::GetMPICommHandle());
   float local_datafield_max = 0.;
   float local_datafield_min = 0.;
@@ -653,8 +653,8 @@ AutoSliceLevels::DoExecute()
     local_datafield_max = (float)*max_element(field_data.begin(),field_data.end());
     local_datafield_min = (float)*min_element(field_data.begin(),field_data.end());
   }
-  MPI_Reduce(&local_datafield_max, &datafield_max, 1, MPI_FLOAT, MPI_MAX, 0, MPI_COMM_WORLD);
-  MPI_Reduce(&local_datafield_min, &datafield_min, 1, MPI_FLOAT, MPI_MIN, 0, MPI_COMM_WORLD);
+  MPI_Reduce(&local_datafield_max, &datafield_max, 1, MPI_FLOAT, MPI_MAX, 0, mpi_comm);
+  MPI_Reduce(&local_datafield_min, &datafield_min, 1, MPI_FLOAT, MPI_MIN, 0, mpi_comm);
 #else
diff --git a/src/libs/vtkh/filters/UniformGrid.cpp b/src/libs/vtkh/filters/UniformGrid.cpp
index b9b13b09a..16211cf1f 100644
--- a/src/libs/vtkh/filters/UniformGrid.cpp
+++ b/src/libs/vtkh/filters/UniformGrid.cpp
@@ -216,8 +216,8 @@ class GlobalReduceField
       }
     }
-    MPI_Reduce(l_mask.data(), g_mask.data(), num_points, MPI_INT, MPI_LAND, 0, MPI_COMM_WORLD);
-    MPI_Reduce(l_valid.data(), g_valid.data(), num_points, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+    MPI_Reduce(l_mask.data(), g_mask.data(), num_points, MPI_INT, MPI_LAND, 0, mpi_comm);
+    MPI_Reduce(l_valid.data(), g_valid.data(), num_points, MPI_INT, MPI_SUM, 0, mpi_comm);
     ////send to root process
     if(uah_field.CanConvert())
@@ -235,7 +235,7 @@ class GlobalReduceField
       }
     }
-    MPI_Reduce(local_field, global_field.data(), num_points, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+    MPI_Reduce(local_field, global_field.data(), num_points, MPI_INT, MPI_SUM, 0, mpi_comm);
     if(par_rank == 0)
     {
@@ -277,7 +277,7 @@ class GlobalReduceField
       ah_field.WritePortal().Set(i,0);
     }
-    MPI_Reduce(local_field, global_field.data(), num_points, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
+    MPI_Reduce(local_field, global_field.data(), num_points, MPI_FLOAT, MPI_SUM, 0, mpi_comm);
     if(par_rank == 0)
     {
@@ -317,7 +317,7 @@ class GlobalReduceField
     }
     double * local_field = GetVTKMPointer(ah_field);
     std::vector global_field(num_points,0);
-    MPI_Reduce(local_field, global_field.data(), num_points, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+    MPI_Reduce(local_field, global_field.data(), num_points, MPI_DOUBLE, MPI_SUM, 0, mpi_comm);
     if(par_rank == 0)
     {
@@ -365,8 +365,8 @@ class GlobalReduceField
       local_y_points[i] = ah_field.ReadPortal().Get(i)[1];
     }
-    MPI_Reduce(local_x_points.data(), global_x_points.data(), num_points, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
-    MPI_Reduce(local_y_points.data(), global_y_points.data(), num_points, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
+    MPI_Reduce(local_x_points.data(), global_x_points.data(), num_points, MPI_FLOAT, MPI_SUM, 0, mpi_comm);
+    MPI_Reduce(local_y_points.data(), global_y_points.data(), num_points, MPI_FLOAT, MPI_SUM, 0, mpi_comm);
     if(par_rank == 0)
     {
diff --git a/src/libs/vtkh/filters/avtParICAlgorithm.hpp b/src/libs/vtkh/filters/avtParICAlgorithm.hpp
index a970dd59e..bae837b9e 100644
--- a/src/libs/vtkh/filters/avtParICAlgorithm.hpp
+++ b/src/libs/vtkh/filters/avtParICAlgorithm.hpp
@@ -83,6 +83,7 @@ class avtParICAlgorithm
     typedef std::map::iterator bufferIterator;
     typedef std::map>::iterator packetIterator;
+    MPI_Comm comm;
     int rank, nProcs;
     std::map sendBuffers, recvBuffers;
diff --git a/src/libs/vtkh/filters/avtParICAlgorithm.hxx b/src/libs/vtkh/filters/avtParICAlgorithm.hxx
index d3bff5d11..9257d395a 100644
--- a/src/libs/vtkh/filters/avtParICAlgorithm.hxx
+++ b/src/libs/vtkh/filters/avtParICAlgorithm.hxx
@@ -2,15 +2,15 @@
 #include
 #include "MemStream.h"
 #include
-//#include "avtParICAlgorithm.h"
 using namespace std;
-avtParICAlgorithm::avtParICAlgorithm(MPI_Comm comm)
+avtParICAlgorithm::avtParICAlgorithm(MPI_Comm _comm)
+: comm(_comm),
+  msgID(0)
 {
-    MPI_Comm_size(comm, &nProcs);
-    MPI_Comm_rank(comm, &rank);
-    msgID = 0;
+    MPI_Comm_size(_comm, &nProcs);
+    MPI_Comm_rank(_comm, &rank);
 }
 void
@@ -92,9 +92,9 @@ avtParICAlgorithm::PostRecv(int tag, int sz, int src)
     MPI_Request req;
     if (src == -1)
-        MPI_Irecv(buff, sz, MPI_BYTE, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &req);
+        MPI_Irecv(buff, sz, MPI_BYTE, MPI_ANY_SOURCE, tag, comm, &req);
     else
-        MPI_Irecv(buff, sz, MPI_BYTE, src, tag, MPI_COMM_WORLD, &req);
+        MPI_Irecv(buff, sz, MPI_BYTE, src, tag, comm, &req);
     RequestTagPair entry(req, tag);
     recvBuffers[entry] = buff;
@@ -220,7 +220,7 @@ avtParICAlgorithm::SendData(int dst, int tag, MemStream *buff)
         memcpy(&header, bufferList[i], sizeof(header));
         MPI_Request req;
         int err = MPI_Isend(bufferList[i], header.packetSz, MPI_BYTE, dst,
-                            tag, MPI_COMM_WORLD, &req);
+                            tag, comm, &req);
         if (err != MPI_SUCCESS)
         {
             cerr << "Err with MPI_Isend in PARIC algorithm" << endl;
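
Editor's note (not part of the patch): every hunk above applies the same pattern — take the integer (Fortran-style) communicator handle that was registered with Ascent/VTK-h, convert it with MPI_Comm_f2c, and hand the resulting MPI_Comm to MPI calls and diy communicators instead of MPI_COMM_WORLD. A minimal sketch of that pattern follows; the function name use_selected_comm and the parameter comm_id are hypothetical placeholders, not symbols from the patch.

    #include <mpi.h>

    // comm_id is the Fortran-style handle chosen by the host code, e.g. the
    // value returned by flow::Workspace::default_mpi_comm() in the hunks above.
    void use_selected_comm(int comm_id)
    {
      // Convert the integer handle to a C communicator instead of assuming world.
      MPI_Comm comm = MPI_Comm_f2c(comm_id);

      int rank = 0, nprocs = 1;
      MPI_Comm_rank(comm, &rank);
      MPI_Comm_size(comm, &nprocs);

      // Collectives now run on the selected communicator, so Ascent can be
      // driven from a sub-communicator without touching ranks outside it.
      float local_max = 0.f, global_max = 0.f;
      MPI_Reduce(&local_max, &global_max, 1, MPI_FLOAT, MPI_MAX, 0, comm);
    }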