Skip to content

Commit

Permalink
mpi world comm audit and fixes (#1307)
Browse files Browse the repository at this point in the history
* audit and remove use of mpi world comm

* changelog
  • Loading branch information
cyrush committed Jun 11, 2024
1 parent 0158038 commit 9dac20f
Show file tree
Hide file tree
Showing 18 changed files with 111 additions and 56 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ and this project aspires to adhere to [Semantic Versioning](https://semver.org/s
- Added a `topologies` option to the relay extract. This allows you to select which topologies are saved. This option can be used with the existing `fields` option, the result is the union of the selected topologies and fields.
- Added `near_plane` and `far_plane` to the camera details provided in Ascent::info()

### Fixed
- Resolved a few cases where MPI_COMM_WORLD was used instead instead of the selected MPI communicator.

## [0.9.3] - Released 2024-05-11
### Preferred dependency versions for [email protected]
- [email protected]
Expand Down
6 changes: 2 additions & 4 deletions src/libs/apcomp/compositor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,7 @@ Compositor::CompositeZBufferSurface()
// nothing to do here in serial. Images were composited as
// they were added to the compositor
#ifdef APCOMP_PARALLEL
apcompdiy::mpi::communicator diy_comm;
diy_comm = apcompdiy::mpi::communicator(MPI_Comm_f2c(mpi_comm()));
apcompdiy::mpi::communicator diy_comm(MPI_Comm_f2c(mpi_comm()));

assert(m_images.size() == 1);
RadixKCompositor compositor;
Expand All @@ -243,8 +242,7 @@ Compositor::CompositeVisOrder()
{

#ifdef APCOMP_PARALLEL
apcompdiy::mpi::communicator diy_comm;
diy_comm = apcompdiy::mpi::communicator(MPI_Comm_f2c(mpi_comm()));
apcompdiy::mpi::communicator diy_comm(MPI_Comm_f2c(mpi_comm()));

assert(m_images.size() != 0);
DirectSendCompositor compositor;
Expand Down
11 changes: 7 additions & 4 deletions src/libs/apcomp/internal/apcomp_diy_partial_redistribute.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,12 @@ template<typename BlockType>
struct Redistribute
{
const apcompdiy::RegularDecomposer<apcompdiy::DiscreteBounds> &m_decomposer;
MPI_Comm &m_comm;

Redistribute(const apcompdiy::RegularDecomposer<apcompdiy::DiscreteBounds> &decomposer)
: m_decomposer(decomposer)
Redistribute(const apcompdiy::RegularDecomposer<apcompdiy::DiscreteBounds> &decomposer,
MPI_Comm &comm)
: m_decomposer(decomposer),
m_comm(comm)
{}

void operator()(void *v_block, const apcompdiy::ReduceProxy &proxy) const
Expand Down Expand Up @@ -81,7 +84,7 @@ struct Redistribute
} // for

} // else
MPI_Barrier(MPI_COMM_WORLD); //HACK
MPI_Barrier(m_comm); //HACK
} // operator
};

Expand Down Expand Up @@ -113,7 +116,7 @@ void redistribute_detail(std::vector<typename AddBlockType::PartialType> &partia

apcompdiy::RegularDecomposer<apcompdiy::DiscreteBounds> decomposer(dims, global_bounds, num_blocks);
decomposer.decompose(world.rank(), assigner, create);
apcompdiy::all_to_all(master, assigner, Redistribute<Block>(decomposer), magic_k);
apcompdiy::all_to_all(master, assigner, Redistribute<Block>(decomposer,comm), magic_k);
}

//
Expand Down
13 changes: 12 additions & 1 deletion src/libs/apcomp/internal/diy/include/diy/mpi/communicator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ namespace mpi
class communicator
{
public:
inline
communicator();

inline
communicator(MPI_Comm comm = MPI_COMM_WORLD, bool owner = false);
communicator(MPI_Comm comm, bool owner = false);

~communicator() { destroy(); }

Expand Down Expand Up @@ -97,6 +100,14 @@ namespace mpi
}
}

apcompdiy::mpi::communicator::
communicator():
comm_(MPI_COMM_NULL), rank_(0), size_(1), owner_(false)
{
// empty
}


apcompdiy::mpi::communicator::
communicator(MPI_Comm comm, bool owner):
comm_(comm), rank_(0), size_(1), owner_(owner)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

#if defined(ASCENT_VTKM_ENABLED)
#include <rover.hpp>
#include <rover/utils/rover_logging.hpp>
#include <ray_generators/camera_generator.hpp>
#include <vtkh/vtkh.hpp>
#include <vtkh/DataSet.hpp>
Expand Down Expand Up @@ -212,6 +213,9 @@ RoverXRay::execute()
Rover tracer;
#ifdef ASCENT_MPI_ENABLED
int comm_id = flow::Workspace::default_mpi_comm();
rover::Logger::get_instance()->set_mpi_comm_id(comm_id);
/// these use different styles of naming functions ....
rover::DataLogger::GetInstance()->set_mpi_comm_id(comm_id);
tracer.set_mpi_comm_handle(comm_id);
#endif

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5178,7 +5178,7 @@ VTKHVTKFileExtract::execute()
Node n_recv;
conduit::relay::mpi::all_gather_using_schema(n_local_domain_ids,
n_recv,
MPI_COMM_WORLD);
mpi_comm);
n_global_domain_ids.set(DataType::index_t(num_global_domains));
n_global_domain_ids.print();
index_t_array global_vals = n_global_domain_ids.value();
Expand Down
30 changes: 28 additions & 2 deletions src/libs/rover/utils/rover_logging.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Logger::Logger()
log_name<<"rover";
#ifdef ROVER_PARALLEL
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_rank(MPI_Comm_f2c(get_mpi_comm_id()), &rank);
log_name<<"_"<<rank;
#endif
log_name<<".log";
Expand All @@ -45,6 +45,20 @@ Logger* Logger::get_instance()
return m_instance;
}

void
Logger::set_mpi_comm_id(int comm_id)
{
m_mpi_comm_id = comm_id;
}

int
Logger::get_mpi_comm_id()
{
return m_mpi_comm_id;
}



std::ofstream& Logger::get_stream()
{
return m_stream;
Expand Down Expand Up @@ -75,6 +89,18 @@ DataLogger::~DataLogger()
Stream.str("");
}

void
DataLogger::set_mpi_comm_id(int comm_id)
{
m_mpi_comm_id = comm_id;
}

int
DataLogger::get_mpi_comm_id()
{
return m_mpi_comm_id;
}

DataLogger*
DataLogger::GetInstance()
{
Expand All @@ -99,7 +125,7 @@ DataLogger::WriteLog()
log_name<<"rover_data";
#ifdef ROVER_PARALLEL
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_rank(MPI_Comm_f2c(get_mpi_comm_id()), &rank);
log_name<<"_"<<rank;
#endif
log_name<<".log";
Expand Down
11 changes: 9 additions & 2 deletions src/libs/rover/utils/rover_logging.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,34 @@
#include <fstream>
#include <stack>
#include <sstream>
#include <rover_exports.h>

namespace rover {

class Logger
class ROVER_API Logger
{
public:
~Logger();
static Logger *get_instance();
void set_mpi_comm_id(int comm_id);
int get_mpi_comm_id();
void write(const int level, const std::string &message, const char *file, int line);
std::ofstream & get_stream();
protected:
Logger();
Logger(Logger const &);
std::ofstream m_stream;
int m_mpi_comm_id;
static class Logger* m_instance;
};

class DataLogger
class ROVER_API DataLogger
{
public:
~DataLogger();
static DataLogger *GetInstance();
void set_mpi_comm_id(int comm_id);
int get_mpi_comm_id();
void OpenLogEntry(const std::string &entryName);
void CloseLogEntry(const double &entryTime);

Expand All @@ -49,6 +55,7 @@ class DataLogger
std::stringstream Stream;
static class DataLogger* Instance;
std::stack<std::string> Entries;
int m_mpi_comm_id;
};

#ifdef ROVER_ENABLE_LOGGING
Expand Down
9 changes: 5 additions & 4 deletions src/libs/rover/utils/vtk_dataset_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
// other details. No copyright assignment is required to contribute to Ascent.
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

#include <utils/rover_logging.hpp>
#include <utils/vtk_dataset_reader.hpp>
#include <vtkm/io/VTKDataSetReader.h>
#include <iostream>
Expand Down Expand Up @@ -98,10 +99,10 @@ MultiDomainVTKReader::read_file(const std::string &directory, const std::string
//
// figure out which data sets to read
//
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int num_ranks;
MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);
int rank, num_ranks;
MPI_Comm comm = MPI_Comm_f2c(Logger::get_instance()->get_mpi_comm_id());
MPI_Comm_rank(comm, &rank);
MPI_Comm_size(comm, &num_ranks);
if(rank == 0)
{
std::cout<<"Num ranks "<<num_ranks<<"\n";
Expand Down
19 changes: 10 additions & 9 deletions src/libs/vtkh/StatisticsDB.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,9 +251,10 @@ class VTKH_API StatisticsDB
return;

#ifdef VTKH_PARALLEL
MPI_Comm mpi_comm = MPI_Comm_f2c(vtkh::GetMPICommHandle());
int rank, nProcs;
MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(mpi_comm, &nProcs);
MPI_Comm_rank(mpi_comm, &rank);

int sz = 0;

Expand All @@ -275,7 +276,7 @@ class VTKH_API StatisticsDB
{
std::vector<float>tmp(nProcs,0.0);
tmp[rank] = vals[i];
MPI_Reduce(&tmp[0], &res[0], nProcs, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&tmp[0], &res[0], nProcs, MPI_FLOAT, MPI_SUM, 0, mpi_comm);
}
if (rank == 0)
timerStats[it->first] = statValue<float>(res);
Expand All @@ -300,7 +301,7 @@ class VTKH_API StatisticsDB
{
std::vector<unsigned long> tmp(nProcs,0);
tmp[rank] = vals[i];
MPI_Reduce(&tmp[0], &res[0], nProcs, MPI_UNSIGNED_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(&tmp[0], &res[0], nProcs, MPI_UNSIGNED_LONG, MPI_SUM, 0, mpi_comm);
}
if (rank == 0)
counterStats[it->first] = statValue<unsigned long>(res);
Expand Down Expand Up @@ -334,9 +335,9 @@ class VTKH_API StatisticsDB

float allMin, allMax;
int allMaxSz;
MPI_Allreduce(&myMin, &allMin, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD);
MPI_Allreduce(&myMax, &allMax, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
MPI_Allreduce(&myMaxSz, &allMaxSz, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
MPI_Allreduce(&myMin, &allMin, 1, MPI_FLOAT, MPI_MIN, mpi_comm);
MPI_Allreduce(&myMax, &allMax, 1, MPI_FLOAT, MPI_MAX, mpi_comm);
MPI_Allreduce(&myMaxSz, &allMaxSz, 1, MPI_INT, MPI_MAX, mpi_comm);

int buffSz = allMaxSz*2 + 1, tag = 0;
for (it = events.begin(); it != events.end(); it++)
Expand All @@ -354,7 +355,7 @@ class VTKH_API StatisticsDB
for (int i = 1; i < nProcs; i++)
{
MPI_Status status;
MPI_Recv(&buff[0], buffSz, MPI_FLOAT, i, tag, MPI_COMM_WORLD, &status);
MPI_Recv(&buff[0], buffSz, MPI_FLOAT, i, tag, mpi_comm, &status);
int n = int(buff[0]);
for (int j = 0; j < n; j+=2)
eventStats[i][it->first].history.push_back(std::make_pair(buff[1+j], buff[1+j+1]));
Expand All @@ -373,7 +374,7 @@ class VTKH_API StatisticsDB
evData[1+j*2+0] = it->second.history[j].first;
evData[1+j*2+1] = it->second.history[j].second;
}
MPI_Send(&evData[0], buffSz, MPI_FLOAT, 0, tag, MPI_COMM_WORLD);
MPI_Send(&evData[0], buffSz, MPI_FLOAT, 0, tag, mpi_comm);
}
}
}
Expand Down
6 changes: 2 additions & 4 deletions src/libs/vtkh/compositing/Compositor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,7 @@ Compositor::CompositeZBufferSurface()
// nothing to do here in serial. Images were composited as
// they were added to the compositor
#ifdef VTKH_PARALLEL
vtkhdiy::mpi::communicator diy_comm;
diy_comm = vtkhdiy::mpi::communicator(MPI_Comm_f2c(GetMPICommHandle()));
vtkhdiy::mpi::communicator diy_comm(MPI_Comm_f2c(GetMPICommHandle()));

assert(m_images.size() == 1);
RadixKCompositor compositor;
Expand All @@ -223,8 +222,7 @@ Compositor::CompositeVisOrder()
{

#ifdef VTKH_PARALLEL
vtkhdiy::mpi::communicator diy_comm;
diy_comm = vtkhdiy::mpi::communicator(MPI_Comm_f2c(GetMPICommHandle()));
vtkhdiy::mpi::communicator diy_comm(MPI_Comm_f2c(GetMPICommHandle()));

assert(m_images.size() != 0);
DirectSendCompositor compositor;
Expand Down
3 changes: 1 addition & 2 deletions src/libs/vtkh/compositing/PayloadCompositor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ PayloadCompositor::Composite()
// nothing to do here in serial. Images were composited as
// they were added to the compositor
#ifdef VTKH_PARALLEL
vtkhdiy::mpi::communicator diy_comm;
diy_comm = vtkhdiy::mpi::communicator(MPI_Comm_f2c(GetMPICommHandle()));
vtkhdiy::mpi::communicator diy_comm(MPI_Comm_f2c(GetMPICommHandle()));

assert(m_images.size() == 1);
RadixKCompositor compositor;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace mpi
class communicator
{
public:
communicator(MPI_Comm comm = MPI_COMM_WORLD):
communicator(MPI_Comm comm):
comm_(comm) { MPI_Comm_rank(comm_, &rank_); MPI_Comm_size(comm_, &size_); }

int rank() const { return rank_; }
Expand Down
11 changes: 7 additions & 4 deletions src/libs/vtkh/compositing/vtkh_diy_partial_redistribute.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,12 @@ template<typename BlockType>
struct Redistribute
{
const vtkhdiy::RegularDecomposer<vtkhdiy::DiscreteBounds> &m_decomposer;
MPI_Comm &m_comm;

Redistribute(const vtkhdiy::RegularDecomposer<vtkhdiy::DiscreteBounds> &decomposer)
: m_decomposer(decomposer)
Redistribute(const vtkhdiy::RegularDecomposer<vtkhdiy::DiscreteBounds> &decomposer,
MPI_Comm &comm)
: m_decomposer(decomposer),
m_comm(comm)
{}

void operator()(void *v_block, const vtkhdiy::ReduceProxy &proxy) const
Expand Down Expand Up @@ -113,7 +116,7 @@ struct Redistribute
} // for

} // else
MPI_Barrier(MPI_COMM_WORLD); //HACK
MPI_Barrier(m_comm); //HACK
} // operator
};

Expand Down Expand Up @@ -145,7 +148,7 @@ void redistribute_detail(std::vector<typename AddBlockType::PartialType> &partia
const int dims = 1;
vtkhdiy::RegularDecomposer<vtkhdiy::DiscreteBounds> decomposer(dims, global_bounds, num_blocks);
decomposer.decompose(world.rank(), assigner, create);
vtkhdiy::all_to_all(master, assigner, Redistribute<Block>(decomposer), magic_k);
vtkhdiy::all_to_all(master, assigner, Redistribute<Block>(decomposer,comm), magic_k);
}

//
Expand Down
6 changes: 3 additions & 3 deletions src/libs/vtkh/filters/Slice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,7 @@ AutoSliceLevels::DoExecute()
float datafield_min = 0.;

#if ASCENT_MPI_ENABLED

MPI_Comm mpi_comm = MPI_Comm_f2c(vtkh::GetMPICommHandle());
float local_datafield_max = 0.;
float local_datafield_min = 0.;

Expand All @@ -653,8 +653,8 @@ AutoSliceLevels::DoExecute()
local_datafield_max = (float)*max_element(field_data.begin(),field_data.end());
local_datafield_min = (float)*min_element(field_data.begin(),field_data.end());
}
MPI_Reduce(&local_datafield_max, &datafield_max, 1, MPI_FLOAT, MPI_MAX, 0, MPI_COMM_WORLD);
MPI_Reduce(&local_datafield_min, &datafield_min, 1, MPI_FLOAT, MPI_MIN, 0, MPI_COMM_WORLD);
MPI_Reduce(&local_datafield_max, &datafield_max, 1, MPI_FLOAT, MPI_MAX, 0, mpi_comm);
MPI_Reduce(&local_datafield_min, &datafield_min, 1, MPI_FLOAT, MPI_MIN, 0, mpi_comm);

#else

Expand Down
Loading

0 comments on commit 9dac20f

Please sign in to comment.