Resolve "Use MPI utility functions to do parallel decomposition" #203

Closed
src/FieldLayout/FieldLayout.hpp (2 changes: 1 addition & 1 deletion)

@@ -127,7 +127,7 @@ namespace ippl {

         detail::Partitioner<Dim> partition;

-        partition.split(domain, hLocalDomains_m, requestedLayout_m, nRanks);
+        partition.split(Ippl::Comm.get(), domain, hLocalDomains_m, requestedLayout_m, nRanks);

         findNeighbors();
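This call-site change is the visible half of the new interface: FieldLayout now hands the communicator to the partitioner (via Ippl::Comm.get(), assuming Ippl::Comm is the library's global owning pointer to the Communicate instance) so that split() can build an MPI Cartesian topology from it instead of splitting the domain recursively on its own.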
src/Partition/Partitioner.h (13 changes: 11 additions & 2 deletions)

@@ -18,20 +18,29 @@
 #ifndef IPPL_PARTITIONER_H
 #define IPPL_PARTITIONER_H

+#include <utility>
+
+#include "Communicate/Communicate.h"
 #include "Index/NDIndex.h"

 namespace ippl {
     namespace detail {

         template <unsigned Dim>
         class Partitioner {
+        private:
+            using pair_type = std::pair<int, int>;
+
         public:
             Partitioner() = default;
             ~Partitioner() = default;

             template <typename view_type>
-            void split(const NDIndex<Dim>& domain, view_type& view, e_dim_tag* decomp,
-                       int nSplits) const;
+            void split(Communicate* communicate, const NDIndex<Dim>& domain, view_type& view,
+                       e_dim_tag* decomp, int nSplits) const;
+
+        private:
+            pair_type getLocalBounds(int nglobal, int coords, int dims) const;
         };
     }  // namespace detail
 }  // namespace ippl
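The new private helper getLocalBounds computes a 1D block distribution: nglobal cells are divided over dims ranks along one axis, and the first nglobal % dims ranks absorb one extra cell each. A minimal standalone sketch of that rule (hypothetical names, not code from the PR), returning inclusive (first, last) index bounds:

    #include <cstdio>
    #include <utility>

    // Block distribution of nglobal cells over dims ranks; coords is the
    // rank's position along the axis. Ranks with coords < nglobal % dims
    // receive one extra cell. Returns inclusive [first, last] bounds.
    std::pair<int, int> localBounds(int nglobal, int coords, int dims) {
        int nlocal    = nglobal / dims;
        int remaining = nglobal % dims;
        int first     = nlocal * coords + (coords < remaining ? coords : remaining);
        if (coords < remaining) {
            ++nlocal;
        }
        return {first, first + nlocal - 1};
    }

    int main() {
        // 10 cells over 3 ranks -> [0,3] [4,6] [7,9]
        for (int r = 0; r < 3; ++r) {
            auto [lo, hi] = localBounds(10, r, 3);
            std::printf("rank %d: [%d, %d]\n", r, lo, hi);
        }
        return 0;
    }

The implementation added to Partitioner.hpp below follows the same arithmetic.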
src/Partition/Partitioner.hpp (129 changes: 36 additions & 93 deletions)

@@ -25,114 +25,57 @@ namespace ippl {

         template <unsigned Dim>
         template <typename view_type>
-        void Partitioner<Dim>::split(const NDIndex<Dim>& domain, view_type& view, e_dim_tag* decomp,
+        void Partitioner<Dim>::split(Communicate* communicate, const NDIndex<Dim>& domain,
+                                     view_type& /*view*/, e_dim_tag* /*decomp*/,
                                      int nSplits) const {
-            using NDIndex_t = NDIndex<Dim>;
+            // using NDIndex_t = NDIndex<Dim>;

-            // Recursively split the domain until we have generated all the domains.
-            std::vector<NDIndex_t> domains_c(nSplits);
-            NDIndex_t leftDomain;
+            int dims[Dim];

-            // Start with the whole domain.
-            domains_c[0] = domain;
-            int v;
-            unsigned int d = 0;
+            MPI_Dims_create(nSplits, Dim, dims);

-            int v1, v2, rm, vtot, vl, vr;
-            double a, lmax, len;
+            const int periods[Dim] = {false, false, false};
+            int reorder = false;

-            for (v = nSplits, rm = 0; v > 1; v /= 2) {
-                rm += (v % 2);
-            }
+            MPI_Comm cart;

+            MPI_Comm* world = communicate->getCommunicator();

-            if (rm == 0) {
-                // nSplits is a power of 2
+            MPI_Cart_create(*world, Dim, dims, periods, reorder, &cart);

-                std::vector<NDIndex_t> copy_c(nSplits);
+            int rank = -1;
+            MPI_Comm_rank(cart, &rank);

-                for (v = 1; v < nSplits; v *= 2) {
-                    // Go to the next parallel dimension.
-                    while (decomp[d] != PARALLEL)
-                        if (++d == Dim)
-                            d = 0;
+            int coords[Dim];
+            MPI_Cart_coords(cart, rank, Dim, coords);

-                    // Split all the current nSplits.
-                    int i, j;
-                    for (i = 0, j = 0; i < v; ++i, j += 2) {
-                        // Split to the left and to the right, saving both.
-                        domains_c[i].split(copy_c[j], copy_c[j + 1], d);
-                    }
-                    // Copy back.
-                    std::copy(copy_c.begin(), copy_c.begin() + v * 2, domains_c.begin());
+            std::array<pair_type, Dim> bounds;

-                    // On to the next dimension.
-                    if (++d == Dim)
-                        d = 0;
-                }
+            for (unsigned d = 0; d < Dim; ++d) {
+                int length = domain[d].length();
+                bounds[d] = this->getLocalBounds(length, coords[d], dims[d]);
+            }
+        }
+
+        template <unsigned Dim>
+        Partitioner<Dim>::pair_type Partitioner<Dim>::getLocalBounds(int nglobal, int coords,
+                                                                     int dims) const {
+            int nlocal = nglobal / dims;
+            int remaining = nglobal - dims * nlocal;
+
+            int first = nlocal * coords;
+            int last = 0;
+            if (coords < remaining) {
+                nlocal = nlocal + 1;
+                first = first + coords;
+                last = last + coords;
             } else {
-                vtot = 1;  // count the number of nSplits to make sure that it worked
-                // nSplits is not a power of 2 so we need to do some fancy splitting
-                // sorry... this would be much cleaner with recursion
-                /*
-                  The way this works is to recursively split on the longest dimension.
-                  Suppose you request 11 nSplits. It will split the longest dimension
-                  in the ratio 5:6 and put the new domains in node 0 and node 5. Then
-                  it splits the longest dimension of the 0 domain and puts the results
-                  in node 0 and node 2 and then splits the longest dimension of node 5
-                  and puts the results in node 5 and node 8. etc.
-                  The logic is kind of bizarre, but it works.
-                */
-                for (v = 1; v < 2 * nSplits; ++v) {
-                    // kind of reverse the bits of v
-                    for (v2 = v, v1 = 1; v2 > 1; v2 /= 2) {
-                        v1 = 2 * v1 + (v2 % 2);
-                    }
-                    vl = 0;
-                    vr = nSplits;
-
-                    while (v1 > 1) {
-                        if ((v1 % 2) == 1) {
-                            vl = vl + (vr - vl) / 2;
-                        } else {
-                            vr = vl + (vr - vl) / 2;
-                        }
-                        v1 /= 2;
-                    }
-
-                    v2 = vl + (vr - vl) / 2;
-
-                    if (v2 > vl) {
-                        a = v2 - vl;
-                        a /= vr - vl;
-                        vr = v2;
-                        leftDomain = domains_c[vl];
-                        lmax = 0;
-                        d = std::numeric_limits<unsigned int>::max();
-                        for (unsigned int dd = 0; dd < Dim; ++dd) {
-                            if (decomp[dd] == PARALLEL) {
-                                if ((len = leftDomain[dd].length()) > lmax) {
-                                    lmax = len;
-                                    d = dd;
-                                }
-                            }
-                        }
-                        NDIndex_t temp;
-                        domains_c[vl].split(temp, domains_c[vr], d, a);
-                        domains_c[vl] = temp;
-                        ++vtot;
-                    }
-                }
-
-                v = vtot;
+                first = first + remaining;
             }

-            // Make sure v is the same number of nSplits at this stage.
-            PAssert_EQ(v, nSplits);
+            last = first + nlocal - 1;

-            for (size_t i = 0; i < domains_c.size(); ++i) {
-                view(i) = domains_c[i];
-            }
+            return std::make_pair(first, last);
         }
     }  // namespace detail
 }  // namespace ippl
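For context, a self-contained sketch of the MPI utilities the new split() relies on (plain MPI outside IPPL; the 64-cell-per-axis domain and all names here are illustrative assumptions, not from the PR). MPI_Dims_create factors the rank count into a Dim-dimensional process grid, MPI_Cart_create builds a Cartesian communicator over that grid, MPI_Cart_coords returns each rank's grid position, and the per-axis bounds then follow from the same block rule as getLocalBounds:

    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char* argv[]) {
        MPI_Init(&argc, &argv);

        constexpr int Dim     = 3;
        const int global[Dim] = {64, 64, 64};  // global cells per axis (example values)

        int nRanks = 0;
        MPI_Comm_size(MPI_COMM_WORLD, &nRanks);

        // Zeros let MPI choose a balanced factorization of nRanks.
        int dims[Dim] = {0, 0, 0};
        MPI_Dims_create(nRanks, Dim, dims);

        // Non-periodic axes and no rank reordering, as in the PR.
        const int periods[Dim] = {0, 0, 0};
        MPI_Comm cart;
        MPI_Cart_create(MPI_COMM_WORLD, Dim, dims, periods, /*reorder=*/0, &cart);

        int rank = -1;
        MPI_Comm_rank(cart, &rank);
        int coords[Dim];
        MPI_Cart_coords(cart, rank, Dim, coords);

        // Block bounds per axis: the first global % dims ranks get one extra cell.
        for (int d = 0; d < Dim; ++d) {
            int nlocal    = global[d] / dims[d];
            int remaining = global[d] % dims[d];
            int first     = nlocal * coords[d] + (coords[d] < remaining ? coords[d] : remaining);
            if (coords[d] < remaining)
                ++nlocal;
            std::printf("rank %d, axis %d: [%d, %d]\n", rank, d, first, first + nlocal - 1);
        }

        MPI_Comm_free(&cart);
        MPI_Finalize();
        return 0;
    }

Built with mpicxx and run under mpirun, each rank prints its inclusive index range per axis. Since reorder is false here, as in the PR, the Cartesian communicator keeps the original rank numbering, so the decomposition maps predictably onto the world communicator.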