
Commit

Merge branch '177-p3m-particle-mesh-solver-crashes-in-scatter' into 'master'

Resolve "P3M Particle-Mesh solver crashes in scatter"

Closes #177

See merge request OPAL/Libraries/ippl!187
s-mayani committed Jun 29, 2023
2 parents 25e4cb1 + 1577b61 commit e5547f3
Showing 4 changed files with 117 additions and 40 deletions.
9 changes: 6 additions & 3 deletions alpine/ChargedParticles.hpp
@@ -360,7 +360,8 @@ class ChargedParticles : public ippl::ParticleBase<PLayout> {
if (dev > loadbalancethreshold_m) {
local = 1;
}
MPI_Allgather(&local, 1, MPI_INT, res.data(), 1, MPI_INT, ippl::Comm->getCommunicator());
MPI_Allgather(&local, 1, MPI_INT, res.data(), 1, MPI_INT,
ippl::Comm->getCommunicator());

for (unsigned int i = 0; i < res.size(); i++) {
if (res[i] == 1) {
@@ -375,7 +376,8 @@ class ChargedParticles : public ippl::ParticleBase<PLayout> {
std::vector<double> imb(ippl::Comm->size());
double equalPart = (double)totalP / ippl::Comm->size();
double dev = (std::abs((double)this->getLocalNum() - equalPart) / totalP) * 100.0;
MPI_Gather(&dev, 1, MPI_DOUBLE, imb.data(), 1, MPI_DOUBLE, 0, ippl::Comm->getCommunicator());
MPI_Gather(&dev, 1, MPI_DOUBLE, imb.data(), 1, MPI_DOUBLE, 0,
ippl::Comm->getCommunicator());

if (ippl::Comm->rank() == 0) {
std::stringstream fname;
@@ -605,7 +607,8 @@ class ChargedParticles : public ippl::ParticleBase<PLayout> {
kinEnergy *= 0.5;
double gkinEnergy = 0.0;

MPI_Reduce(&kinEnergy, &gkinEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, ippl::Comm->getCommunicator());
MPI_Reduce(&kinEnergy, &gkinEnergy, 1, MPI_DOUBLE, MPI_SUM, 0,
ippl::Comm->getCommunicator());

const int nghostE = E_m.getNghost();
auto Eview = E_m.getView();
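
The three rewrapped MPI calls in this file gather per-rank load-imbalance flags and deviations (MPI_Allgather, MPI_Gather) and reduce the kinetic energy to rank 0 (MPI_Reduce). For reference, here is a minimal standalone sketch of the deviation gather in plain MPI; the particle counts and the use of MPI_COMM_WORLD instead of ippl::Comm are assumptions of the sketch, not taken from the commit.

#include <mpi.h>
#include <cmath>
#include <cstdio>
#include <vector>

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);
    int rank = 0, size = 1;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // Assumed particle counts, purely for illustration.
    const double totalP   = 1.0e6;
    const double localNum = totalP / size + 100.0 * rank;

    // Percentage deviation from a perfectly even split, as in the lines above.
    const double equalPart = totalP / size;
    const double dev       = std::abs(localNum - equalPart) / totalP * 100.0;

    // Rank 0 collects every rank's deviation, mirroring the MPI_Gather above.
    std::vector<double> imb(size);
    MPI_Gather(&dev, 1, MPI_DOUBLE, imb.data(), 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    if (rank == 0) {
        for (int i = 0; i < size; ++i) {
            std::printf("rank %d: %.3f%% imbalance\n", i, imb[i]);
        }
    }

    MPI_Finalize();
    return 0;
}
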
53 changes: 29 additions & 24 deletions alpine/PenningTrap.cpp
@@ -164,22 +164,23 @@ int main(int argc, char* argv[]) {
auto start = std::chrono::high_resolution_clock::now();
Vector_t<int, Dim> nr = {std::atoi(argv[1]), std::atoi(argv[2]), std::atoi(argv[3])};

static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total");
static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation");
static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData");
static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("pushVelocity");
static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("pushPosition");
static IpplTimings::TimerRef updateTimer = IpplTimings::getTimer("update");
static IpplTimings::TimerRef DummySolveTimer = IpplTimings::getTimer("solveWarmup");
static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("Solve");
static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total");
static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation");
static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData");
static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("pushVelocity");
static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("pushPosition");
static IpplTimings::TimerRef updateTimer = IpplTimings::getTimer("update");
static IpplTimings::TimerRef DummySolveTimer = IpplTimings::getTimer("solveWarmup");
static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("Solve");
static IpplTimings::TimerRef domainDecomposition = IpplTimings::getTimer("loadBalance");

IpplTimings::startTimer(mainTimer);

size_type totalP = std::atol(argv[4]);
const unsigned int nt = std::atoi(argv[5]);

msg << "Penning Trap " << endl << "nt " << nt << " Np= " << totalP << " grid = " << nr << endl;
msg << "Penning Trap " << endl
<< "nt " << nt << " Np= " << totalP << " grid = " << nr << endl;

using bunch_type = ChargedParticles<PLayout_t<double, Dim>, double, Dim>;

@@ -289,7 +290,8 @@ int main(int argc, char* argv[]) {
size_type nloc = (size_type)(factor * totalP);
size_type Total_particles = 0;

MPI_Allreduce(&nloc, &Total_particles, 1, MPI_UNSIGNED_LONG, MPI_SUM, ippl::Comm->getCommunicator());
MPI_Allreduce(&nloc, &Total_particles, 1, MPI_UNSIGNED_LONG, MPI_SUM,
ippl::Comm->getCommunicator());

int rest = (int)(totalP - Total_particles);

@@ -300,7 +302,7 @@ int main(int argc, char* argv[]) {
Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100 * ippl::Comm->rank()));
Kokkos::parallel_for(
nloc, generate_random<Vector_t<double, Dim>, Kokkos::Random_XorShift64_Pool<>, Dim>(
P->R.getView(), P->P.getView(), rand_pool64, mu, sd, minU, maxU));
P->R.getView(), P->P.getView(), rand_pool64, mu, sd, minU, maxU));

Kokkos::fence();
ippl::Comm->barrier();
@@ -355,7 +357,8 @@ int main(int argc, char* argv[]) {
-(Rview(j)[0] - 0.5 * rmax[0]) * (V0 / (2 * Kokkos::pow(rmax[2], 2)));
double Eext_y =
-(Rview(j)[1] - 0.5 * rmax[1]) * (V0 / (2 * Kokkos::pow(rmax[2], 2)));
double Eext_z = (Rview(j)[2] - 0.5 * rmax[2]) * (V0 / (Kokkos::pow(rmax[2], 2)));
double Eext_z =
(Rview(j)[2] - 0.5 * rmax[2]) * (V0 / (Kokkos::pow(rmax[2], 2)));

Eview(j)[0] += Eext_x;
Eview(j)[1] += Eext_y;
@@ -410,21 +413,22 @@ int main(int argc, char* argv[]) {
-(R2view(j)[0] - 0.5 * rmax[0]) * (V0 / (2 * Kokkos::pow(rmax[2], 2)));
double Eext_y =
-(R2view(j)[1] - 0.5 * rmax[1]) * (V0 / (2 * Kokkos::pow(rmax[2], 2)));
double Eext_z = (R2view(j)[2] - 0.5 * rmax[2]) * (V0 / (Kokkos::pow(rmax[2], 2)));
double Eext_z =
(R2view(j)[2] - 0.5 * rmax[2]) * (V0 / (Kokkos::pow(rmax[2], 2)));

E2view(j)[0] += Eext_x;
E2view(j)[1] += Eext_y;
E2view(j)[2] += Eext_z;
P2view(j)[0] =
DrInv
* (P2view(j)[0]
+ alpha
* (E2view(j)[0] + P2view(j)[1] * Bext + alpha * Bext * E2view(j)[1]));
P2view(j)[1] =
DrInv
* (P2view(j)[1]
+ alpha
* (E2view(j)[1] - P2view(j)[0] * Bext - alpha * Bext * E2view(j)[0]));
P2view(j)[0] = DrInv
* (P2view(j)[0]
+ alpha
* (E2view(j)[0] + P2view(j)[1] * Bext
+ alpha * Bext * E2view(j)[1]));
P2view(j)[1] = DrInv
* (P2view(j)[1]
+ alpha
* (E2view(j)[1] - P2view(j)[0] * Bext
- alpha * Bext * E2view(j)[0]));
P2view(j)[2] += alpha * E2view(j)[2];
});
IpplTimings::stopTimer(PTimer);
@@ -437,7 +441,8 @@ int main(int argc, char* argv[]) {
msg << "Finished time step: " << it + 1 << " time: " << P->time_m << endl;

if (checkSignalHandler()) {
msg << "Aborting timestepping loop due to signal " << interruptSignalReceived << endl;
msg << "Aborting timestepping loop due to signal " << interruptSignalReceived
<< endl;
break;
}
}
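
The reformatted lambda bodies above apply the external Penning-trap field and a Boris-like velocity kick per particle. The scalar sketch below restates those two steps outside of Kokkos; the definition of DrInv is not visible in this hunk and is assumed here to be the usual 1/(1 + (alpha*Bext)^2) rotation factor, and all names are local to the sketch.

#include <array>
#include <cmath>

using Vec3 = std::array<double, 3>;

// External trap field at position R, matching the Eext_* lines above.
Vec3 externalField(const Vec3& R, const Vec3& rmax, double V0) {
    const double c = V0 / (2.0 * rmax[2] * rmax[2]);
    return {-(R[0] - 0.5 * rmax[0]) * c,
            -(R[1] - 0.5 * rmax[1]) * c,
            (R[2] - 0.5 * rmax[2]) * 2.0 * c};
}

// Velocity kick with magnetic rotation about z; DrInv is an assumption (see above).
void kick(Vec3& P, const Vec3& E, double alpha, double Bext) {
    const double DrInv = 1.0 / (1.0 + alpha * alpha * Bext * Bext);
    const double Px    = P[0];
    const double Py    = P[1];
    P[0] = DrInv * (Px + alpha * (E[0] + Py * Bext + alpha * Bext * E[1]));
    P[1] = DrInv * (Py + alpha * (E[1] - Px * Bext - alpha * Bext * E[0]));
    P[2] += alpha * E[2];
}
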
1 change: 1 addition & 0 deletions src/Solver/P3MSolver.h
@@ -88,6 +88,7 @@ namespace ippl {

CxField_t rhotr_m;
CxField_t grntr_m;
CxField_t tempFieldComplex_m;

// fields that facilitate the calculation in greensFunction()
IField_t grnIField_m[Dim];
94 changes: 81 additions & 13 deletions src/Solver/P3MSolver.hpp
@@ -125,6 +125,7 @@ namespace ippl {
grn_m.initialize(*mesh_mp, *layout_mp);
rhotr_m.initialize(*meshComplex_m, *layoutComplex_m);
grntr_m.initialize(*meshComplex_m, *layoutComplex_m);
tempFieldComplex_m.initialize(*meshComplex_m, *layoutComplex_m);

// create the FFT object
fft_m = std::make_unique<FFT_t>(*layout_mp, *layoutComplex_m, this->params_m);
@@ -232,19 +233,85 @@ namespace ippl {

// multiply FFT(rho2)*FFT(green)
// convolution becomes multiplication in FFT
rhotr_m = rhotr_m * grntr_m;
rhotr_m = -rhotr_m * grntr_m;

using index_array_type = typename RangePolicy<Dim>::index_array_type;
if ((out == Base::GRAD) || (out == Base::SOL_AND_GRAD)) {
// Compute gradient in Fourier space and then
// take inverse FFT.

const Trhs pi = Kokkos::numbers::pi_v<Trhs>;
Kokkos::complex<Trhs> imag = {0.0, 1.0};

auto view = rhotr_m.getView();
const int nghost = rhotr_m.getNghost();
auto tempview = tempFieldComplex_m.getView();
auto viewRhs = this->rhs_mp->getView();
auto viewLhs = this->lhs_mp->getView();
const int nghostL = this->lhs_mp->getNghost();
const auto& lDomComplex = layoutComplex_m->getLocalNDIndex();

// define some member variables in local scope for the parallel_for
Vector_t hsize = hr_m;
Vector<int, Dim> N = nr_m;
Vector_t origin = mesh_mp->getOrigin();

for (size_t gd = 0; gd < Dim; ++gd) {
ippl::parallel_for(
"Gradient FFTPeriodicPoissonSolver", getRangePolicy(view, nghost),
KOKKOS_LAMBDA(const index_array_type& args) {
Vector<int, Dim> iVec = args - nghost;

for (unsigned d = 0; d < Dim; ++d) {
iVec[d] += lDomComplex[d].first();
}

Vector_t kVec;

for (size_t d = 0; d < Dim; ++d) {
const Trhs Len = N[d] * hsize[d];
bool shift = (iVec[d] > (N[d] / 2));
bool notMid = (iVec[d] != (N[d] / 2));
// For the noMid part see
// https://math.mit.edu/~stevenj/fft-deriv.pdf Algorithm 1
kVec[d] = notMid * 2 * pi / Len * (iVec[d] - shift * N[d]);
}

Trhs Dr = 0;
for (unsigned d = 0; d < Dim; ++d) {
Dr += kVec[d] * kVec[d];
}

apply(tempview, args) = apply(view, args);

bool isNotZero = (Dr != 0.0);

apply(tempview, args) *= -(isNotZero * imag * kVec[gd]);
});

fft_m->transform(-1, *this->rhs_mp, tempFieldComplex_m);

ippl::parallel_for(
"Assign Gradient FFTPeriodicPoissonSolver", getRangePolicy(viewLhs, nghostL),
KOKKOS_LAMBDA(const index_array_type& args) {
apply(viewLhs, args)[gd] = apply(viewRhs, args);
});
}

// inverse FFT of the product and store the electrostatic potential in rho2_mr
fft_m->transform(-1, *(this->rhs_mp), rhotr_m);
// normalization is double counted due to 2 transforms
*(this->lhs_mp) = *(this->lhs_mp) * nr_m[0] * nr_m[1] * nr_m[2];
// discretization of integral requires h^3 factor
*(this->lhs_mp) = *(this->lhs_mp) * hr_m[0] * hr_m[1] * hr_m[2];
}

// normalization is double counted due to 2 transforms
*(this->rhs_mp) = *(this->rhs_mp) * nr_m[0] * nr_m[1] * nr_m[2];
// discretization of integral requires h^3 factor
*(this->rhs_mp) = *(this->rhs_mp) * hr_m[0] * hr_m[1] * hr_m[2];
if ((out == Base::SOL) || (out == Base::SOL_AND_GRAD)) {
// inverse FFT of the product and store the electrostatic potential in rho2_mr
fft_m->transform(-1, *(this->rhs_mp), rhotr_m);

// if we want gradient of phi = Efield
if (out == Base::SOL_AND_GRAD || out == Base::GRAD) {
*(this->lhs_mp) = -grad(*this->rhs_mp);
// normalization is double counted due to 2 transforms
*(this->rhs_mp) = *(this->rhs_mp) * nr_m[0] * nr_m[1] * nr_m[2];
// discretization of integral requires h^3 factor
*(this->rhs_mp) = *(this->rhs_mp) * hr_m[0] * hr_m[1] * hr_m[2];
}
};
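
For orientation, the kVec assignment in the gradient loop above follows the standard spectral-derivative convention (Algorithm 1 of the linked fft-deriv.pdf note): indices above N/2 wrap to negative wavenumbers and the Nyquist mode is zeroed. The 1D sketch below only illustrates that mapping and is not code from the solver; its names (wavenumbers, N, h) are local to the sketch. In the solver, each Fourier mode is then scaled by -i*kVec[gd] before the inverse transform, subject to the sign conventions adopted for rhotr_m and the Green's function.

#include <cmath>
#include <vector>

// 1D wavenumber table for an N-point grid with spacing h (illustrative only).
std::vector<double> wavenumbers(int N, double h) {
    const double pi  = std::acos(-1.0);
    const double Len = N * h;                  // physical length of the axis
    std::vector<double> k(N);
    for (int i = 0; i < N; ++i) {
        const bool shift  = (i > N / 2);       // wrap high indices to negative k
        const bool notMid = (i != N / 2);      // zero the Nyquist mode (even N)
        k[i] = notMid * 2.0 * pi / Len * (i - shift * N);
    }
    return k;
}
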

Expand All @@ -255,6 +322,9 @@ namespace ippl {
void P3MSolver<FieldLHS, FieldRHS>::greensFunction() {
grn_m = 0.0;

// define pi
Trhs pi = Kokkos::numbers::pi_v<Trhs>;

// This alpha parameter is a choice for the Green's function
// it controls the "range" of the Green's function (e.g.
// for the P3M collision modelling method, it indicates
Expand All @@ -274,8 +344,6 @@ namespace ippl {
const int nghost = grn_m.getNghost();
const auto& ldom = layout_mp->getLocalNDIndex();

constexpr Trhs ke = 2.532638e8;

// Kokkos parallel for loop to find (0,0,0) point and regularize
Kokkos::parallel_for(
"Assign Green's function ", ippl::getRangePolicy(view, nghost),
Expand All @@ -288,7 +356,7 @@ namespace ippl {
const bool isOrig = (ig == 0 && jg == 0 && kg == 0);

Trhs r = Kokkos::real(Kokkos::sqrt(view(i, j, k)));
view(i, j, k) = (!isOrig) * ke * (Kokkos::erf(alpha * r) / r);
view(i, j, k) = (!isOrig) * (-1.0 / (4.0 * pi)) * (Kokkos::erf(alpha * r) / r);
});

// perform the FFT of the Green's function for the convolution
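
The final hunk replaces the hard-coded Coulomb prefactor ke with the free-space -1/(4*pi) normalization of the erf-screened kernel, G(r) = -erf(alpha*r) / (4*pi*r), and zeroes the origin instead of evaluating the r -> 0 limit. The scalar sketch below only restates that formula and is not part of the commit.

#include <cmath>

// Screened free-space kernel used above, with the origin regularized to zero.
double screenedGreensFunction(double r, double alpha) {
    const double pi = std::acos(-1.0);
    if (r == 0.0) {
        return 0.0;  // the solver zeroes the (0,0,0) point explicitly
    }
    return -(1.0 / (4.0 * pi)) * std::erf(alpha * r) / r;
}
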
