diff --git a/alpine/ChargedParticles.hpp b/alpine/ChargedParticles.hpp index 4c516fd48..99302498d 100644 --- a/alpine/ChargedParticles.hpp +++ b/alpine/ChargedParticles.hpp @@ -360,7 +360,8 @@ class ChargedParticles : public ippl::ParticleBase { if (dev > loadbalancethreshold_m) { local = 1; } - MPI_Allgather(&local, 1, MPI_INT, res.data(), 1, MPI_INT, ippl::Comm->getCommunicator()); + MPI_Allgather(&local, 1, MPI_INT, res.data(), 1, MPI_INT, + ippl::Comm->getCommunicator()); for (unsigned int i = 0; i < res.size(); i++) { if (res[i] == 1) { @@ -375,7 +376,8 @@ class ChargedParticles : public ippl::ParticleBase { std::vector imb(ippl::Comm->size()); double equalPart = (double)totalP / ippl::Comm->size(); double dev = (std::abs((double)this->getLocalNum() - equalPart) / totalP) * 100.0; - MPI_Gather(&dev, 1, MPI_DOUBLE, imb.data(), 1, MPI_DOUBLE, 0, ippl::Comm->getCommunicator()); + MPI_Gather(&dev, 1, MPI_DOUBLE, imb.data(), 1, MPI_DOUBLE, 0, + ippl::Comm->getCommunicator()); if (ippl::Comm->rank() == 0) { std::stringstream fname; @@ -605,7 +607,8 @@ class ChargedParticles : public ippl::ParticleBase { kinEnergy *= 0.5; double gkinEnergy = 0.0; - MPI_Reduce(&kinEnergy, &gkinEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, ippl::Comm->getCommunicator()); + MPI_Reduce(&kinEnergy, &gkinEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, + ippl::Comm->getCommunicator()); const int nghostE = E_m.getNghost(); auto Eview = E_m.getView(); diff --git a/alpine/PenningTrap.cpp b/alpine/PenningTrap.cpp index 0bf3519bf..aeda5668c 100644 --- a/alpine/PenningTrap.cpp +++ b/alpine/PenningTrap.cpp @@ -164,14 +164,14 @@ int main(int argc, char* argv[]) { auto start = std::chrono::high_resolution_clock::now(); Vector_t nr = {std::atoi(argv[1]), std::atoi(argv[2]), std::atoi(argv[3])}; - static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total"); - static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); - static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); - static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("pushVelocity"); - static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("pushPosition"); - static IpplTimings::TimerRef updateTimer = IpplTimings::getTimer("update"); - static IpplTimings::TimerRef DummySolveTimer = IpplTimings::getTimer("solveWarmup"); - static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("Solve"); + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total"); + static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("pushVelocity"); + static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("pushPosition"); + static IpplTimings::TimerRef updateTimer = IpplTimings::getTimer("update"); + static IpplTimings::TimerRef DummySolveTimer = IpplTimings::getTimer("solveWarmup"); + static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("Solve"); static IpplTimings::TimerRef domainDecomposition = IpplTimings::getTimer("loadBalance"); IpplTimings::startTimer(mainTimer); @@ -179,7 +179,8 @@ int main(int argc, char* argv[]) { size_type totalP = std::atol(argv[4]); const unsigned int nt = std::atoi(argv[5]); - msg << "Penning Trap " << endl << "nt " << nt << " Np= " << totalP << " grid = " << nr << endl; + msg << "Penning Trap " << endl + << "nt " << nt << " Np= " << totalP << " grid = " << nr << endl; using bunch_type = ChargedParticles, double, Dim>; @@ -289,7 +290,8 @@ int main(int argc, char* argv[]) { size_type nloc = (size_type)(factor * totalP); size_type Total_particles = 0; - MPI_Allreduce(&nloc, &Total_particles, 1, MPI_UNSIGNED_LONG, MPI_SUM, ippl::Comm->getCommunicator()); + MPI_Allreduce(&nloc, &Total_particles, 1, MPI_UNSIGNED_LONG, MPI_SUM, + ippl::Comm->getCommunicator()); int rest = (int)(totalP - Total_particles); @@ -300,7 +302,7 @@ int main(int argc, char* argv[]) { Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100 * ippl::Comm->rank())); Kokkos::parallel_for( nloc, generate_random, Kokkos::Random_XorShift64_Pool<>, Dim>( - P->R.getView(), P->P.getView(), rand_pool64, mu, sd, minU, maxU)); + P->R.getView(), P->P.getView(), rand_pool64, mu, sd, minU, maxU)); Kokkos::fence(); ippl::Comm->barrier(); @@ -355,7 +357,8 @@ int main(int argc, char* argv[]) { -(Rview(j)[0] - 0.5 * rmax[0]) * (V0 / (2 * Kokkos::pow(rmax[2], 2))); double Eext_y = -(Rview(j)[1] - 0.5 * rmax[1]) * (V0 / (2 * Kokkos::pow(rmax[2], 2))); - double Eext_z = (Rview(j)[2] - 0.5 * rmax[2]) * (V0 / (Kokkos::pow(rmax[2], 2))); + double Eext_z = + (Rview(j)[2] - 0.5 * rmax[2]) * (V0 / (Kokkos::pow(rmax[2], 2))); Eview(j)[0] += Eext_x; Eview(j)[1] += Eext_y; @@ -410,21 +413,22 @@ int main(int argc, char* argv[]) { -(R2view(j)[0] - 0.5 * rmax[0]) * (V0 / (2 * Kokkos::pow(rmax[2], 2))); double Eext_y = -(R2view(j)[1] - 0.5 * rmax[1]) * (V0 / (2 * Kokkos::pow(rmax[2], 2))); - double Eext_z = (R2view(j)[2] - 0.5 * rmax[2]) * (V0 / (Kokkos::pow(rmax[2], 2))); + double Eext_z = + (R2view(j)[2] - 0.5 * rmax[2]) * (V0 / (Kokkos::pow(rmax[2], 2))); E2view(j)[0] += Eext_x; E2view(j)[1] += Eext_y; E2view(j)[2] += Eext_z; - P2view(j)[0] = - DrInv - * (P2view(j)[0] - + alpha - * (E2view(j)[0] + P2view(j)[1] * Bext + alpha * Bext * E2view(j)[1])); - P2view(j)[1] = - DrInv - * (P2view(j)[1] - + alpha - * (E2view(j)[1] - P2view(j)[0] * Bext - alpha * Bext * E2view(j)[0])); + P2view(j)[0] = DrInv + * (P2view(j)[0] + + alpha + * (E2view(j)[0] + P2view(j)[1] * Bext + + alpha * Bext * E2view(j)[1])); + P2view(j)[1] = DrInv + * (P2view(j)[1] + + alpha + * (E2view(j)[1] - P2view(j)[0] * Bext + - alpha * Bext * E2view(j)[0])); P2view(j)[2] += alpha * E2view(j)[2]; }); IpplTimings::stopTimer(PTimer); @@ -437,7 +441,8 @@ int main(int argc, char* argv[]) { msg << "Finished time step: " << it + 1 << " time: " << P->time_m << endl; if (checkSignalHandler()) { - msg << "Aborting timestepping loop due to signal " << interruptSignalReceived << endl; + msg << "Aborting timestepping loop due to signal " << interruptSignalReceived + << endl; break; } } diff --git a/src/Solver/P3MSolver.h b/src/Solver/P3MSolver.h index c57299566..110cfa472 100644 --- a/src/Solver/P3MSolver.h +++ b/src/Solver/P3MSolver.h @@ -88,6 +88,7 @@ namespace ippl { CxField_t rhotr_m; CxField_t grntr_m; + CxField_t tempFieldComplex_m; // fields that facilitate the calculation in greensFunction() IField_t grnIField_m[Dim]; diff --git a/src/Solver/P3MSolver.hpp b/src/Solver/P3MSolver.hpp index a6b730483..e0b8fd841 100644 --- a/src/Solver/P3MSolver.hpp +++ b/src/Solver/P3MSolver.hpp @@ -125,6 +125,7 @@ namespace ippl { grn_m.initialize(*mesh_mp, *layout_mp); rhotr_m.initialize(*meshComplex_m, *layoutComplex_m); grntr_m.initialize(*meshComplex_m, *layoutComplex_m); + tempFieldComplex_m.initialize(*meshComplex_m, *layoutComplex_m); // create the FFT object fft_m = std::make_unique(*layout_mp, *layoutComplex_m, this->params_m); @@ -232,19 +233,85 @@ namespace ippl { // multiply FFT(rho2)*FFT(green) // convolution becomes multiplication in FFT - rhotr_m = rhotr_m * grntr_m; + rhotr_m = -rhotr_m * grntr_m; + + using index_array_type = typename RangePolicy::index_array_type; + if ((out == Base::GRAD) || (out == Base::SOL_AND_GRAD)) { + // Compute gradient in Fourier space and then + // take inverse FFT. + + const Trhs pi = Kokkos::numbers::pi_v; + Kokkos::complex imag = {0.0, 1.0}; + + auto view = rhotr_m.getView(); + const int nghost = rhotr_m.getNghost(); + auto tempview = tempFieldComplex_m.getView(); + auto viewRhs = this->rhs_mp->getView(); + auto viewLhs = this->lhs_mp->getView(); + const int nghostL = this->lhs_mp->getNghost(); + const auto& lDomComplex = layoutComplex_m->getLocalNDIndex(); + + // define some member variables in local scope for the parallel_for + Vector_t hsize = hr_m; + Vector N = nr_m; + Vector_t origin = mesh_mp->getOrigin(); + + for (size_t gd = 0; gd < Dim; ++gd) { + ippl::parallel_for( + "Gradient FFTPeriodicPoissonSolver", getRangePolicy(view, nghost), + KOKKOS_LAMBDA(const index_array_type& args) { + Vector iVec = args - nghost; + + for (unsigned d = 0; d < Dim; ++d) { + iVec[d] += lDomComplex[d].first(); + } + + Vector_t kVec; + + for (size_t d = 0; d < Dim; ++d) { + const Trhs Len = N[d] * hsize[d]; + bool shift = (iVec[d] > (N[d] / 2)); + bool notMid = (iVec[d] != (N[d] / 2)); + // For the noMid part see + // https://math.mit.edu/~stevenj/fft-deriv.pdf Algorithm 1 + kVec[d] = notMid * 2 * pi / Len * (iVec[d] - shift * N[d]); + } + + Trhs Dr = 0; + for (unsigned d = 0; d < Dim; ++d) { + Dr += kVec[d] * kVec[d]; + } + + apply(tempview, args) = apply(view, args); + + bool isNotZero = (Dr != 0.0); + + apply(tempview, args) *= -(isNotZero * imag * kVec[gd]); + }); + + fft_m->transform(-1, *this->rhs_mp, tempFieldComplex_m); + + ippl::parallel_for( + "Assign Gradient FFTPeriodicPoissonSolver", getRangePolicy(viewLhs, nghostL), + KOKKOS_LAMBDA(const index_array_type& args) { + apply(viewLhs, args)[gd] = apply(viewRhs, args); + }); + } - // inverse FFT of the product and store the electrostatic potential in rho2_mr - fft_m->transform(-1, *(this->rhs_mp), rhotr_m); + // normalization is double counted due to 2 transforms + *(this->lhs_mp) = *(this->lhs_mp) * nr_m[0] * nr_m[1] * nr_m[2]; + // discretization of integral requires h^3 factor + *(this->lhs_mp) = *(this->lhs_mp) * hr_m[0] * hr_m[1] * hr_m[2]; + } - // normalization is double counted due to 2 transforms - *(this->rhs_mp) = *(this->rhs_mp) * nr_m[0] * nr_m[1] * nr_m[2]; - // discretization of integral requires h^3 factor - *(this->rhs_mp) = *(this->rhs_mp) * hr_m[0] * hr_m[1] * hr_m[2]; + if ((out == Base::SOL) || (out == Base::SOL_AND_GRAD)) { + // inverse FFT of the product and store the electrostatic potential in rho2_mr + fft_m->transform(-1, *(this->rhs_mp), rhotr_m); - // if we want gradient of phi = Efield - if (out == Base::SOL_AND_GRAD || out == Base::GRAD) { - *(this->lhs_mp) = -grad(*this->rhs_mp); + // normalization is double counted due to 2 transforms + *(this->rhs_mp) = *(this->rhs_mp) * nr_m[0] * nr_m[1] * nr_m[2]; + // discretization of integral requires h^3 factor + *(this->rhs_mp) = *(this->rhs_mp) * hr_m[0] * hr_m[1] * hr_m[2]; } }; @@ -255,6 +322,9 @@ namespace ippl { void P3MSolver::greensFunction() { grn_m = 0.0; + // define pi + Trhs pi = Kokkos::numbers::pi_v; + // This alpha parameter is a choice for the Green's function // it controls the "range" of the Green's function (e.g. // for the P3M collision modelling method, it indicates @@ -274,8 +344,6 @@ namespace ippl { const int nghost = grn_m.getNghost(); const auto& ldom = layout_mp->getLocalNDIndex(); - constexpr Trhs ke = 2.532638e8; - // Kokkos parallel for loop to find (0,0,0) point and regularize Kokkos::parallel_for( "Assign Green's function ", ippl::getRangePolicy(view, nghost), @@ -288,7 +356,7 @@ namespace ippl { const bool isOrig = (ig == 0 && jg == 0 && kg == 0); Trhs r = Kokkos::real(Kokkos::sqrt(view(i, j, k))); - view(i, j, k) = (!isOrig) * ke * (Kokkos::erf(alpha * r) / r); + view(i, j, k) = (!isOrig) * (-1.0 / (4.0 * pi)) * (Kokkos::erf(alpha * r) / r); }); // perform the FFT of the Green's function for the convolution