From 9c80e1ff79f2ceea0a82dcd4ecafbb2ab4e746e9 Mon Sep 17 00:00:00 2001 From: vinciguerra_a Date: Wed, 15 Mar 2023 13:11:15 +0000 Subject: [PATCH] Reformat the entire codebase Add modified OPAL .clang-format Create a git hook for automatic formatting on commit --- .clang-format | 24 + alpine/BumponTailInstability.cpp | 457 +- alpine/ChargedParticles.hpp | 439 +- alpine/LandauDamping.cpp | 360 +- alpine/PenningTrap.cpp | 454 +- alpine/UniformPlasmaTest.cpp | 167 +- hooks/create-hook-symlink.sh | 18 + hooks/pre-commit | 5 + src/AmrParticle/AmrParticleBase.h | 116 +- src/AmrParticle/AmrParticleBase.hpp | 314 +- src/AmrParticle/AmrParticleLevelCounter.h | 73 +- src/AmrParticle/ParticleAmrLayout.h | 38 +- src/Communicate/Archive.h | 46 +- src/Communicate/Archive.hpp | 56 +- src/Communicate/Buffers.cpp | 20 +- src/Communicate/Buffers.hpp | 35 +- src/Communicate/Communicate.cpp | 17 +- src/Communicate/Communicate.h | 69 +- src/Communicate/DataTypes.h | 27 +- src/Communicate/GlobalComm.h | 37 +- src/Communicate/GlobalComm.hpp | 645 +- src/Communicate/Operations.h | 34 +- src/Communicate/TagMaker.h | 73 +- src/Communicate/Tags.h | 110 +- .../OrthogonalRecursiveBisection.h | 36 +- .../OrthogonalRecursiveBisection.hpp | 567 +- src/Expression/IpplExpressions.h | 41 +- src/Expression/IpplOperations.h | 484 +- src/FFT/FFT.h | 120 +- src/FFT/FFT.hpp | 832 +-- src/Field/BConds.h | 49 +- src/Field/BConds.hpp | 74 +- src/Field/BareField.h | 66 +- src/Field/BareField.hpp | 157 +- src/Field/BcTypes.h | 139 +- src/Field/BcTypes.hpp | 432 +- src/Field/Field.h | 19 +- src/Field/Field.hpp | 49 +- src/Field/FieldOperations.hpp | 93 +- src/Field/HaloCells.h | 79 +- src/Field/HaloCells.hpp | 335 +- src/FieldLayout/BinaryBalancer.h | 29 +- src/FieldLayout/BinaryBalancer.hpp | 810 ++- src/FieldLayout/FieldLayout.h | 96 +- src/FieldLayout/FieldLayout.hpp | 297 +- src/FieldLayout/FieldLayoutUser.h | 26 +- src/Index/Index.h | 195 +- src/Index/Index.hpp | 328 +- src/Index/NDIndex.h | 50 +- src/Index/NDIndex.hpp | 124 +- src/Ippl.cpp | 35 +- src/Ippl.h | 30 +- src/IpplCore.h | 6 +- src/Meshes/Cartesian.h | 979 ++- src/Meshes/Cartesian.hpp | 5422 ++++++++--------- src/Meshes/CartesianCentering.h | 836 ++- src/Meshes/CartesianCentering.hpp | 12 +- src/Meshes/CartesianStencilSetup.h | 27 +- src/Meshes/Centering.cpp | 19 +- src/Meshes/Centering.h | 22 +- src/Meshes/Mesh.h | 17 +- src/Meshes/Mesh.hpp | 13 +- src/Meshes/UniformCartesian.h | 85 +- src/Meshes/UniformCartesian.hpp | 51 +- src/Particle/AbstractParticle.h | 44 +- src/Particle/IntNGP.h | 209 +- src/Particle/Interpolator.h | 331 +- src/Particle/ParticleAttrib.h | 76 +- src/Particle/ParticleAttrib.hpp | 272 +- src/Particle/ParticleAttribBase.h | 12 +- src/Particle/ParticleBC.h | 90 +- src/Particle/ParticleBalancer.h | 21 +- src/Particle/ParticleBalancer.hpp | 190 +- src/Particle/ParticleBase.h | 38 +- src/Particle/ParticleBase.hpp | 133 +- src/Particle/ParticleLayout.h | 37 +- src/Particle/ParticleLayout.hpp | 22 +- src/Particle/ParticleSpatialLayout.h | 31 +- src/Particle/ParticleSpatialLayout.hpp | 104 +- src/Partition/Partitioner.h | 13 +- src/Partition/Partitioner.hpp | 43 +- src/Region/NDRegion.h | 39 +- src/Region/NDRegion.hpp | 55 +- src/Region/PRegion.h | 40 +- src/Region/PRegion.hpp | 72 +- src/Region/RegionLayout.h | 23 +- src/Region/RegionLayout.hpp | 63 +- src/Solver/Electrostatics.h | 32 +- src/Solver/ElectrostaticsCG.h | 38 +- src/Solver/FFTPeriodicPoissonSolver.h | 51 +- src/Solver/FFTPeriodicPoissonSolver.hpp | 204 +- src/Solver/FFTPoissonSolver.h | 298 +- src/Solver/FFTPoissonSolver.hpp | 2631 ++++---- src/Solver/PCG.h | 40 +- src/Solver/Solver.h | 25 +- src/Solver/SolverAlgorithm.h | 12 +- src/Solver/test/Budiardja_plot.cpp | 197 +- src/Solver/test/TestCGSolver.cpp | 102 +- .../test/TestFFTPeriodicPoissonSolver.cpp | 248 +- src/Solver/test/TestGaussian.cpp | 445 +- src/Solver/test/TestGaussian_biharmonic.cpp | 282 +- src/Solver/test/TestGaussian_convergence.cpp | 364 +- src/Solver/test/TestSolverDesign.cpp | 24 +- src/Solver/test/TestSphere.cpp | 195 +- src/Types/IpplTypes.h | 2 +- src/Types/Vector.h | 53 +- src/Types/Vector.hpp | 116 +- src/Types/ViewTypes.h | 21 +- src/Utility/Inform.cpp | 143 +- src/Utility/Inform.h | 218 +- src/Utility/IpplException.h | 16 +- src/Utility/IpplInfo.cpp | 25 +- src/Utility/IpplInfo.h | 51 +- src/Utility/IpplMemoryUsage.cpp | 55 +- src/Utility/IpplMemoryUsage.h | 61 +- src/Utility/IpplTimings.cpp | 138 +- src/Utility/IpplTimings.h | 69 +- src/Utility/PAssert.cpp | 51 +- src/Utility/PAssert.h | 56 +- src/Utility/ParameterList.h | 85 +- src/Utility/Timer.cpp | 4 - src/Utility/Timer.h | 13 +- src/Utility/Unique.cpp | 9 +- src/Utility/Unique.h | 25 +- src/Utility/User.cpp | 16 +- src/Utility/User.h | 47 +- src/Utility/UserList.cpp | 71 +- src/Utility/UserList.h | 110 +- src/Utility/my_auto_ptr.h | 52 +- src/Utility/vmap.h | 229 +- src/Utility/vmap.hpp | 344 +- test/FFT/TestCos.cpp | 67 +- test/FFT/TestFFTCC.cpp | 71 +- test/FFT/TestFFTRC.cpp | 84 +- test/FFT/TestSine.cpp | 69 +- test/field/TestCurl.cpp | 113 +- test/field/TestFieldBC.cpp | 82 +- test/field/TestHalo.cpp | 107 +- test/field/TestHessian.cpp | 185 +- test/field/TestLaplace.cpp | 107 +- test/kokkos/TestVectorField.cpp | 188 +- test/kokkos/TestVectorField2.cpp | 48 +- test/kokkos/TestVectorField3.cpp | 56 +- test/kokkos/TestVectorField4.cpp | 60 +- test/p3m/ChargedParticleFactory.hpp | 1081 ++-- test/p3m/VTKFieldWriterParallel.hpp | 990 +-- test/p3m/p3m3d.cpp | 461 +- test/p3m/p3m3dHeating.cpp | 717 ++- test/p3m/p3m3dMicrobunching.cpp | 1383 +++-- test/p3m/p3m3dRegressionTests.cpp | 1549 ++--- test/p3m/p3m3dTwoStreamParallel.cpp | 1465 ++--- test/particle/PIC3d.cpp | 563 +- test/particle/TestGather.cpp | 30 +- test/particle/TestScatter.cpp | 55 +- test/particle/benchmarkParticleUpdate.cpp | 258 +- test/region/TestNDRegion.cpp | 11 +- test/region/TestPRegion.cpp | 17 +- test/serialization/serialize01.cpp | 115 +- test/vector/TestVector.cpp | 7 +- test/vector/TestVector2.cpp | 49 +- test/vector/TestVector3.cpp | 50 +- unit_tests/BareField/BareField.cpp | 60 +- unit_tests/Field/Field.cpp | 118 +- unit_tests/Field/FieldBC.cpp | 63 +- unit_tests/Meshes/UniformCartesian.cpp | 38 +- unit_tests/PIC/ORB.cpp | 58 +- unit_tests/PIC/PIC.cpp | 57 +- unit_tests/Particle/ParticleBC.cpp | 28 +- unit_tests/Particle/ParticleBase.cpp | 26 +- unit_tests/Particle/ParticleSendRecv.cpp | 126 +- unit_tests/Utility/ParameterList.cpp | 30 +- 171 files changed, 17005 insertions(+), 19578 deletions(-) create mode 100644 .clang-format create mode 100755 hooks/create-hook-symlink.sh create mode 100755 hooks/pre-commit diff --git a/.clang-format b/.clang-format new file mode 100644 index 000000000..46bb4bb3b --- /dev/null +++ b/.clang-format @@ -0,0 +1,24 @@ +Language: Cpp +BasedOnStyle: Google +AccessModifierOffset: -4 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: true +AlignConsecutiveMacros: AcrossEmptyLinesAndComments +AlignEscapedNewlines: Left +AllowShortFunctionsOnASingleLine: Inline +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: Empty +AllowShortLoopsOnASingleLine: false +AllowShortEnumsOnASingleLine: false +AttributeMacros: ['KOKKOS_INLINE_FUNCTION'] +BreakBeforeBinaryOperators: NonAssignment +BreakConstructorInitializers: BeforeComma +ColumnLimit: 100 +DerivePointerAlignment: false +IndentWidth: 4 +IncludeBlocks: Preserve +IndentGotoLabels: false +NamespaceIndentation: All +PackConstructorInitializers: Never +ReflowComments: true +StatementMacros: ['DefineUnaryOperation', 'DefineBinaryOperation'] diff --git a/alpine/BumponTailInstability.cpp b/alpine/BumponTailInstability.cpp index c15cf60aa..f27143eb7 100644 --- a/alpine/BumponTailInstability.cpp +++ b/alpine/BumponTailInstability.cpp @@ -9,13 +9,13 @@ // stype = Field solver type e.g., FFT // lbthres = Load balancing threshold i.e., lbthres*100 is the maximum load imbalance // percentage which can be tolerated and beyond which -// particle load balancing occurs. A value of 0.01 is good for many typical +// particle load balancing occurs. A value of 0.01 is good for many typical // simulations. // ovfactor = Over-allocation factor for the buffers used in the communication. Typical // values are 1.0, 2.0. Value 1.0 means no over-allocation. // Example: // srun ./BumponTailInstability 128 128 128 10000 10 FFT 0.01 2.0 --info 10 -// Change the TestName to TwoStreamInstability or BumponTailInstability +// Change the TestName to TwoStreamInstability or BumponTailInstability // in order to simulate the Two stream instability or bump on tail instability // cases // @@ -34,176 +34,165 @@ // along with IPPL. If not, see . // -#include "ChargedParticles.hpp" -#include -#include -#include +#include #include +#include #include -#include +#include +#include +#include "ChargedParticles.hpp" -#include +#include #include #include "Utility/IpplTimings.h" template struct Newton1D { + double tol = 1e-12; + int max_iter = 20; + double pi = std::acos(-1.0); - double tol = 1e-12; - int max_iter = 20; - double pi = std::acos(-1.0); - - T k, delta, u; - - KOKKOS_INLINE_FUNCTION - Newton1D() {} - - KOKKOS_INLINE_FUNCTION - Newton1D(const T& k_, const T& delta_, - const T& u_) - : k(k_), delta(delta_), u(u_) {} - - KOKKOS_INLINE_FUNCTION - ~Newton1D() {} - - KOKKOS_INLINE_FUNCTION - T f(T& x) { - T F; - F = x + (delta * (std::sin(k * x) / k)) - u; - return F; - } - - KOKKOS_INLINE_FUNCTION - T fprime(T& x) { - T Fprime; - Fprime = 1 + (delta * std::cos(k * x)); - return Fprime; - } - - KOKKOS_FUNCTION - void solve(T& x) { - int iterations = 0; - while (iterations < max_iter && std::fabs(f(x)) > tol) { - x = x - (f(x)/fprime(x)); - iterations += 1; - } - } -}; + T k, delta, u; + + KOKKOS_INLINE_FUNCTION Newton1D() {} + + KOKKOS_INLINE_FUNCTION Newton1D(const T& k_, const T& delta_, const T& u_) + : k(k_) + , delta(delta_) + , u(u_) {} + + KOKKOS_INLINE_FUNCTION ~Newton1D() {} + + KOKKOS_INLINE_FUNCTION T f(T& x) { + T F; + F = x + (delta * (std::sin(k * x) / k)) - u; + return F; + } + + KOKKOS_INLINE_FUNCTION T fprime(T& x) { + T Fprime; + Fprime = 1 + (delta * std::cos(k * x)); + return Fprime; + } + KOKKOS_FUNCTION + void solve(T& x) { + int iterations = 0; + while (iterations < max_iter && std::fabs(f(x)) > tol) { + x = x - (f(x) / fprime(x)); + iterations += 1; + } + } +}; template struct generate_random { + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + // Output View for the random numbers + view_type x, v; + + // The GeneratorPool + GeneratorPool rand_pool; + + value_type delta, sigma, muBulk, muBeam; + size_type nlocBulk; + + T k, minU, maxU; + + // Initialize all members + generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, value_type& delta_, T& k_, + value_type& sigma_, value_type& muBulk_, value_type& muBeam_, + size_type& nlocBulk_, T& minU_, T& maxU_) + : x(x_) + , v(v_) + , rand_pool(rand_pool_) + , delta(delta_) + , sigma(sigma_) + , muBulk(muBulk_) + , muBeam(muBeam_) + , nlocBulk(nlocBulk_) + , k(k_) + , minU(minU_) + , maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + bool isBeam = (i >= nlocBulk); + + value_type muZ = (value_type)(((!isBeam) * muBulk) + (isBeam * muBeam)); + + for (unsigned d = 0; d < Dim - 1; ++d) { + x(i)[d] = rand_gen.drand(minU[d], maxU[d]); + v(i)[d] = rand_gen.normal(0.0, sigma); + } + v(i)[Dim - 1] = rand_gen.normal(muZ, sigma); + + value_type u = rand_gen.drand(minU[Dim - 1], maxU[Dim - 1]); + x(i)[Dim - 1] = u / (1 + delta); + Newton1D solver(k[Dim - 1], delta, u); + solver.solve(x(i)[Dim - 1]); - using view_type = typename ippl::detail::ViewType::view_type; - using value_type = typename T::value_type; - // Output View for the random numbers - view_type x, v; - - // The GeneratorPool - GeneratorPool rand_pool; - - value_type delta, sigma, muBulk, muBeam; - size_type nlocBulk; - - T k, minU, maxU; - - // Initialize all members - generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, - value_type& delta_, T& k_, value_type& sigma_, - value_type& muBulk_, value_type& muBeam_, - size_type& nlocBulk_, T& minU_, T& maxU_) - : x(x_), v(v_), rand_pool(rand_pool_), - delta(delta_), sigma(sigma_), muBulk(muBulk_), muBeam(muBeam_), - nlocBulk(nlocBulk_), k(k_), minU(minU_), maxU(maxU_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const size_t i) const { - // Get a random number state from the pool for the active thread - typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); - - bool isBeam = (i >= nlocBulk); - - value_type muZ = (value_type)(((!isBeam) * muBulk) + (isBeam * muBeam)); - - for (unsigned d = 0; d < Dim-1; ++d) { - - x(i)[d] = rand_gen.drand(minU[d], maxU[d]); - v(i)[d] = rand_gen.normal(0.0, sigma); + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); } - v(i)[Dim-1] = rand_gen.normal(muZ, sigma); - - value_type u = rand_gen.drand(minU[Dim-1], maxU[Dim-1]); - x(i)[Dim-1] = u / (1 + delta); - Newton1D solver(k[Dim-1], delta, u); - solver.solve(x(i)[Dim-1]); - - - // Give the state back, which will allow another thread to acquire it - rand_pool.free_state(rand_gen); - } }; -double CDF(const double& x, const double& delta, const double& k, - const unsigned& dim) { - - bool isDimZ = (dim == (Dim-1)); - double cdf = x + (double)(isDimZ * ((delta / k) * std::sin(k * x))); - return cdf; +double CDF(const double& x, const double& delta, const double& k, const unsigned& dim) { + bool isDimZ = (dim == (Dim - 1)); + double cdf = x + (double)(isDimZ * ((delta / k) * std::sin(k * x))); + return cdf; } KOKKOS_FUNCTION -double PDF(const Vector_t& xvec, const double& delta, - const Vector_t& kw) { - - double pdf = 1.0 * 1.0 * (1.0 + delta * std::cos(kw[Dim-1] * xvec[Dim-1])); +double PDF(const Vector_t& xvec, const double& delta, const Vector_t& kw) { + double pdf = 1.0 * 1.0 * (1.0 + delta * std::cos(kw[Dim - 1] * xvec[Dim - 1])); return pdf; } -//const char* TestName = "BumponTailInstability"; +// const char* TestName = "BumponTailInstability"; const char* TestName = "TwoStreamInstability"; -int main(int argc, char *argv[]){ +int main(int argc, char* argv[]) { Ippl ippl(argc, argv); Inform msg(TestName); - Inform msg2all(TestName,INFORM_ALL_NODES); + Inform msg2all(TestName, INFORM_ALL_NODES); Ippl::Comm->setDefaultOverallocation(std::atof(argv[8])); - auto start = std::chrono::high_resolution_clock::now(); - ippl::Vector nr = { + auto start = std::chrono::high_resolution_clock::now(); + ippl::Vector nr = { std::atoi(argv[1]), std::atoi(argv[2]), - std::atoi(argv[3]) + std::atoi(argv[3]), }; - static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total"); - static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); - static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); - static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("pushVelocity"); - static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("pushPosition"); - static IpplTimings::TimerRef updateTimer = IpplTimings::getTimer("update"); - static IpplTimings::TimerRef DummySolveTimer = IpplTimings::getTimer("solveWarmup"); - static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("solve"); + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total"); + static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("pushVelocity"); + static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("pushPosition"); + static IpplTimings::TimerRef updateTimer = IpplTimings::getTimer("update"); + static IpplTimings::TimerRef DummySolveTimer = IpplTimings::getTimer("solveWarmup"); + static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("solve"); static IpplTimings::TimerRef domainDecomposition = IpplTimings::getTimer("loadBalance"); IpplTimings::startTimer(mainTimer); const size_type totalP = std::atoll(argv[4]); - const unsigned int nt = std::atoi(argv[5]); + const unsigned int nt = std::atoi(argv[5]); - msg << TestName - << endl - << "nt " << nt << " Np= " - << totalP << " grid = " << nr - << endl; + msg << TestName << endl << "nt " << nt << " Np= " << totalP << " grid = " << nr << endl; using bunch_type = ChargedParticles; - std::unique_ptr P; + std::unique_ptr P; ippl::NDIndex domain; - for (unsigned i = 0; i< Dim; i++) { + for (unsigned i = 0; i < Dim; i++) { domain[i] = ippl::Index(nr[i]); } @@ -214,54 +203,51 @@ int main(int argc, char *argv[]){ Vector_t kw; double sigma, muBulk, muBeam, epsilon, delta; - - - if(std::strcmp(TestName,"TwoStreamInstability") == 0) { - // Parameters for two stream instability as in + + if (std::strcmp(TestName, "TwoStreamInstability") == 0) { + // Parameters for two stream instability as in // https://www.frontiersin.org/articles/10.3389/fphy.2018.00105/full - kw = {0.5, 0.5, 0.5}; - sigma = 0.1; + kw = {0.5, 0.5, 0.5}; + sigma = 0.1; epsilon = 0.5; - muBulk = -pi / 2.0; - muBeam = pi / 2.0; - delta = 0.01; - } - else if(std::strcmp(TestName,"BumponTailInstability") == 0) { - kw = {0.21, 0.21, 0.21}; - sigma = 1.0 / std::sqrt(2.0); + muBulk = -pi / 2.0; + muBeam = pi / 2.0; + delta = 0.01; + } else if (std::strcmp(TestName, "BumponTailInstability") == 0) { + kw = {0.21, 0.21, 0.21}; + sigma = 1.0 / std::sqrt(2.0); epsilon = 0.1; - muBulk = 0.0; - muBeam = 4.0; - delta = 0.01; - } - else { - //Default value is two stream instability - kw = {0.5, 0.5, 0.5}; - sigma = 0.1; + muBulk = 0.0; + muBeam = 4.0; + delta = 0.01; + } else { + // Default value is two stream instability + kw = {0.5, 0.5, 0.5}; + sigma = 0.1; epsilon = 0.5; - muBulk = -pi / 2.0; - muBeam = pi / 2.0; - delta = 0.01; + muBulk = -pi / 2.0; + muBeam = pi / 2.0; + delta = 0.01; } Vector_t rmin(0.0); - Vector_t rmax = 2 * pi / kw ; - double dx = rmax[0] / nr[0]; - double dy = rmax[1] / nr[1]; - double dz = rmax[2] / nr[2]; + Vector_t rmax = 2 * pi / kw; + double dx = rmax[0] / nr[0]; + double dy = rmax[1] / nr[1]; + double dz = rmax[2] / nr[2]; - Vector_t hr = {dx, dy, dz}; + Vector_t hr = {dx, dy, dz}; Vector_t origin = {rmin[0], rmin[1], rmin[2]}; - const double dt = 0.5*dx;//0.05 + const double dt = 0.5 * dx; // 0.05 - const bool isAllPeriodic=true; + const bool isAllPeriodic = true; Mesh_t mesh(domain, hr, origin); FieldLayout_t FL(domain, decomp, isAllPeriodic); PLayout_t PL(FL, mesh); - //Q = -\int\int f dx dv + // Q = -\int\int f dx dv double Q = -rmax[0] * rmax[1] * rmax[2]; - P = std::make_unique(PL,hr,rmin,rmax,decomp,Q); + P = std::make_unique(PL, hr, rmin, rmax, decomp, Q); P->nr_m = nr; @@ -272,7 +258,7 @@ int main(int argc, char *argv[]){ P->stype_m = argv[6]; P->initSolver(); - P->time_m = 0.0; + P->time_m = 0.0; P->loadbalancethreshold_m = std::atof(argv[7]); bool isFirstRepartition; @@ -280,99 +266,91 @@ int main(int argc, char *argv[]){ if ((P->loadbalancethreshold_m != 1.0) && (Ippl::Comm->size() > 1)) { msg << "Starting first repartition" << endl; IpplTimings::startTimer(domainDecomposition); - isFirstRepartition = true; + isFirstRepartition = true; const ippl::NDIndex& lDom = FL.getLocalNDIndex(); - const int nghost = P->rho_m.getNghost(); - using mdrange_type = Kokkos::MDRangePolicy>; - auto rhoview = P->rho_m.getView(); - - Kokkos::parallel_for("Assign initial rho based on PDF", - mdrange_type({nghost, nghost, nghost}, - {rhoview.extent(0) - nghost, - rhoview.extent(1) - nghost, - rhoview.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, - const int j, - const int k) - { - //local to global index conversion - const size_t ig = i + lDom[0].first() - nghost; - const size_t jg = j + lDom[1].first() - nghost; - const size_t kg = k + lDom[2].first() - nghost; - double x = (ig + 0.5) * hr[0] + origin[0]; - double y = (jg + 0.5) * hr[1] + origin[1]; - double z = (kg + 0.5) * hr[2] + origin[2]; - - Vector_t xvec = {x, y, z}; - - rhoview(i, j, k) = PDF(xvec, delta, kw); - - }); + const int nghost = P->rho_m.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + auto rhoview = P->rho_m.getView(); + + Kokkos::parallel_for( + "Assign initial rho based on PDF", + mdrange_type({nghost, nghost, nghost}, + {rhoview.extent(0) - nghost, rhoview.extent(1) - nghost, + rhoview.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // local to global index conversion + const size_t ig = i + lDom[0].first() - nghost; + const size_t jg = j + lDom[1].first() - nghost; + const size_t kg = k + lDom[2].first() - nghost; + double x = (ig + 0.5) * hr[0] + origin[0]; + double y = (jg + 0.5) * hr[1] + origin[1]; + double z = (kg + 0.5) * hr[2] + origin[2]; + + Vector_t xvec = {x, y, z}; + + rhoview(i, j, k) = PDF(xvec, delta, kw); + }); Kokkos::fence(); - + P->initializeORB(FL, mesh); P->repartition(FL, mesh, bunchBuffer, isFirstRepartition); IpplTimings::stopTimer(domainDecomposition); } - + msg << "First domain decomposition done" << endl; IpplTimings::startTimer(particleCreation); - typedef ippl::detail::RegionLayout RegionLayout_t; - const RegionLayout_t& RLayout = PL.getRegionLayout(); + const RegionLayout_t& RLayout = PL.getRegionLayout(); const typename RegionLayout_t::host_mirror_type Regions = RLayout.gethLocalRegions(); Vector_t Nr, Dr, minU, maxU; int myRank = Ippl::Comm->rank(); - for (unsigned d = 0; d rank() < rest ) + if (Ippl::Comm->rank() < rest) ++nloc; P->create(nloc); - Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100 * Ippl::Comm->rank())); - Kokkos::parallel_for(nloc, - generate_random, Dim>( - P->R.getView(), P->P.getView(), rand_pool64, delta, kw, sigma, muBulk, - muBeam, nlocBulk, minU, maxU)); + Kokkos::parallel_for(nloc, generate_random, Dim>( + P->R.getView(), P->P.getView(), rand_pool64, delta, kw, sigma, + muBulk, muBeam, nlocBulk, minU, maxU)); Kokkos::fence(); Ippl::Comm->barrier(); - IpplTimings::stopTimer(particleCreation); - - P->q = P->Q_m/totalP; - //P->dumpParticleData(); + IpplTimings::stopTimer(particleCreation); + + P->q = P->Q_m / totalP; + // P->dumpParticleData(); msg << "particles created and initial conditions assigned " << endl; isFirstRepartition = false; - //The update after the particle creation is not needed as the - //particles are generated locally + // The update after the particle creation is not needed as the + // particles are generated locally IpplTimings::startTimer(DummySolveTimer); P->rho_m = 0.0; P->solver_mp->solve(); IpplTimings::stopTimer(DummySolveTimer); - - + P->scatterCIC(totalP, 0, hr); IpplTimings::startTimer(SolveTimer); @@ -384,13 +362,12 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpDataTimer); P->dumpBumponTail(); P->gatherStatistics(totalP); - //P->dumpLocalDomains(FL, 0); + // P->dumpLocalDomains(FL, 0); IpplTimings::stopTimer(dumpDataTimer); // begin main timestep loop msg << "Starting iterations ..." << endl; - for (unsigned int it=0; itP = P->P - 0.5 * dt * P->E; IpplTimings::stopTimer(PTimer); - //drift + // drift IpplTimings::startTimer(RTimer); P->R = P->R + dt * P->P; IpplTimings::stopTimer(RTimer); - //Since the particles have moved spatially update them to correct processors - IpplTimings::startTimer(updateTimer); + // Since the particles have moved spatially update them to correct processors + IpplTimings::startTimer(updateTimer); PL.update(*P, bunchBuffer); IpplTimings::stopTimer(updateTimer); // Domain Decomposition - if (P->balance(totalP, it+1)) { - msg << "Starting repartition" << endl; - IpplTimings::startTimer(domainDecomposition); - P->repartition(FL, mesh, bunchBuffer, isFirstRepartition); - IpplTimings::stopTimer(domainDecomposition); - //IpplTimings::startTimer(dumpDataTimer); - //P->dumpLocalDomains(FL, it+1); - //IpplTimings::stopTimer(dumpDataTimer); + if (P->balance(totalP, it + 1)) { + msg << "Starting repartition" << endl; + IpplTimings::startTimer(domainDecomposition); + P->repartition(FL, mesh, bunchBuffer, isFirstRepartition); + IpplTimings::stopTimer(domainDecomposition); + // IpplTimings::startTimer(dumpDataTimer); + // P->dumpLocalDomains(FL, it+1); + // IpplTimings::stopTimer(dumpDataTimer); } + // scatter the charge onto the underlying grid + P->scatterCIC(totalP, it + 1, hr); - //scatter the charge onto the underlying grid - P->scatterCIC(totalP, it+1, hr); - - //Field solve + // Field solve IpplTimings::startTimer(SolveTimer); P->solver_mp->solve(); IpplTimings::stopTimer(SolveTimer); @@ -434,7 +410,7 @@ int main(int argc, char *argv[]){ // gather E field P->gatherCIC(); - //kick + // kick IpplTimings::startTimer(PTimer); P->P = P->P - 0.5 * dt * P->E; IpplTimings::stopTimer(PTimer); @@ -444,7 +420,7 @@ int main(int argc, char *argv[]){ P->dumpBumponTail(); P->gatherStatistics(totalP); IpplTimings::stopTimer(dumpDataTimer); - msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; + msg << "Finished time step: " << it + 1 << " time: " << P->time_m << endl; } msg << TestName << ": End." << endl; @@ -453,7 +429,8 @@ int main(int argc, char *argv[]){ IpplTimings::print(std::string("timing.dat")); auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration time_chrono = std::chrono::duration_cast>(end - start); + std::chrono::duration time_chrono = + std::chrono::duration_cast>(end - start); std::cout << "Elapsed time: " << time_chrono.count() << std::endl; return 0; diff --git a/alpine/ChargedParticles.hpp b/alpine/ChargedParticles.hpp index e64417e19..9885934db 100644 --- a/alpine/ChargedParticles.hpp +++ b/alpine/ChargedParticles.hpp @@ -23,24 +23,24 @@ constexpr unsigned Dim = 3; // some typedefs -typedef ippl::ParticleSpatialLayout PLayout_t; -typedef ippl::UniformCartesian Mesh_t; +typedef ippl::ParticleSpatialLayout PLayout_t; +typedef ippl::UniformCartesian Mesh_t; typedef ippl::FieldLayout FieldLayout_t; typedef ippl::OrthogonalRecursiveBisection ORB; using size_type = ippl::detail::size_type; -template +template using Vector = ippl::Vector; -template +template using Field = ippl::Field; -template +template using ParticleAttrib = ippl::ParticleAttrib; -typedef Vector Vector_t; -typedef Field Field_t; +typedef Vector Vector_t; +typedef Field Field_t; typedef Field VField_t; typedef ippl::FFTPeriodicPoissonSolver Solver_t; @@ -49,10 +49,7 @@ const double pi = std::acos(-1.0); // Test programs have to define this variable for VTK dump purposes extern const char* TestName; -void dumpVTK(VField_t& E, int nx, int ny, int nz, int iteration, - double dx, double dy, double dz) { - - +void dumpVTK(VField_t& E, int nx, int ny, int nz, int iteration, double dx, double dy, double dz) { typename VField_t::view_type::host_mirror_type host_view = E.getHostMirror(); std::stringstream fname; @@ -71,27 +68,23 @@ void dumpVTK(VField_t& E, int nx, int ny, int nz, int iteration, vtkout << TestName << endl; vtkout << "ASCII" << endl; vtkout << "DATASET STRUCTURED_POINTS" << endl; - vtkout << "DIMENSIONS " << nx+3 << " " << ny+3 << " " << nz+3 << endl; - vtkout << "ORIGIN " << -dx << " " << -dy << " " << -dz << endl; + vtkout << "DIMENSIONS " << nx + 3 << " " << ny + 3 << " " << nz + 3 << endl; + vtkout << "ORIGIN " << -dx << " " << -dy << " " << -dz << endl; vtkout << "SPACING " << dx << " " << dy << " " << dz << endl; - vtkout << "CELL_DATA " << (nx+2)*(ny+2)*(nz+2) << endl; + vtkout << "CELL_DATA " << (nx + 2) * (ny + 2) * (nz + 2) << endl; vtkout << "VECTORS E-Field float" << endl; - for (int z=0; z +template class ChargedParticles : public ippl::ParticleBase { public: VField_t E_m; @@ -155,55 +147,46 @@ class ChargedParticles : public ippl::ParticleBase { double rhoNorm_m; unsigned int loadbalancefreq_m; - - double loadbalancethreshold_m; + double loadbalancethreshold_m; public: - ParticleAttrib q; // charge + ParticleAttrib q; // charge typename ippl::ParticleBase::particle_position_type P; // particle velocity - typename ippl::ParticleBase::particle_position_type E; // electric field at particle position - + typename ippl::ParticleBase::particle_position_type + E; // electric field at particle position /* This constructor is mandatory for all derived classes from ParticleBase as the bunch buffer uses this */ ChargedParticles(PLayout& pl) - : ippl::ParticleBase(pl) - { + : ippl::ParticleBase(pl) { // register the particle attributes this->addAttribute(q); this->addAttribute(P); this->addAttribute(E); } - ChargedParticles(PLayout& pl, - Vector_t hr, - Vector_t rmin, - Vector_t rmax, - ippl::e_dim_tag decomp[Dim], - double Q) - : ippl::ParticleBase(pl) - , hr_m(hr) - , rmin_m(rmin) - , rmax_m(rmax) - , Q_m(Q) - { + ChargedParticles(PLayout& pl, Vector_t hr, Vector_t rmin, Vector_t rmax, + ippl::e_dim_tag decomp[Dim], double Q) + : ippl::ParticleBase(pl) + , hr_m(hr) + , rmin_m(rmin) + , rmax_m(rmax) + , Q_m(Q) { // register the particle attributes this->addAttribute(q); this->addAttribute(P); this->addAttribute(E); setupBCs(); for (unsigned int i = 0; i < Dim; i++) - decomp_m[i]=decomp[i]; + decomp_m[i] = decomp[i]; } - ~ChargedParticles(){ } + ~ChargedParticles() {} - void setupBCs() { - setBCAllPeriodic(); - } + void setupBCs() { setBCAllPeriodic(); } void updateLayout(FieldLayout_t& fl, Mesh_t& mesh, ChargedParticles& buffer, bool& isFirstRepartition) { @@ -219,39 +202,36 @@ class ChargedParticles : public ippl::ParticleBase { IpplTimings::stopTimer(tupdateLayout); static IpplTimings::TimerRef tupdatePLayout = IpplTimings::getTimer("updatePB"); IpplTimings::startTimer(tupdatePLayout); - if(!isFirstRepartition) { + if (!isFirstRepartition) { layout.update(*this, buffer); } IpplTimings::stopTimer(tupdatePLayout); } - void initializeORB(FieldLayout_t& fl, Mesh_t& mesh) { - orb.initialize(fl, mesh, rho_m); - } + void initializeORB(FieldLayout_t& fl, Mesh_t& mesh) { orb.initialize(fl, mesh, rho_m); } - void repartition(FieldLayout_t& fl, Mesh_t& mesh, ChargedParticles& buffer, + void repartition(FieldLayout_t& fl, Mesh_t& mesh, ChargedParticles& buffer, bool& isFirstRepartition) { // Repartition the domains bool res = orb.binaryRepartition(this->R, fl, isFirstRepartition); if (res != true) { - std::cout << "Could not repartition!" << std::endl; - return; + std::cout << "Could not repartition!" << std::endl; + return; } // Update this->updateLayout(fl, mesh, buffer, isFirstRepartition); this->solver_mp->setRhs(rho_m); } - bool balance(size_type totalP, const unsigned int nstep){ - if(std::strcmp(TestName,"UniformPlasmaTest") == 0) { + bool balance(size_type totalP, const unsigned int nstep) { + if (std::strcmp(TestName, "UniformPlasmaTest") == 0) { return (nstep % loadbalancefreq_m == 0); - } - else { + } else { int local = 0; std::vector res(Ippl::Comm->size()); - double equalPart = (double) totalP / Ippl::Comm->size(); - double dev = std::abs((double)this->getLocalNum() - equalPart) / totalP; + double equalPart = (double)totalP / Ippl::Comm->size(); + double dev = std::abs((double)this->getLocalNum() - equalPart) / totalP; if (dev > loadbalancethreshold_m) local = 1; MPI_Allgather(&local, 1, MPI_INT, res.data(), 1, MPI_INT, Ippl::getComm()); @@ -266,12 +246,10 @@ class ChargedParticles : public ippl::ParticleBase { void gatherStatistics(size_type totalP) { std::vector imb(Ippl::Comm->size()); - double equalPart = (double) totalP / Ippl::Comm->size(); - double dev = (std::abs((double)this->getLocalNum() - equalPart) - / totalP) * 100.0; - MPI_Gather(&dev, 1, MPI_DOUBLE, imb.data(), 1, MPI_DOUBLE, 0, - Ippl::getComm()); - + double equalPart = (double)totalP / Ippl::Comm->size(); + double dev = (std::abs((double)this->getLocalNum() - equalPart) / totalP) * 100.0; + MPI_Gather(&dev, 1, MPI_DOUBLE, imb.data(), 1, MPI_DOUBLE, 0, Ippl::getComm()); + if (Ippl::Comm->rank() == 0) { std::stringstream fname; fname << "data/LoadBalance_"; @@ -282,90 +260,79 @@ class ChargedParticles : public ippl::ParticleBase { csvout.precision(5); csvout.setf(std::ios::scientific, std::ios::floatfield); - if(time_m == 0.0) { + if (time_m == 0.0) { csvout << "time, rank, imbalance percentage" << endl; } - for(int r=0; r < Ippl::Comm->size(); ++r) { - csvout << time_m << " " - << r << " " - << imb[r] << endl; + for (int r = 0; r < Ippl::Comm->size(); ++r) { + csvout << time_m << " " << r << " " << imb[r] << endl; } } Ippl::Comm->barrier(); - } - void gatherCIC() { - - gather(this->E, E_m, this->R); - - } + void gatherCIC() { gather(this->E, E_m, this->R); } void scatterCIC(size_type totalP, unsigned int iteration, Vector_t& hrField) { + Inform m("scatter "); + rho_m = 0.0; + scatter(q, rho_m, this->R); - Inform m("scatter "); - - rho_m = 0.0; - scatter(q, rho_m, this->R); - - static IpplTimings::TimerRef sumTimer = IpplTimings::getTimer("Check"); - IpplTimings::startTimer(sumTimer); - double Q_grid = rho_m.sum(); + static IpplTimings::TimerRef sumTimer = IpplTimings::getTimer("Check"); + IpplTimings::startTimer(sumTimer); + double Q_grid = rho_m.sum(); - size_type Total_particles = 0; - size_type local_particles = this->getLocalNum(); + size_type Total_particles = 0; + size_type local_particles = this->getLocalNum(); - MPI_Reduce(&local_particles, &Total_particles, 1, - MPI_UNSIGNED_LONG, MPI_SUM, 0, Ippl::getComm()); + MPI_Reduce(&local_particles, &Total_particles, 1, MPI_UNSIGNED_LONG, MPI_SUM, 0, + Ippl::getComm()); - double rel_error = std::fabs((Q_m-Q_grid)/Q_m); - m << "Rel. error in charge conservation = " << rel_error << endl; + double rel_error = std::fabs((Q_m - Q_grid) / Q_m); + m << "Rel. error in charge conservation = " << rel_error << endl; - if(Ippl::Comm->rank() == 0) { - if(Total_particles != totalP || rel_error > 1e-10) { - m << "Time step: " << iteration << endl; - m << "Total particles in the sim. " << totalP - << " " << "after update: " - << Total_particles << endl; - m << "Rel. error in charge conservation: " - << rel_error << endl; - std::abort(); - } - } + if (Ippl::Comm->rank() == 0) { + if (Total_particles != totalP || rel_error > 1e-10) { + m << "Time step: " << iteration << endl; + m << "Total particles in the sim. " << totalP << " " + << "after update: " << Total_particles << endl; + m << "Rel. error in charge conservation: " << rel_error << endl; + std::abort(); + } + } - rho_m = rho_m / (hrField[0] * hrField[1] * hrField[2]); + rho_m = rho_m / (hrField[0] * hrField[1] * hrField[2]); - rhoNorm_m = norm(rho_m); - IpplTimings::stopTimer(sumTimer); + rhoNorm_m = norm(rho_m); + IpplTimings::stopTimer(sumTimer); - //dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); + // dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); - //rho = rho_e - rho_i - rho_m = rho_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); + // rho = rho_e - rho_i + rho_m = + rho_m + - (Q_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); } void initSolver() { - Inform m("solver "); - if(stype_m == "FFT") + if (stype_m == "FFT") initFFTSolver(); else m << "No solver matches the argument" << endl; - } void initFFTSolver() { ippl::ParameterList sp; sp.add("output_type", Solver_t::GRAD); - sp.add("use_heffte_defaults", false); - sp.add("use_pencils", true); - sp.add("use_reorder", false); - sp.add("use_gpu_aware", true); - sp.add("comm", ippl::p2p_pl); - sp.add("r2c_direction", 0); + sp.add("use_heffte_defaults", false); + sp.add("use_pencils", true); + sp.add("use_reorder", false); + sp.add("use_gpu_aware", true); + sp.add("comm", ippl::p2p_pl); + sp.add("r2c_direction", 0); solver_mp = std::make_shared(); @@ -376,46 +343,41 @@ class ChargedParticles : public ippl::ParticleBase { solver_mp->setLhs(E_m); } - - - void dumpData() { - + void dumpData() { auto Pview = P.getView(); double Energy = 0.0; - Kokkos::parallel_reduce("Particle Energy", this->getLocalNum(), - KOKKOS_LAMBDA(const int i, double& valL){ - double myVal = dot(Pview(i), Pview(i)).apply(); - valL += myVal; - }, Kokkos::Sum(Energy)); + Kokkos::parallel_reduce( + "Particle Energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL) { + double myVal = dot(Pview(i), Pview(i)).apply(); + valL += myVal; + }, + Kokkos::Sum(Energy)); Energy *= 0.5; double gEnergy = 0.0; - MPI_Reduce(&Energy, &gEnergy, 1, - MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); - + MPI_Reduce(&Energy, &gEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); const int nghostE = E_m.getNghost(); - auto Eview = E_m.getView(); + auto Eview = E_m.getView(); Vector_t normE; using mdrange_type = Kokkos::MDRangePolicy>; - for (unsigned d=0; d(temp)); + for (unsigned d = 0; d < Dim; ++d) { + double temp = 0.0; + Kokkos::parallel_reduce( + "Vector E reduce", + mdrange_type({nghostE, nghostE, nghostE}, + {Eview.extent(0) - nghostE, Eview.extent(1) - nghostE, + Eview.extent(2) - nghostE}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { + double myVal = std::pow(Eview(i, j, k)[d], 2); + valL += myVal; + }, + Kokkos::Sum(temp)); double globaltemp = 0.0; MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); normE[d] = std::sqrt(globaltemp); @@ -431,149 +393,130 @@ class ChargedParticles : public ippl::ParticleBase { csvout.precision(10); csvout.setf(std::ios::scientific, std::ios::floatfield); - if(time_m == 0.0) { + if (time_m == 0.0) { csvout << "time, Kinetic energy, Rho_norm2, Ex_norm2, Ey_norm2, Ez_norm2" << endl; } - csvout << time_m << " " - << gEnergy << " " - << rhoNorm_m << " " - << normE[0] << " " - << normE[1] << " " - << normE[2] << endl; + csvout << time_m << " " << gEnergy << " " << rhoNorm_m << " " << normE[0] << " " + << normE[1] << " " << normE[2] << endl; } Ippl::Comm->barrier(); - } - - void dumpLandau() { + } + void dumpLandau() { const int nghostE = E_m.getNghost(); - auto Eview = E_m.getView(); + auto Eview = E_m.getView(); double fieldEnergy, ExAmp; using mdrange_type = Kokkos::MDRangePolicy>; double temp = 0.0; - Kokkos::parallel_reduce("Ex inner product", - mdrange_type({nghostE, nghostE, nghostE}, - {Eview.extent(0) - nghostE, - Eview.extent(1) - nghostE, - Eview.extent(2) - nghostE}), - KOKKOS_LAMBDA(const size_t i, const size_t j, - const size_t k, double& valL) - { - double myVal = std::pow(Eview(i, j, k)[0], 2); - valL += myVal; - }, Kokkos::Sum(temp)); + Kokkos::parallel_reduce( + "Ex inner product", + mdrange_type( + {nghostE, nghostE, nghostE}, + {Eview.extent(0) - nghostE, Eview.extent(1) - nghostE, Eview.extent(2) - nghostE}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { + double myVal = std::pow(Eview(i, j, k)[0], 2); + valL += myVal; + }, + Kokkos::Sum(temp)); double globaltemp = 0.0; MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); fieldEnergy = globaltemp * hr_m[0] * hr_m[1] * hr_m[2]; double tempMax = 0.0; - Kokkos::parallel_reduce("Ex max norm", - mdrange_type({nghostE, nghostE, nghostE}, - {Eview.extent(0) - nghostE, - Eview.extent(1) - nghostE, - Eview.extent(2) - nghostE}), - KOKKOS_LAMBDA(const size_t i, const size_t j, - const size_t k, double& valL) - { - double myVal = std::fabs(Eview(i, j, k)[0]); - if(myVal > valL) valL = myVal; - }, Kokkos::Max(tempMax)); + Kokkos::parallel_reduce( + "Ex max norm", + mdrange_type( + {nghostE, nghostE, nghostE}, + {Eview.extent(0) - nghostE, Eview.extent(1) - nghostE, Eview.extent(2) - nghostE}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { + double myVal = std::fabs(Eview(i, j, k)[0]); + if (myVal > valL) + valL = myVal; + }, + Kokkos::Max(tempMax)); ExAmp = 0.0; MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); - if (Ippl::Comm->rank() == 0) { std::stringstream fname; fname << "data/FieldLandau_"; fname << Ippl::Comm->size(); fname << ".csv"; - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); csvout.precision(10); csvout.setf(std::ios::scientific, std::ios::floatfield); - if(time_m == 0.0) { + if (time_m == 0.0) { csvout << "time, Ex_field_energy, Ex_max_norm" << endl; } - csvout << time_m << " " - << fieldEnergy << " " - << ExAmp << endl; - + csvout << time_m << " " << fieldEnergy << " " << ExAmp << endl; } - + Ippl::Comm->barrier(); - } - - void dumpBumponTail() { + } + void dumpBumponTail() { const int nghostE = E_m.getNghost(); - auto Eview = E_m.getView(); + auto Eview = E_m.getView(); double fieldEnergy, EzAmp; using mdrange_type = Kokkos::MDRangePolicy>; double temp = 0.0; - Kokkos::parallel_reduce("Ex inner product", - mdrange_type({nghostE, nghostE, nghostE}, - {Eview.extent(0) - nghostE, - Eview.extent(1) - nghostE, - Eview.extent(2) - nghostE}), - KOKKOS_LAMBDA(const size_t i, const size_t j, - const size_t k, double& valL) - { - double myVal = std::pow(Eview(i, j, k)[2], 2); - valL += myVal; - }, Kokkos::Sum(temp)); + Kokkos::parallel_reduce( + "Ex inner product", + mdrange_type( + {nghostE, nghostE, nghostE}, + {Eview.extent(0) - nghostE, Eview.extent(1) - nghostE, Eview.extent(2) - nghostE}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { + double myVal = std::pow(Eview(i, j, k)[2], 2); + valL += myVal; + }, + Kokkos::Sum(temp)); double globaltemp = 0.0; MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); fieldEnergy = globaltemp * hr_m[0] * hr_m[1] * hr_m[2]; double tempMax = 0.0; - Kokkos::parallel_reduce("Ex max norm", - mdrange_type({nghostE, nghostE, nghostE}, - {Eview.extent(0) - nghostE, - Eview.extent(1) - nghostE, - Eview.extent(2) - nghostE}), - KOKKOS_LAMBDA(const size_t i, const size_t j, - const size_t k, double& valL) - { - double myVal = std::fabs(Eview(i, j, k)[2]); - if(myVal > valL) valL = myVal; - }, Kokkos::Max(tempMax)); + Kokkos::parallel_reduce( + "Ex max norm", + mdrange_type( + {nghostE, nghostE, nghostE}, + {Eview.extent(0) - nghostE, Eview.extent(1) - nghostE, Eview.extent(2) - nghostE}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { + double myVal = std::fabs(Eview(i, j, k)[2]); + if (myVal > valL) + valL = myVal; + }, + Kokkos::Max(tempMax)); EzAmp = 0.0; MPI_Reduce(&tempMax, &EzAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); - if (Ippl::Comm->rank() == 0) { std::stringstream fname; fname << "data/FieldBumponTail_"; fname << Ippl::Comm->size(); fname << ".csv"; - Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); csvout.precision(10); csvout.setf(std::ios::scientific, std::ios::floatfield); - if(time_m == 0.0) { + if (time_m == 0.0) { csvout << "time, Ez_field_energy, Ez_max_norm" << endl; } - csvout << time_m << " " - << fieldEnergy << " " - << EzAmp << endl; - + csvout << time_m << " " << fieldEnergy << " " << EzAmp << endl; } - - Ippl::Comm->barrier(); - } - void dumpParticleData() { + Ippl::Comm->barrier(); + } + void dumpParticleData() { typename ParticleAttrib::HostMirror R_host = this->R.getHostMirror(); typename ParticleAttrib::HostMirror P_host = this->P.getHostMirror(); Kokkos::deep_copy(R_host, this->R.getView()); @@ -586,39 +529,31 @@ class ChargedParticles : public ippl::ParticleBase { pcsvout.precision(10); pcsvout.setf(std::ios::scientific, std::ios::floatfield); pcsvout << "R_x, R_y, R_z, V_x, V_y, V_z" << endl; - for (size_type i = 0; i< this->getLocalNum(); i++) { - pcsvout << R_host(i)[0] << " " - << R_host(i)[1] << " " - << R_host(i)[2] << " " - << P_host(i)[0] << " " - << P_host(i)[1] << " " - << P_host(i)[2] << endl; + for (size_type i = 0; i < this->getLocalNum(); i++) { + pcsvout << R_host(i)[0] << " " << R_host(i)[1] << " " << R_host(i)[2] << " " + << P_host(i)[0] << " " << P_host(i)[1] << " " << P_host(i)[2] << endl; } Ippl::Comm->barrier(); - } - - void dumpLocalDomains(const FieldLayout_t& fl, const unsigned int step) { + } + void dumpLocalDomains(const FieldLayout_t& fl, const unsigned int step) { if (Ippl::Comm->rank() == 0) { const typename FieldLayout_t::host_mirror_type domains = fl.getHostLocalDomains(); std::ofstream myfile; myfile.open("data/domains" + std::to_string(step) + ".txt"); for (unsigned int i = 0; i < domains.size(); ++i) { - myfile << domains[i][0].first() << " " << domains[i][1].first() << " " << domains[i][2].first() << " " - << domains[i][0].first() << " " << domains[i][1].last() << " " << domains[i][2].first() << " " - << domains[i][0].last() << " " << domains[i][1].first() << " " << domains[i][2].first() << " " - << domains[i][0].first() << " " << domains[i][1].first() << " " << domains[i][2].last() - << "\n"; + myfile << domains[i][0].first() << " " << domains[i][1].first() << " " + << domains[i][2].first() << " " << domains[i][0].first() << " " + << domains[i][1].last() << " " << domains[i][2].first() << " " + << domains[i][0].last() << " " << domains[i][1].first() << " " + << domains[i][2].first() << " " << domains[i][0].first() << " " + << domains[i][1].first() << " " << domains[i][2].last() << "\n"; } myfile.close(); } Ippl::Comm->barrier(); - } - -private: - void setBCAllPeriodic() { - - this->setParticleBC(ippl::BC::PERIODIC); } +private: + void setBCAllPeriodic() { this->setParticleBC(ippl::BC::PERIODIC); } }; diff --git a/alpine/LandauDamping.cpp b/alpine/LandauDamping.cpp index 26f8a3a88..068711ef4 100644 --- a/alpine/LandauDamping.cpp +++ b/alpine/LandauDamping.cpp @@ -9,7 +9,7 @@ // stype = Field solver type e.g., FFT // lbthres = Load balancing threshold i.e., lbthres*100 is the maximum load imbalance // percentage which can be tolerated and beyond which -// particle load balancing occurs. A value of 0.01 is good for many typical +// particle load balancing occurs. A value of 0.01 is good for many typical // simulations. // ovfactor = Over-allocation factor for the buffers used in the communication. Typical // values are 1.0, 2.0. Value 1.0 means no over-allocation. @@ -31,113 +31,108 @@ // along with IPPL. If not, see . // -#include "ChargedParticles.hpp" -#include -#include -#include +#include #include +#include #include -#include +#include +#include +#include "ChargedParticles.hpp" -#include +#include #include #include "Utility/IpplTimings.h" template struct Newton1D { + double tol = 1e-12; + int max_iter = 20; + double pi = std::acos(-1.0); - double tol = 1e-12; - int max_iter = 20; - double pi = std::acos(-1.0); - - T k, alpha, u; - - KOKKOS_INLINE_FUNCTION - Newton1D() {} - - KOKKOS_INLINE_FUNCTION - Newton1D(const T& k_, const T& alpha_, - const T& u_) - : k(k_), alpha(alpha_), u(u_) {} - - KOKKOS_INLINE_FUNCTION - ~Newton1D() {} - - KOKKOS_INLINE_FUNCTION - T f(T& x) { - T F; - F = x + (alpha * (std::sin(k * x) / k)) - u; - return F; - } - - KOKKOS_INLINE_FUNCTION - T fprime(T& x) { - T Fprime; - Fprime = 1 + (alpha * std::cos(k * x)); - return Fprime; - } - - KOKKOS_FUNCTION - void solve(T& x) { - int iterations = 0; - while (iterations < max_iter && std::fabs(f(x)) > tol) { - x = x - (f(x)/fprime(x)); - iterations += 1; - } - } -}; - - -template -struct generate_random { + T k, alpha, u; - using view_type = typename ippl::detail::ViewType::view_type; - using value_type = typename T::value_type; - // Output View for the random numbers - view_type x, v; + KOKKOS_INLINE_FUNCTION Newton1D() {} - // The GeneratorPool - GeneratorPool rand_pool; + KOKKOS_INLINE_FUNCTION Newton1D(const T& k_, const T& alpha_, const T& u_) + : k(k_) + , alpha(alpha_) + , u(u_) {} - value_type alpha; + KOKKOS_INLINE_FUNCTION ~Newton1D() {} - T k, minU, maxU; + KOKKOS_INLINE_FUNCTION T f(T& x) { + T F; + F = x + (alpha * (std::sin(k * x) / k)) - u; + return F; + } - // Initialize all members - generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, - value_type& alpha_, T& k_, T& minU_, T& maxU_) - : x(x_), v(v_), rand_pool(rand_pool_), - alpha(alpha_), k(k_), minU(minU_), maxU(maxU_) {} + KOKKOS_INLINE_FUNCTION T fprime(T& x) { + T Fprime; + Fprime = 1 + (alpha * std::cos(k * x)); + return Fprime; + } - KOKKOS_INLINE_FUNCTION - void operator()(const size_t i) const { - // Get a random number state from the pool for the active thread - typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + KOKKOS_FUNCTION + void solve(T& x) { + int iterations = 0; + while (iterations < max_iter && std::fabs(f(x)) > tol) { + x = x - (f(x) / fprime(x)); + iterations += 1; + } + } +}; - value_type u; - for (unsigned d = 0; d < Dim; ++d) { +template +struct generate_random { + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + // Output View for the random numbers + view_type x, v; + + // The GeneratorPool + GeneratorPool rand_pool; + + value_type alpha; + + T k, minU, maxU; + + // Initialize all members + generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, value_type& alpha_, T& k_, + T& minU_, T& maxU_) + : x(x_) + , v(v_) + , rand_pool(rand_pool_) + , alpha(alpha_) + , k(k_) + , minU(minU_) + , maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + value_type u; + for (unsigned d = 0; d < Dim; ++d) { + u = rand_gen.drand(minU[d], maxU[d]); + x(i)[d] = u / (1 + alpha); + Newton1D solver(k[d], alpha, u); + solver.solve(x(i)[d]); + v(i)[d] = rand_gen.normal(0.0, 1.0); + } - u = rand_gen.drand(minU[d], maxU[d]); - x(i)[d] = u / (1 + alpha); - Newton1D solver(k[d], alpha, u); - solver.solve(x(i)[d]); - v(i)[d] = rand_gen.normal(0.0, 1.0); + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); } - - // Give the state back, which will allow another thread to acquire it - rand_pool.free_state(rand_gen); - } }; double CDF(const double& x, const double& alpha, const double& k) { - double cdf = x + (alpha / k) * std::sin(k * x); - return cdf; + double cdf = x + (alpha / k) * std::sin(k * x); + return cdf; } KOKKOS_FUNCTION -double PDF(const Vector_t& xvec, const double& alpha, - const Vector_t& kw, const unsigned Dim) { +double PDF(const Vector_t& xvec, const double& alpha, const Vector_t& kw, const unsigned Dim) { double pdf = 1.0; for (unsigned d = 0; d < Dim; ++d) { @@ -148,48 +143,40 @@ double PDF(const Vector_t& xvec, const double& alpha, const char* TestName = "LandauDamping"; -int main(int argc, char *argv[]){ +int main(int argc, char* argv[]) { Ippl ippl(argc, argv); - + Inform msg("LandauDamping"); - Inform msg2all("LandauDamping",INFORM_ALL_NODES); + Inform msg2all("LandauDamping", INFORM_ALL_NODES); Ippl::Comm->setDefaultOverallocation(std::atof(argv[8])); - auto start = std::chrono::high_resolution_clock::now(); - ippl::Vector nr = { - std::atoi(argv[1]), - std::atoi(argv[2]), - std::atoi(argv[3]) - }; - - static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total"); - static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); - static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); - static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("pushVelocity"); - static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("pushPosition"); - static IpplTimings::TimerRef updateTimer = IpplTimings::getTimer("update"); - static IpplTimings::TimerRef DummySolveTimer = IpplTimings::getTimer("solveWarmup"); - static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("solve"); + auto start = std::chrono::high_resolution_clock::now(); + ippl::Vector nr = {std::atoi(argv[1]), std::atoi(argv[2]), std::atoi(argv[3])}; + + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total"); + static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("pushVelocity"); + static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("pushPosition"); + static IpplTimings::TimerRef updateTimer = IpplTimings::getTimer("update"); + static IpplTimings::TimerRef DummySolveTimer = IpplTimings::getTimer("solveWarmup"); + static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("solve"); static IpplTimings::TimerRef domainDecomposition = IpplTimings::getTimer("loadBalance"); IpplTimings::startTimer(mainTimer); const size_type totalP = std::atoll(argv[4]); - const unsigned int nt = std::atoi(argv[5]); + const unsigned int nt = std::atoi(argv[5]); - msg << "Landau damping" - << endl - << "nt " << nt << " Np= " - << totalP << " grid = " << nr - << endl; + msg << "Landau damping" << endl << "nt " << nt << " Np= " << totalP << " grid = " << nr << endl; using bunch_type = ChargedParticles; - std::unique_ptr P; + std::unique_ptr P; ippl::NDIndex domain; - for (unsigned i = 0; i< Dim; i++) { + for (unsigned i = 0; i < Dim; i++) { domain[i] = ippl::Index(nr[i]); } @@ -199,26 +186,26 @@ int main(int argc, char *argv[]){ } // create mesh and layout objects for this problem domain - Vector_t kw = {0.5, 0.5, 0.5}; + Vector_t kw = {0.5, 0.5, 0.5}; double alpha = 0.05; Vector_t rmin(0.0); - Vector_t rmax = 2 * pi / kw ; - double dx = rmax[0] / nr[0]; - double dy = rmax[1] / nr[1]; - double dz = rmax[2] / nr[2]; + Vector_t rmax = 2 * pi / kw; + double dx = rmax[0] / nr[0]; + double dy = rmax[1] / nr[1]; + double dz = rmax[2] / nr[2]; - Vector_t hr = {dx, dy, dz}; + Vector_t hr = {dx, dy, dz}; Vector_t origin = {rmin[0], rmin[1], rmin[2]}; - const double dt = 0.5*dx; + const double dt = 0.5 * dx; - const bool isAllPeriodic=true; + const bool isAllPeriodic = true; Mesh_t mesh(domain, hr, origin); FieldLayout_t FL(domain, decomp, isAllPeriodic); PLayout_t PL(FL, mesh); - //Q = -\int\int f dx dv + // Q = -\int\int f dx dv double Q = -rmax[0] * rmax[1] * rmax[2]; - P = std::make_unique(PL,hr,rmin,rmax,decomp,Q); + P = std::make_unique(PL, hr, rmin, rmax, decomp, Q); P->nr_m = nr; @@ -229,7 +216,7 @@ int main(int argc, char *argv[]){ P->stype_m = argv[6]; P->initSolver(); - P->time_m = 0.0; + P->time_m = 0.0; P->loadbalancethreshold_m = std::atof(argv[7]); bool isFirstRepartition; @@ -237,85 +224,80 @@ int main(int argc, char *argv[]){ if ((P->loadbalancethreshold_m != 1.0) && (Ippl::Comm->size() > 1)) { msg << "Starting first repartition" << endl; IpplTimings::startTimer(domainDecomposition); - isFirstRepartition = true; + isFirstRepartition = true; const ippl::NDIndex& lDom = FL.getLocalNDIndex(); - const int nghost = P->rho_m.getNghost(); - using mdrange_type = Kokkos::MDRangePolicy>; - auto rhoview = P->rho_m.getView(); - - Kokkos::parallel_for("Assign initial rho based on PDF", - mdrange_type({nghost, nghost, nghost}, - {rhoview.extent(0) - nghost, - rhoview.extent(1) - nghost, - rhoview.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, - const int j, - const int k) - { - //local to global index conversion - const size_t ig = i + lDom[0].first() - nghost; - const size_t jg = j + lDom[1].first() - nghost; - const size_t kg = k + lDom[2].first() - nghost; - double x = (ig + 0.5) * hr[0] + origin[0]; - double y = (jg + 0.5) * hr[1] + origin[1]; - double z = (kg + 0.5) * hr[2] + origin[2]; - - Vector_t xvec = {x, y, z}; - - rhoview(i, j, k) = PDF(xvec, alpha, kw, Dim); - - }); + const int nghost = P->rho_m.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + auto rhoview = P->rho_m.getView(); + + Kokkos::parallel_for( + "Assign initial rho based on PDF", + mdrange_type({nghost, nghost, nghost}, + {rhoview.extent(0) - nghost, rhoview.extent(1) - nghost, + rhoview.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // local to global index conversion + const size_t ig = i + lDom[0].first() - nghost; + const size_t jg = j + lDom[1].first() - nghost; + const size_t kg = k + lDom[2].first() - nghost; + double x = (ig + 0.5) * hr[0] + origin[0]; + double y = (jg + 0.5) * hr[1] + origin[1]; + double z = (kg + 0.5) * hr[2] + origin[2]; + + Vector_t xvec = {x, y, z}; + + rhoview(i, j, k) = PDF(xvec, alpha, kw, Dim); + }); Kokkos::fence(); - + P->initializeORB(FL, mesh); P->repartition(FL, mesh, bunchBuffer, isFirstRepartition); IpplTimings::stopTimer(domainDecomposition); } - + msg << "First domain decomposition done" << endl; IpplTimings::startTimer(particleCreation); typedef ippl::detail::RegionLayout RegionLayout_t; - const RegionLayout_t& RLayout = PL.getRegionLayout(); + const RegionLayout_t& RLayout = PL.getRegionLayout(); const typename RegionLayout_t::host_mirror_type Regions = RLayout.gethLocalRegions(); Vector_t Nr, Dr, minU, maxU; int myRank = Ippl::Comm->rank(); - for (unsigned d = 0; d rank() < rest ) + if (Ippl::Comm->rank() < rest) ++nloc; P->create(nloc); - Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100 * Ippl::Comm->rank())); Kokkos::parallel_for(nloc, generate_random, Dim>( - P->R.getView(), P->P.getView(), rand_pool64, alpha, kw, minU, maxU)); + P->R.getView(), P->P.getView(), rand_pool64, alpha, kw, minU, maxU)); Kokkos::fence(); Ippl::Comm->barrier(); - IpplTimings::stopTimer(particleCreation); - - P->q = P->Q_m/totalP; + IpplTimings::stopTimer(particleCreation); + + P->q = P->Q_m / totalP; msg << "particles created and initial conditions assigned " << endl; isFirstRepartition = false; - //The update after the particle creation is not needed as the - //particles are generated locally + // The update after the particle creation is not needed as the + // particles are generated locally IpplTimings::startTimer(DummySolveTimer); P->rho_m = 0.0; @@ -333,13 +315,12 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpDataTimer); P->dumpLandau(); P->gatherStatistics(totalP); - //P->dumpLocalDomains(FL, 0); + // P->dumpLocalDomains(FL, 0); IpplTimings::stopTimer(dumpDataTimer); // begin main timestep loop msg << "Starting iterations ..." << endl; - for (unsigned int it=0; itP = P->P - 0.5 * dt * P->E; IpplTimings::stopTimer(PTimer); - //drift + // drift IpplTimings::startTimer(RTimer); P->R = P->R + dt * P->P; IpplTimings::stopTimer(RTimer); - //Since the particles have moved spatially update them to correct processors - IpplTimings::startTimer(updateTimer); + // Since the particles have moved spatially update them to correct processors + IpplTimings::startTimer(updateTimer); PL.update(*P, bunchBuffer); IpplTimings::stopTimer(updateTimer); // Domain Decomposition - if (P->balance(totalP, it+1)) { - msg << "Starting repartition" << endl; - IpplTimings::startTimer(domainDecomposition); - P->repartition(FL, mesh, bunchBuffer, isFirstRepartition); - IpplTimings::stopTimer(domainDecomposition); - //IpplTimings::startTimer(dumpDataTimer); - //P->dumpLocalDomains(FL, it+1); - //IpplTimings::stopTimer(dumpDataTimer); + if (P->balance(totalP, it + 1)) { + msg << "Starting repartition" << endl; + IpplTimings::startTimer(domainDecomposition); + P->repartition(FL, mesh, bunchBuffer, isFirstRepartition); + IpplTimings::stopTimer(domainDecomposition); + // IpplTimings::startTimer(dumpDataTimer); + // P->dumpLocalDomains(FL, it+1); + // IpplTimings::stopTimer(dumpDataTimer); } + // scatter the charge onto the underlying grid + P->scatterCIC(totalP, it + 1, hr); - //scatter the charge onto the underlying grid - P->scatterCIC(totalP, it+1, hr); - - //Field solve + // Field solve IpplTimings::startTimer(SolveTimer); P->solver_mp->solve(); IpplTimings::stopTimer(SolveTimer); @@ -383,7 +363,7 @@ int main(int argc, char *argv[]){ // gather E field P->gatherCIC(); - //kick + // kick IpplTimings::startTimer(PTimer); P->P = P->P - 0.5 * dt * P->E; IpplTimings::stopTimer(PTimer); @@ -393,7 +373,7 @@ int main(int argc, char *argv[]){ P->dumpLandau(); P->gatherStatistics(totalP); IpplTimings::stopTimer(dumpDataTimer); - msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; + msg << "Finished time step: " << it + 1 << " time: " << P->time_m << endl; } msg << "LandauDamping: End." << endl; @@ -402,9 +382,9 @@ int main(int argc, char *argv[]){ IpplTimings::print(std::string("timing.dat")); auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration time_chrono = std::chrono::duration_cast>(end - start); + std::chrono::duration time_chrono = + std::chrono::duration_cast>(end - start); std::cout << "Elapsed time: " << time_chrono.count() << std::endl; - return 0; } diff --git a/alpine/PenningTrap.cpp b/alpine/PenningTrap.cpp index 87a9a6534..492cb0f76 100644 --- a/alpine/PenningTrap.cpp +++ b/alpine/PenningTrap.cpp @@ -9,14 +9,14 @@ // stype = Field solver type e.g., FFT // lbthres = Load balancing threshold i.e., lbthres*100 is the maximum load imbalance // percentage which can be tolerated and beyond which -// particle load balancing occurs. A value of 0.01 is good for many typical +// particle load balancing occurs. A value of 0.01 is good for many typical // simulations. // ovfactor = Over-allocation factor for the buffers used in the communication. Typical // values are 1.0, 2.0. Value 1.0 means no over-allocation. // Example: // srun ./PenningTrap 128 128 128 10000 300 FFT 0.01 1.0 --info 10 // -// Copyright (c) 2021, Sriramkrishnan Muralikrishnan, +// Copyright (c) 2021, Sriramkrishnan Muralikrishnan, // Paul Scherrer Institut, Villigen PSI, Switzerland // All rights reserved // @@ -32,170 +32,152 @@ // #include "ChargedParticles.hpp" -#include -#include -#include +#include #include +#include #include -#include +#include +#include -#include +#include #include #include "Utility/IpplTimings.h" template struct Newton1D { + double tol = 1e-12; + int max_iter = 20; + double pi = std::acos(-1.0); - double tol = 1e-12; - int max_iter = 20; - double pi = std::acos(-1.0); - - T mu, sigma, u; - - KOKKOS_INLINE_FUNCTION - Newton1D() {} - - KOKKOS_INLINE_FUNCTION - Newton1D(const T& mu_, const T& sigma_, - const T& u_) - : mu(mu_), sigma(sigma_), u(u_) {} - - KOKKOS_INLINE_FUNCTION - ~Newton1D() {} - - KOKKOS_INLINE_FUNCTION - T f(T& x) { - T F; - F = std::erf((x - mu)/(sigma * std::sqrt(2.0))) - - 2 * u + 1; - return F; - } - - KOKKOS_INLINE_FUNCTION - T fprime(T& x) { - T Fprime; - Fprime = (1 / sigma) * std::sqrt(2 / pi) * - std::exp(-0.5 * (std::pow(((x - mu) / sigma),2))); - return Fprime; - } - - KOKKOS_FUNCTION - void solve(T& x) { - int iterations = 0; - while ((iterations < max_iter) && (std::fabs(f(x)) > tol)) { - x = x - (f(x)/fprime(x)); - iterations += 1; - } - } -}; + T mu, sigma, u; + KOKKOS_INLINE_FUNCTION Newton1D() {} -template -struct generate_random { + KOKKOS_INLINE_FUNCTION Newton1D(const T& mu_, const T& sigma_, const T& u_) + : mu(mu_) + , sigma(sigma_) + , u(u_) {} - using view_type = typename ippl::detail::ViewType::view_type; - using value_type = typename T::value_type; - // Output View for the random numbers - view_type x, v; + KOKKOS_INLINE_FUNCTION ~Newton1D() {} - // The GeneratorPool - GeneratorPool rand_pool; + KOKKOS_INLINE_FUNCTION T f(T& x) { + T F; + F = std::erf((x - mu) / (sigma * std::sqrt(2.0))) - 2 * u + 1; + return F; + } - T mu, sigma, minU, maxU; + KOKKOS_INLINE_FUNCTION T fprime(T& x) { + T Fprime; + Fprime = + (1 / sigma) * std::sqrt(2 / pi) * std::exp(-0.5 * (std::pow(((x - mu) / sigma), 2))); + return Fprime; + } - double pi = std::acos(-1.0); + KOKKOS_FUNCTION + void solve(T& x) { + int iterations = 0; + while ((iterations < max_iter) && (std::fabs(f(x)) > tol)) { + x = x - (f(x) / fprime(x)); + iterations += 1; + } + } +}; - // Initialize all members - generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, - T& mu_, T& sigma_, T& minU_, T& maxU_) - : x(x_), v(v_), rand_pool(rand_pool_), - mu(mu_), sigma(sigma_), minU(minU_), maxU(maxU_) {} +template +struct generate_random { + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + // Output View for the random numbers + view_type x, v; - KOKKOS_INLINE_FUNCTION - void operator()(const size_t i) const { - // Get a random number state from the pool for the active thread - typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + // The GeneratorPool + GeneratorPool rand_pool; - value_type u; - for (unsigned d = 0; d < Dim; ++d) { - u = rand_gen.drand(minU[d], maxU[d]); - x(i)[d] = (std::sqrt(pi / 2) * (2 * u - 1)) * - sigma[d] + mu[d]; - Newton1D solver(mu[d], sigma[d], u); - solver.solve(x(i)[d]); - v(i)[d] = rand_gen.normal(0.0, 1.0); - } + T mu, sigma, minU, maxU; - // Give the state back, which will allow another thread to acquire it - rand_pool.free_state(rand_gen); - } + double pi = std::acos(-1.0); + + // Initialize all members + generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, T& mu_, T& sigma_, + T& minU_, T& maxU_) + : x(x_) + , v(v_) + , rand_pool(rand_pool_) + , mu(mu_) + , sigma(sigma_) + , minU(minU_) + , maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + value_type u; + for (unsigned d = 0; d < Dim; ++d) { + u = rand_gen.drand(minU[d], maxU[d]); + x(i)[d] = (std::sqrt(pi / 2) * (2 * u - 1)) * sigma[d] + mu[d]; + Newton1D solver(mu[d], sigma[d], u); + solver.solve(x(i)[d]); + v(i)[d] = rand_gen.normal(0.0, 1.0); + } + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } }; double CDF(const double& x, const double& mu, const double& sigma) { - double cdf = 0.5 * (1.0 + std::erf((x - mu)/(sigma * std::sqrt(2)))); - return cdf; + double cdf = 0.5 * (1.0 + std::erf((x - mu) / (sigma * std::sqrt(2)))); + return cdf; } - KOKKOS_FUNCTION -double PDF(const Vector_t& xvec, const Vector_t&mu, - const Vector_t& sigma, const unsigned Dim) { +double PDF(const Vector_t& xvec, const Vector_t& mu, const Vector_t& sigma, const unsigned Dim) { double pdf = 1.0; - double pi = std::acos(-1.0); + double pi = std::acos(-1.0); for (unsigned d = 0; d < Dim; ++d) { - pdf *= (1.0/ (sigma[d] * std::sqrt(2 * pi))) * - std::exp(-0.5 * std::pow((xvec[d] - mu[d])/sigma[d],2)); + pdf *= (1.0 / (sigma[d] * std::sqrt(2 * pi))) + * std::exp(-0.5 * std::pow((xvec[d] - mu[d]) / sigma[d], 2)); } return pdf; } const char* TestName = "PenningTrap"; -int main(int argc, char *argv[]){ +int main(int argc, char* argv[]) { Ippl ippl(argc, argv); Inform msg("PenningTrap"); - Inform msg2all("PenningTrap",INFORM_ALL_NODES); - + Inform msg2all("PenningTrap", INFORM_ALL_NODES); Ippl::Comm->setDefaultOverallocation(std::atof(argv[8])); - auto start = std::chrono::high_resolution_clock::now(); - ippl::Vector nr = { - std::atoi(argv[1]), - std::atoi(argv[2]), - std::atoi(argv[3]) - }; - - static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total"); - static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); - static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); - static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("pushVelocity"); - static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("pushPosition"); - static IpplTimings::TimerRef updateTimer = IpplTimings::getTimer("update"); - static IpplTimings::TimerRef DummySolveTimer = IpplTimings::getTimer("solveWarmup"); - static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("Solve"); + auto start = std::chrono::high_resolution_clock::now(); + ippl::Vector nr = {std::atoi(argv[1]), std::atoi(argv[2]), std::atoi(argv[3])}; + + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total"); + static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("pushVelocity"); + static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("pushPosition"); + static IpplTimings::TimerRef updateTimer = IpplTimings::getTimer("update"); + static IpplTimings::TimerRef DummySolveTimer = IpplTimings::getTimer("solveWarmup"); + static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("Solve"); static IpplTimings::TimerRef domainDecomposition = IpplTimings::getTimer("loadBalance"); - - + IpplTimings::startTimer(mainTimer); - - size_type totalP = std::atol(argv[4]); - const unsigned int nt = std::atoi(argv[5]); - msg << "Penning Trap " - << endl - << "nt " << nt << " Np= " - << totalP << " grid = " << nr - << endl; + size_type totalP = std::atol(argv[4]); + const unsigned int nt = std::atoi(argv[5]); + msg << "Penning Trap " << endl << "nt " << nt << " Np= " << totalP << " grid = " << nr << endl; using bunch_type = ChargedParticles; - std::unique_ptr P; + std::unique_ptr P; ippl::NDIndex domain; - for (unsigned i = 0; i< Dim; i++) { + for (unsigned i = 0; i < Dim; i++) { domain[i] = ippl::Index(nr[i]); } @@ -211,137 +193,129 @@ int main(int argc, char *argv[]){ double dy = rmax[1] / nr[1]; double dz = rmax[2] / nr[2]; - Vector_t hr = {dx, dy, dz}; - Vector_t origin = {rmin[0], rmin[1], rmin[2]}; - unsigned int nrMax = 2048;// Max grid size in our studies - double dxFinest = rmax[0] / nrMax; - const double dt = 0.5 * dxFinest;//size of timestep + Vector_t hr = {dx, dy, dz}; + Vector_t origin = {rmin[0], rmin[1], rmin[2]}; + unsigned int nrMax = 2048; // Max grid size in our studies + double dxFinest = rmax[0] / nrMax; + const double dt = 0.5 * dxFinest; // size of timestep - const bool isAllPeriodic=true; + const bool isAllPeriodic = true; Mesh_t mesh(domain, hr, origin); FieldLayout_t FL(domain, decomp, isAllPeriodic); PLayout_t PL(FL, mesh); - double Q = -1562.5; + double Q = -1562.5; double Bext = 5.0; - P = std::make_unique(PL,hr,rmin,rmax,decomp,Q); + P = std::make_unique(PL, hr, rmin, rmax, decomp, Q); P->nr_m = nr; - Vector_t length = rmax - rmin; Vector_t mu, sd; - for (unsigned d = 0; dE_m.initialize(mesh, FL); P->rho_m.initialize(mesh, FL); bunch_type bunchBuffer(PL); - P->stype_m = argv[6]; P->initSolver(); - P->time_m = 0.0; + P->time_m = 0.0; P->loadbalancethreshold_m = std::atof(argv[7]); bool isFirstRepartition; - + if ((P->loadbalancethreshold_m != 1.0) && (Ippl::Comm->size() > 1)) { msg << "Starting first repartition" << endl; IpplTimings::startTimer(domainDecomposition); - isFirstRepartition = true; + isFirstRepartition = true; const ippl::NDIndex& lDom = FL.getLocalNDIndex(); - const int nghost = P->rho_m.getNghost(); - using mdrange_type = Kokkos::MDRangePolicy>; - auto rhoview = P->rho_m.getView(); - - Kokkos::parallel_for("Assign initial rho based on PDF", - mdrange_type({nghost, nghost, nghost}, - {rhoview.extent(0) - nghost, - rhoview.extent(1) - nghost, - rhoview.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, - const int j, - const int k) - { - //local to global index conversion - const size_t ig = i + lDom[0].first() - nghost; - const size_t jg = j + lDom[1].first() - nghost; - const size_t kg = k + lDom[2].first() - nghost; - double x = (ig + 0.5) * hr[0] + origin[0]; - double y = (jg + 0.5) * hr[1] + origin[1]; - double z = (kg + 0.5) * hr[2] + origin[2]; - - Vector_t xvec = {x, y, z}; - - rhoview(i, j, k) = PDF(xvec, mu, sd, Dim); - - }); + const int nghost = P->rho_m.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + auto rhoview = P->rho_m.getView(); + + Kokkos::parallel_for( + "Assign initial rho based on PDF", + mdrange_type({nghost, nghost, nghost}, + {rhoview.extent(0) - nghost, rhoview.extent(1) - nghost, + rhoview.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // local to global index conversion + const size_t ig = i + lDom[0].first() - nghost; + const size_t jg = j + lDom[1].first() - nghost; + const size_t kg = k + lDom[2].first() - nghost; + double x = (ig + 0.5) * hr[0] + origin[0]; + double y = (jg + 0.5) * hr[1] + origin[1]; + double z = (kg + 0.5) * hr[2] + origin[2]; + + Vector_t xvec = {x, y, z}; + + rhoview(i, j, k) = PDF(xvec, mu, sd, Dim); + }); Kokkos::fence(); - + P->initializeORB(FL, mesh); P->repartition(FL, mesh, bunchBuffer, isFirstRepartition); IpplTimings::stopTimer(domainDecomposition); } - msg << "First domain decomposition done" << endl; IpplTimings::startTimer(particleCreation); typedef ippl::detail::RegionLayout RegionLayout_t; - const RegionLayout_t& RLayout = PL.getRegionLayout(); + const RegionLayout_t& RLayout = PL.getRegionLayout(); const typename RegionLayout_t::host_mirror_type Regions = RLayout.gethLocalRegions(); Vector_t Nr, Dr, minU, maxU; int myRank = Ippl::Comm->rank(); - for (unsigned d = 0; d rank() < rest ) + if (Ippl::Comm->rank() < rest) ++nloc; P->create(nloc); - Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100 * Ippl::Comm->rank())); Kokkos::parallel_for(nloc, generate_random, Dim>( - P->R.getView(), P->P.getView(), rand_pool64, mu, sd, minU, maxU)); + P->R.getView(), P->P.getView(), rand_pool64, mu, sd, minU, maxU)); Kokkos::fence(); Ippl::Comm->barrier(); - IpplTimings::stopTimer(particleCreation); - - P->q = P->Q_m/totalP; + IpplTimings::stopTimer(particleCreation); + + P->q = P->Q_m / totalP; msg << "particles created and initial conditions assigned " << endl; isFirstRepartition = false; - //The update after the particle creation is not needed as the - //particles are generated locally - + // The update after the particle creation is not needed as the + // particles are generated locally + IpplTimings::startTimer(DummySolveTimer); P->rho_m = 0.0; P->solver_mp->solve(); IpplTimings::stopTimer(DummySolveTimer); - + P->scatterCIC(totalP, 0, hr); IpplTimings::startTimer(SolveTimer); @@ -353,16 +327,15 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpDataTimer); P->dumpData(); P->gatherStatistics(totalP); - //P->dumpLocalDomains(FL, 0); + // P->dumpLocalDomains(FL, 0); IpplTimings::stopTimer(dumpDataTimer); double alpha = -0.5 * dt; - double DrInv = 1.0 / (1 + (std::pow((alpha * Bext), 2))); + double DrInv = 1.0 / (1 + (std::pow((alpha * Bext), 2))); // begin main timestep loop msg << "Starting iterations ..." << endl; - for (unsigned int it=0; itR.getView(); auto Pview = P->P.getView(); auto Eview = P->E.getView(); - double V0 = 30*rmax[2]; - Kokkos::parallel_for("Kick1", P->getLocalNum(), - KOKKOS_LAMBDA(const size_t j){ - double Eext_x = -(Rview(j)[0] - 0.5*rmax[0]) * (V0/(2*std::pow(rmax[2],2))); - double Eext_y = -(Rview(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); - double Eext_z = (Rview(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); - - Eview(j)[0] += Eext_x; - Eview(j)[1] += Eext_y; - Eview(j)[2] += Eext_z; - - Pview(j)[0] += alpha * (Eview(j)[0] + Pview(j)[1] * Bext); - Pview(j)[1] += alpha * (Eview(j)[1] - Pview(j)[0] * Bext); - Pview(j)[2] += alpha * Eview(j)[2]; - }); + double V0 = 30 * rmax[2]; + Kokkos::parallel_for( + "Kick1", P->getLocalNum(), KOKKOS_LAMBDA(const size_t j) { + double Eext_x = -(Rview(j)[0] - 0.5 * rmax[0]) * (V0 / (2 * std::pow(rmax[2], 2))); + double Eext_y = -(Rview(j)[1] - 0.5 * rmax[1]) * (V0 / (2 * std::pow(rmax[2], 2))); + double Eext_z = (Rview(j)[2] - 0.5 * rmax[2]) * (V0 / (std::pow(rmax[2], 2))); + + Eview(j)[0] += Eext_x; + Eview(j)[1] += Eext_y; + Eview(j)[2] += Eext_z; + + Pview(j)[0] += alpha * (Eview(j)[0] + Pview(j)[1] * Bext); + Pview(j)[1] += alpha * (Eview(j)[1] - Pview(j)[0] * Bext); + Pview(j)[2] += alpha * Eview(j)[2]; + }); IpplTimings::stopTimer(PTimer); - //drift + // drift IpplTimings::startTimer(RTimer); P->R = P->R + dt * P->P; IpplTimings::stopTimer(RTimer); - //Since the particles have moved spatially update them to correct processors - IpplTimings::startTimer(updateTimer); + // Since the particles have moved spatially update them to correct processors + IpplTimings::startTimer(updateTimer); PL.update(*P, bunchBuffer); IpplTimings::stopTimer(updateTimer); // Domain Decomposition - if (P->balance(totalP, it+1)) { - msg << "Starting repartition" << endl; - IpplTimings::startTimer(domainDecomposition); - P->repartition(FL, mesh, bunchBuffer, isFirstRepartition); - IpplTimings::stopTimer(domainDecomposition); - //IpplTimings::startTimer(dumpDataTimer); - //P->dumpLocalDomains(FL, it+1); - //IpplTimings::stopTimer(dumpDataTimer); + if (P->balance(totalP, it + 1)) { + msg << "Starting repartition" << endl; + IpplTimings::startTimer(domainDecomposition); + P->repartition(FL, mesh, bunchBuffer, isFirstRepartition); + IpplTimings::stopTimer(domainDecomposition); + // IpplTimings::startTimer(dumpDataTimer); + // P->dumpLocalDomains(FL, it+1); + // IpplTimings::stopTimer(dumpDataTimer); } - - //scatter the charge onto the underlying grid - P->scatterCIC(totalP, it+1, hr); - //Field solve + // scatter the charge onto the underlying grid + P->scatterCIC(totalP, it + 1, hr); + + // Field solve IpplTimings::startTimer(SolveTimer); P->solver_mp->solve(); IpplTimings::stopTimer(SolveTimer); @@ -423,26 +396,32 @@ int main(int argc, char *argv[]){ // gather E field P->gatherCIC(); - //kick + // kick IpplTimings::startTimer(PTimer); auto R2view = P->R.getView(); auto P2view = P->P.getView(); auto E2view = P->E.getView(); - Kokkos::parallel_for("Kick2", P->getLocalNum(), - KOKKOS_LAMBDA(const size_t j){ - double Eext_x = -(R2view(j)[0] - 0.5*rmax[0]) * (V0/(2*std::pow(rmax[2],2))); - double Eext_y = -(R2view(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); - double Eext_z = (R2view(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); - - E2view(j)[0] += Eext_x; - E2view(j)[1] += Eext_y; - E2view(j)[2] += Eext_z; - P2view(j)[0] = DrInv * ( P2view(j)[0] + alpha * (E2view(j)[0] - + P2view(j)[1] * Bext + alpha * Bext * E2view(j)[1]) ); - P2view(j)[1] = DrInv * ( P2view(j)[1] + alpha * (E2view(j)[1] - - P2view(j)[0] * Bext - alpha * Bext * E2view(j)[0]) ); - P2view(j)[2] += alpha * E2view(j)[2]; - }); + Kokkos::parallel_for( + "Kick2", P->getLocalNum(), KOKKOS_LAMBDA(const size_t j) { + double Eext_x = -(R2view(j)[0] - 0.5 * rmax[0]) * (V0 / (2 * std::pow(rmax[2], 2))); + double Eext_y = -(R2view(j)[1] - 0.5 * rmax[1]) * (V0 / (2 * std::pow(rmax[2], 2))); + double Eext_z = (R2view(j)[2] - 0.5 * rmax[2]) * (V0 / (std::pow(rmax[2], 2))); + + E2view(j)[0] += Eext_x; + E2view(j)[1] += Eext_y; + E2view(j)[2] += Eext_z; + P2view(j)[0] = + DrInv + * (P2view(j)[0] + + alpha + * (E2view(j)[0] + P2view(j)[1] * Bext + alpha * Bext * E2view(j)[1])); + P2view(j)[1] = + DrInv + * (P2view(j)[1] + + alpha + * (E2view(j)[1] - P2view(j)[0] * Bext - alpha * Bext * E2view(j)[0])); + P2view(j)[2] += alpha * E2view(j)[2]; + }); IpplTimings::stopTimer(PTimer); P->time_m += dt; @@ -450,7 +429,7 @@ int main(int argc, char *argv[]){ P->dumpData(); P->gatherStatistics(totalP); IpplTimings::stopTimer(dumpDataTimer); - msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; + msg << "Finished time step: " << it + 1 << " time: " << P->time_m << endl; } msg << "Penning Trap: End." << endl; @@ -459,7 +438,8 @@ int main(int argc, char *argv[]){ IpplTimings::print(std::string("timing.dat")); auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration time_chrono = std::chrono::duration_cast>(end - start); + std::chrono::duration time_chrono = + std::chrono::duration_cast>(end - start); std::cout << "Elapsed time: " << time_chrono.count() << std::endl; return 0; diff --git a/alpine/UniformPlasmaTest.cpp b/alpine/UniformPlasmaTest.cpp index 62924052d..5383dbc1d 100644 --- a/alpine/UniformPlasmaTest.cpp +++ b/alpine/UniformPlasmaTest.cpp @@ -30,13 +30,13 @@ // #include "ChargedParticles.hpp" -#include -#include +#include #include #include -#include +#include +#include -#include +#include #include #include "Utility/IpplTimings.h" @@ -45,79 +45,71 @@ const char* TestName = "UniformPlasmaTest"; template struct generate_random { + using view_type = typename ippl::detail::ViewType::view_type; + // Output View for the random numbers + view_type vals; - using view_type = typename ippl::detail::ViewType::view_type; - // Output View for the random numbers - view_type vals; + // The GeneratorPool + GeneratorPool rand_pool; - // The GeneratorPool - GeneratorPool rand_pool; + T start, end; - T start, end; + // Initialize all members + generate_random(view_type vals_, GeneratorPool rand_pool_, T start_, T end_) + : vals(vals_) + , rand_pool(rand_pool_) + , start(start_) + , end(end_) {} - // Initialize all members - generate_random(view_type vals_, GeneratorPool rand_pool_, - T start_, T end_) - : vals(vals_), rand_pool(rand_pool_), - start(start_), end(end_) {} + KOKKOS_INLINE_FUNCTION void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); - KOKKOS_INLINE_FUNCTION - void operator()(const size_t i) const { - // Get a random number state from the pool for the active thread - typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + // Draw samples numbers from the pool as double in the range [start, end) + for (unsigned d = 0; d < Dim; ++d) { + vals(i)[d] = rand_gen.drand(start[d], end[d]); + } - // Draw samples numbers from the pool as double in the range [start, end) - for (unsigned d = 0; d < Dim; ++d) { - vals(i)[d] = rand_gen.drand(start[d], end[d]); + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); } - - // Give the state back, which will allow another thread to acquire it - rand_pool.free_state(rand_gen); - } }; -int main(int argc, char *argv[]){ +int main(int argc, char* argv[]) { Ippl ippl(argc, argv); Inform msg("UniformPlasmaTest"); - Inform msg2all(argv[0],INFORM_ALL_NODES); + Inform msg2all(argv[0], INFORM_ALL_NODES); Ippl::Comm->setDefaultOverallocation(std::atof(argv[8])); - auto start = std::chrono::high_resolution_clock::now(); - ippl::Vector nr = { - std::atoi(argv[1]), - std::atoi(argv[2]), - std::atoi(argv[3]) - }; - - static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total"); - static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); - static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); - static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("pushVelocity"); - static IpplTimings::TimerRef temp = IpplTimings::getTimer("randomMove"); - static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("pushPosition"); - static IpplTimings::TimerRef updateTimer = IpplTimings::getTimer("update"); - static IpplTimings::TimerRef DummySolveTimer = IpplTimings::getTimer("solveWarmup"); - static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("solve"); + auto start = std::chrono::high_resolution_clock::now(); + ippl::Vector nr = {std::atoi(argv[1]), std::atoi(argv[2]), std::atoi(argv[3])}; + + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total"); + static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("pushVelocity"); + static IpplTimings::TimerRef temp = IpplTimings::getTimer("randomMove"); + static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("pushPosition"); + static IpplTimings::TimerRef updateTimer = IpplTimings::getTimer("update"); + static IpplTimings::TimerRef DummySolveTimer = IpplTimings::getTimer("solveWarmup"); + static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("solve"); static IpplTimings::TimerRef domainDecomposition = IpplTimings::getTimer("loadBalance"); IpplTimings::startTimer(mainTimer); const size_type totalP = std::atoll(argv[4]); - const unsigned int nt = std::atoi(argv[5]); + const unsigned int nt = std::atoi(argv[5]); - msg << "Uniform Plasma Test" - << endl - << "nt " << nt << " Np= " - << totalP << " grid = " << nr - << endl; + msg << "Uniform Plasma Test" << endl + << "nt " << nt << " Np= " << totalP << " grid = " << nr << endl; using bunch_type = ChargedParticles; - std::unique_ptr P; + std::unique_ptr P; ippl::NDIndex domain; - for (unsigned i = 0; i< Dim; i++) { + for (unsigned i = 0; i < Dim; i++) { domain[i] = ippl::Index(nr[i]); } @@ -133,24 +125,24 @@ int main(int argc, char *argv[]){ double dy = rmax[1] / nr[1]; double dz = rmax[2] / nr[2]; - Vector_t hr = {dx, dy, dz}; + Vector_t hr = {dx, dy, dz}; Vector_t origin = {rmin[0], rmin[1], rmin[2]}; const double dt = 1.0; - const bool isAllPeriodic=true; + const bool isAllPeriodic = true; Mesh_t mesh(domain, hr, origin); FieldLayout_t FL(domain, decomp, isAllPeriodic); PLayout_t PL(FL, mesh); double Q = -1562.5; - P = std::make_unique(PL,hr,rmin,rmax,decomp,Q); + P = std::make_unique(PL, hr, rmin, rmax, decomp, Q); - P->nr_m = nr; + P->nr_m = nr; size_type nloc = totalP / Ippl::Comm->size(); - int rest = (int) (totalP - nloc * Ippl::Comm->size()); + int rest = (int)(totalP - nloc * Ippl::Comm->size()); - if ( Ippl::Comm->rank() < rest ) + if (Ippl::Comm->rank() < rest) ++nloc; IpplTimings::startTimer(particleCreation); @@ -158,17 +150,16 @@ int main(int argc, char *argv[]){ const ippl::NDIndex& lDom = FL.getLocalNDIndex(); Vector_t Rmin, Rmax; - for (unsigned d = 0; d rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); - Kokkos::parallel_for(nloc, - generate_random, Dim>( - P->R.getView(), rand_pool64, Rmin, Rmax)); + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100 * Ippl::Comm->rank())); + Kokkos::parallel_for(nloc, generate_random, Dim>( + P->R.getView(), rand_pool64, Rmin, Rmax)); Kokkos::fence(); - P->q = P->Q_m/totalP; + P->q = P->Q_m / totalP; P->P = 0.0; IpplTimings::stopTimer(particleCreation); @@ -177,7 +168,7 @@ int main(int argc, char *argv[]){ bunch_type bunchBuffer(PL); - IpplTimings::startTimer(updateTimer); + IpplTimings::startTimer(updateTimer); PL.update(*P, bunchBuffer); IpplTimings::stopTimer(updateTimer); @@ -185,15 +176,14 @@ int main(int argc, char *argv[]){ P->stype_m = argv[6]; P->initSolver(); - P->time_m = 0.0; + P->time_m = 0.0; P->loadbalancefreq_m = std::atoi(argv[7]); - + IpplTimings::startTimer(DummySolveTimer); P->rho_m = 0.0; P->solver_mp->solve(); IpplTimings::stopTimer(DummySolveTimer); - P->scatterCIC(totalP, 0, hr); P->initializeORB(FL, mesh); bool fromAnalyticDensity = false; @@ -209,12 +199,10 @@ int main(int argc, char *argv[]){ P->gatherStatistics(totalP); IpplTimings::stopTimer(dumpDataTimer); - // begin main timestep loop msg << "Starting iterations ..." << endl; - //P->gatherStatistics(totalP); - for (unsigned int it=0; itgatherStatistics(totalP); + for (unsigned int it = 0; it < nt; it++) { // LeapFrog time stepping https://en.wikipedia.org/wiki/Leapfrog_integration // Here, we assume a constant charge-to-mass ratio of -1 for // all the particles hence eliminating the need to store mass as @@ -226,35 +214,33 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(temp); Kokkos::parallel_for(P->getLocalNum(), - generate_random, Dim>( - P->P.getView(), rand_pool64, -hr, hr)); + generate_random, Dim>( + P->P.getView(), rand_pool64, -hr, hr)); Kokkos::fence(); IpplTimings::stopTimer(temp); - //drift + // drift IpplTimings::startTimer(RTimer); P->R = P->R + dt * P->P; IpplTimings::stopTimer(RTimer); - //Since the particles have moved spatially update them to correct processors - IpplTimings::startTimer(updateTimer); + // Since the particles have moved spatially update them to correct processors + IpplTimings::startTimer(updateTimer); PL.update(*P, bunchBuffer); IpplTimings::stopTimer(updateTimer); - // Domain Decomposition - if (P->balance(totalP, it+1)) { - msg << "Starting repartition" << endl; - IpplTimings::startTimer(domainDecomposition); - P->repartition(FL, mesh, bunchBuffer, fromAnalyticDensity); - IpplTimings::stopTimer(domainDecomposition); + if (P->balance(totalP, it + 1)) { + msg << "Starting repartition" << endl; + IpplTimings::startTimer(domainDecomposition); + P->repartition(FL, mesh, bunchBuffer, fromAnalyticDensity); + IpplTimings::stopTimer(domainDecomposition); } - //scatter the charge onto the underlying grid - P->scatterCIC(totalP, it+1, hr); + // scatter the charge onto the underlying grid + P->scatterCIC(totalP, it + 1, hr); - //Field solve + // Field solve IpplTimings::startTimer(SolveTimer); P->solver_mp->solve(); IpplTimings::stopTimer(SolveTimer); @@ -262,7 +248,7 @@ int main(int argc, char *argv[]){ // gather E field P->gatherCIC(); - //kick + // kick IpplTimings::startTimer(PTimer); P->P = P->P - 0.5 * dt * P->E; IpplTimings::stopTimer(PTimer); @@ -272,7 +258,7 @@ int main(int argc, char *argv[]){ P->dumpData(); P->gatherStatistics(totalP); IpplTimings::stopTimer(dumpDataTimer); - msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; + msg << "Finished time step: " << it + 1 << " time: " << P->time_m << endl; } msg << "Uniform Plasma Test: End." << endl; @@ -281,7 +267,8 @@ int main(int argc, char *argv[]){ IpplTimings::print(std::string("timing.dat")); auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration time_chrono = std::chrono::duration_cast>(end - start); + std::chrono::duration time_chrono = + std::chrono::duration_cast>(end - start); std::cout << "Elapsed time: " << time_chrono.count() << std::endl; return 0; diff --git a/hooks/create-hook-symlink.sh b/hooks/create-hook-symlink.sh new file mode 100755 index 000000000..038393034 --- /dev/null +++ b/hooks/create-hook-symlink.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# OPAL utility script for setting up git hooks (with modifications) + +HOOK_NAMES="applypatch-msg pre-applypatch post-applypatch pre-commit prepare-commit-msg commit-msg post-commit pre-rebase post-checkout post-merge pre-receive update post-receive post-update pre-auto-gc" + +GITBASEDIR=`git rev-parse --show-toplevel` +HOOK_DIR=$GITBASEDIR/.git/hooks + +for hook in $HOOK_NAMES; do + # If the hook already exists, is executable, and is not a symlink + if [ ! -h $HOOK_DIR/$hook -a -x $HOOK_DIR/$hook ]; then + mv $HOOK_DIR/$hook $HOOK_DIR/$hook.local + fi + # if the hook is defined in hooks/, then symlink it in the git hooks + if [ -f $GITBASEDIR/hooks/$hook ]; then + ln -sf $GITBASEDIR/hooks/$hook $HOOK_DIR/$hook + fi +done diff --git a/hooks/pre-commit b/hooks/pre-commit new file mode 100755 index 000000000..4483e750b --- /dev/null +++ b/hooks/pre-commit @@ -0,0 +1,5 @@ +#!/bin/sh +git diff --cached --name-only | grep -e '\.h$' -e '\.cpp$' -e '\.hpp$' | while read FILE; do + clang-format -i "$FILE" + git add "$FILE" +done diff --git a/src/AmrParticle/AmrParticleBase.h b/src/AmrParticle/AmrParticleBase.h index 3908b86c5..79dd447b4 100644 --- a/src/AmrParticle/AmrParticleBase.h +++ b/src/AmrParticle/AmrParticleBase.h @@ -17,8 +17,8 @@ // const ParticleAttrib >& pp, // int lbase = 0, int lfine = -1) const; // -// Copyright (c) 2016 - 2020, Matthias Frey, Uldis Locans, Paul Scherrer Institut, Villigen PSI, Switzerland -// All rights reserved +// Copyright (c) 2016 - 2020, Matthias Frey, Uldis Locans, Paul Scherrer Institut, Villigen PSI, +// Switzerland All rights reserved // // Implemented as part of the PhD thesis // "Precise Simulations of Multibunches in High Intensity Cyclotrons" @@ -42,47 +42,45 @@ #include "AmrParticleLevelCounter.h" -template +template class AmrParticleBase : public IpplParticleBase { - public: - typedef typename PLayout::ParticlePos_t ParticlePos_t; - typedef typename PLayout::ParticleIndex_t ParticleIndex_t; - typedef typename PLayout::SingleParticlePos_t SingleParticlePos_t; - typedef typename PLayout::AmrField_t AmrField_t; - typedef typename PLayout::AmrVectorField_t AmrVectorField_t; - typedef typename PLayout::AmrScalarFieldContainer_t AmrScalarFieldContainer_t; - typedef typename PLayout::AmrVectorFieldContainer_t AmrVectorFieldContainer_t; - - typedef long SortListIndex_t; - typedef std::vector SortList_t; - typedef std::vector attrib_container_t; - - ParticleIndex_t Level; // m_lev - ParticleIndex_t Grid; // m_grid - + typedef typename PLayout::ParticlePos_t ParticlePos_t; + typedef typename PLayout::ParticleIndex_t ParticleIndex_t; + typedef typename PLayout::SingleParticlePos_t SingleParticlePos_t; + typedef typename PLayout::AmrField_t AmrField_t; + typedef typename PLayout::AmrVectorField_t AmrVectorField_t; + typedef typename PLayout::AmrScalarFieldContainer_t AmrScalarFieldContainer_t; + typedef typename PLayout::AmrVectorFieldContainer_t AmrVectorFieldContainer_t; + + typedef long SortListIndex_t; + typedef std::vector SortList_t; + typedef std::vector attrib_container_t; + + ParticleIndex_t Level; // m_lev + ParticleIndex_t Grid; // m_grid + typedef AmrParticleLevelCounter ParticleLevelCounter_t; - + public: - AmrParticleBase(); - + AmrParticleBase(PLayout* layout); - + ~AmrParticleBase() {} - - //initialize AmrParticleBase class - add level and grid variables to attribute list + + // initialize AmrParticleBase class - add level and grid variables to attribute list void initializeAmr() { this->addAttribute(Level); this->addAttribute(Grid); } - + const ParticleLevelCounter_t& getLocalNumPerLevel() const; - + ParticleLevelCounter_t& getLocalNumPerLevel(); - + void setLocalNumPerLevel(const ParticleLevelCounter_t& LocalNumPerLevel); - + /* Functions of IpplParticleBase adpated to * work with AmrParticleLevelCounter: * - createWithID() @@ -90,19 +88,19 @@ class AmrParticleBase : public IpplParticleBase { * - destroy() * - performDestroy() */ - + void createWithID(unsigned id); void create(size_t M); void destroy(size_t M, size_t I, bool doNow = false); - + void performDestroy(bool updateLocalNum = false); - + // Update the particle object after a timestep. This routine will change // our local, total, create particle counts properly. void update(); - + /*! * There's is NO check performed if lev_min <= lev_max and * lev_min >= 0. @@ -111,36 +109,36 @@ class AmrParticleBase : public IpplParticleBase { * @param isRegrid is true if we are updating the grids (default: false) */ void update(int lev_min, int lev_max, bool isRegrid = false); - + // Update the particle object after a timestep. This routine will change // our local, total, create particle counts properly. void update(const ParticleAttrib& canSwap); - + // sort particles based on the grid and level that they belong to void sort(); - + // sort the particles given a sortlist - void sort(SortList_t &sortlist); - + void sort(SortList_t& sortlist); + PLayout& getAmrLayout() { return this->getLayout(); } const PLayout& getAmrLayout() const { return this->getLayout(); } - + /*! * This method is used in the AmrPartBunch::boundp() function * in order to avoid multpile particle mappings during the * mesh regridding process. - * + * * @param forbidTransform true if we don't want to map particles onto * \f$[-1, 1]^3\f$ */ inline void setForbidTransform(bool forbidTransform); - + /*! * @returns true if we are not mapping the particles onto * \f$[-1, 1]^3\f$ during an update call. */ inline bool isForbidTransform() const; - + /*! * Linear mapping to AMReX computation domain [-1, 1]^3 including the Lorentz * transform. All dimensions @@ -151,45 +149,43 @@ class AmrParticleBase : public IpplParticleBase { * @returns scaling factor */ const double& domainMapping(bool inverse = false); - + /*! * This function is used during the cell tagging routines. * @returns the scaling factor of the particle domain mapping. */ inline const double& getScalingFactor() const; - - + void setLorentzFactor(const Vector_t& lorentzFactor); - -// void lorentzTransform(bool inverse = false); - + + // void lorentzTransform(bool inverse = false); + private: - void getLocalBounds_m(Vector_t &rmin, Vector_t &rmax); - void getGlobalBounds_m(Vector_t &rmin, Vector_t &rmax); - + void getLocalBounds_m(Vector_t& rmin, Vector_t& rmax); + void getGlobalBounds_m(Vector_t& rmin, Vector_t& rmax); + protected: IpplTimings::TimerRef updateParticlesTimer_m; IpplTimings::TimerRef sortParticlesTimer_m; IpplTimings::TimerRef domainMappingTimer_m; - - bool forbidTransform_m; ///< To avoid multiple transformations during regrid - + + bool forbidTransform_m; ///< To avoid multiple transformations during regrid + /*! * Scaling factor for particle coordinate transform * (used for Poisson solve and particle-to-core distribution) */ double scale_m; - + /*! * Lorentz factor used for the domain mapping. * Is updated in AmrBoxLib - * + * */ Vector_t lorentzFactor_m; - -// bool isLorentzTransformed_m; - - + + // bool isLorentzTransformed_m; + private: ParticleLevelCounter_t LocalNumPerLevel_m; }; diff --git a/src/AmrParticle/AmrParticleBase.hpp b/src/AmrParticle/AmrParticleBase.hpp index 2b7e8c6fb..d964502f9 100644 --- a/src/AmrParticle/AmrParticleBase.hpp +++ b/src/AmrParticle/AmrParticleBase.hpp @@ -17,8 +17,8 @@ // const ParticleAttrib >& pp, // int lbase = 0, int lfine = -1) const; // -// Copyright (c) 2016 - 2020, Matthias Frey, Uldis Locans, Paul Scherrer Institut, Villigen PSI, Switzerland -// All rights reserved +// Copyright (c) 2016 - 2020, Matthias Frey, Uldis Locans, Paul Scherrer Institut, Villigen PSI, +// Switzerland All rights reserved // // Implemented as part of the PhD thesis // "Precise Simulations of Multibunches in High Intensity Cyclotrons" @@ -36,93 +36,81 @@ #ifndef AMR_PARTICLE_BASE_HPP #define AMR_PARTICLE_BASE_HPP -#include #include +#include -template -AmrParticleBase::AmrParticleBase() : forbidTransform_m(false), - scale_m(1.0), - lorentzFactor_m(1.0, 1.0, 1.0), -// isLorentzTransformed_m(false), - LocalNumPerLevel_m() -{ +template +AmrParticleBase::AmrParticleBase() + : forbidTransform_m(false) + , scale_m(1.0) + , lorentzFactor_m(1.0, 1.0, 1.0) + , + // isLorentzTransformed_m(false), + LocalNumPerLevel_m() { updateParticlesTimer_m = IpplTimings::getTimer("AMR update particles"); - sortParticlesTimer_m = IpplTimings::getTimer("AMR sort particles"); - domainMappingTimer_m = IpplTimings::getTimer("AMR map particles"); + sortParticlesTimer_m = IpplTimings::getTimer("AMR sort particles"); + domainMappingTimer_m = IpplTimings::getTimer("AMR map particles"); } - -template +template AmrParticleBase::AmrParticleBase(PLayout* layout) - : IpplParticleBase(layout), - forbidTransform_m(false), - scale_m(1.0), - lorentzFactor_m(1.0, 1.0, 1.0), -// isLorentzTransformed_m(false), - LocalNumPerLevel_m() -{ + : IpplParticleBase(layout) + , forbidTransform_m(false) + , scale_m(1.0) + , lorentzFactor_m(1.0, 1.0, 1.0) + , + // isLorentzTransformed_m(false), + LocalNumPerLevel_m() { updateParticlesTimer_m = IpplTimings::getTimer("AMR update particles"); - sortParticlesTimer_m = IpplTimings::getTimer("AMR sort particles"); - domainMappingTimer_m = IpplTimings::getTimer("AMR map particles"); + sortParticlesTimer_m = IpplTimings::getTimer("AMR sort particles"); + domainMappingTimer_m = IpplTimings::getTimer("AMR map particles"); } - -template +template const typename AmrParticleBase::ParticleLevelCounter_t& - AmrParticleBase::getLocalNumPerLevel() const -{ +AmrParticleBase::getLocalNumPerLevel() const { return LocalNumPerLevel_m; } - -template +template typename AmrParticleBase::ParticleLevelCounter_t& - AmrParticleBase::getLocalNumPerLevel() -{ +AmrParticleBase::getLocalNumPerLevel() { return LocalNumPerLevel_m; } - -template -void AmrParticleBase::setLocalNumPerLevel( - const ParticleLevelCounter_t& LocalNumPerLevel) -{ +template +void AmrParticleBase::setLocalNumPerLevel(const ParticleLevelCounter_t& LocalNumPerLevel) { LocalNumPerLevel_m = LocalNumPerLevel; } - -template +template void AmrParticleBase::destroy(size_t M, size_t I, bool doNow) { /* if the particles are deleted directly * we need to update the particle level count */ if (M > 0) { - if ( doNow ) { + if (doNow) { for (size_t ip = I; ip < M + I; ++ip) - --LocalNumPerLevel_m[ Level[ip] ]; + --LocalNumPerLevel_m[Level[ip]]; } IpplParticleBase::destroy(M, I, doNow); } } - -template +template void AmrParticleBase::performDestroy(bool updateLocalNum) { // nothing to do if destroy list is empty - if ( this->DestroyList.empty() ) + if (this->DestroyList.empty()) return; - - if ( updateLocalNum ) { - typedef std::vector< std::pair > dlist_t; - dlist_t::const_iterator curr = this->DestroyList.begin(); + + if (updateLocalNum) { + typedef std::vector > dlist_t; + dlist_t::const_iterator curr = this->DestroyList.begin(); const dlist_t::const_iterator last = this->DestroyList.end(); - - while ( curr != last ) { - for (size_t ip = curr->first; - ip < curr->first + curr->second; - ++ip) - { - --LocalNumPerLevel_m[ Level[ip] ]; + + while (curr != last) { + for (size_t ip = curr->first; ip < curr->first + curr->second; ++ip) { + --LocalNumPerLevel_m[Level[ip]]; } ++curr; } @@ -130,159 +118,145 @@ void AmrParticleBase::performDestroy(bool updateLocalNum) { IpplParticleBase::performDestroy(updateLocalNum); } - -template +template void AmrParticleBase::create(size_t M) { - -// size_t localnum = LocalNumPerLevel_m[0]; - + // size_t localnum = LocalNumPerLevel_m[0]; + // particles are created at the coarsest level LocalNumPerLevel_m[0] += M; - + IpplParticleBase::create(M); - -// for (size_t i = localnum; i < LocalNumPerLevel_m[0]; ++i) { -// this->Grid[i] = 0; -// this->Level[i] = 0; -// } -} + // for (size_t i = localnum; i < LocalNumPerLevel_m[0]; ++i) { + // this->Grid[i] = 0; + // this->Level[i] = 0; + // } +} -template +template void AmrParticleBase::createWithID(unsigned id) { - -// size_t localnum = LocalNumPerLevel_m[0]; - + // size_t localnum = LocalNumPerLevel_m[0]; + ++LocalNumPerLevel_m[0]; - + IpplParticleBase::createWithID(id); - -// this->Grid[localnum] = 0; -// this->Level[localnum] = 0; -} + // this->Grid[localnum] = 0; + // this->Level[localnum] = 0; +} -template +template void AmrParticleBase::update() { // update all level this->update(0, -1); } - -template +template void AmrParticleBase::update(int lev_min, int lev_max, bool isRegrid) { - IpplTimings::startTimer(updateParticlesTimer_m); // make sure we've been initialized - PLayout *Layout = &this->getLayout(); + PLayout* Layout = &this->getLayout(); PAssert(Layout != 0); - + // ask the layout manager to update our atoms, etc. Layout->update(*this, lev_min, lev_max, isRegrid); - - //sort the particles by grid and level + + // sort the particles by grid and level sort(); - + IpplTimings::stopTimer(updateParticlesTimer_m); } -template +template void AmrParticleBase::update(const ParticleAttrib& canSwap) { - IpplTimings::startTimer(updateParticlesTimer_m); // make sure we've been initialized - PLayout *Layout = &this->getLayout(); + PLayout* Layout = &this->getLayout(); PAssert(Layout != 0); - + // ask the layout manager to update our atoms, etc. Layout->update(*this, &canSwap); - - //sort the particles by grid and level + + // sort the particles by grid and level sort(); - + IpplTimings::stopTimer(updateParticlesTimer_m); } - -template +template void AmrParticleBase::sort() { - IpplTimings::startTimer(sortParticlesTimer_m); size_t LocalNum = this->getLocalNum(); - SortList_t slist1(LocalNum); //slist1 holds the index of where each element should go - SortList_t slist2(LocalNum); //slist2 holds the index of which element should go in this position + SortList_t slist1(LocalNum); // slist1 holds the index of where each element should go + SortList_t slist2( + LocalNum); // slist2 holds the index of which element should go in this position - //sort the lists by grid and level - //slist1 hold the index of where each element should go in the list + // sort the lists by grid and level + // slist1 hold the index of where each element should go in the list std::iota(slist1.begin(), slist1.end(), 0); - std::sort(slist1.begin(), slist1.end(), [this](const SortListIndex_t &i, - const SortListIndex_t &j) - { - return (this->Level[i] < this->Level[j] || - (this->Level[i] == this->Level[j] && this->Grid[i] < this->Grid[j])); - }); - - //slist2 holds the index of which element should go in this position + std::sort(slist1.begin(), slist1.end(), + [this](const SortListIndex_t& i, const SortListIndex_t& j) { + return (this->Level[i] < this->Level[j] + || (this->Level[i] == this->Level[j] && this->Grid[i] < this->Grid[j])); + }); + + // slist2 holds the index of which element should go in this position for (unsigned int i = 0; i < LocalNum; ++i) slist2[slist1[i]] = i; - //sort the array according to slist2 + // sort the array according to slist2 this->sort(slist2); IpplTimings::stopTimer(sortParticlesTimer_m); } - -template -void AmrParticleBase::sort(SortList_t &sortlist) { +template +void AmrParticleBase::sort(SortList_t& sortlist) { attrib_container_t::iterator abeg = this->begin(); - attrib_container_t::iterator aend = this->end(); - for ( ; abeg != aend; ++abeg ) - (*abeg)->sort(sortlist); + attrib_container_t::iterator aend = this->end(); + for (; abeg != aend; ++abeg) + (*abeg)->sort(sortlist); } - -template +template void AmrParticleBase::setForbidTransform(bool forbidTransform) { forbidTransform_m = forbidTransform; } - -template +template bool AmrParticleBase::isForbidTransform() const { return forbidTransform_m; } - -template +template const double& AmrParticleBase::domainMapping(bool inverse) { IpplTimings::startTimer(domainMappingTimer_m); - + double scale = scale_m; - + Vector_t gamma = lorentzFactor_m; - - if ( !inverse ) { -// if ( !this->DestroyList.empty() ) { -// this->performDestroy(true); -// } + + if (!inverse) { + // if ( !this->DestroyList.empty() ) { + // this->performDestroy(true); + // } Vector_t rmin = Vector_t(0.0, 0.0, 0.0); Vector_t rmax = Vector_t(0.0, 0.0, 0.0); - + getGlobalBounds_m(rmin, rmax); - + /* in case of 1 particle, the bunch is rotated * transformed to the local frame such that this * particle lies on the origin (0, 0, 0). */ - if ( this->getTotalNum() == 1 || - (rmin == Vector_t(0.0, 0.0, 0.0) && rmax == Vector_t( 0.0, 0.0, 0.0)) ) { + if (this->getTotalNum() == 1 + || (rmin == Vector_t(0.0, 0.0, 0.0) && rmax == Vector_t(0.0, 0.0, 0.0))) { rmin = Vector_t(-1.0, -1.0, -1.0); - rmax = Vector_t( 1.0, 1.0, 1.0); + rmax = Vector_t(1.0, 1.0, 1.0); } /* Lorentz transfomration factor @@ -297,106 +271,102 @@ const double& AmrParticleBase::domainMapping(bool inverse) { const auto& lo = layout.lowerBound; const auto& hi = layout.upperBound; - Vector_t tmp = Vector_t(std::max( std::abs(rmin[0] / lo[0]), std::abs(rmax[0] / hi[0]) ), - std::max( std::abs(rmin[1] / lo[1]), std::abs(rmax[1] / hi[1]) ), - std::max( std::abs(rmin[2] / lo[2]), std::abs(rmax[2] / hi[2]) ) - ); - - scale = std::max( tmp[0], tmp[1] ); - scale = std::max( scale, tmp[2] ); + Vector_t tmp = Vector_t(std::max(std::abs(rmin[0] / lo[0]), std::abs(rmax[0] / hi[0])), + std::max(std::abs(rmin[1] / lo[1]), std::abs(rmax[1] / hi[1])), + std::max(std::abs(rmin[2] / lo[2]), std::abs(rmax[2] / hi[2]))); + + scale = std::max(tmp[0], tmp[1]); + scale = std::max(scale, tmp[2]); } else { // inverse Lorentz transform gamma = 1.0 / gamma; } - - if ( std::isnan(scale) || std::isinf(scale) ) { - if ( !Ippl::Comm->myNode() ) - throw IpplException("AmrParticleBase::domainMapping()", - "Scale factor is Nan or Inf"); + + if (std::isnan(scale) || std::isinf(scale)) { + if (!Ippl::Comm->myNode()) + throw IpplException("AmrParticleBase::domainMapping()", "Scale factor is Nan or Inf"); } Vector_t vscale = Vector_t(scale, scale, scale); - + // Lorentz transform + mapping to [-1, 1] for (unsigned int i = 0; i < this->getLocalNum(); ++i) { this->R[i] = this->R[i] * gamma / vscale; } - + scale_m = 1.0 / scale; - + IpplTimings::stopTimer(domainMappingTimer_m); - + return scale_m; } - -template +template const double& AmrParticleBase::getScalingFactor() const { return scale_m; } -template +template void AmrParticleBase::setLorentzFactor(const Vector_t& lorentzFactor) { lorentzFactor_m = lorentzFactor; } - -template -void AmrParticleBase::getLocalBounds_m(Vector_t &rmin, Vector_t &rmax) { +template +void AmrParticleBase::getLocalBounds_m(Vector_t& rmin, Vector_t& rmax) { const size_t localNum = this->getLocalNum(); if (localNum == 0) { double max = 1e10; - rmin = Vector_t( max, max, max); - rmax = Vector_t(-max, -max, -max); + rmin = Vector_t(max, max, max); + rmax = Vector_t(-max, -max, -max); return; } rmin = this->R[0]; rmax = this->R[0]; - for (size_t i = 1; i < localNum; ++ i) { - for (unsigned short d = 0; d < 3u; ++ d) { - if (rmin(d) > this->R[i](d)) rmin(d) = this->R[i](d); - else if (rmax(d) < this->R[i](d)) rmax(d) = this->R[i](d); + for (size_t i = 1; i < localNum; ++i) { + for (unsigned short d = 0; d < 3u; ++d) { + if (rmin(d) > this->R[i](d)) + rmin(d) = this->R[i](d); + else if (rmax(d) < this->R[i](d)) + rmax(d) = this->R[i](d); } } } - -template -void AmrParticleBase::getGlobalBounds_m(Vector_t &rmin, Vector_t &rmax) { +template +void AmrParticleBase::getGlobalBounds_m(Vector_t& rmin, Vector_t& rmax) { this->getLocalBounds_m(rmin, rmax); double min[6]; for (unsigned int i = 0; i < 3; ++i) { - min[2*i] = rmin[i]; - min[2*i + 1] = -rmax[i]; + min[2 * i] = rmin[i]; + min[2 * i + 1] = -rmax[i]; } allreduce(min, 6, std::less()); for (unsigned int i = 0; i < 3; ++i) { - rmin[i] = min[2*i]; - rmax[i] = -min[2*i + 1]; + rmin[i] = min[2 * i]; + rmax[i] = -min[2 * i + 1]; } } - // template // void AmrParticleBase::lorentzTransform(bool inverse) { -// +// // if ( isLorentzTransformed_m && !inverse ) { // return; // } -// +// // isLorentzTransformed_m = true; -// +// // Vector_t gamma = lorentzFactor_m; -// +// // if ( inverse ) { // gamma = 1.0 / gamma; // isLorentzTransformed_m = false; // } -// +// // for (std::size_t i = 0; i < this->getLocalNum(); ++i) // this->R[i] *= gamma; // } diff --git a/src/AmrParticle/AmrParticleLevelCounter.h b/src/AmrParticle/AmrParticleLevelCounter.h index 9c37f06fa..97dda7bab 100644 --- a/src/AmrParticle/AmrParticleLevelCounter.h +++ b/src/AmrParticle/AmrParticleLevelCounter.h @@ -26,19 +26,14 @@ #ifndef AMR_PARTICLE_LEVEL_COUNTER_H #define AMR_PARTICLE_LEVEL_COUNTER_H -#include -#include #include #include +#include +#include -template < - class Key, - class T, - class Compare = std::less, - class Allocator = std::allocator > -> class AmrParticleLevelCounter -{ - +template , + class Allocator = std::allocator > > +class AmrParticleLevelCounter { public: typedef typename std::map::value_type value_type; typedef typename std::map::size_type size_type; @@ -46,38 +41,37 @@ template < typedef typename std::map::const_iterator const_iterator; public: - - AmrParticleLevelCounter() : count_m() { } - + AmrParticleLevelCounter() + : count_m() {} + /*! * Add more "particles" to that level * @param level where to add * @param nTimes to increment */ void increment(const Key& level, T nTimes = T(1)) { count_m[level] += nTimes; } - + /*! * Add more "particles" to that level * @param level where to add * @param nTimes to decrement */ void decrement(const Key& level, T nTimes = T(1)) { increment(level, -nTimes); } - + T& operator[](T level) { return count_m[level]; } - + const T& operator[](T level) const { return count_m[level]; } - + size_type size() const { return count_m.size(); } - + bool empty() const { return count_m.empty(); } - + iterator begin() { return count_m.begin(); } const_iterator begin() const { return count_m.begin(); } - + iterator end() { return count_m.end(); } const_iterator end() const { return count_m.end(); } - - + /*! * Obtain the start of a level * @param level @@ -85,26 +79,23 @@ template < */ T begin(T level) const { auto end = count_m.begin(); - + // make sure to stay within container T size = count_m.size(); std::advance(end, (level > size) ? size : level); - - return std::accumulate(count_m.begin(), end, 0, - [](T sum, const value_type& value_pair) { - return sum + value_pair.second; - }); + + return std::accumulate(count_m.begin(), end, 0, [](T sum, const value_type& value_pair) { + return sum + value_pair.second; + }); } - - + /*! * Obtain the end of a level * @param level * @returns the index of the local end of that level */ T end(T level) const { return begin(level + 1); } - - + /*! * Remove particle indices from the container * @param num of particles that will be removed @@ -112,22 +103,18 @@ template < */ void remove(T num, T begin) { int inum = int(num); - while ( inum > -1 ) { + while (inum > -1) { T level = which(begin + inum); --count_m[level]; --inum; } } - /*! * @returns the total particle count * (should be the same as AmrParticleBase::LocalNum) */ - T getLocalNumAllLevel() { - return begin( count_m.size() ); - } - + T getLocalNumAllLevel() { return begin(count_m.size()); } /*! * @returns the total particle count up to the given level @@ -141,7 +128,6 @@ template < return sum; } - /*! * @returns the total particle count at the given level */ @@ -158,14 +144,13 @@ template < */ T which(T idx) { T level = 0; - - while ( idx >= end(level) && level < size() ) + + while (idx >= end(level) && level < size()) ++level; - + return level; } - - + private: /*! * Key represents level diff --git a/src/AmrParticle/ParticleAmrLayout.h b/src/AmrParticle/ParticleAmrLayout.h index a0dcccc28..f24e65ca0 100644 --- a/src/AmrParticle/ParticleAmrLayout.h +++ b/src/AmrParticle/ParticleAmrLayout.h @@ -2,8 +2,8 @@ // Class ParticleAmrLayout // Particle layout for AMR particles. // -// Copyright (c) 2016 - 2020, Matthias Frey, Uldis Locans, Paul Scherrer Institut, Villigen PSI, Switzerland -// All rights reserved +// Copyright (c) 2016 - 2020, Matthias Frey, Uldis Locans, Paul Scherrer Institut, Villigen PSI, +// Switzerland All rights reserved // // Implemented as part of the PhD thesis // "Precise Simulations of Multibunches in High Intensity Cyclotrons" @@ -23,64 +23,56 @@ #include "Particle/ParticleLayout.h" -template +template class ParticleAmrLayout : public ParticleLayout { - public: // pair iterator definition ... this layout does not allow for pairlists typedef int pair_t; typedef pair_t* pair_iterator; - typedef typename ParticleLayout::SingleParticlePos_t - SingleParticlePos_t; + typedef typename ParticleLayout::SingleParticlePos_t SingleParticlePos_t; typedef typename ParticleLayout::Index_t Index_t; - + // type of attributes this layout should use for position and ID typedef ParticleAttrib ParticlePos_t; - typedef ParticleAttrib ParticleIndex_t; - + typedef ParticleAttrib ParticleIndex_t; + public: - ParticleAmrLayout(); - + /*! * @param finestLevel of current simulation state */ void setFinestLevel(int finestLevel); - + /*! * @param maxLevel allowed during simulation run */ void setMaxLevel(int maxLevel); - + /*! * Set the computational domain of the base level. E.g. the computational * domain is [-1, 1]^3. With dh = 4, we get a new domain of [-1.04, 1.04]^3. * @param dh is the mesh enlargement in [%] */ virtual void setBoundingBox(double dh) = 0; - + protected: - int finestLevel_m; ///< Current finest level of simluation - int maxLevel_m; ///< Maximum level allowed + int finestLevel_m; ///< Current finest level of simluation + int maxLevel_m; ///< Maximum level allowed }; - // ============================================================================ - template ParticleAmrLayout::ParticleAmrLayout() - : finestLevel_m(0), - maxLevel_m(0) -{ } - + : finestLevel_m(0) + , maxLevel_m(0) {} template void ParticleAmrLayout::setFinestLevel(int finestLevel) { finestLevel_m = finestLevel; } - template void ParticleAmrLayout::setMaxLevel(int maxLevel) { maxLevel_m = maxLevel; diff --git a/src/Communicate/Archive.h b/src/Communicate/Archive.h index 568c71499..d368d2225 100644 --- a/src/Communicate/Archive.h +++ b/src/Communicate/Archive.h @@ -26,8 +26,8 @@ #define IPPL_ARCHIVE_H #include "Types/IpplTypes.h" -#include "Types/ViewTypes.h" #include "Types/Vector.h" +#include "Types/ViewTypes.h" namespace ippl { namespace detail { @@ -38,9 +38,8 @@ namespace ippl { */ template class Archive { - public: - using buffer_type = typename ViewType::view_type; + using buffer_type = typename ViewType::view_type; using pointer_type = typename buffer_type::pointer_type; Archive(size_type size = 0); @@ -84,36 +83,21 @@ namespace ippl { /*! * @returns a pointer to the data of the buffer */ - pointer_type getBuffer() { - return buffer_m.data(); - } - + pointer_type getBuffer() { return buffer_m.data(); } /*! * @returns the size of the buffer */ - size_type getSize() const { - return writepos_m; - } - - size_type getBufferSize() const { - return buffer_m.size(); - } - - void resizeBuffer(size_type size) { - Kokkos::resize(buffer_m, size); - } - - void reallocBuffer(size_type size) { - Kokkos::realloc(buffer_m, size); - } - - void resetWritePos() { - writepos_m = 0; - } - void resetReadPos() { - readpos_m = 0; - } + size_type getSize() const { return writepos_m; } + + size_type getBufferSize() const { return buffer_m.size(); } + + void resizeBuffer(size_type size) { Kokkos::resize(buffer_m, size); } + + void reallocBuffer(size_type size) { Kokkos::realloc(buffer_m, size); } + + void resetWritePos() { writepos_m = 0; } + void resetReadPos() { readpos_m = 0; } ~Archive() = default; @@ -125,8 +109,8 @@ namespace ippl { //! serialized data buffer_type buffer_m; }; - } -} + } // namespace detail +} // namespace ippl #include "Archive.hpp" diff --git a/src/Communicate/Archive.hpp b/src/Communicate/Archive.hpp index 1f77bc40f..f818a2d33 100644 --- a/src/Communicate/Archive.hpp +++ b/src/Communicate/Archive.hpp @@ -24,23 +24,18 @@ namespace ippl { template Archive::Archive(size_type size) - : writepos_m(0) - , readpos_m(0) - , buffer_m("buffer", size) - { } + : writepos_m(0) + , readpos_m(0) + , buffer_m("buffer", size) {} template template - void Archive::serialize(const Kokkos::View& view, - size_type nsends) { + void Archive::serialize(const Kokkos::View& view, size_type nsends) { size_t size = sizeof(T); Kokkos::parallel_for( - "Archive::serialize()", nsends, - KOKKOS_CLASS_LAMBDA(const size_type i) { - std::memcpy(buffer_m.data() + i * size + writepos_m, - view.data() + i, - size); - }); + "Archive::serialize()", nsends, KOKKOS_CLASS_LAMBDA(const size_type i) { + std::memcpy(buffer_m.data() + i * size + writepos_m, view.data() + i, size); + }); Kokkos::fence(); writepos_m += size * nsends; } @@ -52,8 +47,7 @@ namespace ippl { size_t size = sizeof(T); // Default index type for range policies is int64, // so we have to explicitly specify size_type (uint64) - using mdrange_t = Kokkos::MDRangePolicy, - Kokkos::IndexType>; + using mdrange_t = Kokkos::MDRangePolicy, Kokkos::IndexType>; Kokkos::parallel_for( "Archive::serialize()", // The constructor for Kokkos range policies always @@ -63,8 +57,7 @@ namespace ippl { mdrange_t({0, 0}, {(long int)nsends, Dim}), KOKKOS_CLASS_LAMBDA(const size_type i, const size_t d) { std::memcpy(buffer_m.data() + (Dim * i + d) * size + writepos_m, - &(*(view.data() + i))[d], - size); + &(*(view.data() + i))[d], size); }); Kokkos::fence(); writepos_m += Dim * size * nsends; @@ -72,19 +65,15 @@ namespace ippl { template template - void Archive::deserialize(Kokkos::View& view, - size_type nrecvs) { + void Archive::deserialize(Kokkos::View& view, size_type nrecvs) { size_t size = sizeof(T); - if(nrecvs > view.extent(0)) { + if (nrecvs > view.extent(0)) { Kokkos::realloc(view, nrecvs); } Kokkos::parallel_for( - "Archive::deserialize()", nrecvs, - KOKKOS_CLASS_LAMBDA(const size_type i) { - std::memcpy(view.data() + i, - buffer_m.data() + i * size + readpos_m, - size); - }); + "Archive::deserialize()", nrecvs, KOKKOS_CLASS_LAMBDA(const size_type i) { + std::memcpy(view.data() + i, buffer_m.data() + i * size + readpos_m, size); + }); // Wait for deserialization kernel to complete // (as with serialization kernels) Kokkos::fence(); @@ -96,21 +85,18 @@ namespace ippl { void Archive::deserialize(Kokkos::View*>& view, size_type nrecvs) { size_t size = sizeof(T); - if(nrecvs > view.extent(0)) { + if (nrecvs > view.extent(0)) { Kokkos::realloc(view, nrecvs); } - using mdrange_t = Kokkos::MDRangePolicy, - Kokkos::IndexType>; + using mdrange_t = Kokkos::MDRangePolicy, Kokkos::IndexType>; Kokkos::parallel_for( - "Archive::deserialize()", - mdrange_t({0, 0}, {(long int)nrecvs, Dim}), + "Archive::deserialize()", mdrange_t({0, 0}, {(long int)nrecvs, Dim}), KOKKOS_CLASS_LAMBDA(const size_type i, const size_t d) { std::memcpy(&(*(view.data() + i))[d], - buffer_m.data() + (Dim * i + d) * size + readpos_m, - size); - }); + buffer_m.data() + (Dim * i + d) * size + readpos_m, size); + }); Kokkos::fence(); readpos_m += Dim * size * nrecvs; } - } -} + } // namespace detail +} // namespace ippl diff --git a/src/Communicate/Buffers.cpp b/src/Communicate/Buffers.cpp index 7c9aa8ea9..84aa15ec5 100644 --- a/src/Communicate/Buffers.cpp +++ b/src/Communicate/Buffers.cpp @@ -37,16 +37,16 @@ namespace ippl { - void Communicate::setDefaultOverallocation(double factor) { - defaultOveralloc_m = factor; - } + void Communicate::setDefaultOverallocation(double factor) { + defaultOveralloc_m = factor; + } - void Communicate::deleteBuffer(int id) { - buffers_m.erase(id); - } + void Communicate::deleteBuffer(int id) { + buffers_m.erase(id); + } - void Communicate::deleteAllBuffers() { - buffers_m.clear(); - } + void Communicate::deleteAllBuffers() { + buffers_m.clear(); + } -} +} // namespace ippl diff --git a/src/Communicate/Buffers.hpp b/src/Communicate/Buffers.hpp index ffc2ab3ca..1ea994704 100644 --- a/src/Communicate/Buffers.hpp +++ b/src/Communicate/Buffers.hpp @@ -35,24 +35,23 @@ namespace ippl { - template - Communicate::buffer_type Communicate::getBuffer(int id, - size_type size, double overallocation) { - size *= sizeof(T); - #if __cplusplus > 201703L - if (buffers_m.contains(id)) { - #else - if (buffers_m.find(id) != buffers_m.end()) { - #endif - buffer_type buf = buffers_m[id]; - if (buf->getBufferSize() < size) { - buf->reallocBuffer(size); - } - return buf; + template + Communicate::buffer_type Communicate::getBuffer(int id, size_type size, double overallocation) { + size *= sizeof(T); +#if __cplusplus > 201703L + if (buffers_m.contains(id)) { +#else + if (buffers_m.find(id) != buffers_m.end()) { +#endif + buffer_type buf = buffers_m[id]; + if (buf->getBufferSize() < size) { + buf->reallocBuffer(size); } - buffers_m[id] = std::make_shared((size_type)(size * - std::max(overallocation, defaultOveralloc_m))); - return buffers_m[id]; + return buf; } + buffers_m[id] = std::make_shared( + (size_type)(size * std::max(overallocation, defaultOveralloc_m))); + return buffers_m[id]; + } -} +} // namespace ippl diff --git a/src/Communicate/Communicate.cpp b/src/Communicate/Communicate.cpp index 78bf9bb82..9f70643ea 100644 --- a/src/Communicate/Communicate.cpp +++ b/src/Communicate/Communicate.cpp @@ -18,13 +18,10 @@ namespace ippl { Communicate::Communicate(int& argc, char**& argv) - : Communicate(argc, argv, MPI_COMM_WORLD) - {} - + : Communicate(argc, argv, MPI_COMM_WORLD) {} Communicate::Communicate(int& argc, char**& argv, const MPI_Comm& comm) - : comm_m(comm) - { + : comm_m(comm) { MPI_Init(&argc, &argv); MPI_Comm_rank(comm_m, &rank_m); MPI_Comm_size(comm_m, &size_m); @@ -34,14 +31,12 @@ namespace ippl { MPI_Finalize(); } - void Communicate::irecv(int src, int tag, - archive_type& ar, MPI_Request& request, size_type msize) - { + void Communicate::irecv(int src, int tag, archive_type& ar, MPI_Request& request, + size_type msize) { if (msize > INT_MAX) { std::cerr << "Message size exceeds range of int" << std::endl; std::abort(); } - MPI_Irecv(ar.getBuffer(), msize, - MPI_BYTE, src, tag, comm_m, &request); + MPI_Irecv(ar.getBuffer(), msize, MPI_BYTE, src, tag, comm_m, &request); } -} +} // namespace ippl diff --git a/src/Communicate/Communicate.h b/src/Communicate/Communicate.h index 7024982e2..37855e21a 100644 --- a/src/Communicate/Communicate.h +++ b/src/Communicate/Communicate.h @@ -25,8 +25,8 @@ #include #include "Communicate/Archive.h" -#include "Communicate/Tags.h" #include "Communicate/TagMaker.h" +#include "Communicate/Tags.h" namespace ippl { /*! @@ -34,13 +34,11 @@ namespace ippl { * * \remark Calling the plain *this pointer returns the MPI communicator, e.g. MPI_COMM_WORLD. */ - class Communicate : public TagMaker - { - + class Communicate : public TagMaker { public: // Attention: only works with default spaces using archive_type = detail::Archive<>; - using buffer_type = std::shared_ptr; + using buffer_type = std::shared_ptr; using size_type = detail::size_type; @@ -93,58 +91,38 @@ namespace ippl { */ void deleteAllBuffers(); - [[deprecated]] - int myNode() const noexcept { - return rank_m; - } - - [[deprecated]] - int getNodes() const noexcept { - return size_m; - } - + [[deprecated]] int myNode() const noexcept { return rank_m; } - [[deprecated]] - const char *name() const noexcept { - return "MPI"; - } + [[deprecated]] int getNodes() const noexcept { return size_m; } - int size() const noexcept { - return size_m; - } + [[deprecated]] const char* name() const noexcept { return "MPI"; } - int rank() const noexcept { - return rank_m; - } + int size() const noexcept { return size_m; } + int rank() const noexcept { return rank_m; } /*! * \warning Only works with default spaces! */ template - void recv(int src, int tag, Buffer& buffer, archive_type& ar, - size_type msize, size_type nrecvs); + void recv(int src, int tag, Buffer& buffer, archive_type& ar, size_type msize, + size_type nrecvs); /*! * \warning Only works with default spaces! */ template - void isend(int dest, int tag, Buffer& buffer, archive_type&, - MPI_Request&, size_type nsends); + void isend(int dest, int tag, Buffer& buffer, archive_type&, MPI_Request&, + size_type nsends); /*! * \warning Only works with default spaces! */ void irecv(int src, int tag, archive_type&, MPI_Request&, size_type msize); + MPI_Comm* getCommunicator() noexcept { return &comm_m; } - MPI_Comm* getCommunicator() noexcept { - return &comm_m; - } - - void barrier() noexcept { - MPI_Barrier(comm_m); - } + void barrier() noexcept { MPI_Barrier(comm_m); } private: std::map buffers_m; @@ -155,11 +133,9 @@ namespace ippl { int rank_m; }; - template - void Communicate::recv(int src, int tag, Buffer& buffer, archive_type& ar, - size_type msize, size_type nrecvs) - { + void Communicate::recv(int src, int tag, Buffer& buffer, archive_type& ar, size_type msize, + size_type nrecvs) { // Temporary fix. MPI communication seems to have problems when the // count argument exceeds the range of int, so large messages should // be split into smaller messages @@ -168,25 +144,22 @@ namespace ippl { std::abort(); } MPI_Status status; - MPI_Recv(ar.getBuffer(), msize, - MPI_BYTE, src, tag, comm_m, &status); + MPI_Recv(ar.getBuffer(), msize, MPI_BYTE, src, tag, comm_m, &status); buffer.deserialize(ar, nrecvs); } template - void Communicate::isend(int dest, int tag, Buffer& buffer, - archive_type& ar, MPI_Request& request, size_type nsends) - { + void Communicate::isend(int dest, int tag, Buffer& buffer, archive_type& ar, + MPI_Request& request, size_type nsends) { if (ar.getSize() > INT_MAX) { std::cerr << "Message size exceeds range of int" << std::endl; std::abort(); } buffer.serialize(ar, nsends); - MPI_Isend(ar.getBuffer(), ar.getSize(), - MPI_BYTE, dest, tag, comm_m, &request); + MPI_Isend(ar.getBuffer(), ar.getSize(), MPI_BYTE, dest, tag, comm_m, &request); } -} +} // namespace ippl #include "Communicate/Buffers.hpp" diff --git a/src/Communicate/DataTypes.h b/src/Communicate/DataTypes.h index 94cdd869f..1adf83b1a 100644 --- a/src/Communicate/DataTypes.h +++ b/src/Communicate/DataTypes.h @@ -23,24 +23,23 @@ #include -template struct is_ippl_mpi_datatype: std::false_type {}; +template +struct is_ippl_mpi_datatype : std::false_type {}; -template MPI_Datatype get_mpi_datatype(const T& /*x*/) -{ - static_assert(is_ippl_mpi_datatype::value, - "type isn't an MPI type"); +template +MPI_Datatype get_mpi_datatype(const T& /*x*/) { + static_assert(is_ippl_mpi_datatype::value, "type isn't an MPI type"); return get_mpi_datatype(T()); } - -#define IPPL_MPI_DATATYPE(CppType, MPIType) \ -template<> \ -inline MPI_Datatype \ -get_mpi_datatype< CppType >(const CppType&) { return MPIType; } \ - \ -template<> \ -struct is_ippl_mpi_datatype: std::true_type {}; - +#define IPPL_MPI_DATATYPE(CppType, MPIType) \ + template <> \ + inline MPI_Datatype get_mpi_datatype(const CppType&) { \ + return MPIType; \ + } \ + \ + template <> \ + struct is_ippl_mpi_datatype : std::true_type {}; IPPL_MPI_DATATYPE(char, MPI_CHAR); diff --git a/src/Communicate/GlobalComm.h b/src/Communicate/GlobalComm.h index 5d71136b3..ae3be0628 100644 --- a/src/Communicate/GlobalComm.h +++ b/src/Communicate/GlobalComm.h @@ -15,11 +15,9 @@ * GlobalComm.h - Global communication functions, such as reduce and scatter. */ - // forward declarations class Communicate; - // Reduce equally-sized arrays across the machine, by sending to node // 0 and broadcasting back the result. The arguments are two begin,end // iterators for the source of the data, an iterator pointing to @@ -32,13 +30,12 @@ class Communicate; // which do not require their own special getCommunicate/putMessage. If you // need to reduce a complex quantity, use the scalar version of reduce. template -bool reduce(Communicate&, InputIterator, InputIterator, OutputIterator, - const ReduceOp&, bool *IncludeVal = 0); +bool reduce(Communicate&, InputIterator, InputIterator, OutputIterator, const ReduceOp&, + bool* IncludeVal = 0); // same as above, but this uses the default Communicate object template -bool reduce(InputIterator, InputIterator, OutputIterator, - const ReduceOp&, bool *IncludeVal = 0); +bool reduce(InputIterator, InputIterator, OutputIterator, const ReduceOp&, bool* IncludeVal = 0); // scalar versions of reduce ... instead of iterators, these versions // expect a single quantity to reduce and a location to place the result. @@ -49,38 +46,31 @@ bool reduce(Communicate& comm, T& input, T& output, const ReduceOp& op); template bool reduce(T& input, T& output, const ReduceOp& op); - // masked scalar versions of reduce ... instead of iterators, these versions // expect a single quantity to reduce and a location to place the result. // The final argument indicates whether the LOCAL NODE should have it's // value included in the reduction (by default, this is true). // Return success of operation. template -bool reduce_masked(Communicate& comm, T& input, T& output, const ReduceOp& op, - bool IncludeVal); +bool reduce_masked(Communicate& comm, T& input, T& output, const ReduceOp& op, bool IncludeVal); // same as above, but this uses the default Communicate object template -bool reduce_masked(T& input, T& output, const ReduceOp& op, - bool IncludeVal); - +bool reduce_masked(T& input, T& output, const ReduceOp& op, bool IncludeVal); // scalar versions of reduce ... instead of iterators, these versions // expect a single quantity to reduce and a location to place the result. template -bool reduce(Communicate& comm, T& input, T& output, const ReduceOp& op) -{ +bool reduce(Communicate& comm, T& input, T& output, const ReduceOp& op) { return reduce_masked(comm, input, output, op, true); } // same as above, but this uses the default Communicate object template -bool reduce(T& input, T& output, const ReduceOp& op) -{ +bool reduce(T& input, T& output, const ReduceOp& op) { return reduce_masked(input, output, op, true); } - // Scatter the data in the given source container to all other nodes. // The data is read using the first two begin,end iterators, and written // to the location indicated by the third iterator. The next two @@ -91,15 +81,12 @@ bool reduce(T& input, T& output, const ReduceOp& op) // same node. // Return success of operation. template -bool scatter(Communicate&, InputIterator, InputIterator, RandomIterator, - int *, int *, const ScatterOp&); +bool scatter(Communicate&, InputIterator, InputIterator, RandomIterator, int*, int*, + const ScatterOp&); // same as above, but this uses the default Communicate object template -bool scatter(InputIterator, InputIterator, RandomIterator, - int *, int *, const ScatterOp&); - - +bool scatter(InputIterator, InputIterator, RandomIterator, int*, int*, const ScatterOp&); /* Gather the data in the given source container from all other nodes to a * specific node (default: 0). @@ -107,7 +94,6 @@ bool scatter(InputIterator, InputIterator, RandomIterator, template void gather(const T* input, T* output, int count, int root = 0); - /* Scatter the data from all other nodes to a * specific node (default: 0). */ @@ -142,7 +128,6 @@ void allreduce(T* inout, int count, Op op); template void allreduce(T& inout, int count, Op op); - #include "Communicate/GlobalComm.hpp" -#endif // GLOBAL_COMM_H \ No newline at end of file +#endif // GLOBAL_COMM_H \ No newline at end of file diff --git a/src/Communicate/GlobalComm.hpp b/src/Communicate/GlobalComm.hpp index c8c7daf65..10a3622c2 100644 --- a/src/Communicate/GlobalComm.hpp +++ b/src/Communicate/GlobalComm.hpp @@ -24,11 +24,11 @@ ***************************************************************************/ // include files +#include "Communicate/DataTypes.h" #include "Communicate/GlobalComm.h" +#include "Communicate/Operations.h" #include "Communicate/Tags.h" #include "Utility/IpplInfo.h" -#include "Communicate/DataTypes.h" -#include "Communicate/Operations.h" #include @@ -49,151 +49,147 @@ // which do not require their own special getCommunicate/putMessage. If you // need to reduce a complex quantity, use the scalar version of reduce. template -bool reduce(Communicate& comm, InputIterator s1, InputIterator s2, - OutputIterator t1, const ReduceOp& op, bool *IncludeVal) -{ -/* - - // Inform dbgmsg("reduce-vector", INFORM_ALL_NODES); - - // determine destination node and tags - int parent = 0; - int sendtag = comm.next_tag(COMM_REDUCE_SEND_TAG, COMM_REDUCE_CYCLE); - int rectag = comm.next_tag(COMM_REDUCE_RECV_TAG, COMM_REDUCE_CYCLE); - - // determine how many elements we have to reduce - unsigned int elements = 0; - for (InputIterator tmps1 = s1; tmps1 != s2; ++tmps1, ++elements); - if (elements == 0) - { - ERRORMSG("reduce: nothing to reduce." << endl); - } +bool reduce(Communicate& comm, InputIterator s1, InputIterator s2, OutputIterator t1, + const ReduceOp& op, bool* IncludeVal) { + /* - // create flags, if they are not provided - bool *useFlags = IncludeVal; - if (useFlags == 0 && elements > 0) - { - useFlags = new bool[elements]; - for (unsigned int u=0; u < elements; useFlags[u++] = true); - } + // Inform dbgmsg("reduce-vector", INFORM_ALL_NODES); - if ( comm.myNode() != parent ) - { - // send the source data to node 0 if we are not node 0 - Message *msg = new Message; - // dbgmsg << "sending message with " << elements << " elements to node "; - // dbgmsg << parent << " with tag " << sendtag << endl; - ::putMessage(*msg, elements); - if (elements > 0) + // determine destination node and tags + int parent = 0; + int sendtag = comm.next_tag(COMM_REDUCE_SEND_TAG, COMM_REDUCE_CYCLE); + int rectag = comm.next_tag(COMM_REDUCE_RECV_TAG, COMM_REDUCE_CYCLE); + + // determine how many elements we have to reduce + unsigned int elements = 0; + for (InputIterator tmps1 = s1; tmps1 != s2; ++tmps1, ++elements); + if (elements == 0) { - ::putMessage(*msg, s1, s2); - ::putMessage(*msg, useFlags, useFlags + elements); + ERRORMSG("reduce: nothing to reduce." << endl); } - if ( ! comm.send(msg, parent, sendtag) ) + + // create flags, if they are not provided + bool *useFlags = IncludeVal; + if (useFlags == 0 && elements > 0) { - Ippl::abort("reduce: cannot send reduce buffers."); + useFlags = new bool[elements]; + for (unsigned int u=0; u < elements; useFlags[u++] = true); } - // then we get the results back - msg = comm.receive_block(parent, rectag); - // dbgmsg << "received message with size = " << msg->size(); - // dbgmsg << " from node " << parent << " with tag " << rectag << endl; - if ( ! msg || msg->size() < 1 ) - Ippl::abort("reduce: cannot receive reduce results."); - getMessage(*msg, *t1); - delete msg; - - } - else - { - // first copy the source into the target; this is like receiving - // from ourselves - InputIterator tmp1 = s1; - OutputIterator t2 = t1; - bool* copyf = useFlags; - for ( ; tmp1 != s2; ++tmp1, ++t2, ++copyf) - if (*copyf) - *t2 = *tmp1; - - // the parent receives all the messages and then broadcasts the - // reduced result - int notReceived = comm.getNodes() - 1; - while (notReceived > 0) + if ( comm.myNode() != parent ) { - // receive message - int fromnode = COMM_ANY_NODE; - Message *recmsg = comm.receive_block(fromnode, sendtag); - // dbgmsg << "received message with size = " << recmsg->size(); - // dbgmsg << " from node " << fromnode << " with tag "<size() < 1 ) - Ippl::abort("reduce: cannot receive reduce buffers."); - - // get data from message - int recelems; - ::getMessage(*recmsg, recelems); - if ((unsigned int) recelems != elements) - Ippl::abort("reduce: mismatched element count in vector reduction."); + // send the source data to node 0 if we are not node 0 + Message *msg = new Message; + // dbgmsg << "sending message with " << elements << " elements to node "; + // dbgmsg << parent << " with tag " << sendtag << endl; + ::putMessage(*msg, elements); if (elements > 0) { - InputIterator reci = (InputIterator)(recmsg->item(0).data()); - bool *recflag = (bool *)(recmsg->item(1).data()); + ::putMessage(*msg, s1, s2); + ::putMessage(*msg, useFlags, useFlags + elements); + } + if ( ! comm.send(msg, parent, sendtag) ) + { + Ippl::abort("reduce: cannot send reduce buffers."); + } + + // then we get the results back + msg = comm.receive_block(parent, rectag); + // dbgmsg << "received message with size = " << msg->size(); + // dbgmsg << " from node " << parent << " with tag " << rectag << endl; + if ( ! msg || msg->size() < 1 ) + Ippl::abort("reduce: cannot receive reduce results."); + getMessage(*msg, *t1); + delete msg; - // the target buffer must have size >= size of the source, so - // we can iterate over the source iterator - unsigned int u; - for (u=0, t2=t1; u < elements; ++t2, ++reci, ++u) + } + else + { + // first copy the source into the target; this is like receiving + // from ourselves + InputIterator tmp1 = s1; + OutputIterator t2 = t1; + bool* copyf = useFlags; + for ( ; tmp1 != s2; ++tmp1, ++t2, ++copyf) + if (*copyf) + *t2 = *tmp1; + + // the parent receives all the messages and then broadcasts the + // reduced result + int notReceived = comm.getNodes() - 1; + while (notReceived > 0) + { + // receive message + int fromnode = COMM_ANY_NODE; + Message *recmsg = comm.receive_block(fromnode, sendtag); + // dbgmsg << "received message with size = " << recmsg->size(); + // dbgmsg << " from node " << fromnode << " with tag "<size() < 1 ) + Ippl::abort("reduce: cannot receive reduce buffers."); + + // get data from message + int recelems; + ::getMessage(*recmsg, recelems); + if ((unsigned int) recelems != elements) + Ippl::abort("reduce: mismatched element count in vector reduction."); + if (elements > 0) { - if (recflag[u]) + InputIterator reci = (InputIterator)(recmsg->item(0).data()); + bool *recflag = (bool *)(recmsg->item(1).data()); + + // the target buffer must have size >= size of the source, so + // we can iterate over the source iterator + unsigned int u; + for (u=0, t2=t1; u < elements; ++t2, ++reci, ++u) { - if (useFlags[u]) + if (recflag[u]) { - PETE_apply(op, *t2, *reci); - } - else - { - *t2 = *reci; - useFlags[u] = true; + if (useFlags[u]) + { + PETE_apply(op, *t2, *reci); + } + else + { + *t2 = *reci; + useFlags[u] = true; + } } } } - } - // finished with this node's data - delete recmsg; - notReceived--; - } + // finished with this node's data + delete recmsg; + notReceived--; + } - // Finally, broadcast the results out. t2 should now point to the - // end of the target buffer. - if (comm.getNodes() > 1) - { - Message *sendmsg = new Message(); - putMessage(*sendmsg, t1, t2); - // dbgmsg << "sending message with size " << sendmsg->size(); - // dbgmsg << " to all nodes with tag " << rectag << endl; - if (comm.broadcast_others(sendmsg, rectag) != (comm.getNodes() - 1)) - Ippl::abort("reduce: cannot send reduce results."); + // Finally, broadcast the results out. t2 should now point to the + // end of the target buffer. + if (comm.getNodes() > 1) + { + Message *sendmsg = new Message(); + putMessage(*sendmsg, t1, t2); + // dbgmsg << "sending message with size " << sendmsg->size(); + // dbgmsg << " to all nodes with tag " << rectag << endl; + if (comm.broadcast_others(sendmsg, rectag) != (comm.getNodes() - 1)) + Ippl::abort("reduce: cannot send reduce results."); + } } - } - // we're done - if (useFlags != 0 && useFlags != IncludeVal) - delete [] useFlags; -*/ + // we're done + if (useFlags != 0 && useFlags != IncludeVal) + delete [] useFlags; + */ return true; } - //////////////////////////////////////////////////////////////////////////// // same as above, but this uses the default Communicate object template -bool reduce(InputIterator s1, InputIterator s2, - OutputIterator t1, const ReduceOp& op, bool *IncludeVal) -{ +bool reduce(InputIterator s1, InputIterator s2, OutputIterator t1, const ReduceOp& op, + bool* IncludeVal) { return reduce(*Ippl::Comm, s1, s2, t1, op, IncludeVal); } - //////////////////////////////////////////////////////////////////////////// // masked scalar versions of reduce ... instead of iterators, these versions // expect a single quantity to reduce and a location to place the result. @@ -201,138 +197,130 @@ bool reduce(InputIterator s1, InputIterator s2, // value included in the reduction (by default, this is true). // Return success of operation. template -bool reduce_masked(Communicate& comm, T& input, T& output, - const ReduceOp& op, bool IncludeVal) -{ - -// // Inform dbgmsg("reduce_masked", INFORM_ALL_NODES); -// -// // determine destination node and tags -// int parent = 0; -// int sendtag = comm.next_tag(COMM_REDUCE_SEND_TAG, COMM_REDUCE_CYCLE); -// int rectag = comm.next_tag(COMM_REDUCE_RECV_TAG, COMM_REDUCE_CYCLE); -// -// if (comm.myNode() != parent) -// { -// // send the source data to node 0 if we are not node 0 -// Message *msg = new Message; -// // dbgmsg << "sending message, includeflag=" << IncludeVal << ", to node "; -// // dbgmsg << parent << " with tag " << sendtag << endl; -// ::putMessage(*msg, IncludeVal); -// if (IncludeVal) -// ::putMessage(*msg, input); -// if ( ! comm.send(msg, parent, sendtag) ) -// { -// Ippl::abort("reduce: cannot send reduce scalar."); -// } -// -// // then we get the results back -// msg = comm.receive_block(parent, rectag); -// // dbgmsg << "received message with size = " << msg->size(); -// // dbgmsg << " from node " << parent << " with tag " << rectag << endl; -// if ( ! msg || msg->size() < 1 ) -// Ippl::abort("reduce: cannot receive reduce results."); -// getMessage(*msg, output); -// delete msg; -// -// } -// else -// { -// // first copy the source into the target; this is like receiving -// // from ourselves -// if (IncludeVal) -// output = input; -// -// // if there are several nodes, we must get the other results -// if (comm.getNodes() > 1) -// { -// -// // the parent receives all the messages and then broadcasts the -// // reduced result -// int notReceived = comm.getNodes() - 1; -// -// // create a temporary array to store values from other nodes -// T *recval = new T[notReceived]; -// bool *recflag = new bool[notReceived]; -// -// // get all messages -// while (notReceived > 0) -// { -// // receive message -// int fromnode = COMM_ANY_NODE; -// Message *recmsg = comm.receive_block(fromnode, sendtag); -// if ( ! recmsg || recmsg->size() < 1 ) -// Ippl::abort("reduce: cannot receive reduce buffers."); -// -// // get flag indicating if the message has any data; if it does, -// // get it and store it -// ::getMessage(*recmsg, recflag[fromnode - 1]); -// if (recflag[fromnode - 1]) -// ::getMessage(*recmsg, recval[fromnode - 1]); -// -// // finished with this node's data -// delete recmsg; -// notReceived--; -// } -// -// // now loop through the received values and do the reduction -// for (int n=1; n < comm.getNodes(); ++n) -// { -// if (recflag[n-1]) -// { -// if (IncludeVal) -// { -// PETE_apply(op, output, recval[n-1]); -// } -// else -// { -// output = recval[n-1]; -// IncludeVal = true; -// } -// } -// } -// -// // done with the temporary storage -// delete [] recflag; -// delete [] recval; -// } -// -// // Finally, broadcast the results out. t2 should now point to the -// // end of the target buffer. -// if (comm.getNodes() > 1) -// { -// Message *sendmsg = new Message(); -// ::putMessage(*sendmsg, output); -// // dbgmsg << "sending message with size " << sendmsg->size(); -// // dbgmsg << " to all nodes with tag " << rectag << endl; -// if (comm.broadcast_others(sendmsg, rectag) != (comm.getNodes() - 1)) -// Ippl::abort("reduce: cannot send reduce results."); -// } -// -// // we're done ... but do a check to see that we reduced SOMETHING -// /* ADA: can be "savely" ignored ... -// if (!IncludeVal) -// { -// WARNMSG("reduce: there was nothing to reduce, since the masks "); -// WARNMSG("were all false." << endl); -// } -// */ -// } +bool reduce_masked(Communicate& comm, T& input, T& output, const ReduceOp& op, bool IncludeVal) { + // // Inform dbgmsg("reduce_masked", INFORM_ALL_NODES); + // + // // determine destination node and tags + // int parent = 0; + // int sendtag = comm.next_tag(COMM_REDUCE_SEND_TAG, COMM_REDUCE_CYCLE); + // int rectag = comm.next_tag(COMM_REDUCE_RECV_TAG, COMM_REDUCE_CYCLE); + // + // if (comm.myNode() != parent) + // { + // // send the source data to node 0 if we are not node 0 + // Message *msg = new Message; + // // dbgmsg << "sending message, includeflag=" << IncludeVal << ", to node "; + // // dbgmsg << parent << " with tag " << sendtag << endl; + // ::putMessage(*msg, IncludeVal); + // if (IncludeVal) + // ::putMessage(*msg, input); + // if ( ! comm.send(msg, parent, sendtag) ) + // { + // Ippl::abort("reduce: cannot send reduce scalar."); + // } + // + // // then we get the results back + // msg = comm.receive_block(parent, rectag); + // // dbgmsg << "received message with size = " << msg->size(); + // // dbgmsg << " from node " << parent << " with tag " << rectag << endl; + // if ( ! msg || msg->size() < 1 ) + // Ippl::abort("reduce: cannot receive reduce results."); + // getMessage(*msg, output); + // delete msg; + // + // } + // else + // { + // // first copy the source into the target; this is like receiving + // // from ourselves + // if (IncludeVal) + // output = input; + // + // // if there are several nodes, we must get the other results + // if (comm.getNodes() > 1) + // { + // + // // the parent receives all the messages and then broadcasts the + // // reduced result + // int notReceived = comm.getNodes() - 1; + // + // // create a temporary array to store values from other nodes + // T *recval = new T[notReceived]; + // bool *recflag = new bool[notReceived]; + // + // // get all messages + // while (notReceived > 0) + // { + // // receive message + // int fromnode = COMM_ANY_NODE; + // Message *recmsg = comm.receive_block(fromnode, sendtag); + // if ( ! recmsg || recmsg->size() < 1 ) + // Ippl::abort("reduce: cannot receive reduce buffers."); + // + // // get flag indicating if the message has any data; if it does, + // // get it and store it + // ::getMessage(*recmsg, recflag[fromnode - 1]); + // if (recflag[fromnode - 1]) + // ::getMessage(*recmsg, recval[fromnode - 1]); + // + // // finished with this node's data + // delete recmsg; + // notReceived--; + // } + // + // // now loop through the received values and do the reduction + // for (int n=1; n < comm.getNodes(); ++n) + // { + // if (recflag[n-1]) + // { + // if (IncludeVal) + // { + // PETE_apply(op, output, recval[n-1]); + // } + // else + // { + // output = recval[n-1]; + // IncludeVal = true; + // } + // } + // } + // + // // done with the temporary storage + // delete [] recflag; + // delete [] recval; + // } + // + // // Finally, broadcast the results out. t2 should now point to the + // // end of the target buffer. + // if (comm.getNodes() > 1) + // { + // Message *sendmsg = new Message(); + // ::putMessage(*sendmsg, output); + // // dbgmsg << "sending message with size " << sendmsg->size(); + // // dbgmsg << " to all nodes with tag " << rectag << endl; + // if (comm.broadcast_others(sendmsg, rectag) != (comm.getNodes() - 1)) + // Ippl::abort("reduce: cannot send reduce results."); + // } + // + // // we're done ... but do a check to see that we reduced SOMETHING + // /* ADA: can be "savely" ignored ... + // if (!IncludeVal) + // { + // WARNMSG("reduce: there was nothing to reduce, since the masks "); + // WARNMSG("were all false." << endl); + // } + // */ + // } return true; } - //////////////////////////////////////////////////////////////////////////// // same as above, but this uses the default Communicate object template -bool reduce_masked(T& input, T& output, const ReduceOp& op, - bool IncludeVal) -{ - +bool reduce_masked(T& input, T& output, const ReduceOp& op, bool IncludeVal) { return reduce_masked(*Ippl::Comm, input, output, op, IncludeVal); } - //////////////////////////////////////////////////////////////////////////// // Scatter the data in the given source container to all other nodes. // The data is read using the first two begin,end iterators, and written @@ -344,97 +332,91 @@ bool reduce_masked(T& input, T& output, const ReduceOp& op, // same node. // Return success of operation. template -bool scatter(Communicate& comm, InputIterator s1, InputIterator s2, - RandomIterator t1, int *target_node, - int *target_position, const ScatterOp& op) -{ -/* - int i; // loop variables - int tag = comm.next_tag(COMM_REDUCE_SCATTER_TAG, COMM_REDUCE_CYCLE); - - // Create a number of send messages equal to TotalNodes - // these messages will be packed with the data from the source - // data and sent to the node indicated by target node array - // some empty messages will be sent so the recieving node knows when - // it has recieved all the messages - Message* msg = new Message[comm.getNodes()]; - - // Loop over each item of the source array and pack the send messages. - // The message is packed in pairs, the first element of each pair is - // an integer representing the array offset in the target. The second - // element is the data to be placed in that offset. - int *tn = target_node; - int *tp = target_position; - InputIterator si; - for ( si = s1; si != s2 ; si++, tn++, tp++ ) - { - if ( *tn < 0 || *tn >= comm.getNodes() ) +bool scatter(Communicate& comm, InputIterator s1, InputIterator s2, RandomIterator t1, + int* target_node, int* target_position, const ScatterOp& op) { + /* + int i; // loop variables + int tag = comm.next_tag(COMM_REDUCE_SCATTER_TAG, COMM_REDUCE_CYCLE); + + // Create a number of send messages equal to TotalNodes + // these messages will be packed with the data from the source + // data and sent to the node indicated by target node array + // some empty messages will be sent so the recieving node knows when + // it has recieved all the messages + Message* msg = new Message[comm.getNodes()]; + + // Loop over each item of the source array and pack the send messages. + // The message is packed in pairs, the first element of each pair is + // an integer representing the array offset in the target. The second + // element is the data to be placed in that offset. + int *tn = target_node; + int *tp = target_position; + InputIterator si; + for ( si = s1; si != s2 ; si++, tn++, tp++ ) { - ERRORMSG("scatter: bad scatter target " << *tn << endl); - return false; + if ( *tn < 0 || *tn >= comm.getNodes() ) + { + ERRORMSG("scatter: bad scatter target " << *tn << endl); + return false; + } + // msg[*tn].put(*tp).put(*si); + putMessage(msg[*tn], *tp); + putMessage(msg[*tn], *si); } - // msg[*tn].put(*tp).put(*si); - putMessage(msg[*tn], *tp); - putMessage(msg[*tn], *si); - } - // Send out the messages. We do not delete the messages here after the - // send, however. - for ( i = comm.getNodes() - 1; i >= 0; i-- ) - { - if ( ! comm.send(msg + i, i, tag, false) ) + // Send out the messages. We do not delete the messages here after the + // send, however. + for ( i = comm.getNodes() - 1; i >= 0; i-- ) { - ERRORMSG("scatter: cannot send scatter buffer " << i << endl); - return false; + if ( ! comm.send(msg + i, i, tag, false) ) + { + ERRORMSG("scatter: cannot send scatter buffer " << i << endl); + return false; + } } - } - // Receive the scatter messages back now. - int notReceived = comm.getNodes(); - while (notReceived > 0) - { - int fromnode = COMM_ANY_NODE; - Message *recmsg = comm.receive_block(fromnode, tag); - if ( ! recmsg ) + // Receive the scatter messages back now. + int notReceived = comm.getNodes(); + while (notReceived > 0) { - ERRORMSG("scatter: cannot receive scatter message." << endl); - return false; - } + int fromnode = COMM_ANY_NODE; + Message *recmsg = comm.receive_block(fromnode, tag); + if ( ! recmsg ) + { + ERRORMSG("scatter: cannot receive scatter message." << endl); + return false; + } - // for each (pos, val) pair, get it and put results in target storage - int pairs = recmsg->size() / 2; - int datapos; - InputIterator reci; - for ( i = 0 ; i < pairs ; i++ ) - { - // recmsg->get(datapos); - getMessage(*recmsg, datapos); - reci = (InputIterator)(recmsg->item(0).data()); - PETE_apply(op, t1[datapos], *reci); - recmsg->get(); // cleans out the item without another copy - } + // for each (pos, val) pair, get it and put results in target storage + int pairs = recmsg->size() / 2; + int datapos; + InputIterator reci; + for ( i = 0 ; i < pairs ; i++ ) + { + // recmsg->get(datapos); + getMessage(*recmsg, datapos); + reci = (InputIterator)(recmsg->item(0).data()); + PETE_apply(op, t1[datapos], *reci); + recmsg->get(); // cleans out the item without another copy + } - // Finished with this message. Delete it if it is from another node; if - // it is not, we sent it to ourselves and will delete it later. - if ( fromnode != comm.myNode() ) - delete recmsg; - notReceived--; - } + // Finished with this message. Delete it if it is from another node; if + // it is not, we sent it to ourselves and will delete it later. + if ( fromnode != comm.myNode() ) + delete recmsg; + notReceived--; + } - // at the end, delete the scatter messages, and return success - delete [] msg; -*/ + // at the end, delete the scatter messages, and return success + delete [] msg; + */ return true; } - // same as above, but this uses the default Communicate object template -bool scatter(InputIterator s1, InputIterator s2, - RandomIterator t1, int *target_node, - int *target_position, const ScatterOp& op) -{ - +bool scatter(InputIterator s1, InputIterator s2, RandomIterator t1, int* target_node, + int* target_position, const ScatterOp& op) { return scatter(*Ippl::Comm, s1, s2, t1, target_node, target_position, op); } @@ -442,28 +424,23 @@ template void gather(const T* input, T* output, int count, int root) { MPI_Datatype type = get_mpi_datatype(*input); - MPI_Gather(const_cast(input), count, type, - output, count, type, root, Ippl::getComm()); + MPI_Gather(const_cast(input), count, type, output, count, type, root, Ippl::getComm()); } - template void scatter(const T* input, T* output, int count, int root) { MPI_Datatype type = get_mpi_datatype(*input); - MPI_Scatter(const_cast(input), count, type, - output, count, type, root, Ippl::getComm()); + MPI_Scatter(const_cast(input), count, type, output, count, type, root, Ippl::getComm()); } - template void reduce(const T* input, T* output, int count, Op op, int root) { MPI_Datatype type = get_mpi_datatype(*input); MPI_Op mpiOp = get_mpi_op(op); - MPI_Reduce(const_cast(input), output, count, type, - mpiOp, root, Ippl::getComm()); + MPI_Reduce(const_cast(input), output, count, type, mpiOp, root, Ippl::getComm()); } template @@ -472,11 +449,9 @@ void new_reduce(const T* input, T* output, int count, Op op, int root) { MPI_Op mpiOp = get_mpi_op(op); - MPI_Reduce(const_cast(input), output, count, type, - mpiOp, root, Ippl::getComm()); + MPI_Reduce(const_cast(input), output, count, type, mpiOp, root, Ippl::getComm()); } - template void new_reduce(T* inout, int count, Op op, int root) { MPI_Datatype type = get_mpi_datatype(*inout); @@ -484,29 +459,24 @@ void new_reduce(T* inout, int count, Op op, int root) { MPI_Op mpiOp = get_mpi_op(op); if (Ippl::Comm->myNode() == root) { - MPI_Reduce(MPI_IN_PLACE, inout, count, type, - mpiOp, root, Ippl::getComm()); + MPI_Reduce(MPI_IN_PLACE, inout, count, type, mpiOp, root, Ippl::getComm()); } else { - MPI_Reduce(inout, inout, count, type, - mpiOp, root, Ippl::getComm()); + MPI_Reduce(inout, inout, count, type, mpiOp, root, Ippl::getComm()); } } - template void reduce(const T& input, T& output, int count, Op op, int root) { reduce(&input, &output, count, op, root); } - template void allreduce(const T* input, T* output, int count, Op op) { MPI_Datatype type = get_mpi_datatype(*input); MPI_Op mpiOp = get_mpi_op(op); - MPI_Allreduce(const_cast(input), output, count, type, - mpiOp, Ippl::getComm()); + MPI_Allreduce(const_cast(input), output, count, type, mpiOp, Ippl::getComm()); } template @@ -514,18 +484,15 @@ void allreduce(const T& input, T& output, int count, Op op) { allreduce(&input, &output, count, op); } - template void allreduce(T* inout, int count, Op op) { MPI_Datatype type = get_mpi_datatype(*inout); MPI_Op mpiOp = get_mpi_op(op); - MPI_Allreduce(MPI_IN_PLACE, inout, count, type, - mpiOp, Ippl::getComm()); + MPI_Allreduce(MPI_IN_PLACE, inout, count, type, mpiOp, Ippl::getComm()); } - template void allreduce(T& inout, int count, Op op) { allreduce(&inout, count, op); diff --git a/src/Communicate/Operations.h b/src/Communicate/Operations.h index 86a6f252e..0346f2c76 100644 --- a/src/Communicate/Operations.h +++ b/src/Communicate/Operations.h @@ -21,27 +21,26 @@ #ifndef IPPL_MPI_OPERATIONS_H #define IPPL_MPI_OPERATIONS_H -#include #include +#include -template struct is_ippl_mpi_type: std::false_type {}; +template +struct is_ippl_mpi_type : std::false_type {}; -template MPI_Op get_mpi_op(Op op) -{ - static_assert(is_ippl_mpi_type::value, - "type not supported"); - return get_mpi_op( op ); +template +MPI_Op get_mpi_op(Op op) { + static_assert(is_ippl_mpi_type::value, "type not supported"); + return get_mpi_op(op); } -#define IPPL_MPI_OP(CppOp, MPIOp) \ -template <> \ -inline MPI_Op \ -get_mpi_op< CppOp >(CppOp) { return MPIOp; } \ - \ -template<> \ -struct is_ippl_mpi_type: \ - std::true_type {}; - +#define IPPL_MPI_OP(CppOp, MPIOp) \ + template <> \ + inline MPI_Op get_mpi_op(CppOp) { \ + return MPIOp; \ + } \ + \ + template <> \ + struct is_ippl_mpi_type : std::true_type {}; /* with C++14 we should be able * to simply write @@ -64,7 +63,6 @@ IPPL_MPI_OP(std::plus, MPI_SUM); IPPL_MPI_OP(std::plus, MPI_SUM); IPPL_MPI_OP(std::plus, MPI_SUM); - IPPL_MPI_OP(std::less, MPI_MIN); IPPL_MPI_OP(std::less, MPI_MIN); IPPL_MPI_OP(std::less, MPI_MIN); @@ -79,7 +77,6 @@ IPPL_MPI_OP(std::less, MPI_MIN); IPPL_MPI_OP(std::less, MPI_MIN); IPPL_MPI_OP(std::less, MPI_MIN); - IPPL_MPI_OP(std::greater, MPI_MAX); IPPL_MPI_OP(std::greater, MPI_MAX); IPPL_MPI_OP(std::greater, MPI_MAX); @@ -94,7 +91,6 @@ IPPL_MPI_OP(std::greater, MPI_MAX); IPPL_MPI_OP(std::greater, MPI_MAX); IPPL_MPI_OP(std::greater, MPI_MAX); - IPPL_MPI_OP(std::logical_or, MPI_LOR); IPPL_MPI_OP(std::logical_and, MPI_LAND); diff --git a/src/Communicate/TagMaker.h b/src/Communicate/TagMaker.h index d4a709c86..34a81bcc2 100644 --- a/src/Communicate/TagMaker.h +++ b/src/Communicate/TagMaker.h @@ -15,31 +15,25 @@ * and to get a new tag for a given base tag. */ - // include files #include - // default cycle size, if not specified by the user #define DEF_CYCLE_SIZE 1000 - -class TagMaker -{ - +class TagMaker { public: // constructor/destructor - TagMaker(void) { } - virtual ~TagMaker(void) { } + TagMaker(void) {} + virtual ~TagMaker(void) {} // generate a new tag given a base tag. If the base tag has not been // previously established by create_base_tag, it will be done so by // this routine with the default cycle size. A new tag can be established // at the same time by also giving a cycle size as the second argument. - int next_tag(int t, int s = DEF_CYCLE_SIZE) - { + int next_tag(int t, int s = DEF_CYCLE_SIZE) { TagInfo& found = create_base_tag(t, s); - found.current = (found.current + 1) % found.cycleSize; + found.current = (found.current + 1) % found.cycleSize; return (found.base + found.current); } @@ -47,11 +41,9 @@ class TagMaker // for a given base tag. If the base tag doesn't exist, it will be // created and the largest possible tag within the cycle will // be returned. - int preceding_tag(int t, int s = DEF_CYCLE_SIZE) - { + int preceding_tag(int t, int s = DEF_CYCLE_SIZE) { const TagInfo& found = create_base_tag(t, s); - if (found.current == 0) - { + if (found.current == 0) { return (found.base + found.cycleSize - 1); } return (found.base + found.current - 1); @@ -61,51 +53,49 @@ class TagMaker // for a given base tag. If the base tag doesn't exist, it will be // created and the second smallest possible tag within the cycle will // be returned - int following_tag(int t, int s = DEF_CYCLE_SIZE) - { + int following_tag(int t, int s = DEF_CYCLE_SIZE) { const TagInfo& found = create_base_tag(t, s); - const int following = (found.current + 1) % found.cycleSize; + const int following = (found.current + 1) % found.cycleSize; return (found.base + following); } // just return the `current' tag that is to be generated from the // given base tag, without incrementing the cycle counter. - int current_tag(int t, int s = DEF_CYCLE_SIZE) - { + int current_tag(int t, int s = DEF_CYCLE_SIZE) { TagInfo& found = create_base_tag(t, s); return (found.base + found.current); } // reset the cycle counter for the given tag to be 0. If the tag is // not in the list, it is added. Returns the reset tag. - int reset_tag(int t, int s = DEF_CYCLE_SIZE) - { + int reset_tag(int t, int s = DEF_CYCLE_SIZE) { TagInfo& found = create_base_tag(t, s); - found.current = 0; + found.current = 0; return found.base; } private: // Simple struct holding info about the cycle size and current tag // for a base tag - class TagInfo - { + class TagInfo { public: - int base; // base tag value, the key for the map - int cycleSize; // range through which to cycle tag - int current; // current value of tag - TagInfo(int b, int s) : base(b), cycleSize(s), current(0) { } - TagInfo() : base(-1), cycleSize(-1), current(0) { } + int base; // base tag value, the key for the map + int cycleSize; // range through which to cycle tag + int current; // current value of tag + TagInfo(int b, int s) + : base(b) + , cycleSize(s) + , current(0) {} + TagInfo() + : base(-1) + , cycleSize(-1) + , current(0) {} }; // class used for comparisons - class TagCompare - { + class TagCompare { public: - bool operator()(const int& x, const int& y) const - { - return x < y; - } + bool operator()(const int& x, const int& y) const { return x < y; } }; // the list of base tags which have been established @@ -114,20 +104,17 @@ class TagMaker // Establish a new base tag and cycle size. Returns a reference to // the new TagInfo structure. // Arguments are: base tag, cycle size. - TagInfo& create_base_tag(int t, int s = DEF_CYCLE_SIZE) - { + TagInfo& create_base_tag(int t, int s = DEF_CYCLE_SIZE) { TagInfo& found = TagList[t]; - if ( found.base < 0 ) - { - found.base = t; + if (found.base < 0) { + found.base = t; found.cycleSize = s; } return TagList[t]; } - }; -#endif // TAG_MAKER_H +#endif // TAG_MAKER_H // vi: set et ts=4 sw=4 sts=4: // Local Variables: diff --git a/src/Communicate/Tags.h b/src/Communicate/Tags.h index 8d6db8bcd..f7d05aa26 100644 --- a/src/Communicate/Tags.h +++ b/src/Communicate/Tags.h @@ -22,42 +22,40 @@ // special tag used to indicate the program should quit. The values are // arbitrary, but non-zero. -#define IPPL_ABORT_TAG 5 // program should abort() -#define IPPL_EXIT_TAG 6 // program should exit() - +#define IPPL_ABORT_TAG 5 // program should abort() +#define IPPL_EXIT_TAG 6 // program should exit() // tags for reduction -#define COMM_REDUCE_SEND_TAG 10000 -#define COMM_REDUCE_RECV_TAG 11000 -#define COMM_REDUCE_SCATTER_TAG 12000 +#define COMM_REDUCE_SEND_TAG 10000 +#define COMM_REDUCE_RECV_TAG 11000 +#define COMM_REDUCE_SCATTER_TAG 12000 #define COMM_REDUCE_CYCLE 1000 - // tag for applying parallel periodic boundary condition. #define BC_PARALLEL_PERIODIC_TAG 15000 -#define BC_TAG_CYCLE 1000 +#define BC_TAG_CYCLE 1000 // Field tags namespace ippl { namespace detail { - #define HALO_FACE_TAG 20000 - #define HALO_EDGE_TAG 30000 - #define HALO_VERTEX_TAG 40000 - #define HALO_TAG_CYCLE 1000 - } -} - -#define F_GUARD_CELLS_TAG 20000 // Field::fillGuardCells() -#define F_WRITE_TAG 21000 // Field::write() -#define F_READ_TAG 22000 // Field::read() -#define F_GEN_ASSIGN_TAG 23000 // assign(BareField,BareField) -#define F_REPARTITION_BCAST_TAG 24000 // broadcast in FieldLayout::repartion. -#define F_REDUCE_PERP_TAG 25000 // reduction in binary load balance. -#define F_GETSINGLE_TAG 26000 // IndexedBareField::getsingle() -#define F_REDUCE_TAG 27000 // Reduction in minloc/maxloc -#define F_LAYOUT_IO_TAG 28000 // Reduction in minloc/maxloc -#define F_TAG_CYCLE 1000 +#define HALO_FACE_TAG 20000 +#define HALO_EDGE_TAG 30000 +#define HALO_VERTEX_TAG 40000 +#define HALO_TAG_CYCLE 1000 + } // namespace detail +} // namespace ippl + +#define F_GUARD_CELLS_TAG 20000 // Field::fillGuardCells() +#define F_WRITE_TAG 21000 // Field::write() +#define F_READ_TAG 22000 // Field::read() +#define F_GEN_ASSIGN_TAG 23000 // assign(BareField,BareField) +#define F_REPARTITION_BCAST_TAG 24000 // broadcast in FieldLayout::repartion. +#define F_REDUCE_PERP_TAG 25000 // reduction in binary load balance. +#define F_GETSINGLE_TAG 26000 // IndexedBareField::getsingle() +#define F_REDUCE_TAG 27000 // Reduction in minloc/maxloc +#define F_LAYOUT_IO_TAG 28000 // Reduction in minloc/maxloc +#define F_TAG_CYCLE 1000 // // Tags for FieldView and FieldBlock // #define FV_2D_TAG 30000 // FieldView::update_2D_data() @@ -81,51 +79,51 @@ namespace ippl { #define P_SPATIAL_GHOST_TAG 56000 #define P_SPATIAL_RANGE_TAG 57000 #define P_RESET_ID_TAG 58000 -#define P_LAYOUT_CYCLE 1000 +#define P_LAYOUT_CYCLE 1000 // Tags for Ippl setup -#define IPPL_MAKE_HOST_MAP_TAG 60000 -#define IPPL_TAG_CYCLE 1000 +#define IPPL_MAKE_HOST_MAP_TAG 60000 +#define IPPL_TAG_CYCLE 1000 // Tags for Ippl application codes -#define IPPL_APP_TAG0 90000 -#define IPPL_APP_TAG1 91000 -#define IPPL_APP_TAG2 92000 -#define IPPL_APP_TAG3 93000 -#define IPPL_APP_TAG4 94000 -#define IPPL_APP_TAG5 95000 -#define IPPL_APP_TAG6 96000 -#define IPPL_APP_TAG7 97000 -#define IPPL_APP_TAG8 98000 -#define IPPL_APP_TAG9 99000 -#define IPPL_APP_CYCLE 1000 +#define IPPL_APP_TAG0 90000 +#define IPPL_APP_TAG1 91000 +#define IPPL_APP_TAG2 92000 +#define IPPL_APP_TAG3 93000 +#define IPPL_APP_TAG4 94000 +#define IPPL_APP_TAG5 95000 +#define IPPL_APP_TAG6 96000 +#define IPPL_APP_TAG7 97000 +#define IPPL_APP_TAG8 98000 +#define IPPL_APP_TAG9 99000 +#define IPPL_APP_CYCLE 1000 // IDs used to identify buffers created using the buffer factory interface // Periodic boundary conditions -#define IPPL_PERIODIC_BC_SEND 1000 -#define IPPL_PERIODIC_BC_RECV 2000 +#define IPPL_PERIODIC_BC_SEND 1000 +#define IPPL_PERIODIC_BC_RECV 2000 // Halo cells -#define IPPL_HALO_FACE_SEND 3000 -#define IPPL_HALO_FACE_RECV 4000 +#define IPPL_HALO_FACE_SEND 3000 +#define IPPL_HALO_FACE_RECV 4000 -#define IPPL_HALO_EDGE_SEND 5000 -#define IPPL_HALO_EDGE_RECV 6000 +#define IPPL_HALO_EDGE_SEND 5000 +#define IPPL_HALO_EDGE_RECV 6000 -#define IPPL_HALO_VERTEX_SEND 7000 -#define IPPL_HALO_VERTEX_RECV 8000 +#define IPPL_HALO_VERTEX_SEND 7000 +#define IPPL_HALO_VERTEX_RECV 8000 // Particle spatial layout -#define IPPL_PARTICLE_SEND 9000 -#define IPPL_PARTICLE_RECV 10000 +#define IPPL_PARTICLE_SEND 9000 +#define IPPL_PARTICLE_RECV 10000 // FFT Poisson Solver -#define IPPL_SOLVER_SEND 13000 -#define IPPL_SOLVER_RECV 14000 -#define IPPL_VICO_SEND 16000 -#define IPPL_VICO_RECV 17000 +#define IPPL_SOLVER_SEND 13000 +#define IPPL_SOLVER_RECV 14000 +#define IPPL_VICO_SEND 16000 +#define IPPL_VICO_RECV 17000 -#define OPEN_SOLVER_TAG 18000 -#define VICO_SOLVER_TAG 70000 +#define OPEN_SOLVER_TAG 18000 +#define VICO_SOLVER_TAG 70000 -#endif // TAGS_H +#endif // TAGS_H diff --git a/src/Decomposition/OrthogonalRecursiveBisection.h b/src/Decomposition/OrthogonalRecursiveBisection.h index 92cb1ea67..ca7cef6c9 100644 --- a/src/Decomposition/OrthogonalRecursiveBisection.h +++ b/src/Decomposition/OrthogonalRecursiveBisection.h @@ -23,11 +23,11 @@ #ifndef IPPL_ORTHOGONAL_RECURSIVE_BISECTION_H #define IPPL_ORTHOGONAL_RECURSIVE_BISECTION_H -#include "Particle/ParticleSpatialLayout.h" -#include "Particle/ParticleAttrib.h" -#include "Index/NDIndex.h" -#include "Index/Index.h" #include "FieldLayout/FieldLayout.h" +#include "Index/Index.h" +#include "Index/NDIndex.h" +#include "Particle/ParticleAttrib.h" +#include "Particle/ParticleSpatialLayout.h" #include "Region/NDRegion.h" namespace ippl { @@ -37,13 +37,13 @@ namespace ippl { * @tparam Dim dimension * @tparam M mesh */ - template + template class OrthogonalRecursiveBisection { public: using view_type = typename detail::ViewType::view_type; // Weight for reduction - Field bf_m; + Field bf_m; /*! * Initialize member field with mesh and field layout @@ -51,8 +51,8 @@ namespace ippl { * @param mesh Mesh * @param rho Density field */ - void initialize(FieldLayout& fl, UniformCartesian& mesh, - const Field& rho); + void initialize(FieldLayout& fl, UniformCartesian& mesh, + const Field& rho); /*! * Performs scatter operation of particle positions in field (weights) and @@ -61,8 +61,7 @@ namespace ippl { * @param fl FieldLayout * @param isFirstRepartition boolean which tells whether to scatter or not */ - bool binaryRepartition(const ParticleAttrib>& R, - FieldLayout& fl, + bool binaryRepartition(const ParticleAttrib>& R, FieldLayout& fl, const bool& isFirstRepartition); /*! @@ -78,8 +77,7 @@ namespace ippl { * @param dom Domain to reduce * @param cutAxis Index of cut axis */ - void perpendicularReduction(std::vector& res, unsigned int cutAxis, - NDIndex& dom); + void perpendicularReduction(std::vector& res, unsigned int cutAxis, NDIndex& dom); /*! * Find median of array @@ -96,21 +94,19 @@ namespace ippl { * @param cutAxis Index of cut axis * @param median Median */ - void cutDomain(std::vector>& domains, std::vector& procs, - int it, int cutAxis, int median); + void cutDomain(std::vector>& domains, std::vector& procs, int it, + int cutAxis, int median); /*! * Scattering of particle positions in field using a CIC method * @param r Weights */ - void scatterR(const ParticleAttrib>& r); + void scatterR(const ParticleAttrib>& r); - }; // class - -} // namespace + }; // class +} // namespace ippl #include "Decomposition/OrthogonalRecursiveBisection.hpp" -#endif // IPPL_ORTHOGONAL_RECURSIVE_BISECTION_H - +#endif // IPPL_ORTHOGONAL_RECURSIVE_BISECTION_H diff --git a/src/Decomposition/OrthogonalRecursiveBisection.hpp b/src/Decomposition/OrthogonalRecursiveBisection.hpp index df8a8180a..48ec6193c 100644 --- a/src/Decomposition/OrthogonalRecursiveBisection.hpp +++ b/src/Decomposition/OrthogonalRecursiveBisection.hpp @@ -2,306 +2,294 @@ namespace ippl { template - void - OrthogonalRecursiveBisection::initialize(FieldLayout& fl, - UniformCartesian& mesh, - const Field& rho) { - bf_m.initialize(mesh, fl); - bf_m = rho; - + void OrthogonalRecursiveBisection::initialize(FieldLayout& fl, + UniformCartesian& mesh, + const Field& rho) { + bf_m.initialize(mesh, fl); + bf_m = rho; } template - bool - OrthogonalRecursiveBisection::binaryRepartition(const ParticleAttrib>& R, - FieldLayout& fl, - const bool& isFirstRepartition) { - // Timings - static IpplTimings::TimerRef tbasicOp = IpplTimings::getTimer("basicOperations"); - static IpplTimings::TimerRef tperpReduction = IpplTimings::getTimer("perpReduction"); - static IpplTimings::TimerRef tallReduce = IpplTimings::getTimer("allReduce"); - static IpplTimings::TimerRef tscatter = IpplTimings::getTimer("scatterR"); - - // Scattering of particle positions in field - // In case of first repartition we know the density from the - // analytical expression and we use that for load balancing - // and create particles. Note the particles are created only - // after the first repartition and hence we cannot call scatter - // before it. - IpplTimings::startTimer(tscatter); - if(!isFirstRepartition) { - scatterR(R); - } - - IpplTimings::stopTimer(tscatter); - - IpplTimings::startTimer(tbasicOp); - - // Get number of ranks - int nprocs = Ippl::Comm->size(); - - // Start with whole domain and total number of nodes - std::vector> domains = {fl.getDomain()}; - std::vector procs = {nprocs}; - - // Arrays for reduction - std::vector reduced, reducedRank; - - // Start recursive repartition loop - unsigned int it = 0; - int maxprocs = nprocs; - IpplTimings::stopTimer(tbasicOp); - - while (maxprocs > 1) { - // Find cut axis - IpplTimings::startTimer(tbasicOp); - int cutAxis = findCutAxis(domains[it]); - IpplTimings::stopTimer(tbasicOp); - - // Reserve space - IpplTimings::startTimer(tperpReduction); - reduced.resize(domains[it][cutAxis].length()); - reducedRank.resize(domains[it][cutAxis].length()); - - std::fill(reducedRank.begin(), reducedRank.end(), 0.0); - std::fill(reduced.begin(), reduced.end(), 0.0); - - // Peform reduction with field of weights and communicate to the other ranks - perpendicularReduction(reducedRank, cutAxis, domains[it]); - IpplTimings::stopTimer(tperpReduction); - - // Communicate to all the reduced weights - IpplTimings::startTimer(tallReduce); - MPI_Allreduce(reducedRank.data(), reduced.data(), reducedRank.size(), - MPI_DOUBLE, MPI_SUM, Ippl::getComm()); - IpplTimings::stopTimer(tallReduce); - - // Find median of reduced weights - IpplTimings::startTimer(tbasicOp); - // Initialize median to some value (1 is lower bound value) - int median = 1; - median = findMedian(reduced); - IpplTimings::stopTimer(tbasicOp); - - // Cut domains and procs - IpplTimings::startTimer(tbasicOp); - cutDomain(domains, procs, it, cutAxis, median); - - // Update max procs - maxprocs = 0; - for (unsigned int i = 0; i < procs.size(); i++) { - if (procs[i] > maxprocs) { - maxprocs = procs[i]; - it = i; - } - } - IpplTimings::stopTimer(tbasicOp); - - // Clear all arrays - IpplTimings::startTimer(tperpReduction); - reduced.clear(); - reducedRank.clear(); - IpplTimings::stopTimer(tperpReduction); - } - - // Check that no plane was obtained in the repartition - IpplTimings::startTimer(tbasicOp); - for (unsigned int i = 0; i < domains.size(); i++) { - if (domains[i][0].length() == 1 || - domains[i][1].length() == 1 || - domains[i][2].length() == 1) - return false; - } - - // Update FieldLayout with new indices - fl.updateLayout(domains); - - // Update local field with new layout - bf_m.updateLayout(fl); - IpplTimings::stopTimer(tbasicOp); - - return true; + bool OrthogonalRecursiveBisection::binaryRepartition( + const ParticleAttrib>& R, FieldLayout& fl, + const bool& isFirstRepartition) { + // Timings + static IpplTimings::TimerRef tbasicOp = IpplTimings::getTimer("basicOperations"); + static IpplTimings::TimerRef tperpReduction = IpplTimings::getTimer("perpReduction"); + static IpplTimings::TimerRef tallReduce = IpplTimings::getTimer("allReduce"); + static IpplTimings::TimerRef tscatter = IpplTimings::getTimer("scatterR"); + + // Scattering of particle positions in field + // In case of first repartition we know the density from the + // analytical expression and we use that for load balancing + // and create particles. Note the particles are created only + // after the first repartition and hence we cannot call scatter + // before it. + IpplTimings::startTimer(tscatter); + if (!isFirstRepartition) { + scatterR(R); + } + + IpplTimings::stopTimer(tscatter); + + IpplTimings::startTimer(tbasicOp); + + // Get number of ranks + int nprocs = Ippl::Comm->size(); + + // Start with whole domain and total number of nodes + std::vector> domains = {fl.getDomain()}; + std::vector procs = {nprocs}; + + // Arrays for reduction + std::vector reduced, reducedRank; + + // Start recursive repartition loop + unsigned int it = 0; + int maxprocs = nprocs; + IpplTimings::stopTimer(tbasicOp); + + while (maxprocs > 1) { + // Find cut axis + IpplTimings::startTimer(tbasicOp); + int cutAxis = findCutAxis(domains[it]); + IpplTimings::stopTimer(tbasicOp); + + // Reserve space + IpplTimings::startTimer(tperpReduction); + reduced.resize(domains[it][cutAxis].length()); + reducedRank.resize(domains[it][cutAxis].length()); + + std::fill(reducedRank.begin(), reducedRank.end(), 0.0); + std::fill(reduced.begin(), reduced.end(), 0.0); + + // Peform reduction with field of weights and communicate to the other ranks + perpendicularReduction(reducedRank, cutAxis, domains[it]); + IpplTimings::stopTimer(tperpReduction); + + // Communicate to all the reduced weights + IpplTimings::startTimer(tallReduce); + MPI_Allreduce(reducedRank.data(), reduced.data(), reducedRank.size(), MPI_DOUBLE, + MPI_SUM, Ippl::getComm()); + IpplTimings::stopTimer(tallReduce); + + // Find median of reduced weights + IpplTimings::startTimer(tbasicOp); + // Initialize median to some value (1 is lower bound value) + int median = 1; + median = findMedian(reduced); + IpplTimings::stopTimer(tbasicOp); + + // Cut domains and procs + IpplTimings::startTimer(tbasicOp); + cutDomain(domains, procs, it, cutAxis, median); + + // Update max procs + maxprocs = 0; + for (unsigned int i = 0; i < procs.size(); i++) { + if (procs[i] > maxprocs) { + maxprocs = procs[i]; + it = i; + } + } + IpplTimings::stopTimer(tbasicOp); + + // Clear all arrays + IpplTimings::startTimer(tperpReduction); + reduced.clear(); + reducedRank.clear(); + IpplTimings::stopTimer(tperpReduction); + } + + // Check that no plane was obtained in the repartition + IpplTimings::startTimer(tbasicOp); + for (unsigned int i = 0; i < domains.size(); i++) { + if (domains[i][0].length() == 1 || domains[i][1].length() == 1 + || domains[i][2].length() == 1) + return false; + } + + // Update FieldLayout with new indices + fl.updateLayout(domains); + + // Update local field with new layout + bf_m.updateLayout(fl); + IpplTimings::stopTimer(tbasicOp); + + return true; } - - template < class T, unsigned Dim, class M> - int - OrthogonalRecursiveBisection::findCutAxis(NDIndex& dom) { - int cutAxis = 0; - unsigned int maxLength = 0; - - // Iterate along all the dimensions - for (unsigned int d = 0; d < Dim; d++) { - // Find longest domain size - if (dom[d].length() > maxLength) { - maxLength = dom[d].length(); - cutAxis = d; - } - } - - return cutAxis; - } - - - template < class T, unsigned Dim, class M> - void - OrthogonalRecursiveBisection::perpendicularReduction( - std::vector& rankWeights, - unsigned int cutAxis, - NDIndex& dom) { - // Check if domains overlap, if not no need for reduction - NDIndex lDom = bf_m.getOwned(); - if (lDom[cutAxis].first() > dom[cutAxis].last() || - lDom[cutAxis].last() < dom[cutAxis].first()) - return; - // Get field's local weights - int nghost = bf_m.getNghost(); - const view_type data = bf_m.getView(); - // Determine the iteration bounds of the reduction - int cutAxisFirst = std::max(lDom[cutAxis].first(), dom[cutAxis].first()) - - lDom[cutAxis].first() + nghost; - int cutAxisLast = std::min(lDom[cutAxis].last(), dom[cutAxis].last()) - - lDom[cutAxis].first() + nghost; - // Set iterator for where to write in the reduced array - unsigned int arrayStart = 0; - if (dom[cutAxis].first() < lDom[cutAxis].first()) - arrayStart = lDom[cutAxis].first() - dom[cutAxis].first(); - // Face of domain has two directions: 1 and 2 - int perpAxis1 = (cutAxis+1) % Dim; - int perpAxis2 = (cutAxis+2) % Dim; - // inf and sup bounds must be within the domain to reduce, if not no need to reduce - int inf1 = std::max(lDom[perpAxis1].first(), dom[perpAxis1].first()) - - lDom[perpAxis1].first() + nghost; - int inf2 = std::max(lDom[perpAxis2].first(), dom[perpAxis2].first()) - - lDom[perpAxis2].first() + nghost; - int sup1 = std::min(lDom[perpAxis1].last(), dom[perpAxis1].last()) - - lDom[perpAxis1].first() + nghost; - int sup2 = std::min(lDom[perpAxis2].last(), dom[perpAxis2].last()) - - lDom[perpAxis2].first() + nghost; - if (sup1 < inf1 || sup2 < inf2) - return; - // The +1 is for Kokkos loop - sup1++; sup2++; - - // Iterate along cutAxis - using mdrange_t = Kokkos::MDRangePolicy>; - for (int i = cutAxisFirst; i <= cutAxisLast; i++) { - // Reducing over perpendicular plane defined by cutAxis - T tempRes = T(0); - switch (cutAxis) { - default: - case 0: - Kokkos::parallel_reduce("ORB weight reduction (0)", - mdrange_t({inf1, inf2},{sup1, sup2}), - KOKKOS_LAMBDA(const int j, const int k, T& weight) { - weight += data(i,j,k); - }, tempRes); - break; - case 1: - Kokkos::parallel_reduce("ORB weight reduction (1)", - mdrange_t({inf2, inf1},{sup2, sup1}), - KOKKOS_LAMBDA(const int j, const int k, T& weight) { - weight += data(j,i,k); - }, tempRes); - break; - case 2: - Kokkos::parallel_reduce("ORB weight reduction (2)", - mdrange_t({inf1, inf2},{sup1, sup2}), - KOKKOS_LAMBDA(const int j, const int k, T& weight) { - weight += data(j,k,i); - }, tempRes); - break; - } - - Kokkos::fence(); - - rankWeights[arrayStart] = tempRes; - arrayStart++; - } + template + int OrthogonalRecursiveBisection::findCutAxis(NDIndex& dom) { + int cutAxis = 0; + unsigned int maxLength = 0; + + // Iterate along all the dimensions + for (unsigned int d = 0; d < Dim; d++) { + // Find longest domain size + if (dom[d].length() > maxLength) { + maxLength = dom[d].length(); + cutAxis = d; + } + } + + return cutAxis; } - - - template < class T, unsigned Dim, class M> - int - OrthogonalRecursiveBisection::findMedian(std::vector& w) { - // Special case when array must be cut in half in order to not have planes - if (w.size() == 4) - return 1; - - // Get total sum of array - T tot = std::accumulate(w.begin(), w.end(), T(0)); - - // Find position of median as half of total in array - T half = 0.5 * tot; - T curr = T(0); - // Do not need to iterate to full extent since it must not give planes - for (unsigned int i = 0; i < w.size()-1; i++) { - curr += w[i]; - if (curr >= half) { - // If all particles are in the first plane, cut at 1 so to have size 2 - if (i == 0) - return 1; - T previous = curr - w[i]; - // curr - half < half - previous - if ((curr + previous) <= tot && curr != half) { // if true then take current i, otherwise i-1 - if (i == w.size() - 2) - return (i-1); - else - return i; - } else { - return (i > 1) ? (i-1) : 1; - } - } - } - // If all particles are in the last plane, cut two indices before the end so to have size 2 - return w.size()-3; + + template + void OrthogonalRecursiveBisection::perpendicularReduction( + std::vector& rankWeights, unsigned int cutAxis, NDIndex& dom) { + // Check if domains overlap, if not no need for reduction + NDIndex lDom = bf_m.getOwned(); + if (lDom[cutAxis].first() > dom[cutAxis].last() + || lDom[cutAxis].last() < dom[cutAxis].first()) + return; + // Get field's local weights + int nghost = bf_m.getNghost(); + const view_type data = bf_m.getView(); + // Determine the iteration bounds of the reduction + int cutAxisFirst = + std::max(lDom[cutAxis].first(), dom[cutAxis].first()) - lDom[cutAxis].first() + nghost; + int cutAxisLast = + std::min(lDom[cutAxis].last(), dom[cutAxis].last()) - lDom[cutAxis].first() + nghost; + // Set iterator for where to write in the reduced array + unsigned int arrayStart = 0; + if (dom[cutAxis].first() < lDom[cutAxis].first()) + arrayStart = lDom[cutAxis].first() - dom[cutAxis].first(); + // Face of domain has two directions: 1 and 2 + int perpAxis1 = (cutAxis + 1) % Dim; + int perpAxis2 = (cutAxis + 2) % Dim; + // inf and sup bounds must be within the domain to reduce, if not no need to reduce + int inf1 = std::max(lDom[perpAxis1].first(), dom[perpAxis1].first()) + - lDom[perpAxis1].first() + nghost; + int inf2 = std::max(lDom[perpAxis2].first(), dom[perpAxis2].first()) + - lDom[perpAxis2].first() + nghost; + int sup1 = std::min(lDom[perpAxis1].last(), dom[perpAxis1].last()) - lDom[perpAxis1].first() + + nghost; + int sup2 = std::min(lDom[perpAxis2].last(), dom[perpAxis2].last()) - lDom[perpAxis2].first() + + nghost; + if (sup1 < inf1 || sup2 < inf2) + return; + // The +1 is for Kokkos loop + sup1++; + sup2++; + + // Iterate along cutAxis + using mdrange_t = Kokkos::MDRangePolicy>; + for (int i = cutAxisFirst; i <= cutAxisLast; i++) { + // Reducing over perpendicular plane defined by cutAxis + T tempRes = T(0); + switch (cutAxis) { + default: + case 0: + Kokkos::parallel_reduce( + "ORB weight reduction (0)", mdrange_t({inf1, inf2}, {sup1, sup2}), + KOKKOS_LAMBDA(const int j, const int k, T& weight) { + weight += data(i, j, k); + }, + tempRes); + break; + case 1: + Kokkos::parallel_reduce( + "ORB weight reduction (1)", mdrange_t({inf2, inf1}, {sup2, sup1}), + KOKKOS_LAMBDA(const int j, const int k, T& weight) { + weight += data(j, i, k); + }, + tempRes); + break; + case 2: + Kokkos::parallel_reduce( + "ORB weight reduction (2)", mdrange_t({inf1, inf2}, {sup1, sup2}), + KOKKOS_LAMBDA(const int j, const int k, T& weight) { + weight += data(j, k, i); + }, + tempRes); + break; + } + + Kokkos::fence(); + + rankWeights[arrayStart] = tempRes; + arrayStart++; + } } + template + int OrthogonalRecursiveBisection::findMedian(std::vector& w) { + // Special case when array must be cut in half in order to not have planes + if (w.size() == 4) + return 1; - template < class T, unsigned Dim, class M> - void - OrthogonalRecursiveBisection::cutDomain(std::vector>& domains, - std::vector& procs, int it, int cutAxis, int median) { - // Cut domains[it] in half at median along cutAxis - NDIndex leftDom, rightDom; - domains[it].split(leftDom, rightDom, cutAxis, median + domains[it][cutAxis].first()); - domains[it] = leftDom; - domains.insert(domains.begin() + it + 1, 1, rightDom); - - // Cut procs in half - int temp = procs[it]; - procs[it] = procs[it] / 2; - procs.insert(procs.begin() + it + 1, 1, temp - procs[it]); + // Get total sum of array + T tot = std::accumulate(w.begin(), w.end(), T(0)); + + // Find position of median as half of total in array + T half = 0.5 * tot; + T curr = T(0); + // Do not need to iterate to full extent since it must not give planes + for (unsigned int i = 0; i < w.size() - 1; i++) { + curr += w[i]; + if (curr >= half) { + // If all particles are in the first plane, cut at 1 so to have size 2 + if (i == 0) + return 1; + T previous = curr - w[i]; + // curr - half < half - previous + if ((curr + previous) <= tot + && curr != half) { // if true then take current i, otherwise i-1 + if (i == w.size() - 2) + return (i - 1); + else + return i; + } else { + return (i > 1) ? (i - 1) : 1; + } + } + } + // If all particles are in the last plane, cut two indices before the end so to have size 2 + return w.size() - 3; } + template + void OrthogonalRecursiveBisection::cutDomain(std::vector>& domains, + std::vector& procs, int it, + int cutAxis, int median) { + // Cut domains[it] in half at median along cutAxis + NDIndex leftDom, rightDom; + domains[it].split(leftDom, rightDom, cutAxis, median + domains[it][cutAxis].first()); + domains[it] = leftDom; + domains.insert(domains.begin() + it + 1, 1, rightDom); + + // Cut procs in half + int temp = procs[it]; + procs[it] = procs[it] / 2; + procs.insert(procs.begin() + it + 1, 1, temp - procs[it]); + } - template < class T, unsigned Dim, class M> - void - OrthogonalRecursiveBisection::scatterR(const ParticleAttrib>& r) { + template + void OrthogonalRecursiveBisection::scatterR( + const ParticleAttrib>& r) { using vector_type = typename M::vector_type; // Reset local field bf_m = 0.0; // Get local data typename Field::view_type view = bf_m.getView(); - const M& mesh = bf_m.get_mesh(); - const FieldLayout& layout = bf_m.getLayout(); - const NDIndex& lDom = layout.getLocalNDIndex(); - const int nghost = bf_m.getNghost(); - + const M& mesh = bf_m.get_mesh(); + const FieldLayout& layout = bf_m.getLayout(); + const NDIndex& lDom = layout.getLocalNDIndex(); + const int nghost = bf_m.getNghost(); + // Get spacings - const vector_type& dx = mesh.getMeshSpacing(); + const vector_type& dx = mesh.getMeshSpacing(); const vector_type& origin = mesh.getOrigin(); - const vector_type invdx = 1.0 / dx; + const vector_type invdx = 1.0 / dx; Kokkos::parallel_for( - "ParticleAttrib::scatterR", - r.getParticleCount(), - KOKKOS_LAMBDA(const size_t idx) - { + "ParticleAttrib::scatterR", r.getParticleCount(), KOKKOS_LAMBDA(const size_t idx) { // Find nearest grid point - vector_type l = (r(idx) - origin) * invdx + 0.5; - Vector index = l; + vector_type l = (r(idx) - origin) * invdx + 0.5; + Vector index = l; Vector whi = l - index; Vector wlo = 1.0 - whi; @@ -310,18 +298,17 @@ namespace ippl { const size_t k = index[2] - lDom[2].first() + nghost; // Scatter - Kokkos::atomic_add(&view(i-1, j-1, k-1), wlo[0] * wlo[1] * wlo[2]); - Kokkos::atomic_add(&view(i-1, j-1, k ), wlo[0] * wlo[1] * whi[2]); - Kokkos::atomic_add(&view(i-1, j, k-1), wlo[0] * whi[1] * wlo[2]); - Kokkos::atomic_add(&view(i-1, j, k ), wlo[0] * whi[1] * whi[2]); - Kokkos::atomic_add(&view(i, j-1, k-1), whi[0] * wlo[1] * wlo[2]); - Kokkos::atomic_add(&view(i, j-1, k ), whi[0] * wlo[1] * whi[2]); - Kokkos::atomic_add(&view(i, j, k-1), whi[0] * whi[1] * wlo[2]); - Kokkos::atomic_add(&view(i, j, k ), whi[0] * whi[1] * whi[2]); - } - ); - + Kokkos::atomic_add(&view(i - 1, j - 1, k - 1), wlo[0] * wlo[1] * wlo[2]); + Kokkos::atomic_add(&view(i - 1, j - 1, k), wlo[0] * wlo[1] * whi[2]); + Kokkos::atomic_add(&view(i - 1, j, k - 1), wlo[0] * whi[1] * wlo[2]); + Kokkos::atomic_add(&view(i - 1, j, k), wlo[0] * whi[1] * whi[2]); + Kokkos::atomic_add(&view(i, j - 1, k - 1), whi[0] * wlo[1] * wlo[2]); + Kokkos::atomic_add(&view(i, j - 1, k), whi[0] * wlo[1] * whi[2]); + Kokkos::atomic_add(&view(i, j, k - 1), whi[0] * whi[1] * wlo[2]); + Kokkos::atomic_add(&view(i, j, k), whi[0] * whi[1] * whi[2]); + }); + bf_m.accumulateHalo(); } - -} // namespace + +} // namespace ippl diff --git a/src/Expression/IpplExpressions.h b/src/Expression/IpplExpressions.h index ae23846fe..2986531d0 100644 --- a/src/Expression/IpplExpressions.h +++ b/src/Expression/IpplExpressions.h @@ -40,13 +40,11 @@ namespace ippl { /*! * Access single element of the expression */ - KOKKOS_INLINE_FUNCTION - auto operator[](size_t i) const { + KOKKOS_INLINE_FUNCTION auto operator[](size_t i) const { return static_cast(*this)[i]; } }; - /*! * This expression is only used to allocate * enough memory for the kernel on the device. @@ -55,38 +53,32 @@ namespace ippl { */ template struct CapturedExpression { - template - KOKKOS_INLINE_FUNCTION - auto operator()(Args... args) const { + template + KOKKOS_INLINE_FUNCTION auto operator()(Args... args) const { return reinterpret_cast(*this)(args...); } char buffer[N]; }; - /*! * Expression for intrinsic data types. They are both regular expressions * and field expressions. */ template - struct Scalar : public Expression, sizeof(T)> - { + struct Scalar : public Expression, sizeof(T)> { typedef T value_type; - KOKKOS_FUNCTION - Scalar(value_type val) : val_m(val) { } + Scalar(value_type val) + : val_m(val) {} /*! * Access the scalar value with single index. * This is used for binary operations between * Scalar and Vector. */ - KOKKOS_INLINE_FUNCTION - value_type operator[](size_t /*i*/) const { - return val_m; - } + KOKKOS_INLINE_FUNCTION value_type operator[](size_t /*i*/) const { return val_m; } /*! * Access the scalar value with multiple indices. @@ -94,9 +86,8 @@ namespace ippl { * Scalar and BareField, Scalar and BareField, * and Scalar and Field. */ - template - KOKKOS_INLINE_FUNCTION - auto operator()(Args... /*args*/) const { + template + KOKKOS_INLINE_FUNCTION auto operator()(Args... /*args*/) const { return val_m; } @@ -104,16 +95,14 @@ namespace ippl { value_type val_m; }; + template + struct isExpression : std::false_type {}; - template - struct isExpression : std::false_type {}; - - template - struct isExpression> : std::true_type {}; - - } -} + template + struct isExpression> : std::true_type {}; + } // namespace detail +} // namespace ippl #include "Expression/IpplOperations.h" diff --git a/src/Expression/IpplOperations.h b/src/Expression/IpplOperations.h index 5454143fa..072abbded 100644 --- a/src/Expression/IpplOperations.h +++ b/src/Expression/IpplOperations.h @@ -25,110 +25,106 @@ namespace ippl { * @file IpplOperations.h */ - #define DefineUnaryOperation(fun, name, op1, op2) \ - template \ - struct fun : public detail::Expression, sizeof(E)> { \ - KOKKOS_FUNCTION \ - fun(const E& u) : u_m(u) { } \ - \ - KOKKOS_INLINE_FUNCTION \ - auto operator[](size_t i) const { return op1; } \ - \ - template \ - KOKKOS_INLINE_FUNCTION \ - auto operator()(Args... args) const { \ - return op2; \ - } \ - \ - private: \ - const E u_m; \ - }; \ - \ - template \ - KOKKOS_INLINE_FUNCTION \ - fun name(const detail::Expression& u) { \ - return fun(*static_cast(&u)); \ - } \ +#define DefineUnaryOperation(fun, name, op1, op2) \ + template \ + struct fun : public detail::Expression, sizeof(E)> { \ + KOKKOS_FUNCTION \ + fun(const E& u) \ + : u_m(u) {} \ + \ + KOKKOS_INLINE_FUNCTION auto operator[](size_t i) const { return op1; } \ + \ + template \ + KOKKOS_INLINE_FUNCTION auto operator()(Args... args) const { \ + return op2; \ + } \ + \ + private: \ + const E u_m; \ + }; \ + \ + template \ + KOKKOS_INLINE_FUNCTION fun name(const detail::Expression& u) { \ + return fun(*static_cast(&u)); \ + } /// @cond - DefineUnaryOperation(UnaryMinus, operator-, -u_m[i], -u_m(args...)) - DefineUnaryOperation(UnaryPlus, operator+, +u_m[i], +u_m(args...)) - DefineUnaryOperation(BitwiseNot, operator~, ~u_m[i], ~u_m(args...)) - DefineUnaryOperation(Not, operator!, !u_m[i], !u_m(args...)) - - DefineUnaryOperation(ArcCos, acos, acos(u_m[i]), acos(u_m(args...))) - DefineUnaryOperation(ArcSin, asin, asin(u_m[i]), asin(u_m(args...))) - DefineUnaryOperation(ArcTan, atan, atan(u_m[i]), atan(u_m(args...))) - DefineUnaryOperation(Ceil, ceil, ceil(u_m[i]), ceil(u_m(args...))) - DefineUnaryOperation(Cos, cos, cos(u_m[i]), cos(u_m(args...))) - DefineUnaryOperation(HypCos, cosh, cosh(u_m[i]), cosh(u_m(args...))) - DefineUnaryOperation(Exp, exp, exp(u_m[i]), exp(u_m(args...))) - DefineUnaryOperation(Fabs, fabs, fabs(u_m[i]), fabs(u_m(args...))) - DefineUnaryOperation(Floor, floor, floor(u_m[i]), floor(u_m(args...))) - DefineUnaryOperation(Log, log, log(u_m[i]), log(u_m(args...))) - DefineUnaryOperation(Log10, log10, log10(u_m[i]), log10(u_m(args...))) - DefineUnaryOperation(Sin, sin, sin(u_m[i]), sin(u_m(args...))) - DefineUnaryOperation(HypSin, sinh, sinh(u_m[i]), sinh(u_m(args...))) - DefineUnaryOperation(Sqrt, sqrt, sqrt(u_m[i]), sqrt(u_m(args...))) - DefineUnaryOperation(Tan, tan, tan(u_m[i]), tan(u_m(args...))) - DefineUnaryOperation(HypTan, tanh, tanh(u_m[i]), tanh(u_m(args...))) - DefineUnaryOperation(Erf, erf, erf(u_m[i]), erf(u_m(args...))) - /// @endcond - - - /*! - * Macro to overload C++ operators for the Scalar, BareField and Vector class. - * @param fun name of the expression template function - * @param name overloaded operator - * @param op1 operation for single index access - * @param op2 operation for multiple indices access - */ - #define DefineBinaryOperation(fun, name, op1, op2) \ - template \ - struct fun : public detail::Expression, sizeof(E1) + sizeof(E2)> { \ - KOKKOS_FUNCTION \ - fun(const E1& u, const E2& v) : u_m(u), v_m(v) { } \ - \ - KOKKOS_INLINE_FUNCTION \ - auto operator[](size_t i) const { return op1; } \ - \ - template \ - KOKKOS_INLINE_FUNCTION \ - auto operator()(Args... args) const { \ - return op2; \ - } \ - \ - private: \ - const E1 u_m; \ - const E2 v_m; \ - }; \ - \ - template \ - KOKKOS_INLINE_FUNCTION \ - fun name(const detail::Expression& u, \ - const detail::Expression& v) { \ - return fun(*static_cast(&u), \ - *static_cast(&v)); \ - } \ - \ - template ::value>> \ - KOKKOS_INLINE_FUNCTION \ - fun> name(const detail::Expression& u, \ - const T& v) { \ - return fun>(*static_cast(&u), v); \ - } \ - \ - template ::value>> \ - KOKKOS_INLINE_FUNCTION \ - fun, E> name(const T& u, \ - const detail::Expression& v) { \ - return fun, E>(u, *static_cast(&v)); \ + // clang-format off + DefineUnaryOperation(UnaryMinus, operator-, -u_m[i], -u_m(args...)) + DefineUnaryOperation(UnaryPlus, operator+, +u_m[i], +u_m(args...)) + DefineUnaryOperation(BitwiseNot, operator~, ~u_m[i], ~u_m(args...)) + DefineUnaryOperation(Not, operator!, !u_m[i], !u_m(args...)) + + DefineUnaryOperation(ArcCos, acos, acos(u_m[i]), acos(u_m(args...))) + DefineUnaryOperation(ArcSin, asin, asin(u_m[i]), asin(u_m(args...))) + DefineUnaryOperation(ArcTan, atan, atan(u_m[i]), atan(u_m(args...))) + DefineUnaryOperation(Ceil, ceil, ceil(u_m[i]), ceil(u_m(args...))) + DefineUnaryOperation(Cos, cos, cos(u_m[i]), cos(u_m(args...))) + DefineUnaryOperation(HypCos, cosh, cosh(u_m[i]), cosh(u_m(args...))) + DefineUnaryOperation(Exp, exp, exp(u_m[i]), exp(u_m(args...))) + DefineUnaryOperation(Fabs, fabs, fabs(u_m[i]), fabs(u_m(args...))) + DefineUnaryOperation(Floor, floor, floor(u_m[i]), floor(u_m(args...))) + DefineUnaryOperation(Log, log, log(u_m[i]), log(u_m(args...))) + DefineUnaryOperation(Log10, log10, log10(u_m[i]), log10(u_m(args...))) + DefineUnaryOperation(Sin, sin, sin(u_m[i]), sin(u_m(args...))) + DefineUnaryOperation(HypSin, sinh, sinh(u_m[i]), sinh(u_m(args...))) + DefineUnaryOperation(Sqrt, sqrt, sqrt(u_m[i]), sqrt(u_m(args...))) + DefineUnaryOperation(Tan, tan, tan(u_m[i]), tan(u_m(args...))) + DefineUnaryOperation(HypTan, tanh, tanh(u_m[i]), tanh(u_m(args...))) + DefineUnaryOperation(Erf, erf, erf(u_m[i]), erf(u_m(args...))) +// clang-format on +/// @endcond + +/*! + * Macro to overload C++ operators for the Scalar, BareField and Vector class. + * @param fun name of the expression template function + * @param name overloaded operator + * @param op1 operation for single index access + * @param op2 operation for multiple indices access + */ +#define DefineBinaryOperation(fun, name, op1, op2) \ + template \ + struct fun : public detail::Expression, sizeof(E1) + sizeof(E2)> { \ + KOKKOS_FUNCTION \ + fun(const E1& u, const E2& v) \ + : u_m(u) \ + , v_m(v) {} \ + \ + KOKKOS_INLINE_FUNCTION auto operator[](size_t i) const { return op1; } \ + \ + template \ + KOKKOS_INLINE_FUNCTION auto operator()(Args... args) const { \ + return op2; \ + } \ + \ + private: \ + const E1 u_m; \ + const E2 v_m; \ + }; \ + \ + template \ + KOKKOS_INLINE_FUNCTION fun name(const detail::Expression& u, \ + const detail::Expression& v) { \ + return fun(*static_cast(&u), *static_cast(&v)); \ + } \ + \ + template ::value>> \ + KOKKOS_INLINE_FUNCTION fun> name(const detail::Expression& u, \ + const T& v) { \ + return fun>(*static_cast(&u), v); \ + } \ + \ + template ::value>> \ + KOKKOS_INLINE_FUNCTION fun, E> name(const T& u, \ + const detail::Expression& v) { \ + return fun, E>(u, *static_cast(&v)); \ } /// @cond + // clang-format off DefineBinaryOperation(Add, operator+, u_m[i] + v_m[i], u_m(args...) + v_m(args...)) DefineBinaryOperation(Subtract, operator-, u_m[i] - v_m[i], u_m(args...) - v_m(args...)) DefineBinaryOperation(Multiply, operator*, u_m[i] * v_m[i], u_m(args...) * v_m(args...)) @@ -142,48 +138,49 @@ namespace ippl { DefineBinaryOperation(NEQ, operator!=, u_m[i] != v_m[i], u_m(args...) != v_m(args...)) DefineBinaryOperation(And, operator&&, u_m[i] && v_m[i], u_m(args...) && v_m(args...)) DefineBinaryOperation(Or, operator||, u_m[i] || v_m[i], u_m(args...) || v_m(args...)) - + DefineBinaryOperation(BitwiseAnd, operator&, u_m[i] & v_m[i], u_m(args...) & v_m(args...)) DefineBinaryOperation(BitwiseOr, operator|, u_m[i] | v_m[i], u_m(args...) | v_m(args...)) DefineBinaryOperation(BitwiseXor, operator^, u_m[i] ^ v_m[i], u_m(args...) ^ v_m(args...)) - - DefineBinaryOperation(Copysign, copysign, copysign(u_m[i],v_m[i]), + + DefineBinaryOperation(Copysign, copysign, copysign(u_m[i],v_m[i]), copysign(u_m(args...),v_m(args...))) - DefineBinaryOperation(Ldexp, ldexp, ldexp(u_m[i],v_m[i]), + DefineBinaryOperation(Ldexp, ldexp, ldexp(u_m[i],v_m[i]), ldexp(u_m(args...),v_m(args...))) - DefineBinaryOperation(Fmod, fmod, fmod(u_m[i],v_m[i]), + DefineBinaryOperation(Fmod, fmod, fmod(u_m[i],v_m[i]), fmod(u_m(args...),v_m(args...))) DefineBinaryOperation(Pow, pow, pow(u_m[i],v_m[i]), pow(u_m(args...),v_m(args...))) - DefineBinaryOperation(ArcTan2, atan2, atan2(u_m[i],v_m[i]), + DefineBinaryOperation(ArcTan2, atan2, atan2(u_m[i],v_m[i]), atan2(u_m(args...),v_m(args...))) + // clang-format on /// @endcond - namespace detail { /*! - * Meta function of cross product. This function is only supported for 3-dimensional vectors. + * Meta function of cross product. This function is only supported for 3-dimensional + * vectors. */ template struct meta_cross : public detail::Expression, sizeof(E1) + sizeof(E2)> { KOKKOS_FUNCTION - meta_cross(const E1& u, const E2& v) : u_m(u), v_m(v) { } + meta_cross(const E1& u, const E2& v) + : u_m(u) + , v_m(v) {} /* * Vector::cross */ - KOKKOS_INLINE_FUNCTION - auto operator[](size_t i) const { + KOKKOS_INLINE_FUNCTION auto operator[](size_t i) const { const size_t j = (i + 1) % 3; const size_t k = (i + 2) % 3; - return u_m[j] * v_m[k] - u_m[k] * v_m[j]; + return u_m[j] * v_m[k] - u_m[k] * v_m[j]; } /* * This is required for BareField::cross */ - template - KOKKOS_INLINE_FUNCTION - auto operator()(Args... args) const { + template + KOKKOS_INLINE_FUNCTION auto operator()(Args... args) const { return cross(u_m(args...), v_m(args...)); } @@ -191,14 +188,12 @@ namespace ippl { const E1 u_m; const E2 v_m; }; - } + } // namespace detail template - KOKKOS_INLINE_FUNCTION - detail::meta_cross cross(const detail::Expression& u, - const detail::Expression& v) { - return detail::meta_cross(*static_cast(&u), - *static_cast(&v)); + KOKKOS_INLINE_FUNCTION detail::meta_cross cross(const detail::Expression& u, + const detail::Expression& v) { + return detail::meta_cross(*static_cast(&u), *static_cast(&v)); } namespace detail { @@ -208,26 +203,26 @@ namespace ippl { template struct meta_dot : public Expression, sizeof(E1) + sizeof(E2)> { KOKKOS_FUNCTION - meta_dot(const E1& u, const E2& v) : u_m(u), v_m(v) { } + meta_dot(const E1& u, const E2& v) + : u_m(u) + , v_m(v) {} /* * Vector::dot */ - KOKKOS_INLINE_FUNCTION - auto apply() const { + KOKKOS_INLINE_FUNCTION auto apply() const { typename E1::value_type res = 0.0; for (size_t i = 0; i < E1::dim; ++i) { res += u_m[i] * v_m[i]; } - return res; //u_m[0] * v_m[0] + u_m[1] * v_m[1] + u_m[2] * v_m[2]; + return res; // u_m[0] * v_m[0] + u_m[1] * v_m[1] + u_m[2] * v_m[2]; } /* * This is required for BareField::dot */ - template - KOKKOS_INLINE_FUNCTION - auto operator()(Args... args) const { + template + KOKKOS_INLINE_FUNCTION auto operator()(Args... args) const { return dot(u_m(args...), v_m(args...)).apply(); } @@ -235,14 +230,12 @@ namespace ippl { const E1 u_m; const E2 v_m; }; - } + } // namespace detail template - KOKKOS_INLINE_FUNCTION - detail::meta_dot dot(const detail::Expression& u, - const detail::Expression& v) { - return detail::meta_dot(*static_cast(&u), - *static_cast(&v)); + KOKKOS_INLINE_FUNCTION detail::meta_dot dot(const detail::Expression& u, + const detail::Expression& v) { + return detail::meta_dot(*static_cast(&u), *static_cast(&v)); } namespace detail { @@ -251,40 +244,36 @@ namespace ippl { */ template - struct meta_grad : public Expression, - sizeof(E) + 3 * sizeof(typename E::Mesh_t::vector_type)> - { - + struct meta_grad + : public Expression, + sizeof(E) + 3 * sizeof(typename E::Mesh_t::vector_type)> { KOKKOS_FUNCTION - meta_grad(const E& u, - const typename E::Mesh_t::vector_type& xvector, - const typename E::Mesh_t::vector_type& yvector, - const typename E::Mesh_t::vector_type& zvector) - : u_m(u) - , xvector_m(xvector) - , yvector_m(yvector) - , zvector_m(zvector) - { } + meta_grad(const E& u, const typename E::Mesh_t::vector_type& xvector, + const typename E::Mesh_t::vector_type& yvector, + const typename E::Mesh_t::vector_type& zvector) + : u_m(u) + , xvector_m(xvector) + , yvector_m(yvector) + , zvector_m(zvector) {} /* * 3-dimensional grad */ - KOKKOS_INLINE_FUNCTION - auto operator()(size_t i, size_t j, size_t k) const { - return xvector_m * (u_m(i+1, j, k) - u_m(i-1, j, k )) + - yvector_m * (u_m(i , j+1, k) - u_m(i , j-1, k )) + - zvector_m * (u_m(i , j , k+1) - u_m(i , j , k-1)); + KOKKOS_INLINE_FUNCTION auto operator()(size_t i, size_t j, size_t k) const { + return xvector_m * (u_m(i + 1, j, k) - u_m(i - 1, j, k)) + + yvector_m * (u_m(i, j + 1, k) - u_m(i, j - 1, k)) + + zvector_m * (u_m(i, j, k + 1) - u_m(i, j, k - 1)); } private: - using Mesh_t = typename E::Mesh_t; + using Mesh_t = typename E::Mesh_t; using vector_type = typename Mesh_t::vector_type; const E u_m; const vector_type xvector_m; const vector_type yvector_m; const vector_type zvector_m; }; - } + } // namespace detail namespace detail { @@ -292,31 +281,29 @@ namespace ippl { * Meta function of divergence */ template - struct meta_div : public Expression, sizeof(E) + 3 * sizeof(typename E::Mesh_t::vector_type)> { - + struct meta_div + : public Expression, + sizeof(E) + 3 * sizeof(typename E::Mesh_t::vector_type)> { KOKKOS_FUNCTION - meta_div(const E& u, - const typename E::Mesh_t::vector_type& xvector, + meta_div(const E& u, const typename E::Mesh_t::vector_type& xvector, const typename E::Mesh_t::vector_type& yvector, const typename E::Mesh_t::vector_type& zvector) - : u_m(u) - , xvector_m(xvector) - , yvector_m(yvector) - , zvector_m(zvector) - { } + : u_m(u) + , xvector_m(xvector) + , yvector_m(yvector) + , zvector_m(zvector) {} /* * 3-dimensional div */ - KOKKOS_INLINE_FUNCTION - auto operator()(size_t i, size_t j, size_t k) const { - return dot(xvector_m, (u_m(i+1, j, k) - u_m(i-1, j, k ))).apply() + - dot(yvector_m, (u_m(i , j+1, k) - u_m(i , j-1, k ))).apply() + - dot(zvector_m, (u_m(i , j , k+1) - u_m(i , j , k-1))).apply(); + KOKKOS_INLINE_FUNCTION auto operator()(size_t i, size_t j, size_t k) const { + return dot(xvector_m, (u_m(i + 1, j, k) - u_m(i - 1, j, k))).apply() + + dot(yvector_m, (u_m(i, j + 1, k) - u_m(i, j - 1, k))).apply() + + dot(zvector_m, (u_m(i, j, k + 1) - u_m(i, j, k - 1))).apply(); } private: - using Mesh_t = typename E::Mesh_t; + using Mesh_t = typename E::Mesh_t; using vector_type = typename Mesh_t::vector_type; const E u_m; const vector_type xvector_m; @@ -324,38 +311,34 @@ namespace ippl { const vector_type zvector_m; }; - /*! - * Meta function of Laplacian + * Meta function of Laplacian */ template - struct meta_laplace : public Expression, sizeof(E) + sizeof(typename E::Mesh_t::vector_type)> { - + struct meta_laplace + : public Expression, + sizeof(E) + sizeof(typename E::Mesh_t::vector_type)> { KOKKOS_FUNCTION - meta_laplace(const E& u, - const typename E::Mesh_t::vector_type& hvector) - : u_m(u) - , hvector_m(hvector) - { } - + meta_laplace(const E& u, const typename E::Mesh_t::vector_type& hvector) + : u_m(u) + , hvector_m(hvector) {} + /* * 3-dimensional Laplacian */ - KOKKOS_INLINE_FUNCTION - auto operator()(size_t i, size_t j, size_t k) const { - - return hvector_m[0] * (u_m(i+1, j, k) - 2 * u_m(i, j, k) + u_m(i-1, j, k )) + - hvector_m[1] * (u_m(i , j+1, k) - 2 * u_m(i, j, k) + u_m(i , j-1, k )) + - hvector_m[2] * (u_m(i , j , k+1) - 2 * u_m(i, j, k) + u_m(i , j , k-1)); + KOKKOS_INLINE_FUNCTION auto operator()(size_t i, size_t j, size_t k) const { + return hvector_m[0] * (u_m(i + 1, j, k) - 2 * u_m(i, j, k) + u_m(i - 1, j, k)) + + hvector_m[1] * (u_m(i, j + 1, k) - 2 * u_m(i, j, k) + u_m(i, j - 1, k)) + + hvector_m[2] * (u_m(i, j, k + 1) - 2 * u_m(i, j, k) + u_m(i, j, k - 1)); } private: - using Mesh_t = typename E::Mesh_t; + using Mesh_t = typename E::Mesh_t; using vector_type = typename Mesh_t::vector_type; const E u_m; const vector_type hvector_m; }; - } + } // namespace detail namespace detail { /*! @@ -363,38 +346,37 @@ namespace ippl { */ template - struct meta_curl : public Expression, - sizeof(E) + 4 * sizeof(typename E::Mesh_t::vector_type)> - { - + struct meta_curl + : public Expression, + sizeof(E) + 4 * sizeof(typename E::Mesh_t::vector_type)> { KOKKOS_FUNCTION - meta_curl(const E& u, - const typename E::Mesh_t::vector_type& xvector, - const typename E::Mesh_t::vector_type& yvector, - const typename E::Mesh_t::vector_type& zvector, - const typename E::Mesh_t::vector_type& hvector) - : u_m(u) - , xvector_m(xvector) - , yvector_m(yvector) - , zvector_m(zvector) - , hvector_m(hvector) - { } + meta_curl(const E& u, const typename E::Mesh_t::vector_type& xvector, + const typename E::Mesh_t::vector_type& yvector, + const typename E::Mesh_t::vector_type& zvector, + const typename E::Mesh_t::vector_type& hvector) + : u_m(u) + , xvector_m(xvector) + , yvector_m(yvector) + , zvector_m(zvector) + , hvector_m(hvector) {} /* * 3-dimensional curl */ - KOKKOS_INLINE_FUNCTION - auto operator()(size_t i, size_t j, size_t k) const { - return xvector_m * ((u_m(i, j+1, k)[2] - u_m(i, j-1, k)[2])/(2 * hvector_m[1]) - - (u_m(i, j, k+1)[1] - u_m(i, j, k-1)[1])/(2 * hvector_m[2])) + - yvector_m * ((u_m(i, j, k+1)[0] - u_m(i, j, k-1)[0])/(2 * hvector_m[2]) - - (u_m(i+1, j, k)[2] - u_m(i-1, j, k)[2])/(2 * hvector_m[0])) + - zvector_m * ((u_m(i+1, j, k)[1] - u_m(i-1, j, k)[1])/(2 * hvector_m[0]) - - (u_m(i, j+1, k)[0] - u_m(i, j-1, k)[0])/(2 * hvector_m[1])); + KOKKOS_INLINE_FUNCTION auto operator()(size_t i, size_t j, size_t k) const { + return xvector_m + * ((u_m(i, j + 1, k)[2] - u_m(i, j - 1, k)[2]) / (2 * hvector_m[1]) + - (u_m(i, j, k + 1)[1] - u_m(i, j, k - 1)[1]) / (2 * hvector_m[2])) + + yvector_m + * ((u_m(i, j, k + 1)[0] - u_m(i, j, k - 1)[0]) / (2 * hvector_m[2]) + - (u_m(i + 1, j, k)[2] - u_m(i - 1, j, k)[2]) / (2 * hvector_m[0])) + + zvector_m + * ((u_m(i + 1, j, k)[1] - u_m(i - 1, j, k)[1]) / (2 * hvector_m[0]) + - (u_m(i, j + 1, k)[0] - u_m(i, j - 1, k)[0]) / (2 * hvector_m[1])); } private: - using Mesh_t = typename E::Mesh_t; + using Mesh_t = typename E::Mesh_t; using vector_type = typename Mesh_t::vector_type; const E u_m; const vector_type xvector_m; @@ -402,7 +384,7 @@ namespace ippl { const vector_type zvector_m; const vector_type hvector_m; }; - } + } // namespace detail namespace detail { @@ -410,46 +392,68 @@ namespace ippl { * Meta function of Hessian */ template - struct meta_hess : public Expression, sizeof(E) + 4 * sizeof(typename E::Mesh_t::vector_type)> { - + struct meta_hess + : public Expression, + sizeof(E) + 4 * sizeof(typename E::Mesh_t::vector_type)> { KOKKOS_FUNCTION - meta_hess(const E& u, - const typename E::Mesh_t::vector_type& xvector, - const typename E::Mesh_t::vector_type& yvector, - const typename E::Mesh_t::vector_type& zvector, - const typename E::Mesh_t::vector_type& hvector) - : u_m(u) - , xvector_m(xvector) - , yvector_m(yvector) - , zvector_m(zvector) - , hvector_m(hvector) - { } + meta_hess(const E& u, const typename E::Mesh_t::vector_type& xvector, + const typename E::Mesh_t::vector_type& yvector, + const typename E::Mesh_t::vector_type& zvector, + const typename E::Mesh_t::vector_type& hvector) + : u_m(u) + , xvector_m(xvector) + , yvector_m(yvector) + , zvector_m(zvector) + , hvector_m(hvector) {} /* * 3-dimensional hessian (return Vector,3>) */ - KOKKOS_INLINE_FUNCTION - auto operator()(size_t i, size_t j, size_t k) const { + KOKKOS_INLINE_FUNCTION auto operator()(size_t i, size_t j, size_t k) const { vector_type row_1, row_2, row_3; - row_1 = xvector_m * ((u_m(i+1,j,k) - 2.0*u_m(i,j,k) + u_m(i-1,j,k))/(hvector_m[0]*hvector_m[0])) + - yvector_m * ((u_m(i+1,j+1,k) - u_m(i-1,j+1,k) - u_m(i+1,j-1,k) + u_m(i-1,j-1,k))/(4.0*hvector_m[0]*hvector_m[1])) + - zvector_m * ((u_m(i+1,j,k+1) - u_m(i-1,j,k+1) - u_m(i+1,j,k-1) + u_m(i-1,j,k-1))/(4.0*hvector_m[0]*hvector_m[2])); - - row_2 = xvector_m * ((u_m(i+1,j+1,k) - u_m(i+1,j-1,k) - u_m(i-1,j+1,k) + u_m(i-1,j-1,k))/(4.0*hvector_m[1]*hvector_m[0])) + - yvector_m * ((u_m(i,j+1,k) - 2.0*u_m(i,j,k) + u_m(i,j-1,k))/(hvector_m[1]*hvector_m[1])) + - zvector_m * ((u_m(i,j+1,k+1) - u_m(i,j-1,k+1) - u_m(i,j+1,k-1) + u_m(i,j-1,k-1))/(4.0*hvector_m[1]*hvector_m[2])); - - row_3 = xvector_m * ((u_m(i+1,j,k+1) - u_m(i+1,j,k-1) - u_m(i-1,j,k+1) + u_m(i-1,j,k-1))/(4.0*hvector_m[2]*hvector_m[0])) + - yvector_m * ((u_m(i,j+1,k+1) - u_m(i,j+1,k-1) - u_m(i,j-1,k+1) + u_m(i,j-1,k-1))/(4.0*hvector_m[2]*hvector_m[1])) + - zvector_m * ((u_m(i,j,k+1) - 2.0*u_m(i,j,k) + u_m(i,j,k-1))/(hvector_m[2]*hvector_m[2])); + row_1 = xvector_m + * ((u_m(i + 1, j, k) - 2.0 * u_m(i, j, k) + u_m(i - 1, j, k)) + / (hvector_m[0] * hvector_m[0])) + + yvector_m + * ((u_m(i + 1, j + 1, k) - u_m(i - 1, j + 1, k) - u_m(i + 1, j - 1, k) + + u_m(i - 1, j - 1, k)) + / (4.0 * hvector_m[0] * hvector_m[1])) + + zvector_m + * ((u_m(i + 1, j, k + 1) - u_m(i - 1, j, k + 1) - u_m(i + 1, j, k - 1) + + u_m(i - 1, j, k - 1)) + / (4.0 * hvector_m[0] * hvector_m[2])); + + row_2 = xvector_m + * ((u_m(i + 1, j + 1, k) - u_m(i + 1, j - 1, k) - u_m(i - 1, j + 1, k) + + u_m(i - 1, j - 1, k)) + / (4.0 * hvector_m[1] * hvector_m[0])) + + yvector_m + * ((u_m(i, j + 1, k) - 2.0 * u_m(i, j, k) + u_m(i, j - 1, k)) + / (hvector_m[1] * hvector_m[1])) + + zvector_m + * ((u_m(i, j + 1, k + 1) - u_m(i, j - 1, k + 1) - u_m(i, j + 1, k - 1) + + u_m(i, j - 1, k - 1)) + / (4.0 * hvector_m[1] * hvector_m[2])); + + row_3 = xvector_m + * ((u_m(i + 1, j, k + 1) - u_m(i + 1, j, k - 1) - u_m(i - 1, j, k + 1) + + u_m(i - 1, j, k - 1)) + / (4.0 * hvector_m[2] * hvector_m[0])) + + yvector_m + * ((u_m(i, j + 1, k + 1) - u_m(i, j + 1, k - 1) - u_m(i, j - 1, k + 1) + + u_m(i, j - 1, k - 1)) + / (4.0 * hvector_m[2] * hvector_m[1])) + + zvector_m + * ((u_m(i, j, k + 1) - 2.0 * u_m(i, j, k) + u_m(i, j, k - 1)) + / (hvector_m[2] * hvector_m[2])); matrix_type hessian = {row_1, row_2, row_3}; - return hessian; + return hessian; } private: - using Mesh_t = typename E::Mesh_t; + using Mesh_t = typename E::Mesh_t; using vector_type = typename Mesh_t::vector_type; using matrix_type = typename Mesh_t::matrix_type; const E u_m; @@ -458,7 +462,7 @@ namespace ippl { const vector_type zvector_m; const vector_type hvector_m; }; - } -} + } // namespace detail +} // namespace ippl #endif diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index f750722e7..431cae6d1 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -34,17 +34,19 @@ #include #include -#include "FieldLayout/FieldLayout.h" #include "Field/Field.h" -#include "Utility/ParameterList.h" +#include "FieldLayout/FieldLayout.h" #include "Utility/IpplException.h" +#include "Utility/ParameterList.h" namespace heffte { - template<> struct is_ccomplex> : std::true_type{}; - - template<> struct is_zcomplex> : std::true_type{}; -} + template <> + struct is_ccomplex> : std::true_type {}; + + template <> + struct is_zcomplex> : std::true_type {}; +} // namespace heffte namespace ippl { @@ -66,9 +68,9 @@ namespace ippl { class CosTransform {}; enum FFTComm { - a2av = 0, - a2a = 1, - p2p = 2, + a2av = 0, + a2a = 1, + p2p = 2, p2p_pl = 3 }; @@ -76,24 +78,24 @@ namespace ippl { #ifdef Heffte_ENABLE_FFTW struct HeffteBackendType { - using backend = heffte::backend::fftw; + using backend = heffte::backend::fftw; using backendSine = heffte::backend::fftw_sin; - using backendCos = heffte::backend::fftw_cos; + using backendCos = heffte::backend::fftw_cos; }; #endif #ifdef Heffte_ENABLE_MKL struct HeffteBackendType { - using backend = heffte::backend::mkl; + using backend = heffte::backend::mkl; using backendSine = heffte::backend::mkl_sin; - using backendCos = heffte::backend::mkl_cos; + using backendCos = heffte::backend::mkl_cos; }; #endif #ifdef Heffte_ENABLE_CUDA #ifdef KOKKOS_ENABLE_CUDA struct HeffteBackendType { - using backend = heffte::backend::cufft; + using backend = heffte::backend::cufft; using backendSine = heffte::backend::cufft_sin; - using backendCos = heffte::backend::cufft_cos; + using backendCos = heffte::backend::cufft_cos; }; #endif #endif @@ -101,42 +103,41 @@ namespace ippl { #ifndef KOKKOS_ENABLE_CUDA #if !defined(Heffte_ENABLE_MKL) && !defined(Heffte_ENABLE_FFTW) /** - * Use heFFTe's inbuilt 1D fft computation on CPUs if no + * Use heFFTe's inbuilt 1D fft computation on CPUs if no * vendor specific or optimized backend is found - */ + */ struct HeffteBackendType { - using backend = heffte::backend::stock; + using backend = heffte::backend::stock; using backendSine = heffte::backend::stock_sin; - using backendCos = heffte::backend::stock_cos; + using backendCos = heffte::backend::stock_cos; }; #endif #endif - } + } // namespace detail /** Non-specialized FFT class. We specialize based on Transform tag class */ - template > + template > class FFT {}; /** complex-to-complex FFT class */ template - class FFT { - + class FFT { public: - typedef FieldLayout Layout_t; typedef Kokkos::complex Complex_t; - typedef Field ComplexField_t; + typedef Field ComplexField_t; using heffteBackend = typename detail::HeffteBackendType::backend; - using workspace_t = typename heffte::fft3d::template buffer_container; + using workspace_t = + typename heffte::fft3d::template buffer_container; /** Create a new FFT object with the layout for the input Field and * parameters for heffte. - */ + */ FFT(const Layout_t& layout, const ParameterList& params); // Destructor @@ -147,46 +148,40 @@ namespace ippl { */ void transform(int direction, ComplexField_t& f); - private: - //using long long = detail::long long; + // using long long = detail::long long; /** setup performs the initialization necessary. */ - void setup(const std::array& low, - const std::array& high, + void setup(const std::array& low, const std::array& high, const ParameterList& params); std::shared_ptr> heffte_m; workspace_t workspace_m; - }; - /** real-to-complex FFT class */ template - class FFT { - + class FFT { public: - typedef FieldLayout Layout_t; - typedef Field RealField_t; + typedef Field RealField_t; using heffteBackend = typename detail::HeffteBackendType::backend; typedef Kokkos::complex Complex_t; - using workspace_t = typename heffte::fft3d_r2c::template buffer_container; + using workspace_t = + typename heffte::fft3d_r2c::template buffer_container; - typedef Field ComplexField_t; + typedef Field ComplexField_t; /** Create a new FFT object with the layout for the input and output Fields * and parameters for heffte. - */ + */ FFT(const Layout_t& layoutInput, const Layout_t& layoutOutput, const ParameterList& params); - ~FFT() = default; /** Do the FFT: specify +1 or -1 to indicate forward or inverse @@ -194,9 +189,8 @@ namespace ippl { */ void transform(int direction, RealField_t& f, ComplexField_t& g); - private: - //using long long = detail::long long; + // using long long = detail::long long; /** setup performs the initialization necessary after the transform @@ -205,32 +199,27 @@ namespace ippl { void setup(const std::array& lowInput, const std::array& highInput, const std::array& lowOutput, - const std::array& highOutput, - const ParameterList& params); - + const std::array& highOutput, const ParameterList& params); std::shared_ptr> heffte_m; workspace_t workspace_m; - }; /** Sine transform class */ template - class FFT { - + class FFT { public: - typedef FieldLayout Layout_t; - typedef Field Field_t; + typedef Field Field_t; using heffteBackend = typename detail::HeffteBackendType::backendSine; - using workspace_t = typename heffte::fft3d::template buffer_container; + using workspace_t = typename heffte::fft3d::template buffer_container; /** Create a new FFT object with the layout for the input Field and * parameters for heffte. - */ + */ FFT(const Layout_t& layout, const ParameterList& params); // Destructor @@ -241,36 +230,31 @@ namespace ippl { */ void transform(int direction, Field_t& f); - private: /** setup performs the initialization necessary. */ - void setup(const std::array& low, - const std::array& high, + void setup(const std::array& low, const std::array& high, const ParameterList& params); std::shared_ptr> heffte_m; workspace_t workspace_m; - }; /** Cosine transform class */ template - class FFT { - + class FFT { public: - typedef FieldLayout Layout_t; - typedef Field Field_t; + typedef Field Field_t; using heffteBackend = typename detail::HeffteBackendType::backendCos; - using workspace_t = typename heffte::fft3d::template buffer_container; + using workspace_t = typename heffte::fft3d::template buffer_container; /** Create a new FFT object with the layout for the input Field and * parameters for heffte. - */ + */ FFT(const Layout_t& layout, const ParameterList& params); // Destructor @@ -281,24 +265,20 @@ namespace ippl { */ void transform(int direction, Field_t& f); - private: /** setup performs the initialization necessary. */ - void setup(const std::array& low, - const std::array& high, + void setup(const std::array& low, const std::array& high, const ParameterList& params); std::shared_ptr> heffte_m; workspace_t workspace_m; - }; - -} +} // namespace ippl #include "FFT/FFT.hpp" -#endif // IPPL_FFT_FFT_H +#endif // IPPL_FFT_FFT_H // vi: set et ts=4 sw=4 sts=4: // Local Variables: diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index 8efae8777..38815f2f6 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -29,11 +29,10 @@ */ #include "FFT/FFT.h" -#include "FieldLayout/FieldLayout.h" #include "Field/BareField.h" +#include "FieldLayout/FieldLayout.h" #include "Utility/IpplTimings.h" - namespace ippl { //========================================================================= @@ -46,12 +45,7 @@ namespace ippl { */ template - FFT::FFT( - const Layout_t& layout, - const ParameterList& params) - { - - + FFT::FFT(const Layout_t& layout, const ParameterList& params) { /** * Heffte requires to pass a 3D array even for 2D and * 1D FFTs we just have to make the length in other @@ -69,32 +63,27 @@ namespace ippl { * Static cast to detail::long long (uint64_t) is necessary, as heffte::box3d requires it * like that. */ - for(size_t d = 0; d < Dim; ++d) { - low[d] = static_cast(lDom[d].first()); + for (size_t d = 0; d < Dim; ++d) { + low[d] = static_cast(lDom[d].first()); high[d] = static_cast(lDom[d].length() + lDom[d].first() - 1); } setup(low, high, params); } - /** setup performs the initialization necessary. */ template - void - FFT::setup(const std::array& low, - const std::array& high, - const ParameterList& params) - { - - heffte::box3d inbox = {low, high}; - heffte::box3d outbox = {low, high}; + void FFT::setup(const std::array& low, + const std::array& high, + const ParameterList& params) { + heffte::box3d inbox = {low, high}; + heffte::box3d outbox = {low, high}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = heffte::default_options(); - if(!params.get("use_heffte_defaults")) { + if (!params.get("use_heffte_defaults")) { heffteOptions.use_pencils = params.get("use_pencils"); heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU @@ -102,130 +91,93 @@ namespace ippl { #endif switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", "Unrecognized heffte communication type"); } - } + } - heffte_m = std::make_shared> - (inbox, outbox, Ippl::getComm(), heffteOptions); + heffte_m = std::make_shared>( + inbox, outbox, Ippl::getComm(), heffteOptions); - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) + // heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if (workspace_m.size() < heffte_m->size_workspace()) workspace_m = workspace_t(heffte_m->size_workspace()); - } - - template - void - FFT::transform( - int direction, - typename FFT::ComplexField_t& f) - { - auto fview = f.getView(); - const int nghost = f.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempField(i-nghost, j-nghost, k-nghost).real( - fview(i, j, k).real()); - tempField(i-nghost, j-nghost, k-nghost).imag( - fview(i, j, k).imag()); - }); - - - - - if ( direction == 1 ) - { - heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::full); - } - else if ( direction == -1 ) - { - heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::none); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - - Kokkos::parallel_for("copy to Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k).real() = - tempField(i-nghost, j-nghost, k-nghost).real(); - fview(i, j, k).imag() = - tempField(i-nghost, j-nghost, k-nghost).imag(); - }); + void FFT::transform( + int direction, typename FFT::ComplexField_t& f) { + auto fview = f.getView(); + const int nghost = f.getNghost(); - } + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + Kokkos::View tempField( + "tempField", fview.extent(0) - 2 * nghost, fview.extent(1) - 2 * nghost, + fview.extent(2) - 2 * nghost); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for( + "copy from Kokkos FFT", + mdrange_type( + {nghost, nghost, nghost}, + {fview.extent(0) - nghost, fview.extent(1) - nghost, fview.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + tempField(i - nghost, j - nghost, k - nghost).real(fview(i, j, k).real()); + tempField(i - nghost, j - nghost, k - nghost).imag(fview(i, j, k).imag()); + }); + + if (direction == 1) { + heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::full); + } else if (direction == -1) { + heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::none); + } else { + throw std::logic_error("Only 1:forward and -1:backward are allowed as directions"); + } + Kokkos::parallel_for( + "copy to Kokkos FFT", + mdrange_type( + {nghost, nghost, nghost}, + {fview.extent(0) - nghost, fview.extent(1) - nghost, fview.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + fview(i, j, k).real() = tempField(i - nghost, j - nghost, k - nghost).real(); + fview(i, j, k).imag() = tempField(i - nghost, j - nghost, k - nghost).imag(); + }); + } //======================================================================== // FFT RCTransform Constructors //======================================================================== /** - *Create a new FFT object of type RCTransform, with given input and output - *layouts and heffte parameters. - */ + *Create a new FFT object of type RCTransform, with given input and output + *layouts and heffte parameters. + */ template - FFT::FFT( - const Layout_t& layoutInput, - const Layout_t& layoutOutput, - const ParameterList& params) - { - + FFT::FFT(const Layout_t& layoutInput, const Layout_t& layoutOutput, + const ParameterList& params) { /** * Heffte requires to pass a 3D array even for 2D and * 1D FFTs we just have to make the length in other @@ -236,10 +188,9 @@ namespace ippl { std::array lowOutput; std::array highOutput; - const NDIndex& lDomInput = layoutInput.getLocalNDIndex(); + const NDIndex& lDomInput = layoutInput.getLocalNDIndex(); const NDIndex& lDomOutput = layoutOutput.getLocalNDIndex(); - lowInput.fill(0); highInput.fill(0); lowOutput.fill(0); @@ -249,39 +200,33 @@ namespace ippl { * Static cast to detail::long long (uint64_t) is necessary, as heffte::box3d requires it * like that. */ - for(size_t d = 0; d < Dim; ++d) { - lowInput[d] = static_cast(lDomInput[d].first()); - highInput[d] = static_cast(lDomInput[d].length() + - lDomInput[d].first() - 1); + for (size_t d = 0; d < Dim; ++d) { + lowInput[d] = static_cast(lDomInput[d].first()); + highInput[d] = static_cast(lDomInput[d].length() + lDomInput[d].first() - 1); lowOutput[d] = static_cast(lDomOutput[d].first()); - highOutput[d] = static_cast(lDomOutput[d].length() + - lDomOutput[d].first() - 1); + highOutput[d] = + static_cast(lDomOutput[d].length() + lDomOutput[d].first() - 1); } setup(lowInput, highInput, lowOutput, highOutput, params); } - /** setup performs the initialization. */ template - void - FFT::setup(const std::array& lowInput, - const std::array& highInput, - const std::array& lowOutput, - const std::array& highOutput, - const ParameterList& params) - { - - heffte::box3d inbox = {lowInput, highInput}; - heffte::box3d outbox = {lowOutput, highOutput}; - - heffte::plan_options heffteOptions = - heffte::default_options(); - - if(!params.get("use_heffte_defaults")) { + void FFT::setup(const std::array& lowInput, + const std::array& highInput, + const std::array& lowOutput, + const std::array& highOutput, + const ParameterList& params) { + heffte::box3d inbox = {lowInput, highInput}; + heffte::box3d outbox = {lowOutput, highOutput}; + + heffte::plan_options heffteOptions = heffte::default_options(); + + if (!params.get("use_heffte_defaults")) { heffteOptions.use_pencils = params.get("use_pencils"); heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU @@ -289,141 +234,103 @@ namespace ippl { #endif switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", "Unrecognized heffte communication type"); } - } - - heffte_m = std::make_shared> - (inbox, outbox, params.get("r2c_direction"), Ippl::getComm(), - heffteOptions); - - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) - workspace_m = workspace_t(heffte_m->size_workspace()); + } + + heffte_m = std::make_shared>( + inbox, outbox, params.get("r2c_direction"), Ippl::getComm(), heffteOptions); + // heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if (workspace_m.size() < heffte_m->size_workspace()) + workspace_m = workspace_t(heffte_m->size_workspace()); } template - void - FFT::transform( - int direction, - typename FFT::RealField_t& f, - typename FFT::ComplexField_t& g) - { - auto fview = f.getView(); - auto gview = g.getView(); - const int nghostf = f.getNghost(); - const int nghostg = g.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempFieldf("tempFieldf", fview.extent(0) - 2*nghostf, - fview.extent(1) - 2*nghostf, - fview.extent(2) - 2*nghostf); - - Kokkos::View - tempFieldg("tempFieldg", gview.extent(0) - 2*nghostg, - gview.extent(1) - 2*nghostg, - gview.extent(2) - 2*nghostg); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos f field in FFT", - mdrange_type({nghostf, nghostf, nghostf}, - {fview.extent(0) - nghostf, - fview.extent(1) - nghostf, - fview.extent(2) - nghostf - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempFieldf(i-nghostf, j-nghostf, k-nghostf) = fview(i, j, k); - }); - Kokkos::parallel_for("copy from Kokkos g field in FFT", - mdrange_type({nghostg, nghostg, nghostg}, - {gview.extent(0) - nghostg, - gview.extent(1) - nghostg, - gview.extent(2) - nghostg - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempFieldg(i-nghostg, j-nghostg, k-nghostg).real( - gview(i, j, k).real()); - tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag( - gview(i, j, k).imag()); - }); - - - if ( direction == 1 ) - { - heffte_m->forward( tempFieldf.data(), tempFieldg.data(), workspace_m.data(), - heffte::scale::full ); - } - else if ( direction == -1 ) - { - heffte_m->backward( tempFieldg.data(), tempFieldf.data(), workspace_m.data(), - heffte::scale::none ); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - - Kokkos::parallel_for("copy to Kokkos f field FFT", - mdrange_type({nghostf, nghostf, nghostf}, - {fview.extent(0) - nghostf, - fview.extent(1) - nghostf, - fview.extent(2) - nghostf - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k) = tempFieldf(i-nghostf, j-nghostf, k-nghostf); - }); - - Kokkos::parallel_for("copy to Kokkos g field FFT", - mdrange_type({nghostg, nghostg, nghostg}, - {gview.extent(0) - nghostg, - gview.extent(1) - nghostg, - gview.extent(2) - nghostg - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - gview(i, j, k).real() = - tempFieldg(i-nghostg, j-nghostg, k-nghostg).real(); - gview(i, j, k).imag() = - tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag(); - }); + void FFT::transform( + int direction, typename FFT::RealField_t& f, + typename FFT::ComplexField_t& g) { + auto fview = f.getView(); + auto gview = g.getView(); + const int nghostf = f.getNghost(); + const int nghostg = g.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + Kokkos::View tempFieldf( + "tempFieldf", fview.extent(0) - 2 * nghostf, fview.extent(1) - 2 * nghostf, + fview.extent(2) - 2 * nghostf); + + Kokkos::View tempFieldg( + "tempFieldg", gview.extent(0) - 2 * nghostg, gview.extent(1) - 2 * nghostg, + gview.extent(2) - 2 * nghostg); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for( + "copy from Kokkos f field in FFT", + mdrange_type( + {nghostf, nghostf, nghostf}, + {fview.extent(0) - nghostf, fview.extent(1) - nghostf, fview.extent(2) - nghostf}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + tempFieldf(i - nghostf, j - nghostf, k - nghostf) = fview(i, j, k); + }); + Kokkos::parallel_for( + "copy from Kokkos g field in FFT", + mdrange_type( + {nghostg, nghostg, nghostg}, + {gview.extent(0) - nghostg, gview.extent(1) - nghostg, gview.extent(2) - nghostg}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + tempFieldg(i - nghostg, j - nghostg, k - nghostg).real(gview(i, j, k).real()); + tempFieldg(i - nghostg, j - nghostg, k - nghostg).imag(gview(i, j, k).imag()); + }); + + if (direction == 1) { + heffte_m->forward(tempFieldf.data(), tempFieldg.data(), workspace_m.data(), + heffte::scale::full); + } else if (direction == -1) { + heffte_m->backward(tempFieldg.data(), tempFieldf.data(), workspace_m.data(), + heffte::scale::none); + } else { + throw std::logic_error("Only 1:forward and -1:backward are allowed as directions"); + } + Kokkos::parallel_for( + "copy to Kokkos f field FFT", + mdrange_type( + {nghostf, nghostf, nghostf}, + {fview.extent(0) - nghostf, fview.extent(1) - nghostf, fview.extent(2) - nghostf}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + fview(i, j, k) = tempFieldf(i - nghostf, j - nghostf, k - nghostf); + }); + + Kokkos::parallel_for( + "copy to Kokkos g field FFT", + mdrange_type( + {nghostg, nghostg, nghostg}, + {gview.extent(0) - nghostg, gview.extent(1) - nghostg, gview.extent(2) - nghostg}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + gview(i, j, k).real() = tempFieldg(i - nghostg, j - nghostg, k - nghostg).real(); + gview(i, j, k).imag() = tempFieldg(i - nghostg, j - nghostg, k - nghostg).imag(); + }); } //========================================================================= @@ -436,11 +343,7 @@ namespace ippl { */ template - FFT::FFT( - const Layout_t& layout, - const ParameterList& params) - { - + FFT::FFT(const Layout_t& layout, const ParameterList& params) { /** * Heffte requires to pass a 3D array even for 2D and * 1D FFTs we just have to make the length in other @@ -458,133 +361,104 @@ namespace ippl { * Static cast to detail::long long (uint64_t) is necessary, as heffte::box3d requires it * like that. */ - for(size_t d = 0; d < Dim; ++d) { - low[d] = static_cast(lDom[d].first()); + for (size_t d = 0; d < Dim; ++d) { + low[d] = static_cast(lDom[d].first()); high[d] = static_cast(lDom[d].length() + lDom[d].first() - 1); } setup(low, high, params); } - /** setup performs the initialization necessary. */ template - void - FFT::setup(const std::array& low, - const std::array& high, - const ParameterList& params) - { - - heffte::box3d inbox = {low, high}; - heffte::box3d outbox = {low, high}; + void FFT::setup(const std::array& low, + const std::array& high, + const ParameterList& params) { + heffte::box3d inbox = {low, high}; + heffte::box3d outbox = {low, high}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = heffte::default_options(); - if(!params.get("use_heffte_defaults")) { + if (!params.get("use_heffte_defaults")) { heffteOptions.use_pencils = params.get("use_pencils"); heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", "Unrecognized heffte communication type"); } - } + } - heffte_m = std::make_shared> - (inbox, outbox, Ippl::getComm(), heffteOptions); + heffte_m = std::make_shared>( + inbox, outbox, Ippl::getComm(), heffteOptions); - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) + // heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if (workspace_m.size() < heffte_m->size_workspace()) workspace_m = workspace_t(heffte_m->size_workspace()); - } template - void - FFT::transform( - int direction, - typename FFT::Field_t& f) - { - auto fview = f.getView(); - const int nghost = f.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempField(i-nghost, j-nghost, k-nghost) = - fview(i, j, k); - }); - - if ( direction == 1 ) - { - heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::full); - } - else if ( direction == -1 ) - { - heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::none); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - Kokkos::parallel_for("copy to Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k) = - tempField(i-nghost, j-nghost, k-nghost); - }); + void FFT::transform( + int direction, typename FFT::Field_t& f) { + auto fview = f.getView(); + const int nghost = f.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + Kokkos::View tempField("tempField", fview.extent(0) - 2 * nghost, + fview.extent(1) - 2 * nghost, + fview.extent(2) - 2 * nghost); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for( + "copy from Kokkos FFT", + mdrange_type( + {nghost, nghost, nghost}, + {fview.extent(0) - nghost, fview.extent(1) - nghost, fview.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + tempField(i - nghost, j - nghost, k - nghost) = fview(i, j, k); + }); + + if (direction == 1) { + heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::full); + } else if (direction == -1) { + heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::none); + } else { + throw std::logic_error("Only 1:forward and -1:backward are allowed as directions"); + } + Kokkos::parallel_for( + "copy to Kokkos FFT", + mdrange_type( + {nghost, nghost, nghost}, + {fview.extent(0) - nghost, fview.extent(1) - nghost, fview.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + fview(i, j, k) = tempField(i - nghost, j - nghost, k - nghost); + }); } //========================================================================= @@ -597,11 +471,7 @@ namespace ippl { */ template - FFT::FFT( - const Layout_t& layout, - const ParameterList& params) - { - + FFT::FFT(const Layout_t& layout, const ParameterList& params) { /** * Heffte requires to pass a 3D array even for 2D and * 1D FFTs we just have to make the length in other @@ -619,136 +489,106 @@ namespace ippl { * Static cast to detail::long long (uint64_t) is necessary, as heffte::box3d requires it * like that. */ - for(size_t d = 0; d < Dim; ++d) { - low[d] = static_cast(lDom[d].first()); + for (size_t d = 0; d < Dim; ++d) { + low[d] = static_cast(lDom[d].first()); high[d] = static_cast(lDom[d].length() + lDom[d].first() - 1); } setup(low, high, params); } - /** setup performs the initialization necessary. */ template - void - FFT::setup(const std::array& low, - const std::array& high, - const ParameterList& params) - { - - heffte::box3d inbox = {low, high}; - heffte::box3d outbox = {low, high}; + void FFT::setup(const std::array& low, + const std::array& high, + const ParameterList& params) { + heffte::box3d inbox = {low, high}; + heffte::box3d outbox = {low, high}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = heffte::default_options(); - if(!params.get("use_heffte_defaults")) { + if (!params.get("use_heffte_defaults")) { heffteOptions.use_pencils = params.get("use_pencils"); heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", "Unrecognized heffte communication type"); } - } + } - heffte_m = std::make_shared> - (inbox, outbox, Ippl::getComm(), heffteOptions); + heffte_m = std::make_shared>( + inbox, outbox, Ippl::getComm(), heffteOptions); - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) + // heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if (workspace_m.size() < heffte_m->size_workspace()) workspace_m = workspace_t(heffte_m->size_workspace()); - } - template - void - FFT::transform( - int direction, - typename FFT::Field_t& f) - { - auto fview = f.getView(); - const int nghost = f.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempField(i-nghost, j-nghost, k-nghost) = - fview(i, j, k); - }); - - if ( direction == 1 ) - { - heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::full); - } - else if ( direction == -1 ) - { - heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::none); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - Kokkos::parallel_for("copy to Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k) = - tempField(i-nghost, j-nghost, k-nghost); - }); + void FFT::transform( + int direction, typename FFT::Field_t& f) { + auto fview = f.getView(); + const int nghost = f.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + Kokkos::View tempField("tempField", fview.extent(0) - 2 * nghost, + fview.extent(1) - 2 * nghost, + fview.extent(2) - 2 * nghost); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for( + "copy from Kokkos FFT", + mdrange_type( + {nghost, nghost, nghost}, + {fview.extent(0) - nghost, fview.extent(1) - nghost, fview.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + tempField(i - nghost, j - nghost, k - nghost) = fview(i, j, k); + }); + + if (direction == 1) { + heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::full); + } else if (direction == -1) { + heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::none); + } else { + throw std::logic_error("Only 1:forward and -1:backward are allowed as directions"); + } + Kokkos::parallel_for( + "copy to Kokkos FFT", + mdrange_type( + {nghost, nghost, nghost}, + {fview.extent(0) - nghost, fview.extent(1) - nghost, fview.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + fview(i, j, k) = tempField(i - nghost, j - nghost, k - nghost); + }); } -} +} // namespace ippl // vi: set et ts=4 sw=4 sts=4: // Local Variables: diff --git a/src/Field/BConds.h b/src/Field/BConds.h index edaae5fc3..b33af4d42 100644 --- a/src/Field/BConds.h +++ b/src/Field/BConds.h @@ -1,10 +1,10 @@ // Class BConds -// This is the container class for the field BCs. +// This is the container class for the field BCs. // It calls the findBCNeighbors and apply in the // respective BC classes to apply field BCs // Copyright (c) 2021, Sriramkrishnan Muralikrishnan, // Paul Scherrer Institut, Villigen PSI, Switzerland -// Matthias Frey, University of St Andrews, +// Matthias Frey, University of St Andrews, // St Andrews, Scotland // All rights reserved // @@ -28,28 +28,25 @@ #include namespace ippl { - template class Field; + template + class Field; - template class BConds; + template + class BConds; - template + template std::ostream& operator<<(std::ostream&, const BConds&); - - template, - class Cell = typename Mesh::DefaultCentering> - class BConds - { + template , + class Cell = typename Mesh::DefaultCentering> + class BConds { public: - using bc_type = detail::BCondBase; - using container = std::array, 2 * Dim>; - using iterator = typename container::iterator; + using bc_type = detail::BCondBase; + using container = std::array, 2 * Dim>; + using iterator = typename container::iterator; using const_iterator = typename container::const_iterator; - - BConds() = default; + BConds() = default; ~BConds() = default; void findBCNeighbors(Field& field); @@ -58,28 +55,20 @@ namespace ippl { bool changesPhysicalCells() const; virtual void write(std::ostream&) const; - const std::shared_ptr& operator[](const int& i) const noexcept { - return bc_m[i]; - } + const std::shared_ptr& operator[](const int& i) const noexcept { return bc_m[i]; } - std::shared_ptr& operator[](const int& i) noexcept { - return bc_m[i]; - } + std::shared_ptr& operator[](const int& i) noexcept { return bc_m[i]; } private: container bc_m; }; - - template - inline std::ostream& - operator<<(std::ostream& os, const BConds& bc) - { + template + inline std::ostream& operator<<(std::ostream& os, const BConds& bc) { bc.write(os); return os; } -} - +} // namespace ippl #include "Field/BConds.hpp" diff --git a/src/Field/BConds.hpp b/src/Field/BConds.hpp index cb07b2f2d..540bbc366 100644 --- a/src/Field/BConds.hpp +++ b/src/Field/BConds.hpp @@ -1,10 +1,10 @@ // Class BConds -// This is the container class for the field BCs. +// This is the container class for the field BCs. // It calls the findBCNeighbors and apply in the // respective BC classes to apply field BCs // Copyright (c) 2021, Sriramkrishnan Muralikrishnan, // Paul Scherrer Institut, Villigen PSI, Switzerland -// Matthias Frey, University of St Andrews, +// Matthias Frey, University of St Andrews, // St Andrews, Scotland // All rights reserved // @@ -19,50 +19,42 @@ // along with IPPL. If not, see . // namespace ippl { - template - void - BConds::write(std::ostream& os) const - { - os << "BConds: (" << std::endl; - const_iterator it = bc_m.begin(); - for ( ; it != bc_m.end() - 1; ++it) { - (*it)->write(os); - os << "," << std::endl; - } + template + void BConds::write(std::ostream& os) const { + os << "BConds: (" << std::endl; + const_iterator it = bc_m.begin(); + for (; it != bc_m.end() - 1; ++it) { (*it)->write(os); - os << std::endl << ")"; + os << "," << std::endl; } + (*it)->write(os); + os << std::endl << ")"; + } - template - void - BConds::findBCNeighbors(Field& field) - { - for (iterator it = bc_m.begin(); it != bc_m.end(); ++it) { - (*it)->findBCNeighbors(field); - } - Kokkos::fence(); - Ippl::Comm->barrier(); + template + void BConds::findBCNeighbors(Field& field) { + for (iterator it = bc_m.begin(); it != bc_m.end(); ++it) { + (*it)->findBCNeighbors(field); } + Kokkos::fence(); + Ippl::Comm->barrier(); + } - template - void - BConds::apply(Field& field) - { - for (iterator it = bc_m.begin(); it != bc_m.end(); ++it) { - (*it)->apply(field); - } - Kokkos::fence(); - Ippl::Comm->barrier(); + template + void BConds::apply(Field& field) { + for (iterator it = bc_m.begin(); it != bc_m.end(); ++it) { + (*it)->apply(field); } + Kokkos::fence(); + Ippl::Comm->barrier(); + } - template - bool - BConds::changesPhysicalCells() const - { - bool doesChange = false; - for (const_iterator it = bc_m.begin(); it != bc_m.end(); ++it) { - doesChange |= (*it)->changesPhysicalCells(); - } - return doesChange; + template + bool BConds::changesPhysicalCells() const { + bool doesChange = false; + for (const_iterator it = bc_m.begin(); it != bc_m.end(); ++it) { + doesChange |= (*it)->changesPhysicalCells(); } -} + return doesChange; + } +} // namespace ippl diff --git a/src/Field/BareField.h b/src/Field/BareField.h index fd5ddcc95..63a1b5963 100644 --- a/src/Field/BareField.h +++ b/src/Field/BareField.h @@ -21,8 +21,8 @@ #include #include "Expression/IpplExpressions.h" -#include "Types/ViewTypes.h" #include "Types/IpplTypes.h" +#include "Types/ViewTypes.h" #include "FieldLayout/FieldLayout.h" @@ -31,9 +31,8 @@ #include "Field/HaloCells.h" -#include #include - +#include namespace ippl { class Index; @@ -47,13 +46,13 @@ namespace ippl { * @class BareField * @tparam T data type * @tparam Dim field dimension - * @warning The implementation currently only supports 3-dimensional fields. The reason are runtime issues - * with "if constrexpr" in the assignment operator when running on GPU. + * @warning The implementation currently only supports 3-dimensional fields. The reason are + * runtime issues with "if constrexpr" in the assignment operator when running on GPU. */ - template - class BareField : public detail::Expression, sizeof(typename detail::ViewType::view_type)> - { - + template + class BareField + : public detail::Expression, + sizeof(typename detail::ViewType::view_type)> { public: using Layout_t = FieldLayout; @@ -61,11 +60,10 @@ namespace ippl { using Domain_t = NDIndex; //! View type storing the data - using view_type = typename detail::ViewType::view_type; - using HostMirror = typename view_type::host_mirror_type; + using view_type = typename detail::ViewType::view_type; + using HostMirror = typename view_type::host_mirror_type; using policy_type = typename detail::RangePolicy::policy_type; - /*! A default constructor, which should be used only if the user calls the * 'initialize' function before doing anything else. There are no special * checks in the rest of the BareField methods to check that the field has @@ -89,10 +87,9 @@ namespace ippl { * @tparam Args... variadic template specifying the individiual * dimension arguments */ - template + template void resize(Args... args); - /*! * Initialize the field, if it was constructed from the default constructor. * This should NOT be called if the field was constructed by providing @@ -112,7 +109,6 @@ namespace ippl { */ detail::size_type size(unsigned d) const { return owned_m[d].length(); } - /*! * Index domain of the local field. * @returns the index domain. @@ -123,9 +119,7 @@ namespace ippl { * Index domain of the allocated field. * @returns the allocated index domain (including ghost cells) */ - const Domain_t getAllocated() const { - return owned_m.grow(nghost_m); - } + const Domain_t getAllocated() const { return owned_m.grow(nghost_m); } int getNghost() const { return nghost_m; } @@ -133,16 +127,13 @@ namespace ippl { void accumulateHalo(); - // Access to the layout. - Layout_t &getLayout() const - { + Layout_t& getLayout() const { PAssert(layout_m != 0); return *layout_m; } - - const Index& getIndex(unsigned d) const {return getLayout().getDomain()[d];} + const Index& getIndex(unsigned d) const { return getLayout().getDomain()[d]; } const NDIndex& getDomain() const { return getLayout().getDomain(); } detail::HaloCells& getHalo() { return halo_m; } @@ -167,25 +158,16 @@ namespace ippl { * @param args view indices * @returns a view element */ - template - KOKKOS_INLINE_FUNCTION - T operator() (Args... args) const { + template + KOKKOS_INLINE_FUNCTION T operator()(Args... args) const { return dview_m(args...); } + view_type& getView() { return dview_m; } - view_type& getView() { - return dview_m; - } + const view_type& getView() const { return dview_m; } - - const view_type& getView() const { - return dview_m; - } - - HostMirror getHostMirror() { - return Kokkos::create_mirror(dview_m); - } + HostMirror getHostMirror() { return Kokkos::create_mirror(dview_m); } /*! * Generate the 3D range policy for iterating over the field, @@ -196,10 +178,9 @@ namespace ippl { policy_type getRangePolicy(const int nghost = 0) const { PAssert_LE(nghost, nghost_m); const size_t shift = nghost_m - nghost; - return policy_type({shift, shift, shift}, - {dview_m.extent(0) - shift, - dview_m.extent(1) - shift, - dview_m.extent(2) - shift}); + return policy_type( + {shift, shift, shift}, + {dview_m.extent(0) - shift, dview_m.extent(1) - shift, dview_m.extent(2) - shift}); } /*! @@ -219,7 +200,6 @@ namespace ippl { T min(int nghost = 0) const; T prod(int nghost = 0) const; - private: //! Number of ghost layers on each field boundary int nghost_m; @@ -240,7 +220,7 @@ namespace ippl { //! How the arrays are laid out. Layout_t* layout_m; }; -} +} // namespace ippl #include "Field/BareField.hpp" diff --git a/src/Field/BareField.hpp b/src/Field/BareField.hpp index 685e6a751..f2e7f17b3 100644 --- a/src/Field/BareField.hpp +++ b/src/Field/BareField.hpp @@ -15,34 +15,31 @@ // You should have received a copy of the GNU General Public License // along with IPPL. If not, see . // +#include "Communicate/DataTypes.h" +#include "Ippl.h" #include "Utility/Inform.h" #include "Utility/IpplInfo.h" -#include "Ippl.h" -#include "Communicate/DataTypes.h" +#include #include #include -#include namespace ippl { namespace detail { template struct isExpression> : std::true_type {}; - } + } // namespace detail template BareField::BareField() - : nghost_m(1) - , layout_m(nullptr) - { } - + : nghost_m(1) + , layout_m(nullptr) {} template BareField::BareField(Layout_t& l, int nghost) - : nghost_m(nghost) -// , owned_m(0) - , layout_m(&l) - { + : nghost_m(nghost) + // , owned_m(0) + , layout_m(&l) { setup(); } @@ -58,10 +55,10 @@ namespace ippl { // ML template void BareField::updateLayout(Layout_t& l, int nghost) { - // std::cout << "Got in BareField::updateLayout()" << std::endl; - layout_m = &l; - nghost_m = nghost; - setup(); + // std::cout << "Got in BareField::updateLayout()" << std::endl; + layout_m = &l; + nghost_m = nghost; + setup(); } template @@ -70,131 +67,109 @@ namespace ippl { owned_m = layout_m->getLocalNDIndex(); - if constexpr(Dim == 1) { + if constexpr (Dim == 1) { this->resize(owned_m[0].length() + 2 * nghost_m); - } else if constexpr(Dim == 2) { - this->resize(owned_m[0].length() + 2 * nghost_m, - owned_m[1].length() + 2 * nghost_m); - } else if constexpr(Dim == 3) { - this->resize(owned_m[0].length() + 2 * nghost_m, - owned_m[1].length() + 2 * nghost_m, + } else if constexpr (Dim == 2) { + this->resize(owned_m[0].length() + 2 * nghost_m, owned_m[1].length() + 2 * nghost_m); + } else if constexpr (Dim == 3) { + this->resize(owned_m[0].length() + 2 * nghost_m, owned_m[1].length() + 2 * nghost_m, owned_m[2].length() + 2 * nghost_m); } } - template - template + template void BareField::resize(Args... args) { Kokkos::resize(dview_m, args...); } - template void BareField::fillHalo() { - if(Ippl::Comm->size() > 1) { + if (Ippl::Comm->size() > 1) { halo_m.fillHalo(dview_m, layout_m); } - if(layout_m->isAllPeriodic_m) { + if (layout_m->isAllPeriodic_m) { using Op = typename detail::HaloCells::assign; - halo_m.template applyPeriodicSerialDim(dview_m, - layout_m, - nghost_m); + halo_m.template applyPeriodicSerialDim(dview_m, layout_m, nghost_m); } } - template void BareField::accumulateHalo() { - if(Ippl::Comm->size() > 1) { + if (Ippl::Comm->size() > 1) { halo_m.accumulateHalo(dview_m, layout_m); } - if(layout_m->isAllPeriodic_m) { + if (layout_m->isAllPeriodic_m) { using Op = typename detail::HaloCells::rhs_plus_assign; - halo_m.template applyPeriodicSerialDim(dview_m, - layout_m, - nghost_m); + halo_m.template applyPeriodicSerialDim(dview_m, layout_m, nghost_m); } } - template BareField& BareField::operator=(T x) { using mdrange_type = Kokkos::MDRangePolicy>; - Kokkos::parallel_for("BareField::operator=(T)", - mdrange_type({0, 0, 0}, - {dview_m.extent(0), - dview_m.extent(1), - dview_m.extent(2) - }), - KOKKOS_CLASS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - dview_m(i, j, k) = x; - }); + Kokkos::parallel_for( + "BareField::operator=(T)", + mdrange_type({0, 0, 0}, {dview_m.extent(0), dview_m.extent(1), dview_m.extent(2)}), + KOKKOS_CLASS_LAMBDA(const size_t i, const size_t j, const size_t k) { + dview_m(i, j, k) = x; + }); return *this; } - template template BareField& BareField::operator=(const detail::Expression& expr) { using capture_type = detail::CapturedExpression; capture_type expr_ = reinterpret_cast(expr); using mdrange_type = Kokkos::MDRangePolicy>; - Kokkos::parallel_for("BareField::operator=(const Expression&)", - mdrange_type({nghost_m, nghost_m, nghost_m}, - {dview_m.extent(0) - nghost_m, - dview_m.extent(1) - nghost_m, - dview_m.extent(2) - nghost_m}), - KOKKOS_CLASS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - dview_m(i, j, k) = expr_(i, j, k); - }); + Kokkos::parallel_for( + "BareField::operator=(const Expression&)", + mdrange_type({nghost_m, nghost_m, nghost_m}, + {dview_m.extent(0) - nghost_m, dview_m.extent(1) - nghost_m, + dview_m.extent(2) - nghost_m}), + KOKKOS_CLASS_LAMBDA(const size_t i, const size_t j, const size_t k) { + dview_m(i, j, k) = expr_(i, j, k); + }); return *this; } - template - void BareField::write(std::ostream& out) const { + void BareField::write(std::ostream& out) const { Kokkos::fence(); detail::write(dview_m, out); } template - void BareField::write(Inform& inf) const { + void BareField::write(Inform& inf) const { write(inf.getDestination()); } - #define DefineReduction(fun, name, op, MPI_Op) \ - template \ - T BareField::name(int nghost) const { \ - PAssert_LE(nghost, nghost_m); \ - T temp = 0.0; \ - const size_t shift = nghost_m - nghost; \ - Kokkos::parallel_reduce("fun", \ - Kokkos::MDRangePolicy>({shift, shift, shift}, \ - {dview_m.extent(0) - shift, \ - dview_m.extent(1) - shift, \ - dview_m.extent(2) - shift}), \ - KOKKOS_CLASS_LAMBDA(const size_t i, const size_t j, \ - const size_t k, T& valL) { \ - T myVal = dview_m(i, j, k); \ - op; \ - }, Kokkos::fun(temp)); \ - T globaltemp = 0.0; \ - MPI_Datatype type = get_mpi_datatype(temp); \ - MPI_Allreduce(&temp, &globaltemp, 1, type, MPI_Op, Ippl::getComm()); \ - return globaltemp; \ +#define DefineReduction(fun, name, op, MPI_Op) \ + template \ + T BareField::name(int nghost) const { \ + PAssert_LE(nghost, nghost_m); \ + T temp = 0.0; \ + const size_t shift = nghost_m - nghost; \ + Kokkos::parallel_reduce( \ + "fun", \ + Kokkos::MDRangePolicy>( \ + {shift, shift, shift}, {dview_m.extent(0) - shift, dview_m.extent(1) - shift, \ + dview_m.extent(2) - shift}), \ + KOKKOS_CLASS_LAMBDA(const size_t i, const size_t j, const size_t k, T& valL) { \ + T myVal = dview_m(i, j, k); \ + op; \ + }, \ + Kokkos::fun(temp)); \ + T globaltemp = 0.0; \ + MPI_Datatype type = get_mpi_datatype(temp); \ + MPI_Allreduce(&temp, &globaltemp, 1, type, MPI_Op, Ippl::getComm()); \ + return globaltemp; \ } - DefineReduction(Sum, sum, valL += myVal, MPI_SUM) - DefineReduction(Max, max, if(myVal > valL) valL = myVal, MPI_MAX) - DefineReduction(Min, min, if(myVal < valL) valL = myVal, MPI_MIN) - DefineReduction(Prod, prod, valL *= myVal, MPI_PROD) - + DefineReduction(Sum, sum, valL += myVal, MPI_SUM) + DefineReduction(Max, max, if (myVal > valL) valL = myVal, MPI_MAX) + DefineReduction(Min, min, if (myVal < valL) valL = myVal, MPI_MIN) + DefineReduction(Prod, prod, valL *= myVal, MPI_PROD) -} +} // namespace ippl diff --git a/src/Field/BcTypes.h b/src/Field/BcTypes.h index 8ebd2cec2..fc55c65e1 100644 --- a/src/Field/BcTypes.h +++ b/src/Field/BcTypes.h @@ -1,18 +1,18 @@ -// This file contains the abstract base class for +// This file contains the abstract base class for // field boundary conditions and other child classes -// which represent specific BCs. At the moment the +// which represent specific BCs. At the moment the // following field BCs are supported -// +// // 1. Periodic BC // 2. Zero BC // 3. Specifying a constant BC // 4. No BC (default option) // 5. Constant extrapolation BC -// Only cell-centered field BCs are implemented +// Only cell-centered field BCs are implemented // at the moment. // Copyright (c) 2021, Sriramkrishnan Muralikrishnan, // Paul Scherrer Institut, Villigen PSI, Switzerland -// Matthias Frey, University of St Andrews, +// Matthias Frey, University of St Andrews, // St Andrews, Scotland // All rights reserved // @@ -29,15 +29,16 @@ #ifndef IPPL_FIELD_BC_TYPES_H #define IPPL_FIELD_BC_TYPES_H -#include "Index/NDIndex.h" -#include "Types/ViewTypes.h" -#include "Types/IpplTypes.h" #include "Communicate/Archive.h" #include "FieldLayout/FieldLayout.h" +#include "Index/NDIndex.h" #include "Meshes/UniformCartesian.h" +#include "Types/IpplTypes.h" +#include "Types/ViewTypes.h" namespace ippl { - template class Field; + template + class Field; /* * Enum type to identify different kinds of @@ -55,20 +56,18 @@ namespace ippl { }; namespace detail { - template class BCondBase; + template + class BCondBase; - template + template std::ostream& operator<<(std::ostream&, const BCondBase&); - - template - class BCondBase - { + template + class BCondBase { public: - - using Field_t = Field; + using Field_t = Field; using Layout_t = FieldLayout; - + // Constructor takes: // face: the face to apply the boundary condition on. // i : what component of T to apply the boundary condition to. @@ -80,8 +79,8 @@ namespace ippl { virtual FieldBC getBCType() const { return NO_FACE; } virtual void findBCNeighbors(Field& field) = 0; - virtual void apply(Field& field) = 0; - virtual void write(std::ostream&) const = 0; + virtual void apply(Field& field) = 0; + virtual void write(std::ostream&) const = 0; // Return face on which BC applies unsigned int getFace() const { return face_m; } @@ -97,30 +96,24 @@ namespace ippl { bool changePhysical_m; }; - } - - template, - class Cell = typename Mesh::DefaultCentering> - class ExtrapolateFace : public detail::BCondBase - { + } // namespace detail + + template , + class Cell = typename Mesh::DefaultCentering> + class ExtrapolateFace : public detail::BCondBase { public: // Constructor takes zero, one, or two int's specifying components of // multicomponent types like Vector this BC applies to. // Zero int's specified means apply to all components; one means apply to // component (i), and two means apply to component (i,j), using base_type = detail::BCondBase; - using Field_t = typename detail::BCondBase::Field_t; - using Layout_t = typename detail::BCondBase::Layout_t; + using Field_t = typename detail::BCondBase::Field_t; + using Layout_t = typename detail::BCondBase::Layout_t; - ExtrapolateFace(unsigned face, - T offset, - T slope) - : base_type(face) - , offset_m(offset) - , slope_m(slope) - {} + ExtrapolateFace(unsigned face, T offset, T slope) + : base_type(face) + , offset_m(offset) + , slope_m(slope) {} virtual ~ExtrapolateFace() = default; @@ -139,73 +132,54 @@ namespace ippl { T slope_m; }; - template, - class Cell = typename Mesh::DefaultCentering> - class NoBcFace : public detail::BCondBase - { - public: - - using Field_t = typename detail::BCondBase::Field_t; - NoBcFace(int face) : detail::BCondBase(face) {} + template , + class Cell = typename Mesh::DefaultCentering> + class NoBcFace : public detail::BCondBase { + public: + using Field_t = typename detail::BCondBase::Field_t; + NoBcFace(int face) + : detail::BCondBase(face) {} - virtual void findBCNeighbors(Field_t& /*field*/) {} - virtual void apply(Field_t& /*field*/) {} + virtual void findBCNeighbors(Field_t& /*field*/) {} + virtual void apply(Field_t& /*field*/) {} - virtual void write(std::ostream& out) const; - + virtual void write(std::ostream& out) const; }; - - template, - class Cell = typename Mesh::DefaultCentering> - class ConstantFace : public ExtrapolateFace - { + template , + class Cell = typename Mesh::DefaultCentering> + class ConstantFace : public ExtrapolateFace { public: ConstantFace(unsigned int face, T constant) - : ExtrapolateFace(face, constant, 0) - {} + : ExtrapolateFace(face, constant, 0) {} virtual FieldBC getBCType() const { return CONSTANT_FACE; } virtual void write(std::ostream& out) const; }; - - template, - class Cell = typename Mesh::DefaultCentering> - class ZeroFace : public ConstantFace - { + template , + class Cell = typename Mesh::DefaultCentering> + class ZeroFace : public ConstantFace { public: ZeroFace(unsigned face) - : ConstantFace(face, 0.0) - {} + : ConstantFace(face, 0.0) {} virtual FieldBC getBCType() const { return ZERO_FACE; } virtual void write(std::ostream& out) const; }; - - template, - class Cell = typename Mesh::DefaultCentering> - class PeriodicFace : public detail::BCondBase - { + template , + class Cell = typename Mesh::DefaultCentering> + class PeriodicFace : public detail::BCondBase { public: using face_neighbor_type = std::array, 2 * Dim>; - using Field_t = typename detail::BCondBase::Field_t; - using Layout_t = typename detail::BCondBase::Layout_t; - + using Field_t = typename detail::BCondBase::Field_t; + using Layout_t = typename detail::BCondBase::Layout_t; + PeriodicFace(unsigned face) - : detail::BCondBase(face) - { } + : detail::BCondBase(face) {} virtual FieldBC getBCType() const { return PERIODIC_FACE; } @@ -218,8 +192,7 @@ namespace ippl { face_neighbor_type faceNeighbors_m; detail::FieldBufferData haloData_m; }; -} - +} // namespace ippl #include "Field/BcTypes.hpp" diff --git a/src/Field/BcTypes.hpp b/src/Field/BcTypes.hpp index 9f3912d4e..56a142c1d 100644 --- a/src/Field/BcTypes.hpp +++ b/src/Field/BcTypes.hpp @@ -1,18 +1,18 @@ -// This file contains the abstract base class for +// This file contains the abstract base class for // field boundary conditions and other child classes -// which represent specific BCs. At the moment the +// which represent specific BCs. At the moment the // following field BCs are supported -// +// // 1. Periodic BC // 2. Zero BC // 3. Specifying a constant BC // 4. No BC (default option) // 5. Constant extrapolation BC -// Only cell-centered field BCs are implemented +// Only cell-centered field BCs are implemented // at the moment. // Copyright (c) 2021, Sriramkrishnan Muralikrishnan, // Paul Scherrer Institut, Villigen PSI, Switzerland -// Matthias Frey, University of St Andrews, +// Matthias Frey, University of St Andrews, // St Andrews, Scotland // All rights reserved // @@ -27,268 +27,239 @@ // along with IPPL. If not, see . // -#include "Utility/IpplException.h" #include "Field/HaloCells.h" +#include "Utility/IpplException.h" namespace ippl { namespace detail { - template + template BCondBase::BCondBase(unsigned int face) - : face_m(face) - , changePhysical_m(false) - { } + : face_m(face) + , changePhysical_m(false) {} - - template - inline std::ostream& - operator<<(std::ostream& os, const BCondBase& bc) - { + template + inline std::ostream& operator<<(std::ostream& os, const BCondBase& bc) { bc.write(os); return os; } - } + } // namespace detail - template - void ExtrapolateFace::apply(Field_t& field) - { - //We only support constant extrapolation for the moment, other - //higher order extrapolation stuffs need to be added. + template + void ExtrapolateFace::apply(Field_t& field) { + // We only support constant extrapolation for the moment, other + // higher order extrapolation stuffs need to be added. - unsigned int face = this->face_m; - unsigned d = face / 2; - if(Ippl::Comm->size() > 1) { - const Layout_t& layout = field.getLayout(); - const auto& lDomains = layout.getHostLocalDomains(); - const auto& domain = layout.getDomain(); - int myRank = Ippl::Comm->rank(); + unsigned int face = this->face_m; + unsigned d = face / 2; + if (Ippl::Comm->size() > 1) { + const Layout_t& layout = field.getLayout(); + const auto& lDomains = layout.getHostLocalDomains(); + const auto& domain = layout.getDomain(); + int myRank = Ippl::Comm->rank(); - bool isBoundary = (lDomains[myRank][d].max() == domain[d].max()) || - (lDomains[myRank][d].min() == domain[d].min()); + bool isBoundary = (lDomains[myRank][d].max() == domain[d].max()) + || (lDomains[myRank][d].min() == domain[d].min()); - if(!isBoundary) + if (!isBoundary) return; } - //If we are here then it is a processor with the face on the physical - //boundary or it is the single core case. Then the following code is same - //irrespective of either it is a single core or multi-core case as the - //non-periodic BC is local to apply. + // If we are here then it is a processor with the face on the physical + // boundary or it is the single core case. Then the following code is same + // irrespective of either it is a single core or multi-core case as the + // non-periodic BC is local to apply. typename Field_t::view_type& view = field.getView(); - const int nghost = field.getNghost(); - using mdrange_type = Kokkos::MDRangePolicy>; + const int nghost = field.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; int src, dest; - //It is not clear what it exactly means to do extrapolate - //BC for nghost >1 - if(nghost > 1) { - throw IpplException("ExtrapolateFace::apply", - "nghost > 1 not supported"); + // It is not clear what it exactly means to do extrapolate + // BC for nghost >1 + if (nghost > 1) { + throw IpplException("ExtrapolateFace::apply", "nghost > 1 not supported"); } - //If face & 1 is true, then it is an upper BC - if(face & 1) { + // If face & 1 is true, then it is an upper BC + if (face & 1) { src = view.extent(d) - 2; dest = src + 1; - } - else { + } else { src = 1; dest = src - 1; } - switch(d) { + switch (d) { case 0: - Kokkos::parallel_for("Assign extrapolate BC X", - mdrange_type({nghost, nghost}, - {view.extent(1) - nghost, - view.extent(2) - nghost}), - KOKKOS_CLASS_LAMBDA(const size_t j, - const size_t k) - { - view(dest, j, k) = slope_m * view(src, j, k) + offset_m; - }); - break; + Kokkos::parallel_for( + "Assign extrapolate BC X", + mdrange_type({nghost, nghost}, + {view.extent(1) - nghost, view.extent(2) - nghost}), + KOKKOS_CLASS_LAMBDA(const size_t j, const size_t k) { + view(dest, j, k) = slope_m * view(src, j, k) + offset_m; + }); + break; case 1: - Kokkos::parallel_for("Assign extrapolate BC Y", - mdrange_type({nghost, nghost}, - {view.extent(0) - nghost, - view.extent(2) - nghost}), - KOKKOS_CLASS_LAMBDA(const size_t i, - const size_t k) - { - view(i, dest, k) = slope_m * view(i, src, k) + offset_m; - }); - break; + Kokkos::parallel_for( + "Assign extrapolate BC Y", + mdrange_type({nghost, nghost}, + {view.extent(0) - nghost, view.extent(2) - nghost}), + KOKKOS_CLASS_LAMBDA(const size_t i, const size_t k) { + view(i, dest, k) = slope_m * view(i, src, k) + offset_m; + }); + break; case 2: - Kokkos::parallel_for("Assign extrapolate BC Z", - mdrange_type({nghost, nghost}, - {view.extent(0) - nghost, - view.extent(1) - nghost}), - KOKKOS_CLASS_LAMBDA(const size_t i, - const size_t j) - { - view(i, j, dest) = slope_m * view(i, j, src) + offset_m; - }); - break; + Kokkos::parallel_for( + "Assign extrapolate BC Z", + mdrange_type({nghost, nghost}, + {view.extent(0) - nghost, view.extent(1) - nghost}), + KOKKOS_CLASS_LAMBDA(const size_t i, const size_t j) { + view(i, j, dest) = slope_m * view(i, j, src) + offset_m; + }); + break; default: - throw IpplException("ExtrapolateFace::apply", - "face number wrong"); + throw IpplException("ExtrapolateFace::apply", "face number wrong"); } } - - template - void ExtrapolateFace::write(std::ostream& out) const - { + + template + void ExtrapolateFace::write(std::ostream& out) const { out << "Constant Extrapolation Face" << ", Face = " << this->face_m; } - template - void NoBcFace::write(std::ostream& out) const - { + template + void NoBcFace::write(std::ostream& out) const { out << "NoBcFace" << ", Face = " << this->face_m; } - - template - void ConstantFace::write(std::ostream& out) const - { + template + void ConstantFace::write(std::ostream& out) const { out << "ConstantFace" - << ", Face = " << this->face_m - << ", Constant = " << this->offset_m; + << ", Face = " << this->face_m << ", Constant = " << this->offset_m; } - - template - void ZeroFace::write(std::ostream& out) const - { + template + void ZeroFace::write(std::ostream& out) const { out << "ZeroFace" << ", Face = " << this->face_m; } - - template - void PeriodicFace::write(std::ostream& out) const - { + template + void PeriodicFace::write(std::ostream& out) const { out << "PeriodicFace" << ", Face = " << this->face_m; } - - template - void PeriodicFace::findBCNeighbors(Field_t& field) - { - //For cell centering only face neighbors are needed - unsigned int face = this->face_m; - unsigned int d = face/ 2; - const int nghost = field.getNghost(); - int myRank = Ippl::Comm->rank(); - const Layout_t& layout = field.getLayout(); - const auto& lDomains = layout.getHostLocalDomains(); - const auto& domain = layout.getDomain(); - - for (size_t i = 0; i < faceNeighbors_m.size(); ++i) { + + template + void PeriodicFace::findBCNeighbors(Field_t& field) { + // For cell centering only face neighbors are needed + unsigned int face = this->face_m; + unsigned int d = face / 2; + const int nghost = field.getNghost(); + int myRank = Ippl::Comm->rank(); + const Layout_t& layout = field.getLayout(); + const auto& lDomains = layout.getHostLocalDomains(); + const auto& domain = layout.getDomain(); + + for (size_t i = 0; i < faceNeighbors_m.size(); ++i) { faceNeighbors_m[i].clear(); - } + } - if(lDomains[myRank][d].length() < domain[d].length()) { - //Only along this dimension we need communication. + if (lDomains[myRank][d].length() < domain[d].length()) { + // Only along this dimension we need communication. - bool isBoundary = (lDomains[myRank][d].max() == domain[d].max()) || - (lDomains[myRank][d].min() == domain[d].min()); + bool isBoundary = (lDomains[myRank][d].max() == domain[d].max()) + || (lDomains[myRank][d].min() == domain[d].min()); - if(isBoundary) { - - //this face is on mesh/physical boundary - // get my local box + if (isBoundary) { + // this face is on mesh/physical boundary + // get my local box auto& nd = lDomains[myRank]; // grow the box by nghost cells in dimension d of face auto gnd = nd.grow(nghost, d); int offset; - if(face & 1) { - //upper face + if (face & 1) { + // upper face offset = -domain[d].length(); - } - else { - //lower face + } else { + // lower face offset = domain[d].length(); } - //shift by offset + // shift by offset gnd[d] = gnd[d] + offset; - - //Now, we are ready to intersect + + // Now, we are ready to intersect for (int rank = 0; rank < Ippl::Comm->size(); ++rank) { if (rank == myRank) { continue; } - + if (gnd.touches(lDomains[rank])) { faceNeighbors_m[face].push_back(rank); } } } - } + } } - template - void PeriodicFace::apply(Field_t& field) - { - unsigned int face = this->face_m; - unsigned int d = face / 2; - typename Field_t::view_type& view = field.getView(); - const Layout_t& layout = field.getLayout(); - const int nghost = field.getNghost(); - int myRank = Ippl::Comm->rank(); - const auto& lDomains = layout.getHostLocalDomains(); - const auto& domain = layout.getDomain(); - - //We have to put tag here so that the matchtag inside - //the if is proper. - int tag = Ippl::Comm->next_tag(BC_PARALLEL_PERIODIC_TAG, BC_TAG_CYCLE); - - if(lDomains[myRank][d].length() < domain[d].length()) { - //Only along this dimension we need communication. - - bool isBoundary = (lDomains[myRank][d].max() == domain[d].max()) || - (lDomains[myRank][d].min() == domain[d].min()); - - if(isBoundary) { - //this face is on mesh/physical boundary - // get my local box + template + void PeriodicFace::apply(Field_t& field) { + unsigned int face = this->face_m; + unsigned int d = face / 2; + typename Field_t::view_type& view = field.getView(); + const Layout_t& layout = field.getLayout(); + const int nghost = field.getNghost(); + int myRank = Ippl::Comm->rank(); + const auto& lDomains = layout.getHostLocalDomains(); + const auto& domain = layout.getDomain(); + + // We have to put tag here so that the matchtag inside + // the if is proper. + int tag = Ippl::Comm->next_tag(BC_PARALLEL_PERIODIC_TAG, BC_TAG_CYCLE); + + if (lDomains[myRank][d].length() < domain[d].length()) { + // Only along this dimension we need communication. + + bool isBoundary = (lDomains[myRank][d].max() == domain[d].max()) + || (lDomains[myRank][d].min() == domain[d].min()); + + if (isBoundary) { + // this face is on mesh/physical boundary + // get my local box auto& nd = lDomains[myRank]; int offset, offsetRecv, matchtag; - if(face & 1) { - //upper face - offset = -domain[d].length(); + if (face & 1) { + // upper face + offset = -domain[d].length(); offsetRecv = nghost; - matchtag = Ippl::Comm->preceding_tag(BC_PARALLEL_PERIODIC_TAG); - } - else { - //lower face - offset = domain[d].length(); + matchtag = Ippl::Comm->preceding_tag(BC_PARALLEL_PERIODIC_TAG); + } else { + // lower face + offset = domain[d].length(); offsetRecv = -nghost; - matchtag = Ippl::Comm->following_tag(BC_PARALLEL_PERIODIC_TAG); + matchtag = Ippl::Comm->following_tag(BC_PARALLEL_PERIODIC_TAG); } - + using buffer_type = Communicate::buffer_type; std::vector requests(faceNeighbors_m[face].size()); - + using HaloCells_t = detail::HaloCells; - using range_t = typename HaloCells_t::bound_type; + using range_t = typename HaloCells_t::bound_type; HaloCells_t& halo = field.getHalo(); std::vector rangeNeighbors; - - for (size_t i = 0; i < faceNeighbors_m[face].size(); ++i) { + for (size_t i = 0; i < faceNeighbors_m[face].size(); ++i) { int rank = faceNeighbors_m[face][i]; - + auto ndNeighbor = lDomains[rank]; - ndNeighbor[d] = ndNeighbor[d] - offset; - + ndNeighbor[d] = ndNeighbor[d] - offset; + NDIndex gndNeighbor = ndNeighbor.grow(nghost, d); NDIndex overlap = gndNeighbor.intersect(nd); @@ -296,36 +267,32 @@ namespace ippl { range_t range; for (size_t j = 0; j < Dim; ++j) { - range.lo[j] = overlap[j].first() - nd[j].first() - + nghost; - range.hi[j] = overlap[j].last() - nd[j].first() - + nghost + 1; + range.lo[j] = overlap[j].first() - nd[j].first() + nghost; + range.hi[j] = overlap[j].last() - nd[j].first() + nghost + 1; } - - rangeNeighbors.push_back(range); - + + rangeNeighbors.push_back(range); + detail::size_type nSends; halo.pack(range, view, haloData_m, nSends); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PERIODIC_BC_SEND + i, nSends); - Ippl::Comm->isend(rank, tag, haloData_m, *buf, - requests[i], nSends); + Ippl::Comm->isend(rank, tag, haloData_m, *buf, requests[i], nSends); buf->resetWritePos(); } - - for (size_t i = 0; i < faceNeighbors_m[face].size(); ++i) { + for (size_t i = 0; i < faceNeighbors_m[face].size(); ++i) { int rank = faceNeighbors_m[face][i]; - + range_t range = rangeNeighbors[i]; range.lo[d] = range.lo[d] + offsetRecv; range.hi[d] = range.hi[d] + offsetRecv; - - detail::size_type nRecvs = (range.hi[0] - range.lo[0]) * - (range.hi[1] - range.lo[1]) * - (range.hi[2] - range.lo[2]); + + detail::size_type nRecvs = (range.hi[0] - range.lo[0]) + * (range.hi[1] - range.lo[1]) + * (range.hi[2] - range.lo[2]); buffer_type buf = Ippl::Comm->getBuffer(IPPL_PERIODIC_BC_RECV + i, nRecvs); Ippl::Comm->recv(rank, matchtag, haloData_m, *buf, nRecvs * sizeof(T), nRecvs); @@ -335,16 +302,13 @@ namespace ippl { halo.template unpack(range, view, haloData_m); } if (requests.size() > 0) { - MPI_Waitall(requests.size(), requests.data(), - MPI_STATUSES_IGNORE); + MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); } } - //For all other processors do nothing - } - else { - + // For all other processors do nothing + } else { using mdrange_type = Kokkos::MDRangePolicy>; - int N = view.extent(d)-1; + int N = view.extent(d) - 1; std::array ext; @@ -354,56 +318,40 @@ namespace ippl { switch (d) { case 0: - Kokkos::parallel_for("Assign periodic field BC X", - mdrange_type({0, nghost, nghost}, - {(long)nghost, - ext[1], - ext[2]}), - KOKKOS_CLASS_LAMBDA(const int i, - const size_t j, - const size_t k) - { - //The ghosts are filled starting from the inside of - //the domain proceeding outwards for both lower and - //upper faces. The extra brackets and explicit mention - //of 0 is for better readability of the code - - view(0+(nghost-1)-i, j, k) = view(N-nghost-i, j, k); - view(N-(nghost-1)+i, j, k) = view(0+nghost+i, j, k); - }); + Kokkos::parallel_for( + "Assign periodic field BC X", + mdrange_type({0, nghost, nghost}, {(long)nghost, ext[1], ext[2]}), + KOKKOS_CLASS_LAMBDA(const int i, const size_t j, const size_t k) { + // The ghosts are filled starting from the inside of + // the domain proceeding outwards for both lower and + // upper faces. The extra brackets and explicit mention + // of 0 is for better readability of the code + + view(0 + (nghost - 1) - i, j, k) = view(N - nghost - i, j, k); + view(N - (nghost - 1) + i, j, k) = view(0 + nghost + i, j, k); + }); break; case 1: - Kokkos::parallel_for("Assign periodic field BC Y", - mdrange_type({nghost, 0, nghost}, - {ext[0], - (long)nghost, - ext[2]}), - KOKKOS_CLASS_LAMBDA(const size_t i, - const int j, - const size_t k) - { - view(i, 0+(nghost-1)-j, k) = view(i, N-nghost-j, k); - view(i, N-(nghost-1)+j, k) = view(i, 0+nghost+j, k); - }); + Kokkos::parallel_for( + "Assign periodic field BC Y", + mdrange_type({nghost, 0, nghost}, {ext[0], (long)nghost, ext[2]}), + KOKKOS_CLASS_LAMBDA(const size_t i, const int j, const size_t k) { + view(i, 0 + (nghost - 1) - j, k) = view(i, N - nghost - j, k); + view(i, N - (nghost - 1) + j, k) = view(i, 0 + nghost + j, k); + }); break; case 2: - Kokkos::parallel_for("Assign periodic field BC Z", - mdrange_type({nghost, nghost, 0}, - {ext[0], - ext[1], - (long)nghost}), - KOKKOS_CLASS_LAMBDA(const size_t i, - const size_t j, - const int k) - { - view(i, j, 0+(nghost-1)-k) = view(i, j, N-nghost-k); - view(i, j, N-(nghost-1)+k) = view(i, j, 0+nghost+k); - }); + Kokkos::parallel_for( + "Assign periodic field BC Z", + mdrange_type({nghost, nghost, 0}, {ext[0], ext[1], (long)nghost}), + KOKKOS_CLASS_LAMBDA(const size_t i, const size_t j, const int k) { + view(i, j, 0 + (nghost - 1) - k) = view(i, j, N - nghost - k); + view(i, j, N - (nghost - 1) + k) = view(i, j, 0 + nghost + k); + }); break; default: throw IpplException("PeriodicFace::apply", "face number wrong"); - } - } + } } -} +} // namespace ippl diff --git a/src/Field/Field.h b/src/Field/Field.h index cdb0f33ab..584de9da7 100644 --- a/src/Field/Field.h +++ b/src/Field/Field.h @@ -18,17 +18,15 @@ #ifndef IPPL_FIELD_H #define IPPL_FIELD_H -#include "Field/BareField.h" #include "Field/BConds.h" +#include "Field/BareField.h" #include "Meshes/UniformCartesian.h" namespace ippl { - template , - class C=typename M::DefaultCentering > - class Field : public BareField - { + template , + class C = typename M::DefaultCentering> + class Field : public BareField { public: typedef T type; static constexpr unsigned dimension = Dim; @@ -48,13 +46,12 @@ namespace ippl { virtual ~Field() = default; - // Constructors including a Mesh object as argument: Field(Mesh_t&, Layout_t&, int nghost = 1); // Initialize the Field, also specifying a mesh void initialize(Mesh_t&, Layout_t&, int nghost = 1); - + // ML void updateLayout(Layout_t&, int nghost = 1); @@ -64,8 +61,7 @@ namespace ippl { } // Access to the mesh - KOKKOS_INLINE_FUNCTION - Mesh_t& get_mesh() const { return *mesh_m; } + KOKKOS_INLINE_FUNCTION Mesh_t& get_mesh() const { return *mesh_m; } /*! * Use the midpoint rule to calculate the field's volume integral @@ -86,14 +82,13 @@ namespace ippl { Field(const Field&) = default; private: - // The Mesh object, and a flag indicating if we constructed it Mesh_t* mesh_m; // The boundary conditions. BConds_t bc_m; }; -} +} // namespace ippl #include "Field/Field.hpp" #include "Field/FieldOperations.hpp" diff --git a/src/Field/Field.hpp b/src/Field/Field.hpp index 995d8adfe..c95acf998 100644 --- a/src/Field/Field.hpp +++ b/src/Field/Field.hpp @@ -21,58 +21,55 @@ namespace ippl { namespace detail { template struct isExpression> : std::true_type {}; - } + } // namespace detail ////////////////////////////////////////////////////////////////////////// // A default constructor, which should be used only if the user calls the // 'initialize' function before doing anything else. There are no special // checks in the rest of the Field methods to check that the Field has // been properly initialized - template - Field::Field() - : BareField() - , mesh_m(nullptr) - , bc_m() - { } + template + Field::Field() + : BareField() + , mesh_m(nullptr) + , bc_m() {} ////////////////////////////////////////////////////////////////////////// // Constructors which include a Mesh object as argument - template - Field::Field(Mesh_t& m, Layout_t& l, int nghost) - : BareField(l, nghost) - , mesh_m(&m) - { - for (unsigned int face=0; face < 2 * Dim; ++face) { + template + Field::Field(Mesh_t& m, Layout_t& l, int nghost) + : BareField(l, nghost) + , mesh_m(&m) { + for (unsigned int face = 0; face < 2 * Dim; ++face) { bc_m[face] = std::make_shared>(face); } } ////////////////////////////////////////////////////////////////////////// // Initialize the Field, also specifying a mesh - template - void Field::initialize(Mesh_t& m, Layout_t& l, int nghost) { - BareField::initialize(l, nghost); + template + void Field::initialize(Mesh_t& m, Layout_t& l, int nghost) { + BareField::initialize(l, nghost); mesh_m = &m; - for (unsigned int face=0; face < 2 * Dim; ++face) { + for (unsigned int face = 0; face < 2 * Dim; ++face) { bc_m[face] = std::make_shared>(face); } } - template - T Field::getVolumeIntegral() const { + template + T Field::getVolumeIntegral() const { typename M::value_type dV = mesh_m->getCellVolume(); return this->sum() * dV; } - template - T Field::getVolumeAverage() const { + template + T Field::getVolumeAverage() const { return getVolumeIntegral() / mesh_m->getMeshVolume(); } - template - void Field::updateLayout(Layout_t& l, int nghost) { - BareField::updateLayout(l, nghost); + template + void Field::updateLayout(Layout_t& l, int nghost) { + BareField::updateLayout(l, nghost); } -} - +} // namespace ippl diff --git a/src/Field/FieldOperations.hpp b/src/Field/FieldOperations.hpp index 155944cec..d001f4b47 100644 --- a/src/Field/FieldOperations.hpp +++ b/src/Field/FieldOperations.hpp @@ -23,18 +23,18 @@ namespace ippl { * @param f2 second field * @return Result of f1^T f2 */ - template > - T innerProduct(const Field& f1, const Field& f2) { - T sum = 0; + template > + T innerProduct(const Field& f1, const Field& f2) { + T sum = 0; auto view1 = f1.getView(); auto view2 = f2.getView(); - Kokkos::parallel_reduce("Field::innerProduct(Field&, Field&)", f1.getRangePolicy(), + Kokkos::parallel_reduce( + "Field::innerProduct(Field&, Field&)", f1.getRangePolicy(), KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, T& val) { val += view1(i, j, k) * view2(i, j, k); }, - Kokkos::Sum(sum) - ); - T globalSum = 0; + Kokkos::Sum(sum)); + T globalSum = 0; MPI_Datatype type = get_mpi_datatype(sum); MPI_Allreduce(&sum, &globalSum, 1, type, MPI_SUM, Ippl::getComm()); return globalSum; @@ -46,45 +46,42 @@ namespace ippl { * @param p desired norm (default 2) * @return The desired norm of the field */ - template + template T norm(const Field& field, int p = 2) { - T local = 0; + T local = 0; auto view = field.getView(); switch (p) { - case 0: - { - Kokkos::parallel_reduce("Field::norm(0)", field.getRangePolicy(), - KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, T& val) { - T myVal = std::abs(view(i, j, k)); - if (myVal > val) - val = myVal; - }, - Kokkos::Max(local) - ); - T globalMax = 0; - MPI_Datatype type = get_mpi_datatype(local); - MPI_Allreduce(&local, &globalMax, 1, type, MPI_MAX, Ippl::getComm()); - return globalMax; - } - case 2: - return std::sqrt(innerProduct(field, field)); - default: - { - Kokkos::parallel_reduce("Field::norm(int) general", field.getRangePolicy(), - KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, T& val) { - val += std::pow(std::abs(view(i, j, k)), p); - }, - Kokkos::Sum(local) - ); - T globalSum = 0; - MPI_Datatype type = get_mpi_datatype(local); - MPI_Allreduce(&local, &globalSum, 1, type, MPI_SUM, Ippl::getComm()); - return std::pow(globalSum, 1.0 / p); - } + case 0: { + Kokkos::parallel_reduce( + "Field::norm(0)", field.getRangePolicy(), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, T& val) { + T myVal = std::abs(view(i, j, k)); + if (myVal > val) + val = myVal; + }, + Kokkos::Max(local)); + T globalMax = 0; + MPI_Datatype type = get_mpi_datatype(local); + MPI_Allreduce(&local, &globalMax, 1, type, MPI_MAX, Ippl::getComm()); + return globalMax; + } + case 2: + return std::sqrt(innerProduct(field, field)); + default: { + Kokkos::parallel_reduce( + "Field::norm(int) general", field.getRangePolicy(), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, T& val) { + val += std::pow(std::abs(view(i, j, k)), p); + }, + Kokkos::Sum(local)); + T globalSum = 0; + MPI_Datatype type = get_mpi_datatype(local); + MPI_Allreduce(&local, &globalSum, 1, type, MPI_SUM, Ippl::getComm()); + return std::pow(globalSum, 1.0 / p); + } } } - /*! * User interface of gradient in three dimensions. * @param u field @@ -92,19 +89,18 @@ namespace ippl { template detail::meta_grad> grad(Field& u) { u.fillHalo(); - BConds& bcField = u.getFieldBC(); + BConds& bcField = u.getFieldBC(); bcField.apply(u); M& mesh = u.get_mesh(); typename M::vector_type xvector(0); xvector[0] = 0.5 / mesh.getMeshSpacing(0); typename M::vector_type yvector(0); - yvector[1] = 0.5 / mesh.getMeshSpacing(1); + yvector[1] = 0.5 / mesh.getMeshSpacing(1); typename M::vector_type zvector(0); zvector[2] = 0.5 / mesh.getMeshSpacing(2); return detail::meta_grad>(u, xvector, yvector, zvector); } - /*! * User interface of divergence in three dimensions. * @param u field @@ -112,7 +108,7 @@ namespace ippl { template detail::meta_div> div(Field& u) { u.fillHalo(); - BConds& bcField = u.getFieldBC(); + BConds& bcField = u.getFieldBC(); bcField.apply(u); M& mesh = u.get_mesh(); typename M::vector_type xvector(0); @@ -124,7 +120,6 @@ namespace ippl { return detail::meta_div>(u, xvector, yvector, zvector); } - /*! * User interface of Laplacian in three dimensions. * @param u field @@ -132,7 +127,7 @@ namespace ippl { template detail::meta_laplace> laplace(Field& u) { u.fillHalo(); - BConds& bcField = u.getFieldBC(); + BConds& bcField = u.getFieldBC(); bcField.apply(u); M& mesh = u.get_mesh(); typename M::vector_type hvector(0); @@ -149,7 +144,7 @@ namespace ippl { template detail::meta_curl> curl(Field& u) { u.fillHalo(); - BConds& bcField = u.getFieldBC(); + BConds& bcField = u.getFieldBC(); bcField.apply(u); M& mesh = u.get_mesh(); typename M::vector_type xvector(0); @@ -170,7 +165,7 @@ namespace ippl { template detail::meta_hess> hess(Field& u) { u.fillHalo(); - BConds& bcField = u.getFieldBC(); + BConds& bcField = u.getFieldBC(); bcField.apply(u); M& mesh = u.get_mesh(); typename M::vector_type xvector(0); @@ -183,4 +178,4 @@ namespace ippl { hvector = mesh.getMeshSpacing(); return detail::meta_hess>(u, xvector, yvector, zvector, hvector); } -} +} // namespace ippl diff --git a/src/Field/HaloCells.h b/src/Field/HaloCells.h index 96a7db690..bb3024ceb 100644 --- a/src/Field/HaloCells.h +++ b/src/Field/HaloCells.h @@ -18,12 +18,12 @@ #ifndef IPPL_HALO_CELLS_H #define IPPL_HALO_CELLS_H -#include "Index/NDIndex.h" -#include "Types/ViewTypes.h" -#include "Types/IpplTypes.h" +#include #include "Communicate/Archive.h" #include "FieldLayout/FieldLayout.h" -#include +#include "Index/NDIndex.h" +#include "Types/IpplTypes.h" +#include "Types/ViewTypes.h" namespace ippl { namespace detail { @@ -34,29 +34,22 @@ namespace ippl { struct FieldBufferData { using view_type = typename detail::ViewType::view_type; - void serialize(Archive<>& ar, size_type nsends) { - ar.serialize(buffer, nsends); - } + void serialize(Archive<>& ar, size_type nsends) { ar.serialize(buffer, nsends); } - void deserialize(Archive<>& ar, size_type nrecvs) { - ar.deserialize(buffer, nrecvs); - } + void deserialize(Archive<>& ar, size_type nrecvs) { ar.deserialize(buffer, nrecvs); } view_type buffer; }; - /*! * This class provides the functionality to do field halo exchange. * @file HaloCells.h */ template - class HaloCells - { - + class HaloCells { public: - using view_type = typename detail::ViewType::view_type; - using Layout_t = FieldLayout; + using view_type = typename detail::ViewType::view_type; + using Layout_t = FieldLayout; using bound_type = typename Layout_t::bound_type; enum SendOrder { @@ -72,8 +65,7 @@ namespace ippl { * @param view the original field data * @param layout the field layout storing the domain decomposition */ - void accumulateHalo(view_type& view, - const Layout_t* layout); + void accumulateHalo(view_type& view, const Layout_t* layout); /*! * Send interal data to halo cells. This operation uses @@ -89,9 +81,7 @@ namespace ippl { * @param view the original view * @param fd the buffer to pack into */ - void pack(const bound_type& range, - const view_type& view, - FieldBufferData& fd, + void pack(const bound_type& range, const view_type& view, FieldBufferData& fd, size_type& nsends); /*! @@ -102,19 +92,14 @@ namespace ippl { * @tparam Op the data assigment operator */ template - void unpack(const bound_type& range, - const view_type& view, - FieldBufferData& fd); + void unpack(const bound_type& range, const view_type& view, FieldBufferData& fd); /*! * Operator for the unpack function. * This operator is used in case of INTERNAL_TO_HALO. */ struct assign { - KOKKOS_INLINE_FUNCTION - void operator()(T& lhs, const T& rhs) const { - lhs = rhs; - } + KOKKOS_INLINE_FUNCTION void operator()(T& lhs, const T& rhs) const { lhs = rhs; } }; /*! @@ -122,10 +107,7 @@ namespace ippl { * This operator is used in case of HALO_TO_INTERNAL. */ struct lhs_plus_assign { - KOKKOS_INLINE_FUNCTION - void operator()(T& lhs, const T& rhs) const { - lhs += rhs; - } + KOKKOS_INLINE_FUNCTION void operator()(T& lhs, const T& rhs) const { lhs += rhs; } }; /*! @@ -133,26 +115,19 @@ namespace ippl { * all periodic BCs application in BareField. */ struct rhs_plus_assign { - KOKKOS_INLINE_FUNCTION - void operator()(const T& lhs, T& rhs) const { - rhs += lhs; - } + KOKKOS_INLINE_FUNCTION void operator()(const T& lhs, T& rhs) const { rhs += lhs; } }; /*! - * Apply all periodic boundary conditions for the + * Apply all periodic boundary conditions for the * serial dimensions. Used in case of both fillHalo * and accumulateHalo with the help of operator as * template parameter. */ template - void applyPeriodicSerialDim(view_type& view, - const Layout_t* layout, - const int nghost); - + void applyPeriodicSerialDim(view_type& view, const Layout_t* layout, const int nghost); private: - /*! * Exchange the data of faces. * @param view is the original field data @@ -162,9 +137,7 @@ namespace ippl { * unpack function call */ template - void exchangeFaces(view_type& view, - const Layout_t* layout, - SendOrder order); + void exchangeFaces(view_type& view, const Layout_t* layout, SendOrder order); /*! * Exchange the data of edges. @@ -175,9 +148,7 @@ namespace ippl { * unpack function call */ template - void exchangeEdges(view_type& view, - const Layout_t* layout, - SendOrder order); + void exchangeEdges(view_type& view, const Layout_t* layout, SendOrder order); /*! * Exchange the data of vertices. @@ -188,9 +159,7 @@ namespace ippl { * unpack function call */ template - void exchangeVertices(view_type& view, - const Layout_t* layout, - SendOrder order); + void exchangeVertices(view_type& view, const Layout_t* layout, SendOrder order); /*! * Extract the subview of the original data. This does not copy. @@ -198,14 +167,12 @@ namespace ippl { * @param view is the original field data * @param intersect the bounds of the intersection */ - auto makeSubview(const view_type& view, - const bound_type& intersect); + auto makeSubview(const view_type& view, const bound_type& intersect); FieldBufferData haloData_m; - }; - } -} + } // namespace detail +} // namespace ippl #include "Field/HaloCells.hpp" diff --git a/src/Field/HaloCells.hpp b/src/Field/HaloCells.hpp index dbd248d4c..ee71a58e9 100644 --- a/src/Field/HaloCells.hpp +++ b/src/Field/HaloCells.hpp @@ -19,21 +19,18 @@ #include #include -#include "Utility/IpplException.h" #include "Communicate/Communicate.h" +#include "Utility/IpplException.h" namespace ippl { namespace detail { template - HaloCells::HaloCells() - { + HaloCells::HaloCells() { static_assert(Dim == 3, "Dimension must be 3!"); } template - void HaloCells::accumulateHalo(view_type& view, - const Layout_t* layout) - { + void HaloCells::accumulateHalo(view_type& view, const Layout_t* layout) { exchangeFaces(view, layout, HALO_TO_INTERNAL); exchangeEdges(view, layout, HALO_TO_INTERNAL); @@ -41,11 +38,8 @@ namespace ippl { exchangeVertices(view, layout, HALO_TO_INTERNAL); } - template - void HaloCells::fillHalo(view_type& view, - const Layout_t* layout) - { + void HaloCells::fillHalo(view_type& view, const Layout_t* layout) { exchangeFaces(view, layout, INTERNAL_TO_HALO); exchangeEdges(view, layout, INTERNAL_TO_HALO); @@ -53,27 +47,21 @@ namespace ippl { exchangeVertices(view, layout, INTERNAL_TO_HALO); } - template template - void HaloCells::exchangeFaces(view_type& view, - const Layout_t* layout, - SendOrder order) - { + void HaloCells::exchangeFaces(view_type& view, const Layout_t* layout, + SendOrder order) { /* The neighbor list has length 2 * Dim. Each index * denotes a face. The value tells which MPI rank * we need to send to. */ - using neighbor_type = typename Layout_t::face_neighbor_type; + using neighbor_type = typename Layout_t::face_neighbor_type; const neighbor_type& neighbors = layout->getFaceNeighbors(); - using neighbor_range_type = typename Layout_t::face_neighbor_range_type; - const neighbor_range_type& neighborsSendRange = - layout->getFaceNeighborsSendRange(); - const neighbor_range_type& neighborsRecvRange = - layout->getFaceNeighborsRecvRange(); - using match_face_type = typename Layout_t::match_face_type; - const match_face_type& matchface = layout->getMatchFace(); - + using neighbor_range_type = typename Layout_t::face_neighbor_range_type; + const neighbor_range_type& neighborsSendRange = layout->getFaceNeighborsSendRange(); + const neighbor_range_type& neighborsRecvRange = layout->getFaceNeighborsRecvRange(); + using match_face_type = typename Layout_t::match_face_type; + const match_face_type& matchface = layout->getMatchFace(); size_t totalRequests = 0; for (auto& neighbor : neighbors) { @@ -85,11 +73,10 @@ namespace ippl { std::array face_tag; const size_t groupCount = neighbors.size(); - size_t requestIndex = 0; + size_t requestIndex = 0; for (size_t face = 0; face < neighbors.size(); ++face) { face_tag[face] = Ippl::Comm->next_tag(HALO_FACE_TAG, HALO_TAG_CYCLE); for (size_t i = 0; i < neighbors[face].size(); ++i) { - int rank = neighbors[face][i]; bound_type range; @@ -108,11 +95,10 @@ namespace ippl { pack(range, view, haloData_m, nsends); buffer_type buf = Ippl::Comm->getBuffer( - IPPL_HALO_FACE_SEND + i * groupCount + face, - nsends); + IPPL_HALO_FACE_SEND + i * groupCount + face, nsends); Ippl::Comm->isend(rank, face_tag[face], haloData_m, *buf, - requests[requestIndex++], nsends); + requests[requestIndex++], nsends); buf->resetWritePos(); } } @@ -120,7 +106,6 @@ namespace ippl { // receive for (size_t face = 0; face < neighbors.size(); ++face) { for (size_t i = 0; i < neighbors[face].size(); ++i) { - int rank = neighbors[face][i]; bound_type range; @@ -130,16 +115,15 @@ namespace ippl { range = neighborsSendRange[face][i]; } - size_type nrecvs = (int)((range.hi[0] - range.lo[0]) * - (range.hi[1] - range.lo[1]) * - (range.hi[2] - range.lo[2])); + size_type nrecvs = + (int)((range.hi[0] - range.lo[0]) * (range.hi[1] - range.lo[1]) + * (range.hi[2] - range.lo[2])); buffer_type buf = Ippl::Comm->getBuffer( - IPPL_HALO_FACE_RECV + i * groupCount + face, - nrecvs); + IPPL_HALO_FACE_RECV + i * groupCount + face, nrecvs); - Ippl::Comm->recv(rank, face_tag[matchface[face]], haloData_m, - *buf, nrecvs * sizeof(T), nrecvs); + Ippl::Comm->recv(rank, face_tag[matchface[face]], haloData_m, *buf, + nrecvs * sizeof(T), nrecvs); buf->resetReadPos(); unpack(range, view, haloData_m); @@ -151,23 +135,17 @@ namespace ippl { } } - template template - void HaloCells::exchangeEdges(view_type& view, - const Layout_t* layout, - SendOrder order) - { - using neighbor_type = typename Layout_t::edge_neighbor_type; + void HaloCells::exchangeEdges(view_type& view, const Layout_t* layout, + SendOrder order) { + using neighbor_type = typename Layout_t::edge_neighbor_type; const neighbor_type& neighbors = layout->getEdgeNeighbors(); - using neighbor_range_type = typename Layout_t::edge_neighbor_range_type; - const neighbor_range_type& neighborsSendRange = - layout->getEdgeNeighborsSendRange(); - const neighbor_range_type& neighborsRecvRange = - layout->getEdgeNeighborsRecvRange(); - using match_edge_type = typename Layout_t::match_edge_type; - const match_edge_type& matchedge = layout->getMatchEdge(); - + using neighbor_range_type = typename Layout_t::edge_neighbor_range_type; + const neighbor_range_type& neighborsSendRange = layout->getEdgeNeighborsSendRange(); + const neighbor_range_type& neighborsRecvRange = layout->getEdgeNeighborsRecvRange(); + using match_edge_type = typename Layout_t::match_edge_type; + const match_edge_type& matchedge = layout->getMatchEdge(); size_t totalRequests = 0; for (auto& neighbor : neighbors) { @@ -177,14 +155,12 @@ namespace ippl { using buffer_type = Communicate::buffer_type; std::vector requests(totalRequests); - std::array edge_tag; const size_t groupCount = neighbors.size(); - size_t requestIndex = 0; + size_t requestIndex = 0; for (size_t edge = 0; edge < neighbors.size(); ++edge) { edge_tag[edge] = Ippl::Comm->next_tag(HALO_EDGE_TAG, HALO_TAG_CYCLE); for (size_t i = 0; i < neighbors[edge].size(); ++i) { - int rank = neighbors[edge][i]; bound_type range; @@ -198,11 +174,10 @@ namespace ippl { pack(range, view, haloData_m, nsends); buffer_type buf = Ippl::Comm->getBuffer( - IPPL_HALO_EDGE_SEND + i * groupCount + edge, - nsends); + IPPL_HALO_EDGE_SEND + i * groupCount + edge, nsends); - Ippl::Comm->isend(rank, edge_tag[edge], haloData_m, *buf, - requests[requestIndex++], nsends); + Ippl::Comm->isend(rank, edge_tag[edge], haloData_m, *buf, + requests[requestIndex++], nsends); buf->resetWritePos(); } } @@ -210,7 +185,6 @@ namespace ippl { // receive for (size_t edge = 0; edge < neighbors.size(); ++edge) { for (size_t i = 0; i < neighbors[edge].size(); ++i) { - int rank = neighbors[edge][i]; bound_type range; @@ -220,16 +194,15 @@ namespace ippl { range = neighborsSendRange[edge][i]; } - size_type nrecvs = (int)((range.hi[0] - range.lo[0]) * - (range.hi[1] - range.lo[1]) * - (range.hi[2] - range.lo[2])); + size_type nrecvs = + (int)((range.hi[0] - range.lo[0]) * (range.hi[1] - range.lo[1]) + * (range.hi[2] - range.lo[2])); buffer_type buf = Ippl::Comm->getBuffer( - IPPL_HALO_EDGE_RECV + i * groupCount + edge, - nrecvs); + IPPL_HALO_EDGE_RECV + i * groupCount + edge, nrecvs); - Ippl::Comm->recv(rank, edge_tag[matchedge[edge]], haloData_m, - *buf, nrecvs * sizeof(T), nrecvs); + Ippl::Comm->recv(rank, edge_tag[matchedge[edge]], haloData_m, *buf, + nrecvs * sizeof(T), nrecvs); buf->resetReadPos(); unpack(range, view, haloData_m); @@ -241,27 +214,21 @@ namespace ippl { } } - template template - void HaloCells::exchangeVertices(view_type& view, - const Layout_t* layout, - SendOrder order) - { - using neighbor_type = typename Layout_t::vertex_neighbor_type; + void HaloCells::exchangeVertices(view_type& view, const Layout_t* layout, + SendOrder order) { + using neighbor_type = typename Layout_t::vertex_neighbor_type; const neighbor_type& neighbors = layout->getVertexNeighbors(); - using neighbor_range_type = typename Layout_t::vertex_neighbor_range_type; - const neighbor_range_type& neighborsSendRange = - layout->getVertexNeighborsSendRange(); - const neighbor_range_type& neighborsRecvRange = - layout->getVertexNeighborsRecvRange(); - using match_vertex_type = typename Layout_t::match_vertex_type; - const match_vertex_type& matchvertex = layout->getMatchVertex(); + using neighbor_range_type = typename Layout_t::vertex_neighbor_range_type; + const neighbor_range_type& neighborsSendRange = layout->getVertexNeighborsSendRange(); + const neighbor_range_type& neighborsRecvRange = layout->getVertexNeighborsRecvRange(); + using match_vertex_type = typename Layout_t::match_vertex_type; + const match_vertex_type& matchvertex = layout->getMatchVertex(); using buffer_type = Communicate::buffer_type; std::vector requests(neighbors.size()); - std::array vertex_tag; size_t requestIndex = 0; for (size_t vertex = 0; vertex < neighbors.size(); ++vertex) { @@ -283,13 +250,10 @@ namespace ippl { size_type nsends; pack(range, view, haloData_m, nsends); - buffer_type buf = Ippl::Comm->getBuffer( - IPPL_HALO_VERTEX_SEND + vertex, - nsends); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_HALO_VERTEX_SEND + vertex, nsends); - - Ippl::Comm->isend(rank, vertex_tag[vertex], haloData_m, *buf, - requests[requestIndex++], nsends); + Ippl::Comm->isend(rank, vertex_tag[vertex], haloData_m, *buf, + requests[requestIndex++], nsends); buf->resetWritePos(); } @@ -309,16 +273,13 @@ namespace ippl { range = neighborsSendRange[vertex]; } - size_type nrecvs = (int)((range.hi[0] - range.lo[0]) * - (range.hi[1] - range.lo[1]) * - (range.hi[2] - range.lo[2])); - - buffer_type buf = Ippl::Comm->getBuffer( - IPPL_HALO_VERTEX_RECV + vertex, - nrecvs); + size_type nrecvs = (int)((range.hi[0] - range.lo[0]) * (range.hi[1] - range.lo[1]) + * (range.hi[2] - range.lo[2])); + + buffer_type buf = Ippl::Comm->getBuffer(IPPL_HALO_VERTEX_RECV + vertex, nrecvs); - Ippl::Comm->recv(rank, vertex_tag[matchvertex[vertex]], - haloData_m, *buf, nrecvs * sizeof(T), nrecvs); + Ippl::Comm->recv(rank, vertex_tag[matchvertex[vertex]], haloData_m, *buf, + nrecvs * sizeof(T), nrecvs); buf->resetReadPos(); unpack(range, view, haloData_m); @@ -329,19 +290,15 @@ namespace ippl { } } - template - void HaloCells::pack(const bound_type& range, - const view_type& view, - FieldBufferData& fd, - size_type& nsends) - { + void HaloCells::pack(const bound_type& range, const view_type& view, + FieldBufferData& fd, size_type& nsends) { auto subview = makeSubview(view, range); auto& buffer = fd.buffer; size_t size = subview.size(); - nsends = size; + nsends = size; if (buffer.size() < size) { int overalloc = Ippl::Comm->getDefaultOverallocation(); Kokkos::realloc(buffer, size * overalloc); @@ -350,31 +307,20 @@ namespace ippl { using mdrange_type = Kokkos::MDRangePolicy>; Kokkos::parallel_for( "HaloCells::pack()", - mdrange_type({0, 0, 0}, - {subview.extent(0), - subview.extent(1), - subview.extent(2)}), - KOKKOS_CLASS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - int l = i + j * subview.extent(0) - + k * subview.extent(0) * subview.extent(1); + mdrange_type({0, 0, 0}, {subview.extent(0), subview.extent(1), subview.extent(2)}), + KOKKOS_CLASS_LAMBDA(const size_t i, const size_t j, const size_t k) { + int l = i + j * subview.extent(0) + k * subview.extent(0) * subview.extent(1); buffer(l) = subview(i, j, k); - } - ); + }); Kokkos::fence(); } - template template - void HaloCells::unpack(const bound_type& range, - const view_type& view, - FieldBufferData& fd) - { + void HaloCells::unpack(const bound_type& range, const view_type& view, + FieldBufferData& fd) { auto subview = makeSubview(view, range); - auto buffer = fd.buffer; + auto buffer = fd.buffer; // 29. November 2020 // https://stackoverflow.com/questions/3735398/operator-as-template-parameter @@ -383,118 +329,89 @@ namespace ippl { using mdrange_type = Kokkos::MDRangePolicy>; Kokkos::parallel_for( "HaloCells::unpack()", - mdrange_type({0, 0, 0}, - {subview.extent(0), - subview.extent(1), - subview.extent(2)}), - KOKKOS_CLASS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - int l = i + j * subview.extent(0) - + k * subview.extent(0) * subview.extent(1); + mdrange_type({0, 0, 0}, {subview.extent(0), subview.extent(1), subview.extent(2)}), + KOKKOS_CLASS_LAMBDA(const size_t i, const size_t j, const size_t k) { + int l = i + j * subview.extent(0) + k * subview.extent(0) * subview.extent(1); op(subview(i, j, k), buffer(l)); - } - ); + }); Kokkos::fence(); } - template - auto - HaloCells::makeSubview(const view_type& view, - const bound_type& intersect) - { + auto HaloCells::makeSubview(const view_type& view, const bound_type& intersect) { using Kokkos::make_pair; - return Kokkos::subview(view, - make_pair(intersect.lo[0], intersect.hi[0]), + return Kokkos::subview(view, make_pair(intersect.lo[0], intersect.hi[0]), make_pair(intersect.lo[1], intersect.hi[1]), make_pair(intersect.lo[2], intersect.hi[2])); } template template - void HaloCells::applyPeriodicSerialDim(view_type& view, - const Layout_t* layout, - const int nghost) - { - int myRank = Ippl::Comm->rank(); + void HaloCells::applyPeriodicSerialDim(view_type& view, const Layout_t* layout, + const int nghost) { + int myRank = Ippl::Comm->rank(); const auto& lDomains = layout->getHostLocalDomains(); - const auto& domain = layout->getDomain(); - using mdrange_type = Kokkos::MDRangePolicy>; + const auto& domain = layout->getDomain(); + using mdrange_type = Kokkos::MDRangePolicy>; std::array ext; for (size_t i = 0; i < Dim; ++i) { ext[i] = view.extent(i); } - + Op op; - - for(unsigned d=0; d class FieldLayout; -template class BareField; +template +class FieldLayout; +template +class BareField; -class BinaryRepartitionFailed { }; +class BinaryRepartitionFailed {}; // Calculate the local domain for a binary repartition. -template -NDIndex -CalcBinaryRepartition(FieldLayout&, BareField&); +template +NDIndex CalcBinaryRepartition(FieldLayout&, BareField&); // Calculate and apply a local domain for a binary repartition. -template -inline void -BinaryRepartition(FieldLayout& layout, BareField& weights) -{ - layout.Repartition( CalcBinaryRepartition(layout,weights) ); +template +inline void BinaryRepartition(FieldLayout& layout, BareField& weights) { + layout.Repartition(CalcBinaryRepartition(layout, weights)); } ////////////////////////////////////////////////////////////////////// #include "FieldLayout/BinaryBalancer.hpp" -#endif // BINARY_BALANCER_H +#endif // BINARY_BALANCER_H /*************************************************************************** * $RCSfile: BinaryBalancer.h,v $ $Author: adelmann $ * $Revision: 1.1.1.1 $ $Date: 2003/01/23 07:40:27 $ - * IPPL_VERSION_ID: $Id: BinaryBalancer.h,v 1.1.1.1 2003/01/23 07:40:27 adelmann Exp $ + * IPPL_VERSION_ID: $Id: BinaryBalancer.h,v 1.1.1.1 2003/01/23 07:40:27 adelmann Exp $ ***************************************************************************/ diff --git a/src/FieldLayout/BinaryBalancer.hpp b/src/FieldLayout/BinaryBalancer.hpp index 2da60145c..a17997aee 100644 --- a/src/FieldLayout/BinaryBalancer.hpp +++ b/src/FieldLayout/BinaryBalancer.hpp @@ -24,12 +24,11 @@ ***************************************************************************/ // include files +#include "Field/BareField.h" #include "FieldLayout/BinaryBalancer.h" #include "FieldLayout/FieldLayout.h" -#include "Field/BareField.h" #include "Utility/PAssert.h" - ////////////////////////////////////////////////////////////////////// /* @@ -78,41 +77,34 @@ // not SERIAL. // template -static int -FindCutAxis(const NDIndex &domain, const FieldLayout &layout) -{ - - - - // CutAxis will be the dimension to cut. - int cutAxis=-1; - // MaxLength will have the maximum length of any dimension. - unsigned int maxLength=0; - // Loop over dimension. - for (unsigned int d=0; d& domain, const FieldLayout& layout) { + // CutAxis will be the dimension to cut. + int cutAxis = -1; + // MaxLength will have the maximum length of any dimension. + unsigned int maxLength = 0; + // Loop over dimension. + for (unsigned int d = 0; d < Dim; ++d) { + if (layout.getDistribution(d) != SERIAL || layout.getRequestedDistribution(d) != SERIAL) { + // Check if this axis is longer than the current max. + unsigned int length = domain[d].length(); + if (maxLength < length) { + // If so, remember it. + cutAxis = d; + maxLength = length; + } + } } - } - // Make sure we found one. - //PAssert_GE(cutAxis, 0); + // Make sure we found one. + // PAssert_GE(cutAxis, 0); - if(cutAxis<0) - throw BinaryRepartitionFailed(); + if (cutAxis < 0) + throw BinaryRepartitionFailed(); - // Return the longest axis. - return cutAxis; + // Return the longest axis. + return cutAxis; } - // // Find the median point in a container. // The first two arguments are begin and end iterators. @@ -120,53 +112,47 @@ FindCutAxis(const NDIndex &domain, const FieldLayout &layout) // container is of objects of type T. // -template -static RandomIterator -FindMedian(int nprocs,RandomIterator begin, RandomIterator end, T) -{ - // First find the total weight. - T w = 0; - // Use w to find T's name - - - // If we have only one processor, cut at the left. - if ( nprocs == 1 ) - return begin; - - int lprocs = nprocs/2; - RandomIterator rp, median; - for (rp=begin; rp!=end; ++rp) - w += *rp; - - // If the total weight is zero, we need to do things specially. - if ( w==0 ) - { - // The total weight is zero. - // Put about as much zero weight stuff on the left and the right. - median = begin + ((end-begin)*lprocs)/nprocs; - } - else - { - // The total weight is nonzero. - // Put equal amounts on the left and right processors. - T w2 = (w*lprocs)/nprocs; - // Find the point with half the weight to the left. - bool found = false; - w = T(0); - for (rp=begin; (rp!=end)&&(!found); ++rp) { - // Add current element to running total - w += *rp; - if (w>=w2) { - found = true; - if ( (w-w2) > (*rp/T(2)) ) - median = rp; - else - median = (rp+1 != end) ? rp+1 : rp; - } - } +template +static RandomIterator FindMedian(int nprocs, RandomIterator begin, RandomIterator end, T) { + // First find the total weight. + T w = 0; + // Use w to find T's name + + // If we have only one processor, cut at the left. + if (nprocs == 1) + return begin; + + int lprocs = nprocs / 2; + RandomIterator rp, median; + for (rp = begin; rp != end; ++rp) + w += *rp; + + // If the total weight is zero, we need to do things specially. + if (w == 0) { + // The total weight is zero. + // Put about as much zero weight stuff on the left and the right. + median = begin + ((end - begin) * lprocs) / nprocs; + } else { + // The total weight is nonzero. + // Put equal amounts on the left and right processors. + T w2 = (w * lprocs) / nprocs; + // Find the point with half the weight to the left. + bool found = false; + w = T(0); + for (rp = begin; (rp != end) && (!found); ++rp) { + // Add current element to running total + w += *rp; + if (w >= w2) { + found = true; + if ((w - w2) > (*rp / T(2))) + median = rp; + else + median = (rp + 1 != end) ? rp + 1 : rp; + } + } } - // Found it. Exit. - return median; + // Found it. Exit. + return median; } ////////////////////////////////////////////////////////////////////// @@ -184,61 +170,49 @@ FindMedian(int nprocs,RandomIterator begin, RandomIterator end, T) // given the axis not to reduce on // and where in that dimension to reduce. -static inline double -PerpReduce(BrickIterator& data, int i, int cutAxis) -{ - double r=0; - if (cutAxis==0) - { - int l1 = data.size(1); - int l2 = data.size(2); - if ( (l1>0) && (l2>0) ) - for (int j2=0; j2& data, int i, int cutAxis) { + double r = 0; + if (cutAxis == 0) { + int l1 = data.size(1); + int l2 = data.size(2); + if ((l1 > 0) && (l2 > 0)) + for (int j2 = 0; j2 < l2; ++j2) + for (int j1 = 0; j1 < l1; ++j1) + r += data.offset(i, j1, j2); + } else if (cutAxis == 1) { + int l0 = data.size(0); + int l2 = data.size(2); + if ((l0 > 0) && (l2 > 0)) + for (int j2 = 0; j2 < l2; ++j2) + for (int j0 = 0; j0 < l0; ++j0) + r += data.offset(j0, i, j2); + } else if (cutAxis == 2) { + int l0 = data.size(0); + int l1 = data.size(1); + if ((l0 > 0) && (l1 > 0)) + for (int j1 = 0; j1 < l1; ++j1) + for (int j0 = 0; j0 < l0; ++j0) + r += data.offset(j0, j1, i); } - else if (cutAxis==1) - { - int l0 = data.size(0); - int l2 = data.size(2); - if ( (l0>0) && (l2>0) ) - for (int j2=0; j20) && (l1>0) ) - for (int j1=0; j1& data, int i, int cutAxis) -{ - double r=0; - if (cutAxis==0) - { - int length = data.size(1); - for (int j=0; j& data, int i, int cutAxis) { + double r = 0; + if (cutAxis == 0) { + int length = data.size(1); + for (int j = 0; j < length; ++j) + r += data.offset(i, j); + } else { + int length = data.size(0); + for (int j = 0; j < length; ++j) + r += data.offset(j, i); } - else - { - int length = data.size(0); - for (int j=0; j& data, int i, int cutAxis) // and where in that dimension to reduce. // -static inline double -PerpReduce(BrickIterator& data, int i, int ) -{ - return data.offset(i); +static inline double PerpReduce(BrickIterator& data, int i, int) { + return data.offset(i); } // @@ -258,16 +230,11 @@ PerpReduce(BrickIterator& data, int i, int ) // Put the results in an array of doubles. // -template -static void -LocalReduce(double *reduced, int cutAxis, BrickIterator data) -{ - - - - int length = data.size(cutAxis); - for (int i=0; i +static void LocalReduce(double* reduced, int cutAxis, BrickIterator data) { + int length = data.size(cutAxis); + for (int i = 0; i < length; ++i) + reduced[i] = PerpReduce(data, i, cutAxis); } ////////////////////////////////////////////////////////////////////// @@ -282,73 +249,65 @@ LocalReduce(double *reduced, int cutAxis, BrickIterator data) // Each of those final reductions are on different processors. // -template -static void -SendReduce(IndexIterator domainsBegin, IndexIterator domainsEnd, - BareField& weights, int tag) -{ - - // Buffers to store up domains and blocks of reduced data. - std::vector reducedBuffer; - std::vector domainBuffer; - // Loop over all of the domains. Keep a counter of which one you're on. - int di; - IndexIterator dp; - /*out << "SendReduce, ndomains=" << domainsEnd-domainsBegin << endl;*/ - for (dp=domainsBegin, di=0; dp!=domainsEnd; ++dp, ++di) - { - /*out << "SendReduce, domain=" << *dp << endl;*/ - // Find the dimension we'll be cutting on. - // We'll reduce in the dimensions perpendicular to this. - int cutAxis = FindCutAxis(*dp, weights.getLayout()); - // Find the LFields on this processor that touch this domain. - typename BareField::iterator_if lf_p; - for (lf_p=weights.begin_if(); lf_p != weights.end_if(); ++lf_p) - if ( (*dp).touches( (*lf_p).second->getOwned() ) ) - { - // Find the intersection with this LField. - NDIndex intersection = - (*dp).intersect( (*lf_p).second->getOwned() ); - // Allocate the accumulation buffer. - int length = intersection[cutAxis].length(); - double *reduced = new double[length]; - // Reduce into the local buffer. - /*out << "LocalReduce " << intersection << endl;*/ - LocalReduce(reduced,cutAxis,(*lf_p).second->begin(intersection)); - // Save the domain and the data. - reducedBuffer.push_back(reduced); - domainBuffer.push_back(intersection[cutAxis]); - } - - // If we found any hits, send them out. - int nrdomains = reducedBuffer.size(); - /*out << "nrdomains=" << nrdomains << endl;*/ - if ( nrdomains>0 ) - { - // Build a message to hold everything for this domain. - Message *mess = new Message; - // The number of reduced domains is the first thing in the message. - mess->put(nrdomains); - // Loop over the reduced domains, storing in the message each time. - std::vector::iterator dbp = domainBuffer.begin(); - std::vector::iterator rbp = reducedBuffer.begin(); - for (int i=0; isetCopy(false).setDelete(true).put(p,p+(*dbp).length()); +template +static void SendReduce(IndexIterator domainsBegin, IndexIterator domainsEnd, + BareField& weights, int tag) { + // Buffers to store up domains and blocks of reduced data. + std::vector reducedBuffer; + std::vector domainBuffer; + // Loop over all of the domains. Keep a counter of which one you're on. + int di; + IndexIterator dp; + /*out << "SendReduce, ndomains=" << domainsEnd-domainsBegin << endl;*/ + for (dp = domainsBegin, di = 0; dp != domainsEnd; ++dp, ++di) { + /*out << "SendReduce, domain=" << *dp << endl;*/ + // Find the dimension we'll be cutting on. + // We'll reduce in the dimensions perpendicular to this. + int cutAxis = FindCutAxis(*dp, weights.getLayout()); + // Find the LFields on this processor that touch this domain. + typename BareField::iterator_if lf_p; + for (lf_p = weights.begin_if(); lf_p != weights.end_if(); ++lf_p) + if ((*dp).touches((*lf_p).second->getOwned())) { + // Find the intersection with this LField. + NDIndex intersection = (*dp).intersect((*lf_p).second->getOwned()); + // Allocate the accumulation buffer. + int length = intersection[cutAxis].length(); + double* reduced = new double[length]; + // Reduce into the local buffer. + /*out << "LocalReduce " << intersection << endl;*/ + LocalReduce(reduced, cutAxis, (*lf_p).second->begin(intersection)); + // Save the domain and the data. + reducedBuffer.push_back(reduced); + domainBuffer.push_back(intersection[cutAxis]); } - // Send the message to proc di. - Ippl::Comm->send(mess, di, tag); + + // If we found any hits, send them out. + int nrdomains = reducedBuffer.size(); + /*out << "nrdomains=" << nrdomains << endl;*/ + if (nrdomains > 0) { + // Build a message to hold everything for this domain. + Message* mess = new Message; + // The number of reduced domains is the first thing in the message. + mess->put(nrdomains); + // Loop over the reduced domains, storing in the message each time. + std::vector::iterator dbp = domainBuffer.begin(); + std::vector::iterator rbp = reducedBuffer.begin(); + for (int i = 0; i < nrdomains; ++i, ++dbp, ++rbp) { + // First store the domain. + /*out << "putMessage " << *dbp << endl;*/ + putMessage(*mess, *dbp); + // Then the reduced data using begin/end iterators. + // Tell the message to delete the memory when it is done. + double* p = *rbp; + mess->setCopy(false).setDelete(true).put(p, p + (*dbp).length()); + } + // Send the message to proc di. + Ippl::Comm->send(mess, di, tag); } - // Clear out the buffers. - domainBuffer.erase( domainBuffer.begin(), domainBuffer.end() ); - reducedBuffer.erase( reducedBuffer.begin(), reducedBuffer.end() ); - /*out << "Bottom of SendReduce loop" << endl;*/ + // Clear out the buffers. + domainBuffer.erase(domainBuffer.begin(), domainBuffer.end()); + reducedBuffer.erase(reducedBuffer.begin(), reducedBuffer.end()); + /*out << "Bottom of SendReduce loop" << endl;*/ } } @@ -360,87 +319,79 @@ SendReduce(IndexIterator domainsBegin, IndexIterator domainsEnd, // Return begin and end iterators for the reduced data. // -template -static void -ReceiveReduce(NDIndex& domain, BareField& weights, - int reduce_tag, int nprocs, - int& cutLoc, int& cutAxis) -{ - - - // Build a place to accumulate the reduced data. - cutAxis = FindCutAxis(domain, weights.getLayout()); - /*out << "ReceiveReduce, cutAxis=" << cutAxis << endl;*/ - int i, length = domain[cutAxis].length(); - int offset = domain[cutAxis].first(); - std::vector reduced(length); - std::vector subReduced(length); - for (i=0; igetNodes(); - int mynode = Ippl::Comm->myNode(); - bool* found_touch = new bool[nodes]; - for (i=0; i::iterator_if lf_p, lf_end = weights.end_if(); - for (lf_p = weights.begin_if(); - lf_p != lf_end && !(found_touch[mynode]); ++lf_p) { - // Expect a message if it touches. - if ( (*lf_p).second->getOwned().touches(domain) ) - found_touch[mynode] = true; - } - // Now look in the remote parts of weights. - typename FieldLayout::touch_iterator_dv rf_p; - // Get the range of remote vnodes that touch domain. - typename FieldLayout::touch_range_dv range = - weights.getLayout().touch_range_rdv( domain ); - // Record the processors who have touches - for (rf_p = range.first; rf_p != range.second ; ++rf_p) { - int owner = (*((*rf_p).second)).getNode(); - found_touch[owner] = true; - } - // now just count up the number of messages to receive - for (i=0; i= 0 ) - { - // Receive a message. - int any_node = COMM_ANY_NODE; - Message *mess = Ippl::Comm->receive_block(any_node,reduce_tag); - PAssert(mess); - // Loop over all the domains in this message. - int received_domains = 0; - mess->get(received_domains); - while ( --received_domains>=0 ) - { - // Get the domain for the next part. - Index rdomain; - getMessage( *mess, rdomain ); - /*out << "ReceiveReduce, rdomain=" << rdomain << endl;*/ - // Get the incoming reduced data. - int rfirst = rdomain.first() - offset; - mess->get(subReduced[rfirst]); - // Accumulate it with the rest. - int rlast = rdomain.last() - offset; - for (int i=rfirst; i<=rlast; ++i) - reduced[i] += subReduced[i]; +template +static void ReceiveReduce(NDIndex& domain, BareField& weights, int reduce_tag, + int nprocs, int& cutLoc, int& cutAxis) { + // Build a place to accumulate the reduced data. + cutAxis = FindCutAxis(domain, weights.getLayout()); + /*out << "ReceiveReduce, cutAxis=" << cutAxis << endl;*/ + int i, length = domain[cutAxis].length(); + int offset = domain[cutAxis].first(); + std::vector reduced(length); + std::vector subReduced(length); + for (i = 0; i < length; ++i) + reduced[i] = 0; + + // Build a count of the number of messages to expect. + // We get *one message* from each node that has a touch. + int expected = 0; + int nodes = Ippl::Comm->getNodes(); + int mynode = Ippl::Comm->myNode(); + bool* found_touch = new bool[nodes]; + for (i = 0; i < nodes; ++i) + found_touch[i] = false; + // First look in the local vnodes of weights. + typename BareField::iterator_if lf_p, lf_end = weights.end_if(); + for (lf_p = weights.begin_if(); lf_p != lf_end && !(found_touch[mynode]); ++lf_p) { + // Expect a message if it touches. + if ((*lf_p).second->getOwned().touches(domain)) + found_touch[mynode] = true; + } + // Now look in the remote parts of weights. + typename FieldLayout::touch_iterator_dv rf_p; + // Get the range of remote vnodes that touch domain. + typename FieldLayout::touch_range_dv range = weights.getLayout().touch_range_rdv(domain); + // Record the processors who have touches + for (rf_p = range.first; rf_p != range.second; ++rf_p) { + int owner = (*((*rf_p).second)).getNode(); + found_touch[owner] = true; + } + // now just count up the number of messages to receive + for (i = 0; i < nodes; ++i) + if (found_touch[i]) + expected++; + delete[] found_touch; + + // Receive messages until we're done. + while (--expected >= 0) { + // Receive a message. + int any_node = COMM_ANY_NODE; + Message* mess = Ippl::Comm->receive_block(any_node, reduce_tag); + PAssert(mess); + // Loop over all the domains in this message. + int received_domains = 0; + mess->get(received_domains); + while (--received_domains >= 0) { + // Get the domain for the next part. + Index rdomain; + getMessage(*mess, rdomain); + /*out << "ReceiveReduce, rdomain=" << rdomain << endl;*/ + // Get the incoming reduced data. + int rfirst = rdomain.first() - offset; + mess->get(subReduced[rfirst]); + // Accumulate it with the rest. + int rlast = rdomain.last() - offset; + for (int i = rfirst; i <= rlast; ++i) + reduced[i] += subReduced[i]; } - // Delete the message, we're done with it - delete mess; + // Delete the message, we're done with it + delete mess; } - // Get the median. - cutLoc = - FindMedian(nprocs,reduced.begin(),reduced.begin()+length,double()) - -reduced.begin() + domain[cutAxis].first(); - /*out << "ReceiveReduce, cutLoc=" << cutLoc << endl;*/ + // Get the median. + cutLoc = FindMedian(nprocs, reduced.begin(), reduced.begin() + length, double()) + - reduced.begin() + domain[cutAxis].first(); + /*out << "ReceiveReduce, cutLoc=" << cutLoc << endl;*/ } ////////////////////////////////////////////////////////////////////// @@ -450,18 +401,14 @@ ReceiveReduce(NDIndex& domain, BareField& weights, // Broadcast to everybody. // -inline void -BcastCuts(int cutLoc, int cutAxis, int bcast_tag) -{ - - - // Make a message. - Message *mess = new Message(); - // Add the data to it. - mess->put(cutLoc); - mess->put(cutAxis); - // Send it out. - Ippl::Comm->broadcast_all(mess,bcast_tag); +inline void BcastCuts(int cutLoc, int cutAxis, int bcast_tag) { + // Make a message. + Message* mess = new Message(); + // Add the data to it. + mess->put(cutLoc); + mess->put(cutAxis); + // Send it out. + Ippl::Comm->broadcast_all(mess, bcast_tag); } ////////////////////////////////////////////////////////////////////// @@ -471,71 +418,60 @@ BcastCuts(int cutLoc, int cutAxis, int bcast_tag) // Cut up each of the domains using the cuts. // -template -static void -ReceiveCuts(std::vector< NDIndex > &domains, - std::vector< int >& nprocs, - int bcast_tag) -{ - - - - // Make a container to hold the split domains. - int nDomains = domains.size(); - std::vector< NDIndex > cutDomains(nDomains*2); - std::vector cutProcs(std::vector::size_type(nDomains*2)); - - // Everybody receives the broadcasts. - // There will be one for each domain in the list. - for (int expected = 0; expected < nDomains; ++expected) - { - // Receive each broadcast. - // The processor number will correspond to the location - // in the domains vector. - int whichDomain = COMM_ANY_NODE; - int cutLocation = 0, cutAxis = 0; - Message *mess = Ippl::Comm->receive_block(whichDomain,bcast_tag); - PAssert(mess); - mess->get(cutLocation); - mess->get(cutAxis); - delete mess; - - // Split this domain. - const NDIndex& domain = domains[whichDomain]; - NDIndex& left = cutDomains[ whichDomain*2 ]; - NDIndex& right = cutDomains[ whichDomain*2+1 ]; - // Build the left and right domains. - left = domain ; - right = domain ; - /*out << "Build indexes from : " - << domain[cutAxis].first() << " " - << cutLocation<< " " - << domain[cutAxis].last()<< " " - << endl;*/ - left[ cutAxis ] = Index( domain[cutAxis].first(), cutLocation-1 ); - right[ cutAxis ] = Index( cutLocation, domain[cutAxis].last() ); - - int procs = nprocs[whichDomain]; - cutProcs[ whichDomain*2 ] = procs/2; - cutProcs[ whichDomain*2+1 ] = procs - procs/2; +template +static void ReceiveCuts(std::vector >& domains, std::vector& nprocs, + int bcast_tag) { + // Make a container to hold the split domains. + int nDomains = domains.size(); + std::vector > cutDomains(nDomains * 2); + std::vector cutProcs(std::vector::size_type(nDomains * 2)); + + // Everybody receives the broadcasts. + // There will be one for each domain in the list. + for (int expected = 0; expected < nDomains; ++expected) { + // Receive each broadcast. + // The processor number will correspond to the location + // in the domains vector. + int whichDomain = COMM_ANY_NODE; + int cutLocation = 0, cutAxis = 0; + Message* mess = Ippl::Comm->receive_block(whichDomain, bcast_tag); + PAssert(mess); + mess->get(cutLocation); + mess->get(cutAxis); + delete mess; + + // Split this domain. + const NDIndex& domain = domains[whichDomain]; + NDIndex& left = cutDomains[whichDomain * 2]; + NDIndex& right = cutDomains[whichDomain * 2 + 1]; + // Build the left and right domains. + left = domain; + right = domain; + /*out << "Build indexes from : " + << domain[cutAxis].first() << " " + << cutLocation<< " " + << domain[cutAxis].last()<< " " + << endl;*/ + left[cutAxis] = Index(domain[cutAxis].first(), cutLocation - 1); + right[cutAxis] = Index(cutLocation, domain[cutAxis].last()); + + int procs = nprocs[whichDomain]; + cutProcs[whichDomain * 2] = procs / 2; + cutProcs[whichDomain * 2 + 1] = procs - procs / 2; } - // Put the domains you've just built into the input containers. - // Strip out the domains with no processors assigned. - domains.clear(); - nprocs.clear(); - PAssert_EQ(cutProcs.size(), cutDomains.size()); - for (unsigned int i=0; i > &domains, // according to the weights in a BareField. // -template -static void -CutEach(std::vector< NDIndex >& domains, - std::vector< int >& nprocs, - BareField& weights) -{ - - // Get tags for the reduction and the broadcast. - int reduce_tag = Ippl::Comm->next_tag( F_REDUCE_PERP_TAG , F_TAG_CYCLE ); - int bcast_tag = Ippl::Comm->next_tag( F_REDUCE_PERP_TAG , F_TAG_CYCLE ); - /*out << "reduce_tag=" << reduce_tag << endl;*/ - /*out << "bcast_tag=" << bcast_tag << endl;*/ - - // Do the sends for the reduces. - SendReduce(domains.begin(),domains.end(),weights,reduce_tag); - - // On the appropriate processors, receive the data for the reduce, - // and broadcast the cuts. - unsigned int mynode = Ippl::Comm->myNode(); - if ( mynode < domains.size() ) - { - // Receive partially reduced data, finish the reduction, find the median. - int cutAxis, cutLoc; - ReceiveReduce(domains[mynode],weights,reduce_tag, - nprocs[mynode],cutLoc,cutAxis); - // Broadcast those cuts out to everybody. - BcastCuts(cutLoc,cutAxis,bcast_tag); +template +static void CutEach(std::vector >& domains, std::vector& nprocs, + BareField& weights) { + // Get tags for the reduction and the broadcast. + int reduce_tag = Ippl::Comm->next_tag(F_REDUCE_PERP_TAG, F_TAG_CYCLE); + int bcast_tag = Ippl::Comm->next_tag(F_REDUCE_PERP_TAG, F_TAG_CYCLE); + /*out << "reduce_tag=" << reduce_tag << endl;*/ + /*out << "bcast_tag=" << bcast_tag << endl;*/ + + // Do the sends for the reduces. + SendReduce(domains.begin(), domains.end(), weights, reduce_tag); + + // On the appropriate processors, receive the data for the reduce, + // and broadcast the cuts. + unsigned int mynode = Ippl::Comm->myNode(); + if (mynode < domains.size()) { + // Receive partially reduced data, finish the reduction, find the median. + int cutAxis, cutLoc; + ReceiveReduce(domains[mynode], weights, reduce_tag, nprocs[mynode], cutLoc, cutAxis); + // Broadcast those cuts out to everybody. + BcastCuts(cutLoc, cutAxis, bcast_tag); } - // Receive the broadcast cuts and slice up the domains. - ReceiveCuts(domains,nprocs,bcast_tag); + // Receive the broadcast cuts and slice up the domains. + ReceiveCuts(domains, nprocs, bcast_tag); } ////////////////////////////////////////////////////////////////////// -template -NDIndex -CalcBinaryRepartition(FieldLayout& layout, BareField& weights) -{ -// Build a list of domains as we go. - std::vector< NDIndex > domains; // used by TAU_TYPE_STRING - std::vector procs; - - /*out << "Starting CalcBinaryRepartition, outstanding msgs=" - << Ippl::Comm->getReceived() - << endl;*/ - - // Get the processors we'll be dealing with. - int nprocs = Ippl::Comm->getNodes(); - int myproc = Ippl::Comm->myNode(); - domains.reserve(nprocs); - procs.reserve(nprocs); - - // Start the list with just the top level domain. - domains.push_back( layout.getDomain() ); - procs.push_back( nprocs ); - - // mprocs is the max number of procs assigned to a domain. - int mprocs=nprocs; - - // Loop as long as some domain has more than one proc assigned to it. - while ( mprocs>1 ) - { - // Cut all the domains in half. - CutEach(domains,procs,weights); - - // Find the max number of procs assigned to a domain. - mprocs = 0; - for (unsigned int i=0; i +NDIndex CalcBinaryRepartition(FieldLayout& layout, BareField& weights) { + // Build a list of domains as we go. + std::vector > domains; // used by TAU_TYPE_STRING + std::vector procs; + + /*out << "Starting CalcBinaryRepartition, outstanding msgs=" + << Ippl::Comm->getReceived() + << endl;*/ + + // Get the processors we'll be dealing with. + int nprocs = Ippl::Comm->getNodes(); + int myproc = Ippl::Comm->myNode(); + domains.reserve(nprocs); + procs.reserve(nprocs); + + // Start the list with just the top level domain. + domains.push_back(layout.getDomain()); + procs.push_back(nprocs); + + // mprocs is the max number of procs assigned to a domain. + int mprocs = nprocs; + + // Loop as long as some domain has more than one proc assigned to it. + while (mprocs > 1) { + // Cut all the domains in half. + CutEach(domains, procs, weights); + + // Find the max number of procs assigned to a domain. + mprocs = 0; + for (unsigned int i = 0; i < procs.size(); ++i) + if (mprocs < procs[i]) + mprocs = procs[i]; } - // Return the domain on this processor. - - - //seriously dirty fix - typename std::vector< NDIndex >::iterator i; + // Return the domain on this processor. - bool degenerated = false; + // seriously dirty fix + typename std::vector >::iterator i; - for(i = domains.begin();i!=domains.end();++i) - { - for(unsigned int d = 0;d class FieldLayout; + template + class FieldLayout; template std::ostream& operator<<(std::ostream&, const FieldLayout&); // enumeration used to select serial or parallel axes - enum e_dim_tag { SERIAL=0, PARALLEL=1 } ; - - - template - class FieldLayout - { + enum e_dim_tag { + SERIAL = 0, + PARALLEL = 1 + }; + template + class FieldLayout { public: - using NDIndex_t = NDIndex; - using view_type = typename detail::ViewType::view_type; - using host_mirror_type = typename view_type::host_mirror_type; - using face_neighbor_type = std::array, 2 * Dim>; - using edge_neighbor_type = std::array, Dim * (1 << (Dim - 1))>; + using NDIndex_t = NDIndex; + using view_type = typename detail::ViewType::view_type; + using host_mirror_type = typename view_type::host_mirror_type; + using face_neighbor_type = std::array, 2 * Dim>; + using edge_neighbor_type = std::array, Dim * (1 << (Dim - 1))>; using vertex_neighbor_type = std::array; - using match_face_type = std::array; - using match_edge_type = std::array; - using match_vertex_type = std::array; - + using match_face_type = std::array; + using match_edge_type = std::array; + using match_vertex_type = std::array; + struct bound_type { // lower bounds (ordering: x, y, z) std::array lo; // upper bounds (ordering x, y, z) std::array hi; }; - + using face_neighbor_range_type = std::array, 2 * Dim>; - using edge_neighbor_range_type = std::array, Dim * (1 << (Dim - 1))>; + using edge_neighbor_range_type = + std::array, Dim * (1 << (Dim - 1))>; using vertex_neighbor_range_type = std::array; - /*! * Default constructor, which should only be used if you are going to * call 'initialize' soon after (before using in any context) */ FieldLayout(); - FieldLayout(const NDIndex& domain, e_dim_tag *p=0, bool isAllPeriodic=false); + FieldLayout(const NDIndex& domain, e_dim_tag* p = 0, bool isAllPeriodic = false); // Destructor: Everything deletes itself automatically ... the base // class destructors inform all the FieldLayoutUser's we're going away. @@ -83,12 +84,11 @@ namespace ippl { // otherwise these are only called internally by the various non-default // FieldLayout constructors: - void initialize(const NDIndex& domain, e_dim_tag *p=0, bool isAllPeriodic=false); - + void initialize(const NDIndex& domain, e_dim_tag* p = 0, bool isAllPeriodic = false); // Return the domain. const NDIndex& getDomain() const { return gDomain_m; } - + // Compare FieldLayouts to see if they represent the same domain; if // dimensionalities are different, the NDIndex operator==() will return // false: @@ -109,16 +109,14 @@ namespace ippl { // SERIAL or PARALLEL e_dim_tag getDistribution(unsigned int d) const { e_dim_tag retval = PARALLEL; - if (minWidth_m[d] == (unsigned int) gDomain_m[d].length()) + if (minWidth_m[d] == (unsigned int)gDomain_m[d].length()) retval = SERIAL; return retval; } // for the requested dimension, report if the distribution was requested to // be SERIAL or PARALLEL - e_dim_tag getRequestedDistribution(unsigned int d) const { - return requestedLayout_m[d]; - } + e_dim_tag getRequestedDistribution(unsigned int d) const { return requestedLayout_m[d]; } const NDIndex_t& getLocalNDIndex(int rank = Ippl::Comm->rank()) const; @@ -138,7 +136,7 @@ namespace ippl { const edge_neighbor_type& getEdgeNeighbors() const; const vertex_neighbor_type& getVertexNeighbors() const; - + const face_neighbor_range_type& getFaceNeighborsSendRange() const; const edge_neighbor_range_type& getEdgeNeighborsSendRange() const; @@ -152,26 +150,21 @@ namespace ippl { const vertex_neighbor_range_type& getVertexNeighborsRecvRange() const; const match_face_type& getMatchFace() const; - + const match_edge_type& getMatchEdge() const; - + const match_vertex_type& getMatchVertex() const; void findNeighbors(int nghost = 1); - void addNeighbors(NDIndex_t& gnd, - NDIndex_t& nd, - NDIndex_t& ndNeighbor, - NDIndex_t& intersect, - int nghost, - int rank); + void addNeighbors(NDIndex_t& gnd, NDIndex_t& nd, NDIndex_t& ndNeighbor, + NDIndex_t& intersect, int nghost, int rank); void write(std::ostream& = std::cout) const; - - void updateLayout(const std::vector& domains); - bool isAllPeriodic_m; + void updateLayout(const std::vector& domains); + bool isAllPeriodic_m; private: /*! @@ -179,13 +172,13 @@ namespace ippl { * @param inersect the intersection between grown and the remote domain * @param rank the rank of the remote domain */ - void addVertex(const NDIndex_t& grown, const NDIndex_t& intersect, int rank, + void addVertex(const NDIndex_t& grown, const NDIndex_t& intersect, int rank, const bound_type& rangeSend, const bound_type& rangeRecv); - + void addEdge(const NDIndex_t& grown, const NDIndex_t& intersect, int rank, const bound_type& rangeSend, const bound_type& rangeRecv); - void addFace(const NDIndex_t& grown, const NDIndex_t& intersect, int rank, + void addFace(const NDIndex_t& grown, const NDIndex_t& intersect, int rank, const bound_type& rangeSend, const bound_type& rangeRecv); /*! @@ -197,14 +190,11 @@ namespace ippl { * @param offset to map global to local grid point * @param nghost number of ghost cells per dimension */ - bound_type getBounds(const NDIndex_t& nd1, - const NDIndex_t& nd2, - const NDIndex_t& offset, + bound_type getBounds(const NDIndex_t& nd1, const NDIndex_t& nd2, const NDIndex_t& offset, int nghost); int getPeriodicOffset(const NDIndex_t& nd, const unsigned int d, const int k); - private: //! Global domain NDIndex_t gDomain_m; @@ -248,12 +238,12 @@ namespace ippl { * [(x high, y high, z low), (x high, y high, z high)] --> edge 11 */ edge_neighbor_type edgeNeighbors_m; - + match_edge_type matchedge_m; /*! * Neighboring ranks that have the vertex value (corner cell). The value - * is negative, i.e. -1, if the vertex is on a mesh boundary if it is + * is negative, i.e. -1, if the vertex is on a mesh boundary if it is * non-periodic. * x low, y low, z low --> vertex index 0 * x high, y low, z low --> vertex index 1 @@ -273,18 +263,14 @@ namespace ippl { face_neighbor_range_type faceNeighborsSendRange_m, faceNeighborsRecvRange_m; edge_neighbor_range_type edgeNeighborsSendRange_m, edgeNeighborsRecvRange_m; vertex_neighbor_range_type vertexNeighborsSendRange_m, vertexNeighborsRecvRange_m; - }; - - template - inline - std::ostream& operator<<(std::ostream& out, const FieldLayout& f) { + template + inline std::ostream& operator<<(std::ostream& out, const FieldLayout& f) { f.write(out); return out; } -} - +} // namespace ippl #include "FieldLayout/FieldLayout.hpp" diff --git a/src/FieldLayout/FieldLayout.hpp b/src/FieldLayout/FieldLayout.hpp index 34cf92e0e..98868398f 100644 --- a/src/FieldLayout/FieldLayout.hpp +++ b/src/FieldLayout/FieldLayout.hpp @@ -27,7 +27,6 @@ #include "Utility/IpplException.h" #include "Utility/IpplTimings.h" - #include #include @@ -35,12 +34,11 @@ namespace ippl { template FieldLayout::FieldLayout() - : dLocalDomains_m("local domains (device)", 0) - , hLocalDomains_m(Kokkos::create_mirror_view(dLocalDomains_m)) - { + : dLocalDomains_m("local domains (device)", 0) + , hLocalDomains_m(Kokkos::create_mirror_view(dLocalDomains_m)) { for (unsigned int d = 0; d < Dim; ++d) { requestedLayout_m[d] = PARALLEL; - minWidth_m[d] = 0; + minWidth_m[d] = 0; } // We initialize matchface_m, matchedge_m, and matchvertex_m @@ -50,12 +48,12 @@ namespace ippl { * faces with the same ordering as above, and the value represents * the corresponding matching face number from the neighbours. * - * For Dim = 3, matchface_m represents the faces. + * For Dim = 3, matchface_m represents the faces. * For Dim = 2 and Dim = 1, the cells have no faces, * so this array is useless. */ - if constexpr(Dim == 3) { - matchface_m = { 1, 0, 3, 2, 5, 4 }; + if constexpr (Dim == 3) { + matchface_m = {1, 0, 3, 2, 5, 4}; } /*! @@ -66,10 +64,10 @@ namespace ippl { * For Dim = 3 and Dim = 2, marchedge_m represents the edges. * For Dim = 1, it is useless, as there are no matching edges. */ - if constexpr(Dim == 2) { - matchedge_m = { 1, 0, 3, 2 }; - } else if constexpr(Dim == 3) { - matchedge_m = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 }; + if constexpr (Dim == 2) { + matchedge_m = {1, 0, 3, 2}; + } else if constexpr (Dim == 3) { + matchedge_m = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8}; } /*! @@ -80,39 +78,34 @@ namespace ippl { * the neighboring rank if the computational cell is extended by 1 halo * layer in all dimensions. * - * For all Dim, matchvertex_m represents the vertices. + * For all Dim, matchvertex_m represents the vertices. */ - if constexpr(Dim == 1) { - matchvertex_m = { 1, 0 }; - } else if constexpr(Dim == 2) { - matchvertex_m = { 3, 2, 1, 0 }; - } else if constexpr(Dim == 3) { - matchvertex_m = { 7, 6, 5, 4, 3, 2, 1, 0 }; + if constexpr (Dim == 1) { + matchvertex_m = {1, 0}; + } else if constexpr (Dim == 2) { + matchvertex_m = {3, 2, 1, 0}; + } else if constexpr (Dim == 3) { + matchvertex_m = {7, 6, 5, 4, 3, 2, 1, 0}; } - } - template FieldLayout::FieldLayout(const NDIndex& domain, e_dim_tag* p, bool isAllPeriodic) - : FieldLayout() - { + : FieldLayout() { initialize(domain, p, isAllPeriodic); } - template - FieldLayout::~FieldLayout() { } + FieldLayout::~FieldLayout() {} template - void - FieldLayout::updateLayout(const std::vector>& domains) { + void FieldLayout::updateLayout(const std::vector>& domains) { if (domains.empty()) - return; - + return; + for (unsigned int i = 0; i < domains.size(); i++) - hLocalDomains_m(i) = domains[i]; - + hLocalDomains_m(i) = domains[i]; + findNeighbors(); Kokkos::deep_copy(dLocalDomains_m, hLocalDomains_m); @@ -121,11 +114,8 @@ namespace ippl { } template - void - FieldLayout::initialize(const NDIndex& domain, - e_dim_tag* userflags, bool isAllPeriodic) - { - + void FieldLayout::initialize(const NDIndex& domain, e_dim_tag* userflags, + bool isAllPeriodic) { int nRanks = Ippl::Comm->size(); gDomain_m = domain; @@ -140,7 +130,6 @@ namespace ippl { return; } - // If the user did not specify parallel/serial flags then make all parallel. long totparelems = 1; for (unsigned d = 0; d < Dim; ++d) { @@ -176,48 +165,37 @@ namespace ippl { calcWidths(); } - template - const typename FieldLayout::NDIndex_t& - FieldLayout::getLocalNDIndex(int rank) const - { + const typename FieldLayout::NDIndex_t& FieldLayout::getLocalNDIndex(int rank) const { return hLocalDomains_m(rank); } - template - const typename FieldLayout::host_mirror_type - FieldLayout::getHostLocalDomains() const - { + const typename FieldLayout::host_mirror_type FieldLayout::getHostLocalDomains() + const { return hLocalDomains_m; } - template - const typename FieldLayout::view_type - FieldLayout::getDeviceLocalDomains() const - { + const typename FieldLayout::view_type FieldLayout::getDeviceLocalDomains() const { return dLocalDomains_m; } - template - const typename FieldLayout::face_neighbor_type& - FieldLayout::getFaceNeighbors() const { + const typename FieldLayout::face_neighbor_type& FieldLayout::getFaceNeighbors() + const { return faceNeighbors_m; } - template - const typename FieldLayout::edge_neighbor_type& - FieldLayout::getEdgeNeighbors() const { + const typename FieldLayout::edge_neighbor_type& FieldLayout::getEdgeNeighbors() + const { return edgeNeighbors_m; } - template - const typename FieldLayout::vertex_neighbor_type& - FieldLayout::getVertexNeighbors() const { + const typename FieldLayout::vertex_neighbor_type& FieldLayout::getVertexNeighbors() + const { return vertexNeighbors_m; } @@ -227,14 +205,12 @@ namespace ippl { return faceNeighborsSendRange_m; } - template const typename FieldLayout::edge_neighbor_range_type& FieldLayout::getEdgeNeighborsSendRange() const { return edgeNeighborsSendRange_m; } - template const typename FieldLayout::vertex_neighbor_range_type& FieldLayout::getVertexNeighborsSendRange() const { @@ -247,7 +223,6 @@ namespace ippl { return faceNeighborsRecvRange_m; } - template const typename FieldLayout::edge_neighbor_range_type& FieldLayout::getEdgeNeighborsRecvRange() const { @@ -261,26 +236,22 @@ namespace ippl { } template - const typename FieldLayout::match_face_type& - FieldLayout::getMatchFace() const { + const typename FieldLayout::match_face_type& FieldLayout::getMatchFace() const { return matchface_m; } template - const typename FieldLayout::match_edge_type& - FieldLayout::getMatchEdge() const { + const typename FieldLayout::match_edge_type& FieldLayout::getMatchEdge() const { return matchedge_m; } template - const typename FieldLayout::match_vertex_type& - FieldLayout::getMatchVertex() const { + const typename FieldLayout::match_vertex_type& FieldLayout::getMatchVertex() const { return matchvertex_m; } template - void FieldLayout::write(std::ostream& out) const - { + void FieldLayout::write(std::ostream& out) const { if (Ippl::Comm->rank() > 0) { return; } @@ -294,10 +265,8 @@ namespace ippl { } } - template - void FieldLayout::calcWidths() - { + void FieldLayout::calcWidths() { // initialize widths first for (unsigned int d = 0; d < Dim; ++d) { minWidth_m[d] = gDomain_m[d].length(); @@ -305,18 +274,16 @@ namespace ippl { using size_type = typename host_mirror_type::size_type; for (size_type i = 0; i < hLocalDomains_m.size(); ++i) { - const NDIndex_t &dom = hLocalDomains_m(i); + const NDIndex_t& dom = hLocalDomains_m(i); for (unsigned int d = 0; d < Dim; ++d) { - if ((unsigned int) dom[d].length() < minWidth_m[d]) + if ((unsigned int)dom[d].length() < minWidth_m[d]) minWidth_m[d] = dom[d].length(); } } } - template void FieldLayout::findNeighbors(int nghost) { - /* We need to reset the neighbor list * and its ranges because of the repartitioner. */ @@ -334,7 +301,6 @@ namespace ippl { vertexNeighbors_m.fill(-1); - int myRank = Ippl::Comm->rank(); // get my local box @@ -343,8 +309,10 @@ namespace ippl { // grow the box by nghost cells in each dimension auto gnd = nd.grow(nghost); - static IpplTimings::TimerRef findInternalNeighborsTimer = IpplTimings::getTimer("findInternal"); - static IpplTimings::TimerRef findPeriodicNeighborsTimer = IpplTimings::getTimer("findPeriodic"); + static IpplTimings::TimerRef findInternalNeighborsTimer = + IpplTimings::getTimer("findInternal"); + static IpplTimings::TimerRef findPeriodicNeighborsTimer = + IpplTimings::getTimer("findPeriodic"); for (int rank = 0; rank < Ippl::Comm->size(); ++rank) { if (rank == myRank) { // do not compare with my domain @@ -353,81 +321,73 @@ namespace ippl { auto& ndNeighbor = hLocalDomains_m[rank]; IpplTimings::startTimer(findInternalNeighborsTimer); - //For inter-processor neighbors + // For inter-processor neighbors if (gnd.touches(ndNeighbor)) { - auto intersect = gnd.intersect(ndNeighbor); addNeighbors(gnd, nd, ndNeighbor, intersect, nghost, rank); - } IpplTimings::stopTimer(findInternalNeighborsTimer); IpplTimings::startTimer(findPeriodicNeighborsTimer); - if(isAllPeriodic_m) { - + if (isAllPeriodic_m) { int offsetd0, offsetd1, offsetd2; for (unsigned int d0 = 0; d0 < Dim; ++d0) { - //The k loop is for checking whether our local - //domain touches both min. and max. extents of the - //global domain as this can happen in 1D, 2D decompositions - //and also in less no. of cores (like <=4) + // The k loop is for checking whether our local + // domain touches both min. and max. extents of the + // global domain as this can happen in 1D, 2D decompositions + // and also in less no. of cores (like <=4) for (int k0 = 0; k0 < 2; ++k0) { - offsetd0 = getPeriodicOffset(nd, d0, k0); - if(offsetd0 == 0) + if (offsetd0 == 0) continue; - gnd[d0] = gnd[d0] + offsetd0; + gnd[d0] = gnd[d0] + offsetd0; if (gnd.touches(ndNeighbor)) { auto intersect = gnd.intersect(ndNeighbor); ndNeighbor[d0] = ndNeighbor[d0] - offsetd0; - addNeighbors(gnd, nd, ndNeighbor, intersect, - nghost, rank); + addNeighbors(gnd, nd, ndNeighbor, intersect, nghost, rank); ndNeighbor[d0] = ndNeighbor[d0] + offsetd0; } - - //The following loop is to find the periodic edge neighbors of - //the domain in the physical boundary + + // The following loop is to find the periodic edge neighbors of + // the domain in the physical boundary for (unsigned int d1 = d0 + 1; d1 < Dim; ++d1) { for (int k1 = 0; k1 < 2; ++k1) { - offsetd1 = getPeriodicOffset(nd, d1, k1); - if(offsetd1 == 0) + if (offsetd1 == 0) continue; - - gnd[d1] = gnd[d1] + offsetd1; + + gnd[d1] = gnd[d1] + offsetd1; if (gnd.touches(ndNeighbor)) { auto intersect = gnd.intersect(ndNeighbor); ndNeighbor[d0] = ndNeighbor[d0] - offsetd0; ndNeighbor[d1] = ndNeighbor[d1] - offsetd1; - addNeighbors(gnd, nd, ndNeighbor, intersect, - nghost, rank); + addNeighbors(gnd, nd, ndNeighbor, intersect, nghost, rank); ndNeighbor[d0] = ndNeighbor[d0] + offsetd0; ndNeighbor[d1] = ndNeighbor[d1] + offsetd1; } - - //The following loop is to find the vertex neighbors of - //the domain in the physical boundary + + // The following loop is to find the vertex neighbors of + // the domain in the physical boundary for (unsigned int d2 = d1 + 1; d2 < Dim; ++d2) { for (int k2 = 0; k2 < 2; ++k2) { - offsetd2 = getPeriodicOffset(nd, d2, k2); - if(offsetd2 == 0) + if (offsetd2 == 0) continue; - - gnd[d2] = gnd[d2] + offsetd2; + + gnd[d2] = gnd[d2] + offsetd2; if (gnd.touches(ndNeighbor)) { auto intersect = gnd.intersect(ndNeighbor); ndNeighbor[d0] = ndNeighbor[d0] - offsetd0; ndNeighbor[d1] = ndNeighbor[d1] - offsetd1; ndNeighbor[d2] = ndNeighbor[d2] - offsetd2; - addNeighbors(gnd, nd, ndNeighbor, intersect, - nghost, rank); + addNeighbors(gnd, nd, ndNeighbor, intersect, nghost, + rank); ndNeighbor[d0] = ndNeighbor[d0] + offsetd0; ndNeighbor[d1] = ndNeighbor[d1] + offsetd1; ndNeighbor[d2] = ndNeighbor[d2] + offsetd2; } - gnd[d2] = gnd[d2] - offsetd2; + gnd[d2] = gnd[d2] - offsetd2; } } gnd[d1] = gnd[d1] - offsetd1; @@ -440,30 +400,22 @@ namespace ippl { IpplTimings::stopTimer(findPeriodicNeighborsTimer); } } - + template - void FieldLayout::addNeighbors(NDIndex_t& gnd, - NDIndex_t& nd, - NDIndex_t& ndNeighbor, - NDIndex_t& intersect, - int nghost, - int rank) { - - bound_type rangeSend, rangeRecv; - rangeSend = getBounds(nd, ndNeighbor, - nd, nghost); - - rangeRecv = getBounds(ndNeighbor, nd, - nd, nghost); - - int nDim = 0; - for (unsigned int d = 0; d < Dim; ++d) { - const Index& index = intersect[d]; - nDim += (index.length() > 1) ? 1 : 0; - } + void FieldLayout::addNeighbors(NDIndex_t& gnd, NDIndex_t& nd, NDIndex_t& ndNeighbor, + NDIndex_t& intersect, int nghost, int rank) { + bound_type rangeSend, rangeRecv; + rangeSend = getBounds(nd, ndNeighbor, nd, nghost); - switch (nDim) { + rangeRecv = getBounds(ndNeighbor, nd, nd, nghost); + int nDim = 0; + for (unsigned int d = 0; d < Dim; ++d) { + const Index& index = intersect[d]; + nDim += (index.length() > 1) ? 1 : 0; + } + + switch (nDim) { case 0: addVertex(gnd, intersect, rank, rangeSend, rangeRecv); break; @@ -475,22 +427,14 @@ namespace ippl { break; default: throw IpplException( - "FieldLayout::addNeighbors()", - "Failed to identify grid point. Neither a face, edge or vertex grid point."); - } - + "FieldLayout::addNeighbors()", + "Failed to identify grid point. Neither a face, edge or vertex grid point."); + } } - - - template - void FieldLayout::addVertex(const NDIndex_t& grown, - const NDIndex_t& intersect, - int rank, - const bound_type& rangeSend, - const bound_type& rangeRecv) - { + void FieldLayout::addVertex(const NDIndex_t& grown, const NDIndex_t& intersect, int rank, + const bound_type& rangeSend, const bound_type& rangeRecv) { /* The following routine computes the correct index * of the vertex. * @@ -502,7 +446,6 @@ namespace ippl { */ size_t index = 0; for (size_t d = 0; d < Dim; ++d) { - /* if lower --> 0 * else upper --> 1 */ @@ -515,27 +458,20 @@ namespace ippl { PAssert(index < vertexNeighbors_m.size()); - vertexNeighbors_m[index] = rank; + vertexNeighbors_m[index] = rank; vertexNeighborsSendRange_m[index] = rangeSend; vertexNeighborsRecvRange_m[index] = rangeRecv; - } - template - void FieldLayout::addEdge(const NDIndex_t& grown, - const NDIndex_t& intersect, - int rank, - const bound_type& rangeSend, - const bound_type& rangeRecv) - { + void FieldLayout::addEdge(const NDIndex_t& grown, const NDIndex_t& intersect, int rank, + const bound_type& rangeSend, const bound_type& rangeRecv) { int nEdgesPerDim = (1 << (Dim - 1)); size_t index = 0; int num = 1; for (size_t d = 0; d < Dim; ++d) { - if (intersect[d].length() == 1) { const bool isLower = (grown[d].first() == intersect[d].first()); index += (isLower) ? 0 : num; @@ -543,7 +479,6 @@ namespace ippl { continue; } - int jump = d * nEdgesPerDim; index += jump; } @@ -555,14 +490,9 @@ namespace ippl { edgeNeighborsRecvRange_m[index].push_back(rangeRecv); } - template - void FieldLayout::addFace(const NDIndex_t& grown, - const NDIndex_t& intersect, - int rank, - const bound_type& rangeSend, - const bound_type& rangeRecv) - { + void FieldLayout::addFace(const NDIndex_t& grown, const NDIndex_t& intersect, int rank, + const bound_type& rangeSend, const bound_type& rangeRecv) { for (unsigned int d = 0; d < Dim; ++d) { const Index& index = intersect[d]; @@ -592,14 +522,12 @@ namespace ippl { } } } - + template - typename FieldLayout::bound_type - FieldLayout::getBounds(const NDIndex_t& nd1, - const NDIndex_t& nd2, - const NDIndex_t& offset, - int nghost) - { + typename FieldLayout::bound_type FieldLayout::getBounds(const NDIndex_t& nd1, + const NDIndex_t& nd2, + const NDIndex_t& offset, + int nghost) { NDIndex gnd = nd2.grow(nghost); NDIndex overlap = gnd.intersect(nd1); @@ -611,35 +539,32 @@ namespace ippl { */ for (size_t i = 0; i < Dim; ++i) { intersect.lo[i] = overlap[i].first() - offset[i].first() /*offset*/ + nghost; - intersect.hi[i] = overlap[i].last() - offset[i].first() /*offset*/ + nghost + 1; + intersect.hi[i] = overlap[i].last() - offset[i].first() /*offset*/ + nghost + 1; } return intersect; } - + template - int FieldLayout::getPeriodicOffset(const NDIndex_t& nd, - const unsigned int d, - const int k) - { - int offset=0; - switch(k) { + int FieldLayout::getPeriodicOffset(const NDIndex_t& nd, const unsigned int d, + const int k) { + int offset = 0; + switch (k) { case 0: - if(nd[d].max() == gDomain_m[d].max()) + if (nd[d].max() == gDomain_m[d].max()) offset = -gDomain_m[d].length(); break; case 1: - if(nd[d].min() == gDomain_m[d].min()) + if (nd[d].min() == gDomain_m[d].min()) offset = gDomain_m[d].length(); break; default: - throw IpplException("FieldLayout:getPeriodicOffset", - "k has to be either 0 or 1"); + throw IpplException("FieldLayout:getPeriodicOffset", "k has to be either 0 or 1"); } - + return offset; } -} +} // namespace ippl diff --git a/src/FieldLayout/FieldLayoutUser.h b/src/FieldLayout/FieldLayoutUser.h index a687679d0..659c008aa 100644 --- a/src/FieldLayout/FieldLayoutUser.h +++ b/src/FieldLayout/FieldLayoutUser.h @@ -2,14 +2,14 @@ /*************************************************************************** * * The IPPL Framework - * + * ***************************************************************************/ #ifndef FIELD_LAYOUT_USER_H #define FIELD_LAYOUT_USER_H /*********************************************************************** - * + * * FieldLayoutUser is a base class for all classes which need to use * a FieldLayout - it is derived from User, which provides a virtual * function 'notifyUserOfDelete' which is called when the FieldLayout @@ -22,26 +22,24 @@ #include "Utility/User.h" #include "Utility/UserList.h" - // class definition class FieldLayoutUser : public User { - public: - // constructor - the base class selects a unique ID value - FieldLayoutUser() {}; + // constructor - the base class selects a unique ID value + FieldLayoutUser(){}; - // destructor, nothing to do here - virtual ~FieldLayoutUser() {}; + // destructor, nothing to do here + virtual ~FieldLayoutUser(){}; - // - // virtual functions for FieldLayoutUser's - // + // + // virtual functions for FieldLayoutUser's + // - // Repartition onto a new layout - virtual void Repartition(UserList *) = 0; + // Repartition onto a new layout + virtual void Repartition(UserList*) = 0; }; -#endif // FIELD_LAYOUT_USER_H +#endif // FIELD_LAYOUT_USER_H // vi: set et ts=4 sw=4 sts=4: // Local Variables: diff --git a/src/Index/Index.h b/src/Index/Index.h index 8aa120508..86ef03fce 100644 --- a/src/Index/Index.h +++ b/src/Index/Index.h @@ -50,113 +50,90 @@ #include namespace ippl { - class Index - { + class Index { public: - class iterator - { + class iterator { public: iterator() - : current_m(0) - , stride_m(0) - { } + : current_m(0) + , stride_m(0) {} iterator(int current, int stride = 1) - : current_m(current) - , stride_m(stride) - { } + : current_m(current) + , stride_m(stride) {} - int operator*() { return current_m ; } + int operator*() { return current_m; } - iterator operator--(int) - { + iterator operator--(int) { iterator tmp = *this; - current_m -= stride_m; // Post decrement + current_m -= stride_m; // Post decrement return tmp; } - iterator& operator--() - { + iterator& operator--() { current_m -= stride_m; return (*this); } - iterator operator++(int) - { + iterator operator++(int) { iterator tmp = *this; - current_m += stride_m; // Post increment + current_m += stride_m; // Post increment return tmp; } - iterator& operator++() - { + iterator& operator++() { current_m += stride_m; return (*this); } - iterator& operator+=(int i) - { + iterator& operator+=(int i) { current_m += (stride_m * i); return *this; } - iterator& operator-=(int i) - { + iterator& operator-=(int i) { current_m -= (stride_m * i); return *this; } - iterator operator+(int i) const - { - return iterator(current_m + i * stride_m, stride_m); - } + iterator operator+(int i) const { return iterator(current_m + i * stride_m, stride_m); } - iterator operator-(int i) const - { - return iterator(current_m - i * stride_m, stride_m); - } + iterator operator-(int i) const { return iterator(current_m - i * stride_m, stride_m); } - int operator[](int i) const - { - return current_m + i * stride_m; - } + int operator[](int i) const { return current_m + i * stride_m; } - bool operator==(const iterator &y) const - { + bool operator==(const iterator& y) const { return (current_m == y.current_m) && (stride_m == y.stride_m); } - bool operator<(const iterator &y) const - { - return (current_m < y.current_m)|| - ((current_m==y.current_m)&&(stride_m (const iterator &y) const { return y < (*this); } + bool operator>(const iterator& y) const { return y < (*this); } - bool operator<=(const iterator &y) const { return !(y < (*this)); } + bool operator<=(const iterator& y) const { return !(y < (*this)); } - bool operator>=(const iterator &y) const { return !((*this) < y); } + bool operator>=(const iterator& y) const { return !((*this) < y); } private: int current_m; int stride_m; }; - + /*! * Instantiate Index without any range. */ - KOKKOS_INLINE_FUNCTION - Index(); + KOKKOS_INLINE_FUNCTION Index(); /*! * Instantiate Index with range [0, ..., n-1] * @param n number of elements */ - KOKKOS_INLINE_FUNCTION - Index(size_t n); + KOKKOS_INLINE_FUNCTION Index(size_t n); /*! * Instantiate Index with user-defined lower and upper @@ -164,8 +141,7 @@ namespace ippl { * @param f first element * @param l last element */ - KOKKOS_INLINE_FUNCTION - Index(int f, int l); + KOKKOS_INLINE_FUNCTION Index(int f, int l); /*! * First to Last using Step. @@ -173,8 +149,7 @@ namespace ippl { * @param l last element * @param s step */ - KOKKOS_INLINE_FUNCTION - Index(int f, int l, int s); + KOKKOS_INLINE_FUNCTION Index(int f, int l, int s); KOKKOS_DEFAULTED_FUNCTION ~Index() = default; @@ -182,94 +157,72 @@ namespace ippl { /*! * @returns the smallest element */ - KOKKOS_INLINE_FUNCTION - int min() const noexcept; + KOKKOS_INLINE_FUNCTION int min() const noexcept; /*! * @returns the largest element */ - KOKKOS_INLINE_FUNCTION - int max() const noexcept; + KOKKOS_INLINE_FUNCTION int max() const noexcept; /*! * @returns the number of elements */ - KOKKOS_INLINE_FUNCTION - size_t length() const noexcept; + KOKKOS_INLINE_FUNCTION size_t length() const noexcept; /*! * @returns the stride */ - KOKKOS_INLINE_FUNCTION - int stride() const noexcept; + KOKKOS_INLINE_FUNCTION int stride() const noexcept; /*! * @returns the first element */ - KOKKOS_INLINE_FUNCTION - int first() const noexcept; + KOKKOS_INLINE_FUNCTION int first() const noexcept; /*! * @returns the last element */ - KOKKOS_INLINE_FUNCTION - int last() const noexcept; + KOKKOS_INLINE_FUNCTION int last() const noexcept; /*! * @returns true if empty, otherwise false */ - KOKKOS_INLINE_FUNCTION - bool empty() const noexcept; + KOKKOS_INLINE_FUNCTION bool empty() const noexcept; // Additive operations. - KOKKOS_INLINE_FUNCTION - friend Index operator+(const Index&,int); + KOKKOS_INLINE_FUNCTION friend Index operator+(const Index&, int); - KOKKOS_INLINE_FUNCTION - friend Index operator+(int,const Index&); + KOKKOS_INLINE_FUNCTION friend Index operator+(int, const Index&); - KOKKOS_INLINE_FUNCTION - friend Index operator-(const Index&,int); + KOKKOS_INLINE_FUNCTION friend Index operator-(const Index&, int); - KOKKOS_INLINE_FUNCTION - friend Index operator-(int,const Index&); + KOKKOS_INLINE_FUNCTION friend Index operator-(int, const Index&); // Multipplicative operations. - KOKKOS_INLINE_FUNCTION - friend Index operator-(const Index&); + KOKKOS_INLINE_FUNCTION friend Index operator-(const Index&); - KOKKOS_INLINE_FUNCTION - friend Index operator*(const Index&,int); + KOKKOS_INLINE_FUNCTION friend Index operator*(const Index&, int); - KOKKOS_INLINE_FUNCTION - friend Index operator*(int,const Index&); + KOKKOS_INLINE_FUNCTION friend Index operator*(int, const Index&); - KOKKOS_INLINE_FUNCTION - friend Index operator/(const Index&,int); + KOKKOS_INLINE_FUNCTION friend Index operator/(const Index&, int); // Intersect with another Index. - KOKKOS_INLINE_FUNCTION - Index intersect(const Index &) const; + KOKKOS_INLINE_FUNCTION Index intersect(const Index&) const; // Intersect with another Index. - KOKKOS_INLINE_FUNCTION - Index grow(int ncells) const; + KOKKOS_INLINE_FUNCTION Index grow(int ncells) const; // Test to see if there is any overlap between two Indexes. - KOKKOS_INLINE_FUNCTION - bool touches (const Index&a) const; + KOKKOS_INLINE_FUNCTION bool touches(const Index& a) const; // Test to see if one contains another (endpoints only) - KOKKOS_INLINE_FUNCTION - bool contains(const Index&a) const; + KOKKOS_INLINE_FUNCTION bool contains(const Index& a) const; // Split one into two. - KOKKOS_INLINE_FUNCTION - bool split(Index& l, Index& r) const; + KOKKOS_INLINE_FUNCTION bool split(Index& l, Index& r) const; // Split one into two at index i. - KOKKOS_INLINE_FUNCTION - bool split(Index& l, Index& r, int i) const; + KOKKOS_INLINE_FUNCTION bool split(Index& l, Index& r, int i) const; // Split index into two with a ratio between 0 and 1. - KOKKOS_INLINE_FUNCTION - bool split(Index& l, Index& r, double a) const; + KOKKOS_INLINE_FUNCTION bool split(Index& l, Index& r, double a) const; // iterator begin iterator begin() { return iterator(first_m, stride_m); } @@ -277,50 +230,42 @@ namespace ippl { iterator end() { return iterator(first_m + stride_m * length_m, stride_m); } // An operator< so we can impose some sort of ordering. - KOKKOS_INLINE_FUNCTION - bool operator<(const Index& r) const - { - return ( (length_m< r.length_m) || - ( (length_m==r.length_m) && ( (first_m0) && (stride_m 0) && (stride_m < r.stride_m))))); } // Test for equality. - KOKKOS_INLINE_FUNCTION - bool operator==(const Index& r) const noexcept - { - return (length_m==r.length_m) && (first_m==r.first_m) && (stride_m==r.stride_m); + KOKKOS_INLINE_FUNCTION bool operator==(const Index& r) const noexcept { + return (length_m == r.length_m) && (first_m == r.first_m) && (stride_m == r.stride_m); } private: - int first_m; /// First index element + int first_m; /// First index element int stride_m; - size_t length_m; /// The number of elements - + size_t length_m; /// The number of elements + // Make an Index that interally counts the other direction. - KOKKOS_INLINE_FUNCTION - Index reverse() const; + KOKKOS_INLINE_FUNCTION Index reverse() const; // Construct with a given base. This is private because // the interface shouldn't depend on how this is done. - KOKKOS_INLINE_FUNCTION - Index(int m, int a, const Index &b); + KOKKOS_INLINE_FUNCTION Index(int m, int a, const Index& b); - KOKKOS_INLINE_FUNCTION - Index(int f, int s, const Index *b); + KOKKOS_INLINE_FUNCTION Index(int f, int s, const Index* b); // Do a general intersect if the strides are not both 1. - KOKKOS_INLINE_FUNCTION - Index general_intersect(const Index&) const; + KOKKOS_INLINE_FUNCTION Index general_intersect(const Index&) const; }; - - inline - std::ostream& operator<<(std::ostream& out, const Index& I) { + inline std::ostream& operator<<(std::ostream& out, const Index& I) { out << '[' << I.first() << ':' << I.last() << ':' << I.stride() << ']'; return out; } -} +} // namespace ippl #include "Index/Index.hpp" diff --git a/src/Index/Index.hpp b/src/Index/Index.hpp index e890cb528..1f06c6d55 100644 --- a/src/Index/Index.hpp +++ b/src/Index/Index.hpp @@ -46,217 +46,158 @@ namespace ippl { - KOKKOS_INLINE_FUNCTION - Index::Index() - : first_m(0) - , stride_m(0) - , length_m(0) - { } - - KOKKOS_INLINE_FUNCTION - Index::Index(size_t n) - : first_m(0) - , stride_m(1) - , length_m(n) - { } - - - KOKKOS_INLINE_FUNCTION - Index::Index(int f, int l) - : first_m(f) - , stride_m(1) - , length_m(l-f+1) - { + KOKKOS_INLINE_FUNCTION Index::Index() + : first_m(0) + , stride_m(0) + , length_m(0) {} + + KOKKOS_INLINE_FUNCTION Index::Index(size_t n) + : first_m(0) + , stride_m(1) + , length_m(n) {} + + KOKKOS_INLINE_FUNCTION Index::Index(int f, int l) + : first_m(f) + , stride_m(1) + , length_m(l - f + 1) { PAssert_GE(l - f + 1, 0); } - - KOKKOS_INLINE_FUNCTION - Index::Index(int f, int l, int s) - : first_m(f) - , stride_m(s) - { + KOKKOS_INLINE_FUNCTION Index::Index(int f, int l, int s) + : first_m(f) + , stride_m(s) { PAssert_NE(s, 0); - if ( f==l ) { + if (f == l) { length_m = 1; - } - else if ( (l>f) ^ (s<0) ) { - length_m = (l-f)/s + 1; - } - else { + } else if ((l > f) ^ (s < 0)) { + length_m = (l - f) / s + 1; + } else { length_m = 0; } } + KOKKOS_INLINE_FUNCTION Index::Index(int m, int a, const Index& b) + : first_m(b.first_m * m + a) + , stride_m(b.stride_m * m) + , length_m(b.length_m) {} - KOKKOS_INLINE_FUNCTION - Index::Index(int m, int a, const Index &b) - : first_m(b.first_m*m+a) - , stride_m(b.stride_m*m) - , length_m(b.length_m) - { } - - - KOKKOS_INLINE_FUNCTION - Index::Index(int f, int s, const Index *b) - : first_m(f) - , stride_m(s) - , length_m(b->length_m) - { } + KOKKOS_INLINE_FUNCTION Index::Index(int f, int s, const Index* b) + : first_m(f) + , stride_m(s) + , length_m(b->length_m) {} - - KOKKOS_INLINE_FUNCTION - int Index::first() const noexcept { + KOKKOS_INLINE_FUNCTION int Index::first() const noexcept { return first_m; } - - KOKKOS_INLINE_FUNCTION - int Index::stride() const noexcept { + KOKKOS_INLINE_FUNCTION int Index::stride() const noexcept { return stride_m; } - - KOKKOS_INLINE_FUNCTION - bool Index::empty() const noexcept { + KOKKOS_INLINE_FUNCTION bool Index::empty() const noexcept { return length_m == 0; } - - KOKKOS_INLINE_FUNCTION - size_t Index::length() const noexcept { + KOKKOS_INLINE_FUNCTION size_t Index::length() const noexcept { return length_m; } - - KOKKOS_INLINE_FUNCTION - int Index::last() const noexcept { + KOKKOS_INLINE_FUNCTION int Index::last() const noexcept { return (length_m == 0) ? first_m : first_m + stride_m * (length_m - 1); } - - KOKKOS_INLINE_FUNCTION - int Index::min() const noexcept { + KOKKOS_INLINE_FUNCTION int Index::min() const noexcept { return (stride_m >= 0) ? first_m : first_m + stride_m * (length_m - 1); } - - KOKKOS_INLINE_FUNCTION - int Index::max() const noexcept { + KOKKOS_INLINE_FUNCTION int Index::max() const noexcept { return (stride_m >= 0) ? first_m + stride_m * (length_m - 1) : first_m; } - - KOKKOS_INLINE_FUNCTION - Index operator+(const Index& i, int off) { - return Index(1,off,i); + KOKKOS_INLINE_FUNCTION Index operator+(const Index& i, int off) { + return Index(1, off, i); } - - KOKKOS_INLINE_FUNCTION - Index operator+(int off, const Index& i) { - return Index(1,off,i); + KOKKOS_INLINE_FUNCTION Index operator+(int off, const Index& i) { + return Index(1, off, i); } - - KOKKOS_INLINE_FUNCTION - Index operator-(const Index& i, int off) { - return Index(1,-off,i); + KOKKOS_INLINE_FUNCTION Index operator-(const Index& i, int off) { + return Index(1, -off, i); } - - KOKKOS_INLINE_FUNCTION - Index operator-(int off, const Index& i) { - return Index(-1,off,i); + KOKKOS_INLINE_FUNCTION Index operator-(int off, const Index& i) { + return Index(-1, off, i); } - - KOKKOS_INLINE_FUNCTION - Index operator-(const Index& i) { - return Index(-1,0,i); + KOKKOS_INLINE_FUNCTION Index operator-(const Index& i) { + return Index(-1, 0, i); } - - KOKKOS_INLINE_FUNCTION - Index operator*(const Index& i, int m) { - return Index(m,0,i); + KOKKOS_INLINE_FUNCTION Index operator*(const Index& i, int m) { + return Index(m, 0, i); } - - KOKKOS_INLINE_FUNCTION - Index operator*(int m, const Index& i) { - return Index(m,0,i); + KOKKOS_INLINE_FUNCTION Index operator*(int m, const Index& i) { + return Index(m, 0, i); } - - KOKKOS_INLINE_FUNCTION - Index operator/(const Index& i, int d) { - return Index(i.first_m/d, i.stride_m/d, &i); + KOKKOS_INLINE_FUNCTION Index operator/(const Index& i, int d) { + return Index(i.first_m / d, i.stride_m / d, &i); } - - KOKKOS_INLINE_FUNCTION - Index Index::reverse() const { + KOKKOS_INLINE_FUNCTION Index Index::reverse() const { Index j; - j.first_m = last(); + j.first_m = last(); j.length_m = length_m; j.stride_m = -stride_m; return j; } - - KOKKOS_INLINE_FUNCTION - bool Index::touches(const Index&a) const { + KOKKOS_INLINE_FUNCTION bool Index::touches(const Index& a) const { return (min() <= a.max()) && (max() >= a.min()); } - - KOKKOS_INLINE_FUNCTION - bool Index::contains(const Index&a) const { + KOKKOS_INLINE_FUNCTION bool Index::contains(const Index& a) const { return (min() <= a.min()) && (max() >= a.max()); } - - KOKKOS_INLINE_FUNCTION - bool Index::split(Index& l, Index& r) const { + KOKKOS_INLINE_FUNCTION bool Index::split(Index& l, Index& r) const { PAssert_EQ(stride_m, 1); PAssert_GT(length_m, 1); - int first = first_m; + int first = first_m; int length = length_m; - int mid = first + length/2 - 1; - l = Index(first, mid); - r = Index(mid+1,first+length-1); + int mid = first + length / 2 - 1; + l = Index(first, mid); + r = Index(mid + 1, first + length - 1); return true; } - KOKKOS_INLINE_FUNCTION - bool Index::split(Index& l, Index& r, int i) const { + KOKKOS_INLINE_FUNCTION bool Index::split(Index& l, Index& r, int i) const { PAssert_EQ(stride_m, 1); PAssert_GT(length_m, 1); - int first = first_m; + int first = first_m; int length = length_m; if (i >= (first + length)) - return false; + return false; l = Index(first, i); - r = Index(i+1,first+length-1); + r = Index(i + 1, first + length - 1); return true; } - KOKKOS_INLINE_FUNCTION - bool Index::split(Index& l, Index& r, double a) const { + KOKKOS_INLINE_FUNCTION bool Index::split(Index& l, Index& r, double a) const { PAssert_EQ(stride_m, 1); PAssert_GT(length_m, 1); PAssert_LT(a, 1.0); PAssert_GT(a, 0.0); - int first = first_m; + int first = first_m; int length = length_m; - int mid = first + static_cast(length*a+0.5) - 1; - l = Index(first, mid); - r = Index(mid+1,first+length-1); + int mid = first + static_cast(length * a + 0.5) - 1; + l = Index(first, mid); + r = Index(mid + 1, first + length - 1); return true; } - ////////////////////////////////////////////////////////////////////// // Calculate the least common multipple of s1 and s2. // put the result in s. @@ -264,25 +205,20 @@ namespace ippl { // This version is optimized for small s1 and s2 and // just uses an exhaustive search. ////////////////////////////////////////////////////////////////////// - KOKKOS_INLINE_FUNCTION - void lcm(int s1, int s2, int &s, int &m1, int &m2) - { - PAssert_GT(s1, 0); // For simplicity, make some assumptions. + KOKKOS_INLINE_FUNCTION void lcm(int s1, int s2, int& s, int& m1, int& m2) { + PAssert_GT(s1, 0); // For simplicity, make some assumptions. PAssert_GT(s2, 0); - int i1=s1; - int i2=s2; + int i1 = s1; + int i2 = s2; int _m1 = 1; int _m2 = 1; - if (i2 rf ? lf : rf; - int l = ll < rl ? ll : rl; - ret.first_m = f; - ret.length_m = ( (l>=f) ? l-f+1 : 0 ); + if ((stride() == 1) && (rhs.stride() == 1)) { + int lf = first(); + int rf = rhs.first(); + int ll = last(); + int rl = rhs.last(); + int f = lf > rf ? lf : rf; + int l = ll < rl ? ll : rl; + ret.first_m = f; + ret.length_m = ((l >= f) ? l - f + 1 : 0); ret.stride_m = 1; - } - else + } else ret = general_intersect(rhs); return ret; } - KOKKOS_INLINE_FUNCTION - Index Index::grow(int ncells) const { + KOKKOS_INLINE_FUNCTION Index Index::grow(int ncells) const { Index index; - index.first_m = this->first_m - ncells; + index.first_m = this->first_m - ncells; index.length_m = this->length_m + 2 * ncells; index.stride_m = this->stride_m; return index; } + KOKKOS_INLINE_FUNCTION static Index do_intersect(const Index& a, const Index& b) { + PAssert_GT(a.stride(), 0); // This should be assured by the + PAssert_GT(b.stride(), 0); // caller of this function. - KOKKOS_INLINE_FUNCTION - static Index do_intersect(const Index &a, const Index &b) - { - PAssert_GT(a.stride(), 0); // This should be assured by the - PAssert_GT(b.stride(), 0); // caller of this function. - - int newStride; // The stride for the new index is - int a_mul,b_mul; // a_mul=newStride/a.stride() ... - lcm(a.stride(),b.stride(), // The input strides... - newStride,a_mul,b_mul); // the lcm of the strides of a and b. + int newStride; // The stride for the new index is + int a_mul, b_mul; // a_mul=newStride/a.stride() ... + lcm(a.stride(), b.stride(), // The input strides... + newStride, a_mul, b_mul); // the lcm of the strides of a and b. // Find the offset from a.first() in units of newStride // that puts the ranges close together. - int a_i = (b.first()-a.first())/a.stride(); - int a_off = a.first() + a_i*a.stride(); - if (a_off < b.first()) - { + int a_i = (b.first() - a.first()) / a.stride(); + int a_off = a.first() + a_i * a.stride(); + if (a_off < b.first()) { a_i++; a_off += a.stride(); } - PAssert_GE(a_off, b.first()); // make sure I'm understanding this right... + PAssert_GE(a_off, b.first()); // make sure I'm understanding this right... // Now do an exhaustive search for the first point in common. // Count over all possible offsets for a. - for (int a_m=0;(a_m that.max()) || (that.min() > max()) ) + if ((min() > that.max()) || (that.min() > max())) return Index(0); - if ( (stride_m==0) || (that.stride_m==0) ) + if ((stride_m == 0) || (that.stride_m == 0)) return Index(0); // If one or the other counts -ve, reverse it and intersect result. - if ( that.stride_m < 0 ) + if (that.stride_m < 0) return intersect(that.reverse()); - if ( stride_m < 0 ) - { + if (stride_m < 0) { Index r; r = reverse().intersect(that).reverse(); return r; @@ -414,11 +332,11 @@ namespace ippl { // Getting closer to the real thing: intersect them. // Pass the one that starts lower as the first argument. Index r; - if ( first_m < that.first_m ) - r = do_intersect(*this,that); + if (first_m < that.first_m) + r = do_intersect(*this, that); else - r = do_intersect(that,*this); + r = do_intersect(that, *this); return r; } -} +} // namespace ippl diff --git a/src/Index/NDIndex.h b/src/Index/NDIndex.h index 6b3b71f45..4eaf49e2d 100644 --- a/src/Index/NDIndex.h +++ b/src/Index/NDIndex.h @@ -29,73 +29,59 @@ namespace ippl { * @tparam Dim the number of index dimensions */ template - class NDIndex - { + class NDIndex { public: KOKKOS_FUNCTION NDIndex() {} template - KOKKOS_FUNCTION - NDIndex(const Args&... args); + KOKKOS_FUNCTION NDIndex(const Args&... args); /*! * @returns a reference to any of the Indexes. */ - KOKKOS_INLINE_FUNCTION - const ippl::Index& operator[](unsigned d) const noexcept; + KOKKOS_INLINE_FUNCTION const ippl::Index& operator[](unsigned d) const noexcept; - KOKKOS_INLINE_FUNCTION - Index& operator[](unsigned d) noexcept; + KOKKOS_INLINE_FUNCTION Index& operator[](unsigned d) noexcept; /*! * @returns the total size. */ - KOKKOS_INLINE_FUNCTION - unsigned size() const noexcept; + KOKKOS_INLINE_FUNCTION unsigned size() const noexcept; /*! * @returns true if empty. */ - KOKKOS_INLINE_FUNCTION - bool empty() const noexcept; + KOKKOS_INLINE_FUNCTION bool empty() const noexcept; /*! * Intersect with another NDIndex. */ - KOKKOS_INLINE_FUNCTION - NDIndex intersect(const NDIndex&) const; + KOKKOS_INLINE_FUNCTION NDIndex intersect(const NDIndex&) const; /*! * Intersect with another NDIndex. */ - KOKKOS_INLINE_FUNCTION - NDIndex grow(int ncells) const; - - KOKKOS_INLINE_FUNCTION - NDIndex grow(int ncells, unsigned int dim) const; + KOKKOS_INLINE_FUNCTION NDIndex grow(int ncells) const; + KOKKOS_INLINE_FUNCTION NDIndex grow(int ncells, unsigned int dim) const; - KOKKOS_INLINE_FUNCTION - bool touches(const NDIndex&) const; + KOKKOS_INLINE_FUNCTION bool touches(const NDIndex&) const; - KOKKOS_INLINE_FUNCTION - bool contains(const NDIndex& a) const; + KOKKOS_INLINE_FUNCTION bool contains(const NDIndex& a) const; // Split on dimension d with at position i - KOKKOS_INLINE_FUNCTION - bool split(NDIndex& l, NDIndex& r, unsigned d, int i) const; + KOKKOS_INLINE_FUNCTION bool split(NDIndex& l, NDIndex& r, unsigned d, + int i) const; // Split on dimension d with the given ratio 0& l, NDIndex& r, unsigned d, double a) const; + KOKKOS_INLINE_FUNCTION bool split(NDIndex& l, NDIndex& r, unsigned d, + double a) const; // Split on dimension d, or the longest dimension. - KOKKOS_INLINE_FUNCTION - bool split(NDIndex& l, NDIndex& r, unsigned d) const; + KOKKOS_INLINE_FUNCTION bool split(NDIndex& l, NDIndex& r, unsigned d) const; - KOKKOS_INLINE_FUNCTION - bool split(NDIndex& l, NDIndex& r) const; + KOKKOS_INLINE_FUNCTION bool split(NDIndex& l, NDIndex& r) const; private: KOKKOS_FUNCTION @@ -104,7 +90,7 @@ namespace ippl { //! Array of indices Index indices_m[Dim]; }; -} +} // namespace ippl #include "Index/NDIndex.hpp" diff --git a/src/Index/NDIndex.hpp b/src/Index/NDIndex.hpp index f78b0f466..cc5cbdb39 100644 --- a/src/Index/NDIndex.hpp +++ b/src/Index/NDIndex.hpp @@ -21,18 +21,13 @@ namespace ippl { template template - KOKKOS_FUNCTION - NDIndex::NDIndex(const Args&... args) - : NDIndex({args...}) - { - static_assert(Dim == sizeof...(args), - "Wrong number of arguments."); + KOKKOS_FUNCTION NDIndex::NDIndex(const Args&... args) + : NDIndex({args...}) { + static_assert(Dim == sizeof...(args), "Wrong number of arguments."); } - template - KOKKOS_FUNCTION - NDIndex::NDIndex(std::initializer_list indices) { + KOKKOS_FUNCTION NDIndex::NDIndex(std::initializer_list indices) { unsigned int i = 0; for (auto& index : indices) { indices_m[i] = index; @@ -40,24 +35,18 @@ namespace ippl { } } - template - KOKKOS_INLINE_FUNCTION - const Index& NDIndex::operator[](unsigned d) const noexcept { + KOKKOS_INLINE_FUNCTION const Index& NDIndex::operator[](unsigned d) const noexcept { return indices_m[d]; } - template - KOKKOS_INLINE_FUNCTION - Index& NDIndex::operator[](unsigned d) noexcept { + KOKKOS_INLINE_FUNCTION Index& NDIndex::operator[](unsigned d) noexcept { return indices_m[d]; } - template - KOKKOS_INLINE_FUNCTION - unsigned NDIndex::size() const noexcept { + KOKKOS_INLINE_FUNCTION unsigned NDIndex::size() const noexcept { unsigned s = indices_m[0].length(); for (unsigned int d = 1; d < Dim; ++d) { s *= indices_m[d].length(); @@ -65,10 +54,8 @@ namespace ippl { return s; } - - template - KOKKOS_INLINE_FUNCTION - bool NDIndex::empty() const noexcept { + template + KOKKOS_INLINE_FUNCTION bool NDIndex::empty() const noexcept { bool r = false; for (unsigned d = 0; d < Dim; ++d) { r = r || indices_m[d].empty(); @@ -76,21 +63,16 @@ namespace ippl { return r; } - template - inline std::ostream& - operator<<(std::ostream& out, const NDIndex& idx) { + inline std::ostream& operator<<(std::ostream& out, const NDIndex& idx) { out << '{'; for (unsigned d = 0; d < Dim; ++d) - out << idx[d] << ((d==Dim-1) ? '}' : ','); + out << idx[d] << ((d == Dim - 1) ? '}' : ','); return out; } - template - KOKKOS_INLINE_FUNCTION - NDIndex NDIndex::intersect(const NDIndex& ndi) const - { + KOKKOS_INLINE_FUNCTION NDIndex NDIndex::intersect(const NDIndex& ndi) const { NDIndex r; for (unsigned d = 0; d < Dim; ++d) r[d] = indices_m[d].intersect(ndi[d]); @@ -98,9 +80,7 @@ namespace ippl { } template - KOKKOS_INLINE_FUNCTION - NDIndex NDIndex::grow(int ncells) const - { + KOKKOS_INLINE_FUNCTION NDIndex NDIndex::grow(int ncells) const { NDIndex r; for (unsigned d = 0; d < Dim; ++d) r[d] = indices_m[d].grow(ncells); @@ -108,81 +88,67 @@ namespace ippl { } template - KOKKOS_INLINE_FUNCTION - NDIndex NDIndex::grow(int ncells, unsigned int dim) const - { + KOKKOS_INLINE_FUNCTION NDIndex NDIndex::grow(int ncells, unsigned int dim) const { NDIndex r = *this; - r[dim] = indices_m[dim].grow(ncells); + r[dim] = indices_m[dim].grow(ncells); return r; } template - KOKKOS_INLINE_FUNCTION - bool NDIndex::touches(const NDIndex& a) const - { + KOKKOS_INLINE_FUNCTION bool NDIndex::touches(const NDIndex& a) const { bool touch = true; for (unsigned int d = 0; (d < Dim) && touch; ++d) touch = touch && indices_m[d].touches(a.indices_m[d]); return touch; } - template - KOKKOS_INLINE_FUNCTION - bool NDIndex::contains(const NDIndex& a) const - { + KOKKOS_INLINE_FUNCTION bool NDIndex::contains(const NDIndex& a) const { bool cont = true; - for (unsigned int d = 0; (d < Dim) && cont ; ++d) + for (unsigned int d = 0; (d < Dim) && cont; ++d) cont = cont && indices_m[d].contains(a.indices_m[d]); return cont; } - template - KOKKOS_INLINE_FUNCTION - bool NDIndex::split(NDIndex& l, NDIndex& r, unsigned d, int i) const { - if ( &l != this ) + template + KOKKOS_INLINE_FUNCTION bool NDIndex::split(NDIndex& l, NDIndex& r, unsigned d, + int i) const { + if (&l != this) l = *this; - if ( &r != this ) + if (&r != this) r = *this; - return indices_m[d].split(l[d],r[d],i); + return indices_m[d].split(l[d], r[d], i); } - template - KOKKOS_INLINE_FUNCTION - bool NDIndex::split(NDIndex& l, NDIndex& r, - unsigned d, double a) const - { - if ( &l != this ) + template + KOKKOS_INLINE_FUNCTION bool NDIndex::split(NDIndex& l, NDIndex& r, unsigned d, + double a) const { + if (&l != this) l = *this; - if ( &r != this ) + if (&r != this) r = *this; - return indices_m[d].split(l[d],r[d],a); + return indices_m[d].split(l[d], r[d], a); } - - template - KOKKOS_INLINE_FUNCTION - bool NDIndex::split(NDIndex& l, NDIndex& r, unsigned d) const - { - if ( &l != this ) + template + KOKKOS_INLINE_FUNCTION bool NDIndex::split(NDIndex& l, NDIndex& r, + unsigned d) const { + if (&l != this) l = *this; - if ( &r != this ) + if (&r != this) r = *this; - return indices_m[d].split(l[d],r[d]); + return indices_m[d].split(l[d], r[d]); } - - template - KOKKOS_INLINE_FUNCTION - bool NDIndex::split(NDIndex& l, NDIndex& r) const - { - unsigned int max_dim = 0; + template + KOKKOS_INLINE_FUNCTION bool NDIndex::split(NDIndex& l, NDIndex& r) const { + unsigned int max_dim = 0; unsigned int max_length = 0; - for (unsigned int d=0; d max_length ) { - max_dim = d; - max_length = indices_m[d].length(); + for (unsigned int d = 0; d < Dim; ++d) + if (indices_m[d].length() > max_length) { + max_dim = d; + max_length = indices_m[d].length(); } - return split(l,r,max_dim); + return split(l, r, max_dim); } -} +} // namespace ippl diff --git a/src/Ippl.cpp b/src/Ippl.cpp index 420af917a..90cf1b425 100644 --- a/src/Ippl.cpp +++ b/src/Ippl.cpp @@ -16,18 +16,18 @@ // along with IPPL. If not, see . // #include "Ippl.h" -#include "Utility/IpplInfo.h" -#include #include +#include #include +#include "Utility/IpplInfo.h" #include // public static members of IpplInfo, initialized to default values -std::unique_ptr Ippl::Comm = 0; -std::unique_ptr Ippl::Info = 0; -std::unique_ptr Ippl::Warn = 0; -std::unique_ptr Ippl::Error = 0; +std::unique_ptr Ippl::Comm = 0; +std::unique_ptr Ippl::Info = 0; +std::unique_ptr Ippl::Warn = 0; +std::unique_ptr Ippl::Error = 0; void Ippl::deleteGlobals() { Info.reset(); @@ -45,22 +45,18 @@ std::ostream& operator<<(std::ostream& o, const Ippl&) { return o; } - -Ippl::Ippl(int& argc, char**& argv, MPI_Comm mpicomm) -{ - Info = std::make_unique("Ippl"); - Warn = std::make_unique("Warning", std::cerr); +Ippl::Ippl(int& argc, char**& argv, MPI_Comm mpicomm) { + Info = std::make_unique("Ippl"); + Warn = std::make_unique("Warning", std::cerr); Error = std::make_unique("Error", std::cerr, INFORM_ALL_NODES); Comm = std::make_unique(argc, argv, mpicomm); - try { std::list notparsed; int infoLevel = 0; - int nargs = 0; + int nargs = 0; while (nargs < argc) { - if (checkOption(argv[nargs], "--help", "-h")) { if (Comm->myNode() == 0) { IpplInfo::printHelp(argv); @@ -78,10 +74,10 @@ Ippl::Ippl(int& argc, char**& argv, MPI_Comm mpicomm) std::string header("Compile-time options: "); while (options.length() > 58) { std::string line = options.substr(0, 58); - size_t n = line.find_last_of(' '); + size_t n = line.find_last_of(' '); INFOMSG(header << line.substr(0, n) << "\n"); - header = std::string(22, ' '); + header = std::string(22, ' '); options = options.substr(n + 1); } INFOMSG(header << options << endl); @@ -102,7 +98,7 @@ Ippl::Ippl(int& argc, char**& argv, MPI_Comm mpicomm) } } - } catch(const std::exception& e) { + } catch (const std::exception& e) { if (Comm->myNode() == 0) { std::cerr << e.what() << std::endl; } @@ -112,7 +108,6 @@ Ippl::Ippl(int& argc, char**& argv, MPI_Comm mpicomm) Kokkos::initialize(argc, argv); } - bool Ippl::checkOption(const char* arg, const char* lstr, const char* sstr) { return (std::strcmp(arg, lstr) == 0) || (std::strcmp(arg, sstr) == 0); } @@ -128,7 +123,6 @@ int Ippl::getIntOption(const char* arg) { return std::atoi(arg); } - ///////////////////////////////////////////////////////////////////// // Destructor: need to delete comm library if this is the last IpplInfo Ippl::~Ippl() { @@ -136,7 +130,7 @@ Ippl::~Ippl() { Kokkos::finalize(); } -void Ippl::abort(const char *msg) { +void Ippl::abort(const char* msg) { // print out message, if one was provided if (msg != 0) { ERRORMSG(msg << endl); @@ -146,7 +140,6 @@ void Ippl::abort(const char *msg) { throw std::runtime_error("Error form IpplInfo::abort"); } - void Ippl::fence() { Kokkos::fence(); } diff --git a/src/Ippl.h b/src/Ippl.h index a208cc3fd..44ea054c6 100644 --- a/src/Ippl.h +++ b/src/Ippl.h @@ -21,18 +21,20 @@ #include #include "Communicate/Communicate.h" -#include "Utility/Inform.h" #include "Types/IpplTypes.h" +#include "Utility/Inform.h" class Ippl; std::ostream& operator<<(std::ostream&, const Ippl&); class Ippl { - public: // an enumeration used to indicate whether to KEEP command-line arguments // or REMOVE them - enum { KEEP = 0, REMOVE = 1 }; + enum { + KEEP = 0, + REMOVE = 1 + }; // the parallel communication object static std::unique_ptr Comm; @@ -47,21 +49,21 @@ class Ippl { // The second argument controls whether the IPPL-specific command line // arguments are stripped out (the default) or left in (if the setting // is IpplInfo::KEEP). - Ippl(int&, char** &, MPI_Comm mpicomm = MPI_COMM_WORLD); + Ippl(int&, char**&, MPI_Comm mpicomm = MPI_COMM_WORLD); // Constructor 2: default constructor. This will not change anything in // how the static data members are set up. This is useful for declaring // automatic IpplInfo instances in functions after IpplInfo.has been // initially created in the main() routine. - Ippl() {}; + Ippl(){}; // Destructor. ~Ippl(); - static MPI_Comm getComm() {return *Ippl::Comm->getCommunicator();} + static MPI_Comm getComm() { return *Ippl::Comm->getCommunicator(); } // Kill the communication and throw runtime error exception. - static void abort(const char * = 0); + static void abort(const char* = 0); static void fence(); @@ -73,12 +75,14 @@ class Ippl { }; // macros used to print out messages to the console or a directed file -#define INFOMSG(msg) { *Ippl::Info << msg; } -#define WARNMSG(msg) { *Ippl::Warn << msg; } -#define ERRORMSG(msg) { *Ippl::Error << msg; } - - -//FIMXE remove (only for backwards compatibility) +#define INFOMSG(msg) \ + { *Ippl::Info << msg; } +#define WARNMSG(msg) \ + { *Ippl::Warn << msg; } +#define ERRORMSG(msg) \ + { *Ippl::Error << msg; } + +// FIMXE remove (only for backwards compatibility) #include "IpplCore.h" #endif diff --git a/src/IpplCore.h b/src/IpplCore.h index 5866c7282..c00203d71 100644 --- a/src/IpplCore.h +++ b/src/IpplCore.h @@ -18,10 +18,9 @@ #ifndef IPPL_CORE_H #define IPPL_CORE_H - +#include "Field/BConds.h" #include "Field/BareField.h" #include "Field/Field.h" -#include "Field/BConds.h" // IPPL Utilities // #include "Utility/Timer.h" @@ -36,7 +35,7 @@ #include "FieldLayout/FieldLayout.h" #ifdef ENABLE_FFT - #include "FFT/FFT.h" +#include "FFT/FFT.h" #endif // // IPPL Meshes @@ -45,7 +44,6 @@ #include "Particle/ParticleBase.h" #include "Particle/ParticleSpatialLayout.h" - #include "Types/Vector.h" // // IPPL Load balancing diff --git a/src/Meshes/Cartesian.h b/src/Meshes/Cartesian.h index 43fc8cab0..11da5da12 100644 --- a/src/Meshes/Cartesian.h +++ b/src/Meshes/Cartesian.h @@ -2,7 +2,7 @@ /*************************************************************************** * * The IPPL Framework - * + * * * Visit http://people.web.psi.ch/adelmann/ for more details * @@ -15,266 +15,238 @@ // Cartesian class - represents non-uniform-spacing cartesian meshes. // include files -#include "Meshes/Mesh.h" -#include "Meshes/Centering.h" -#include "Meshes/CartesianCentering.h" #include "AppTypes/Vektor.h" +#include "Meshes/CartesianCentering.h" +#include "Meshes/Centering.h" +#include "Meshes/Mesh.h" #include // forward declarations -template class BareField; -template class Field; -template class Cartesian; +template +class BareField; +template +class Field; template -std::ostream& operator<<(std::ostream&, const Cartesian&); - -template -class Cartesian : public Mesh -{ -public: - //# public typedefs - typedef Cell DefaultCentering; - typedef MFLOAT MeshValue_t; - typedef Vektor MeshVektor_t; - - // Default constructor (use initialize() to make valid) - Cartesian() - { - hasSpacingFields = false; - }; - // Destructor - ~Cartesian() - { - if (hasSpacingFields) { - delete VertSpacings; - delete CellSpacings; - delete FlVert; - delete FlCell; - } - }; - - // Non-default constructors - Cartesian(const NDIndex& ndi); - Cartesian(const Index& I); - Cartesian(const Index& I, const Index& J); - Cartesian(const Index& I, const Index& J, const Index& K); - // These also take a MFLOAT** specifying the mesh spacings: - Cartesian(const NDIndex& ndi, MFLOAT** const delX); - Cartesian(const Index& I, MFLOAT** const delX); - Cartesian(const Index& I, const Index& J, MFLOAT** const delX); - Cartesian(const Index& I, const Index& J, const Index& K, - MFLOAT** const delX); - // These further take a Vektor& specifying the origin: - Cartesian(const NDIndex& ndi, MFLOAT** const delX, - const Vektor& orig); - Cartesian(const Index& I, MFLOAT** const delX, - const Vektor& orig); - Cartesian(const Index& I, const Index& J, MFLOAT** const delX, - const Vektor& orig); - Cartesian(const Index& I, const Index& J, const Index& K, - MFLOAT** const delX, const Vektor& orig); - // These further take a MeshBC_E array specifying mesh boundary conditions. - Cartesian(const NDIndex& ndi, MFLOAT** const delX, - const Vektor& orig, MeshBC_E* const mbc); - Cartesian(const Index& I, MFLOAT** const delX, - const Vektor& orig, MeshBC_E* const mbc); - Cartesian(const Index& I, const Index& J, MFLOAT** const delX, - const Vektor& orig, MeshBC_E* const mbc); - Cartesian(const Index& I, const Index& J, const Index& K, - MFLOAT** const delX, const Vektor& orig, - MeshBC_E* const mbc); - - // initialize functions - void initialize(const NDIndex& ndi); - void initialize(const Index& I); - void initialize(const Index& I, const Index& J); - void initialize(const Index& I, const Index& J, const Index& K); - // These also take a MFLOAT** specifying the mesh spacings: - void initialize(const NDIndex& ndi, MFLOAT** const delX); - void initialize(const Index& I, MFLOAT** const delX); - void initialize(const Index& I, const Index& J, MFLOAT** const delX); - void initialize(const Index& I, const Index& J, const Index& K, - MFLOAT** const delX); - // These further take a Vektor& specifying the origin: - void initialize(const NDIndex& ndi, MFLOAT** const delX, - const Vektor& orig); - void initialize(const Index& I, MFLOAT** const delX, - const Vektor& orig); - void initialize(const Index& I, const Index& J, MFLOAT** const delX, - const Vektor& orig); - void initialize(const Index& I, const Index& J, const Index& K, - MFLOAT** const delX, const Vektor& orig); - // These further take a MeshBC_E array specifying mesh boundary conditions. - void initialize(const NDIndex& ndi, MFLOAT** const delX, - const Vektor& orig, MeshBC_E* const mbc); - void initialize(const Index& I, MFLOAT** const delX, - const Vektor& orig, MeshBC_E* const mbc); - void initialize(const Index& I, const Index& J, MFLOAT** const delX, - const Vektor& orig, MeshBC_E* const mbc); - void initialize(const Index& I, const Index& J, const Index& K, - MFLOAT** const delX, const Vektor& orig, - MeshBC_E* const mbc); +class Cartesian; +template +std::ostream& operator<<(std::ostream&, const Cartesian&); + +template +class Cartesian : public Mesh { +public: + // # public typedefs + typedef Cell DefaultCentering; + typedef MFLOAT MeshValue_t; + typedef Vektor MeshVektor_t; + + // Default constructor (use initialize() to make valid) + Cartesian() { hasSpacingFields = false; }; + // Destructor + ~Cartesian() { + if (hasSpacingFields) { + delete VertSpacings; + delete CellSpacings; + delete FlVert; + delete FlCell; + } + }; + + // Non-default constructors + Cartesian(const NDIndex& ndi); + Cartesian(const Index& I); + Cartesian(const Index& I, const Index& J); + Cartesian(const Index& I, const Index& J, const Index& K); + // These also take a MFLOAT** specifying the mesh spacings: + Cartesian(const NDIndex& ndi, MFLOAT** const delX); + Cartesian(const Index& I, MFLOAT** const delX); + Cartesian(const Index& I, const Index& J, MFLOAT** const delX); + Cartesian(const Index& I, const Index& J, const Index& K, MFLOAT** const delX); + // These further take a Vektor& specifying the origin: + Cartesian(const NDIndex& ndi, MFLOAT** const delX, const Vektor& orig); + Cartesian(const Index& I, MFLOAT** const delX, const Vektor& orig); + Cartesian(const Index& I, const Index& J, MFLOAT** const delX, const Vektor& orig); + Cartesian(const Index& I, const Index& J, const Index& K, MFLOAT** const delX, + const Vektor& orig); + // These further take a MeshBC_E array specifying mesh boundary conditions. + Cartesian(const NDIndex& ndi, MFLOAT** const delX, const Vektor& orig, + MeshBC_E* const mbc); + Cartesian(const Index& I, MFLOAT** const delX, const Vektor& orig, + MeshBC_E* const mbc); + Cartesian(const Index& I, const Index& J, MFLOAT** const delX, const Vektor& orig, + MeshBC_E* const mbc); + Cartesian(const Index& I, const Index& J, const Index& K, MFLOAT** const delX, + const Vektor& orig, MeshBC_E* const mbc); + + // initialize functions + void initialize(const NDIndex& ndi); + void initialize(const Index& I); + void initialize(const Index& I, const Index& J); + void initialize(const Index& I, const Index& J, const Index& K); + // These also take a MFLOAT** specifying the mesh spacings: + void initialize(const NDIndex& ndi, MFLOAT** const delX); + void initialize(const Index& I, MFLOAT** const delX); + void initialize(const Index& I, const Index& J, MFLOAT** const delX); + void initialize(const Index& I, const Index& J, const Index& K, MFLOAT** const delX); + // These further take a Vektor& specifying the origin: + void initialize(const NDIndex& ndi, MFLOAT** const delX, const Vektor& orig); + void initialize(const Index& I, MFLOAT** const delX, const Vektor& orig); + void initialize(const Index& I, const Index& J, MFLOAT** const delX, + const Vektor& orig); + void initialize(const Index& I, const Index& J, const Index& K, MFLOAT** const delX, + const Vektor& orig); + // These further take a MeshBC_E array specifying mesh boundary conditions. + void initialize(const NDIndex& ndi, MFLOAT** const delX, const Vektor& orig, + MeshBC_E* const mbc); + void initialize(const Index& I, MFLOAT** const delX, const Vektor& orig, + MeshBC_E* const mbc); + void initialize(const Index& I, const Index& J, MFLOAT** const delX, + const Vektor& orig, MeshBC_E* const mbc); + void initialize(const Index& I, const Index& J, const Index& K, MFLOAT** const delX, + const Vektor& orig, MeshBC_E* const mbc); private: - // Private member data: - // Vert-vert spacings along each axis - including guard cells; use STL map: - std::map meshSpacing[Dim]; - // Vertex positions along each axis - including guard cells; use STL map: - std::map meshPosition[Dim]; - Vektor origin; // Origin of mesh coordinates (vertices) - MeshBC_E MeshBC[2*Dim]; // Mesh boundary conditions - FieldLayout* FlCell; // Layouts for BareField* CellSpacings - FieldLayout* FlVert; // Layouts for BareField* VertSpacings - - // Private member functions: - void updateMeshSpacingGuards(int face);// Update guard layers in meshSpacings - void setup(); // Private function doing tasks common to all constructors. - - // Set only the derivative constants, using pre-set spacings: - void set_Dvc(); - - -public: - - // Public member data: - unsigned gridSizes[Dim]; // Sizes (number of vertices) - Vektor Dvc[1<,Dim>* VertSpacings; - BareField,Dim>* CellSpacings; - - // Public member functions: - - // Create BareField's of vertex and cell spacings; allow for specifying - // layouts via the FieldLayout e_dim_tag and vnodes parameters (these - // get passed in to construct the FieldLayout used to construct the - // BareField's). - void storeSpacingFields(); // Default; will have default layout - // Special cases for 1-3 dimensions, ala FieldLayout ctors: - void storeSpacingFields(e_dim_tag p1, int vnodes=-1); - void storeSpacingFields(e_dim_tag p1, e_dim_tag p2, int vnodes=-1); - void storeSpacingFields(e_dim_tag p1, e_dim_tag p2, e_dim_tag p3, - int vnodes=-1); - // Next we have one for arbitrary dimension, ala FieldLayout ctor: - // All the others call this one internally: - void storeSpacingFields(e_dim_tag *p, int vnodes=-1); - - // These specify both the total number of vnodes and the numbers of vnodes - // along each dimension for the partitioning of the index space. Obviously - // this restricts the number of vnodes to be a product of the numbers along - // each dimension (the constructor implementation checks this): Special - // cases for 1-3 dimensions, ala FieldLayout ctors (see FieldLayout.h for - // more relevant comments, including definition of recurse): - void storeSpacingFields(e_dim_tag p1, - unsigned vnodes1, - bool recurse=false, - int vnodes=-1); - void storeSpacingFields(e_dim_tag p1, e_dim_tag p2, - unsigned vnodes1, unsigned vnodes2, - bool recurse=false,int vnodes=-1); - void storeSpacingFields(e_dim_tag p1, e_dim_tag p2, e_dim_tag p3, - unsigned vnodes1, unsigned vnodes2, unsigned vnodes3, - bool recurse=false, int vnodes=-1); - // Next we have one for arbitrary dimension, ala FieldLayout ctor: - // All the others call this one internally: - void storeSpacingFields(e_dim_tag *p, - unsigned* vnodesPerDirection, - bool recurse=false, int vnodes=-1); - - // Accessor functions for member data: - // Get the origin of mesh vertex positions: - Vektor get_origin() const; - // Get the spacings of mesh vertex positions along specified direction: - void get_meshSpacing(unsigned d, MFLOAT* spacings) const; - //leak MFLOAT* get_meshSpacing(unsigned d) const; - // Get mesh boundary conditions: - MeshBC_E get_MeshBC(unsigned face) const; // One face at a time - MeshBC_E* get_MeshBC() const; // All faces at once - - // Set functions for member data: - // Set the origin of mesh vertex positions: - void set_origin(const Vektor& o); - // Set the spacings of mesh vertex positions and Dvc: - void set_meshSpacing(MFLOAT** const del); - // Set up mesh boundary conditions: - // Face specifies the mesh face, following usual numbering convention. - // MeshBC_E "type" specifies the kind of BC reflective/periodic/none. - void set_MeshBC(unsigned face, MeshBC_E meshBCType); // One face at a time - void set_MeshBC(MeshBC_E* meshBCTypes); // All faces at once - - // Formatted output of Cartesian object: - void print(std::ostream&); - - //----Other Cartesian methods:----------------------------------------------- - // Volume of single cell indexed by input NDIndex; - MFLOAT getCellVolume(const NDIndex&) const; - // Field of volumes of all cells: - Field,Cell>& - getCellVolumeField(Field,Cell>&) const; - // Volume of range of cells bounded by verticies specified by input NDIndex: - MFLOAT getVertRangeVolume(const NDIndex&) const; - // Volume of range of cells spanned by input NDIndex (index of cells): - MFLOAT getCellRangeVolume(const NDIndex&) const; - // Nearest vertex index to (x,y,z): - NDIndex getNearestVertex(const Vektor&) const; - // Nearest vertex index with all vertex coordinates below (x,y,z): - NDIndex getVertexBelow(const Vektor&) const; - // NDIndex for cell in cell-ctrd Field containing the point (x,y,z): - NDIndex getCellContaining(const Vektor& x) const - { - return getVertexBelow(x); // I think these functions are identical. -tjw - } - // (x,y,z) coordinates of indexed vertex: - Vektor getVertexPosition(const NDIndex&) const; - // Field of (x,y,z) coordinates of all vertices: - Field,Dim,Cartesian,Vert>& - getVertexPositionField(Field,Dim, - Cartesian,Vert>& ) const; - // (x,y,z) coordinates of indexed cell: - Vektor getCellPosition(const NDIndex&) const; - // Field of (x,y,z) coordinates of all cells: - Field,Dim,Cartesian,Cell>& - getCellPositionField(Field,Dim, - Cartesian,Cell>& ) const; - // Vertex-vertex grid spacing of indexed cell: - Vektor getDeltaVertex(const NDIndex&) const; - // Field of vertex-vertex grid spacings of all cells: - Field,Dim,Cartesian,Cell>& - getDeltaVertexField(Field,Dim, - Cartesian,Cell>& ) const; - // Cell-cell grid spacing of indexed vertex: - Vektor getDeltaCell(const NDIndex&) const; - // Field of cell-cell grid spacings of all vertices: - Field,Dim,Cartesian,Vert>& - getDeltaCellField(Field,Dim, - Cartesian,Vert>& ) const; - // Array of surface normals to cells adjoining indexed cell: - Vektor* getSurfaceNormals(const NDIndex&) const; - // Array of (pointers to) Fields of surface normals to all cells: - void getSurfaceNormalFields(Field,Dim, - Cartesian,Cell>** ) const; - // Similar functions, but specify the surface normal to a single face, using - // the following numbering convention: 0 means low face of 1st dim, 1 means - // high face of 1st dim, 2 means low face of 2nd dim, 3 means high face of - // 2nd dim, and so on: - Vektor getSurfaceNormal(const NDIndex&, unsigned) const; - Field,Dim,Cartesian,Cell>& - getSurfaceNormalField(Field,Dim, - Cartesian,Cell>&, unsigned) const; - + // Private member data: + // Vert-vert spacings along each axis - including guard cells; use STL map: + std::map meshSpacing[Dim]; + // Vertex positions along each axis - including guard cells; use STL map: + std::map meshPosition[Dim]; + Vektor origin; // Origin of mesh coordinates (vertices) + MeshBC_E MeshBC[2 * Dim]; // Mesh boundary conditions + FieldLayout* FlCell; // Layouts for BareField* CellSpacings + FieldLayout* FlVert; // Layouts for BareField* VertSpacings + + // Private member functions: + void updateMeshSpacingGuards(int face); // Update guard layers in meshSpacings + void setup(); // Private function doing tasks common to all constructors. + + // Set only the derivative constants, using pre-set spacings: + void set_Dvc(); + +public: + // Public member data: + unsigned gridSizes[Dim]; // Sizes (number of vertices) + Vektor Dvc[1 << Dim]; // Constants for derivatives. + bool hasSpacingFields; // Flags allocation of the following: + BareField, Dim>* VertSpacings; + BareField, Dim>* CellSpacings; + + // Public member functions: + + // Create BareField's of vertex and cell spacings; allow for specifying + // layouts via the FieldLayout e_dim_tag and vnodes parameters (these + // get passed in to construct the FieldLayout used to construct the + // BareField's). + void storeSpacingFields(); // Default; will have default layout + // Special cases for 1-3 dimensions, ala FieldLayout ctors: + void storeSpacingFields(e_dim_tag p1, int vnodes = -1); + void storeSpacingFields(e_dim_tag p1, e_dim_tag p2, int vnodes = -1); + void storeSpacingFields(e_dim_tag p1, e_dim_tag p2, e_dim_tag p3, int vnodes = -1); + // Next we have one for arbitrary dimension, ala FieldLayout ctor: + // All the others call this one internally: + void storeSpacingFields(e_dim_tag* p, int vnodes = -1); + + // These specify both the total number of vnodes and the numbers of vnodes + // along each dimension for the partitioning of the index space. Obviously + // this restricts the number of vnodes to be a product of the numbers along + // each dimension (the constructor implementation checks this): Special + // cases for 1-3 dimensions, ala FieldLayout ctors (see FieldLayout.h for + // more relevant comments, including definition of recurse): + void storeSpacingFields(e_dim_tag p1, unsigned vnodes1, bool recurse = false, int vnodes = -1); + void storeSpacingFields(e_dim_tag p1, e_dim_tag p2, unsigned vnodes1, unsigned vnodes2, + bool recurse = false, int vnodes = -1); + void storeSpacingFields(e_dim_tag p1, e_dim_tag p2, e_dim_tag p3, unsigned vnodes1, + unsigned vnodes2, unsigned vnodes3, bool recurse = false, + int vnodes = -1); + // Next we have one for arbitrary dimension, ala FieldLayout ctor: + // All the others call this one internally: + void storeSpacingFields(e_dim_tag* p, unsigned* vnodesPerDirection, bool recurse = false, + int vnodes = -1); + + // Accessor functions for member data: + // Get the origin of mesh vertex positions: + Vektor get_origin() const; + // Get the spacings of mesh vertex positions along specified direction: + void get_meshSpacing(unsigned d, MFLOAT* spacings) const; + // leak MFLOAT* get_meshSpacing(unsigned d) const; + // Get mesh boundary conditions: + MeshBC_E get_MeshBC(unsigned face) const; // One face at a time + MeshBC_E* get_MeshBC() const; // All faces at once + + // Set functions for member data: + // Set the origin of mesh vertex positions: + void set_origin(const Vektor& o); + // Set the spacings of mesh vertex positions and Dvc: + void set_meshSpacing(MFLOAT** const del); + // Set up mesh boundary conditions: + // Face specifies the mesh face, following usual numbering convention. + // MeshBC_E "type" specifies the kind of BC reflective/periodic/none. + void set_MeshBC(unsigned face, MeshBC_E meshBCType); // One face at a time + void set_MeshBC(MeshBC_E* meshBCTypes); // All faces at once + + // Formatted output of Cartesian object: + void print(std::ostream&); + + //----Other Cartesian methods:----------------------------------------------- + // Volume of single cell indexed by input NDIndex; + MFLOAT getCellVolume(const NDIndex&) const; + // Field of volumes of all cells: + Field, Cell>& getCellVolumeField( + Field, Cell>&) const; + // Volume of range of cells bounded by verticies specified by input NDIndex: + MFLOAT getVertRangeVolume(const NDIndex&) const; + // Volume of range of cells spanned by input NDIndex (index of cells): + MFLOAT getCellRangeVolume(const NDIndex&) const; + // Nearest vertex index to (x,y,z): + NDIndex getNearestVertex(const Vektor&) const; + // Nearest vertex index with all vertex coordinates below (x,y,z): + NDIndex getVertexBelow(const Vektor&) const; + // NDIndex for cell in cell-ctrd Field containing the point (x,y,z): + NDIndex getCellContaining(const Vektor& x) const { + return getVertexBelow(x); // I think these functions are identical. -tjw + } + // (x,y,z) coordinates of indexed vertex: + Vektor getVertexPosition(const NDIndex&) const; + // Field of (x,y,z) coordinates of all vertices: + Field, Dim, Cartesian, Vert>& getVertexPositionField( + Field, Dim, Cartesian, Vert>&) const; + // (x,y,z) coordinates of indexed cell: + Vektor getCellPosition(const NDIndex&) const; + // Field of (x,y,z) coordinates of all cells: + Field, Dim, Cartesian, Cell>& getCellPositionField( + Field, Dim, Cartesian, Cell>&) const; + // Vertex-vertex grid spacing of indexed cell: + Vektor getDeltaVertex(const NDIndex&) const; + // Field of vertex-vertex grid spacings of all cells: + Field, Dim, Cartesian, Cell>& getDeltaVertexField( + Field, Dim, Cartesian, Cell>&) const; + // Cell-cell grid spacing of indexed vertex: + Vektor getDeltaCell(const NDIndex&) const; + // Field of cell-cell grid spacings of all vertices: + Field, Dim, Cartesian, Vert>& getDeltaCellField( + Field, Dim, Cartesian, Vert>&) const; + // Array of surface normals to cells adjoining indexed cell: + Vektor* getSurfaceNormals(const NDIndex&) const; + // Array of (pointers to) Fields of surface normals to all cells: + void getSurfaceNormalFields( + Field, Dim, Cartesian, Cell>**) const; + // Similar functions, but specify the surface normal to a single face, using + // the following numbering convention: 0 means low face of 1st dim, 1 means + // high face of 1st dim, 2 means low face of 2nd dim, 3 means high face of + // 2nd dim, and so on: + Vektor getSurfaceNormal(const NDIndex&, unsigned) const; + Field, Dim, Cartesian, Cell>& getSurfaceNormalField( + Field, Dim, Cartesian, Cell>&, unsigned) const; }; // I/O // Stream formatted output of Cartesian object: -template< unsigned Dim, class MFLOAT > -inline -std::ostream& operator<<(std::ostream& out, const Cartesian& mesh) -{ - Cartesian& ncmesh = const_cast&>(mesh); - ncmesh.print(out); - return out; +template +inline std::ostream& operator<<(std::ostream& out, const Cartesian& mesh) { + Cartesian& ncmesh = const_cast&>(mesh); + ncmesh.print(out); + return out; } //***************************************************************************** @@ -290,10 +262,10 @@ std::ostream& operator<<(std::ostream& out, const Cartesian& mesh) // // Definitions of stencils. // -// For each one we have first we have the user level function that takes -// a Field argument and returns an expression template. +// For each one we have first we have the user level function that takes +// a Field argument and returns an expression template. // This is the thing the user code sees. -// These could use some asserts to make sure the Fields have +// These could use some asserts to make sure the Fields have // enough guard cells. // // Then we have the 'apply' function that gets used in the inner loop @@ -321,335 +293,334 @@ std::ostream& operator<<(std::ostream& out, const Cartesian& mesh) //---------------------------------------------------------------------- // Divergence Vektor/Vert -> Scalar/Cell //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Cell>& -Div(Field,1U,Cartesian<1U,MFLOAT>,Vert>& x, - Field,Cell>& r); +template +Field, Cell>& Div( + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& x, + Field, Cell>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Cell>& -Div(Field,2U,Cartesian<2U,MFLOAT>,Vert>& x, - Field,Cell>& r); +template +Field, Cell>& Div( + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& x, + Field, Cell>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Cell>& -Div(Field,3U,Cartesian<3U,MFLOAT>,Vert>& x, - Field,Cell>& r); +template +Field, Cell>& Div( + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& x, + Field, Cell>& r); //---------------------------------------------------------------------- // Divergence Vektor/Cell -> Scalar/Vert //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Vert>& -Div(Field,1U,Cartesian<1U,MFLOAT>,Cell>& x, - Field,Vert>& r); +template +Field, Vert>& Div( + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& x, + Field, Vert>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Vert>& -Div(Field,2U,Cartesian<2U,MFLOAT>,Cell>& x, - Field,Vert>& r); +template +Field, Vert>& Div( + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& x, + Field, Vert>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Vert>& -Div(Field,3U,Cartesian<3U,MFLOAT>,Cell>& x, - Field,Vert>& r); +template +Field, Vert>& Div( + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& x, + Field, Vert>& r); //---------------------------------------------------------------------- // Divergence Vektor/Vert -> Scalar/Vert //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Vert>& -Div(Field,1U,Cartesian<1U,MFLOAT>,Vert>& x, - Field,Vert>& r); +template +Field, Vert>& Div( + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& x, + Field, Vert>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Vert>& -Div(Field,2U,Cartesian<2U,MFLOAT>,Vert>& x, - Field,Vert>& r); +template +Field, Vert>& Div( + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& x, + Field, Vert>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Vert>& -Div(Field,3U,Cartesian<3U,MFLOAT>,Vert>& x, - Field,Vert>& r); +template +Field, Vert>& Div( + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& x, + Field, Vert>& r); //---------------------------------------------------------------------- // Divergence Vektor/Cell -> Scalar/Cell //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Cell>& -Div(Field,1U,Cartesian<1U,MFLOAT>,Cell>& x, - Field,Cell>& r); +template +Field, Cell>& Div( + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& x, + Field, Cell>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Cell>& -Div(Field,2U,Cartesian<2U,MFLOAT>,Cell>& x, - Field,Cell>& r); +template +Field, Cell>& Div( + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& x, + Field, Cell>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Cell>& -Div(Field,3U,Cartesian<3U,MFLOAT>,Cell>& x, - Field,Cell>& r); +template +Field, Cell>& Div( + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& x, + Field, Cell>& r); //---------------------------------------------------------------------- // Divergence Tenzor/Vert -> Vektor/Cell //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Cell>& -Div(Field,1U,Cartesian<1U,MFLOAT>,Vert>& x, - Field,1U,Cartesian<1U,MFLOAT>,Cell>& r); +template +Field, 1U, Cartesian<1U, MFLOAT>, Cell>& Div( + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Cell>& -Div(Field,2U,Cartesian<2U,MFLOAT>,Vert>& x, - Field,2U,Cartesian<2U,MFLOAT>,Cell>& r); +template +Field, 2U, Cartesian<2U, MFLOAT>, Cell>& Div( + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Cell>& -Div(Field,3U,Cartesian<3U,MFLOAT>,Vert>& x, - Field,3U,Cartesian<3U,MFLOAT>,Cell>& r); +template +Field, 3U, Cartesian<3U, MFLOAT>, Cell>& Div( + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& r); //---------------------------------------------------------------------- // Divergence SymTenzor/Vert -> Vektor/Cell //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Cell>& -Div(Field,1U,Cartesian<1U,MFLOAT>,Vert>& x, - Field,1U,Cartesian<1U,MFLOAT>,Cell>& r); +template +Field, 1U, Cartesian<1U, MFLOAT>, Cell>& Div( + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Cell>& -Div(Field,2U,Cartesian<2U,MFLOAT>,Vert>& x, - Field,2U,Cartesian<2U,MFLOAT>,Cell>& r); +template +Field, 2U, Cartesian<2U, MFLOAT>, Cell>& Div( + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Cell>& -Div(Field,3U,Cartesian<3U,MFLOAT>,Vert>& x, - Field,3U,Cartesian<3U,MFLOAT>,Cell>& r); +template +Field, 3U, Cartesian<3U, MFLOAT>, Cell>& Div( + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& r); //---------------------------------------------------------------------- // Divergence Tenzor/Cell -> Vektor/Vert //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Vert>& -Div(Field,1U,Cartesian<1U,MFLOAT>,Cell>& x, - Field,1U,Cartesian<1U,MFLOAT>,Vert>& r); +template +Field, 1U, Cartesian<1U, MFLOAT>, Vert>& Div( + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Vert>& -Div(Field,2U,Cartesian<2U,MFLOAT>,Cell>& x, - Field,2U,Cartesian<2U,MFLOAT>,Vert>& r); +template +Field, 2U, Cartesian<2U, MFLOAT>, Vert>& Div( + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Vert>& -Div(Field,3U,Cartesian<3U,MFLOAT>,Cell>& x, - Field,3U,Cartesian<3U,MFLOAT>,Vert>& r); +template +Field, 3U, Cartesian<3U, MFLOAT>, Vert>& Div( + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& r); //---------------------------------------------------------------------- // Divergence SymTenzor/Cell -> Vektor/Vert //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Vert>& -Div(Field,1U,Cartesian<1U,MFLOAT>,Cell>& x, - Field,1U,Cartesian<1U,MFLOAT>,Vert>& r); +template +Field, 1U, Cartesian<1U, MFLOAT>, Vert>& Div( + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Vert>& -Div(Field,2U,Cartesian<2U,MFLOAT>,Cell>& x, - Field,2U,Cartesian<2U,MFLOAT>,Vert>& r); +template +Field, 2U, Cartesian<2U, MFLOAT>, Vert>& Div( + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Vert>& -Div(Field,3U,Cartesian<3U,MFLOAT>,Cell>& x, - Field,3U,Cartesian<3U,MFLOAT>,Vert>& r); - +template +Field, 3U, Cartesian<3U, MFLOAT>, Vert>& Div( + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& r); //---------------------------------------------------------------------- // Grad Scalar/Vert -> Vektor/Cell //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Cell>& -Grad(Field,Vert>& x, - Field,1U,Cartesian<1U,MFLOAT>,Cell>& r); +template +Field, 1U, Cartesian<1U, MFLOAT>, Cell>& Grad( + Field, Vert>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Cell>& -Grad(Field,Vert>& x, - Field,2U,Cartesian<2U,MFLOAT>,Cell>& r); +template +Field, 2U, Cartesian<2U, MFLOAT>, Cell>& Grad( + Field, Vert>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Cell>& -Grad(Field,Vert>& x, - Field,3U,Cartesian<3U,MFLOAT>,Cell>& r); +template +Field, 3U, Cartesian<3U, MFLOAT>, Cell>& Grad( + Field, Vert>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& r); //---------------------------------------------------------------------- // Grad Scalar/Cell -> Vektor/Vert //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Vert>& -Grad(Field,Cell>& x, - Field,1U,Cartesian<1U,MFLOAT>,Vert>& r); +template +Field, 1U, Cartesian<1U, MFLOAT>, Vert>& Grad( + Field, Cell>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Vert>& -Grad(Field,Cell>& x, - Field,2U,Cartesian<2U,MFLOAT>,Vert>& r); +template +Field, 2U, Cartesian<2U, MFLOAT>, Vert>& Grad( + Field, Cell>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Vert>& -Grad(Field,Cell>& x, - Field,3U,Cartesian<3U,MFLOAT>,Vert>& r); +template +Field, 3U, Cartesian<3U, MFLOAT>, Vert>& Grad( + Field, Cell>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& r); //---------------------------------------------------------------------- // Grad Scalar/Vert -> Vektor/Vert //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Vert>& -Grad(Field,Vert>& x, - Field,1U,Cartesian<1U,MFLOAT>,Vert>& r); +template +Field, 1U, Cartesian<1U, MFLOAT>, Vert>& Grad( + Field, Vert>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Vert>& -Grad(Field,Vert>& x, - Field,2U,Cartesian<2U,MFLOAT>,Vert>& r); +template +Field, 2U, Cartesian<2U, MFLOAT>, Vert>& Grad( + Field, Vert>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Vert>& -Grad(Field,Vert>& x, - Field,3U,Cartesian<3U,MFLOAT>,Vert>& r); +template +Field, 3U, Cartesian<3U, MFLOAT>, Vert>& Grad( + Field, Vert>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& r); //---------------------------------------------------------------------- // Grad Scalar/Cell -> Vektor/Cell //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Cell>& -Grad(Field,Cell>& x, - Field,1U,Cartesian<1U,MFLOAT>,Cell>& r); +template +Field, 1U, Cartesian<1U, MFLOAT>, Cell>& Grad( + Field, Cell>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Cell>& -Grad(Field,Cell>& x, - Field,2U,Cartesian<2U,MFLOAT>,Cell>& r); +template +Field, 2U, Cartesian<2U, MFLOAT>, Cell>& Grad( + Field, Cell>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Cell>& -Grad(Field,Cell>& x, - Field,3U,Cartesian<3U,MFLOAT>,Cell>& r); +template +Field, 3U, Cartesian<3U, MFLOAT>, Cell>& Grad( + Field, Cell>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& r); //---------------------------------------------------------------------- // Grad Vektor/Vert -> Tenzor/Cell //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Cell>& -Grad(Field,1U,Cartesian<1U,MFLOAT>,Vert>& x, - Field,1U,Cartesian<1U,MFLOAT>,Cell>& r); +template +Field, 1U, Cartesian<1U, MFLOAT>, Cell>& Grad( + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Cell>& -Grad(Field,2U,Cartesian<2U,MFLOAT>,Vert>& x, - Field,2U,Cartesian<2U,MFLOAT>,Cell>& r); +template +Field, 2U, Cartesian<2U, MFLOAT>, Cell>& Grad( + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Cell>& -Grad(Field,3U,Cartesian<3U,MFLOAT>,Vert>& x, - Field,3U,Cartesian<3U,MFLOAT>,Cell>& r); +template +Field, 3U, Cartesian<3U, MFLOAT>, Cell>& Grad( + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& r); //---------------------------------------------------------------------- // Grad Vektor/Cell -> Tenzor/Vert //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Vert>& -Grad(Field,1U,Cartesian<1U,MFLOAT>,Cell>& x, - Field,1U,Cartesian<1U,MFLOAT>,Vert>& r); +template +Field, 1U, Cartesian<1U, MFLOAT>, Vert>& Grad( + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Vert>& -Grad(Field,2U,Cartesian<2U,MFLOAT>,Cell>& x, - Field,2U,Cartesian<2U,MFLOAT>,Vert>& r); +template +Field, 2U, Cartesian<2U, MFLOAT>, Vert>& Grad( + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& r); //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Vert>& -Grad(Field,3U,Cartesian<3U,MFLOAT>,Cell>& x, - Field,3U,Cartesian<3U,MFLOAT>,Vert>& r); +template +Field, 3U, Cartesian<3U, MFLOAT>, Vert>& Grad( + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& r); namespace IPPL { -//---------------------------------------------------------------------- -// Weighted average Cell to Vert -//---------------------------------------------------------------------- -template < class T1, class T2, class MFLOAT > -Field,Vert>& -Average(Field,Cell>& x, - Field,Cell>& w, - Field,Vert>& r) ; -//---------------------------------------------------------------------- -template < class T1, class T2, class MFLOAT > -Field,Vert>& -Average(Field,Cell>& x, - Field,Cell>& w, - Field,Vert>& r); -//---------------------------------------------------------------------- -template < class T1, class T2, class MFLOAT > -Field,Vert>& -Average(Field,Cell>& x, - Field,Cell>& w, - Field,Vert>& r); - -//---------------------------------------------------------------------- -// Weighted average Vert to Cell -//---------------------------------------------------------------------- -template < class T1, class T2, class MFLOAT > -Field,Cell>& -Average(Field,Vert>& x, - Field,Vert>& w, - Field,Cell>& r) ; -//---------------------------------------------------------------------- -template < class T1, class T2, class MFLOAT > -Field,Cell>& -Average(Field,Vert>& x, - Field,Vert>& w, - Field,Cell>& r); -//---------------------------------------------------------------------- -template < class T1, class T2, class MFLOAT > -Field,Cell>& -Average(Field,Vert>& x, - Field,Vert>& w, - Field,Cell>& r); - -//---------------------------------------------------------------------- - -//---------------------------------------------------------------------- -// Unweighted average Cell to Vert -//---------------------------------------------------------------------- -template < class T1, class MFLOAT > -Field,Vert>& -Average(Field,Cell>& x, - Field,Vert>& r) ; -//---------------------------------------------------------------------- -template < class T1, class MFLOAT > -Field,Vert>& -Average(Field,Cell>& x, - Field,Vert>& r); -//---------------------------------------------------------------------- -template < class T1, class MFLOAT > -Field,Vert>& -Average(Field,Cell>& x, - Field,Vert>& r); - -//---------------------------------------------------------------------- -// Unweighted average Vert to Cell -//---------------------------------------------------------------------- -template < class T1, class MFLOAT > -Field,Cell>& -Average(Field,Vert>& x, - Field,Cell>& r) ; -//---------------------------------------------------------------------- -template < class T1, class MFLOAT > -Field,Cell>& -Average(Field,Vert>& x, - Field,Cell>& r); -//---------------------------------------------------------------------- -template < class T1, class MFLOAT > -Field,Cell>& -Average(Field,Vert>& x, - Field,Cell>& r); - -//---------------------------------------------------------------------- -} + //---------------------------------------------------------------------- + // Weighted average Cell to Vert + //---------------------------------------------------------------------- + template + Field, Vert>& Average( + Field, Cell>& x, + Field, Cell>& w, + Field, Vert>& r); + //---------------------------------------------------------------------- + template + Field, Vert>& Average( + Field, Cell>& x, + Field, Cell>& w, + Field, Vert>& r); + //---------------------------------------------------------------------- + template + Field, Vert>& Average( + Field, Cell>& x, + Field, Cell>& w, + Field, Vert>& r); + + //---------------------------------------------------------------------- + // Weighted average Vert to Cell + //---------------------------------------------------------------------- + template + Field, Cell>& Average( + Field, Vert>& x, + Field, Vert>& w, + Field, Cell>& r); + //---------------------------------------------------------------------- + template + Field, Cell>& Average( + Field, Vert>& x, + Field, Vert>& w, + Field, Cell>& r); + //---------------------------------------------------------------------- + template + Field, Cell>& Average( + Field, Vert>& x, + Field, Vert>& w, + Field, Cell>& r); + + //---------------------------------------------------------------------- + + //---------------------------------------------------------------------- + // Unweighted average Cell to Vert + //---------------------------------------------------------------------- + template + Field, Vert>& Average( + Field, Cell>& x, + Field, Vert>& r); + //---------------------------------------------------------------------- + template + Field, Vert>& Average( + Field, Cell>& x, + Field, Vert>& r); + //---------------------------------------------------------------------- + template + Field, Vert>& Average( + Field, Cell>& x, + Field, Vert>& r); + + //---------------------------------------------------------------------- + // Unweighted average Vert to Cell + //---------------------------------------------------------------------- + template + Field, Cell>& Average( + Field, Vert>& x, + Field, Cell>& r); + //---------------------------------------------------------------------- + template + Field, Cell>& Average( + Field, Vert>& x, + Field, Cell>& r); + //---------------------------------------------------------------------- + template + Field, Cell>& Average( + Field, Vert>& x, + Field, Cell>& r); + + //---------------------------------------------------------------------- +} // namespace IPPL #include "Meshes/Cartesian.hpp" -#endif // CARTESIAN_H +#endif // CARTESIAN_H /*************************************************************************** * $RCSfile: Cartesian.h,v $ $Author: adelmann $ * $Revision: 1.1.1.1 $ $Date: 2003/01/23 07:40:28 $ - * IPPL_VERSION_ID: $Id: Cartesian.h,v 1.1.1.1 2003/01/23 07:40:28 adelmann Exp $ + * IPPL_VERSION_ID: $Id: Cartesian.h,v 1.1.1.1 2003/01/23 07:40:28 adelmann Exp $ ***************************************************************************/ diff --git a/src/Meshes/Cartesian.hpp b/src/Meshes/Cartesian.hpp index 1541d1c45..228f07f3e 100644 --- a/src/Meshes/Cartesian.hpp +++ b/src/Meshes/Cartesian.hpp @@ -27,100 +27,89 @@ // Implementations for Cartesian mesh class (nonuniform spacings) // include files -#include "Utility/PAssert.h" -#include "Utility/IpplException.h" -#include "Utility/IpplInfo.h" +#include "Field/Assign.h" +#include "Field/AssignDefs.h" #include "Field/BareField.h" #include "Field/BrickExpression.h" -#include "Field/LField.h" #include "Field/Field.h" -#include "Field/Assign.h" -#include "Field/AssignDefs.h" +#include "Field/LField.h" +#include "Utility/IpplException.h" +#include "Utility/IpplInfo.h" +#include "Utility/PAssert.h" //----------------------------------------------------------------------------- // Setup chores common to all constructors: //----------------------------------------------------------------------------- template -void -Cartesian:: -setup() -{ - hasSpacingFields = false; +void Cartesian::setup() { + hasSpacingFields = false; } //----------------------------------------------------------------------------- // Constructors from NDIndex object: //----------------------------------------------------------------------------- template -Cartesian:: -Cartesian(const NDIndex& ndi) -{ - unsigned int d,i; - for (d=0; d::Cartesian(const NDIndex& ndi) { + unsigned int d, i; + for (d = 0; d < Dim; d++) + gridSizes[d] = ndi[d].length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + for (d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + origin(d) = ndi[d].first(); // Default origin at ndi[d].first() + // default mesh spacing from stride() + for (i = 0; i < gridSizes[d] - 1; i++) { + (meshSpacing[d])[i] = ndi[d].stride(); + (meshPosition[d])[i] = MFLOAT(i); + } + (meshPosition[d])[gridSizes[d] - 1] = MFLOAT(gridSizes[d] - 1); + } + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify mesh spacings: template -Cartesian:: -Cartesian(const NDIndex& ndi, MFLOAT** const delX) -{ - unsigned int d; - for (d=0; d::Cartesian(const NDIndex& ndi, MFLOAT** const delX) { + unsigned int d; + for (d = 0; d < Dim; d++) + gridSizes[d] = ndi[d].length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + for (d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + origin(d) = ndi[d].first(); // Default origin at ndi[d].first() + } + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify mesh spacings and origin: template -Cartesian:: -Cartesian(const NDIndex& ndi, MFLOAT** const delX, - const Vektor& orig) -{ - int d; - for (d=0; d::Cartesian(const NDIndex& ndi, MFLOAT** const delX, + const Vektor& orig) { + int d; + for (d = 0; d < Dim; d++) + gridSizes[d] = ndi[d].length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + for (d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_origin(orig); // Set origin. + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify a MeshBC_E array for mesh boundary conditions: template -Cartesian:: -Cartesian(const NDIndex& ndi, MFLOAT** const delX, - const Vektor& orig, MeshBC_E* const mbc) -{ - int d; - for (d=0; d::Cartesian(const NDIndex& ndi, MFLOAT** const delX, + const Vektor& orig, MeshBC_E* const mbc) { + int d; + for (d = 0; d < Dim; d++) + gridSizes[d] = ndi[d].length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + set_origin(orig); // Set origin. + set_MeshBC(mbc); // Set up mesh boundary conditions + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } //----------------------------------------------------------------------------- // Constructors from Index objects: @@ -128,328 +117,292 @@ Cartesian(const NDIndex& ndi, MFLOAT** const delX, //===========1D============ template -Cartesian:: -Cartesian(const Index& I) -{ - PInsist(Dim==1,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - origin(0) = I.first(); // Default origin at I.first() - unsigned int i; - // Default mesh spacing from stride() - for (i=0; i < gridSizes[0]-1; i++) { - (meshSpacing[0])[i] = I.stride(); - (meshPosition[0])[i] = MFLOAT(i); - } - (meshPosition[0])[gridSizes[0]-1] = MFLOAT(gridSizes[0]-1); - for (unsigned int d=0; d::Cartesian(const Index& I) { + PInsist(Dim == 1, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + origin(0) = I.first(); // Default origin at I.first() + unsigned int i; + // Default mesh spacing from stride() + for (i = 0; i < gridSizes[0] - 1; i++) { + (meshSpacing[0])[i] = I.stride(); + (meshPosition[0])[i] = MFLOAT(i); + } + (meshPosition[0])[gridSizes[0] - 1] = MFLOAT(gridSizes[0] - 1); + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify mesh spacings: template -Cartesian:: -Cartesian(const Index& I, MFLOAT** const delX) -{ - PInsist(Dim==1,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - origin(0) = I.first(); // Default origin at I.first() - for (unsigned int d=0; d::Cartesian(const Index& I, MFLOAT** const delX) { + PInsist(Dim == 1, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + origin(0) = I.first(); // Default origin at I.first() + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify mesh spacings and origin: template -Cartesian:: -Cartesian(const Index& I, MFLOAT** const delX, - const Vektor& orig) -{ - PInsist(Dim==1,"Number of Index arguments does not match mesh dimension!!"); - setup(); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - for (unsigned int d=0; d::Cartesian(const Index& I, MFLOAT** const delX, + const Vektor& orig) { + PInsist(Dim == 1, "Number of Index arguments does not match mesh dimension!!"); + setup(); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_origin(orig); // Set origin. + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify a MeshBC_E array for mesh boundary conditions: template -Cartesian:: -Cartesian(const Index& I, MFLOAT** const delX, - const Vektor& orig, MeshBC_E* const mbc) -{ - PInsist(Dim==1,"Number of Index arguments does not match mesh dimension!!"); - setup(); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - set_origin(orig); // Set origin. - set_MeshBC(mbc); // Set up mesh boundary conditions - set_meshSpacing(delX); // Set mesh spacings and compute cell volume - set_Dvc(); // Set derivative coefficients from spacings. +Cartesian::Cartesian(const Index& I, MFLOAT** const delX, + const Vektor& orig, MeshBC_E* const mbc) { + PInsist(Dim == 1, "Number of Index arguments does not match mesh dimension!!"); + setup(); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + set_origin(orig); // Set origin. + set_MeshBC(mbc); // Set up mesh boundary conditions + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } //===========2D============ template -Cartesian:: -Cartesian(const Index& I, const Index& J) -{ - PInsist(Dim==2,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - gridSizes[1] = J.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - origin(0) = I.first(); // Default origin at I.first(),J.first() - origin(1) = J.first(); - unsigned int i; - // Default mesh spacing from stride() - for (i=0; i < gridSizes[0]-1; i++) { - (meshSpacing[0])[i] = I.stride(); - (meshPosition[0])[i] = MFLOAT(i); - } - (meshPosition[0])[gridSizes[0]-1] = MFLOAT(gridSizes[0]-1); - for (i=0; i < gridSizes[1]-1; i++) { - (meshSpacing[1])[i] = J.stride(); - (meshPosition[1])[i] = MFLOAT(i); - } - (meshPosition[1])[gridSizes[1]-1] = MFLOAT(gridSizes[1]-1); - for (unsigned int d=0; d::Cartesian(const Index& I, const Index& J) { + PInsist(Dim == 2, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + gridSizes[1] = J.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + origin(0) = I.first(); // Default origin at I.first(),J.first() + origin(1) = J.first(); + unsigned int i; + // Default mesh spacing from stride() + for (i = 0; i < gridSizes[0] - 1; i++) { + (meshSpacing[0])[i] = I.stride(); + (meshPosition[0])[i] = MFLOAT(i); + } + (meshPosition[0])[gridSizes[0] - 1] = MFLOAT(gridSizes[0] - 1); + for (i = 0; i < gridSizes[1] - 1; i++) { + (meshSpacing[1])[i] = J.stride(); + (meshPosition[1])[i] = MFLOAT(i); + } + (meshPosition[1])[gridSizes[1] - 1] = MFLOAT(gridSizes[1] - 1); + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify mesh spacings: template -Cartesian:: -Cartesian(const Index& I, const Index& J, MFLOAT** const delX) -{ - PInsist(Dim==2,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - gridSizes[1] = J.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - origin(0) = I.first(); // Default origin at I.first(),J.first() - origin(1) = J.first(); - for (unsigned int d=0; d::Cartesian(const Index& I, const Index& J, MFLOAT** const delX) { + PInsist(Dim == 2, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + gridSizes[1] = J.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + origin(0) = I.first(); // Default origin at I.first(),J.first() + origin(1) = J.first(); + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify mesh spacings and origin: template -Cartesian:: -Cartesian(const Index& I, const Index& J, MFLOAT** const delX, - const Vektor& orig) -{ - PInsist(Dim==2,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - gridSizes[1] = J.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - for (unsigned int d=0; d::Cartesian(const Index& I, const Index& J, MFLOAT** const delX, + const Vektor& orig) { + PInsist(Dim == 2, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + gridSizes[1] = J.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_origin(orig); // Set origin. + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify a MeshBC_E array for mesh boundary conditions: template -Cartesian:: -Cartesian(const Index& I, const Index& J, MFLOAT** const delX, - const Vektor& orig, MeshBC_E* const mbc) -{ - PInsist(Dim==2,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - gridSizes[1] = J.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - set_origin(orig); // Set origin. - set_MeshBC(mbc); // Set up mesh boundary conditions - set_meshSpacing(delX); // Set mesh spacings and compute cell volume - set_Dvc(); // Set derivative coefficients from spacings. +Cartesian::Cartesian(const Index& I, const Index& J, MFLOAT** const delX, + const Vektor& orig, MeshBC_E* const mbc) { + PInsist(Dim == 2, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + gridSizes[1] = J.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + set_origin(orig); // Set origin. + set_MeshBC(mbc); // Set up mesh boundary conditions + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } //===========3D============ template -Cartesian:: -Cartesian(const Index& I, const Index& J, const Index& K) -{ - PInsist(Dim==3,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - gridSizes[1] = J.length(); // Number of vertices along this dimension. - gridSizes[2] = K.length(); // Number of vertices along this dimension. - // Setup chores, such as array allocations - setup(); - origin(0) = I.first(); // Default origin at I.first(),J.first(),K.first() - origin(1) = J.first(); - origin(2) = K.first(); - unsigned int i; - // Default mesh spacing from stride() - for (i=0; i < gridSizes[0]-1; i++) { - (meshSpacing[0])[i] = I.stride(); - (meshPosition[0])[i] = MFLOAT(i); - } - (meshPosition[0])[gridSizes[0]-1] = MFLOAT(gridSizes[0]-1); - for (i=0; i < gridSizes[1]-1; i++) { - (meshSpacing[1])[i] = J.stride(); - (meshPosition[1])[i] = MFLOAT(i); - } - (meshPosition[1])[gridSizes[1]-1] = MFLOAT(gridSizes[1]-1); - for (i=0; i < gridSizes[2]-1; i++) { - (meshSpacing[2])[i] = K.stride(); - (meshPosition[2])[i] = MFLOAT(i); - } - (meshPosition[2])[gridSizes[2]-1] = MFLOAT(gridSizes[2]-1); - - for (unsigned int d=0; d::Cartesian(const Index& I, const Index& J, const Index& K) { + PInsist(Dim == 3, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + gridSizes[1] = J.length(); // Number of vertices along this dimension. + gridSizes[2] = K.length(); // Number of vertices along this dimension. + // Setup chores, such as array allocations + setup(); + origin(0) = I.first(); // Default origin at I.first(),J.first(),K.first() + origin(1) = J.first(); + origin(2) = K.first(); + unsigned int i; + // Default mesh spacing from stride() + for (i = 0; i < gridSizes[0] - 1; i++) { + (meshSpacing[0])[i] = I.stride(); + (meshPosition[0])[i] = MFLOAT(i); + } + (meshPosition[0])[gridSizes[0] - 1] = MFLOAT(gridSizes[0] - 1); + for (i = 0; i < gridSizes[1] - 1; i++) { + (meshSpacing[1])[i] = J.stride(); + (meshPosition[1])[i] = MFLOAT(i); + } + (meshPosition[1])[gridSizes[1] - 1] = MFLOAT(gridSizes[1] - 1); + for (i = 0; i < gridSizes[2] - 1; i++) { + (meshSpacing[2])[i] = K.stride(); + (meshPosition[2])[i] = MFLOAT(i); + } + (meshPosition[2])[gridSizes[2] - 1] = MFLOAT(gridSizes[2] - 1); + + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify mesh spacings: template -Cartesian:: -Cartesian(const Index& I, const Index& J, const Index& K, - MFLOAT** const delX) -{ - PInsist(Dim==3,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - gridSizes[1] = J.length(); // Number of vertices along this dimension. - gridSizes[2] = K.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - origin(0) = I.first(); // Default origin at I.first(),J.first(),K.first() - origin(1) = J.first(); - origin(2) = K.first(); - for (unsigned int d=0; d::Cartesian(const Index& I, const Index& J, const Index& K, + MFLOAT** const delX) { + PInsist(Dim == 3, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + gridSizes[1] = J.length(); // Number of vertices along this dimension. + gridSizes[2] = K.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + origin(0) = I.first(); // Default origin at I.first(),J.first(),K.first() + origin(1) = J.first(); + origin(2) = K.first(); + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify mesh spacings and origin: template -Cartesian:: -Cartesian(const Index& I, const Index& J, const Index& K, - MFLOAT** const delX, const Vektor& orig) -{ - PInsist(Dim==3,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - gridSizes[1] = J.length(); // Number of vertices along this dimension. - gridSizes[2] = K.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - for (unsigned int d=0; d::Cartesian(const Index& I, const Index& J, const Index& K, + MFLOAT** const delX, const Vektor& orig) { + PInsist(Dim == 3, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + gridSizes[1] = J.length(); // Number of vertices along this dimension. + gridSizes[2] = K.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_origin(orig); // Set origin. + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify a MeshBC_E array for mesh boundary conditions: template -Cartesian:: -Cartesian(const Index& I, const Index& J, const Index& K, - MFLOAT** const delX, const Vektor& orig, - MeshBC_E* const mbc) -{ - PInsist(Dim==3,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - gridSizes[1] = J.length(); // Number of vertices along this dimension. - gridSizes[2] = K.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - set_origin(orig); // Set origin. - set_MeshBC(mbc); // Set up mesh boundary conditions - set_meshSpacing(delX); // Set mesh spacings and compute cell volume - set_Dvc(); // Set derivative coefficients from spacings. +Cartesian::Cartesian(const Index& I, const Index& J, const Index& K, + MFLOAT** const delX, const Vektor& orig, + MeshBC_E* const mbc) { + PInsist(Dim == 3, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + gridSizes[1] = J.length(); // Number of vertices along this dimension. + gridSizes[2] = K.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + set_origin(orig); // Set origin. + set_MeshBC(mbc); // Set up mesh boundary conditions + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } //----------------------------------------------------------------------------- // initialize using NDIndex object: //----------------------------------------------------------------------------- template -void -Cartesian:: -initialize(const NDIndex& ndi) -{ - unsigned int d,i; - for (d=0; d::initialize(const NDIndex& ndi) { + unsigned int d, i; + for (d = 0; d < Dim; d++) + gridSizes[d] = ndi[d].length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + for (d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + origin(d) = ndi[d].first(); // Default origin at ndi[d].first() + // default mesh spacing from stride() + for (i = 0; i < gridSizes[d] - 1; i++) { + (meshSpacing[d])[i] = ndi[d].stride(); + (meshPosition[d])[i] = MFLOAT(i); + } + (meshPosition[d])[gridSizes[d] - 1] = MFLOAT(gridSizes[d] - 1); + } + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify mesh spacings: template -void -Cartesian:: -initialize(const NDIndex& ndi, MFLOAT** const delX) -{ - unsigned int d; - for (d=0; d::initialize(const NDIndex& ndi, MFLOAT** const delX) { + unsigned int d; + for (d = 0; d < Dim; d++) + gridSizes[d] = ndi[d].length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + for (d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + origin(d) = ndi[d].first(); // Default origin at ndi[d].first() + } + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify mesh spacings and origin: template -void -Cartesian:: -initialize(const NDIndex& ndi, MFLOAT** const delX, - const Vektor& orig) -{ - int d; - for (d=0; d::initialize(const NDIndex& ndi, MFLOAT** const delX, + const Vektor& orig) { + int d; + for (d = 0; d < Dim; d++) + gridSizes[d] = ndi[d].length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + for (d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_origin(orig); // Set origin. + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify a MeshBC_E array for mesh boundary conditions: template -void -Cartesian:: -initialize(const NDIndex& ndi, MFLOAT** const delX, - const Vektor& orig, MeshBC_E* const mbc) -{ - int d; - for (d=0; d::initialize(const NDIndex& ndi, MFLOAT** const delX, + const Vektor& orig, MeshBC_E* const mbc) { + int d; + for (d = 0; d < Dim; d++) + gridSizes[d] = ndi[d].length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + set_origin(orig); // Set origin. + set_MeshBC(mbc); // Set up mesh boundary conditions + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } //----------------------------------------------------------------------------- // initialize using Index objects: @@ -457,350 +410,307 @@ initialize(const NDIndex& ndi, MFLOAT** const delX, //===========1D============ template -void -Cartesian:: -initialize(const Index& I) -{ - PInsist(Dim==1,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - origin(0) = I.first(); // Default origin at I.first() - unsigned int i; - // Default mesh spacing from stride() - for (i=0; i < gridSizes[0]-1; i++) { - (meshSpacing[0])[i] = I.stride(); - (meshPosition[0])[i] = MFLOAT(i); - } - (meshPosition[0])[gridSizes[0]-1] = MFLOAT(gridSizes[0]-1); - for (unsigned int d=0; d::initialize(const Index& I) { + PInsist(Dim == 1, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + origin(0) = I.first(); // Default origin at I.first() + unsigned int i; + // Default mesh spacing from stride() + for (i = 0; i < gridSizes[0] - 1; i++) { + (meshSpacing[0])[i] = I.stride(); + (meshPosition[0])[i] = MFLOAT(i); + } + (meshPosition[0])[gridSizes[0] - 1] = MFLOAT(gridSizes[0] - 1); + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify mesh spacings: template -void -Cartesian:: -initialize(const Index& I, MFLOAT** const delX) -{ - PInsist(Dim==1,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - origin(0) = I.first(); // Default origin at I.first() - for (unsigned int d=0; d::initialize(const Index& I, MFLOAT** const delX) { + PInsist(Dim == 1, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + origin(0) = I.first(); // Default origin at I.first() + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify mesh spacings and origin: template -void -Cartesian:: -initialize(const Index& I, MFLOAT** const delX, - const Vektor& orig) -{ - PInsist(Dim==1,"Number of Index arguments does not match mesh dimension!!"); - setup(); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - for (unsigned int d=0; d::initialize(const Index& I, MFLOAT** const delX, + const Vektor& orig) { + PInsist(Dim == 1, "Number of Index arguments does not match mesh dimension!!"); + setup(); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_origin(orig); // Set origin. + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify a MeshBC_E array for mesh boundary conditions: template -void -Cartesian:: -initialize(const Index& I, MFLOAT** const delX, - const Vektor& orig, MeshBC_E* const mbc) -{ - PInsist(Dim==1,"Number of Index arguments does not match mesh dimension!!"); - setup(); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - set_origin(orig); // Set origin. - set_MeshBC(mbc); // Set up mesh boundary conditions - set_meshSpacing(delX); // Set mesh spacings and compute cell volume - set_Dvc(); // Set derivative coefficients from spacings. +void Cartesian::initialize(const Index& I, MFLOAT** const delX, + const Vektor& orig, MeshBC_E* const mbc) { + PInsist(Dim == 1, "Number of Index arguments does not match mesh dimension!!"); + setup(); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + set_origin(orig); // Set origin. + set_MeshBC(mbc); // Set up mesh boundary conditions + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } //===========2D============ template -void -Cartesian:: -initialize(const Index& I, const Index& J) -{ - PInsist(Dim==2,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - gridSizes[1] = J.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - origin(0) = I.first(); // Default origin at I.first(),J.first() - origin(1) = J.first(); - unsigned int i; - // Default mesh spacing from stride() - for (i=0; i < gridSizes[0]-1; i++) { - (meshSpacing[0])[i] = I.stride(); - (meshPosition[0])[i] = MFLOAT(i); - } - (meshPosition[0])[gridSizes[0]-1] = MFLOAT(gridSizes[0]-1); - for (i=0; i < gridSizes[1]-1; i++) { - (meshSpacing[1])[i] = J.stride(); - (meshPosition[1])[i] = MFLOAT(i); - } - (meshPosition[1])[gridSizes[1]-1] = MFLOAT(gridSizes[1]-1); - for (unsigned int d=0; d::initialize(const Index& I, const Index& J) { + PInsist(Dim == 2, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + gridSizes[1] = J.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + origin(0) = I.first(); // Default origin at I.first(),J.first() + origin(1) = J.first(); + unsigned int i; + // Default mesh spacing from stride() + for (i = 0; i < gridSizes[0] - 1; i++) { + (meshSpacing[0])[i] = I.stride(); + (meshPosition[0])[i] = MFLOAT(i); + } + (meshPosition[0])[gridSizes[0] - 1] = MFLOAT(gridSizes[0] - 1); + for (i = 0; i < gridSizes[1] - 1; i++) { + (meshSpacing[1])[i] = J.stride(); + (meshPosition[1])[i] = MFLOAT(i); + } + (meshPosition[1])[gridSizes[1] - 1] = MFLOAT(gridSizes[1] - 1); + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify mesh spacings: template -void -Cartesian:: -initialize(const Index& I, const Index& J, MFLOAT** const delX) -{ - PInsist(Dim==2,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - gridSizes[1] = J.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - origin(0) = I.first(); // Default origin at I.first(),J.first() - origin(1) = J.first(); - for (unsigned int d=0; d::initialize(const Index& I, const Index& J, MFLOAT** const delX) { + PInsist(Dim == 2, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + gridSizes[1] = J.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + origin(0) = I.first(); // Default origin at I.first(),J.first() + origin(1) = J.first(); + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify mesh spacings and origin: template -void -Cartesian:: -initialize(const Index& I, const Index& J, MFLOAT** const delX, - const Vektor& orig) -{ - PInsist(Dim==2,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - gridSizes[1] = J.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - for (unsigned int d=0; d::initialize(const Index& I, const Index& J, MFLOAT** const delX, + const Vektor& orig) { + PInsist(Dim == 2, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + gridSizes[1] = J.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_origin(orig); // Set origin. + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify a MeshBC_E array for mesh boundary conditions: template -void -Cartesian:: -initialize(const Index& I, const Index& J, MFLOAT** const delX, - const Vektor& orig, MeshBC_E* const mbc) -{ - PInsist(Dim==2,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - gridSizes[1] = J.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - set_origin(orig); // Set origin. - set_MeshBC(mbc); // Set up mesh boundary conditions - set_meshSpacing(delX); // Set mesh spacings and compute cell volume - set_Dvc(); // Set derivative coefficients from spacings. +void Cartesian::initialize(const Index& I, const Index& J, MFLOAT** const delX, + const Vektor& orig, MeshBC_E* const mbc) { + PInsist(Dim == 2, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + gridSizes[1] = J.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + set_origin(orig); // Set origin. + set_MeshBC(mbc); // Set up mesh boundary conditions + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } //===========3D============ template -void -Cartesian:: -initialize(const Index& I, const Index& J, const Index& K) -{ - PInsist(Dim==3,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - gridSizes[1] = J.length(); // Number of vertices along this dimension. - gridSizes[2] = K.length(); // Number of vertices along this dimension. - // Setup chores, such as array allocations - setup(); - origin(0) = I.first(); // Default origin at I.first(),J.first(),K.first() - origin(1) = J.first(); - origin(2) = K.first(); - int i; - // Default mesh spacing from stride() - for (i=0; i < gridSizes[0]-1; i++) { - (meshSpacing[0])[i] = I.stride(); - (meshPosition[0])[i] = MFLOAT(i); - } - (meshPosition[0])[gridSizes[0]-1] = MFLOAT(gridSizes[0]-1); - for (i=0; i < gridSizes[1]-1; i++) { - (meshSpacing[1])[i] = J.stride(); - (meshPosition[1])[i] = MFLOAT(i); - } - (meshPosition[1])[gridSizes[1]-1] = MFLOAT(gridSizes[1]-1); - for (i=0; i < gridSizes[2]-1; i++) { - (meshSpacing[2])[i] = K.stride(); - (meshPosition[2])[i] = MFLOAT(i); - } - (meshPosition[2])[gridSizes[2]-1] = MFLOAT(gridSizes[2]-1); - - for (unsigned int d=0; d::initialize(const Index& I, const Index& J, const Index& K) { + PInsist(Dim == 3, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + gridSizes[1] = J.length(); // Number of vertices along this dimension. + gridSizes[2] = K.length(); // Number of vertices along this dimension. + // Setup chores, such as array allocations + setup(); + origin(0) = I.first(); // Default origin at I.first(),J.first(),K.first() + origin(1) = J.first(); + origin(2) = K.first(); + int i; + // Default mesh spacing from stride() + for (i = 0; i < gridSizes[0] - 1; i++) { + (meshSpacing[0])[i] = I.stride(); + (meshPosition[0])[i] = MFLOAT(i); + } + (meshPosition[0])[gridSizes[0] - 1] = MFLOAT(gridSizes[0] - 1); + for (i = 0; i < gridSizes[1] - 1; i++) { + (meshSpacing[1])[i] = J.stride(); + (meshPosition[1])[i] = MFLOAT(i); + } + (meshPosition[1])[gridSizes[1] - 1] = MFLOAT(gridSizes[1] - 1); + for (i = 0; i < gridSizes[2] - 1; i++) { + (meshSpacing[2])[i] = K.stride(); + (meshPosition[2])[i] = MFLOAT(i); + } + (meshPosition[2])[gridSizes[2] - 1] = MFLOAT(gridSizes[2] - 1); + + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify mesh spacings: template -void -Cartesian:: -initialize(const Index& I, const Index& J, const Index& K, - MFLOAT** const delX) -{ - PInsist(Dim==3,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - gridSizes[1] = J.length(); // Number of vertices along this dimension. - gridSizes[2] = K.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - origin(0) = I.first(); // Default origin at I.first(),J.first(),K.first() - origin(1) = J.first(); - origin(2) = K.first(); - for (unsigned int d=0; d::initialize(const Index& I, const Index& J, const Index& K, + MFLOAT** const delX) { + PInsist(Dim == 3, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + gridSizes[1] = J.length(); // Number of vertices along this dimension. + gridSizes[2] = K.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + origin(0) = I.first(); // Default origin at I.first(),J.first(),K.first() + origin(1) = J.first(); + origin(2) = K.first(); + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify mesh spacings and origin: template -void -Cartesian:: -initialize(const Index& I, const Index& J, const Index& K, - MFLOAT** const delX, const Vektor& orig) -{ - PInsist(Dim==3,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - gridSizes[1] = J.length(); // Number of vertices along this dimension. - gridSizes[2] = K.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - for (unsigned int d=0; d::initialize(const Index& I, const Index& J, const Index& K, + MFLOAT** const delX, const Vektor& orig) { + PInsist(Dim == 3, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + gridSizes[1] = J.length(); // Number of vertices along this dimension. + gridSizes[2] = K.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + for (unsigned int d = 0; d < Dim; d++) { + MeshBC[2 * d] = Reflective; // Default mesh: reflective boundary conds + MeshBC[2 * d + 1] = Reflective; // Default mesh: reflective boundary conds + } + set_origin(orig); // Set origin. + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } // Also specify a MeshBC_E array for mesh boundary conditions: template -void -Cartesian:: -initialize(const Index& I, const Index& J, const Index& K, - MFLOAT** const delX, const Vektor& orig, - MeshBC_E* const mbc) -{ - PInsist(Dim==3,"Number of Index arguments does not match mesh dimension!!"); - gridSizes[0] = I.length(); // Number of vertices along this dimension. - gridSizes[1] = J.length(); // Number of vertices along this dimension. - gridSizes[2] = K.length(); // Number of vertices along this dimension. - setup(); // Setup chores, such as array allocations - set_origin(orig); // Set origin. - set_MeshBC(mbc); // Set up mesh boundary conditions - set_meshSpacing(delX); // Set mesh spacings and compute cell volume - set_Dvc(); // Set derivative coefficients from spacings. +void Cartesian::initialize(const Index& I, const Index& J, const Index& K, + MFLOAT** const delX, const Vektor& orig, + MeshBC_E* const mbc) { + PInsist(Dim == 3, "Number of Index arguments does not match mesh dimension!!"); + gridSizes[0] = I.length(); // Number of vertices along this dimension. + gridSizes[1] = J.length(); // Number of vertices along this dimension. + gridSizes[2] = K.length(); // Number of vertices along this dimension. + setup(); // Setup chores, such as array allocations + set_origin(orig); // Set origin. + set_MeshBC(mbc); // Set up mesh boundary conditions + set_meshSpacing(delX); // Set mesh spacings and compute cell volume + set_Dvc(); // Set derivative coefficients from spacings. } //----------------------------------------------------------------------------- // Set/accessor functions for member data: //----------------------------------------------------------------------------- // Set the origin of mesh vertex positions: -template -void Cartesian:: -set_origin(const Vektor& o) -{ - origin = o; - for (unsigned d=0; dnotifyOfChange(); +template +void Cartesian::set_origin(const Vektor& o) { + origin = o; + for (unsigned d = 0; d < Dim; ++d) { + (meshPosition[d])[0] = o(d); + for (unsigned vert = 1; vert < gridSizes[d]; ++vert) { + (meshPosition[d])[vert] = (meshPosition[d])[vert - 1] + (meshSpacing[d])[vert - 1]; + } + } + // Apply the current state of the mesh BC to add guards to meshPosition map: + for (unsigned face = 0; face < 2 * Dim; ++face) + updateMeshSpacingGuards(face); + this->notifyOfChange(); } // Get the origin of mesh vertex positions: -template -Vektor Cartesian:: -get_origin() const -{ - return origin; +template +Vektor Cartesian::get_origin() const { + return origin; } // Set the spacings of mesh vertex positions: -template -void Cartesian:: -set_meshSpacing(MFLOAT** const del) -{ - unsigned d, cell, face; - - for (d=0;dnotifyOfChange(); +template +void Cartesian::set_meshSpacing(MFLOAT** const del) { + unsigned d, cell, face; + + for (d = 0; d < Dim; ++d) { + (meshPosition[d])[0] = origin(d); + for (cell = 0; cell < gridSizes[d] - 1; cell++) { + (meshSpacing[d])[cell] = del[d][cell]; + (meshPosition[d])[cell + 1] = (meshPosition[d])[cell] + del[d][cell]; + } + } + // Apply the current state of the mesh BC to add guards to meshSpacings map: + for (face = 0; face < 2 * Dim; ++face) + updateMeshSpacingGuards(face); + // if spacing fields allocated, we must update values + if (hasSpacingFields) + storeSpacingFields(); + this->notifyOfChange(); } // Set only the derivative constants, using pre-set spacings: -template -void Cartesian:: -set_Dvc() -{ - unsigned d; - MFLOAT coef = 1.0; - for (d=1;d +void Cartesian::set_Dvc() { + unsigned d; + MFLOAT coef = 1.0; + for (d = 1; d < Dim; ++d) + coef *= 0.5; + + for (d = 0; d < Dim; ++d) { + MFLOAT dvc = coef; + for (unsigned b = 0; b < (1 << Dim); ++b) { + int s = (b & (1 << d)) ? 1 : -1; + Dvc[b](d) = s * dvc; + } } - } } // Get the spacings of mesh vertex positions along specified direction: -template -void Cartesian:: -get_meshSpacing(unsigned d, MFLOAT* spacings) const -{ - PAssert_LT(d, Dim); - for (unsigned int cell=0; cell < gridSizes[d]-1; cell++) - spacings[cell] = (*(meshSpacing[d].find(cell))).second; - return; -} -//leak template -//leak MFLOAT* Cartesian:: -//leak get_meshSpacing(unsigned d) const -//leak { -//leak PAssert_LT(d, Dim); -//leak MFLOAT* theMeshSpacing = new MFLOAT[gridSizes[d]-1]; -//leak for (int cell=0; cell < gridSizes[d]-1; cell++) -//leak theMeshSpacing[cell] = (*(meshSpacing[d].find(cell))).second; -//leak return theMeshSpacing; -//leak } +template +void Cartesian::get_meshSpacing(unsigned d, MFLOAT* spacings) const { + PAssert_LT(d, Dim); + for (unsigned int cell = 0; cell < gridSizes[d] - 1; cell++) + spacings[cell] = (*(meshSpacing[d].find(cell))).second; + return; +} +// leak template +// leak MFLOAT* Cartesian:: +// leak get_meshSpacing(unsigned d) const +// leak { +// leak PAssert_LT(d, Dim); +// leak MFLOAT* theMeshSpacing = new MFLOAT[gridSizes[d]-1]; +// leak for (int cell=0; cell < gridSizes[d]-1; cell++) +// leak theMeshSpacing[cell] = (*(meshSpacing[d].find(cell))).second; +// leak return theMeshSpacing; +// leak } /////////////////////////////////////////////////////////////////////////////// @@ -808,26 +718,26 @@ get_meshSpacing(unsigned d, MFLOAT* spacings) const // by BrickExpression in storeSpacingFields() // Periodic: -template -struct OpMeshPeriodic -{ -}; -template -inline void PETE_apply(OpMeshPeriodic /*e*/, T& a, T b) { a = b; } +template +struct OpMeshPeriodic {}; +template +inline void PETE_apply(OpMeshPeriodic /*e*/, T& a, T b) { + a = b; +} // Reflective/None: -template -struct OpMeshExtrapolate -{ - OpMeshExtrapolate(T& o, T& s) : Offset(o), Slope(s) {} - T Offset, Slope; +template +struct OpMeshExtrapolate { + OpMeshExtrapolate(T& o, T& s) + : Offset(o) + , Slope(s) {} + T Offset, Slope; }; // template // inline void apply(OpMeshExtrapolate e, T& a, T b) -template -inline void PETE_apply(OpMeshExtrapolate e, T& a, T b) -{ - a = b*e.Slope+e.Offset; +template +inline void PETE_apply(OpMeshExtrapolate e, T& a, T b) { + a = b * e.Slope + e.Offset; } /////////////////////////////////////////////////////////////////////////////// @@ -835,310 +745,288 @@ inline void PETE_apply(OpMeshExtrapolate e, T& a, T b) // Create BareField's of vertex and cell spacings // Special prototypes taking no args or FieldLayout ctor args: // No-arg case: -template -void Cartesian:: -storeSpacingFields() -{ - // Set up default FieldLayout parameters: - e_dim_tag et[Dim]; - for (unsigned int d=0; d +void Cartesian::storeSpacingFields() { + // Set up default FieldLayout parameters: + e_dim_tag et[Dim]; + for (unsigned int d = 0; d < Dim; d++) + et[d] = PARALLEL; + storeSpacingFields(et, -1); } // 1D -template -void Cartesian:: -storeSpacingFields(e_dim_tag p1, int vnodes) -{ - e_dim_tag et[1]; - et[0] = p1; - storeSpacingFields(et, vnodes); +template +void Cartesian::storeSpacingFields(e_dim_tag p1, int vnodes) { + e_dim_tag et[1]; + et[0] = p1; + storeSpacingFields(et, vnodes); } // 2D -template -void Cartesian:: -storeSpacingFields(e_dim_tag p1, e_dim_tag p2, int vnodes) -{ - e_dim_tag et[2]; - et[0] = p1; - et[1] = p2; - storeSpacingFields(et, vnodes); +template +void Cartesian::storeSpacingFields(e_dim_tag p1, e_dim_tag p2, int vnodes) { + e_dim_tag et[2]; + et[0] = p1; + et[1] = p2; + storeSpacingFields(et, vnodes); } // 3D -template -void Cartesian:: -storeSpacingFields(e_dim_tag p1, e_dim_tag p2, e_dim_tag p3, int vnodes) -{ - e_dim_tag et[3]; - et[0] = p1; - et[1] = p2; - et[2] = p3; - storeSpacingFields(et, vnodes); +template +void Cartesian::storeSpacingFields(e_dim_tag p1, e_dim_tag p2, e_dim_tag p3, + int vnodes) { + e_dim_tag et[3]; + et[0] = p1; + et[1] = p2; + et[2] = p3; + storeSpacingFields(et, vnodes); } - // The general storeSpacingfields() function; others invoke this internally: -template -void Cartesian:: -storeSpacingFields(e_dim_tag* et, int vnodes) -{ - unsigned int d; - int currentLocation[Dim]; - NDIndex cells, verts; - for (d=0; d(cells, et, vnodes); - // Note: enough guard cells only for existing Div(), etc. implementations: - VertSpacings = - new BareField,Dim>(*FlCell,GuardCellSizes(1)); - FlVert = new FieldLayout(verts, et, vnodes); - // Note: enough guard cells only for existing Div(), etc. implementations: - CellSpacings = - new BareField,Dim>(*FlVert,GuardCellSizes(1)); - } - // VERTEX-VERTEX SPACINGS: - BareField,Dim>& vertSpacings = *VertSpacings; - Vektor vertexSpacing; - vertSpacings.Uncompress(); // Must do this prior to assign via iterator - typename BareField,Dim>::iterator cfi, - cfi_end = vertSpacings.end(); - for (cfi = vertSpacings.begin(); cfi != cfi_end; ++cfi) { - cfi.GetCurrentLocation(currentLocation); - for (d=0; d,Dim>& cellSpacings = *CellSpacings; - Vektor cellSpacing; - cellSpacings.Uncompress(); // Must do this prior to assign via iterator - typename BareField,Dim>::iterator vfi, - vfi_end = cellSpacings.end(); - for (vfi = cellSpacings.begin(); vfi != vfi_end; ++vfi) { - vfi.GetCurrentLocation(currentLocation); - for (d=0; d v0,v1; v0 = 0.0; v1 = 1.0; // Used for Reflective mesh BC - typedef Vektor T; // Used multipple places in loop below - typename BareField::iterator_if cfill_i; // Iterator used below - typename BareField::iterator_if vfill_i; // Iterator used below - int coffset, voffset; // Pointer offsets used with LField::iterator below - MeshBC_E bct; // Scalar value of mesh BC used for each face in loop - // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - for (unsigned int face=0; face < 2*Dim; face++) { - // NDIndex's spanning elements and guard elements: - NDIndex cSlab = AddGuardCells(verts,cellSpacings.getGuardCellSizes()); - NDIndex vSlab = AddGuardCells(cells,vertSpacings.getGuardCellSizes()); - // Shrink it down to be the guards along the active face: - d = face/2; - // The following bitwise AND logical test returns true if face is odd - // (meaning the "high" or "right" face in the numbering convention) and - // returns false if face is even (meaning the "low" or "left" face in - // the numbering convention): - if ( face & 1 ) { - cSlab[d] = Index(verts[d].max() + 1, - verts[d].max() + cellSpacings.rightGuard(d)); - vSlab[d] = Index(cells[d].max() + 1, - cells[d].max() + vertSpacings.rightGuard(d)); - } else { - cSlab[d] = Index(verts[d].min() - cellSpacings.leftGuard(d), - verts[d].min() - 1); - vSlab[d] = Index(cells[d].min() - vertSpacings.leftGuard(d), - cells[d].min() - 1); - } - // Compute pointer offsets used with LField::iterator below: - switch (MeshBC[face]) { - case Periodic: - bct = Periodic; - if ( face & 1 ) { - coffset = -verts[d].length(); - voffset = -cells[d].length(); - } else { - coffset = verts[d].length(); - voffset = cells[d].length(); - } - break; - case Reflective: - bct = Reflective; - if ( face & 1 ) { - coffset = 2*verts[d].max() + 1; - voffset = 2*cells[d].max() + 1 - 1; - } else { - coffset = 2*verts[d].min() - 1; - voffset = 2*cells[d].min() - 1 + 1; - } - break; - case NoBC: - bct = NoBC; - if ( face & 1 ) { - coffset = 2*verts[d].max() + 1; - voffset = 2*cells[d].max() + 1 - 1; - } else { - coffset = 2*verts[d].min() - 1; - voffset = 2*cells[d].min() - 1 + 1; - } - break; - default: - throw IpplException("Cartesian::storeSpacingFields", "unknown MeshBC type"); - } - - // Loop over all the LField's in the BareField's: - // +++++++++++++++cellSpacings++++++++++++++ - for (cfill_i=cellSpacings.begin_if(); - cfill_i!=cellSpacings.end_if(); ++cfill_i) - { - // Cache some things we will use often below. - // Pointer to the data for the current LField (right????): - LField &fill = *(*cfill_i).second; - // NDIndex spanning all elements in the LField, including the guards: - const NDIndex &fill_alloc = fill.getAllocated(); - // If the previously-created boundary guard-layer NDIndex "cSlab" - // contains any of the elements in this LField (they will be guard - // elements if it does), assign the values into them here by applying - // the boundary condition: - if ( cSlab.touches( fill_alloc ) ) - { - // Find what it touches in this LField. - NDIndex dest = cSlab.intersect( fill_alloc ); - - // For exrapolation boundary conditions, the boundary guard-layer - // elements are typically copied from interior values; the "src" - // NDIndex specifies the interior elements to be copied into the - // "dest" boundary guard-layer elements (possibly after some - // mathematical operations like multipplying by minus 1 later): - NDIndex src = dest; // Create dest equal to src - // Now calculate the interior elements; the coffset variable - // computed above makes this right for "low" or "high" face cases: - src[d] = coffset - src[d]; - - // TJW: Why is there another loop over LField's here?????????? - // Loop over the ones that src touches. - typename BareField::iterator_if from_i; - for (from_i=cellSpacings.begin_if(); - from_i!=cellSpacings.end_if(); ++from_i) - { - // Cache a few things. - LField &from = *(*from_i).second; - const NDIndex &from_owned = from.getOwned(); - const NDIndex &from_alloc = from.getAllocated(); - // If src touches this LField... - if ( src.touches( from_owned ) ) - { - NDIndex from_it = src.intersect( from_alloc ); - NDIndex cfill_it = dest.plugBase( from_it ); - // Build iterators for the copy... - typedef typename LField::iterator LFI; - LFI lhs = fill.begin(cfill_it); - LFI rhs = from.begin(from_it); - // And do the assignment. - if (bct == Periodic) { - BrickExpression > - (lhs,rhs,OpMeshPeriodic()).apply(); - } else { - if (bct == Reflective) { - BrickExpression > - (lhs,rhs,OpMeshExtrapolate(v0,v1)).apply(); - } else { - if (bct == NoBC) { - BrickExpression > - (lhs,rhs,OpMeshExtrapolate(v0,v0)).apply(); +template +void Cartesian::storeSpacingFields(e_dim_tag* et, int vnodes) { + unsigned int d; + int currentLocation[Dim]; + NDIndex cells, verts; + for (d = 0; d < Dim; d++) { + cells[d] = Index(gridSizes[d] - 1); + verts[d] = Index(gridSizes[d]); + } + if (!hasSpacingFields) { + // allocate layouts and spacing fields + FlCell = new FieldLayout(cells, et, vnodes); + // Note: enough guard cells only for existing Div(), etc. implementations: + VertSpacings = new BareField, Dim>(*FlCell, GuardCellSizes(1)); + FlVert = new FieldLayout(verts, et, vnodes); + // Note: enough guard cells only for existing Div(), etc. implementations: + CellSpacings = new BareField, Dim>(*FlVert, GuardCellSizes(1)); + } + // VERTEX-VERTEX SPACINGS: + BareField, Dim>& vertSpacings = *VertSpacings; + Vektor vertexSpacing; + vertSpacings.Uncompress(); // Must do this prior to assign via iterator + typename BareField, Dim>::iterator cfi, cfi_end = vertSpacings.end(); + for (cfi = vertSpacings.begin(); cfi != cfi_end; ++cfi) { + cfi.GetCurrentLocation(currentLocation); + for (d = 0; d < Dim; d++) + vertexSpacing(d) = (*(meshSpacing[d].find(currentLocation[d]))).second; + *cfi = vertexSpacing; + } + // CELL-CELL SPACINGS: + BareField, Dim>& cellSpacings = *CellSpacings; + Vektor cellSpacing; + cellSpacings.Uncompress(); // Must do this prior to assign via iterator + typename BareField, Dim>::iterator vfi, vfi_end = cellSpacings.end(); + for (vfi = cellSpacings.begin(); vfi != vfi_end; ++vfi) { + vfi.GetCurrentLocation(currentLocation); + for (d = 0; d < Dim; d++) + cellSpacing(d) = + 0.5 + * ((meshSpacing[d])[currentLocation[d]] + (meshSpacing[d])[currentLocation[d] - 1]); + *vfi = cellSpacing; + } + //------------------------------------------------- + // Now the hard part, filling in the guard cells: + //------------------------------------------------- + // The easy part of the hard part is filling so that all the internal + // guard layers are right: + cellSpacings.fillGuardCells(); + vertSpacings.fillGuardCells(); + // The hard part of the hard part is filling the external guard layers, + // using the mesh BC to figure out how: + // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + // Temporaries used in loop over faces + Vektor v0, v1; + v0 = 0.0; + v1 = 1.0; // Used for Reflective mesh BC + typedef Vektor T; // Used multipple places in loop below + typename BareField::iterator_if cfill_i; // Iterator used below + typename BareField::iterator_if vfill_i; // Iterator used below + int coffset, voffset; // Pointer offsets used with LField::iterator below + MeshBC_E bct; // Scalar value of mesh BC used for each face in loop + // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + for (unsigned int face = 0; face < 2 * Dim; face++) { + // NDIndex's spanning elements and guard elements: + NDIndex cSlab = AddGuardCells(verts, cellSpacings.getGuardCellSizes()); + NDIndex vSlab = AddGuardCells(cells, vertSpacings.getGuardCellSizes()); + // Shrink it down to be the guards along the active face: + d = face / 2; + // The following bitwise AND logical test returns true if face is odd + // (meaning the "high" or "right" face in the numbering convention) and + // returns false if face is even (meaning the "low" or "left" face in + // the numbering convention): + if (face & 1) { + cSlab[d] = Index(verts[d].max() + 1, verts[d].max() + cellSpacings.rightGuard(d)); + vSlab[d] = Index(cells[d].max() + 1, cells[d].max() + vertSpacings.rightGuard(d)); + } else { + cSlab[d] = Index(verts[d].min() - cellSpacings.leftGuard(d), verts[d].min() - 1); + vSlab[d] = Index(cells[d].min() - vertSpacings.leftGuard(d), cells[d].min() - 1); + } + // Compute pointer offsets used with LField::iterator below: + switch (MeshBC[face]) { + case Periodic: + bct = Periodic; + if (face & 1) { + coffset = -verts[d].length(); + voffset = -cells[d].length(); + } else { + coffset = verts[d].length(); + voffset = cells[d].length(); + } + break; + case Reflective: + bct = Reflective; + if (face & 1) { + coffset = 2 * verts[d].max() + 1; + voffset = 2 * cells[d].max() + 1 - 1; + } else { + coffset = 2 * verts[d].min() - 1; + voffset = 2 * cells[d].min() - 1 + 1; + } + break; + case NoBC: + bct = NoBC; + if (face & 1) { + coffset = 2 * verts[d].max() + 1; + voffset = 2 * cells[d].max() + 1 - 1; + } else { + coffset = 2 * verts[d].min() - 1; + voffset = 2 * cells[d].min() - 1 + 1; + } + break; + default: + throw IpplException("Cartesian::storeSpacingFields", "unknown MeshBC type"); + } + + // Loop over all the LField's in the BareField's: + // +++++++++++++++cellSpacings++++++++++++++ + for (cfill_i = cellSpacings.begin_if(); cfill_i != cellSpacings.end_if(); ++cfill_i) { + // Cache some things we will use often below. + // Pointer to the data for the current LField (right????): + LField& fill = *(*cfill_i).second; + // NDIndex spanning all elements in the LField, including the guards: + const NDIndex& fill_alloc = fill.getAllocated(); + // If the previously-created boundary guard-layer NDIndex "cSlab" + // contains any of the elements in this LField (they will be guard + // elements if it does), assign the values into them here by applying + // the boundary condition: + if (cSlab.touches(fill_alloc)) { + // Find what it touches in this LField. + NDIndex dest = cSlab.intersect(fill_alloc); + + // For exrapolation boundary conditions, the boundary guard-layer + // elements are typically copied from interior values; the "src" + // NDIndex specifies the interior elements to be copied into the + // "dest" boundary guard-layer elements (possibly after some + // mathematical operations like multipplying by minus 1 later): + NDIndex src = dest; // Create dest equal to src + // Now calculate the interior elements; the coffset variable + // computed above makes this right for "low" or "high" face cases: + src[d] = coffset - src[d]; + + // TJW: Why is there another loop over LField's here?????????? + // Loop over the ones that src touches. + typename BareField::iterator_if from_i; + for (from_i = cellSpacings.begin_if(); from_i != cellSpacings.end_if(); ++from_i) { + // Cache a few things. + LField& from = *(*from_i).second; + const NDIndex& from_owned = from.getOwned(); + const NDIndex& from_alloc = from.getAllocated(); + // If src touches this LField... + if (src.touches(from_owned)) { + NDIndex from_it = src.intersect(from_alloc); + NDIndex cfill_it = dest.plugBase(from_it); + // Build iterators for the copy... + typedef typename LField::iterator LFI; + LFI lhs = fill.begin(cfill_it); + LFI rhs = from.begin(from_it); + // And do the assignment. + if (bct == Periodic) { + BrickExpression >(lhs, rhs, + OpMeshPeriodic()) + .apply(); + } else { + if (bct == Reflective) { + BrickExpression >( + lhs, rhs, OpMeshExtrapolate(v0, v1)) + .apply(); + } else { + if (bct == NoBC) { + BrickExpression >( + lhs, rhs, OpMeshExtrapolate(v0, v0)) + .apply(); + } + } } - } } - } - } - } - } - // +++++++++++++++vertSpacings++++++++++++++ - for (vfill_i=vertSpacings.begin_if(); - vfill_i!=vertSpacings.end_if(); ++vfill_i) - { - // Cache some things we will use often below. - // Pointer to the data for the current LField (right????): - LField &fill = *(*vfill_i).second; - // NDIndex spanning all elements in the LField, including the guards: - const NDIndex &fill_alloc = fill.getAllocated(); - // If the previously-created boundary guard-layer NDIndex "cSlab" - // contains any of the elements in this LField (they will be guard - // elements if it does), assign the values into them here by applying - // the boundary condition: - if ( vSlab.touches( fill_alloc ) ) - { - // Find what it touches in this LField. - NDIndex dest = vSlab.intersect( fill_alloc ); - - // For exrapolation boundary conditions, the boundary guard-layer - // elements are typically copied from interior values; the "src" - // NDIndex specifies the interior elements to be copied into the - // "dest" boundary guard-layer elements (possibly after some - // mathematical operations like multipplying by minus 1 later): - NDIndex src = dest; // Create dest equal to src - // Now calculate the interior elements; the voffset variable - // computed above makes this right for "low" or "high" face cases: - src[d] = voffset - src[d]; - - // TJW: Why is there another loop over LField's here?????????? - // Loop over the ones that src touches. - typename BareField::iterator_if from_i; - for (from_i=vertSpacings.begin_if(); - from_i!=vertSpacings.end_if(); ++from_i) - { - // Cache a few things. - LField &from = *(*from_i).second; - const NDIndex &from_owned = from.getOwned(); - const NDIndex &from_alloc = from.getAllocated(); - // If src touches this LField... - if ( src.touches( from_owned ) ) - { - NDIndex from_it = src.intersect( from_alloc ); - NDIndex vfill_it = dest.plugBase( from_it ); - // Build iterators for the copy... - typedef typename LField::iterator LFI; - LFI lhs = fill.begin(vfill_it); - LFI rhs = from.begin(from_it); - // And do the assignment. - if (bct == Periodic) { - BrickExpression > - (lhs,rhs,OpMeshPeriodic()).apply(); - } else { - if (bct == Reflective) { - BrickExpression > - (lhs,rhs,OpMeshExtrapolate(v0,v1)).apply(); - } else { - if (bct == NoBC) { - BrickExpression > - (lhs,rhs,OpMeshExtrapolate(v0,v0)).apply(); + } + } + } + // +++++++++++++++vertSpacings++++++++++++++ + for (vfill_i = vertSpacings.begin_if(); vfill_i != vertSpacings.end_if(); ++vfill_i) { + // Cache some things we will use often below. + // Pointer to the data for the current LField (right????): + LField& fill = *(*vfill_i).second; + // NDIndex spanning all elements in the LField, including the guards: + const NDIndex& fill_alloc = fill.getAllocated(); + // If the previously-created boundary guard-layer NDIndex "cSlab" + // contains any of the elements in this LField (they will be guard + // elements if it does), assign the values into them here by applying + // the boundary condition: + if (vSlab.touches(fill_alloc)) { + // Find what it touches in this LField. + NDIndex dest = vSlab.intersect(fill_alloc); + + // For exrapolation boundary conditions, the boundary guard-layer + // elements are typically copied from interior values; the "src" + // NDIndex specifies the interior elements to be copied into the + // "dest" boundary guard-layer elements (possibly after some + // mathematical operations like multipplying by minus 1 later): + NDIndex src = dest; // Create dest equal to src + // Now calculate the interior elements; the voffset variable + // computed above makes this right for "low" or "high" face cases: + src[d] = voffset - src[d]; + + // TJW: Why is there another loop over LField's here?????????? + // Loop over the ones that src touches. + typename BareField::iterator_if from_i; + for (from_i = vertSpacings.begin_if(); from_i != vertSpacings.end_if(); ++from_i) { + // Cache a few things. + LField& from = *(*from_i).second; + const NDIndex& from_owned = from.getOwned(); + const NDIndex& from_alloc = from.getAllocated(); + // If src touches this LField... + if (src.touches(from_owned)) { + NDIndex from_it = src.intersect(from_alloc); + NDIndex vfill_it = dest.plugBase(from_it); + // Build iterators for the copy... + typedef typename LField::iterator LFI; + LFI lhs = fill.begin(vfill_it); + LFI rhs = from.begin(from_it); + // And do the assignment. + if (bct == Periodic) { + BrickExpression >(lhs, rhs, + OpMeshPeriodic()) + .apply(); + } else { + if (bct == Reflective) { + BrickExpression >( + lhs, rhs, OpMeshExtrapolate(v0, v1)) + .apply(); + } else { + if (bct == NoBC) { + BrickExpression >( + lhs, rhs, OpMeshExtrapolate(v0, v0)) + .apply(); + } + } } - } } - } - } - } - } - - } + } + } + } + } - hasSpacingFields = true; // Flag this as having been done to this object. + hasSpacingFields = true; // Flag this as having been done to this object. } - // These specify both the total number of vnodes and the numbers of vnodes // along each dimension for the partitioning of the index space. Obviously // this restricts the number of vnodes to be a product of the numbers along @@ -1147,349 +1035,323 @@ storeSpacingFields(e_dim_tag* et, int vnodes) // more relevant comments, including definition of recurse): // 1D -template -void Cartesian:: -storeSpacingFields(e_dim_tag p1, - unsigned vnodes1, - bool recurse, - int vnodes) { - e_dim_tag et[1]; - et[0] = p1; - unsigned vnodesPerDirection[Dim]; - vnodesPerDirection[0] = vnodes1; - storeSpacingFields(et, vnodesPerDirection, recurse, vnodes); +template +void Cartesian::storeSpacingFields(e_dim_tag p1, unsigned vnodes1, bool recurse, + int vnodes) { + e_dim_tag et[1]; + et[0] = p1; + unsigned vnodesPerDirection[Dim]; + vnodesPerDirection[0] = vnodes1; + storeSpacingFields(et, vnodesPerDirection, recurse, vnodes); } // 2D -template -void Cartesian:: -storeSpacingFields(e_dim_tag p1, e_dim_tag p2, - unsigned vnodes1, unsigned vnodes2, - bool recurse,int vnodes) { - e_dim_tag et[2]; - et[0] = p1; - et[1] = p2; - unsigned vnodesPerDirection[Dim]; - vnodesPerDirection[0] = vnodes1; - vnodesPerDirection[1] = vnodes2; - storeSpacingFields(et, vnodesPerDirection, recurse, vnodes); +template +void Cartesian::storeSpacingFields(e_dim_tag p1, e_dim_tag p2, unsigned vnodes1, + unsigned vnodes2, bool recurse, int vnodes) { + e_dim_tag et[2]; + et[0] = p1; + et[1] = p2; + unsigned vnodesPerDirection[Dim]; + vnodesPerDirection[0] = vnodes1; + vnodesPerDirection[1] = vnodes2; + storeSpacingFields(et, vnodesPerDirection, recurse, vnodes); } // 3D -template -void Cartesian:: -storeSpacingFields(e_dim_tag p1, e_dim_tag p2, e_dim_tag p3, - unsigned vnodes1, unsigned vnodes2, unsigned vnodes3, - bool recurse, int vnodes) { - e_dim_tag et[3]; - et[0] = p1; - et[1] = p2; - et[2] = p3; - unsigned vnodesPerDirection[Dim]; - vnodesPerDirection[0] = vnodes1; - vnodesPerDirection[1] = vnodes2; - vnodesPerDirection[2] = vnodes3; - storeSpacingFields(et, vnodesPerDirection, recurse, vnodes); +template +void Cartesian::storeSpacingFields(e_dim_tag p1, e_dim_tag p2, e_dim_tag p3, + unsigned vnodes1, unsigned vnodes2, + unsigned vnodes3, bool recurse, int vnodes) { + e_dim_tag et[3]; + et[0] = p1; + et[1] = p2; + et[2] = p3; + unsigned vnodesPerDirection[Dim]; + vnodesPerDirection[0] = vnodes1; + vnodesPerDirection[1] = vnodes2; + vnodesPerDirection[2] = vnodes3; + storeSpacingFields(et, vnodesPerDirection, recurse, vnodes); } // TJW: Note: should clean up here eventually, and put redundant code from // this and the other general storeSpacingFields() implementation into one // function. Need to check this in quickly for Blanca right now --12/8/98 // The general storeSpacingfields() function; others invoke this internally: -template -void Cartesian:: -storeSpacingFields(e_dim_tag *p, - unsigned* vnodesPerDirection, - bool recurse, int vnodes) { - unsigned int d; - int currentLocation[Dim]; - NDIndex cells, verts; - for (d=0; d(cells, p, vnodesPerDirection, recurse, vnodes); - // Note: enough guard cells only for existing Div(), etc. implementations: - VertSpacings = - new BareField,Dim>(*FlCell,GuardCellSizes(1)); - FlVert = - new FieldLayout(verts, p, vnodesPerDirection, recurse, vnodes); - // Note: enough guard cells only for existing Div(), etc. implementations: - CellSpacings = - new BareField,Dim>(*FlVert,GuardCellSizes(1)); - } - // VERTEX-VERTEX SPACINGS: - BareField,Dim>& vertSpacings = *VertSpacings; - Vektor vertexSpacing; - vertSpacings.Uncompress(); // Must do this prior to assign via iterator - typename BareField,Dim>::iterator cfi, - cfi_end = vertSpacings.end(); - for (cfi = vertSpacings.begin(); cfi != cfi_end; ++cfi) { - cfi.GetCurrentLocation(currentLocation); - for (d=0; d,Dim>& cellSpacings = *CellSpacings; - Vektor cellSpacing; - cellSpacings.Uncompress(); // Must do this prior to assign via iterator - typename BareField,Dim>::iterator vfi, - vfi_end = cellSpacings.end(); - for (vfi = cellSpacings.begin(); vfi != vfi_end; ++vfi) { - vfi.GetCurrentLocation(currentLocation); - for (d=0; d v0,v1; v0 = 0.0; v1 = 1.0; // Used for Reflective mesh BC - unsigned int face; - typedef Vektor T; // Used multipple places in loop below - typename BareField::iterator_if cfill_i; // Iterator used below - typename BareField::iterator_if vfill_i; // Iterator used below - int coffset, voffset; // Pointer offsets used with LField::iterator below - MeshBC_E bct; // Scalar value of mesh BC used for each face in loop - // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - for (face=0; face < 2*Dim; face++) { - // NDIndex's spanning elements and guard elements: - NDIndex cSlab = AddGuardCells(verts,cellSpacings.getGuardCellSizes()); - NDIndex vSlab = AddGuardCells(cells,vertSpacings.getGuardCellSizes()); - // Shrink it down to be the guards along the active face: - d = face/2; - // The following bitwise AND logical test returns true if face is odd - // (meaning the "high" or "right" face in the numbering convention) and - // returns false if face is even (meaning the "low" or "left" face in - // the numbering convention): - if ( face & 1 ) { - cSlab[d] = Index(verts[d].max() + 1, - verts[d].max() + cellSpacings.rightGuard(d)); - vSlab[d] = Index(cells[d].max() + 1, - cells[d].max() + vertSpacings.rightGuard(d)); - } else { - cSlab[d] = Index(verts[d].min() - cellSpacings.leftGuard(d), - verts[d].min() - 1); - vSlab[d] = Index(cells[d].min() - vertSpacings.leftGuard(d), - cells[d].min() - 1); - } - // Compute pointer offsets used with LField::iterator below: - switch (MeshBC[face]) { - case Periodic: - bct = Periodic; - if ( face & 1 ) { - coffset = -verts[d].length(); - voffset = -cells[d].length(); - } else { - coffset = verts[d].length(); - voffset = cells[d].length(); - } - break; - case Reflective: - bct = Reflective; - if ( face & 1 ) { - coffset = 2*verts[d].max() + 1; - voffset = 2*cells[d].max() + 1 - 1; - } else { - coffset = 2*verts[d].min() - 1; - voffset = 2*cells[d].min() - 1 + 1; - } - break; - case NoBC: - bct = NoBC; - if ( face & 1 ) { - coffset = 2*verts[d].max() + 1; - voffset = 2*cells[d].max() + 1 - 1; - } else { - coffset = 2*verts[d].min() - 1; - voffset = 2*cells[d].min() - 1 + 1; - } - break; - default: - ERRORMSG("Cartesian::storeSpacingFields(): unknown MeshBC type" << endl); - break; - } - - // Loop over all the LField's in the BareField's: - // +++++++++++++++cellSpacings++++++++++++++ - for (cfill_i=cellSpacings.begin_if(); - cfill_i!=cellSpacings.end_if(); ++cfill_i) - { - // Cache some things we will use often below. - // Pointer to the data for the current LField (right????): - LField &fill = *(*cfill_i).second; - // NDIndex spanning all elements in the LField, including the guards: - const NDIndex &fill_alloc = fill.getAllocated(); - // If the previously-created boundary guard-layer NDIndex "cSlab" - // contains any of the elements in this LField (they will be guard - // elements if it does), assign the values into them here by applying - // the boundary condition: - if ( cSlab.touches( fill_alloc ) ) - { - // Find what it touches in this LField. - NDIndex dest = cSlab.intersect( fill_alloc ); - - // For exrapolation boundary conditions, the boundary guard-layer - // elements are typically copied from interior values; the "src" - // NDIndex specifies the interior elements to be copied into the - // "dest" boundary guard-layer elements (possibly after some - // mathematical operations like multipplying by minus 1 later): - NDIndex src = dest; // Create dest equal to src - // Now calculate the interior elements; the coffset variable - // computed above makes this right for "low" or "high" face cases: - src[d] = coffset - src[d]; - - // TJW: Why is there another loop over LField's here?????????? - // Loop over the ones that src touches. - typename BareField::iterator_if from_i; - for (from_i=cellSpacings.begin_if(); - from_i!=cellSpacings.end_if(); ++from_i) - { - // Cache a few things. - LField &from = *(*from_i).second; - const NDIndex &from_owned = from.getOwned(); - const NDIndex &from_alloc = from.getAllocated(); - // If src touches this LField... - if ( src.touches( from_owned ) ) - { - NDIndex from_it = src.intersect( from_alloc ); - NDIndex cfill_it = dest.plugBase( from_it ); - // Build iterators for the copy... - typedef typename LField::iterator LFI; - LFI lhs = fill.begin(cfill_it); - LFI rhs = from.begin(from_it); - // And do the assignment. - if (bct == Periodic) { - BrickExpression > - (lhs,rhs,OpMeshPeriodic()).apply(); - } else { - if (bct == Reflective) { - BrickExpression > - (lhs,rhs,OpMeshExtrapolate(v0,v1)).apply(); - } else { - if (bct == NoBC) { - BrickExpression > - (lhs,rhs,OpMeshExtrapolate(v0,v0)).apply(); +template +void Cartesian::storeSpacingFields(e_dim_tag* p, unsigned* vnodesPerDirection, + bool recurse, int vnodes) { + unsigned int d; + int currentLocation[Dim]; + NDIndex cells, verts; + for (d = 0; d < Dim; d++) { + cells[d] = Index(gridSizes[d] - 1); + verts[d] = Index(gridSizes[d]); + } + if (!hasSpacingFields) { + // allocate layouts and spacing fields + FlCell = new FieldLayout(cells, p, vnodesPerDirection, recurse, vnodes); + // Note: enough guard cells only for existing Div(), etc. implementations: + VertSpacings = new BareField, Dim>(*FlCell, GuardCellSizes(1)); + FlVert = new FieldLayout(verts, p, vnodesPerDirection, recurse, vnodes); + // Note: enough guard cells only for existing Div(), etc. implementations: + CellSpacings = new BareField, Dim>(*FlVert, GuardCellSizes(1)); + } + // VERTEX-VERTEX SPACINGS: + BareField, Dim>& vertSpacings = *VertSpacings; + Vektor vertexSpacing; + vertSpacings.Uncompress(); // Must do this prior to assign via iterator + typename BareField, Dim>::iterator cfi, cfi_end = vertSpacings.end(); + for (cfi = vertSpacings.begin(); cfi != cfi_end; ++cfi) { + cfi.GetCurrentLocation(currentLocation); + for (d = 0; d < Dim; d++) + vertexSpacing(d) = (*(meshSpacing[d].find(currentLocation[d]))).second; + *cfi = vertexSpacing; + } + // CELL-CELL SPACINGS: + BareField, Dim>& cellSpacings = *CellSpacings; + Vektor cellSpacing; + cellSpacings.Uncompress(); // Must do this prior to assign via iterator + typename BareField, Dim>::iterator vfi, vfi_end = cellSpacings.end(); + for (vfi = cellSpacings.begin(); vfi != vfi_end; ++vfi) { + vfi.GetCurrentLocation(currentLocation); + for (d = 0; d < Dim; d++) + cellSpacing(d) = + 0.5 + * ((meshSpacing[d])[currentLocation[d]] + (meshSpacing[d])[currentLocation[d] - 1]); + *vfi = cellSpacing; + } + //------------------------------------------------- + // Now the hard part, filling in the guard cells: + //------------------------------------------------- + // The easy part of the hard part is filling so that all the internal + // guard layers are right: + cellSpacings.fillGuardCells(); + vertSpacings.fillGuardCells(); + // The hard part of the hard part is filling the external guard layers, + // using the mesh BC to figure out how: + // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + // Temporaries used in loop over faces + Vektor v0, v1; + v0 = 0.0; + v1 = 1.0; // Used for Reflective mesh BC + unsigned int face; + typedef Vektor T; // Used multipple places in loop below + typename BareField::iterator_if cfill_i; // Iterator used below + typename BareField::iterator_if vfill_i; // Iterator used below + int coffset, voffset; // Pointer offsets used with LField::iterator below + MeshBC_E bct; // Scalar value of mesh BC used for each face in loop + // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + for (face = 0; face < 2 * Dim; face++) { + // NDIndex's spanning elements and guard elements: + NDIndex cSlab = AddGuardCells(verts, cellSpacings.getGuardCellSizes()); + NDIndex vSlab = AddGuardCells(cells, vertSpacings.getGuardCellSizes()); + // Shrink it down to be the guards along the active face: + d = face / 2; + // The following bitwise AND logical test returns true if face is odd + // (meaning the "high" or "right" face in the numbering convention) and + // returns false if face is even (meaning the "low" or "left" face in + // the numbering convention): + if (face & 1) { + cSlab[d] = Index(verts[d].max() + 1, verts[d].max() + cellSpacings.rightGuard(d)); + vSlab[d] = Index(cells[d].max() + 1, cells[d].max() + vertSpacings.rightGuard(d)); + } else { + cSlab[d] = Index(verts[d].min() - cellSpacings.leftGuard(d), verts[d].min() - 1); + vSlab[d] = Index(cells[d].min() - vertSpacings.leftGuard(d), cells[d].min() - 1); + } + // Compute pointer offsets used with LField::iterator below: + switch (MeshBC[face]) { + case Periodic: + bct = Periodic; + if (face & 1) { + coffset = -verts[d].length(); + voffset = -cells[d].length(); + } else { + coffset = verts[d].length(); + voffset = cells[d].length(); + } + break; + case Reflective: + bct = Reflective; + if (face & 1) { + coffset = 2 * verts[d].max() + 1; + voffset = 2 * cells[d].max() + 1 - 1; + } else { + coffset = 2 * verts[d].min() - 1; + voffset = 2 * cells[d].min() - 1 + 1; + } + break; + case NoBC: + bct = NoBC; + if (face & 1) { + coffset = 2 * verts[d].max() + 1; + voffset = 2 * cells[d].max() + 1 - 1; + } else { + coffset = 2 * verts[d].min() - 1; + voffset = 2 * cells[d].min() - 1 + 1; + } + break; + default: + ERRORMSG("Cartesian::storeSpacingFields(): unknown MeshBC type" << endl); + break; + } + + // Loop over all the LField's in the BareField's: + // +++++++++++++++cellSpacings++++++++++++++ + for (cfill_i = cellSpacings.begin_if(); cfill_i != cellSpacings.end_if(); ++cfill_i) { + // Cache some things we will use often below. + // Pointer to the data for the current LField (right????): + LField& fill = *(*cfill_i).second; + // NDIndex spanning all elements in the LField, including the guards: + const NDIndex& fill_alloc = fill.getAllocated(); + // If the previously-created boundary guard-layer NDIndex "cSlab" + // contains any of the elements in this LField (they will be guard + // elements if it does), assign the values into them here by applying + // the boundary condition: + if (cSlab.touches(fill_alloc)) { + // Find what it touches in this LField. + NDIndex dest = cSlab.intersect(fill_alloc); + + // For exrapolation boundary conditions, the boundary guard-layer + // elements are typically copied from interior values; the "src" + // NDIndex specifies the interior elements to be copied into the + // "dest" boundary guard-layer elements (possibly after some + // mathematical operations like multipplying by minus 1 later): + NDIndex src = dest; // Create dest equal to src + // Now calculate the interior elements; the coffset variable + // computed above makes this right for "low" or "high" face cases: + src[d] = coffset - src[d]; + + // TJW: Why is there another loop over LField's here?????????? + // Loop over the ones that src touches. + typename BareField::iterator_if from_i; + for (from_i = cellSpacings.begin_if(); from_i != cellSpacings.end_if(); ++from_i) { + // Cache a few things. + LField& from = *(*from_i).second; + const NDIndex& from_owned = from.getOwned(); + const NDIndex& from_alloc = from.getAllocated(); + // If src touches this LField... + if (src.touches(from_owned)) { + NDIndex from_it = src.intersect(from_alloc); + NDIndex cfill_it = dest.plugBase(from_it); + // Build iterators for the copy... + typedef typename LField::iterator LFI; + LFI lhs = fill.begin(cfill_it); + LFI rhs = from.begin(from_it); + // And do the assignment. + if (bct == Periodic) { + BrickExpression >(lhs, rhs, + OpMeshPeriodic()) + .apply(); + } else { + if (bct == Reflective) { + BrickExpression >( + lhs, rhs, OpMeshExtrapolate(v0, v1)) + .apply(); + } else { + if (bct == NoBC) { + BrickExpression >( + lhs, rhs, OpMeshExtrapolate(v0, v0)) + .apply(); + } + } } - } } - } - } - } - } - // +++++++++++++++vertSpacings++++++++++++++ - for (vfill_i=vertSpacings.begin_if(); - vfill_i!=vertSpacings.end_if(); ++vfill_i) - { - // Cache some things we will use often below. - // Pointer to the data for the current LField (right????): - LField &fill = *(*vfill_i).second; - // NDIndex spanning all elements in the LField, including the guards: - const NDIndex &fill_alloc = fill.getAllocated(); - // If the previously-created boundary guard-layer NDIndex "cSlab" - // contains any of the elements in this LField (they will be guard - // elements if it does), assign the values into them here by applying - // the boundary condition: - if ( vSlab.touches( fill_alloc ) ) - { - // Find what it touches in this LField. - NDIndex dest = vSlab.intersect( fill_alloc ); - - // For exrapolation boundary conditions, the boundary guard-layer - // elements are typically copied from interior values; the "src" - // NDIndex specifies the interior elements to be copied into the - // "dest" boundary guard-layer elements (possibly after some - // mathematical operations like multipplying by minus 1 later): - NDIndex src = dest; // Create dest equal to src - // Now calculate the interior elements; the voffset variable - // computed above makes this right for "low" or "high" face cases: - src[d] = voffset - src[d]; - - // TJW: Why is there another loop over LField's here?????????? - // Loop over the ones that src touches. - typename BareField::iterator_if from_i; - for (from_i=vertSpacings.begin_if(); - from_i!=vertSpacings.end_if(); ++from_i) - { - // Cache a few things. - LField &from = *(*from_i).second; - const NDIndex &from_owned = from.getOwned(); - const NDIndex &from_alloc = from.getAllocated(); - // If src touches this LField... - if ( src.touches( from_owned ) ) - { - NDIndex from_it = src.intersect( from_alloc ); - NDIndex vfill_it = dest.plugBase( from_it ); - // Build iterators for the copy... - typedef typename LField::iterator LFI; - LFI lhs = fill.begin(vfill_it); - LFI rhs = from.begin(from_it); - // And do the assignment. - if (bct == Periodic) { - BrickExpression > - (lhs,rhs,OpMeshPeriodic()).apply(); - } else { - if (bct == Reflective) { - BrickExpression > - (lhs,rhs,OpMeshExtrapolate(v0,v1)).apply(); - } else { - if (bct == NoBC) { - BrickExpression > - (lhs,rhs,OpMeshExtrapolate(v0,v0)).apply(); + } + } + } + // +++++++++++++++vertSpacings++++++++++++++ + for (vfill_i = vertSpacings.begin_if(); vfill_i != vertSpacings.end_if(); ++vfill_i) { + // Cache some things we will use often below. + // Pointer to the data for the current LField (right????): + LField& fill = *(*vfill_i).second; + // NDIndex spanning all elements in the LField, including the guards: + const NDIndex& fill_alloc = fill.getAllocated(); + // If the previously-created boundary guard-layer NDIndex "cSlab" + // contains any of the elements in this LField (they will be guard + // elements if it does), assign the values into them here by applying + // the boundary condition: + if (vSlab.touches(fill_alloc)) { + // Find what it touches in this LField. + NDIndex dest = vSlab.intersect(fill_alloc); + + // For exrapolation boundary conditions, the boundary guard-layer + // elements are typically copied from interior values; the "src" + // NDIndex specifies the interior elements to be copied into the + // "dest" boundary guard-layer elements (possibly after some + // mathematical operations like multipplying by minus 1 later): + NDIndex src = dest; // Create dest equal to src + // Now calculate the interior elements; the voffset variable + // computed above makes this right for "low" or "high" face cases: + src[d] = voffset - src[d]; + + // TJW: Why is there another loop over LField's here?????????? + // Loop over the ones that src touches. + typename BareField::iterator_if from_i; + for (from_i = vertSpacings.begin_if(); from_i != vertSpacings.end_if(); ++from_i) { + // Cache a few things. + LField& from = *(*from_i).second; + const NDIndex& from_owned = from.getOwned(); + const NDIndex& from_alloc = from.getAllocated(); + // If src touches this LField... + if (src.touches(from_owned)) { + NDIndex from_it = src.intersect(from_alloc); + NDIndex vfill_it = dest.plugBase(from_it); + // Build iterators for the copy... + typedef typename LField::iterator LFI; + LFI lhs = fill.begin(vfill_it); + LFI rhs = from.begin(from_it); + // And do the assignment. + if (bct == Periodic) { + BrickExpression >(lhs, rhs, + OpMeshPeriodic()) + .apply(); + } else { + if (bct == Reflective) { + BrickExpression >( + lhs, rhs, OpMeshExtrapolate(v0, v1)) + .apply(); + } else { + if (bct == NoBC) { + BrickExpression >( + lhs, rhs, OpMeshExtrapolate(v0, v0)) + .apply(); + } + } } - } } - } - } - } - } - - } + } + } + } + } - hasSpacingFields = true; // Flag this as having been done to this object. + hasSpacingFields = true; // Flag this as having been done to this object. } - //----------------------------------------------------------------------------- // I/O: //----------------------------------------------------------------------------- // Formatted output of Cartesian object: -template< unsigned Dim, class MFLOAT > -void -Cartesian:: -print(std::ostream& out) -{ - unsigned int d; - out << "======Cartesian<" << Dim << ",MFLOAT>==begin======" << std::endl; - for (d=0; d < Dim; d++) - out << "gridSizes[" << d << "] = " << gridSizes[d] << std::endl; - out << "origin = " << origin << std::endl; - for (d=0; d < Dim; d++) { - out << "--------meshSpacing[" << d << "]---------" << std::endl; - typename std::map::iterator mi; - for (mi=meshSpacing[d].begin(); mi != meshSpacing[d].end(); ++mi) { - out << "meshSpacing[" << d << "][" << (*mi).first << "] = " - << (*mi).second << std::endl; - } - } - for (unsigned b=0; b < (1<==end========" << std::endl; +template +void Cartesian::print(std::ostream& out) { + unsigned int d; + out << "======Cartesian<" << Dim << ",MFLOAT>==begin======" << std::endl; + for (d = 0; d < Dim; d++) + out << "gridSizes[" << d << "] = " << gridSizes[d] << std::endl; + out << "origin = " << origin << std::endl; + for (d = 0; d < Dim; d++) { + out << "--------meshSpacing[" << d << "]---------" << std::endl; + typename std::map::iterator mi; + for (mi = meshSpacing[d].begin(); mi != meshSpacing[d].end(); ++mi) { + out << "meshSpacing[" << d << "][" << (*mi).first << "] = " << (*mi).second + << std::endl; + } + } + for (unsigned b = 0; b < (1 << Dim); b++) + out << "Dvc[" << b << "] = " << Dvc[b] << std::endl; + for (d = 0; d < Dim; d++) + out << "MeshBC[" << 2 * d << "] = " << Mesh::MeshBC_E_Names[MeshBC[2 * d]] + << " ; MeshBC[" << 2 * d + 1 << "] = " << Mesh::MeshBC_E_Names[MeshBC[2 * d + 1]] + << std::endl; + out << "======Cartesian<" << Dim << ",MFLOAT>==end========" << std::endl; } //-------------------------------------------------------------------------- @@ -1498,478 +1360,428 @@ print(std::ostream& out) // Volume of cell indexed by NDIndex: template -MFLOAT -Cartesian:: -getCellVolume(const NDIndex& ndi) const -{ - MFLOAT volume = 1.0; - for (unsigned int d=0; d::getCellVolume(const NDIndex& ndi) const { + MFLOAT volume = 1.0; + for (unsigned int d = 0; d < Dim; d++) + if (ndi[d].length() != 1) { + ERRORMSG("Cartesian::getCellVolume() error: arg is not a NDIndex" + << "specifying a single element" << endl); + } else { + volume *= (*(meshSpacing[d].find(ndi[d].first()))).second; + } + return volume; } // Field of volumes of all cells: template -Field,Cell>& -Cartesian:: -getCellVolumeField(Field,Cell>& volumes) const -{ - // N.B.: here, other places taking Field& (in UniformCartesian, too), should - // have check on domain of input Field& to make sure it's big enough to hold - // all the values for this mesh object. - volumes = 1.0; - int currentLocation[Dim]; - volumes.Uncompress(); - // Iterate through all cells: - typename Field,Cell>::iterator fi, - fi_end=volumes.end(); - for (fi = volumes.begin(); fi != fi_end; ++fi) { - fi.GetCurrentLocation(currentLocation); - for (unsigned int d=0; d, Cell>& Cartesian::getCellVolumeField( + Field, Cell>& volumes) const { + // N.B.: here, other places taking Field& (in UniformCartesian, too), should + // have check on domain of input Field& to make sure it's big enough to hold + // all the values for this mesh object. + volumes = 1.0; + int currentLocation[Dim]; + volumes.Uncompress(); + // Iterate through all cells: + typename Field, Cell>::iterator fi, fi_end = volumes.end(); + for (fi = volumes.begin(); fi != fi_end; ++fi) { + fi.GetCurrentLocation(currentLocation); + for (unsigned int d = 0; d < Dim; d++) + *fi *= (*(meshSpacing[d].find(currentLocation[d]))).second; + } + return volumes; } // Volume of range of cells bounded by verticies specified by input NDIndex; template -MFLOAT -Cartesian:: -getVertRangeVolume(const NDIndex& ndi) const -{ - // Get vertex positions of extremal cells: - Vektor v0, v1; - unsigned int d; - int i0, i1; - for (d=0; d 3*(int(gridSizes[d])-1)/2) ) - ERRORMSG("Cartesian::getVertRangeVolume() error: " << ndi - << " is an NDIndex ranging outside the mesh and guard layers;" - << " not allowed." << endl); - v0(d) = (*(meshPosition[d].find(i0))).second; - i1 = ndi[d].last(); - if ( (i1 < -(int(gridSizes[d])-1)/2) || - (i1 > 3*(int(gridSizes[d])-1)/2) ) - ERRORMSG("Cartesian::getVertRangeVolume() error: " << ndi - << " is an NDIndex ranging outside the mesh and guard layers;" - << " not allowed." << endl); - v1(d) = (*(meshPosition[d].find(i1))).second; - } - // Compute volume of rectangular solid beweeen these extremal vertices: - MFLOAT volume = 1.0; - for (d=0; d::getVertRangeVolume(const NDIndex& ndi) const { + // Get vertex positions of extremal cells: + Vektor v0, v1; + unsigned int d; + int i0, i1; + for (d = 0; d < Dim; d++) { + i0 = ndi[d].first(); + if ((i0 < -(int(gridSizes[d]) - 1) / 2) || (i0 > 3 * (int(gridSizes[d]) - 1) / 2)) + ERRORMSG("Cartesian::getVertRangeVolume() error: " + << ndi << " is an NDIndex ranging outside the mesh and guard layers;" + << " not allowed." << endl); + v0(d) = (*(meshPosition[d].find(i0))).second; + i1 = ndi[d].last(); + if ((i1 < -(int(gridSizes[d]) - 1) / 2) || (i1 > 3 * (int(gridSizes[d]) - 1) / 2)) + ERRORMSG("Cartesian::getVertRangeVolume() error: " + << ndi << " is an NDIndex ranging outside the mesh and guard layers;" + << " not allowed." << endl); + v1(d) = (*(meshPosition[d].find(i1))).second; + } + // Compute volume of rectangular solid beweeen these extremal vertices: + MFLOAT volume = 1.0; + for (d = 0; d < Dim; d++) + volume *= std::abs(v1(d) - v0(d)); + return volume; } // Volume of range of cells spanned by input NDIndex (index of cells): template -MFLOAT -Cartesian:: -getCellRangeVolume(const NDIndex& ndi) const -{ - // Get vertex positions bounding extremal cells: - Vektor v0, v1; - int i0, i1; - for (unsigned int d=0; d 3*(int(gridSizes[d])-1)/2) ) - ERRORMSG("Cartesian::getCellRangeVolume() error: " << ndi - << " is an NDIndex ranging outside the mesh and guard layers;" - << " not allowed." << endl); - v0(d) = (*(meshPosition[d].find(i0))).second; - i1 = ndi[d].last()+1; - if ( (i1 < -(int(gridSizes[d])-1)/2) || - (i1 > 3*(int(gridSizes[d])-1)/2) ) - ERRORMSG("Cartesian::getCellRangeVolume() error: " << ndi - << " is an NDIndex ranging outside the mesh and guard layers;" - << " not allowed." << endl); - v1(d) = (*(meshPosition[d].find(i1))).second; - } - // Compute volume of rectangular solid beweeen these extremal vertices: - MFLOAT volume = 1.0; - for (unsigned int d=0; d::getCellRangeVolume(const NDIndex& ndi) const { + // Get vertex positions bounding extremal cells: + Vektor v0, v1; + int i0, i1; + for (unsigned int d = 0; d < Dim; d++) { + i0 = ndi[d].first(); + if ((i0 < -(int(gridSizes[d]) - 1) / 2) || (i0 > 3 * (int(gridSizes[d]) - 1) / 2)) + ERRORMSG("Cartesian::getCellRangeVolume() error: " + << ndi << " is an NDIndex ranging outside the mesh and guard layers;" + << " not allowed." << endl); + v0(d) = (*(meshPosition[d].find(i0))).second; + i1 = ndi[d].last() + 1; + if ((i1 < -(int(gridSizes[d]) - 1) / 2) || (i1 > 3 * (int(gridSizes[d]) - 1) / 2)) + ERRORMSG("Cartesian::getCellRangeVolume() error: " + << ndi << " is an NDIndex ranging outside the mesh and guard layers;" + << " not allowed." << endl); + v1(d) = (*(meshPosition[d].find(i1))).second; + } + // Compute volume of rectangular solid beweeen these extremal vertices: + MFLOAT volume = 1.0; + for (unsigned int d = 0; d < Dim; d++) + volume *= std::abs(v1(d) - v0(d)); + return volume; } // Nearest vertex index to (x,y,z): template -NDIndex -Cartesian:: -getNearestVertex(const Vektor& x) const -{ - unsigned int d; - Vektor boxMin, boxMax; - for (d=0; d boxMax(d)) ) - ERRORMSG("Cartesian::getNearestVertex() - input point is outside" - << " mesh boundary and guard layers; not allowed." << endl); - - // Find coordinate vectors of the vertices just above and just below the - // input point (extremal vertices on cell containing point); - MFLOAT xVertexBelow, xVertexAbove, xVertex; - int vertBelow, vertAbove, vertNearest[Dim]; - for (d=0; d xVertexAbove) { - vertNearest[d] = vertAbove; - continue; - } - while (vertAbove > vertBelow+1) { - vertNearest[d] = (vertAbove+vertBelow)/2; - xVertex = (*(meshPosition[d].find(vertNearest[d]))).second; - if (x(d) > xVertex) { - vertBelow = vertNearest[d]; - xVertexBelow = xVertex; - } - else if (x(d) < xVertex) { - vertAbove = vertNearest[d]; - xVertexAbove = xVertex; - } - else { // found exact match! - vertAbove = vertBelow; - } - } - if (vertAbove != vertBelow) { - if ((x(d)-xVertexBelow)<(xVertexAbove-x(d))) { - vertNearest[d] = vertBelow; - } - else { - vertNearest[d] = vertAbove; - } - } - } - - // Construct the NDIndex for nearest vert get its position vector: - NDIndex ndi; - for (d=0; d Cartesian::getNearestVertex(const Vektor& x) const { + unsigned int d; + Vektor boxMin, boxMax; + for (d = 0; d < Dim; d++) { + int gs = (int(gridSizes[d]) - 1) / 2; + boxMin(d) = (*(meshPosition[d].find(-gs))).second; + boxMax(d) = (*(meshPosition[d].find(3 * gs))).second; + } + for (d = 0; d < Dim; d++) + if ((x(d) < boxMin(d)) || (x(d) > boxMax(d))) + ERRORMSG("Cartesian::getNearestVertex() - input point is outside" + << " mesh boundary and guard layers; not allowed." << endl); + + // Find coordinate vectors of the vertices just above and just below the + // input point (extremal vertices on cell containing point); + MFLOAT xVertexBelow, xVertexAbove, xVertex; + int vertBelow, vertAbove, vertNearest[Dim]; + for (d = 0; d < Dim; d++) { + vertBelow = -(int(gridSizes[d]) - 1) / 2; + vertAbove = 3 * (int(gridSizes[d]) - 1) / 2; + xVertexBelow = (*(meshPosition[d].find(vertBelow))).second; + xVertexAbove = (*(meshPosition[d].find(vertAbove))).second; + // check for out of bounds + if (x(d) < xVertexBelow) { + vertNearest[d] = vertBelow; + continue; + } + if (x(d) > xVertexAbove) { + vertNearest[d] = vertAbove; + continue; + } + while (vertAbove > vertBelow + 1) { + vertNearest[d] = (vertAbove + vertBelow) / 2; + xVertex = (*(meshPosition[d].find(vertNearest[d]))).second; + if (x(d) > xVertex) { + vertBelow = vertNearest[d]; + xVertexBelow = xVertex; + } else if (x(d) < xVertex) { + vertAbove = vertNearest[d]; + xVertexAbove = xVertex; + } else { // found exact match! + vertAbove = vertBelow; + } + } + if (vertAbove != vertBelow) { + if ((x(d) - xVertexBelow) < (xVertexAbove - x(d))) { + vertNearest[d] = vertBelow; + } else { + vertNearest[d] = vertAbove; + } + } + } + + // Construct the NDIndex for nearest vert get its position vector: + NDIndex ndi; + for (d = 0; d < Dim; d++) + ndi[d] = Index(vertNearest[d], vertNearest[d], 1); + + return ndi; } // Nearest vertex index with all vertex coordinates below (x,y,z): template -NDIndex -Cartesian:: -getVertexBelow(const Vektor& x) const -{ - unsigned int d; - Vektor boxMin, boxMax; - for (d=0; d boxMax(d)) ) - ERRORMSG("Cartesian::getVertexBelow() - input point is outside" - << " mesh boundary and guard layers; not allowed." << endl); - - // Find coordinate vectors of the vertices just below the input point; - MFLOAT xVertexBelow, xVertexAbove, xVertex; - int vertBelow, vertAbove, vertNearest[Dim]; - for (d=0; d xVertexAbove) { - vertNearest[d] = vertAbove; - continue; - } - while (vertAbove > vertBelow+1) { - vertNearest[d] = (vertAbove+vertBelow)/2; - xVertex = (*(meshPosition[d].find(vertNearest[d]))).second; - if (x(d) > xVertex) { - vertBelow = vertNearest[d]; - xVertexBelow = xVertex; - } - else if (x(d) < xVertex) { - vertAbove = vertNearest[d]; - xVertexAbove = xVertex; - } - else { // found exact match! - vertAbove = vertBelow; - } - } - if (vertAbove != vertBelow) { - vertNearest[d] = vertBelow; - } - } - - // Construct the NDIndex for nearest vert get its position vector: - NDIndex ndi; - for (d=0; d Cartesian::getVertexBelow(const Vektor& x) const { + unsigned int d; + Vektor boxMin, boxMax; + for (d = 0; d < Dim; d++) { + int gs = (int(gridSizes[d]) - 1) / 2; + boxMin(d) = (*(meshPosition[d].find(-gs))).second; + boxMax(d) = (*(meshPosition[d].find(3 * gs))).second; + } + for (d = 0; d < Dim; d++) + if ((x(d) < boxMin(d)) || (x(d) > boxMax(d))) + ERRORMSG("Cartesian::getVertexBelow() - input point is outside" + << " mesh boundary and guard layers; not allowed." << endl); + + // Find coordinate vectors of the vertices just below the input point; + MFLOAT xVertexBelow, xVertexAbove, xVertex; + int vertBelow, vertAbove, vertNearest[Dim]; + for (d = 0; d < Dim; d++) { + vertBelow = -(int(gridSizes[d]) - 1) / 2; + vertAbove = 3 * (int(gridSizes[d]) - 1) / 2; + xVertexBelow = (*(meshPosition[d].find(vertBelow))).second; + xVertexAbove = (*(meshPosition[d].find(vertAbove))).second; + // check for out of bounds + if (x(d) < xVertexBelow) { + vertNearest[d] = vertBelow; + continue; + } + if (x(d) > xVertexAbove) { + vertNearest[d] = vertAbove; + continue; + } + while (vertAbove > vertBelow + 1) { + vertNearest[d] = (vertAbove + vertBelow) / 2; + xVertex = (*(meshPosition[d].find(vertNearest[d]))).second; + if (x(d) > xVertex) { + vertBelow = vertNearest[d]; + xVertexBelow = xVertex; + } else if (x(d) < xVertex) { + vertAbove = vertNearest[d]; + xVertexAbove = xVertex; + } else { // found exact match! + vertAbove = vertBelow; + } + } + if (vertAbove != vertBelow) { + vertNearest[d] = vertBelow; + } + } + + // Construct the NDIndex for nearest vert get its position vector: + NDIndex ndi; + for (d = 0; d < Dim; d++) + ndi[d] = Index(vertNearest[d], vertNearest[d], 1); + + return ndi; } // (x,y,z) coordinates of indexed vertex: template -Vektor -Cartesian:: -getVertexPosition(const NDIndex& ndi) const -{ - unsigned int d; - int i; - Vektor vertexPosition; - for (d=0; d 3*(int(gridSizes[d])-1)/2) ) - ERRORMSG("Cartesian::getVertexPosition() error: " << ndi - << " is an NDIndex outside the mesh and guard layers;" - << " not allowed." << endl); - vertexPosition(d) = (*(meshPosition[d].find(i))).second; - } - return vertexPosition; +Vektor Cartesian::getVertexPosition(const NDIndex& ndi) const { + unsigned int d; + int i; + Vektor vertexPosition; + for (d = 0; d < Dim; d++) { + if (ndi[d].length() != 1) + ERRORMSG("Cartesian::getVertexPosition() error: " + << ndi << " is not an NDIndex specifying a single element" << endl); + i = ndi[d].first(); + if ((i < -(int(gridSizes[d]) - 1) / 2) || (i > 3 * (int(gridSizes[d]) - 1) / 2)) + ERRORMSG("Cartesian::getVertexPosition() error: " + << ndi << " is an NDIndex outside the mesh and guard layers;" + << " not allowed." << endl); + vertexPosition(d) = (*(meshPosition[d].find(i))).second; + } + return vertexPosition; } // Field of (x,y,z) coordinates of all vertices: template -Field,Dim,Cartesian,Vert>& -Cartesian:: -getVertexPositionField(Field,Dim, - Cartesian,Vert>& vertexPositions) const -{ - int currentLocation[Dim]; - Vektor vertexPosition; - vertexPositions.Uncompress(); - typename Field,Dim,Cartesian,Vert>::iterator fi, - fi_end = vertexPositions.end(); - for (fi = vertexPositions.begin(); fi != fi_end; ++fi) { - // Construct a NDIndex for each field element: - fi.GetCurrentLocation(currentLocation); - for (unsigned int d=0; d, Dim, Cartesian, Vert>& +Cartesian::getVertexPositionField( + Field, Dim, Cartesian, Vert>& vertexPositions) const { + int currentLocation[Dim]; + Vektor vertexPosition; + vertexPositions.Uncompress(); + typename Field, Dim, Cartesian, Vert>::iterator fi, + fi_end = vertexPositions.end(); + for (fi = vertexPositions.begin(); fi != fi_end; ++fi) { + // Construct a NDIndex for each field element: + fi.GetCurrentLocation(currentLocation); + for (unsigned int d = 0; d < Dim; d++) { + vertexPosition(d) = (*(meshPosition[d].find(currentLocation[d]))).second; + } + *fi = vertexPosition; + } + return vertexPositions; } // (x,y,z) coordinates of indexed cell: template -Vektor -Cartesian:: -getCellPosition(const NDIndex& ndi) const -{ - unsigned int d; - int i; - Vektor cellPosition; - for (d=0; d= 3*(int(gridSizes[d])-1)/2) ) - ERRORMSG("Cartesian::getCellPosition() error: " << ndi - << " is an NDIndex outside the mesh and guard layers;" - << " not allowed." << endl); - cellPosition(d) = 0.5 * ( (*(meshPosition[d].find(i))).second + - (*(meshPosition[d].find(i+1))).second ); - } - return cellPosition; +Vektor Cartesian::getCellPosition(const NDIndex& ndi) const { + unsigned int d; + int i; + Vektor cellPosition; + for (d = 0; d < Dim; d++) { + if (ndi[d].length() != 1) + ERRORMSG("Cartesian::getCellPosition() error: " + << ndi << " is not an NDIndex specifying a single element" << endl); + i = ndi[d].first(); + if ((i < -(int(gridSizes[d]) - 1) / 2) || (i >= 3 * (int(gridSizes[d]) - 1) / 2)) + ERRORMSG("Cartesian::getCellPosition() error: " + << ndi << " is an NDIndex outside the mesh and guard layers;" + << " not allowed." << endl); + cellPosition(d) = + 0.5 * ((*(meshPosition[d].find(i))).second + (*(meshPosition[d].find(i + 1))).second); + } + return cellPosition; } // Field of (x,y,z) coordinates of all cells: template -Field,Dim,Cartesian,Cell>& -Cartesian:: -getCellPositionField(Field,Dim, - Cartesian,Cell>& cellPositions) const -{ - int currentLocation[Dim]; - Vektor cellPosition; - cellPositions.Uncompress(); - typename Field,Dim,Cartesian,Cell>::iterator fi, - fi_end = cellPositions.end(); - for (fi = cellPositions.begin(); fi != fi_end; ++fi) { - // Construct a NDIndex for each field element: - fi.GetCurrentLocation(currentLocation); - for (unsigned int d=0; d, Dim, Cartesian, Cell>& +Cartesian::getCellPositionField( + Field, Dim, Cartesian, Cell>& cellPositions) const { + int currentLocation[Dim]; + Vektor cellPosition; + cellPositions.Uncompress(); + typename Field, Dim, Cartesian, Cell>::iterator fi, + fi_end = cellPositions.end(); + for (fi = cellPositions.begin(); fi != fi_end; ++fi) { + // Construct a NDIndex for each field element: + fi.GetCurrentLocation(currentLocation); + for (unsigned int d = 0; d < Dim; d++) { + cellPosition(d) = 0.5 + * ((*(meshPosition[d].find(currentLocation[d]))).second + + (*(meshPosition[d].find(currentLocation[d] + 1))).second); + } + *fi = cellPosition; + } + return cellPositions; } // Vertex-vertex grid spacing of indexed cell: template -Vektor -Cartesian:: -getDeltaVertex(const NDIndex& ndi) const -{ - // return value - Vektor vertexVertexSpacing(0); - - for (unsigned int d=0; d Cartesian::getDeltaVertex(const NDIndex& ndi) const { + // return value + Vektor vertexVertexSpacing(0); + + for (unsigned int d = 0; d < Dim; d++) { + // endpoints of the index range ... make sure they are in ascending order + int a = ndi[d].first(); + int b = ndi[d].last(); + if (b < a) { + int tmpa = a; + a = b; + b = tmpa; + } + + // make sure we have valid endpoints + if (a < -((int(gridSizes[d]) - 1) / 2) || b >= 3 * (int(gridSizes[d]) - 1) / 2) { + ERRORMSG("Cartesian::getDeltaVertex() error: " + << ndi << " is an NDIndex ranging outside" + << " the mesh and guard layers region; not allowed." << endl); + } + + // add up all the values between the endpoints + // N.B.: following may need modification to be right for periodic Mesh BC: + while (a <= b) + vertexVertexSpacing[d] += (*(meshSpacing[d].find(a++))).second; } - // make sure we have valid endpoints - if (a < -((int(gridSizes[d])-1)/2) || b >= 3*(int(gridSizes[d])-1)/2) { - ERRORMSG("Cartesian::getDeltaVertex() error: " << ndi - << " is an NDIndex ranging outside" - << " the mesh and guard layers region; not allowed." - << endl); - } - - // add up all the values between the endpoints - // N.B.: following may need modification to be right for periodic Mesh BC: - while (a <= b) - vertexVertexSpacing[d] += (*(meshSpacing[d].find(a++))).second; - } - - return vertexVertexSpacing; + return vertexVertexSpacing; } // Field of vertex-vertex grid spacings of all cells: template -Field,Dim,Cartesian,Cell>& -Cartesian:: -getDeltaVertexField(Field,Dim, - Cartesian,Cell>& vertexSpacings) const -{ - int currentLocation[Dim]; - Vektor vertexVertexSpacing; - vertexSpacings.Uncompress(); - typename Field,Dim,Cartesian,Cell>::iterator fi, - fi_end = vertexSpacings.end(); - for (fi = vertexSpacings.begin(); fi != fi_end; ++fi) { - fi.GetCurrentLocation(currentLocation); - for (unsigned int d=0; d, Dim, Cartesian, Cell>& +Cartesian::getDeltaVertexField( + Field, Dim, Cartesian, Cell>& vertexSpacings) const { + int currentLocation[Dim]; + Vektor vertexVertexSpacing; + vertexSpacings.Uncompress(); + typename Field, Dim, Cartesian, Cell>::iterator fi, + fi_end = vertexSpacings.end(); + for (fi = vertexSpacings.begin(); fi != fi_end; ++fi) { + fi.GetCurrentLocation(currentLocation); + for (unsigned int d = 0; d < Dim; d++) + vertexVertexSpacing[d] = (*(meshSpacing[d].find(currentLocation[d]))).second; + *fi = vertexVertexSpacing; + } + return vertexSpacings; } // Cell-cell grid spacing of indexed cell: template -Vektor -Cartesian:: -getDeltaCell(const NDIndex& ndi) const -{ - // return value - Vektor cellCellSpacing(0); - - for (unsigned int d=0; d= 3*(int(gridSizes[d])-1)/2) { - ERRORMSG("Cartesian::getDeltaCell() error: " << ndi - << " is an NDIndex ranging outside" - << " the mesh and guard layers region; not allowed." - << endl); - } - - // add up the contributions along the interval ... - while (a <= b) { - cellCellSpacing[d] += ((*(meshSpacing[d].find(a))).second + - (*(meshSpacing[d].find(a-1))).second) * 0.5; - a++; +Vektor Cartesian::getDeltaCell(const NDIndex& ndi) const { + // return value + Vektor cellCellSpacing(0); + + for (unsigned int d = 0; d < Dim; d++) { + // endpoints of the index range ... make sure they are in ascending order + int a = ndi[d].first(); + int b = ndi[d].last(); + if (b < a) { + int tmpa = a; + a = b; + b = tmpa; + } + + // make sure the endpoints are valid + if (a <= -(int(gridSizes[d]) - 1) / 2 || b >= 3 * (int(gridSizes[d]) - 1) / 2) { + ERRORMSG("Cartesian::getDeltaCell() error: " + << ndi << " is an NDIndex ranging outside" + << " the mesh and guard layers region; not allowed." << endl); + } + + // add up the contributions along the interval ... + while (a <= b) { + cellCellSpacing[d] += + ((*(meshSpacing[d].find(a))).second + (*(meshSpacing[d].find(a - 1))).second) * 0.5; + a++; + } } - } - return cellCellSpacing; + return cellCellSpacing; } // Field of cell-cell grid spacings of all cells: template -Field,Dim,Cartesian,Vert>& -Cartesian:: -getDeltaCellField(Field,Dim, - Cartesian,Vert>& cellSpacings) const -{ - int currentLocation[Dim]; - Vektor cellCellSpacing; - cellSpacings.Uncompress(); - typename Field,Dim,Cartesian,Vert>::iterator fi, - fi_end = cellSpacings.end(); - for (fi = cellSpacings.begin(); fi != fi_end; ++fi) { - fi.GetCurrentLocation(currentLocation); - for (unsigned int d=0; d, Dim, Cartesian, Vert>& +Cartesian::getDeltaCellField( + Field, Dim, Cartesian, Vert>& cellSpacings) const { + int currentLocation[Dim]; + Vektor cellCellSpacing; + cellSpacings.Uncompress(); + typename Field, Dim, Cartesian, Vert>::iterator fi, + fi_end = cellSpacings.end(); + for (fi = cellSpacings.begin(); fi != fi_end; ++fi) { + fi.GetCurrentLocation(currentLocation); + for (unsigned int d = 0; d < Dim; d++) + cellCellSpacing[d] += ((*(meshSpacing[d].find(currentLocation[d]))).second + + (*(meshSpacing[d].find(currentLocation[d] - 1))).second) + * 0.5; + *fi = cellCellSpacing; + } + return cellSpacings; } // Array of surface normals to cells adjoining indexed cell: template -Vektor* -Cartesian:: -getSurfaceNormals(const NDIndex& /*ndi*/) const -{ - Vektor* surfaceNormals = new Vektor[2*Dim]; - unsigned int d, i; - for (d=0; d* Cartesian::getSurfaceNormals(const NDIndex& /*ndi*/) const { + Vektor* surfaceNormals = new Vektor[2 * Dim]; + unsigned int d, i; + for (d = 0; d < Dim; d++) { + for (i = 0; i < Dim; i++) { + surfaceNormals[2 * d](i) = 0.0; + surfaceNormals[2 * d + 1](i) = 0.0; + } + surfaceNormals[2 * d](d) = -1.0; + surfaceNormals[2 * d + 1](d) = 1.0; + } + return surfaceNormals; } // Array of (pointers to) Fields of surface normals to all cells: template -void -Cartesian:: -getSurfaceNormalFields(Field, Dim, - Cartesian,Cell>** - surfaceNormalsFields ) const -{ - Vektor* surfaceNormals = new Vektor[2*Dim]; - unsigned int d, i; - for (d=0; d::getSurfaceNormalFields( + Field, Dim, Cartesian, Cell>** surfaceNormalsFields) const { + Vektor* surfaceNormals = new Vektor[2 * Dim]; + unsigned int d, i; + for (d = 0; d < Dim; d++) { + for (i = 0; i < Dim; i++) { + surfaceNormals[2 * d](i) = 0.0; + surfaceNormals[2 * d + 1](i) = 0.0; + } + surfaceNormals[2 * d](d) = -1.0; + surfaceNormals[2 * d + 1](d) = 1.0; + } + for (d = 0; d < 2 * Dim; d++) + assign((*(surfaceNormalsFields[d])), surfaceNormals[d]); + // return surfaceNormalsFields; } // Similar functions, but specify the surface normal to a single face, using // the following numbering convention: 0 means low face of 1st dim, 1 means @@ -1977,68 +1789,64 @@ getSurfaceNormalFields(Field, Dim, // 2nd dim, and so on: // Surface normal to face on indexed cell: template -Vektor -Cartesian:: -getSurfaceNormal(const NDIndex& /*ndi*/, unsigned face) const -{ - Vektor surfaceNormal; - unsigned int d; - // The following bitwise AND logical test returns true if face is odd - // (meaning the "high" or "right" face in the numbering convention) and - // returns false if face is even (meaning the "low" or "left" face in the - // numbering convention): - if ( face & 1 ) { - for (d=0; d Cartesian::getSurfaceNormal(const NDIndex& /*ndi*/, + unsigned face) const { + Vektor surfaceNormal; + unsigned int d; + // The following bitwise AND logical test returns true if face is odd + // (meaning the "high" or "right" face in the numbering convention) and + // returns false if face is even (meaning the "low" or "left" face in the + // numbering convention): + if (face & 1) { + for (d = 0; d < Dim; d++) { + if ((face / 2) == d) { + surfaceNormal(face) = -1.0; + } else { + surfaceNormal(face) = 0.0; + } + } + } else { + for (d = 0; d < Dim; d++) { + if ((face / 2) == d) { + surfaceNormal(face) = 1.0; + } else { + surfaceNormal(face) = 0.0; + } + } + } + return surfaceNormal; } // Field of surface normals to face on all cells: template -Field,Dim,Cartesian,Cell>& -Cartesian:: -getSurfaceNormalField(Field, Dim, - Cartesian,Cell>& surfaceNormalField, - unsigned face) const -{ - Vektor surfaceNormal; - unsigned int d; - // The following bitwise AND logical test returns true if face is odd - // (meaning the "high" or "right" face in the numbering convention) and - // returns false if face is even (meaning the "low" or "left" face in the - // numbering convention): - if ( face & 1 ) { - for (d=0; d, Dim, Cartesian, Cell>& +Cartesian::getSurfaceNormalField( + Field, Dim, Cartesian, Cell>& surfaceNormalField, + unsigned face) const { + Vektor surfaceNormal; + unsigned int d; + // The following bitwise AND logical test returns true if face is odd + // (meaning the "high" or "right" face in the numbering convention) and + // returns false if face is even (meaning the "low" or "left" face in the + // numbering convention): + if (face & 1) { + for (d = 0; d < Dim; d++) { + if ((face / 2) == d) { + surfaceNormal(face) = -1.0; + } else { + surfaceNormal(face) = 0.0; + } + } + } else { + for (d = 0; d < Dim; d++) { + if ((face / 2) == d) { + surfaceNormal(face) = 1.0; + } else { + surfaceNormal(face) = 0.0; + } + } + } + surfaceNormalField = surfaceNormal; + return surfaceNormalField; } // Set up mesh boundary conditions: @@ -2046,140 +1854,116 @@ getSurfaceNormalField(Field, Dim, // MeshBC_E "type" specifies the kind of BC reflective/periodic/none. // One face at a time: template -void -Cartesian:: -set_MeshBC(unsigned face, MeshBC_E meshBCType) -{ - MeshBC[face] = meshBCType; - updateMeshSpacingGuards(face); - // if spacing fields allocated, we must update values - if (hasSpacingFields) storeSpacingFields(); +void Cartesian::set_MeshBC(unsigned face, MeshBC_E meshBCType) { + MeshBC[face] = meshBCType; + updateMeshSpacingGuards(face); + // if spacing fields allocated, we must update values + if (hasSpacingFields) + storeSpacingFields(); } // All faces at once: template -void -Cartesian:: -set_MeshBC(MeshBC_E* meshBCTypes) -{ - for (unsigned int face=0; face < 2*Dim; face++) { - MeshBC[face] = meshBCTypes[face]; - updateMeshSpacingGuards(face); - } - // if spacing fields allocated, we must update values - if (hasSpacingFields) storeSpacingFields(); +void Cartesian::set_MeshBC(MeshBC_E* meshBCTypes) { + for (unsigned int face = 0; face < 2 * Dim; face++) { + MeshBC[face] = meshBCTypes[face]; + updateMeshSpacingGuards(face); + } + // if spacing fields allocated, we must update values + if (hasSpacingFields) + storeSpacingFields(); } // Helper function to update guard layer values of mesh spacings: template -void -Cartesian:: -updateMeshSpacingGuards(int face) -{ - // Apply the current state of the mesh BC to add guards to meshSpacings map - // Assume worst case of needing ngridpts/2 guard layers (for periodic, most - // likely): - int d = face/2; - unsigned int cell, guardLayer; - // The following bitwise AND logical test returns true if face is odd - // (meaning the "high" or "right" face in the numbering convention) and - // returns false if face is even (meaning the "low" or "left" face in - // the numbering convention): - if ( face & 1 ) { - // "High" guard cells: - switch (MeshBC[d*2]) { - case Periodic: - for (guardLayer = 0; guardLayer <= (gridSizes[d]-1)/2; guardLayer++) { - cell = gridSizes[d] - 1 + guardLayer; - (meshSpacing[d])[cell] = (meshSpacing[d])[guardLayer]; - (meshPosition[d])[cell+1] = (meshPosition[d])[cell] + - (meshSpacing[d])[cell]; - } - break; - case Reflective: - for (guardLayer = 0; guardLayer <= (gridSizes[d]-1)/2; guardLayer++) { - cell = gridSizes[d] - 1 + guardLayer; - (meshSpacing[d])[cell] = (meshSpacing[d])[cell - guardLayer - 1]; - (meshPosition[d])[cell+1] = (meshPosition[d])[cell] + - (meshSpacing[d])[cell]; - } - break; - case NoBC: - for (guardLayer = 0; guardLayer <= (gridSizes[d]-1)/2; guardLayer++) { - cell = gridSizes[d] - 1 + guardLayer; - (meshSpacing[d])[cell] = 0; - (meshPosition[d])[cell+1] = (meshPosition[d])[cell] + - (meshSpacing[d])[cell]; - } - break; - default: - ERRORMSG("Cartesian::updateMeshSpacingGuards(): unknown MeshBC type" - << endl); - break; - } - } - else { - // "Low" guard cells: - switch (MeshBC[d]) { - case Periodic: - for (guardLayer = 0; guardLayer <= (gridSizes[d]-1)/2; guardLayer++) { - cell = -1 - guardLayer; - (meshSpacing[d])[cell] = (meshSpacing[d])[gridSizes[d] + cell]; - (meshPosition[d])[cell] = (meshPosition[d])[cell+1] - - (meshSpacing[d])[cell]; - } - break; - case Reflective: - for (guardLayer = 0; guardLayer <= (gridSizes[d]-1)/2; guardLayer++) { - cell = -1 - guardLayer; - (meshSpacing[d])[cell] = (meshSpacing[d])[-cell - 1]; - (meshPosition[d])[cell] = (meshPosition[d])[cell+1] - - (meshSpacing[d])[cell]; - } - break; - case NoBC: - for (guardLayer = 0; guardLayer <= (gridSizes[d]-1)/2; guardLayer++) { - cell = -1 - guardLayer; - (meshSpacing[d])[cell] = 0; - (meshPosition[d])[cell] = (meshPosition[d])[cell+1] - - (meshSpacing[d])[cell]; - } - break; - default: - ERRORMSG("Cartesian::updateMeshSpacingGuards(): unknown MeshBC type" - << endl); - break; - } - } +void Cartesian::updateMeshSpacingGuards(int face) { + // Apply the current state of the mesh BC to add guards to meshSpacings map + // Assume worst case of needing ngridpts/2 guard layers (for periodic, most + // likely): + int d = face / 2; + unsigned int cell, guardLayer; + // The following bitwise AND logical test returns true if face is odd + // (meaning the "high" or "right" face in the numbering convention) and + // returns false if face is even (meaning the "low" or "left" face in + // the numbering convention): + if (face & 1) { + // "High" guard cells: + switch (MeshBC[d * 2]) { + case Periodic: + for (guardLayer = 0; guardLayer <= (gridSizes[d] - 1) / 2; guardLayer++) { + cell = gridSizes[d] - 1 + guardLayer; + (meshSpacing[d])[cell] = (meshSpacing[d])[guardLayer]; + (meshPosition[d])[cell + 1] = (meshPosition[d])[cell] + (meshSpacing[d])[cell]; + } + break; + case Reflective: + for (guardLayer = 0; guardLayer <= (gridSizes[d] - 1) / 2; guardLayer++) { + cell = gridSizes[d] - 1 + guardLayer; + (meshSpacing[d])[cell] = (meshSpacing[d])[cell - guardLayer - 1]; + (meshPosition[d])[cell + 1] = (meshPosition[d])[cell] + (meshSpacing[d])[cell]; + } + break; + case NoBC: + for (guardLayer = 0; guardLayer <= (gridSizes[d] - 1) / 2; guardLayer++) { + cell = gridSizes[d] - 1 + guardLayer; + (meshSpacing[d])[cell] = 0; + (meshPosition[d])[cell + 1] = (meshPosition[d])[cell] + (meshSpacing[d])[cell]; + } + break; + default: + ERRORMSG("Cartesian::updateMeshSpacingGuards(): unknown MeshBC type" << endl); + break; + } + } else { + // "Low" guard cells: + switch (MeshBC[d]) { + case Periodic: + for (guardLayer = 0; guardLayer <= (gridSizes[d] - 1) / 2; guardLayer++) { + cell = -1 - guardLayer; + (meshSpacing[d])[cell] = (meshSpacing[d])[gridSizes[d] + cell]; + (meshPosition[d])[cell] = (meshPosition[d])[cell + 1] - (meshSpacing[d])[cell]; + } + break; + case Reflective: + for (guardLayer = 0; guardLayer <= (gridSizes[d] - 1) / 2; guardLayer++) { + cell = -1 - guardLayer; + (meshSpacing[d])[cell] = (meshSpacing[d])[-cell - 1]; + (meshPosition[d])[cell] = (meshPosition[d])[cell + 1] - (meshSpacing[d])[cell]; + } + break; + case NoBC: + for (guardLayer = 0; guardLayer <= (gridSizes[d] - 1) / 2; guardLayer++) { + cell = -1 - guardLayer; + (meshSpacing[d])[cell] = 0; + (meshPosition[d])[cell] = (meshPosition[d])[cell + 1] - (meshSpacing[d])[cell]; + } + break; + default: + ERRORMSG("Cartesian::updateMeshSpacingGuards(): unknown MeshBC type" << endl); + break; + } + } } // Get mesh boundary conditions: // One face at a time template -MeshBC_E -Cartesian:: -get_MeshBC(unsigned face) const -{ - MeshBC_E mb; - mb = MeshBC[face]; - return mb; +MeshBC_E Cartesian::get_MeshBC(unsigned face) const { + MeshBC_E mb; + mb = MeshBC[face]; + return mb; } // All faces at once template -MeshBC_E* -Cartesian:: -get_MeshBC() const -{ - MeshBC_E* mb = new MeshBC_E[2*Dim]; - for (unsigned int b=0; b < 2*Dim; b++) mb[b] = MeshBC[b]; - return mb; +MeshBC_E* Cartesian::get_MeshBC() const { + MeshBC_E* mb = new MeshBC_E[2 * Dim]; + for (unsigned int b = 0; b < 2 * Dim; b++) + mb[b] = MeshBC[b]; + return mb; } - - //-------------------------------------------------------------------------- // Global functions //-------------------------------------------------------------------------- - //***************************************************************************** // Stuff taken from old Cartesian.h, modified for new nonuniform Cartesian: //***************************************************************************** @@ -2187,120 +1971,107 @@ get_MeshBC() const //---------------------------------------------------------------------- // Divergence Vektor/Vert -> Scalar/Cell //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Cell>& -Div(Field,1U,Cartesian<1U,MFLOAT>,Vert>& x, - Field,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,1U>& vertSpacings = *(x.get_mesh().VertSpacings); - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - r[I] = - dot(x[I ], x.get_mesh().Dvc[0]/vertSpacings[I]) + - dot(x[I+1], x.get_mesh().Dvc[1]/vertSpacings[I]); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Cell>& -Div(Field,2U,Cartesian<2U,MFLOAT>,Vert>& x, - Field,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,2U>& vertSpacings = *(x.get_mesh().VertSpacings); - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - r[I][J] = - dot(x[I ][J ], x.get_mesh().Dvc[0]/vertSpacings[I][J]) + - dot(x[I+1][J ], x.get_mesh().Dvc[1]/vertSpacings[I][J]) + - dot(x[I ][J+1], x.get_mesh().Dvc[2]/vertSpacings[I][J]) + - dot(x[I+1][J+1], x.get_mesh().Dvc[3]/vertSpacings[I][J]); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Cell>& -Div(Field,3U,Cartesian<3U,MFLOAT>,Vert>& x, - Field,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,3U>& vertSpacings = *(x.get_mesh().VertSpacings); - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - r[I][J][K] = - dot(x[I ][J ][K ], x.get_mesh().Dvc[0]/vertSpacings[I][J][K]) + - dot(x[I+1][J ][K ], x.get_mesh().Dvc[1]/vertSpacings[I][J][K]) + - dot(x[I ][J+1][K ], x.get_mesh().Dvc[2]/vertSpacings[I][J][K]) + - dot(x[I+1][J+1][K ], x.get_mesh().Dvc[3]/vertSpacings[I][J][K]) + - dot(x[I ][J ][K+1], x.get_mesh().Dvc[4]/vertSpacings[I][J][K]) + - dot(x[I+1][J ][K+1], x.get_mesh().Dvc[5]/vertSpacings[I][J][K]) + - dot(x[I ][J+1][K+1], x.get_mesh().Dvc[6]/vertSpacings[I][J][K]) + - dot(x[I+1][J+1][K+1], x.get_mesh().Dvc[7]/vertSpacings[I][J][K]); - return r; +template +Field, Cell>& Div( + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& x, + Field, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 1U>& vertSpacings = *(x.get_mesh().VertSpacings); + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + r[I] = dot(x[I], x.get_mesh().Dvc[0] / vertSpacings[I]) + + dot(x[I + 1], x.get_mesh().Dvc[1] / vertSpacings[I]); + return r; +} +//---------------------------------------------------------------------- +template +Field, Cell>& Div( + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& x, + Field, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 2U>& vertSpacings = *(x.get_mesh().VertSpacings); + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + r[I][J] = dot(x[I][J], x.get_mesh().Dvc[0] / vertSpacings[I][J]) + + dot(x[I + 1][J], x.get_mesh().Dvc[1] / vertSpacings[I][J]) + + dot(x[I][J + 1], x.get_mesh().Dvc[2] / vertSpacings[I][J]) + + dot(x[I + 1][J + 1], x.get_mesh().Dvc[3] / vertSpacings[I][J]); + return r; +} +//---------------------------------------------------------------------- +template +Field, Cell>& Div( + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& x, + Field, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 3U>& vertSpacings = *(x.get_mesh().VertSpacings); + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + r[I][J][K] = dot(x[I][J][K], x.get_mesh().Dvc[0] / vertSpacings[I][J][K]) + + dot(x[I + 1][J][K], x.get_mesh().Dvc[1] / vertSpacings[I][J][K]) + + dot(x[I][J + 1][K], x.get_mesh().Dvc[2] / vertSpacings[I][J][K]) + + dot(x[I + 1][J + 1][K], x.get_mesh().Dvc[3] / vertSpacings[I][J][K]) + + dot(x[I][J][K + 1], x.get_mesh().Dvc[4] / vertSpacings[I][J][K]) + + dot(x[I + 1][J][K + 1], x.get_mesh().Dvc[5] / vertSpacings[I][J][K]) + + dot(x[I][J + 1][K + 1], x.get_mesh().Dvc[6] / vertSpacings[I][J][K]) + + dot(x[I + 1][J + 1][K + 1], x.get_mesh().Dvc[7] / vertSpacings[I][J][K]); + return r; } //---------------------------------------------------------------------- // Divergence Vektor/Cell -> Scalar/Vert //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Vert>& -Div(Field,1U,Cartesian<1U,MFLOAT>,Cell>& x, - Field,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,1U>& cellSpacings = *(x.get_mesh().CellSpacings); - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - r[I] = - dot(x[I-1], x.get_mesh().Dvc[0]/cellSpacings[I]) + - dot(x[I ], x.get_mesh().Dvc[1]/cellSpacings[I]); - return r; +template +Field, Vert>& Div( + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& x, + Field, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 1U>& cellSpacings = *(x.get_mesh().CellSpacings); + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + r[I] = dot(x[I - 1], x.get_mesh().Dvc[0] / cellSpacings[I]) + + dot(x[I], x.get_mesh().Dvc[1] / cellSpacings[I]); + return r; +} +//---------------------------------------------------------------------- +template +Field, Vert>& Div( + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& x, + Field, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 2U>& cellSpacings = *(x.get_mesh().CellSpacings); + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + r[I][J] = dot(x[I - 1][J - 1], x.get_mesh().Dvc[0] / cellSpacings[I][J]) + + dot(x[I][J - 1], x.get_mesh().Dvc[1] / cellSpacings[I][J]) + + dot(x[I - 1][J], x.get_mesh().Dvc[2] / cellSpacings[I][J]) + + dot(x[I][J], x.get_mesh().Dvc[3] / cellSpacings[I][J]); + return r; +} +//---------------------------------------------------------------------- +template +Field, Vert>& Div( + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& x, + Field, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 3U>& cellSpacings = *(x.get_mesh().CellSpacings); + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + r[I][J][K] = dot(x[I - 1][J - 1][K - 1], x.get_mesh().Dvc[0] / cellSpacings[I][J][K]) + + dot(x[I][J - 1][K - 1], x.get_mesh().Dvc[1] / cellSpacings[I][J][K]) + + dot(x[I - 1][J][K - 1], x.get_mesh().Dvc[2] / cellSpacings[I][J][K]) + + dot(x[I][J][K - 1], x.get_mesh().Dvc[3] / cellSpacings[I][J][K]) + + dot(x[I - 1][J - 1][K], x.get_mesh().Dvc[4] / cellSpacings[I][J][K]) + + dot(x[I][J - 1][K], x.get_mesh().Dvc[5] / cellSpacings[I][J][K]) + + dot(x[I - 1][J][K], x.get_mesh().Dvc[6] / cellSpacings[I][J][K]) + + dot(x[I][J][K], x.get_mesh().Dvc[7] / cellSpacings[I][J][K]); + return r; } -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Vert>& -Div(Field,2U,Cartesian<2U,MFLOAT>,Cell>& x, - Field,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,2U>& cellSpacings = *(x.get_mesh().CellSpacings); - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - r[I][J] = - dot(x[I-1][J-1], x.get_mesh().Dvc[0]/cellSpacings[I][J]) + - dot(x[I ][J-1], x.get_mesh().Dvc[1]/cellSpacings[I][J]) + - dot(x[I-1][J ], x.get_mesh().Dvc[2]/cellSpacings[I][J]) + - dot(x[I ][J ], x.get_mesh().Dvc[3]/cellSpacings[I][J]); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Vert>& -Div(Field,3U,Cartesian<3U,MFLOAT>,Cell>& x, - Field,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,3U>& cellSpacings = *(x.get_mesh().CellSpacings); - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - r[I][J][K] = - dot(x[I-1][J-1][K-1], x.get_mesh().Dvc[0]/cellSpacings[I][J][K]) + - dot(x[I ][J-1][K-1], x.get_mesh().Dvc[1]/cellSpacings[I][J][K]) + - dot(x[I-1][J ][K-1], x.get_mesh().Dvc[2]/cellSpacings[I][J][K]) + - dot(x[I ][J ][K-1], x.get_mesh().Dvc[3]/cellSpacings[I][J][K]) + - dot(x[I-1][J-1][K ], x.get_mesh().Dvc[4]/cellSpacings[I][J][K]) + - dot(x[I ][J-1][K ], x.get_mesh().Dvc[5]/cellSpacings[I][J][K]) + - dot(x[I-1][J ][K ], x.get_mesh().Dvc[6]/cellSpacings[I][J][K]) + - dot(x[I ][J ][K ], x.get_mesh().Dvc[7]/cellSpacings[I][J][K]); - return r; -} - // TJW: I've attempted to update these differential operators from uniform // cartesian implementations to workfor (nonuniform) Cartesian class, but they @@ -2318,71 +2089,63 @@ Div(Field,3U,Cartesian<3U,MFLOAT>,Cell>& x, // Divergence Vektor/Vert -> Scalar/Vert // (Re-coded 1/20/1998 tjw. Hope it's right....???) //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Vert>& -Div(Field,1U,Cartesian<1U,MFLOAT>,Vert>& x, - Field,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,1U>& vS = *(x.get_mesh().VertSpacings); - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - Vektor idx; - idx[0] = 1.0; - r[I] = dot(idx, (x[I+1] - x[I-1])/(vS[I ] + vS[I-1])); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Vert>& -Div(Field,2U,Cartesian<2U,MFLOAT>,Vert>& x, - Field,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,2U>& vS = *(x.get_mesh().VertSpacings); - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Vektor idx,idy; - idx[0] = 1.0; - idx[1] = 0.0; - idy[0] = 0.0; - idy[1] = 1.0; - r[I][J] = - dot(idx, (x[I+1][J ] - x[I-1][J ])/(vS[I ][J ] + vS[I-1][J ])) + - dot(idy, (x[I ][J+1] - x[I ][J-1])/(vS[I ][J ] + vS[I ][J-1])); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Vert>& -Div(Field,3U,Cartesian<3U,MFLOAT>,Vert>& x, - Field,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,3U>& vS = *(x.get_mesh().VertSpacings); - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - Vektor idx,idy,idz; - idx[0] = 1.0; - idx[1] = 0.0; - idx[2] = 0.0; - idy[0] = 0.0; - idy[1] = 1.0; - idy[2] = 0.0; - idz[0] = 0.0; - idz[1] = 0.0; - idz[2] = 1.0; - r[I][J][K] = - dot(idx, ((x[I+1][J ][K ] - x[I-1][J ][K ])/ - (vS[I ][J ][K ] + vS[I-1][J ][K ]))) + - dot(idy, ((x[I ][J+1][K ] - x[I ][J-1][K ])/ - (vS[I ][J ][K ] + vS[I ][J-1][K ]))) + - dot(idz, ((x[I ][J ][K+1] - x[I ][J ][K-1])/ - (vS[I ][J ][K ] + vS[I ][J ][K-1]))); - return r; +template +Field, Vert>& Div( + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& x, + Field, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 1U>& vS = *(x.get_mesh().VertSpacings); + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + Vektor idx; + idx[0] = 1.0; + r[I] = dot(idx, (x[I + 1] - x[I - 1]) / (vS[I] + vS[I - 1])); + return r; +} +//---------------------------------------------------------------------- +template +Field, Vert>& Div( + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& x, + Field, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 2U>& vS = *(x.get_mesh().VertSpacings); + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Vektor idx, idy; + idx[0] = 1.0; + idx[1] = 0.0; + idy[0] = 0.0; + idy[1] = 1.0; + r[I][J] = dot(idx, (x[I + 1][J] - x[I - 1][J]) / (vS[I][J] + vS[I - 1][J])) + + dot(idy, (x[I][J + 1] - x[I][J - 1]) / (vS[I][J] + vS[I][J - 1])); + return r; +} +//---------------------------------------------------------------------- +template +Field, Vert>& Div( + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& x, + Field, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 3U>& vS = *(x.get_mesh().VertSpacings); + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + Vektor idx, idy, idz; + idx[0] = 1.0; + idx[1] = 0.0; + idx[2] = 0.0; + idy[0] = 0.0; + idy[1] = 1.0; + idy[2] = 0.0; + idz[0] = 0.0; + idz[1] = 0.0; + idz[2] = 1.0; + r[I][J][K] = dot(idx, ((x[I + 1][J][K] - x[I - 1][J][K]) / (vS[I][J][K] + vS[I - 1][J][K]))) + + dot(idy, ((x[I][J + 1][K] - x[I][J - 1][K]) / (vS[I][J][K] + vS[I][J - 1][K]))) + + dot(idz, ((x[I][J][K + 1] - x[I][J][K - 1]) / (vS[I][J][K] + vS[I][J][K - 1]))); + return r; } //---------------------------------------------------------------------- // Divergence Vektor/Cell -> Scalar/Cell (???right? tjw 3/10/97) @@ -2391,305 +2154,273 @@ Div(Field,3U,Cartesian<3U,MFLOAT>,Vert>& x, // the 1D denom should be (cs[I+1] + cs[I]) as I see it. // This one wasn't in test/simple/TestCartesian.cpp. //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Cell>& -Div(Field,1U,Cartesian<1U,MFLOAT>,Cell>& x, - Field,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,1U>& cS = *(x.get_mesh().CellSpacings); - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - Vektor idx; - idx[0] = 1.0; - r[I] = dot(idx, (x[I+1] - x[I-1])/(cS[I ] + cS[I-1])); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Cell>& -Div(Field,2U,Cartesian<2U,MFLOAT>,Cell>& x, - Field,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,2U>& cS = *(x.get_mesh().CellSpacings); - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Vektor idx,idy; - idx[0] = 1.0; - idx[1] = 0.0; - idy[0] = 0.0; - idy[1] = 1.0; - r[I][J] = - dot(idx, (x[I+1][J ] - x[I-1][J ])/(cS[I ][J ] + cS[I-1][J ])) + - dot(idy, (x[I ][J+1] - x[I ][J-1])/(cS[I ][J ] + cS[I ][J-1])); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,Cell>& -Div(Field,3U,Cartesian<3U,MFLOAT>,Cell>& x, - Field,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,3U>& cS = *(x.get_mesh().CellSpacings); - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - Vektor idx,idy,idz; - idx[0] = 1.0; - idx[1] = 0.0; - idx[2] = 0.0; - idy[0] = 0.0; - idy[1] = 1.0; - idy[2] = 0.0; - idz[0] = 0.0; - idz[1] = 0.0; - idz[2] = 1.0; - r[I][J][K] = - dot(idx, ((x[I+1][J ][K ] - x[I-1][J ][K ])/ - (cS[I ][J ][K ] + cS[I-1][J ][K ]))) + - dot(idy, ((x[I ][J+1][K ] - x[I ][J-1][K ])/ - (cS[I ][J ][K ] + cS[I ][J-1][K ]))) + - dot(idz, ((x[I ][J ][K+1] - x[I ][J ][K-1])/ - (cS[I ][J ][K ] + cS[I ][J ][K-1]))); - return r; +template +Field, Cell>& Div( + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& x, + Field, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 1U>& cS = *(x.get_mesh().CellSpacings); + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + Vektor idx; + idx[0] = 1.0; + r[I] = dot(idx, (x[I + 1] - x[I - 1]) / (cS[I] + cS[I - 1])); + return r; +} +//---------------------------------------------------------------------- +template +Field, Cell>& Div( + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& x, + Field, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 2U>& cS = *(x.get_mesh().CellSpacings); + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Vektor idx, idy; + idx[0] = 1.0; + idx[1] = 0.0; + idy[0] = 0.0; + idy[1] = 1.0; + r[I][J] = dot(idx, (x[I + 1][J] - x[I - 1][J]) / (cS[I][J] + cS[I - 1][J])) + + dot(idy, (x[I][J + 1] - x[I][J - 1]) / (cS[I][J] + cS[I][J - 1])); + return r; +} +//---------------------------------------------------------------------- +template +Field, Cell>& Div( + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& x, + Field, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 3U>& cS = *(x.get_mesh().CellSpacings); + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + Vektor idx, idy, idz; + idx[0] = 1.0; + idx[1] = 0.0; + idx[2] = 0.0; + idy[0] = 0.0; + idy[1] = 1.0; + idy[2] = 0.0; + idz[0] = 0.0; + idz[1] = 0.0; + idz[2] = 1.0; + r[I][J][K] = dot(idx, ((x[I + 1][J][K] - x[I - 1][J][K]) / (cS[I][J][K] + cS[I - 1][J][K]))) + + dot(idy, ((x[I][J + 1][K] - x[I][J - 1][K]) / (cS[I][J][K] + cS[I][J - 1][K]))) + + dot(idz, ((x[I][J][K + 1] - x[I][J][K - 1]) / (cS[I][J][K] + cS[I][J][K - 1]))); + return r; } //---------------------------------------------------------------------- // Divergence Tenzor/Vert -> Vektor/Cell (???dot right thing? tjw 1/20/1998) //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Cell>& -Div(Field,1U,Cartesian<1U,MFLOAT>,Vert>& x, - Field,1U,Cartesian<1U,MFLOAT>,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,1U>& vertSpacings = *(x.get_mesh().VertSpacings); - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - r[I] = - dot(x[I ], x.get_mesh().Dvc[0]/vertSpacings[I]) + - dot(x[I+1], x.get_mesh().Dvc[1]/vertSpacings[I]); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Cell>& -Div(Field,2U,Cartesian<2U,MFLOAT>,Vert>& x, - Field,2U,Cartesian<2U,MFLOAT>,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,2U>& vertSpacings = *(x.get_mesh().VertSpacings); - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - r[I][J] = - dot(x[I ][J ], x.get_mesh().Dvc[0]/vertSpacings[I][J]) + - dot(x[I+1][J ], x.get_mesh().Dvc[1]/vertSpacings[I][J]) + - dot(x[I ][J+1], x.get_mesh().Dvc[2]/vertSpacings[I][J]) + - dot(x[I+1][J+1], x.get_mesh().Dvc[3]/vertSpacings[I][J]); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Cell>& -Div(Field,3U,Cartesian<3U,MFLOAT>,Vert>& x, - Field,3U,Cartesian<3U,MFLOAT>,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,3U>& vertSpacings = *(x.get_mesh().VertSpacings); - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - r[I][J][K] = - dot(x[I ][J ][K ], x.get_mesh().Dvc[0]/vertSpacings[I][J][K]) + - dot(x[I+1][J ][K ], x.get_mesh().Dvc[1]/vertSpacings[I][J][K]) + - dot(x[I ][J+1][K ], x.get_mesh().Dvc[2]/vertSpacings[I][J][K]) + - dot(x[I+1][J+1][K ], x.get_mesh().Dvc[3]/vertSpacings[I][J][K]) + - dot(x[I ][J ][K+1], x.get_mesh().Dvc[4]/vertSpacings[I][J][K]) + - dot(x[I+1][J ][K+1], x.get_mesh().Dvc[5]/vertSpacings[I][J][K]) + - dot(x[I ][J+1][K+1], x.get_mesh().Dvc[6]/vertSpacings[I][J][K]) + - dot(x[I+1][J+1][K+1], x.get_mesh().Dvc[7]/vertSpacings[I][J][K]); - return r; +template +Field, 1U, Cartesian<1U, MFLOAT>, Cell>& Div( + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 1U>& vertSpacings = *(x.get_mesh().VertSpacings); + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + r[I] = dot(x[I], x.get_mesh().Dvc[0] / vertSpacings[I]) + + dot(x[I + 1], x.get_mesh().Dvc[1] / vertSpacings[I]); + return r; +} +//---------------------------------------------------------------------- +template +Field, 2U, Cartesian<2U, MFLOAT>, Cell>& Div( + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 2U>& vertSpacings = *(x.get_mesh().VertSpacings); + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + r[I][J] = dot(x[I][J], x.get_mesh().Dvc[0] / vertSpacings[I][J]) + + dot(x[I + 1][J], x.get_mesh().Dvc[1] / vertSpacings[I][J]) + + dot(x[I][J + 1], x.get_mesh().Dvc[2] / vertSpacings[I][J]) + + dot(x[I + 1][J + 1], x.get_mesh().Dvc[3] / vertSpacings[I][J]); + return r; +} +//---------------------------------------------------------------------- +template +Field, 3U, Cartesian<3U, MFLOAT>, Cell>& Div( + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 3U>& vertSpacings = *(x.get_mesh().VertSpacings); + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + r[I][J][K] = dot(x[I][J][K], x.get_mesh().Dvc[0] / vertSpacings[I][J][K]) + + dot(x[I + 1][J][K], x.get_mesh().Dvc[1] / vertSpacings[I][J][K]) + + dot(x[I][J + 1][K], x.get_mesh().Dvc[2] / vertSpacings[I][J][K]) + + dot(x[I + 1][J + 1][K], x.get_mesh().Dvc[3] / vertSpacings[I][J][K]) + + dot(x[I][J][K + 1], x.get_mesh().Dvc[4] / vertSpacings[I][J][K]) + + dot(x[I + 1][J][K + 1], x.get_mesh().Dvc[5] / vertSpacings[I][J][K]) + + dot(x[I][J + 1][K + 1], x.get_mesh().Dvc[6] / vertSpacings[I][J][K]) + + dot(x[I + 1][J + 1][K + 1], x.get_mesh().Dvc[7] / vertSpacings[I][J][K]); + return r; } //---------------------------------------------------------------------- // Divergence SymTenzor/Vert -> Vektor/Cell (???dot right thing? tjw 1/20/1998) //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Cell>& -Div(Field,1U,Cartesian<1U,MFLOAT>,Vert>& x, - Field,1U,Cartesian<1U,MFLOAT>,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,1U>& vertSpacings = *(x.get_mesh().VertSpacings); - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - r[I] = - dot(x[I ], x.get_mesh().Dvc[0]/vertSpacings[I]) + - dot(x[I+1], x.get_mesh().Dvc[1]/vertSpacings[I]); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Cell>& -Div(Field,2U,Cartesian<2U,MFLOAT>,Vert>& x, - Field,2U,Cartesian<2U,MFLOAT>,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,2U>& vertSpacings = *(x.get_mesh().VertSpacings); - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - r[I][J] = - dot(x[I ][J ], x.get_mesh().Dvc[0]/vertSpacings[I][J]) + - dot(x[I+1][J ], x.get_mesh().Dvc[1]/vertSpacings[I][J]) + - dot(x[I ][J+1], x.get_mesh().Dvc[2]/vertSpacings[I][J]) + - dot(x[I+1][J+1], x.get_mesh().Dvc[3]/vertSpacings[I][J]); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Cell>& -Div(Field,3U,Cartesian<3U,MFLOAT>,Vert>& x, - Field,3U,Cartesian<3U,MFLOAT>,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,3U>& vertSpacings = *(x.get_mesh().VertSpacings); - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - r[I][J][K] = - dot(x[I ][J ][K ], x.get_mesh().Dvc[0]/vertSpacings[I][J][K]) + - dot(x[I+1][J ][K ], x.get_mesh().Dvc[1]/vertSpacings[I][J][K]) + - dot(x[I ][J+1][K ], x.get_mesh().Dvc[2]/vertSpacings[I][J][K]) + - dot(x[I+1][J+1][K ], x.get_mesh().Dvc[3]/vertSpacings[I][J][K]) + - dot(x[I ][J ][K+1], x.get_mesh().Dvc[4]/vertSpacings[I][J][K]) + - dot(x[I+1][J ][K+1], x.get_mesh().Dvc[5]/vertSpacings[I][J][K]) + - dot(x[I ][J+1][K+1], x.get_mesh().Dvc[6]/vertSpacings[I][J][K]) + - dot(x[I+1][J+1][K+1], x.get_mesh().Dvc[7]/vertSpacings[I][J][K]); - return r; +template +Field, 1U, Cartesian<1U, MFLOAT>, Cell>& Div( + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 1U>& vertSpacings = *(x.get_mesh().VertSpacings); + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + r[I] = dot(x[I], x.get_mesh().Dvc[0] / vertSpacings[I]) + + dot(x[I + 1], x.get_mesh().Dvc[1] / vertSpacings[I]); + return r; +} +//---------------------------------------------------------------------- +template +Field, 2U, Cartesian<2U, MFLOAT>, Cell>& Div( + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 2U>& vertSpacings = *(x.get_mesh().VertSpacings); + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + r[I][J] = dot(x[I][J], x.get_mesh().Dvc[0] / vertSpacings[I][J]) + + dot(x[I + 1][J], x.get_mesh().Dvc[1] / vertSpacings[I][J]) + + dot(x[I][J + 1], x.get_mesh().Dvc[2] / vertSpacings[I][J]) + + dot(x[I + 1][J + 1], x.get_mesh().Dvc[3] / vertSpacings[I][J]); + return r; +} +//---------------------------------------------------------------------- +template +Field, 3U, Cartesian<3U, MFLOAT>, Cell>& Div( + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 3U>& vertSpacings = *(x.get_mesh().VertSpacings); + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + r[I][J][K] = dot(x[I][J][K], x.get_mesh().Dvc[0] / vertSpacings[I][J][K]) + + dot(x[I + 1][J][K], x.get_mesh().Dvc[1] / vertSpacings[I][J][K]) + + dot(x[I][J + 1][K], x.get_mesh().Dvc[2] / vertSpacings[I][J][K]) + + dot(x[I + 1][J + 1][K], x.get_mesh().Dvc[3] / vertSpacings[I][J][K]) + + dot(x[I][J][K + 1], x.get_mesh().Dvc[4] / vertSpacings[I][J][K]) + + dot(x[I + 1][J][K + 1], x.get_mesh().Dvc[5] / vertSpacings[I][J][K]) + + dot(x[I][J + 1][K + 1], x.get_mesh().Dvc[6] / vertSpacings[I][J][K]) + + dot(x[I + 1][J + 1][K + 1], x.get_mesh().Dvc[7] / vertSpacings[I][J][K]); + return r; } //---------------------------------------------------------------------- // Divergence Tenzor/Cell -> Vektor/Vert (???dot right thing? tjw 1/20/1998) //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Vert>& -Div(Field,1U,Cartesian<1U,MFLOAT>,Cell>& x, - Field,1U,Cartesian<1U,MFLOAT>,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,1U>& cellSpacings = *(x.get_mesh().CellSpacings); - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - r[I] = - dot(x[I-1], x.get_mesh().Dvc[0]/cellSpacings[I]) + - dot(x[I ], x.get_mesh().Dvc[1]/cellSpacings[I]); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Vert>& -Div(Field,2U,Cartesian<2U,MFLOAT>,Cell>& x, - Field,2U,Cartesian<2U,MFLOAT>,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,2U>& cellSpacings = *(x.get_mesh().CellSpacings); - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - r[I][J] = - dot(x[I-1][J-1], x.get_mesh().Dvc[0]/cellSpacings[I][J]) + - dot(x[I ][J-1], x.get_mesh().Dvc[1]/cellSpacings[I][J]) + - dot(x[I-1][J ], x.get_mesh().Dvc[2]/cellSpacings[I][J]) + - dot(x[I ][J ], x.get_mesh().Dvc[3]/cellSpacings[I][J]); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Vert>& -Div(Field,3U,Cartesian<3U,MFLOAT>,Cell>& x, - Field,3U,Cartesian<3U,MFLOAT>,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,3U>& cellSpacings = *(x.get_mesh().CellSpacings); - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - r[I][J][K] = - dot(x[I-1][J-1][K-1], x.get_mesh().Dvc[0]/cellSpacings[I][J][K]) + - dot(x[I ][J-1][K-1], x.get_mesh().Dvc[1]/cellSpacings[I][J][K]) + - dot(x[I-1][J ][K-1], x.get_mesh().Dvc[2]/cellSpacings[I][J][K]) + - dot(x[I ][J ][K-1], x.get_mesh().Dvc[3]/cellSpacings[I][J][K]) + - dot(x[I-1][J-1][K ], x.get_mesh().Dvc[4]/cellSpacings[I][J][K]) + - dot(x[I ][J-1][K ], x.get_mesh().Dvc[5]/cellSpacings[I][J][K]) + - dot(x[I-1][J ][K ], x.get_mesh().Dvc[6]/cellSpacings[I][J][K]) + - dot(x[I ][J ][K ], x.get_mesh().Dvc[7]/cellSpacings[I][J][K]); - return r; +template +Field, 1U, Cartesian<1U, MFLOAT>, Vert>& Div( + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 1U>& cellSpacings = *(x.get_mesh().CellSpacings); + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + r[I] = dot(x[I - 1], x.get_mesh().Dvc[0] / cellSpacings[I]) + + dot(x[I], x.get_mesh().Dvc[1] / cellSpacings[I]); + return r; +} +//---------------------------------------------------------------------- +template +Field, 2U, Cartesian<2U, MFLOAT>, Vert>& Div( + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 2U>& cellSpacings = *(x.get_mesh().CellSpacings); + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + r[I][J] = dot(x[I - 1][J - 1], x.get_mesh().Dvc[0] / cellSpacings[I][J]) + + dot(x[I][J - 1], x.get_mesh().Dvc[1] / cellSpacings[I][J]) + + dot(x[I - 1][J], x.get_mesh().Dvc[2] / cellSpacings[I][J]) + + dot(x[I][J], x.get_mesh().Dvc[3] / cellSpacings[I][J]); + return r; +} +//---------------------------------------------------------------------- +template +Field, 3U, Cartesian<3U, MFLOAT>, Vert>& Div( + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 3U>& cellSpacings = *(x.get_mesh().CellSpacings); + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + r[I][J][K] = dot(x[I - 1][J - 1][K - 1], x.get_mesh().Dvc[0] / cellSpacings[I][J][K]) + + dot(x[I][J - 1][K - 1], x.get_mesh().Dvc[1] / cellSpacings[I][J][K]) + + dot(x[I - 1][J][K - 1], x.get_mesh().Dvc[2] / cellSpacings[I][J][K]) + + dot(x[I][J][K - 1], x.get_mesh().Dvc[3] / cellSpacings[I][J][K]) + + dot(x[I - 1][J - 1][K], x.get_mesh().Dvc[4] / cellSpacings[I][J][K]) + + dot(x[I][J - 1][K], x.get_mesh().Dvc[5] / cellSpacings[I][J][K]) + + dot(x[I - 1][J][K], x.get_mesh().Dvc[6] / cellSpacings[I][J][K]) + + dot(x[I][J][K], x.get_mesh().Dvc[7] / cellSpacings[I][J][K]); + return r; } //---------------------------------------------------------------------- // Divergence SymTenzor/Cell -> Vektor/Vert (???dot right thing? tjw 1/20/1998) //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Vert>& -Div(Field,1U,Cartesian<1U,MFLOAT>,Cell>& x, - Field,1U,Cartesian<1U,MFLOAT>,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,1U>& cellSpacings = *(x.get_mesh().CellSpacings); - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - r[I] = - dot(x[I-1], x.get_mesh().Dvc[0]/cellSpacings[I]) + - dot(x[I ], x.get_mesh().Dvc[1]/cellSpacings[I]); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Vert>& -Div(Field,2U,Cartesian<2U,MFLOAT>,Cell>& x, - Field,2U,Cartesian<2U,MFLOAT>,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,2U>& cellSpacings = *(x.get_mesh().CellSpacings); - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - r[I][J] = - dot(x[I-1][J-1], x.get_mesh().Dvc[0]/cellSpacings[I][J]) + - dot(x[I ][J-1], x.get_mesh().Dvc[1]/cellSpacings[I][J]) + - dot(x[I-1][J ], x.get_mesh().Dvc[2]/cellSpacings[I][J]) + - dot(x[I ][J ], x.get_mesh().Dvc[3]/cellSpacings[I][J]); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Vert>& -Div(Field,3U,Cartesian<3U,MFLOAT>,Cell>& x, - Field,3U,Cartesian<3U,MFLOAT>,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,3U>& cellSpacings = *(x.get_mesh().CellSpacings); - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - r[I][J][K] = - dot(x[I-1][J-1][K-1], x.get_mesh().Dvc[0]/cellSpacings[I][J][K]) + - dot(x[I ][J-1][K-1], x.get_mesh().Dvc[1]/cellSpacings[I][J][K]) + - dot(x[I-1][J ][K-1], x.get_mesh().Dvc[2]/cellSpacings[I][J][K]) + - dot(x[I ][J ][K-1], x.get_mesh().Dvc[3]/cellSpacings[I][J][K]) + - dot(x[I-1][J-1][K ], x.get_mesh().Dvc[4]/cellSpacings[I][J][K]) + - dot(x[I ][J-1][K ], x.get_mesh().Dvc[5]/cellSpacings[I][J][K]) + - dot(x[I-1][J ][K ], x.get_mesh().Dvc[6]/cellSpacings[I][J][K]) + - dot(x[I ][J ][K ], x.get_mesh().Dvc[7]/cellSpacings[I][J][K]); - return r; +template +Field, 1U, Cartesian<1U, MFLOAT>, Vert>& Div( + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 1U>& cellSpacings = *(x.get_mesh().CellSpacings); + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + r[I] = dot(x[I - 1], x.get_mesh().Dvc[0] / cellSpacings[I]) + + dot(x[I], x.get_mesh().Dvc[1] / cellSpacings[I]); + return r; +} +//---------------------------------------------------------------------- +template +Field, 2U, Cartesian<2U, MFLOAT>, Vert>& Div( + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 2U>& cellSpacings = *(x.get_mesh().CellSpacings); + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + r[I][J] = dot(x[I - 1][J - 1], x.get_mesh().Dvc[0] / cellSpacings[I][J]) + + dot(x[I][J - 1], x.get_mesh().Dvc[1] / cellSpacings[I][J]) + + dot(x[I - 1][J], x.get_mesh().Dvc[2] / cellSpacings[I][J]) + + dot(x[I][J], x.get_mesh().Dvc[3] / cellSpacings[I][J]); + return r; +} +//---------------------------------------------------------------------- +template +Field, 3U, Cartesian<3U, MFLOAT>, Vert>& Div( + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 3U>& cellSpacings = *(x.get_mesh().CellSpacings); + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + r[I][J][K] = dot(x[I - 1][J - 1][K - 1], x.get_mesh().Dvc[0] / cellSpacings[I][J][K]) + + dot(x[I][J - 1][K - 1], x.get_mesh().Dvc[1] / cellSpacings[I][J][K]) + + dot(x[I - 1][J][K - 1], x.get_mesh().Dvc[2] / cellSpacings[I][J][K]) + + dot(x[I][J][K - 1], x.get_mesh().Dvc[3] / cellSpacings[I][J][K]) + + dot(x[I - 1][J - 1][K], x.get_mesh().Dvc[4] / cellSpacings[I][J][K]) + + dot(x[I][J - 1][K], x.get_mesh().Dvc[5] / cellSpacings[I][J][K]) + + dot(x[I - 1][J][K], x.get_mesh().Dvc[6] / cellSpacings[I][J][K]) + + dot(x[I][J][K], x.get_mesh().Dvc[7] / cellSpacings[I][J][K]); + return r; } // END FLAGGED DIFFOPS REGION I @@ -2697,117 +2428,102 @@ Div(Field,3U,Cartesian<3U,MFLOAT>,Cell>& x, //---------------------------------------------------------------------- // Grad Scalar/Vert -> Vektor/Cell //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Cell>& -Grad(Field,Vert>& x, - Field,1U,Cartesian<1U,MFLOAT>,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,1U>& vertSpacings = *(x.get_mesh().VertSpacings); - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - r[I] = (x[I ]*x.get_mesh().Dvc[0] + - x[I+1]*x.get_mesh().Dvc[1])/vertSpacings[I]; - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Cell>& -Grad(Field,Vert>& x, - Field,2U,Cartesian<2U,MFLOAT>,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,2U>& vertSpacings = *(x.get_mesh().VertSpacings); - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - r[I][J] = (x[I ][J ]*x.get_mesh().Dvc[0] + - x[I+1][J ]*x.get_mesh().Dvc[1] + - x[I ][J+1]*x.get_mesh().Dvc[2] + - x[I+1][J+1]*x.get_mesh().Dvc[3])/vertSpacings[I][J]; - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Cell>& -Grad(Field,Vert>& x, - Field,3U,Cartesian<3U,MFLOAT>,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,3U>& vertSpacings = *(x.get_mesh().VertSpacings); - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - r[I][J][K] = (x[I ][J ][K ]*x.get_mesh().Dvc[0] + - x[I+1][J ][K ]*x.get_mesh().Dvc[1] + - x[I ][J+1][K ]*x.get_mesh().Dvc[2] + - x[I+1][J+1][K ]*x.get_mesh().Dvc[3] + - x[I ][J ][K+1]*x.get_mesh().Dvc[4] + - x[I+1][J ][K+1]*x.get_mesh().Dvc[5] + - x[I ][J+1][K+1]*x.get_mesh().Dvc[6] + - x[I+1][J+1][K+1]*x.get_mesh().Dvc[7])/vertSpacings[I][J][K]; - return r; +template +Field, 1U, Cartesian<1U, MFLOAT>, Cell>& Grad( + Field, Vert>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 1U>& vertSpacings = *(x.get_mesh().VertSpacings); + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + r[I] = (x[I] * x.get_mesh().Dvc[0] + x[I + 1] * x.get_mesh().Dvc[1]) / vertSpacings[I]; + return r; +} +//---------------------------------------------------------------------- +template +Field, 2U, Cartesian<2U, MFLOAT>, Cell>& Grad( + Field, Vert>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 2U>& vertSpacings = *(x.get_mesh().VertSpacings); + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + r[I][J] = (x[I][J] * x.get_mesh().Dvc[0] + x[I + 1][J] * x.get_mesh().Dvc[1] + + x[I][J + 1] * x.get_mesh().Dvc[2] + x[I + 1][J + 1] * x.get_mesh().Dvc[3]) + / vertSpacings[I][J]; + return r; +} +//---------------------------------------------------------------------- +template +Field, 3U, Cartesian<3U, MFLOAT>, Cell>& Grad( + Field, Vert>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 3U>& vertSpacings = *(x.get_mesh().VertSpacings); + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + r[I][J][K] = + (x[I][J][K] * x.get_mesh().Dvc[0] + x[I + 1][J][K] * x.get_mesh().Dvc[1] + + x[I][J + 1][K] * x.get_mesh().Dvc[2] + x[I + 1][J + 1][K] * x.get_mesh().Dvc[3] + + x[I][J][K + 1] * x.get_mesh().Dvc[4] + x[I + 1][J][K + 1] * x.get_mesh().Dvc[5] + + x[I][J + 1][K + 1] * x.get_mesh().Dvc[6] + x[I + 1][J + 1][K + 1] * x.get_mesh().Dvc[7]) + / vertSpacings[I][J][K]; + return r; } //---------------------------------------------------------------------- // Grad Scalar/Cell -> Vektor/Vert // (cellSpacings[I,J,K]->[I-1,....] 1/20/1998 tjw) // (reverted to cellSpacings[I-1,J-1,K-1]->[I,....] 2/2/1998 tjw) //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Vert>& -Grad(Field,Cell>& x, - Field,1U,Cartesian<1U,MFLOAT>,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,1U>& cellSpacings = *(x.get_mesh().CellSpacings); - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - r[I] = (x[I-1]*x.get_mesh().Dvc[0] + - x[I ]*x.get_mesh().Dvc[1])/cellSpacings[I]; - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Vert>& -Grad(Field,Cell>& x, - Field,2U,Cartesian<2U,MFLOAT>,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,2U>& cellSpacings = *(x.get_mesh().CellSpacings); - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - r[I][J] = (x[I-1][J-1]*x.get_mesh().Dvc[0] + - x[I ][J-1]*x.get_mesh().Dvc[1] + - x[I-1][J ]*x.get_mesh().Dvc[2] + - x[I ][J ]*x.get_mesh().Dvc[3])/cellSpacings[I][J]; - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Vert>& -Grad(Field,Cell>& x, - Field,3U,Cartesian<3U,MFLOAT>,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,3U>& cellSpacings = *(x.get_mesh().CellSpacings); - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - Vektor dvc[1<<3U]; - for (unsigned int d=0; d < 1<<3U; d++) dvc[d] = x.get_mesh().Dvc[d]; - r[I][J][K] = (x[I-1][J-1][K-1]*dvc[0] + - x[I ][J-1][K-1]*dvc[1] + - x[I-1][J ][K-1]*dvc[2] + - x[I ][J ][K-1]*dvc[3] + - x[I-1][J-1][K ]*dvc[4] + - x[I ][J-1][K ]*dvc[5] + - x[I-1][J ][K ]*dvc[6] + - x[I ][J ][K ]*dvc[7])/ - cellSpacings[I][J][K]; - return r; +template +Field, 1U, Cartesian<1U, MFLOAT>, Vert>& Grad( + Field, Cell>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 1U>& cellSpacings = *(x.get_mesh().CellSpacings); + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + r[I] = (x[I - 1] * x.get_mesh().Dvc[0] + x[I] * x.get_mesh().Dvc[1]) / cellSpacings[I]; + return r; +} +//---------------------------------------------------------------------- +template +Field, 2U, Cartesian<2U, MFLOAT>, Vert>& Grad( + Field, Cell>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 2U>& cellSpacings = *(x.get_mesh().CellSpacings); + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + r[I][J] = (x[I - 1][J - 1] * x.get_mesh().Dvc[0] + x[I][J - 1] * x.get_mesh().Dvc[1] + + x[I - 1][J] * x.get_mesh().Dvc[2] + x[I][J] * x.get_mesh().Dvc[3]) + / cellSpacings[I][J]; + return r; +} +//---------------------------------------------------------------------- +template +Field, 3U, Cartesian<3U, MFLOAT>, Vert>& Grad( + Field, Cell>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 3U>& cellSpacings = *(x.get_mesh().CellSpacings); + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + Vektor dvc[1 << 3U]; + for (unsigned int d = 0; d < 1 << 3U; d++) + dvc[d] = x.get_mesh().Dvc[d]; + r[I][J][K] = + (x[I - 1][J - 1][K - 1] * dvc[0] + x[I][J - 1][K - 1] * dvc[1] + x[I - 1][J][K - 1] * dvc[2] + + x[I][J][K - 1] * dvc[3] + x[I - 1][J - 1][K] * dvc[4] + x[I][J - 1][K] * dvc[5] + + x[I - 1][J][K] * dvc[6] + x[I][J][K] * dvc[7]) + / cellSpacings[I][J][K]; + return r; } // TJW: I've attempted to update these differential operators from uniform @@ -2825,501 +2541,439 @@ Grad(Field,Cell>& x, //---------------------------------------------------------------------- // Grad Scalar/Vert -> Vektor/Vert (???right? tjw 1/16/98) //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Vert>& -Grad(Field,Vert>& x, - Field,1U,Cartesian<1U,MFLOAT>,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,1U>& vertSpacings = - *(x.get_mesh().VertSpacings); - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - - Vektor idx; - idx[0] = 1.0; - - r[I] = idx*((x[I+1] - x[I-1])/(vertSpacings[I] + vertSpacings[I-1])); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Vert>& -Grad(Field,Vert>& x, - Field,2U,Cartesian<2U,MFLOAT>,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,2U>& vertSpacings = - *(x.get_mesh().VertSpacings); - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - - Vektor idx,idy; - idx[0] = 1.0; - idx[1] = 0.0; - idy[0] = 0.0; - idy[1] = 1.0; - - r[I][J] = - idx*((x[I+1][J ] - x[I-1][J ])/ - (vertSpacings[I][J] + vertSpacings[I-1][J])) + - idy*((x[I ][J+1] - x[I ][J-1])/ - (vertSpacings[I][J] + vertSpacings[I][J-1])); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Vert>& -Grad(Field,Vert>& x, - Field,3U,Cartesian<3U,MFLOAT>,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,3U>& vertSpacings = - *(x.get_mesh().VertSpacings); - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - - Vektor idx,idy,idz; - idx[0] = 1.0; - idx[1] = 0.0; - idx[2] = 0.0; - idy[0] = 0.0; - idy[1] = 1.0; - idy[2] = 0.0; - idz[0] = 0.0; - idz[1] = 0.0; - idz[2] = 1.0; - - r[I][J][K] = - idx*((x[I+1][J ][K ] - x[I-1][J ][K ])/ - (vertSpacings[I][J][K] + vertSpacings[I-1][J][K])) + - idy*((x[I ][J+1][K ] - x[I ][J-1][K ])/ - (vertSpacings[I][J][K] + vertSpacings[I][J-1][K])) + - idz*((x[I ][J ][K+1] - x[I ][J ][K-1])/ - (vertSpacings[I][J][K] + vertSpacings[I][J][K-1])); - return r; +template +Field, 1U, Cartesian<1U, MFLOAT>, Vert>& Grad( + Field, Vert>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 1U>& vertSpacings = *(x.get_mesh().VertSpacings); + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + + Vektor idx; + idx[0] = 1.0; + + r[I] = idx * ((x[I + 1] - x[I - 1]) / (vertSpacings[I] + vertSpacings[I - 1])); + return r; +} +//---------------------------------------------------------------------- +template +Field, 2U, Cartesian<2U, MFLOAT>, Vert>& Grad( + Field, Vert>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 2U>& vertSpacings = *(x.get_mesh().VertSpacings); + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + + Vektor idx, idy; + idx[0] = 1.0; + idx[1] = 0.0; + idy[0] = 0.0; + idy[1] = 1.0; + + r[I][J] = idx * ((x[I + 1][J] - x[I - 1][J]) / (vertSpacings[I][J] + vertSpacings[I - 1][J])) + + idy * ((x[I][J + 1] - x[I][J - 1]) / (vertSpacings[I][J] + vertSpacings[I][J - 1])); + return r; +} +//---------------------------------------------------------------------- +template +Field, 3U, Cartesian<3U, MFLOAT>, Vert>& Grad( + Field, Vert>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 3U>& vertSpacings = *(x.get_mesh().VertSpacings); + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + + Vektor idx, idy, idz; + idx[0] = 1.0; + idx[1] = 0.0; + idx[2] = 0.0; + idy[0] = 0.0; + idy[1] = 1.0; + idy[2] = 0.0; + idz[0] = 0.0; + idz[1] = 0.0; + idz[2] = 1.0; + + r[I][J][K] = idx + * ((x[I + 1][J][K] - x[I - 1][J][K]) + / (vertSpacings[I][J][K] + vertSpacings[I - 1][J][K])) + + idy + * ((x[I][J + 1][K] - x[I][J - 1][K]) + / (vertSpacings[I][J][K] + vertSpacings[I][J - 1][K])) + + idz + * ((x[I][J][K + 1] - x[I][J][K - 1]) + / (vertSpacings[I][J][K] + vertSpacings[I][J][K - 1])); + return r; } //---------------------------------------------------------------------- // Grad Scalar/Cell -> Vektor/Cell //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Cell>& -Grad(Field,Cell>& x, - Field,1U,Cartesian<1U,MFLOAT>,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,1U>& cellSpacings = - *(x.get_mesh().CellSpacings); - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - - Vektor idx; - idx[0] = 1.0; - - r[I] = idx*((x[I+1] - x[I-1])/(cellSpacings[I] + cellSpacings[I+1])); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Cell>& -Grad(Field,Cell>& x, - Field,2U,Cartesian<2U,MFLOAT>,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,2U>& cellSpacings = - *(x.get_mesh().CellSpacings); - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - - Vektor idx,idy; - idx[0] = 1.0; - idx[1] = 0.0; - idy[0] = 0.0; - idy[1] = 1.0; - - r[I][J] = - idx*((x[I+1][J ] - x[I-1][J ])/ - (cellSpacings[I][J] + cellSpacings[I+1][J])) + - idy*((x[I ][J+1] - x[I ][J-1])/ - (cellSpacings[I][J] + cellSpacings[I][J+1])); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Cell>& -Grad(Field,Cell>& x, - Field,3U,Cartesian<3U,MFLOAT>,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,3U>& cellSpacings = - *(x.get_mesh().CellSpacings); - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - - Vektor idx,idy,idz; - idx[0] = 1.0; - idx[1] = 0.0; - idx[2] = 0.0; - idy[0] = 0.0; - idy[1] = 1.0; - idy[2] = 0.0; - idz[0] = 0.0; - idz[1] = 0.0; - idz[2] = 1.0; - - r[I][J][K] = - idx*((x[I+1][J ][K ] - x[I-1][J ][K ])/ - (cellSpacings[I][J][K] + cellSpacings[I+1][J][K])) + - idy*((x[I ][J+1][K ] - x[I ][J-1][K ])/ - (cellSpacings[I][J][K] + cellSpacings[I][J+1][K])) + - idz*((x[I ][J ][K+1] - x[I ][J ][K-1])/ - (cellSpacings[I][J][K] + cellSpacings[I][J][K+1])); - return r; +template +Field, 1U, Cartesian<1U, MFLOAT>, Cell>& Grad( + Field, Cell>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 1U>& cellSpacings = *(x.get_mesh().CellSpacings); + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + + Vektor idx; + idx[0] = 1.0; + + r[I] = idx * ((x[I + 1] - x[I - 1]) / (cellSpacings[I] + cellSpacings[I + 1])); + return r; +} +//---------------------------------------------------------------------- +template +Field, 2U, Cartesian<2U, MFLOAT>, Cell>& Grad( + Field, Cell>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 2U>& cellSpacings = *(x.get_mesh().CellSpacings); + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + + Vektor idx, idy; + idx[0] = 1.0; + idx[1] = 0.0; + idy[0] = 0.0; + idy[1] = 1.0; + + r[I][J] = idx * ((x[I + 1][J] - x[I - 1][J]) / (cellSpacings[I][J] + cellSpacings[I + 1][J])) + + idy * ((x[I][J + 1] - x[I][J - 1]) / (cellSpacings[I][J] + cellSpacings[I][J + 1])); + return r; +} +//---------------------------------------------------------------------- +template +Field, 3U, Cartesian<3U, MFLOAT>, Cell>& Grad( + Field, Cell>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 3U>& cellSpacings = *(x.get_mesh().CellSpacings); + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + + Vektor idx, idy, idz; + idx[0] = 1.0; + idx[1] = 0.0; + idx[2] = 0.0; + idy[0] = 0.0; + idy[1] = 1.0; + idy[2] = 0.0; + idz[0] = 0.0; + idz[1] = 0.0; + idz[2] = 1.0; + + r[I][J][K] = idx + * ((x[I + 1][J][K] - x[I - 1][J][K]) + / (cellSpacings[I][J][K] + cellSpacings[I + 1][J][K])) + + idy + * ((x[I][J + 1][K] - x[I][J - 1][K]) + / (cellSpacings[I][J][K] + cellSpacings[I][J + 1][K])) + + idz + * ((x[I][J][K + 1] - x[I][J][K - 1]) + / (cellSpacings[I][J][K] + cellSpacings[I][J][K + 1])); + return r; } //---------------------------------------------------------------------- // Grad Vektor/Vert -> Tenzor/Cell (???o.p. right thing? tjw 1/20/1998) //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Cell>& -Grad(Field,1U,Cartesian<1U,MFLOAT>,Vert>& x, - Field,1U,Cartesian<1U,MFLOAT>,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,1U>& vS = *(x.get_mesh().VertSpacings); - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - r[I] = - outerProduct(x[I ], x.get_mesh().Dvc[0]/vS[I]) + - outerProduct(x[I+1], x.get_mesh().Dvc[1]/vS[I]); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Cell>& -Grad(Field,2U,Cartesian<2U,MFLOAT>,Vert>& x, - Field,2U,Cartesian<2U,MFLOAT>,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,2U>& vS = *(x.get_mesh().VertSpacings); - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - r[I][J] = - outerProduct(x[I ][J ], x.get_mesh().Dvc[0]/vS[I][J]) + - outerProduct(x[I+1][J ], x.get_mesh().Dvc[1]/vS[I][J]) + - outerProduct(x[I ][J+1], x.get_mesh().Dvc[2]/vS[I][J]) + - outerProduct(x[I+1][J+1], x.get_mesh().Dvc[3]/vS[I][J]); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Cell>& -Grad(Field,3U,Cartesian<3U,MFLOAT>,Vert>& x, - Field,3U,Cartesian<3U,MFLOAT>,Cell>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,3U>& vS = *(x.get_mesh().VertSpacings); - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - r[I][J][K] = - outerProduct(x[I ][J ][K ], x.get_mesh().Dvc[0]/vS[I][J][K]) + - outerProduct(x[I+1][J ][K ], x.get_mesh().Dvc[1]/vS[I][J][K]) + - outerProduct(x[I ][J+1][K ], x.get_mesh().Dvc[2]/vS[I][J][K]) + - outerProduct(x[I+1][J+1][K ], x.get_mesh().Dvc[3]/vS[I][J][K]) + - outerProduct(x[I ][J ][K+1], x.get_mesh().Dvc[4]/vS[I][J][K]) + - outerProduct(x[I+1][J ][K+1], x.get_mesh().Dvc[5]/vS[I][J][K]) + - outerProduct(x[I ][J+1][K+1], x.get_mesh().Dvc[6]/vS[I][J][K]) + - outerProduct(x[I+1][J+1][K+1], x.get_mesh().Dvc[7]/vS[I][J][K]); - - return r; +template +Field, 1U, Cartesian<1U, MFLOAT>, Cell>& Grad( + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 1U>& vS = *(x.get_mesh().VertSpacings); + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + r[I] = outerProduct(x[I], x.get_mesh().Dvc[0] / vS[I]) + + outerProduct(x[I + 1], x.get_mesh().Dvc[1] / vS[I]); + return r; +} +//---------------------------------------------------------------------- +template +Field, 2U, Cartesian<2U, MFLOAT>, Cell>& Grad( + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 2U>& vS = *(x.get_mesh().VertSpacings); + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + r[I][J] = outerProduct(x[I][J], x.get_mesh().Dvc[0] / vS[I][J]) + + outerProduct(x[I + 1][J], x.get_mesh().Dvc[1] / vS[I][J]) + + outerProduct(x[I][J + 1], x.get_mesh().Dvc[2] / vS[I][J]) + + outerProduct(x[I + 1][J + 1], x.get_mesh().Dvc[3] / vS[I][J]); + return r; +} +//---------------------------------------------------------------------- +template +Field, 3U, Cartesian<3U, MFLOAT>, Cell>& Grad( + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 3U>& vS = *(x.get_mesh().VertSpacings); + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + r[I][J][K] = outerProduct(x[I][J][K], x.get_mesh().Dvc[0] / vS[I][J][K]) + + outerProduct(x[I + 1][J][K], x.get_mesh().Dvc[1] / vS[I][J][K]) + + outerProduct(x[I][J + 1][K], x.get_mesh().Dvc[2] / vS[I][J][K]) + + outerProduct(x[I + 1][J + 1][K], x.get_mesh().Dvc[3] / vS[I][J][K]) + + outerProduct(x[I][J][K + 1], x.get_mesh().Dvc[4] / vS[I][J][K]) + + outerProduct(x[I + 1][J][K + 1], x.get_mesh().Dvc[5] / vS[I][J][K]) + + outerProduct(x[I][J + 1][K + 1], x.get_mesh().Dvc[6] / vS[I][J][K]) + + outerProduct(x[I + 1][J + 1][K + 1], x.get_mesh().Dvc[7] / vS[I][J][K]); + + return r; } //---------------------------------------------------------------------- // Grad Vektor/Cell -> Tenzor/Vert (???o.p. right thing? tjw 1/20/1998) // (cellSpacings[I,J,K]->[I-1,....] 1/20/1998 tjw) //---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,1U,Cartesian<1U,MFLOAT>,Vert>& -Grad(Field,1U,Cartesian<1U,MFLOAT>,Cell>& x, - Field,1U,Cartesian<1U,MFLOAT>,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,1U>& cellSpacings = *(x.get_mesh().CellSpacings); - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - r[I] = - outerProduct(x[I-1], x.get_mesh().Dvc[0]/cellSpacings[I-1]) + - outerProduct(x[I ], x.get_mesh().Dvc[1]/cellSpacings[I-1]); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,2U,Cartesian<2U,MFLOAT>,Vert>& -Grad(Field,2U,Cartesian<2U,MFLOAT>,Cell>& x, - Field,2U,Cartesian<2U,MFLOAT>,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,2U>& cS = *(x.get_mesh().CellSpacings); - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - r[I][J] = - outerProduct(x[I-1][J-1], x.get_mesh().Dvc[0]/cS[I-1][J-1]) + - outerProduct(x[I ][J-1], x.get_mesh().Dvc[1]/cS[I-1][J-1]) + - outerProduct(x[I-1][J ], x.get_mesh().Dvc[2]/cS[I-1][J-1]) + - outerProduct(x[I ][J ], x.get_mesh().Dvc[3]/cS[I-1][J-1]); - return r; -} -//---------------------------------------------------------------------- -template < class T, class MFLOAT > -Field,3U,Cartesian<3U,MFLOAT>,Vert>& -Grad(Field,3U,Cartesian<3U,MFLOAT>,Cell>& x, - Field,3U,Cartesian<3U,MFLOAT>,Vert>& r) -{ - PAssert_EQ(x.get_mesh().hasSpacingFields, true); - BareField,3U>& cS = *(x.get_mesh().CellSpacings); - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - r[I][J][K] = - outerProduct(x[I-1][J-1][K-1], x.get_mesh().Dvc[0]/cS[I-1][J-1][K-1]) + - outerProduct(x[I ][J-1][K-1], x.get_mesh().Dvc[1]/cS[I-1][J-1][K-1]) + - outerProduct(x[I-1][J ][K-1], x.get_mesh().Dvc[2]/cS[I-1][J-1][K-1]) + - outerProduct(x[I ][J ][K-1], x.get_mesh().Dvc[3]/cS[I-1][J-1][K-1]) + - outerProduct(x[I-1][J-1][K ], x.get_mesh().Dvc[4]/cS[I-1][J-1][K-1]) + - outerProduct(x[I ][J-1][K ], x.get_mesh().Dvc[5]/cS[I-1][J-1][K-1]) + - outerProduct(x[I-1][J ][K ], x.get_mesh().Dvc[6]/cS[I-1][J-1][K-1]) + - outerProduct(x[I ][J ][K ], x.get_mesh().Dvc[7]/cS[I-1][J-1][K-1]); - return r; +template +Field, 1U, Cartesian<1U, MFLOAT>, Vert>& Grad( + Field, 1U, Cartesian<1U, MFLOAT>, Cell>& x, + Field, 1U, Cartesian<1U, MFLOAT>, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 1U>& cellSpacings = *(x.get_mesh().CellSpacings); + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + r[I] = outerProduct(x[I - 1], x.get_mesh().Dvc[0] / cellSpacings[I - 1]) + + outerProduct(x[I], x.get_mesh().Dvc[1] / cellSpacings[I - 1]); + return r; +} +//---------------------------------------------------------------------- +template +Field, 2U, Cartesian<2U, MFLOAT>, Vert>& Grad( + Field, 2U, Cartesian<2U, MFLOAT>, Cell>& x, + Field, 2U, Cartesian<2U, MFLOAT>, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 2U>& cS = *(x.get_mesh().CellSpacings); + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + r[I][J] = outerProduct(x[I - 1][J - 1], x.get_mesh().Dvc[0] / cS[I - 1][J - 1]) + + outerProduct(x[I][J - 1], x.get_mesh().Dvc[1] / cS[I - 1][J - 1]) + + outerProduct(x[I - 1][J], x.get_mesh().Dvc[2] / cS[I - 1][J - 1]) + + outerProduct(x[I][J], x.get_mesh().Dvc[3] / cS[I - 1][J - 1]); + return r; +} +//---------------------------------------------------------------------- +template +Field, 3U, Cartesian<3U, MFLOAT>, Vert>& Grad( + Field, 3U, Cartesian<3U, MFLOAT>, Cell>& x, + Field, 3U, Cartesian<3U, MFLOAT>, Vert>& r) { + PAssert_EQ(x.get_mesh().hasSpacingFields, true); + BareField, 3U>& cS = *(x.get_mesh().CellSpacings); + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + r[I][J][K] = outerProduct(x[I - 1][J - 1][K - 1], x.get_mesh().Dvc[0] / cS[I - 1][J - 1][K - 1]) + + outerProduct(x[I][J - 1][K - 1], x.get_mesh().Dvc[1] / cS[I - 1][J - 1][K - 1]) + + outerProduct(x[I - 1][J][K - 1], x.get_mesh().Dvc[2] / cS[I - 1][J - 1][K - 1]) + + outerProduct(x[I][J][K - 1], x.get_mesh().Dvc[3] / cS[I - 1][J - 1][K - 1]) + + outerProduct(x[I - 1][J - 1][K], x.get_mesh().Dvc[4] / cS[I - 1][J - 1][K - 1]) + + outerProduct(x[I][J - 1][K], x.get_mesh().Dvc[5] / cS[I - 1][J - 1][K - 1]) + + outerProduct(x[I - 1][J][K], x.get_mesh().Dvc[6] / cS[I - 1][J - 1][K - 1]) + + outerProduct(x[I][J][K], x.get_mesh().Dvc[7] / cS[I - 1][J - 1][K - 1]); + return r; } // END FLAGGED DIFFOPS REGION II namespace IPPL { -//---------------------------------------------------------------------- -// Weighted average Cell to Vert -//---------------------------------------------------------------------- -template < class T1, class T2, class MFLOAT > -Field,Vert>& -Average(Field,Cell>& x, - Field,Cell>& w, - Field,Vert>& r) -{ - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - r[I] = (x[I-1]*w[I-1] + x[I ]*w[I ])/(w[I-1] + w[I ]); - return r; -} -//---------------------------------------------------------------------- -template < class T1, class T2, class MFLOAT > -Field,Vert>& -Average(Field,Cell>& x, - Field,Cell>& w, - Field,Vert>& r) -{ - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - - r[I][J] = (x[I-1][J-1] * w[I-1][J-1] + - x[I ][J-1] * w[I ][J-1] + - x[I-1][J ] * w[I-1][J ] + - x[I ][J ] * w[I ][J ])/ - (w[I-1][J-1] + - w[I ][J-1] + - w[I-1][J ] + - w[I ][J ]); - return r; -} -//---------------------------------------------------------------------- -template < class T1, class T2, class MFLOAT > -Field,Vert>& -Average(Field,Cell>& x, - Field,Cell>& w, - Field,Vert>& r) -{ - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - - r[I][J][K] = (x[I-1][J-1][K-1] * w[I-1][J-1][K-1] + - x[I ][J-1][K-1] * w[I ][J-1][K-1] + - x[I-1][J ][K-1] * w[I-1][J ][K-1] + - x[I ][J ][K-1] * w[I ][J ][K-1] + - x[I-1][J-1][K ] * w[I-1][J-1][K ] + - x[I ][J-1][K ] * w[I ][J-1][K ] + - x[I-1][J ][K ] * w[I-1][J ][K ] + - x[I ][J ][K ] * w[I ][J ][K ] )/ - (w[I-1][J-1][K-1] + - w[I ][J-1][K-1] + - w[I-1][J ][K-1] + - w[I ][J ][K-1] + - w[I-1][J-1][K ] + - w[I ][J-1][K ] + - w[I-1][J ][K ] + - w[I ][J ][K ]); - return r; -} -//---------------------------------------------------------------------- -// Weighted average Vert to Cell -// N.B.: won't work except for unit-stride (& zero-base?) Field's. -//---------------------------------------------------------------------- -template < class T1, class T2, class MFLOAT > -Field,Cell>& -Average(Field,Vert>& x, - Field,Vert>& w, - Field,Cell>& r) -{ - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - r[I] = (x[I+1]*w[I+1] + x[I ]*w[I ])/(w[I+1] + w[I ]); - return r; -} -//---------------------------------------------------------------------- -template < class T1, class T2, class MFLOAT > -Field,Cell>& -Average(Field,Vert>& x, - Field,Vert>& w, - Field,Cell>& r) -{ - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - - r[I][J] = (x[I+1][J+1] * w[I+1][J+1] + - x[I ][J+1] * w[I ][J+1] + - x[I+1][J ] * w[I+1][J ] + - x[I ][J ] * w[I ][J ])/ - (w[I+1][J+1] + - w[I ][J+1] + - w[I+1][J ] + - w[I ][J ]); - return r; -} -//---------------------------------------------------------------------- -template < class T1, class T2, class MFLOAT > -Field,Cell>& -Average(Field,Vert>& x, - Field,Vert>& w, - Field,Cell>& r) -{ - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - - r[I][J][K] = (x[I+1][J+1][K+1] * w[I+1][J+1][K+1] + - x[I ][J+1][K+1] * w[I ][J+1][K+1] + - x[I+1][J ][K+1] * w[I+1][J ][K+1] + - x[I ][J ][K+1] * w[I ][J ][K+1] + - x[I+1][J+1][K ] * w[I+1][J+1][K ] + - x[I ][J+1][K ] * w[I ][J+1][K ] + - x[I+1][J ][K ] * w[I+1][J ][K ] + - x[I ][J ][K ] * w[I ][J ][K ])/ - (w[I+1][J+1][K+1] + - w[I ][J+1][K+1] + - w[I+1][J ][K+1] + - w[I ][J ][K+1] + - w[I+1][J+1][K ] + - w[I ][J+1][K ] + - w[I+1][J ][K ] + - w[I ][J ][K ]); - return r; -} + //---------------------------------------------------------------------- + // Weighted average Cell to Vert + //---------------------------------------------------------------------- + template + Field, Vert>& Average( + Field, Cell>& x, + Field, Cell>& w, + Field, Vert>& r) { + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + r[I] = (x[I - 1] * w[I - 1] + x[I] * w[I]) / (w[I - 1] + w[I]); + return r; + } + //---------------------------------------------------------------------- + template + Field, Vert>& Average( + Field, Cell>& x, + Field, Cell>& w, + Field, Vert>& r) { + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + + r[I][J] = (x[I - 1][J - 1] * w[I - 1][J - 1] + x[I][J - 1] * w[I][J - 1] + + x[I - 1][J] * w[I - 1][J] + x[I][J] * w[I][J]) + / (w[I - 1][J - 1] + w[I][J - 1] + w[I - 1][J] + w[I][J]); + return r; + } + //---------------------------------------------------------------------- + template + Field, Vert>& Average( + Field, Cell>& x, + Field, Cell>& w, + Field, Vert>& r) { + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + + r[I][J][K] = + (x[I - 1][J - 1][K - 1] * w[I - 1][J - 1][K - 1] + + x[I][J - 1][K - 1] * w[I][J - 1][K - 1] + x[I - 1][J][K - 1] * w[I - 1][J][K - 1] + + x[I][J][K - 1] * w[I][J][K - 1] + x[I - 1][J - 1][K] * w[I - 1][J - 1][K] + + x[I][J - 1][K] * w[I][J - 1][K] + x[I - 1][J][K] * w[I - 1][J][K] + + x[I][J][K] * w[I][J][K]) + / (w[I - 1][J - 1][K - 1] + w[I][J - 1][K - 1] + w[I - 1][J][K - 1] + w[I][J][K - 1] + + w[I - 1][J - 1][K] + w[I][J - 1][K] + w[I - 1][J][K] + w[I][J][K]); + return r; + } + //---------------------------------------------------------------------- + // Weighted average Vert to Cell + // N.B.: won't work except for unit-stride (& zero-base?) Field's. + //---------------------------------------------------------------------- + template + Field, Cell>& Average( + Field, Vert>& x, + Field, Vert>& w, + Field, Cell>& r) { + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + r[I] = (x[I + 1] * w[I + 1] + x[I] * w[I]) / (w[I + 1] + w[I]); + return r; + } + //---------------------------------------------------------------------- + template + Field, Cell>& Average( + Field, Vert>& x, + Field, Vert>& w, + Field, Cell>& r) { + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + + r[I][J] = (x[I + 1][J + 1] * w[I + 1][J + 1] + x[I][J + 1] * w[I][J + 1] + + x[I + 1][J] * w[I + 1][J] + x[I][J] * w[I][J]) + / (w[I + 1][J + 1] + w[I][J + 1] + w[I + 1][J] + w[I][J]); + return r; + } + //---------------------------------------------------------------------- + template + Field, Cell>& Average( + Field, Vert>& x, + Field, Vert>& w, + Field, Cell>& r) { + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + + r[I][J][K] = + (x[I + 1][J + 1][K + 1] * w[I + 1][J + 1][K + 1] + + x[I][J + 1][K + 1] * w[I][J + 1][K + 1] + x[I + 1][J][K + 1] * w[I + 1][J][K + 1] + + x[I][J][K + 1] * w[I][J][K + 1] + x[I + 1][J + 1][K] * w[I + 1][J + 1][K] + + x[I][J + 1][K] * w[I][J + 1][K] + x[I + 1][J][K] * w[I + 1][J][K] + + x[I][J][K] * w[I][J][K]) + / (w[I + 1][J + 1][K + 1] + w[I][J + 1][K + 1] + w[I + 1][J][K + 1] + w[I][J][K + 1] + + w[I + 1][J + 1][K] + w[I][J + 1][K] + w[I + 1][J][K] + w[I][J][K]); + return r; + } -//---------------------------------------------------------------------- -// Unweighted average Cell to Vert -//---------------------------------------------------------------------- -template < class T1, class MFLOAT > -Field,Vert>& -Average(Field,Cell>& x, - Field,Vert>& r) -{ - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - r[I] = 0.5*(x[I-1] + x[I ]); - return r; -} -//---------------------------------------------------------------------- -template < class T1, class MFLOAT > -Field,Vert>& -Average(Field,Cell>& x, - Field,Vert>& r) -{ - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - r[I][J] = 0.25*(x[I-1][J-1] + x[I ][J-1] + x[I-1][J ] + x[I ][J ]); - return r; -} -//---------------------------------------------------------------------- -template < class T1, class MFLOAT > -Field,Vert>& -Average(Field,Cell>& x, - Field,Vert>& r) -{ - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - r[I][J][K] = 0.125*(x[I-1][J-1][K-1] + x[I ][J-1][K-1] + x[I-1][J ][K-1] + - x[I ][J ][K-1] + x[I-1][J-1][K ] + x[I ][J-1][K ] + - x[I-1][J ][K ] + x[I ][J ][K ]); - return r; -} -//---------------------------------------------------------------------- -// Unweighted average Vert to Cell -// N.B.: won't work except for unit-stride (& zero-base?) Field's. -//---------------------------------------------------------------------- -template < class T1, class MFLOAT > -Field,Cell>& -Average(Field,Vert>& x, - Field,Cell>& r) -{ - const NDIndex<1U>& domain = r.getDomain(); - Index I = domain[0]; - r[I] = 0.5*(x[I+1] + x[I ]); - return r; -} -//---------------------------------------------------------------------- -template < class T1, class MFLOAT > -Field,Cell>& -Average(Field,Vert>& x, - Field,Cell>& r) -{ - const NDIndex<2U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - r[I][J] = 0.25*(x[I+1][J+1] + x[I ][J+1] + x[I+1][J ] + x[I ][J ]); - return r; -} -//---------------------------------------------------------------------- -template < class T1, class MFLOAT > -Field,Cell>& -Average(Field,Vert>& x, - Field,Cell>& r) -{ - const NDIndex<3U>& domain = r.getDomain(); - Index I = domain[0]; - Index J = domain[1]; - Index K = domain[2]; - r[I][J][K] = 0.125*(x[I+1][J+1][K+1] + x[I ][J+1][K+1] + x[I+1][J ][K+1] + - x[I ][J ][K+1] + x[I+1][J+1][K ] + x[I ][J+1][K ] + - x[I+1][J ][K ] + x[I ][J ][K ]); - return r; -} + //---------------------------------------------------------------------- + // Unweighted average Cell to Vert + //---------------------------------------------------------------------- + template + Field, Vert>& Average( + Field, Cell>& x, + Field, Vert>& r) { + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + r[I] = 0.5 * (x[I - 1] + x[I]); + return r; + } + //---------------------------------------------------------------------- + template + Field, Vert>& Average( + Field, Cell>& x, + Field, Vert>& r) { + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + r[I][J] = 0.25 * (x[I - 1][J - 1] + x[I][J - 1] + x[I - 1][J] + x[I][J]); + return r; + } + //---------------------------------------------------------------------- + template + Field, Vert>& Average( + Field, Cell>& x, + Field, Vert>& r) { + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + r[I][J][K] = + 0.125 + * (x[I - 1][J - 1][K - 1] + x[I][J - 1][K - 1] + x[I - 1][J][K - 1] + x[I][J][K - 1] + + x[I - 1][J - 1][K] + x[I][J - 1][K] + x[I - 1][J][K] + x[I][J][K]); + return r; + } + //---------------------------------------------------------------------- + // Unweighted average Vert to Cell + // N.B.: won't work except for unit-stride (& zero-base?) Field's. + //---------------------------------------------------------------------- + template + Field, Cell>& Average( + Field, Vert>& x, + Field, Cell>& r) { + const NDIndex<1U>& domain = r.getDomain(); + Index I = domain[0]; + r[I] = 0.5 * (x[I + 1] + x[I]); + return r; + } + //---------------------------------------------------------------------- + template + Field, Cell>& Average( + Field, Vert>& x, + Field, Cell>& r) { + const NDIndex<2U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + r[I][J] = 0.25 * (x[I + 1][J + 1] + x[I][J + 1] + x[I + 1][J] + x[I][J]); + return r; + } + //---------------------------------------------------------------------- + template + Field, Cell>& Average( + Field, Vert>& x, + Field, Cell>& r) { + const NDIndex<3U>& domain = r.getDomain(); + Index I = domain[0]; + Index J = domain[1]; + Index K = domain[2]; + r[I][J][K] = + 0.125 + * (x[I + 1][J + 1][K + 1] + x[I][J + 1][K + 1] + x[I + 1][J][K + 1] + x[I][J][K + 1] + + x[I + 1][J + 1][K] + x[I][J + 1][K] + x[I + 1][J][K] + x[I][J][K]); + return r; + } -} +} // namespace IPPL /*************************************************************************** * $RCSfile: Cartesian.cpp,v $ $Author: adelmann $ diff --git a/src/Meshes/CartesianCentering.h b/src/Meshes/CartesianCentering.h index 2766271b8..602fefca4 100644 --- a/src/Meshes/CartesianCentering.h +++ b/src/Meshes/CartesianCentering.h @@ -2,7 +2,7 @@ /*************************************************************************** * * The IPPL Framework - * + * * * Visit http://people.web.psi.ch/adelmann/ for more details * @@ -29,33 +29,34 @@ // May add to this when unstructured comes in, and it means something to // simply say FACE or EDGE centering (for cartesian meshes, face and edge // centerings are a combination of CELL and VERTEX along directions): -enum CenteringEnum {CELL=0, VERTEX=1, VERT=1}; +enum CenteringEnum { + CELL = 0, + VERTEX = 1, + VERT = 1 +}; // Primary class for canned and user-defined cartesian centerings: -template -class CartesianCentering -{ +template +class CartesianCentering { public: - static void print_Centerings(std::ostream&); // Print function - static std::string CenteringName; + static void print_Centerings(std::ostream&); // Print function + static std::string CenteringName; }; template -void CartesianCentering:: -print_Centerings(std::ostream& out) -{ - unsigned int i,j; - out << CenteringName << std::endl; - out << "Dim = " << Dim << " ; NComponents = " << NComponents << std::endl; - for (i=0;i -struct CCCEnums -{ - // CenteringEnum arrays Classes with simple, descriptive names - //--------------------------------------------------------------------- - // All components of Field cell-centered in all directions: - // static CenteringEnum allCell[NComponents*Dim]; - // All components of Field vertex-centered in all directions: - // static CenteringEnum allVertex[NComponents*Dim]; - // All components of Field face-centered in specified direction (meaning - // vertex centered in that direction, cell-centered in others): - // static CenteringEnum allFace[NComponents*Dim]; - // All components of Field edge-centered along specified direction (cell - // centered in that direction, vertex-centered in others): - // static CenteringEnum allEdge[NComponents*Dim]; - // Each vector component of Field face-centered in the corresponding - // direction: - // static CenteringEnum vectorFace[NComponents*Dim]; - // Each vector component of Field edge-centered along the corresponding - // direction: - // static CenteringEnum vectorEdge[NComponents*Dim]; - //--------------------------------------------------------------------- +template +struct CCCEnums { + // CenteringEnum arrays Classes with simple, descriptive names + //--------------------------------------------------------------------- + // All components of Field cell-centered in all directions: + // static CenteringEnum allCell[NComponents*Dim]; + // All components of Field vertex-centered in all directions: + // static CenteringEnum allVertex[NComponents*Dim]; + // All components of Field face-centered in specified direction (meaning + // vertex centered in that direction, cell-centered in others): + // static CenteringEnum allFace[NComponents*Dim]; + // All components of Field edge-centered along specified direction (cell + // centered in that direction, vertex-centered in others): + // static CenteringEnum allEdge[NComponents*Dim]; + // Each vector component of Field face-centered in the corresponding + // direction: + // static CenteringEnum vectorFace[NComponents*Dim]; + // Each vector component of Field edge-centered along the corresponding + // direction: + // static CenteringEnum vectorEdge[NComponents*Dim]; + //--------------------------------------------------------------------- }; //***************CommonCartesianCenteringEnum Specializations****************** -//11111111111111111111111111111111111111111111111111111111111111111111111111111 -// 1D fields -//11111111111111111111111111111111111111111111111111111111111111111111111111111 +// 11111111111111111111111111111111111111111111111111111111111111111111111111111 +// 1D fields +// 11111111111111111111111111111111111111111111111111111111111111111111111111111 // 1D field of scalars (or 1D vectors, or 1D tensors, or 1D sym. tensors) -template<> -struct CCCEnums<1U,1U,0U> { - static CenteringEnum allCell[1U*1U]; - static CenteringEnum allVertex[1U*1U]; - // Componentwise centering along/perpendicular to component direction: - static CenteringEnum vectorFace[1U*1U]; - static CenteringEnum vectorEdge[1U*1U]; - // Face/Edge centering perpendicular to/along direction 0: - static CenteringEnum allFace[1U*1U]; - static CenteringEnum allEdge[1U*1U]; -}; - - -//22222222222222222222222222222222222222222222222222222222222222222222222222222 -// 2D fields -//22222222222222222222222222222222222222222222222222222222222222222222222222222 +template <> +struct CCCEnums<1U, 1U, 0U> { + static CenteringEnum allCell[1U * 1U]; + static CenteringEnum allVertex[1U * 1U]; + // Componentwise centering along/perpendicular to component direction: + static CenteringEnum vectorFace[1U * 1U]; + static CenteringEnum vectorEdge[1U * 1U]; + // Face/Edge centering perpendicular to/along direction 0: + static CenteringEnum allFace[1U * 1U]; + static CenteringEnum allEdge[1U * 1U]; +}; + +// 22222222222222222222222222222222222222222222222222222222222222222222222222222 +// 2D fields +// 22222222222222222222222222222222222222222222222222222222222222222222222222222 // 2D field of scalars (or 1D vectors, or 1D tensors, or 1D sym. tensors) -template<> -struct CCCEnums<2U,1U,0U> { - static CenteringEnum allCell[2U*1U]; - static CenteringEnum allVertex[2U*1U]; - // Face/Edge centering perpendicular to/along direction 0: - static CenteringEnum allFace[2U*1U]; - static CenteringEnum allEdge[2U*1U]; +template <> +struct CCCEnums<2U, 1U, 0U> { + static CenteringEnum allCell[2U * 1U]; + static CenteringEnum allVertex[2U * 1U]; + // Face/Edge centering perpendicular to/along direction 0: + static CenteringEnum allFace[2U * 1U]; + static CenteringEnum allEdge[2U * 1U]; }; -template<> -struct CCCEnums<2U,1U,1U> { - // Face/Edge centering perpendicular to/along direction 1: - static CenteringEnum allFace[2U*1U]; - static CenteringEnum allEdge[2U*1U]; +template <> +struct CCCEnums<2U, 1U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + static CenteringEnum allFace[2U * 1U]; + static CenteringEnum allEdge[2U * 1U]; }; // 2D field of 2D vectors: -template<> -struct CCCEnums<2U,2U,0U> { - static CenteringEnum allCell[2U*2U]; - static CenteringEnum allVertex[2U*2U]; - // Componentwise centering along/perpendicular to component direction: - static CenteringEnum vectorFace[2U*2U]; - static CenteringEnum vectorEdge[2U*2U]; - // Face/Edge centering perpendicular to/along direction 0: - static CenteringEnum allFace[2U*2U]; - static CenteringEnum allEdge[2U*2U]; -}; -template<> -struct CCCEnums<2U,2U,1U> { - // Face/Edge centering perpendicular to/along direction 1: - static CenteringEnum allFace[2U*2U]; - static CenteringEnum allEdge[2U*2U]; +template <> +struct CCCEnums<2U, 2U, 0U> { + static CenteringEnum allCell[2U * 2U]; + static CenteringEnum allVertex[2U * 2U]; + // Componentwise centering along/perpendicular to component direction: + static CenteringEnum vectorFace[2U * 2U]; + static CenteringEnum vectorEdge[2U * 2U]; + // Face/Edge centering perpendicular to/along direction 0: + static CenteringEnum allFace[2U * 2U]; + static CenteringEnum allEdge[2U * 2U]; +}; +template <> +struct CCCEnums<2U, 2U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + static CenteringEnum allFace[2U * 2U]; + static CenteringEnum allEdge[2U * 2U]; }; // 2D field of 2D tensors: -template<> -struct CCCEnums<2U,4U,0U> { - static CenteringEnum allCell[4U*2U]; - static CenteringEnum allVertex[4U*2U]; - // Face/Edge centering perpendicular to/along direction 0: - static CenteringEnum allFace[2U*4U]; - static CenteringEnum allEdge[2U*4U]; +template <> +struct CCCEnums<2U, 4U, 0U> { + static CenteringEnum allCell[4U * 2U]; + static CenteringEnum allVertex[4U * 2U]; + // Face/Edge centering perpendicular to/along direction 0: + static CenteringEnum allFace[2U * 4U]; + static CenteringEnum allEdge[2U * 4U]; }; -template<> -struct CCCEnums<2U,4U,1U> { - // Face/Edge centering perpendicular to/along direction 1: - static CenteringEnum allFace[2U*4U]; - static CenteringEnum allEdge[2U*4U]; +template <> +struct CCCEnums<2U, 4U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + static CenteringEnum allFace[2U * 4U]; + static CenteringEnum allEdge[2U * 4U]; }; // 2D field of 2D symmetric tensors: -template<> -struct CCCEnums<2U,3U,0U> { - static CenteringEnum allCell[2U*3U]; - static CenteringEnum allVertex[2U*3U]; - // Face/Edge centering perpendicular to/along direction 0: - static CenteringEnum allFace[2U*3U]; - static CenteringEnum allEdge[2U*3U]; +template <> +struct CCCEnums<2U, 3U, 0U> { + static CenteringEnum allCell[2U * 3U]; + static CenteringEnum allVertex[2U * 3U]; + // Face/Edge centering perpendicular to/along direction 0: + static CenteringEnum allFace[2U * 3U]; + static CenteringEnum allEdge[2U * 3U]; }; -template<> -struct CCCEnums<2U,3U,1U> { - // Face/Edge centering perpendicular to/along direction 1: - static CenteringEnum allFace[2U*3U]; - static CenteringEnum allEdge[2U*3U]; +template <> +struct CCCEnums<2U, 3U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + static CenteringEnum allFace[2U * 3U]; + static CenteringEnum allEdge[2U * 3U]; }; - -//33333333333333333333333333333333333333333333333333333333333333333333333333333 -// 3D fields -//33333333333333333333333333333333333333333333333333333333333333333333333333333 +// 33333333333333333333333333333333333333333333333333333333333333333333333333333 +// 3D fields +// 33333333333333333333333333333333333333333333333333333333333333333333333333333 // 3D field of scalars (or 1D vectors, or 1D tensors, or 1D sym. tensors) -template<> -struct CCCEnums<3U,1U,0U> { - static CenteringEnum allCell[3U*1U]; - static CenteringEnum allVertex[3U*1U]; - // Face/Edge centering perpendicular to/along direction 0: - static CenteringEnum allFace[3U*1U]; - static CenteringEnum allEdge[3U*1U]; -}; -template<> -struct CCCEnums<3U,1U,1U> { - // Face/Edge centering perpendicular to/along direction 1: - static CenteringEnum allFace[3U*1U]; - static CenteringEnum allEdge[3U*1U]; -}; -template<> -struct CCCEnums<3U,1U,2U> { - // Face/Edge centering perpendicular to/along direction 2: - static CenteringEnum allFace[3U*1U]; - static CenteringEnum allEdge[3U*1U]; +template <> +struct CCCEnums<3U, 1U, 0U> { + static CenteringEnum allCell[3U * 1U]; + static CenteringEnum allVertex[3U * 1U]; + // Face/Edge centering perpendicular to/along direction 0: + static CenteringEnum allFace[3U * 1U]; + static CenteringEnum allEdge[3U * 1U]; +}; +template <> +struct CCCEnums<3U, 1U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + static CenteringEnum allFace[3U * 1U]; + static CenteringEnum allEdge[3U * 1U]; +}; +template <> +struct CCCEnums<3U, 1U, 2U> { + // Face/Edge centering perpendicular to/along direction 2: + static CenteringEnum allFace[3U * 1U]; + static CenteringEnum allEdge[3U * 1U]; }; // 3D field of 2D vectors: -template<> -struct CCCEnums<3U,2U,0U> { - static CenteringEnum allCell[3U*2U]; - static CenteringEnum allVertex[3U*2U]; - // Face/Edge centering perpendicular to/along direction 0: - static CenteringEnum allFace[3U*2U]; - static CenteringEnum allEdge[3U*2U]; -}; -template<> -struct CCCEnums<3U,2U,1U> { - // Face/Edge centering perpendicular to/along direction 1: - static CenteringEnum allFace[3U*2U]; - static CenteringEnum allEdge[3U*2U]; -}; -template<> -struct CCCEnums<3U,2U,2U> { - // Face/Edge centering perpendicular to/along direction 2: - static CenteringEnum allFace[3U*2U]; - static CenteringEnum allEdge[3U*2U]; +template <> +struct CCCEnums<3U, 2U, 0U> { + static CenteringEnum allCell[3U * 2U]; + static CenteringEnum allVertex[3U * 2U]; + // Face/Edge centering perpendicular to/along direction 0: + static CenteringEnum allFace[3U * 2U]; + static CenteringEnum allEdge[3U * 2U]; +}; +template <> +struct CCCEnums<3U, 2U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + static CenteringEnum allFace[3U * 2U]; + static CenteringEnum allEdge[3U * 2U]; +}; +template <> +struct CCCEnums<3U, 2U, 2U> { + // Face/Edge centering perpendicular to/along direction 2: + static CenteringEnum allFace[3U * 2U]; + static CenteringEnum allEdge[3U * 2U]; }; // 3D field of 3D vectors: -template<> -struct CCCEnums<3U,3U,0U> { - static CenteringEnum allCell[3U*3U]; - static CenteringEnum allVertex[3U*3U]; - // Componentwise centering along/perpendicular to component direction: - static CenteringEnum vectorFace[3U*3U]; - static CenteringEnum vectorEdge[3U*3U]; - // Face/Edge centering perpendicular to/along direction 0: - static CenteringEnum allFace[3U*3U]; - static CenteringEnum allEdge[3U*3U]; -}; -template<> -struct CCCEnums<3U,3U,1U> { - // Face/Edge centering perpendicular to/along direction 1: - static CenteringEnum allFace[3U*3U]; - static CenteringEnum allEdge[3U*3U]; -}; -template<> -struct CCCEnums<3U,3U,2U> { - // Face/Edge centering perpendicular to/along direction 2: - static CenteringEnum allFace[3U*3U]; - static CenteringEnum allEdge[3U*3U]; +template <> +struct CCCEnums<3U, 3U, 0U> { + static CenteringEnum allCell[3U * 3U]; + static CenteringEnum allVertex[3U * 3U]; + // Componentwise centering along/perpendicular to component direction: + static CenteringEnum vectorFace[3U * 3U]; + static CenteringEnum vectorEdge[3U * 3U]; + // Face/Edge centering perpendicular to/along direction 0: + static CenteringEnum allFace[3U * 3U]; + static CenteringEnum allEdge[3U * 3U]; +}; +template <> +struct CCCEnums<3U, 3U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + static CenteringEnum allFace[3U * 3U]; + static CenteringEnum allEdge[3U * 3U]; +}; +template <> +struct CCCEnums<3U, 3U, 2U> { + // Face/Edge centering perpendicular to/along direction 2: + static CenteringEnum allFace[3U * 3U]; + static CenteringEnum allEdge[3U * 3U]; }; // 3D field of 3D tensors: -template<> -struct CCCEnums<3U,9U,0U> { - static CenteringEnum allCell[3U*9U]; - static CenteringEnum allVertex[3U*9U]; - // Face/Edge centering perpendicular to/along direction 0: - static CenteringEnum allFace[3U*9U]; - static CenteringEnum allEdge[3U*9U]; -}; -template<> -struct CCCEnums<3U,9U,1U> { - // Face/Edge centering perpendicular to/along direction 1: - static CenteringEnum allFace[3U*9U]; - static CenteringEnum allEdge[3U*9U]; -}; -template<> -struct CCCEnums<3U,9U,2U> { - // Face/Edge centering perpendicular to/along direction 2: - static CenteringEnum allFace[3U*9U]; - static CenteringEnum allEdge[3U*9U]; +template <> +struct CCCEnums<3U, 9U, 0U> { + static CenteringEnum allCell[3U * 9U]; + static CenteringEnum allVertex[3U * 9U]; + // Face/Edge centering perpendicular to/along direction 0: + static CenteringEnum allFace[3U * 9U]; + static CenteringEnum allEdge[3U * 9U]; +}; +template <> +struct CCCEnums<3U, 9U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + static CenteringEnum allFace[3U * 9U]; + static CenteringEnum allEdge[3U * 9U]; +}; +template <> +struct CCCEnums<3U, 9U, 2U> { + // Face/Edge centering perpendicular to/along direction 2: + static CenteringEnum allFace[3U * 9U]; + static CenteringEnum allEdge[3U * 9U]; }; // 3D field of 3D symmetric tensors: -template<> -struct CCCEnums<3U,6U,0U> { - static CenteringEnum allCell[3U*6U]; - static CenteringEnum allVertex[3U*6U]; - // Face/Edge centering perpendicular to/along direction 0: - static CenteringEnum allFace[3U*6U]; - static CenteringEnum allEdge[3U*6U]; -}; -template<> -struct CCCEnums<3U,6U,1U> { - // Face/Edge centering perpendicular to/along direction 1: - static CenteringEnum allFace[3U*6U]; - static CenteringEnum allEdge[3U*6U]; +template <> +struct CCCEnums<3U, 6U, 0U> { + static CenteringEnum allCell[3U * 6U]; + static CenteringEnum allVertex[3U * 6U]; + // Face/Edge centering perpendicular to/along direction 0: + static CenteringEnum allFace[3U * 6U]; + static CenteringEnum allEdge[3U * 6U]; +}; +template <> +struct CCCEnums<3U, 6U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + static CenteringEnum allFace[3U * 6U]; + static CenteringEnum allEdge[3U * 6U]; +}; +template <> +struct CCCEnums<3U, 6U, 2U> { + // Face/Edge centering perpendicular to/along direction 2: + static CenteringEnum allFace[3U * 6U]; + static CenteringEnum allEdge[3U * 6U]; }; -template<> -struct CCCEnums<3U,6U,2U> { - // Face/Edge centering perpendicular to/along direction 2: - static CenteringEnum allFace[3U*6U]; - static CenteringEnum allEdge[3U*6U]; -}; - - //----------------------------------------------------------------------------- @@ -305,264 +301,236 @@ struct CCCEnums<3U,6U,2U> { // the canned typedefs in the canned specializations of these below are what // the user will likely use. -template -struct CommonCartesianCenterings -{ - //public: - // typedef CartesianCentering::allCell, Dim, NComponents> allCell; - //typedef CartesianCentering::allVertex, Dim, NComponents> allVertex; - //typedef CartesianCentering::allFace, Dim, NComponents> allFace; - //typedef CartesianCentering::allEdge, Dim, NComponents> allEdge; - //typedef CartesianCentering::vectorFace, Dim, NComponents> vectorFace; - //typedef CartesianCentering::vectorEdge, Dim, NComponents> vectorEdge; +template +struct CommonCartesianCenterings { + // public: + // typedef CartesianCentering::allCell, Dim, NComponents> allCell; + // typedef CartesianCentering::allVertex, Dim, NComponents> allVertex; + // typedef CartesianCentering::allFace, Dim, NComponents> allFace; + // typedef CartesianCentering::allEdge, Dim, NComponents> allEdge; + // typedef CartesianCentering::vectorFace, Dim, NComponents> vectorFace; + // typedef CartesianCentering::vectorEdge, Dim, NComponents> vectorEdge; }; //**********CommonCartesianCententerings specializations, typedefs************* - -//11111111111111111111111111111111111111111111111111111111111111111111111111111 -// 1D fields -//11111111111111111111111111111111111111111111111111111111111111111111111111111 +// 11111111111111111111111111111111111111111111111111111111111111111111111111111 +// 1D fields +// 11111111111111111111111111111111111111111111111111111111111111111111111111111 // 1D field of scalars (or 1D vectors, or 1D tensors, or 1D sym. tensors) -template<> -struct CommonCartesianCenterings<1U,1U,0U> -{ - typedef CartesianCentering::allCell,1U,1U> allCell; - typedef CartesianCentering::allVertex,1U,1U> allVertex; - // Componentwise centering along/perpendicular to component direction: - typedef CartesianCentering::vectorFace,1U,1U> vectorFace; - typedef CartesianCentering::vectorEdge,1U,1U> vectorEdge; - // Face/Edge centering perpendicular to/along direction 0: - typedef CartesianCentering::allFace,1U,1U> allFace; - typedef CartesianCentering::allEdge,1U,1U> allEdge; -}; - - -//22222222222222222222222222222222222222222222222222222222222222222222222222222 -// 2D fields -//22222222222222222222222222222222222222222222222222222222222222222222222222222 +template <> +struct CommonCartesianCenterings<1U, 1U, 0U> { + typedef CartesianCentering::allCell, 1U, 1U> allCell; + typedef CartesianCentering::allVertex, 1U, 1U> allVertex; + // Componentwise centering along/perpendicular to component direction: + typedef CartesianCentering::vectorFace, 1U, 1U> vectorFace; + typedef CartesianCentering::vectorEdge, 1U, 1U> vectorEdge; + // Face/Edge centering perpendicular to/along direction 0: + typedef CartesianCentering::allFace, 1U, 1U> allFace; + typedef CartesianCentering::allEdge, 1U, 1U> allEdge; +}; + +// 22222222222222222222222222222222222222222222222222222222222222222222222222222 +// 2D fields +// 22222222222222222222222222222222222222222222222222222222222222222222222222222 // 2D field of scalars (or 1D vectors, or 1D tensors, or 1D sym. tensors) -template<> -struct CommonCartesianCenterings<2U,1U,0U> -{ - typedef CartesianCentering::allCell,2U,1U> allCell; - typedef CartesianCentering::allVertex,2U,1U> allVertex; - // Face/Edge centering perpendicular to/along direction 0: - typedef CartesianCentering::allFace,2U,1U> allFace; - typedef CartesianCentering::allEdge,2U,1U> allEdge; -}; -template<> -struct CommonCartesianCenterings<2U,1U,1U> { - // Face/Edge centering perpendicular to/along direction 1: - typedef CartesianCentering::allFace,2U,1U> allFace; - typedef CartesianCentering::allEdge,2U,1U> allEdge; +template <> +struct CommonCartesianCenterings<2U, 1U, 0U> { + typedef CartesianCentering::allCell, 2U, 1U> allCell; + typedef CartesianCentering::allVertex, 2U, 1U> allVertex; + // Face/Edge centering perpendicular to/along direction 0: + typedef CartesianCentering::allFace, 2U, 1U> allFace; + typedef CartesianCentering::allEdge, 2U, 1U> allEdge; +}; +template <> +struct CommonCartesianCenterings<2U, 1U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + typedef CartesianCentering::allFace, 2U, 1U> allFace; + typedef CartesianCentering::allEdge, 2U, 1U> allEdge; }; // 2D field of 2D vectors: -template<> -struct CommonCartesianCenterings<2U,2U,0U> -{ - typedef CartesianCentering::allCell,2U,2U> allCell; - typedef CartesianCentering::allVertex,2U,2U> allVertex; - // Componentwise centering along/perpendicular to component direction: - typedef CartesianCentering::vectorFace,2U,2U> vectorFace; - typedef CartesianCentering::vectorEdge,2U,2U> vectorEdge; - // Face/Edge centering perpendicular to/along direction 0: - typedef CartesianCentering::allFace,2U,2U> allFace; - typedef CartesianCentering::allEdge,2U,2U> allEdge; -}; -template<> -struct CommonCartesianCenterings<2U,2U,1U> -{ - // Face/Edge centering perpendicular to/along direction 1: - typedef CartesianCentering::allFace,2U,2U> allFace; - typedef CartesianCentering::allEdge,2U,2U> allEdge; +template <> +struct CommonCartesianCenterings<2U, 2U, 0U> { + typedef CartesianCentering::allCell, 2U, 2U> allCell; + typedef CartesianCentering::allVertex, 2U, 2U> allVertex; + // Componentwise centering along/perpendicular to component direction: + typedef CartesianCentering::vectorFace, 2U, 2U> vectorFace; + typedef CartesianCentering::vectorEdge, 2U, 2U> vectorEdge; + // Face/Edge centering perpendicular to/along direction 0: + typedef CartesianCentering::allFace, 2U, 2U> allFace; + typedef CartesianCentering::allEdge, 2U, 2U> allEdge; +}; +template <> +struct CommonCartesianCenterings<2U, 2U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + typedef CartesianCentering::allFace, 2U, 2U> allFace; + typedef CartesianCentering::allEdge, 2U, 2U> allEdge; }; // 2D field of 2D tensors: -template<> -struct CommonCartesianCenterings<2U,4U,0U> -{ - typedef CartesianCentering::allCell,2U,4U> allCell; - typedef CartesianCentering::allVertex,2U,4U> allVertex; - // Face/Edge centering perpendicular to/along direction 0: - typedef CartesianCentering::allFace,2U,4U> allFace; - typedef CartesianCentering::allEdge,2U,4U> allEdge; -}; -template<> -struct CommonCartesianCenterings<2U,4U,1U> -{ - // Face/Edge centering perpendicular to/along direction 1: - typedef CartesianCentering::allFace,2U,4U> allFace; - typedef CartesianCentering::allEdge,2U,4U> allEdge; +template <> +struct CommonCartesianCenterings<2U, 4U, 0U> { + typedef CartesianCentering::allCell, 2U, 4U> allCell; + typedef CartesianCentering::allVertex, 2U, 4U> allVertex; + // Face/Edge centering perpendicular to/along direction 0: + typedef CartesianCentering::allFace, 2U, 4U> allFace; + typedef CartesianCentering::allEdge, 2U, 4U> allEdge; +}; +template <> +struct CommonCartesianCenterings<2U, 4U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + typedef CartesianCentering::allFace, 2U, 4U> allFace; + typedef CartesianCentering::allEdge, 2U, 4U> allEdge; }; // 2D field of 2D symmetric tensors: -template<> -struct CommonCartesianCenterings<2U,3U,0U> -{ - typedef CartesianCentering::allCell,2U,3U> allCell; - typedef CartesianCentering::allVertex,2U,3U> allVertex; - // Face/Edge centering perpendicular to/along direction 0: - typedef CartesianCentering::allFace,2U,3U> allFace; - typedef CartesianCentering::allEdge,2U,3U> allEdge; +template <> +struct CommonCartesianCenterings<2U, 3U, 0U> { + typedef CartesianCentering::allCell, 2U, 3U> allCell; + typedef CartesianCentering::allVertex, 2U, 3U> allVertex; + // Face/Edge centering perpendicular to/along direction 0: + typedef CartesianCentering::allFace, 2U, 3U> allFace; + typedef CartesianCentering::allEdge, 2U, 3U> allEdge; }; -template<> -struct CommonCartesianCenterings<2U,3U,1U> -{ - // Face/Edge centering perpendicular to/along direction 1: - typedef CartesianCentering::allFace,2U,3U> allFace; - typedef CartesianCentering::allEdge,2U,3U> allEdge; +template <> +struct CommonCartesianCenterings<2U, 3U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + typedef CartesianCentering::allFace, 2U, 3U> allFace; + typedef CartesianCentering::allEdge, 2U, 3U> allEdge; }; - -//33333333333333333333333333333333333333333333333333333333333333333333333333333 -// 3D fields -//33333333333333333333333333333333333333333333333333333333333333333333333333333 +// 33333333333333333333333333333333333333333333333333333333333333333333333333333 +// 3D fields +// 33333333333333333333333333333333333333333333333333333333333333333333333333333 // 3D field of scalars (or 1D vectors, or 1D tensors, or 1D sym. tensors) -template<> -struct CommonCartesianCenterings<3U,1U,0U> -{ - typedef CartesianCentering::allCell,3U,1U> allCell; - typedef CartesianCentering::allVertex,3U,1U> allVertex; - // Face/Edge centering perpendicular to/along direction 0: - typedef CartesianCentering::allFace,3U,1U> allFace; - typedef CartesianCentering::allEdge,3U,1U> allEdge; -}; -template<> -struct CommonCartesianCenterings<3U,1U,1U> -{ - // Face/Edge centering perpendicular to/along direction 1: - typedef CartesianCentering::allFace,3U,1U> allFace; - typedef CartesianCentering::allEdge,3U,1U> allEdge; -}; -template<> -struct CommonCartesianCenterings<3U,1U,2U> -{ - // Face/Edge centering perpendicular to/along direction 2: - typedef CartesianCentering::allFace,3U,1U> allFace; - typedef CartesianCentering::allEdge,3U,1U> allEdge; +template <> +struct CommonCartesianCenterings<3U, 1U, 0U> { + typedef CartesianCentering::allCell, 3U, 1U> allCell; + typedef CartesianCentering::allVertex, 3U, 1U> allVertex; + // Face/Edge centering perpendicular to/along direction 0: + typedef CartesianCentering::allFace, 3U, 1U> allFace; + typedef CartesianCentering::allEdge, 3U, 1U> allEdge; +}; +template <> +struct CommonCartesianCenterings<3U, 1U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + typedef CartesianCentering::allFace, 3U, 1U> allFace; + typedef CartesianCentering::allEdge, 3U, 1U> allEdge; +}; +template <> +struct CommonCartesianCenterings<3U, 1U, 2U> { + // Face/Edge centering perpendicular to/along direction 2: + typedef CartesianCentering::allFace, 3U, 1U> allFace; + typedef CartesianCentering::allEdge, 3U, 1U> allEdge; }; // 3D field of 2D vectors: -template<> -struct CommonCartesianCenterings<3U,2U,0U> -{ - typedef CartesianCentering::allCell,3U,2U> allCell; - typedef CartesianCentering::allVertex,3U,2U> allVertex; - // Face/Edge centering perpendicular to/along direction 0: - typedef CartesianCentering::allFace,3U,2U> allFace; - typedef CartesianCentering::allEdge,3U,2U> allEdge; -}; -template<> -struct CommonCartesianCenterings<3U,2U,1U> -{ - // Face/Edge centering perpendicular to/along direction 1: - typedef CartesianCentering::allFace,3U,2U> allFace; - typedef CartesianCentering::allEdge,3U,2U> allEdge; -}; -template<> -struct CommonCartesianCenterings<3U,2U,2U> -{ - // Face/Edge centering perpendicular to/along direction 2: - typedef CartesianCentering::allFace,3U,2U> allFace; - typedef CartesianCentering::allEdge,3U,2U> allEdge; +template <> +struct CommonCartesianCenterings<3U, 2U, 0U> { + typedef CartesianCentering::allCell, 3U, 2U> allCell; + typedef CartesianCentering::allVertex, 3U, 2U> allVertex; + // Face/Edge centering perpendicular to/along direction 0: + typedef CartesianCentering::allFace, 3U, 2U> allFace; + typedef CartesianCentering::allEdge, 3U, 2U> allEdge; +}; +template <> +struct CommonCartesianCenterings<3U, 2U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + typedef CartesianCentering::allFace, 3U, 2U> allFace; + typedef CartesianCentering::allEdge, 3U, 2U> allEdge; +}; +template <> +struct CommonCartesianCenterings<3U, 2U, 2U> { + // Face/Edge centering perpendicular to/along direction 2: + typedef CartesianCentering::allFace, 3U, 2U> allFace; + typedef CartesianCentering::allEdge, 3U, 2U> allEdge; }; // 3D field of 3D vectors: -template<> -struct CommonCartesianCenterings<3U,3U,0U> -{ - typedef CartesianCentering::allCell,3U,3U> allCell; - typedef CartesianCentering::allVertex,3U,3U> allVertex; - // Componentwise centering along/perpendicular to component direction: - typedef CartesianCentering::vectorFace,3U,3U> vectorFace; - typedef CartesianCentering::vectorEdge,3U,3U> vectorEdge; - // Face/Edge centering perpendicular to/along direction 0: - typedef CartesianCentering::allFace,3U,3U> allFace; - typedef CartesianCentering::allEdge,3U,3U> allEdge; -}; -template<> -struct CommonCartesianCenterings<3U,3U,1U> -{ - // Face/Edge centering perpendicular to/along direction 1: - typedef CartesianCentering::allFace,3U,3U> allFace; - typedef CartesianCentering::allEdge,3U,3U> allEdge; -}; -template<> -struct CommonCartesianCenterings<3U,3U,2U> -{ - // Face/Edge centering perpendicular to/along direction 2: - typedef CartesianCentering::allFace,3U,3U> allFace; - typedef CartesianCentering::allEdge,3U,3U> allEdge; +template <> +struct CommonCartesianCenterings<3U, 3U, 0U> { + typedef CartesianCentering::allCell, 3U, 3U> allCell; + typedef CartesianCentering::allVertex, 3U, 3U> allVertex; + // Componentwise centering along/perpendicular to component direction: + typedef CartesianCentering::vectorFace, 3U, 3U> vectorFace; + typedef CartesianCentering::vectorEdge, 3U, 3U> vectorEdge; + // Face/Edge centering perpendicular to/along direction 0: + typedef CartesianCentering::allFace, 3U, 3U> allFace; + typedef CartesianCentering::allEdge, 3U, 3U> allEdge; +}; +template <> +struct CommonCartesianCenterings<3U, 3U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + typedef CartesianCentering::allFace, 3U, 3U> allFace; + typedef CartesianCentering::allEdge, 3U, 3U> allEdge; +}; +template <> +struct CommonCartesianCenterings<3U, 3U, 2U> { + // Face/Edge centering perpendicular to/along direction 2: + typedef CartesianCentering::allFace, 3U, 3U> allFace; + typedef CartesianCentering::allEdge, 3U, 3U> allEdge; }; - // 3D field of 3D tensors: -template<> -struct CommonCartesianCenterings<3U,9U,0U> -{ - typedef CartesianCentering::allCell,3U,9U> allCell; - typedef CartesianCentering::allVertex,3U,9U> allVertex; - // Face/Edge centering perpendicular to/along direction 0: - typedef CartesianCentering::allFace,3U,9U> allFace; - typedef CartesianCentering::allEdge,3U,9U> allEdge; -}; -template<> -struct CommonCartesianCenterings<3U,9U,1U> -{ - // Face/Edge centering perpendicular to/along direction 1: - typedef CartesianCentering::allFace,3U,9U> allFace; - typedef CartesianCentering::allEdge,3U,9U> allEdge; -}; -template<> -struct CommonCartesianCenterings<3U,9U,2U> -{ - // Face/Edge centering perpendicular to/along direction 2: - typedef CartesianCentering::allFace,3U,9U> allFace; - typedef CartesianCentering::allEdge,3U,9U> allEdge; +template <> +struct CommonCartesianCenterings<3U, 9U, 0U> { + typedef CartesianCentering::allCell, 3U, 9U> allCell; + typedef CartesianCentering::allVertex, 3U, 9U> allVertex; + // Face/Edge centering perpendicular to/along direction 0: + typedef CartesianCentering::allFace, 3U, 9U> allFace; + typedef CartesianCentering::allEdge, 3U, 9U> allEdge; +}; +template <> +struct CommonCartesianCenterings<3U, 9U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + typedef CartesianCentering::allFace, 3U, 9U> allFace; + typedef CartesianCentering::allEdge, 3U, 9U> allEdge; +}; +template <> +struct CommonCartesianCenterings<3U, 9U, 2U> { + // Face/Edge centering perpendicular to/along direction 2: + typedef CartesianCentering::allFace, 3U, 9U> allFace; + typedef CartesianCentering::allEdge, 3U, 9U> allEdge; }; // 3D field of 3D symmetric tensors: -template<> -struct CommonCartesianCenterings<3U,6U,0U> -{ - typedef CartesianCentering::allCell,3U,6U> allCell; - typedef CartesianCentering::allVertex,3U,6U> allVertex; - // Face/Edge centering perpendicular to/along direction 0: - typedef CartesianCentering::allFace,3U,6U> allFace; - typedef CartesianCentering::allEdge,3U,6U> allEdge; -}; -template<> -struct CommonCartesianCenterings<3U,6U,1U> -{ - // Face/Edge centering perpendicular to/along direction 1: - typedef CartesianCentering::allFace,3U,6U> allFace; - typedef CartesianCentering::allEdge,3U,6U> allEdge; -}; -template<> -struct CommonCartesianCenterings<3U,6U,2U> -{ - // Face/Edge centering perpendicular to/along direction 2: - typedef CartesianCentering::allFace,3U,6U> allFace; - typedef CartesianCentering::allEdge,3U,6U> allEdge; +template <> +struct CommonCartesianCenterings<3U, 6U, 0U> { + typedef CartesianCentering::allCell, 3U, 6U> allCell; + typedef CartesianCentering::allVertex, 3U, 6U> allVertex; + // Face/Edge centering perpendicular to/along direction 0: + typedef CartesianCentering::allFace, 3U, 6U> allFace; + typedef CartesianCentering::allEdge, 3U, 6U> allEdge; +}; +template <> +struct CommonCartesianCenterings<3U, 6U, 1U> { + // Face/Edge centering perpendicular to/along direction 1: + typedef CartesianCentering::allFace, 3U, 6U> allFace; + typedef CartesianCentering::allEdge, 3U, 6U> allEdge; +}; +template <> +struct CommonCartesianCenterings<3U, 6U, 2U> { + // Face/Edge centering perpendicular to/along direction 2: + typedef CartesianCentering::allFace, 3U, 6U> allFace; + typedef CartesianCentering::allEdge, 3U, 6U> allEdge; }; #include "Meshes/CartesianCentering.hpp" -#endif // CARTESIAN_CENTERING_H +#endif // CARTESIAN_CENTERING_H /*************************************************************************** * $RCSfile: CartesianCentering.h,v $ $Author: adelmann $ * $Revision: 1.1.1.1 $ $Date: 2003/01/23 07:40:28 $ - * IPPL_VERSION_ID: $Id: CartesianCentering.h,v 1.1.1.1 2003/01/23 07:40:28 adelmann Exp $ + * IPPL_VERSION_ID: $Id: CartesianCentering.h,v 1.1.1.1 2003/01/23 07:40:28 adelmann Exp $ ***************************************************************************/ diff --git a/src/Meshes/CartesianCentering.hpp b/src/Meshes/CartesianCentering.hpp index c237a4f93..1f2378326 100644 --- a/src/Meshes/CartesianCentering.hpp +++ b/src/Meshes/CartesianCentering.hpp @@ -2,8 +2,8 @@ /*************************************************************************** * * The IPPL Framework - * - * This program was prepared by PSI. + * + * This program was prepared by PSI. * All rights in the program are reserved by PSI. * Neither PSI nor the author(s) * makes any warranty, express or implied, or assumes any liability or @@ -17,7 +17,7 @@ /*************************************************************************** * * The IPPL Framework - * + * * * Visit http://people.web.psi.ch/adelmann/ for more details * @@ -27,11 +27,11 @@ #include "Meshes/CartesianCentering.h" template -std::string CartesianCentering::CenteringName = -"CartesianCentering: no specialized name (yet) for this case"; +std::string CartesianCentering::CenteringName = + "CartesianCentering: no specialized name (yet) for this case"; /*************************************************************************** * $RCSfile: CartesianCentering.cpp,v $ $Author: adelmann $ * $Revision: 1.1.1.1 $ $Date: 2003/01/23 07:40:28 $ - * IPPL_VERSION_ID: $Id: CartesianCentering.cpp,v 1.1.1.1 2003/01/23 07:40:28 adelmann Exp $ + * IPPL_VERSION_ID: $Id: CartesianCentering.cpp,v 1.1.1.1 2003/01/23 07:40:28 adelmann Exp $ ***************************************************************************/ diff --git a/src/Meshes/CartesianStencilSetup.h b/src/Meshes/CartesianStencilSetup.h index 1dc614de5..744884ea8 100644 --- a/src/Meshes/CartesianStencilSetup.h +++ b/src/Meshes/CartesianStencilSetup.h @@ -2,7 +2,7 @@ /*************************************************************************** * * The IPPL Framework - * + * * * Visit http://people.web.psi.ch/adelmann/ for more details * @@ -22,8 +22,10 @@ // what kind of operation is to be done. // //---------------------------------------------------------------------- -template struct Divergence {}; -template struct Gradient {}; +template +struct Divergence {}; +template +struct Gradient {}; //---------------------------------------------------------------------- // @@ -36,23 +38,20 @@ template struct Gradient {}; // //---------------------------------------------------------------------- -template -struct CenteredDivergence -{ - typedef Divergence operator_type; +template +struct CenteredDivergence { + typedef Divergence operator_type; }; -template -class CenteredGradient -{ - typedef Gradient operator_type; +template +class CenteredGradient { + typedef Gradient operator_type; }; -#endif // CARTESIAN_STENCIL_SETUP_H +#endif // CARTESIAN_STENCIL_SETUP_H /*************************************************************************** * $RCSfile: CartesianStencilSetup.h,v $ $Author: adelmann $ * $Revision: 1.1.1.1 $ $Date: 2003/01/23 07:40:28 $ - * IPPL_VERSION_ID: $Id: CartesianStencilSetup.h,v 1.1.1.1 2003/01/23 07:40:28 adelmann Exp $ + * IPPL_VERSION_ID: $Id: CartesianStencilSetup.h,v 1.1.1.1 2003/01/23 07:40:28 adelmann Exp $ ***************************************************************************/ - diff --git a/src/Meshes/Centering.cpp b/src/Meshes/Centering.cpp index 83167ffeb..acdd5df8a 100644 --- a/src/Meshes/Centering.cpp +++ b/src/Meshes/Centering.cpp @@ -32,21 +32,18 @@ const char* Cell::CenteringName = "Cell"; const char* Vert::CenteringName = "Vert"; const char* Edge::CenteringName = "Edge"; -const char* Centering::CenteringEnum_Names[] = {"CELL ","VERTEX","EDGE "}; +const char* Centering::CenteringEnum_Names[] = {"CELL ", "VERTEX", "EDGE "}; -//CC chokes static void Cell::print_Centerings(ostream& out) -void Cell::print_Centerings(std::ostream& out) -{ - out << Cell::CenteringName << std::endl; +// CC chokes static void Cell::print_Centerings(ostream& out) +void Cell::print_Centerings(std::ostream& out) { + out << Cell::CenteringName << std::endl; } -//CC chokes static void Vert::print_Centerings(ostream& out) -void Vert::print_Centerings(std::ostream& out) -{ - out << Vert::CenteringName << std::endl; +// CC chokes static void Vert::print_Centerings(ostream& out) +void Vert::print_Centerings(std::ostream& out) { + out << Vert::CenteringName << std::endl; } -void Edge::print_Centerings(std::ostream& out) -{ +void Edge::print_Centerings(std::ostream& out) { out << Edge::CenteringName << std::endl; } diff --git a/src/Meshes/Centering.h b/src/Meshes/Centering.h index 6f6cb5ce2..5dc5213db 100644 --- a/src/Meshes/Centering.h +++ b/src/Meshes/Centering.h @@ -22,38 +22,34 @@ // instead of this for cartesian meshes. // Keep this class around for backwards compatibility, and possibly for use // with non-cartesian meshes. -class Centering -{ +class Centering { public: static const char* CenteringEnum_Names[3]; }; -class Cell -{ +class Cell { public: - static const char* CenteringName; - static void print_Centerings(std::ostream&); + static const char* CenteringName; + static void print_Centerings(std::ostream&); }; // Vertex-centered, all components, for all dimensions in cartesian-mesh case. // Recommendation: use CommonCartesianCenterings::allVert // instead of this for cartesian meshes. // Keep this class around for backwards compatibility, and possibly for use // with non-cartesian meshes. -class Vert -{ +class Vert { public: - static const char* CenteringName; - static void print_Centerings(std::ostream&); + static const char* CenteringName; + static void print_Centerings(std::ostream&); }; -class Edge -{ +class Edge { public: static const char* CenteringName; static void print_Centerings(std::ostream&); }; -#endif // CENTERING_H +#endif // CENTERING_H /*************************************************************************** * $RCSfile: Centering.h,v $ $Author: adelmann $ diff --git a/src/Meshes/Mesh.h b/src/Meshes/Mesh.h index 0ffd0888b..7bfffd260 100644 --- a/src/Meshes/Mesh.h +++ b/src/Meshes/Mesh.h @@ -24,19 +24,20 @@ #include "Types/Vector.h" namespace ippl { - template + template class Mesh { - public: typedef T value_type; - enum { Dimension = Dim }; + enum { + Dimension = Dim + }; typedef Vector vector_type; typedef Vector matrix_type; - Mesh() {}; + Mesh(){}; - virtual ~Mesh() {}; + virtual ~Mesh(){}; // Get the origin of mesh vertex positions vector_type getOrigin() const; @@ -61,10 +62,10 @@ namespace ippl { T getGridsize(size_t dim) const; protected: - vector_type origin_m; // Origin of mesh coordinates (vertices) - vector_type gridSizes_m; // Sizes (number of vertices) + vector_type origin_m; // Origin of mesh coordinates (vertices) + vector_type gridSizes_m; // Sizes (number of vertices) }; -} +} // namespace ippl #include "Meshes/Mesh.hpp" diff --git a/src/Meshes/Mesh.hpp b/src/Meshes/Mesh.hpp index d5ec231a4..30f342329 100644 --- a/src/Meshes/Mesh.hpp +++ b/src/Meshes/Mesh.hpp @@ -19,26 +19,23 @@ // along with IPPL. If not, see . // namespace ippl { - template + template typename Mesh::vector_type Mesh::getOrigin() const { return origin_m; } - - template + template void Mesh::setOrigin(const vector_type& origin) { origin_m = origin; } - - template + template const typename Mesh::vector_type& Mesh::getGridsize() const { return gridSizes_m; } - - template + template T Mesh::getGridsize(size_t dim) const { return gridSizes_m[dim]; } -} +} // namespace ippl diff --git a/src/Meshes/UniformCartesian.h b/src/Meshes/UniformCartesian.h index 563f286a8..dd4261016 100644 --- a/src/Meshes/UniformCartesian.h +++ b/src/Meshes/UniformCartesian.h @@ -18,67 +18,60 @@ #ifndef IPPL_UNIFORM_CARTESIAN_H #define IPPL_UNIFORM_CARTESIAN_H -#include "Meshes/Mesh.h" #include "Meshes/CartesianCentering.h" +#include "Meshes/Mesh.h" namespace ippl { -template -class UniformCartesian : public Mesh { - -public: - typedef typename Mesh::vector_type vector_type; - typedef Cell DefaultCentering; + template + class UniformCartesian : public Mesh { + public: + typedef typename Mesh::vector_type vector_type; + typedef Cell DefaultCentering; + UniformCartesian(); - UniformCartesian(); + UniformCartesian(const NDIndex& ndi, const vector_type& hx, const vector_type& origin); - UniformCartesian(const NDIndex& ndi, - const vector_type& hx, - const vector_type& origin); + ~UniformCartesian() = default; + void initialize(const NDIndex& ndi, const vector_type& hx, const vector_type& origin); - ~UniformCartesian() = default; + // Set the spacings of mesh vertex positions (recompute Dvc, cell volume): + void setMeshSpacing(const vector_type& meshSpacing); - void initialize(const NDIndex& ndi, - const vector_type& hx, - const vector_type& origin); - - // Set the spacings of mesh vertex positions (recompute Dvc, cell volume): - void setMeshSpacing(const vector_type& meshSpacing); + // Get the spacings of mesh vertex positions along specified direction + T getMeshSpacing(unsigned dim) const; - // Get the spacings of mesh vertex positions along specified direction - T getMeshSpacing(unsigned dim) const; + const vector_type& getMeshSpacing() const; - const vector_type& getMeshSpacing() const; + T getCellVolume() const override; + T getMeshVolume() const override; - T getCellVolume() const override; - T getMeshVolume() const override; - - void updateCellVolume_m(); + void updateCellVolume_m(); - // (x,y,z) coordinates of indexed vertex: - vector_type getVertexPosition(const NDIndex& ndi) const { - vector_type vertexPosition; - for (unsigned int d = 0; d < Dim; d++) + // (x,y,z) coordinates of indexed vertex: + vector_type getVertexPosition(const NDIndex& ndi) const { + vector_type vertexPosition; + for (unsigned int d = 0; d < Dim; d++) vertexPosition(d) = ndi[d].first() * meshSpacing_m[d] + this->origin_m(d); - return vertexPosition; - } - - // Vertex-vertex grid spacing of indexed cell: - vector_type getDeltaVertex(const NDIndex& ndi) const { - vector_type vertexVertexSpacing; - for (unsigned int d = 0; d < Dim; d++) - vertexVertexSpacing[d] = meshSpacing_m[d] * ndi[d].length(); - return vertexVertexSpacing; - } - -private: - vector_type meshSpacing_m; // delta-x, delta-y (>1D), delta-z (>2D) - T volume_m; // Cell length(1D), area(2D), or volume (>2D) -}; - -} + return vertexPosition; + } + + // Vertex-vertex grid spacing of indexed cell: + vector_type getDeltaVertex(const NDIndex& ndi) const { + vector_type vertexVertexSpacing; + for (unsigned int d = 0; d < Dim; d++) + vertexVertexSpacing[d] = meshSpacing_m[d] * ndi[d].length(); + return vertexVertexSpacing; + } + + private: + vector_type meshSpacing_m; // delta-x, delta-y (>1D), delta-z (>2D) + T volume_m; // Cell length(1D), area(2D), or volume (>2D) + }; + +} // namespace ippl #include "Meshes/UniformCartesian.hpp" diff --git a/src/Meshes/UniformCartesian.hpp b/src/Meshes/UniformCartesian.hpp index 2322c43fa..9ad94ca5e 100644 --- a/src/Meshes/UniformCartesian.hpp +++ b/src/Meshes/UniformCartesian.hpp @@ -15,34 +15,27 @@ // You should have received a copy of the GNU General Public License // along with IPPL. If not, see . // -#include "Utility/PAssert.h" -#include "Utility/IpplInfo.h" #include "Field/BareField.h" #include "Field/Field.h" +#include "Utility/IpplInfo.h" +#include "Utility/PAssert.h" namespace ippl { - template + template UniformCartesian::UniformCartesian() : Mesh() - , volume_m(0.0) - { } + , volume_m(0.0) {} - - template - UniformCartesian::UniformCartesian(const NDIndex& ndi, - const vector_type& hx, - const vector_type& origin) - { + template + UniformCartesian::UniformCartesian(const NDIndex& ndi, const vector_type& hx, + const vector_type& origin) { this->initialize(ndi, hx, origin); } - template - void UniformCartesian::initialize(const NDIndex& ndi, - const vector_type& hx, - const vector_type& origin) - { + void UniformCartesian::initialize(const NDIndex& ndi, const vector_type& hx, + const vector_type& origin) { meshSpacing_m = hx; volume_m = 1.0; @@ -53,35 +46,31 @@ namespace ippl { this->setOrigin(origin); } - - template + + template void UniformCartesian::setMeshSpacing(const vector_type& meshSpacing) { meshSpacing_m = meshSpacing; this->updateCellVolume_m(); } - - template + template T UniformCartesian::getMeshSpacing(unsigned dim) const { PAssert_LT(dim, Dim); return meshSpacing_m[dim]; } - - template - const typename UniformCartesian::vector_type& - UniformCartesian::getMeshSpacing() const - { + template + const typename UniformCartesian::vector_type& UniformCartesian::getMeshSpacing() + const { return meshSpacing_m; } - - template + template T UniformCartesian::getCellVolume() const { return volume_m; } - template + template T UniformCartesian::getMeshVolume() const { T ret = 1; for (unsigned int d = 0; d < Dim; ++d) { @@ -89,8 +78,8 @@ namespace ippl { } return ret; } - - template + + template void UniformCartesian::updateCellVolume_m() { // update cell volume volume_m = 1.0; @@ -99,4 +88,4 @@ namespace ippl { } } -} +} // namespace ippl diff --git a/src/Particle/AbstractParticle.h b/src/Particle/AbstractParticle.h index 9ba7e8c0e..bcb4cb00c 100644 --- a/src/Particle/AbstractParticle.h +++ b/src/Particle/AbstractParticle.h @@ -21,46 +21,45 @@ #ifndef ABSTRACT_PARTICLE_H #define ABSTRACT_PARTICLE_H -#include "Particle/ParticleLayout.h" #include "Particle/ParticleAttrib.h" +#include "Particle/ParticleLayout.h" template class AbstractParticle { - public: - typedef typename ParticleLayout::SingleParticlePos_t - SingleParticlePos_t; + typedef typename ParticleLayout::SingleParticlePos_t SingleParticlePos_t; typedef typename ParticleLayout::Index_t Index_t; typedef ParticleAttrib ParticlePos_t; - typedef ParticleAttrib ParticleIndex_t; + typedef ParticleAttrib ParticleIndex_t; typedef typename ParticleLayout::UpdateFlags UpdateFlags; typedef typename ParticleLayout::Position_t Position_t; typedef ParticleLayout Layout_t; public: + AbstractParticle() + : R_p(0) + , ID_p(0) {} - AbstractParticle() : R_p(0), ID_p(0) {} - - virtual ~AbstractParticle() { } -// AbstractParticle(ParticlePos_t& R, -// ParticleIndex_t& ID) : R_p(&R), ID_p(&ID) -// { -// std::cout << "AbstractParticle()" << std::endl; -// } + virtual ~AbstractParticle() {} + // AbstractParticle(ParticlePos_t& R, + // ParticleIndex_t& ID) : R_p(&R), ID_p(&ID) + // { + // std::cout << "AbstractParticle()" << std::endl; + // } virtual void addAttribute(ParticleAttribBase& pa) = 0; - virtual size_t getTotalNum() const = 0; - virtual size_t getLocalNum() const = 0; + virtual size_t getTotalNum() const = 0; + virtual size_t getLocalNum() const = 0; virtual size_t getDestroyNum() const = 0; - virtual size_t getGhostNum() const = 0; - virtual void setTotalNum(size_t n) = 0; - virtual void setLocalNum(size_t n) = 0; + virtual size_t getGhostNum() const = 0; + virtual void setTotalNum(size_t n) = 0; + virtual void setLocalNum(size_t n) = 0; virtual unsigned int getMinimumNumberOfParticlesPerCore() const = 0; virtual void setMinimumNumberOfParticlesPerCore(unsigned int n) = 0; - virtual Layout_t& getLayout() = 0; + virtual Layout_t& getLayout() = 0; virtual const Layout_t& getLayout() const = 0; virtual bool getUpdateFlag(UpdateFlags f) const = 0; @@ -75,13 +74,12 @@ class AbstractParticle { virtual void resetID() = 0; - - virtual void update() = 0; + virtual void update() = 0; virtual void update(const ParticleAttrib& canSwap) = 0; virtual void createWithID(unsigned id) = 0; - virtual void create(size_t) = 0; - virtual void globalCreate(size_t np) = 0; + virtual void create(size_t) = 0; + virtual void globalCreate(size_t np) = 0; virtual void destroy(size_t, size_t, bool = false) = 0; diff --git a/src/Particle/IntNGP.h b/src/Particle/IntNGP.h index 99a5b2b64..a2bf81723 100644 --- a/src/Particle/IntNGP.h +++ b/src/Particle/IntNGP.h @@ -2,7 +2,7 @@ /*************************************************************************** * * The IPPL Framework - * + * * * Visit http://people.web.psi.ch/adelmann/ for more details * @@ -15,9 +15,8 @@ interpolation of data for a single particle to or from IPPL Field. */ // include files -#include "Particle/Interpolator.h" #include "Field/Field.h" - +#include "Particle/Interpolator.h" // forward declaration class IntNGP; @@ -25,124 +24,110 @@ class IntNGP; // specialization of InterpolatorTraits template -struct InterpolatorTraits { - typedef NDIndex Cache_t; +struct InterpolatorTraits { + typedef NDIndex Cache_t; }; - // IntNGP class definition class IntNGP : public Interpolator { - public: - // constructor/destructor - IntNGP() {} - ~IntNGP() {} - - // gather/scatter functions - - // scatter particle data into Field using particle position and mesh - template - static - void scatter(const FT& pdata, Field& f, - const Vektor& ppos, const M& mesh) { - // find nearest-grid-point for particle position, store in NDIndex obj - NDIndex ngp = FindNGP(mesh, ppos, CenteringTag()); - // scatter data value to Field ... this assumes that the Field - // data point is local to this processor, if not an error will be printed. - - CompressedBrickIterator fiter = getFieldIter(f,ngp); - *fiter += pdata; - - return; - } - - // scatter particle data into Field using particle position and mesh - // and cache mesh information for reuse - template - static - void scatter(const FT& pdata, Field& f, - const Vektor& ppos, const M& mesh, - NDIndex& ngp) { - // find nearest-grid-point for particle position, store in NDIndex obj - ngp = FindNGP(mesh, ppos, CenteringTag()); - // scatter data value to Field ... this assumes that the Field - // data point is local to this processor, if not an error will be printed. - - CompressedBrickIterator fiter = getFieldIter(f,ngp); - *fiter += pdata; - - return; - } - - // scatter particle data into Field using cached mesh information - template - static - void scatter(const FT& pdata, Field& f, - const NDIndex& ngp) { - // scatter data value to Field ... this assumes that the Field - // data point is local to this processor, if not an error will be printed. - - CompressedBrickIterator fiter = getFieldIter(f,ngp); - *fiter += pdata; - - return; - } - - // gather particle data from Field using particle position and mesh - template - static - void gather(FT& pdata, const Field& f, - const Vektor& ppos, const M& mesh) { - // find nearest-grid-point for particle position, store in NDIndex obj - NDIndex ngp = FindNGP(mesh, ppos, CenteringTag()); - // gather Field value to particle data ... this assumes that the Field - // data point is local to this processor, if not an error will be printed. - - CompressedBrickIterator fiter = getFieldIter(f,ngp); - pdata = *fiter; - - return; - } - - // gather particle data from Field using particle position and mesh - // and cache mesh information for reuse - template - static - void gather(FT& pdata, const Field& f, - const Vektor& ppos, const M& mesh, - NDIndex& ngp) { - // find nearest-grid-point for particle position, store in NDIndex obj - ngp = FindNGP(mesh, ppos, CenteringTag()); - // gather Field value to particle data ... this assumes that the Field - // data point is local to this processor, if not an error will be printed. - - CompressedBrickIterator fiter = getFieldIter(f,ngp); - pdata = *fiter; - - return; - } - - // gather particle data from Field using cached mesh information - template - static - void gather(FT& pdata, const Field& f, - const NDIndex& ngp) { - // gather Field value to particle data ... this assumes that the Field - // data point is local to this processor, if not an error will be printed. - - CompressedBrickIterator fiter = getFieldIter(f,ngp); - pdata = *fiter; - - return; - } - + // constructor/destructor + IntNGP() {} + ~IntNGP() {} + + // gather/scatter functions + + // scatter particle data into Field using particle position and mesh + template + static void scatter(const FT& pdata, Field& f, const Vektor& ppos, + const M& mesh) { + // find nearest-grid-point for particle position, store in NDIndex obj + NDIndex ngp = FindNGP(mesh, ppos, CenteringTag()); + // scatter data value to Field ... this assumes that the Field + // data point is local to this processor, if not an error will be printed. + + CompressedBrickIterator fiter = getFieldIter(f, ngp); + *fiter += pdata; + + return; + } + + // scatter particle data into Field using particle position and mesh + // and cache mesh information for reuse + template + static void scatter(const FT& pdata, Field& f, const Vektor& ppos, + const M& mesh, NDIndex& ngp) { + // find nearest-grid-point for particle position, store in NDIndex obj + ngp = FindNGP(mesh, ppos, CenteringTag()); + // scatter data value to Field ... this assumes that the Field + // data point is local to this processor, if not an error will be printed. + + CompressedBrickIterator fiter = getFieldIter(f, ngp); + *fiter += pdata; + + return; + } + + // scatter particle data into Field using cached mesh information + template + static void scatter(const FT& pdata, Field& f, const NDIndex& ngp) { + // scatter data value to Field ... this assumes that the Field + // data point is local to this processor, if not an error will be printed. + + CompressedBrickIterator fiter = getFieldIter(f, ngp); + *fiter += pdata; + + return; + } + + // gather particle data from Field using particle position and mesh + template + static void gather(FT& pdata, const Field& f, const Vektor& ppos, + const M& mesh) { + // find nearest-grid-point for particle position, store in NDIndex obj + NDIndex ngp = FindNGP(mesh, ppos, CenteringTag()); + // gather Field value to particle data ... this assumes that the Field + // data point is local to this processor, if not an error will be printed. + + CompressedBrickIterator fiter = getFieldIter(f, ngp); + pdata = *fiter; + + return; + } + + // gather particle data from Field using particle position and mesh + // and cache mesh information for reuse + template + static void gather(FT& pdata, const Field& f, const Vektor& ppos, + const M& mesh, NDIndex& ngp) { + // find nearest-grid-point for particle position, store in NDIndex obj + ngp = FindNGP(mesh, ppos, CenteringTag()); + // gather Field value to particle data ... this assumes that the Field + // data point is local to this processor, if not an error will be printed. + + CompressedBrickIterator fiter = getFieldIter(f, ngp); + pdata = *fiter; + + return; + } + + // gather particle data from Field using cached mesh information + template + static void gather(FT& pdata, const Field& f, const NDIndex& ngp) { + // gather Field value to particle data ... this assumes that the Field + // data point is local to this processor, if not an error will be printed. + + CompressedBrickIterator fiter = getFieldIter(f, ngp); + pdata = *fiter; + + return; + } }; -#endif // INT_NGP_H +#endif // INT_NGP_H /*************************************************************************** * $RCSfile: IntNGP.h,v $ $Author: adelmann $ * $Revision: 1.1.1.1 $ $Date: 2003/01/23 07:40:28 $ - * IPPL_VERSION_ID: $Id: IntNGP.h,v 1.1.1.1 2003/01/23 07:40:28 adelmann Exp $ + * IPPL_VERSION_ID: $Id: IntNGP.h,v 1.1.1.1 2003/01/23 07:40:28 adelmann Exp $ ***************************************************************************/ - diff --git a/src/Particle/Interpolator.h b/src/Particle/Interpolator.h index 1a0fdc93b..8d56d4b2f 100644 --- a/src/Particle/Interpolator.h +++ b/src/Particle/Interpolator.h @@ -12,45 +12,39 @@ #define INTERPOLATOR_H // include files +#include "AppTypes/Vektor.h" #include "Field/BareField.h" -#include "Field/LField.h" #include "Field/CompressedBrickIterator.h" +#include "Field/LField.h" #include "Index/NDIndex.h" -#include "AppTypes/Vektor.h" -#include "Utility/IpplInfo.h" #include "Utility/IpplException.h" +#include "Utility/IpplInfo.h" +#include #include -#include #include -#include +#include // Helper class and functions for finding nearest grid point given centering // A tag indicating the Field centering type template -class CenteringTag { -}; +class CenteringTag {}; // Return NDIndex referring to the nearest Field element to the given position template -inline -NDIndex FindNGP(const M& mesh, const Vektor& ppos, - CenteringTag) { - return mesh.getCellContaining(ppos); +inline NDIndex FindNGP(const M& mesh, const Vektor& ppos, CenteringTag) { + return mesh.getCellContaining(ppos); } template -inline -NDIndex FindNGP(const M& mesh, const Vektor& ppos, - CenteringTag) { - return mesh.getNearestVertex(ppos); +inline NDIndex FindNGP(const M& mesh, const Vektor& ppos, CenteringTag) { + return mesh.getNearestVertex(ppos); } template -inline -std::vector > FindNGP(const M& mesh, const Vektor&ppos, - CenteringTag) { +inline std::vector > FindNGP(const M& mesh, const Vektor& ppos, + CenteringTag) { std::vector > ngp; ngp.push_back(mesh.getCellContaining(ppos)); ngp.push_back(mesh.getNearestVertex(ppos)); @@ -59,69 +53,64 @@ std::vector > FindNGP(const M& mesh, const Vektor&ppos, // Return position of element indicated by NDIndex template -inline -void FindPos(Vektor& pos, const M& mesh, const NDIndex& indices, - CenteringTag) { - pos = mesh.getCellPosition(indices); - return; +inline void FindPos(Vektor& pos, const M& mesh, const NDIndex& indices, + CenteringTag) { + pos = mesh.getCellPosition(indices); + return; } template -inline -void FindPos(Vektor& pos, const M& mesh, const NDIndex& indices, - CenteringTag) { - pos = mesh.getVertexPosition(indices); - return; +inline void FindPos(Vektor& pos, const M& mesh, const NDIndex& indices, + CenteringTag) { + pos = mesh.getVertexPosition(indices); + return; } template -inline -void FindPos(std::vector >& pos, const M& mesh, - const std::vector >& indices, CenteringTag) { - pos.resize(Dim); - Vektor cell_pos = mesh.getCellPosition(indices[0]); - Vektor vert_pos = mesh.getVertexPosition(indices[1]); - - for (unsigned int d = 0; d < Dim; ++ d) { - pos[d] = vert_pos; - pos[d](d) = cell_pos(d); - } - - return; +inline void FindPos(std::vector >& pos, const M& mesh, + const std::vector >& indices, CenteringTag) { + pos.resize(Dim); + Vektor cell_pos = mesh.getCellPosition(indices[0]); + Vektor vert_pos = mesh.getVertexPosition(indices[1]); + + for (unsigned int d = 0; d < Dim; ++d) { + pos[d] = vert_pos; + pos[d](d) = cell_pos(d); + } + + return; } // Find sizes of next mesh element template -inline -void FindDelta(Vektor& delta, const M& mesh, const NDIndex& gp, - CenteringTag) { - NDIndex vp; - for (unsigned d=0; d& delta, const M& mesh, const NDIndex& gp, + CenteringTag) { + NDIndex vp; + for (unsigned d = 0; d < Dim; ++d) + vp[d] = gp[d] + 1; + delta = mesh.getDeltaCell(vp); + return; } template -inline -void FindDelta(Vektor& delta, const M& mesh, const NDIndex& gp, - CenteringTag) { - delta = mesh.getDeltaVertex(gp); - return; +inline void FindDelta(Vektor& delta, const M& mesh, const NDIndex& gp, + CenteringTag) { + delta = mesh.getDeltaVertex(gp); + return; } template -inline -void FindDelta(std::vector >& delta, const M& mesh, - const std::vector >& gp, CenteringTag) { +inline void FindDelta(std::vector >& delta, const M& mesh, + const std::vector >& gp, CenteringTag) { NDIndex vp; - for (unsigned d=0; d struct CacheData1 { - NDIndex Index_m; - Vektor Delta_m; + NDIndex Index_m; + Vektor Delta_m; }; template -inline std::ostream &operator<<(std::ostream &o, const CacheData1 &c) -{ - o << "(" << c.Index_m << "," << c.Delta_m << ")"; - return o; +inline std::ostream& operator<<(std::ostream& o, const CacheData1& c) { + o << "(" << c.Index_m << "," << c.Delta_m << ")"; + return o; } - // define struct for cached mesh info for CIC interpolator template struct CacheDataCIC { - NDIndex Index_m; - int Offset_m[Dim]; - Vektor Delta_m; + NDIndex Index_m; + int Offset_m[Dim]; + Vektor Delta_m; }; -//BENI: -// define struct for cached mesh info for TSC interpolator +// BENI: +// define struct for cached mesh info for TSC interpolator template struct CacheDataTSC { - NDIndex Index_m; - int Offset_m[Dim]; - Vektor Delta_m; + NDIndex Index_m; + int Offset_m[Dim]; + Vektor Delta_m; }; template -inline std::ostream &operator<<(std::ostream &o, const CacheDataCIC &c) -{ - Vektor offset; - for (unsigned int i=0; i < Dim; ++i) - offset[i] = c.Offset_m[i]; - o << "(" << c.Index_m << "," << c.Delta_m << "," << offset << ")"; - return o; +inline std::ostream& operator<<(std::ostream& o, const CacheDataCIC& c) { + Vektor offset; + for (unsigned int i = 0; i < Dim; ++i) + offset[i] = c.Offset_m[i]; + o << "(" << c.Index_m << "," << c.Delta_m << "," << offset << ")"; + return o; } - /* Interpolator -- Definition of base class for interpolation of data for a single particle to or from a IPPL Field. */ class Interpolator { - protected: - - // helper function, similar to BareField::localElement, but return iterator - template - static CompressedBrickIterator - getFieldIter(const BareField& f, const NDIndex& pt) { - - typename BareField::const_iterator_if lf_i, lf_end = f.end_if(); - for (lf_i = f.begin_if(); lf_i != lf_end; ++lf_i) { - LField& lf(*(*lf_i).second); - if ( lf.getOwned().contains(pt) ) { - // found it ... get iterator for requested element - return lf.begin(pt); - } + // helper function, similar to BareField::localElement, but return iterator + template + static CompressedBrickIterator getFieldIter(const BareField& f, + const NDIndex& pt) { + typename BareField::const_iterator_if lf_i, lf_end = f.end_if(); + for (lf_i = f.begin_if(); lf_i != lf_end; ++lf_i) { + LField& lf(*(*lf_i).second); + if (lf.getOwned().contains(pt)) { + // found it ... get iterator for requested element + return lf.begin(pt); + } + } + + // if not found ... try examining guard cell layers + for (lf_i = f.begin_if(); lf_i != lf_end; ++lf_i) { + LField& lf(*(*lf_i).second); + if (lf.getAllocated().contains(pt)) { + // found it ... get iterator for requested element + return lf.begin(pt); + } + } + + // throw ("Interploator:getFieldIter: attempt to access non-local index"); + + // if we're here, we did not find it ... it must not be local + ERRORMSG("Interpolator::getFieldIter: attempt to access non-local index"); + ERRORMSG(pt << " on node " << Ippl::Comm->myNode() << endl); + ERRORMSG("Dumping local owned and allocated domains:" << endl); + int lfc = 0; + for (lf_i = f.begin_if(); lf_i != lf_end; ++lf_i, ++lfc) { + LField& lf(*(*lf_i).second); + ERRORMSG(lfc << ": owned = " << lf.getOwned()); + ERRORMSG(", allocated = " << lf.getAllocated() << endl); + } + ERRORMSG("Error occurred for BareField with layout = " << f.getLayout()); + ERRORMSG(endl); + ERRORMSG("Calling abort ..." << endl); + Ippl::abort(); + return (*(*(f.begin_if())).second).begin(); } - // if not found ... try examining guard cell layers - for (lf_i = f.begin_if(); lf_i != lf_end; ++lf_i) { - LField& lf(*(*lf_i).second); - if ( lf.getAllocated().contains(pt) ) { - // found it ... get iterator for requested element - return lf.begin(pt); - } - } - - // throw ("Interploator:getFieldIter: attempt to access non-local index"); - - - // if we're here, we did not find it ... it must not be local - ERRORMSG("Interpolator::getFieldIter: attempt to access non-local index"); - ERRORMSG(pt << " on node " << Ippl::Comm->myNode() << endl); - ERRORMSG("Dumping local owned and allocated domains:" << endl); - int lfc = 0; - for ( lf_i = f.begin_if(); lf_i != lf_end ; ++lf_i, ++lfc ) { - LField& lf(*(*lf_i).second); - ERRORMSG(lfc << ": owned = " << lf.getOwned()); - ERRORMSG(", allocated = " << lf.getAllocated() << endl); - } - ERRORMSG("Error occurred for BareField with layout = " << f.getLayout()); - ERRORMSG(endl); - ERRORMSG("Calling abort ..." << endl); - Ippl::abort(); - return (*(*(f.begin_if())).second).begin(); - - } - public: - // constructor/destructor - Interpolator() {} - ~Interpolator() {} - - // gather/scatter function interfaces (implemented in derived classes) - /* - - // scatter particle data into Field using particle position and mesh - template - static - void scatter(const FT& pdata, Field& f, - const Vektor& ppos, const M& mesh); - - // scatter particle data into Field using particle position and mesh - // and cache mesh information for reuse - template - static - void scatter(const FT& pdata, Field& f, - const Vektor& ppos, const M& mesh, - InterpolatorTraits::Cache_t& cache); - - // scatter particle data into Field using cached mesh information - template - static - void scatter(const FT& pdata, Field& f, - const InterpolatorTraits::Cache_t& cache); - - - // gather particle data from Field using particle position and mesh - template - static - void gather(FT& pdata, const Field& f, - const Vektor& ppos, const M& mesh); - - // gather particle data from Field using particle position and mesh - // and cache mesh information for reuse - template - static - void gather(FT& pdata, const Field& f, - const Vektor& ppos, const M& mesh, - InterpolatorTraits::Cache_t& cache); - - // gather particle data from Field using cached mesh information - template - static - void gather(FT& pdata, const Field& f, - const InterpolatorTraits::Cache_t& cache); - - */ - + // constructor/destructor + Interpolator() {} + ~Interpolator() {} + + // gather/scatter function interfaces (implemented in derived classes) + /* + + // scatter particle data into Field using particle position and mesh + template + static + void scatter(const FT& pdata, Field& f, + const Vektor& ppos, const M& mesh); + + // scatter particle data into Field using particle position and mesh + // and cache mesh information for reuse + template + static + void scatter(const FT& pdata, Field& f, + const Vektor& ppos, const M& mesh, + InterpolatorTraits::Cache_t& cache); + + // scatter particle data into Field using cached mesh information + template + static + void scatter(const FT& pdata, Field& f, + const InterpolatorTraits::Cache_t& cache); + + + // gather particle data from Field using particle position and mesh + template + static + void gather(FT& pdata, const Field& f, + const Vektor& ppos, const M& mesh); + + // gather particle data from Field using particle position and mesh + // and cache mesh information for reuse + template + static + void gather(FT& pdata, const Field& f, + const Vektor& ppos, const M& mesh, + InterpolatorTraits::Cache_t& cache); + + // gather particle data from Field using cached mesh information + template + static + void gather(FT& pdata, const Field& f, + const InterpolatorTraits::Cache_t& cache); + + */ }; -#endif // INTERPOLATOR_H +#endif // INTERPOLATOR_H /*************************************************************************** * $RCSfile: Interpolator.h,v $ $Author: adelmann $ diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index 99276a82c..78b2815b1 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -36,16 +36,15 @@ namespace ippl { // ParticleAttrib class definition template - class ParticleAttrib : public detail::ParticleAttribBase - , public detail::Expression< - ParticleAttrib, - sizeof(typename detail::ViewType::view_type) - > - { + class ParticleAttrib : public detail::ParticleAttribBase, + public detail::Expression< + ParticleAttrib, + sizeof(typename detail::ViewType::view_type)> { public: typedef T value_type; - using boolean_view_type = typename detail::ParticleAttribBase::boolean_view_type; - using view_type = typename detail::ViewType::view_type; + using boolean_view_type = + typename detail::ParticleAttribBase::boolean_view_type; + using view_type = typename detail::ViewType::view_type; using HostMirror = typename view_type::host_mirror_type; using size_type = detail::size_type; @@ -61,9 +60,8 @@ namespace ippl { * @param keepIndex List of indices of valid particles in the invalid region * @param invalidCount Number of invalid particles in the valid region */ - void destroy(const Kokkos::View& deleteIndex, - const Kokkos::View& keepIndex, - size_type invalidCount) override; + void destroy(const Kokkos::View& deleteIndex, const Kokkos::View& keepIndex, + size_type invalidCount) override; void pack(void*, const Kokkos::View&) const override; @@ -78,22 +76,16 @@ namespace ippl { } virtual ~ParticleAttrib() = default; - - size_type size() const override { - return dview_m.extent(0); - } + + size_type size() const override { return dview_m.extent(0); } size_type packedSize(const size_type count) const override { return count * sizeof(value_type); } - void resize(size_type n) { - Kokkos::resize(dview_m, n); - } + void resize(size_type n) { Kokkos::resize(dview_m, n); } - void realloc(size_type n) { - Kokkos::realloc(dview_m, n); - } + void realloc(size_type n) { Kokkos::realloc(dview_m, n); } void print() { HostMirror hview = Kokkos::create_mirror_view(dview_m); @@ -103,31 +95,18 @@ namespace ippl { } } + KOKKOS_INLINE_FUNCTION T& operator()(const size_t i) const { return dview_m(i); } - KOKKOS_INLINE_FUNCTION - T& operator()(const size_t i) const { - return dview_m(i); - } - - - view_type& getView() { - return dview_m; - } - - const view_type& getView() const{ - return dview_m; - } + view_type& getView() { return dview_m; } + const view_type& getView() const { return dview_m; } - HostMirror getHostMirror() { - return Kokkos::create_mirror(dview_m); - } - + HostMirror getHostMirror() { return Kokkos::create_mirror(dview_m); } /*! * Assign the same value to the whole attribute. */ - //KOKKOS_INLINE_FUNCTION + // KOKKOS_INLINE_FUNCTION ParticleAttrib& operator=(T x); /*! @@ -138,21 +117,18 @@ namespace ippl { * @param expr is the expression */ template - //KOKKOS_INLINE_FUNCTION + // KOKKOS_INLINE_FUNCTION ParticleAttrib& operator=(detail::Expression const& expr); - // // scatter the data from this attribute onto the given Field, using -// // the given Position attribute + // // the given Position attribute template - void - scatter(Field& f, - const ParticleAttrib, Properties... >& pp) const; - + void scatter(Field& f, + const ParticleAttrib, Properties...>& pp) const; + template - void - gather(Field& f, - const ParticleAttrib, Properties...>& pp); + void gather(Field& f, + const ParticleAttrib, Properties...>& pp); T sum(); T max(); @@ -162,7 +138,7 @@ namespace ippl { private: view_type dview_m; }; -} +} // namespace ippl #include "Particle/ParticleAttrib.hpp" diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 824cd50aa..eaf2bc1bf 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -26,13 +26,13 @@ // You should have received a copy of the GNU General Public License // along with IPPL. If not, see . // -#include "Ippl.h" #include "Communicate/DataTypes.h" +#include "Ippl.h" #include "Utility/IpplTimings.h" namespace ippl { - template + template void ParticleAttrib::create(size_type n) { size_type required = *(this->localNum_mp) + n; if (this->size() < required) { @@ -41,108 +41,84 @@ namespace ippl { } } - template + template void ParticleAttrib::destroy(const Kokkos::View& deleteIndex, - const Kokkos::View& keepIndex, - size_type invalidCount) { + const Kokkos::View& keepIndex, + size_type invalidCount) { // Replace all invalid particles in the valid region with valid // particles in the invalid region - Kokkos::parallel_for("ParticleAttrib::destroy()", - invalidCount, - KOKKOS_CLASS_LAMBDA(const size_t i) - { - dview_m(deleteIndex(i)) = dview_m(keepIndex(i)); - }); + Kokkos::parallel_for( + "ParticleAttrib::destroy()", invalidCount, KOKKOS_CLASS_LAMBDA(const size_t i) { + dview_m(deleteIndex(i)) = dview_m(keepIndex(i)); + }); } - template + template void ParticleAttrib::pack(void* buffer, - const Kokkos::View& hash) const - { - using this_type = ParticleAttrib; + const Kokkos::View& hash) const { + using this_type = ParticleAttrib; this_type* buffer_p = static_cast(buffer); - auto& view = buffer_p->dview_m; - auto size = hash.extent(0); - if(view.extent(0) < size) { + auto& view = buffer_p->dview_m; + auto size = hash.extent(0); + if (view.extent(0) < size) { int overalloc = Ippl::Comm->getDefaultOverallocation(); Kokkos::realloc(view, size * overalloc); } Kokkos::parallel_for( - "ParticleAttrib::pack()", - size, - KOKKOS_CLASS_LAMBDA(const size_t i) { - view(i) = dview_m(hash(i)); - }); + "ParticleAttrib::pack()", size, + KOKKOS_CLASS_LAMBDA(const size_t i) { view(i) = dview_m(hash(i)); }); Kokkos::fence(); - - } - template void ParticleAttrib::unpack(void* buffer, size_type nrecvs) { - using this_type = ParticleAttrib; + using this_type = ParticleAttrib; this_type* buffer_p = static_cast(buffer); - auto& view = buffer_p->dview_m; - auto size = dview_m.extent(0); - size_type required = *(this->localNum_mp) + nrecvs; - if(size < required) { + auto& view = buffer_p->dview_m; + auto size = dview_m.extent(0); + size_type required = *(this->localNum_mp) + nrecvs; + if (size < required) { int overalloc = Ippl::Comm->getDefaultOverallocation(); this->resize(required * overalloc); } size_type count = *(this->localNum_mp); Kokkos::parallel_for( - "ParticleAttrib::unpack()", - nrecvs, - KOKKOS_CLASS_LAMBDA(const size_t i) { - dview_m(count + i) = view(i); - }); + "ParticleAttrib::unpack()", nrecvs, + KOKKOS_CLASS_LAMBDA(const size_t i) { dview_m(count + i) = view(i); }); Kokkos::fence(); - } - template - //KOKKOS_INLINE_FUNCTION - ParticleAttrib& - ParticleAttrib::operator=(T x) - { - Kokkos::parallel_for("ParticleAttrib::operator=()", - *(this->localNum_mp), - KOKKOS_CLASS_LAMBDA(const size_t i) { - dview_m(i) = x; - }); + template + // KOKKOS_INLINE_FUNCTION + ParticleAttrib& ParticleAttrib::operator=(T x) { + Kokkos::parallel_for( + "ParticleAttrib::operator=()", *(this->localNum_mp), + KOKKOS_CLASS_LAMBDA(const size_t i) { dview_m(i) = x; }); return *this; } - - template + template template - //KOKKOS_INLINE_FUNCTION - ParticleAttrib& - ParticleAttrib::operator=(detail::Expression const& expr) - { + // KOKKOS_INLINE_FUNCTION + ParticleAttrib& ParticleAttrib::operator=( + detail::Expression const& expr) { using capture_type = detail::CapturedExpression; capture_type expr_ = reinterpret_cast(expr); - Kokkos::parallel_for("ParticleAttrib::operator=()", - *(this->localNum_mp), - KOKKOS_CLASS_LAMBDA(const size_t i) { - dview_m(i) = expr_(i); - }); + Kokkos::parallel_for( + "ParticleAttrib::operator=()", *(this->localNum_mp), + KOKKOS_CLASS_LAMBDA(const size_t i) { dview_m(i) = expr_(i); }); return *this; } - - template + template template - void ParticleAttrib::scatter(Field& f, - const ParticleAttrib< Vector, Properties... >& pp) - const - { - static IpplTimings::TimerRef scatterTimer = IpplTimings::getTimer("scatter"); - IpplTimings::startTimer(scatterTimer); + void ParticleAttrib::scatter( + Field& f, const ParticleAttrib, Properties...>& pp) const { + static IpplTimings::TimerRef scatterTimer = IpplTimings::getTimer("scatter"); + IpplTimings::startTimer(scatterTimer); typename Field::view_type view = f.getView(); const M& mesh = f.get_mesh(); @@ -150,22 +126,19 @@ namespace ippl { using vector_type = typename M::vector_type; using value_type = typename ParticleAttrib::value_type; - const vector_type& dx = mesh.getMeshSpacing(); + const vector_type& dx = mesh.getMeshSpacing(); const vector_type& origin = mesh.getOrigin(); - const vector_type invdx = 1.0 / dx; + const vector_type invdx = 1.0 / dx; - const FieldLayout& layout = f.getLayout(); - const NDIndex& lDom = layout.getLocalNDIndex(); - const int nghost = f.getNghost(); + const FieldLayout& layout = f.getLayout(); + const NDIndex& lDom = layout.getLocalNDIndex(); + const int nghost = f.getNghost(); Kokkos::parallel_for( - "ParticleAttrib::scatter", - *(this->localNum_mp), - KOKKOS_CLASS_LAMBDA(const size_t idx) - { + "ParticleAttrib::scatter", *(this->localNum_mp), KOKKOS_CLASS_LAMBDA(const size_t idx) { // find nearest grid point - vector_type l = (pp(idx) - origin) * invdx + 0.5; - Vector index = l; + vector_type l = (pp(idx) - origin) * invdx + 0.5; + Vector index = l; Vector whi = l - index; Vector wlo = 1.0 - whi; @@ -173,41 +146,36 @@ namespace ippl { const size_t j = index[1] - lDom[1].first() + nghost; const size_t k = index[2] - lDom[2].first() + nghost; - // scatter const value_type& val = dview_m(idx); - Kokkos::atomic_add(&view(i-1, j-1, k-1), wlo[0] * wlo[1] * wlo[2] * val); - Kokkos::atomic_add(&view(i-1, j-1, k ), wlo[0] * wlo[1] * whi[2] * val); - Kokkos::atomic_add(&view(i-1, j, k-1), wlo[0] * whi[1] * wlo[2] * val); - Kokkos::atomic_add(&view(i-1, j, k ), wlo[0] * whi[1] * whi[2] * val); - Kokkos::atomic_add(&view(i, j-1, k-1), whi[0] * wlo[1] * wlo[2] * val); - Kokkos::atomic_add(&view(i, j-1, k ), whi[0] * wlo[1] * whi[2] * val); - Kokkos::atomic_add(&view(i, j, k-1), whi[0] * whi[1] * wlo[2] * val); - Kokkos::atomic_add(&view(i, j, k ), whi[0] * whi[1] * whi[2] * val); - } - ); + Kokkos::atomic_add(&view(i - 1, j - 1, k - 1), wlo[0] * wlo[1] * wlo[2] * val); + Kokkos::atomic_add(&view(i - 1, j - 1, k), wlo[0] * wlo[1] * whi[2] * val); + Kokkos::atomic_add(&view(i - 1, j, k - 1), wlo[0] * whi[1] * wlo[2] * val); + Kokkos::atomic_add(&view(i - 1, j, k), wlo[0] * whi[1] * whi[2] * val); + Kokkos::atomic_add(&view(i, j - 1, k - 1), whi[0] * wlo[1] * wlo[2] * val); + Kokkos::atomic_add(&view(i, j - 1, k), whi[0] * wlo[1] * whi[2] * val); + Kokkos::atomic_add(&view(i, j, k - 1), whi[0] * whi[1] * wlo[2] * val); + Kokkos::atomic_add(&view(i, j, k), whi[0] * whi[1] * whi[2] * val); + }); IpplTimings::stopTimer(scatterTimer); - - static IpplTimings::TimerRef accumulateHaloTimer = IpplTimings::getTimer("accumulateHalo"); - IpplTimings::startTimer(accumulateHaloTimer); + + static IpplTimings::TimerRef accumulateHaloTimer = IpplTimings::getTimer("accumulateHalo"); + IpplTimings::startTimer(accumulateHaloTimer); f.accumulateHalo(); - IpplTimings::stopTimer(accumulateHaloTimer); + IpplTimings::stopTimer(accumulateHaloTimer); } - - template + template template - void ParticleAttrib::gather(Field& f, - const ParticleAttrib, Properties...>& pp) - { - - static IpplTimings::TimerRef fillHaloTimer = IpplTimings::getTimer("fillHalo"); - IpplTimings::startTimer(fillHaloTimer); + void ParticleAttrib::gather( + Field& f, const ParticleAttrib, Properties...>& pp) { + static IpplTimings::TimerRef fillHaloTimer = IpplTimings::getTimer("fillHalo"); + IpplTimings::startTimer(fillHaloTimer); f.fillHalo(); - IpplTimings::stopTimer(fillHaloTimer); + IpplTimings::stopTimer(fillHaloTimer); - static IpplTimings::TimerRef gatherTimer = IpplTimings::getTimer("gather"); - IpplTimings::startTimer(gatherTimer); + static IpplTimings::TimerRef gatherTimer = IpplTimings::getTimer("gather"); + IpplTimings::startTimer(gatherTimer); const typename Field::view_type view = f.getView(); const M& mesh = f.get_mesh(); @@ -215,22 +183,19 @@ namespace ippl { using vector_type = typename M::vector_type; using value_type = typename ParticleAttrib::value_type; - const vector_type& dx = mesh.getMeshSpacing(); + const vector_type& dx = mesh.getMeshSpacing(); const vector_type& origin = mesh.getOrigin(); - const vector_type invdx = 1.0 / dx; + const vector_type invdx = 1.0 / dx; - const FieldLayout& layout = f.getLayout(); - const NDIndex& lDom = layout.getLocalNDIndex(); - const int nghost = f.getNghost(); + const FieldLayout& layout = f.getLayout(); + const NDIndex& lDom = layout.getLocalNDIndex(); + const int nghost = f.getNghost(); Kokkos::parallel_for( - "ParticleAttrib::gather", - *(this->localNum_mp), - KOKKOS_CLASS_LAMBDA(const size_t idx) - { + "ParticleAttrib::gather", *(this->localNum_mp), KOKKOS_CLASS_LAMBDA(const size_t idx) { // find nearest grid point - vector_type l = (pp(idx) - origin) * invdx + 0.5; - Vector index = l; + vector_type l = (pp(idx) - origin) * invdx + 0.5; + Vector index = l; Vector whi = l - index; Vector wlo = 1.0 - whi; @@ -240,61 +205,54 @@ namespace ippl { // gather value_type& val = dview_m(idx); - val = wlo[0] * wlo[1] * wlo[2] * view(i-1, j-1, k-1) - + wlo[0] * wlo[1] * whi[2] * view(i-1, j-1, k ) - + wlo[0] * whi[1] * wlo[2] * view(i-1, j, k-1) - + wlo[0] * whi[1] * whi[2] * view(i-1, j, k ) - + whi[0] * wlo[1] * wlo[2] * view(i, j-1, k-1) - + whi[0] * wlo[1] * whi[2] * view(i, j-1, k ) - + whi[0] * whi[1] * wlo[2] * view(i, j, k-1) - + whi[0] * whi[1] * whi[2] * view(i, j, k ); - } - ); - IpplTimings::stopTimer(gatherTimer); + val = wlo[0] * wlo[1] * wlo[2] * view(i - 1, j - 1, k - 1) + + wlo[0] * wlo[1] * whi[2] * view(i - 1, j - 1, k) + + wlo[0] * whi[1] * wlo[2] * view(i - 1, j, k - 1) + + wlo[0] * whi[1] * whi[2] * view(i - 1, j, k) + + whi[0] * wlo[1] * wlo[2] * view(i, j - 1, k - 1) + + whi[0] * wlo[1] * whi[2] * view(i, j - 1, k) + + whi[0] * whi[1] * wlo[2] * view(i, j, k - 1) + + whi[0] * whi[1] * whi[2] * view(i, j, k); + }); + IpplTimings::stopTimer(gatherTimer); } - - /* * Non-class function * */ - - template - inline - void scatter(const ParticleAttrib& attrib, Field& f, - const ParticleAttrib, Properties...>& pp) - { + template + inline void scatter(const ParticleAttrib& attrib, Field& f, + const ParticleAttrib, Properties...>& pp) { attrib.scatter(f, pp); } - - template - inline - void gather(ParticleAttrib& attrib, Field& f, - const ParticleAttrib, Properties...>& pp) - { + template + inline void gather(ParticleAttrib& attrib, Field& f, + const ParticleAttrib, Properties...>& pp) { attrib.gather(f, pp); } - #define DefineParticleReduction(fun, name, op, MPI_Op) \ - template \ - T ParticleAttrib::name() { \ - T temp = 0.0; \ - Kokkos::parallel_reduce("fun", *(this->localNum_mp), \ - KOKKOS_CLASS_LAMBDA(const size_t i, T& valL) { \ - T myVal = dview_m(i); \ - op; \ - }, Kokkos::fun(temp)); \ - T globaltemp = 0.0; \ - MPI_Datatype type = get_mpi_datatype(temp); \ - MPI_Allreduce(&temp, &globaltemp, 1, type, MPI_Op, Ippl::getComm()); \ - return globaltemp; \ +#define DefineParticleReduction(fun, name, op, MPI_Op) \ + template \ + T ParticleAttrib::name() { \ + T temp = 0.0; \ + Kokkos::parallel_reduce( \ + "fun", *(this->localNum_mp), \ + KOKKOS_CLASS_LAMBDA(const size_t i, T& valL) { \ + T myVal = dview_m(i); \ + op; \ + }, \ + Kokkos::fun(temp)); \ + T globaltemp = 0.0; \ + MPI_Datatype type = get_mpi_datatype(temp); \ + MPI_Allreduce(&temp, &globaltemp, 1, type, MPI_Op, Ippl::getComm()); \ + return globaltemp; \ } - DefineParticleReduction(Sum, sum, valL += myVal, MPI_SUM) - DefineParticleReduction(Max, max, if(myVal > valL) valL = myVal, MPI_MAX) - DefineParticleReduction(Min, min, if(myVal < valL) valL = myVal, MPI_MIN) - DefineParticleReduction(Prod, prod, valL *= myVal, MPI_PROD) -} + DefineParticleReduction(Sum, sum, valL += myVal, MPI_SUM) + DefineParticleReduction(Max, max, if (myVal > valL) valL = myVal, MPI_MAX) + DefineParticleReduction(Min, min, if (myVal < valL) valL = myVal, MPI_MIN) + DefineParticleReduction(Prod, prod, valL *= myVal, MPI_PROD) +} // namespace ippl diff --git a/src/Particle/ParticleAttribBase.h b/src/Particle/ParticleAttribBase.h index b1e2f5fd2..532550a41 100644 --- a/src/Particle/ParticleAttribBase.h +++ b/src/Particle/ParticleAttribBase.h @@ -27,22 +27,22 @@ #ifndef IPPL_PARTICLE_ATTRIB_BASE_H #define IPPL_PARTICLE_ATTRIB_BASE_H -#include "Types/ViewTypes.h" #include "Types/IpplTypes.h" +#include "Types/ViewTypes.h" #include "Communicate/Archive.h" namespace ippl { namespace detail { - template + template class ParticleAttribBase { - public: typedef typename ViewType::view_type boolean_view_type; virtual void create(size_type) = 0; - virtual void destroy(const Kokkos::View&, const Kokkos::View&, size_type) = 0; + virtual void destroy(const Kokkos::View&, const Kokkos::View&, + size_type) = 0; virtual size_type packedSize(const size_type) const = 0; virtual void pack(void*, const Kokkos::View&) const = 0; @@ -63,7 +63,7 @@ namespace ippl { protected: const size_type* localNum_mp; }; - } -} + } // namespace detail +} // namespace ippl #endif diff --git a/src/Particle/ParticleBC.h b/src/Particle/ParticleBC.h index 275f04e00..2d14122f3 100644 --- a/src/Particle/ParticleBC.h +++ b/src/Particle/ParticleBC.h @@ -30,7 +30,7 @@ namespace ippl { namespace detail { - template + template struct ParticleBC { using value_type = typename ViewType::value_type::value_type; @@ -54,17 +54,13 @@ namespace ippl { KOKKOS_DEFAULTED_FUNCTION ParticleBC() = default; - KOKKOS_INLINE_FUNCTION - ParticleBC(const ViewType& view, - const NDRegion& nr, - const unsigned& dim, - const bool& isUpper) - : view_m(view) - , dim_m(dim) - , minval_m(nr[dim].min()) - , maxval_m(nr[dim].max()) - , isUpper_m(isUpper) - { + KOKKOS_INLINE_FUNCTION ParticleBC(const ViewType& view, const NDRegion& nr, + const unsigned& dim, const bool& isUpper) + : view_m(view) + , dim_m(dim) + , minval_m(nr[dim].min()) + , maxval_m(nr[dim].max()) + , isUpper_m(isUpper) { extent_m = nr[dim].length(); middle_m = (minval_m + maxval_m) / 2; } @@ -73,7 +69,7 @@ namespace ippl { ~ParticleBC() = default; }; - template + template struct PeriodicBC : public ParticleBC { using value_type = typename ParticleBC::value_type; @@ -83,25 +79,20 @@ namespace ippl { KOKKOS_DEFAULTED_FUNCTION PeriodicBC() = default; - KOKKOS_INLINE_FUNCTION - PeriodicBC(const ViewType& view, - const NDRegion& nr, - const unsigned& dim, - const bool& isUpper) - : ParticleBC(view, nr, dim, isUpper) - { } + KOKKOS_INLINE_FUNCTION PeriodicBC(const ViewType& view, const NDRegion& nr, + const unsigned& dim, const bool& isUpper) + : ParticleBC(view, nr, dim, isUpper) {} - KOKKOS_INLINE_FUNCTION - void operator()(const size_t& i) const { + KOKKOS_INLINE_FUNCTION void operator()(const size_t& i) const { value_type& value = this->view_m(i)[this->dim_m]; - value = value - extent_m * (int)((value - middle_m) * 2 / extent_m); + value = value - extent_m * (int)((value - middle_m) * 2 / extent_m); } KOKKOS_DEFAULTED_FUNCTION ~PeriodicBC() = default; }; - template + template struct ReflectiveBC : public ParticleBC { using value_type = typename ParticleBC::value_type; @@ -112,30 +103,24 @@ namespace ippl { KOKKOS_DEFAULTED_FUNCTION ReflectiveBC() = default; - KOKKOS_INLINE_FUNCTION - ReflectiveBC(const ViewType& view, - const NDRegion& nr, - const unsigned& dim, - const bool& isUpper) - : ParticleBC(view, nr, dim, isUpper) - { } + KOKKOS_INLINE_FUNCTION ReflectiveBC(const ViewType& view, const NDRegion& nr, + const unsigned& dim, const bool& isUpper) + : ParticleBC(view, nr, dim, isUpper) {} - KOKKOS_INLINE_FUNCTION - void operator()(const size_t& i) const { + KOKKOS_INLINE_FUNCTION void operator()(const size_t& i) const { value_type& value = this->view_m(i)[this->dim_m]; - bool tooHigh = value >= maxval_m; - bool tooLow = value < minval_m; - value += 2 * ( - (tooHigh && isUpper_m) * (maxval_m - value) + - (tooLow && !isUpper_m) * (minval_m - value) - ); + bool tooHigh = value >= maxval_m; + bool tooLow = value < minval_m; + value += 2 + * ((tooHigh && isUpper_m) * (maxval_m - value) + + (tooLow && !isUpper_m) * (minval_m - value)); } KOKKOS_DEFAULTED_FUNCTION ~ReflectiveBC() = default; }; - template + template struct SinkBC : public ParticleBC { using value_type = typename ParticleBC::value_type; @@ -146,28 +131,23 @@ namespace ippl { KOKKOS_DEFAULTED_FUNCTION SinkBC() = default; - KOKKOS_INLINE_FUNCTION - SinkBC(const ViewType& view, - const NDRegion& nr, - const unsigned& dim, - const bool& isUpper) - : ParticleBC(view, nr, dim, isUpper) - { } + KOKKOS_INLINE_FUNCTION SinkBC(const ViewType& view, const NDRegion& nr, + const unsigned& dim, const bool& isUpper) + : ParticleBC(view, nr, dim, isUpper) {} - KOKKOS_INLINE_FUNCTION - void operator()(const size_t& i) const { + KOKKOS_INLINE_FUNCTION void operator()(const size_t& i) const { value_type& value = this->view_m(i)[this->dim_m]; - bool tooHigh = value >= maxval_m; - bool tooLow = value < minval_m; - value += (tooHigh && isUpper_m) * (maxval_m - value) + - (tooLow && !isUpper_m) * (minval_m - value); + bool tooHigh = value >= maxval_m; + bool tooLow = value < minval_m; + value += (tooHigh && isUpper_m) * (maxval_m - value) + + (tooLow && !isUpper_m) * (minval_m - value); } KOKKOS_DEFAULTED_FUNCTION ~SinkBC() = default; }; - } -} + } // namespace detail +} // namespace ippl #endif diff --git a/src/Particle/ParticleBalancer.h b/src/Particle/ParticleBalancer.h index f0b1da997..13d2dfb83 100644 --- a/src/Particle/ParticleBalancer.h +++ b/src/Particle/ParticleBalancer.h @@ -2,7 +2,7 @@ /*************************************************************************** * * The IPPL Framework - * + * * * Visit http://people.web.psi.ch/adelmann/ for more details * @@ -26,24 +26,23 @@ ***************************************************************************/ // forward declarations -template class ParticleSpatialLayout; -template class IpplParticleBase; - +template +class ParticleSpatialLayout; +template +class IpplParticleBase; // calculate a new RegionLayout for a given IpplParticleBase, and distribute the // new RegionLayout to all the nodes. This uses a Field BinaryBalancer. -template -bool -BinaryRepartition(IpplParticleBase >&, double = 0.0); +template +bool BinaryRepartition(IpplParticleBase >&, + double = 0.0); #include "Particle/ParticleBalancer.hpp" -#endif // PARTICLE_BALANCER_H +#endif // PARTICLE_BALANCER_H /*************************************************************************** * $RCSfile: ParticleBalancer.h,v $ $Author: adelmann $ * $Revision: 1.1.1.1 $ $Date: 2003/01/23 07:40:28 $ - * IPPL_VERSION_ID: $Id: ParticleBalancer.h,v 1.1.1.1 2003/01/23 07:40:28 adelmann Exp $ + * IPPL_VERSION_ID: $Id: ParticleBalancer.h,v 1.1.1.1 2003/01/23 07:40:28 adelmann Exp $ ***************************************************************************/ - - diff --git a/src/Particle/ParticleBalancer.hpp b/src/Particle/ParticleBalancer.hpp index d46d654a7..a13378ae3 100644 --- a/src/Particle/ParticleBalancer.hpp +++ b/src/Particle/ParticleBalancer.hpp @@ -24,114 +24,100 @@ ***************************************************************************/ // include files -#include "Particle/ParticleBalancer.h" +#include "FieldLayout/BinaryBalancer.h" +#include "FieldLayout/FieldLayout.h" +#include "Index/NDIndex.h" +#include "Particle/IntNGP.h" #include "Particle/IpplParticleBase.h" -#include "Particle/ParticleSpatialLayout.h" #include "Particle/ParticleAttrib.h" -#include "Particle/IntNGP.h" +#include "Particle/ParticleBalancer.h" +#include "Particle/ParticleSpatialLayout.h" #include "Region/RegionLayout.h" -#include "Index/NDIndex.h" -#include "FieldLayout/FieldLayout.h" -#include "FieldLayout/BinaryBalancer.h" #include "Utility/IpplInfo.h" - - ///////////////////////////////////////////////////////////////////////////// // calculate a new RegionLayout for a given ParticleBase, and distribute the // new RegionLayout to all the nodes. This uses a Field BinaryBalancer. -template < class T, unsigned Dim, class Mesh, class CachingPolicy> -bool -BinaryRepartition(IpplParticleBase >& PB, double offset) { - - - - static IntNGP interp; // to scatter particle density - - //Inform dbgmsg("Particle BinaryRepartition", INFORM_ALL_NODES); - //dbgmsg << "Performing particle load balancing, for "; - //dbgmsg << PB.getTotalNum() << " particles ..." << endl; - - // get the internal FieldLayout from the Particle object's internal - // RegionLayout. From this, we make a new Field (we do not need a - RegionLayout& RL = PB.getLayout().getLayout(); - if ( ! RL.initialized()) { - ERRORMSG("Cannot repartition particles: uninitialized layout." << endl); - return false; - } - FieldLayout& FL = RL.getFieldLayout(); - Mesh& mesh = RL.getMesh(); - - // NDIndex which describes the entire domain ... if a particle is - // outside this region, we are in trouble! - const NDIndex& TotalDomain = FL.getDomain(); - - // for all the particles, do the following: - // 1. get the position, and invert to the 'FieldLayout' index space - // 2. increment the field at the position near this index position - NDIndex indx; - - // By default, we do the number density computation and repartition of - // index space on a cell-centered Field. If FieldLayout is vertex-centered, - // we'll need to make some adjustments here. - bool CenterOffset[Dim]; - int CenteringTotal = 0; - unsigned int d; - for (d=0; d BF(mesh,FL,GuardCellSizes(1)); - - // Now do a number density scatter on this Field - // Afterwards, the Field will be deleted, and will checkout of the - // FieldLayout. This is desired so that when we repartition the - // FieldLayout, we do not waste time redistributing the Field's data. - BF = offset; - scatter(BF,PB.R,interp); - - // calculate a new repartitioning of the field, and use this to repartition - // the FieldLayout used inside the Particle object - try - { - indx = CalcBinaryRepartition(FL, BF); - } - catch(BinaryRepartitionFailed bf) - { - return false; - } - } - else if (CenteringTotal == 0) { // allVert centering - Field BF(mesh,FL,GuardCellSizes(1)); - - // Now do a number density scatter on this Field - // Afterwards, the Field will be deleted, and will checkout of the - // FieldLayout. This is desired so that when we repartition the - // FieldLayout, we do not waste time redistributing the Field's data. - BF = offset; - scatter(BF,PB.R,interp); - - // calculate a new repartitioning of the field, and use this to repartition - // the FieldLayout used inside the Particle object - try - { - indx = CalcBinaryRepartition(FL, BF); - } - catch(BinaryRepartitionFailed bf) - { - return false; - } - } - else { - ERRORMSG("Not implemented for face- and edge-centered Fields!!" << endl); - Ippl::abort(); - } - - // now, we can repartition the FieldLayout within the RegionLayout - RL.RepartitionLayout(indx); - PB.update(); - return true; +template +bool BinaryRepartition(IpplParticleBase >& PB, + double offset) { + static IntNGP interp; // to scatter particle density + + // Inform dbgmsg("Particle BinaryRepartition", INFORM_ALL_NODES); + // dbgmsg << "Performing particle load balancing, for "; + // dbgmsg << PB.getTotalNum() << " particles ..." << endl; + + // get the internal FieldLayout from the Particle object's internal + // RegionLayout. From this, we make a new Field (we do not need a + RegionLayout& RL = PB.getLayout().getLayout(); + if (!RL.initialized()) { + ERRORMSG("Cannot repartition particles: uninitialized layout." << endl); + return false; + } + FieldLayout& FL = RL.getFieldLayout(); + Mesh& mesh = RL.getMesh(); + + // NDIndex which describes the entire domain ... if a particle is + // outside this region, we are in trouble! + const NDIndex& TotalDomain = FL.getDomain(); + + // for all the particles, do the following: + // 1. get the position, and invert to the 'FieldLayout' index space + // 2. increment the field at the position near this index position + NDIndex indx; + + // By default, we do the number density computation and repartition of + // index space on a cell-centered Field. If FieldLayout is vertex-centered, + // we'll need to make some adjustments here. + bool CenterOffset[Dim]; + int CenteringTotal = 0; + unsigned int d; + for (d = 0; d < Dim; ++d) { + CenterOffset[d] = (TotalDomain[d].length() < mesh.gridSizes[d]); + CenteringTotal += CenterOffset[d]; + } + + if (CenteringTotal == Dim) { // allCell centering + Field BF(mesh, FL, GuardCellSizes(1)); + + // Now do a number density scatter on this Field + // Afterwards, the Field will be deleted, and will checkout of the + // FieldLayout. This is desired so that when we repartition the + // FieldLayout, we do not waste time redistributing the Field's data. + BF = offset; + scatter(BF, PB.R, interp); + + // calculate a new repartitioning of the field, and use this to repartition + // the FieldLayout used inside the Particle object + try { + indx = CalcBinaryRepartition(FL, BF); + } catch (BinaryRepartitionFailed bf) { + return false; + } + } else if (CenteringTotal == 0) { // allVert centering + Field BF(mesh, FL, GuardCellSizes(1)); + + // Now do a number density scatter on this Field + // Afterwards, the Field will be deleted, and will checkout of the + // FieldLayout. This is desired so that when we repartition the + // FieldLayout, we do not waste time redistributing the Field's data. + BF = offset; + scatter(BF, PB.R, interp); + + // calculate a new repartitioning of the field, and use this to repartition + // the FieldLayout used inside the Particle object + try { + indx = CalcBinaryRepartition(FL, BF); + } catch (BinaryRepartitionFailed bf) { + return false; + } + } else { + ERRORMSG("Not implemented for face- and edge-centered Fields!!" << endl); + Ippl::abort(); + } + + // now, we can repartition the FieldLayout within the RegionLayout + RL.RepartitionLayout(indx); + PB.update(); + return true; } \ No newline at end of file diff --git a/src/Particle/ParticleBase.h b/src/Particle/ParticleBase.h index d4741472d..c6cb118f3 100644 --- a/src/Particle/ParticleBase.h +++ b/src/Particle/ParticleBase.h @@ -77,11 +77,10 @@ namespace ippl { */ template class ParticleBase { - public: using vector_type = typename PLayout::vector_type; using index_type = typename PLayout::index_type; - using particle_position_type = typename PLayout::particle_position_type ; + using particle_position_type = typename PLayout::particle_position_type; using particle_index_type = ParticleAttrib; using Layout_t = PLayout; @@ -118,14 +117,15 @@ namespace ippl { */ ParticleBase(Layout_t& layout); - /* cannot use '= default' since we get a * compiler warning otherwise: - * warning: calling a __host__ function("std::vector< ::ippl::detail::ParticleAttribBase *, ::std::allocator< - * ::ippl::detail::ParticleAttribBase *> > ::~vector") from a __host__ __device__ function("ippl::ParticleBase< + * warning: calling a __host__ function("std::vector< ::ippl::detail::ParticleAttribBase *, + * ::std::allocator< + * ::ippl::detail::ParticleAttribBase *> > ::~vector") from a __host__ __device__ + * function("ippl::ParticleBase< * ::ippl::ParticleLayout > ::~ParticleBase") is not allowed */ - ~ParticleBase() {} // = default; //{ } + ~ParticleBase() {} // = default; //{ } /*! * Initialize the particle layout. Needs to be called @@ -138,8 +138,7 @@ namespace ippl { * @returns processor local number of particles */ size_type getLocalNum() const { return localNum_m; } - - + void setLocalNum(size_type size) { localNum_m = size; } /*! @@ -151,23 +150,18 @@ namespace ippl { * @returns particle layout */ const Layout_t& getLayout() const { return *layout_m; } - + /*! * Set all boundary conditions * @param bc the boundary conditions */ - void setParticleBC(const bc_container_type& bcs) { - layout_m->setParticleBC(bcs); - } + void setParticleBC(const bc_container_type& bcs) { layout_m->setParticleBC(bcs); } /*! * Set all boundary conditions to this BC * @param bc the boundary conditions */ - void setParticleBC(BC bc) { - layout_m->setParticleBC(bc); - } - + void setParticleBC(BC bc) { layout_m->setParticleBC(bc); } /*! * Add particle attribute @@ -175,16 +169,12 @@ namespace ippl { */ void addAttribute(detail::ParticleAttribBase& pa); - /*! * Get particle attribute * @param i attribute number in container * @returns a pointer to the attribute */ - attribute_type* getAttribute(size_t i) { - return attributes_m[i]; - } - + attribute_type* getAttribute(size_t i) { return attributes_m[i]; } /*! * @returns the number of attributes @@ -193,7 +183,6 @@ namespace ippl { return attributes_m.size(); } - /*! * Create nLocal processor local particles * @param nLocal number of local particles to be created @@ -227,7 +216,6 @@ namespace ippl { */ void serialize(detail::Archive& ar, size_type nsends); - /*! * Deserialize to do MPI calls. * @param ar archive @@ -241,7 +229,7 @@ namespace ippl { */ size_type packedSize(const size_type count) const; -// protected: + // protected: /*! * Fill attributes of buffer. @@ -281,7 +269,7 @@ namespace ippl { hash_type deleteIndex_m; hash_type keepIndex_m; }; -} +} // namespace ippl #include "Particle/ParticleBase.hpp" diff --git a/src/Particle/ParticleBase.hpp b/src/Particle/ParticleBase.hpp index 71e9d0331..0d2d3d07c 100644 --- a/src/Particle/ParticleBase.hpp +++ b/src/Particle/ParticleBase.hpp @@ -66,61 +66,53 @@ namespace ippl { template ParticleBase::ParticleBase() - : layout_m(nullptr) - , localNum_m(0) - , attributes_m(0) - , nextID_m(Ippl::Comm->myNode()) - , numNodes_m(Ippl::Comm->getNodes()) - { - addAttribute(ID); // needs to be added first due to destroy function + : layout_m(nullptr) + , localNum_m(0) + , attributes_m(0) + , nextID_m(Ippl::Comm->myNode()) + , numNodes_m(Ippl::Comm->getNodes()) { + addAttribute(ID); // needs to be added first due to destroy function addAttribute(R); } template ParticleBase::ParticleBase(PLayout& layout) - : ParticleBase() - { + : ParticleBase() { initialize(layout); } - template - void ParticleBase::addAttribute(detail::ParticleAttribBase& pa) - { + void ParticleBase::addAttribute( + detail::ParticleAttribBase& pa) { attributes_m.push_back(&pa); pa.setParticleCount(localNum_m); } template - void ParticleBase::initialize(PLayout& layout) - { -// PAssert(layout_m == nullptr); + void ParticleBase::initialize(PLayout& layout) { + // PAssert(layout_m == nullptr); // save the layout, and perform setup tasks layout_m = &layout; } - template - void ParticleBase::create(size_type nLocal) - { + void ParticleBase::create(size_type nLocal) { PAssert(layout_m != nullptr); - for (attribute_iterator it = attributes_m.begin(); - it != attributes_m.end(); ++it) { + for (attribute_iterator it = attributes_m.begin(); it != attributes_m.end(); ++it) { (*it)->create(nLocal); } // set the unique ID value for these new particles - auto pIDs = ID.getView(); - auto nextID = this->nextID_m; + auto pIDs = ID.getView(); + auto nextID = this->nextID_m; auto numNodes = this->numNodes_m; - Kokkos::parallel_for("ParticleBase::create(size_t)", - Kokkos::RangePolicy(localNum_m, nLocal), - KOKKOS_LAMBDA(const std::int64_t i) { - pIDs(i) = nextID + numNodes * i; - }); - //nextID_m += numNodes_m * (nLocal - localNum_m); + Kokkos::parallel_for( + "ParticleBase::create(size_t)", + Kokkos::RangePolicy(localNum_m, nLocal), + KOKKOS_LAMBDA(const std::int64_t i) { pIDs(i) = nextID + numNodes * i; }); + // nextID_m += numNodes_m * (nLocal - localNum_m); nextID_m += numNodes_m * nLocal; // remember that we're creating these new particles @@ -128,24 +120,22 @@ namespace ippl { } template - void ParticleBase::createWithID(index_type id) - { + void ParticleBase::createWithID(index_type id) { PAssert(layout_m != nullptr); // temporary change index_type tmpNextID = nextID_m; - nextID_m = id; - numNodes_m = 0; + nextID_m = id; + numNodes_m = 0; create(1); - nextID_m = tmpNextID; + nextID_m = tmpNextID; numNodes_m = Ippl::Comm->getNodes(); } template - void ParticleBase::globalCreate(size_type nTotal) - { + void ParticleBase::globalCreate(size_type nTotal) { PAssert(layout_m != nullptr); // Compute the number of particles local to each processor @@ -161,11 +151,13 @@ namespace ippl { } template - void ParticleBase::destroy(const Kokkos::View& invalid, const size_type destroyNum) { + void ParticleBase::destroy(const Kokkos::View& invalid, + const size_type destroyNum) { PAssert(destroyNum <= localNum_m); // If there aren't any particles to delete, do nothing - if (destroyNum == 0) return; + if (destroyNum == 0) + return; // If we're deleting all the particles, there's no point in doing // anything because the valid region will be empty; we only need to @@ -186,49 +178,53 @@ namespace ippl { Kokkos::deep_copy(deleteIndex_m, -1); auto locDeleteIndex = deleteIndex_m; - auto locKeepIndex = keepIndex_m; + auto locKeepIndex = keepIndex_m; // Find the indices of the invalid particles in the valid region - Kokkos::parallel_scan("Scan in ParticleBase::destroy()", - localNum_m - destroyNum, - KOKKOS_LAMBDA(const size_t i, int& idx, const bool final) - { - if (final && invalid(i)) locDeleteIndex(idx) = i; - if (invalid(i)) idx += 1; - }); + Kokkos::parallel_scan( + "Scan in ParticleBase::destroy()", localNum_m - destroyNum, + KOKKOS_LAMBDA(const size_t i, int& idx, const bool final) { + if (final && invalid(i)) + locDeleteIndex(idx) = i; + if (invalid(i)) + idx += 1; + }); Kokkos::fence(); // Determine the total number of invalid particles in the valid region size_type maxDeleteIndex = 0; - Kokkos::parallel_reduce("Reduce in ParticleBase::destroy()", destroyNum, - KOKKOS_LAMBDA(const size_t i, size_t& maxIdx) - { - if (locDeleteIndex(i) >= 0 && i > maxIdx) maxIdx = i; - }, Kokkos::Max(maxDeleteIndex)); + Kokkos::parallel_reduce( + "Reduce in ParticleBase::destroy()", destroyNum, + KOKKOS_LAMBDA(const size_t i, size_t& maxIdx) { + if (locDeleteIndex(i) >= 0 && i > maxIdx) + maxIdx = i; + }, + Kokkos::Max(maxDeleteIndex)); // Find the indices of the valid particles in the invalid region - Kokkos::parallel_scan("Second scan in ParticleBase::destroy()", - Kokkos::RangePolicy(localNum_m - destroyNum, localNum_m), - KOKKOS_LAMBDA(const size_t i, int& idx, const bool final) - { - if (final && !invalid(i)) locKeepIndex(idx) = i; - if (!invalid(i)) idx += 1; - }); + Kokkos::parallel_scan( + "Second scan in ParticleBase::destroy()", + Kokkos::RangePolicy(localNum_m - destroyNum, localNum_m), + KOKKOS_LAMBDA(const size_t i, int& idx, const bool final) { + if (final && !invalid(i)) + locKeepIndex(idx) = i; + if (!invalid(i)) + idx += 1; + }); Kokkos::fence(); localNum_m -= destroyNum; // Partition the attributes into valid and invalid regions - for (attribute_iterator it = attributes_m.begin(); - it != attributes_m.end(); ++it) - { + for (attribute_iterator it = attributes_m.begin(); it != attributes_m.end(); ++it) { (*it)->destroy(deleteIndex_m, keepIndex_m, maxDeleteIndex + 1); } } template - void ParticleBase::serialize(detail::Archive& ar, size_type nsends) { + void ParticleBase::serialize(detail::Archive& ar, + size_type nsends) { using size_type = typename attribute_container_t::size_type; for (size_type i = 0; i < attributes_m.size(); ++i) { attributes_m[i]->serialize(ar, nsends); @@ -236,7 +232,8 @@ namespace ippl { } template - void ParticleBase::deserialize(detail::Archive& ar, size_type nrecvs) { + void ParticleBase::deserialize(detail::Archive& ar, + size_type nrecvs) { using size_type = typename attribute_container_t::size_type; for (size_type i = 0; i < attributes_m.size(); ++i) { attributes_m[i]->deserialize(ar, nrecvs); @@ -244,7 +241,8 @@ namespace ippl { } template - detail::size_type ParticleBase::packedSize(const size_type count) const { + detail::size_type ParticleBase::packedSize( + const size_type count) const { size_type total = 0; // Vector size type using vsize_t = typename attribute_container_t::size_type; @@ -256,9 +254,7 @@ namespace ippl { template template - void ParticleBase::pack(Buffer& buffer, - const hash_type& hash) - { + void ParticleBase::pack(Buffer& buffer, const hash_type& hash) { // Vector size type using vsize_t = typename attribute_container_t::size_type; for (vsize_t j = 0; j < attributes_m.size(); ++j) { @@ -268,8 +264,7 @@ namespace ippl { template template - void ParticleBase::unpack(Buffer& buffer, size_type nrecvs) - { + void ParticleBase::unpack(Buffer& buffer, size_type nrecvs) { // Vector size type using vsize_t = typename attribute_container_t::size_type; for (vsize_t j = 0; j < attributes_m.size(); ++j) { @@ -277,4 +272,4 @@ namespace ippl { } localNum_m += nrecvs; } -} +} // namespace ippl diff --git a/src/Particle/ParticleLayout.h b/src/Particle/ParticleLayout.h index c1e8ce37f..34809aa46 100644 --- a/src/Particle/ParticleLayout.h +++ b/src/Particle/ParticleLayout.h @@ -54,31 +54,25 @@ namespace ippl { namespace detail { // ParticleLayout class definition. Template parameters are the type // and dimension of the ParticlePos object used for the particles. - template + template class ParticleLayout { - public: - - typedef T value_type; - typedef std::int64_t index_type; - typedef Vector vector_type; - typedef ParticleAttrib particle_position_type; - typedef std::array bc_container_type; - + typedef T value_type; + typedef std::int64_t index_type; + typedef Vector vector_type; + typedef ParticleAttrib particle_position_type; + typedef std::array bc_container_type; static constexpr unsigned dim = Dim; public: - ParticleLayout() - { - bcs_m.fill(BC::NO); - }; + ParticleLayout() { bcs_m.fill(BC::NO); }; ~ParticleLayout() = default; - template + template void update(PBase&) { - //FIXME + // FIXME std::cout << "TODO" << std::endl; } @@ -86,18 +80,13 @@ namespace ippl { * Copy over the given boundary conditions. * @param bcs are the boundary conditions */ - void setParticleBC(bc_container_type bcs) { - bcs_m = bcs; - } + void setParticleBC(bc_container_type bcs) { bcs_m = bcs; } /*! * Use the same boundary condition on each face * @param bcs are the boundary conditions */ - void setParticleBC(BC bc) { - bcs_m.fill(bc); - } - + void setParticleBC(BC bc) { bcs_m.fill(bc); } /*! * Apply the given boundary conditions to the current particle positions. @@ -111,8 +100,8 @@ namespace ippl { //! the list of boundary conditions for this set of particles bc_container_type bcs_m; }; - } -} + } // namespace detail +} // namespace ippl #include "Particle/ParticleLayout.hpp" diff --git a/src/Particle/ParticleLayout.hpp b/src/Particle/ParticleLayout.hpp index 2232677d8..e8929e2f7 100644 --- a/src/Particle/ParticleLayout.hpp +++ b/src/Particle/ParticleLayout.hpp @@ -43,10 +43,9 @@ namespace ippl { namespace detail { - template + template void ParticleLayout::applyBC(const particle_position_type& R, - const NDRegion& nr) - { + const NDRegion& nr) { /* loop over all faces * 0: lower x-face * 1: upper x-face @@ -56,8 +55,8 @@ namespace ippl { * 5: upper z-face */ for (unsigned face = 0; face < 2 * Dim; ++face) { - //unsigned face = i % Dim; - unsigned d = face / 2; + // unsigned face = i % Dim; + unsigned d = face / 2; bool isUpper = face & 1; switch (bcs_m[face]) { case BC::PERIODIC: @@ -67,18 +66,15 @@ namespace ippl { if (isUpper) break; - Kokkos::parallel_for("Periodic BC", - R.getParticleCount(), + Kokkos::parallel_for("Periodic BC", R.getParticleCount(), PeriodicBC(R.getView(), nr, d, isUpper)); break; case BC::REFLECTIVE: - Kokkos::parallel_for("Reflective BC", - R.getParticleCount(), + Kokkos::parallel_for("Reflective BC", R.getParticleCount(), ReflectiveBC(R.getView(), nr, d, isUpper)); break; case BC::SINK: - Kokkos::parallel_for("Sink BC", - R.getParticleCount(), + Kokkos::parallel_for("Sink BC", R.getParticleCount(), SinkBC(R.getView(), nr, d, isUpper)); break; case BC::NO: @@ -88,5 +84,5 @@ namespace ippl { Kokkos::fence(); } } - } -} + } // namespace detail +} // namespace ippl diff --git a/src/Particle/ParticleSpatialLayout.h b/src/Particle/ParticleSpatialLayout.h index dceee4262..d1d12b62b 100644 --- a/src/Particle/ParticleSpatialLayout.h +++ b/src/Particle/ParticleSpatialLayout.h @@ -35,10 +35,9 @@ #ifndef IPPL_PARTICLE_SPATIAL_LAYOUT_H #define IPPL_PARTICLE_SPATIAL_LAYOUT_H - #include "FieldLayout/FieldLayout.h" -#include "Particle/ParticleLayout.h" #include "Particle/ParticleBase.h" +#include "Particle/ParticleLayout.h" #include "Types/IpplTypes.h" @@ -51,17 +50,14 @@ namespace ippl { * @tparam Dim dimension * @tparam Mesh type */ - template - > - class ParticleSpatialLayout : public detail::ParticleLayout - { + template > + class ParticleSpatialLayout : public detail::ParticleLayout { public: - using hash_type = typename ParticleBase >::hash_type; + using hash_type = typename ParticleBase>::hash_type; using locate_type = typename detail::ViewType::view_type; - using bool_type = typename detail::ViewType::view_type; + using bool_type = typename detail::ViewType::view_type; using RegionLayout_t = detail::RegionLayout; - using Mesh_t = UniformCartesian; + using Mesh_t = UniformCartesian; using size_type = detail::size_type; @@ -69,26 +65,26 @@ namespace ippl { // constructor: this one also takes a Mesh ParticleSpatialLayout(FieldLayout&, Mesh&); - ParticleSpatialLayout() : detail::ParticleLayout() { } + ParticleSpatialLayout() + : detail::ParticleLayout() {} ~ParticleSpatialLayout() = default; //~ParticleSpatialLayout() {} void updateLayout(FieldLayout&, Mesh&); - + template void update(BufferType& pdata, BufferType& buffer); const RegionLayout_t& getRegionLayout() const { return rlayout_m; } - + protected: //! The RegionLayout which determines where our particles go. RegionLayout_t rlayout_m; - + public: void locateParticles(const ParticleBase>& pdata, - locate_type& ranks, - bool_type& invalid) const; + locate_type& ranks, bool_type& invalid) const; /*! * @param rank we sent to @@ -102,9 +98,8 @@ namespace ippl { * @param ranks a container specifying where a particle at the i-th index should go. */ size_t numberOfSends(int rank, const locate_type& ranks); - }; -} +} // namespace ippl #include "Particle/ParticleSpatialLayout.hpp" diff --git a/src/Particle/ParticleSpatialLayout.hpp b/src/Particle/ParticleSpatialLayout.hpp index 599892903..fd34dbd55 100644 --- a/src/Particle/ParticleSpatialLayout.hpp +++ b/src/Particle/ParticleSpatialLayout.hpp @@ -32,31 +32,25 @@ // You should have received a copy of the GNU General Public License // along with IPPL. If not, see . // -#include -#include #include +#include +#include #include "Utility/IpplTimings.h" namespace ippl { template - ParticleSpatialLayout::ParticleSpatialLayout( - FieldLayout& fl, - Mesh& mesh) - : rlayout_m(fl, mesh) - {} + ParticleSpatialLayout::ParticleSpatialLayout(FieldLayout& fl, Mesh& mesh) + : rlayout_m(fl, mesh) {} template - void - ParticleSpatialLayout::updateLayout(FieldLayout& fl, Mesh& mesh) { + void ParticleSpatialLayout::updateLayout(FieldLayout& fl, Mesh& mesh) { rlayout_m.changeDomain(fl, mesh); } template template - void ParticleSpatialLayout::update( - BufferType& pdata, BufferType& buffer) - { + void ParticleSpatialLayout::update(BufferType& pdata, BufferType& buffer) { static IpplTimings::TimerRef ParticleBCTimer = IpplTimings::getTimer("particleBC"); IpplTimings::startTimer(ParticleBCTimer); this->applyBC(pdata.R, rlayout_m.getDomain()); @@ -103,8 +97,8 @@ namespace ippl { IpplTimings::startTimer(preprocTimer); MPI_Win win; std::vector nRecvs(nRanks, 0); - MPI_Win_create(nRecvs.data(), nRanks*sizeof(size_type), sizeof(size_type), - MPI_INFO_NULL, Ippl::getComm(), &win); + MPI_Win_create(nRecvs.data(), nRanks * sizeof(size_type), sizeof(size_type), MPI_INFO_NULL, + Ippl::getComm(), &win); std::vector nSends(nRanks, 0); @@ -116,8 +110,8 @@ namespace ippl { continue; } nSends[rank] = numberOfSends(rank, ranks); - MPI_Put(nSends.data() + rank, 1, MPI_LONG_LONG_INT, rank, Ippl::Comm->rank(), - 1, MPI_LONG_LONG_INT, win); + MPI_Put(nSends.data() + rank, 1, MPI_LONG_LONG_INT, rank, Ippl::Comm->rank(), 1, + MPI_LONG_LONG_INT, win); } MPI_Win_fence(0, win); MPI_Win_free(&win); @@ -145,8 +139,7 @@ namespace ippl { buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARTICLE_SEND + sends, bufSize); - Ippl::Comm->isend(rank, tag, buffer, *buf, - requests.back(), nSends[rank]); + Ippl::Comm->isend(rank, tag, buffer, *buf, requests.back(), nSends[rank]); buf->resetWritePos(); ++sends; @@ -159,16 +152,16 @@ namespace ippl { IpplTimings::startTimer(destroyTimer); size_type invalidCount = 0; - auto pIDs = pdata.ID.getView(); + auto pIDs = pdata.ID.getView(); Kokkos::parallel_reduce( - "set/count invalid", - localnum, + "set/count invalid", localnum, KOKKOS_LAMBDA(const size_t i, size_type& nInvalid) { if (invalid(i)) { pIDs(i) = -1; nInvalid += 1; } - }, invalidCount); + }, + invalidCount); Kokkos::fence(); pdata.destroy(invalid, invalidCount); @@ -182,7 +175,7 @@ namespace ippl { for (int rank = 0; rank < nRanks; ++rank) { if (nRecvs[rank] > 0) { size_type bufSize = pdata.packedSize(nRecvs[rank]); - buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARTICLE_RECV + recvs, bufSize); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARTICLE_RECV + recvs, bufSize); Ippl::Comm->recv(rank, tag, buffer, *buf, bufSize, nRecvs[rank]); buf->resetReadPos(); @@ -191,7 +184,6 @@ namespace ippl { ++recvs; } - } IpplTimings::stopTimer(recvTimer); @@ -205,49 +197,41 @@ namespace ippl { IpplTimings::stopTimer(ParticleUpdateTimer); } - template void ParticleSpatialLayout::locateParticles( - const ParticleBase>& pdata, - locate_type& ranks, - bool_type& invalid) const - { - auto& positions = pdata.R.getView(); + const ParticleBase>& pdata, locate_type& ranks, + bool_type& invalid) const { + auto& positions = pdata.R.getView(); typename RegionLayout_t::view_type Regions = rlayout_m.getdLocalRegions(); - using view_size_t = typename RegionLayout_t::view_type::size_type; - using mdrange_type = Kokkos::MDRangePolicy>; - int myRank = Ippl::Comm->rank(); + using view_size_t = typename RegionLayout_t::view_type::size_type; + using mdrange_type = Kokkos::MDRangePolicy>; + int myRank = Ippl::Comm->rank(); Kokkos::parallel_for( "ParticleSpatialLayout::locateParticles()", - mdrange_type({0, 0}, - {ranks.extent(0), Regions.extent(0)}), + mdrange_type({0, 0}, {ranks.extent(0), Regions.extent(0)}), KOKKOS_LAMBDA(const size_t i, const view_size_t j) { bool xyz_bool = false; - xyz_bool = ((positions(i)[0] >= Regions(j)[0].min()) && - (positions(i)[0] <= Regions(j)[0].max()) && - (positions(i)[1] >= Regions(j)[1].min()) && - (positions(i)[1] <= Regions(j)[1].max()) && - (positions(i)[2] >= Regions(j)[2].min()) && - (positions(i)[2] <= Regions(j)[2].max())); - if(xyz_bool){ - ranks(i) = j; + xyz_bool = ((positions(i)[0] >= Regions(j)[0].min()) + && (positions(i)[0] <= Regions(j)[0].max()) + && (positions(i)[1] >= Regions(j)[1].min()) + && (positions(i)[1] <= Regions(j)[1].max()) + && (positions(i)[2] >= Regions(j)[2].min()) + && (positions(i)[2] <= Regions(j)[2].max())); + if (xyz_bool) { + ranks(i) = j; invalid(i) = (myRank != ranks(i)); } - }); + }); Kokkos::fence(); } - template - void ParticleSpatialLayout::fillHash(int rank, - const locate_type& ranks, - hash_type& hash) - { + void ParticleSpatialLayout::fillHash(int rank, const locate_type& ranks, + hash_type& hash) { /* Compute the prefix sum and fill the hash */ Kokkos::parallel_scan( - "ParticleSpatialLayout::fillHash()", - ranks.extent(0), + "ParticleSpatialLayout::fillHash()", ranks.extent(0), KOKKOS_LAMBDA(const size_t i, int& idx, const bool final) { if (final) { if (rank == ranks(i)) { @@ -262,22 +246,14 @@ namespace ippl { Kokkos::fence(); } - template - size_t ParticleSpatialLayout::numberOfSends( - int rank, - const locate_type& ranks) - { + size_t ParticleSpatialLayout::numberOfSends(int rank, const locate_type& ranks) { size_t nSends = 0; Kokkos::parallel_reduce( - "ParticleSpatialLayout::numberOfSends()", - ranks.extent(0), - KOKKOS_LAMBDA(const size_t i, - size_t& num) - { - num += size_t(rank == ranks(i)); - }, nSends); + "ParticleSpatialLayout::numberOfSends()", ranks.extent(0), + KOKKOS_LAMBDA(const size_t i, size_t& num) { num += size_t(rank == ranks(i)); }, + nSends); Kokkos::fence(); return nSends; } -} +} // namespace ippl diff --git a/src/Partition/Partitioner.h b/src/Partition/Partitioner.h index a6e510a65..fa486af4c 100644 --- a/src/Partition/Partitioner.h +++ b/src/Partition/Partitioner.h @@ -24,20 +24,17 @@ namespace ippl { namespace detail { template - class Partitioner - { + class Partitioner { public: - Partitioner() = default; + Partitioner() = default; ~Partitioner() = default; template - void split(const NDIndex& domain, - view_type& view, - e_dim_tag* decomp, + void split(const NDIndex& domain, view_type& view, e_dim_tag* decomp, int nSplits) const; }; - } -} + } // namespace detail +} // namespace ippl #include "Partition/Partitioner.hpp" diff --git a/src/Partition/Partitioner.hpp b/src/Partition/Partitioner.hpp index f4c11108b..2fecf9247 100644 --- a/src/Partition/Partitioner.hpp +++ b/src/Partition/Partitioner.hpp @@ -25,11 +25,8 @@ namespace ippl { template template - void Partitioner::split(const NDIndex& domain, - view_type& view, - e_dim_tag* decomp, - int nSplits) const - { + void Partitioner::split(const NDIndex& domain, view_type& view, e_dim_tag* decomp, + int nSplits) const { using NDIndex_t = NDIndex; // Recursively split the domain until we have generated all the domains. @@ -39,11 +36,10 @@ namespace ippl { // Start with the whole domain. domains_c[0] = domain; int v; - unsigned int d=0; - - int v1,v2,rm,vtot,vl,vr; - double a,lmax,len; + unsigned int d = 0; + int v1, v2, rm, vtot, vl, vr; + double a, lmax, len; for (v = nSplits, rm = 0; v > 1; v /= 2) { rm += (v % 2); @@ -64,7 +60,7 @@ namespace ippl { int i, j; for (i = 0, j = 0; i < v; ++i, j += 2) { // Split to the left and to the right, saving both. - domains_c[i].split(copy_c[j], copy_c[j+1], d); + domains_c[i].split(copy_c[j], copy_c[j + 1], d); } // Copy back. std::copy(copy_c.begin(), copy_c.begin() + v * 2, domains_c.begin()); @@ -74,11 +70,10 @@ namespace ippl { d = 0; } - } else { - vtot = 1; // count the number of nSplits to make sure that it worked - // nSplits is not a power of 2 so we need to do some fancy splitting - // sorry... this would be much cleaner with recursion + vtot = 1; // count the number of nSplits to make sure that it worked + // nSplits is not a power of 2 so we need to do some fancy splitting + // sorry... this would be much cleaner with recursion /* The way this works is to recursively split on the longest dimension. Suppose you request 11 nSplits. It will split the longest dimension @@ -97,7 +92,7 @@ namespace ippl { vr = nSplits; while (v1 > 1) { - if ((v1 % 2) ==1) { + if ((v1 % 2) == 1) { vl = vl + (vr - vl) / 2; } else { vr = vl + (vr - vl) / 2; @@ -110,19 +105,19 @@ namespace ippl { if (v2 > vl) { a = v2 - vl; a /= vr - vl; - vr = v2; + vr = v2; leftDomain = domains_c[vl]; - lmax=0; - d = std::numeric_limits::max(); - for (unsigned int dd=0;dd::max(); + for (unsigned int dd = 0; dd < Dim; ++dd) { + if (decomp[dd] == PARALLEL) { if ((len = leftDomain[dd].length()) > lmax) { lmax = len; - d = dd; + d = dd; } } } - domains_c[vl].split( domains_c[vl] , domains_c[vr] , d , a); + domains_c[vl].split(domains_c[vl], domains_c[vr], d, a); ++vtot; } } @@ -137,5 +132,5 @@ namespace ippl { view(i) = domains_c[i]; } } - } -} + } // namespace detail +} // namespace ippl diff --git a/src/Region/NDRegion.h b/src/Region/NDRegion.h index b5d0b5c86..c8f598d17 100644 --- a/src/Region/NDRegion.h +++ b/src/Region/NDRegion.h @@ -30,17 +30,16 @@ namespace ippl { * @tparam T data type * @tparam Dim number of PRegions */ - class NDRegion - { + class NDRegion { public: /*! * Create an empty NDregion */ KOKKOS_FUNCTION - NDRegion() { } + NDRegion() {} KOKKOS_FUNCTION - ~NDRegion() { } + ~NDRegion() {} /*! * Create a NDregion from PRegions @@ -50,35 +49,25 @@ namespace ippl { * https://stackoverflow.com/questions/16478089/converting-variadic-template-pack-into-stdinitializer-list */ template - KOKKOS_FUNCTION - NDRegion(const Args&... args); + KOKKOS_FUNCTION NDRegion(const Args&... args); - KOKKOS_INLINE_FUNCTION - NDRegion(const NDRegion& nr); + KOKKOS_INLINE_FUNCTION NDRegion(const NDRegion& nr); - KOKKOS_INLINE_FUNCTION - NDRegion& operator=(const NDRegion& nr); + KOKKOS_INLINE_FUNCTION NDRegion& operator=(const NDRegion& nr); - KOKKOS_INLINE_FUNCTION - const PRegion& operator[](unsigned d) const; + KOKKOS_INLINE_FUNCTION const PRegion& operator[](unsigned d) const; - KOKKOS_INLINE_FUNCTION - PRegion& operator[](unsigned d); + KOKKOS_INLINE_FUNCTION PRegion& operator[](unsigned d); - KOKKOS_INLINE_FUNCTION - NDRegion& operator+=(const T t); + KOKKOS_INLINE_FUNCTION NDRegion& operator+=(const T t); - KOKKOS_INLINE_FUNCTION - NDRegion& operator-=(const T t); + KOKKOS_INLINE_FUNCTION NDRegion& operator-=(const T t); - KOKKOS_INLINE_FUNCTION - NDRegion& operator*=(const T t); + KOKKOS_INLINE_FUNCTION NDRegion& operator*=(const T t); - KOKKOS_INLINE_FUNCTION - NDRegion& operator/=(const T t); + KOKKOS_INLINE_FUNCTION NDRegion& operator/=(const T t); - KOKKOS_INLINE_FUNCTION - bool empty() const; + KOKKOS_INLINE_FUNCTION bool empty() const; private: KOKKOS_FUNCTION @@ -87,7 +76,7 @@ namespace ippl { //! Array of PRegions PRegion regions_m[Dim]; }; -} +} // namespace ippl #include "Region/NDRegion.hpp" diff --git a/src/Region/NDRegion.hpp b/src/Region/NDRegion.hpp index ec25c00f9..dfe658a6d 100644 --- a/src/Region/NDRegion.hpp +++ b/src/Region/NDRegion.hpp @@ -21,18 +21,13 @@ namespace ippl { template template - KOKKOS_FUNCTION - NDRegion::NDRegion(const Args&... args) - : NDRegion({args...}) - { - static_assert(Dim == sizeof...(args), - "Wrong number of arguments."); + KOKKOS_FUNCTION NDRegion::NDRegion(const Args&... args) + : NDRegion({args...}) { + static_assert(Dim == sizeof...(args), "Wrong number of arguments."); } - template - KOKKOS_FUNCTION - NDRegion::NDRegion(std::initializer_list> regions) { + KOKKOS_FUNCTION NDRegion::NDRegion(std::initializer_list> regions) { unsigned int i = 0; for (auto& r : regions) { regions_m[i] = r; @@ -40,73 +35,58 @@ namespace ippl { } } - template - KOKKOS_INLINE_FUNCTION - NDRegion::NDRegion(const NDRegion& nr) { + KOKKOS_INLINE_FUNCTION NDRegion::NDRegion(const NDRegion& nr) { for (unsigned int i = 0; i < Dim; i++) { regions_m[i] = nr.regions_m[i]; } } - template - KOKKOS_INLINE_FUNCTION - NDRegion& NDRegion::operator=(const NDRegion& nr) { + KOKKOS_INLINE_FUNCTION NDRegion& NDRegion::operator=( + const NDRegion& nr) { for (unsigned int i = 0; i < Dim; i++) { regions_m[i] = nr.regions_m[i]; } return *this; } - template - KOKKOS_INLINE_FUNCTION - const PRegion& NDRegion::operator[](unsigned d) const { + KOKKOS_INLINE_FUNCTION const PRegion& NDRegion::operator[](unsigned d) const { return regions_m[d]; } - template - KOKKOS_INLINE_FUNCTION - PRegion& NDRegion::operator[](unsigned d) { + KOKKOS_INLINE_FUNCTION PRegion& NDRegion::operator[](unsigned d) { return regions_m[d]; } - template - KOKKOS_INLINE_FUNCTION - NDRegion& NDRegion::operator+=(const T t) { + KOKKOS_INLINE_FUNCTION NDRegion& NDRegion::operator+=(const T t) { for (unsigned int i = 0; i < Dim; i++) { regions_m[i] += t; } return *this; } - template - KOKKOS_INLINE_FUNCTION - NDRegion& NDRegion::operator-=(const T t) { + KOKKOS_INLINE_FUNCTION NDRegion& NDRegion::operator-=(const T t) { for (unsigned int i = 0; i < Dim; i++) { regions_m[i] -= t; } return *this; } - template - KOKKOS_INLINE_FUNCTION - NDRegion& NDRegion::operator*=(const T t) { + KOKKOS_INLINE_FUNCTION NDRegion& NDRegion::operator*=(const T t) { for (unsigned int i = 0; i < Dim; i++) { regions_m[i] *= t; } return *this; } - template - KOKKOS_INLINE_FUNCTION - NDRegion& NDRegion::operator/=(const T t) { + KOKKOS_INLINE_FUNCTION NDRegion& NDRegion::operator/=(const T t) { if (t != 0) { for (unsigned int i = 0; i < Dim; i++) { regions_m[i] /= t; @@ -115,10 +95,8 @@ namespace ippl { return *this; } - template - KOKKOS_INLINE_FUNCTION - bool NDRegion::empty() const { + KOKKOS_INLINE_FUNCTION bool NDRegion::empty() const { bool isEmpty = true; for (unsigned int i = 0; i < Dim; i++) { isEmpty &= regions_m[i].empty(); @@ -126,13 +104,12 @@ namespace ippl { return isEmpty; } - template inline std::ostream& operator<<(std::ostream& out, const NDRegion& idx) { out << '{'; for (unsigned d = 0; d < Dim; ++d) { - out << idx[d] << ((d == Dim - 1) ? '}' : ','); + out << idx[d] << ((d == Dim - 1) ? '}' : ','); } return out; } -} \ No newline at end of file +} // namespace ippl \ No newline at end of file diff --git a/src/Region/PRegion.h b/src/Region/PRegion.h index 72490ff89..e6ae7d8b6 100644 --- a/src/Region/PRegion.h +++ b/src/Region/PRegion.h @@ -36,8 +36,7 @@ namespace ippl { * @tparam T type of interval */ template - class PRegion - { + class PRegion { public: /*! * Default region [0, 1[ @@ -59,48 +58,39 @@ namespace ippl { KOKKOS_DEFAULTED_FUNCTION ~PRegion() = default; - + KOKKOS_FUNCTION PRegion(const PRegion&); - KOKKOS_INLINE_FUNCTION - PRegion& operator=(const PRegion& rhs); - + KOKKOS_INLINE_FUNCTION PRegion& operator=(const PRegion& rhs); + /*! * @returns the lower bound */ - KOKKOS_INLINE_FUNCTION - T min() const noexcept; + KOKKOS_INLINE_FUNCTION T min() const noexcept; /*! * @returns the upper bound */ - KOKKOS_INLINE_FUNCTION - T max() const noexcept; + KOKKOS_INLINE_FUNCTION T max() const noexcept; /*! * @returns the length of the region */ - KOKKOS_INLINE_FUNCTION - T length() const noexcept; + KOKKOS_INLINE_FUNCTION T length() const noexcept; /*! * @returns true if empty */ - KOKKOS_INLINE_FUNCTION - bool empty() const noexcept; - - KOKKOS_INLINE_FUNCTION - PRegion& operator+=(T t) noexcept; + KOKKOS_INLINE_FUNCTION bool empty() const noexcept; + + KOKKOS_INLINE_FUNCTION PRegion& operator+=(T t) noexcept; + + KOKKOS_INLINE_FUNCTION PRegion& operator-=(T t) noexcept; - KOKKOS_INLINE_FUNCTION - PRegion& operator-=(T t) noexcept; + KOKKOS_INLINE_FUNCTION PRegion& operator*=(T t) noexcept; - KOKKOS_INLINE_FUNCTION - PRegion& operator*=(T t) noexcept; - - KOKKOS_INLINE_FUNCTION - PRegion& operator/=(T t) noexcept; + KOKKOS_INLINE_FUNCTION PRegion& operator/=(T t) noexcept; private: //! Interval start point @@ -109,7 +99,7 @@ namespace ippl { //! Interval end point T b_m; }; -} +} // namespace ippl #include "PRegion.hpp" diff --git a/src/Region/PRegion.hpp b/src/Region/PRegion.hpp index 655cfc1de..835bc9e4c 100644 --- a/src/Region/PRegion.hpp +++ b/src/Region/PRegion.hpp @@ -35,118 +35,88 @@ namespace ippl { template - KOKKOS_FUNCTION - PRegion::PRegion() - : PRegion(0, 1) - { } - + KOKKOS_FUNCTION PRegion::PRegion() + : PRegion(0, 1) {} template - KOKKOS_FUNCTION - PRegion::PRegion(T b) - : PRegion(0, b) - { } - + KOKKOS_FUNCTION PRegion::PRegion(T b) + : PRegion(0, b) {} template - KOKKOS_FUNCTION - PRegion::PRegion(T a, T b) - : a_m(a) - , b_m(b) - { + KOKKOS_FUNCTION PRegion::PRegion(T a, T b) + : a_m(a) + , b_m(b) { PAssert(a_m < b_m); } - template - KOKKOS_FUNCTION - PRegion::PRegion(const PRegion& pregion) { + KOKKOS_FUNCTION PRegion::PRegion(const PRegion& pregion) { a_m = pregion.a_m; b_m = pregion.b_m; } - template - KOKKOS_INLINE_FUNCTION - PRegion& PRegion::operator=(const PRegion& pregion) { + KOKKOS_INLINE_FUNCTION PRegion& PRegion::operator=(const PRegion& pregion) { a_m = pregion.a_m; b_m = pregion.b_m; return *this; } - template - KOKKOS_INLINE_FUNCTION - T PRegion::min() const noexcept { + KOKKOS_INLINE_FUNCTION T PRegion::min() const noexcept { return a_m; } - template - KOKKOS_INLINE_FUNCTION - T PRegion::max() const noexcept { + KOKKOS_INLINE_FUNCTION T PRegion::max() const noexcept { return b_m; } - template - KOKKOS_INLINE_FUNCTION - T PRegion::length() const noexcept { + KOKKOS_INLINE_FUNCTION T PRegion::length() const noexcept { return b_m - a_m; } - template - KOKKOS_INLINE_FUNCTION - bool PRegion::empty() const noexcept { + KOKKOS_INLINE_FUNCTION bool PRegion::empty() const noexcept { return (a_m == b_m); } - template - KOKKOS_INLINE_FUNCTION - PRegion& PRegion::operator+=(T t) noexcept { + KOKKOS_INLINE_FUNCTION PRegion& PRegion::operator+=(T t) noexcept { a_m += t; b_m += t; return *this; } - template - KOKKOS_INLINE_FUNCTION - PRegion& PRegion::operator-=(T t) noexcept { + KOKKOS_INLINE_FUNCTION PRegion& PRegion::operator-=(T t) noexcept { a_m -= t; b_m -= t; return *this; } - template - KOKKOS_INLINE_FUNCTION - PRegion& PRegion::operator*=(T t) noexcept { + KOKKOS_INLINE_FUNCTION PRegion& PRegion::operator*=(T t) noexcept { a_m *= t; b_m *= t; return *this; } - template - KOKKOS_INLINE_FUNCTION - PRegion& PRegion::operator/=(T t) noexcept { + KOKKOS_INLINE_FUNCTION PRegion& PRegion::operator/=(T t) noexcept { if (t != 0) { - a_m /= t; - b_m /= t; + a_m /= t; + b_m /= t; } return *this; } - template - inline - std::ostream& operator<<(std::ostream& out, const PRegion& r) { + inline std::ostream& operator<<(std::ostream& out, const PRegion& r) { out << '[' << r.min(); out << ',' << r.max(); out << ')'; return out; } -} \ No newline at end of file +} // namespace ippl \ No newline at end of file diff --git a/src/Region/RegionLayout.h b/src/Region/RegionLayout.h index 5e1f3ec20..a8ab193d4 100644 --- a/src/Region/RegionLayout.h +++ b/src/Region/RegionLayout.h @@ -43,18 +43,17 @@ namespace ippl { namespace detail { - template class RegionLayout; + template + class RegionLayout; template std::ostream& operator<<(std::ostream&, const RegionLayout&); - template */> - class RegionLayout - { + template */> + class RegionLayout { public: - using NDRegion_t = NDRegion; - using view_type = typename ViewType::view_type; - using host_mirror_type = typename view_type::host_mirror_type; - + using NDRegion_t = NDRegion; + using view_type = typename ViewType::view_type; + using host_mirror_type = typename view_type::host_mirror_type; // Default constructor. To make this class actually work, the user // will have to later call 'changeDomain' to set the proper Domain @@ -76,7 +75,7 @@ namespace ippl { void write(std::ostream& = std::cout) const; - void changeDomain(const FieldLayout&, const Mesh& mesh); // previously private... + void changeDomain(const FieldLayout&, const Mesh& mesh); // previously private... private: NDRegion_t convertNDIndex(const NDIndex&, const Mesh& mesh) const; @@ -99,10 +98,8 @@ namespace ippl { view_type subdomains_m; }; - - - } -} + } // namespace detail +} // namespace ippl #include "Region/RegionLayout.hpp" diff --git a/src/Region/RegionLayout.hpp b/src/Region/RegionLayout.hpp index d4bb73e84..b11cdfa61 100644 --- a/src/Region/RegionLayout.hpp +++ b/src/Region/RegionLayout.hpp @@ -35,31 +35,24 @@ namespace ippl { namespace detail { template RegionLayout::RegionLayout() - : dLocalRegions_m("local regions (device)", 0) - , hLocalRegions_m(Kokkos::create_mirror_view(dLocalRegions_m)) - { + : dLocalRegions_m("local regions (device)", 0) + , hLocalRegions_m(Kokkos::create_mirror_view(dLocalRegions_m)) { indexOffset_m.fill(0); centerOffset_m.fill(0); } - template - RegionLayout::RegionLayout(const FieldLayout& fl, - const Mesh& mesh) - : RegionLayout() - { + RegionLayout::RegionLayout(const FieldLayout& fl, const Mesh& mesh) + : RegionLayout() { changeDomain(fl, mesh); } - - template void RegionLayout::changeDomain(const FieldLayout& fl, - const Mesh& mesh) - { + const Mesh& mesh) { // set our index space offset for (unsigned int d = 0; d < Dim; ++d) { - indexOffset_m[d] = fl.getDomain()[d].first(); + indexOffset_m[d] = fl.getDomain()[d].first(); centerOffset_m[d] = 1; } @@ -68,29 +61,26 @@ namespace ippl { fillRegions(fl, mesh); } - // convert a given NDIndex into an NDRegion ... if this object was // constructed from a FieldLayout, this does nothing, but if we are maintaining // our own internal FieldLayout, we must convert from the [0,N-1] index // space to our own continuous NDRegion space. // NOTE: THIS ASSUMES THAT REGION'S HAVE first() < last() !! template - typename RegionLayout::NDRegion_t - RegionLayout::convertNDIndex(const NDIndex& ni, - const Mesh& mesh) const - { + typename RegionLayout::NDRegion_t RegionLayout::convertNDIndex( + const NDIndex& ni, const Mesh& mesh) const { // find first and last points in NDIndex and get coordinates from mesh NDIndex firstPoint, lastPoint; for (unsigned int d = 0; d < Dim; d++) { - int first = ni[d].first() - indexOffset_m[d]; - int last = ni[d].last() - indexOffset_m[d] + centerOffset_m[d]; + int first = ni[d].first() - indexOffset_m[d]; + int last = ni[d].last() - indexOffset_m[d] + centerOffset_m[d]; firstPoint[d] = Index(first, first); - lastPoint[d] = Index(last, last); + lastPoint[d] = Index(last, last); } // convert to mesh space Vector firstCoord = mesh.getVertexPosition(firstPoint); - Vector lastCoord = mesh.getVertexPosition(lastPoint); + Vector lastCoord = mesh.getVertexPosition(lastPoint); NDRegion_t ndregion; for (unsigned int d = 0; d < Dim; d++) { ndregion[d] = PRegion(firstCoord(d), lastCoord(d)); @@ -98,12 +88,9 @@ namespace ippl { return ndregion; } - template - void RegionLayout::fillRegions(const FieldLayout& fl, - const Mesh& mesh) - { - using domain_type = typename FieldLayout::host_mirror_type; + void RegionLayout::fillRegions(const FieldLayout& fl, const Mesh& mesh) { + using domain_type = typename FieldLayout::host_mirror_type; const domain_type& ldomains = fl.getHostLocalDomains(); Kokkos::resize(hLocalRegions_m, ldomains.size()); @@ -117,10 +104,8 @@ namespace ippl { Kokkos::deep_copy(dLocalRegions_m, hLocalRegions_m); } - template - void RegionLayout::write(std::ostream& out) const - { + void RegionLayout::write(std::ostream& out) const { if (Ippl::Comm->rank() > 0) { return; } @@ -135,25 +120,21 @@ namespace ippl { } template - const typename RegionLayout::view_type - RegionLayout::getdLocalRegions() const - { + const typename RegionLayout::view_type + RegionLayout::getdLocalRegions() const { return dLocalRegions_m; } - template - const typename RegionLayout::host_mirror_type - RegionLayout::gethLocalRegions() const - { + const typename RegionLayout::host_mirror_type + RegionLayout::gethLocalRegions() const { return hLocalRegions_m; } template - std::ostream& operator<<(std::ostream& out, const RegionLayout& rl) - { + std::ostream& operator<<(std::ostream& out, const RegionLayout& rl) { rl.write(out); return out; } - } -} + } // namespace detail +} // namespace ippl diff --git a/src/Solver/Electrostatics.h b/src/Solver/Electrostatics.h index 7628532b2..d2ea3aa12 100644 --- a/src/Solver/Electrostatics.h +++ b/src/Solver/Electrostatics.h @@ -23,24 +23,22 @@ namespace ippl { - template , - class C=typename M::DefaultCentering> - class Electrostatics : public Solver - { + template , + class C = typename M::DefaultCentering> + class Electrostatics : public Solver { public: using grad_type = Field, Dim, M, C>; - using lhs_type = typename Solver::lhs_type; - using rhs_type = typename Solver::rhs_type; + using lhs_type = typename Solver::lhs_type; + using rhs_type = typename Solver::rhs_type; /*! * Represents the types of fields that should * be output by the solver */ enum OutputType { - SOL = 0b01, - GRAD = 0b10, - SOL_AND_GRAD = 0b11 + SOL = 0b01, + GRAD = 0b10, + SOL_AND_GRAD = 0b11 }; /*! @@ -49,15 +47,13 @@ namespace ippl { */ Electrostatics() : Solver() - , grad_mp(nullptr) - { + , grad_mp(nullptr) { setDefaultParameters(); } Electrostatics(lhs_type& lhs, rhs_type& rhs) : Solver(lhs, rhs) - , grad_mp(nullptr) - { + , grad_mp(nullptr) { setDefaultParameters(); } @@ -74,15 +70,13 @@ namespace ippl { */ virtual void solve() = 0; - virtual ~Electrostatics() { } + virtual ~Electrostatics() {} protected: grad_type* grad_mp; - virtual void setDefaultParameters() override { - this->params_m.add("output_type", SOL); - } + virtual void setDefaultParameters() override { this->params_m.add("output_type", SOL); } }; -} +} // namespace ippl #endif diff --git a/src/Solver/ElectrostaticsCG.h b/src/Solver/ElectrostaticsCG.h index 9fb642b30..cf26ef921 100644 --- a/src/Solver/ElectrostaticsCG.h +++ b/src/Solver/ElectrostaticsCG.h @@ -24,35 +24,31 @@ namespace ippl { - // Expands to a lambda that acts as a wrapper for a differential operator - // fun: the function for which to create the wrapper, such as ippl::laplace - // type: the argument type, which should match the LHS type for the solver - #define IPPL_SOLVER_OPERATOR_WRAPPER(fun, type) \ - [] (type arg) { \ - return fun(arg); \ +// Expands to a lambda that acts as a wrapper for a differential operator +// fun: the function for which to create the wrapper, such as ippl::laplace +// type: the argument type, which should match the LHS type for the solver +#define IPPL_SOLVER_OPERATOR_WRAPPER(fun, type) \ + [](type arg) { \ + return fun(arg); \ } - template , - class C=typename M::DefaultCentering> - class ElectrostaticsCG : public Electrostatics - { + template , + class C = typename M::DefaultCentering> + class ElectrostaticsCG : public Electrostatics { public: using lhs_type = typename Solver::lhs_type; using rhs_type = typename Solver::rhs_type; - using OpRet = UnaryMinus>; - using algo = PCG; - using Base = Electrostatics; + using OpRet = UnaryMinus>; + using algo = PCG; + using Base = Electrostatics; ElectrostaticsCG() - : Base() - { + : Base() { setDefaultParameters(); } ElectrostaticsCG(lhs_type& lhs, rhs_type& rhs) - : Base(lhs, rhs) - { + : Base(lhs, rhs) { setDefaultParameters(); } @@ -71,9 +67,7 @@ namespace ippl { * the last time this solver was used * @return Iteration count of last solve */ - int getIterationCount() { - return algo_m.getIterationCount(); - } + int getIterationCount() { return algo_m.getIterationCount(); } protected: algo algo_m = algo(); @@ -84,6 +78,6 @@ namespace ippl { } }; -} +} // namespace ippl #endif diff --git a/src/Solver/FFTPeriodicPoissonSolver.h b/src/Solver/FFTPeriodicPoissonSolver.h index 85b007329..748021ce0 100644 --- a/src/Solver/FFTPeriodicPoissonSolver.h +++ b/src/Solver/FFTPeriodicPoissonSolver.h @@ -20,41 +20,36 @@ #ifndef IPPL_FFT_PERIODIC_POISSON_SOLVER_H #define IPPL_FFT_PERIODIC_POISSON_SOLVER_H +#include "Electrostatics.h" +#include "FFT/FFT.h" #include "FieldLayout/FieldLayout.h" #include "Index/NDIndex.h" #include "Types/ViewTypes.h" -#include "FFT/FFT.h" -#include "Electrostatics.h" namespace ippl { - template , - class C=typename M::DefaultCentering> - class FFTPeriodicPoissonSolver : public Electrostatics - { + template , + class C = typename M::DefaultCentering> + class FFTPeriodicPoissonSolver : public Electrostatics { public: - using Field_t = Field; - using FFT_t = FFT; + using Field_t = Field; + using FFT_t = FFT; using Complex_t = Kokkos::complex; using CxField_t = Field; - using Layout_t = FieldLayout; - using Vector_t = Vector; + using Layout_t = FieldLayout; + using Vector_t = Vector; - using Base = Electrostatics; + using Base = Electrostatics; using lhs_type = typename Solver::lhs_type; using rhs_type = typename Solver::rhs_type; FFTPeriodicPoissonSolver() - : Base() - { + : Base() { setDefaultParameters(); } - FFTPeriodicPoissonSolver(lhs_type& lhs, - rhs_type& rhs) - : Base(lhs, rhs) - { + FFTPeriodicPoissonSolver(lhs_type& lhs, rhs_type& rhs) + : Base(lhs, rhs) { setDefaultParameters(); } @@ -65,7 +60,6 @@ namespace ippl { void solve() override; private: - void initialize(); std::shared_ptr fft_mp; @@ -75,37 +69,34 @@ namespace ippl { std::shared_ptr layoutComplex_mp; protected: - virtual void setDefaultParameters() override { - using heffteBackend = typename FFT_t::heffteBackend; + using heffteBackend = typename FFT_t::heffteBackend; heffte::plan_options opts = heffte::default_options(); this->params_m.add("use_pencils", opts.use_pencils); this->params_m.add("use_reorder", opts.use_reorder); this->params_m.add("use_gpu_aware", opts.use_gpu_aware); this->params_m.add("r2c_direction", 0); - + switch (opts.algorithm) { - case heffte::reshape_algorithm::alltoall : + case heffte::reshape_algorithm::alltoall: this->params_m.add("comm", a2a); break; - case heffte::reshape_algorithm::alltoallv : + case heffte::reshape_algorithm::alltoallv: this->params_m.add("comm", a2av); break; - case heffte::reshape_algorithm::p2p : + case heffte::reshape_algorithm::p2p: this->params_m.add("comm", p2p); break; - case heffte::reshape_algorithm::p2p_plined : + case heffte::reshape_algorithm::p2p_plined: this->params_m.add("comm", p2p_pl); break; default: throw IpplException("FFTPeriodicPoissonSolver::setDefaultParameters", - "Unrecognized heffte communication type"); + "Unrecognized heffte communication type"); } - } - }; -} +} // namespace ippl #include "Solver/FFTPeriodicPoissonSolver.hpp" #endif diff --git a/src/Solver/FFTPeriodicPoissonSolver.hpp b/src/Solver/FFTPeriodicPoissonSolver.hpp index 015400e9a..09df65e92 100644 --- a/src/Solver/FFTPeriodicPoissonSolver.hpp +++ b/src/Solver/FFTPeriodicPoissonSolver.hpp @@ -20,133 +20,75 @@ namespace ippl { template - void FFTPeriodicPoissonSolver::setRhs(rhs_type& rhs) { + void FFTPeriodicPoissonSolver::setRhs(rhs_type& rhs) { Base::setRhs(rhs); initialize(); } template - void FFTPeriodicPoissonSolver::initialize() { + void FFTPeriodicPoissonSolver::initialize() { const Layout_t& layout_r = this->rhs_mp->getLayout(); - domain_m = layout_r.getDomain(); + domain_m = layout_r.getDomain(); e_dim_tag decomp[Dim]; NDIndex domainComplex; - for(unsigned d = 0; d < Dim; ++d) { + for (unsigned d = 0; d < Dim; ++d) { decomp[d] = layout_r.getRequestedDistribution(d); - if(this->params_m.template get("r2c_direction") == (int)d) - domainComplex[d] = Index(domain_m[d].length()/2 + 1); + if (this->params_m.template get("r2c_direction") == (int)d) + domainComplex[d] = Index(domain_m[d].length() / 2 + 1); else domainComplex[d] = Index(domain_m[d].length()); } layoutComplex_mp = std::make_shared(domainComplex, decomp); - Vector hComplex = {1.0, 1.0, 1.0}; + Vector hComplex = {1.0, 1.0, 1.0}; Vector originComplex = {0.0, 0.0, 0.0}; M meshComplex(domainComplex, hComplex, originComplex); fieldComplex_m.initialize(meshComplex, *layoutComplex_mp); - if(this->params_m.template get("output_type") == Base::GRAD) + if (this->params_m.template get("output_type") == Base::GRAD) tempFieldComplex_m.initialize(meshComplex, *layoutComplex_mp); - fft_mp = std::make_shared(layout_r, *layoutComplex_mp, - this->params_m); + fft_mp = std::make_shared(layout_r, *layoutComplex_mp, this->params_m); } template - void FFTPeriodicPoissonSolver::solve() { - + void FFTPeriodicPoissonSolver::solve() { fft_mp->transform(1, *this->rhs_mp, fieldComplex_m); - auto view = fieldComplex_m.getView(); - const int nghost = fieldComplex_m.getNghost(); + auto view = fieldComplex_m.getView(); + const int nghost = fieldComplex_m.getNghost(); using mdrange_type = Kokkos::MDRangePolicy>; - - double pi = std::acos(-1.0); - const M& mesh = this->rhs_mp->get_mesh(); - const auto& lDomComplex = layoutComplex_mp->getLocalNDIndex(); - using vector_type = typename M::vector_type; + double pi = std::acos(-1.0); + const M& mesh = this->rhs_mp->get_mesh(); + const auto& lDomComplex = layoutComplex_mp->getLocalNDIndex(); + using vector_type = typename M::vector_type; const vector_type& origin = mesh.getOrigin(); - const vector_type& hx = mesh.getMeshSpacing(); + const vector_type& hx = mesh.getMeshSpacing(); vector_type rmax; Vector N; for (size_t d = 0; d < Dim; ++d) { - N[d] = domain_m[d].length(); + N[d] = domain_m[d].length(); rmax[d] = origin[d] + (N[d] * hx[d]); } - //Based on output_type calculate either solution - //or gradient + // Based on output_type calculate either solution + // or gradient switch (this->params_m.template get("output_type")) { - case Base::SOL: { - - Kokkos::parallel_for("Solution FFTPeriodicPoissonSolver", - mdrange_type({nghost, nghost, nghost}, - {view.extent(0) - nghost, - view.extent(1) - nghost, - view.extent(2) - nghost}), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - - const int ig = i + lDomComplex[0].first() - nghost; - const int jg = j + lDomComplex[1].first() - nghost; - const int kg = k + lDomComplex[2].first() - nghost; - - Vector iVec = {ig, jg, kg}; - Vector_t kVec; - - for(size_t d = 0; d < Dim; ++d) { - const double Len = rmax[d] - origin[d]; - bool shift = (iVec[d] > (N[d]/2)); - kVec[d] = 2 * pi / Len * (iVec[d] - shift * N[d]); - } - - double Dr = kVec[0] * kVec[0] + - kVec[1] * kVec[1] + kVec[2] * kVec[2]; - - //It would be great if we can remove this conditional - if(Dr != 0.0) - view(i, j, k) *= 1 / Dr; - else - view(i, j, k) = 0.0; - }); - - fft_mp->transform(-1, *this->rhs_mp, fieldComplex_m); - - break; - } - case Base::GRAD:{ - //Compute gradient in Fourier space and then - //take inverse FFT. - - Kokkos::complex imag = {0.0, 1.0}; - auto tempview = tempFieldComplex_m.getView(); - auto viewRhs = this->rhs_mp->getView(); - auto viewLhs = this->lhs_mp->getView(); - const int nghostL = this->lhs_mp->getNghost(); - - for(size_t gd = 0; gd < Dim; ++gd) { - - Kokkos::parallel_for("Gradient FFTPeriodicPoissonSolver", - mdrange_type({nghost, nghost, nghost}, - {view.extent(0) - nghost, - view.extent(1) - nghost, - view.extent(2) - nghost}), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - + Kokkos::parallel_for( + "Solution FFTPeriodicPoissonSolver", + mdrange_type({nghost, nghost, nghost}, + {view.extent(0) - nghost, view.extent(1) - nghost, + view.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { const int ig = i + lDomComplex[0].first() - nghost; const int jg = j + lDomComplex[1].first() - nghost; const int kg = k + lDomComplex[2].first() - nghost; @@ -154,48 +96,84 @@ namespace ippl { Vector iVec = {ig, jg, kg}; Vector_t kVec; - for(size_t d = 0; d < Dim; ++d) { + for (size_t d = 0; d < Dim; ++d) { const double Len = rmax[d] - origin[d]; - bool shift = (iVec[d] > (N[d]/2)); - bool notMid = (iVec[d] != (N[d]/2)); - kVec[d] = notMid * 2 * pi / Len * (iVec[d] - shift * N[d]); + bool shift = (iVec[d] > (N[d] / 2)); + kVec[d] = 2 * pi / Len * (iVec[d] - shift * N[d]); } - double Dr = kVec[0] * kVec[0] + - kVec[1] * kVec[1] + kVec[2] * kVec[2]; + double Dr = kVec[0] * kVec[0] + kVec[1] * kVec[1] + kVec[2] * kVec[2]; - tempview(i, j, k) = view(i, j, k); - - //It would be great if we can remove this conditional - if(Dr != 0.0) - tempview(i, j, k) *= -(imag * kVec[gd] / Dr); + // It would be great if we can remove this conditional + if (Dr != 0.0) + view(i, j, k) *= 1 / Dr; else - tempview(i, j, k) = 0.0; + view(i, j, k) = 0.0; }); + fft_mp->transform(-1, *this->rhs_mp, fieldComplex_m); + + break; + } + case Base::GRAD: { + // Compute gradient in Fourier space and then + // take inverse FFT. + + Kokkos::complex imag = {0.0, 1.0}; + auto tempview = tempFieldComplex_m.getView(); + auto viewRhs = this->rhs_mp->getView(); + auto viewLhs = this->lhs_mp->getView(); + const int nghostL = this->lhs_mp->getNghost(); + + for (size_t gd = 0; gd < Dim; ++gd) { + Kokkos::parallel_for( + "Gradient FFTPeriodicPoissonSolver", + mdrange_type({nghost, nghost, nghost}, + {view.extent(0) - nghost, view.extent(1) - nghost, + view.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + const int ig = i + lDomComplex[0].first() - nghost; + const int jg = j + lDomComplex[1].first() - nghost; + const int kg = k + lDomComplex[2].first() - nghost; + + Vector iVec = {ig, jg, kg}; + Vector_t kVec; + + for (size_t d = 0; d < Dim; ++d) { + const double Len = rmax[d] - origin[d]; + bool shift = (iVec[d] > (N[d] / 2)); + bool notMid = (iVec[d] != (N[d] / 2)); + kVec[d] = notMid * 2 * pi / Len * (iVec[d] - shift * N[d]); + } + + double Dr = kVec[0] * kVec[0] + kVec[1] * kVec[1] + kVec[2] * kVec[2]; + + tempview(i, j, k) = view(i, j, k); + + // It would be great if we can remove this conditional + if (Dr != 0.0) + tempview(i, j, k) *= -(imag * kVec[gd] / Dr); + else + tempview(i, j, k) = 0.0; + }); + fft_mp->transform(-1, *this->rhs_mp, tempFieldComplex_m); - Kokkos::parallel_for("Assign Gradient FFTPeriodicPoissonSolver", - mdrange_type({nghostL, nghostL, nghostL}, - {viewLhs.extent(0) - nghostL, - viewLhs.extent(1) - nghostL, - viewLhs.extent(2) - nghostL}), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - viewLhs(i, j, k)[gd] = viewRhs(i, j, k); - }); + Kokkos::parallel_for( + "Assign Gradient FFTPeriodicPoissonSolver", + mdrange_type({nghostL, nghostL, nghostL}, + {viewLhs.extent(0) - nghostL, viewLhs.extent(1) - nghostL, + viewLhs.extent(2) - nghostL}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + viewLhs(i, j, k)[gd] = viewRhs(i, j, k); + }); } break; - } + } default: - throw IpplException("FFTPeriodicPoissonSolver::solve", - "Unrecognized output_type"); - + throw IpplException("FFTPeriodicPoissonSolver::solve", "Unrecognized output_type"); } - } -} +} // namespace ippl diff --git a/src/Solver/FFTPoissonSolver.h b/src/Solver/FFTPoissonSolver.h index 3ae9f89ea..84068caa3 100644 --- a/src/Solver/FFTPoissonSolver.h +++ b/src/Solver/FFTPoissonSolver.h @@ -17,165 +17,163 @@ #ifndef FFT_POISSON_SOLVER_H_ #define FFT_POISSON_SOLVER_H_ - #include "Electrostatics.h" #include "FFT/FFT.h" #include "Field/Field.h" #include "FieldLayout/FieldLayout.h" -#include "Types/Vector.h" #include "Meshes/UniformCartesian.h" +#include "Types/Vector.h" namespace ippl { - template , - class C=typename M::DefaultCentering> - class FFTPoissonSolver : public Electrostatics { - public: - // types for LHS and RHS - using lhs_type = typename Solver::lhs_type; - using rhs_type = typename Solver::rhs_type; - - // type of output - using Base = Electrostatics; - - // define a type for a 3 dimensional field (e.g. charge density field) - // define a type of Field with integers to be used for the helper Green's function - // also define a type for the Fourier transformed complex valued fields - typedef Field Field_t; - typedef Field IField_t; - typedef Field, Dim, M> CxField_t; - typedef Vector Vector_t; - - // define type for field layout - typedef FieldLayout FieldLayout_t; - - // define a type for the 3 dimensional real to complex Fourier transform - typedef FFT FFT_t; - - // type for communication buffers - using buffer_type = Communicate::buffer_type; - - // constructor and destructor - FFTPoissonSolver(rhs_type& rhs, ParameterList& fftparams, std::string alg); - FFTPoissonSolver(lhs_type& lhs, rhs_type& rhs, ParameterList& fftparams, std::string alg, - int sol = Base::SOL_AND_GRAD); - ~FFTPoissonSolver(); - - // allows user to set gradient of phi = Efield instead of spectral - // calculation of Efield (which uses FFTs) - void setGradFD(); - - // solve the Poisson equation using FFT; - // more specifically, compute the scalar potential given a density field rho using - void solve() override; - - // compute standard Green's function - void greensFunction(); - - // function called in the constructor to initialize the fields - void initializeFields(); - - // communication used for multi-rank Vico-Greengard's Green's function - void communicateVico(Vector size, typename CxField_t::view_type view_g, - const ippl::NDIndex ldom_g, const int nghost_g, - typename Field_t::view_type view, const ippl::NDIndex ldom, - const int nghost); - - private: - // create a field to use as temporary storage - // references to it can be created to make the code where it is used readable - Field_t storage_field; - - Field_t& rho2_mr = storage_field; // the charge-density field with mesh doubled in each dimension - Field_t& grn_mr = storage_field; // the Green's function - - // rho2tr_m is the Fourier transformed charge-density field - // domain3_m and mesh3_m are used - CxField_t rho2tr_m; - - // grntr_m is the Fourier transformed Green's function - // domain3_m and mesh3_m are used - CxField_t grntr_m; - - // temp_m field for the E-field computation - CxField_t temp_m; - - // fields that facilitate the calculation in greensFunction() - IField_t grnIField_m[Dim]; - - // the FFT object - std::unique_ptr fft_m; - - // mesh and layout objects for rho_m (RHS) - M* mesh_mp; - FieldLayout_t* layout_mp; - - // mesh and layout objects for rho2_m - std::unique_ptr mesh2_m; - std::unique_ptr layout2_m; - - // mesh and layout objects for the Fourier transformed Complex fields - std::unique_ptr meshComplex_m; - std::unique_ptr layoutComplex_m; - - // domains for the various fields - NDIndex domain_m; // original domain, gridsize - NDIndex domain2_m; // doubled gridsize (2*Nx,2*Ny,2*Nz) - NDIndex domainComplex_m; // field for the complex values of the RC transformation - - // mesh spacing and mesh size - Vector_t hr_m; - Vector nr_m; - - // string specifying algorithm: Hockney or Vico-Greengard - std::string alg_m; - - // members for Vico-Greengard - CxField_t grnL_m; - - std::unique_ptr> fft4n_m; - - std::unique_ptr mesh4_m; - std::unique_ptr layout4_m; - - NDIndex domain4_m; - - // bool indicating whether we want gradient of solution to calculate E field - bool isGradFD_m; - - // buffer for communication - detail::FieldBufferData fd_m; - - protected: - - virtual void setDefaultParameters() override { - using heffteBackend = typename FFT_t::heffteBackend; - heffte::plan_options opts = heffte::default_options(); - this->params_m.add("use_pencils", opts.use_pencils); - this->params_m.add("use_reorder", opts.use_reorder); - this->params_m.add("use_gpu_aware", opts.use_gpu_aware); - this->params_m.add("r2c_direction", 0); - - switch (opts.algorithm) { - case heffte::reshape_algorithm::alltoall : - this->params_m.add("comm", a2a); - break; - case heffte::reshape_algorithm::alltoallv : - this->params_m.add("comm", a2av); - break; - case heffte::reshape_algorithm::p2p : - this->params_m.add("comm", p2p); - break; - case heffte::reshape_algorithm::p2p_plined : - this->params_m.add("comm", p2p_pl); - break; - default: - throw IpplException("FFTPoissonSolver::setDefaultParameters", - "Unrecognized heffte communication type"); - } + template , + class C = typename M::DefaultCentering> + class FFTPoissonSolver : public Electrostatics { + public: + // types for LHS and RHS + using lhs_type = typename Solver::lhs_type; + using rhs_type = typename Solver::rhs_type; + + // type of output + using Base = Electrostatics; + + // define a type for a 3 dimensional field (e.g. charge density field) + // define a type of Field with integers to be used for the helper Green's function + // also define a type for the Fourier transformed complex valued fields + typedef Field Field_t; + typedef Field IField_t; + typedef Field, Dim, M> CxField_t; + typedef Vector Vector_t; + + // define type for field layout + typedef FieldLayout FieldLayout_t; + + // define a type for the 3 dimensional real to complex Fourier transform + typedef FFT FFT_t; + + // type for communication buffers + using buffer_type = Communicate::buffer_type; + + // constructor and destructor + FFTPoissonSolver(rhs_type& rhs, ParameterList& fftparams, std::string alg); + FFTPoissonSolver(lhs_type& lhs, rhs_type& rhs, ParameterList& fftparams, std::string alg, + int sol = Base::SOL_AND_GRAD); + ~FFTPoissonSolver(); + + // allows user to set gradient of phi = Efield instead of spectral + // calculation of Efield (which uses FFTs) + void setGradFD(); + + // solve the Poisson equation using FFT; + // more specifically, compute the scalar potential given a density field rho using + void solve() override; + + // compute standard Green's function + void greensFunction(); + + // function called in the constructor to initialize the fields + void initializeFields(); + + // communication used for multi-rank Vico-Greengard's Green's function + void communicateVico(Vector size, typename CxField_t::view_type view_g, + const ippl::NDIndex ldom_g, const int nghost_g, + typename Field_t::view_type view, const ippl::NDIndex ldom, + const int nghost); + + private: + // create a field to use as temporary storage + // references to it can be created to make the code where it is used readable + Field_t storage_field; + + Field_t& rho2_mr = + storage_field; // the charge-density field with mesh doubled in each dimension + Field_t& grn_mr = storage_field; // the Green's function + + // rho2tr_m is the Fourier transformed charge-density field + // domain3_m and mesh3_m are used + CxField_t rho2tr_m; + + // grntr_m is the Fourier transformed Green's function + // domain3_m and mesh3_m are used + CxField_t grntr_m; + + // temp_m field for the E-field computation + CxField_t temp_m; + + // fields that facilitate the calculation in greensFunction() + IField_t grnIField_m[Dim]; + + // the FFT object + std::unique_ptr fft_m; + + // mesh and layout objects for rho_m (RHS) + M* mesh_mp; + FieldLayout_t* layout_mp; + + // mesh and layout objects for rho2_m + std::unique_ptr mesh2_m; + std::unique_ptr layout2_m; + + // mesh and layout objects for the Fourier transformed Complex fields + std::unique_ptr meshComplex_m; + std::unique_ptr layoutComplex_m; + + // domains for the various fields + NDIndex domain_m; // original domain, gridsize + NDIndex domain2_m; // doubled gridsize (2*Nx,2*Ny,2*Nz) + NDIndex domainComplex_m; // field for the complex values of the RC transformation + + // mesh spacing and mesh size + Vector_t hr_m; + Vector nr_m; + + // string specifying algorithm: Hockney or Vico-Greengard + std::string alg_m; + + // members for Vico-Greengard + CxField_t grnL_m; + + std::unique_ptr> fft4n_m; + + std::unique_ptr mesh4_m; + std::unique_ptr layout4_m; + + NDIndex domain4_m; + + // bool indicating whether we want gradient of solution to calculate E field + bool isGradFD_m; + + // buffer for communication + detail::FieldBufferData fd_m; + + protected: + virtual void setDefaultParameters() override { + using heffteBackend = typename FFT_t::heffteBackend; + heffte::plan_options opts = heffte::default_options(); + this->params_m.add("use_pencils", opts.use_pencils); + this->params_m.add("use_reorder", opts.use_reorder); + this->params_m.add("use_gpu_aware", opts.use_gpu_aware); + this->params_m.add("r2c_direction", 0); + + switch (opts.algorithm) { + case heffte::reshape_algorithm::alltoall: + this->params_m.add("comm", a2a); + break; + case heffte::reshape_algorithm::alltoallv: + this->params_m.add("comm", a2av); + break; + case heffte::reshape_algorithm::p2p: + this->params_m.add("comm", p2p); + break; + case heffte::reshape_algorithm::p2p_plined: + this->params_m.add("comm", p2p_pl); + break; + default: + throw IpplException("FFTPoissonSolver::setDefaultParameters", + "Unrecognized heffte communication type"); } + } }; -} +} // namespace ippl #include "FFTPoissonSolver.hpp" diff --git a/src/Solver/FFTPoissonSolver.hpp b/src/Solver/FFTPoissonSolver.hpp index 363a8a8b1..00316e5ef 100644 --- a/src/Solver/FFTPoissonSolver.hpp +++ b/src/Solver/FFTPoissonSolver.hpp @@ -14,23 +14,22 @@ //// // -#include "FFTPoissonSolver.h" -#include "Utility/IpplTimings.h" -#include "Utility/IpplException.h" +#include #include "Communicate/Archive.h" +#include "FFTPoissonSolver.h" #include "Field/HaloCells.h" -#include +#include "Utility/IpplException.h" +#include "Utility/IpplTimings.h" // Communication specific functions (pack and unpack). template -void pack(const ippl::NDIndex<3> intersect, Kokkos::View& view, +void pack(const ippl::NDIndex<3> intersect, Kokkos::View& view, ippl::detail::FieldBufferData& fd, int nghost, const ippl::NDIndex<3> ldom, ippl::Communicate::size_type& nsends) { - ippl::Field::view_type& buffer = fd.buffer; size_t size = intersect.size(); - nsends = size; + nsends = size; if (buffer.size() < size) { const int overalloc = Ippl::Comm->getDefaultOverallocation(); Kokkos::realloc(buffer, size * overalloc); @@ -45,33 +44,31 @@ void pack(const ippl::NDIndex<3> intersect, Kokkos::View& view, const int last2 = intersect[2].last() + nghost - ldom[2].first() + 1; using mdrange_type = Kokkos::MDRangePolicy>; - //This type casting to long int is necessary as otherwise Kokkos complains for - //intel compilers - Kokkos::parallel_for("pack()", mdrange_type({first0, first1, first2}, - {(long int)last0, - (long int)last1, - (long int)last2}), - KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { - const int ig = i - first0; - const int jg = j - first1; - const int kg = k - first2; - - const int l = ig + jg * intersect[0].length() + - kg * intersect[1].length() * intersect[0].length(); - - Kokkos::complex val = view(i,j,k); - - buffer(l) = Kokkos::real(val); - }); + // This type casting to long int is necessary as otherwise Kokkos complains for + // intel compilers + Kokkos::parallel_for( + "pack()", + mdrange_type({first0, first1, first2}, {(long int)last0, (long int)last1, (long int)last2}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + const int ig = i - first0; + const int jg = j - first1; + const int kg = k - first2; + + const int l = ig + jg * intersect[0].length() + + kg * intersect[1].length() * intersect[0].length(); + + Kokkos::complex val = view(i, j, k); + + buffer(l) = Kokkos::real(val); + }); Kokkos::fence(); } -void unpack(const ippl::NDIndex<3> intersect, const ippl::Field::view_type& view, +void unpack(const ippl::NDIndex<3> intersect, const ippl::Field::view_type& view, ippl::detail::FieldBufferData& fd, int nghost, const ippl::NDIndex<3> ldom, - bool x=false, bool y=false, bool z=false) { - + bool x = false, bool y = false, bool z = false) { ippl::Field::view_type& buffer = fd.buffer; - + const int first0 = intersect[0].first() + nghost - ldom[0].first(); const int first1 = intersect[1].first() + nghost - ldom[1].first(); const int first2 = intersect[2].first() + nghost - ldom[2].first(); @@ -81,30 +78,28 @@ void unpack(const ippl::NDIndex<3> intersect, const ippl::Field::view_ const int last2 = intersect[2].last() + nghost - ldom[2].first() + 1; using mdrange_type = Kokkos::MDRangePolicy>; - Kokkos::parallel_for("pack()", mdrange_type({first0, first1, first2}, - {last0, last1, last2}), - KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { - - int ig = i - first0; - int jg = j - first1; - int kg = k - first2; - - ig = x * (intersect[0].length() - 2*ig - 1) + ig; - jg = y * (intersect[1].length() - 2*jg - 1) + jg; - kg = z * (intersect[2].length() - 2*kg - 1) + kg; - - const int l = ig + jg * intersect[0].length() + - kg * intersect[1].length() * intersect[0].length(); - - view(i,j,k) = buffer(l); - }); - Kokkos::fence(); + Kokkos::parallel_for( + "pack()", mdrange_type({first0, first1, first2}, {last0, last1, last2}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + int ig = i - first0; + int jg = j - first1; + int kg = k - first2; + + ig = x * (intersect[0].length() - 2 * ig - 1) + ig; + jg = y * (intersect[1].length() - 2 * jg - 1) + jg; + kg = z * (intersect[2].length() - 2 * kg - 1) + kg; + + const int l = ig + jg * intersect[0].length() + + kg * intersect[1].length() * intersect[0].length(); + + view(i, j, k) = buffer(l); + }); + Kokkos::fence(); } - -void unpack(const ippl::NDIndex<3> intersect, const ippl::Field,3>::view_type& view, - size_t dim, ippl::detail::FieldBufferData& fd, int nghost, const ippl::NDIndex<3> ldom) { - +void unpack(const ippl::NDIndex<3> intersect, + const ippl::Field, 3>::view_type& view, size_t dim, + ippl::detail::FieldBufferData& fd, int nghost, const ippl::NDIndex<3> ldom) { ippl::Field::view_type& buffer = fd.buffer; const int first0 = intersect[0].first() + nghost - ldom[0].first(); @@ -116,389 +111,509 @@ void unpack(const ippl::NDIndex<3> intersect, const ippl::Field>; - Kokkos::parallel_for("pack()", mdrange_type({first0, first1, first2}, - {last0, last1, last2}), - KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { - - const int ig = i - first0; - const int jg = j - first1; - const int kg = k - first2; - - const int l = ig + jg * intersect[0].length() + kg * intersect[1].length() - * intersect[0].length(); - view(i,j,k)[dim] = buffer(l); - }); - Kokkos::fence(); + Kokkos::parallel_for( + "pack()", mdrange_type({first0, first1, first2}, {last0, last1, last2}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + const int ig = i - first0; + const int jg = j - first1; + const int kg = k - first2; + + const int l = ig + jg * intersect[0].length() + + kg * intersect[1].length() * intersect[0].length(); + view(i, j, k)[dim] = buffer(l); + }); + Kokkos::fence(); } namespace ippl { - ///////////////////////////////////////////////////////////////////////// - // constructor and destructor - - template - FFTPoissonSolver::FFTPoissonSolver(rhs_type& rhs, - ParameterList& fftparams, - std::string alg) - : mesh_mp(nullptr), - layout_mp(nullptr), - mesh2_m(nullptr), - layout2_m(nullptr), - meshComplex_m(nullptr), - layoutComplex_m(nullptr), - alg_m(alg), - mesh4_m(nullptr), - layout4_m(nullptr), - isGradFD_m(false) - { - std::transform(alg_m.begin(), alg_m.end(), alg_m.begin(), ::toupper); - setDefaultParameters(); - this->setRhs(rhs); - - this->params_m.merge(fftparams); - this->params_m.update("output_type", Base::SOL); - - // start a timer - static IpplTimings::TimerRef initialize = IpplTimings::getTimer("Initialize"); - IpplTimings::startTimer(initialize); - - initializeFields(); - - IpplTimings::stopTimer(initialize); - } - - template - FFTPoissonSolver::FFTPoissonSolver(lhs_type& lhs, rhs_type& rhs, - ParameterList& fftparams, std::string alg, int sol) - : mesh_mp(nullptr), - layout_mp(nullptr), - mesh2_m(nullptr), - layout2_m(nullptr), - meshComplex_m(nullptr), - layoutComplex_m(nullptr), - alg_m(alg), - mesh4_m(nullptr), - layout4_m(nullptr), - isGradFD_m(false) - { - - std::transform(alg_m.begin(), alg_m.end(), alg_m.begin(), ::toupper); - setDefaultParameters(); - this->setRhs(rhs); - this->setLhs(lhs); - - this->params_m.merge(fftparams); - this->params_m.update("output_type", sol); + ///////////////////////////////////////////////////////////////////////// + // constructor and destructor + + template + FFTPoissonSolver::FFTPoissonSolver(rhs_type& rhs, + ParameterList& fftparams, + std::string alg) + : mesh_mp(nullptr) + , layout_mp(nullptr) + , mesh2_m(nullptr) + , layout2_m(nullptr) + , meshComplex_m(nullptr) + , layoutComplex_m(nullptr) + , alg_m(alg) + , mesh4_m(nullptr) + , layout4_m(nullptr) + , isGradFD_m(false) { + std::transform(alg_m.begin(), alg_m.end(), alg_m.begin(), ::toupper); + setDefaultParameters(); + this->setRhs(rhs); + + this->params_m.merge(fftparams); + this->params_m.update("output_type", Base::SOL); + + // start a timer + static IpplTimings::TimerRef initialize = IpplTimings::getTimer("Initialize"); + IpplTimings::startTimer(initialize); + + initializeFields(); + + IpplTimings::stopTimer(initialize); + } - // start a timer - static IpplTimings::TimerRef initialize = IpplTimings::getTimer("Initialize"); - IpplTimings::startTimer(initialize); - - initializeFields(); - - IpplTimings::stopTimer(initialize); - } + template + FFTPoissonSolver::FFTPoissonSolver(lhs_type& lhs, rhs_type& rhs, + ParameterList& fftparams, + std::string alg, int sol) + : mesh_mp(nullptr) + , layout_mp(nullptr) + , mesh2_m(nullptr) + , layout2_m(nullptr) + , meshComplex_m(nullptr) + , layoutComplex_m(nullptr) + , alg_m(alg) + , mesh4_m(nullptr) + , layout4_m(nullptr) + , isGradFD_m(false) { + std::transform(alg_m.begin(), alg_m.end(), alg_m.begin(), ::toupper); + setDefaultParameters(); + this->setRhs(rhs); + this->setLhs(lhs); + + this->params_m.merge(fftparams); + this->params_m.update("output_type", sol); + + // start a timer + static IpplTimings::TimerRef initialize = IpplTimings::getTimer("Initialize"); + IpplTimings::startTimer(initialize); + + initializeFields(); + + IpplTimings::stopTimer(initialize); + } - template - FFTPoissonSolver::~FFTPoissonSolver() {}; + template + FFTPoissonSolver::~FFTPoissonSolver(){}; - ///////////////////////////////////////////////////////////////////////// - // allows user to set gradient of phi = Efield instead of spectral - // calculation of Efield (which uses FFTs) + ///////////////////////////////////////////////////////////////////////// + // allows user to set gradient of phi = Efield instead of spectral + // calculation of Efield (which uses FFTs) - template - void - FFTPoissonSolver::setGradFD() - { - // get the output type (sol, grad, or sol & grad) - const int out = this->params_m.template get("output_type"); + template + void FFTPoissonSolver::setGradFD() { + // get the output type (sol, grad, or sol & grad) + const int out = this->params_m.template get("output_type"); - if (out != Base::SOL_AND_GRAD) { - throw IpplException("FFTPoissonSolver::setGradFD()", + if (out != Base::SOL_AND_GRAD) { + throw IpplException( + "FFTPoissonSolver::setGradFD()", "Cannot use gradient for Efield computation unless output type is SOL_AND_GRAD"); - } else { - isGradFD_m = true; - } + } else { + isGradFD_m = true; } + } - ///////////////////////////////////////////////////////////////////////// - // initializeFields method, called in constructor - - template - void - FFTPoissonSolver::initializeFields() { - - // first check if valid algorithm choice - if ((alg_m != "VICO") && (alg_m != "HOCKNEY") && (alg_m != "BIHARMONIC")) { - throw IpplException("FFTPoissonSolver::initializeFields()", + ///////////////////////////////////////////////////////////////////////// + // initializeFields method, called in constructor + + template + void FFTPoissonSolver::initializeFields() { + // first check if valid algorithm choice + if ((alg_m != "VICO") && (alg_m != "HOCKNEY") && (alg_m != "BIHARMONIC")) { + throw IpplException( + "FFTPoissonSolver::initializeFields()", "Currently only Hockney and Vico algorithms are supported for open BCs"); - } + } - // get layout and mesh - layout_mp = &(this->rhs_mp->getLayout()); - mesh_mp = &(this->rhs_mp->get_mesh()); + // get layout and mesh + layout_mp = &(this->rhs_mp->getLayout()); + mesh_mp = &(this->rhs_mp->get_mesh()); - // get mesh spacing - hr_m = mesh_mp->getMeshSpacing(); + // get mesh spacing + hr_m = mesh_mp->getMeshSpacing(); - // get origin - Vector_t origin = mesh_mp->getOrigin(); - const double sum = std::abs(origin[0]) + std::abs(origin[1]) + std::abs(origin[2]); + // get origin + Vector_t origin = mesh_mp->getOrigin(); + const double sum = std::abs(origin[0]) + std::abs(origin[1]) + std::abs(origin[2]); - // origin should always be 0 for Green's function computation to work... - if (sum != 0.0) { - throw IpplException("FFTPoissonSolver::initializeFields", "Origin is not 0"); - } + // origin should always be 0 for Green's function computation to work... + if (sum != 0.0) { + throw IpplException("FFTPoissonSolver::initializeFields", "Origin is not 0"); + } - // create domain for the real fields - domain_m = layout_mp->getDomain(); - - // get the mesh spacings and sizes for each dimension - for (unsigned int i = 0; i < Dim; ++i) { - nr_m[i] = domain_m[i].length(); - - // create the doubled domain for the FFT procedure - domain2_m[i] = Index(2 * nr_m[i]); - } - - // define decomposition (parallel / serial) - e_dim_tag decomp[Dim]; - for(unsigned int d = 0; d < Dim; ++d) { - decomp[d] = layout_mp->getRequestedDistribution(d); - } - - // create double sized mesh and layout objects using the previously defined domain2_m - mesh2_m = std::unique_ptr(new M(domain2_m, hr_m, origin)); - layout2_m = std::unique_ptr(new FieldLayout_t(domain2_m, decomp)); - - // create the domain for the transformed (complex) fields - // since we use HeFFTe for the transforms it doesn't require permuting to the right - // one of the dimensions has only (n/2 +1) as our original fields are fully real - // the dimension is given by the user via r2c_direction - unsigned int RCDirection = this->params_m.template get("r2c_direction"); - for(unsigned int i=0; i < Dim; ++i) { - if (i == RCDirection) - domainComplex_m[RCDirection] = Index(nr_m[RCDirection]+1); - else - domainComplex_m[i] = Index(2 * nr_m[i]); - } - - // create mesh and layout for the real to complex FFT transformed fields - meshComplex_m = std::unique_ptr(new M(domainComplex_m, hr_m, origin)); - layoutComplex_m = std::unique_ptr(new FieldLayout_t(domainComplex_m, decomp)); - - // initialize fields - storage_field.initialize(*mesh2_m, *layout2_m); - rho2tr_m.initialize(*meshComplex_m, *layoutComplex_m); - grntr_m.initialize(*meshComplex_m, *layoutComplex_m); - - int out = this->params_m.template get("output_type"); - if (((out == Base::GRAD) || - (out == Base::SOL_AND_GRAD)) && (!isGradFD_m)) { - temp_m.initialize(*meshComplex_m, *layoutComplex_m); - } + // create domain for the real fields + domain_m = layout_mp->getDomain(); - // create the FFT object - fft_m = std::make_unique(*layout2_m, *layoutComplex_m, this->params_m); + // get the mesh spacings and sizes for each dimension + for (unsigned int i = 0; i < Dim; ++i) { + nr_m[i] = domain_m[i].length(); - // if Vico, also need to create mesh and layout for 4N Fourier domain - // on this domain, the truncated Green's function is defined - // also need to create the 4N complex grid, on which precomputation step done - if ((alg_m == "VICO") || (alg_m == "BIHARMONIC")) { + // create the doubled domain for the FFT procedure + domain2_m[i] = Index(2 * nr_m[i]); + } - // start a timer - static IpplTimings::TimerRef initialize_vico = IpplTimings::getTimer("Initialize: extra Vico"); - IpplTimings::startTimer(initialize_vico); + // define decomposition (parallel / serial) + e_dim_tag decomp[Dim]; + for (unsigned int d = 0; d < Dim; ++d) { + decomp[d] = layout_mp->getRequestedDistribution(d); + } - for (unsigned int i=0; i< Dim; ++i) { - domain4_m[i] = Index(4 * nr_m[i]); - } + // create double sized mesh and layout objects using the previously defined domain2_m + mesh2_m = std::unique_ptr(new M(domain2_m, hr_m, origin)); + layout2_m = std::unique_ptr(new FieldLayout_t(domain2_m, decomp)); + + // create the domain for the transformed (complex) fields + // since we use HeFFTe for the transforms it doesn't require permuting to the right + // one of the dimensions has only (n/2 +1) as our original fields are fully real + // the dimension is given by the user via r2c_direction + unsigned int RCDirection = this->params_m.template get("r2c_direction"); + for (unsigned int i = 0; i < Dim; ++i) { + if (i == RCDirection) + domainComplex_m[RCDirection] = Index(nr_m[RCDirection] + 1); + else + domainComplex_m[i] = Index(2 * nr_m[i]); + } + + // create mesh and layout for the real to complex FFT transformed fields + meshComplex_m = std::unique_ptr(new M(domainComplex_m, hr_m, origin)); + layoutComplex_m = + std::unique_ptr(new FieldLayout_t(domainComplex_m, decomp)); - // 4N grid - mesh4_m = std::unique_ptr(new M(domain4_m, hr_m, origin)); - layout4_m = std::unique_ptr(new FieldLayout_t(domain4_m, decomp)); - - // initialize fields - grnL_m.initialize(*mesh4_m, *layout4_m); - - // create a Complex-to-Complex FFT object to transform for layout4 - fft4n_m = std::make_unique>(*layout4_m, this->params_m); - - IpplTimings::stopTimer(initialize_vico); + // initialize fields + storage_field.initialize(*mesh2_m, *layout2_m); + rho2tr_m.initialize(*meshComplex_m, *layoutComplex_m); + grntr_m.initialize(*meshComplex_m, *layoutComplex_m); + + int out = this->params_m.template get("output_type"); + if (((out == Base::GRAD) || (out == Base::SOL_AND_GRAD)) && (!isGradFD_m)) { + temp_m.initialize(*meshComplex_m, *layoutComplex_m); + } + + // create the FFT object + fft_m = std::make_unique(*layout2_m, *layoutComplex_m, this->params_m); + + // if Vico, also need to create mesh and layout for 4N Fourier domain + // on this domain, the truncated Green's function is defined + // also need to create the 4N complex grid, on which precomputation step done + if ((alg_m == "VICO") || (alg_m == "BIHARMONIC")) { + // start a timer + static IpplTimings::TimerRef initialize_vico = + IpplTimings::getTimer("Initialize: extra Vico"); + IpplTimings::startTimer(initialize_vico); + + for (unsigned int i = 0; i < Dim; ++i) { + domain4_m[i] = Index(4 * nr_m[i]); } + // 4N grid + mesh4_m = std::unique_ptr(new M(domain4_m, hr_m, origin)); + layout4_m = std::unique_ptr(new FieldLayout_t(domain4_m, decomp)); - // these are fields that are used for calculating the Green's function for Hockney - if (alg_m == "HOCKNEY") { + // initialize fields + grnL_m.initialize(*mesh4_m, *layout4_m); - // start a timer - static IpplTimings::TimerRef initialize_hockney = IpplTimings::getTimer("Initialize: extra Hockney"); - IpplTimings::startTimer(initialize_hockney); - - for (unsigned int d = 0; d < Dim; ++d) { - grnIField_m[d].initialize(*mesh2_m, *layout2_m); - - // get number of ghost points and the Kokkos view to iterate over field - auto view = grnIField_m[d].getView(); - const int nghost = grnIField_m[d].getNghost(); - const auto& ldom = layout2_m->getLocalNDIndex(); - - // the length of the physical domain - const int size = nr_m[d]; - - // Kokkos parallel for loop to initialize grnIField[d] - using mdrange_type = Kokkos::MDRangePolicy>; - switch (d) { - case 0: - Kokkos::parallel_for("Helper index Green field initialization", - mdrange_type({nghost, nghost, nghost}, - {view.extent(0)-nghost, view.extent(1)-nghost, view.extent(2)-nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k) { - - // go from local indices to global - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - // assign (index)^2 if 0 <= index < N, and (2N-index)^2 elsewhere - const bool outsideN = (ig >= size); - view(i,j,k) = (2*size*outsideN - ig) * (2*size*outsideN - ig); - - // add 1.0 if at (0,0,0) to avoid singularity - const bool isOrig = ((ig == 0) && (jg == 0) && (kg == 0)); - view(i,j,k) += isOrig * 1.0; - }); - break; - case 1: - Kokkos::parallel_for("Helper index Green field initialization", - mdrange_type({nghost, nghost, nghost}, - {view.extent(0)-nghost, view.extent(1)-nghost, view.extent(2)-nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k) { - - // go from local indices to global - const int jg = j + ldom[1].first() - nghost; - - // assign (index)^2 if 0 <= index < N, and (2N-index)^2 elsewhere - const bool outsideN = (jg >= size); - view(i,j,k) = (2*size*outsideN - jg) * (2*size*outsideN - jg); - }); - break; - case 2: - Kokkos::parallel_for("Helper index Green field initialization", - mdrange_type({nghost, nghost, nghost}, - {view.extent(0)-nghost, view.extent(1)-nghost, view.extent(2)-nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k) { - - // go from local indices to global - const int kg = k + ldom[2].first() - nghost; - - // assign (index)^2 if 0 <= index < N, and (2N-index)^2 elsewhere - const bool outsideN = (kg >= size); - view(i,j,k) = (2*size*outsideN - kg) * (2*size*outsideN - kg); - }); - break; - } + // create a Complex-to-Complex FFT object to transform for layout4 + fft4n_m = std::make_unique>(*layout4_m, this->params_m); + + IpplTimings::stopTimer(initialize_vico); + } + + // these are fields that are used for calculating the Green's function for Hockney + if (alg_m == "HOCKNEY") { + // start a timer + static IpplTimings::TimerRef initialize_hockney = + IpplTimings::getTimer("Initialize: extra Hockney"); + IpplTimings::startTimer(initialize_hockney); + + for (unsigned int d = 0; d < Dim; ++d) { + grnIField_m[d].initialize(*mesh2_m, *layout2_m); + + // get number of ghost points and the Kokkos view to iterate over field + auto view = grnIField_m[d].getView(); + const int nghost = grnIField_m[d].getNghost(); + const auto& ldom = layout2_m->getLocalNDIndex(); + + // the length of the physical domain + const int size = nr_m[d]; + + // Kokkos parallel for loop to initialize grnIField[d] + using mdrange_type = Kokkos::MDRangePolicy>; + switch (d) { + case 0: + Kokkos::parallel_for( + "Helper index Green field initialization", + mdrange_type({nghost, nghost, nghost}, + {view.extent(0) - nghost, view.extent(1) - nghost, + view.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // go from local indices to global + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + // assign (index)^2 if 0 <= index < N, and (2N-index)^2 elsewhere + const bool outsideN = (ig >= size); + view(i, j, k) = + (2 * size * outsideN - ig) * (2 * size * outsideN - ig); + + // add 1.0 if at (0,0,0) to avoid singularity + const bool isOrig = ((ig == 0) && (jg == 0) && (kg == 0)); + view(i, j, k) += isOrig * 1.0; + }); + break; + case 1: + Kokkos::parallel_for( + "Helper index Green field initialization", + mdrange_type({nghost, nghost, nghost}, + {view.extent(0) - nghost, view.extent(1) - nghost, + view.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // go from local indices to global + const int jg = j + ldom[1].first() - nghost; + + // assign (index)^2 if 0 <= index < N, and (2N-index)^2 elsewhere + const bool outsideN = (jg >= size); + view(i, j, k) = + (2 * size * outsideN - jg) * (2 * size * outsideN - jg); + }); + break; + case 2: + Kokkos::parallel_for( + "Helper index Green field initialization", + mdrange_type({nghost, nghost, nghost}, + {view.extent(0) - nghost, view.extent(1) - nghost, + view.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // go from local indices to global + const int kg = k + ldom[2].first() - nghost; + + // assign (index)^2 if 0 <= index < N, and (2N-index)^2 elsewhere + const bool outsideN = (kg >= size); + view(i, j, k) = + (2 * size * outsideN - kg) * (2 * size * outsideN - kg); + }); + break; } - IpplTimings::stopTimer(initialize_hockney); } + IpplTimings::stopTimer(initialize_hockney); + } + + static IpplTimings::TimerRef warmup = IpplTimings::getTimer("Warmup"); + IpplTimings::startTimer(warmup); + + fft_m->transform(+1, rho2_mr, rho2tr_m); + if ((alg_m == "VICO") || (alg_m == "BIHARMONIC")) { + fft4n_m->transform(+1, grnL_m); + } + + IpplTimings::stopTimer(warmup); - static IpplTimings::TimerRef warmup = IpplTimings::getTimer("Warmup"); - IpplTimings::startTimer(warmup); + rho2_mr = 0.0; + rho2tr_m = 0.0; + grnL_m = 0.0; - fft_m->transform(+1, rho2_mr, rho2tr_m); - if ((alg_m == "VICO") || (alg_m == "BIHARMONIC")) { - fft4n_m->transform(+1, grnL_m); + // call greensFunction and we will get the transformed G in the class attribute + // this is done in initialization so that we already have the precomputed fct + // for all timesteps (green's fct will only change if mesh size changes) + static IpplTimings::TimerRef ginit = IpplTimings::getTimer("Green Init"); + IpplTimings::startTimer(ginit); + + greensFunction(); + + IpplTimings::stopTimer(ginit); + }; + + ///////////////////////////////////////////////////////////////////////// + // compute electric potential by solving Poisson's eq given a field rho and mesh spacings hr + template + void FFTPoissonSolver::solve() { + // start a timer + static IpplTimings::TimerRef solve = IpplTimings::getTimer("Solve"); + IpplTimings::startTimer(solve); + + // get the output type (sol, grad, or sol & grad) + const int out = this->params_m.template get("output_type"); + + // set the mesh & spacing, which may change each timestep + mesh_mp = &(this->rhs_mp->get_mesh()); + + // check whether the mesh spacing has changed with respect to the old one + // if yes, update and set green flag to true + bool green = false; + for (unsigned int i = 0; i < Dim; ++i) { + if (hr_m[i] != mesh_mp->getMeshSpacing(i)) { + hr_m[i] = mesh_mp->getMeshSpacing(i); + green = true; } + } - IpplTimings::stopTimer(warmup); + // set mesh spacing on the other grids again + mesh2_m->setMeshSpacing(hr_m); + meshComplex_m->setMeshSpacing(hr_m); - rho2_mr = 0.0; - rho2tr_m = 0.0; - grnL_m = 0.0; + // field object on the doubled grid; zero-padded + rho2_mr = 0.0; - // call greensFunction and we will get the transformed G in the class attribute - // this is done in initialization so that we already have the precomputed fct - // for all timesteps (green's fct will only change if mesh size changes) - static IpplTimings::TimerRef ginit = IpplTimings::getTimer("Green Init"); - IpplTimings::startTimer(ginit); + // start a timer + static IpplTimings::TimerRef stod = IpplTimings::getTimer("Solve: Physical to double"); + IpplTimings::startTimer(stod); - greensFunction(); + // store rho (RHS) in the lower left quadrant of the doubled grid + // with or without communication (if only 1 rank) - IpplTimings::stopTimer(ginit); - }; + using mdrange_type = Kokkos::MDRangePolicy>; + const int ranks = Ippl::Comm->size(); - ///////////////////////////////////////////////////////////////////////// - // compute electric potential by solving Poisson's eq given a field rho and mesh spacings hr - template - void - FFTPoissonSolver::solve() { - - // start a timer - static IpplTimings::TimerRef solve = IpplTimings::getTimer("Solve"); - IpplTimings::startTimer(solve); + auto view2 = rho2_mr.getView(); + auto view1 = this->rhs_mp->getView(); - // get the output type (sol, grad, or sol & grad) - const int out = this->params_m.template get("output_type"); + const int nghost2 = rho2_mr.getNghost(); + const int nghost1 = this->rhs_mp->getNghost(); - // set the mesh & spacing, which may change each timestep - mesh_mp = &(this->rhs_mp->get_mesh()); + const auto& ldom2 = layout2_m->getLocalNDIndex(); + const auto& ldom1 = layout_mp->getLocalNDIndex(); - // check whether the mesh spacing has changed with respect to the old one - // if yes, update and set green flag to true - bool green = false; - for (unsigned int i = 0; i < Dim; ++i) { - if (hr_m[i] != mesh_mp->getMeshSpacing(i)) { - hr_m[i] = mesh_mp->getMeshSpacing(i); - green = true; + if (ranks > 1) { + // COMMUNICATION + const auto& lDomains2 = layout2_m->getHostLocalDomains(); + + // send + std::vector requests(0); + + for (int i = 0; i < ranks; ++i) { + if (lDomains2[i].touches(ldom1)) { + auto intersection = lDomains2[i].intersect(ldom1); + + requests.resize(requests.size() + 1); + + Communicate::size_type nsends; + pack(intersection, view1, fd_m, nghost1, ldom1, nsends); + + buffer_type buf = Ippl::Comm->getBuffer(IPPL_SOLVER_SEND + i, nsends); + + Ippl::Comm->isend(i, OPEN_SOLVER_TAG, fd_m, *buf, requests.back(), nsends); + buf->resetWritePos(); } } - // set mesh spacing on the other grids again - mesh2_m->setMeshSpacing(hr_m); - meshComplex_m->setMeshSpacing(hr_m); + // receive + const auto& lDomains1 = layout_mp->getHostLocalDomains(); + int myRank = Ippl::Comm->rank(); - // field object on the doubled grid; zero-padded - rho2_mr = 0.0; + for (int i = 0; i < ranks; ++i) { + if (lDomains1[i].touches(ldom2)) { + auto intersection = lDomains1[i].intersect(ldom2); - // start a timer - static IpplTimings::TimerRef stod = IpplTimings::getTimer("Solve: Physical to double"); - IpplTimings::startTimer(stod); + Communicate::size_type nrecvs; + nrecvs = intersection.size(); - // store rho (RHS) in the lower left quadrant of the doubled grid - // with or without communication (if only 1 rank) - - using mdrange_type = Kokkos::MDRangePolicy>; + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_SOLVER_RECV + myRank, nrecvs); - const int ranks = Ippl::Comm->size(); + Ippl::Comm->recv(i, OPEN_SOLVER_TAG, fd_m, *buf, nrecvs * sizeof(double), + nrecvs); + buf->resetReadPos(); - auto view2 = rho2_mr.getView(); - auto view1 = this->rhs_mp->getView(); - - const int nghost2 = rho2_mr.getNghost(); - const int nghost1 = this->rhs_mp->getNghost(); - - const auto& ldom2 = layout2_m->getLocalNDIndex(); - const auto& ldom1 = layout_mp->getLocalNDIndex(); - - if (ranks > 1) { + unpack(intersection, view2, fd_m, nghost2, ldom2); + } + } + + // wait for all messages to be received + if (requests.size() > 0) { + MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); + } + Ippl::Comm->barrier(); + + } else { + Kokkos::parallel_for( + "Write rho on the doubled grid", + mdrange_type({nghost1, nghost1, nghost1}, + {view1.extent(0) - nghost1, view1.extent(1) - nghost1, + view1.extent(2) - nghost1}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + const size_t ig2 = i + ldom2[0].first() - nghost2; + const size_t jg2 = j + ldom2[1].first() - nghost2; + const size_t kg2 = k + ldom2[2].first() - nghost2; + + const size_t ig1 = i + ldom1[0].first() - nghost1; + const size_t jg1 = j + ldom1[1].first() - nghost1; + const size_t kg1 = k + ldom1[2].first() - nghost1; + + // write physical rho on [0,N-1] of doubled field + const bool isQuadrant1 = ((ig1 == ig2) && (jg1 == jg2) && (kg1 == kg2)); + view2(i, j, k) = view1(i, j, k) * isQuadrant1; + }); + } + + IpplTimings::stopTimer(stod); + + // start a timer + static IpplTimings::TimerRef fftrho = IpplTimings::getTimer("FFT: Rho"); + IpplTimings::startTimer(fftrho); + + // forward FFT of the charge density field on doubled grid + fft_m->transform(+1, rho2_mr, rho2tr_m); + + IpplTimings::stopTimer(fftrho); + + // call greensFunction to recompute if the mesh spacing has changed + if (green) { + greensFunction(); + } + + // multiply FFT(rho2)*FFT(green) + // convolution becomes multiplication in FFT + rho2tr_m = rho2tr_m * grntr_m; + + // if output_type is SOL or SOL_AND_GRAD, we caculate solution + if ((out == Base::SOL) || (out == Base::SOL_AND_GRAD)) { + // start a timer + static IpplTimings::TimerRef fftc = IpplTimings::getTimer("FFT: Convolution"); + IpplTimings::startTimer(fftc); + + // inverse FFT of the product and store the electrostatic potential in rho2_mr + fft_m->transform(-1, rho2_mr, rho2tr_m); + + IpplTimings::stopTimer(fftc); + + // Hockney: multiply the rho2_mr field by the total number of points to account for + // double counting (rho and green) of normalization factor in forward transform + // also multiply by the mesh spacing^3 (to account for discretization) + // Vico: need to multiply by normalization factor of 1/4N^3, + // since only backward transform was performed on the 4N grid + for (unsigned int i = 0; i < Dim; ++i) { + if ((alg_m == "VICO") || (alg_m == "BIHARMONIC")) + rho2_mr = rho2_mr * 2.0 * (1.0 / 4.0); + else + rho2_mr = rho2_mr * 2.0 * nr_m[i] * hr_m[i]; + } + + // start a timer + static IpplTimings::TimerRef dtos = IpplTimings::getTimer("Solve: Double to physical"); + IpplTimings::startTimer(dtos); + + // get the physical part only --> physical electrostatic potential is now given in RHS + // need communication if more than one rank + if (ranks > 1) { // COMMUNICATION - const auto& lDomains2 = layout2_m->getHostLocalDomains(); // send + const auto& lDomains1 = layout_mp->getHostLocalDomains(); + std::vector requests(0); for (int i = 0; i < ranks; ++i) { - if (lDomains2[i].touches(ldom1)) { - auto intersection = lDomains2[i].intersect(ldom1); - + if (lDomains1[i].touches(ldom2)) { + auto intersection = lDomains1[i].intersect(ldom2); + requests.resize(requests.size() + 1); - + Communicate::size_type nsends; - pack(intersection, view1, fd_m, nghost1, ldom1, nsends); + pack(intersection, view2, fd_m, nghost2, ldom2, nsends); - buffer_type buf = Ippl::Comm->getBuffer(IPPL_SOLVER_SEND + i, nsends); + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_SOLVER_SEND + i, nsends); Ippl::Comm->isend(i, OPEN_SOLVER_TAG, fd_m, *buf, requests.back(), nsends); buf->resetWritePos(); @@ -506,25 +621,27 @@ namespace ippl { } // receive - const auto& lDomains1 = layout_mp->getHostLocalDomains(); - int myRank = Ippl::Comm->rank(); + const auto& lDomains2 = layout2_m->getHostLocalDomains(); + int myRank = Ippl::Comm->rank(); for (int i = 0; i < ranks; ++i) { - if(lDomains1[i].touches(ldom2)) { - auto intersection = lDomains1[i].intersect(ldom2); - + if (ldom1.touches(lDomains2[i])) { + auto intersection = ldom1.intersect(lDomains2[i]); + Communicate::size_type nrecvs; nrecvs = intersection.size(); - buffer_type buf = Ippl::Comm->getBuffer(IPPL_SOLVER_RECV + myRank, nrecvs); + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_SOLVER_RECV + myRank, nrecvs); - Ippl::Comm->recv(i, OPEN_SOLVER_TAG, fd_m, *buf, nrecvs * sizeof(double), nrecvs); + Ippl::Comm->recv(i, OPEN_SOLVER_TAG, fd_m, *buf, nrecvs * sizeof(double), + nrecvs); buf->resetReadPos(); - unpack(intersection, view2, fd_m, nghost2, ldom2); + unpack(intersection, view1, fd_m, nghost1, ldom1); } } - + // wait for all messages to be received if (requests.size() > 0) { MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); @@ -532,121 +649,160 @@ namespace ippl { Ippl::Comm->barrier(); } else { - Kokkos::parallel_for("Write rho on the doubled grid", - mdrange_type({nghost1, nghost1, nghost1}, - {view1.extent(0)-nghost1, view1.extent(1)-nghost1, view1.extent(2)-nghost1}), - KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { - - const size_t ig2 = i + ldom2[0].first() - nghost2; - const size_t jg2 = j + ldom2[1].first() - nghost2; - const size_t kg2 = k + ldom2[2].first() - nghost2; - - const size_t ig1 = i + ldom1[0].first() - nghost1; - const size_t jg1 = j + ldom1[1].first() - nghost1; - const size_t kg1 = k + ldom1[2].first() - nghost1; - - // write physical rho on [0,N-1] of doubled field - const bool isQuadrant1 = ((ig1==ig2) && (jg1==jg2) && (kg1==kg2)); - view2(i,j,k) = view1(i,j,k)*isQuadrant1; - }); + Kokkos::parallel_for( + "Write the solution into the LHS on physical grid", + mdrange_type({nghost1, nghost1, nghost1}, + {view1.extent(0) - nghost1, view1.extent(1) - nghost1, + view1.extent(2) - nghost1}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + const int ig2 = i + ldom2[0].first() - nghost2; + const int jg2 = j + ldom2[1].first() - nghost2; + const int kg2 = k + ldom2[2].first() - nghost2; + + const int ig = i + ldom1[0].first() - nghost1; + const int jg = j + ldom1[1].first() - nghost1; + const int kg = k + ldom1[2].first() - nghost1; + + // take [0,N-1] as physical solution + const bool isQuadrant1 = ((ig == ig2) && (jg == jg2) && (kg == kg2)); + view1(i, j, k) = view2(i, j, k) * isQuadrant1; + }); } + IpplTimings::stopTimer(dtos); + } - IpplTimings::stopTimer(stod); + // if we want gradient of phi = Efield instead of doing grad in Fourier domain + // this is only possible if SOL_AND_GRAD is output type + if (isGradFD_m && (out == Base::SOL_AND_GRAD)) { + *(this->lhs_mp) = -grad(*this->rhs_mp); + } + // if output_type is GRAD or SOL_AND_GRAD, we calculate E-field (gradient in Fourier domain) + if (((out == Base::GRAD) || (out == Base::SOL_AND_GRAD)) && (!isGradFD_m)) { // start a timer - static IpplTimings::TimerRef fftrho = IpplTimings::getTimer("FFT: Rho"); - IpplTimings::startTimer(fftrho); + static IpplTimings::TimerRef efield = IpplTimings::getTimer("Solve: Electric field"); + IpplTimings::startTimer(efield); + + // get E field view (LHS) + auto viewL = this->lhs_mp->getView(); + const int nghostL = this->lhs_mp->getNghost(); + + // get rho2tr_m view (as we want to multiply by ik then transform) + auto viewR = rho2tr_m.getView(); + const int nghostR = rho2tr_m.getNghost(); + const auto& ldomR = layoutComplex_m->getLocalNDIndex(); + + // use temp_m as a temporary complex field + auto view_g = temp_m.getView(); + + // define some constants + const double pi = std::acos(-1.0); + const Kokkos::complex I = {0.0, 1.0}; + + // define some member variables in local scope for the parallel_for + Vector_t hsize = hr_m; + Vector N = nr_m; + + // loop over each component (E = vector field) + for (size_t gd = 0; gd < Dim; ++gd) { + // loop over rho2tr_m to multiply by -ik (gradient in Fourier space) + Kokkos::parallel_for( + "Gradient - E field", + mdrange_type({nghostR, nghostR, nghostR}, + {viewR.extent(0) - nghostR, viewR.extent(1) - nghostR, + viewR.extent(2) - nghostR}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // global indices for 2N rhotr_m + const int ig = i + ldomR[0].first() - nghostR; + const int jg = j + ldomR[1].first() - nghostR; + const int kg = k + ldomR[2].first() - nghostR; - // forward FFT of the charge density field on doubled grid - fft_m->transform(+1, rho2_mr, rho2tr_m); + Vector iVec = {ig, jg, kg}; + Vector_t kVec; - IpplTimings::stopTimer(fftrho); - - // call greensFunction to recompute if the mesh spacing has changed - if (green) { - greensFunction(); - } + for (size_t d = 0; d < Dim; ++d) { + const double Len = N[d] * hsize[d]; + const bool shift = (iVec[d] > N[d]); + const bool notMid = (iVec[d] != N[d]); + + kVec[d] = notMid * (pi / Len) * (iVec[d] - shift * 2 * N[d]); + } + + const double Dr = kVec[0] * kVec[0] + kVec[1] * kVec[1] + kVec[2] * kVec[2]; - // multiply FFT(rho2)*FFT(green) - // convolution becomes multiplication in FFT - rho2tr_m = rho2tr_m * grntr_m; - - // if output_type is SOL or SOL_AND_GRAD, we caculate solution - if ((out == Base::SOL) || (out == Base::SOL_AND_GRAD)) { + const bool isNotZero = (Dr != 0.0); + view_g(i, j, k) = -isNotZero * (I * kVec[gd]) * viewR(i, j, k); + }); // start a timer - static IpplTimings::TimerRef fftc = IpplTimings::getTimer("FFT: Convolution"); - IpplTimings::startTimer(fftc); + static IpplTimings::TimerRef ffte = IpplTimings::getTimer("FFT: Efield"); + IpplTimings::startTimer(ffte); - // inverse FFT of the product and store the electrostatic potential in rho2_mr - fft_m->transform(-1, rho2_mr, rho2tr_m); + // transform to get E-field + fft_m->transform(-1, rho2_mr, temp_m); - IpplTimings::stopTimer(fftc); + IpplTimings::stopTimer(ffte); - // Hockney: multiply the rho2_mr field by the total number of points to account for - // double counting (rho and green) of normalization factor in forward transform - // also multiply by the mesh spacing^3 (to account for discretization) - // Vico: need to multiply by normalization factor of 1/4N^3, - // since only backward transform was performed on the 4N grid + // apply proper normalization for (unsigned int i = 0; i < Dim; ++i) { if ((alg_m == "VICO") || (alg_m == "BIHARMONIC")) - rho2_mr = rho2_mr * 2.0 * (1.0/4.0); - else + rho2_mr = rho2_mr * 2.0 * (1.0 / 4.0); + else rho2_mr = rho2_mr * 2.0 * nr_m[i] * hr_m[i]; } - // start a timer - static IpplTimings::TimerRef dtos = IpplTimings::getTimer("Solve: Double to physical"); - IpplTimings::startTimer(dtos); + // start a timer + static IpplTimings::TimerRef edtos = + IpplTimings::getTimer("Efield: double to phys."); + IpplTimings::startTimer(edtos); - // get the physical part only --> physical electrostatic potential is now given in RHS - // need communication if more than one rank - + // restrict to physical grid (N^3) and assign to LHS (E-field) + // communication needed if more than one rank if (ranks > 1) { - // COMMUNICATION - + // send const auto& lDomains1 = layout_mp->getHostLocalDomains(); - std::vector requests(0); for (int i = 0; i < ranks; ++i) { if (lDomains1[i].touches(ldom2)) { - auto intersection = lDomains1[i].intersect(ldom2); - + requests.resize(requests.size() + 1); - + Communicate::size_type nsends; pack(intersection, view2, fd_m, nghost2, ldom2, nsends); - buffer_type buf = Ippl::Comm->getBuffer(IPPL_SOLVER_SEND + i, nsends); + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_SOLVER_SEND + i, nsends); - Ippl::Comm->isend(i, OPEN_SOLVER_TAG, fd_m, *buf, requests.back(), nsends); + Ippl::Comm->isend(i, OPEN_SOLVER_TAG, fd_m, *buf, requests.back(), + nsends); buf->resetWritePos(); } } // receive const auto& lDomains2 = layout2_m->getHostLocalDomains(); - int myRank = Ippl::Comm->rank(); + int myRank = Ippl::Comm->rank(); for (int i = 0; i < ranks; ++i) { - if(ldom1.touches(lDomains2[i])) { - + if (ldom1.touches(lDomains2[i])) { auto intersection = ldom1.intersect(lDomains2[i]); - + Communicate::size_type nrecvs; nrecvs = intersection.size(); - buffer_type buf = Ippl::Comm->getBuffer(IPPL_SOLVER_RECV + myRank, nrecvs); + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_SOLVER_RECV + myRank, nrecvs); - Ippl::Comm->recv(i, OPEN_SOLVER_TAG, fd_m, *buf, nrecvs * sizeof(double), nrecvs); + Ippl::Comm->recv(i, OPEN_SOLVER_TAG, fd_m, *buf, + nrecvs * sizeof(double), nrecvs); buf->resetReadPos(); - unpack(intersection, view1, fd_m, nghost1, ldom1); - } + unpack(intersection, viewL, gd, fd_m, nghostL, ldom1); + } } // wait for all messages to be received @@ -656,969 +812,816 @@ namespace ippl { Ippl::Comm->barrier(); } else { - Kokkos::parallel_for("Write the solution into the LHS on physical grid", - mdrange_type({nghost1, nghost1, nghost1}, - {view1.extent(0)-nghost1, view1.extent(1)-nghost1, view1.extent(2)-nghost1}), + Kokkos::parallel_for( + "Write the E-field on physical grid", + mdrange_type({nghostL, nghostL, nghostL}, + {viewL.extent(0) - nghostL, viewL.extent(1) - nghostL, + viewL.extent(2) - nghostL}), KOKKOS_LAMBDA(const int i, const int j, const int k) { - const int ig2 = i + ldom2[0].first() - nghost2; const int jg2 = j + ldom2[1].first() - nghost2; const int kg2 = k + ldom2[2].first() - nghost2; - const int ig = i + ldom1[0].first() - nghost1; - const int jg = j + ldom1[1].first() - nghost1; - const int kg = k + ldom1[2].first() - nghost1; + const int ig = i + ldom1[0].first() - nghostL; + const int jg = j + ldom1[1].first() - nghostL; + const int kg = k + ldom1[2].first() - nghostL; // take [0,N-1] as physical solution - const bool isQuadrant1 = ((ig==ig2) && (jg==jg2) && (kg==kg2)); - view1(i,j,k) = view2(i,j,k)*isQuadrant1; - }); + const bool isQuadrant1 = ((ig == ig2) && (jg == jg2) && (kg == kg2)); + viewL(i, j, k)[gd] = view2(i, j, k) * isQuadrant1; + }); } - IpplTimings::stopTimer(dtos); + IpplTimings::stopTimer(edtos); + } + IpplTimings::stopTimer(efield); + } + IpplTimings::stopTimer(solve); + }; + + //////////////////////////////////////////////////////////////////////// + // calculate FFT of the Green's function + + template + void FFTPoissonSolver::greensFunction() { + const double pi = std::acos(-1.0); + grn_mr = 0.0; + + if ((alg_m == "VICO") || (alg_m == "BIHARMONIC")) { + Vector_t l(hr_m * nr_m); + Vector_t hs_m; + double L_sum(0.0); + + // compute length of the physical domain + // compute Fourier domain spacing + for (unsigned int i = 0; i < Dim; ++i) { + hs_m[i] = pi * 0.5 / l[i]; + L_sum = L_sum + l[i] * l[i]; } - // if we want gradient of phi = Efield instead of doing grad in Fourier domain - // this is only possible if SOL_AND_GRAD is output type - if (isGradFD_m && (out == Base::SOL_AND_GRAD)) { - *(this->lhs_mp) = -grad(*this->rhs_mp); + // define the origin of the 4N grid + Vector_t origin; + + for (unsigned int i = 0; i < Dim; ++i) { + origin[i] = -2 * nr_m[i] * pi / l[i]; } - //if output_type is GRAD or SOL_AND_GRAD, we calculate E-field (gradient in Fourier domain) - if (((out == Base::GRAD) || (out == Base::SOL_AND_GRAD)) && (!isGradFD_m)) { + // set mesh for the 4N mesh + mesh4_m->setMeshSpacing(hs_m); - // start a timer - static IpplTimings::TimerRef efield = IpplTimings::getTimer("Solve: Electric field"); - IpplTimings::startTimer(efield); - - // get E field view (LHS) - auto viewL = this->lhs_mp->getView(); - const int nghostL = this->lhs_mp->getNghost(); - - // get rho2tr_m view (as we want to multiply by ik then transform) - auto viewR = rho2tr_m.getView(); - const int nghostR = rho2tr_m.getNghost(); - const auto& ldomR = layoutComplex_m->getLocalNDIndex(); - - // use temp_m as a temporary complex field - auto view_g = temp_m.getView(); - - // define some constants - const double pi = std::acos(-1.0); - const Kokkos::complex I = {0.0, 1.0}; - - // define some member variables in local scope for the parallel_for - Vector_t hsize = hr_m; - Vector N = nr_m; - - // loop over each component (E = vector field) - for (size_t gd = 0; gd < Dim; ++gd) { - - // loop over rho2tr_m to multiply by -ik (gradient in Fourier space) - Kokkos::parallel_for("Gradient - E field", - mdrange_type({nghostR, nghostR, nghostR}, - {viewR.extent(0)-nghostR, viewR.extent(1)-nghostR, viewR.extent(2)-nghostR}), - KOKKOS_LAMBDA(const int i, const int j, const int k) { + // size of truncation window + L_sum = std::sqrt(L_sum); + L_sum = 1.1 * L_sum; + + // initialize grnL_m + typename CxField_t::view_type view_g = grnL_m.getView(); + const int nghost_g = grnL_m.getNghost(); + const auto& ldom_g = layout4_m->getLocalNDIndex(); + + Vector size = nr_m; + + // Kokkos parallel for loop to assign analytic grnL_m + using mdrange_type = Kokkos::MDRangePolicy>; + + if (alg_m == "VICO") { + Kokkos::parallel_for( + "Initialize Green's function ", + mdrange_type({nghost_g, nghost_g, nghost_g}, + {view_g.extent(0) - nghost_g, view_g.extent(1) - nghost_g, + view_g.extent(2) - nghost_g}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // go from local indices to global + const int ig = i + ldom_g[0].first() - nghost_g; + const int jg = j + ldom_g[1].first() - nghost_g; + const int kg = k + ldom_g[2].first() - nghost_g; - // global indices for 2N rhotr_m - const int ig = i + ldomR[0].first() - nghostR; - const int jg = j + ldomR[1].first() - nghostR; - const int kg = k + ldomR[2].first() - nghostR; + bool isOutside = (ig > 2 * size[0] - 1); + const double t = ig * hs_m[0] + isOutside * origin[0]; - Vector iVec = {ig, jg, kg}; - Vector_t kVec; + isOutside = (jg > 2 * size[1] - 1); + const double u = jg * hs_m[1] + isOutside * origin[1]; - for(size_t d = 0; d < Dim; ++d) { + isOutside = (kg > 2 * size[2] - 1); + const double v = kg * hs_m[2] + isOutside * origin[2]; - const double Len = N[d]*hsize[d]; - const bool shift = (iVec[d] > N[d]); - const bool notMid = (iVec[d] != N[d]); + double s = (t * t) + (u * u) + (v * v); + s = std::sqrt(s); - kVec[d] = notMid * (pi / Len) * (iVec[d] - shift*2*N[d]); - } + // assign the green's function value + // if (0,0,0), assign L^2/2 (analytical limit of sinc) - const double Dr = kVec[0] * kVec[0] + kVec[1] * kVec[1] + kVec[2] * kVec[2]; + const bool isOrig = ((ig == 0 && jg == 0 && kg == 0)); + const double analyticLim = -L_sum * L_sum * 0.5; + const double value = -2.0 * (std::sin(0.5 * L_sum * s) / (s + isOrig * 1.0)) + * (std::sin(0.5 * L_sum * s) / (s + isOrig * 1.0)); - const bool isNotZero = (Dr != 0.0); - view_g(i,j,k) = - isNotZero * (I * kVec[gd])*viewR(i,j,k); + view_g(i, j, k) = (!isOrig) * value + isOrig * analyticLim; }); - // start a timer - static IpplTimings::TimerRef ffte = IpplTimings::getTimer("FFT: Efield"); - IpplTimings::startTimer(ffte); - - // transform to get E-field - fft_m->transform(-1, rho2_mr, temp_m); - - IpplTimings::stopTimer(ffte); - - // apply proper normalization - for (unsigned int i = 0; i < Dim; ++i) { - if ((alg_m == "VICO") || (alg_m == "BIHARMONIC")) - rho2_mr = rho2_mr * 2.0 * (1.0/4.0) ; - else - rho2_mr = rho2_mr * 2.0 * nr_m[i] * hr_m[i]; - } + } else if (alg_m == "BIHARMONIC") { + Kokkos::parallel_for( + "Initialize Green's function ", + mdrange_type({nghost_g, nghost_g, nghost_g}, + {view_g.extent(0) - nghost_g, view_g.extent(1) - nghost_g, + view_g.extent(2) - nghost_g}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // go from local indices to global + const int ig = i + ldom_g[0].first() - nghost_g; + const int jg = j + ldom_g[1].first() - nghost_g; + const int kg = k + ldom_g[2].first() - nghost_g; - // start a timer - static IpplTimings::TimerRef edtos = IpplTimings::getTimer("Efield: double to phys."); - IpplTimings::startTimer(edtos); + bool isOutside = (ig > 2 * size[0] - 1); + const double t = ig * hs_m[0] + isOutside * origin[0]; - // restrict to physical grid (N^3) and assign to LHS (E-field) - // communication needed if more than one rank - if (ranks > 1) { + isOutside = (jg > 2 * size[1] - 1); + const double u = jg * hs_m[1] + isOutside * origin[1]; - // COMMUNICATION - - // send - const auto& lDomains1 = layout_mp->getHostLocalDomains(); - std::vector requests(0); + isOutside = (kg > 2 * size[2] - 1); + const double v = kg * hs_m[2] + isOutside * origin[2]; - for (int i = 0; i < ranks; ++i) { - if (lDomains1[i].touches(ldom2)) { + double s = (t * t) + (u * u) + (v * v); + s = std::sqrt(s); - auto intersection = lDomains1[i].intersect(ldom2); - - requests.resize(requests.size() + 1); - - Communicate::size_type nsends; - pack(intersection, view2, fd_m, nghost2, ldom2, nsends); + // assign value and replace with analytic limit at origin (0,0,0) - buffer_type buf = Ippl::Comm->getBuffer(IPPL_SOLVER_SEND + i, nsends); + const bool isOrig = ((ig == 0 && jg == 0 && kg == 0)); + const double analyticLim = -L_sum * L_sum * L_sum * L_sum / 8.0; + const double value = -((2 - (L_sum * L_sum * s * s)) * std::cos(L_sum * s) + + 2 * L_sum * s * std::sin(L_sum * s) - 2) + / (2 * s * s * s * s + isOrig * 1.0); - Ippl::Comm->isend(i, OPEN_SOLVER_TAG, fd_m, *buf, requests.back(), nsends); - buf->resetWritePos(); - } - } + view_g(i, j, k) = (!isOrig) * value + isOrig * analyticLim; + }); + } - // receive - const auto& lDomains2 = layout2_m->getHostLocalDomains(); - int myRank = Ippl::Comm->rank(); + // start a timer + static IpplTimings::TimerRef fft4 = IpplTimings::getTimer("FFT: Precomputation"); + IpplTimings::startTimer(fft4); - for (int i = 0; i < ranks; ++i) { - if(ldom1.touches(lDomains2[i])) { + // inverse Fourier transform of the green's function for precomputation + fft4n_m->transform(-1, grnL_m); - auto intersection = ldom1.intersect(lDomains2[i]); - - Communicate::size_type nrecvs; - nrecvs = intersection.size(); + IpplTimings::stopTimer(fft4); - buffer_type buf = Ippl::Comm->getBuffer(IPPL_SOLVER_RECV + myRank, nrecvs); + // Restrict transformed grnL_m to 2N domain after precomputation step - Ippl::Comm->recv(i, OPEN_SOLVER_TAG, fd_m, *buf, nrecvs * sizeof(double), nrecvs); - buf->resetReadPos(); + // get the field data first + typename Field_t::view_type view = grn_mr.getView(); + const int nghost = grn_mr.getNghost(); + const auto& ldom = layout2_m->getLocalNDIndex(); - unpack(intersection, viewL, gd, fd_m, nghostL, ldom1); - } - } + // start a timer + static IpplTimings::TimerRef ifftshift = IpplTimings::getTimer("Vico shift loop"); + IpplTimings::startTimer(ifftshift); + + // get number of ranks to see if need communication + const int ranks = Ippl::Comm->size(); + + if (ranks > 1) { + communicateVico(size, view_g, ldom_g, nghost_g, view, ldom, nghost); + } else { + // restrict the green's function to a (2N)^3 grid from the (4N)^3 grid + Kokkos::parallel_for( + "Restrict domain of Green's function from 4N to 2N", + mdrange_type({nghost, nghost, nghost}, {view.extent(0) - nghost - size[0], + view.extent(1) - nghost - size[1], + view.extent(2) - nghost - size[2]}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // go from local indices to global + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; - // wait for all messages to be received - if (requests.size() > 0) { - MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); + const int ig2 = i + ldom_g[0].first() - nghost_g; + const int jg2 = j + ldom_g[1].first() - nghost_g; + const int kg2 = k + ldom_g[2].first() - nghost_g; + + if ((ig == ig2) && (jg == jg2) && (kg == kg2)) { + view(i, j, k) = real(view_g(i, j, k)); } - Ippl::Comm->barrier(); - - } else { - Kokkos::parallel_for("Write the E-field on physical grid", - mdrange_type({nghostL, nghostL, nghostL}, - {viewL.extent(0)-nghostL, viewL.extent(1)-nghostL, viewL.extent(2)-nghostL}), - KOKKOS_LAMBDA(const int i, const int j, const int k) { - const int ig2 = i + ldom2[0].first() - nghost2; - const int jg2 = j + ldom2[1].first() - nghost2; - const int kg2 = k + ldom2[2].first() - nghost2; - - const int ig = i + ldom1[0].first() - nghostL; - const int jg = j + ldom1[1].first() - nghostL; - const int kg = k + ldom1[2].first() - nghostL; - - // take [0,N-1] as physical solution - const bool isQuadrant1 = ((ig==ig2) && (jg==jg2) && (kg==kg2)); - viewL(i,j,k)[gd] = view2(i,j,k)*isQuadrant1; - }); - } - IpplTimings::stopTimer(edtos); - } - IpplTimings::stopTimer(efield); + // Now fill the rest of the field + const int s = 2 * size[0] - ig - 1 - ldom_g[0].first() + nghost_g; + const int p = 2 * size[1] - jg - 1 - ldom_g[1].first() + nghost_g; + const int q = 2 * size[2] - kg - 1 - ldom_g[2].first() + nghost_g; + + view(s, j, k) = real(view_g(i + 1, j, k)); + view(i, p, k) = real(view_g(i, j + 1, k)); + view(i, j, q) = real(view_g(i, j, k + 1)); + view(s, j, q) = real(view_g(i + 1, j, k + 1)); + view(s, p, k) = real(view_g(i + 1, j + 1, k)); + view(i, p, q) = real(view_g(i, j + 1, k + 1)); + view(s, p, q) = real(view_g(i + 1, j + 1, k + 1)); + }); } - IpplTimings::stopTimer(solve); - }; + IpplTimings::stopTimer(ifftshift); + } else { + // Hockney case - //////////////////////////////////////////////////////////////////////// - // calculate FFT of the Green's function + // calculate square of the mesh spacing for each dimension + Vector_t hrsq(hr_m * hr_m); - template - void - FFTPoissonSolver::greensFunction() { + // use the grnIField_m helper field to compute Green's function + for (unsigned int i = 0; i < Dim; ++i) { + grn_mr = grn_mr + grnIField_m[i] * hrsq[i]; + } - const double pi = std::acos(-1.0); - grn_mr = 0.0; + grn_mr = -1.0 / (4.0 * pi * sqrt(grn_mr)); - if ((alg_m == "VICO") || (alg_m == "BIHARMONIC")) { - - Vector_t l(hr_m * nr_m); - Vector_t hs_m; - double L_sum (0.0); + typename Field_t::view_type view = grn_mr.getView(); + const int nghost = grn_mr.getNghost(); + const auto& ldom = layout2_m->getLocalNDIndex(); - // compute length of the physical domain - // compute Fourier domain spacing - for (unsigned int i=0; i < Dim; ++i) { - hs_m[i] = pi * 0.5 / l[i]; - L_sum = L_sum + l[i]*l[i]; - } - - // define the origin of the 4N grid - Vector_t origin; - - for (unsigned int i = 0; i < Dim; ++i) { - origin[i] = -2*nr_m[i]*pi/l[i]; - } + // Kokkos parallel for loop to find (0,0,0) point and regularize + using mdrange_type = Kokkos::MDRangePolicy>; + Kokkos::parallel_for( + "Regularize Green's function ", + mdrange_type( + {nghost, nghost, nghost}, + {view.extent(0) - nghost, view.extent(1) - nghost, view.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // go from local indices to global + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + // if (0,0,0), assign to it 1/(4*pi) + const bool isOrig = (ig == 0 && jg == 0 && kg == 0); + view(i, j, k) = isOrig * (-1.0 / (4.0 * pi)) + (!isOrig) * view(i, j, k); + }); + } - // set mesh for the 4N mesh - mesh4_m->setMeshSpacing(hs_m); - - // size of truncation window - L_sum = std::sqrt(L_sum); - L_sum = 1.1 * L_sum; + // start a timer + static IpplTimings::TimerRef fftg = IpplTimings::getTimer("FFT: Green"); + IpplTimings::startTimer(fftg); - // initialize grnL_m - typename CxField_t::view_type view_g = grnL_m.getView(); - const int nghost_g = grnL_m.getNghost(); - const auto& ldom_g = layout4_m->getLocalNDIndex(); + // perform the FFT of the Green's function for the convolution + fft_m->transform(+1, grn_mr, grntr_m); - Vector size = nr_m; + IpplTimings::stopTimer(fftg); + }; - // Kokkos parallel for loop to assign analytic grnL_m - using mdrange_type = Kokkos::MDRangePolicy>; + template + void FFTPoissonSolver::communicateVico( + Vector size, typename CxField_t::view_type view_g, + const ippl::NDIndex ldom_g, const int nghost_g, typename Field_t::view_type view, + const ippl::NDIndex ldom, const int nghost) { + const auto& lDomains2 = layout2_m->getHostLocalDomains(); + const auto& lDomains4 = layout4_m->getHostLocalDomains(); - if (alg_m == "VICO") { - Kokkos::parallel_for("Initialize Green's function ", - mdrange_type({nghost_g, nghost_g, nghost_g}, - {view_g.extent(0)-nghost_g, view_g.extent(1)-nghost_g, view_g.extent(2)-nghost_g}), - KOKKOS_LAMBDA(const int i, const int j, const int k) { - - // go from local indices to global - const int ig = i + ldom_g[0].first() - nghost_g; - const int jg = j + ldom_g[1].first() - nghost_g; - const int kg = k + ldom_g[2].first() - nghost_g; + std::vector requests(0); + const int myRank = Ippl::Comm->rank(); + const int ranks = Ippl::Comm->size(); - bool isOutside = (ig > 2*size[0]-1); - const double t = ig*hs_m[0] + isOutside*origin[0]; + // 1st step: Define 8 domains corresponding to the different quadrants + ippl::NDIndex none; + for (unsigned i = 0; i < Dim; i++) { + none[i] = ippl::Index(size[i]); + } - isOutside = (jg > 2*size[1]-1); - const double u = jg*hs_m[1] + isOutside*origin[1]; + ippl::NDIndex x; + x[0] = ippl::Index(size[0], 2 * size[0] - 1); + x[1] = ippl::Index(size[1]); + x[2] = ippl::Index(size[2]); + + ippl::NDIndex y; + y[0] = ippl::Index(size[0]); + y[1] = ippl::Index(size[1], 2 * size[1] - 1); + y[2] = ippl::Index(size[2]); + + ippl::NDIndex z; + z[0] = ippl::Index(size[0]); + z[1] = ippl::Index(size[1]); + z[2] = ippl::Index(size[2], 2 * size[2] - 1); + + ippl::NDIndex xy; + xy[0] = ippl::Index(size[0], 2 * size[0] - 1); + xy[1] = ippl::Index(size[1], 2 * size[1] - 1); + xy[2] = ippl::Index(size[2]); + + ippl::NDIndex xz; + xz[0] = ippl::Index(size[0], 2 * size[0] - 1); + xz[1] = ippl::Index(size[1]); + xz[2] = ippl::Index(size[2], 2 * size[2] - 1); + + ippl::NDIndex yz; + yz[0] = ippl::Index(size[0]); + yz[1] = ippl::Index(size[1], 2 * size[1] - 1); + yz[2] = ippl::Index(size[2], 2 * size[2] - 1); + + ippl::NDIndex xyz; + for (unsigned i = 0; i < Dim; i++) { + xyz[i] = ippl::Index(size[i], 2 * size[i] - 1); + } - isOutside = (kg > 2*size[2]-1); - const double v = kg*hs_m[2] + isOutside*origin[2]; + // 2nd step: send + for (int i = 0; i < ranks; ++i) { + auto domain2 = lDomains2[i]; - double s = (t*t) + (u*u) + (v*v); - s = std::sqrt(s); + if (domain2.touches(none)) { + auto intersection = domain2.intersect(none); - // assign the green's function value - // if (0,0,0), assign L^2/2 (analytical limit of sinc) + if (ldom_g.touches(intersection)) { + intersection = intersection.intersect(ldom_g); + requests.resize(requests.size() + 1); - const bool isOrig = ((ig == 0 && jg == 0 && kg == 0)); - const double analyticLim = -L_sum * L_sum * 0.5; - const double value = -2.0 * (std::sin(0.5 * L_sum * s) / (s + isOrig*1.0)) - * (std::sin(0.5 * L_sum * s) / (s + isOrig*1.0)); + Communicate::size_type nsends; + pack(intersection, view_g, fd_m, nghost_g, ldom_g, nsends); - view_g(i,j,k) = (!isOrig) * value + isOrig * analyticLim; - }); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_SEND + i, nsends); - } else if (alg_m == "BIHARMONIC") { + int tag = VICO_SOLVER_TAG; - Kokkos::parallel_for("Initialize Green's function ", - mdrange_type({nghost_g, nghost_g, nghost_g}, - {view_g.extent(0)-nghost_g, view_g.extent(1)-nghost_g, view_g.extent(2)-nghost_g}), - KOKKOS_LAMBDA(const int i, const int j, const int k) { - - // go from local indices to global - const int ig = i + ldom_g[0].first() - nghost_g; - const int jg = j + ldom_g[1].first() - nghost_g; - const int kg = k + ldom_g[2].first() - nghost_g; + Ippl::Comm->isend(i, tag, fd_m, *buf, requests.back(), nsends); + buf->resetWritePos(); + } + } - bool isOutside = (ig > 2*size[0]-1); - const double t = ig*hs_m[0] + isOutside*origin[0]; + if (domain2.touches(x)) { + auto intersection = domain2.intersect(x); + auto xdom = ippl::Index((2 * size[0] - intersection[0].first()), + (2 * size[0] - intersection[0].last()), -1); - isOutside = (jg > 2*size[1]-1); - const double u = jg*hs_m[1] + isOutside*origin[1]; + ippl::NDIndex domain4; + domain4[0] = xdom; + domain4[1] = intersection[1]; + domain4[2] = intersection[2]; - isOutside = (kg > 2*size[2]-1); - const double v = kg*hs_m[2] + isOutside*origin[2]; + if (ldom_g.touches(domain4)) { + intersection = ldom_g.intersect(domain4); - double s = (t*t) + (u*u) + (v*v); - s = std::sqrt(s); + requests.resize(requests.size() + 1); - // assign value and replace with analytic limit at origin (0,0,0) + Communicate::size_type nsends; + pack(intersection, view_g, fd_m, nghost_g, ldom_g, nsends); - const bool isOrig = ((ig == 0 && jg == 0 && kg == 0)); - const double analyticLim = -L_sum * L_sum * L_sum * L_sum / 8.0; - const double value = -((2-(L_sum*L_sum*s*s))*std::cos(L_sum*s) - + 2*L_sum*s*std::sin(L_sum*s) - 2)/(2*s*s*s*s + isOrig * 1.0); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_SEND + 8 + i, nsends); - view_g(i,j,k) = (!isOrig) * value + isOrig * analyticLim; - }); + int tag = VICO_SOLVER_TAG + 1; + Ippl::Comm->isend(i, tag, fd_m, *buf, requests.back(), nsends); + buf->resetWritePos(); } + } - // start a timer - static IpplTimings::TimerRef fft4 = IpplTimings::getTimer("FFT: Precomputation"); - IpplTimings::startTimer(fft4); + if (domain2.touches(y)) { + auto intersection = domain2.intersect(y); + auto ydom = ippl::Index((2 * size[1] - intersection[1].first()), + (2 * size[1] - intersection[1].last()), -1); - // inverse Fourier transform of the green's function for precomputation - fft4n_m->transform(-1, grnL_m); + ippl::NDIndex domain4; + domain4[0] = intersection[0]; + domain4[1] = ydom; + domain4[2] = intersection[2]; - IpplTimings::stopTimer(fft4); + if (ldom_g.touches(domain4)) { + intersection = ldom_g.intersect(domain4); - // Restrict transformed grnL_m to 2N domain after precomputation step + requests.resize(requests.size() + 1); - // get the field data first - typename Field_t::view_type view = grn_mr.getView(); - const int nghost = grn_mr.getNghost(); - const auto& ldom = layout2_m->getLocalNDIndex(); + Communicate::size_type nsends; + pack(intersection, view_g, fd_m, nghost_g, ldom_g, nsends); - // start a timer - static IpplTimings::TimerRef ifftshift = IpplTimings::getTimer("Vico shift loop"); - IpplTimings::startTimer(ifftshift); + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_VICO_SEND + 2 * 8 + i, nsends); - // get number of ranks to see if need communication - const int ranks = Ippl::Comm->size(); + int tag = VICO_SOLVER_TAG + 2; - if (ranks > 1) { - communicateVico(size, view_g, ldom_g, nghost_g, view, ldom, nghost); - } else { + Ippl::Comm->isend(i, tag, fd_m, *buf, requests.back(), nsends); + buf->resetWritePos(); + } + } - // restrict the green's function to a (2N)^3 grid from the (4N)^3 grid - Kokkos::parallel_for("Restrict domain of Green's function from 4N to 2N", - mdrange_type({nghost, nghost, nghost}, - {view.extent(0)-nghost-size[0], view.extent(1)-nghost-size[1], view.extent(2)-nghost-size[2]}), - KOKKOS_LAMBDA(const int i, const int j, const int k) { + if (domain2.touches(z)) { + auto intersection = domain2.intersect(z); + auto zdom = ippl::Index((2 * size[2] - intersection[2].first()), + (2 * size[2] - intersection[2].last()), -1); - // go from local indices to global - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - const int ig2 = i + ldom_g[0].first() - nghost_g; - const int jg2 = j + ldom_g[1].first() - nghost_g; - const int kg2 = k + ldom_g[2].first() - nghost_g; - - if ((ig==ig2) && (jg==jg2) && (kg==kg2)) { - view(i,j,k) = real(view_g(i,j,k)); - } - - // Now fill the rest of the field - const int s = 2*size[0] - ig - 1 - ldom_g[0].first() + nghost_g; - const int p = 2*size[1] - jg - 1 - ldom_g[1].first() + nghost_g; - const int q = 2*size[2] - kg - 1 - ldom_g[2].first() + nghost_g; - - view(s, j, k) = real(view_g(i+1,j,k)); - view(i, p, k) = real(view_g(i,j+1,k)); - view(i, j, q) = real(view_g(i,j,k+1)); - view(s, j, q) = real(view_g(i+1,j,k+1)); - view(s, p, k) = real(view_g(i+1,j+1,k)); - view(i, p, q) = real(view_g(i,j+1,k+1)); - view(s, p, q) = real(view_g(i+1,j+1,k+1)); - }); - } - IpplTimings::stopTimer(ifftshift); + ippl::NDIndex domain4; + domain4[0] = intersection[0]; + domain4[1] = intersection[1]; + domain4[2] = zdom; - } else { - // Hockney case + if (ldom_g.touches(domain4)) { + intersection = ldom_g.intersect(domain4); - // calculate square of the mesh spacing for each dimension - Vector_t hrsq(hr_m * hr_m); - - // use the grnIField_m helper field to compute Green's function - for (unsigned int i = 0; i < Dim; ++i) { - grn_mr = grn_mr + grnIField_m[i] * hrsq [i]; - } + requests.resize(requests.size() + 1); - grn_mr = -1.0/(4.0 * pi * sqrt(grn_mr)); + Communicate::size_type nsends; + pack(intersection, view_g, fd_m, nghost_g, ldom_g, nsends); - typename Field_t::view_type view = grn_mr.getView(); - const int nghost = grn_mr.getNghost(); - const auto& ldom = layout2_m->getLocalNDIndex(); + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_VICO_SEND + 3 * 8 + i, nsends); - // Kokkos parallel for loop to find (0,0,0) point and regularize - using mdrange_type = Kokkos::MDRangePolicy>; - Kokkos::parallel_for("Regularize Green's function ", - mdrange_type({nghost, nghost, nghost}, - {view.extent(0)-nghost, view.extent(1)-nghost, view.extent(2)-nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k) { - - // go from local indices to global - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - // if (0,0,0), assign to it 1/(4*pi) - const bool isOrig = (ig == 0 && jg == 0 && kg == 0); - view(i,j,k) = isOrig * (-1.0/(4.0 * pi)) + (!isOrig) * view(i,j,k); - }); + int tag = VICO_SOLVER_TAG + 3; + + Ippl::Comm->isend(i, tag, fd_m, *buf, requests.back(), nsends); + buf->resetWritePos(); + } } - // start a timer - static IpplTimings::TimerRef fftg = IpplTimings::getTimer("FFT: Green"); - IpplTimings::startTimer(fftg); - - // perform the FFT of the Green's function for the convolution - fft_m->transform(+1, grn_mr, grntr_m); - - IpplTimings::stopTimer(fftg); - - }; - - template - void - FFTPoissonSolver::communicateVico(Vector size, - typename CxField_t::view_type view_g, const ippl::NDIndex ldom_g, - const int nghost_g, typename Field_t::view_type view, const ippl::NDIndex ldom, - const int nghost) { + if (domain2.touches(xy)) { + auto intersection = domain2.intersect(xy); + auto xdom = ippl::Index((2 * size[0] - intersection[0].first()), + (2 * size[0] - intersection[0].last()), -1); + auto ydom = ippl::Index((2 * size[1] - intersection[1].first()), + (2 * size[1] - intersection[1].last()), -1); - const auto& lDomains2 = layout2_m->getHostLocalDomains(); - const auto& lDomains4 = layout4_m->getHostLocalDomains(); + ippl::NDIndex domain4; + domain4[0] = xdom; + domain4[1] = ydom; + domain4[2] = intersection[2]; - std::vector requests(0); - const int myRank = Ippl::Comm->rank(); - const int ranks = Ippl::Comm->size(); + if (ldom_g.touches(domain4)) { + intersection = ldom_g.intersect(domain4); - // 1st step: Define 8 domains corresponding to the different quadrants - ippl::NDIndex none; - for (unsigned i = 0; i< Dim; i++) { - none[i] = ippl::Index(size[i]); - } - - ippl::NDIndex x; - x[0] = ippl::Index(size[0], 2*size[0]-1); - x[1] = ippl::Index(size[1]); - x[2] = ippl::Index(size[2]); - - ippl::NDIndex y; - y[0] = ippl::Index(size[0]); - y[1] = ippl::Index(size[1], 2*size[1]-1); - y[2] = ippl::Index(size[2]); - - ippl::NDIndex z; - z[0] = ippl::Index(size[0]); - z[1] = ippl::Index(size[1]); - z[2] = ippl::Index(size[2], 2*size[2]-1); - - ippl::NDIndex xy; - xy[0] = ippl::Index(size[0], 2*size[0]-1); - xy[1] = ippl::Index(size[1], 2*size[1]-1); - xy[2] = ippl::Index(size[2]); - - ippl::NDIndex xz; - xz[0] = ippl::Index(size[0], 2*size[0]-1); - xz[1] = ippl::Index(size[1]); - xz[2] = ippl::Index(size[2], 2*size[2]-1); - - ippl::NDIndex yz; - yz[0] = ippl::Index(size[0]); - yz[1] = ippl::Index(size[1], 2*size[1]-1); - yz[2] = ippl::Index(size[2], 2*size[2]-1); - - ippl::NDIndex xyz; - for (unsigned i = 0; i< Dim; i++) { - xyz[i] = ippl::Index(size[i], 2*size[i]-1); - } + requests.resize(requests.size() + 1); - // 2nd step: send - for (int i = 0; i < ranks; ++i) { - auto domain2 = lDomains2[i]; - - if (domain2.touches(none)) { - auto intersection = domain2.intersect(none); - - if (ldom_g.touches(intersection)) { - intersection = intersection.intersect(ldom_g); - requests.resize(requests.size() + 1); - - Communicate::size_type nsends; - pack(intersection, view_g, fd_m, nghost_g, ldom_g, nsends); - - buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_SEND+i, nsends); - - int tag = VICO_SOLVER_TAG; - - Ippl::Comm->isend(i, tag, fd_m, *buf, requests.back(), nsends); - buf->resetWritePos(); - } - } - - if (domain2.touches(x)) { - auto intersection = domain2.intersect(x); - auto xdom = ippl::Index((2*size[0] - intersection[0].first()), - (2*size[0]- intersection[0].last()), -1); - - ippl::NDIndex domain4; - domain4[0] = xdom; - domain4[1] = intersection[1]; - domain4[2] = intersection[2]; - - if (ldom_g.touches(domain4)) { - intersection = ldom_g.intersect(domain4); - - requests.resize(requests.size() + 1); - - Communicate::size_type nsends; - pack(intersection, view_g, fd_m, nghost_g, ldom_g, nsends); - - buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_SEND+8+i, nsends); - - int tag = VICO_SOLVER_TAG + 1; - - Ippl::Comm->isend(i, tag, fd_m, *buf, requests.back(), nsends); - buf->resetWritePos(); - } - } - - if (domain2.touches(y)) { - auto intersection = domain2.intersect(y); - auto ydom = ippl::Index((2*size[1] - intersection[1].first()), - (2*size[1]- intersection[1].last()), -1); - - ippl::NDIndex domain4; - domain4[0] = intersection[0]; - domain4[1] = ydom; - domain4[2] = intersection[2]; - - if (ldom_g.touches(domain4)) { - intersection = ldom_g.intersect(domain4); - - requests.resize(requests.size() + 1); - - Communicate::size_type nsends; - pack(intersection, view_g, fd_m, nghost_g, ldom_g, nsends); - - buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_SEND+2*8+i, nsends); - - int tag = VICO_SOLVER_TAG + 2; - - Ippl::Comm->isend(i, tag, fd_m, *buf, requests.back(), nsends); - buf->resetWritePos(); - } - } - - if (domain2.touches(z)) { - auto intersection = domain2.intersect(z); - auto zdom = ippl::Index((2*size[2] - intersection[2].first()), - (2*size[2]- intersection[2].last()), -1); - - ippl::NDIndex domain4; - domain4[0] = intersection[0]; - domain4[1] = intersection[1]; - domain4[2] = zdom; - - if (ldom_g.touches(domain4)) { - intersection = ldom_g.intersect(domain4); - - requests.resize(requests.size() + 1); - - Communicate::size_type nsends; - pack(intersection, view_g, fd_m, nghost_g, ldom_g, nsends); - - buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_SEND+3*8+i, nsends); - - int tag = VICO_SOLVER_TAG + 3; - - Ippl::Comm->isend(i, tag, fd_m, *buf, requests.back(), nsends); - buf->resetWritePos(); - } - } - - if (domain2.touches(xy)) { - auto intersection = domain2.intersect(xy); - auto xdom = ippl::Index((2*size[0] - intersection[0].first()), - (2*size[0]- intersection[0].last()), -1); - auto ydom = ippl::Index((2*size[1] - intersection[1].first()), - (2*size[1]- intersection[1].last()), -1); - - ippl::NDIndex domain4; - domain4[0] = xdom; - domain4[1] = ydom; - domain4[2] = intersection[2]; - - if (ldom_g.touches(domain4)) { - intersection = ldom_g.intersect(domain4); - - requests.resize(requests.size() + 1); - - Communicate::size_type nsends; - pack(intersection, view_g, fd_m, nghost_g, ldom_g, nsends); - - buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_SEND+4*8+i, nsends); - - int tag = VICO_SOLVER_TAG + 4; - - Ippl::Comm->isend(i, tag, fd_m, *buf, requests.back(), nsends); - buf->resetWritePos(); - } - } - - if (domain2.touches(yz)) { - auto intersection = domain2.intersect(yz); - auto ydom = ippl::Index((2*size[1] - intersection[1].first()), - (2*size[1]- intersection[1].last()), -1); - auto zdom = ippl::Index((2*size[2] - intersection[2].first()), - (2*size[2]- intersection[2].last()), -1); - - ippl::NDIndex domain4; - domain4[0] = intersection[0]; - domain4[1] = ydom; - domain4[2] = zdom; - - if (ldom_g.touches(domain4)) { - intersection = ldom_g.intersect(domain4); - - requests.resize(requests.size() + 1); - - Communicate::size_type nsends; - pack(intersection, view_g, fd_m, nghost_g, ldom_g, nsends); - - buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_SEND+5*8+i, nsends); - - int tag = VICO_SOLVER_TAG + 5; - - Ippl::Comm->isend(i, tag, fd_m, *buf, requests.back(), nsends); - buf->resetWritePos(); - } + Communicate::size_type nsends; + pack(intersection, view_g, fd_m, nghost_g, ldom_g, nsends); + + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_VICO_SEND + 4 * 8 + i, nsends); + + int tag = VICO_SOLVER_TAG + 4; + + Ippl::Comm->isend(i, tag, fd_m, *buf, requests.back(), nsends); + buf->resetWritePos(); } - - if (domain2.touches(xz)) { - auto intersection = domain2.intersect(xz); - auto xdom = ippl::Index((2*size[0] - intersection[0].first()), - (2*size[0]- intersection[0].last()), -1); - auto zdom = ippl::Index((2*size[2] - intersection[2].first()), - (2*size[2]- intersection[2].last()), -1); - - ippl::NDIndex domain4; - domain4[0] = xdom; - domain4[1] = intersection[1]; - domain4[2] = zdom; - - if (ldom_g.touches(domain4)) { - intersection = ldom_g.intersect(domain4); - - requests.resize(requests.size() + 1); - - Communicate::size_type nsends; - pack(intersection, view_g, fd_m, nghost_g, ldom_g, nsends); - - buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_SEND+6*8+i, nsends); - - int tag = VICO_SOLVER_TAG + 6; - - Ippl::Comm->isend(i, tag, fd_m, *buf, requests.back(), nsends); - buf->resetWritePos(); - } + } + + if (domain2.touches(yz)) { + auto intersection = domain2.intersect(yz); + auto ydom = ippl::Index((2 * size[1] - intersection[1].first()), + (2 * size[1] - intersection[1].last()), -1); + auto zdom = ippl::Index((2 * size[2] - intersection[2].first()), + (2 * size[2] - intersection[2].last()), -1); + + ippl::NDIndex domain4; + domain4[0] = intersection[0]; + domain4[1] = ydom; + domain4[2] = zdom; + + if (ldom_g.touches(domain4)) { + intersection = ldom_g.intersect(domain4); + + requests.resize(requests.size() + 1); + + Communicate::size_type nsends; + pack(intersection, view_g, fd_m, nghost_g, ldom_g, nsends); + + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_VICO_SEND + 5 * 8 + i, nsends); + + int tag = VICO_SOLVER_TAG + 5; + + Ippl::Comm->isend(i, tag, fd_m, *buf, requests.back(), nsends); + buf->resetWritePos(); } - - if (domain2.touches(xyz)) { - auto intersection = domain2.intersect(xyz); - auto xdom = ippl::Index((2*size[0] - intersection[0].first()), - (2*size[0]- intersection[0].last()), -1); - auto ydom = ippl::Index((2*size[1] - intersection[1].first()), - (2*size[1]- intersection[1].last()), -1); - auto zdom = ippl::Index((2*size[2] - intersection[2].first()), - (2*size[2]- intersection[2].last()), -1); - - ippl::NDIndex domain4; - domain4[0] = xdom; - domain4[1] = ydom; - domain4[2] = zdom; - - if (ldom_g.touches(domain4)) { - intersection = ldom_g.intersect(domain4); - - requests.resize(requests.size() + 1); - - Communicate::size_type nsends; - pack(intersection, view_g, fd_m, nghost_g, ldom_g, nsends); - - buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_SEND+7*8+i, nsends); - - int tag = VICO_SOLVER_TAG + 7; - - Ippl::Comm->isend(i, tag, fd_m, *buf, requests.back(), nsends); - buf->resetWritePos(); - } + } + + if (domain2.touches(xz)) { + auto intersection = domain2.intersect(xz); + auto xdom = ippl::Index((2 * size[0] - intersection[0].first()), + (2 * size[0] - intersection[0].last()), -1); + auto zdom = ippl::Index((2 * size[2] - intersection[2].first()), + (2 * size[2] - intersection[2].last()), -1); + + ippl::NDIndex domain4; + domain4[0] = xdom; + domain4[1] = intersection[1]; + domain4[2] = zdom; + + if (ldom_g.touches(domain4)) { + intersection = ldom_g.intersect(domain4); + + requests.resize(requests.size() + 1); + + Communicate::size_type nsends; + pack(intersection, view_g, fd_m, nghost_g, ldom_g, nsends); + + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_VICO_SEND + 6 * 8 + i, nsends); + + int tag = VICO_SOLVER_TAG + 6; + + Ippl::Comm->isend(i, tag, fd_m, *buf, requests.back(), nsends); + buf->resetWritePos(); } } - - // 3rd step: receive - for (int i = 0; i < ranks; ++i) { - - if (ldom.touches(none)) { - auto intersection = ldom.intersect(none); - - if (lDomains4[i].touches(intersection)) { - intersection = intersection.intersect(lDomains4[i]); - - Communicate::size_type nrecvs; - nrecvs = intersection.size(); - - buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_RECV+myRank, nrecvs); - - int tag = VICO_SOLVER_TAG; - - Ippl::Comm->recv(i, tag, fd_m, *buf, nrecvs * sizeof(double), nrecvs); - buf->resetReadPos(); - - unpack(intersection, view, fd_m, nghost, ldom); - } + + if (domain2.touches(xyz)) { + auto intersection = domain2.intersect(xyz); + auto xdom = ippl::Index((2 * size[0] - intersection[0].first()), + (2 * size[0] - intersection[0].last()), -1); + auto ydom = ippl::Index((2 * size[1] - intersection[1].first()), + (2 * size[1] - intersection[1].last()), -1); + auto zdom = ippl::Index((2 * size[2] - intersection[2].first()), + (2 * size[2] - intersection[2].last()), -1); + + ippl::NDIndex domain4; + domain4[0] = xdom; + domain4[1] = ydom; + domain4[2] = zdom; + + if (ldom_g.touches(domain4)) { + intersection = ldom_g.intersect(domain4); + + requests.resize(requests.size() + 1); + + Communicate::size_type nsends; + pack(intersection, view_g, fd_m, nghost_g, ldom_g, nsends); + + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_VICO_SEND + 7 * 8 + i, nsends); + + int tag = VICO_SOLVER_TAG + 7; + + Ippl::Comm->isend(i, tag, fd_m, *buf, requests.back(), nsends); + buf->resetWritePos(); } - - if (ldom.touches(x)) { - auto intersection = ldom.intersect(x); - - auto xdom = ippl::Index((2*size[0] - intersection[0].first()), - (2*size[0]- intersection[0].last()), -1); - - ippl::NDIndex domain4; - domain4[0] = xdom; - domain4[1] = intersection[1]; - domain4[2] = intersection[2]; - - if (lDomains4[i].touches(domain4)) { - domain4 = lDomains4[i].intersect(domain4); - domain4[0] = ippl::Index(2*size[0]-domain4[0].first(), - 2*size[0]-domain4[0].last(), -1); - - intersection = intersection.intersect(domain4); - - Communicate::size_type nrecvs; - nrecvs = intersection.size(); - - buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_RECV+8+myRank, nrecvs); - - int tag = VICO_SOLVER_TAG + 1; - - Ippl::Comm->recv(i, tag, fd_m, *buf, nrecvs * sizeof(double), nrecvs); - buf->resetReadPos(); - - unpack(intersection, view, fd_m, nghost, ldom, true, false, false); - } + } + } + + // 3rd step: receive + for (int i = 0; i < ranks; ++i) { + if (ldom.touches(none)) { + auto intersection = ldom.intersect(none); + + if (lDomains4[i].touches(intersection)) { + intersection = intersection.intersect(lDomains4[i]); + + Communicate::size_type nrecvs; + nrecvs = intersection.size(); + + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_VICO_RECV + myRank, nrecvs); + + int tag = VICO_SOLVER_TAG; + + Ippl::Comm->recv(i, tag, fd_m, *buf, nrecvs * sizeof(double), nrecvs); + buf->resetReadPos(); + + unpack(intersection, view, fd_m, nghost, ldom); } - - if (ldom.touches(y)) { - auto intersection = ldom.intersect(y); - - auto ydom = ippl::Index((2*size[1] - intersection[1].first()), - (2*size[1]- intersection[1].last()), -1); - - ippl::NDIndex domain4; - domain4[0] = intersection[0]; - domain4[1] = ydom; - domain4[2] = intersection[2]; - - if (lDomains4[i].touches(domain4)) { - domain4 = lDomains4[i].intersect(domain4); - domain4[1] = ippl::Index(2*size[1]-domain4[1].first(), - 2*size[1]-domain4[1].last(), -1); - - intersection = intersection.intersect(domain4); - - Communicate::size_type nrecvs; - nrecvs = intersection.size(); - - buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_RECV+8*2+myRank, nrecvs); - - int tag = VICO_SOLVER_TAG + 2; - - Ippl::Comm->recv(i, tag, fd_m, *buf, nrecvs * sizeof(double), nrecvs); - buf->resetReadPos(); - - unpack(intersection, view, fd_m, nghost, ldom, false, true, false); - } + } + + if (ldom.touches(x)) { + auto intersection = ldom.intersect(x); + + auto xdom = ippl::Index((2 * size[0] - intersection[0].first()), + (2 * size[0] - intersection[0].last()), -1); + + ippl::NDIndex domain4; + domain4[0] = xdom; + domain4[1] = intersection[1]; + domain4[2] = intersection[2]; + + if (lDomains4[i].touches(domain4)) { + domain4 = lDomains4[i].intersect(domain4); + domain4[0] = ippl::Index(2 * size[0] - domain4[0].first(), + 2 * size[0] - domain4[0].last(), -1); + + intersection = intersection.intersect(domain4); + + Communicate::size_type nrecvs; + nrecvs = intersection.size(); + + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_VICO_RECV + 8 + myRank, nrecvs); + + int tag = VICO_SOLVER_TAG + 1; + + Ippl::Comm->recv(i, tag, fd_m, *buf, nrecvs * sizeof(double), nrecvs); + buf->resetReadPos(); + + unpack(intersection, view, fd_m, nghost, ldom, true, false, false); } - - if (ldom.touches(z)) { - auto intersection = ldom.intersect(z); - - auto zdom = ippl::Index((2*size[2] - intersection[2].first()), - (2*size[2]- intersection[2].last()), -1); - - ippl::NDIndex domain4; - domain4[0] = intersection[0]; - domain4[1] = intersection[1]; - domain4[2] = zdom; - - if (lDomains4[i].touches(domain4)) { - domain4 = lDomains4[i].intersect(domain4); - domain4[2] = ippl::Index(2*size[2]-domain4[2].first(), - 2*size[2]-domain4[2].last(), -1); - - intersection = intersection.intersect(domain4); - - Communicate::size_type nrecvs; - nrecvs = intersection.size(); - - buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_RECV+8*3+myRank, nrecvs); - - int tag = VICO_SOLVER_TAG + 3; - - Ippl::Comm->recv(i, tag, fd_m, *buf, nrecvs * sizeof(double), nrecvs); - buf->resetReadPos(); - - unpack(intersection, view, fd_m, nghost, ldom, false, false, true); - } + } + + if (ldom.touches(y)) { + auto intersection = ldom.intersect(y); + + auto ydom = ippl::Index((2 * size[1] - intersection[1].first()), + (2 * size[1] - intersection[1].last()), -1); + + ippl::NDIndex domain4; + domain4[0] = intersection[0]; + domain4[1] = ydom; + domain4[2] = intersection[2]; + + if (lDomains4[i].touches(domain4)) { + domain4 = lDomains4[i].intersect(domain4); + domain4[1] = ippl::Index(2 * size[1] - domain4[1].first(), + 2 * size[1] - domain4[1].last(), -1); + + intersection = intersection.intersect(domain4); + + Communicate::size_type nrecvs; + nrecvs = intersection.size(); + + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_VICO_RECV + 8 * 2 + myRank, nrecvs); + + int tag = VICO_SOLVER_TAG + 2; + + Ippl::Comm->recv(i, tag, fd_m, *buf, nrecvs * sizeof(double), nrecvs); + buf->resetReadPos(); + + unpack(intersection, view, fd_m, nghost, ldom, false, true, false); } - - if (ldom.touches(xy)) { - auto intersection = ldom.intersect(xy); - - auto xdom = ippl::Index((2*size[0] - intersection[0].first()), - (2*size[0]- intersection[0].last()), -1); - auto ydom = ippl::Index((2*size[1] - intersection[1].first()), - (2*size[1]- intersection[1].last()), -1); - - ippl::NDIndex domain4; - domain4[0] = xdom; - domain4[1] = ydom; - domain4[2] = intersection[2]; - - if (lDomains4[i].touches(domain4)) { - domain4 = lDomains4[i].intersect(domain4); - domain4[0] = ippl::Index(2*size[0]-domain4[0].first(), - 2*size[0]-domain4[0].last(), -1); - domain4[1] = ippl::Index(2*size[1]-domain4[1].first(), - 2*size[1]-domain4[1].last(), -1); - - intersection = intersection.intersect(domain4); - - Communicate::size_type nrecvs; - nrecvs = intersection.size(); - - buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_RECV+8*4+myRank, nrecvs); - - int tag = VICO_SOLVER_TAG + 4; - - Ippl::Comm->recv(i, tag, fd_m, *buf, nrecvs * sizeof(double), nrecvs); - buf->resetReadPos(); - - unpack(intersection, view, fd_m, nghost, ldom, true, true, false); - } + } + + if (ldom.touches(z)) { + auto intersection = ldom.intersect(z); + + auto zdom = ippl::Index((2 * size[2] - intersection[2].first()), + (2 * size[2] - intersection[2].last()), -1); + + ippl::NDIndex domain4; + domain4[0] = intersection[0]; + domain4[1] = intersection[1]; + domain4[2] = zdom; + + if (lDomains4[i].touches(domain4)) { + domain4 = lDomains4[i].intersect(domain4); + domain4[2] = ippl::Index(2 * size[2] - domain4[2].first(), + 2 * size[2] - domain4[2].last(), -1); + + intersection = intersection.intersect(domain4); + + Communicate::size_type nrecvs; + nrecvs = intersection.size(); + + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_VICO_RECV + 8 * 3 + myRank, nrecvs); + + int tag = VICO_SOLVER_TAG + 3; + + Ippl::Comm->recv(i, tag, fd_m, *buf, nrecvs * sizeof(double), nrecvs); + buf->resetReadPos(); + + unpack(intersection, view, fd_m, nghost, ldom, false, false, true); } - - if (ldom.touches(yz)) { - auto intersection = ldom.intersect(yz); - - auto ydom = ippl::Index((2*size[1] - intersection[1].first()), - (2*size[1]- intersection[1].last()), -1); - auto zdom = ippl::Index((2*size[2] - intersection[2].first()), - (2*size[2]- intersection[2].last()), -1); - - ippl::NDIndex domain4; - domain4[0] = intersection[0]; - domain4[1] = ydom; - domain4[2] = zdom; - - if (lDomains4[i].touches(domain4)) { - domain4 = lDomains4[i].intersect(domain4); - domain4[1] = ippl::Index(2*size[1]-domain4[1].first(), - 2*size[1]-domain4[1].last(), -1); - domain4[2] = ippl::Index(2*size[2]-domain4[2].first(), - 2*size[2]-domain4[2].last(), -1); - - intersection = intersection.intersect(domain4); - - Communicate::size_type nrecvs; - nrecvs = intersection.size(); - - buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_RECV+8*5+myRank, nrecvs); - - int tag = VICO_SOLVER_TAG + 5; - - Ippl::Comm->recv(i, tag, fd_m, *buf, nrecvs * sizeof(double), nrecvs); - buf->resetReadPos(); - - unpack(intersection, view, fd_m, nghost, ldom, false, true, true); - } + } + + if (ldom.touches(xy)) { + auto intersection = ldom.intersect(xy); + + auto xdom = ippl::Index((2 * size[0] - intersection[0].first()), + (2 * size[0] - intersection[0].last()), -1); + auto ydom = ippl::Index((2 * size[1] - intersection[1].first()), + (2 * size[1] - intersection[1].last()), -1); + + ippl::NDIndex domain4; + domain4[0] = xdom; + domain4[1] = ydom; + domain4[2] = intersection[2]; + + if (lDomains4[i].touches(domain4)) { + domain4 = lDomains4[i].intersect(domain4); + domain4[0] = ippl::Index(2 * size[0] - domain4[0].first(), + 2 * size[0] - domain4[0].last(), -1); + domain4[1] = ippl::Index(2 * size[1] - domain4[1].first(), + 2 * size[1] - domain4[1].last(), -1); + + intersection = intersection.intersect(domain4); + + Communicate::size_type nrecvs; + nrecvs = intersection.size(); + + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_VICO_RECV + 8 * 4 + myRank, nrecvs); + + int tag = VICO_SOLVER_TAG + 4; + + Ippl::Comm->recv(i, tag, fd_m, *buf, nrecvs * sizeof(double), nrecvs); + buf->resetReadPos(); + + unpack(intersection, view, fd_m, nghost, ldom, true, true, false); } - - if (ldom.touches(xz)) { - auto intersection = ldom.intersect(xz); - - auto xdom = ippl::Index((2*size[0] - intersection[0].first()), - (2*size[0]- intersection[0].last()), -1); - auto zdom = ippl::Index((2*size[2] - intersection[2].first()), - (2*size[2]- intersection[2].last()), -1); - - ippl::NDIndex domain4; - domain4[0] = xdom; - domain4[1] = intersection[1]; - domain4[2] = zdom; - - if (lDomains4[i].touches(domain4)) { - domain4 = lDomains4[i].intersect(domain4); - domain4[0] = ippl::Index(2*size[0]-domain4[0].first(), - 2*size[0]-domain4[0].last(), -1); - domain4[2] = ippl::Index(2*size[2]-domain4[2].first(), - 2*size[2]-domain4[2].last(), -1); - - intersection = intersection.intersect(domain4); - - Communicate::size_type nrecvs; - nrecvs = intersection.size(); - - buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_RECV+8*6+myRank, nrecvs); - - int tag = VICO_SOLVER_TAG + 6; - - Ippl::Comm->recv(i, tag, fd_m, *buf, nrecvs * sizeof(double), nrecvs); - buf->resetReadPos(); - - unpack(intersection, view, fd_m, nghost, ldom, true, false, true); - } + } + + if (ldom.touches(yz)) { + auto intersection = ldom.intersect(yz); + + auto ydom = ippl::Index((2 * size[1] - intersection[1].first()), + (2 * size[1] - intersection[1].last()), -1); + auto zdom = ippl::Index((2 * size[2] - intersection[2].first()), + (2 * size[2] - intersection[2].last()), -1); + + ippl::NDIndex domain4; + domain4[0] = intersection[0]; + domain4[1] = ydom; + domain4[2] = zdom; + + if (lDomains4[i].touches(domain4)) { + domain4 = lDomains4[i].intersect(domain4); + domain4[1] = ippl::Index(2 * size[1] - domain4[1].first(), + 2 * size[1] - domain4[1].last(), -1); + domain4[2] = ippl::Index(2 * size[2] - domain4[2].first(), + 2 * size[2] - domain4[2].last(), -1); + + intersection = intersection.intersect(domain4); + + Communicate::size_type nrecvs; + nrecvs = intersection.size(); + + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_VICO_RECV + 8 * 5 + myRank, nrecvs); + + int tag = VICO_SOLVER_TAG + 5; + + Ippl::Comm->recv(i, tag, fd_m, *buf, nrecvs * sizeof(double), nrecvs); + buf->resetReadPos(); + + unpack(intersection, view, fd_m, nghost, ldom, false, true, true); } - - if (ldom.touches(xyz)) { - auto intersection = ldom.intersect(xyz); - - auto xdom = ippl::Index((2*size[0] - intersection[0].first()), - (2*size[0]- intersection[0].last()), -1); - auto ydom = ippl::Index((2*size[1] - intersection[1].first()), - (2*size[1]- intersection[1].last()), -1); - auto zdom = ippl::Index((2*size[2] - intersection[2].first()), - (2*size[2]- intersection[2].last()), -1); - - ippl::NDIndex domain4; - domain4[0] = xdom; - domain4[1] = ydom; - domain4[2] = zdom; - - if (lDomains4[i].touches(domain4)) { - domain4 = lDomains4[i].intersect(domain4); - domain4[0] = ippl::Index(2*size[0]-domain4[0].first(), - 2*size[0]-domain4[0].last(), -1); - domain4[1] = ippl::Index(2*size[1]-domain4[1].first(), - 2*size[1]-domain4[1].last(), -1); - domain4[2] = ippl::Index(2*size[2]-domain4[2].first(), - 2*size[2]-domain4[2].last(), -1); - - intersection = intersection.intersect(domain4); - - Communicate::size_type nrecvs; - nrecvs = intersection.size(); - - buffer_type buf = Ippl::Comm->getBuffer(IPPL_VICO_RECV+8*7+myRank, nrecvs); - - int tag = VICO_SOLVER_TAG + 7; - - Ippl::Comm->recv(i, tag, fd_m, *buf, nrecvs * sizeof(double), nrecvs); - buf->resetReadPos(); - - unpack(intersection, view, fd_m, nghost, ldom, true, true, true); - } + } + + if (ldom.touches(xz)) { + auto intersection = ldom.intersect(xz); + + auto xdom = ippl::Index((2 * size[0] - intersection[0].first()), + (2 * size[0] - intersection[0].last()), -1); + auto zdom = ippl::Index((2 * size[2] - intersection[2].first()), + (2 * size[2] - intersection[2].last()), -1); + + ippl::NDIndex domain4; + domain4[0] = xdom; + domain4[1] = intersection[1]; + domain4[2] = zdom; + + if (lDomains4[i].touches(domain4)) { + domain4 = lDomains4[i].intersect(domain4); + domain4[0] = ippl::Index(2 * size[0] - domain4[0].first(), + 2 * size[0] - domain4[0].last(), -1); + domain4[2] = ippl::Index(2 * size[2] - domain4[2].first(), + 2 * size[2] - domain4[2].last(), -1); + + intersection = intersection.intersect(domain4); + + Communicate::size_type nrecvs; + nrecvs = intersection.size(); + + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_VICO_RECV + 8 * 6 + myRank, nrecvs); + + int tag = VICO_SOLVER_TAG + 6; + + Ippl::Comm->recv(i, tag, fd_m, *buf, nrecvs * sizeof(double), nrecvs); + buf->resetReadPos(); + + unpack(intersection, view, fd_m, nghost, ldom, true, false, true); } } - - // Wait for all messages to be received - if (requests.size() > 0) { - MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); + + if (ldom.touches(xyz)) { + auto intersection = ldom.intersect(xyz); + + auto xdom = ippl::Index((2 * size[0] - intersection[0].first()), + (2 * size[0] - intersection[0].last()), -1); + auto ydom = ippl::Index((2 * size[1] - intersection[1].first()), + (2 * size[1] - intersection[1].last()), -1); + auto zdom = ippl::Index((2 * size[2] - intersection[2].first()), + (2 * size[2] - intersection[2].last()), -1); + + ippl::NDIndex domain4; + domain4[0] = xdom; + domain4[1] = ydom; + domain4[2] = zdom; + + if (lDomains4[i].touches(domain4)) { + domain4 = lDomains4[i].intersect(domain4); + domain4[0] = ippl::Index(2 * size[0] - domain4[0].first(), + 2 * size[0] - domain4[0].last(), -1); + domain4[1] = ippl::Index(2 * size[1] - domain4[1].first(), + 2 * size[1] - domain4[1].last(), -1); + domain4[2] = ippl::Index(2 * size[2] - domain4[2].first(), + 2 * size[2] - domain4[2].last(), -1); + + intersection = intersection.intersect(domain4); + + Communicate::size_type nrecvs; + nrecvs = intersection.size(); + + buffer_type buf = + Ippl::Comm->getBuffer(IPPL_VICO_RECV + 8 * 7 + myRank, nrecvs); + + int tag = VICO_SOLVER_TAG + 7; + + Ippl::Comm->recv(i, tag, fd_m, *buf, nrecvs * sizeof(double), nrecvs); + buf->resetReadPos(); + + unpack(intersection, view, fd_m, nghost, ldom, true, true, true); + } } - Ippl::Comm->barrier(); - }; -} + } + + // Wait for all messages to be received + if (requests.size() > 0) { + MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); + } + Ippl::Comm->barrier(); + }; +} // namespace ippl diff --git a/src/Solver/PCG.h b/src/Solver/PCG.h index fc62ac824..e3ec6d2be 100644 --- a/src/Solver/PCG.h +++ b/src/Solver/PCG.h @@ -24,10 +24,8 @@ namespace ippl { template , - class C=typename M::DefaultCentering> - class PCG : public SolverAlgorithm - { + class M = UniformCartesian, class C = typename M::DefaultCentering> + class PCG : public SolverAlgorithm { public: using Base = SolverAlgorithm; using typename Base::lhs_type; @@ -38,33 +36,29 @@ namespace ippl { * Sets the differential operator for the conjugate gradient algorithm * @param op A function that returns OpRet and takes a field of the LHS type */ - void setOperator(operator_type op) { - op_m = std::move(op); - } + void setOperator(operator_type op) { op_m = std::move(op); } /*! * Query how many iterations were required to obtain the solution * the last time this solver was used * @return Iteration count of last solve */ - int getIterationCount() { - return iterations_m; - } + int getIterationCount() { return iterations_m; } void operator()(lhs_type& lhs, rhs_type& rhs, const ParameterList& params) override { typedef typename lhs_type::type T; - typename lhs_type::Mesh_t mesh = lhs.get_mesh(); + typename lhs_type::Mesh_t mesh = lhs.get_mesh(); typename lhs_type::Layout_t layout = lhs.getLayout(); - iterations_m = 0; + iterations_m = 0; const int maxIterations = params.get("max_iterations"); // Variable names mostly based on description in // https://www.cs.cmu.edu/~quake-papers/painless-conjugate-gradient.pdf lhs_type r(mesh, layout), d(mesh, layout); - using bc_type = BConds; + using bc_type = BConds; bc_type lhsBCs = lhs.getFieldBC(); bc_type bc; @@ -77,11 +71,11 @@ namespace ippl { } else if (bcType & CONSTANT_FACE) { // If the LHS has constant BCs, the residue is zero on the BCs // Bitwise AND with CONSTANT_FACE will succeed for ZeroFace or ConstantFace - bc[i] = std::make_shared>(i); + bc[i] = std::make_shared>(i); allFacesPeriodic = false; } else { throw IpplException("PCG::operator()", - "Only periodic or constant BCs for LHS supported."); + "Only periodic or constant BCs for LHS supported."); return; } } @@ -94,16 +88,16 @@ namespace ippl { // https://gitlab.psi.ch/OPAL/Libraries/ippl/-/issues/80 d = r * 1; - T delta1 = innerProduct(r, r); - T rNorm = std::sqrt(delta1); + T delta1 = innerProduct(r, r); + T rNorm = std::sqrt(delta1); const T tolerance = params.get("tolerance") * norm(rhs); lhs_type q(mesh, layout); while (iterations_m < maxIterations && rNorm > tolerance) { - q = op_m(d); + q = op_m(d); T alpha = delta1 / innerProduct(d, q); - lhs = lhs + alpha * d; + lhs = lhs + alpha * d; // The exact residue is given by // r = rhs - op_m(lhs); @@ -115,8 +109,8 @@ namespace ippl { r = r - alpha * q; T delta0 = delta1; - delta1 = innerProduct(r, r); - T beta = delta1 / delta0; + delta1 = innerProduct(r, r); + T beta = delta1 / delta0; rNorm = std::sqrt(delta1); @@ -127,7 +121,7 @@ namespace ippl { if (allFacesPeriodic) { T avg = lhs.getVolumeAverage(); - lhs = lhs - avg; + lhs = lhs - avg; } } @@ -136,6 +130,6 @@ namespace ippl { int iterations_m = 0; }; -} +} // namespace ippl #endif diff --git a/src/Solver/Solver.h b/src/Solver/Solver.h index 64cb00c00..b3218ee0d 100644 --- a/src/Solver/Solver.h +++ b/src/Solver/Solver.h @@ -19,16 +19,14 @@ #ifndef IPPL_SOLVER_H #define IPPL_SOLVER_H -#include "Utility/ParameterList.h" #include "Field/Field.h" +#include "Utility/ParameterList.h" namespace ippl { - template , - class C=typename M::DefaultCentering> - class Solver - { + template , + class C = typename M::DefaultCentering> + class Solver { public: using lhs_type = Field; using rhs_type = Field; @@ -36,7 +34,7 @@ namespace ippl { /*! * Default constructor */ - Solver() { } + Solver() {} /*! * Convenience constructor with LHS and RHS parameters @@ -62,20 +60,17 @@ namespace ippl { /*! * Updates all solver parameters based on values in another parameter set * @param params Parameter list with updated values - * @throw IpplException Fails if the provided parameter list includes keys not already present + * @throw IpplException Fails if the provided parameter list includes keys not already + * present */ - void updateParameters(const ParameterList& params) { - params_m.update(params); - } + void updateParameters(const ParameterList& params) { params_m.update(params); } /*! * Merges another parameter set into the solver's parameters, overwriting * existing parameters in case of conflict * @param params Parameter list with desired values */ - void mergeParameters(const ParameterList& params) { - params_m.merge(params); - } + void mergeParameters(const ParameterList& params) { params_m.merge(params); } /*! * Set the problem LHS @@ -101,6 +96,6 @@ namespace ippl { */ virtual void setDefaultParameters() {} }; -} +} // namespace ippl #endif diff --git a/src/Solver/SolverAlgorithm.h b/src/Solver/SolverAlgorithm.h index 72d25f98a..36d6e11b0 100644 --- a/src/Solver/SolverAlgorithm.h +++ b/src/Solver/SolverAlgorithm.h @@ -19,16 +19,14 @@ #ifndef IPPL_SOLVER_ALGORITHM_H #define IPPL_SOLVER_ALGORITHM_H -#include "Utility/ParameterList.h" #include +#include "Utility/ParameterList.h" namespace ippl { - template , - class C=typename M::DefaultCentering> - class SolverAlgorithm - { + template , + class C = typename M::DefaultCentering> + class SolverAlgorithm { public: using lhs_type = Field; using rhs_type = Field; @@ -43,6 +41,6 @@ namespace ippl { virtual void operator()(lhs_type& lhs, rhs_type& rhs, const ParameterList& params) = 0; }; -} +} // namespace ippl #endif diff --git a/src/Solver/test/Budiardja_plot.cpp b/src/Solver/test/Budiardja_plot.cpp index dd63fa94d..89e6756ab 100644 --- a/src/Solver/test/Budiardja_plot.cpp +++ b/src/Solver/test/Budiardja_plot.cpp @@ -1,37 +1,34 @@ // This program recreates the convergence test plot from the Budiardja et al. (2010) paper. // Gravitational potential of a sphere. -#include "Ippl.h" #include "FFTPoissonSolver.h" +#include "Ippl.h" -KOKKOS_INLINE_FUNCTION -double source(double x, double y, double z, double density = 1.0, double R = 1.0, double mu = 1.2) { - +KOKKOS_INLINE_FUNCTION double source(double x, double y, double z, double density = 1.0, + double R = 1.0, double mu = 1.2) { double pi = std::acos(-1.0); - double G = 6.674e-11; + double G = 6.674e-11; - double r = std::sqrt((x-mu)*(x-mu) + (y-mu)*(y-mu) + (z-mu)*(z-mu)); + double r = std::sqrt((x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu)); bool checkInside = (r <= R); return double(checkInside) * 4.0 * pi * G * density; } -KOKKOS_INLINE_FUNCTION -double exact_fct(double x, double y, double z, double density = 1.0, double R = 1.0, double mu = 1.2) { - +KOKKOS_INLINE_FUNCTION double exact_fct(double x, double y, double z, double density = 1.0, + double R = 1.0, double mu = 1.2) { double pi = std::acos(-1.0); - double G = 6.674e-11; - - double r = std::sqrt((x-mu)*(x-mu) + (y-mu)*(y-mu) + (z-mu)*(z-mu)); + double G = 6.674e-11; + + double r = std::sqrt((x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu)); bool checkInside = (r <= R); - return -(double(checkInside) * (2.0/3.0) * pi * G * density * (3*R*R - r*r)) - -((1.0 - double(checkInside)) * (4.0/3.0) * pi * G * density * R * R * R / r); + return -(double(checkInside) * (2.0 / 3.0) * pi * G * density * (3 * R * R - r * r)) + - ((1.0 - double(checkInside)) * (4.0 / 3.0) * pi * G * density * R * R * R / r); } -int main(int argc, char *argv[]) { - - Ippl ippl(argc,argv); +int main(int argc, char* argv[]) { + Ippl ippl(argc, argv); // number of interations const int n = 5; @@ -42,97 +39,97 @@ int main(int argc, char *argv[]) { std::cout << "Spacing Error" << std::endl; for (int p = 0; p < n; ++p) { - // domain - int pt = N[p]; - ippl::Index I(pt); - ippl::NDIndex<3> owned(I, I, I); - - // specifies decomposition; here all dimensions are parallel - ippl::e_dim_tag decomp[3]; - for (unsigned int d = 0; d < 3; d++) - decomp[d] = ippl::PARALLEL; - - // define computational box of side 2.4 - double dx = 2.4/pt; - ippl::Vector hx = {dx, dx, dx}; - ippl::Vector origin = {0.0, 0.0, 0.0}; - ippl::UniformCartesian mesh(owned, hx, origin); - - // all parallel layout, standard domain, normal axis order - ippl::FieldLayout<3> layout(owned, decomp); - - // define the L (phi) and R (rho) fields - typedef ippl::Field field; - field rho; - rho.initialize(mesh, layout); + int pt = N[p]; + ippl::Index I(pt); + ippl::NDIndex<3> owned(I, I, I); + + // specifies decomposition; here all dimensions are parallel + ippl::e_dim_tag decomp[3]; + for (unsigned int d = 0; d < 3; d++) + decomp[d] = ippl::PARALLEL; + + // define computational box of side 2.4 + double dx = 2.4 / pt; + ippl::Vector hx = {dx, dx, dx}; + ippl::Vector origin = {0.0, 0.0, 0.0}; + ippl::UniformCartesian mesh(owned, hx, origin); + + // all parallel layout, standard domain, normal axis order + ippl::FieldLayout<3> layout(owned, decomp); + + // define the L (phi) and R (rho) fields + typedef ippl::Field field; + field rho; + rho.initialize(mesh, layout); // define the exact solution field field exact; exact.initialize(mesh, layout); - - // assign the rho field with its value - typename field::view_type view_rho = rho.getView(); - const int nghost = rho.getNghost(); - const auto& ldom = layout.getLocalNDIndex(); - - Kokkos::parallel_for("Assign rho field", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_rho.extent(0) - nghost, - view_rho.extent(1) - nghost, - view_rho.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - // go from local to global indices - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - // define the physical points (cell-centered) - double x = (ig + 0.5) * hx[0] + origin[0]; - double y = (jg + 0.5) * hx[1] + origin[1]; - double z = (kg + 0.5) * hx[2] + origin[2]; - - view_rho(i, j, k) = source(x, y, z); - - }); + + // assign the rho field with its value + typename field::view_type view_rho = rho.getView(); + const int nghost = rho.getNghost(); + const auto& ldom = layout.getLocalNDIndex(); + + Kokkos::parallel_for( + "Assign rho field", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, {view_rho.extent(0) - nghost, view_rho.extent(1) - nghost, + view_rho.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // go from local to global indices + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + // define the physical points (cell-centered) + double x = (ig + 0.5) * hx[0] + origin[0]; + double y = (jg + 0.5) * hx[1] + origin[1]; + double z = (kg + 0.5) * hx[2] + origin[2]; + + view_rho(i, j, k) = source(x, y, z); + }); // assign the exact field with its values typename field::view_type view_exact = exact.getView(); - - Kokkos::parallel_for("Assign exact field", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_exact.extent(0) - nghost, - view_exact.extent(1) - nghost, - view_exact.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - double x = (ig + 0.5) * hx[0] + origin[0]; - double y = (jg + 0.5) * hx[1] + origin[1]; - double z = (kg + 0.5) * hx[2] + origin[2]; - - view_exact(i, j, k) = exact_fct(x,y,z); - }); - - // set FFT parameters - ippl::ParameterList fftParams; - fftParams.add("use_heffte_defaults", false); - fftParams.add("use_pencils", true); - fftParams.add("use_gpu_aware", true); - fftParams.add("comm", ippl::a2av); - fftParams.add("r2c_direction", 0); - - // define an FFTPoissonSolver object - ippl::FFTPoissonSolver, double, 3> FFTsolver(rho, fftParams, "HOCKNEY"); - - // solve the Poisson equation -> rho contains the solution (phi) now - FFTsolver.solve(); - - // compute the L1 error - rho = (rho - exact); - double err = norm(rho, 1)/norm(exact, 1); + + Kokkos::parallel_for( + "Assign exact field", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, + {view_exact.extent(0) - nghost, view_exact.extent(1) - nghost, + view_exact.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + double x = (ig + 0.5) * hx[0] + origin[0]; + double y = (jg + 0.5) * hx[1] + origin[1]; + double z = (kg + 0.5) * hx[2] + origin[2]; + + view_exact(i, j, k) = exact_fct(x, y, z); + }); + + // set FFT parameters + ippl::ParameterList fftParams; + fftParams.add("use_heffte_defaults", false); + fftParams.add("use_pencils", true); + fftParams.add("use_gpu_aware", true); + fftParams.add("comm", ippl::a2av); + fftParams.add("r2c_direction", 0); + + // define an FFTPoissonSolver object + ippl::FFTPoissonSolver, double, 3> FFTsolver(rho, fftParams, + "HOCKNEY"); + + // solve the Poisson equation -> rho contains the solution (phi) now + FFTsolver.solve(); + + // compute the L1 error + rho = (rho - exact); + double err = norm(rho, 1) / norm(exact, 1); std::cout << dx << " " << err << std::endl; } diff --git a/src/Solver/test/TestCGSolver.cpp b/src/Solver/test/TestCGSolver.cpp index e54cb8abd..91f2b6980 100644 --- a/src/Solver/test/TestCGSolver.cpp +++ b/src/Solver/test/TestCGSolver.cpp @@ -4,16 +4,16 @@ // TestCGSolver [size [scaling_type]] #include "Ippl.h" +#include #include #include -#include +#include "ElectrostaticsCG.h" #include "Utility/Inform.h" #include "Utility/IpplTimings.h" -#include "ElectrostaticsCG.h" -int main(int argc, char *argv[]) { - Ippl ippl(argc,argv); +int main(int argc, char* argv[]) { + Ippl ippl(argc, argv); constexpr unsigned int dim = 3; @@ -32,7 +32,7 @@ int main(int argc, char *argv[]) { // along the Y axis such that each rank has the same workload // (simplest enlargement method) ptY = 1 << (5 + (int)N); - pt = 32; + pt = 32; info << "Performing weak scaling" << endl; isWeak = true; } @@ -42,26 +42,24 @@ int main(int argc, char *argv[]) { ippl::Index I(pt), Iy(ptY); ippl::NDIndex owned(I, Iy, I); - ippl::e_dim_tag allParallel[dim]; // Specifies SERIAL, PARALLEL dims - for (unsigned int d=0; d layout(owned,allParallel); + ippl::FieldLayout layout(owned, allParallel); - //Unit box - double dx = 2.0 / double(pt); - double dy = 2.0 / double(ptY); - ippl::Vector hx = {dx, dy, dx}; + // Unit box + double dx = 2.0 / double(pt); + double dy = 2.0 / double(ptY); + ippl::Vector hx = {dx, dy, dx}; ippl::Vector origin = {-1, -1, -1}; ippl::UniformCartesian mesh(owned, hx, origin); double pi = acos(-1.0); typedef ippl::Field field_type; - field_type rhs(mesh, layout), - lhs(mesh, layout), - solution(mesh, layout); + field_type rhs(mesh, layout), lhs(mesh, layout), solution(mesh, layout); typedef ippl::BConds bc_type; @@ -73,43 +71,46 @@ int main(int argc, char *argv[]) { lhs.setFieldBC(bcField); - typename field_type::view_type& viewRHS = rhs.getView(), - viewSol = solution.getView(); + typename field_type::view_type &viewRHS = rhs.getView(), viewSol = solution.getView(); const ippl::NDIndex& lDom = layout.getLocalNDIndex(); - int shift1 = solution.getNghost(); + int shift1 = solution.getNghost(); auto policySol = solution.getRangePolicy(); - Kokkos::parallel_for("Assign solution", policySol, - KOKKOS_LAMBDA(const int i, const int j, const int k){ - const size_t ig = i + lDom[0].first() - shift1; - const size_t jg = j + lDom[1].first() - shift1; - const size_t kg = k + lDom[2].first() - shift1; - double x = (ig + 0.5) * hx[0]; - double y = (jg + 0.5) * hx[1]; - double z = (kg + 0.5) * hx[2]; - - viewSol(i, j, k) = sin(sin(pi * x)) * sin(sin(pi * y)) * sin(sin(pi * z)); - }); - + Kokkos::parallel_for( + "Assign solution", policySol, KOKKOS_LAMBDA(const int i, const int j, const int k) { + const size_t ig = i + lDom[0].first() - shift1; + const size_t jg = j + lDom[1].first() - shift1; + const size_t kg = k + lDom[2].first() - shift1; + double x = (ig + 0.5) * hx[0]; + double y = (jg + 0.5) * hx[1]; + double z = (kg + 0.5) * hx[2]; + + viewSol(i, j, k) = sin(sin(pi * x)) * sin(sin(pi * y)) * sin(sin(pi * z)); + }); const int shift2 = rhs.getNghost(); - auto policyRHS = rhs.getRangePolicy(); - Kokkos::parallel_for("Assign rhs", policyRHS, - KOKKOS_LAMBDA(const int i, const int j, const int k){ - const size_t ig = i + lDom[0].first() - shift2; - const size_t jg = j + lDom[1].first() - shift2; - const size_t kg = k + lDom[2].first() - shift2; - double x = (ig + 0.5) * hx[0]; - double y = (jg + 0.5) * hx[1]; - double z = (kg + 0.5) * hx[2]; - - // https://gitlab.psi.ch/OPAL/Libraries/ippl-solvers/-/blob/5-fftperiodicpoissonsolver/test/TestFFTPeriodicPoissonSolver.cpp#L91 - viewRHS(i, j, k) = pow(pi, 2) * (cos(sin(pi * z)) * sin(pi * z) * sin(sin(pi * x)) * sin(sin(pi * y)) - + (cos(sin(pi * y)) * sin(pi * y) * sin(sin(pi * x)) + (cos(sin(pi * x)) * sin(pi * x) - + (pow(cos(pi * x), 2) + pow(cos(pi * y), 2) +pow(cos(pi * z), 2)) * sin(sin(pi * x))) - * sin(sin(pi * y))) * sin(sin(pi * z))); - }); + auto policyRHS = rhs.getRangePolicy(); + Kokkos::parallel_for( + "Assign rhs", policyRHS, KOKKOS_LAMBDA(const int i, const int j, const int k) { + const size_t ig = i + lDom[0].first() - shift2; + const size_t jg = j + lDom[1].first() - shift2; + const size_t kg = k + lDom[2].first() - shift2; + double x = (ig + 0.5) * hx[0]; + double y = (jg + 0.5) * hx[1]; + double z = (kg + 0.5) * hx[2]; + + // https://gitlab.psi.ch/OPAL/Libraries/ippl-solvers/-/blob/5-fftperiodicpoissonsolver/test/TestFFTPeriodicPoissonSolver.cpp#L91 + viewRHS(i, j, k) = + pow(pi, 2) + * (cos(sin(pi * z)) * sin(pi * z) * sin(sin(pi * x)) * sin(sin(pi * y)) + + (cos(sin(pi * y)) * sin(pi * y) * sin(sin(pi * x)) + + (cos(sin(pi * x)) * sin(pi * x) + + (pow(cos(pi * x), 2) + pow(cos(pi * y), 2) + pow(cos(pi * z), 2)) + * sin(sin(pi * x))) + * sin(sin(pi * y))) + * sin(sin(pi * z))); + }); ippl::ElectrostaticsCG lapsolver; @@ -128,16 +129,17 @@ int main(int argc, char *argv[]) { field_type error(mesh, layout); // Solver solution - analytical solution - error = lhs - solution; + error = lhs - solution; double relError = norm(error) / norm(solution); // Laplace(solver solution) - rhs - error = -laplace(lhs) - rhs; + error = -laplace(lhs) - rhs; double residue = norm(error) / norm(rhs); - int size = isWeak ? pt * pt * ptY : pt; + int size = isWeak ? pt * pt * ptY : pt; int itCount = lapsolver.getIterationCount(); - m << size << "," << std::setprecision(16) << relError << "," << residue << "," << itCount << endl; + m << size << "," << std::setprecision(16) << relError << "," << residue << "," << itCount + << endl; IpplTimings::print("timings" + std::to_string(pt) + ".dat"); diff --git a/src/Solver/test/TestFFTPeriodicPoissonSolver.cpp b/src/Solver/test/TestFFTPeriodicPoissonSolver.cpp index 32414cd3b..b197c809f 100644 --- a/src/Solver/test/TestFFTPeriodicPoissonSolver.cpp +++ b/src/Solver/test/TestFFTPeriodicPoissonSolver.cpp @@ -5,38 +5,35 @@ #include "FFTPeriodicPoissonSolver.h" -int main(int argc, char *argv[]) { - - Ippl ippl(argc,argv); +int main(int argc, char* argv[]) { + Ippl ippl(argc, argv); constexpr unsigned int dim = 3; - const int npts = 7; + const int npts = 7; std::array pts = {2, 4, 8, 16, 32, 64, 128}; - if(Ippl::Comm->size() > 4) { + if (Ippl::Comm->size() > 4) { if (Ippl::Comm->rank() == 0) { std::cerr << " Too many MPI ranks please use <= 4 ranks" << std::endl; } } - - for (int p=0; p owned(I, I, I); - - ippl::e_dim_tag decomp[dim]; // Specifies SERIAL, PARALLEL dims - for (unsigned int d=0; d layout(owned,decomp); + ippl::FieldLayout layout(owned, decomp); //[-1, 1] box - double dx = 2.0 / double(pt); - ippl::Vector hx = {dx, dx, dx}; + double dx = 2.0 / double(pt); + ippl::Vector hx = {dx, dx, dx}; ippl::Vector origin = {-1.0, -1.0, -1.0}; ippl::UniformCartesian mesh(owned, hx, origin); @@ -53,12 +50,12 @@ int main(int argc, char *argv[]) { ippl::ParameterList params; params.add("output_type", Solver_t::SOL); - params.add("use_heffte_defaults", false); - params.add("use_pencils", true); - //params.add("use_reorder", false); - params.add("use_gpu_aware", true); - params.add("comm", ippl::a2av); - params.add("r2c_direction", 0); + params.add("use_heffte_defaults", false); + params.add("use_pencils", true); + // params.add("use_reorder", false); + params.add("use_gpu_aware", true); + params.add("comm", ippl::a2av); + params.add("r2c_direction", 0); Solver_t FFTsolver; @@ -66,63 +63,65 @@ int main(int argc, char *argv[]) { FFTsolver.setRhs(field); - const ippl::NDIndex& lDom = layout.getLocalNDIndex(); - const int nghost = field.getNghost(); - using mdrange_type = Kokkos::MDRangePolicy>; + const ippl::NDIndex& lDom = layout.getLocalNDIndex(); + const int nghost = field.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; typename Field_t::view_type view = field.getView(); switch (params.template get("output_type")) { case Solver_t::SOL: { - Field_t phifield_exact(mesh, layout); auto view_exact = phifield_exact.getView(); - Kokkos::parallel_for("Assign rhs", - mdrange_type({nghost, nghost, nghost}, - {view.extent(0) - nghost, - view.extent(1) - nghost, - view.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, - const int j, - const int k) - { - //local to global index conversion - const size_t ig = i + lDom[0].first() - nghost; - const size_t jg = j + lDom[1].first() - nghost; - const size_t kg = k + lDom[2].first() - nghost; - double x = origin[0] + (ig + 0.5) * hx[0]; - double y = origin[1] + (jg + 0.5) * hx[1]; - double z = origin[2] + (kg + 0.5) * hx[2]; - - //view(i, j, k) = 3.0 * pow(pi, 2) * sin(pi * x) * sin(pi * y) * sin(pi * z); - view(i, j, k) = pow(pi, 2) * (cos(sin(pi * z)) * sin(pi * z) * sin(sin(pi * x)) * sin(sin(pi * y)) - + (cos(sin(pi * y)) * sin(pi * y) * sin(sin(pi * x)) + (cos(sin(pi * x)) * sin(pi * x) - + (pow(cos(pi * x), 2) + pow(cos(pi * y), 2) +pow(cos(pi * z), 2)) * sin(sin(pi * x))) - * sin(sin(pi * y))) * sin(sin(pi * z))); - - //view_exact(i, j, k) = sin(pi * x) * sin(pi * y) * sin(pi * z); - view_exact(i, j, k) = sin(sin(pi * x)) * sin(sin(pi * y)) * sin(sin(pi * z)); - }); + Kokkos::parallel_for( + "Assign rhs", + mdrange_type({nghost, nghost, nghost}, + {view.extent(0) - nghost, view.extent(1) - nghost, + view.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // local to global index conversion + const size_t ig = i + lDom[0].first() - nghost; + const size_t jg = j + lDom[1].first() - nghost; + const size_t kg = k + lDom[2].first() - nghost; + double x = origin[0] + (ig + 0.5) * hx[0]; + double y = origin[1] + (jg + 0.5) * hx[1]; + double z = origin[2] + (kg + 0.5) * hx[2]; + + // view(i, j, k) = 3.0 * pow(pi, 2) * sin(pi * x) * sin(pi * y) * sin(pi * + // z); + view(i, j, k) = + pow(pi, 2) + * (cos(sin(pi * z)) * sin(pi * z) * sin(sin(pi * x)) * sin(sin(pi * y)) + + (cos(sin(pi * y)) * sin(pi * y) * sin(sin(pi * x)) + + (cos(sin(pi * x)) * sin(pi * x) + + (pow(cos(pi * x), 2) + pow(cos(pi * y), 2) + + pow(cos(pi * z), 2)) + * sin(sin(pi * x))) + * sin(sin(pi * y))) + * sin(sin(pi * z))); + + // view_exact(i, j, k) = sin(pi * x) * sin(pi * y) * sin(pi * z); + view_exact(i, j, k) = + sin(sin(pi * x)) * sin(sin(pi * y)) * sin(sin(pi * z)); + }); FFTsolver.solve(); - //Compute the relative error norm - field = field - phifield_exact; - field = pow(field,2); - phifield_exact = pow(phifield_exact,2); - double error1 = sqrt(field.sum()); - double error2 = sqrt(phifield_exact.sum()); - double error_norm2 = error1/error2; - - if(Ippl::Comm->rank() == 0) { - std::cout << "L2 relative error norm: " - << error_norm2 << std::endl; + // Compute the relative error norm + field = field - phifield_exact; + field = pow(field, 2); + phifield_exact = pow(phifield_exact, 2); + double error1 = sqrt(field.sum()); + double error2 = sqrt(phifield_exact.sum()); + double error_norm2 = error1 / error2; + + if (Ippl::Comm->rank() == 0) { + std::cout << "L2 relative error norm: " << error_norm2 << std::endl; } break; - } + } case Solver_t::GRAD: { - VField_t Efield, Efield_exact; Efield.initialize(mesh, layout); @@ -131,90 +130,97 @@ int main(int argc, char *argv[]) { auto Eview_exact = Efield_exact.getView(); - Kokkos::parallel_for("Assign rhs", - mdrange_type({nghost, nghost, nghost}, - {view.extent(0) - nghost, - view.extent(1) - nghost, - view.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, - const int j, - const int k) - { - //local to global index conversion - const size_t ig = i + lDom[0].first() - nghost; - const size_t jg = j + lDom[1].first() - nghost; - const size_t kg = k + lDom[2].first() - nghost; - double x = origin[0] + (ig + 0.5) * hx[0]; - double y = origin[1] + (jg + 0.5) * hx[1]; - double z = origin[2] + (kg + 0.5) * hx[2]; - - view(i, j, k) = pow(pi, 2) * (cos(sin(pi * z)) * sin(pi * z) * sin(sin(pi * x)) * sin(sin(pi * y)) - + (cos(sin(pi * y)) * sin(pi * y) * sin(sin(pi * x)) + (cos(sin(pi * x)) * sin(pi * x) - + (pow(cos(pi * x), 2) + pow(cos(pi * y), 2) +pow(cos(pi * z), 2)) * sin(sin(pi * x))) - * sin(sin(pi * y))) * sin(sin(pi * z))); - - Eview_exact(i, j, k)[0] = -pi * cos(pi * x) * cos(sin(pi * x)) * sin(sin(pi * y)) * sin(sin(pi * z)); - Eview_exact(i, j, k)[1] = -pi * cos(pi * y) * cos(sin(pi * y)) * sin(sin(pi * x)) * sin(sin(pi * z)); - Eview_exact(i, j, k)[2] = -pi * cos(pi * z) * cos(sin(pi * z)) * sin(sin(pi * x)) * sin(sin(pi * y)); - }); + Kokkos::parallel_for( + "Assign rhs", + mdrange_type({nghost, nghost, nghost}, + {view.extent(0) - nghost, view.extent(1) - nghost, + view.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // local to global index conversion + const size_t ig = i + lDom[0].first() - nghost; + const size_t jg = j + lDom[1].first() - nghost; + const size_t kg = k + lDom[2].first() - nghost; + double x = origin[0] + (ig + 0.5) * hx[0]; + double y = origin[1] + (jg + 0.5) * hx[1]; + double z = origin[2] + (kg + 0.5) * hx[2]; + + view(i, j, k) = + pow(pi, 2) + * (cos(sin(pi * z)) * sin(pi * z) * sin(sin(pi * x)) * sin(sin(pi * y)) + + (cos(sin(pi * y)) * sin(pi * y) * sin(sin(pi * x)) + + (cos(sin(pi * x)) * sin(pi * x) + + (pow(cos(pi * x), 2) + pow(cos(pi * y), 2) + + pow(cos(pi * z), 2)) + * sin(sin(pi * x))) + * sin(sin(pi * y))) + * sin(sin(pi * z))); + + Eview_exact(i, j, k)[0] = -pi * cos(pi * x) * cos(sin(pi * x)) + * sin(sin(pi * y)) * sin(sin(pi * z)); + Eview_exact(i, j, k)[1] = -pi * cos(pi * y) * cos(sin(pi * y)) + * sin(sin(pi * x)) * sin(sin(pi * z)); + Eview_exact(i, j, k)[2] = -pi * cos(pi * z) * cos(sin(pi * z)) + * sin(sin(pi * x)) * sin(sin(pi * y)); + }); FFTsolver.setLhs(Efield); FFTsolver.solve(); ippl::Vector errorNr, errorDr, error_norm2; - Efield = Efield - Efield_exact; + Efield = Efield - Efield_exact; auto Eview = Efield.getView(); - //We don't have a vector reduce yet.. - for (size_t d=0; d>({nghost, nghost, nghost}, - {view.extent(0) - nghost, - view.extent(1) - nghost, - view.extent(2) - nghost}), - KOKKOS_LAMBDA(const size_t i, const size_t j, - const size_t k, double& valL) { - double myVal = pow(Eview(i, j, k)[d], 2); - valL += myVal; - }, Kokkos::Sum(temp)); + Kokkos::parallel_reduce( + "Vector errorNr reduce", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, + {view.extent(0) - nghost, view.extent(1) - nghost, + view.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, + double& valL) { + double myVal = pow(Eview(i, j, k)[d], 2); + valL += myVal; + }, + Kokkos::Sum(temp)); double globaltemp = 0.0; MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); errorNr[d] = sqrt(globaltemp); temp = 0.0; - Kokkos::parallel_reduce("Vector errorDr reduce", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view.extent(0) - nghost, - view.extent(1) - nghost, - view.extent(2) - nghost}), - KOKKOS_LAMBDA(const size_t i, const size_t j, - const size_t k, double& valL) { - double myVal = pow(Eview_exact(i, j, k)[d], 2); - valL += myVal; - }, Kokkos::Sum(temp)); + Kokkos::parallel_reduce( + "Vector errorDr reduce", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, + {view.extent(0) - nghost, view.extent(1) - nghost, + view.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, + double& valL) { + double myVal = pow(Eview_exact(i, j, k)[d], 2); + valL += myVal; + }, + Kokkos::Sum(temp)); globaltemp = 0.0; MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); errorDr[d] = sqrt(globaltemp); - error_norm2[d] = errorNr[d]/errorDr[d]; + error_norm2[d] = errorNr[d] / errorDr[d]; } - if(Ippl::Comm->rank() == 0) { - for (size_t d=0; drank() == 0) { + for (size_t d = 0; d < dim; ++d) { + std::cout << "L2 relative error norm Efield[" << d + << "]: " << error_norm2[d] << std::endl; } } break; - } + } default: - std::cout << "Unrecognized option" << std::endl; + std::cout << "Unrecognized option" << std::endl; } } diff --git a/src/Solver/test/TestGaussian.cpp b/src/Solver/test/TestGaussian.cpp index dbc525789..9fd0b5deb 100644 --- a/src/Solver/test/TestGaussian.cpp +++ b/src/Solver/test/TestGaussian.cpp @@ -5,51 +5,48 @@ // Possible algorithms: "HOCKNEY" or "VICO". // Possible heffte parameters: // - "pencils" or "slabs" (reshape) -// - "a2a", "a2av", "p2p", "p2p_pl" (communication) +// - "a2a", "a2av", "p2p", "p2p_pl" (communication) // - "reorder" or "no-reorder" (reordering) // See heffte documentation for more information on these parameters: // https://mkstoyanov.bitbucket.io/heffte/ +#include +#include "FFTPoissonSolver.h" #include "Ippl.h" #include "Utility/IpplException.h" #include "Utility/IpplTimings.h" -#include "FFTPoissonSolver.h" -#include -KOKKOS_INLINE_FUNCTION -double gaussian(double x, double y, double z, double sigma = 0.05, double mu = 0.5) { +KOKKOS_INLINE_FUNCTION double gaussian(double x, double y, double z, double sigma = 0.05, + double mu = 0.5) { + double pi = std::acos(-1.0); + double prefactor = (1 / std::sqrt(2 * 2 * 2 * pi * pi * pi)) * (1 / (sigma * sigma * sigma)); + double r2 = (x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu); - double pi = std::acos(-1.0); - double prefactor = (1/std::sqrt(2*2*2*pi*pi*pi))*(1/(sigma*sigma*sigma)); - double r2 = (x-mu)*(x-mu) + (y-mu)*(y-mu) + (z-mu)*(z-mu); - - return -prefactor * exp(-r2/(2*sigma*sigma)); + return -prefactor * exp(-r2 / (2 * sigma * sigma)); } -KOKKOS_INLINE_FUNCTION -double exact_fct(double x, double y, double z, double sigma = 0.05, double mu = 0.5) { - +KOKKOS_INLINE_FUNCTION double exact_fct(double x, double y, double z, double sigma = 0.05, + double mu = 0.5) { double pi = std::acos(-1.0); - double r = std::sqrt((x-mu)*(x-mu) + (y-mu)*(y-mu) + (z-mu)*(z-mu)); + double r = std::sqrt((x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu)); - return (1/(4.0*pi*r)) * std::erf(r/(std::sqrt(2.0)*sigma)); + return (1 / (4.0 * pi * r)) * std::erf(r / (std::sqrt(2.0) * sigma)); } -KOKKOS_INLINE_FUNCTION -ippl::Vector exact_E(double x, double y, double z, double sigma = 0.05, double mu = 0.5) { - - double pi = std::acos(-1.0); - double r = std::sqrt((x-mu)*(x-mu) + (y-mu)*(y-mu) + (z-mu)*(z-mu)); - double factor = (1.0/(4.0*pi*r*r)) * ((1.0/r)*std::erf(r/(std::sqrt(2.0)*sigma)) - - std::sqrt(2.0/pi)*(1.0/sigma)*exp(-r*r/(2*sigma*sigma))); +KOKKOS_INLINE_FUNCTION ippl::Vector exact_E(double x, double y, double z, + double sigma = 0.05, double mu = 0.5) { + double pi = std::acos(-1.0); + double r = std::sqrt((x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu)); + double factor = (1.0 / (4.0 * pi * r * r)) + * ((1.0 / r) * std::erf(r / (std::sqrt(2.0) * sigma)) + - std::sqrt(2.0 / pi) * (1.0 / sigma) * exp(-r * r / (2 * sigma * sigma))); - ippl::Vector Efield = {(x-mu), (y-mu), (z-mu)}; + ippl::Vector Efield = {(x - mu), (y - mu), (z - mu)}; return factor * Efield; } -int main(int argc, char *argv[]) { - - Ippl ippl(argc,argv); +int main(int argc, char* argv[]) { + Ippl ippl(argc, argv); Inform msg(argv[0]); Inform msg2all(argv[0], INFORM_ALL_NODES); @@ -61,28 +58,24 @@ int main(int argc, char *argv[]) { IpplTimings::startTimer(allTimer); // get the gridsize from the user - ippl::Vector nr = { - std::atoi(argv[1]), - std::atoi(argv[2]), - std::atoi(argv[3]) - }; + ippl::Vector nr = {std::atoi(argv[1]), std::atoi(argv[2]), std::atoi(argv[3])}; // get heffte parameters from the user - std::string reshape = argv[4]; // slabs or pencils - std::string communication = argv[5]; // a2a or p2p - std::string reordering = argv[6]; // reorder or no-reorder + std::string reshape = argv[4]; // slabs or pencils + std::string communication = argv[5]; // a2a or p2p + std::string reordering = argv[6]; // reorder or no-reorder // get the algorithm to be used - std::string algorithm = argv[7]; // Hockney or Vico + std::string algorithm = argv[7]; // Hockney or Vico // print out info and title for the relative error (L2 norm) msg << "Test Gaussian, grid = " << nr << ", heffte params: " << reshape << " " << communication << " " << reordering << ", algorithm = " << algorithm << endl; msg << "Spacing Error ErrorEx ErrorEy ErrorEz" << endl; - + // domain ippl::NDIndex owned; - for (unsigned i = 0; i< Dim; i++) { + for (unsigned i = 0; i < Dim; i++) { owned[i] = ippl::Index(nr[i]); } @@ -90,19 +83,19 @@ int main(int argc, char *argv[]) { ippl::e_dim_tag decomp[Dim]; for (unsigned int d = 0; d < Dim; d++) { decomp[d] = ippl::PARALLEL; - } + } // unit box - double dx = 1.0/nr[0]; - double dy = 1.0/nr[1]; - double dz = 1.0/nr[2]; - ippl::Vector hr = {dx, dy, dz}; + double dx = 1.0 / nr[0]; + double dy = 1.0 / nr[1]; + double dz = 1.0 / nr[2]; + ippl::Vector hr = {dx, dy, dz}; ippl::Vector origin = {0.0, 0.0, 0.0}; ippl::UniformCartesian mesh(owned, hr, origin); // all parallel layout, standard domain, normal axis order ippl::FieldLayout layout(owned, decomp); - + // define the R (rho) field typedef ippl::Field field; field exact, rho; @@ -114,7 +107,7 @@ int main(int argc, char *argv[]) { fieldV exactE, fieldE; exactE.initialize(mesh, layout); fieldE.initialize(mesh, layout); - + field Ex, Ey, Ez; Ex.initialize(mesh, layout); Ey.initialize(mesh, layout); @@ -122,236 +115,244 @@ int main(int argc, char *argv[]) { // assign the rho field with a gaussian typename field::view_type view_rho = rho.getView(); - const int nghost = rho.getNghost(); - const auto& ldom = layout.getLocalNDIndex(); - - Kokkos::parallel_for("Assign rho field", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_rho.extent(0) - nghost, - view_rho.extent(1) - nghost, - view_rho.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - // go from local to global indices - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - // define the physical points (cell-centered) - double x = (ig + 0.5) * hr[0] + origin[0]; - double y = (jg + 0.5) * hr[1] + origin[1]; - double z = (kg + 0.5) * hr[2] + origin[2]; - - view_rho(i, j, k) = gaussian(x, y, z); - - }); + const int nghost = rho.getNghost(); + const auto& ldom = layout.getLocalNDIndex(); + + Kokkos::parallel_for( + "Assign rho field", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, {view_rho.extent(0) - nghost, view_rho.extent(1) - nghost, + view_rho.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // go from local to global indices + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + // define the physical points (cell-centered) + double x = (ig + 0.5) * hr[0] + origin[0]; + double y = (jg + 0.5) * hr[1] + origin[1]; + double z = (kg + 0.5) * hr[2] + origin[2]; + + view_rho(i, j, k) = gaussian(x, y, z); + }); // assign the exact field with its values (erf function) typename field::view_type view_exact = exact.getView(); - - Kokkos::parallel_for("Assign exact field", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_exact.extent(0) - nghost, - view_exact.extent(1) - nghost, - view_exact.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - double x = (ig + 0.5) * hr[0] + origin[0]; - double y = (jg + 0.5) * hr[1] + origin[1]; - double z = (kg + 0.5) * hr[2] + origin[2]; - - view_exact(i, j, k) = exact_fct(x,y,z); - }); - + + Kokkos::parallel_for( + "Assign exact field", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, {view_exact.extent(0) - nghost, view_exact.extent(1) - nghost, + view_exact.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + double x = (ig + 0.5) * hr[0] + origin[0]; + double y = (jg + 0.5) * hr[1] + origin[1]; + double z = (kg + 0.5) * hr[2] + origin[2]; + + view_exact(i, j, k) = exact_fct(x, y, z); + }); + // assign the exact E field auto view_exactE = exactE.getView(); - - Kokkos::parallel_for("Assign exact E-field", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_exactE.extent(0) - nghost, - view_exactE.extent(1) - nghost, - view_exactE.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - double x = (ig + 0.5) * hr[0] + origin[0]; - double y = (jg + 0.5) * hr[1] + origin[1]; - double z = (kg + 0.5) * hr[2] + origin[2]; - - view_exactE(i, j, k)[0] = exact_E(x,y,z)[0]; - view_exactE(i, j, k)[1] = exact_E(x,y,z)[1]; - view_exactE(i, j, k)[2] = exact_E(x,y,z)[2]; - }); - + + Kokkos::parallel_for( + "Assign exact E-field", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, + {view_exactE.extent(0) - nghost, view_exactE.extent(1) - nghost, + view_exactE.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + double x = (ig + 0.5) * hr[0] + origin[0]; + double y = (jg + 0.5) * hr[1] + origin[1]; + double z = (kg + 0.5) * hr[2] + origin[2]; + + view_exactE(i, j, k)[0] = exact_E(x, y, z)[0]; + view_exactE(i, j, k)[1] = exact_E(x, y, z)[1]; + view_exactE(i, j, k)[2] = exact_E(x, y, z)[2]; + }); + // set the FFT parameters ippl::ParameterList fftParams; if (reshape == "pencils") { - fftParams.add("use_pencils", true); + fftParams.add("use_pencils", true); } else if (reshape == "slabs") { - fftParams.add("use_pencils", false); + fftParams.add("use_pencils", false); } else { - throw IpplException("TestGaussian.cpp main()", "Unrecognized heffte parameter"); + throw IpplException("TestGaussian.cpp main()", "Unrecognized heffte parameter"); } - + if (communication == "a2a") { - fftParams.add("comm", ippl::a2a); + fftParams.add("comm", ippl::a2a); } else if (communication == "a2av") { - fftParams.add("comm", ippl::a2av); + fftParams.add("comm", ippl::a2av); } else if (communication == "p2p") { - fftParams.add("comm", ippl::p2p); + fftParams.add("comm", ippl::p2p); } else if (communication == "p2p_pl") { - fftParams.add("comm", ippl::p2p_pl); + fftParams.add("comm", ippl::p2p_pl); } else { - throw IpplException("TestGaussian.cpp main()", "Unrecognized heffte parameter"); + throw IpplException("TestGaussian.cpp main()", "Unrecognized heffte parameter"); } - + if (reordering == "reorder") { fftParams.add("use_reorder", true); } else if (reordering == "no-reorder") { fftParams.add("use_reorder", false); } else { - throw IpplException("TestGaussian.cpp main()", "Unrecognized heffte parameter"); + throw IpplException("TestGaussian.cpp main()", "Unrecognized heffte parameter"); } fftParams.add("use_heffte_defaults", false); - fftParams.add("use_gpu_aware", true); - fftParams.add("r2c_direction", 0); + fftParams.add("use_gpu_aware", true); + fftParams.add("r2c_direction", 0); // define an FFTPoissonSolver object - ippl::FFTPoissonSolver, double, Dim> FFTsolver(fieldE, rho, fftParams, algorithm); - + ippl::FFTPoissonSolver, double, Dim> FFTsolver(fieldE, rho, fftParams, + algorithm); + // iterate over 5 timesteps for (int times = 0; times < 5; ++times) { - // solve the Poisson equation -> rho contains the solution (phi) now FFTsolver.solve(); - const int nghostE = fieldE.getNghost(); - auto Eview = fieldE.getView(); + const int nghostE = fieldE.getNghost(); + auto Eview = fieldE.getView(); using mdrange_type = Kokkos::MDRangePolicy>; auto viewEx = Ex.getView(); auto viewEy = Ey.getView(); auto viewEz = Ez.getView(); - Kokkos::parallel_for("Vector E reduce", - mdrange_type({nghostE, nghostE, nghostE}, - {Eview.extent(0) - nghostE, - Eview.extent(1) - nghostE, - Eview.extent(2) - nghostE}), - KOKKOS_LAMBDA(const size_t i, const size_t j, - const size_t k) { - viewEx(i,j,k) = Eview(i, j, k)[0]; - viewEy(i,j,k) = Eview(i, j, k)[1]; - viewEz(i,j,k) = Eview(i, j, k)[2]; - }); + Kokkos::parallel_for( + "Vector E reduce", + mdrange_type( + {nghostE, nghostE, nghostE}, + {Eview.extent(0) - nghostE, Eview.extent(1) - nghostE, Eview.extent(2) - nghostE}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { + viewEx(i, j, k) = Eview(i, j, k)[0]; + viewEy(i, j, k) = Eview(i, j, k)[1]; + viewEz(i, j, k) = Eview(i, j, k)[2]; + }); // compute relative error norm for potential - rho = rho - exact; - double err = norm(rho)/norm(exact); - + rho = rho - exact; + double err = norm(rho) / norm(exact); + // compute relative error norm for the E-field components - ippl::Vector errE {0.0, 0.0, 0.0}; - fieldE = fieldE - exactE; + ippl::Vector errE{0.0, 0.0, 0.0}; + fieldE = fieldE - exactE; auto view_fieldE = fieldE.getView(); for (size_t d = 0; d < Dim; ++d) { - double temp = 0.0; - Kokkos::parallel_reduce("Vector errorNr reduce", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, {view_fieldE.extent(0) - - nghost, view_fieldE.extent(1) - nghost, view_fieldE.extent(2) - nghost}), - KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { - double myVal = pow(view_fieldE(i, j, k)[d], 2); - valL += myVal; - }, Kokkos::Sum(temp)); - - double globaltemp = 0.0; - MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); - double errorNr = std::sqrt(globaltemp); - - temp = 0.0; - Kokkos::parallel_reduce("Vector errorDr reduce", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, {view_exactE.extent(0) - - nghost, view_exactE.extent(1) - nghost, view_exactE.extent(2) - nghost}), - KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { - double myVal = pow(view_exactE(i, j, k)[d], 2); - valL += myVal; - }, Kokkos::Sum(temp)); - - globaltemp = 0.0; + double temp = 0.0; + Kokkos::parallel_reduce( + "Vector errorNr reduce", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, + {view_fieldE.extent(0) - nghost, view_fieldE.extent(1) - nghost, + view_fieldE.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { + double myVal = pow(view_fieldE(i, j, k)[d], 2); + valL += myVal; + }, + Kokkos::Sum(temp)); + + double globaltemp = 0.0; + MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + double errorNr = std::sqrt(globaltemp); + + temp = 0.0; + Kokkos::parallel_reduce( + "Vector errorDr reduce", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, + {view_exactE.extent(0) - nghost, view_exactE.extent(1) - nghost, + view_exactE.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { + double myVal = pow(view_exactE(i, j, k)[d], 2); + valL += myVal; + }, + Kokkos::Sum(temp)); + + globaltemp = 0.0; MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); double errorDr = std::sqrt(globaltemp); - errE[d] = errorNr/errorDr; + errE[d] = errorNr / errorDr; } - - msg << std::setprecision(16) << dx << " " << err - << " " << errE[0] << " " << errE[1] << " " << errE[2] << endl; - - // reassign the correct values to the fields for the loop to work - Kokkos::parallel_for("Assign rho field", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_rho.extent(0) - nghost, - view_rho.extent(1) - nghost, - view_rho.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - // go from local to global indices - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - // define the physical points (cell-centered) - double x = (ig + 0.5) * hr[0] + origin[0]; - double y = (jg + 0.5) * hr[1] + origin[1]; - double z = (kg + 0.5) * hr[2] + origin[2]; - - view_rho(i, j, k) = gaussian(x, y, z); - }); - - Kokkos::parallel_for("Assign exact field", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_exact.extent(0) - nghost, - view_exact.extent(1) - nghost, - view_exact.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - double x = (ig + 0.5) * hr[0] + origin[0]; - double y = (jg + 0.5) * hr[1] + origin[1]; - double z = (kg + 0.5) * hr[2] + origin[2]; - - view_exact(i, j, k) = exact_fct(x,y,z); - }); - - Kokkos::parallel_for("Assign exact E-field", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_exactE.extent(0) - nghost, - view_exactE.extent(1) - nghost, - view_exactE.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - double x = (ig + 0.5) * hr[0] + origin[0]; - double y = (jg + 0.5) * hr[1] + origin[1]; - double z = (kg + 0.5) * hr[2] + origin[2]; - - view_exactE(i, j, k)[0] = exact_E(x,y,z)[0]; - view_exactE(i, j, k)[1] = exact_E(x,y,z)[1]; - view_exactE(i, j, k)[2] = exact_E(x,y,z)[2]; - }); + msg << std::setprecision(16) << dx << " " << err << " " << errE[0] << " " << errE[1] << " " + << errE[2] << endl; + + // reassign the correct values to the fields for the loop to work + Kokkos::parallel_for( + "Assign rho field", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, {view_rho.extent(0) - nghost, view_rho.extent(1) - nghost, + view_rho.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // go from local to global indices + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + // define the physical points (cell-centered) + double x = (ig + 0.5) * hr[0] + origin[0]; + double y = (jg + 0.5) * hr[1] + origin[1]; + double z = (kg + 0.5) * hr[2] + origin[2]; + + view_rho(i, j, k) = gaussian(x, y, z); + }); + + Kokkos::parallel_for( + "Assign exact field", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, + {view_exact.extent(0) - nghost, view_exact.extent(1) - nghost, + view_exact.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + double x = (ig + 0.5) * hr[0] + origin[0]; + double y = (jg + 0.5) * hr[1] + origin[1]; + double z = (kg + 0.5) * hr[2] + origin[2]; + + view_exact(i, j, k) = exact_fct(x, y, z); + }); + + Kokkos::parallel_for( + "Assign exact E-field", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, + {view_exactE.extent(0) - nghost, view_exactE.extent(1) - nghost, + view_exactE.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + double x = (ig + 0.5) * hr[0] + origin[0]; + double y = (jg + 0.5) * hr[1] + origin[1]; + double z = (kg + 0.5) * hr[2] + origin[2]; + + view_exactE(i, j, k)[0] = exact_E(x, y, z)[0]; + view_exactE(i, j, k)[1] = exact_E(x, y, z)[1]; + view_exactE(i, j, k)[2] = exact_E(x, y, z)[2]; + }); } // stop the timers IpplTimings::stopTimer(allTimer); IpplTimings::print(std::string("timing.dat")); - + return 0; } diff --git a/src/Solver/test/TestGaussian_biharmonic.cpp b/src/Solver/test/TestGaussian_biharmonic.cpp index 14c68b68b..7c60a1ee8 100644 --- a/src/Solver/test/TestGaussian_biharmonic.cpp +++ b/src/Solver/test/TestGaussian_biharmonic.cpp @@ -1,52 +1,51 @@ // This program tests the FFTPoissonSolver class with a Gaussian source. // Different problem sizes are used for the purpose of convergence tests. // The algorithm used is chosen by the user: -// srun ./TestGaussian_convergence HOCKNEY --info 10 +// srun ./TestGaussian_convergence HOCKNEY --info 10 // OR srun ./TestGaussian_convergence VICO --info 10 +#include "FFTPoissonSolver.h" #include "Ippl.h" #include "Utility/IpplTimings.h" -#include "FFTPoissonSolver.h" - -KOKKOS_INLINE_FUNCTION -double gaussian(double x, double y, double z, double sigma = 0.05, double mu = 0.5) { - double pi = std::acos(-1.0); - double prefactor = (1/std::sqrt(2*2*2*pi*pi*pi))*(1/(sigma*sigma*sigma)); - double r2 = (x-mu)*(x-mu) + (y-mu)*(y-mu) + (z-mu)*(z-mu); +KOKKOS_INLINE_FUNCTION double gaussian(double x, double y, double z, double sigma = 0.05, + double mu = 0.5) { + double pi = std::acos(-1.0); + double prefactor = (1 / std::sqrt(2 * 2 * 2 * pi * pi * pi)) * (1 / (sigma * sigma * sigma)); + double r2 = (x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu); - return -prefactor * exp(-r2/(2*sigma*sigma)); + return -prefactor * exp(-r2 / (2 * sigma * sigma)); } -KOKKOS_INLINE_FUNCTION -double exact_fct(double x, double y, double z, double sigma = 0.05, double mu = 0.5) { - +KOKKOS_INLINE_FUNCTION double exact_fct(double x, double y, double z, double sigma = 0.05, + double mu = 0.5) { double pi = std::acos(-1.0); - double r = std::sqrt((x-mu)*(x-mu) + (y-mu)*(y-mu) + (z-mu)*(z-mu)); - double r2 = (x-mu)*(x-mu) + (y-mu)*(y-mu) + (z-mu)*(z-mu); + double r = std::sqrt((x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu)); + double r2 = (x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu); - return (1/(8.0*pi)) * (sigma*std::sqrt(2.0/pi)*exp(-r2/(2*sigma*sigma)) - + std::erf(r/(std::sqrt(2.0)*sigma))*(r + (sigma*sigma/r))); + return (1 / (8.0 * pi)) + * (sigma * std::sqrt(2.0 / pi) * exp(-r2 / (2 * sigma * sigma)) + + std::erf(r / (std::sqrt(2.0) * sigma)) * (r + (sigma * sigma / r))); } -KOKKOS_INLINE_FUNCTION -ippl::Vector exact_grad(double x, double y, double z, double sigma = 0.05, double mu = 0.5) { - +KOKKOS_INLINE_FUNCTION ippl::Vector exact_grad(double x, double y, double z, + double sigma = 0.05, double mu = 0.5) { double pi = std::acos(-1.0); - double r = std::sqrt((x-mu)*(x-mu) + (y-mu)*(y-mu) + (z-mu)*(z-mu)); - double r2 = (x-mu)*(x-mu) + (y-mu)*(y-mu) + (z-mu)*(z-mu); - - ippl::Vector Efield = {(x-mu), (y-mu), (z-mu)}; - double factor = -(1.0/r) * (1/(8.0*pi)) * ((sigma/r)*std::sqrt(2.0/pi)*exp(-r2/(2*sigma*sigma)) - + std::erf(r/(std::sqrt(2.0)*sigma))*(1.0 - (sigma*sigma/(r*r)))); + double r = std::sqrt((x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu)); + double r2 = (x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu); + + ippl::Vector Efield = {(x - mu), (y - mu), (z - mu)}; + double factor = + -(1.0 / r) * (1 / (8.0 * pi)) + * ((sigma / r) * std::sqrt(2.0 / pi) * exp(-r2 / (2 * sigma * sigma)) + + std::erf(r / (std::sqrt(2.0) * sigma)) * (1.0 - (sigma * sigma / (r * r)))); return factor * Efield; } // Define vtk dump function for plotting the fields -void dumpVTK(std::string path, ippl::Field& rho, int nx, int ny, int nz, int iteration, +void dumpVTK(std::string path, ippl::Field& rho, int nx, int ny, int nz, int iteration, double dx, double dy, double dz) { - - typename ippl::Field::view_type::host_mirror_type host_view = rho.getHostMirror(); + typename ippl::Field::view_type::host_mirror_type host_view = rho.getHostMirror(); Kokkos::deep_copy(host_view, rho.getView()); std::ofstream vtkout; vtkout.precision(10); @@ -61,39 +60,37 @@ void dumpVTK(std::string path, ippl::Field& rho, int nx, int ny, int n // open a new data file for this iteration // and start with header vtkout.open(fname.str().c_str(), std::ios::out); - if (!vtkout) - { - std::cout <<"couldn't open" << std::endl; + if (!vtkout) { + std::cout << "couldn't open" << std::endl; } vtkout << "# vtk DataFile Version 2.0" << std::endl; vtkout << "GaussianSource" << std::endl; vtkout << "ASCII" << std::endl; vtkout << "DATASET STRUCTURED_POINTS" << std::endl; - vtkout << "DIMENSIONS " << nx+1 << " " << ny+1 << " " << nz+1 << std::endl; + vtkout << "DIMENSIONS " << nx + 1 << " " << ny + 1 << " " << nz + 1 << std::endl; vtkout << "ORIGIN " << 0.0 << " " << 0.0 << " " << 0.0 << std::endl; vtkout << "SPACING " << dx << " " << dy << " " << dz << std::endl; - vtkout << "CELL_DATA " << (nx)*(ny)*(nz) << std::endl; - + vtkout << "CELL_DATA " << (nx) * (ny) * (nz) << std::endl; + vtkout << "SCALARS Phi float" << std::endl; vtkout << "LOOKUP_TABLE default" << std::endl; - for (int z=1; z N = {4,8,16,32,64,128}; + // number of gridpoints to iterate over + std::array N = {4, 8, 16, 32, 64, 128}; msg << "Spacing Error" << endl; for (int p = 0; p < n; ++p) { - // domain int pt = N[p]; ippl::Index I(pt); @@ -121,19 +117,19 @@ int main(int argc, char *argv[]) { decomp[d] = ippl::PARALLEL; // unit box - double dx = 1.0/pt; - ippl::Vector hx = {dx, dx, dx}; + double dx = 1.0 / pt; + ippl::Vector hx = {dx, dx, dx}; ippl::Vector origin = {0.0, 0.0, 0.0}; ippl::UniformCartesian mesh(owned, hx, origin); // all parallel layout, standard domain, normal axis order ippl::FieldLayout<3> layout(owned, decomp); - + // define the R (rho) field typedef ippl::Field field; field rho; rho.initialize(mesh, layout); - + // define the exact solution field field exact; exact.initialize(mesh, layout); @@ -146,105 +142,112 @@ int main(int argc, char *argv[]) { // assign the rho field with a gaussian typename field::view_type view_rho = rho.getView(); - const int nghost = rho.getNghost(); - const auto& ldom = layout.getLocalNDIndex(); - - Kokkos::parallel_for("Assign rho field", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_rho.extent(0) - nghost, - view_rho.extent(1) - nghost, - view_rho.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - // go from local to global indices - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - // define the physical points (cell-centered) - double x = (ig + 0.5) * hx[0] + origin[0]; - double y = (jg + 0.5) * hx[1] + origin[1]; - double z = (kg + 0.5) * hx[2] + origin[2]; - - view_rho(i, j, k) = gaussian(x, y, z); - }); + const int nghost = rho.getNghost(); + const auto& ldom = layout.getLocalNDIndex(); + + Kokkos::parallel_for( + "Assign rho field", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, {view_rho.extent(0) - nghost, view_rho.extent(1) - nghost, + view_rho.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // go from local to global indices + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + // define the physical points (cell-centered) + double x = (ig + 0.5) * hx[0] + origin[0]; + double y = (jg + 0.5) * hx[1] + origin[1]; + double z = (kg + 0.5) * hx[2] + origin[2]; + + view_rho(i, j, k) = gaussian(x, y, z); + }); // assign the exact field with its values (erf function) typename field::view_type view_exact = exact.getView(); - - Kokkos::parallel_for("Assign exact field", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_exact.extent(0) - nghost, - view_exact.extent(1) - nghost, - view_exact.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - double x = (ig + 0.5) * hx[0] + origin[0]; - double y = (jg + 0.5) * hx[1] + origin[1]; - double z = (kg + 0.5) * hx[2] + origin[2]; - - view_exact(i, j, k) = exact_fct(x,y,z); - }); + + Kokkos::parallel_for( + "Assign exact field", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, + {view_exact.extent(0) - nghost, view_exact.extent(1) - nghost, + view_exact.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + double x = (ig + 0.5) * hx[0] + origin[0]; + double y = (jg + 0.5) * hx[1] + origin[1]; + double z = (kg + 0.5) * hx[2] + origin[2]; + + view_exact(i, j, k) = exact_fct(x, y, z); + }); // assign the exact gradient field auto view_grad = exactE.getView(); - Kokkos::parallel_for("Assign exact field", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_grad.extent(0) - nghost, - view_grad.extent(1) - nghost, - view_grad.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - double x = (ig + 0.5) * hx[0] + origin[0]; - double y = (jg + 0.5) * hx[1] + origin[1]; - double z = (kg + 0.5) * hx[2] + origin[2]; - - view_grad(i, j, k)[0] = exact_grad(x,y,z)[0]; - view_grad(i, j, k)[1] = exact_grad(x,y,z)[1]; - view_grad(i, j, k)[2] = exact_grad(x,y,z)[2]; - }); + Kokkos::parallel_for( + "Assign exact field", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, + {view_grad.extent(0) - nghost, view_grad.extent(1) - nghost, + view_grad.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + double x = (ig + 0.5) * hx[0] + origin[0]; + double y = (jg + 0.5) * hx[1] + origin[1]; + double z = (kg + 0.5) * hx[2] + origin[2]; + + view_grad(i, j, k)[0] = exact_grad(x, y, z)[0]; + view_grad(i, j, k)[1] = exact_grad(x, y, z)[1]; + view_grad(i, j, k)[2] = exact_grad(x, y, z)[2]; + }); Kokkos::fence(); - // set the FFT parameters + // set the FFT parameters ippl::ParameterList fftParams; - fftParams.add("use_heffte_defaults", false); - fftParams.add("use_pencils", true); - fftParams.add("use_gpu_aware", true); - fftParams.add("comm", ippl::a2av); - fftParams.add("r2c_direction", 0); + fftParams.add("use_heffte_defaults", false); + fftParams.add("use_pencils", true); + fftParams.add("use_gpu_aware", true); + fftParams.add("comm", ippl::a2av); + fftParams.add("r2c_direction", 0); // define an FFTPoissonSolver object - ippl::FFTPoissonSolver, double, 3> FFTsolver(fieldE, rho, fftParams, algorithm); - + ippl::FFTPoissonSolver, double, 3> FFTsolver(fieldE, rho, fftParams, + algorithm); + // solve the Poisson equation -> rho contains the solution (phi) now FFTsolver.solve(); // compute relative error norm for potential - rho = rho - exact; - double err = norm(rho)/norm(exact); - + rho = rho - exact; + double err = norm(rho) / norm(exact); + // compute relative error norm for the E-field components - ippl::Vector errE {0.0, 0.0, 0.0}; - fieldE = fieldE - exactE; + ippl::Vector errE{0.0, 0.0, 0.0}; + fieldE = fieldE - exactE; auto view_fieldE = fieldE.getView(); for (size_t d = 0; d < 3; ++d) { double temp = 0.0; - Kokkos::parallel_reduce("Vector errorNr reduce", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_fieldE.extent(0)-nghost, view_fieldE.extent(1)-nghost, view_fieldE.extent(2)-nghost}), - + Kokkos::parallel_reduce( + "Vector errorNr reduce", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, + {view_fieldE.extent(0) - nghost, view_fieldE.extent(1) - nghost, + view_fieldE.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { - double myVal = pow(view_fieldE(i,j,k)[d], 2); + double myVal = pow(view_fieldE(i, j, k)[d], 2); valL += myVal; - }, Kokkos::Sum(temp)); + }, + Kokkos::Sum(temp)); double globaltemp = 0.0; MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); @@ -252,30 +255,33 @@ int main(int argc, char *argv[]) { temp = 0.0; - Kokkos::parallel_reduce("Vector errorDr reduce", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_grad.extent(0)-nghost, view_grad.extent(1)-nghost, view_grad.extent(2)-nghost}), - + Kokkos::parallel_reduce( + "Vector errorDr reduce", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, + {view_grad.extent(0) - nghost, view_grad.extent(1) - nghost, + view_grad.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { - double myVal = pow(view_grad(i,j,k)[d], 2); + double myVal = pow(view_grad(i, j, k)[d], 2); valL += myVal; - }, Kokkos::Sum(temp)); + }, + Kokkos::Sum(temp)); globaltemp = 0.0; MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); double errorDr = std::sqrt(globaltemp); - errE[d] = errorNr/errorDr; + errE[d] = errorNr / errorDr; } - msg << std::setprecision(16) << dx << " " << err - << " " << errE[0] << " " << errE[1] << " " << errE[2] << endl; - + msg << std::setprecision(16) << dx << " " << err << " " << errE[0] << " " << errE[1] << " " + << errE[2] << endl; } - - // stop the timer + + // stop the timer IpplTimings::stopTimer(allTimer); IpplTimings::print(std::string("timing.dat")); - + return 0; } diff --git a/src/Solver/test/TestGaussian_convergence.cpp b/src/Solver/test/TestGaussian_convergence.cpp index 8a38d622b..a0e0b9761 100644 --- a/src/Solver/test/TestGaussian_convergence.cpp +++ b/src/Solver/test/TestGaussian_convergence.cpp @@ -1,49 +1,46 @@ // This program tests the FFTPoissonSolver class with a Gaussian source. // Different problem sizes are used for the purpose of convergence tests. // The algorithm used is chosen by the user: -// srun ./TestGaussian_convergence HOCKNEY --info 10 +// srun ./TestGaussian_convergence HOCKNEY --info 10 // OR srun ./TestGaussian_convergence VICO --info 10 +#include "FFTPoissonSolver.h" #include "Ippl.h" #include "Utility/IpplTimings.h" -#include "FFTPoissonSolver.h" -KOKKOS_INLINE_FUNCTION -double gaussian(double x, double y, double z, double sigma = 0.05, double mu = 0.5) { - - double pi = std::acos(-1.0); - double prefactor = (1/std::sqrt(2*2*2*pi*pi*pi))*(1/(sigma*sigma*sigma)); - double r2 = (x-mu)*(x-mu) + (y-mu)*(y-mu) + (z-mu)*(z-mu); +KOKKOS_INLINE_FUNCTION double gaussian(double x, double y, double z, double sigma = 0.05, + double mu = 0.5) { + double pi = std::acos(-1.0); + double prefactor = (1 / std::sqrt(2 * 2 * 2 * pi * pi * pi)) * (1 / (sigma * sigma * sigma)); + double r2 = (x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu); - return -prefactor * exp(-r2/(2*sigma*sigma)); + return -prefactor * exp(-r2 / (2 * sigma * sigma)); } -KOKKOS_INLINE_FUNCTION -double exact_fct(double x, double y, double z, double sigma = 0.05, double mu = 0.5) { - +KOKKOS_INLINE_FUNCTION double exact_fct(double x, double y, double z, double sigma = 0.05, + double mu = 0.5) { double pi = std::acos(-1.0); - double r = std::sqrt((x-mu)*(x-mu) + (y-mu)*(y-mu) + (z-mu)*(z-mu)); + double r = std::sqrt((x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu)); - return (1/(4.0*pi*r)) * std::erf(r/(std::sqrt(2.0)*sigma)); + return (1 / (4.0 * pi * r)) * std::erf(r / (std::sqrt(2.0) * sigma)); } -KOKKOS_INLINE_FUNCTION -ippl::Vector exact_E(double x, double y, double z, double sigma = 0.05, double mu = 0.5) { - - double pi = std::acos(-1.0); - double r = std::sqrt((x-mu)*(x-mu) + (y-mu)*(y-mu) + (z-mu)*(z-mu)); - double factor = (1.0/(4.0*pi*r*r)) * ((1.0/r)*std::erf(r/(std::sqrt(2.0)*sigma)) - - std::sqrt(2.0/pi)*(1.0/sigma)*exp(-r*r/(2*sigma*sigma))); +KOKKOS_INLINE_FUNCTION ippl::Vector exact_E(double x, double y, double z, + double sigma = 0.05, double mu = 0.5) { + double pi = std::acos(-1.0); + double r = std::sqrt((x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu)); + double factor = (1.0 / (4.0 * pi * r * r)) + * ((1.0 / r) * std::erf(r / (std::sqrt(2.0) * sigma)) + - std::sqrt(2.0 / pi) * (1.0 / sigma) * exp(-r * r / (2 * sigma * sigma))); - ippl::Vector Efield = {(x-mu), (y-mu), (z-mu)}; + ippl::Vector Efield = {(x - mu), (y - mu), (z - mu)}; return factor * Efield; } // Define vtk dump function for plotting the fields -void dumpVTK(std::string path, ippl::Field& rho, int nx, int ny, int nz, int iteration, +void dumpVTK(std::string path, ippl::Field& rho, int nx, int ny, int nz, int iteration, double dx, double dy, double dz) { - - typename ippl::Field::view_type::host_mirror_type host_view = rho.getHostMirror(); + typename ippl::Field::view_type::host_mirror_type host_view = rho.getHostMirror(); Kokkos::deep_copy(host_view, rho.getView()); std::ofstream vtkout; vtkout.precision(10); @@ -58,39 +55,37 @@ void dumpVTK(std::string path, ippl::Field& rho, int nx, int ny, int n // open a new data file for this iteration // and start with header vtkout.open(fname.str().c_str(), std::ios::out); - if (!vtkout) - { - std::cout <<"couldn't open" << std::endl; + if (!vtkout) { + std::cout << "couldn't open" << std::endl; } vtkout << "# vtk DataFile Version 2.0" << std::endl; vtkout << "GaussianSource" << std::endl; vtkout << "ASCII" << std::endl; vtkout << "DATASET STRUCTURED_POINTS" << std::endl; - vtkout << "DIMENSIONS " << nx+1 << " " << ny+1 << " " << nz+1 << std::endl; + vtkout << "DIMENSIONS " << nx + 1 << " " << ny + 1 << " " << nz + 1 << std::endl; vtkout << "ORIGIN " << 0.0 << " " << 0.0 << " " << 0.0 << std::endl; vtkout << "SPACING " << dx << " " << dy << " " << dz << std::endl; - vtkout << "CELL_DATA " << (nx)*(ny)*(nz) << std::endl; - + vtkout << "CELL_DATA " << (nx) * (ny) * (nz) << std::endl; + vtkout << "SCALARS Phi float" << std::endl; vtkout << "LOOKUP_TABLE default" << std::endl; - for (int z=1; z N = {4,8,16,32,64,128}; + // number of gridpoints to iterate over + std::array N = {4, 8, 16, 32, 64, 128}; msg << "Spacing Error ErrorEx ErrorEy ErrorEz" << endl; for (int p = 0; p < n; ++p) { - // domain - int pt = N[p]; - ippl::Index I(pt); - ippl::NDIndex<3> owned(I, I, I); - - // specifies decomposition; here all dimensions are parallel - ippl::e_dim_tag decomp[3]; - for (unsigned int d = 0; d < 3; d++) - decomp[d] = ippl::PARALLEL; - - // unit box - double dx = 1.0/pt; - ippl::Vector hx = {dx, dx, dx}; - ippl::Vector origin = {0.0, 0.0, 0.0}; - ippl::UniformCartesian mesh(owned, hx, origin); - - // all parallel layout, standard domain, normal axis order - ippl::FieldLayout<3> layout(owned, decomp); - - // define the R (rho) field - typedef ippl::Field field; - field rho; - rho.initialize(mesh, layout); + int pt = N[p]; + ippl::Index I(pt); + ippl::NDIndex<3> owned(I, I, I); + + // specifies decomposition; here all dimensions are parallel + ippl::e_dim_tag decomp[3]; + for (unsigned int d = 0; d < 3; d++) + decomp[d] = ippl::PARALLEL; + + // unit box + double dx = 1.0 / pt; + ippl::Vector hx = {dx, dx, dx}; + ippl::Vector origin = {0.0, 0.0, 0.0}; + ippl::UniformCartesian mesh(owned, hx, origin); + + // all parallel layout, standard domain, normal axis order + ippl::FieldLayout<3> layout(owned, decomp); + + // define the R (rho) field + typedef ippl::Field field; + field rho; + rho.initialize(mesh, layout); // define the exact solution field field exact; @@ -137,140 +131,148 @@ int main(int argc, char *argv[]) { // define the Vector field E and the exact E field typedef ippl::Field, 3> fieldV; - + fieldV exactE, fieldE; exactE.initialize(mesh, layout); fieldE.initialize(mesh, layout); - - // assign the rho field with a gaussian - typename field::view_type view_rho = rho.getView(); - const int nghost = rho.getNghost(); - const auto& ldom = layout.getLocalNDIndex(); - - Kokkos::parallel_for("Assign rho field", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_rho.extent(0) - nghost, - view_rho.extent(1) - nghost, - view_rho.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - // go from local to global indices - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - // define the physical points (cell-centered) - double x = (ig + 0.5) * hx[0] + origin[0]; - double y = (jg + 0.5) * hx[1] + origin[1]; - double z = (kg + 0.5) * hx[2] + origin[2]; - - view_rho(i, j, k) = gaussian(x, y, z); - - }); + + // assign the rho field with a gaussian + typename field::view_type view_rho = rho.getView(); + const int nghost = rho.getNghost(); + const auto& ldom = layout.getLocalNDIndex(); + + Kokkos::parallel_for( + "Assign rho field", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, {view_rho.extent(0) - nghost, view_rho.extent(1) - nghost, + view_rho.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // go from local to global indices + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + // define the physical points (cell-centered) + double x = (ig + 0.5) * hx[0] + origin[0]; + double y = (jg + 0.5) * hx[1] + origin[1]; + double z = (kg + 0.5) * hx[2] + origin[2]; + + view_rho(i, j, k) = gaussian(x, y, z); + }); // assign the exact field with its values (erf function) typename field::view_type view_exact = exact.getView(); - - Kokkos::parallel_for("Assign exact field", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_exact.extent(0) - nghost, - view_exact.extent(1) - nghost, - view_exact.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - double x = (ig + 0.5) * hx[0] + origin[0]; - double y = (jg + 0.5) * hx[1] + origin[1]; - double z = (kg + 0.5) * hx[2] + origin[2]; - - view_exact(i, j, k) = exact_fct(x,y,z); - }); - + + Kokkos::parallel_for( + "Assign exact field", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, + {view_exact.extent(0) - nghost, view_exact.extent(1) - nghost, + view_exact.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + double x = (ig + 0.5) * hx[0] + origin[0]; + double y = (jg + 0.5) * hx[1] + origin[1]; + double z = (kg + 0.5) * hx[2] + origin[2]; + + view_exact(i, j, k) = exact_fct(x, y, z); + }); + // assign the exact E field auto view_exactE = exactE.getView(); - - Kokkos::parallel_for("Assign exact E-field", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_exactE.extent(0) - nghost, - view_exactE.extent(1) - nghost, - view_exactE.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - double x = (ig + 0.5) * hx[0] + origin[0]; - double y = (jg + 0.5) * hx[1] + origin[1]; - double z = (kg + 0.5) * hx[2] + origin[2]; - - view_exactE(i, j, k)[0] = exact_E(x,y,z)[0]; - view_exactE(i, j, k)[1] = exact_E(x,y,z)[1]; - view_exactE(i, j, k)[2] = exact_E(x,y,z)[2]; - }); - - - // set the FFT parameters - - ippl::ParameterList fftParams; - fftParams.add("use_heffte_defaults", false); - fftParams.add("use_pencils", true); - fftParams.add("use_gpu_aware", true); - fftParams.add("comm", ippl::a2av); - fftParams.add("r2c_direction", 0); - // define an FFTPoissonSolver object - ippl::FFTPoissonSolver, double, 3> FFTsolver(fieldE, rho, fftParams, algorithm); - - // solve the Poisson equation -> rho contains the solution (phi) now - FFTsolver.solve(); + + Kokkos::parallel_for( + "Assign exact E-field", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, + {view_exactE.extent(0) - nghost, view_exactE.extent(1) - nghost, + view_exactE.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + double x = (ig + 0.5) * hx[0] + origin[0]; + double y = (jg + 0.5) * hx[1] + origin[1]; + double z = (kg + 0.5) * hx[2] + origin[2]; + + view_exactE(i, j, k)[0] = exact_E(x, y, z)[0]; + view_exactE(i, j, k)[1] = exact_E(x, y, z)[1]; + view_exactE(i, j, k)[2] = exact_E(x, y, z)[2]; + }); + + // set the FFT parameters + + ippl::ParameterList fftParams; + fftParams.add("use_heffte_defaults", false); + fftParams.add("use_pencils", true); + fftParams.add("use_gpu_aware", true); + fftParams.add("comm", ippl::a2av); + fftParams.add("r2c_direction", 0); + // define an FFTPoissonSolver object + ippl::FFTPoissonSolver, double, 3> FFTsolver(fieldE, rho, fftParams, + algorithm); + + // solve the Poisson equation -> rho contains the solution (phi) now + FFTsolver.solve(); // compute relative error norm for potential - rho = rho - exact; - double err = norm(rho)/norm(exact); - + rho = rho - exact; + double err = norm(rho) / norm(exact); + // compute relative error norm for the E-field components - ippl::Vector errE {0.0, 0.0, 0.0}; - fieldE = fieldE - exactE; + ippl::Vector errE{0.0, 0.0, 0.0}; + fieldE = fieldE - exactE; auto view_fieldE = fieldE.getView(); - for (size_t d=0; d<3; ++d) { - - double temp = 0.0; - Kokkos::parallel_reduce("Vector errorNr reduce", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, {view_fieldE.extent(0) - - nghost, view_fieldE.extent(1) - nghost, view_fieldE.extent(2) - nghost}), - KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { - double myVal = pow(view_fieldE(i, j, k)[d], 2); - valL += myVal; - }, Kokkos::Sum(temp)); - - double globaltemp = 0.0; - MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); - double errorNr = std::sqrt(globaltemp); - - temp = 0.0; - Kokkos::parallel_reduce("Vector errorDr reduce", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, {view_exactE.extent(0) - - nghost, view_exactE.extent(1) - nghost, view_exactE.extent(2) - nghost}), - KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { - double myVal = pow(view_exactE(i, j, k)[d], 2); - valL += myVal; - }, Kokkos::Sum(temp)); - - globaltemp = 0.0; + for (size_t d = 0; d < 3; ++d) { + double temp = 0.0; + Kokkos::parallel_reduce( + "Vector errorNr reduce", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, + {view_fieldE.extent(0) - nghost, view_fieldE.extent(1) - nghost, + view_fieldE.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { + double myVal = pow(view_fieldE(i, j, k)[d], 2); + valL += myVal; + }, + Kokkos::Sum(temp)); + + double globaltemp = 0.0; + MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + double errorNr = std::sqrt(globaltemp); + + temp = 0.0; + Kokkos::parallel_reduce( + "Vector errorDr reduce", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, + {view_exactE.extent(0) - nghost, view_exactE.extent(1) - nghost, + view_exactE.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { + double myVal = pow(view_exactE(i, j, k)[d], 2); + valL += myVal; + }, + Kokkos::Sum(temp)); + + globaltemp = 0.0; MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); double errorDr = std::sqrt(globaltemp); - errE[d] = errorNr/errorDr; - } - - msg << std::setprecision(16) << dx << " " << err << " " - << errE[0] << " " << errE[1] << " " << errE[2] << endl; + errE[d] = errorNr / errorDr; + } + + msg << std::setprecision(16) << dx << " " << err << " " << errE[0] << " " << errE[1] << " " + << errE[2] << endl; } - - // stop the timer + + // stop the timer IpplTimings::stopTimer(allTimer); IpplTimings::print(std::string("timing.dat")); - + return 0; } diff --git a/src/Solver/test/TestSolverDesign.cpp b/src/Solver/test/TestSolverDesign.cpp index 64d136d15..b4448cc96 100644 --- a/src/Solver/test/TestSolverDesign.cpp +++ b/src/Solver/test/TestSolverDesign.cpp @@ -2,17 +2,15 @@ #include "Ippl.h" #include -#include #include +#include #include "Electrostatics.h" constexpr unsigned int dim = 3; -class TestSolver : public ippl::Electrostatics -{ +class TestSolver : public ippl::Electrostatics { public: - void solve() override { *rhs_mp = *lhs_mp + *rhs_mp; @@ -22,25 +20,23 @@ class TestSolver : public ippl::Electrostatics } }; -int main(int argc, char *argv[]) { - - Ippl ippl(argc,argv); - +int main(int argc, char* argv[]) { + Ippl ippl(argc, argv); int pt = 4; ippl::Index I(pt); ippl::NDIndex owned(I, I, I); - ippl::e_dim_tag allParallel[dim]; // Specifies SERIAL, PARALLEL dims - for (unsigned int d=0; d layout(owned,allParallel); + ippl::FieldLayout layout(owned, allParallel); - //Unit box - double dx = 1.0 / double(pt); - ippl::Vector hx = {dx, dx, dx}; + // Unit box + double dx = 1.0 / double(pt); + ippl::Vector hx = {dx, dx, dx}; ippl::Vector origin = {0, 0, 0}; ippl::UniformCartesian mesh(owned, hx, origin); diff --git a/src/Solver/test/TestSphere.cpp b/src/Solver/test/TestSphere.cpp index add9ab34e..c7326ccde 100644 --- a/src/Solver/test/TestSphere.cpp +++ b/src/Solver/test/TestSphere.cpp @@ -4,39 +4,36 @@ // The algorithm can be chosen by the user ("HOCKNEY" or "VICO"). Example: // srun ./TestSphere HOCKNEY --info 10 -#include "Ippl.h" #include "FFTPoissonSolver.h" +#include "Ippl.h" -KOKKOS_INLINE_FUNCTION -double source(double x, double y, double z, double density = 1.0, double R = 1.0, double mu = 1.2) { - +KOKKOS_INLINE_FUNCTION double source(double x, double y, double z, double density = 1.0, + double R = 1.0, double mu = 1.2) { double pi = std::acos(-1.0); - double G = 6.674e-11; + double G = 6.674e-11; - double r = std::sqrt((x-mu)*(x-mu) + (y-mu)*(y-mu) + (z-mu)*(z-mu)); + double r = std::sqrt((x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu)); bool checkInside = (r <= R); return double(checkInside) * 4.0 * pi * G * density; } -KOKKOS_INLINE_FUNCTION -double exact_fct(double x, double y, double z, double density = 1.0, double R = 1.0, double mu = 1.2) { - +KOKKOS_INLINE_FUNCTION double exact_fct(double x, double y, double z, double density = 1.0, + double R = 1.0, double mu = 1.2) { double pi = std::acos(-1.0); - double G = 6.674e-11; + double G = 6.674e-11; - double r = std::sqrt((x-mu)*(x-mu) + (y-mu)*(y-mu) + (z-mu)*(z-mu)); + double r = std::sqrt((x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu)); bool checkInside = (r <= R); - return -(double(checkInside) * (2.0/3.0) * pi * G * density * (3*R*R - r*r)) - -((1.0 - double(checkInside)) * (4.0/3.0) * pi * G * density * R * R * R / r); + return -(double(checkInside) * (2.0 / 3.0) * pi * G * density * (3 * R * R - r * r)) + - ((1.0 - double(checkInside)) * (4.0 / 3.0) * pi * G * density * R * R * R / r); } -int main(int argc, char *argv[]) { - - Ippl ippl(argc,argv); +int main(int argc, char* argv[]) { + Ippl ippl(argc, argv); - std::string algorithm = argv[1]; + std::string algorithm = argv[1]; // number of interations const int n = 4; @@ -47,96 +44,96 @@ int main(int argc, char *argv[]) { std::cout << "Spacing Error" << std::endl; for (int p = 0; p < n; ++p) { - // domain - int pt = N[p]; - ippl::Index I(pt); - ippl::NDIndex<3> owned(I, I, I); - - // specifies decomposition; here all dimensions are parallel - ippl::e_dim_tag decomp[3]; - for (unsigned int d = 0; d < 3; d++) - decomp[d] = ippl::PARALLEL; - - // unit box - double dx = 2.4/pt; - ippl::Vector hx = {dx, dx, dx}; - ippl::Vector origin = {0.0, 0.0, 0.0}; - ippl::UniformCartesian mesh(owned, hx, origin); - - // all parallel layout, standard domain, normal axis order - ippl::FieldLayout<3> layout(owned, decomp); - - // define the L (phi) and R (rho) fields - typedef ippl::Field field; - field rho; - rho.initialize(mesh, layout); + int pt = N[p]; + ippl::Index I(pt); + ippl::NDIndex<3> owned(I, I, I); + + // specifies decomposition; here all dimensions are parallel + ippl::e_dim_tag decomp[3]; + for (unsigned int d = 0; d < 3; d++) + decomp[d] = ippl::PARALLEL; + + // unit box + double dx = 2.4 / pt; + ippl::Vector hx = {dx, dx, dx}; + ippl::Vector origin = {0.0, 0.0, 0.0}; + ippl::UniformCartesian mesh(owned, hx, origin); + + // all parallel layout, standard domain, normal axis order + ippl::FieldLayout<3> layout(owned, decomp); + + // define the L (phi) and R (rho) fields + typedef ippl::Field field; + field rho; + rho.initialize(mesh, layout); // define the exact solution field field exact; exact.initialize(mesh, layout); - - // assign the rho field with its value - typename field::view_type view_rho = rho.getView(); - const int nghost = rho.getNghost(); - const auto& ldom = layout.getLocalNDIndex(); - - Kokkos::parallel_for("Assign rho field", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_rho.extent(0) - nghost, - view_rho.extent(1) - nghost, - view_rho.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - // go from local to global indices - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - // define the physical points (cell-centered) - double x = (ig + 0.5) * hx[0] + origin[0]; - double y = (jg + 0.5) * hx[1] + origin[1]; - double z = (kg + 0.5) * hx[2] + origin[2]; - - view_rho(i, j, k) = source(x, y, z); - - }); + + // assign the rho field with its value + typename field::view_type view_rho = rho.getView(); + const int nghost = rho.getNghost(); + const auto& ldom = layout.getLocalNDIndex(); + + Kokkos::parallel_for( + "Assign rho field", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, {view_rho.extent(0) - nghost, view_rho.extent(1) - nghost, + view_rho.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // go from local to global indices + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + // define the physical points (cell-centered) + double x = (ig + 0.5) * hx[0] + origin[0]; + double y = (jg + 0.5) * hx[1] + origin[1]; + double z = (kg + 0.5) * hx[2] + origin[2]; + + view_rho(i, j, k) = source(x, y, z); + }); // assign the exact field with its values typename field::view_type view_exact = exact.getView(); - - Kokkos::parallel_for("Assign exact field", - Kokkos::MDRangePolicy>({nghost, nghost, nghost}, - {view_exact.extent(0) - nghost, - view_exact.extent(1) - nghost, - view_exact.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - const int ig = i + ldom[0].first() - nghost; - const int jg = j + ldom[1].first() - nghost; - const int kg = k + ldom[2].first() - nghost; - - double x = (ig + 0.5) * hx[0] + origin[0]; - double y = (jg + 0.5) * hx[1] + origin[1]; - double z = (kg + 0.5) * hx[2] + origin[2]; - - view_exact(i, j, k) = exact_fct(x,y,z); - }); - - // set FFT parameters - ippl::ParameterList fftParams; - fftParams.add("use_heffte_defaults", false); - fftParams.add("use_pencils", true); - fftParams.add("use_gpu_aware", true); - fftParams.add("comm", ippl::a2av); - fftParams.add("r2c_direction", 0); - - ippl::FFTPoissonSolver, double, 3> FFTsolver(rho, fftParams, algorithm); - - // solve the Poisson equation -> rho contains the solution (phi) now - FFTsolver.solve(); - - // compute the relative error norm - rho = rho - exact; - double err = norm(rho)/norm(exact); + + Kokkos::parallel_for( + "Assign exact field", + Kokkos::MDRangePolicy>( + {nghost, nghost, nghost}, + {view_exact.extent(0) - nghost, view_exact.extent(1) - nghost, + view_exact.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + double x = (ig + 0.5) * hx[0] + origin[0]; + double y = (jg + 0.5) * hx[1] + origin[1]; + double z = (kg + 0.5) * hx[2] + origin[2]; + + view_exact(i, j, k) = exact_fct(x, y, z); + }); + + // set FFT parameters + ippl::ParameterList fftParams; + fftParams.add("use_heffte_defaults", false); + fftParams.add("use_pencils", true); + fftParams.add("use_gpu_aware", true); + fftParams.add("comm", ippl::a2av); + fftParams.add("r2c_direction", 0); + + ippl::FFTPoissonSolver, double, 3> FFTsolver(rho, fftParams, + algorithm); + + // solve the Poisson equation -> rho contains the solution (phi) now + FFTsolver.solve(); + + // compute the relative error norm + rho = rho - exact; + double err = norm(rho) / norm(exact); std::cout << std::setprecision(16) << dx << " " << err << std::endl; } diff --git a/src/Types/IpplTypes.h b/src/Types/IpplTypes.h index 5d8b40e8c..ce10c823c 100644 --- a/src/Types/IpplTypes.h +++ b/src/Types/IpplTypes.h @@ -25,6 +25,6 @@ namespace ippl { namespace detail { typedef std::size_t size_type; } -} +} // namespace ippl #endif diff --git a/src/Types/Vector.h b/src/Types/Vector.h index 9af61edd0..ee97dedc8 100644 --- a/src/Types/Vector.h +++ b/src/Types/Vector.h @@ -32,19 +32,18 @@ namespace ippl { * @tparam T intrinsic vector data type * @tparam Dim vector dimension */ - template + template class Vector : public detail::Expression, sizeof(T) * Dim> { public: typedef T value_type; static constexpr unsigned dim = Dim; KOKKOS_FUNCTION - Vector() : Vector(value_type(0)) { } + Vector() + : Vector(value_type(0)) {} - - template - KOKKOS_FUNCTION - Vector(const detail::Expression& expr); + template + KOKKOS_FUNCTION Vector(const detail::Expression& expr); KOKKOS_DEFAULTED_FUNCTION Vector(const Vector& v) = default; @@ -59,47 +58,37 @@ namespace ippl { Vector(const std::initializer_list& list); KOKKOS_FUNCTION - ~Vector() { } - + ~Vector() {} // Get and Set Operations - KOKKOS_INLINE_FUNCTION - value_type& operator[](unsigned int i); + KOKKOS_INLINE_FUNCTION value_type& operator[](unsigned int i); - KOKKOS_INLINE_FUNCTION - value_type operator[](unsigned int i) const; + KOKKOS_INLINE_FUNCTION value_type operator[](unsigned int i) const; - KOKKOS_INLINE_FUNCTION - value_type& operator()(unsigned int i); + KOKKOS_INLINE_FUNCTION value_type& operator()(unsigned int i); - KOKKOS_INLINE_FUNCTION - value_type operator()(unsigned int i) const; + KOKKOS_INLINE_FUNCTION value_type operator()(unsigned int i) const; // Assignment Operators - template - KOKKOS_INLINE_FUNCTION - Vector& operator=(const detail::Expression& expr); + template + KOKKOS_INLINE_FUNCTION Vector& operator=(const detail::Expression& expr); - template - KOKKOS_INLINE_FUNCTION - Vector& operator+=(const detail::Expression& expr); + template + KOKKOS_INLINE_FUNCTION Vector& operator+=(const detail::Expression& expr); - template - KOKKOS_INLINE_FUNCTION - Vector& operator-=(const detail::Expression& expr); + template + KOKKOS_INLINE_FUNCTION Vector& operator-=(const detail::Expression& expr); - template - KOKKOS_INLINE_FUNCTION - Vector& operator*=(const detail::Expression& expr); + template + KOKKOS_INLINE_FUNCTION Vector& operator*=(const detail::Expression& expr); - template - KOKKOS_INLINE_FUNCTION - Vector& operator/=(const detail::Expression& expr); + template + KOKKOS_INLINE_FUNCTION Vector& operator/=(const detail::Expression& expr); private: T data_m[Dim]; }; -} +} // namespace ippl #include "Vector.hpp" diff --git a/src/Types/Vector.hpp b/src/Types/Vector.hpp index 1536d93eb..10599d75f 100644 --- a/src/Types/Vector.hpp +++ b/src/Types/Vector.hpp @@ -15,7 +15,7 @@ // You should have received a copy of the GNU General Public License // along with IPPL. If not, see . // -//#include "Utility/PAssert.h" +// #include "Utility/PAssert.h" #include #include @@ -24,32 +24,26 @@ namespace ippl { namespace detail { template struct isExpression> : std::true_type {}; - } - + } // namespace detail - template - template - KOKKOS_FUNCTION - Vector::Vector(const detail::Expression& expr) { + template + template + KOKKOS_FUNCTION Vector::Vector(const detail::Expression& expr) { for (unsigned int i = 0; i < Dim; ++i) { data_m[i] = expr[i]; } } - - template - KOKKOS_FUNCTION - Vector::Vector(const T& val) { + template + KOKKOS_FUNCTION Vector::Vector(const T& val) { for (unsigned i = 0; i < Dim; ++i) { data_m[i] = val; } } - - template - KOKKOS_FUNCTION - Vector::Vector(const std::initializer_list& list) { - //PAssert(list.size() == Dim); + template + KOKKOS_FUNCTION Vector::Vector(const std::initializer_list& list) { + // PAssert(list.size() == Dim); unsigned int i = 0; for (auto& l : list) { data_m[i] = l; @@ -57,119 +51,109 @@ namespace ippl { } } - /* * * Element access operators * */ - template - KOKKOS_INLINE_FUNCTION - typename Vector::value_type& Vector::operator[](unsigned int i) { - //PAssert(i < Dim); + template + KOKKOS_INLINE_FUNCTION typename Vector::value_type& Vector::operator[]( + unsigned int i) { + // PAssert(i < Dim); return data_m[i]; } - - template - KOKKOS_INLINE_FUNCTION - typename Vector::value_type Vector::operator[](unsigned int i) const { - //PAssert(i < Dim); + template + KOKKOS_INLINE_FUNCTION typename Vector::value_type Vector::operator[]( + unsigned int i) const { + // PAssert(i < Dim); return data_m[i]; } - - template - KOKKOS_INLINE_FUNCTION - typename Vector::value_type& Vector::operator()(unsigned int i) { - //PAssert(i < Dim); + template + KOKKOS_INLINE_FUNCTION typename Vector::value_type& Vector::operator()( + unsigned int i) { + // PAssert(i < Dim); return data_m[i]; } - - template - KOKKOS_INLINE_FUNCTION - typename Vector::value_type Vector::operator()(unsigned int i) const { - //PAssert(i < Dim); + template + KOKKOS_INLINE_FUNCTION typename Vector::value_type Vector::operator()( + unsigned int i) const { + // PAssert(i < Dim); return data_m[i]; } - /* * * Vector Expression assignment operators * */ - template - template - KOKKOS_INLINE_FUNCTION - Vector& Vector::operator=(const detail::Expression& expr) { + template + template + KOKKOS_INLINE_FUNCTION Vector& Vector::operator=( + const detail::Expression& expr) { for (unsigned int i = 0; i < Dim; ++i) { data_m[i] = expr[i]; } return *this; } - - template - template - KOKKOS_INLINE_FUNCTION - Vector& Vector::operator+=(const detail::Expression& expr) { + template + template + KOKKOS_INLINE_FUNCTION Vector& Vector::operator+=( + const detail::Expression& expr) { for (unsigned int i = 0; i < Dim; ++i) { data_m[i] += expr[i]; } return *this; } - - template - template - KOKKOS_INLINE_FUNCTION - Vector& Vector::operator-=(const detail::Expression& expr) { + template + template + KOKKOS_INLINE_FUNCTION Vector& Vector::operator-=( + const detail::Expression& expr) { for (unsigned int i = 0; i < Dim; ++i) { data_m[i] -= expr[i]; } return *this; } - - template - template - KOKKOS_INLINE_FUNCTION - Vector& Vector::operator*=(const detail::Expression& expr) { + template + template + KOKKOS_INLINE_FUNCTION Vector& Vector::operator*=( + const detail::Expression& expr) { for (unsigned int i = 0; i < Dim; ++i) { data_m[i] *= expr[i]; } return *this; } - - template - template - KOKKOS_INLINE_FUNCTION - Vector& Vector::operator/=(const detail::Expression& expr) { + template + template + KOKKOS_INLINE_FUNCTION Vector& Vector::operator/=( + const detail::Expression& expr) { for (unsigned int i = 0; i < Dim; ++i) { data_m[i] /= expr[i]; } return *this; } - - template + template inline std::ostream& operator<<(std::ostream& out, const Vector& v) { - std::streamsize sw = out.width(); + std::streamsize sw = out.width(); out << std::setw(1); if (Dim >= 1) { out << "( "; for (unsigned int i = 0; i < Dim - 1; i++) - out << std::setw(sw) << v[i] << " , "; + out << std::setw(sw) << v[i] << " , "; out << std::setw(sw) << v[Dim - 1] << " )"; } else { out << "( " << std::setw(sw) << v[0] << " )"; } return out; } -} +} // namespace ippl // vi: set et ts=4 sw=4 sts=4: // Local Variables: diff --git a/src/Types/ViewTypes.h b/src/Types/ViewTypes.h index 179cc4056..a028ffbdf 100644 --- a/src/Types/ViewTypes.h +++ b/src/Types/ViewTypes.h @@ -35,7 +35,7 @@ namespace ippl { * @tparam Properties further template parameters of Kokkos */ template - struct ViewType { }; + struct ViewType {}; /*! * Specialized view type for one dimension. @@ -77,7 +77,6 @@ namespace ippl { typedef Kokkos::RangePolicy<> policy_type; }; - /*! * Empty function for general write. * @tparam T view data type @@ -89,16 +88,14 @@ namespace ippl { */ template void write(const typename ViewType::view_type& view, - std::ostream& out = std::cout); - + std::ostream& out = std::cout); /*! * Specialized write function for one-dimensional views. */ template void write(const typename ViewType::view_type& view, - std::ostream& out = std::cout) - { + std::ostream& out = std::cout) { using view_type = typename ViewType::view_type; typename view_type::HostMirror hview = Kokkos::create_mirror_view(view); Kokkos::deep_copy(hview, view); @@ -108,14 +105,12 @@ namespace ippl { out << std::endl; } - /*! * Specialized write function for two-dimensional views. */ template void write(const typename ViewType::view_type& view, - std::ostream& out = std::cout) - { + std::ostream& out = std::cout) { using view_type = typename ViewType::view_type; typename view_type::HostMirror hview = Kokkos::create_mirror_view(view); Kokkos::deep_copy(hview, view); @@ -132,8 +127,7 @@ namespace ippl { */ template void write(const typename ViewType::view_type& view, - std::ostream& out = std::cout) - { + std::ostream& out = std::cout) { using view_type = typename ViewType::view_type; typename view_type::HostMirror hview = Kokkos::create_mirror_view(view); Kokkos::deep_copy(hview, view); @@ -148,8 +142,7 @@ namespace ippl { out << std::endl; } } - } -} - + } // namespace detail +} // namespace ippl #endif diff --git a/src/Utility/Inform.cpp b/src/Utility/Inform.cpp index 323dd95f6..557168054 100644 --- a/src/Utility/Inform.cpp +++ b/src/Utility/Inform.cpp @@ -28,8 +28,8 @@ #include "Ippl.h" -#include #include +#include // range of Inform message levels constexpr int MIN_INFORM_LEVEL = 1; @@ -39,21 +39,32 @@ constexpr int MAX_INFORM_LEVEL = 5; // manipulator functions // signal we wish to send the message -Inform& endl(Inform& inf) { inf << '\n'; return inf.outputMessage(); } +Inform& endl(Inform& inf) { + inf << '\n'; + return inf.outputMessage(); +} // set the current msg level -Inform& level1(Inform& inf) { return inf.setMessageLevel(1); } -Inform& level2(Inform& inf) { return inf.setMessageLevel(2); } -Inform& level3(Inform& inf) { return inf.setMessageLevel(3); } -Inform& level4(Inform& inf) { return inf.setMessageLevel(4); } -Inform& level5(Inform& inf) { return inf.setMessageLevel(5); } - +Inform& level1(Inform& inf) { + return inf.setMessageLevel(1); +} +Inform& level2(Inform& inf) { + return inf.setMessageLevel(2); +} +Inform& level3(Inform& inf) { + return inf.setMessageLevel(3); +} +Inform& level4(Inform& inf) { + return inf.setMessageLevel(4); +} +Inform& level5(Inform& inf) { + return inf.setMessageLevel(5); +} ///////////////////////////////////////////////////////////////////// // perform initialization for this object; called by the constructors. // arguments = prefix string, print node -void Inform::setup(const char *myname, int pnode) { - +void Inform::setup(const char* myname, int pnode) { On = true; if (Ippl::Info != NULL) { @@ -61,38 +72,35 @@ void Inform::setup(const char *myname, int pnode) { } else { OutputLevel = MIN_INFORM_LEVEL; } - MsgLevel = MIN_INFORM_LEVEL; + MsgLevel = MIN_INFORM_LEVEL; PrintNode = pnode; - if ( myname != 0 ) { + if (myname != 0) { Name = strcpy(new char[strlen(myname) + 1], myname); } else { Name = 0; } } - ///////////////////////////////////////////////////////////////////// // class constructor -Inform::Inform(const char *myname, int pnode) - : FormatBuf(std::ios::out), OpenedSuccessfully(true) { - +Inform::Inform(const char* myname, int pnode) + : FormatBuf(std::ios::out) + , OpenedSuccessfully(true) { // in this case, the default destination stream is cout NeedClose = false; - MsgDest = &std::cout; + MsgDest = &std::cout; // perform all other needed initialization setup(myname, pnode); } - ///////////////////////////////////////////////////////////////////// // class constructor specifying a file to open -Inform::Inform(const char *myname, const char *fname, const WriteMode opnmode, - int pnode) - : FormatBuf(std::ios::out), OpenedSuccessfully(true) { - - // only open a file if we're on the proper node +Inform::Inform(const char* myname, const char* fname, const WriteMode opnmode, int pnode) + : FormatBuf(std::ios::out) + , OpenedSuccessfully(true) { + // only open a file if we're on the proper node MsgDest = 0; if (pnode >= 0 && pnode == Ippl::Comm->myNode()) { if (opnmode == OVERWRITE) @@ -102,12 +110,12 @@ Inform::Inform(const char *myname, const char *fname, const WriteMode opnmode, } // make sure it was opened properly - if ( MsgDest == 0 || ! (*MsgDest) ) { + if (MsgDest == 0 || !(*MsgDest)) { if (pnode >= 0 && pnode == Ippl::Comm->myNode()) { std::cerr << "Inform: Cannot open file '" << fname << "'." << std::endl; } - NeedClose = false; - MsgDest = &std::cout; + NeedClose = false; + MsgDest = &std::cout; OpenedSuccessfully = false; } else { NeedClose = true; @@ -117,26 +125,25 @@ Inform::Inform(const char *myname, const char *fname, const WriteMode opnmode, setup(myname, pnode); } - ///////////////////////////////////////////////////////////////////// // class constructor specifying an output stream to use -Inform::Inform(const char *myname, std::ostream& os, int pnode) - : FormatBuf(std::ios::out), OpenedSuccessfully(true) { - +Inform::Inform(const char* myname, std::ostream& os, int pnode) + : FormatBuf(std::ios::out) + , OpenedSuccessfully(true) { // just store a ref to the provided stream NeedClose = false; - MsgDest = &os; + MsgDest = &os; // perform all other needed initialization setup(myname, pnode); } - ///////////////////////////////////////////////////////////////////// // class constructor specifying an other Inform instance -Inform::Inform(const char *myname, const Inform& os, int pnode) - : FormatBuf(std::ios::out), MsgDest(os.MsgDest), OpenedSuccessfully(true) { - +Inform::Inform(const char* myname, const Inform& os, int pnode) + : FormatBuf(std::ios::out) + , MsgDest(os.MsgDest) + , OpenedSuccessfully(true) { // just store a ref to the provided stream NeedClose = false; @@ -144,23 +151,19 @@ Inform::Inform(const char *myname, const Inform& os, int pnode) setup(myname, pnode); } - ///////////////////////////////////////////////////////////////////// // class destructor ... frees up space Inform::~Inform(void) { - - delete [] Name; - if ( NeedClose ) + delete[] Name; + if (NeedClose) delete MsgDest; } - // print out just a single line, from the given buffer -void Inform::display_single_line(char *buf) { - +void Inform::display_single_line(char* buf) { // output the prefix name if necessary ... if no name was given, do // not print any prefix at all - if ( Name != 0 ) { + if (Name != 0) { *MsgDest << Name; // output the node number if necessary @@ -168,11 +171,11 @@ void Inform::display_single_line(char *buf) { *MsgDest << "{" << Ippl::Comm->myNode() << "}"; // output the message level number if necessary - if ( MsgLevel > 1 ) + if (MsgLevel > 1) *MsgDest << "[" << MsgLevel << "]"; // output the end of the prefix string if necessary - if ( Name != 0) + if (Name != 0) *MsgDest << "> "; } @@ -180,15 +183,13 @@ void Inform::display_single_line(char *buf) { *MsgDest << buf << std::endl; } - ///////////////////////////////////////////////////////////////////// // Print out the message in the given buffer. -void Inform::display_message(char *buf) { - +void Inform::display_message(char* buf) { // check if we should even print out the message - if ( On && MsgLevel <= OutputLevel && buf != 0 ) { + if (On && MsgLevel <= OutputLevel && buf != 0) { // get location of final string term char - char *stend = buf + strlen(buf); + char* stend = buf + strlen(buf); // print blank lines for leading endlines while (*buf == '\n') { @@ -197,7 +198,7 @@ void Inform::display_message(char *buf) { } // print out all lines in the string now - while ( (buf = strtok(buf, "\n")) != 0 ) { + while ((buf = strtok(buf, "\n")) != 0) { display_single_line(buf); buf += strlen(buf); if (buf < stend) @@ -213,8 +214,7 @@ void Inform::display_message(char *buf) { MsgLevel = MIN_INFORM_LEVEL; } -void Inform::setDestination(std::ostream &dest) { - +void Inform::setDestination(std::ostream& dest) { if (NeedClose) delete MsgDest; @@ -226,32 +226,27 @@ void Inform::setDestination(std::ostream &dest) { ///////////////////////////////////////////////////////////////////// // Set the current output level for this Inform object. Inform& Inform::setOutputLevel(const int ol) { - - if ( ol >= (MIN_INFORM_LEVEL-1) && ol <= MAX_INFORM_LEVEL ) + if (ol >= (MIN_INFORM_LEVEL - 1) && ol <= MAX_INFORM_LEVEL) OutputLevel = ol; return *this; } - ///////////////////////////////////////////////////////////////////// // Set the current message level for the current message in this Inform object. Inform& Inform::setMessageLevel(const int ol) { - - if ( ol >= MIN_INFORM_LEVEL && ol <= MAX_INFORM_LEVEL ) + if (ol >= MIN_INFORM_LEVEL && ol <= MAX_INFORM_LEVEL) MsgLevel = ol; return *this; } - ///////////////////////////////////////////////////////////////////// // the signal has been given ... process the message. Return ref to object. Inform& Inform::outputMessage(void) { - // print out the message (only if this is the master node) if (PrintNode < 0 || PrintNode == Ippl::Comm->myNode()) { FormatBuf << std::ends; // extract C string and display - MsgBuf = FormatBuf.str(); + MsgBuf = FormatBuf.str(); char* cstring = const_cast(MsgBuf.c_str()); display_message(cstring); // clear buffer contents @@ -264,29 +259,27 @@ Inform& Inform::outputMessage(void) { return *this; } - ///////////////////////////////////////////////////////////////////// // test program #ifdef DEBUG_INFORM_CLASS - int main(int argc, char *argv[]) { - - int i; +int main(int argc, char* argv[]) { + int i; - // create an Inform instance - Inform inf("Inform Test"); + // create an Inform instance + Inform inf("Inform Test"); - // copy in the argv's ... then print them out - for ( i=0; i < argc ; i++) - inf << "Argument " << i << " = " << argv[i] << "\n"; - inf << endl << endl; + // copy in the argv's ... then print them out + for (i = 0; i < argc; i++) + inf << "Argument " << i << " = " << argv[i] << "\n"; + inf << endl << endl; - // do another one to make sure - inf.setOutputLevel(3); - inf << level2 << "This is the second test." << endl; + // do another one to make sure + inf.setOutputLevel(3); + inf << level2 << "This is the second test." << endl; - return 0; + return 0; } #endif diff --git a/src/Utility/Inform.h b/src/Utility/Inform.h index 3317301d4..eabf48de8 100644 --- a/src/Utility/Inform.h +++ b/src/Utility/Inform.h @@ -31,132 +31,132 @@ * argument to the constructor may also be set to the node to print on. */ -#include #include +#include #include -#define INFORM_ALL_NODES (-1) - +#define INFORM_ALL_NODES (-1) class Inform { - public: - // enumeration listing the ways in which a file may be opened for writing - enum WriteMode { OVERWRITE, APPEND }; + // enumeration listing the ways in which a file may be opened for writing + enum WriteMode { + OVERWRITE, + APPEND + }; public: - // constructor: arguments = name, print node - Inform(const char * = 0, int = 0); + // constructor: arguments = name, print node + Inform(const char* = 0, int = 0); + + // second constructor: this specifies the name of a file as well as + // a prefix and a mode for opening the file (i.e. OVERWRITE or APPEND). + // The final argument is the print node. + Inform(const char* prefix, const char* fname, const WriteMode, int = 0); - // second constructor: this specifies the name of a file as well as - // a prefix and a mode for opening the file (i.e. OVERWRITE or APPEND). - // The final argument is the print node. - Inform(const char *prefix, const char *fname, const WriteMode, int = 0); + // third constructor: this specifies the prefix and an ostream object + // to write to, as well as as the print node + Inform(const char*, std::ostream&, int = 0); - // third constructor: this specifies the prefix and an ostream object - // to write to, as well as as the print node - Inform(const char *, std::ostream&, int = 0); + // fourth constructor: this specifies the prefix and an Inform instance + // from which the ostream object is copied, as well as as the print node + Inform(const char* myname, const Inform& os, int pnode = 0); - // fourth constructor: this specifies the prefix and an Inform instance - // from which the ostream object is copied, as well as as the print node - Inform(const char *myname, const Inform& os, int pnode = 0); + // destructor + ~Inform(); - // destructor - ~Inform(); + // turn messages on/off + void on(const bool o) { On = o; } + bool isOn() const { return On; } - // turn messages on/off - void on(const bool o) { On = o; } - bool isOn() const { return On; } + // change output destination + void setDestination(std::ostream& dest); + std::ostream& getDestination() { return *MsgDest; } - // change output destination - void setDestination(std::ostream &dest); - std::ostream& getDestination() { return *MsgDest; } + // get/set the current output level + Inform& setOutputLevel(const int); + int getOutputLevel(void) const { return OutputLevel; } - // get/set the current output level - Inform& setOutputLevel(const int); - int getOutputLevel(void) const { return OutputLevel; } + // get/set the current message level + Inform& setMessageLevel(const int); + int getMessageLevel(void) const { return MsgLevel; } - // get/set the current message level - Inform& setMessageLevel(const int); - int getMessageLevel(void) const { return MsgLevel; } + // get/set the printing node. If set to a value < 0, all nodes print. + int getPrintNode() const { return PrintNode; } + void setPrintNode(int n = (-1)) { PrintNode = n; } - // get/set the printing node. If set to a value < 0, all nodes print. - int getPrintNode() const { return PrintNode; } - void setPrintNode(int n = (-1)) { PrintNode = n; } + // return a reference to the internal ostream used to print messages + std::ostream& getStream() { return FormatBuf; } - // return a reference to the internal ostream used to print messages - std::ostream& getStream() { return FormatBuf; } + // Was the stream opened successfully on construction? + bool openedSuccessfully() { return OpenedSuccessfully; } - // Was the stream opened successfully on construction? - bool openedSuccessfully() { return OpenedSuccessfully; } + // the signal has been given, print out the message. Return ref to object. + Inform& outputMessage(void); - // the signal has been given, print out the message. Return ref to object. - Inform& outputMessage(void); + // functions used to change format state; used just as for iostreams - // functions used to change format state; used just as for iostreams + typedef std::ios_base::fmtflags FmtFlags_t; - typedef std::ios_base::fmtflags FmtFlags_t; + FmtFlags_t setf(FmtFlags_t setbits, FmtFlags_t field) { return FormatBuf.setf(setbits, field); } - FmtFlags_t setf(FmtFlags_t setbits, FmtFlags_t field) - { return FormatBuf.setf(setbits,field); } + FmtFlags_t setf(FmtFlags_t f) { return FormatBuf.setf(f); } + void /*long*/ unsetf(FmtFlags_t f) { FormatBuf.unsetf(f); } + FmtFlags_t flags() const { return FormatBuf.flags(); } + FmtFlags_t flags(FmtFlags_t f) { return FormatBuf.flags(f); } + int width() const { return FormatBuf.width(); } + int width(int w) { return FormatBuf.width(w); } + char fill() const { return FormatBuf.fill(); } + char fill(char c) { return FormatBuf.fill(c); } + int precision() const { return FormatBuf.precision(); } + int precision(int p) { return FormatBuf.precision(p); } + void flush() { MsgDest->flush(); } - FmtFlags_t setf(FmtFlags_t f) { return FormatBuf.setf(f); } - void /*long*/ unsetf(FmtFlags_t f) { FormatBuf.unsetf(f); } - FmtFlags_t flags() const { return FormatBuf.flags(); } - FmtFlags_t flags(FmtFlags_t f) { return FormatBuf.flags(f); } - int width() const { return FormatBuf.width(); } - int width(int w) { return FormatBuf.width(w); } - char fill() const { return FormatBuf.fill(); } - char fill(char c) { return FormatBuf.fill(c); } - int precision() const { return FormatBuf.precision(); } - int precision(int p) { return FormatBuf.precision(p); } - void flush() { MsgDest->flush();} private: - // name of this object; put at the start of each message. - char *Name; + // name of this object; put at the start of each message. + char* Name; - // storage for the message text - std::string MsgBuf; - // an ostringstream used to format the messages - std::ostringstream FormatBuf; + // storage for the message text + std::string MsgBuf; + // an ostringstream used to format the messages + std::ostringstream FormatBuf; - // where to put the messages; can be changed, by default = cout - std::ostream *MsgDest; + // where to put the messages; can be changed, by default = cout + std::ostream* MsgDest; - // do we need to close the destination stream? - bool NeedClose; + // do we need to close the destination stream? + bool NeedClose; - // Was the stream opened successfully on construction? - bool OpenedSuccessfully; + // Was the stream opened successfully on construction? + bool OpenedSuccessfully; - // do we output the message? - bool On; + // do we output the message? + bool On; - // limit printing only to this node (if < 0, all nodes print) - int PrintNode; + // limit printing only to this node (if < 0, all nodes print) + int PrintNode; - // output level of this Inform object; messages with a level <= the output - // level are printed. Setting this to < 1 turns off messages. - int OutputLevel; + // output level of this Inform object; messages with a level <= the output + // level are printed. Setting this to < 1 turns off messages. + int OutputLevel; - // current message level; this is set by the 'levelN' manipulators, or - // by the routine setMsgLevel(int). After a message is printed, the current - // message level is reset to the minimum. - int MsgLevel; + // current message level; this is set by the 'levelN' manipulators, or + // by the routine setMsgLevel(int). After a message is printed, the current + // message level is reset to the minimum. + int MsgLevel; - // print out the message in the given buffer. Will modify the string, - // so beware. Arguments: string - void display_message(char *); + // print out the message in the given buffer. Will modify the string, + // so beware. Arguments: string + void display_message(char*); - // print out just a single line of the message. - void display_single_line(char *); + // print out just a single line of the message. + void display_single_line(char*); - // perform initialization for this object; called by the constructors. - // arguments = prefix string, print node - void setup(const char *, int); + // perform initialization for this object; called by the constructors. + // arguments = prefix string, print node + void setup(const char*, int); }; - // manipulator for signaling we want to send the message. extern Inform& endl(Inform&); @@ -167,47 +167,39 @@ extern Inform& level3(Inform&); extern Inform& level4(Inform&); extern Inform& level5(Inform&); - // templated version of operator<< for Inform objects -template -inline -Inform& operator<<(Inform& o, const T& val) { - o.getStream() << val; - return o; +template +inline Inform& operator<<(Inform& o, const T& val) { + o.getStream() << val; + return o; } - // specialized version of operator<< to handle Inform-specific manipulators -inline -Inform& operator<<(Inform& o, Inform& (*d)(Inform&)) { - return d(o); +inline Inform& operator<<(Inform& o, Inform& (*d)(Inform&)) { + return d(o); } - // specialized version of operator<< to handle void * arguments -inline -Inform& operator<<(Inform& o, const void *val) { - Inform::FmtFlags_t oldformat = o.setf(std::ios::hex, std::ios::basefield); - o.getStream() << "0x" << (long)val; - o.setf(oldformat, std::ios::basefield); - return o; +inline Inform& operator<<(Inform& o, const void* val) { + Inform::FmtFlags_t oldformat = o.setf(std::ios::hex, std::ios::basefield); + o.getStream() << "0x" << (long)val; + o.setf(oldformat, std::ios::basefield); + return o; } // specialized version of operator<< to handle long long type (KCC workaround) -inline -Inform& operator<<(Inform& o, const long long& val) { - o.getStream() << val; - return o; +inline Inform& operator<<(Inform& o, const long long& val) { + o.getStream() << val; + return o; } // specialized function for sending strings to Inform object inline Inform& operator<<(Inform& out, const std::string& s) { - out << s.c_str(); - return out; + out << s.c_str(); + return out; } - -#endif // INFORM_H +#endif // INFORM_H /*************************************************************************** * $RCSfile: Inform.h,v $ $Author: adelmann $ diff --git a/src/Utility/IpplException.h b/src/Utility/IpplException.h index e598a430e..709527e1a 100644 --- a/src/Utility/IpplException.h +++ b/src/Utility/IpplException.h @@ -4,27 +4,19 @@ #include class IpplException { - public: - - IpplException(const std::string &meth, const std::string &descr) { + IpplException(const std::string& meth, const std::string& descr) { descr_ = descr; - meth_ = meth; + meth_ = meth; } - virtual const char* what() const throw() { - return descr_.c_str(); - } + virtual const char* what() const throw() { return descr_.c_str(); } - virtual const std::string& where() const { - return meth_; - } + virtual const std::string& where() const { return meth_; } private: - std::string descr_; std::string meth_; - }; #endif diff --git a/src/Utility/IpplInfo.cpp b/src/Utility/IpplInfo.cpp index 548054f8b..3039f6b03 100644 --- a/src/Utility/IpplInfo.cpp +++ b/src/Utility/IpplInfo.cpp @@ -22,9 +22,8 @@ #include "IpplVersions.h" #include -#include #include - +#include ///////////////////////////////////////////////////////////////////// // printVersion: print out a version summary. If the argument is true, @@ -36,7 +35,6 @@ void IpplInfo::printVersion(void) { std::cout << "Built for machine: " << compileMachine() << std::endl; } - void IpplInfo::printHelp(char** argv) { std::cout << "Usage: " << argv[0] << " [