diff --git a/src/Solver/FFTPoissonSolver.h b/src/Solver/FFTPoissonSolver.h index b7a11551a..1a6d0dcba 100644 --- a/src/Solver/FFTPoissonSolver.h +++ b/src/Solver/FFTPoissonSolver.h @@ -41,29 +41,41 @@ namespace ippl { namespace detail { + /*! * Access a view that either contains a vector field or a scalar field * in such a way that the correct element access is determined at compile * time, reducing the number of functions needed to achieve the same * behavior for both kinds of fields - * @tparam isVec whether the field is a vector field + * @tparam tensorRank indicates whether scalar, vector, or matrix field * @tparam - the view type */ - template + template struct ViewAccess; template - struct ViewAccess { - KOKKOS_INLINE_FUNCTION constexpr static auto& get(View&& view, unsigned dim, size_t i, - size_t j, size_t k) { - return view(i, j, k)[dim]; + struct ViewAccess<2, View> { + KOKKOS_INLINE_FUNCTION constexpr static auto& get(View&& view, unsigned dim1, + unsigned dim2, size_t i, size_t j, + size_t k) { + return view(i, j, k)[dim1][dim2]; + } + }; + + template + struct ViewAccess<1, View> { + KOKKOS_INLINE_FUNCTION constexpr static auto& get(View&& view, unsigned dim1, + [[maybe_unused]] unsigned dim2, + size_t i, size_t j, size_t k) { + return view(i, j, k)[dim1]; } }; template - struct ViewAccess { + struct ViewAccess<0, View> { KOKKOS_INLINE_FUNCTION constexpr static auto& get(View&& view, - [[maybe_unused]] unsigned dim, + [[maybe_unused]] unsigned dim1, + [[maybe_unused]] unsigned dim2, size_t i, size_t j, size_t k) { return view(i, j, k); } @@ -97,12 +109,16 @@ namespace ippl { // define a type for a 3 dimensional field (e.g. charge density field) // define a type of Field with integers to be used for the helper Green's function // also define a type for the Fourier transformed complex valued fields + // define matrix and matrix field types for the Hessian typedef FieldRHS Field_t; - typedef Field IField_t; + typedef typename FieldLHS::Centering_t Centering; + typedef Field IField_t; typedef Field Field_gt; typedef Field, Dim, mesh_type, Centering> CxField_gt; typedef typename FFT_t::ComplexField CxField_t; typedef Vector Vector_t; + typedef typename mesh_type::matrix_type Matrix_t; + typedef Field MField_t; // define type for field layout typedef FieldLayout FieldLayout_t; @@ -133,6 +149,17 @@ namespace ippl { // more specifically, compute the scalar potential given a density field rho using void solve() override; + // override getHessian to return Hessian field if flag is on + MField_t* getHessian() override { + bool hessian = this->params_m.template get("hessian"); + if (!hessian) { + throw IpplException( + "FFTPoissonSolver::getHessian()", + "Cannot call getHessian() if 'hessian' flag in ParameterList is false"); + } + return &hess_m; + } + // compute standard Green's function void greensFunction(); @@ -168,6 +195,9 @@ namespace ippl { // fields that facilitate the calculation in greensFunction() IField_t grnIField_m[Dim]; + // hessian matrix field (only if hessian parameter is set) + MField_t hess_m; + // the FFT object std::unique_ptr fft_m; @@ -239,6 +269,7 @@ namespace ippl { } this->params_m.add("algorithm", HOCKNEY); + this->params_m.add("hessian", true); } }; } // namespace ippl diff --git a/src/Solver/FFTPoissonSolver.hpp b/src/Solver/FFTPoissonSolver.hpp index ff13640cf..f28d87fe1 100644 --- a/src/Solver/FFTPoissonSolver.hpp +++ b/src/Solver/FFTPoissonSolver.hpp @@ -61,10 +61,10 @@ void pack(const ippl::NDIndex<3> intersect, Kokkos::View& view, Kokkos::fence(); } -template +template void unpack_impl(const ippl::NDIndex<3> intersect, const Kokkos::View& view, ippl::detail::FieldBufferData& fd, int nghost, const ippl::NDIndex<3> ldom, - size_t dim = 0, bool x = false, bool y = false, bool z = false) { + size_t dim1 = 0, size_t dim2 = 0, bool x = false, bool y = false, bool z = false) { Kokkos::View& buffer = fd.buffer; const int first0 = intersect[0].first() + nghost - ldom[0].first(); @@ -90,7 +90,8 @@ void unpack_impl(const ippl::NDIndex<3> intersect, const Kokkos::View& vi int l = ig + jg * intersect[0].length() + kg * intersect[1].length() * intersect[0].length(); - ippl::detail::ViewAccess::get(view, dim, i, j, k) = buffer(l); + ippl::detail::ViewAccess::get(view, dim1, dim2, i, j, k) = + buffer(l); }); Kokkos::fence(); } @@ -99,14 +100,23 @@ template void unpack(const ippl::NDIndex<3> intersect, const Kokkos::View& view, ippl::detail::FieldBufferData& fd, int nghost, const ippl::NDIndex<3> ldom, bool x = false, bool y = false, bool z = false) { - unpack_impl(intersect, view, fd, nghost, ldom, 0, x, y, z); + unpack_impl<0, Tb, Tf>(intersect, view, fd, nghost, ldom, 0, 0, x, y, z); } template void unpack(const ippl::NDIndex<3> intersect, const Kokkos::View***>& view, - size_t dim, ippl::detail::FieldBufferData& fd, int nghost, + size_t dim1, ippl::detail::FieldBufferData& fd, int nghost, const ippl::NDIndex<3> ldom) { - unpack_impl>(intersect, view, fd, nghost, ldom, dim); + unpack_impl<1, Tb, ippl::Vector>(intersect, view, fd, nghost, ldom, dim1); +} + +template +void unpack(const ippl::NDIndex<3> intersect, + const Kokkos::View, 3>***>& view, + ippl::detail::FieldBufferData& fd, int nghost, const ippl::NDIndex<3> ldom, + size_t dim1, size_t dim2) { + unpack_impl<2, Tb, ippl::Vector, 3>>(intersect, view, fd, nghost, ldom, + dim1, dim2); } namespace ippl { @@ -209,7 +219,9 @@ namespace ippl { template void FFTPoissonSolver::initializeFields() { - const int alg = this->params_m.template get("algorithm"); + // get algorithm and hessian flag from parameter list + const int alg = this->params_m.template get("algorithm"); + const bool hessian = this->params_m.template get("hessian"); // first check if valid algorithm choice if ((alg != Algorithm::VICO) && (alg != Algorithm::HOCKNEY) @@ -281,16 +293,20 @@ namespace ippl { grntr_m.initialize(*meshComplex_m, *layoutComplex_m); int out = this->params_m.template get("output_type"); - if (((out == Base::GRAD) || (out == Base::SOL_AND_GRAD)) && (!isGradFD_m)) { + if (((out == Base::GRAD || out == Base::SOL_AND_GRAD) && !isGradFD_m) || hessian) { temp_m.initialize(*meshComplex_m, *layoutComplex_m); } + if (hessian) { + hess_m.initialize(*mesh_mp, *layout_mp); + } + // create the FFT object fft_m = std::make_unique(*layout2_m, *layoutComplex_m, this->params_m); // if Vico, also need to create mesh and layout for 4N Fourier domain // on this domain, the truncated Green's function is defined // also need to create the 4N complex grid, on which precomputation step done - if ((alg == Algorithm::VICO) || (alg == Algorithm::BIHARMONIC)) { + if (alg == Algorithm::VICO || alg == Algorithm::BIHARMONIC) { // start a timer static IpplTimings::TimerRef initialize_vico = IpplTimings::getTimer("Initialize: extra Vico"); @@ -333,14 +349,11 @@ namespace ippl { const int size = nr_m[d]; // Kokkos parallel for loop to initialize grnIField[d] - using mdrange_type = Kokkos::MDRangePolicy>; switch (d) { case 0: Kokkos::parallel_for( "Helper index Green field initialization", - mdrange_type({nghost, nghost, nghost}, - {view.extent(0) - nghost, view.extent(1) - nghost, - view.extent(2) - nghost}), + grnIField_m[d].getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { // go from local indices to global const int ig = i + ldom[0].first() - nghost; @@ -360,9 +373,7 @@ namespace ippl { case 1: Kokkos::parallel_for( "Helper index Green field initialization", - mdrange_type({nghost, nghost, nghost}, - {view.extent(0) - nghost, view.extent(1) - nghost, - view.extent(2) - nghost}), + grnIField_m[d].getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { // go from local indices to global const int jg = j + ldom[1].first() - nghost; @@ -376,9 +387,7 @@ namespace ippl { case 2: Kokkos::parallel_for( "Helper index Green field initialization", - mdrange_type({nghost, nghost, nghost}, - {view.extent(0) - nghost, view.extent(1) - nghost, - view.extent(2) - nghost}), + grnIField_m[d].getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { // go from local indices to global const int kg = k + ldom[2].first() - nghost; @@ -398,7 +407,7 @@ namespace ippl { IpplTimings::startTimer(warmup); fft_m->transform(+1, rho2_mr, rho2tr_m); - if ((alg == Algorithm::VICO) || (alg == Algorithm::BIHARMONIC)) { + if (alg == Algorithm::VICO || alg == Algorithm::BIHARMONIC) { fft4n_m->transform(+1, grnL_m); } @@ -431,6 +440,9 @@ namespace ippl { // get the algorithm (hockney, vico, or biharmonic) const int alg = this->params_m.template get("algorithm"); + // get hessian flag (if true, we compute the Hessian) + const bool hessian = this->params_m.template get("hessian"); + // set the mesh & spacing, which may change each timestep mesh_mp = &(this->rhs_mp->get_mesh()); @@ -458,8 +470,6 @@ namespace ippl { // store rho (RHS) in the lower left quadrant of the doubled grid // with or without communication (if only 1 rank) - using mdrange_type = Kokkos::MDRangePolicy>; - const int ranks = Comm->size(); auto view2 = rho2_mr.getView(); @@ -524,10 +534,7 @@ namespace ippl { } else { Kokkos::parallel_for( - "Write rho on the doubled grid", - mdrange_type({nghost1, nghost1, nghost1}, - {view1.extent(0) - nghost1, view1.extent(1) - nghost1, - view1.extent(2) - nghost1}), + "Write rho on the doubled grid", this->rhs_mp->getFieldRangePolicy(), KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { const size_t ig2 = i + ldom2[0].first() - nghost2; const size_t jg2 = j + ldom2[1].first() - nghost2; @@ -581,10 +588,11 @@ namespace ippl { // Vico: need to multiply by normalization factor of 1/4N^3, // since only backward transform was performed on the 4N grid for (unsigned int i = 0; i < Dim; ++i) { - if ((alg == Algorithm::VICO) || (alg == Algorithm::BIHARMONIC)) + if (alg == Algorithm::VICO || alg == Algorithm::BIHARMONIC) { rho2_mr = rho2_mr * 2.0 * (1.0 / 4.0); - else + } else { rho2_mr = rho2_mr * 2.0 * nr_m[i] * hr_m[i]; + } } // start a timer @@ -649,9 +657,7 @@ namespace ippl { } else { Kokkos::parallel_for( "Write the solution into the LHS on physical grid", - mdrange_type({nghost1, nghost1, nghost1}, - {view1.extent(0) - nghost1, view1.extent(1) - nghost1, - view1.extent(2) - nghost1}), + this->rhs_mp->getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { const int ig2 = i + ldom2[0].first() - nghost2; const int jg2 = j + ldom2[1].first() - nghost2; @@ -706,10 +712,7 @@ namespace ippl { for (size_t gd = 0; gd < Dim; ++gd) { // loop over rho2tr_m to multiply by -ik (gradient in Fourier space) Kokkos::parallel_for( - "Gradient - E field", - mdrange_type({nghostR, nghostR, nghostR}, - {viewR.extent(0) - nghostR, viewR.extent(1) - nghostR, - viewR.extent(2) - nghostR}), + "Gradient - E field", rho2tr_m.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { // global indices for 2N rhotr_m const int ig = i + ldomR[0].first() - nghostR; @@ -717,21 +720,15 @@ namespace ippl { const int kg = k + ldomR[2].first() - nghostR; Vector iVec = {ig, jg, kg}; - Vector_t kVec; - for (size_t d = 0; d < Dim; ++d) { - const scalar_type Len = N[d] * hsize[d]; - const bool shift = (iVec[d] > N[d]); - const bool notMid = (iVec[d] != N[d]); - - kVec[d] = notMid * (pi / Len) * (iVec[d] - shift * 2 * N[d]); - } + scalar_type k_gd; + const scalar_type Len = N[gd] * hsize[gd]; + const bool shift = (iVec[gd] > N[gd]); + const bool notMid = (iVec[gd] != N[gd]); - const scalar_type Dr = - kVec[0] * kVec[0] + kVec[1] * kVec[1] + kVec[2] * kVec[2]; + k_gd = notMid * (pi / Len) * (iVec[gd] - shift * 2 * N[gd]); - const bool isNotZero = (Dr != 0.0); - view_g(i, j, k) = -isNotZero * (I * kVec[gd]) * viewR(i, j, k); + view_g(i, j, k) = -(I * k_gd) * viewR(i, j, k); }); // start a timer @@ -745,10 +742,11 @@ namespace ippl { // apply proper normalization for (unsigned int i = 0; i < Dim; ++i) { - if ((alg == Algorithm::VICO) || (alg == Algorithm::BIHARMONIC)) + if (alg == Algorithm::VICO || alg == Algorithm::BIHARMONIC) { rho2_mr = rho2_mr * 2.0 * (1.0 / 4.0); - else + } else { rho2_mr = rho2_mr * 2.0 * nr_m[i] * hr_m[i]; + } } // start a timer @@ -812,10 +810,7 @@ namespace ippl { } else { Kokkos::parallel_for( - "Write the E-field on physical grid", - mdrange_type({nghostL, nghostL, nghostL}, - {viewL.extent(0) - nghostL, viewL.extent(1) - nghostL, - viewL.extent(2) - nghostL}), + "Write the E-field on physical grid", this->lhs_mp->getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { const int ig2 = i + ldom2[0].first() - nghost2; const int jg2 = j + ldom2[1].first() - nghost2; @@ -834,6 +829,164 @@ namespace ippl { } IpplTimings::stopTimer(efield); } + + // if user asked for Hessian, compute in Fourier domain (-k^2 multiplication) + if (hessian) { + // start a timer + static IpplTimings::TimerRef hess = IpplTimings::getTimer("Solve: Hessian"); + IpplTimings::startTimer(hess); + + // get Hessian matrix view (LHS) + auto viewH = hess_m.getView(); + const int nghostH = hess_m.getNghost(); + + // get rho2tr_m view (as we want to multiply by -k^2 then transform) + auto viewR = rho2tr_m.getView(); + const int nghostR = rho2tr_m.getNghost(); + const auto& ldomR = layoutComplex_m->getLocalNDIndex(); + + // use temp_m as a temporary complex field + auto view_g = temp_m.getView(); + + // define some constants + const scalar_type pi = Kokkos::numbers::pi_v; + + // define some member variables in local scope for the parallel_for + vector_type hsize = hr_m; + Vector N = nr_m; + + // loop over each component (Hessian = Matrix field) + for (size_t row = 0; row < Dim; ++row) { + for (size_t col = 0; col < Dim; ++col) { + // loop over rho2tr_m to multiply by -k^2 (second derivative in Fourier space) + // if diagonal element (row = col), do not need N/2 term = 0 + // else, if mixed derivative, need kVec = 0 at N/2 + + Kokkos::parallel_for( + "Hessian", rho2tr_m.getFieldRangePolicy(), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // global indices for 2N rhotr_m + const int ig = i + ldomR[0].first() - nghostR; + const int jg = j + ldomR[1].first() - nghostR; + const int kg = k + ldomR[2].first() - nghostR; + + Vector iVec = {ig, jg, kg}; + Vector_t kVec; + + for (size_t d = 0; d < Dim; ++d) { + const scalar_type Len = N[d] * hsize[d]; + const bool shift = (iVec[d] > N[d]); + const bool isMid = (iVec[d] == N[d]); + const bool notDiag = (row != col); + + kVec[d] = (1 - (notDiag * isMid)) * (pi / Len) + * (iVec[d] - shift * 2 * N[d]); + } + + view_g(i, j, k) = -(kVec[col] * kVec[row]) * viewR(i, j, k); + }); + + // start a timer + static IpplTimings::TimerRef ffth = IpplTimings::getTimer("FFT: Hessian"); + IpplTimings::startTimer(ffth); + + // transform to get Hessian + fft_m->transform(-1, rho2_mr, temp_m); + + IpplTimings::stopTimer(ffth); + + // apply proper normalization + for (unsigned int i = 0; i < Dim; ++i) { + if (alg == Algorithm::VICO || alg == Algorithm::BIHARMONIC) { + rho2_mr = rho2_mr * 2.0 * (1.0 / 4.0); + } else { + rho2_mr = rho2_mr * 2.0 * nr_m[i] * hr_m[i]; + } + } + + // start a timer + static IpplTimings::TimerRef hdtos = + IpplTimings::getTimer("Hessian: double to phys."); + IpplTimings::startTimer(hdtos); + + // restrict to physical grid (N^3) and assign to Matrix field (Hessian) + // communication needed if more than one rank + if (ranks > 1) { + // COMMUNICATION + + // send + const auto& lDomains1 = layout_mp->getHostLocalDomains(); + std::vector requests(0); + + for (int i = 0; i < ranks; ++i) { + if (lDomains1[i].touches(ldom2)) { + auto intersection = lDomains1[i].intersect(ldom2); + + requests.resize(requests.size() + 1); + + Communicate::size_type nsends; + pack(intersection, view2, fd_m, nghost2, ldom2, nsends); + + buffer_type buf = Comm->getBuffer( + IPPL_SOLVER_SEND + i, nsends); + + Comm->isend(i, OPEN_SOLVER_TAG, fd_m, *buf, requests.back(), + nsends); + buf->resetWritePos(); + } + } + + // receive + const auto& lDomains2 = layout2_m->getHostLocalDomains(); + int myRank = Comm->rank(); + + for (int i = 0; i < ranks; ++i) { + if (ldom1.touches(lDomains2[i])) { + auto intersection = ldom1.intersect(lDomains2[i]); + + Communicate::size_type nrecvs; + nrecvs = intersection.size(); + + buffer_type buf = Comm->getBuffer( + IPPL_SOLVER_RECV + myRank, nrecvs); + + Comm->recv(i, OPEN_SOLVER_TAG, fd_m, *buf, nrecvs * sizeof(Trhs), + nrecvs); + buf->resetReadPos(); + + unpack(intersection, viewH, fd_m, nghostH, ldom1, row, col); + } + } + + // wait for all messages to be received + if (requests.size() > 0) { + MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); + } + Comm->barrier(); + + } else { + Kokkos::parallel_for( + "Write Hessian on physical grid", hess_m.getFieldRangePolicy(), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + const int ig2 = i + ldom2[0].first() - nghost2; + const int jg2 = j + ldom2[1].first() - nghost2; + const int kg2 = k + ldom2[2].first() - nghost2; + + const int ig = i + ldom1[0].first() - nghostH; + const int jg = j + ldom1[1].first() - nghostH; + const int kg = k + ldom1[2].first() - nghostH; + + // take [0,N-1] as physical solution + const bool isQuadrant1 = + ((ig == ig2) && (jg == jg2) && (kg == kg2)); + viewH(i, j, k)[row][col] = view2(i, j, k) * isQuadrant1; + }); + } + IpplTimings::stopTimer(hdtos); + } + } + IpplTimings::stopTimer(hess); + } IpplTimings::stopTimer(solve); }; @@ -847,7 +1000,7 @@ namespace ippl { const int alg = this->params_m.template get("algorithm"); - if ((alg == Algorithm::VICO) || (alg == Algorithm::BIHARMONIC)) { + if (alg == Algorithm::VICO || alg == Algorithm::BIHARMONIC) { Vector_t l(hr_m * nr_m); Vector_t hs_m; double L_sum(0.0); @@ -881,14 +1034,9 @@ namespace ippl { Vector size = nr_m; // Kokkos parallel for loop to assign analytic grnL_m - using mdrange_type = Kokkos::MDRangePolicy>; - if (alg == Algorithm::VICO) { Kokkos::parallel_for( - "Initialize Green's function ", - mdrange_type({nghost_g, nghost_g, nghost_g}, - {view_g.extent(0) - nghost_g, view_g.extent(1) - nghost_g, - view_g.extent(2) - nghost_g}), + "Initialize Green's function ", grnL_m.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { // go from local indices to global const int ig = i + ldom_g[0].first() - nghost_g; @@ -920,10 +1068,7 @@ namespace ippl { } else if (alg == Algorithm::BIHARMONIC) { Kokkos::parallel_for( - "Initialize Green's function ", - mdrange_type({nghost_g, nghost_g, nghost_g}, - {view_g.extent(0) - nghost_g, view_g.extent(1) - nghost_g, - view_g.extent(2) - nghost_g}), + "Initialize Green's function ", grnL_m.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { // go from local indices to global const int ig = i + ldom_g[0].first() - nghost_g; @@ -980,6 +1125,7 @@ namespace ippl { communicateVico(size, view_g, ldom_g, nghost_g, view, ldom, nghost); } else { // restrict the green's function to a (2N)^3 grid from the (4N)^3 grid + using mdrange_type = Kokkos::MDRangePolicy>; Kokkos::parallel_for( "Restrict domain of Green's function from 4N to 2N", mdrange_type({nghost, nghost, nghost}, {view.extent(0) - nghost - size[0], @@ -1033,12 +1179,8 @@ namespace ippl { const auto& ldom = layout2_m->getLocalNDIndex(); // Kokkos parallel for loop to find (0,0,0) point and regularize - using mdrange_type = Kokkos::MDRangePolicy>; Kokkos::parallel_for( - "Regularize Green's function ", - mdrange_type( - {nghost, nghost, nghost}, - {view.extent(0) - nghost, view.extent(1) - nghost, view.extent(2) - nghost}), + "Regularize Green's function ", grn_mr.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { // go from local indices to global const int ig = i + ldom[0].first() - nghost; diff --git a/src/Solver/Solver.h b/src/Solver/Solver.h index f32b55645..09a25b1fd 100644 --- a/src/Solver/Solver.h +++ b/src/Solver/Solver.h @@ -31,6 +31,12 @@ namespace ippl { using lhs_type = FieldLHS; using rhs_type = FieldRHS; + constexpr static unsigned Dim = FieldLHS::dim; + typedef typename FieldLHS::Mesh_t Mesh; + typedef typename FieldLHS::Centering_t Centering; + typedef typename Mesh::matrix_type Matrix_t; + typedef Field MField_t; + /*! * Default constructor */ @@ -84,6 +90,8 @@ namespace ippl { */ virtual void setRhs(rhs_type& rhs) { rhs_mp = &rhs; } + virtual MField_t* getHessian() { return nullptr; } + protected: ParameterList params_m; diff --git a/src/Solver/test/CMakeLists.txt b/src/Solver/test/CMakeLists.txt index df264c630..358c475cf 100644 --- a/src/Solver/test/CMakeLists.txt +++ b/src/Solver/test/CMakeLists.txt @@ -69,6 +69,13 @@ if (ENABLE_FFT) ${MPI_CXX_LIBRARIES} ) + add_executable (TestGaussian_hessian TestGaussian_hessian.cpp) + target_link_libraries ( + TestGaussian_hessian + ${IPPL_LIBS} + ${MPI_CXX_LIBRARIES} + ) + add_executable (TestP3MSolver TestP3MSolver.cpp) target_link_libraries ( TestP3MSolver diff --git a/src/Solver/test/TestGaussian.cpp b/src/Solver/test/TestGaussian.cpp index b0e353ef9..b785c2750 100644 --- a/src/Solver/test/TestGaussian.cpp +++ b/src/Solver/test/TestGaussian.cpp @@ -143,18 +143,13 @@ int main(int argc, char* argv[]) { exactE.initialize(mesh, layout); fieldE.initialize(mesh, layout); - field Ex, Ey, Ez; - Ex.initialize(mesh, layout); - Ey.initialize(mesh, layout); - Ez.initialize(mesh, layout); - // assign the rho field with a gaussian - typename field::view_type view_rho = rho.getView(); - const int nghost = rho.getNghost(); - const auto& ldom = layout.getLocalNDIndex(); + auto view_rho = rho.getView(); + const int nghost = rho.getNghost(); + const auto& ldom = layout.getLocalNDIndex(); Kokkos::parallel_for( - "Assign rho field", ippl::getRangePolicy(view_rho, nghost), + "Assign rho field", rho.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { // go from local to global indices const int ig = i + ldom[0].first() - nghost; @@ -170,10 +165,10 @@ int main(int argc, char* argv[]) { }); // assign the exact field with its values (erf function) - typename field::view_type view_exact = exact.getView(); + auto view_exact = exact.getView(); Kokkos::parallel_for( - "Assign exact field", ippl::getRangePolicy(view_exact, nghost), + "Assign exact field", exact.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { const int ig = i + ldom[0].first() - nghost; const int jg = j + ldom[1].first() - nghost; @@ -190,7 +185,7 @@ int main(int argc, char* argv[]) { auto view_exactE = exactE.getView(); Kokkos::parallel_for( - "Assign exact E-field", ippl::getRangePolicy(view_exactE, nghost), + "Assign exact E-field", exactE.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { const int ig = i + ldom[0].first() - nghost; const int jg = j + ldom[1].first() - nghost; @@ -200,9 +195,7 @@ int main(int argc, char* argv[]) { double y = (jg + 0.5) * hr[1] + origin[1]; double z = (kg + 0.5) * hr[2] + origin[2]; - view_exactE(i, j, k)[0] = exact_E(x, y, z)[0]; - view_exactE(i, j, k)[1] = exact_E(x, y, z)[1]; - view_exactE(i, j, k)[2] = exact_E(x, y, z)[2]; + view_exactE(i, j, k) = exact_E(x, y, z); }); // Parameter List to pass to solver @@ -260,36 +253,22 @@ int main(int argc, char* argv[]) { // solve the Poisson equation -> rho contains the solution (phi) now FFTsolver.solve(); - const int nghostE = fieldE.getNghost(); - auto Eview = fieldE.getView(); - - auto viewEx = Ex.getView(); - auto viewEy = Ey.getView(); - auto viewEz = Ez.getView(); - - Kokkos::parallel_for( - "Vector E reduce", ippl::getRangePolicy(Eview, nghostE), - KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) { - viewEx(i, j, k) = Eview(i, j, k)[0]; - viewEy(i, j, k) = Eview(i, j, k)[1]; - viewEz(i, j, k) = Eview(i, j, k)[2]; - }); - // compute relative error norm for potential rho = rho - exact; double err = norm(rho) / norm(exact); // compute relative error norm for the E-field components ippl::Vector errE{0.0, 0.0, 0.0}; - fieldE = fieldE - exactE; - auto view_fieldE = fieldE.getView(); + fieldE = fieldE - exactE; + + auto Eview = fieldE.getView(); for (size_t d = 0; d < Dim; ++d) { double temp = 0.0; Kokkos::parallel_reduce( - "Vector errorNr reduce", ippl::getRangePolicy(view_fieldE, nghost), + "Vector errorNr reduce", fieldE.getFieldRangePolicy(), KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { - double myVal = pow(view_fieldE(i, j, k)[d], 2); + double myVal = Kokkos::pow(Eview(i, j, k)[d], 2); valL += myVal; }, Kokkos::Sum(temp)); @@ -301,9 +280,9 @@ int main(int argc, char* argv[]) { temp = 0.0; Kokkos::parallel_reduce( - "Vector errorDr reduce", ippl::getRangePolicy(view_exactE, nghost), + "Vector errorDr reduce", exactE.getFieldRangePolicy(), KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { - double myVal = pow(view_exactE(i, j, k)[d], 2); + double myVal = Kokkos::pow(view_exactE(i, j, k)[d], 2); valL += myVal; }, Kokkos::Sum(temp)); @@ -321,7 +300,7 @@ int main(int argc, char* argv[]) { // reassign the correct values to the fields for the loop to work Kokkos::parallel_for( - "Assign rho field", ippl::getRangePolicy(view_rho, nghost), + "Assign rho field", rho.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { // go from local to global indices const int ig = i + ldom[0].first() - nghost; @@ -337,7 +316,7 @@ int main(int argc, char* argv[]) { }); Kokkos::parallel_for( - "Assign exact field", ippl::getRangePolicy(view_exact, nghost), + "Assign exact field", exact.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { const int ig = i + ldom[0].first() - nghost; const int jg = j + ldom[1].first() - nghost; @@ -351,7 +330,7 @@ int main(int argc, char* argv[]) { }); Kokkos::parallel_for( - "Assign exact E-field", ippl::getRangePolicy(view_exactE, nghost), + "Assign exact E-field", exactE.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { const int ig = i + ldom[0].first() - nghost; const int jg = j + ldom[1].first() - nghost; @@ -361,9 +340,7 @@ int main(int argc, char* argv[]) { double y = (jg + 0.5) * hr[1] + origin[1]; double z = (kg + 0.5) * hr[2] + origin[2]; - view_exactE(i, j, k)[0] = exact_E(x, y, z)[0]; - view_exactE(i, j, k)[1] = exact_E(x, y, z)[1]; - view_exactE(i, j, k)[2] = exact_E(x, y, z)[2]; + view_exactE(i, j, k) = exact_E(x, y, z); }); } diff --git a/src/Solver/test/TestGaussian_biharmonic.cpp b/src/Solver/test/TestGaussian_biharmonic.cpp index 648766ddd..ddd3c3cc3 100644 --- a/src/Solver/test/TestGaussian_biharmonic.cpp +++ b/src/Solver/test/TestGaussian_biharmonic.cpp @@ -120,17 +120,13 @@ int main(int argc, char* argv[]) { static IpplTimings::TimerRef allTimer = IpplTimings::getTimer("allTimer"); IpplTimings::startTimer(allTimer); - // number of interations - const int n = 6; - - // number of gridpoints to iterate over - std::array N = {4, 8, 16, 32, 64, 128}; + // gridsizes to iterate over + std::array N = {4, 8, 16, 32, 64, 128}; msg << "Spacing Error" << endl; - for (int p = 0; p < n; ++p) { + for (int pt : N) { // domain - int pt = N[p]; ippl::Index I(pt); ippl::NDIndex<3> owned(I, I, I); @@ -162,12 +158,12 @@ int main(int argc, char* argv[]) { exactE.initialize(mesh, layout); // assign the rho field with a gaussian - typename ScalarField_t::view_type view_rho = rho.getView(); - const int nghost = rho.getNghost(); - const auto& ldom = layout.getLocalNDIndex(); + auto view_rho = rho.getView(); + const int nghost = rho.getNghost(); + const auto& ldom = layout.getLocalNDIndex(); Kokkos::parallel_for( - "Assign rho field", ippl::getRangePolicy(view_rho, nghost), + "Assign rho field", rho.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { // go from local to global indices const int ig = i + ldom[0].first() - nghost; @@ -183,10 +179,10 @@ int main(int argc, char* argv[]) { }); // assign the exact field with its values (erf function) - typename ScalarField_t::view_type view_exact = exact.getView(); + auto view_exact = exact.getView(); Kokkos::parallel_for( - "Assign exact field", ippl::getRangePolicy(view_exact, nghost), + "Assign exact field", exact.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { const int ig = i + ldom[0].first() - nghost; const int jg = j + ldom[1].first() - nghost; @@ -202,7 +198,7 @@ int main(int argc, char* argv[]) { // assign the exact gradient field auto view_grad = exactE.getView(); Kokkos::parallel_for( - "Assign exact field", ippl::getRangePolicy(view_grad, nghost), + "Assign exact field", exactE.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { const int ig = i + ldom[0].first() - nghost; const int jg = j + ldom[1].first() - nghost; @@ -212,9 +208,7 @@ int main(int argc, char* argv[]) { double y = (jg + 0.5) * hx[1] + origin[1]; double z = (kg + 0.5) * hx[2] + origin[2]; - view_grad(i, j, k)[0] = exact_grad(x, y, z)[0]; - view_grad(i, j, k)[1] = exact_grad(x, y, z)[1]; - view_grad(i, j, k)[2] = exact_grad(x, y, z)[2]; + view_grad(i, j, k) = exact_grad(x, y, z); }); Kokkos::fence(); @@ -254,10 +248,10 @@ int main(int argc, char* argv[]) { double temp = 0.0; Kokkos::parallel_reduce( - "Vector errorNr reduce", ippl::getRangePolicy(view_fieldE, nghost), + "Vector errorNr reduce", fieldE.getFieldRangePolicy(), KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { - double myVal = pow(view_fieldE(i, j, k)[d], 2); + double myVal = Kokkos::pow(view_fieldE(i, j, k)[d], 2); valL += myVal; }, Kokkos::Sum(temp)); @@ -270,10 +264,10 @@ int main(int argc, char* argv[]) { temp = 0.0; Kokkos::parallel_reduce( - "Vector errorDr reduce", ippl::getRangePolicy(view_grad, nghost), + "Vector errorDr reduce", exactE.getFieldRangePolicy(), KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, double& valL) { - double myVal = pow(view_grad(i, j, k)[d], 2); + double myVal = Kokkos::pow(view_grad(i, j, k)[d], 2); valL += myVal; }, Kokkos::Sum(temp)); diff --git a/src/Solver/test/TestGaussian_convergence.cpp b/src/Solver/test/TestGaussian_convergence.cpp index 7e17cf887..cd3f7abe2 100644 --- a/src/Solver/test/TestGaussian_convergence.cpp +++ b/src/Solver/test/TestGaussian_convergence.cpp @@ -161,12 +161,12 @@ void compute_convergence(std::string algorithm, int pt) { fieldE.initialize(mesh, layout); // assign the rho field with a gaussian - typename ScalarField_t::view_type view_rho = rho.getView(); - const int nghost = rho.getNghost(); - const auto& ldom = layout.getLocalNDIndex(); + auto view_rho = rho.getView(); + const int nghost = rho.getNghost(); + const auto& ldom = layout.getLocalNDIndex(); Kokkos::parallel_for( - "Assign rho field", ippl::getRangePolicy(view_rho, nghost), + "Assign rho field", rho.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { // go from local to global indices const int ig = i + ldom[0].first() - nghost; @@ -182,10 +182,10 @@ void compute_convergence(std::string algorithm, int pt) { }); // assign the exact field with its values (erf function) - typename ScalarField_t::view_type view_exact = exact.getView(); + auto view_exact = exact.getView(); Kokkos::parallel_for( - "Assign exact field", ippl::getRangePolicy(view_exact, nghost), + "Assign exact field", exact.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { const int ig = i + ldom[0].first() - nghost; const int jg = j + ldom[1].first() - nghost; @@ -202,7 +202,7 @@ void compute_convergence(std::string algorithm, int pt) { auto view_exactE = exactE.getView(); Kokkos::parallel_for( - "Assign exact E-field", ippl::getRangePolicy(view_exactE, nghost), + "Assign exact E-field", exactE.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { const int ig = i + ldom[0].first() - nghost; const int jg = j + ldom[1].first() - nghost; @@ -212,9 +212,7 @@ void compute_convergence(std::string algorithm, int pt) { T y = (jg + 0.5) * hx[1] + origin[1]; T z = (kg + 0.5) * hx[2] + origin[2]; - view_exactE(i, j, k)[0] = exact_E(x, y, z)[0]; - view_exactE(i, j, k)[1] = exact_E(x, y, z)[1]; - view_exactE(i, j, k)[2] = exact_E(x, y, z)[2]; + view_exactE(i, j, k) = exact_E(x, y, z); }); // set the solver parameters @@ -257,9 +255,9 @@ void compute_convergence(std::string algorithm, int pt) { for (size_t d = 0; d < 3; ++d) { T temp = 0.0; Kokkos::parallel_reduce( - "Vector errorNr reduce", ippl::getRangePolicy(view_fieldE, nghost), + "Vector errorNr reduce", fieldE.getFieldRangePolicy(), KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, T& valL) { - T myVal = pow(view_fieldE(i, j, k)[d], 2); + T myVal = Kokkos::pow(view_fieldE(i, j, k)[d], 2); valL += myVal; }, Kokkos::Sum(temp)); @@ -272,9 +270,9 @@ void compute_convergence(std::string algorithm, int pt) { temp = 0.0; Kokkos::parallel_reduce( - "Vector errorDr reduce", ippl::getRangePolicy(view_exactE, nghost), + "Vector errorDr reduce", exactE.getFieldRangePolicy(), KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, T& valL) { - T myVal = pow(view_exactE(i, j, k)[d], 2); + T myVal = Kokkos::pow(view_exactE(i, j, k)[d], 2); valL += myVal; }, Kokkos::Sum(temp)); @@ -310,19 +308,16 @@ int main(int argc, char* argv[]) { static IpplTimings::TimerRef allTimer = IpplTimings::getTimer("allTimer"); IpplTimings::startTimer(allTimer); - // number of interations - const int n = 6; - - // number of gridpoints to iterate over - std::array N = {4, 8, 16, 32, 64, 128}; + // gridsizes to iterate over + std::array N = {4, 8, 16, 32, 64, 128}; msg << "Spacing Error ErrorEx ErrorEy ErrorEz" << endl; - for (int p = 0; p < n; ++p) { + for (int pt : N) { if (precision == "DOUBLE") { - compute_convergence(algorithm, N[p]); + compute_convergence(algorithm, pt); } else { - compute_convergence(algorithm, N[p]); + compute_convergence(algorithm, pt); } } diff --git a/src/Solver/test/TestGaussian_hessian.cpp b/src/Solver/test/TestGaussian_hessian.cpp new file mode 100644 index 000000000..9cc8c1f62 --- /dev/null +++ b/src/Solver/test/TestGaussian_hessian.cpp @@ -0,0 +1,462 @@ +// +// TestGaussian_hessian +// This programs tests the FFTPoissonSolver for a Gaussian source. +// More specifically, it tests also the Hessian calculation functionality. +// Different problem sizes are used for the purpose of convergence tests. +// Usage: +// srun ./TestGaussian_hessian --info 5 +// algorithm = "HOCKNEY" or "VICO", types of open BC algorithms +// precision = "DOUBLE" or "SINGLE", precision of the fields +// +// Example: +// srun ./TestGaussian_hessian HOCKNEY DOUBLE --info 5 +// +// Copyright (c) 2023, Sonali Mayani, +// Paul Scherrer Institut, Villigen PSI, Switzerland +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// + +#include "Ippl.h" + +#include +#include + +#include "Utility/IpplException.h" +#include "Utility/IpplTimings.h" + +#include "FFTPoissonSolver.h" + +template +using Mesh_t = typename ippl::UniformCartesian; + +template +using Matrix_t = typename Mesh_t::matrix_type; + +template +using Centering_t = typename Mesh_t::DefaultCentering; + +template +using ScalarField_t = typename ippl::Field, Centering_t>; + +template +using VectorField_t = typename ippl::Field, 3, Mesh_t, Centering_t>; + +template +using MField_t = typename ippl::Field, 3, Mesh_t, Centering_t>; + +template +using Solver_t = ippl::FFTPoissonSolver, ScalarField_t>; + +template +KOKKOS_INLINE_FUNCTION T gaussian(T x, T y, T z, T sigma = 0.05, T mu = 0.5) { + T pi = Kokkos::numbers::pi_v; + T prefactor = (1 / Kokkos::sqrt(2 * 2 * 2 * pi * pi * pi)) * (1 / (sigma * sigma * sigma)); + T r2 = (x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu); + + return prefactor * exp(-r2 / (2 * sigma * sigma)); +} + +template +KOKKOS_INLINE_FUNCTION T exact_fct(T x, T y, T z, T sigma = 0.05, T mu = 0.5) { + T pi = Kokkos::numbers::pi_v; + + T r = Kokkos::sqrt((x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu)); + + return (1 / (4.0 * pi * r)) * Kokkos::erf(r / (Kokkos::sqrt(2.0) * sigma)); +} + +template +KOKKOS_INLINE_FUNCTION ippl::Vector exact_E(T x, T y, T z, T sigma = 0.05, T mu = 0.5) { + T pi = Kokkos::numbers::pi_v; + T r = Kokkos::sqrt((x - mu) * (x - mu) + (y - mu) * (y - mu) + (z - mu) * (z - mu)); + T factor = (1.0 / (4.0 * pi * r * r)) + * ((1.0 / r) * Kokkos::erf(r / (Kokkos::sqrt(2.0) * sigma)) + - Kokkos::sqrt(2.0 / pi) * (1.0 / sigma) * exp(-r * r / (2 * sigma * sigma))); + + ippl::Vector Efield = {(x - mu), (y - mu), (z - mu)}; + return factor * Efield; +} + +template +KOKKOS_INLINE_FUNCTION Matrix_t exact_H(T x, T y, T z, T sigma = 0.05, T mu = 0.5) { + T pi = Kokkos::numbers::pi_v; + + x -= mu; + y -= mu; + z -= mu; + + T r = Kokkos::sqrt(x * x + y * y + z * z); + T errorfct = Kokkos::erf(r / (Kokkos::sqrt(2.0) * sigma)); + T exponent = exp(-r * r / (2 * sigma * sigma)); + + Matrix_t exactH; + + exactH[0][0] = ((3.0 * x * x * errorfct) / (4.0 * pi * Kokkos::pow(r, 5))) + - (errorfct / (4.0 * pi * Kokkos::pow(r, 3))) + - ((3.0 * x * x * exponent) + / (Kokkos::sqrt(2 * 2 * 2 * pi * pi * pi) * sigma * Kokkos::pow(r, 4))) + + (exponent / (Kokkos::sqrt(2 * 2 * 2 * pi * pi * pi) * sigma * r * r)) + - ((x * x * exponent) + / (Kokkos::sqrt(2 * 2 * 2 * pi * pi * pi) * Kokkos::pow(sigma, 3) * r * r)); + + exactH[1][1] = ((3.0 * y * y * errorfct) / (4.0 * pi * Kokkos::pow(r, 5))) + - (errorfct / (4.0 * pi * Kokkos::pow(r, 3))) + - ((3.0 * y * y * exponent) + / (Kokkos::sqrt(2 * 2 * 2 * pi * pi * pi) * sigma * Kokkos::pow(r, 4))) + + (exponent / (Kokkos::sqrt(2 * 2 * 2 * pi * pi * pi) * sigma * r * r)) + - ((y * y * exponent) + / (Kokkos::sqrt(2 * 2 * 2 * pi * pi * pi) * Kokkos::pow(sigma, 3) * r * r)); + + exactH[2][2] = ((3.0 * z * z * errorfct) / (4.0 * pi * Kokkos::pow(r, 5))) + - (errorfct / (4.0 * pi * Kokkos::pow(r, 3))) + - ((3.0 * z * z * exponent) + / (Kokkos::sqrt(2 * 2 * 2 * pi * pi * pi) * sigma * Kokkos::pow(r, 4))) + + (exponent / (Kokkos::sqrt(2 * 2 * 2 * pi * pi * pi) * sigma * r * r)) + - ((z * z * exponent) + / (Kokkos::sqrt(2 * 2 * 2 * pi * pi * pi) * Kokkos::pow(sigma, 3) * r * r)); + + exactH[0][1] = + x * y + * (((3.0 * errorfct) / (4.0 * pi * Kokkos::pow(r, 5))) + - ((3.0 * exponent) + / (Kokkos::sqrt(2 * 2 * 2 * pi * pi * pi) * sigma * Kokkos::pow(r, 4))) + - (exponent / (Kokkos::sqrt(2 * 2 * 2 * pi * pi * pi) * Kokkos::pow(sigma, 3) * r * r))); + + exactH[0][2] = + x * z + * (((3.0 * errorfct) / (4.0 * pi * Kokkos::pow(r, 5))) + - ((3.0 * exponent) + / (Kokkos::sqrt(2 * 2 * 2 * pi * pi * pi) * sigma * Kokkos::pow(r, 4))) + - (exponent / (Kokkos::sqrt(2 * 2 * 2 * pi * pi * pi) * Kokkos::pow(sigma, 3) * r * r))); + + exactH[1][2] = + z * y + * (((3.0 * errorfct) / (4.0 * pi * Kokkos::pow(r, 5))) + - ((3.0 * exponent) + / (Kokkos::sqrt(2 * 2 * 2 * pi * pi * pi) * sigma * Kokkos::pow(r, 4))) + - (exponent / (Kokkos::sqrt(2 * 2 * 2 * pi * pi * pi) * Kokkos::pow(sigma, 3) * r * r))); + + exactH[1][0] = exactH[0][1]; + exactH[2][0] = exactH[0][2]; + exactH[2][1] = exactH[1][2]; + + return exactH; +} + +// Define vtk dump function for plotting the fields +template +void dumpVTK(std::string path, ScalarField_t& rho, int nx, int ny, int nz, int iteration, T dx, + T dy, T dz) { + typename ScalarField_t::view_type::host_mirror_type host_view = rho.getHostMirror(); + Kokkos::deep_copy(host_view, rho.getView()); + std::ofstream vtkout; + vtkout.precision(10); + vtkout.setf(std::ios::scientific, std::ios::floatfield); + + std::stringstream fname; + fname << path; + fname << "/scalar_"; + fname << std::setw(4) << std::setfill('0') << iteration; + fname << ".vtk"; + + // open a new data file for this iteration + // and start with header + vtkout.open(fname.str().c_str(), std::ios::out); + if (!vtkout) { + std::cout << "couldn't open" << std::endl; + } + vtkout << "# vtk DataFile Version 2.0" << std::endl; + vtkout << "GaussianSource" << std::endl; + vtkout << "ASCII" << std::endl; + vtkout << "DATASET STRUCTURED_POINTS" << std::endl; + vtkout << "DIMENSIONS " << nx + 1 << " " << ny + 1 << " " << nz + 1 << std::endl; + vtkout << "ORIGIN " << 0.0 << " " << 0.0 << " " << 0.0 << std::endl; + vtkout << "SPACING " << dx << " " << dy << " " << dz << std::endl; + vtkout << "CELL_DATA " << (nx) * (ny) * (nz) << std::endl; + + vtkout << "SCALARS Phi float" << std::endl; + vtkout << "LOOKUP_TABLE default" << std::endl; + for (int z = 1; z < nz + 1; z++) { + for (int y = 1; y < ny + 1; y++) { + for (int x = 1; x < nx + 1; x++) { + vtkout << host_view(x, y, z) << std::endl; + } + } + } + + // close the output file for this iteration: + vtkout.close(); +} + +template +void compute_convergence(std::string algorithm, int pt) { + Inform errorMsg(""); + Inform errorMsg2all("", INFORM_ALL_NODES); + + ippl::Index I(pt); + ippl::NDIndex<3> owned(I, I, I); + + // specifies decomposition; here all dimensions are parallel + ippl::e_dim_tag decomp[3]; + for (unsigned int d = 0; d < 3; d++) + decomp[d] = ippl::PARALLEL; + + // unit box + T dx = 1.0 / pt; + ippl::Vector hx = {dx, dx, dx}; + ippl::Vector origin = {0.0, 0.0, 0.0}; + Mesh_t mesh(owned, hx, origin); + + // all parallel layout, standard domain, normal axis order + ippl::FieldLayout<3> layout(owned, decomp); + + // define the R (rho) field + ScalarField_t rho; + rho.initialize(mesh, layout); + + // define the exact solution field + ScalarField_t exact; + exact.initialize(mesh, layout); + + // define the Vector field E and the exact E field + VectorField_t exactE, fieldE; + exactE.initialize(mesh, layout); + fieldE.initialize(mesh, layout); + + // define the Matrix field for the Hessian + MField_t exactH; + MField_t* fieldH = nullptr; + exactH.initialize(mesh, layout); + + // assign the rho field with a gaussian + auto view_rho = rho.getView(); + const int nghost = rho.getNghost(); + const auto& ldom = layout.getLocalNDIndex(); + + Kokkos::parallel_for( + "Assign rho field", rho.getFieldRangePolicy(), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + // go from local to global indices + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + // define the physical points (cell-centered) + T x = (ig + 0.5) * hx[0] + origin[0]; + T y = (jg + 0.5) * hx[1] + origin[1]; + T z = (kg + 0.5) * hx[2] + origin[2]; + + view_rho(i, j, k) = gaussian(x, y, z); + }); + + // assign the exact field with its values (erf function) + auto view_exact = exact.getView(); + + Kokkos::parallel_for( + "Assign exact field", exact.getFieldRangePolicy(), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + T x = (ig + 0.5) * hx[0] + origin[0]; + T y = (jg + 0.5) * hx[1] + origin[1]; + T z = (kg + 0.5) * hx[2] + origin[2]; + + view_exact(i, j, k) = exact_fct(x, y, z); + }); + + // assign the exact E field + auto view_exactE = exactE.getView(); + + Kokkos::parallel_for( + "Assign exact E-field", exactE.getFieldRangePolicy(), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + T x = (ig + 0.5) * hx[0] + origin[0]; + T y = (jg + 0.5) * hx[1] + origin[1]; + T z = (kg + 0.5) * hx[2] + origin[2]; + + view_exactE(i, j, k) = exact_E(x, y, z); + }); + + // assign the exact Hessian field + auto view_exactH = exactH.getView(); + + Kokkos::parallel_for( + "Assign exact Matrix field", exactH.getFieldRangePolicy(), + KOKKOS_LAMBDA(const int i, const int j, const int k) { + const int ig = i + ldom[0].first() - nghost; + const int jg = j + ldom[1].first() - nghost; + const int kg = k + ldom[2].first() - nghost; + + T x = (ig + 0.5) * hx[0] + origin[0]; + T y = (jg + 0.5) * hx[1] + origin[1]; + T z = (kg + 0.5) * hx[2] + origin[2]; + + view_exactH(i, j, k) = exact_H(x, y, z); + }); + + // set the solver parameters + ippl::ParameterList params; + + // set the FFT parameters + params.add("use_heffte_defaults", false); + params.add("use_pencils", true); + params.add("use_gpu_aware", true); + params.add("comm", ippl::a2av); + params.add("r2c_direction", 0); + + // set the algorithm + if (algorithm == "HOCKNEY") { + params.add("algorithm", Solver_t::HOCKNEY); + } else if (algorithm == "VICO") { + params.add("algorithm", Solver_t::VICO); + } else { + throw IpplException("TestGaussian_convergence.cpp main()", "Unrecognized algorithm type"); + } + + // add output type + params.add("output_type", Solver_t::SOL_AND_GRAD); + + // add hessian flag parameter + params.add("hessian", true); + + // define an FFTPoissonSolver object + Solver_t FFTsolver(fieldE, rho, params); + + // solve the Poisson equation -> rho contains the solution (phi) now + FFTsolver.solve(); + + fieldH = FFTsolver.getHessian(); + + // compute relative error norm for potential + rho = rho - exact; + T err = norm(rho) / norm(exact); + + // compute relative error norm for the E-field components + ippl::Vector errE{0.0, 0.0, 0.0}; + fieldE = fieldE - exactE; + auto view_fieldE = fieldE.getView(); + + for (size_t d = 0; d < 3; ++d) { + T temp = 0.0; + Kokkos::parallel_reduce( + "Vector errorNr reduce", fieldE.getFieldRangePolicy(), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, T& valL) { + T myVal = Kokkos::pow(view_fieldE(i, j, k)[d], 2); + valL += myVal; + }, + Kokkos::Sum(temp)); + + T globaltemp = 0.0; + + MPI_Datatype mpi_type = get_mpi_datatype(temp); + MPI_Allreduce(&temp, &globaltemp, 1, mpi_type, MPI_SUM, ippl::Comm->getCommunicator()); + T errorNr = std::sqrt(globaltemp); + + temp = 0.0; + Kokkos::parallel_reduce( + "Vector errorDr reduce", exactE.getFieldRangePolicy(), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, T& valL) { + T myVal = Kokkos::pow(view_exactE(i, j, k)[d], 2); + valL += myVal; + }, + Kokkos::Sum(temp)); + + globaltemp = 0.0; + MPI_Allreduce(&temp, &globaltemp, 1, mpi_type, MPI_SUM, ippl::Comm->getCommunicator()); + T errorDr = std::sqrt(globaltemp); + + errE[d] = errorNr / errorDr; + } + + // compute relative error for hessian components + Matrix_t errH; + auto view_fieldH = fieldH->getView(); + + for (size_t m = 0; m < 3; ++m) { + for (size_t n = 0; n < 3; ++n) { + T diffNorm = 0; + T exactNorm = 0; + Kokkos::parallel_reduce( + "MFieldError", fieldH->getFieldRangePolicy(), + KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k, T& diffVal, + T& exactVal) { + diffVal += + Kokkos::pow(view_fieldH(i, j, k)[m][n] - view_exactH(i, j, k)[m][n], 2); + exactVal += Kokkos::pow(view_exactH(i, j, k)[m][n], 2); + }, + Kokkos::Sum(diffNorm), Kokkos::Sum(exactNorm)); + + T global_diff = 0.0; + T global_exact = 0.0; + MPI_Datatype mpi_type = get_mpi_datatype(diffNorm); + + MPI_Allreduce(&diffNorm, &global_diff, 1, mpi_type, MPI_SUM, + ippl::Comm->getCommunicator()); + MPI_Allreduce(&exactNorm, &global_exact, 1, mpi_type, MPI_SUM, + ippl::Comm->getCommunicator()); + + errH[m][n] = Kokkos::sqrt(global_diff / global_exact); + } + } + + errorMsg << std::setprecision(16) << dx << " " << err << " " << errE[0] << " " << errH[0][0] + << " " << errH[0][1] << endl; + + return; +} + +int main(int argc, char* argv[]) { + ippl::initialize(argc, argv); + { + Inform msg(""); + Inform msg2all("", INFORM_ALL_NODES); + + std::string algorithm = argv[1]; + std::string precision = argv[2]; + + if (precision != "DOUBLE" && precision != "SINGLE") { + throw IpplException("TestGaussian_convergence", + "Precision argument must be DOUBLE or SINGLE."); + } + + // start a timer to time the FFT Poisson solver + static IpplTimings::TimerRef allTimer = IpplTimings::getTimer("allTimer"); + IpplTimings::startTimer(allTimer); + + // gridsizes to iterate over + std::array N = {4, 8, 16, 32, 64, 128}; + + msg << "Spacing Error ErrorEx ErrorDxx ErrorDxy" << endl; + + for (int pt : N) { + if (precision == "DOUBLE") { + compute_convergence(algorithm, pt); + } else { + compute_convergence(algorithm, pt); + } + } + + // stop the timer + IpplTimings::stopTimer(allTimer); + IpplTimings::print(std::string("timing.dat")); + } + ippl::finalize(); + + return 0; +} diff --git a/src/Solver/test/TestSphere.cpp b/src/Solver/test/TestSphere.cpp index aaf3b059f..e556a39c1 100644 --- a/src/Solver/test/TestSphere.cpp +++ b/src/Solver/test/TestSphere.cpp @@ -109,7 +109,7 @@ int main(int argc, char* argv[]) { const auto& ldom = layout.getLocalNDIndex(); Kokkos::parallel_for( - "Assign rho field", ippl::getRangePolicy(view_rho, nghost), + "Assign rho field", rho.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { // go from local to global indices const int ig = i + ldom[0].first() - nghost; @@ -128,7 +128,7 @@ int main(int argc, char* argv[]) { typename field::view_type view_exact = exact.getView(); Kokkos::parallel_for( - "Assign exact field", ippl::getRangePolicy(view_exact, nghost), + "Assign exact field", exact.getFieldRangePolicy(), KOKKOS_LAMBDA(const int i, const int j, const int k) { const int ig = i + ldom[0].first() - nghost; const int jg = j + ldom[1].first() - nghost;