diff --git a/src/Communicate/Archive.h b/src/Communicate/Archive.h index ac3fad4a1..568c71499 100644 --- a/src/Communicate/Archive.h +++ b/src/Communicate/Archive.h @@ -1,7 +1,14 @@ // -// Class Ippl +// Class Archive // Class to (de-)serialize in MPI communication. // +// When data is exchanged between MPI ranks, it is stored in one dimensional +// arrays. These have the type detail::Archive, which are wrappers around +// one dimensional Kokkos views of type char. The data is then transferred using +// MPI send/recv calls. Note that the archive type differs from other buffers in +// that they have type char and thus contain raw bytes, unlike other typed buffers +// such as detail::FieldBufferData used by HaloCells. +// // Copyright (c) 2020, Matthias Frey, Paul Scherrer Institut, Villigen PSI, Switzerland // All rights reserved // @@ -18,6 +25,7 @@ #ifndef IPPL_ARCHIVE_H #define IPPL_ARCHIVE_H +#include "Types/IpplTypes.h" #include "Types/ViewTypes.h" #include "Types/Vector.h" @@ -34,17 +42,15 @@ namespace ippl { public: using buffer_type = typename ViewType::view_type; using pointer_type = typename buffer_type::pointer_type; - using size_type = typename buffer_type::size_type; - Archive(int size = 0); + Archive(size_type size = 0); /*! * Serialize. * @param view to take data from. */ template - void operator<<(const Kokkos::View& view); - + void serialize(const Kokkos::View& view, size_type nsends); /*! * Serialize vector attributes @@ -55,16 +61,14 @@ namespace ippl { * @param view to take data from. */ template - void operator<<(const Kokkos::View*>& view); - + void serialize(const Kokkos::View*>& view, size_type nsends); /*! * Deserialize. * @param view to put data to */ template - void operator>>(Kokkos::View& view); - + void deserialize(Kokkos::View& view, size_type nrecvs); /*! 
* Deserialize vector attributes @@ -75,8 +79,7 @@ namespace ippl { * @param view to put data to */ template - void operator>>(Kokkos::View*>& view); - + void deserialize(Kokkos::View*>& view, size_type nrecvs); /*! * @returns a pointer to the data of the buffer @@ -90,16 +93,35 @@ namespace ippl { * @returns the size of the buffer */ size_type getSize() const { + return writepos_m; + } + + size_type getBufferSize() const { return buffer_m.size(); } + void resizeBuffer(size_type size) { + Kokkos::resize(buffer_m, size); + } + + void reallocBuffer(size_type size) { + Kokkos::realloc(buffer_m, size); + } + + void resetWritePos() { + writepos_m = 0; + } + void resetReadPos() { + readpos_m = 0; + } + ~Archive() = default; private: //! write position for serialization - size_t writepos_m; + size_type writepos_m; //! read position for deserialization - size_t readpos_m; + size_type readpos_m; //! serialized data buffer_type buffer_m; }; diff --git a/src/Communicate/Archive.hpp b/src/Communicate/Archive.hpp index 081971a04..1f77bc40f 100644 --- a/src/Communicate/Archive.hpp +++ b/src/Communicate/Archive.hpp @@ -1,5 +1,5 @@ // -// Class Ippl +// Class Archive // Class to (de-)serialize in MPI communication. 
// // Copyright (c) 2020, Matthias Frey, Paul Scherrer Institut, Villigen PSI, Switzerland @@ -23,76 +23,94 @@ namespace ippl { namespace detail { template - Archive::Archive(int size) + Archive::Archive(size_type size) : writepos_m(0) , readpos_m(0) , buffer_m("buffer", size) { } - template template - void Archive::operator<<(const Kokkos::View& view) { + void Archive::serialize(const Kokkos::View& view, + size_type nsends) { size_t size = sizeof(T); - Kokkos::resize(buffer_m, buffer_m.size() + size * view.size()); Kokkos::parallel_for( - "Archive::serialize()", view.extent(0), - KOKKOS_CLASS_LAMBDA(const size_t i) { + "Archive::serialize()", nsends, + KOKKOS_CLASS_LAMBDA(const size_type i) { std::memcpy(buffer_m.data() + i * size + writepos_m, view.data() + i, size); }); - writepos_m += size * view.size(); + Kokkos::fence(); + writepos_m += size * nsends; } - template template - void Archive::operator<<(const Kokkos::View*>& view) { + void Archive::serialize(const Kokkos::View*>& view, + size_type nsends) { size_t size = sizeof(T); - Kokkos::resize(buffer_m, buffer_m.size() + Dim * size * view.size()); - using mdrange_t = Kokkos::MDRangePolicy>; + // Default index type for range policies is int64, + // so we have to explicitly specify size_type (uint64) + using mdrange_t = Kokkos::MDRangePolicy, + Kokkos::IndexType>; Kokkos::parallel_for( "Archive::serialize()", - mdrange_t({0, 0}, {(long int)view.extent(0), Dim}), - KOKKOS_CLASS_LAMBDA(const size_t i, const size_t d) { + // The constructor for Kokkos range policies always + // expects int64 regardless of index type provided + // by template parameters, so the typecast is necessary + // to avoid compiler warnings + mdrange_t({0, 0}, {(long int)nsends, Dim}), + KOKKOS_CLASS_LAMBDA(const size_type i, const size_t d) { std::memcpy(buffer_m.data() + (Dim * i + d) * size + writepos_m, &(*(view.data() + i))[d], size); }); - writepos_m += Dim * size * view.size(); + Kokkos::fence(); + writepos_m += Dim * size * nsends; 
} - template template - void Archive::operator>>(Kokkos::View& view) { + void Archive::deserialize(Kokkos::View& view, + size_type nrecvs) { size_t size = sizeof(T); + if(nrecvs > view.extent(0)) { + Kokkos::realloc(view, nrecvs); + } Kokkos::parallel_for( - "Archive::deserialize()", view.extent(0), - KOKKOS_CLASS_LAMBDA(const size_t i) { + "Archive::deserialize()", nrecvs, + KOKKOS_CLASS_LAMBDA(const size_type i) { std::memcpy(view.data() + i, buffer_m.data() + i * size + readpos_m, size); }); - readpos_m += size * view.size(); + // Wait for deserialization kernel to complete + // (as with serialization kernels) + Kokkos::fence(); + readpos_m += size * nrecvs; } - template template - void Archive::operator>>(Kokkos::View*>& view) { + void Archive::deserialize(Kokkos::View*>& view, + size_type nrecvs) { size_t size = sizeof(T); - using mdrange_t = Kokkos::MDRangePolicy>; + if(nrecvs > view.extent(0)) { + Kokkos::realloc(view, nrecvs); + } + using mdrange_t = Kokkos::MDRangePolicy, + Kokkos::IndexType>; Kokkos::parallel_for( "Archive::deserialize()", - mdrange_t({0, 0}, {(long int)view.extent(0), Dim}), - KOKKOS_CLASS_LAMBDA(const size_t i, const size_t d) { + mdrange_t({0, 0}, {(long int)nrecvs, Dim}), + KOKKOS_CLASS_LAMBDA(const size_type i, const size_t d) { std::memcpy(&(*(view.data() + i))[d], buffer_m.data() + (Dim * i + d) * size + readpos_m, size); }); - readpos_m += Dim * size * view.size(); + Kokkos::fence(); + readpos_m += Dim * size * nrecvs; } } } diff --git a/src/Communicate/Buffers.cpp b/src/Communicate/Buffers.cpp new file mode 100644 index 000000000..993d811d5 --- /dev/null +++ b/src/Communicate/Buffers.cpp @@ -0,0 +1,52 @@ +// +// Buffers.cpp +// Interface for globally accessible buffer factory for communication +// +// Data sent between MPI ranks has to be stored in a buffer for sending and receiving. +// To reduce the number of times memory has to be allocated and freed, the buffer +// factory interface allows buffers to be reused. 
This is especially relevant on +// GPUs, as Cuda allocation calls are expensive. To avoid reallocating the buffers +// in the case that the amount of data to be exchanged increases, when a new buffer +// is created, an amount of memory greater than the requested size is allocated +// for the new buffer. The factor by which memory is overallocated is determined by +// a data member in Communicate, which can be set and queried at runtime. Only new +// buffers are overallocated. If a buffer is requested with the same ID as a buffer +// that has been previously allocated, the same buffer will be used. If the requested +// size exceeds the buffer size, that buffer will be resized to have exactly +// the requested size. +// +// Currently, the buffer factory is used for application of periodic boundary +// conditions; halo cell exchange along faces, edges, and vertices; as well as +// exchanging particle data between ranks. +// +// Copyright (c) 2021 Paul Scherrer Institut, Villigen PSI, Switzerland +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . 
+// + +#include "Communicate.h" + +namespace ippl { + + void Communicate::setDefaultOverallocation(int factor) { + defaultOveralloc_m = factor; + } + + void Communicate::deleteBuffer(int id) { + buffers_m.erase(id); + } + + void Communicate::deleteAllBuffers() { + buffers_m.clear(); + } + +} diff --git a/src/Communicate/Buffers.hpp b/src/Communicate/Buffers.hpp new file mode 100644 index 000000000..4daf04d4f --- /dev/null +++ b/src/Communicate/Buffers.hpp @@ -0,0 +1,58 @@ +// +// Buffers.hpp +// Interface for globally accessible buffer factory for communication +// +// Data sent between MPI ranks has to be stored in a buffer for sending and receiving. +// To reduce the number of times memory has to be allocated and freed, the buffer +// factory interface allows buffers to be reused. This is especially relevant on +// GPUs, as Cuda allocation calls are expensive. To avoid reallocating the buffers +// in the case that the amount of data to be exchanged increases, when a new buffer +// is created, an amount of memory greater than the requested size is allocated +// for the new buffer. The factor by which memory is overallocated is determined by +// a data member in Communicate, which can be set and queried at runtime. Only new +// buffers are overallocated. If a buffer is requested with the same ID as a buffer +// that has been previously allocated, the same buffer will be used. If the requested +// size exceeds the buffer size, that buffer will be resized to have exactly +// the requested size. +// +// Currently, the buffer factory is used for application of periodic boundary +// conditions; halo cell exchange along faces, edges, and vertices; as well as +// exchanging particle data between ranks. +// +// Copyright (c) 2021 Paul Scherrer Institut, Villigen PSI, Switzerland +// All rights reserved +// +// This file is part of IPPL. 
+// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// + +namespace ippl { + + template + Communicate::buffer_type Communicate::getBuffer(int id, + size_type size, int overallocation) { + size *= sizeof(T); + #if __cplusplus > 201703L + if (buffers_m.contains(id)) { + #else + if (buffers_m.find(id) != buffers_m.end()) { + #endif + buffer_type buf = buffers_m[id]; + if (buf->getBufferSize() < size) { + buf->reallocBuffer(size); + } + return buf; + } + buffers_m[id] = std::make_shared(size * + std::max(overallocation, defaultOveralloc_m)); + return buffers_m[id]; + } + +} diff --git a/src/Communicate/CMakeLists.txt b/src/Communicate/CMakeLists.txt index bf02f812e..f0e1e22e5 100644 --- a/src/Communicate/CMakeLists.txt +++ b/src/Communicate/CMakeLists.txt @@ -1,5 +1,6 @@ set (_SRCS Communicate.cpp + Buffers.cpp ) set (_HDRS diff --git a/src/Communicate/Communicate.cpp b/src/Communicate/Communicate.cpp index dc68abcd2..0843d5881 100644 --- a/src/Communicate/Communicate.cpp +++ b/src/Communicate/Communicate.cpp @@ -26,4 +26,16 @@ namespace ippl { Communicate::Communicate(const MPI_Comm& comm) : boost::mpi::communicator(comm, kind_type::comm_duplicate) {} + + void Communicate::irecv(int src, int tag, + archive_type& ar, MPI_Request& request, size_type msize) + { + if (msize > INT_MAX) { + std::cerr << "Message size exceeds range of int" << std::endl; + std::abort(); + } + MPI_Irecv(ar.getBuffer(), msize, + MPI_BYTE, src, tag, *this, &request); + } + } diff --git a/src/Communicate/Communicate.h b/src/Communicate/Communicate.h index 2edd9f9d3..40ceadf93 100644 --- a/src/Communicate/Communicate.h +++ b/src/Communicate/Communicate.h @@ -19,6 +19,11 @@ #define 
IPPL_COMMUNICATE_H #include +#include + +// For message size check; see below +#include +#include #include "Communicate/Archive.h" #include "Communicate/Tags.h" @@ -39,14 +44,56 @@ namespace ippl { // Attention: only works with default spaces using archive_type = detail::Archive<>; + using buffer_type = std::shared_ptr; + + using size_type = detail::size_type; Communicate(); Communicate(const MPI_Comm& comm = MPI_COMM_WORLD); + /** + * Query the current default overallocation factor + * @return Factor by which new buffers are overallocated by default + */ + int getDefaultOverallocation() const { return defaultOveralloc_m; } - ~Communicate() = default; + /** + * Set the default overallocation factor + * @param factor New overallocation factor for new buffers + */ + void setDefaultOverallocation(int factor); + + /** + * Obtain a buffer of at least the requested size that is associated + * with the given ID, overallocating memory for the buffer if it's new + * @tparam T The datatype to be stored in the buffer; in particular, the size + * is scaled by the size of the data type (default char for when + * the size is already in bytes) + * @param id The numerical ID with which the buffer is associated (allows buffer reuse) + * @param size The minimum size of the buffer, measured in number of elements + * of the provided datatype (if the size is in bytes, the default + * type char should be used) + * @param overallocation The factor by which memory for the buffer should be + * overallocated; only used if the buffer with the given + * ID has not been allocated before; by default, the larger + * value between 1 and the defaultOveralloc_m member + * is used + * @return A shared pointer to the buffer with the requested properties + */ + template + buffer_type getBuffer(int id, size_type size, int overallocation = 1); + /** + * Deletes a buffer + * @param id Buffer ID + */ + void deleteBuffer(int id); + + /** + * Deletes all buffers created by the buffer factory + */ + void 
deleteAllBuffers(); [[deprecated]] int myNode() const noexcept { @@ -72,76 +119,58 @@ namespace ippl { * \warning Only works with default spaces! */ template - void send(int dest, int tag, Buffer& buffer); - - - /*! - * \warning Only works with default spaces! - */ - template - void recv(int src, int tag, Buffer& buffer); - + void recv(int src, int tag, Buffer& buffer, archive_type& ar, + size_type msize, size_type nrecvs); /*! * \warning Only works with default spaces! */ template - void isend(int dest, int tag, Buffer& buffer, archive_type&, MPI_Request&); - + void isend(int dest, int tag, Buffer& buffer, archive_type&, + MPI_Request&, size_type nsends); /*! * \warning Only works with default spaces! */ - template - void irecv(int src, int tag, Buffer& buffer); + void irecv(int src, int tag, archive_type&, MPI_Request&, size_type msize); + private: + std::map buffers_m; + int defaultOveralloc_m = 1; }; - template - void Communicate::send(int dest, int tag, Buffer& buffer) - { - // Attention: only works with default spaces - archive_type ar; - - buffer.serialize(ar); - - MPI_Send(ar.getBuffer(), ar.getSize(), - MPI_BYTE, dest, tag, *this); - } - - - template - void Communicate::recv(int src, int tag, Buffer& buffer) + void Communicate::recv(int src, int tag, Buffer& buffer, archive_type& ar, + size_type msize, size_type nrecvs) { + // Temporary fix. 
MPI communication seems to have problems when the + // count argument exceeds the range of int, so large messages should + // be split into smaller messages + if (msize > INT_MAX) { + std::cerr << "Message size exceeds range of int" << std::endl; + std::abort(); + } MPI_Status status; - - MPI_Probe(src, tag, *this, &status); - - int msize = 0; - MPI_Get_count(&status, MPI_BYTE, &msize); - - // Attention: only works with default spaces - archive_type ar(msize); - - - MPI_Recv(ar.getBuffer(), ar.getSize(), + MPI_Recv(ar.getBuffer(), msize, MPI_BYTE, src, tag, *this, &status); - buffer.deserialize(ar); + buffer.deserialize(ar, nrecvs); } - template void Communicate::isend(int dest, int tag, Buffer& buffer, - archive_type& ar, MPI_Request& request) + archive_type& ar, MPI_Request& request, size_type nsends) { - buffer.serialize(ar); - + if (ar.getSize() > INT_MAX) { + std::cerr << "Message size exceeds range of int" << std::endl; + std::abort(); + } + buffer.serialize(ar, nsends); MPI_Isend(ar.getBuffer(), ar.getSize(), MPI_BYTE, dest, tag, *this, &request); } } +#include "Communicate/Buffers.hpp" #endif diff --git a/src/Communicate/Tags.h b/src/Communicate/Tags.h index e7aa827e8..4036f6df6 100644 --- a/src/Communicate/Tags.h +++ b/src/Communicate/Tags.h @@ -100,4 +100,23 @@ namespace ippl { #define IPPL_APP_TAG9 99000 #define IPPL_APP_CYCLE 1000 +// IDs used to identify buffers created using the buffer factory interface +// Periodic boundary conditions +#define IPPL_PERIODIC_BC_SEND 1000 +#define IPPL_PERIODIC_BC_RECV 2000 + +// Halo cells +#define IPPL_HALO_FACE_SEND 3000 +#define IPPL_HALO_FACE_RECV 4000 + +#define IPPL_HALO_EDGE_SEND 5000 +#define IPPL_HALO_EDGE_RECV 6000 + +#define IPPL_HALO_VERTEX_SEND 7000 +#define IPPL_HALO_VERTEX_RECV 8000 + +// Particle spatial layout +#define IPPL_PARTICLE_SEND 9000 +#define IPPL_PARTICLE_RECV 10000 + #endif // TAGS_H diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index b9724ff89..7b8afdb96 100644 --- a/src/FFT/FFT.h +++ 
b/src/FFT/FFT.h @@ -7,9 +7,8 @@ // floating-point precision type of the Field (float or double). // Currently, we use heffte for taking the transforms and the class FFT // serves as an interface between IPPL and heffte. In making this interface, -// we have utilized ideas from Cabana library -// https://github.com/ECP-copa/Cabana especially for the temporary -// field with layout right for passing into heffte. +// we have referred Cabana library +// https://github.com/ECP-copa/Cabana. // // Copyright (c) 2021, Sriramkrishnan Muralikrishnan, // Paul Scherrer Institut, Villigen PSI, Switzerland @@ -90,7 +89,7 @@ namespace ippl { using complexType = std::complex; }; template <> - struct HeffteBackendType { + struct HeffteBackendType { using backend = heffte::backend::mkl; using complexType = std::complex; }; @@ -131,7 +130,7 @@ namespace ippl { using heffteBackend = typename detail::HeffteBackendType::backend; using heffteComplex_t = typename detail::HeffteBackendType::complexType; - + using workspace_t = typename heffte::fft3d::template buffer_container; /** Create a new FFT object with the layout for the input Field and * parameters for heffte. @@ -148,6 +147,7 @@ namespace ippl { private: + //using long long = detail::long long; /** setup performs the initialization necessary. 
@@ -157,6 +157,8 @@ namespace ippl { const FFTParams& params); std::shared_ptr> heffte_m; + workspace_t workspace_m; + }; @@ -169,13 +171,15 @@ namespace ippl { public: typedef FieldLayout Layout_t; - typedef Kokkos::complex Complex_t; typedef Field RealField_t; - typedef Field ComplexField_t; using heffteBackend = typename detail::HeffteBackendType::backend; using heffteComplex_t = typename detail::HeffteBackendType::complexType; + using workspace_t = typename heffte::fft3d_r2c::template buffer_container; + typedef Kokkos::complex Complex_t; + //typedef heffteComplex_t Complex_t; + typedef Field ComplexField_t; /** Create a new FFT object with the layout for the input and output Fields * and parameters for heffte. @@ -192,6 +196,7 @@ namespace ippl { private: + //using long long = detail::long long; /** setup performs the initialization necessary after the transform @@ -205,6 +210,7 @@ namespace ippl { std::shared_ptr> heffte_m; + workspace_t workspace_m; }; diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index ebdb75983..e8b7fa8ab 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -7,9 +7,8 @@ // floating-point precision type of the Field (float or double). // Currently, we use heffte for taking the transforms and the class FFT // serves as an interface between IPPL and heffte. In making this interface, -// we have utilized ideas from Cabana library -// https://github.com/ECP-copa/Cabana especially for the temporary -// field with layout right for passing into heffte. +// we have referred Cabana library. +// https://github.com/ECP-copa/Cabana. 
// // Copyright (c) 2021, Sriramkrishnan Muralikrishnan, // Paul Scherrer Institut, Villigen PSI, Switzerland @@ -32,6 +31,7 @@ #include "FFT/FFT.h" #include "FieldLayout/FieldLayout.h" #include "Field/BareField.h" +#include "Utility/IpplTimings.h" namespace ippl { @@ -66,7 +66,7 @@ namespace ippl { high.fill(0); /** - * Static cast to long long is necessary, as heffte::box3d requires it + * Static cast to detail::long long (uint64_t) is necessary, as heffte::box3d requires it * like that. */ for(size_t d = 0; d < Dim; ++d) { @@ -88,8 +88,8 @@ namespace ippl { const FFTParams& params) { - heffte::box3d inbox = {low, high}; - heffte::box3d outbox = {low, high}; + heffte::box3d inbox = {low, high}; + heffte::box3d outbox = {low, high}; heffte::plan_options heffteOptions = heffte::default_options(); @@ -100,6 +100,9 @@ namespace ippl { heffte_m = std::make_shared> (inbox, outbox, Ippl::getComm(), heffteOptions); + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + workspace_m = workspace_t(heffte_m->size_workspace()); + } @@ -114,11 +117,14 @@ namespace ippl { const int nghost = f.getNghost(); /** - *This copy to a temporary Kokkos view is needed because heffte accepts - *input and output data in layout left by default, whereas - *default Kokkos views can be layout left or right depending on whether - *the device is gpu or cpu. Also the data types which heFFTe accepts are - * different from Kokkos. + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte's data types are different than Kokkos::complex + *3) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + *Points 2 and 3 are slightly less of a concern and the main one is + *point 1. 
*/ Kokkos::View tempField("tempField", fview.extent(0) - 2*nghost, @@ -149,14 +155,18 @@ namespace ippl { fview(i, j, k).imag()); #endif }); + + + + if ( direction == 1 ) { - heffte_m->forward(tempField.data(), tempField.data(), + heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), heffte::scale::full); } else if ( direction == -1 ) { - heffte_m->backward(tempField.data(), tempField.data(), + heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), heffte::scale::none); } else @@ -213,10 +223,10 @@ namespace ippl { * 1D FFTs we just have to make the length in other * dimensions to be 1. */ - std::array lowInput; - std::array highInput; - std::array lowOutput; - std::array highOutput; + std::array lowInput; + std::array highInput; + std::array lowOutput; + std::array highOutput; const NDIndex& lDomInput = layoutInput.getLocalNDIndex(); const NDIndex& lDomOutput = layoutOutput.getLocalNDIndex(); @@ -227,7 +237,7 @@ namespace ippl { highOutput.fill(0); /** - * Static cast to long long is necessary, as heffte::box3d requires it + * Static cast to detail::long long (uint64_t) is necessary, as heffte::box3d requires it * like that. 
*/ for(size_t d = 0; d < Dim; ++d) { @@ -240,7 +250,6 @@ namespace ippl { lDomOutput[d].first() - 1); } - setup(lowInput, highInput, lowOutput, highOutput, params); } @@ -257,8 +266,8 @@ namespace ippl { const FFTParams& params) { - heffte::box3d inbox = {lowInput, highInput}; - heffte::box3d outbox = {lowOutput, highOutput}; + heffte::box3d inbox = {lowInput, highInput}; + heffte::box3d outbox = {lowOutput, highOutput}; heffte::plan_options heffteOptions = heffte::default_options(); @@ -269,6 +278,9 @@ namespace ippl { heffte_m = std::make_shared> (inbox, outbox, params.getRCDirection(), Ippl::getComm(), heffteOptions); + + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + workspace_m = workspace_t(heffte_m->size_workspace()); } @@ -285,18 +297,21 @@ namespace ippl { const int nghostg = g.getNghost(); /** - *This copy to a temporary Kokkos view is needed because heffte accepts - *input and output data in layout left by default, whereas - *default Kokkos views can be layout left or right depending on whether - *the device is gpu or cpu. Also the data types which heFFTe accepts are - * different from Kokkos. + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte's data types are different than Kokkos::complex + *3) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + *Points 2 and 3 are slightly less of a concern and the main one is + *point 1. 
*/ - Kokkos::View + Kokkos::View tempFieldf("tempFieldf", fview.extent(0) - 2*nghostf, fview.extent(1) - 2*nghostf, fview.extent(2) - 2*nghostf); - Kokkos::View + Kokkos::View tempFieldg("tempFieldg", gview.extent(0) - 2*nghostg, gview.extent(1) - 2*nghostg, gview.extent(2) - 2*nghostg); @@ -337,14 +352,16 @@ namespace ippl { gview(i, j, k).imag()); #endif }); + + if ( direction == 1 ) { - heffte_m->forward( tempFieldf.data(), tempFieldg.data(), + heffte_m->forward( tempFieldf.data(), tempFieldg.data(), workspace_m.data(), heffte::scale::full ); } else if ( direction == -1 ) { - heffte_m->backward( tempFieldg.data(), tempFieldf.data(), + heffte_m->backward( tempFieldg.data(), tempFieldf.data(), workspace_m.data(), heffte::scale::none ); } else @@ -353,6 +370,7 @@ namespace ippl { "Only 1:forward and -1:backward are allowed as directions"); } + Kokkos::parallel_for("copy to Kokkos f field FFT", mdrange_type({nghostf, nghostf, nghostf}, {fview.extent(0) - nghostf, diff --git a/src/Field/BareField.h b/src/Field/BareField.h index ff96eca45..0c6610172 100644 --- a/src/Field/BareField.h +++ b/src/Field/BareField.h @@ -22,6 +22,7 @@ #include "Expression/IpplExpressions.h" #include "Types/ViewTypes.h" +#include "Types/IpplTypes.h" #include "FieldLayout/FieldLayout.h" @@ -107,7 +108,7 @@ namespace ippl { * @param d the dimension * @returns the number of grid points in the given dimension. */ - int size(unsigned d) const { return owned_m[d].length(); } + detail::size_type size(unsigned d) const { return owned_m[d].length(); } /*! 
diff --git a/src/Field/BcTypes.h b/src/Field/BcTypes.h index e948adc48..77e1a67af 100644 --- a/src/Field/BcTypes.h +++ b/src/Field/BcTypes.h @@ -31,6 +31,7 @@ #include "Index/NDIndex.h" #include "Types/ViewTypes.h" +#include "Types/IpplTypes.h" #include "Communicate/Archive.h" #include "FieldLayout/FieldLayout.h" #include "Meshes/UniformCartesian.h" @@ -191,7 +192,7 @@ namespace ippl { private: face_neighbor_type faceNeighbors_m; - + detail::FieldBufferData haloData_m; }; } diff --git a/src/Field/BcTypes.hpp b/src/Field/BcTypes.hpp index 1dce90bd3..9f3912d4e 100644 --- a/src/Field/BcTypes.hpp +++ b/src/Field/BcTypes.hpp @@ -274,9 +274,8 @@ namespace ippl { matchtag = Ippl::Comm->following_tag(BC_PARALLEL_PERIODIC_TAG); } - std::vector requests(0); - using archive_type = Communicate::archive_type; - std::vector> archives(0); + using buffer_type = Communicate::buffer_type; + std::vector requests(faceNeighbors_m[face].size()); using HaloCells_t = detail::HaloCells; using range_t = typename HaloCells_t::bound_type; @@ -304,15 +303,15 @@ namespace ippl { } rangeNeighbors.push_back(range); - archives.push_back(std::make_unique()); - requests.resize(requests.size() + 1); - - detail::FieldBufferData fdSend; - halo.pack(range, view, fdSend); + detail::size_type nSends; + halo.pack(range, view, haloData_m, nSends); + + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PERIODIC_BC_SEND + i, nSends); - Ippl::Comm->isend(rank, tag, fdSend, *(archives.back()), - requests.back()); + Ippl::Comm->isend(rank, tag, haloData_m, *buf, + requests[i], nSends); + buf->resetWritePos(); } for (size_t i = 0; i < faceNeighbors_m[face].size(); ++i) { @@ -324,23 +323,20 @@ namespace ippl { range.lo[d] = range.lo[d] + offsetRecv; range.hi[d] = range.hi[d] + offsetRecv; - detail::FieldBufferData fdRecv; - - Kokkos::resize(fdRecv.buffer, - (range.hi[0] - range.lo[0]) * - (range.hi[1] - range.lo[1]) * - (range.hi[2] - range.lo[2])); - - Ippl::Comm->recv(rank, matchtag, fdRecv); + detail::size_type 
nRecvs = (range.hi[0] - range.lo[0]) * + (range.hi[1] - range.lo[1]) * + (range.hi[2] - range.lo[2]); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PERIODIC_BC_RECV + i, nRecvs); + Ippl::Comm->recv(rank, matchtag, haloData_m, *buf, nRecvs * sizeof(T), nRecvs); + buf->resetReadPos(); using assign_t = typename HaloCells_t::assign; - halo.template unpack(range, view, fdRecv); + halo.template unpack(range, view, haloData_m); } if (requests.size() > 0) { MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); - archives.clear(); } } //For all other processors do nothing diff --git a/src/Field/HaloCells.h b/src/Field/HaloCells.h index f34389198..883acb71c 100644 --- a/src/Field/HaloCells.h +++ b/src/Field/HaloCells.h @@ -20,6 +20,7 @@ #include "Index/NDIndex.h" #include "Types/ViewTypes.h" +#include "Types/IpplTypes.h" #include "Communicate/Archive.h" #include "FieldLayout/FieldLayout.h" #include @@ -33,18 +34,17 @@ namespace ippl { struct FieldBufferData { using view_type = typename detail::ViewType::view_type; - void serialize(Archive<>& ar) { - ar << buffer; + void serialize(Archive<>& ar, size_type nsends) { + ar.serialize(buffer, nsends); } - void deserialize(Archive<>& ar) { - ar >> buffer; + void deserialize(Archive<>& ar, size_type nrecvs) { + ar.deserialize(buffer, nrecvs); } view_type buffer; }; - /*! * This class provides the functionality to do field halo exchange. * @file HaloCells.h @@ -64,7 +64,6 @@ namespace ippl { std::array hi; }; - enum SendOrder { HALO_TO_INTERNAL, INTERNAL_TO_HALO @@ -72,7 +71,6 @@ namespace ippl { HaloCells(); - /*! * Send halo data to internal cells. This operation uses * assign_plus functor to assign the data. @@ -93,7 +91,6 @@ namespace ippl { */ void fillHalo(view_type&, const Layout_t* layout, int nghost); - /*! * Pack the field data to be sent into a contiguous array. 
* @param range the bounds of the subdomain to be sent @@ -102,7 +99,8 @@ namespace ippl { */ void pack(const bound_type& range, const view_type& view, - FieldBufferData& fd); + FieldBufferData& fd, + size_type& nsends); /*! * Unpack the received field data and assign it. @@ -116,7 +114,6 @@ namespace ippl { const view_type& view, FieldBufferData& fd); - /*! * Operator for the unpack function. * This operator is used in case of INTERNAL_TO_HALO. @@ -128,7 +125,6 @@ namespace ippl { } }; - /*! * Operator for the unpack function. * This operator is used in case of HALO_TO_INTERNAL. @@ -139,6 +135,7 @@ namespace ippl { lhs += rhs; } }; + /*! * Obtain the bounds to send / receive. The second domain, i.e., * nd2, is grown by nghost cells in each dimension in order to @@ -209,6 +206,8 @@ namespace ippl { auto makeSubview(const view_type& view, const bound_type& intersect); + FieldBufferData haloData_m; + }; } } diff --git a/src/Field/HaloCells.hpp b/src/Field/HaloCells.hpp index 7532f39a3..29167a085 100644 --- a/src/Field/HaloCells.hpp +++ b/src/Field/HaloCells.hpp @@ -15,9 +15,12 @@ // You should have received a copy of the GNU General Public License // along with IPPL. If not, see . 
// + #include #include +#include "Communicate/Communicate.h" + namespace ippl { namespace detail { template @@ -26,7 +29,6 @@ namespace ippl { static_assert(Dim == 3, "Dimension must be 3!"); } - template void HaloCells::accumulateHalo(view_type& view, const Layout_t* layout, @@ -70,13 +72,18 @@ namespace ippl { int myRank = Ippl::Comm->rank(); - // send - std::vector requests(0); - using archive_type = Communicate::archive_type; - std::vector> archives(0); + size_t totalRequests = 0; + for (auto& neighbor : neighbors) { + totalRequests += neighbor.size(); + } + + using buffer_type = Communicate::buffer_type; + std::vector requests(totalRequests); int tag = Ippl::Comm->next_tag(HALO_FACE_TAG, HALO_TAG_CYCLE); + const size_t groupCount = neighbors.size(); + size_t requestIndex = 0; for (size_t face = 0; face < neighbors.size(); ++face) { for (size_t i = 0; i < neighbors[face].size(); ++i) { @@ -92,17 +99,16 @@ namespace ippl { lDomains[myRank], nghost); } + size_type nsends; + pack(range, view, haloData_m, nsends); - archives.push_back(std::make_unique()); - requests.resize(requests.size() + 1); - - - FieldBufferData fd; - pack(range, view, fd); - - Ippl::Comm->isend(rank, tag, fd, *(archives.back()), - requests.back()); + buffer_type buf = Ippl::Comm->getBuffer( + IPPL_HALO_FACE_SEND + i * groupCount + face, + nsends); + Ippl::Comm->isend(rank, tag, haloData_m, *buf, + requests[requestIndex++], nsends); + buf->resetWritePos(); } } @@ -122,22 +128,24 @@ namespace ippl { lDomains[myRank], nghost); } - FieldBufferData fd; + size_type nrecvs = (int)((range.hi[0] - range.lo[0]) * + (range.hi[1] - range.lo[1]) * + (range.hi[2] - range.lo[2])); - Kokkos::resize(fd.buffer, - (range.hi[0] - range.lo[0]) * - (range.hi[1] - range.lo[1]) * - (range.hi[2] - range.lo[2])); + buffer_type buf = Ippl::Comm->getBuffer( + IPPL_HALO_FACE_RECV + i * groupCount + face, + nrecvs); - Ippl::Comm->recv(rank, tag, fd); + Ippl::Comm->recv(rank, tag, haloData_m, *buf, + nrecvs * sizeof(T), 
nrecvs); + buf->resetReadPos(); - unpack(range, view, fd); + unpack(range, view, haloData_m); } } - if (requests.size() > 0) { - MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); - archives.clear(); + if (totalRequests > 0) { + MPI_Waitall(totalRequests, requests.data(), MPI_STATUSES_IGNORE); } } @@ -155,13 +163,18 @@ namespace ippl { int myRank = Ippl::Comm->rank(); - // send - std::vector requests(0); - using archive_type = Communicate::archive_type; - std::vector> archives(0); + size_t totalRequests = 0; + for (auto& neighbor : neighbors) { + totalRequests += neighbor.size(); + } + + using buffer_type = Communicate::buffer_type; + std::vector requests(totalRequests); int tag = Ippl::Comm->next_tag(HALO_EDGE_TAG, HALO_TAG_CYCLE); + const size_t groupCount = neighbors.size(); + size_t requestIndex = 0; for (size_t edge = 0; edge < neighbors.size(); ++edge) { for (size_t i = 0; i < neighbors[edge].size(); ++i) { @@ -177,16 +190,16 @@ namespace ippl { lDomains[myRank], nghost); } - archives.push_back(std::make_unique()); - requests.resize(requests.size() + 1); - - - FieldBufferData fd; - pack(range, view, fd); + size_type nsends; + pack(range, view, haloData_m, nsends); - Ippl::Comm->isend(rank, tag, fd, *(archives.back()), - requests.back()); + buffer_type buf = Ippl::Comm->getBuffer( + IPPL_HALO_EDGE_SEND + i * groupCount + edge, + nsends); + Ippl::Comm->isend(rank, tag, haloData_m, *buf, + requests[requestIndex++], nsends); + buf->resetWritePos(); } } @@ -206,22 +219,24 @@ namespace ippl { lDomains[myRank], nghost); } - FieldBufferData fd; + size_type nrecvs = (int)((range.hi[0] - range.lo[0]) * + (range.hi[1] - range.lo[1]) * + (range.hi[2] - range.lo[2])); - Kokkos::resize(fd.buffer, - (range.hi[0] - range.lo[0]) * - (range.hi[1] - range.lo[1]) * - (range.hi[2] - range.lo[2])); + buffer_type buf = Ippl::Comm->getBuffer( + IPPL_HALO_EDGE_RECV + i * groupCount + edge, + nrecvs); - Ippl::Comm->recv(rank, tag, fd); + Ippl::Comm->recv(rank, tag, 
haloData_m, *buf, + nrecvs * sizeof(T), nrecvs); + buf->resetReadPos(); - unpack(range, view, fd); + unpack(range, view, haloData_m); } } - if (requests.size() > 0) { - MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); - archives.clear(); + if (totalRequests > 0) { + MPI_Waitall(totalRequests, requests.data(), MPI_STATUSES_IGNORE); } } @@ -239,13 +254,12 @@ namespace ippl { int myRank = Ippl::Comm->rank(); - // send - std::vector requests(0); - using archive_type = Communicate::archive_type; - std::vector> archives(0); + using buffer_type = Communicate::buffer_type; + std::vector requests(neighbors.size()); int tag = Ippl::Comm->next_tag(HALO_VERTEX_TAG, HALO_TAG_CYCLE); + size_t requestIndex = 0; for (size_t vertex = 0; vertex < neighbors.size(); ++vertex) { if (neighbors[vertex] < 0) { // we are on a mesh / physical boundary @@ -264,15 +278,16 @@ namespace ippl { lDomains[myRank], nghost); } - archives.push_back(std::make_unique()); - requests.resize(requests.size() + 1); + size_type nsends; + pack(range, view, haloData_m, nsends); + buffer_type buf = Ippl::Comm->getBuffer( + IPPL_HALO_VERTEX_SEND + vertex, + nsends); - FieldBufferData fd; - pack(range, view, fd); - - Ippl::Comm->isend(rank, tag, fd, *(archives.back()), - requests.back()); + Ippl::Comm->isend(rank, tag, haloData_m, *buf, + requests[requestIndex++], nsends); + buf->resetWritePos(); } // receive @@ -294,21 +309,23 @@ namespace ippl { lDomains[myRank], nghost); } - FieldBufferData fd; - - Kokkos::resize(fd.buffer, - (range.hi[0] - range.lo[0]) * - (range.hi[1] - range.lo[1]) * - (range.hi[2] - range.lo[2])); + size_type nrecvs = (int)((range.hi[0] - range.lo[0]) * + (range.hi[1] - range.lo[1]) * + (range.hi[2] - range.lo[2])); + + buffer_type buf = Ippl::Comm->getBuffer( + IPPL_HALO_VERTEX_RECV + vertex, + nrecvs); - Ippl::Comm->recv(rank, tag, fd); + Ippl::Comm->recv(rank, tag, haloData_m, *buf, + nrecvs * sizeof(T), nrecvs); + buf->resetReadPos(); - unpack(range, view, fd); + 
unpack(range, view, haloData_m); } - if (requests.size() > 0) { - MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); - archives.clear(); + if (requestIndex > 0) { + MPI_Waitall(requestIndex, requests.data(), MPI_STATUSES_IGNORE); } } @@ -316,13 +333,19 @@ namespace ippl { template void HaloCells::pack(const bound_type& range, const view_type& view, - FieldBufferData& fd) + FieldBufferData& fd, + size_type& nsends) { auto subview = makeSubview(view, range); auto& buffer = fd.buffer; - Kokkos::resize(buffer, subview.extent(0) * subview.extent(1) * subview.extent(2)); + size_t size = subview.size(); + nsends = size; + if (buffer.size() < size) { + int overalloc = Ippl::Comm->getDefaultOverallocation(); + Kokkos::realloc(buffer, size * overalloc); + } using mdrange_type = Kokkos::MDRangePolicy>; Kokkos::parallel_for( @@ -335,10 +358,12 @@ namespace ippl { const size_t j, const size_t k) { - int l = i + j * subview.extent(0) + k * subview.extent(0) * subview.extent(1); + int l = i + j * subview.extent(0) + + k * subview.extent(0) * subview.extent(1); buffer(l) = subview(i, j, k); } ); + Kokkos::fence(); } @@ -366,10 +391,12 @@ namespace ippl { const size_t j, const size_t k) { - int l = i + j * subview.extent(0) + k * subview.extent(0) * subview.extent(1); + int l = i + j * subview.extent(0) + + k * subview.extent(0) * subview.extent(1); op(subview(i, j, k), buffer(l)); } ); + Kokkos::fence(); } @@ -390,8 +417,10 @@ namespace ippl { * Add "+1" to the upper bound since Kokkos loops always to "< extent". 
*/ for (size_t i = 0; i < Dim; ++i) { - intersect.lo[i] = overlap[i].first() - offset[i].first() /*offset*/ + nghost; - intersect.hi[i] = overlap[i].last() - offset[i].first() /*offset*/ + nghost + 1; + intersect.lo[i] = overlap[i].first() - offset[i].first() /*offset*/ + + nghost; + intersect.hi[i] = overlap[i].last() - offset[i].first() /*offset*/ + + nghost + 1; } return intersect; diff --git a/src/Ippl.cpp b/src/Ippl.cpp index 486a01a1b..b8f201d1a 100644 --- a/src/Ippl.cpp +++ b/src/Ippl.cpp @@ -152,6 +152,7 @@ Ippl::Ippl(int& argc, char**& argv, MPI_Comm mpicomm) ///////////////////////////////////////////////////////////////////// // Destructor: need to delete comm library if this is the last IpplInfo Ippl::~Ippl() { + Comm->deleteAllBuffers(); Kokkos::finalize(); } diff --git a/src/Ippl.h b/src/Ippl.h index de263d845..2af1f3c23 100644 --- a/src/Ippl.h +++ b/src/Ippl.h @@ -24,6 +24,7 @@ #include "Communicate/Communicate.h" #include "Utility/Inform.h" +#include "Types/IpplTypes.h" class Ippl; std::ostream& operator<<(std::ostream&, const Ippl&); diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index 71b59736b..2e137714a 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -48,38 +48,57 @@ namespace ippl { using view_type = typename detail::ViewType::view_type; using HostMirror = typename view_type::host_mirror_type; + using size_type = detail::size_type; + // Create storage for M particle attributes. The storage is uninitialized. // New items are appended to the end of the array. - void create(size_t) override; + void create(size_type) override; - void destroy(boolean_view_type, Kokkos::View cc, size_t) override; + /*! + * Particle deletion function. Partition the particles into a valid region + * and an invalid region. 
+ * @param deleteIndex List of indices of invalid particles in the valid region + * @param keepIndex List of indices of valid particles in the invalid region + * @param invalidCount Number of invalid particles in the valid region + */ + void destroy(const Kokkos::View& deleteIndex, + const Kokkos::View& keepIndex, + size_type invalidCount) override; void pack(void*, const Kokkos::View&) const override; - void unpack(void*) override; + void unpack(void*, size_type) override; - void serialize(detail::Archive& ar) override { - ar << dview_m; + void serialize(detail::Archive& ar, size_type nsends) override { + ar.serialize(dview_m, nsends); } - void deserialize(detail::Archive& ar) override { - ar >> dview_m; + void deserialize(detail::Archive& ar, size_type nrecvs) override { + ar.deserialize(dview_m, nrecvs); } virtual ~ParticleAttrib() = default; - size_t size() const { + size_type size() const { return dview_m.extent(0); } - void resize(size_t n) { + size_type packedSize(const size_type count) const { + return count * sizeof(value_type); + } + + void resize(size_type n) { Kokkos::resize(dview_m, n); } + void realloc(size_type n) { + Kokkos::realloc(dview_m, n); + } + void print() { HostMirror hview = Kokkos::create_mirror_view(dview_m); Kokkos::deep_copy(hview, dview_m); - for (size_t i = 0; i < this->size(); ++i) { + for (size_type i = 0; i < *(this->localNum_mp); ++i) { std::cout << hview(i) << std::endl; } } diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 44678509f..dad8ededf 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -28,33 +28,33 @@ // #include "Ippl.h" #include "Communicate/DataTypes.h" +#include "Utility/IpplTimings.h" namespace ippl { template - void ParticleAttrib::create(size_t n) { - size_t current = this->size(); - this->resize(current + n); + void ParticleAttrib::create(size_type n) { + size_type required = *(this->localNum_mp) + n; + if (this->size() < required) { + int 
overalloc = Ippl::Comm->getDefaultOverallocation(); + this->realloc(required * overalloc); + } } - template - void ParticleAttrib::destroy(boolean_view_type invalidIndex, - Kokkos::View newIndex, size_t n) { - Kokkos::View temp("temp", n); + void ParticleAttrib::destroy(const Kokkos::View& deleteIndex, + const Kokkos::View& keepIndex, + size_type invalidCount) { + // Replace all invalid particles in the valid region with valid + // particles in the invalid region Kokkos::parallel_for("ParticleAttrib::destroy()", - dview_m.extent(0), + invalidCount, KOKKOS_CLASS_LAMBDA(const size_t i) { - if ( !invalidIndex(i) ) { - temp(newIndex(i)) = dview_m(i); - } + dview_m(deleteIndex(i)) = dview_m(keepIndex(i)); }); - this->resize(n); - Kokkos::deep_copy(dview_m, temp); } - template void ParticleAttrib::pack(void* buffer, const Kokkos::View& hash) const @@ -63,7 +63,10 @@ namespace ippl { this_type* buffer_p = static_cast(buffer); auto& view = buffer_p->dview_m; auto size = hash.extent(0); - Kokkos::resize(view, size); + if(view.extent(0) < size) { + int overalloc = Ippl::Comm->getDefaultOverallocation(); + Kokkos::realloc(view, size * overalloc); + } Kokkos::parallel_for( "ParticleAttrib::pack()", @@ -71,23 +74,33 @@ namespace ippl { KOKKOS_CLASS_LAMBDA(const size_t i) { view(i) = dview_m(hash(i)); }); + Kokkos::fence(); + + } template - void ParticleAttrib::unpack(void* buffer) { + void ParticleAttrib::unpack(void* buffer, size_type nrecvs) { using this_type = ParticleAttrib; this_type* buffer_p = static_cast(buffer); auto& view = buffer_p->dview_m; auto size = dview_m.extent(0); - Kokkos::resize(dview_m, size + view.size()); + size_type required = *(this->localNum_mp) + nrecvs; + if(size < required) { + int overalloc = Ippl::Comm->getDefaultOverallocation(); + this->resize(required * overalloc); + } + size_type count = *(this->localNum_mp); Kokkos::parallel_for( "ParticleAttrib::unpack()", - view.extent(0), + nrecvs, KOKKOS_CLASS_LAMBDA(const size_t i) { - dview_m(size + i) 
= view(i); + dview_m(count + i) = view(i); }); + Kokkos::fence(); + } template @@ -96,7 +109,7 @@ namespace ippl { ParticleAttrib::operator=(T x) { Kokkos::parallel_for("ParticleAttrib::operator=()", - dview_m.extent(0), + *(this->localNum_mp), KOKKOS_CLASS_LAMBDA(const size_t i) { dview_m(i) = x; }); @@ -114,7 +127,7 @@ namespace ippl { capture_type expr_ = reinterpret_cast(expr); Kokkos::parallel_for("ParticleAttrib::operator=()", - dview_m.extent(0), + *(this->localNum_mp), KOKKOS_CLASS_LAMBDA(const size_t i) { dview_m(i) = expr_(i); }); @@ -128,6 +141,8 @@ namespace ippl { const ParticleAttrib< Vector, Properties... >& pp) const { + static IpplTimings::TimerRef scatterTimer = IpplTimings::getTimer("Scatter"); + IpplTimings::startTimer(scatterTimer); typename Field::view_type view = f.getView(); const M& mesh = f.get_mesh(); @@ -145,7 +160,7 @@ namespace ippl { Kokkos::parallel_for( "ParticleAttrib::scatter", - dview_m.extent(0), + *(this->localNum_mp), KOKKOS_CLASS_LAMBDA(const size_t idx) { // find nearest grid point @@ -171,8 +186,12 @@ namespace ippl { Kokkos::atomic_add(&view(i, j, k ), whi[0] * whi[1] * whi[2] * val); } ); + IpplTimings::stopTimer(scatterTimer); + static IpplTimings::TimerRef accumulateHaloTimer = IpplTimings::getTimer("AccumulateHalo"); + IpplTimings::startTimer(accumulateHaloTimer); f.accumulateHalo(); + IpplTimings::stopTimer(accumulateHaloTimer); } @@ -182,8 +201,13 @@ namespace ippl { const ParticleAttrib, Properties...>& pp) { + static IpplTimings::TimerRef fillHaloTimer = IpplTimings::getTimer("FillHalo"); + IpplTimings::startTimer(fillHaloTimer); f.fillHalo(); + IpplTimings::stopTimer(fillHaloTimer); + static IpplTimings::TimerRef gatherTimer = IpplTimings::getTimer("Gather"); + IpplTimings::startTimer(gatherTimer); const typename Field::view_type view = f.getView(); const M& mesh = f.get_mesh(); @@ -201,7 +225,7 @@ namespace ippl { Kokkos::parallel_for( "ParticleAttrib::gather", - size(), + *(this->localNum_mp), 
KOKKOS_CLASS_LAMBDA(const size_t idx) { // find nearest grid point @@ -214,7 +238,7 @@ namespace ippl { const size_t j = index[1] - lDom[1].first() + nghost; const size_t k = index[2] - lDom[2].first() + nghost; - // scatter + // gather value_type& val = dview_m(idx); val = wlo[0] * wlo[1] * wlo[2] * view(i-1, j-1, k-1) + wlo[0] * wlo[1] * whi[2] * view(i-1, j-1, k ) @@ -226,6 +250,7 @@ namespace ippl { + whi[0] * whi[1] * whi[2] * view(i, j, k ); } ); + IpplTimings::stopTimer(gatherTimer); } @@ -257,7 +282,7 @@ namespace ippl { template \ T ParticleAttrib::name() { \ T temp = 0.0; \ - Kokkos::parallel_reduce("fun", dview_m.extent(0), \ + Kokkos::parallel_reduce("fun", *(this->localNum_mp), \ KOKKOS_CLASS_LAMBDA(const size_t i, T& valL) { \ T myVal = dview_m(i); \ op; \ diff --git a/src/Particle/ParticleAttribBase.h b/src/Particle/ParticleAttribBase.h index 0a25d61f9..b1e2f5fd2 100644 --- a/src/Particle/ParticleAttribBase.h +++ b/src/Particle/ParticleAttribBase.h @@ -28,6 +28,7 @@ #define IPPL_PARTICLE_ATTRIB_BASE_H #include "Types/ViewTypes.h" +#include "Types/IpplTypes.h" #include "Communicate/Archive.h" @@ -39,19 +40,28 @@ namespace ippl { public: typedef typename ViewType::view_type boolean_view_type; - virtual void create(size_t) = 0; + virtual void create(size_type) = 0; - virtual void destroy(boolean_view_type, Kokkos::View, size_t) = 0; + virtual void destroy(const Kokkos::View&, const Kokkos::View&, size_type) = 0; + virtual size_type packedSize(const size_type) const = 0; virtual void pack(void*, const Kokkos::View&) const = 0; - virtual void unpack(void*) = 0; + virtual void unpack(void*, size_type) = 0; - virtual void serialize(Archive& ar) = 0; + virtual void serialize(Archive& ar, size_type nsends) = 0; - virtual void deserialize(Archive& ar) = 0; + virtual void deserialize(Archive& ar, size_type nrecvs) = 0; + + virtual size_type size() const = 0; virtual ~ParticleAttribBase() = default; + + void setParticleCount(size_type& num) { localNum_mp = # } + 
size_type getParticleCount() const { return *localNum_mp; } + + protected: + const size_type* localNum_mp; }; } } diff --git a/src/Particle/ParticleBase.h b/src/Particle/ParticleBase.h index 6b10fda1c..a08c5ffc2 100644 --- a/src/Particle/ParticleBase.h +++ b/src/Particle/ParticleBase.h @@ -6,9 +6,8 @@ // The user must define a class derived from ParticleBase which describes // what specific data attributes the particle has (e.g., mass or charge). // Each attribute is an instance of a ParticleAttribute class; ParticleBase -// keeps a list of pointers to these attributes, and performs global -// operations on them such as update, particle creation and destruction, -// and inter-processor particle migration. +// keeps a list of pointers to these attributes, and performs particle creation +// and destruction. // // ParticleBase is templated on the ParticleLayout mechanism for the particles. // This template parameter should be a class derived from ParticleLayout. @@ -49,14 +48,6 @@ // This example defines a user class with 3D position and two extra // attributes: a radius rad (double), and a velocity vel (a 3D Vector). // -// After each 'time step' in a calculation, which is defined as a period -// in which the particle positions may change enough to affect the global -// layout, the user must call the 'update' routine, which will move -// particles between processors, etc. After the Nth call to update, a -// load balancing routine will be called instead. The user may set the -// frequency of load balancing (N), or may supply a function to -// determine if load balancing should be done or not. 
-// // Copyright (c) 2020, Matthias Frey, Paul Scherrer Institut, Villigen PSI, Switzerland // All rights reserved // @@ -74,7 +65,7 @@ #define IPPL_PARTICLE_BASE_H #include "Particle/ParticleLayout.h" - +#include "Types/IpplTypes.h" #include @@ -100,6 +91,8 @@ namespace ippl { using bc_container_type = typename PLayout::bc_container_type; using hash_type = typename detail::ViewType::view_type; + using size_type = detail::size_type; + public: //! view of particle positions particle_position_type R; @@ -107,6 +100,9 @@ namespace ippl { //! view of particle IDs particle_index_type ID; + typename attribute_type::boolean_view_type invalidIndex; + hash_type newIndex; + /*! * If this constructor is used, the user must call 'initialize' with * a layout object in order to use this. @@ -141,14 +137,10 @@ namespace ippl { /*! * @returns processor local number of particles */ - size_t getLocalNum() const { return localNum_m; } - - /*! - * Set the processor local number of particles - * @param nLocal number of particles - */ - void setLocalNum(size_t nLocal) { localNum_m = nLocal; } - + size_type getLocalNum() const { return localNum_m; } + + + void setLocalNum(size_type size) { localNum_m = size; } /*! * @returns particle layout @@ -207,7 +199,7 @@ namespace ippl { * Create nLocal processor local particles * @param nLocal number of local particles to be created */ - void create(size_t nLocal); + void create(size_type nLocal); /*! * Create a new particle with a given ID @@ -219,37 +211,36 @@ namespace ippl { * Create nTotal particles globally, equally distributed among all processors * @param nTotal number of total particles to be created */ - void globalCreate(size_t nTotal); + void globalCreate(size_type nTotal); /*! - * Delete particles. - * @param + * Particle deletion Function. 
Partition the particles into a valid region + * and an invalid region, + * effectively deleting the invalid particles + * @param invalid View marking which indices are invalid + * @param destroyNum Total number of invalid particles */ - void destroy(); - + void destroy(const Kokkos::View& invalid, const size_type destroyNum); /*! * Serialize to do MPI calls. * @param ar archive */ - void serialize(detail::Archive& ar); + void serialize(detail::Archive& ar, size_type nsends); /*! * Deserialize to do MPI calls. * @param ar archive */ - void deserialize(detail::Archive& ar); - + void deserialize(detail::Archive& ar, size_type nrecvs); /*! - * Redistribute particles among MPI ranks. - * This function calls the underlying particle layout - * routine. + * Determine the total space necessary to store a certain number of particles + * @param count particle number + * @return Total size of a buffer packed with the given number of particles */ -// template - void update(); - + size_type packedSize(const size_type count) const; // protected: @@ -268,7 +259,7 @@ namespace ippl { * @param buffer received */ template - void unpack(Buffer& buffer); + void unpack(Buffer& buffer, size_type nrecvs); private: //! particle layout @@ -276,7 +267,7 @@ namespace ippl { Layout_t* layout_m; //! processor local number of particles - size_t localNum_m; + size_type localNum_m; //! all attributes attribute_container_t attributes_m; @@ -286,6 +277,10 @@ namespace ippl { //! number of MPI ranks index_type numNodes_m; + + //! buffers for particle partitioning + hash_type deleteIndex_m; + hash_type keepIndex_m; }; } diff --git a/src/Particle/ParticleBase.hpp b/src/Particle/ParticleBase.hpp index 3bc1b6c85..8fe4a0a66 100644 --- a/src/Particle/ParticleBase.hpp +++ b/src/Particle/ParticleBase.hpp @@ -6,9 +6,8 @@ // The user must define a class derived from ParticleBase which describes // what specific data attributes the particle has (e.g., mass or charge). 
// Each attribute is an instance of a ParticleAttribute class; ParticleBase -// keeps a list of pointers to these attributes, and performs global -// operations on them such as update, particle creation and destruction, -// and inter-processor particle migration. +// keeps a list of pointers to these attributes, and performs particle creation +// and destruction. // // ParticleBase is templated on the ParticleLayout mechanism for the particles. // This template parameter should be a class derived from ParticleLayout. @@ -49,14 +48,6 @@ // This example defines a user class with 3D position and two extra // attributes: a radius rad (double), and a velocity vel (a 3D Vector). // -// After each 'time step' in a calculation, which is defined as a period -// in which the particle positions may change enough to affect the global -// layout, the user must call the 'update' routine, which will move -// particles between processors, etc. After the Nth call to update, a -// load balancing routine will be called instead. The user may set the -// frequency of load balancing (N), or may supply a function to -// determine if load balancing should be done or not. 
-// // Copyright (c) 2020, Matthias Frey, Paul Scherrer Institut, Villigen PSI, Switzerland // All rights reserved // @@ -97,6 +88,7 @@ namespace ippl { void ParticleBase::addAttribute(detail::ParticleAttribBase& pa) { attributes_m.push_back(&pa); + pa.setParticleCount(localNum_m); } template @@ -110,7 +102,7 @@ namespace ippl { template - void ParticleBase::create(size_t nLocal) + void ParticleBase::create(size_type nLocal) { PAssert(layout_m != nullptr); @@ -148,108 +140,125 @@ namespace ippl { } template - void ParticleBase::globalCreate(size_t nTotal) + void ParticleBase::globalCreate(size_type nTotal) { PAssert(layout_m != nullptr); // Compute the number of particles local to each processor - size_t nLocal = nTotal / numNodes_m; + size_type nLocal = nTotal / numNodes_m; const size_t rank = Ippl::Comm->myNode(); - size_t rest = nTotal - nLocal * rank; + size_type rest = nTotal - nLocal * rank; if (rank < rest) ++nLocal; create(nLocal); } - template - void ParticleBase::destroy() { - - /* count the number of particles with ID == -1 and fill - * a boolean view - */ - size_t destroyNum = 0; - Kokkos::View invalidIndex("", localNum_m); - Kokkos::parallel_reduce("Reduce in ParticleBase::destroy()", - localNum_m, - KOKKOS_CLASS_LAMBDA(const size_t i, - size_t& nInvalid) - { - nInvalid += size_t(ID(i) < 0); - invalidIndex(i) = (ID(i) < 0); - }, destroyNum); - + void ParticleBase::destroy(const Kokkos::View& invalid, const size_type destroyNum) { PAssert(destroyNum <= localNum_m); - if (destroyNum == 0) { + // If there aren't any particles to delete, do nothing + if (destroyNum == 0) return; + + // If we're deleting all the particles, there's no point in doing + // anything because the valid region will be empty; we only need to + // update the particle count + if (destroyNum == localNum_m) { + localNum_m = 0; return; } - /* Compute the prefix sum and store the new - * particle indices in newIndex. 
- */ - Kokkos::View newIndex("newIndex", localNum_m); + // Resize buffers, if necessary + if (deleteIndex_m.size() < destroyNum) { + int overalloc = Ippl::Comm->getDefaultOverallocation(); + Kokkos::realloc(deleteIndex_m, destroyNum * overalloc); + Kokkos::realloc(keepIndex_m, destroyNum * overalloc); + } + + // Reset index buffer + Kokkos::deep_copy(deleteIndex_m, -1); + + auto locDeleteIndex = deleteIndex_m; + auto locKeepIndex = keepIndex_m; + + // Find the indices of the invalid particles in the valid region Kokkos::parallel_scan("Scan in ParticleBase::destroy()", - localNum_m, + localNum_m - destroyNum, KOKKOS_LAMBDA(const size_t i, int& idx, const bool final) { - if (final) { - newIndex(i) = idx; - } - - if (!invalidIndex(i)) { - idx += 1; - } + if (final && invalid(i)) locDeleteIndex(idx) = i; + if (invalid(i)) idx += 1; }); + Kokkos::fence(); + + // Determine the total number of invalid particles in the valid region + size_type maxDeleteIndex = 0; + Kokkos::parallel_reduce("Reduce in ParticleBase::destroy()", destroyNum, + KOKKOS_LAMBDA(const size_t i, size_t& maxIdx) + { + if (locDeleteIndex(i) >= 0 && i > maxIdx) maxIdx = i; + }, Kokkos::Max(maxDeleteIndex)); + + // Find the indices of the valid particles in the invalid region + Kokkos::parallel_scan("Second scan in ParticleBase::destroy()", + Kokkos::RangePolicy(localNum_m - destroyNum, localNum_m), + KOKKOS_LAMBDA(const size_t i, int& idx, const bool final) + { + if (final && !invalid(i)) locKeepIndex(idx) = i; + if (!invalid(i)) idx += 1; + }); + + Kokkos::fence(); localNum_m -= destroyNum; - // delete the invalide attribut indices + // Partition the attributes into valid and invalid regions for (attribute_iterator it = attributes_m.begin(); it != attributes_m.end(); ++it) { - (*it)->destroy(invalidIndex, newIndex, localNum_m); + (*it)->destroy(deleteIndex_m, keepIndex_m, maxDeleteIndex + 1); } } - template - void ParticleBase::serialize(detail::Archive& ar) { + void 
ParticleBase::serialize(detail::Archive& ar, size_type nsends) { using size_type = typename attribute_container_t::size_type; for (size_type i = 0; i < attributes_m.size(); ++i) { - attributes_m[i]->serialize(ar); + attributes_m[i]->serialize(ar, nsends); } } template - void ParticleBase::deserialize(detail::Archive& ar) { + void ParticleBase::deserialize(detail::Archive& ar, size_type nrecvs) { using size_type = typename attribute_container_t::size_type; for (size_type i = 0; i < attributes_m.size(); ++i) { - attributes_m[i]->deserialize(ar); + attributes_m[i]->deserialize(ar, nrecvs); } } - template -// template - void ParticleBase::update() - { - PAssert(layout_m != nullptr); -// layout_m->update(*this); + detail::size_type ParticleBase::packedSize(const size_type count) const { + size_type total = 0; + // Vector size type + using vsize_t = typename attribute_container_t::size_type; + for (vsize_t i = 0; i < attributes_m.size(); ++i) { + total += attributes_m[i]->packedSize(count); + } + return total; } - template template void ParticleBase::pack(Buffer& buffer, const hash_type& hash) { - using size_type = typename attribute_container_t::size_type; - for (size_type j = 0; j < attributes_m.size(); ++j) { + // Vector size type + using vsize_t = typename attribute_container_t::size_type; + for (vsize_t j = 0; j < attributes_m.size(); ++j) { attributes_m[j]->pack(buffer.getAttribute(j), hash); } } @@ -257,11 +266,13 @@ namespace ippl { template template - void ParticleBase::unpack(Buffer& buffer) + void ParticleBase::unpack(Buffer& buffer, size_type nrecvs) { - using size_type = typename attribute_container_t::size_type; - for (size_type j = 0; j < attributes_m.size(); ++j) { - attributes_m[j]->unpack(buffer.getAttribute(j)); + // Vector size type + using vsize_t = typename attribute_container_t::size_type; + for (vsize_t j = 0; j < attributes_m.size(); ++j) { + attributes_m[j]->unpack(buffer.getAttribute(j), nrecvs); } + localNum_m += nrecvs; } } diff --git 
a/src/Particle/ParticleLayout.hpp b/src/Particle/ParticleLayout.hpp index b1034e4f9..06851ccbb 100644 --- a/src/Particle/ParticleLayout.hpp +++ b/src/Particle/ParticleLayout.hpp @@ -62,17 +62,17 @@ namespace ippl { switch (bcs_m[face]) { case BC::PERIODIC: Kokkos::parallel_for("Periodic BC", - R.getView().extent(0), + R.getParticleCount(), PeriodicBC(R.getView(), nr, d, isUpper)); break; case BC::REFLECTIVE: Kokkos::parallel_for("Reflective BC", - R.getView().extent(0), + R.getParticleCount(), ReflectiveBC(R.getView(), nr, d, isUpper)); break; case BC::SINK: Kokkos::parallel_for("Sink BC", - R.getView().extent(0), + R.getParticleCount(), SinkBC(R.getView(), nr, d, isUpper)); break; case BC::NO: diff --git a/src/Particle/ParticleSpatialLayout.h b/src/Particle/ParticleSpatialLayout.h index 6e3812449..d32335749 100644 --- a/src/Particle/ParticleSpatialLayout.h +++ b/src/Particle/ParticleSpatialLayout.h @@ -11,6 +11,14 @@ // in which case a grid is selected based on an even distribution of // particles among processors. // +// After each 'time step' in a calculation, which is defined as a period +// in which the particle positions may change enough to affect the global +// layout, the user must call the 'update' routine, which will move +// particles between processors, etc. After the Nth call to update, a +// load balancing routine will be called instead. The user may set the +// frequency of load balancing (N), or may supply a function to +// determine if load balancing should be done or not. 
+// // Copyright (c) 2020, Paul Scherrer Institut, Villigen PSI, Switzerland // All rights reserved // @@ -32,6 +40,8 @@ #include "Particle/ParticleLayout.h" #include "Particle/ParticleBase.h" +#include "Types/IpplTypes.h" + #include "Region/RegionLayout.h" namespace ippl { @@ -52,6 +62,8 @@ namespace ippl { using bool_type = typename detail::ViewType::view_type; using RegionLayout_t = detail::RegionLayout; + using size_type = detail::size_type; + public: // constructor: this one also takes a Mesh ParticleSpatialLayout(FieldLayout&, Mesh&); @@ -59,9 +71,10 @@ namespace ippl { ParticleSpatialLayout() : detail::ParticleLayout() { } ~ParticleSpatialLayout() = default; + //~ParticleSpatialLayout() {} template - void update(/*ParticleBase>*/BufferType& pdata); + void update(BufferType& pdata, BufferType& buffer); const RegionLayout_t& getRegionLayout() const { return rlayout_m; } diff --git a/src/Particle/ParticleSpatialLayout.hpp b/src/Particle/ParticleSpatialLayout.hpp index 816bd1d40..303262df8 100644 --- a/src/Particle/ParticleSpatialLayout.hpp +++ b/src/Particle/ParticleSpatialLayout.hpp @@ -11,6 +11,14 @@ // in which case a grid is selected based on an even distribution of // particles among processors. // +// After each 'time step' in a calculation, which is defined as a period +// in which the particle positions may change enough to affect the global +// layout, the user must call the 'update' routine, which will move +// particles between processors, etc. After the Nth call to update, a +// load balancing routine will be called instead. The user may set the +// frequency of load balancing (N), or may supply a function to +// determine if load balancing should be done or not. 
+// // Copyright (c) 2020, Paul Scherrer Institut, Villigen PSI, Switzerland // All rights reserved // @@ -27,6 +35,7 @@ #include #include #include +#include "Utility/IpplTimings.h" namespace ippl { @@ -35,33 +44,38 @@ namespace ippl { FieldLayout& fl, Mesh& mesh) : rlayout_m(fl, mesh) - { } + {} template template void ParticleSpatialLayout::update( - /*ParticleBase>*/BufferType& pdata) + BufferType& pdata, BufferType& buffer) { + static IpplTimings::TimerRef ParticleBCTimer = IpplTimings::getTimer("ParticleBC"); + IpplTimings::startTimer(ParticleBCTimer); this->applyBC(pdata.R, rlayout_m.getDomain()); + IpplTimings::stopTimer(ParticleBCTimer); + static IpplTimings::TimerRef ParticleUpdateTimer = IpplTimings::getTimer("ParticleUpdate"); + IpplTimings::startTimer(ParticleUpdateTimer); int nRanks = Ippl::Comm->size(); if (nRanks < 2) { - // delete invalidated particles - pdata.destroy(); return; } /* particle MPI exchange: * 1. figure out which particles need to go where - * 2. fill send buffer - * 3. send / receive particles - * 4. delete invalidated particles + * 2. fill send buffer and send particles + * 3. delete invalidated particles + * 4. 
receive particles */ - size_t localnum = pdata.getLocalNum(); + static IpplTimings::TimerRef locateTimer = IpplTimings::getTimer("locateParticles"); + IpplTimings::startTimer(locateTimer); + size_type localnum = pdata.getLocalNum(); // 1st step @@ -76,89 +90,113 @@ namespace ippl { bool_type invalid("invalid", localnum); locateParticles(pdata, ranks, invalid); + IpplTimings::stopTimer(locateTimer); - /* - * 2nd step - */ + // 2nd step // figure out how many receives + static IpplTimings::TimerRef preprocTimer = IpplTimings::getTimer("SendPreprocess"); + IpplTimings::startTimer(preprocTimer); MPI_Win win; - std::vector nRecvs(nRanks, 0); - MPI_Win_create(nRecvs.data(), nRanks*sizeof(int), sizeof(int), + std::vector nRecvs(nRanks, 0); + MPI_Win_create(nRecvs.data(), nRanks*sizeof(size_type), sizeof(size_type), MPI_INFO_NULL, *Ippl::Comm, &win); - std::vector nSends(nRanks, 0); + std::vector nSends(nRanks, 0); MPI_Win_fence(0, win); for (int rank = 0; rank < nRanks; ++rank) { if (rank == Ippl::Comm->rank()) { - // we do not need to send to ourself + // we do not need to send to ourselves continue; } nSends[rank] = numberOfSends(rank, ranks); - MPI_Put(nSends.data() + rank, 1, MPI_INT, rank, Ippl::Comm->rank(), - 1, MPI_INT, win); + MPI_Put(nSends.data() + rank, 1, MPI_LONG_LONG_INT, rank, Ippl::Comm->rank(), + 1, MPI_LONG_LONG_INT, win); } MPI_Win_fence(0, win); + IpplTimings::stopTimer(preprocTimer); + static IpplTimings::TimerRef sendTimer = IpplTimings::getTimer("ParticleSend"); + IpplTimings::startTimer(sendTimer); // send std::vector requests(0); - using archive_type = Communicate::archive_type; - std::vector> archives(0); + + using buffer_type = Communicate::buffer_type; int tag = Ippl::Comm->next_tag(P_SPATIAL_LAYOUT_TAG, P_LAYOUT_CYCLE); + int sends = 0; for (int rank = 0; rank < nRanks; ++rank) { if (nSends[rank] > 0) { hash_type hash("hash", nSends[rank]); fillHash(rank, ranks, hash); - archives.push_back(std::make_unique()); requests.resize(requests.size() 
+ 1); - BufferType buffer(pdata.getLayout()); - buffer.create(nSends[rank]); - pdata.pack(buffer, hash); + size_type bufSize = pdata.packedSize(nSends[rank]); + + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARTICLE_SEND + sends, bufSize); - Ippl::Comm->isend(rank, tag, buffer, *(archives.back()), - requests.back()); + Ippl::Comm->isend(rank, tag, buffer, *buf, + requests.back(), nSends[rank]); + buf->resetWritePos(); + + ++sends; } } + IpplTimings::stopTimer(sendTimer); // 3rd step + static IpplTimings::TimerRef destroyTimer = IpplTimings::getTimer("ParticleDestroy"); + IpplTimings::startTimer(destroyTimer); + + size_type invalidCount = 0; + Kokkos::parallel_reduce( + "set/count invalid", + localnum, + KOKKOS_LAMBDA(const size_t i, size_type& nInvalid) { + if (invalid(i)) { + pdata.ID(i) = -1; + nInvalid += 1; + } + }, invalidCount); + Kokkos::fence(); + + pdata.destroy(invalid, invalidCount); + Kokkos::fence(); + + IpplTimings::stopTimer(destroyTimer); + static IpplTimings::TimerRef recvTimer = IpplTimings::getTimer("ParticleRecv"); + IpplTimings::startTimer(recvTimer); + // 4th step + int recvs = 0; for (int rank = 0; rank < nRanks; ++rank) { if (nRecvs[rank] > 0) { - BufferType buffer(pdata.getLayout()); - buffer.create(nRecvs[rank]); + size_type bufSize = pdata.packedSize(nRecvs[rank]); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARTICLE_RECV + recvs, bufSize); - Ippl::Comm->recv(rank, tag, buffer); + Ippl::Comm->recv(rank, tag, buffer, *buf, bufSize, nRecvs[rank]); + buf->resetReadPos(); - pdata.unpack(buffer); + pdata.unpack(buffer, nRecvs[rank]); + + ++recvs; } + } + IpplTimings::stopTimer(recvTimer); + + IpplTimings::startTimer(sendTimer); if (requests.size() > 0) { MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); - archives.clear(); } + IpplTimings::stopTimer(sendTimer); - // create space for received particles - int nTotalRecvs = std::accumulate(nRecvs.begin(), nRecvs.end(), 0); - pdata.setLocalNum(localnum + nTotalRecvs); - - 
Kokkos::parallel_for( - "set invalid", - localnum, - KOKKOS_LAMBDA(const size_t i) { - if (invalid(i)) { - pdata.ID(i) = -1; - } - }); - - // 4th step - pdata.destroy(); + IpplTimings::stopTimer(ParticleUpdateTimer); } @@ -170,30 +208,22 @@ namespace ippl { { auto& positions = pdata.R.getView(); typename RegionLayout_t::view_type Regions = rlayout_m.getdLocalRegions(); - using size_type = typename RegionLayout_t::view_type::size_type; + using view_size_t = typename RegionLayout_t::view_type::size_type; using mdrange_type = Kokkos::MDRangePolicy>; int myRank = Ippl::Comm->rank(); Kokkos::parallel_for( "ParticleSpatialLayout::locateParticles()", mdrange_type({0, 0}, - {ranks.extent(0), Regions.extent(0)}), - KOKKOS_CLASS_LAMBDA(const size_t i, const size_type j) { - bool x_bool = false; - bool y_bool = false; - bool z_bool = false; - if((positions(i)[0] >= Regions(j)[0].min()) && - (positions(i)[0] <= Regions(j)[0].max())) { - x_bool = true; - } - if((positions(i)[1] >= Regions(j)[1].min()) && - (positions(i)[1] <= Regions(j)[1].max())) { - y_bool = true; - } - if((positions(i)[2] >= Regions(j)[2].min()) && - (positions(i)[2] <= Regions(j)[2].max())) { - z_bool = true; - } - if(x_bool && y_bool && z_bool){ + {ranks.extent(0), Regions.extent(0)}), + KOKKOS_LAMBDA(const size_t i, const view_size_t j) { + bool xyz_bool = false; + xyz_bool = ((positions(i)[0] >= Regions(j)[0].min()) && + (positions(i)[0] <= Regions(j)[0].max()) && + (positions(i)[1] >= Regions(j)[1].min()) && + (positions(i)[1] <= Regions(j)[1].max()) && + (positions(i)[2] >= Regions(j)[2].min()) && + (positions(i)[2] <= Regions(j)[2].max())); + if(xyz_bool){ ranks(i) = j; invalid(i) = (myRank != ranks(i)); } @@ -223,6 +253,7 @@ namespace ippl { idx += 1; } }); + Kokkos::fence(); } @@ -235,11 +266,12 @@ namespace ippl { Kokkos::parallel_reduce( "ParticleSpatialLayout::numberOfSends()", ranks.extent(0), - KOKKOS_CLASS_LAMBDA(const size_t i, + KOKKOS_LAMBDA(const size_t i, size_t& num) { num += size_t(rank 
== ranks(i)); }, nSends); + Kokkos::fence(); return nSends; } } diff --git a/src/Solver b/src/Solver index 23065aba0..07d49b7a0 160000 --- a/src/Solver +++ b/src/Solver @@ -1 +1 @@ -Subproject commit 23065aba0233e5d2321661bee93274b2b7effa0b +Subproject commit 07d49b7a0d66b6be58fac002c9dd58ee34e8bc0e diff --git a/src/Types/IpplTypes.h b/src/Types/IpplTypes.h new file mode 100644 index 000000000..f03c69be8 --- /dev/null +++ b/src/Types/IpplTypes.h @@ -0,0 +1,30 @@ +// +// IpplTypes +// Typedefs for basic types used throughout IPPL +// +// Copyright (c) 2021 Paul Scherrer Institut, Villigen PSI, Switzerland +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . 
+// + +#ifndef IPPL_TYPES_H +#define IPPL_TYPES_H + +#include + +namespace ippl { + namespace detail { + typedef uint64_t size_type; + } +} + +#endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3466d23d8..82311fc75 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -15,7 +15,6 @@ endmacro() # endforeach() add_subdirectory (kokkos) -add_subdirectory (barefield) add_subdirectory (vector) add_subdirectory (field) if (ENABLE_FFT) diff --git a/test/FFT/TestFFTRC.cpp b/test/FFT/TestFFTRC.cpp index e1a7e343e..2edd27ed3 100644 --- a/test/FFT/TestFFTRC.cpp +++ b/test/FFT/TestFFTRC.cpp @@ -12,7 +12,7 @@ int main(int argc, char *argv[]) { constexpr unsigned int dim = 3; - std::array pt = {16, 16, 16}; + std::array pt = {4, 4, 4}; ippl::Index Iinput(pt[0]); ippl::Index Jinput(pt[1]); ippl::Index Kinput(pt[2]); diff --git a/test/barefield/CMakeLists.txt b/test/barefield/CMakeLists.txt deleted file mode 100644 index cf4b667c5..000000000 --- a/test/barefield/CMakeLists.txt +++ /dev/null @@ -1,33 +0,0 @@ -file (RELATIVE_PATH _relPath "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}") -message (STATUS "Adding index test found in ${_relPath}") - -include_directories ( - ${CMAKE_SOURCE_DIR}/src -) - -link_directories ( - ${CMAKE_CURRENT_SOURCE_DIR} - ${Boost_LIBRARY_DIRS} - ${Kokkos_DIR}/.. 
-) - -set (IPPL_LIBS ippl ${MPI_CXX_LIBRARIES} ${Boost_LIBRARIES}) -set (COMPILE_FLAGS ${OPAL_CXX_FLAGS}) - -add_executable (TestBareField TestBareField.cpp) -target_link_libraries (TestBareField ${IPPL_LIBS}) - -add_executable (TestBareField2 TestBareField2.cpp) -target_link_libraries ( TestBareField2 ${IPPL_LIBS}) - -add_executable (TestBareField3 TestBareField3.cpp) -target_link_libraries (TestBareField3 ${IPPL_LIBS}) - -# vi: set et ts=4 sw=4 sts=4: - -# Local Variables: -# mode: cmake -# cmake-tab-width: 4 -# indent-tabs-mode: nil -# require-final-newline: nil -# End: diff --git a/test/barefield/TestBareField.cpp b/test/barefield/TestBareField.cpp deleted file mode 100644 index 4e60ad7b9..000000000 --- a/test/barefield/TestBareField.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include "Ippl.h" - -#include -#include - -// #include - -int main(int argc, char *argv[]) { - - Ippl ippl(argc,argv); - - constexpr unsigned int dim = 3; - - ippl::Index I(10); - ippl::NDIndex owned(I, I, I); - - - ippl::e_dim_tag allParallel[dim]; // Specifies SERIAL, PARALLEL dims - for (unsigned int d=0; d layout(owned, allParallel); - - typedef ippl::BareField bfield_t; - bfield_t barefield(layout); - - barefield = 1.0; - - barefield.write(); - - barefield = 2 * ((barefield + barefield) * (barefield + barefield)) - / (barefield + 5.0 * barefield + barefield) - barefield; - - barefield.write(); - - -// const char* name = typeid(2 * barefield + barefield).name(); -// std::cout << boost::core::demangle(name) << std::endl; - - return 0; -} - diff --git a/test/barefield/TestBareField2.cpp b/test/barefield/TestBareField2.cpp deleted file mode 100644 index 596ac492c..000000000 --- a/test/barefield/TestBareField2.cpp +++ /dev/null @@ -1,42 +0,0 @@ -#include "Ippl.h" - -#include -#include - -int main(int argc, char *argv[]) { - - Ippl ippl(argc,argv); - - constexpr unsigned int dim = 3; - - ippl::Index I(10); - ippl::NDIndex owned(I, I, I); - - - ippl::e_dim_tag allParallel[dim]; // Specifies SERIAL, 
PARALLEL dims - for (unsigned int d=0; d layout(owned, allParallel); - - typedef ippl::Vector vector_t; - typedef ippl::BareField bfield_t; - typedef ippl::BareField bfield_s_t; - bfield_t barefield(layout); - bfield_s_t barefield_s(layout); - - barefield = 1.0; - - barefield.write(); - - //barefield = 2 * ((barefield + barefield) * (barefield + barefield)) / (barefield + barefield + barefield) - barefield; - - //barefield = 5.0 * cross(barefield, barefield); - - barefield_s = 5.0 * dot(barefield, barefield); - - barefield_s.write(); - - return 0; -} diff --git a/test/barefield/TestBareField3.cpp b/test/barefield/TestBareField3.cpp deleted file mode 100644 index fa2efe6f2..000000000 --- a/test/barefield/TestBareField3.cpp +++ /dev/null @@ -1,51 +0,0 @@ -#include "Ippl.h" - -#include -#include - -// #include - -int main(int argc, char *argv[]) { - - Ippl ippl(argc,argv); - - constexpr unsigned int dim = 3; - - ippl::Index I(10); - ippl::NDIndex owned(I, I, I); - - - ippl::e_dim_tag allParallel[dim]; // Specifies SERIAL, PARALLEL dims - for (unsigned int d=0; d layout(owned, allParallel); - - typedef ippl::BareField bfield_t; - bfield_t barefield(layout); - - double pi = acos(-1.0); - - barefield = pi/4; - - //barefield.write(); - - barefield = fabs(7.0 * (sin(barefield) * cos(barefield))/(tan(barefield) * acos(barefield)) - - exp(barefield) + erf(barefield) + (asin(barefield) * cosh(barefield)) / (atan(barefield) - * sinh(barefield)) + tanh(barefield) * log(barefield) - - log10(barefield) * sqrt(barefield) + floor(barefield) * ceil(barefield)); - - barefield.write(); - - barefield = -barefield; - - barefield.write(); - - -// const char* name = typeid(2 * barefield + barefield).name(); -// std::cout << boost::core::demangle(name) << std::endl; - - return 0; -} - diff --git a/test/field/CMakeLists.txt b/test/field/CMakeLists.txt index 7ee937faf..ddf968896 100644 --- a/test/field/CMakeLists.txt +++ b/test/field/CMakeLists.txt @@ -14,33 +14,9 @@ link_directories ( set 
(IPPL_LIBS ippl) set (COMPILE_FLAGS ${OPAL_CXX_FLAGS}) -add_executable (TestField TestField.cpp) +add_executable (TestLaplace TestLaplace.cpp) target_link_libraries ( - TestField - ${IPPL_LIBS} - ${MPI_CXX_LIBRARIES} - ${Boost_LIBRARIES} -) - -add_executable (TestField2 TestField2.cpp) -target_link_libraries ( - TestField2 - ${IPPL_LIBS} - ${MPI_CXX_LIBRARIES} - ${Boost_LIBRARIES} -) - -add_executable (TestField3 TestField3.cpp) -target_link_libraries ( - TestField3 - ${IPPL_LIBS} - ${MPI_CXX_LIBRARIES} - ${Boost_LIBRARIES} -) - -add_executable (TestFieldReductions TestFieldReductions.cpp) -target_link_libraries ( - TestFieldReductions + TestLaplace ${IPPL_LIBS} ${MPI_CXX_LIBRARIES} ${Boost_LIBRARIES} diff --git a/test/field/TestField.cpp b/test/field/TestField.cpp deleted file mode 100644 index d348df3c3..000000000 --- a/test/field/TestField.cpp +++ /dev/null @@ -1,42 +0,0 @@ -#include "Ippl.h" - -#include -#include - -int main(int argc, char *argv[]) { - - Ippl ippl(argc,argv); - - constexpr unsigned int dim = 3; - - int pt = 4; - ippl::Index I(pt); - ippl::NDIndex owned(I, I, I); - - ippl::e_dim_tag allParallel[dim]; // Specifies SERIAL, PARALLEL dims - for (unsigned int d=0; d layout(owned,allParallel); - - double dx = 1.0 / double(pt); - ippl::Vector hx = {dx, dx, dx}; - ippl::Vector origin = {0, 0, 0}; - ippl::UniformCartesian mesh(owned, hx, origin); - - - typedef ippl::Field field_type; - typedef ippl::Field, dim> vector_field_type; - - field_type field(mesh, layout); - - vector_field_type vfield(mesh, layout); - - field = 1.0; - - vfield = grad(field); - - vfield.write(); - - return 0; -} diff --git a/test/field/TestField2.cpp b/test/field/TestField2.cpp deleted file mode 100644 index 74318607e..000000000 --- a/test/field/TestField2.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include "Ippl.h" - -#include -#include - -int main(int argc, char *argv[]) { - - Ippl ippl(argc,argv); - - constexpr unsigned int dim = 3; - - - int pt = 4; - ippl::Index I(pt); - 
ippl::NDIndex owned(I, I, I); - - ippl::e_dim_tag allParallel[dim]; // Specifies SERIAL, PARALLEL dims - for (unsigned int d=0; d layout(owned,allParallel); - - double dx = 1.0 / double(pt); - ippl::Vector hx = {dx, dx, dx}; - ippl::Vector origin = {0, 0, 0}; - ippl::UniformCartesian mesh(owned, hx, origin); - - - typedef ippl::Field field_type; - - field_type field(mesh, layout); - - double pi = acos(-1.0); - - field = pi/4; - - //field.write(); - - field = fabs(7.0 * (sin(field) * cos(field))/(tan(field) * acos(field)) - - exp(field) + erf(field) + (asin(field) * cosh(field)) / (atan(field) - * sinh(field)) + tanh(field) * log(field) - - log10(field) * sqrt(field) + floor(field) * ceil(field) + exp(2)); - - field.write(); - - field = -field; - - field.write(); - - return 0; -} diff --git a/test/field/TestFieldBC.cpp b/test/field/TestFieldBC.cpp index 02a745d1f..4a1f32a6c 100644 --- a/test/field/TestFieldBC.cpp +++ b/test/field/TestFieldBC.cpp @@ -1,3 +1,4 @@ +// Tests the application of various kinds of boundary conditions on fields #include "Ippl.h" #include @@ -52,7 +53,7 @@ int main(int argc, char *argv[]) { //std::cout << bcField << std::endl; std::cout << layout << std::endl; - field_type field(mesh, layout, 2); + field_type field(mesh, layout, 1); field = 1.0; @@ -74,9 +75,9 @@ int main(int argc, char *argv[]) { const size_t ig = i + lDom[0].first() - nghost; //const size_t jg = j + lDom[1].first() - nghost; //const size_t kg = k + lDom[2].first() - nghost; - double x = origin[0] + (ig + 0.5) * hx[0]; - //double y = origin[1] + (jg + 0.5) * hx[1]; - //double z = origin[2] + (kg + 0.5) * hx[2]; + double x = (ig + 0.5) * hx[0] + origin[0]; + //double y = (jg + 0.5) * hx[1]; + //double z = (kg + 0.5) * hx[2]; //view(i, j, k) = 3.0*pow(x,1) + 4.0*pow(y,1) + 5.0*pow(z,1); //view(i, j, k) = sin(pi * x) * cos(pi * y) * exp(z); diff --git a/test/field/TestFieldReductions.cpp b/test/field/TestFieldReductions.cpp deleted file mode 100644 index 051d2f6b0..000000000 
--- a/test/field/TestFieldReductions.cpp +++ /dev/null @@ -1,84 +0,0 @@ -#include "Ippl.h" - -#include -#include -#include - -int main(int argc, char *argv[]) { - - Ippl ippl(argc,argv); - - constexpr unsigned int dim = 3; - - - int pt = 4; - ippl::Index I(pt); - ippl::NDIndex owned(I, I, I); - - ippl::e_dim_tag allParallel[dim]; // Specifies SERIAL, PARALLEL dims - for (unsigned int d=0; d layout(owned,allParallel); - - //Unit box - double dx = 1.0 / double(pt); - ippl::Vector hx = {dx, dx, dx}; - ippl::Vector origin = {0, 0, 0}; - ippl::UniformCartesian mesh(owned, hx, origin); - - double pi = acos(-1.0); - - typedef ippl::Field field_type; - - field_type field; - - field.initialize(mesh, layout); - - typename field_type::view_type& view = field.getView(); - - - Kokkos::parallel_for("Assign lfield", - Kokkos::MDRangePolicy>({0, 0, 0}, - {view.extent(0), - view.extent(1), - view.extent(2)}), - KOKKOS_LAMBDA(const int i, const int j, const int k){ - - double x = (i + 0.5) * hx[0] + origin[0]; - double y = (j + 0.5) * hx[1] + origin[1]; - double z = (k + 0.5) * hx[2] + origin[2]; - - //view(i, j, k) = 3.0 * x + 4.0 * y + 5.0 * z; - view(i, j, k) = sin(pi * x) * cos(pi * y) * exp(z); - - - }); - - double total = field.sum(); - double max = field.max(); - double min = field.min(); - double product = field.prod(); - - field = fabs(field); - double normInf = field.max(); - - //const char* name = typeid(fabs(field)).name(); - - //std::cout << name << std::endl; - //std::cout << boost::core::demangle(name) << std::endl; - field = pow(field,2); - double norm2 = sqrt(field.sum()); - - - std::cout << "total: " << total << std::endl; - std::cout << "max: " << max << std::endl; - std::cout << "min: " << min << std::endl; - std::cout << "Product: " << product << std::endl; - std::cout << "norm2: " << norm2 << std::endl; - std::cout << "normInf: " << normInf << std::endl; - - - return 0; -} diff --git a/test/field/TestHalo.cpp b/test/field/TestHalo.cpp index 775cf5ce5..ea2fc5134 
100644 --- a/test/field/TestHalo.cpp +++ b/test/field/TestHalo.cpp @@ -1,3 +1,5 @@ +// Tests the halo cell update functions to verify that the +// correct data is copied to halo cells of neighboring MPI ranks #include "Ippl.h" #include @@ -8,11 +10,15 @@ int main(int argc, char *argv[]) { Ippl ippl(argc,argv); + Inform msg("PreallocationHalo"); + + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("mainTimer"); + IpplTimings::startTimer(mainTimer); constexpr unsigned int dim = 3; // std::array pt = {8, 7, 13}; - std::array pt = {8, 8, 8}; + std::array pt = {2048, 2048, 2048}; ippl::Index I(pt[0]); ippl::Index J(pt[1]); ippl::Index K(pt[2]); @@ -40,8 +46,8 @@ int main(int argc, char *argv[]) { field_type field(mesh, layout); field = Ippl::Comm->rank(); - int myRank = Ippl::Comm->rank(); - int nRanks = Ippl::Comm->size(); + int myRank = Ippl::Comm->rank(); + int nRanks = Ippl::Comm->size(); for (int rank = 0; rank < nRanks; ++rank) { @@ -83,61 +89,66 @@ int main(int argc, char *argv[]) { Ippl::Comm->barrier(); } -// auto& domains = layout.getHostLocalDomains(); -// -// for (int rank = 0; rank < Ippl::Comm->size(); ++rank) { -// -// if (rank == Ippl::Comm->rank()) { -// auto& faces = layout.getFaceNeighbors(); -// auto& edges = layout.getEdgeNeighbors(); -// auto& vertices = layout.getVertexNeighbors(); -// -// int nFaces = 0, nEdges = 0, nVertices = 0; -// for (size_t i = 0; i < faces.size(); ++i) { -// nFaces += faces[i].size(); -// } -// -// for (size_t i = 0; i < edges.size(); ++i) { -// nEdges += edges[i].size(); -// } -// -// for (size_t i = 0; i < vertices.size(); ++i) { -// nVertices += (vertices[i] > -1) ? 
1: 0; -// } -// -// -// std::cout << "rank " << rank << ": " << std::endl -// << " - domain: " << domains[rank] << std::endl -// << " - faces: " << nFaces << std::endl -// << " - edges: " << nEdges << std::endl -// << " - vertices: " << nVertices << std::endl -// << "--------------------------------------" << std::endl; -// } -// Ippl::Comm->barrier(); -// } - - - -// layout.findNeighbors(2); - - - field.fillHalo(); -// field.accumulateHalo(); -// -// // std::cout << std::endl; -// -// field.fillLocalHalo(10.0); -// + auto& domains = layout.getHostLocalDomains(); + + for (int rank = 0; rank < Ippl::Comm->size(); ++rank) { + + if (rank == Ippl::Comm->rank()) { + auto& faces = layout.getFaceNeighbors(); + auto& edges = layout.getEdgeNeighbors(); + auto& vertices = layout.getVertexNeighbors(); + + int nFaces = 0, nEdges = 0, nVertices = 0; + for (size_t i = 0; i < faces.size(); ++i) { + nFaces += faces[i].size(); + } + + for (size_t i = 0; i < edges.size(); ++i) { + nEdges += edges[i].size(); + } + + for (size_t i = 0; i < vertices.size(); ++i) { + nVertices += (vertices[i] > -1) ? 
1: 0; + } + + + std::cout << "rank " << rank << ": " << std::endl + << " - domain: " << domains[rank] << std::endl + << " - faces: " << nFaces << std::endl + << " - edges: " << nEdges << std::endl + << " - vertices: " << nVertices << std::endl + << "--------------------------------------" << std::endl; + } + Ippl::Comm->barrier(); + } + + + + int nsteps = 300; + + for (int nt=0; nt < nsteps; ++nt) { + + static IpplTimings::TimerRef fillHaloTimer = IpplTimings::getTimer("fillHalo"); + IpplTimings::startTimer(fillHaloTimer); + field.accumulateHalo(); + Ippl::Comm->barrier(); + field.fillHalo(); + Ippl::Comm->barrier(); + IpplTimings::stopTimer(fillHaloTimer); + msg << "Update: " << nt+1 << endl; + } for (int rank = 0; rank < nRanks; ++rank) { if (rank == Ippl::Comm->rank()) { std::ofstream out("field_" + std::to_string(rank) + ".dat", std::ios::out); - std::cout << field.getOwned().grow(1) << std::endl; field.write(out); out.close(); } Ippl::Comm->barrier(); } + IpplTimings::stopTimer(mainTimer); + IpplTimings::print(); + IpplTimings::print(std::string("timing.dat")); return 0; } diff --git a/test/field/TestField3.cpp b/test/field/TestLaplace.cpp similarity index 74% rename from test/field/TestField3.cpp rename to test/field/TestLaplace.cpp index b639a8cc9..ce17101bb 100644 --- a/test/field/TestField3.cpp +++ b/test/field/TestLaplace.cpp @@ -1,3 +1,4 @@ +// Tests the Laplacian on a scalar field #include "Ippl.h" #include @@ -17,16 +18,16 @@ int main(int argc, char *argv[]) { ippl::e_dim_tag decomp[dim]; // Specifies SERIAL, PARALLEL dims for (unsigned int d=0; d layout(owned,decomp); - //Unit box - double dx = 1.0 / double(pt); + //Unit box + double dx = 2.0 / double(pt); ippl::Vector hx = {dx, dx, dx}; - ippl::Vector origin = {0, 0, 0}; + ippl::Vector origin = {-1.0, -1.0, -1.0}; ippl::UniformCartesian mesh(owned, hx, origin); double pi = acos(-1.0); @@ -39,13 +40,15 @@ int main(int argc, char *argv[]) { field_type field(mesh, layout); field_type Lap(mesh, layout); 
+ field_type Lap_exact(mesh, layout); vector_field_type vfield(mesh, layout); typedef ippl::Field Field_t; typename Field_t::view_type& view = field.getView(); - typedef ippl::BConds bc_type; - typedef ippl::BConds vbc_type; + typename Field_t::view_type& view_exact = Lap_exact.getView(); + typedef ippl::BConds bc_type; + typedef ippl::BConds vbc_type; bc_type bcField; vbc_type vbcField; @@ -55,7 +58,7 @@ int main(int argc, char *argv[]) { bcField[i] = std::make_shared>(i); vbcField[i] = std::make_shared>(i); } - ////Lower Y face + ////Lower Y face //bcField[2] = std::make_shared>(2); //vbcField[2] = std::make_shared>(2); ////Higher Y face @@ -76,26 +79,27 @@ int main(int argc, char *argv[]) { const int nghost = field.getNghost(); using mdrange_type = Kokkos::MDRangePolicy>; - Kokkos::parallel_for("Assign field", + Kokkos::parallel_for("Assign field", mdrange_type({nghost, nghost, nghost}, {view.extent(0) - nghost, view.extent(1) - nghost, view.extent(2) - nghost}), - KOKKOS_LAMBDA(const int i, - const int j, + KOKKOS_LAMBDA(const int i, + const int j, const int k) { //local to global index conversion const size_t ig = i + lDom[0].first() - nghost; const size_t jg = j + lDom[1].first() - nghost; const size_t kg = k + lDom[2].first() - nghost; - double x = origin[0] + (ig + 0.5) * hx[0]; - double y = origin[1] + (jg + 0.5) * hx[1]; - double z = origin[2] + (kg + 0.5) * hx[2]; + double x = (ig + 0.5) * hx[0] + origin[0]; + double y = (jg + 0.5) * hx[1] + origin[1]; + double z = (kg + 0.5) * hx[2] + origin[2]; //view(i, j, k) = 3.0*pow(x,1) + 4.0*pow(y,1) + 5.0*pow(z,1); //view(i, j, k) = sin(pi * x) * cos(pi * y) * exp(z); view(i, j, k) = sin(pi * x) * sin(pi * y) * sin(pi * z); + view_exact(i, j, k) = -3.0 * pi * pi * sin(pi * x) * sin(pi * y) * sin(pi * z); }); @@ -114,16 +118,15 @@ int main(int argc, char *argv[]) { Lap = laplace(field); - int nRanks = Ippl::Comm->size(); - for (int rank = 0; rank < nRanks; ++rank) { - if (rank == Ippl::Comm->rank()) { - 
std::ofstream out("LaplacePeriodicBCSerial_" + - std::to_string(rank) + - ".dat", std::ios::out); - Lap.write(out); - out.close(); - } - Ippl::Comm->barrier(); + Lap = Lap - Lap_exact; + + Lap = pow(Lap, 2); + Lap_exact = pow(Lap_exact, 2); + double error = sqrt(Lap.sum()); + error = error/sqrt(Lap_exact.sum()); + + if (Ippl::Comm->rank() == 0) { + std::cout << "Error: " << error << std::endl; } diff --git a/test/particle/CMakeLists.txt b/test/particle/CMakeLists.txt index cddb440c6..681ecda64 100644 --- a/test/particle/CMakeLists.txt +++ b/test/particle/CMakeLists.txt @@ -14,26 +14,20 @@ link_directories ( set (IPPL_LIBS ippl ${MPI_CXX_LIBRARIES} ${Boost_LIBRARIES}) set (COMPILE_FLAGS ${OPAL_CXX_FLAGS}) -add_executable (TestIpplParticleBase TestIpplParticleBase.cpp) -target_link_libraries (TestIpplParticleBase ${IPPL_LIBS}) - add_executable (TestScatter TestScatter.cpp) target_link_libraries (TestScatter ${IPPL_LIBS}) add_executable (TestGather TestGather.cpp) target_link_libraries (TestGather ${IPPL_LIBS}) -add_executable (TestExpression TestExpression.cpp) -target_link_libraries (TestExpression ${IPPL_LIBS}) - add_executable (PIC3d PIC3d.cpp) target_link_libraries (PIC3d ${IPPL_LIBS}) -add_executable (TestParticleBConds TestParticleBConds.cpp) -target_link_libraries (TestParticleBConds ${IPPL_LIBS}) +add_executable (PenningTrap PenningTrap.cpp) +target_link_libraries (PenningTrap ${IPPL_LIBS}) -add_executable (TestParticleSpatialLayout TestParticleSpatialLayout.cpp) -target_link_libraries (TestParticleSpatialLayout ${IPPL_LIBS}) +add_executable (UniformPlasmaTest UniformPlasmaTest.cpp) +target_link_libraries (UniformPlasmaTest ${IPPL_LIBS}) add_executable (benchmarkParticleUpdate benchmarkParticleUpdate.cpp) target_link_libraries (benchmarkParticleUpdate ${IPPL_LIBS}) diff --git a/test/particle/PIC3d.cpp b/test/particle/PIC3d.cpp index d199f7572..dabc7033e 100644 --- a/test/particle/PIC3d.cpp +++ b/test/particle/PIC3d.cpp @@ -117,11 +117,7 @@ class 
ChargedParticles : public ippl::ParticleBase { setBCAllPeriodic(); } - void update() { - PLayout& layout = this->getLayout(); - layout.update(*this); - } - + ~ChargedParticles() {} void gatherStatistics(unsigned int totalP, int iteration) { @@ -309,6 +305,8 @@ int main(int argc, char *argv[]){ Inform msg(argv[0]); Inform msg2all(argv[0],INFORM_ALL_NODES); + Ippl::Comm->setDefaultOverallocation(3); + ippl::Vector nr = { std::atoi(argv[1]), @@ -403,9 +401,11 @@ int main(int argc, char *argv[]){ P->P = 0.0; IpplTimings::stopTimer(particleCreation); + bunch_type bunchBuffer(PL); + static IpplTimings::TimerRef UpdateTimer = IpplTimings::getTimer("ParticleUpdate"); IpplTimings::startTimer(UpdateTimer); - P->update(); + PL.update(*P, bunchBuffer); IpplTimings::stopTimer(UpdateTimer); msg << "particles created and initial conditions assigned " << endl; @@ -433,7 +433,7 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(UpdateTimer); - P->update(); + PL.update(*P, bunchBuffer); IpplTimings::stopTimer(UpdateTimer); //scatter the charge onto the underlying grid diff --git a/test/particle/PenningTrap.cpp b/test/particle/PenningTrap.cpp new file mode 100644 index 000000000..0137e9671 --- /dev/null +++ b/test/particle/PenningTrap.cpp @@ -0,0 +1,738 @@ +// Penning Trap +// +// Usage: +// srun ./PenningTrap 128 128 128 10000 300 FFT Gaussian --info 10 +// +// Copyright (c) 2020, Sriramkrishnan Muralikrishnan, +// Paul Scherrer Institut, Villigen PSI, Switzerland +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . 
+// +#include "Ippl.h" +#include +#include +#include +#include +#include +#include + +#include +#include "Utility/IpplTimings.h" +#include "Solver/FFTPeriodicPoissonSolver.h" + +// dimension of our positions +constexpr unsigned Dim = 3; + +// some typedefs +typedef ippl::ParticleSpatialLayout PLayout_t; +typedef ippl::UniformCartesian Mesh_t; +typedef ippl::FieldLayout FieldLayout_t; + + +template +using Vector = ippl::Vector; + +template +using Field = ippl::Field; + +template +using ParticleAttrib = ippl::ParticleAttrib; + +typedef Vector Vector_t; +typedef Field Field_t; +typedef Field VField_t; +typedef ippl::FFTPeriodicPoissonSolver Solver_t; + +double pi = acos(-1.0); + +void dumpVTK(VField_t& E, int nx, int ny, int nz, int iteration, + double dx, double dy, double dz) { + + + typename VField_t::view_type::host_mirror_type host_view = E.getHostMirror(); + + Kokkos::deep_copy(host_view, E.getView()); + std::ofstream vtkout; + vtkout.precision(10); + vtkout.setf(std::ios::scientific, std::ios::floatfield); + + std::stringstream fname; + fname << "data/ef_"; + fname << std::setw(4) << std::setfill('0') << iteration; + fname << ".vtk"; + + // open a new data file for this iteration + // and start with header + vtkout.open(fname.str().c_str(), std::ios::out); + vtkout << "# vtk DataFile Version 2.0" << std::endl; + vtkout << "PenningTrap" << std::endl; + vtkout << "ASCII" << std::endl; + vtkout << "DATASET STRUCTURED_POINTS" << std::endl; + vtkout << "DIMENSIONS " << nx+3 << " " << ny+3 << " " << nz+3 << std::endl; + vtkout << "ORIGIN " << -2*dx << " " << -2*dy << " " << -2*dz << std::endl; + vtkout << "SPACING " << dx << " " << dy << " " << dz << std::endl; + vtkout << "CELL_DATA " << (nx+2)*(ny+2)*(nz+2) << std::endl; + + vtkout << "VECTORS E-Field float" << std::endl; + for (int z=0; z +class ChargedParticles : public ippl::ParticleBase { +public: + VField_t E_m; + Field_t rho_m; + + Vector nr_m; + + ippl::e_dim_tag decomp_m[Dim]; + + Vector_t hr_m; + Vector_t 
rmin_m; + Vector_t rmax_m; + + double Q_m; + + std::string stype_m; + + std::shared_ptr solver_mp; + + double time_m; + + double rhoNorm_m; + + +public: + ParticleAttrib q; // charge + typename ippl::ParticleBase::particle_position_type P; // particle velocity + typename ippl::ParticleBase::particle_position_type E; // electric field at particle position + + + /* + This constructor is mandatory for all derived classes from + ParticleBase as the update function invokes this + */ + ChargedParticles(PLayout& pl) + : ippl::ParticleBase(pl) + { + // register the particle attributes + this->addAttribute(q); + this->addAttribute(P); + this->addAttribute(E); + } + + ChargedParticles(PLayout& pl, + Vector_t hr, + Vector_t rmin, + Vector_t rmax, + ippl::e_dim_tag decomp[Dim], + double Q) + : ippl::ParticleBase(pl) + , hr_m(hr) + , rmin_m(rmin) + , rmax_m(rmax) + , Q_m(Q) + { + // register the particle attributes + this->addAttribute(q); + this->addAttribute(P); + this->addAttribute(E); + setupBCs(); + for (unsigned int i = 0; i < Dim; i++) + decomp_m[i]=decomp[i]; + } + + ~ChargedParticles(){ } + + void setupBCs() { + setBCAllPeriodic(); + } + + void gatherStatistics(unsigned int totalP, int iteration) { + + std::cout << "Rank " << Ippl::Comm->rank() << " has " + << (double)this->getLocalNum()/totalP*100.0 + << "percent of the total particles " << std::endl; + } + + void gatherCIC() { + + gather(this->E, E_m, this->R); + + } + + void scatterCIC(unsigned int totalP, int iteration, Vector_t& hrField) { + + + Inform m("scatter "); + + rho_m = 0.0; + scatter(q, rho_m, this->R); + + static IpplTimings::TimerRef sumTimer = IpplTimings::getTimer("Check"); + IpplTimings::startTimer(sumTimer); + double Q_grid = rho_m.sum(); + + unsigned int Total_particles = 0; + unsigned int local_particles = this->getLocalNum(); + + MPI_Reduce(&local_particles, &Total_particles, 1, + MPI_UNSIGNED, MPI_SUM, 0, Ippl::getComm()); + + double rel_error = std::fabs((Q_m-Q_grid)/Q_m); + m << "Rel. 
error in charge conservation = " << rel_error << endl; + + if(Ippl::Comm->rank() == 0) { + //if((Total_particles != totalP) || (rel_error > 1e-10)) { + if((Total_particles != totalP)) { + std::cout << "Total particles in the sim. " << totalP + << " " << "after update: " + << Total_particles << std::endl; + std::cout << "Total particles not matched in iteration: " + << iteration << std::endl; + std::cout << "Rel. error in charge conservation: " + << rel_error << std::endl; + exit(1); + } + } + + + + rho_m = rho_m / (hrField[0] * hrField[1] * hrField[2]); + + rhoNorm_m = norm(rho_m); + IpplTimings::stopTimer(sumTimer); + + //dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); + + //rho = rho_e - rho_i + rho_m = rho_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); + } + + void initSolver() { + + Inform m("solver "); + if(stype_m == "FFT") + initFFTSolver(); + else + m << "No solver matches the argument" << endl; + + } + + void initFFTSolver() { + ippl::SolverParams sp; + sp.add("output_type",1); + + ippl::FFTParams fftParams; + + fftParams.setAllToAll( false ); + fftParams.setPencils( true ); + fftParams.setReorder( false ); + fftParams.setRCDirection( 0 ); + + solver_mp = std::make_shared(fftParams); + + solver_mp->setParameters(sp); + + solver_mp->setRhs(&rho_m); + + solver_mp->setLhs(&E_m); + } + + + + void dumpData() { + + auto Pview = P.getView(); + + double Energy = 0.0; + + Kokkos::parallel_reduce("Particle Energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = dot(Pview(i), Pview(i)).apply(); + valL += myVal; + }, Kokkos::Sum(Energy)); + + Energy *= 0.5; + double gEnergy = 0.0; + + MPI_Reduce(&Energy, &gEnergy, 1, + MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + + + const int nghostE = E_m.getNghost(); + auto Eview = E_m.getView(); + Vector_t normE; + using mdrange_type = Kokkos::MDRangePolicy>; + + for (unsigned d=0; d(temp)); + double globaltemp = 0.0; + 
MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + normE[d] = sqrt(globaltemp); + } + + + + if(Ippl::Comm->rank() == 0) { + std::ofstream csvout; + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + std::stringstream fname; + fname << "data/ParticleField_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + csvout.open(fname.str().c_str(), std::ios::out | std::ofstream::app); + + if(time_m == 0.0) { + csvout << "time, Kinetic energy, Rho_norm2, Ex_norm2, Ey_norm2, Ez_norm2" << std::endl; + } + + csvout << time_m << " " + << gEnergy << " " + << rhoNorm_m << " " + << normE[0] << " " + << normE[1] << " " + << normE[2] << std::endl; + + csvout.close(); + } + + Ippl::Comm->barrier(); + + } + +private: + void setBCAllPeriodic() { + + this->setParticleBC(ippl::BC::PERIODIC); + } + +}; + +int main(int argc, char *argv[]){ + Ippl ippl(argc, argv); + Inform msg("PenningTrap"); + Inform msg2all(argv[0],INFORM_ALL_NODES); + + Ippl::Comm->setDefaultOverallocation(1); + + + auto start = std::chrono::high_resolution_clock::now(); + ippl::Vector nr = { + std::atoi(argv[1]), + std::atoi(argv[2]), + std::atoi(argv[3]) + }; + + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("mainTimer"); + IpplTimings::startTimer(mainTimer); + const unsigned long long int totalP = std::atol(argv[4]); + const unsigned int nt = std::atoi(argv[5]); + + msg << "Penning Trap " + << endl + << "nt " << nt << " Np= " + << totalP << " grid = " << nr + << endl; + + + using bunch_type = ChargedParticles; + + std::unique_ptr P; + + ippl::NDIndex domain; + for (unsigned i = 0; i< Dim; i++) { + domain[i] = ippl::Index(nr[i]); + } + + ippl::e_dim_tag decomp[Dim]; + for (unsigned d = 0; d < Dim; ++d) { + decomp[d] = ippl::PARALLEL; + } + + // create mesh and layout objects for this problem domain + Vector_t rmin(0.0); + Vector_t rmax(20.0); + double dx = rmax[0] / nr[0]; + double dy = rmax[1] / nr[1]; + double dz = rmax[2] / nr[2]; + + 
Vector_t hr = {dx, dy, dz}; + Vector_t origin = {rmin[0], rmin[1], rmin[2]}; + const double dt = 0.05;//size of timestep + + Mesh_t mesh(domain, hr, origin); + FieldLayout_t FL(domain, decomp); + PLayout_t PL(FL, mesh); + + double Q = -1562.5; + double Bext = 5.0; + P = std::make_unique(PL,hr,rmin,rmax,decomp,Q); + + std::string dist; + dist = argv[7]; + + P->nr_m = nr; + unsigned long long int nloc = totalP / Ippl::Comm->size(); + + int rest = (int) (totalP - nloc * Ippl::Comm->size()); + + if ( Ippl::Comm->rank() < rest ) + ++nloc; + + static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + IpplTimings::startTimer(particleCreation); + + Vector_t length = rmax - rmin; + + std::vector mu(2*Dim); + std::vector sd(2*Dim); + std::vector states(2*Dim); + + + for (unsigned d = 0; d func; + + func = [&](double& x, + double& y, + double& z) + { + double val_conf = std::pow((x - mu[0])/sd[0], 2) + + std::pow((y - mu[1])/sd[1], 2) + + std::pow((z - mu[2])/sd[2], 2); + + return std::exp(-0.5 * val_conf); + }; + + const ippl::NDIndex& lDom = FL.getLocalNDIndex(); + std::vector Rmin(Dim), Rmax(Dim); + for (unsigned d = 0; d dist_uniform(0.0, 1.0); + std::uniform_real_distribution distribution_x(Rmin[0], Rmax[0]); + std::uniform_real_distribution distribution_y(Rmin[1], Rmax[1]); + std::uniform_real_distribution distribution_z(Rmin[2], Rmax[2]); + + std::mt19937_64 eng[3*Dim]; + for (unsigned i = 0; i < 3*Dim; ++i) { + eng[i].seed(42 + i * Dim); + eng[i].discard( nloc * Ippl::Comm->rank()); + } + + + double sum_f = 0.0; + std::size_t ip = 0; + double sum_coord=0.0; + P->create(nloc); + typename bunch_type::particle_position_type::HostMirror R_host = P->R.getHostMirror(); + typename bunch_type::particle_position_type::HostMirror P_host = P->P.getHostMirror(); + typename ParticleAttrib::HostMirror q_host = P->q.getHostMirror(); + for (unsigned long long int i = 0; i< nloc; i++) { + + states[0] = distribution_x(eng[0]); + states[1] = 
distribution_y(eng[1]); + states[2] = distribution_z(eng[2]); + + for (unsigned istate = 0; istate < Dim; ++istate) { + double u1 = dist_uniform(eng[istate*2 + Dim]); + double u2 = dist_uniform(eng[istate*2+1 + Dim]); + states[istate + Dim] = fabs(sd[istate + Dim] * (std::sqrt(-2.0 * std::log(u1)) * + std::cos(2.0 * pi * u2)) + mu[istate + Dim]); + } + + double f = func(states[0], states[1], states[2]); + f = f * states[3] * states[4] * states[5]; + + //if( f > 1e-9 ) { + for (unsigned d = 0; drank() == 0) { + std::cout << "Sum Coord: " << std::setprecision(16) << global_sum_coord << std::endl; + } + //P->setLocalNum(ip); + Kokkos::deep_copy(P->R.getView(), R_host); + Kokkos::deep_copy(P->P.getView(), P_host); + Kokkos::deep_copy(P->q.getView(), q_host); + + P->q = P->q * (P->Q_m/Total_sum); + + unsigned int local_particles = P->getLocalNum(); + MPI_Reduce(&local_particles, &Total_particles, 1, + MPI_UNSIGNED, MPI_SUM, 0, Ippl::getComm()); + msg << "#particles: " << Total_particles << endl; + double PPC = Total_particles/((double)(nr[0] * nr[1] * nr[2])); + msg << "#PPC: " << PPC << endl; + + } + else if(dist == "Gaussian") { + + P->create(nloc); + std::mt19937_64 eng[4*Dim]; + for (unsigned i = 0; i < 4*Dim; ++i) { + eng[i].seed(42 + i * Dim); + eng[i].discard( nloc * Ippl::Comm->rank()); + } + + + std::uniform_real_distribution dist_uniform(0.0, 1.0); + + typename bunch_type::particle_position_type::HostMirror R_host = P->R.getHostMirror(); + typename bunch_type::particle_position_type::HostMirror P_host = P->P.getHostMirror(); + + double sum_coord=0.0; + for (unsigned long long int i = 0; i< nloc; i++) { + for (unsigned istate = 0; istate < 2*Dim; ++istate) { + double u1 = dist_uniform(eng[istate*2]); + double u2 = dist_uniform(eng[istate*2+1]); + states[istate] = sd[istate] * (std::sqrt(-2.0 * std::log(u1)) * + std::cos(2.0 * pi * u2)) + mu[istate]; + } + for (unsigned d = 0; drank() == 0) { + std::cout << "Sum Coord: " << std::setprecision(16) << 
global_sum_coord << std::endl; + } + + Kokkos::deep_copy(P->R.getView(), R_host); + Kokkos::deep_copy(P->P.getView(), P_host); + P->q = P->Q_m/totalP; + + Total_particles = totalP; + } + IpplTimings::stopTimer(particleCreation); + + P->E_m.initialize(mesh, FL); + P->rho_m.initialize(mesh, FL); + + bunch_type bunchBuffer(PL); + static IpplTimings::TimerRef FirstUpdateTimer = IpplTimings::getTimer("FirstUpdate"); + IpplTimings::startTimer(FirstUpdateTimer); + //P->update(); + PL.update(*P, bunchBuffer); + IpplTimings::stopTimer(FirstUpdateTimer); + + msg << "particles created and initial conditions assigned " << endl; + + P->stype_m = argv[6]; + P->initSolver(); + + P->time_m = 0.0; + + P->scatterCIC(Total_particles, 0, hr); + + static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("Solve"); + IpplTimings::startTimer(SolveTimer); + P->solver_mp->solve(); + IpplTimings::stopTimer(SolveTimer); + + P->gatherCIC(); + + static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); + IpplTimings::startTimer(dumpDataTimer); + P->dumpData(); + IpplTimings::stopTimer(dumpDataTimer); + + // begin main timestep loop + msg << "Starting iterations ..." 
<< endl; + for (unsigned int it=0; itR.getView(); + auto Pview = P->P.getView(); + auto Eview = P->E.getView(); + double V0 = 30*rmax[2]; + Kokkos::parallel_for("Kick1", P->getLocalNum(), + KOKKOS_LAMBDA(const size_t j){ + double Eext_x = -(Rview(j)[0] - (rmax[0]/2)) * (V0/(2*pow(rmax[2],2))); + double Eext_y = -(Rview(j)[1] - (rmax[1]/2)) * (V0/(2*pow(rmax[2],2))); + double Eext_z = (Rview(j)[2] - (rmax[2]/2)) * (V0/(pow(rmax[2],2))); + + Pview(j)[0] -= 0.5 * dt * ((Eview(j)[0] + Eext_x) + Pview(j)[1] * Bext); + Pview(j)[1] -= 0.5 * dt * ((Eview(j)[1] + Eext_y) - Pview(j)[0] * Bext); + Pview(j)[2] -= 0.5 * dt * (Eview(j)[2] + Eext_z); + }); + IpplTimings::stopTimer(PTimer); + + //drift + static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("positionPush"); + IpplTimings::startTimer(RTimer); + P->R = P->R + dt * P->P; + Ippl::Comm->barrier(); + IpplTimings::stopTimer(RTimer); + + //Since the particles have moved spatially update them to correct processors + PL.update(*P, bunchBuffer); + + //scatter the charge onto the underlying grid + P->scatterCIC(Total_particles, it+1, hr); + + //Field solve + IpplTimings::startTimer(SolveTimer); + P->solver_mp->solve(); + IpplTimings::stopTimer(SolveTimer); + + // gather E field + P->gatherCIC(); + + //kick + IpplTimings::startTimer(PTimer); + auto R2view = P->R.getView(); + auto P2view = P->P.getView(); + auto E2view = P->E.getView(); + Kokkos::parallel_for("Kick2", P->getLocalNum(), + KOKKOS_LAMBDA(const size_t j){ + double Eext_x = -(R2view(j)[0] - (rmax[0]/2)) * (V0/(2*pow(rmax[2],2))); + double Eext_y = -(R2view(j)[1] - (rmax[1]/2)) * (V0/(2*pow(rmax[2],2))); + double Eext_z = (R2view(j)[2] - (rmax[2]/2)) * (V0/(pow(rmax[2],2))); + + P2view(j)[0] -= 0.5 * dt * ((E2view(j)[0] + Eext_x) + P2view(j)[1] * Bext); + P2view(j)[1] -= 0.5 * dt * ((E2view(j)[1] + Eext_y) - P2view(j)[0] * Bext); + P2view(j)[2] -= 0.5 * dt * (E2view(j)[2] + Eext_z); + }); + IpplTimings::stopTimer(PTimer); + + P->time_m += dt; + 
IpplTimings::startTimer(dumpDataTimer); + P->dumpData(); + IpplTimings::stopTimer(dumpDataTimer); + msg << "Finished iteration: " << it << " time: " << P->time_m << endl; + } + + msg << "Penning Trap: End." << endl; + IpplTimings::stopTimer(mainTimer); + IpplTimings::print(); + IpplTimings::print(std::string("timing.dat")); + auto end = std::chrono::high_resolution_clock::now(); + + std::chrono::duration time_chrono = std::chrono::duration_cast>(end - start); + std::cout << "Elapsed time: " << time_chrono.count() << std::endl; + + return 0; +} diff --git a/test/particle/TestExpression.cpp b/test/particle/TestExpression.cpp deleted file mode 100644 index 3b8500518..000000000 --- a/test/particle/TestExpression.cpp +++ /dev/null @@ -1,26 +0,0 @@ -#include "Ippl.h" - -int main(int argc, char *argv[]) { - Ippl ippl(argc, argv); - - typedef ippl::detail::ParticleLayout playout_type; - typedef ippl::ParticleBase bunch_type; - - playout_type pl; - - bunch_type p(pl); - - p.create(10); - - p.ID.print(); - - p.ID = 1; - - p.ID.print(); - - p.ID = p.ID + 1; - - p.ID.print(); - - return 0; -} diff --git a/test/particle/TestGather.cpp b/test/particle/TestGather.cpp index 4016bed6b..a86af4a3a 100644 --- a/test/particle/TestGather.cpp +++ b/test/particle/TestGather.cpp @@ -8,58 +8,59 @@ struct Bunch : public ippl::ParticleBase Bunch(PLayout& playout) : ippl::ParticleBase(playout) { - this->addAttribute(E); + this->addAttribute(Q); } + ~Bunch() {} + typedef ippl::ParticleAttrib charge_container_type; - charge_container_type E; + charge_container_type Q; }; int main(int argc, char *argv[]) { Ippl ippl(argc, argv); - typedef ippl::detail::ParticleLayout playout_type; + typedef ippl::ParticleSpatialLayout playout_type; typedef Bunch bunch_type; - playout_type pl; + int pt = 512; + ippl::Index I(pt); + ippl::NDIndex<3> owned(I, I, I); - bunch_type bunch(pl); + ippl::e_dim_tag allParallel[3]; // Specifies SERIAL, PARALLEL dims + for (unsigned int d=0; d<3; d++) + allParallel[d] = 
ippl::PARALLEL; + + // all parallel layout, standard domain, normal axis order + ippl::FieldLayout<3> layout(owned, allParallel); + double dx = 1.0 / double(pt); + ippl::Vector hx = {dx, dx, dx}; + ippl::Vector origin = {0, 0, 0}; + ippl::UniformCartesian mesh(owned, hx, origin); - int n = 10; + playout_type pl(layout, mesh); - bunch.create(n); + bunch_type bunch(pl); + int n = 10; + bunch.create(n); std::mt19937_64 eng; std::uniform_real_distribution unif(0, 1); typename bunch_type::particle_position_type::HostMirror R_host = bunch.R.getHostMirror(); - typename bunch_type::charge_container_type::HostMirror E_host = bunch.E.getHostMirror(); + typename bunch_type::charge_container_type::HostMirror Q_host = bunch.Q.getHostMirror(); for(int i = 0; i < n; ++i) { ippl::Vector r = {unif(eng), unif(eng), unif(eng)}; R_host(i) = r; - E_host(i) = 0.0; + Q_host(i) = 0.0; } Kokkos::deep_copy(bunch.R.getView(), R_host); - Kokkos::deep_copy(bunch.E.getView(), E_host); - - - int pt = 20; - ippl::Index I(pt); - ippl::NDIndex<3> owned(I, I, I); - - ippl::e_dim_tag allParallel[3]; // Specifies SERIAL, PARALLEL dims - for (unsigned int d=0; d<3; d++) - allParallel[d] = ippl::SERIAL; - - // all parallel layout, standard domain, normal axis order - ippl::FieldLayout<3> layout(owned, allParallel); + Kokkos::deep_copy(bunch.Q.getView(), Q_host); - double dx = 1.0 / double(pt); - ippl::Vector hx = {dx, dx, dx}; - ippl::Vector origin = {0, 0, 0}; - ippl::UniformCartesian mesh(owned, hx, origin); + bunch_type buffer(pl); + pl.update(bunch, buffer); typedef ippl::Field field_type; @@ -70,9 +71,9 @@ int main(int argc, char *argv[]) { field = 1.0; - gather(bunch.E, field, bunch.R); + gather(bunch.Q, field, bunch.R); - bunch.E.print(); + bunch.Q.print(); return 0; } diff --git a/test/particle/TestIpplParticleBase.cpp b/test/particle/TestIpplParticleBase.cpp deleted file mode 100644 index 14caf0c18..000000000 --- a/test/particle/TestIpplParticleBase.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include 
"Ippl.h" - -int main(int argc, char *argv[]) { - Ippl ippl(argc, argv); - - typedef ippl::detail::ParticleLayout playout_type; - typedef ippl::ParticleBase bunch_type; - - playout_type pl; - - bunch_type p(pl); - - std::cout << p.getLocalNum() << std::endl; - - p.create(10); - - p.destroy(); - - std::cout << p.getLocalNum() << std::endl; - - return 0; -} diff --git a/test/particle/TestParticleBConds.cpp b/test/particle/TestParticleBConds.cpp deleted file mode 100644 index 2fb80d7b9..000000000 --- a/test/particle/TestParticleBConds.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include "Ippl.h" - -int main(int argc, char *argv[]) { - Ippl ippl(argc,argv); - - enum { dim = 3 }; - typedef ippl::detail::ParticleLayout playout_type; - typedef ippl::ParticleBase bunch_type; - - std::unique_ptr bunch; - double len = 0.2; - ippl::NDRegion nr; - typename bunch_type::particle_position_type::HostMirror HostR; - playout_type pl_m; - - - - double shift = 0.1; - // setup(len + shift); - double pos = len + shift; - bunch = std::make_unique(pl_m); - - size_t nParticles = 1000; - bunch->create(nParticles); - - HostR = bunch->R.getHostMirror(); - - for (size_t i = 0; i < nParticles; ++i) { - HostR(i) = ippl::Vector({pos, pos, pos}); - } - - Kokkos::deep_copy(bunch->R.getView(), HostR); - - // domain - ippl::PRegion region(0.0, 0.2); - - nr = ippl::NDRegion(region, region, region); - - using BC = ippl::BC; - - bunch_type::bc_container_type bcs = { - BC::PERIODIC, - BC::PERIODIC, - BC::REFLECTIVE, - BC::REFLECTIVE, - BC::SINK, - BC::NO - }; - - bunch->setParticleBC(bcs); - - bunch->getLayout().applyBC(bunch->R, nr); - - Kokkos::deep_copy(HostR, bunch->R.getView()); - - ippl::Vector expected = {shift, shift, pos}; - - for (unsigned i = 0; i < 3; ++i) { - std::cout << HostR(0)[i] << std::endl; - } - - return 0; -} diff --git a/test/particle/TestParticleSpatialLayout.cpp b/test/particle/TestParticleSpatialLayout.cpp deleted file mode 100644 index 508837686..000000000 --- 
a/test/particle/TestParticleSpatialLayout.cpp +++ /dev/null @@ -1,193 +0,0 @@ -#include - -#include "Ippl.h" - -template -struct Bunch : public ippl::ParticleBase -{ - - Bunch(PLayout& playout) - : ippl::ParticleBase(playout) - { - this->addAttribute(expectedRank); - this->addAttribute(Q); - } - - ~Bunch(){ } - - typedef ippl::ParticleAttrib rank_type; - typedef ippl::ParticleAttrib charge_type; - rank_type expectedRank; - charge_type Q; - - void update() { - PLayout& layout = this->getLayout(); - layout.update(*this); - } -}; - -int main(int argc, char *argv[]) { - Ippl ippl(argc, argv); - - typedef ippl::ParticleSpatialLayout playout_type; - typedef Bunch bunch_type; - - constexpr unsigned int dim = 3; - - int pt = 1024; - ippl::Index I(pt); - ippl::NDIndex owned(I, I, I); - - ippl::e_dim_tag allParallel[dim]; // Specifies SERIAL, PARALLEL dims - for (unsigned int d=0; d layout(owned,allParallel); - - double dx = 1.0 / double(pt); - ippl::Vector hx = {dx, dx, dx}; - ippl::Vector origin = {0, 0, 0}; - typedef ippl::UniformCartesian Mesh_t; - Mesh_t mesh(owned, hx, origin); - - playout_type pl(layout, mesh); - - bunch_type bunch(pl); - - using BC = ippl::BC; - - bunch_type::bc_container_type bcs = { - BC::PERIODIC, - BC::PERIODIC, - BC::PERIODIC, - BC::PERIODIC, - BC::PERIODIC, - BC::PERIODIC - }; - - bunch.setParticleBC(bcs); - - int nRanks = Ippl::Comm->size(); - unsigned int nParticles = std::pow(256, 3); - - if (nParticles % nRanks > 0) { - if (Ippl::Comm->rank() == 0) { - std::cerr << nParticles << " not a multiple of " << nRanks << std::endl; - } - return 0; - } - - bunch.create(nParticles / nRanks); - -#ifdef KOKKOS_ENABLE_CUDA - int id = -1; - auto err = cudaGetDevice(&id); - if (err != cudaSuccess) printf("kernel cuda error: %d, %s\n", (int)err, cudaGetErrorString(err)); - std::cout << "Rank " << Ippl::Comm->rank() << " has device " << id << "\n"; -#endif - - std::mt19937_64 eng(Ippl::Comm->rank()); - std::uniform_real_distribution unif(0, 1); - - typename 
bunch_type::particle_position_type::HostMirror R_host = bunch.R.getHostMirror(); - for (size_t i = 0; i < bunch.getLocalNum(); ++i) { - ippl::Vector r = {unif(eng), unif(eng), unif(eng)}; - R_host(i) = r; - } - - bunch.Q = 1.0; - Ippl::Comm->barrier(); - Kokkos::deep_copy(bunch.R.getView(), R_host); - - typedef ippl::detail::RegionLayout RegionLayout_t; - RegionLayout_t RLayout = pl.getRegionLayout(); - - auto& positions = bunch.R.getView(); - typename RegionLayout_t::view_type Regions = RLayout.getdLocalRegions(); - using size_type = typename RegionLayout_t::view_type::size_type; - using mdrange_type = Kokkos::MDRangePolicy>; - typedef ippl::ParticleAttrib ER_t; - ER_t::view_type ER = bunch.expectedRank.getView(); - - Kokkos::parallel_for("Expected Rank", - mdrange_type({0, 0}, - {ER.extent(0), Regions.extent(0)}), - KOKKOS_LAMBDA(const size_t i, const size_type j) { - bool x_bool = false; - bool y_bool = false; - bool z_bool = false; - if((positions(i)[0] >= Regions(j)[0].min()) && - (positions(i)[0] <= Regions(j)[0].max())) { - x_bool = true; - } - if((positions(i)[1] >= Regions(j)[1].min()) && - (positions(i)[1] <= Regions(j)[1].max())) { - y_bool = true; - } - if((positions(i)[2] >= Regions(j)[2].min()) && - (positions(i)[2] <= Regions(j)[2].max())) { - z_bool = true; - } - if(x_bool && y_bool && z_bool){ - ER(i) = j; - } - }); - Kokkos::fence(); - - typename bunch_type::particle_index_type::HostMirror ID_host = bunch.ID.getHostMirror(); - Kokkos::deep_copy(ID_host, bunch.ID.getView()); - - ER_t::view_type::host_mirror_type ER_host = bunch.expectedRank.getHostMirror(); - Kokkos::deep_copy(ER_host, bunch.expectedRank.getView()); - typedef ippl::ParticleAttrib Q_t; - Q_t::view_type::host_mirror_type Q_host = bunch.Q.getHostMirror(); - - Kokkos::deep_copy(Q_host, bunch.Q.getView()); - if (Ippl::Comm->rank() == 0) { - std::cout << "Before update:" << std::endl; - } - - - std::cout << layout << std::endl; - - bunch.update(); - - Ippl::Comm->barrier(); - - 
Kokkos::resize(R_host, bunch.R.size()); - Kokkos::deep_copy(R_host, bunch.R.getView()); - - Kokkos::resize(ID_host, bunch.ID.size()); - Kokkos::deep_copy(ID_host, bunch.ID.getView()); - - Kokkos::resize(Q_host, bunch.Q.size()); - Kokkos::deep_copy(Q_host, bunch.Q.getView()); - - Kokkos::resize(ER_host, bunch.expectedRank.size()); - Kokkos::deep_copy(ER_host, bunch.expectedRank.getView()); - - if (Ippl::Comm->rank() == 0) { - std::cout << "After update:" << std::endl; - } - - for (size_t i = 0; i < bunch.getLocalNum(); ++i) { - if(Ippl::Comm->rank() != ER_host(i)) { - std::cout << "Particle with ID: " << ID_host(i) << " " - << "has wrong rank!" << std::endl; - } - } - Ippl::Comm->barrier(); - - unsigned int Total_particles = 0; - unsigned int local_particles = bunch.getLocalNum(); - - MPI_Reduce(&local_particles, &Total_particles, 1, - MPI_UNSIGNED, MPI_SUM, 0, Ippl::getComm()); - if (Ippl::Comm->rank() == 0) { - std::cout << "All expected ranks correct!!" << std::endl; - - std::cout << "Total particles before: " << nParticles - << " " << "after: " << Total_particles << std::endl; - } - return 0; -} diff --git a/test/particle/TestScatter.cpp b/test/particle/TestScatter.cpp index 58db5ea55..a48b73dfb 100644 --- a/test/particle/TestScatter.cpp +++ b/test/particle/TestScatter.cpp @@ -16,10 +16,6 @@ struct Bunch : public ippl::ParticleBase typedef ippl::ParticleAttrib charge_container_type; charge_container_type Q; - void update() { - PLayout& layout = this->getLayout(); - layout.update(*this); - } }; int main(int argc, char *argv[]) { @@ -93,14 +89,13 @@ int main(int argc, char *argv[]) { bunch.Q = 1.0; - bunch.update(); + bunch_type bunchBuffer(pl); + pl.update(bunch, bunchBuffer); field = 0.0; scatter(bunch.Q, field, bunch.R); - //field.write(); - // Check charge conservation try { double Total_charge_field = field.sum(); diff --git a/test/particle/UniformPlasmaTest.cpp b/test/particle/UniformPlasmaTest.cpp new file mode 100644 index 000000000..213522c8f --- 
/dev/null +++ b/test/particle/UniformPlasmaTest.cpp @@ -0,0 +1,616 @@ +// Uniform Plasma Test +// +// Usage: +// srun ./UniformPlasmaTest 128 128 128 10000 300 FFT --info 10 +// +// Copyright (c) 2020, Sriramkrishnan Muralikrishnan, +// Paul Scherrer Institut, Villigen PSI, Switzerland +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// +#include "Ippl.h" +#include +#include +#include +#include +#include +#include + +#include + +#include +#include "Utility/IpplTimings.h" +#include "Solver/FFTPeriodicPoissonSolver.h" + +// dimension of our positions +constexpr unsigned Dim = 3; + +// some typedefs +typedef ippl::ParticleSpatialLayout PLayout_t; +typedef ippl::UniformCartesian Mesh_t; +typedef ippl::FieldLayout FieldLayout_t; + +template +using Vector = ippl::Vector; + +template +using Field = ippl::Field; + +template +using ParticleAttrib = ippl::ParticleAttrib; + +typedef Vector Vector_t; +typedef Field Field_t; +typedef Field VField_t; +typedef ippl::FFTPeriodicPoissonSolver Solver_t; + +double pi = acos(-1.0); + +void dumpVTK(VField_t& E, int nx, int ny, int nz, int iteration, + double dx, double dy, double dz) { + + typename VField_t::view_type::host_mirror_type host_view = E.getHostMirror(); + + Kokkos::deep_copy(host_view, E.getView()); + std::ofstream vtkout; + vtkout.precision(10); + vtkout.setf(std::ios::scientific, std::ios::floatfield); + + std::stringstream fname; + fname << "data/ef_"; + fname << std::setw(4) << std::setfill('0') << iteration; + fname << ".vtk"; + + // open a new data file for this iteration + // and start with header + vtkout.open(fname.str().c_str(), std::ios::out); + 
vtkout << "# vtk DataFile Version 2.0" << std::endl; + vtkout << "UniformPlasmaTest" << std::endl; + vtkout << "ASCII" << std::endl; + vtkout << "DATASET STRUCTURED_POINTS" << std::endl; + vtkout << "DIMENSIONS " << nx+3 << " " << ny+3 << " " << nz+3 << std::endl; + vtkout << "ORIGIN " << -2*dx << " " << -2*dy << " " << -2*dz << std::endl; + vtkout << "SPACING " << dx << " " << dy << " " << dz << std::endl; + vtkout << "CELL_DATA " << (nx+2)*(ny+2)*(nz+2) << std::endl; + + vtkout << "VECTORS E-Field float" << std::endl; + for (int z=0; z +struct generate_random { + + using view_type = typename ippl::detail::ViewType::view_type; + // Output View for the random numbers + view_type vals; + + // The GeneratorPool + GeneratorPool rand_pool; + + T start, end; + + // Initialize all members + generate_random(view_type vals_, GeneratorPool rand_pool_, T start_, T end_) + : vals(vals_), rand_pool(rand_pool_), start(start_), end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + // Draw samples numbers from the pool as double in the range [start, end) + vals(i)[0] = rand_gen.drand(start[0], end[0]); + vals(i)[1] = rand_gen.drand(start[1], end[1]); + vals(i)[2] = rand_gen.drand(start[2], end[2]); + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + + +template +class ChargedParticles : public ippl::ParticleBase { +public: + VField_t E_m; + Field_t rho_m; + + Vector nr_m; + + ippl::e_dim_tag decomp_m[Dim]; + + Vector_t hr_m; + Vector_t rmin_m; + Vector_t rmax_m; + + double Q_m; + + std::string stype_m; + + std::shared_ptr solver_mp; + + double time_m; + + double rhoNorm_m; + + +public: + ParticleAttrib q; // charge + typename ippl::ParticleBase::particle_position_type P; // particle velocity + typename ippl::ParticleBase::particle_position_type E; // 
electric field at particle position + + + /* + This constructor is mandatory for all derived classes from + ParticleBase as the update function invokes this + */ + ChargedParticles(PLayout& pl) + : ippl::ParticleBase(pl) + { + // register the particle attributes + this->addAttribute(q); + this->addAttribute(P); + this->addAttribute(E); + } + + ChargedParticles(PLayout& pl, + Vector_t hr, + Vector_t rmin, + Vector_t rmax, + ippl::e_dim_tag decomp[Dim], + double Q) + : ippl::ParticleBase(pl) + , hr_m(hr) + , rmin_m(rmin) + , rmax_m(rmax) + , Q_m(Q) + { + // register the particle attributes + this->addAttribute(q); + this->addAttribute(P); + this->addAttribute(E); + setupBCs(); + for (unsigned int i = 0; i < Dim; i++) + decomp_m[i]=decomp[i]; + } + + ~ChargedParticles(){ } + + void setupBCs() { + setBCAllPeriodic(); + } + + void gatherStatistics(uint64_t totalP, int iteration) { + std::cout << "Rank " << Ippl::Comm->rank() << " has " + << (double)this->getLocalNum()/totalP*100.0 + << "percent of the total particles " << std::endl; + } + + void gatherCIC() { + gather(this->E, E_m, this->R); + } + + void scatterCIC(uint64_t totalP, int iteration, Vector_t& hrField) { + Inform m("scatter "); + + rho_m = 0.0; + scatter(q, rho_m, this->R); + + static IpplTimings::TimerRef sumTimer = IpplTimings::getTimer("Check"); + IpplTimings::startTimer(sumTimer); + double Q_grid = rho_m.sum(); + + uint64_t Total_particles = 0; + uint64_t local_particles = this->getLocalNum(); + + MPI_Reduce(&local_particles, &Total_particles, 1, + MPI_UNSIGNED_LONG_LONG, MPI_SUM, 0, Ippl::getComm()); + + double rel_error = std::fabs((Q_m-Q_grid)/Q_m); + m << "Rel. error in charge conservation = " << rel_error << endl; + + if(Ippl::Comm->rank() == 0) { + //if((Total_particles != totalP) || (rel_error > 1e-10)) { + if((Total_particles != totalP)) { + std::cout << "Total particles in the sim. 
" << totalP + << " " << "after update: " + << Total_particles << std::endl; + std::cout << "Total particles not matched in iteration: " + << iteration << std::endl; + std::cout << "Rel. error in charge conservation: " + << rel_error << std::endl; + exit(1); + } + } + + rho_m = rho_m / (hrField[0] * hrField[1] * hrField[2]); + + const int nghostRho = rho_m.getNghost(); + auto Rhoview = rho_m.getView(); + using mdrange_type = Kokkos::MDRangePolicy>; + + double temp = 0.0; + Kokkos::parallel_reduce("Rho reduce", + mdrange_type({nghostRho, nghostRho, nghostRho}, + {Rhoview.extent(0) - nghostRho, + Rhoview.extent(1) - nghostRho, + Rhoview.extent(2) - nghostRho}), + KOKKOS_LAMBDA(const size_t i, const size_t j, + const size_t k, double& valL) + { + double myVal = pow(Rhoview(i, j, k), 2); + valL += myVal; + }, Kokkos::Sum(temp)); + double globaltemp = 0.0; + MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + rhoNorm_m = sqrt(globaltemp); + IpplTimings::stopTimer(sumTimer); + + //dumpVTK(rho_m,nr_m[0],nr_m[1],nr_m[2],iteration,hrField[0],hrField[1],hrField[2]); + + //rho = rho_e - rho_i + rho_m = rho_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); + } + + void initSolver() { + Inform m("solver "); + if(stype_m == "FFT") + initFFTSolver(); + else + m << "No solver matches the argument" << endl; + } + + void initFFTSolver() { + ippl::SolverParams sp; + sp.add("output_type",1); + + ippl::FFTParams fftParams; + + fftParams.setAllToAll( false ); + fftParams.setPencils( true ); + fftParams.setReorder( false ); + fftParams.setRCDirection( 0 ); + + solver_mp = std::make_shared(fftParams); + + solver_mp->setParameters(sp); + + solver_mp->setRhs(&rho_m); + + solver_mp->setLhs(&E_m); + } + + + + void dumpData() { + auto Pview = P.getView(); + + double Energy = 0.0; + + Kokkos::parallel_reduce("Particle Energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = dot(Pview(i), 
Pview(i)).apply(); + valL += myVal; + }, Kokkos::Sum(Energy)); + + Energy *= 0.5; + double gEnergy = 0.0; + + MPI_Reduce(&Energy, &gEnergy, 1, + MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + + + const int nghostE = E_m.getNghost(); + auto Eview = E_m.getView(); + Vector_t normE; + using mdrange_type = Kokkos::MDRangePolicy>; + + for (unsigned d=0; d(temp)); + double globaltemp = 0.0; + MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + normE[d] = sqrt(globaltemp); + } + + + + if(Ippl::Comm->rank() == 0) { + std::ofstream csvout; + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + std::stringstream fname; + fname << "data/ParticleField_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + csvout.open(fname.str().c_str(), std::ios::out | std::ofstream::app); + + if(time_m == 0.0) { + csvout << "time, Kinetic energy, Rho_norm2, Ex_norm2, Ey_norm2, Ez_norm2" << std::endl; + } + + csvout << time_m << " " + << gEnergy << " " + << rhoNorm_m << " " + << normE[0] << " " + << normE[1] << " " + << normE[2] << std::endl; + + csvout.close(); + } + + Ippl::Comm->barrier(); + + } + +private: + void setBCAllPeriodic() { + this->setParticleBC(ippl::BC::PERIODIC); + } + +}; + +int main(int argc, char *argv[]){ + Ippl ippl(argc, argv); + Inform msg("UniformPlasmaTest"); + Inform msg2all(argv[0],INFORM_ALL_NODES); + + Ippl::Comm->setDefaultOverallocation(2); + + auto start = std::chrono::high_resolution_clock::now(); + ippl::Vector nr = { + std::atoi(argv[1]), + std::atoi(argv[2]), + std::atoi(argv[3]) + }; + + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("mainTimer"); + static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + static IpplTimings::TimerRef FirstUpdateTimer = IpplTimings::getTimer("initialisation"); + static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("kick"); + 
static IpplTimings::TimerRef temp = IpplTimings::getTimer("randomMove"); + static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("drift"); + static IpplTimings::TimerRef updateTimer = IpplTimings::getTimer("update"); + static IpplTimings::TimerRef SolveTimer = IpplTimings::getTimer("solve"); + + IpplTimings::startTimer(mainTimer); + + const uint64_t totalP = std::atoll(argv[4]); + const unsigned int nt = std::atoi(argv[5]); + + msg << "Uniform Plasma Test" + << endl + << "nt " << nt << " Np= " + << totalP << " grid = " << nr + << endl; + + using bunch_type = ChargedParticles; + + std::unique_ptr P; + + ippl::NDIndex domain; + for (unsigned i = 0; i< Dim; i++) { + domain[i] = ippl::Index(nr[i]); + } + + ippl::e_dim_tag decomp[Dim]; + for (unsigned d = 0; d < Dim; ++d) { + decomp[d] = ippl::PARALLEL; + } + + // create mesh and layout objects for this problem domain + Vector_t rmin(0.0); + Vector_t rmax(20.0); + double dx = rmax[0] / nr[0]; + double dy = rmax[1] / nr[1]; + double dz = rmax[2] / nr[2]; + + Vector_t hr = {dx, dy, dz}; + Vector_t origin = {rmin[0], rmin[1], rmin[2]}; + const double dt = 1.0; + + Mesh_t mesh(domain, hr, origin); + FieldLayout_t FL(domain, decomp); + PLayout_t PL(FL, mesh); + + double Q = -1562.5; + P = std::make_unique(PL,hr,rmin,rmax,decomp,Q); + + P->nr_m = nr; + uint64_t nloc = totalP / Ippl::Comm->size(); + + int rest = (int) (totalP - nloc * Ippl::Comm->size()); + + if ( Ippl::Comm->rank() < rest ) + ++nloc; + + IpplTimings::startTimer(particleCreation); + P->create(nloc); + + const ippl::NDIndex& lDom = FL.getLocalNDIndex(); + Vector_t Rmin, Rmax; + for (unsigned d = 0; d rand_pool64((uint64_t)(42 + 100*Ippl::Comm->rank())); + Kokkos::parallel_for(nloc, + generate_random, Dim>( + P->R.getView(), rand_pool64, Rmin, Rmax)); + Kokkos::fence(); + P->q = P->Q_m/totalP; + IpplTimings::stopTimer(particleCreation); + + IpplTimings::startTimer(FirstUpdateTimer); + P->E_m.initialize(mesh, FL); + P->rho_m.initialize(mesh, FL); + + 
bunch_type bunchBuffer(PL); + + IpplTimings::startTimer(updateTimer); + PL.update(*P, bunchBuffer); + IpplTimings::stopTimer(updateTimer); + + msg << "particles created and initial conditions assigned " << endl; + + P->stype_m = argv[6]; + P->initSolver(); + P->time_m = 0.0; + + P->scatterCIC(totalP, 0, hr); + + IpplTimings::startTimer(SolveTimer); + P->solver_mp->solve(); + IpplTimings::stopTimer(SolveTimer); + + P->gatherCIC(); + + IpplTimings::startTimer(dumpDataTimer); + P->dumpData(); + IpplTimings::stopTimer(dumpDataTimer); + + IpplTimings::stopTimer(FirstUpdateTimer); + + // begin main timestep loop + msg << "Starting iterations ..." << endl; + for (unsigned int it=0; itP = P->P - 0.5 * dt * P->E * 0.0; + IpplTimings::stopTimer(PTimer); + + IpplTimings::startTimer(temp); + Kokkos::parallel_for(P->getLocalNum(), + generate_random, Dim>( + P->P.getView(), rand_pool64, -hr, hr)); + Kokkos::fence(); + IpplTimings::stopTimer(temp); + + //drift + IpplTimings::startTimer(RTimer); + P->R = P->R + dt * P->P; + IpplTimings::stopTimer(RTimer); + + //Since the particles have moved spatially update them to correct processors + IpplTimings::startTimer(updateTimer); + PL.update(*P, bunchBuffer); //P->update(); + IpplTimings::stopTimer(updateTimer); + + //scatter the charge onto the underlying grid + P->scatterCIC(totalP, it+1, hr); + + //Field solve + IpplTimings::startTimer(SolveTimer); + P->solver_mp->solve(); + IpplTimings::stopTimer(SolveTimer); + + // gather E field + P->gatherCIC(); + + //kick + IpplTimings::startTimer(PTimer); + P->P = P->P - 0.5 * dt * P->E * 0.0; + IpplTimings::stopTimer(PTimer); + + P->time_m += dt; + IpplTimings::startTimer(dumpDataTimer); + P->dumpData(); + IpplTimings::stopTimer(dumpDataTimer); + msg << "Finished iteration: " << it << " time: " << P->time_m << endl; + } + + msg << "Uniform Plasma Test: End." 
<< endl; + IpplTimings::stopTimer(mainTimer); + IpplTimings::print(); + IpplTimings::print(std::string("timing.dat")); + auto end = std::chrono::high_resolution_clock::now(); + + std::chrono::duration time_chrono = std::chrono::duration_cast>(end - start); + std::cout << "Elapsed time: " << time_chrono.count() << std::endl; + + return 0; +} diff --git a/test/particle/benchmarkParticleUpdate.cpp b/test/particle/benchmarkParticleUpdate.cpp index 3e1a2c617..ceb46eafb 100644 --- a/test/particle/benchmarkParticleUpdate.cpp +++ b/test/particle/benchmarkParticleUpdate.cpp @@ -101,11 +101,6 @@ class ChargedParticles : public ippl::ParticleBase { setBCAllPeriodic(); } - void update() { - PLayout& layout = this->getLayout(); - layout.update(*this); - } - void gatherStatistics(unsigned int totalP, int iteration) { unsigned int Total_particles = 0; @@ -271,9 +266,10 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(particleCreation); P->E = 0.0; + bunch_type bunchBuffer(PL); static IpplTimings::TimerRef UpdateTimer = IpplTimings::getTimer("ParticleUpdate"); IpplTimings::startTimer(UpdateTimer); - P->update(); + PL.update(*P, bunchBuffer); IpplTimings::stopTimer(UpdateTimer); @@ -325,7 +321,7 @@ int main(int argc, char *argv[]){ IpplTimings::stopTimer(RTimer); IpplTimings::startTimer(UpdateTimer); - P->update(); + PL.update(*P, bunchBuffer); IpplTimings::stopTimer(UpdateTimer); diff --git a/unit_tests/BareField/BareField.cpp b/unit_tests/BareField/BareField.cpp new file mode 100644 index 000000000..1b9e02de0 --- /dev/null +++ b/unit_tests/BareField/BareField.cpp @@ -0,0 +1,197 @@ +// +// Unit test BareFieldTest +// Test the functionality of the class BareField. +// +// Copyright (c) 2021 Paul Scherrer Institut, Villigen PSI, Switzerland +// All rights reserved +// +// This file is part of IPPL. 
+//
+// IPPL is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// You should have received a copy of the GNU General Public License
+// along with IPPL. If not, see <https://www.gnu.org/licenses/>.
+//
+#include "Ippl.h"
+
+#include <cmath>
+#include "gtest/gtest.h"
+
+class BareFieldTest : public ::testing::Test {
+
+public:
+    static constexpr size_t dim = 3;
+    typedef ippl::BareField<double, dim> field_type;
+    typedef ippl::BareField<ippl::Vector<double, dim>, dim> vfield_type;
+
+    BareFieldTest()
+    : nPoints(8)
+    {
+        setup();
+    }
+
+    void setup() {
+        ippl::Index I(nPoints);
+        ippl::NDIndex<dim> owned(I, I, I);
+
+        ippl::e_dim_tag domDec[dim];    // Specifies SERIAL, PARALLEL dims
+        for (unsigned int d = 0; d < dim; d++)
+            domDec[d] = ippl::PARALLEL;
+
+        ippl::FieldLayout<dim> layout(owned, domDec);
+        field = std::make_unique<field_type>(layout);
+        vfield = std::make_unique<vfield_type>(layout);
+    }
+
+    std::unique_ptr<field_type> field;
+    std::unique_ptr<vfield_type> vfield;
+    size_t nPoints;
+};
+
+
+
+TEST_F(BareFieldTest, Sum) {
+    double val = 1.0;
+
+    *field = val;
+
+    double sum = field->sum();
+
+    ASSERT_DOUBLE_EQ(val * std::pow(nPoints, dim), sum);
+}
+
+TEST_F(BareFieldTest, Min) {
+    const ippl::NDIndex<dim> lDom = field->getLayout().getLocalNDIndex();
+    const int shift = field->getNghost();
+
+    auto view = field->getView();
+    auto mirror = Kokkos::create_mirror_view(view);
+    Kokkos::deep_copy(mirror, view);
+
+    for (size_t i = shift; i < mirror.extent(0) - shift; ++i) {
+        for (size_t j = shift; j < mirror.extent(1) - shift; ++j) {
+            for (size_t k = shift; k < mirror.extent(2) - shift; ++k) {
+                const size_t ig = i + lDom[0].first();
+                const size_t jg = j + lDom[1].first();
+                const size_t kg = k + lDom[2].first();
+
+                mirror(i, j, k) = -1.0 + (ig + jg + kg);
+            }
+        }
+    }
+    Kokkos::deep_copy(view, mirror);
+
+    double min = field->min();
+    // minimum value -1 + nghost + nghost + nghost
+    ASSERT_DOUBLE_EQ(min,
2.);
+}
+
+TEST_F(BareFieldTest, Max) {
+    const ippl::NDIndex<dim> lDom = field->getLayout().getLocalNDIndex();
+    const int shift = field->getNghost();
+
+    auto view = field->getView();
+    auto mirror = Kokkos::create_mirror_view(view);
+    Kokkos::deep_copy(mirror, view);
+
+    for (size_t i = shift; i < mirror.extent(0) - shift; ++i) {
+        for (size_t j = shift; j < mirror.extent(1) - shift; ++j) {
+            for (size_t k = shift; k < mirror.extent(2) - shift; ++k) {
+                const size_t ig = i + lDom[0].first();
+                const size_t jg = j + lDom[1].first();
+                const size_t kg = k + lDom[2].first();
+
+                mirror(i, j, k) = -1.0 + (ig + jg + kg);
+            }
+        }
+    }
+    Kokkos::deep_copy(view, mirror);
+
+    double max = field->max();
+    double expected = -1. + nPoints * 3;
+    ASSERT_DOUBLE_EQ(max, expected);
+}
+
+TEST_F(BareFieldTest, Prod) {
+    *field = 2.;
+    double val = field->prod();
+    ASSERT_DOUBLE_EQ(val, pow(2, nPoints * nPoints * nPoints));
+}
+
+TEST_F(BareFieldTest, ScalarMultiplication) {
+    *field = 1.;
+    *field = *field * 10;
+
+    const int shift = field->getNghost();
+
+    auto view = field->getView();
+    auto mirror = Kokkos::create_mirror_view(view);
+    Kokkos::deep_copy(mirror, view);
+
+    for (size_t i = shift; i < mirror.extent(0) - shift; ++i) {
+        for (size_t j = shift; j < mirror.extent(1) - shift; ++j) {
+            for (size_t k = shift; k < mirror.extent(2) - shift; ++k) {
+                ASSERT_DOUBLE_EQ(mirror(i,j,k), 10.);
+            }
+        }
+    }
+}
+
+TEST_F(BareFieldTest, DotProduct) {
+    *vfield = 1.;
+    *field = 5.
* dot(*vfield, *vfield); + + const int shift = field->getNghost(); + + auto view = field->getView(); + auto mirror = Kokkos::create_mirror_view(view); + Kokkos::deep_copy(mirror, view); + + for (size_t i = shift; i < mirror.extent(0) - shift; ++i) { + for (size_t j = shift; j < mirror.extent(1) - shift; ++j) { + for (size_t k = shift; k < mirror.extent(2) - shift; ++k) { + ASSERT_DOUBLE_EQ(mirror(i,j,k), 15.); + } + } + } +} + +TEST_F(BareFieldTest, AllFuncs) { + double pi = acos(-1.); + double alpha = pi / 4; + *field = alpha; + // Compute new value + double beta = fabs(7.0 * (sin(alpha) * cos(alpha))/(tan(alpha) * acos(alpha)) + - exp(alpha) + erf(alpha) + (asin(alpha) * cosh(alpha)) / (atan(alpha) + * sinh(alpha)) + tanh(alpha) * log(alpha) + - log10(alpha) * sqrt(alpha) + floor(alpha) * ceil(alpha)); + + // Compute same value via field ops + *field = fabs(7.0 * (sin(*field) * cos(*field))/(tan(*field) * acos(*field)) + - exp(*field) + erf(*field) + (asin(*field) * cosh(*field)) / (atan(*field) + * sinh(*field)) + tanh(*field) * log(*field) + - log10(*field) * sqrt(*field) + floor(*field) * ceil(*field)); + + const int shift = field->getNghost(); + + auto view = field->getView(); + auto mirror = Kokkos::create_mirror_view(view); + Kokkos::deep_copy(mirror, view); + + for (size_t i = shift; i < mirror.extent(0) - shift; ++i) { + for (size_t j = shift; j < mirror.extent(1) - shift; ++j) { + for (size_t k = shift; k < mirror.extent(2) - shift; ++k) { + ASSERT_DOUBLE_EQ(mirror(i,j,k), beta); + } + } + } +} + +int main(int argc, char *argv[]) { + Ippl ippl(argc,argv); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/unit_tests/BareField/CMakeLists.txt b/unit_tests/BareField/CMakeLists.txt new file mode 100644 index 000000000..ba8d52104 --- /dev/null +++ b/unit_tests/BareField/CMakeLists.txt @@ -0,0 +1,33 @@ +file (RELATIVE_PATH _relPath "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}") +message (STATUS "Adding unit tests found in 
${_relPath}")
+
+include_directories (
+    ${CMAKE_SOURCE_DIR}/src
+    ${GTEST_INCLUDE_DIRS}
+)
+
+link_directories (
+    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${GTEST_LIBRARY_DIRS}
+    ${Boost_LIBRARY_DIRS}
+    ${Kokkos_DIR}/..
+)
+
+add_executable (BareField BareField.cpp)
+target_link_libraries (
+    BareField
+    ippl
+    pthread
+    ${MPI_CXX_LIBRARIES}
+    ${GTEST_BOTH_LIBRARIES}
+    ${Boost_LIBRARIES}
+)
+
+# vi: set et ts=4 sw=4 sts=4:
+
+# Local Variables:
+# mode: cmake
+# cmake-tab-width: 4
+# indent-tabs-mode: nil
+# require-final-newline: nil
+# End:
diff --git a/unit_tests/Field/Field.cpp b/unit_tests/Field/Field.cpp
index 59738a6e4..449e57db5 100644
--- a/unit_tests/Field/Field.cpp
+++ b/unit_tests/Field/Field.cpp
@@ -1,6 +1,6 @@
 //
 // Unit test FieldTest
-// Test the functionality of the classes Field and BareField.
+// Test the functionality of the class Field.
 //
 // Copyright (c) 2020, Matthias Frey, Paul Scherrer Institut, Villigen PSI, Switzerland
 // All rights reserved
@@ -26,6 +26,7 @@ class FieldTest : public ::testing::Test {
     static constexpr size_t dim = 3;
     typedef ippl::Field<double, dim> field_type;
     typedef ippl::UniformCartesian<double, dim> mesh_type;
+    typedef ippl::FieldLayout<dim> layout_type;
 
     FieldTest()
     : nPoints(8)
@@ -41,111 +42,23 @@ class FieldTest : public ::testing::Test {
         for (unsigned int d = 0; d < dim; d++)
             domDec[d] = ippl::PARALLEL;
 
-        ippl::FieldLayout<dim> layout(owned, domDec);
+        layout = std::make_shared<layout_type>(owned, domDec);
 
         double dx = 1.0 / double(nPoints);
         ippl::Vector<double, dim> hx = {dx, dx, dx};
         ippl::Vector<double, dim> origin = {0, 0, 0};
         mesh = std::make_shared<mesh_type>(owned, hx, origin);
 
-        field = std::make_unique<field_type>(*mesh, layout);
+        field = std::make_unique<field_type>(*mesh, *layout);
     }
 
     std::unique_ptr<field_type> field;
     std::shared_ptr<mesh_type> mesh;
+    std::shared_ptr<layout_type> layout;
     size_t nPoints;
 };
 
-
-TEST_F(FieldTest, Sum) {
-    double val = 1.0;
-
-    *field = val;
-
-    double sum = field->sum();
-
-    ASSERT_DOUBLE_EQ(val * std::pow(nPoints, dim), sum);
-}
-
-TEST_F(FieldTest, Min) {
-    const ippl::NDIndex<dim> lDom =
field->getLayout().getLocalNDIndex(); - const int shift = field->getNghost(); - - auto view = field->getView(); - auto mirror = Kokkos::create_mirror_view(view); - Kokkos::deep_copy(mirror, view); - - for (size_t i = shift; i < mirror.extent(0) - shift; ++i) { - for (size_t j = shift; j < mirror.extent(1) - shift; ++j) { - for (size_t k = shift; k < mirror.extent(2) - shift; ++k) { - const size_t ig = i + lDom[0].first(); - const size_t jg = j + lDom[1].first(); - const size_t kg = k + lDom[2].first(); - - mirror(i, j, k) = -1.0 + (ig + jg + kg); - } - } - } - Kokkos::deep_copy(view, mirror); - - double min = field->min(); - // minimum value -1 + nghost + nghost + nghost - ASSERT_DOUBLE_EQ(min, 2.); -} - -TEST_F(FieldTest, Max) { - const ippl::NDIndex lDom = field->getLayout().getLocalNDIndex(); - const int shift = field->getNghost(); - - auto view = field->getView(); - auto mirror = Kokkos::create_mirror_view(view); - Kokkos::deep_copy(mirror, view); - - for (size_t i = shift; i < mirror.extent(0) - shift; ++i) { - for (size_t j = shift; j < mirror.extent(1) - shift; ++j) { - for (size_t k = shift; k < mirror.extent(2) - shift; ++k) { - const size_t ig = i + lDom[0].first(); - const size_t jg = j + lDom[1].first(); - const size_t kg = k + lDom[2].first(); - - mirror(i, j, k) = -1.0 + (ig + jg + kg); - } - } - } - Kokkos::deep_copy(view, mirror); - - double max = field->max(); - double expected = -1. 
+ nPoints * 3; - ASSERT_DOUBLE_EQ(max, expected); -} - -TEST_F(FieldTest, Prod) { - *field = 2.; - double val = field->prod(); - ASSERT_DOUBLE_EQ(val, pow(2, nPoints * nPoints * nPoints)); -} - -TEST_F(FieldTest, ScalarMultiplication) { - *field = 1.; - *field = *field * 10; - - const int shift = field->getNghost(); - - auto view = field->getView(); - auto mirror = Kokkos::create_mirror_view(view); - Kokkos::deep_copy(mirror, view); - - for (size_t i = shift; i < mirror.extent(0) - shift; ++i) { - for (size_t j = shift; j < mirror.extent(1) - shift; ++j) { - for (size_t k = shift; k < mirror.extent(2) - shift; ++k) { - ASSERT_DOUBLE_EQ(mirror(i,j,k), 10.); - } - } - } -} - - TEST_F(FieldTest, Norm1) { double val = -1.5; @@ -232,6 +145,28 @@ TEST_F(FieldTest, VolumeIntegral2) { ASSERT_DOUBLE_EQ(integral, volume); } +TEST_F(FieldTest, Grad) { + *field = 1.; + + ippl::Field, dim> vfield(*mesh, *layout); + vfield = grad(*field); + + const int shift = vfield.getNghost(); + auto view = vfield.getView(); + auto mirror = Kokkos::create_mirror_view(view); + Kokkos::deep_copy(mirror, view); + + for (size_t i = shift; i < mirror.extent(0) - shift; ++i) { + for (size_t j = shift; j < mirror.extent(1) - shift; ++j) { + for (size_t k = shift; k < mirror.extent(2) - shift; ++k) { + for (size_t d = 0; d < dim; ++d) { + ASSERT_DOUBLE_EQ(mirror(i, j, k)[d], 0.); + } + } + } + } +} + int main(int argc, char *argv[]) { Ippl ippl(argc,argv); diff --git a/unit_tests/PIC/PIC.cpp b/unit_tests/PIC/PIC.cpp index 9095e3c9d..aa577b0e9 100644 --- a/unit_tests/PIC/PIC.cpp +++ b/unit_tests/PIC/PIC.cpp @@ -45,10 +45,10 @@ class PICTest : public ::testing::Test { typedef ippl::ParticleAttrib charge_container_type; charge_container_type Q; - void update() { - PLayout& layout = this->getLayout(); - layout.update(*this); - } + //void update() { + // PLayout& layout = this->getLayout(); + // layout.update(*this); + //} }; @@ -80,9 +80,9 @@ class PICTest : public ::testing::Test { field = 
std::make_unique(mesh_m, layout_m); - pl_m = playout_type(layout_m, mesh_m); + pl = playout_type(layout_m, mesh_m); - bunch = std::make_unique(pl_m); + bunch = std::make_unique(pl); int nRanks = Ippl::Comm->size(); if (nParticles % nRanks > 0) { @@ -116,11 +116,11 @@ class PICTest : public ::testing::Test { std::unique_ptr bunch; size_t nParticles; size_t nPoints; + playout_type pl; private: flayout_type layout_m; mesh_type mesh_m; - playout_type pl_m; }; @@ -133,13 +133,14 @@ TEST_F(PICTest, Scatter) { bunch->Q = charge; - bunch->update(); + bunch_type bunchBuffer(pl); + pl.update(*bunch, bunchBuffer); scatter(bunch->Q, *field, bunch->R); double totalcharge = field->sum(); - ASSERT_DOUBLE_EQ(nParticles * charge, totalcharge); + ASSERT_NEAR((nParticles * charge - totalcharge)/(nParticles * charge), 0.0, 1e-13); } @@ -149,11 +150,12 @@ TEST_F(PICTest, Gather) { bunch->Q = 0.0; - bunch->update(); + bunch_type bunchBuffer(pl); + pl.update(*bunch, bunchBuffer); gather(bunch->Q, *field, bunch->R); - ASSERT_DOUBLE_EQ(nParticles, bunch->Q.sum()); + ASSERT_DOUBLE_EQ((nParticles - bunch->Q.sum())/nParticles, 0.0); } diff --git a/unit_tests/Particle/CMakeLists.txt b/unit_tests/Particle/CMakeLists.txt index 6aa4c5248..39887f953 100644 --- a/unit_tests/Particle/CMakeLists.txt +++ b/unit_tests/Particle/CMakeLists.txt @@ -39,6 +39,7 @@ target_link_libraries ( ${GTEST_BOTH_LIBRARIES} ${Boost_LIBRARIES} ) + # vi: set et ts=4 sw=4 sts=4: # Local Variables: diff --git a/unit_tests/Particle/ParticleBase.cpp b/unit_tests/Particle/ParticleBase.cpp index b53af176e..a3d16e944 100644 --- a/unit_tests/Particle/ParticleBase.cpp +++ b/unit_tests/Particle/ParticleBase.cpp @@ -41,42 +41,50 @@ class ParticleBaseTest : public ::testing::Test { -TEST_F(ParticleBaseTest, Create) { +TEST_F(ParticleBaseTest, CreateAndDestroy) { + if (Ippl::Comm->size() > 1) { + std::cerr << "ParticleBaseTest::CreateAndDestroy test only works for one MPI rank!" 
<< std::endl; + return; + } size_t nParticles = 1000; + // Create 1000 particles pbase->create(nParticles); size_t localnum = pbase->getLocalNum(); EXPECT_EQ(nParticles, localnum); -} - - -TEST_F(ParticleBaseTest, Destroy) { - size_t nParticles = 1000; - size_t nDestroy = 500; - - pbase->create(nParticles); - - using HostMirror = typename bunch_type::particle_index_type::HostMirror; - HostMirror ID_host = pbase->ID.getHostMirror(); - - Kokkos::deep_copy(ID_host, pbase->ID.getView()); - for (size_t i = 0; i < nDestroy; ++i) { - ID_host(i) = -1; + // Check that the right IDs are present + auto mirror = pbase->ID.getHostMirror(); + Kokkos::deep_copy(mirror, pbase->ID.getView()); + for (size_t i = 0; i < mirror.extent(0); ++i) { + EXPECT_EQ(mirror[i], i); } - Kokkos::deep_copy(pbase->ID.getView(), ID_host); - - pbase->destroy(); - - size_t localnum = pbase->getLocalNum(); - - EXPECT_EQ(nParticles - nDestroy, localnum); + // Delete all the particles with odd indices + // (i.e. mark as invalid) + typedef typename ippl::detail::ViewType::view_type bool_type; + bool_type invalid("invalid", nParticles); + auto mirror2 = Kokkos::create_mirror(invalid); + for (size_t i = 0; i < 500; ++i) { + mirror2(2 * i) = false; + mirror2(2 * i + 1) = true; + } + Kokkos::deep_copy(invalid, mirror2); + pbase->destroy(invalid, 500); + + // Verify remaining indices + Kokkos::deep_copy(mirror, pbase->ID.getView()); + for (size_t i = 0; i < 500; ++i) { + // The even indices contain the original particles + // The particles with odd indices are deleted and replaced + // with particles with even indices (in ascending order w.r.t. index) + int index = i % 2 == 0 ? 
i : 500 + (i - 1); + EXPECT_EQ(mirror[i], index); + } } - TEST_F(ParticleBaseTest, AddAttribute) { using attrib_type = ippl::ParticleAttrib; @@ -125,4 +133,4 @@ int main(int argc, char *argv[]) { Ippl ippl(argc,argv); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); -} \ No newline at end of file +} diff --git a/unit_tests/Particle/ParticleSendRecv.cpp b/unit_tests/Particle/ParticleSendRecv.cpp index 4473ba650..56f460123 100644 --- a/unit_tests/Particle/ParticleSendRecv.cpp +++ b/unit_tests/Particle/ParticleSendRecv.cpp @@ -83,9 +83,9 @@ class ParticleSendRecv : public ::testing::Test { mesh_m = mesh_type(owned, hx, origin); - pl_m = playout_type(layout_m, mesh_m); + pl = playout_type(layout_m, mesh_m); - bunch = std::make_unique(pl_m); + bunch = std::make_unique(pl); using BC = ippl::BC; @@ -120,7 +120,7 @@ class ParticleSendRecv : public ::testing::Test { Kokkos::deep_copy(bunch->R.getView(), R_host); bunch->Q = 1.0; - RegionLayout_t RLayout = pl_m.getRegionLayout(); + RegionLayout_t RLayout = pl.getRegionLayout(); auto& positions = bunch->R.getView(); typename RegionLayout_t::view_type Regions = RLayout.getdLocalRegions(); @@ -157,18 +157,20 @@ class ParticleSendRecv : public ::testing::Test { std::unique_ptr bunch; unsigned int nParticles; size_t nPoints; + playout_type pl; private: flayout_type layout_m; mesh_type mesh_m; - playout_type pl_m; }; TEST_F(ParticleSendRecv, SendAndRecieve) { - bunch->update(); + bunch_type bunchBuffer(pl); + pl.update(*bunch, bunchBuffer); + //bunch->update(); ER_t::view_type::host_mirror_type ER_host = bunch->expectedRank.getHostMirror(); Kokkos::resize(ER_host, bunch->expectedRank.size()); Kokkos::deep_copy(ER_host, bunch->expectedRank.getView());