Skip to content

Commit

Permalink
Merge branch 'buffer-factory' into 'master'
Browse files Browse the repository at this point in the history
Buffer Preallocation and other Optimizations

See merge request OPAL/Libraries/ippl!97
  • Loading branch information
Arc676 committed Sep 1, 2021
2 parents cb82af9 + 7d33805 commit 9a962e7
Show file tree
Hide file tree
Showing 58 changed files with 2,643 additions and 1,388 deletions.
48 changes: 35 additions & 13 deletions src/Communicate/Archive.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
//
// Class Ippl
// Class Archive
// Class to (de-)serialize in MPI communication.
//
// When data is exchanged between MPI ranks, it is stored in one-dimensional
// arrays. These have the type detail::Archive, which is a wrapper around a
// one-dimensional Kokkos view of type char. The data is then transferred using
// MPI send/recv calls. Note that the archive type differs from other buffers in
// that it has type char and thus contains raw bytes, unlike typed buffers
// such as detail::FieldBufferData used by HaloCells.
//
// Copyright (c) 2020, Matthias Frey, Paul Scherrer Institut, Villigen PSI, Switzerland
// All rights reserved
//
Expand All @@ -18,6 +25,7 @@
#ifndef IPPL_ARCHIVE_H
#define IPPL_ARCHIVE_H

#include "Types/IpplTypes.h"
#include "Types/ViewTypes.h"
#include "Types/Vector.h"

Expand All @@ -34,17 +42,15 @@ namespace ippl {
public:
using buffer_type = typename ViewType<char, 1, Properties...>::view_type;
using pointer_type = typename buffer_type::pointer_type;
using size_type = typename buffer_type::size_type;

Archive(int size = 0);
Archive(size_type size = 0);

/*!
* Serialize.
* @param view to take data from.
*/
template <typename T>
void operator<<(const Kokkos::View<T*>& view);

void serialize(const Kokkos::View<T*>& view, size_type nsends);

/*!
* Serialize vector attributes
Expand All @@ -55,16 +61,14 @@ namespace ippl {
* @param view to take data from.
*/
template <typename T, unsigned Dim>
void operator<<(const Kokkos::View<Vector<T, Dim>*>& view);

void serialize(const Kokkos::View<Vector<T, Dim>*>& view, size_type nsends);

/*!
* Deserialize.
* @param view to put data to
*/
template <typename T>
void operator>>(Kokkos::View<T*>& view);

void deserialize(Kokkos::View<T*>& view, size_type nrecvs);

/*!
* Deserialize vector attributes
Expand All @@ -75,8 +79,7 @@ namespace ippl {
* @param view to put data to
*/
template <typename T, unsigned Dim>
void operator>>(Kokkos::View<Vector<T, Dim>*>& view);

void deserialize(Kokkos::View<Vector<T, Dim>*>& view, size_type nrecvs);

/*!
* @returns a pointer to the data of the buffer
Expand All @@ -90,16 +93,35 @@ namespace ippl {
* @returns the number of bytes written to the buffer so far (the write position)
*/
size_type getSize() const {
// This is the write position, i.e. the number of bytes serialized so far
// (serialize() advances writepos_m); the allocated capacity is reported
// separately by getBufferSize().
return writepos_m;
}

/*!
* @returns the allocated size of the underlying buffer view, independent of
* how much data has been written to it
*/
size_type getBufferSize() const {
return buffer_m.size();
}

/*!
* Resize the underlying buffer view using Kokkos::resize.
* The read and write positions are not modified.
* @param size the new size of the buffer
*/
// NOTE(review): Kokkos::resize is documented as preserving existing
// contents -- confirm against the Kokkos version in use.
void resizeBuffer(size_type size) {
Kokkos::resize(buffer_m, size);
}

/*!
* Reallocate the underlying buffer view using Kokkos::realloc.
* The read and write positions are not modified.
* @param size the new size of the buffer
*/
// NOTE(review): Kokkos::realloc is documented as NOT preserving existing
// contents -- confirm against the Kokkos version in use.
void reallocBuffer(size_type size) {
Kokkos::realloc(buffer_m, size);
}

/*!
* Reset the write position to the start of the buffer, so that the next
* serialization writes from offset 0. The buffer itself is left untouched.
*/
void resetWritePos() {
writepos_m = 0;
}
/*!
* Reset the read position to the start of the buffer, so that the next
* deserialization reads from offset 0. The buffer itself is left untouched.
*/
void resetReadPos() {
readpos_m = 0;
}

~Archive() = default;

private:
//! write position for serialization
size_t writepos_m;
size_type writepos_m;
//! read position for deserialization
size_t readpos_m;
size_type readpos_m;
//! serialized data
buffer_type buffer_m;
};
Expand Down
70 changes: 44 additions & 26 deletions src/Communicate/Archive.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// Class Ippl
// Class Archive
// Class to (de-)serialize in MPI communication.
//
// Copyright (c) 2020, Matthias Frey, Paul Scherrer Institut, Villigen PSI, Switzerland
Expand All @@ -23,76 +23,94 @@ namespace ippl {
namespace detail {

template <class... Properties>
Archive<Properties...>::Archive(int size)
Archive<Properties...>::Archive(size_type size)
: writepos_m(0)
, readpos_m(0)
, buffer_m("buffer", size)
{ }


template <class... Properties>
template <typename T>
void Archive<Properties...>::operator<<(const Kokkos::View<T*>& view) {
void Archive<Properties...>::serialize(const Kokkos::View<T*>& view,
size_type nsends) {
size_t size = sizeof(T);
Kokkos::resize(buffer_m, buffer_m.size() + size * view.size());
Kokkos::parallel_for(
"Archive::serialize()", view.extent(0),
KOKKOS_CLASS_LAMBDA(const size_t i) {
"Archive::serialize()", nsends,
KOKKOS_CLASS_LAMBDA(const size_type i) {
std::memcpy(buffer_m.data() + i * size + writepos_m,
view.data() + i,
size);
});
writepos_m += size * view.size();
Kokkos::fence();
writepos_m += size * nsends;
}


template <class... Properties>
template <typename T, unsigned Dim>
void Archive<Properties...>::operator<<(const Kokkos::View<Vector<T, Dim>*>& view) {
void Archive<Properties...>::serialize(const Kokkos::View<Vector<T, Dim>*>& view,
size_type nsends) {
size_t size = sizeof(T);
Kokkos::resize(buffer_m, buffer_m.size() + Dim * size * view.size());
using mdrange_t = Kokkos::MDRangePolicy<Kokkos::Rank<2>>;
// Default index type for range policies is int64,
// so we have to explicitly specify size_type (uint64)
using mdrange_t = Kokkos::MDRangePolicy<Kokkos::Rank<2>,
Kokkos::IndexType<size_type>>;
Kokkos::parallel_for(
"Archive::serialize()",
mdrange_t({0, 0}, {(long int)view.extent(0), Dim}),
KOKKOS_CLASS_LAMBDA(const size_t i, const size_t d) {
// The constructor for Kokkos range policies always
// expects int64 regardless of index type provided
// by template parameters, so the typecast is necessary
// to avoid compiler warnings
mdrange_t({0, 0}, {(long int)nsends, Dim}),
KOKKOS_CLASS_LAMBDA(const size_type i, const size_t d) {
std::memcpy(buffer_m.data() + (Dim * i + d) * size + writepos_m,
&(*(view.data() + i))[d],
size);
});
writepos_m += Dim * size * view.size();
Kokkos::fence();
writepos_m += Dim * size * nsends;
}


template <class... Properties>
template <typename T>
void Archive<Properties...>::operator>>(Kokkos::View<T*>& view) {
void Archive<Properties...>::deserialize(Kokkos::View<T*>& view,
size_type nrecvs) {
size_t size = sizeof(T);
if(nrecvs > view.extent(0)) {
Kokkos::realloc(view, nrecvs);
}
Kokkos::parallel_for(
"Archive::deserialize()", view.extent(0),
KOKKOS_CLASS_LAMBDA(const size_t i) {
"Archive::deserialize()", nrecvs,
KOKKOS_CLASS_LAMBDA(const size_type i) {
std::memcpy(view.data() + i,
buffer_m.data() + i * size + readpos_m,
size);
});
readpos_m += size * view.size();
// Wait for deserialization kernel to complete
// (as with serialization kernels)
Kokkos::fence();
readpos_m += size * nrecvs;
}


template <class... Properties>
template <typename T, unsigned Dim>
void Archive<Properties...>::operator>>(Kokkos::View<Vector<T, Dim>*>& view) {
void Archive<Properties...>::deserialize(Kokkos::View<Vector<T, Dim>*>& view,
size_type nrecvs) {
size_t size = sizeof(T);
using mdrange_t = Kokkos::MDRangePolicy<Kokkos::Rank<2>>;
if(nrecvs > view.extent(0)) {
Kokkos::realloc(view, nrecvs);
}
using mdrange_t = Kokkos::MDRangePolicy<Kokkos::Rank<2>,
Kokkos::IndexType<size_type>>;
Kokkos::parallel_for(
"Archive::deserialize()",
mdrange_t({0, 0}, {(long int)view.extent(0), Dim}),
KOKKOS_CLASS_LAMBDA(const size_t i, const size_t d) {
mdrange_t({0, 0}, {(long int)nrecvs, Dim}),
KOKKOS_CLASS_LAMBDA(const size_type i, const size_t d) {
std::memcpy(&(*(view.data() + i))[d],
buffer_m.data() + (Dim * i + d) * size + readpos_m,
size);
});
readpos_m += Dim * size * view.size();
Kokkos::fence();
readpos_m += Dim * size * nrecvs;
}
}
}
52 changes: 52 additions & 0 deletions src/Communicate/Buffers.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
//
// Buffers.cpp
// Interface for globally accessible buffer factory for communication
//
// Data sent between MPI ranks has to be stored in a buffer for sending and receiving.
// To reduce the number of times memory has to be allocated and freed, the buffer
// factory interface allows buffers to be reused. This is especially relevant on
// GPUs, as CUDA allocation calls are expensive. To avoid reallocating the buffers
// in the case that the amount of data to be exchanged increases, when a new buffer
// is created, an amount of memory greater than the requested size is allocated
// for the new buffer. The factor by which memory is overallocated is determined by
// a data member in Communicate, which can be set and queried at runtime. Only new
// buffers are overallocated. If a buffer is requested with the same ID as a buffer
// that has been previously allocated, the same buffer will be used. If the requested
// size exceeds the buffer size, that buffer will be reallocated to have exactly
// the requested size.
//
// Currently, the buffer factory is used for application of periodic boundary
// conditions; halo cell exchange along faces, edges, and vertices; as well as
// exchanging particle data between ranks.
//
// Copyright (c) 2021 Paul Scherrer Institut, Villigen PSI, Switzerland
// All rights reserved
//
// This file is part of IPPL.
//
// IPPL is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// You should have received a copy of the GNU General Public License
// along with IPPL. If not, see <https://www.gnu.org/licenses/>.
//

#include "Communicate.h"

namespace ippl {

void Communicate::setDefaultOverallocation(int factor) {
defaultOveralloc_m = factor;
}

void Communicate::deleteBuffer(int id) {
buffers_m.erase(id);
}

void Communicate::deleteAllBuffers() {
buffers_m.clear();
}

}
58 changes: 58 additions & 0 deletions src/Communicate/Buffers.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
//
// Buffers.hpp
// Interface for globally accessible buffer factory for communication
//
// Data sent between MPI ranks has to be stored in a buffer for sending and receiving.
// To reduce the number of times memory has to be allocated and freed, the buffer
// factory interface allows buffers to be reused. This is especially relevant on
// GPUs, as CUDA allocation calls are expensive. To avoid reallocating the buffers
// in the case that the amount of data to be exchanged increases, when a new buffer
// is created, an amount of memory greater than the requested size is allocated
// for the new buffer. The factor by which memory is overallocated is determined by
// a data member in Communicate, which can be set and queried at runtime. Only new
// buffers are overallocated. If a buffer is requested with the same ID as a buffer
// that has been previously allocated, the same buffer will be used. If the requested
// size exceeds the buffer size, that buffer will be reallocated to have exactly
// the requested size.
//
// Currently, the buffer factory is used for application of periodic boundary
// conditions; halo cell exchange along faces, edges, and vertices; as well as
// exchanging particle data between ranks.
//
// Copyright (c) 2021 Paul Scherrer Institut, Villigen PSI, Switzerland
// All rights reserved
//
// This file is part of IPPL.
//
// IPPL is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// You should have received a copy of the GNU General Public License
// along with IPPL. If not, see <https://www.gnu.org/licenses/>.
//

namespace ippl {

    /*!
     * Obtain a communication buffer from the buffer factory.
     *
     * If a buffer with the given ID already exists, it is reused; when it is
     * smaller than the requested size it is first reallocated to exactly the
     * requested size. Otherwise a new buffer is created whose size is the
     * requested size multiplied by the larger of the two overallocation
     * factors (the argument and the default stored in Communicate).
     *
     * @tparam T the element type; the requested size is scaled by sizeof(T)
     * @param id the ID of the buffer
     * @param size the requested capacity, in number of elements of type T
     * @param overallocation overallocation factor applied to new buffers only
     * @returns the buffer registered under the given ID
     */
    template <typename T>
    Communicate::buffer_type Communicate::getBuffer(int id,
            size_type size, int overallocation) {
        // Convert the element count into a size in bytes
        size *= sizeof(T);

        // A single lookup serves both the existence check and the access
        auto existing = buffers_m.find(id);
        if (existing != buffers_m.end()) {
            buffer_type buf = existing->second;
            if (buf->getBufferSize() < size) {
                buf->reallocBuffer(size);
            }
            return buf;
        }

        // Never let the factor drop below 1: a factor of zero would create a
        // buffer smaller than requested, and a negative factor would wrap
        // around when converted to the unsigned size type.
        const int factor = std::max(1,
                std::max(overallocation, defaultOveralloc_m));

        auto& slot = buffers_m[id];
        slot = std::make_shared<archive_type>(size *
                static_cast<size_type>(factor));
        return slot;
    }

}
1 change: 1 addition & 0 deletions src/Communicate/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
set (_SRCS
Communicate.cpp
Buffers.cpp
)

set (_HDRS
Expand Down
12 changes: 12 additions & 0 deletions src/Communicate/Communicate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,16 @@ namespace ippl {
// Construct a Communicate object from an existing MPI communicator.
// The communicator is handed to the boost::mpi::communicator base class
// together with kind_type::comm_duplicate.
// NOTE(review): comm_duplicate presumably makes Boost.MPI duplicate the
// communicator instead of adopting it -- confirm against the Boost.MPI docs.
Communicate::Communicate(const MPI_Comm& comm)
: boost::mpi::communicator(comm, kind_type::comm_duplicate)
{}

/*!
 * Post a non-blocking receive of raw bytes into an archive.
 *
 * The process is aborted if the message size cannot be represented as an
 * int, since the size is passed to MPI_Irecv as the element count.
 *
 * @param src the rank to receive from
 * @param tag the MPI tag of the message
 * @param ar the archive whose buffer receives the data
 * @param request the MPI request object associated with this receive
 * @param msize the size of the message in bytes
 */
void Communicate::irecv(int src, int tag,
                        archive_type& ar, MPI_Request& request, size_type msize)
{
    const bool fitsInInt = (msize <= INT_MAX);
    if (!fitsInInt) {
        std::cerr << "Message size exceeds range of int" << std::endl;
        std::abort();
    }

    // The range check above guarantees that this conversion is lossless
    const int count = static_cast<int>(msize);
    MPI_Irecv(ar.getBuffer(), count, MPI_BYTE,
              src, tag, *this, &request);
}

}
Loading

0 comments on commit 9a962e7

Please sign in to comment.