diff --git a/src/atlas/array/ArrayView.h b/src/atlas/array/ArrayView.h index 51ecfecf2..f94551b15 100644 --- a/src/atlas/array/ArrayView.h +++ b/src/atlas/array/ArrayView.h @@ -26,6 +26,7 @@ namespace array { extern template class ArrayView; #define EXPLICIT_TEMPLATE_DECLARATION( RANK ) \ + EXPLICIT_TEMPLATE_DECLARATION_TYPE_RANK( char, RANK ); \ EXPLICIT_TEMPLATE_DECLARATION_TYPE_RANK( int, RANK ); \ EXPLICIT_TEMPLATE_DECLARATION_TYPE_RANK( long, RANK ); \ EXPLICIT_TEMPLATE_DECLARATION_TYPE_RANK( float, RANK ); \ diff --git a/src/atlas/array/DataType.h b/src/atlas/array/DataType.h index 5666aa7d8..efa945897 100644 --- a/src/atlas/array/DataType.h +++ b/src/atlas/array/DataType.h @@ -21,6 +21,7 @@ namespace array { class DataType { public: typedef long kind_t; + static const kind_t KIND_INT8 = -1; static const kind_t KIND_INT32 = -4; static const kind_t KIND_INT64 = -8; static const kind_t KIND_REAL32 = 4; @@ -30,6 +31,7 @@ class DataType { template static DataType create(); + static DataType int8() { return DataType( KIND_INT8 ); } static DataType int32() { return DataType( KIND_INT32 ); } static DataType int64() { return DataType( KIND_INT64 ); } static DataType real32() { return DataType( KIND_REAL32 ); } @@ -51,6 +53,7 @@ class DataType { static bool kind_valid( kind_t ); private: + static std::string int8_str() { return "int8"; } static std::string int32_str() { return "int32"; } static std::string int64_str() { return "int64"; } static std::string real32_str() { return "real32"; } @@ -79,6 +82,14 @@ class DataType { kind_t kind_; }; +template <> +inline std::string DataType::str() { + return int8_str(); +} +template <> +inline std::string DataType::str() { + return int8_str(); +} template <> inline std::string DataType::str() { return int32_str(); @@ -140,6 +151,14 @@ inline std::string DataType::str( const double& ) { return real64_str(); } template <> +inline DataType::kind_t DataType::kind() { + return KIND_INT8; +} +template <> +inline DataType::kind_t DataType::kind() { + return KIND_INT8; +} +template <> inline DataType::kind_t DataType::kind() { return KIND_INT32; } @@ -201,7 +220,9 @@ inline DataType::kind_t DataType::kind( const double& ) { } inline DataType::kind_t DataType::str_to_kind( const std::string& datatype ) { - if ( datatype == "int32" ) + if ( datatype == "int8" ) + return KIND_INT8; + else if ( datatype == "int32" ) return KIND_INT32; else if ( datatype == "int64" ) return KIND_INT64; @@ -217,6 +238,8 @@ inline DataType::kind_t DataType::str_to_kind( const std::string& datatype ) { } inline std::string DataType::kind_to_str( kind_t kind ) { switch ( kind ) { + case KIND_INT8: + return int8_str(); case KIND_INT32: return int32_str(); case KIND_INT64: @@ -233,6 +256,7 @@ inline std::string DataType::kind_to_str( kind_t kind ) { } inline bool DataType::kind_valid( kind_t kind ) { switch ( kind ) { + case KIND_INT8: case KIND_INT32: case KIND_INT64: case KIND_UINT64: diff --git a/src/atlas/array/MakeView.h b/src/atlas/array/MakeView.h index d2892be9f..7fc3ff1ee 100644 --- a/src/atlas/array/MakeView.h +++ b/src/atlas/array/MakeView.h @@ -14,6 +14,7 @@ #include "atlas/array/IndexView.h" #include "atlas/array/LocalView.h" #include "atlas/array_fwd.h" +#include "atlas/util/Config.h" namespace atlas { namespace array { @@ -25,6 +26,7 @@ extern template IndexView make_indexview( Array& extern template IndexView make_indexview( const Array& ); extern template IndexView make_indexview( const Array& ); + #define EXPLICIT_TEMPLATE_DECLARATION_TYPE_RANK( TYPE, RANK ) \ extern template ArrayView make_view( Array& ); \ extern template ArrayView make_view( Array& ); \ @@ -46,6 +48,7 @@ extern template IndexView make_indexview( const Array& #define EXPLICIT_TEMPLATE_DECLARATION( RANK ) \ + EXPLICIT_TEMPLATE_DECLARATION_TYPE_RANK( char, RANK ) \ EXPLICIT_TEMPLATE_DECLARATION_TYPE_RANK( int, RANK ) \ EXPLICIT_TEMPLATE_DECLARATION_TYPE_RANK( long, RANK ) \ EXPLICIT_TEMPLATE_DECLARATION_TYPE_RANK( float, RANK ) \ diff --git a/src/atlas/array/native/NativeArray.cc b/src/atlas/array/native/NativeArray.cc index bd4b6f117..9ae1b6417 100644 --- a/src/atlas/array/native/NativeArray.cc +++ b/src/atlas/array/native/NativeArray.cc @@ -68,6 +68,8 @@ Array* Array::create( DataType datatype, const ArrayShape& shape ) { return new ArrayT( shape ); case DataType::KIND_REAL32: return new ArrayT( shape ); + case DataType::KIND_INT8: + return new ArrayT( shape ); case DataType::KIND_INT32: return new ArrayT( shape ); case DataType::KIND_INT64: diff --git a/src/atlas/array/native/NativeArrayView.cc b/src/atlas/array/native/NativeArrayView.cc index 8c7d0e9f1..de024cf44 100644 --- a/src/atlas/array/native/NativeArrayView.cc +++ b/src/atlas/array/native/NativeArrayView.cc @@ -59,6 +59,8 @@ void ArrayView::dump( std::ostream& os ) const { namespace atlas { namespace array { #define EXPLICIT_TEMPLATE_INSTANTIATION( Rank ) \ + template class ArrayView; \ + template class ArrayView; \ template class ArrayView; \ template class ArrayView; \ template class ArrayView; \ @@ -69,10 +71,12 @@ namespace array { template class ArrayView; \ template class ArrayView; \ template class ArrayView; \ + template void ArrayView::assign( char const& ); \ template void ArrayView::assign( int const& ); \ template void ArrayView::assign( long const& ); \ template void ArrayView::assign( float const& ); \ template void ArrayView::assign( double const& ); \ + template void ArrayView::assign( std::initializer_list const& ); \ template void ArrayView::assign( std::initializer_list const& ); \ template void ArrayView::assign( std::initializer_list const& ); \ template void ArrayView::assign( std::initializer_list const& ); \ diff --git a/src/atlas/array/native/NativeMakeView.cc b/src/atlas/array/native/NativeMakeView.cc index b4c137e7b..2aac613f9 100644 --- a/src/atlas/array/native/NativeMakeView.cc +++ b/src/atlas/array/native/NativeMakeView.cc @@ -118,6 +118,7 @@ namespace array { template ArrayView make_device_view( const Array& ); #define EXPLICIT_TEMPLATE_INSTATIATION( RANK ) \ + EXPLICIT_TEMPLATE_INSTANTIATION_TYPE_RANK( char, RANK ) \ EXPLICIT_TEMPLATE_INSTANTIATION_TYPE_RANK( int, RANK ) \ EXPLICIT_TEMPLATE_INSTANTIATION_TYPE_RANK( long, RANK ) \ EXPLICIT_TEMPLATE_INSTANTIATION_TYPE_RANK( float, RANK ) \ diff --git a/src/atlas/functionspace/detail/StructuredColumns.cc b/src/atlas/functionspace/detail/StructuredColumns.cc index 12d037b23..ace85b4d7 100644 --- a/src/atlas/functionspace/detail/StructuredColumns.cc +++ b/src/atlas/functionspace/detail/StructuredColumns.cc @@ -492,7 +492,8 @@ Field StructuredColumns::createField( const Field& other, const eckit::Configura // ---------------------------------------------------------------------------- void StructuredColumns::gather( const FieldSet& local_fieldset, FieldSet& global_fieldset ) const { ATLAS_ASSERT( local_fieldset.size() == global_fieldset.size() ); - + ATLAS_TRACE_SCOPE ("StructuredColumns::gather") + { for ( idx_t f = 0; f < local_fieldset.size(); ++f ) { const Field& loc = local_fieldset[f]; Field& glb = global_fieldset[f]; @@ -524,6 +525,7 @@ void StructuredColumns::gather( const FieldSet& local_fieldset, FieldSet& global throw_Exception( "datatype not supported", Here() ); } } + } } // ---------------------------------------------------------------------------- diff --git a/src/atlas/grid/Distribution.h b/src/atlas/grid/Distribution.h index 7a91230d8..46f5ee617 100644 --- a/src/atlas/grid/Distribution.h +++ b/src/atlas/grid/Distribution.h @@ -64,6 +64,11 @@ class Distribution : DOXYGEN_HIDE( public util::ObjectHandle ) const std::string& type() const; + gidx_t size () const + { + return get ()->size (); + } + friend std::ostream& operator<<( std::ostream& os, const Distribution& distribution ); }; diff --git a/src/atlas/grid/detail/distribution/DistributionImpl.cc b/src/atlas/grid/detail/distribution/DistributionImpl.cc index d6d85e1a2..e2e1912ae 100644 --- a/src/atlas/grid/detail/distribution/DistributionImpl.cc +++ b/src/atlas/grid/detail/distribution/DistributionImpl.cc @@ -40,9 +40,10 @@ DistributionImpl::DistributionImpl( const Grid& grid ) : nb_pts_( nb_partitions_, grid.size() ), max_pts_( grid.size() ), min_pts_( grid.size() ), - type_( distribution_type( nb_partitions_ ) ) {} + type_( distribution_type( nb_partitions_ ) ), + size_( grid.size ()) {} -DistributionImpl::DistributionImpl( const Grid& grid, const Partitioner& partitioner ) : part_( grid.size() ) { +DistributionImpl::DistributionImpl( const Grid& grid, const Partitioner& partitioner ) : part_( grid.size() ), size_ (grid.size ()) { partitioner.partition( grid, part_.data() ); nb_partitions_ = partitioner.nb_partitions(); @@ -99,6 +100,7 @@ DistributionImpl::DistributionImpl( int nb_partitions, idx_t npts, int part[], i max_pts_ = *std::max_element( nb_pts_.begin(), nb_pts_.end() ); min_pts_ = *std::min_element( nb_pts_.begin(), nb_pts_.end() ); type_ = distribution_type( nb_partitions_ ); + size_ = npts; } DistributionImpl::DistributionImpl( int nb_partitions, partition_t&& part ) : @@ -123,6 +125,7 @@ DistributionImpl::DistributionImpl( int nb_partitions, partition_t&& part ) : max_pts_ = *std::max_element( nb_pts_.begin(), nb_pts_.end() ); min_pts_ = *std::min_element( nb_pts_.begin(), nb_pts_.end() ); type_ = distribution_type( nb_partitions_ ); + size_ = size; } DistributionImpl::~DistributionImpl() = default; diff --git a/src/atlas/grid/detail/distribution/DistributionImpl.h b/src/atlas/grid/detail/distribution/DistributionImpl.h index 187b13e68..919bd8d64 100644 --- a/src/atlas/grid/detail/distribution/DistributionImpl.h +++ b/src/atlas/grid/detail/distribution/DistributionImpl.h @@ -59,6 +59,8 @@ class DistributionImpl : public util::Object { idx_t max_pts() const { return max_pts_; } idx_t min_pts() const { return min_pts_; } + gidx_t size () const { return size_; } + const std::string& type() const { return type_; } void print( std::ostream& ) const; @@ -70,6 +72,7 @@ class DistributionImpl : public util::Object { idx_t max_pts_; idx_t min_pts_; std::string type_; + gidx_t size_; }; extern "C" { diff --git a/src/atlas/parallel/GatherScatter.h b/src/atlas/parallel/GatherScatter.h index d39519a91..2084fcebd 100644 --- a/src/atlas/parallel/GatherScatter.h +++ b/src/atlas/parallel/GatherScatter.h @@ -227,15 +227,21 @@ void GatherScatter::gather( parallel::Field lfields[], parallel /// Pack - pack_send_buffer( lfields[jfield], locmap_, loc_buffer.data() ); + ATLAS_TRACE_SCOPE ("Pack") + { + pack_send_buffer( lfields[jfield], locmap_, loc_buffer.data() ); + }; /// Gather ATLAS_TRACE_MPI( GATHER ) { mpi::comm().gatherv( loc_buffer, glb_buffer, glb_counts, glb_displs, root ); } + ATLAS_TRACE_SCOPE ("Unpack") + { /// Unpack if ( myproc == root ) unpack_recv_buffer( glbmap_, glb_buffer.data(), gfields[jfield] ); + } } } diff --git a/src/atlas/util/vector.h b/src/atlas/util/vector.h index 26bdb916e..0e7f70122 100644 --- a/src/atlas/util/vector.h +++ b/src/atlas/util/vector.h @@ -98,13 +98,18 @@ class vector { T* data() { return data_; } - idx_t size() const { return size_; } + size_t size() const { return size_; } void assign( idx_t n, const value_type& value ) { resize( n ); omp::fill( begin(), begin() + n, value ); } + void assign( size_t n, const value_type& value ) { + resize( n ); + omp::fill( begin(), begin() + n, value ); + } + template void assign( const Iter& first, const Iter& last ) { size_t size = std::distance( first, last ); @@ -121,11 +126,11 @@ class vector { } template void resize( size_t size ) { - if ( static_cast( size ) > 0 ) { + if ( size > 0 ) { if ( capacity_ == 0 ) { reserve( size ); } - if ( static_cast( size ) > capacity_ ) { + if ( size > capacity_ ) { ATLAS_NOTIMPLEMENTED; } size_ = size; @@ -140,8 +145,8 @@ class vector { private: value_type* data_{nullptr}; - idx_t size_{0}; - idx_t capacity_{0}; + size_t size_{0}; + size_t capacity_{0}; }; } // namespace atlas diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 25f4e3b73..5bad92b89 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -114,6 +114,7 @@ add_subdirectory( projection ) add_subdirectory( grid ) add_subdirectory( mesh ) add_subdirectory( functionspace ) +add_subdirectory( gatherscatter ) add_subdirectory( io ) add_subdirectory( numerics ) add_subdirectory( trans ) diff --git a/src/tests/gatherscatter/CMakeLists.txt b/src/tests/gatherscatter/CMakeLists.txt new file mode 100644 index 000000000..a8c482033 --- /dev/null +++ b/src/tests/gatherscatter/CMakeLists.txt @@ -0,0 +1,20 @@ +# (C) Copyright 2013 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation nor +# does it submit to any jurisdiction. + +ecbuild_add_test( TARGET test_gatherscatter + SOURCES test_gatherscatter.cc ioFieldDesc.cc GatherScatter.cc + LIBS atlas + ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} +) + +ecbuild_add_test( TARGET test_arrayViewHelpers + SOURCES test_arrayViewHelpers.cc ioFieldDesc.cc + LIBS atlas + ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} +) + diff --git a/src/tests/gatherscatter/GatherScatter.cc b/src/tests/gatherscatter/GatherScatter.cc new file mode 100644 index 000000000..d823345c6 --- /dev/null +++ b/src/tests/gatherscatter/GatherScatter.cc @@ -0,0 +1,350 @@ +#include "GatherScatter.h" + +#include "atlas/array.h" +#include "atlas/field.h" +#include "atlas/grid.h" +#include "atlas/parallel/mpi/mpi.h" +#include "atlas/runtime/Trace.h" + +#include + +namespace +{ + +template +T reorder (const T & vec, const I & ord) +{ + T v; + v.reserve (ord.size ()); + for (typename I::value_type i = 0; i < ord.size (); i++) + v.push_back (vec[ord[i]]); + return v; +} + +template +void integrate (T & v) +{ + v[0].off = 0; + for (size_t i = 1; i < v.size (); i++) + v[i].off = v[i-1].off + v[i-1].len; +} + +template +V grep (const V v, F f) +{ + V g; + std::copy_if (v.begin (), v.end (), std::back_inserter (g), f); + return g; +} + +template +std::vector range (N n) +{ + std::vector r (n); + std::iota (r.begin (), r.end (), 0); + return r; +} + +inline void unpack (const byte & a, byte & b) { b = a; }; +inline void pack (byte & a, const byte & b) { a = b; }; + +}; + +GatherScatter::GatherScatter (const atlas::grid::Distribution & _dist) : dist (_dist) +{ +ATLAS_TRACE_SCOPE ("GatherScatter::GatherScatter") +{ + + nprc = dist.nb_partitions (); + + std::vector count (nprc); + std::vector ind (dist.size ()); + + for (atlas::gidx_t i = 0; i < dist.size (); i++) + ind[i] = count[dist.partition (i)]++; + + max = *std::max_element (count.begin (), count.end ()); + + _prcloc2glo.resize (max * nprc, std::numeric_limits::min ()); + _glo2prcloc.resize (dist.size ()); + + for (atlas::gidx_t i = 0; i < dist.size (); i++) + { + _prcloc2glo[max * dist.partition (i) + ind[i]] = i; + _glo2prcloc[i].loc = ind[i]; + _glo2prcloc[i].prc = dist.partition (i); + } + +} +} + +void GatherScatter::reOrderFields (ioFieldDesc_v & floc, ioFieldDesc_v & fglo) const +{ + ATLAS_ASSERT (floc.size () == fglo.size ()); + atlas::idx_t nfld = fglo.size (); + + // Sort fields by owner + + std::vector isort (nfld); + + std::iota (std::begin (isort), std::end (isort), 0); + std::sort (std::begin (isort), std::end (isort), + [&fglo] (atlas::idx_t a, atlas::idx_t b) + { return fglo[a].owner () < fglo[b].owner (); }); + + floc = reorder (floc, isort); + fglo = reorder (fglo, isort); + + for (int jfld = 0; jfld < nfld; jfld++) + floc[jfld].owner () = fglo[jfld].owner (); + +} + +GatherScatter::fldprc_t GatherScatter::computeTLoc (const ioFieldDesc_v & floc) const +{ + fldprc_t tloc; + auto & comm = eckit::mpi::comm (); + atlas::idx_t nprc = comm.size (); + atlas::idx_t nfld = floc.size (); + + tloc.fld.resize (nfld + 1); + tloc.prc.resize (nprc + 1); + + for (atlas::idx_t jfld = 0; jfld < nfld; jfld++) + { + atlas::idx_t owner = floc[jfld].owner (); + tloc.fld[jfld].len = floc[jfld].size (); + tloc.prc[owner].len += floc[jfld].size (); + } + + integrate (tloc.fld); + integrate (tloc.prc); + + return tloc; +} + +GatherScatter::fldprc_t GatherScatter::computeTGlo (const ioFieldDesc_v & fglo) const +{ + fldprc_t tglo; + auto & comm = eckit::mpi::comm (); + atlas::idx_t nprc = comm.size (); + atlas::idx_t lprc = comm.rank (); + atlas::idx_t nfld = fglo.size (); + + tglo.prc.resize (nprc + 1); + tglo.fld.resize (nfld + 1); + + for (atlas::idx_t jfld = 0; jfld < nfld; jfld++) + if (lprc == fglo[jfld].owner ()) + tglo.fld[jfld].len = fglo[jfld].dlength (); + + integrate (tglo.fld); + + for (atlas::idx_t iprc = 0; iprc < nprc; iprc++) + tglo.prc[iprc].len = dist.nb_pts ()[iprc] * tglo.fld.back ().off; + + integrate (tglo.prc); + + return tglo; +} + + +template +void GatherScatter::processLocBuffer (ioFieldDesc_v & floc, const fldprc_t & tloc, + byte_v & buf_loc, A a) const +{ + atlas::idx_t nfld = floc.size (); + + ATLAS_TRACE_SCOPE ("GatherScatter::processLocBuffer") + { +#pragma omp parallel for + for (atlas::idx_t jfld = 0; jfld < nfld; jfld++) + { + auto & f = floc[jfld]; + byte * buffer = &buf_loc[tloc.fld[jfld].off]; + const size_t dlength = f.dlength (); + const size_t ngptot = f.ngptot (); + const size_t nproma = f.nproma (); + const size_t ngpblks = f.ngpblks (); + + for (atlas::idx_t jblk = 0; jblk < ngpblks; jblk++) + { + const int jidia = 0, jfdia = std::min (nproma, ngptot - jblk * nproma); + for (atlas::idx_t jlon = jidia; jlon < jfdia; jlon++) + { + atlas::idx_t k = (jlon+jblk*nproma)*dlength; + for (int j = 0; j < dlength; j++) + a (buffer[k+j], f (jblk, jlon, j)); + } + } + } + } + +} + +template +void GatherScatter::processGloBuffer (ioFieldDesc_v & fglo, const fldprc_t & tglo, + byte_v & buf_glo, A a) const +{ + auto & comm = eckit::mpi::comm (); + atlas::idx_t nfld = fglo.size (); + atlas::idx_t nprc = comm.size (); + + ATLAS_TRACE_SCOPE ("GatherScatter::processGloBuffer") + { + std::vector prcs = grep (range (nprc), + [&tglo] (atlas::idx_t i) { return tglo.prc[i].len > 0; }); + std::vector flds = grep (range (nfld), + [&tglo] (atlas::idx_t i) { return tglo.fld[i].len > 0; }); + +#ifdef UNDEF + for (atlas::idx_t iprc = 0; iprc < nprc; iprc++) + if (tglo.prc[iprc].len > 0) + { + for (atlas::idx_t jfld = 0; jfld < nfld; jfld++) + { + if (tglo.fld[jfld].len > 0) +#endif + + for (int ii = 0; ii < prcs.size (); ii++) + for (int jj = 0; jj < flds.size (); jj++) + { + + const atlas::idx_t iprc = prcs[ii]; + const atlas::idx_t jfld = flds[jj]; + const atlas::idx_t ngptot = dist.nb_pts ()[iprc]; + const size_t off = tglo.prc[iprc].off + ngptot * tglo.fld[jfld].off; + auto & f = fglo[jfld]; + const size_t len = tglo.fld[jfld].len; +#pragma omp parallel for + for (atlas::idx_t jloc = 0; jloc < ngptot; jloc++) + for (int j = 0; j < len; j++) + { + size_t k = jloc * tglo.fld[jfld].len + j; + a (buf_glo[off+k], f (0, prcloc2glo (iprc, jloc), j)); + } + + } + + } +} + +std::vector GatherScatter::postRecv (byte_v & buf, const fldprc_t & t) const +{ +ATLAS_TRACE_SCOPE ("GatherScatter::postRecv") +{ + std::vector rqr; + auto & comm = eckit::mpi::comm (); + atlas::idx_t nprc = comm.size (); + + for (atlas::idx_t iprc = 0; iprc < nprc; iprc++) + if (t.prc[iprc].len > 0) + rqr.push_back (comm.iReceive (&buf[t.prc[iprc].off], t.prc[iprc].len, iprc, 100)); + + return rqr; +} +} + +std::vector GatherScatter::postSend (const byte_v & buf, const fldprc_t & t) const +{ +ATLAS_TRACE_SCOPE ("GatherScatter::postSend") +{ + std::vector rqs; + auto & comm = eckit::mpi::comm (); + atlas::idx_t nprc = comm.size (); + + for (atlas::idx_t iprc = 0; iprc < nprc; iprc++) + if (t.prc[iprc].len > 0) + rqs.push_back (comm.iSend (&buf[t.prc[iprc].off], t.prc[iprc].len, iprc, 100)); + + return rqs; +} +} + +void GatherScatter::gather (const ioFieldDesc_v & _floc, ioFieldDesc_v & _fglo) const +{ + +ioFieldDesc_v floc = _floc; +ioFieldDesc_v fglo = _fglo; + +ATLAS_TRACE_SCOPE ("GatherScatter::gather") +{ + auto & comm = eckit::mpi::comm (); + + reOrderFields (floc, fglo); + + fldprc_t tglo = computeTGlo (fglo); + byte_v buf_glo (tglo.prc.back ().off); + std::vector rqr = postRecv (buf_glo, tglo); + + fldprc_t tloc = computeTLoc (floc); + byte_v buf_loc (tloc.fld.back ().off); + processLocBuffer (floc, tloc, buf_loc, pack); + + ATLAS_TRACE_SCOPE ("barrier") + { + comm.barrier (); + } + + std::vector rqs = postSend (buf_loc, tloc); + + ATLAS_TRACE_SCOPE ("waitRecv") + { + for (auto & r : rqr) + comm.wait (r); + } + + processGloBuffer (fglo, tglo, buf_glo, unpack); + + ATLAS_TRACE_SCOPE ("waitSend") + { + for (auto & r : rqs) + comm.wait (r); + }; + +} +} + +void GatherScatter::scatter (const ioFieldDesc_v & _fglo, ioFieldDesc_v & _floc) const +{ + +ioFieldDesc_v fglo = _fglo; +ioFieldDesc_v floc = _floc; + +ATLAS_TRACE_SCOPE ("GatherScatter::scatter") +{ + auto & comm = eckit::mpi::comm (); + + reOrderFields (floc, fglo); + + fldprc_t tloc = computeTLoc (floc); + byte_v buf_loc (tloc.fld.back ().off); + std::vector rqr = postRecv (buf_loc, tloc); + + fldprc_t tglo = computeTGlo (fglo); + byte_v buf_glo (tglo.prc.back ().off); + processGloBuffer (fglo, tglo, buf_glo, pack); + + ATLAS_TRACE_SCOPE ("barrier") + { + comm.barrier (); + } + + std::vector rqs = postSend (buf_glo, tglo); + + ATLAS_TRACE_SCOPE ("waitRecv") + { + for (auto & r : rqr) + comm.wait (r); + } + + processLocBuffer (floc, tloc, buf_loc, unpack); + + ATLAS_TRACE_SCOPE ("waitSend") + { + for (auto & r : rqs) + comm.wait (r); + } +} +} + diff --git a/src/tests/gatherscatter/GatherScatter.h b/src/tests/gatherscatter/GatherScatter.h new file mode 100644 index 000000000..f01d3a571 --- /dev/null +++ b/src/tests/gatherscatter/GatherScatter.h @@ -0,0 +1,78 @@ +#pragma once + + +#include "ioFieldDesc.h" +#include "atlas/grid.h" +#include "atlas/util/vector.h" +#include "atlas/parallel/mpi/mpi.h" + +class GatherScatter +{ +private: + + class locprc_t + { + public: + atlas::idx_t loc = std::numeric_limits::min (); + atlas::idx_t prc = std::numeric_limits::min (); + }; + + class offlen_t + { + public: + atlas::gidx_t off = 0, len = 0; + }; + + using offlen_v = std::vector; + + class fldprc_t + { + public: + offlen_v prc; + offlen_v fld; + }; + + using ioFieldDesc_v = std::vector; + + using byte_v = atlas::vector; + + atlas::idx_t max, nprc; + + std::vector _prcloc2glo; + std::vector _glo2prcloc; + + const atlas::grid::Distribution & dist; + +public: + GatherScatter (const atlas::grid::Distribution & _dist); + + void gather (const ioFieldDesc_v & _floc, ioFieldDesc_v & fglo) const; + void scatter (const ioFieldDesc_v & _fglo, ioFieldDesc_v & floc) const; + +private: + atlas::gidx_t prcloc2glo (atlas::idx_t iprc, atlas::idx_t jloc) const + { + return _prcloc2glo[iprc * max + jloc]; + } + + const locprc_t & glo2prcloc (atlas::gidx_t jglo) const + { + return _glo2prcloc[jglo]; + } + + void reOrderFields (ioFieldDesc_v & floc, ioFieldDesc_v & fglo) const; + fldprc_t computeTLoc (const ioFieldDesc_v & floc) const; + fldprc_t computeTGlo (const ioFieldDesc_v & fglo) const; + + template + void processLocBuffer (ioFieldDesc_v & floc, const fldprc_t & tloc, + byte_v & buf_loc, A a) const; + template + void processGloBuffer (ioFieldDesc_v & fglo, const fldprc_t & tglo, + byte_v & buf_glo, A a) const; + + std::vector postRecv (byte_v & buf, const fldprc_t & t) const; + std::vector postSend (const byte_v & buf, const fldprc_t & t) const; + +}; + diff --git a/src/tests/gatherscatter/arrayViewHelpers.h b/src/tests/gatherscatter/arrayViewHelpers.h new file mode 100644 index 000000000..22c5f6565 --- /dev/null +++ b/src/tests/gatherscatter/arrayViewHelpers.h @@ -0,0 +1,118 @@ + +#pragma once + + +// Drop a dimension of a view (set it to a fixed value); we get a view of rank=Rank-1 + +template +atlas::array::ArrayView +dropDimension (const atlas::array::ArrayView & view, int dim, atlas::idx_t idx) +{ + constexpr int rank = Rank-1; + atlas::array::ArrayShape shape; + atlas::array::ArrayStrides strides; + + if (dim < 0) + dim = dim + Rank; + + for (int i = 0; i < dim; i++) + { + shape.push_back (view.shape (i)); + strides.push_back (view.stride (i)); + } + atlas::idx_t stride_dim = view.stride (dim); + for (int i = dim + 1; i < Rank; i++) + { + shape.push_back (view.shape (i)); + strides.push_back (view.stride (i)); + } + + using nonConstValue = typename std::remove_const::type; + + Value * data = (nonConstValue *) (view.data ()); + + return atlas::array::ArrayView (data + stride_dim * idx, shape, strides); +} + +// Convert a view to a view of bytes (add an extra inner dimension) + +template +atlas::array::ArrayView::value, const byte, byte>::type,Rank+1> +byteView (const atlas::array::ArrayView & view) +{ + using B = typename std::conditional::value, const byte, byte>::type; + constexpr int rank = Rank+1; + atlas::array::ArrayShape shape; + atlas::array::ArrayStrides strides; + + size_t dlen = atlas::array::DataType::create ().size (); + + for (int i = 0; i < Rank; i++) + { + shape.push_back (view.shape (i)); + strides.push_back (view.stride (i) * dlen); + } + + shape.push_back (dlen); + strides.push_back (1); + + B * data = (B *) (view.data ()); + + return atlas::array::ArrayView (data, shape, strides); +} + +// Add an extra outer dummy dimension + +template +atlas::array::ArrayView +addDummyDimension (const atlas::array::ArrayView & view) +{ + constexpr int rank = Rank+1; + atlas::array::ArrayShape shape = {1}; + atlas::array::ArrayStrides strides = {view.size ()}; + + for (int i = 0; i < Rank; i++) + { + shape.push_back (view.shape (i)); + strides.push_back (view.stride (i)); + } + + using nonConstValue = typename std::remove_const::type; + + Value * data = (nonConstValue *) (view.data ()); + + return atlas::array::ArrayView (data, shape, strides); +} + +// Apply a function to view elements + +template +class viewLoopHelper +{ +public: + template + static void apply (atlas::array::ArrayView view, Func func, Index... index) + { + for (int i = 0; i < view.shape (Rank - Count); i++) + viewLoopHelper::apply (view, func, index..., i); + } +}; + +template <> +class viewLoopHelper<0> +{ +public: + template + static void apply (atlas::array::ArrayView view, Func func, Index... index) + { + func (view (index...), index...); + } +}; + + +template +void viewLoop (atlas::array::ArrayView view, Func func) +{ + viewLoopHelper::apply (view, func); +} + diff --git a/src/tests/gatherscatter/ioFieldDesc.cc b/src/tests/gatherscatter/ioFieldDesc.cc new file mode 100644 index 000000000..3c86f40ac --- /dev/null +++ b/src/tests/gatherscatter/ioFieldDesc.cc @@ -0,0 +1,228 @@ +#include "ioFieldDesc.h" +#include "arrayViewHelpers.h" +#include "atlas/runtime/Exception.h" + + +namespace +{ + +// The following templates create a list of byte views of rank 2 from a view of any rank, any type + + +// Non-blocked (1:NGPTOT) + +template +void listOf1DByteView (atlas::array::ArrayView & view, + const std::vector & _ind, + const atlas::Field & f, size_t ngptot, atlas::idx_t gdim, + std::vector & list) +{ + static_assert (Rank > 2, "listOf1DByteView should be called with views having a Rank > 2"); + + std::vector ind = _ind; + ind.push_back (0); + + if (gdim == 0) + { + for (int i = 0; i < view.shape (1); i++) + { + auto v = dropDimension (view, 1, i); + ind.back () = i; + listOf1DByteView (v, ind, f, ngptot, 0, list); + } + } + else + { + for (int i = 0; i < view.shape (0); i++) + { + auto v = dropDimension (view, 0, i); + ind.back () = i; + listOf1DByteView (v, ind, f, ngptot, gdim-1, list); + } + } +} + +template <> +void listOf1DByteView (atlas::array::ArrayView & view, + const std::vector & ind, + const atlas::Field & f, size_t ngptot, atlas::idx_t gdim, + std::vector & list) +{ + auto v = addDummyDimension (view); + list.push_back (ioFieldDesc (v, ind, f, ngptot)); +} + +template +void createListOf1DByteView (atlas::array::ArrayView & view, + const atlas::Field & f, size_t ngptot, atlas::idx_t gdim, + std::vector & list) +{ + auto v = byteView (view); + listOf1DByteView (v, std::vector (), f, ngptot, gdim, list); +} + +// Blocked (1:NPROMA,1:NGPBLKS) + +template +void listOf1DByteViewBlocked + (atlas::array::ArrayView & view, + const std::vector & _ind, + const atlas::Field & f, atlas::idx_t bdim, atlas::idx_t gdim, size_t ngptot, + std::vector & list) +{ + static_assert (Rank > 3, "listOf1DByteViewBlocked should be called with views having a Rank > 3"); + + std::vector ind = _ind; + ind.push_back (0); + + + // First dimension is block + if (bdim == 0) + { + // Grid dimension is second + if (gdim == 1) + { + // Drop dimensions after grid dimension + for (int i = 0; i < view.shape (2); i++) + { + auto v = dropDimension (view, 2, i); + ind.back () = i; + listOf1DByteViewBlocked (v, ind, f, bdim, gdim, ngptot, list); + } + } + else + { + // Remove second dimension + for (int i = 0; i < view.shape (1); i++) + { + auto v = dropDimension (view, 1, i); + ind.back () = i; + listOf1DByteViewBlocked (v, ind, f, bdim, gdim-1, ngptot, list); + } + } + } + else + { + // Remove another leading dimension + for (int i = 0; i < view.shape (0); i++) + { + auto v = dropDimension (view, 0, i); + ind.back () = i; + listOf1DByteViewBlocked (v, ind, f, bdim-1, gdim-1, ngptot, list); + } + } + +} + +template <> +void listOf1DByteViewBlocked + (atlas::array::ArrayView & view, + const std::vector & ind, + const atlas::Field & f, atlas::idx_t bdim, atlas::idx_t gdim, size_t ngptot, + std::vector & list) +{ + list.push_back (ioFieldDesc (view, ind, f, ngptot)); +} + +template +void createListOf1DByteViewBlocked (atlas::array::ArrayView & view, + const atlas::Field & f, atlas::idx_t bdim, atlas::idx_t gdim, + size_t ngptot, std::vector & list) +{ + auto v = byteView (view); + listOf1DByteViewBlocked (v, std::vector (), f, bdim, gdim, ngptot, list); +} + +}; + + +void createIoFieldDescriptorsBlocked + (atlas::Field & f, std::vector & list, atlas::idx_t bdim, atlas::idx_t gdim, size_t ngptot) +{ + int rank = f.rank (); + auto type = f.datatype (); + + if (gdim < 0) + gdim = gdim + rank; + +#define HANDLE_TYPE_RANK(__type,__rank) \ + if (rank == __rank) \ + { \ + auto v = atlas::array::make_view<__type,__rank> (f); \ + createListOf1DByteViewBlocked (v, f, bdim, gdim, ngptot, list); \ + goto done; \ + } + +#define HANDLE_TYPE(__type) \ + if (type.kind () == atlas::array::DataType::create<__type> ().kind ()) \ + { \ + HANDLE_TYPE_RANK (__type, 2); HANDLE_TYPE_RANK (__type, 3); \ + HANDLE_TYPE_RANK (__type, 4); HANDLE_TYPE_RANK (__type, 5); HANDLE_TYPE_RANK (__type, 6); \ + HANDLE_TYPE_RANK (__type, 7); HANDLE_TYPE_RANK (__type, 8); HANDLE_TYPE_RANK (__type, 9); \ + } + + HANDLE_TYPE (long); + HANDLE_TYPE (double); + HANDLE_TYPE (int); + HANDLE_TYPE (float); + +#undef HANDLE_TYPE_RANK +#undef HANDLE_TYPE + + atlas::throw_NotImplemented ("createIoFieldDescriptorsBlocked type/rank", Here ()); + +done: + + return; +} + + +void createIoFieldDescriptors + (atlas::Field & f, std::vector & list, size_t ngptot, atlas::idx_t gdim) +{ + int rank = f.rank (); + auto type = f.datatype (); + + if (gdim < 0) + gdim = gdim + rank; + +#define HANDLE_TYPE_RANK(__type,__rank) \ + if (rank == __rank) \ + { \ + auto v = atlas::array::make_view<__type,__rank> (f); \ + createListOf1DByteView (v, f, ngptot, gdim, list); \ + goto done; \ + } + +#define HANDLE_TYPE(__type) \ + if (type.kind () == atlas::array::DataType::create<__type> ().kind ()) \ + { \ + HANDLE_TYPE_RANK (__type, 1); HANDLE_TYPE_RANK (__type, 2); HANDLE_TYPE_RANK (__type, 3); \ + HANDLE_TYPE_RANK (__type, 4); HANDLE_TYPE_RANK (__type, 5); HANDLE_TYPE_RANK (__type, 6); \ + HANDLE_TYPE_RANK (__type, 7); HANDLE_TYPE_RANK (__type, 8); HANDLE_TYPE_RANK (__type, 9); \ + } + + HANDLE_TYPE (long); + HANDLE_TYPE (double); + HANDLE_TYPE (int); + HANDLE_TYPE (float); + +#undef HANDLE_TYPE_RANK +#undef HANDLE_TYPE + + atlas::throw_NotImplemented ("createIoFieldDescriptors type/rank", Here ()); + +done: + + return; +} + +void createIoFieldDescriptors + (atlas::FieldSet & s, std::vector & list, size_t ngptot, atlas::idx_t gdim) +{ + for (auto & f : s) + createIoFieldDescriptors (f, list, ngptot, gdim); +} + + + diff --git a/src/tests/gatherscatter/ioFieldDesc.h b/src/tests/gatherscatter/ioFieldDesc.h new file mode 100644 index 000000000..845354cd9 --- /dev/null +++ b/src/tests/gatherscatter/ioFieldDesc.h @@ -0,0 +1,104 @@ + +#pragma once + +#include "atlas/field.h" +#include "atlas/array.h" +#include + + +typedef char byte; + +class ioFieldDesc +{ +public: + ioFieldDesc (atlas::array::ArrayView & v, + const std::vector & ind, + const atlas::Field & f, size_t ngptot) + : _v (v), _ind (ind), _f (f), _ngptot (ngptot) + { + _ngpblks = _v.shape (0); + _nproma = _v.shape (1); + _dlength = _v.shape (2); + if (_ngptot == 0) + _ngptot = _ngpblks * _nproma; + _size = _ngptot * _dlength; + } + + + atlas::array::ArrayView & view () + { + return _v; + } + + const std::vector & indices () const + { + return _ind; + } + + const atlas::Field & field () const + { + return _f; + } + + int & owner () + { + return _owner; + } + + int owner () const + { + return _owner; + } + + size_t size () const + { + return _size; + } + + size_t ngpblks () const + { + return _ngpblks; + } + + size_t ngptot () const + { + return _ngptot; + } + + size_t nproma () const + { + return _nproma; + } + + size_t dlength () const + { + return _dlength; + } + + byte & operator () (int jblk, int jlon, int k) + { + return _v (jblk, jlon, k); + } + + byte operator () (int jblk, int jlon, int k) const + { + return _v (jblk, jlon, k); + } + +private: + atlas::array::ArrayView _v; // NGPBKLS, NPROMA, sizeof (element) + const std::vector _ind; + const atlas::Field & _f; + int _owner = 0; + size_t _ngptot; // Total number of elements in a distributed field + size_t _nproma; // Blocking factor; may be equal to NGPTOT + size_t _ngpblks; // Number of blocks + size_t _size; // Size in bytes + size_t _dlength; // Element length +}; + +void createIoFieldDescriptorsBlocked + (atlas::Field & f, std::vector & list, atlas::idx_t bdim, atlas::idx_t gdim = -1, size_t ngptot = 0); + +void createIoFieldDescriptors (atlas::Field & f, std::vector & list, size_t ngptot = 0, atlas::idx_t gdim = -1); +void createIoFieldDescriptors (atlas::FieldSet & s, std::vector & list, size_t ngptot = 0, atlas::idx_t gdim = -1); diff --git a/src/tests/gatherscatter/test_arrayViewHelpers.cc b/src/tests/gatherscatter/test_arrayViewHelpers.cc new file mode 100644 index 000000000..2da9cffbc --- /dev/null +++ b/src/tests/gatherscatter/test_arrayViewHelpers.cc @@ -0,0 +1,39 @@ +#include "ioFieldDesc.h" +#include "arrayViewHelpers.h" + +#include "atlas/field.h" +#include "atlas/array.h" +#include +#include + + +template +void prss (const std::string & t, const V & v) +{ + printf ("--------- %s ---------\n", t.data ()); + for (int i = 0; i < v.rank (); i++) + printf (" %8d > %8d, %8d\n", i, v.shape (i), v.stride (i)); + printf ("\n"); +} + +int main (int argc, char * argv[]) +{ + const int ngpblks = 10, nflevg = 20, nproma = 16; + + const int ngptot = ngpblks * nproma; + + atlas::Field f ("f", atlas::array::DataType::kind (), {ngpblks, nflevg, nproma}); + + auto v = atlas::array::make_view (f); + + viewLoop (v, [] (long & x, int i, int j, int k) { x = 100000 * i + 100 * j + k; }); + + for (int i = 0; i < v.shape (0); i++) + for (int j = 0; j < v.shape (1); j++) + for (int k = 0; k < v.shape (2); k++) + printf (" %10.10d\n", v (i, j, k)); + + return 0; + +} + diff --git a/src/tests/gatherscatter/test_gatherscatter.cc b/src/tests/gatherscatter/test_gatherscatter.cc new file mode 100644 index 000000000..3caeb64fb --- /dev/null +++ b/src/tests/gatherscatter/test_gatherscatter.cc @@ -0,0 +1,572 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "atlas/array.h" +#include "atlas/field.h" +#include "atlas/grid.h" +#include "atlas/functionspace.h" +#include "atlas/parallel/mpi/mpi.h" +#include "tests/AtlasTestEnvironment.h" +#include "atlas/runtime/Trace.h" + +#include "GatherScatter.h" +#include "ioFieldDesc.h" +#include "arrayViewHelpers.h" + +using namespace eckit; +using namespace atlas::functionspace; +using namespace atlas::util; + +namespace atlas { +namespace test { + +namespace +{ + +void getprcind (const atlas::functionspace::StructuredColumns & fs, const atlas::grid::Distribution & dist, + std::vector & prc, std::vector & ind) +{ + auto & comm = mpi::comm (); + auto & grid = fs.grid (); + prc.resize (grid.size ()); + ind.resize (grid.size ()); + + for (int i = 0; i < grid.size (); i++) + prc[i] = dist.partition (i); + + { + atlas::Field indloc = fs.createField (atlas::util::Config ("name", "ind") + | atlas::util::Config ("owner", 0)); + atlas::Field indglo ("ind", &ind[0], {grid.size ()}); + auto v = array::make_view (indloc); + for (int i = 0; i < fs.sizeOwned (); i++) + v (i) = i; + fs.gather (indloc, indglo); + comm.broadcast (ind, 0); + } +} + +template +void prff (const std::string & name, const atlas::FieldSet & s) +{ + auto & comm = mpi::comm (); + int irank = comm.rank (); + for (int i = 0; i < s.size (); i++) + { + auto v = array::make_view (s[i]); + if (v.size () == 0) + continue; + + char tmp[128]; + sprintf (tmp, "%s.%8.8d.%8.8d.txt", name.c_str (), irank, i); + FILE * fp = fopen (tmp, "w"); + for (int i = 0; i < v.size (); i++) + fprintf (fp, "%8d > %8d\n", i, v (i)); + fclose (fp); + } +} + +template +void prff (const std::string & name, const atlas::Field & f) +{ + atlas::FieldSet s; + s.add (f); + prff (name, f); +} + +}; + +//----------------------------------------------------------------------------- + +CASE( "test_gatherscatter_NPROMAxNFLEVGxNGPBLKS" ) +{ + if (! eckit::Resource ("--NPROMAxNFLEVGxNGPBLKS", false)) + return; + + int nproma = eckit::Resource ("--nproma", 24); + int nflevg = eckit::Resource ("--nflevg", 10); + bool gather (eckit::Resource ("--gather", false)); + bool scatter (eckit::Resource ("--scatter", false)); + bool check (eckit::Resource ("--check", false)); + + atlas::StructuredGrid grid (eckit::Resource ("--grid", "N16")); + + auto & comm = mpi::comm (); + int irank = comm.rank (); + int nproc = comm.size (); + + atlas::grid::Distribution dist (grid, atlas::grid::Partitioner ("equal_regions")); + atlas::functionspace::StructuredColumns fs (grid, dist, atlas::util::Config ("halo", 1) + | atlas::util::Config ("periodic_points", true)); + + int ngptot = fs.sizeOwned (); + int ngpblks = ngptot / nproma; + + if (fs.sizeOwned () % nproma) + ngpblks++; + + std::vector prc, ind; + + if (check) + getprcind (fs, dist, prc, ind); + + atlas::FieldSet sloc; + + using T = long; + + const int ind_bit = 21, ind_off = 0; + const int prc_bit = 21, prc_off = ind_off + ind_bit; + const int lev_bit = 22, lev_off = prc_off + prc_bit; + + if (check) + { + ATLAS_ASSERT (fs.sizeOwned () < (1 << ind_bit)); + ATLAS_ASSERT (nproc < (1 << prc_bit)); + ATLAS_ASSERT (nflevg < (1 << lev_bit)); + } + + using T = long; + + auto func = [check,ind_off,prc_off,lev_off] (int lev, int prc, int ind) + { + long v = 0; + v = (static_cast (lev) << lev_off) + + (static_cast (prc) << prc_off) + + (static_cast (ind) << ind_off); + return v; + }; + + // Distributed field, Fortran dimensions (1:NPROMA,1:NFLEVG,1:NGPBLKS) + atlas::Field floc ("field", atlas::array::DataType::kind (), {ngpblks, nflevg, nproma}); + + // Gather our multi-field, multi-level Atlas field to a set of fields (1:NGPTOTG) + atlas::FieldSet sglo; + + for (int jlev = 0; jlev < nflevg; jlev++) + { + int owner = jlev % nproc; // RR distribution + std::string name = std::string ("#") + std::to_string (jlev); + atlas::Field fglo = atlas::Field (name, atlas::array::DataType::kind (), {irank == owner ? grid.size () : 0}); + fglo.metadata ().set ("owner", owner); + sglo.add (fglo); + } + + // IO descriptors + std::vector dloc; + std::vector dglo; + + createIoFieldDescriptors (sglo, dglo); // Default for grid dimension is inner dimension + createIoFieldDescriptorsBlocked (floc, dloc, 0, 2, fs.sizeOwned ()); // NGPBLKS dimension, NPROMA dimension, NGPTOT + + // Set target processor for all fields + for (auto & d : dglo) + d.field ().metadata ().get ("owner", d.owner ()); + + GatherScatter gs (dist); + + + auto walkLoc = [&floc, ngpblks, nproma, nflevg, ngptot, func, irank] (std::function f) + { + auto vloc = array::make_view (floc); + viewLoop (vloc, + [func,f,&irank,&nproma] (T & x, int jblk, int jlev, int jlon) + { f (x, func (jlev, irank, jlon + jblk * nproma)); }); +std::cout << " walkLoc " << std::endl; + }; + + auto walkGlo = [&sglo, nflevg, &grid, &irank, &prc, &ind, func] (std::function f) + { + for (int jlev = 0; jlev < nflevg; jlev++) + { + auto fglo = sglo[jlev]; + int owner; + fglo.metadata ().get ("owner", owner); + if (irank == owner) + { + auto vglo = array::make_view (fglo); + for (int jglo = 0; jglo < grid.size (); jglo++) + f (vglo (jglo), func (jlev, prc[jglo], ind[jglo])); + } + } + }; + + auto set = [](T & v, T r) { v = r; }; + auto cmp = [](T & v, T r) { EXPECT (v == r); }; + auto clear = [](T & v, T r) { v = 0; }; + + if (gather) + { + if (check) + { + walkLoc (set); + walkGlo (clear); + } + gs.gather (dloc, dglo); + if (check) + walkGlo (cmp); + } + + if (scatter) + { + if (check) + { + walkGlo (set); + walkLoc (clear); + } + gs.scatter (dglo, dloc); + if (check) + walkGlo (cmp); + } +} + +CASE( "test_gatherscatter_NFLEVGxNGPTOT" ) +{ + if (! eckit::Resource ("--NFLEVGxNGPTOT", false)) + return; + + int nfield = eckit::Resource ("--fields", 3); + int nflevg = eckit::Resource ("--nflevg", 10); + atlas::StructuredGrid grid (eckit::Resource ("--grid", "N16")); + bool gather (eckit::Resource ("--gather", false)); + bool scatter (eckit::Resource ("--scatter", false)); + bool check (eckit::Resource ("--check", false)); + + + auto & comm = mpi::comm (); + int irank = comm.rank (); + int nproc = comm.size (); + + atlas::grid::Distribution dist (grid, atlas::grid::Partitioner ("equal_regions")); + atlas::functionspace::StructuredColumns fs (grid, dist, atlas::util::Config ("halo", 1) + | atlas::util::Config ("periodic_points", true)); + std::vector prc, ind; + + if (check) + getprcind (fs, dist, prc, ind); + + atlas::FieldSet sloc; + + using T = long; + + const int ind_bit = 24, ind_off = 0; + const int prc_bit = 12, prc_off = ind_off + ind_bit; + const int fld_bit = 14, fld_off = prc_off + prc_bit; + const int lev_bit = 14, lev_off = fld_off + fld_bit; + + if (check) + { + ATLAS_ASSERT (fs.sizeOwned () < (1 << ind_bit)); + ATLAS_ASSERT (nproc < (1 << prc_bit)); + ATLAS_ASSERT (nfield < (1 << fld_bit)); + ATLAS_ASSERT (nflevg < (1 << lev_bit)); + } + + using T = long; + + auto func = [check,ind_off,prc_off,fld_off,lev_off] (int fld, int lev, int prc, int ind) + { + long v = 0; + v = (static_cast (fld) << fld_off) + (static_cast (lev) << lev_off) + + (static_cast (prc) << prc_off) + (static_cast (ind) << ind_off); + return v; + }; + + + // Distributed field, Fortran dimensions (1:NFLEVG,1:NGPTOT,1:NFIELDS) + atlas::Field floc ("field", atlas::array::DataType::kind (), {nfield, fs.size (), nflevg}); + atlas::FieldSet sglo; + + for (int jfld = 0, count = 0; jfld < nfield; jfld++) + for (int jlev = 0; jlev < nflevg; jlev++, count++) + { + int owner = count % nproc; // RR distribution + std::string name = std::string ("#") + std::to_string (jfld) + std::string (",") + std::to_string (jlev); + atlas::Field fglo = atlas::Field (name, atlas::array::DataType::kind (), {irank == owner ? grid.size () : 0}); + fglo.metadata ().set ("owner", owner); + sglo.add (fglo); + } + + auto walkLoc = [&floc, nfield, nflevg, &fs, irank, func] (std::function f) + { + auto vloc = array::make_view (floc); + viewLoop (vloc, [func,&irank,f] (T & x, int jfld, int jloc, int jlev) { f (x, func (jfld, jlev, irank, jloc)); }); + }; + + auto walkGlo = [&sglo, nfield, nflevg, irank, func, &grid, &prc, &ind] (std::function f) + { + for (int jfld = 0, count = 0; jfld < nfield; jfld++) + for (int jlev = 0; jlev < nflevg; jlev++, count++) + { + atlas::Field fglo = sglo[count]; + int owner; EXPECT (fglo.metadata ().get ("owner", owner)); + if (owner == irank) + { + auto v = array::make_view (fglo); + for (int jglo = 0; jglo < grid.size (); jglo++) + f (v (jglo), func (jfld, jlev, prc[jglo], ind[jglo])); + } + } + }; + + auto set = [](T & v, T r) { v = r; }; + auto cmp = [](T & v, T r) { EXPECT (v == r); }; + auto clear = [](T & v, T r) { v = 0; }; + + // IO descriptors + std::vector dloc; + std::vector dglo; + + createIoFieldDescriptors (floc, dloc, fs.sizeOwned (), 1); // Grid dimension is 1 + createIoFieldDescriptors (sglo, dglo); // Default for grid dimension is inner dimension + + // Set target processor for all fields + for (auto & d : dglo) + d.field ().metadata ().get ("owner", d.owner ()); + + GatherScatter gs (dist); + + // Gather + + if (gather) + { + if (check) + { + walkLoc (set); + walkGlo (clear); + } + + gs.gather (dloc, dglo); + + if (check) + walkGlo (cmp); + } + + // Scatter + + if (scatter) + { + if (check) + { + walkGlo (set); + walkLoc (clear); + } + + gs.scatter (dglo, dloc); + + if (check) + walkLoc (cmp); + } + +} + +CASE( "test_gatherscatter_simple" ) +{ + if (! eckit::Resource ("--simple", false)) + return; + + int nfield = eckit::Resource ("--fields", 3); + atlas::StructuredGrid grid (eckit::Resource ("--grid", "N16")); + bool gather1 (eckit::Resource ("--gather1", false)); + bool gather2 (eckit::Resource ("--gather2", false)); + bool scatter1 (eckit::Resource ("--scatter1", false)); + bool scatter2 (eckit::Resource ("--scatter2", false)); + bool debug (eckit::Resource ("--debug", false)); + bool check (eckit::Resource ("--check", false)); + + auto & comm = mpi::comm (); + int irank = comm.rank (); + int nproc = comm.size (); + + atlas::grid::Distribution dist (grid, atlas::grid::Partitioner ("equal_regions")); + atlas::functionspace::StructuredColumns fs (grid, dist, atlas::util::Config ("halo", 1) + | atlas::util::Config ("periodic_points", true)); + + std::vector prc, ind; + + if (check) + getprcind (fs, dist, prc, ind); + + atlas::FieldSet sloc; + atlas::FieldSet sglo; + + using T = long; + + const int ind_bit = 21, ind_off = 0; + const int prc_bit = 21, prc_off = ind_off + ind_bit; + const int fld_bit = 21, fld_off = prc_off + prc_bit; + + if (check) + { + ATLAS_ASSERT (fs.sizeOwned () < (1 << ind_bit)); + ATLAS_ASSERT (nproc < (1 << prc_bit)); + ATLAS_ASSERT (nfield < (1 << fld_bit)); + } + + auto func = [check,ind_off,prc_off,fld_off] (int fld, int prc, int ind) + { + long v = 0; + v = (static_cast (fld) << fld_off) + + (static_cast (prc) << prc_off) + + (static_cast (ind) << ind_off); + return v; + }; + + auto walkGlo = [&sglo, &ind, &prc, &grid, func, irank] (std::function f) + { + for (int i = 0; i < sglo.size (); i++) + { + auto & fglo = sglo[i]; + int owner; + EXPECT (fglo.metadata ().get ("owner", owner)); + if (owner == irank) + { + auto v = array::make_view (fglo); + for (int j = 0; j < grid.size (); j++) + f (v[j], func (i, prc[j], ind[j])); + } + } + }; + + auto walkLoc = [&sloc, &fs, func, irank] (std::function f) + { + for (int i = 0; i < sloc.size (); i++) + { + auto & floc = sloc[i]; + auto v = array::make_view (floc); + for (int j = 0; j < fs.sizeOwned (); j++) + f (v[j], func (i, irank, j)); + } + }; + + + for (int i = 0; i < nfield; i++) + { + int owner = i % nproc; + + std::string name = std::string ("#") + std::to_string (i); + atlas::Field floc = fs.createField (atlas::util::Config ("name", name) + | atlas::util::Config ("owner", owner)); + + sloc.add (floc); + + Field fglo = Field (name, atlas::array::DataType::kind (), {irank == owner ? grid.size () : 0}); + fglo.metadata ().set ("owner", owner); + sglo.add (fglo); + } + + auto set = [](T & v, T r) { v = r; }; + auto cmp = [](T & v, T r) { EXPECT (v == r); }; + auto clear = [](T & v, T r) { v = 0; }; + + if (gather1) + { + if (check) + { + walkLoc (set); + walkGlo (clear); + } + + ATLAS_TRACE_SCOPE ("test_gather1") + { + fs.gather (sloc, sglo); + } + + if (check) + walkGlo (cmp); + + if (debug) prff ("sglo1", sglo); + } + + if (scatter1) + { + if (check) + { + walkGlo (set); + walkLoc (clear); + } + ATLAS_TRACE_SCOPE ("test_scatter1") + { + fs.scatter (sglo, sloc); + }; + if (check) + walkLoc (cmp); + } + + if (gather2) + { + GatherScatter gs (dist); + if (check) + { + walkLoc (set); + walkGlo (clear); + } + ATLAS_TRACE_SCOPE ("test_gather2") + { + std::vector dloc; + std::vector dglo; + + ATLAS_TRACE_SCOPE ("create io descriptors") + { + createIoFieldDescriptors (sloc, dloc, fs.sizeOwned ()); + createIoFieldDescriptors (sglo, dglo); + for (auto & d : dglo) + EXPECT (d.field ().metadata ().get ("owner", d.owner ())); + } + + gs.gather (dloc, dglo); + }; + + if (check) + walkGlo (cmp); + + if (debug) prff ("sglo2", sglo); + } + + if (scatter2) + { + GatherScatter gs (dist); + if (check) + { + walkGlo (set); + walkLoc (clear); + } + ATLAS_TRACE_SCOPE ("test_scatter2") + { + std::vector dloc; + std::vector dglo; + + ATLAS_TRACE_SCOPE ("create io descriptors") + { + createIoFieldDescriptors (sloc, dloc, fs.sizeOwned ()); + createIoFieldDescriptors (sglo, dglo); + for (auto & d : dglo) + EXPECT (d.field ().metadata ().get ("owner", d.owner ())); + } + + gs.scatter (dglo, dloc); + }; + + if (check) + walkLoc (cmp); + } + + + // Prevent error in Atlas barrier + comm.barrier (); + +} + + +} // namespace test +} // namespace atlas + +int main( int argc, char** argv ) { + return atlas::test::run( argc, argv ); +}