Skip to content

Commit

Permalink
Merge pull request #23 from yasahi-hpc/overlapping-with-executors
Browse files Browse the repository at this point in the history
Overlapping with executors
  • Loading branch information
yasahi-hpc authored Jul 1, 2023
2 parents 0c8020e + 8fb4c57 commit 52843f4
Show file tree
Hide file tree
Showing 23 changed files with 508 additions and 169 deletions.
17 changes: 13 additions & 4 deletions lib/executors/View.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#include <numeric>
#include <cassert>
#include <algorithm>
#if ! defined(ENABLE_OPENMP) && (defined(_NVHPC_CUDA) || defined(__CUDACC__))
#include "../Cuda_Helper.hpp"
#endif

/* [TO DO] Check the behaviour of thrust::device_vector if it is configured for CPUs */
template <typename ElementType>
Expand Down Expand Up @@ -217,14 +220,20 @@ class View {
inline void setIsEmpty(bool is_empty) { is_empty_ = is_empty; }

void updateDevice() {
#if ! defined(ENABLE_OPENMP) && (defined(_NVHPC_CUDA) || defined(__CUDACC__) || defined(__HIPCC__))
device_vector_ = host_vector_;
//#if ! defined(ENABLE_OPENMP) && (defined(_NVHPC_CUDA) || defined(__CUDACC__) || defined(__HIPCC__))
// device_vector_ = host_vector_;
//#endif
#if ! defined(ENABLE_OPENMP) && (defined(_NVHPC_CUDA) || defined(__CUDACC__))
SafeCudaCall( cudaMemcpy(device_data_, host_data_, size_ * sizeof(value_type), cudaMemcpyHostToDevice) );
#endif
}

void updateSelf() {
#if ! defined(ENABLE_OPENMP) && (defined(_NVHPC_CUDA) || defined(__CUDACC__) || defined(__HIPCC__))
host_vector_ = device_vector_;
//#if ! defined(ENABLE_OPENMP) && (defined(_NVHPC_CUDA) || defined(__CUDACC__) || defined(__HIPCC__))
// host_vector_ = device_vector_;
//#endif
#if ! defined(ENABLE_OPENMP) && (defined(_NVHPC_CUDA) || defined(__CUDACC__))
SafeCudaCall( cudaMemcpy(host_data_, device_data_, size_ * sizeof(value_type), cudaMemcpyDeviceToHost) );
#endif
}

Expand Down
1 change: 1 addition & 0 deletions mini-apps/lbm2d-letkf/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ struct Settings {
bool lyapnov_ = false;
bool is_les_ = true;
bool is_reference_ = true; // false for DA cases
bool is_async_ = false; // In order to enable overlapping, in senders/receivers version of letkf
double ly_epsilon_ = 1.e-8;

// data assimilation parameter
Expand Down
28 changes: 12 additions & 16 deletions mini-apps/lbm2d-letkf/executors/da_models.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "../config.hpp"
#include "../io_config.hpp"
#include "../mpi_config.hpp"
#include "../timer.hpp"
#include "data_vars.hpp"

class DA_Model {
Expand All @@ -19,15 +20,15 @@ class DA_Model {

public:
DA_Model(Config& conf, IOConfig& io_conf) : conf_(conf), io_conf_(io_conf) {
base_dir_name_ = io_conf_.base_dir_ + "/" + io_conf_.in_case_name_;
base_dir_name_ = io_conf_.base_dir_ + "/" + io_conf_.in_case_name_ + "/observed/ens0000";
}

DA_Model(Config& conf, IOConfig& io_conf, MPIConfig& mpi_conf) : conf_(conf), io_conf_(io_conf) {
base_dir_name_ = io_conf_.base_dir_ + "/" + io_conf_.in_case_name_;
base_dir_name_ = io_conf_.base_dir_ + "/" + io_conf_.in_case_name_ + "/observed/ens0000";
}
virtual ~DA_Model(){}
virtual void initialize()=0;
virtual void apply(std::unique_ptr<DataVars>& data_vars, const int it)=0;
virtual void apply(std::unique_ptr<DataVars>& data_vars, const int it, std::vector<Timer*>& timers)=0;
virtual void diag()=0;
virtual void finalize()=0;

Expand All @@ -36,8 +37,9 @@ class DA_Model {
int nb_expected_files = conf_.settings_.nbiter_ / conf_.settings_.io_interval_;
std::string variables[3] = {"rho", "u", "v"};
for(int it=0; it<nb_expected_files; it++) {
for(int i=0; i<3; i++) {
auto file_name = base_dir_name_ + "/" + variables[i] + "obs_step" + Impl::zfill(it, 10) + ".dat";
for(const auto& variable: variables) {
auto step = it * conf_.settings_.io_interval_;
auto file_name = base_dir_name_ + "/" + variable + "_obs_step" + Impl::zfill(step, 10) + ".dat";
if(!Impl::isFileExists(file_name)) {
std::runtime_error("Expected observation file does not exist." + file_name);
}
Expand All @@ -46,21 +48,15 @@ class DA_Model {
}

void load(std::unique_ptr<DataVars>& data_vars, const int it) {
auto step = it / conf_.settings_.io_interval_;
if(step % conf_.settings_.da_interval_ != 0) {
std::cout << __PRETTY_FUNCTION__ << ": t=" << it << ": skip" << std::endl;
return;
};
from_file(data_vars->rho_obs(), step);
from_file(data_vars->u_obs(), step);
from_file(data_vars->v_obs(), step);
from_file(data_vars->rho_obs(), it);
from_file(data_vars->u_obs(), it);
from_file(data_vars->v_obs(), it);
}

private:
template <class ViewType>
void from_file(ViewType& value, const int step) {
auto file_name = base_dir_name_ + "/" + value.name() + "_step"
+ Impl::zfill(step, 10) + ".dat";
auto file_name = base_dir_name_ + "/" + value.name() + "_step" + Impl::zfill(step, 10) + ".dat";
auto mdspan = value.host_mdspan();
Impl::from_binary(file_name, mdspan);
value.updateDevice();
Expand All @@ -76,7 +72,7 @@ class NonDA : public DA_Model {
NonDA(Config& conf, IOConfig& io_conf, MPIConfig& mpi_conf) : DA_Model(conf, io_conf, mpi_conf) {}
virtual ~NonDA(){}
void initialize() {}
void apply(std::unique_ptr<DataVars>& data_vars, const int it){};
void apply(std::unique_ptr<DataVars>& data_vars, const int it, std::vector<Timer*>& timers){};
void diag(){};
void finalize(){};
};
Expand Down
124 changes: 36 additions & 88 deletions mini-apps/lbm2d-letkf/executors/lbm2d.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

class LBM2D : public Model {
private:
using value_type = RealView2D::value_type;
bool is_master_ = true;
bool is_reference_ = true;

Expand All @@ -30,7 +31,7 @@ class LBM2D : public Model {
RealView2D noise_;

// Observation
Impl::Random<double> rand_;
Impl::Random<value_type> rand_;

// Force term
std::unique_ptr<Force> force_;
Expand Down Expand Up @@ -143,7 +144,6 @@ class LBM2D : public Model {
// Initialize force term
force_ = std::move( std::unique_ptr<Force>(new Force(conf_)) );


// Initialize IO
const std::string out_dir = io_conf_.base_dir_ + "/" + io_conf_.case_name_;

Expand All @@ -163,10 +163,6 @@ class LBM2D : public Model {
}

void reset(std::unique_ptr<DataVars>& data_vars, const std::string mode) {
// Always reset counts
it_ = 0;
diag_it_ = 0;

if(mode == "purturbulate") {
purturbulate(data_vars);
}
Expand All @@ -188,11 +184,9 @@ class LBM2D : public Model {
auto& f = data_vars->f();
auto& fn = data_vars->fn();
fn.swap(f);

it_++;
}

void diag(std::unique_ptr<DataVars>& data_vars){
void diag(std::unique_ptr<DataVars>& data_vars, const int it, std::vector<Timer*>& timers){
/*
* 0. Nature run or perturbed run (as reference)
* Save rho, u, v and vor into /nature (as is) and /observed (with noise)
Expand All @@ -201,25 +195,27 @@ class LBM2D : public Model {
* Save rho, u, v and vor into /calc (as is)
*
* */
if(it_ % conf_.settings_.io_interval_ != 0) return;
if(is_master_) inspect(data_vars);
if(it % conf_.settings_.io_interval_ != 0) return;

timers[TimerEnum::Diag]->begin();
if(is_master_) inspect(data_vars, it);

// Save values calculated by this ensemble member
// Save simulation results without noises
std::string sim_result_name = "calc";
auto rho = data_vars->rho();
auto u = data_vars->u();
auto v = data_vars->v();
save_to_files(sim_result_name, rho, u, v, it_);
save_to_files("calc", rho, u, v, it);

// Save noisy results
if(is_reference_) {
observe(data_vars); // adding noise to u_obs_, v_obs_, rho_obs_
auto rho_obs = data_vars->rho_obs();
auto u_obs = data_vars->u_obs();
auto v_obs = data_vars->v_obs();
save_to_files("observed", rho_obs, u_obs, v_obs, it_);
save_to_files("observed", rho_obs, u_obs, v_obs, it);
}
timers[TimerEnum::Diag]->end();
}

void finalize() {}
Expand Down Expand Up @@ -329,15 +325,11 @@ class LBM2D : public Model {
}

private:
void inspect(std::unique_ptr<DataVars>& data_vars) {
void inspect(std::unique_ptr<DataVars>& data_vars, const int it) {
auto [nx, ny] = conf_.settings_.n_;
auto dx = conf_.settings_.dx_;
auto u_ref = conf_.phys_.u_ref_;

data_vars->rho().updateSelf();
data_vars->u().updateSelf();
data_vars->v().updateSelf();
nu_.updateSelf();
auto rho = data_vars->rho().mdspan();
auto u = data_vars->u().mdspan();
auto v = data_vars->v().mdspan();
Expand All @@ -347,7 +339,7 @@ class LBM2D : public Model {
moment_type moments = {0, 0, 0, 0, 0, 0, 0, 0, 0};

auto moment_kernel =
[=](const int ix, const int iy) {
[=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) {
auto tmp_rho = rho(ix, iy);
auto tmp_u = u(ix, iy);
auto tmp_v = v(ix, iy);
Expand Down Expand Up @@ -378,7 +370,7 @@ class LBM2D : public Model {
};

auto sum_operator =
[=] (const moment_type& left, const moment_type& right) {
[=] MDSPAN_FORCE_INLINE_FUNCTION (const moment_type& left, const moment_type& right) {
return moment_type {std::get<0>(left) + std::get<0>(right),
std::get<1>(left) + std::get<1>(right),
std::get<2>(left) + std::get<2>(right),
Expand All @@ -394,12 +386,11 @@ class LBM2D : public Model {
Iterate_policy<2> policy2d({0, 0}, {nx, ny});
Impl::transform_reduce(policy2d, sum_operator, moment_kernel, moments);

/* [FIX THIS] transform reduce to get multiple max elements does not work correctly???
using maximum_type = std::tuple<double, double, double>;
maximum_type maximums = {0, 0, 0};
using minmax_type = std::tuple<double, double, double, double>;
minmax_type minmaxs = {0, 0, 0, 10000};
// Compute maximum
auto maximum_kernel =
[=](const int ix, const int iy) {
auto minmax_kernel =
[=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) {
auto tmp_rho = rho(ix, iy);
auto tmp_u = u(ix, iy);
auto tmp_v = v(ix, iy);
Expand All @@ -418,67 +409,24 @@ class LBM2D : public Model {
auto maxdivu = std::abs(ux + vy);
auto maxvel2 = tmp_u * tmp_u + tmp_v * tmp_v;

return maximum_type {maxdivu, maxvel2, tmp_rho};
return minmax_type {maxdivu, maxvel2, tmp_rho, tmp_rho};
};

auto max_operator =
[=] (const maximum_type& left, const maximum_type& right) {
return maximum_type {std::max( std::get<0>(left), std::get<0>(right) ),
std::max( std::get<1>(left), std::get<1>(right) ),
std::max( std::get<2>(left), std::get<2>(right) )
};
auto minmax_operator =
[=] MDSPAN_FORCE_INLINE_FUNCTION (const minmax_type& left, const minmax_type& right) {
return minmax_type {thrust::max( std::get<0>(left), std::get<0>(right) ),
thrust::max( std::get<1>(left), std::get<1>(right) ),
thrust::max( std::get<2>(left), std::get<2>(right) ),
thrust::min( std::get<3>(left), std::get<3>(right) )
};
};
Impl::transform_reduce(policy2d, max_operator, maximum_kernel, maximums);
// Compute minimum
double rho_min = 9999; // some large number
auto minimum_kernel =
[=](const int ix, const int iy) { return rho(ix, iy); };
auto min_operator =
[=] (const auto& left, const auto& right) { return std::min(left, right); };
Impl::transform_reduce(policy2d, min_operator, minimum_kernel, rho_min);
auto maxvel2 = std::get<0>(maximums);
auto maxdivu = std::get<1>(maximums);
auto rho_max = std::get<2>(maximums);
*/

// To be removed
double maxdivu = 0;
double maxvel2 = 0;
double rho_max = 0;
double rho_min = 9999;
Impl::transform_reduce(policy2d, minmax_operator, minmax_kernel, minmaxs);

auto _rho = data_vars->rho();
auto _u = data_vars->u();
auto _v = data_vars->v();
auto maxvel2 = std::get<0>(minmaxs);
auto maxdivu = std::get<1>(minmaxs);
auto rho_max = std::get<2>(minmaxs);
auto rho_min = std::get<3>(minmaxs);

_rho.updateSelf();
_u.updateSelf();
_v.updateSelf();
for(int iy=0; iy<ny; iy++) {
for(int ix=0; ix<nx; ix++) {
auto tmp_rho = _rho(ix, iy);
auto tmp_u = _u(ix, iy);
auto tmp_v = _v(ix, iy);

// derivatives
const int ixp1 = periodic(ix+1, nx);
const int ixm1 = periodic(ix-1, nx);
const int iyp1 = periodic(iy+1, ny);
const int iym1 = periodic(iy-1, ny);

const auto ux = (_u(ixp1, iy) - _u(ixm1, iy)) / (2*dx);
const auto uy = (_u(ix, iyp1) - _u(ix, iym1)) / (2*dx);
const auto vx = (_v(ixp1, iy) - _v(ixm1, iy)) / (2*dx);
const auto vy = (_v(ix, iyp1) - _v(ix, iym1)) / (2*dx);

maxdivu = std::max(maxdivu, std::abs(ux + vy));
maxvel2 = std::max(maxvel2, tmp_u * tmp_u + tmp_v * tmp_v);
rho_max = std::max(rho_max, tmp_rho);
rho_min = std::min(rho_min, tmp_rho);
}
}
auto momentum_x_total = std::get<0>(moments) / (nx * ny);
auto momentum_y_total = std::get<1>(moments) / (nx * ny);
auto energy = std::get<2>(moments) / (nx * ny);
Expand All @@ -490,6 +438,7 @@ class LBM2D : public Model {
auto vel2 = std::get<8>(moments) / (nx * ny);

std::cout << std::scientific << std::setprecision(16) << std::flush;
std::cout << " it/nbiter: " << it << "/" << conf_.settings_.nbiter_ << std::endl;
std::cout << " RMS, max speed: " << std::sqrt(vel2) << ", " << std::sqrt(maxvel2) << " [m/s]" << std::endl;
//std::cout << " mean energy: " << energy << " [m2/s2]" << std::endl;
//std::cout << " mean enstrophy: " << enstrophy << " [/s2]" << std::endl;
Expand All @@ -515,18 +464,18 @@ class LBM2D : public Model {
}

template <class ViewType>
void add_noise(const ViewType& value, ViewType& noisy_value, const double error=0.0) {
void add_noise(const ViewType& value, ViewType& noisy_value, const value_type error=0.0) {
auto [nx, ny] = conf_.settings_.n_;
const auto value_tmp = value.mdspan();
auto noisy_value_tmp = noisy_value.mdspan();
const auto noise_tmp = noise_.mdspan();
auto noise_tmp = noise_.mdspan();

const double mean = 0.0, stddev = 1.0;
const value_type mean = 0.0, stddev = 1.0;
rand_.normal(noise_.data(), nx*ny, mean, stddev);

Iterate_policy<2> policy2d({0, 0}, {nx, ny});
Impl::for_each(policy2d,
[=](const int ix, const int iy) {
[=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) {
noisy_value_tmp(ix, iy) = value_tmp(ix, iy) + error * noise_tmp(ix, iy);
});
}
Expand Down Expand Up @@ -558,8 +507,7 @@ class LBM2D : public Model {
void to_file(std::string case_name, ViewType& value, const int it) {
auto dir_name = directory_names_.at(case_name);
value.updateSelf();
std::string file_name = dir_name + "/" + value.name() + "_step"
+ Impl::zfill(it / conf_.settings_.io_interval_, 10) + ".dat";
std::string file_name = dir_name + "/" + value.name() + "_step" + Impl::zfill(it, 10) + ".dat";
Impl::to_binary(file_name, value.host_mdspan());
}
};
Expand Down
Loading

0 comments on commit 52843f4

Please sign in to comment.