Skip to content

Commit

Permalink
Merge pull request #33 from yasahi-hpc/hotfix-thrust-version
Browse files Browse the repository at this point in the history
Hotfix thrust version
  • Loading branch information
yasahi-hpc authored Aug 8, 2023
2 parents 3bf18fb + 15d6fae commit c62bc94
Show file tree
Hide file tree
Showing 28 changed files with 204 additions and 100 deletions.
44 changes: 44 additions & 0 deletions lib/utils/device_utils.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#ifndef __DEVICE_UTILS_HPP__
#define __DEVICE_UTILS_HPP__

#include <cstdio>

namespace Impl {
#if defined(_NVHPC_CUDA) || defined(__CUDACC__)
inline void synchronize() {
cudaDeviceSynchronize();
}

inline void setDevice(int rank) {
int count;
int id;

cudaGetDeviceCount(&count);
cudaSetDevice(rank % count);
cudaGetDevice(&id);
printf("Process%d running on GPU%d\n", rank, id);
}
#elif defined(__HIPCC__)
#include <hip/hip_runtime.h>
inline void synchronize() {
[[maybe_unused]] hipError_t err = hipDeviceSynchronize();
}

inline void setDevice(int rank) {
int count;
int id;
hipError_t err;

err = hipGetDeviceCount(&count);
err = hipSetDevice(rank % count);
err = hipGetDevice(&id);
printf("Process%d running on GPU%d\n", rank, id);
}

#else
inline void synchronize() {}
inline void setDevice(int rank) {}
#endif
};

#endif
2 changes: 2 additions & 0 deletions mini-apps/heat3d-mpi/thrust/heat3D.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ void solve(const Config& conf,
heat3d_functor(conf, x_mask, y_mask, z_mask, u, un));
timers[Heat]->end();

std::swap(u, un);

timers[MainLoop]->end();
}
}
Expand Down
1 change: 1 addition & 0 deletions mini-apps/lbm2d-letkf/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ struct Settings {
bool is_async_ = false; // In order to enable overlapping, in senders/receivers version of letkf
bool is_bcast_on_host_ = false; // broadcast on device or host
bool use_time_stamps_ = false; // for detailed analysis
bool disable_output_ = false; // for performance measurements
double ly_epsilon_ = 1.e-8;

// data assimilation parameter
Expand Down
8 changes: 7 additions & 1 deletion mini-apps/lbm2d-letkf/executors/solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@ class Solver {
timers_[TimerEnum::MainLoop]->begin();

da_model_->apply(data_vars_, it, timers_);
model_->diag(data_vars_, it, timers_);
if(!conf_.settings_.disable_output_) {
model_->diag(data_vars_, it, timers_);
}

timers_[TimerEnum::LBMSolver]->begin();
model_->solve(data_vars_);
Expand Down Expand Up @@ -169,6 +171,10 @@ class Solver {
conf_.settings_.use_time_stamps_ = json_data["Settings"]["use_time_stamps"].get<bool>();
}

if(json_data["Settings"].contains("disable_output") ) {
conf_.settings_.disable_output_ = json_data["Settings"]["disable_output"].get<bool>();
}

// IO settings
io_conf_.base_dir_ = json_data["Settings"]["base_dir"].get<std::string>();
io_conf_.case_name_ = json_data["Settings"]["case_name"].get<std::string>();
Expand Down
8 changes: 7 additions & 1 deletion mini-apps/lbm2d-letkf/stdpar/solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@ class Solver {
timers_[TimerEnum::MainLoop]->begin();

da_model_->apply(data_vars_, it, timers_);
model_->diag(data_vars_, it, timers_);
if(!conf_.settings_.disable_output_) {
model_->diag(data_vars_, it, timers_);
}

timers_[TimerEnum::LBMSolver]->begin();
model_->solve(data_vars_);
Expand Down Expand Up @@ -161,6 +163,10 @@ class Solver {
conf_.settings_.use_time_stamps_ = json_data["Settings"]["use_time_stamps"].get<bool>();
}

if(json_data["Settings"].contains("disable_output") ) {
conf_.settings_.disable_output_ = json_data["Settings"]["disable_output"].get<bool>();
}

// IO settings
io_conf_.base_dir_ = json_data["Settings"]["base_dir"].get<std::string>();
io_conf_.case_name_ = json_data["Settings"]["case_name"].get<std::string>();
Expand Down
17 changes: 14 additions & 3 deletions mini-apps/lbm2d-letkf/thrust/da_models.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ class DA_Model {
int nb_expected_files = conf_.settings_.nbiter_ / conf_.settings_.io_interval_;
std::string variables[3] = {"rho", "u", "v"};
for(int it=0; it<nb_expected_files; it++) {
for(int i=0; i<3; i++) {
for(const auto& variable: variables) {
auto step = it * conf_.settings_.io_interval_;
auto file_name = base_dir_name_ + "/" + variables[i] + "_obs_step" + Impl::zfill(step, 10) + ".dat";
auto file_name = base_dir_name_ + "/" + variable + "_obs_step" + Impl::zfill(step, 10) + ".dat";
if(!Impl::isFileExists(file_name)) {
std::runtime_error("Expected observation file does not exist." + file_name);
}
Expand All @@ -54,12 +54,23 @@ class DA_Model {
from_file(data_vars->v_obs(), it);
}

void load(std::unique_ptr<DataVars>& data_vars, const std::string variable, const int it) {
if(variable == "rho") {
from_file(data_vars->rho_obs(), it);
} else if(variable == "u") {
from_file(data_vars->u_obs(), it);
} else if(variable == "v") {
from_file(data_vars->v_obs(), it);
}
}

private:
template <class ViewType>
void from_file(ViewType& value, const int step) {
auto file_name = base_dir_name_ + "/" + value.name() + "_step" + Impl::zfill(step, 10) + ".dat";
auto mdspan = value.mdspan();
auto mdspan = value.host_mdspan();
Impl::from_binary(file_name, mdspan);
value.updateDevice();
}

};
Expand Down
21 changes: 15 additions & 6 deletions mini-apps/lbm2d-letkf/thrust/force.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,11 @@ struct Force {
const auto x = x_.mdspan();
const auto y = y_.mdspan();
const auto rand_pool = rand_pool_.mdspan();
//const auto sub_rand_pool = stdex::submdspan(rand_pool, std::full_extent_t, std::full_extent_t, shift);
const auto sub_rand_pool = stdex::submdspan(rand_pool, std::full_extent, std::full_extent, shift);
auto fx = fx_.mdspan();
auto fy = fy_.mdspan();

auto force_lambda = [=](const int ix, const int iy) {
auto force_lambda = [=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) {
const auto x_tmp = x(ix);
const auto y_tmp = y(iy);
value_type fx_tmp = 0.0, fy_tmp = 0.0;
Expand All @@ -68,10 +68,10 @@ struct Force {
const auto sine = sin(theta);
const auto cosi = cos(theta);
const value_type r[4] = {
rand_pool(n, 0, shift),
rand_pool(n, 1, shift),
rand_pool(n, 2, shift),
rand_pool(n, 3, shift),
sub_rand_pool(n, 0),
sub_rand_pool(n, 1),
sub_rand_pool(n, 2),
sub_rand_pool(n, 3)
};

const auto amp_tmp = amp(n);
Expand Down Expand Up @@ -183,6 +183,15 @@ struct Force {
amp_(i) = force_amp.at(i);
}

// deep copy to devices
kx_.updateDevice();
ky_.updateDevice();
amp_.updateDevice();
x_.updateDevice();
y_.updateDevice();
rand_pool_.updateDevice();
fx_.updateDevice();
fy_.updateDevice();
}
};

Expand Down
100 changes: 30 additions & 70 deletions mini-apps/lbm2d-letkf/thrust/lbm2d.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class LBM2D : public Model {
const auto _theta = theta.mdspan();
value_type rho_ref = static_cast<value_type>(conf_.phys_.rho_ref_);

auto init_fluid_moments = [=](const int ix, const int iy) {
auto init_fluid_moments = [=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) {
// fluid
value_type u_tmp = 0.0;
value_type v_tmp = 0.0;
Expand Down Expand Up @@ -126,11 +126,11 @@ class LBM2D : public Model {
};

auto max_operator =
[=](const auto& lhs, const auto& rhs) { return std::max(lhs, rhs); };
[=] MDSPAN_FORCE_INLINE_FUNCTION (const auto& lhs, const auto& rhs) { return std::max(lhs, rhs); };

Impl::transform_reduce(policy2d, max_operator, max_speed, vmax);
Impl::for_each(policy2d,
[=](const int ix, const int iy) {
[=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) {
u(ix, iy) *= u_ref / vmax * p_amp;
v(ix, iy) *= u_ref / vmax * p_amp;
});
Expand Down Expand Up @@ -202,11 +202,10 @@ class LBM2D : public Model {

// Save values calculated by this ensemble member
// Save simulation results without noises
std::string sim_result_name = "calc";
auto rho = data_vars->rho();
auto u = data_vars->u();
auto v = data_vars->v();
save_to_files(sim_result_name, rho, u, v, it);
save_to_files("calc", rho, u, v, it);

// Save noisy results
if(is_reference_) {
Expand Down Expand Up @@ -344,7 +343,7 @@ class LBM2D : public Model {
moment_type moments = {0, 0, 0, 0, 0, 0, 0, 0, 0};

auto moment_kernel =
[=](const int ix, const int iy) {
[=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) {
auto tmp_rho = rho(ix, iy);
auto tmp_u = u(ix, iy);
auto tmp_v = v(ix, iy);
Expand Down Expand Up @@ -372,10 +371,10 @@ class LBM2D : public Model {
const double vel2 = tmp_u * tmp_u + tmp_v * tmp_v;

return moment_type {momentum_x, momentum_y, energy, enstrophy, nus, mass, divu2, divu, vel2};
};
};

auto sum_operator =
[=] (const moment_type& left, const moment_type& right) {
[=] MDSPAN_FORCE_INLINE_FUNCTION (const moment_type& left, const moment_type& right) {
return moment_type {std::get<0>(left) + std::get<0>(right),
std::get<1>(left) + std::get<1>(right),
std::get<2>(left) + std::get<2>(right),
Expand All @@ -386,17 +385,16 @@ class LBM2D : public Model {
std::get<7>(left) + std::get<7>(right),
std::get<8>(left) + std::get<8>(right)
};
};
};

Iterate_policy<2> policy2d({0, 0}, {nx, ny});
Impl::transform_reduce(policy2d, sum_operator, moment_kernel, moments);

/* [FIX THIS] transform reduce to get multiple max elements does not work correctly???
using maximum_type = std::tuple<double, double, double>;
maximum_type maximums = {0, 0, 0};
using minmax_type = std::tuple<double, double, double, double>;
minmax_type minmaxs = {0, 0, 0, 10000};
// Compute maximum
auto maximum_kernel =
[=](const int ix, const int iy) {
auto minmax_kernel =
[=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) {
auto tmp_rho = rho(ix, iy);
auto tmp_u = u(ix, iy);
auto tmp_v = v(ix, iy);
Expand All @@ -415,60 +413,24 @@ class LBM2D : public Model {
auto maxdivu = std::abs(ux + vy);
auto maxvel2 = tmp_u * tmp_u + tmp_v * tmp_v;

return maximum_type {maxdivu, maxvel2, tmp_rho};
};
return minmax_type {maxdivu, maxvel2, tmp_rho, tmp_rho};
};

auto max_operator =
[=] (const maximum_type& left, const maximum_type& right) {
return maximum_type {std::max( std::get<0>(left), std::get<0>(right) ),
std::max( std::get<1>(left), std::get<1>(right) ),
std::max( std::get<2>(left), std::get<2>(right) )
};
};
Impl::transform_reduce(policy2d, max_operator, maximum_kernel, maximums);
// Compute minimum
double rho_min = 9999; // some large number
auto minimum_kernel =
[=](const int ix, const int iy) { return rho(ix, iy); };
auto min_operator =
[=] (const auto& left, const auto& right) { return std::min(left, right); };
Impl::transform_reduce(policy2d, min_operator, minimum_kernel, rho_min);
auto maxvel2 = std::get<0>(maximums);
auto maxdivu = std::get<1>(maximums);
auto rho_max = std::get<2>(maximums);
*/

// To be removed
value_type maxdivu = 0;
value_type maxvel2 = 0;
value_type rho_max = 0;
value_type rho_min = 9999;
auto minmax_operator =
[=] MDSPAN_FORCE_INLINE_FUNCTION (const minmax_type& left, const minmax_type& right) {
return minmax_type {thrust::max( std::get<0>(left), std::get<0>(right) ),
thrust::max( std::get<1>(left), std::get<1>(right) ),
thrust::max( std::get<2>(left), std::get<2>(right) ),
thrust::min( std::get<3>(left), std::get<3>(right) )
};
};
Impl::transform_reduce(policy2d, minmax_operator, minmax_kernel, minmaxs);

for(int iy=0; iy<ny; iy++) {
for(int ix=0; ix<nx; ix++) {
auto tmp_rho = rho(ix, iy);
auto tmp_u = u(ix, iy);
auto tmp_v = v(ix, iy);
auto maxvel2 = std::get<0>(minmaxs);
auto maxdivu = std::get<1>(minmaxs);
auto rho_max = std::get<2>(minmaxs);
auto rho_min = std::get<3>(minmaxs);

// derivatives
const int ixp1 = periodic(ix+1, nx);
const int ixm1 = periodic(ix-1, nx);
const int iyp1 = periodic(iy+1, ny);
const int iym1 = periodic(iy-1, ny);

const value_type ux = (u(ixp1, iy) - u(ixm1, iy)) / (2*dx);
const value_type uy = (u(ix, iyp1) - u(ix, iym1)) / (2*dx);
const value_type vx = (v(ixp1, iy) - v(ixm1, iy)) / (2*dx);
const value_type vy = (v(ix, iyp1) - v(ix, iym1)) / (2*dx);

maxdivu = std::max(maxdivu, std::abs(ux + vy));
maxvel2 = std::max(maxvel2, tmp_u * tmp_u + tmp_v * tmp_v);
rho_max = std::max(rho_max, tmp_rho);
rho_min = std::min(rho_min, tmp_rho);
}
}
auto momentum_x_total = std::get<0>(moments) / (nx * ny);
auto momentum_y_total = std::get<1>(moments) / (nx * ny);
auto energy = std::get<2>(moments) / (nx * ny);
Expand Down Expand Up @@ -517,7 +479,7 @@ class LBM2D : public Model {

Iterate_policy<2> policy2d({0, 0}, {nx, ny});
Impl::for_each(policy2d,
[=](const int ix, const int iy) {
[=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) {
noisy_value_tmp(ix, iy) = value_tmp(ix, iy) + error * noise_tmp(ix, iy);
});
}
Expand Down Expand Up @@ -549,10 +511,8 @@ class LBM2D : public Model {
void to_file(std::string case_name, ViewType& value, const int it) {
auto dir_name = directory_names_.at(case_name);
value.updateSelf();

std::string file_name = dir_name + "/" + value.name() + "_step"
+ Impl::zfill(it, 10) + ".dat";
Impl::to_binary(file_name, value.mdspan());
std::string file_name = dir_name + "/" + value.name() + "_step" + Impl::zfill(it, 10) + ".dat";
Impl::to_binary(file_name, value.host_mdspan());
}
};

Expand Down
Loading

0 comments on commit c62bc94

Please sign in to comment.