Merge pull request #33 from yasahi-hpc/hotfix-thrust-version

Hotfix thrust version
yasahi-hpc · Aug 8, 2023 · c62bc94 · c62bc94
2 parents 3bf18fb + 15d6fae
commit c62bc94
Show file tree

Hide file tree

Showing 28 changed files with 204 additions and 100 deletions.
diff --git a/lib/utils/device_utils.hpp b/lib/utils/device_utils.hpp
@@ -0,0 +1,44 @@
+#ifndef __DEVICE_UTILS_HPP__
+#define __DEVICE_UTILS_HPP__
+
+#include <cstdio>
+
+namespace Impl {
+  #if defined(_NVHPC_CUDA) || defined(__CUDACC__)
+    inline void synchronize() {
+      cudaDeviceSynchronize();
+    }
+
+    inline void setDevice(int rank) {
+      int count;
+      int id;
+
+      cudaGetDeviceCount(&count);
+      cudaSetDevice(rank % count);
+      cudaGetDevice(&id);
+      printf("Process%d running on GPU%d\n", rank, id);
+    }
+  #elif defined(__HIPCC__)
+    #include <hip/hip_runtime.h>
+    inline void synchronize() {
+      [[maybe_unused]] hipError_t err = hipDeviceSynchronize();
+    }
+
+    inline void setDevice(int rank) {
+      int count;
+      int id;
+      hipError_t err;
+
+      err = hipGetDeviceCount(&count);
+      err = hipSetDevice(rank % count);
+      err = hipGetDevice(&id);
+      printf("Process%d running on GPU%d\n", rank, id);
+    }
+
+  #else
+    inline void synchronize() {}
+    inline void setDevice(int rank) {}
+  #endif
+};
+
+#endif
diff --git a/mini-apps/heat3d-mpi/thrust/heat3D.hpp b/mini-apps/heat3d-mpi/thrust/heat3D.hpp
@@ -74,6 +74,8 @@ void solve(const Config& conf,
                      heat3d_functor(conf, x_mask, y_mask, z_mask, u, un));
     timers[Heat]->end();
 
+    std::swap(u, un);
+
     timers[MainLoop]->end();
   }
 }

diff --git a/mini-apps/lbm2d-letkf/config.hpp b/mini-apps/lbm2d-letkf/config.hpp
@@ -71,6 +71,7 @@ struct Settings {
   bool is_async_ = false; // In order to enable overlapping, in senders/receivers version of letkf
   bool is_bcast_on_host_ = false; // broadcast on device or host
   bool use_time_stamps_ = false; // for detailed analysis
+  bool disable_output_ = false; // for performance measurements
   double ly_epsilon_ = 1.e-8;
 
   // data assimilation parameter

diff --git a/mini-apps/lbm2d-letkf/executors/solver.hpp b/mini-apps/lbm2d-letkf/executors/solver.hpp
@@ -73,7 +73,9 @@ class Solver {
       timers_[TimerEnum::MainLoop]->begin();
 
       da_model_->apply(data_vars_, it, timers_);
-      model_->diag(data_vars_, it, timers_);
+      if(!conf_.settings_.disable_output_) {
+        model_->diag(data_vars_, it, timers_);
+      }
 
       timers_[TimerEnum::LBMSolver]->begin();
       model_->solve(data_vars_);
@@ -169,6 +171,10 @@ class Solver {
       conf_.settings_.use_time_stamps_ = json_data["Settings"]["use_time_stamps"].get<bool>();
     }
 
+    if(json_data["Settings"].contains("disable_output") ) {
+      conf_.settings_.disable_output_ = json_data["Settings"]["disable_output"].get<bool>();
+    }
+
     // IO settings
     io_conf_.base_dir_     = json_data["Settings"]["base_dir"].get<std::string>();
     io_conf_.case_name_    = json_data["Settings"]["case_name"].get<std::string>();

diff --git a/mini-apps/lbm2d-letkf/stdpar/solver.hpp b/mini-apps/lbm2d-letkf/stdpar/solver.hpp
@@ -73,7 +73,9 @@ class Solver {
       timers_[TimerEnum::MainLoop]->begin();
 
       da_model_->apply(data_vars_, it, timers_);
-      model_->diag(data_vars_, it, timers_);
+      if(!conf_.settings_.disable_output_) {
+        model_->diag(data_vars_, it, timers_);
+      }
 
       timers_[TimerEnum::LBMSolver]->begin();
       model_->solve(data_vars_);
@@ -161,6 +163,10 @@ class Solver {
       conf_.settings_.use_time_stamps_ = json_data["Settings"]["use_time_stamps"].get<bool>();
     }
 
+    if(json_data["Settings"].contains("disable_output") ) {
+      conf_.settings_.disable_output_ = json_data["Settings"]["disable_output"].get<bool>();
+    }
+
     // IO settings
     io_conf_.base_dir_     = json_data["Settings"]["base_dir"].get<std::string>();
     io_conf_.case_name_    = json_data["Settings"]["case_name"].get<std::string>();

diff --git a/mini-apps/lbm2d-letkf/thrust/da_models.hpp b/mini-apps/lbm2d-letkf/thrust/da_models.hpp
@@ -38,9 +38,9 @@ class DA_Model {
     int nb_expected_files = conf_.settings_.nbiter_ / conf_.settings_.io_interval_;
     std::string variables[3] = {"rho", "u", "v"};
     for(int it=0; it<nb_expected_files; it++) {
-      for(int i=0; i<3; i++) {
+      for(const auto& variable: variables) {
         auto step = it * conf_.settings_.io_interval_;
-        auto file_name = base_dir_name_ + "/" + variables[i] + "_obs_step" + Impl::zfill(step, 10) + ".dat";
+        auto file_name = base_dir_name_ + "/" + variable + "_obs_step" + Impl::zfill(step, 10) + ".dat";
         if(!Impl::isFileExists(file_name)) {
           std::runtime_error("Expected observation file does not exist." + file_name);
         }
@@ -54,12 +54,23 @@ class DA_Model {
     from_file(data_vars->v_obs(), it);
   }
 
+  void load(std::unique_ptr<DataVars>& data_vars, const std::string variable, const int it) {
+    if(variable == "rho") {
+      from_file(data_vars->rho_obs(), it);
+    } else if(variable == "u") {
+      from_file(data_vars->u_obs(), it);
+    } else if(variable == "v") {
+      from_file(data_vars->v_obs(), it);
+    }
+  }
+
 private:
   template <class ViewType>
   void from_file(ViewType& value, const int step) {
     auto file_name = base_dir_name_ + "/" + value.name() + "_step" + Impl::zfill(step, 10) + ".dat";
-    auto mdspan = value.mdspan();
+    auto mdspan = value.host_mdspan();
     Impl::from_binary(file_name, mdspan);
+    value.updateDevice();
   }
 
 };

diff --git a/mini-apps/lbm2d-letkf/thrust/force.hpp b/mini-apps/lbm2d-letkf/thrust/force.hpp
@@ -53,11 +53,11 @@ struct Force {
     const auto x = x_.mdspan();
     const auto y = y_.mdspan();
     const auto rand_pool = rand_pool_.mdspan();
-    //const auto sub_rand_pool = stdex::submdspan(rand_pool, std::full_extent_t, std::full_extent_t, shift);
+    const auto sub_rand_pool = stdex::submdspan(rand_pool, std::full_extent, std::full_extent, shift);
     auto fx = fx_.mdspan();
     auto fy = fy_.mdspan();
 
-    auto force_lambda = [=](const int ix, const int iy) {
+    auto force_lambda = [=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) {
       const auto x_tmp = x(ix);
       const auto y_tmp = y(iy);
       value_type fx_tmp = 0.0, fy_tmp = 0.0;
@@ -68,10 +68,10 @@ struct Force {
         const auto sine = sin(theta);
         const auto cosi = cos(theta);
         const value_type r[4] = {
-          rand_pool(n, 0, shift),
-          rand_pool(n, 1, shift),
-          rand_pool(n, 2, shift),
-          rand_pool(n, 3, shift),
+          sub_rand_pool(n, 0),
+          sub_rand_pool(n, 1),
+          sub_rand_pool(n, 2),
+          sub_rand_pool(n, 3)
         };
 
         const auto amp_tmp = amp(n);
@@ -183,6 +183,15 @@ struct Force {
       amp_(i) = force_amp.at(i);
     }
 
+    // deep copy to devices
+    kx_.updateDevice();
+    ky_.updateDevice();
+    amp_.updateDevice();
+    x_.updateDevice();
+    y_.updateDevice();
+    rand_pool_.updateDevice();
+    fx_.updateDevice();
+    fy_.updateDevice();
   }
 };
 

diff --git a/mini-apps/lbm2d-letkf/thrust/lbm2d.hpp b/mini-apps/lbm2d-letkf/thrust/lbm2d.hpp
@@ -95,7 +95,7 @@ class LBM2D : public Model {
     const auto _theta = theta.mdspan();
     value_type rho_ref = static_cast<value_type>(conf_.phys_.rho_ref_);
 
-    auto init_fluid_moments = [=](const int ix, const int iy) {
+    auto init_fluid_moments = [=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) {
       // fluid
       value_type u_tmp = 0.0;
       value_type v_tmp = 0.0;
@@ -126,11 +126,11 @@ class LBM2D : public Model {
     };
 
     auto max_operator = 
-      [=](const auto& lhs, const auto& rhs) { return std::max(lhs, rhs); };
+      [=] MDSPAN_FORCE_INLINE_FUNCTION (const auto& lhs, const auto& rhs) { return std::max(lhs, rhs); };
 
     Impl::transform_reduce(policy2d, max_operator, max_speed, vmax);
     Impl::for_each(policy2d,
-      [=](const int ix, const int iy) { 
+      [=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) {
         u(ix, iy) *= u_ref / vmax * p_amp;
         v(ix, iy) *= u_ref / vmax * p_amp;
     });
@@ -202,11 +202,10 @@ class LBM2D : public Model {
 
     // Save values calculated by this ensemble member
     // Save simulation results without noises
-    std::string sim_result_name = "calc";
     auto rho = data_vars->rho();
     auto u = data_vars->u();
     auto v = data_vars->v();
-    save_to_files(sim_result_name, rho, u, v, it);
+    save_to_files("calc", rho, u, v, it);
 
     // Save noisy results
     if(is_reference_) {
@@ -344,7 +343,7 @@ class LBM2D : public Model {
     moment_type moments = {0, 0, 0, 0, 0, 0, 0, 0, 0};
 
     auto moment_kernel = 
-      [=](const int ix, const int iy) {
+      [=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) {
         auto tmp_rho = rho(ix, iy);
         auto tmp_u   = u(ix, iy);
         auto tmp_v   = v(ix, iy);
@@ -372,10 +371,10 @@ class LBM2D : public Model {
         const double vel2 = tmp_u * tmp_u + tmp_v * tmp_v;
 
         return moment_type {momentum_x, momentum_y, energy, enstrophy, nus, mass, divu2, divu, vel2};
-    };
+      };
 
     auto sum_operator =
-      [=] (const moment_type& left, const moment_type& right) {
+      [=] MDSPAN_FORCE_INLINE_FUNCTION (const moment_type& left, const moment_type& right) {
         return moment_type {std::get<0>(left) + std::get<0>(right),
                             std::get<1>(left) + std::get<1>(right),
                             std::get<2>(left) + std::get<2>(right),
@@ -386,17 +385,16 @@ class LBM2D : public Model {
                             std::get<7>(left) + std::get<7>(right),
                             std::get<8>(left) + std::get<8>(right)
                            };
-    };
+      };
 
     Iterate_policy<2> policy2d({0, 0}, {nx, ny});
     Impl::transform_reduce(policy2d, sum_operator, moment_kernel, moments);
 
-    /* [FIX THIS] transform reduce to get multiple max elements does not work correctly???
-    using maximum_type = std::tuple<double, double, double>;
-    maximum_type maximums = {0, 0, 0};
+    using minmax_type = std::tuple<double, double, double, double>;
+    minmax_type minmaxs = {0, 0, 0, 10000};
     // Compute maximum
-    auto maximum_kernel = 
-      [=](const int ix, const int iy) {
+    auto minmax_kernel =
+      [=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) {
         auto tmp_rho = rho(ix, iy);
         auto tmp_u   = u(ix, iy);
         auto tmp_v   = v(ix, iy);
@@ -415,60 +413,24 @@ class LBM2D : public Model {
         auto maxdivu = std::abs(ux + vy);
         auto maxvel2 = tmp_u * tmp_u + tmp_v * tmp_v;
 
-        return maximum_type {maxdivu, maxvel2, tmp_rho};
-    };
+        return minmax_type {maxdivu, maxvel2, tmp_rho, tmp_rho};
+      };
 
-    auto max_operator =
-      [=] (const maximum_type& left, const maximum_type& right) {
-        return maximum_type {std::max( std::get<0>(left), std::get<0>(right) ),
-                             std::max( std::get<1>(left), std::get<1>(right) ),
-                             std::max( std::get<2>(left), std::get<2>(right) )
-                            };
-    };
-    Impl::transform_reduce(policy2d, max_operator, maximum_kernel, maximums);
-
-    // Compute minimum
-    double rho_min = 9999; // some large number
-    auto minimum_kernel = 
-      [=](const int ix, const int iy) { return rho(ix, iy); };
-
-    auto min_operator =
-      [=] (const auto& left, const auto& right) { return std::min(left, right); };
-    Impl::transform_reduce(policy2d, min_operator, minimum_kernel, rho_min);
-    auto maxvel2 = std::get<0>(maximums);
-    auto maxdivu = std::get<1>(maximums);
-    auto rho_max = std::get<2>(maximums);
-    */
-
-    // To be removed
-    value_type maxdivu = 0;
-    value_type maxvel2 = 0;
-    value_type rho_max = 0;
-    value_type rho_min = 9999;
+    auto minmax_operator =
+      [=] MDSPAN_FORCE_INLINE_FUNCTION (const minmax_type& left, const minmax_type& right) {
+        return minmax_type {thrust::max( std::get<0>(left), std::get<0>(right) ),
+                            thrust::max( std::get<1>(left), std::get<1>(right) ),
+                            thrust::max( std::get<2>(left), std::get<2>(right) ),
+                            thrust::min( std::get<3>(left), std::get<3>(right) )
+                           };
+      };
+    Impl::transform_reduce(policy2d, minmax_operator, minmax_kernel, minmaxs);
 
-    for(int iy=0; iy<ny; iy++) {
-      for(int ix=0; ix<nx; ix++) {
-        auto tmp_rho = rho(ix, iy);
-        auto tmp_u   = u(ix, iy);
-        auto tmp_v   = v(ix, iy);
+    auto maxvel2 = std::get<0>(minmaxs);
+    auto maxdivu = std::get<1>(minmaxs);
+    auto rho_max = std::get<2>(minmaxs);
+    auto rho_min = std::get<3>(minmaxs);
 
-        // derivatives
-        const int ixp1 = periodic(ix+1, nx);
-        const int ixm1 = periodic(ix-1, nx);
-        const int iyp1 = periodic(iy+1, ny);
-        const int iym1 = periodic(iy-1, ny);
-
-        const value_type ux = (u(ixp1, iy) - u(ixm1, iy)) / (2*dx);
-        const value_type uy = (u(ix, iyp1) - u(ix, iym1)) / (2*dx);
-        const value_type vx = (v(ixp1, iy) - v(ixm1, iy)) / (2*dx);
-        const value_type vy = (v(ix, iyp1) - v(ix, iym1)) / (2*dx);
-
-        maxdivu = std::max(maxdivu, std::abs(ux + vy));
-        maxvel2 = std::max(maxvel2, tmp_u * tmp_u + tmp_v * tmp_v);
-        rho_max = std::max(rho_max, tmp_rho);
-        rho_min = std::min(rho_min, tmp_rho);
-      }
-    }
     auto momentum_x_total = std::get<0>(moments) / (nx * ny);
     auto momentum_y_total = std::get<1>(moments) / (nx * ny);
     auto energy           = std::get<2>(moments) / (nx * ny);
@@ -517,7 +479,7 @@ class LBM2D : public Model {
 
     Iterate_policy<2> policy2d({0, 0}, {nx, ny});
     Impl::for_each(policy2d, 
-      [=](const int ix, const int iy) {
+      [=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) {
         noisy_value_tmp(ix, iy) = value_tmp(ix, iy) + error * noise_tmp(ix, iy);
       });
   }
@@ -549,10 +511,8 @@ class LBM2D : public Model {
   void to_file(std::string case_name, ViewType& value, const int it) {
     auto dir_name = directory_names_.at(case_name);
     value.updateSelf();
-
-    std::string file_name = dir_name + "/" + value.name() + "_step"
-                          + Impl::zfill(it, 10) + ".dat";
-    Impl::to_binary(file_name, value.mdspan());
+    std::string file_name = dir_name + "/" + value.name() + "_step" + Impl::zfill(it, 10) + ".dat";
+    Impl::to_binary(file_name, value.host_mdspan());
   }
 };