From 4b4073a0da13e0ff5a25a14119c0329d4200344c Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Tue, 8 Aug 2023 15:03:38 +0900 Subject: [PATCH 1/4] [Bugfix] thrust vesion of heat3d-mpi and lbm2d-letkf --- lib/utils/device_utils.hpp | 44 +++++++++ mini-apps/heat3d-mpi/thrust/heat3D.hpp | 2 + mini-apps/lbm2d-letkf/config.hpp | 1 + mini-apps/lbm2d-letkf/thrust/da_models.hpp | 17 +++- mini-apps/lbm2d-letkf/thrust/force.hpp | 21 +++-- mini-apps/lbm2d-letkf/thrust/lbm2d.hpp | 100 +++++++-------------- mini-apps/lbm2d-letkf/thrust/solver.hpp | 10 ++- mini-apps/lbm2d-letkf/thrust/types.hpp | 4 +- 8 files changed, 117 insertions(+), 82 deletions(-) create mode 100644 lib/utils/device_utils.hpp diff --git a/lib/utils/device_utils.hpp b/lib/utils/device_utils.hpp new file mode 100644 index 0000000..617b6d6 --- /dev/null +++ b/lib/utils/device_utils.hpp @@ -0,0 +1,44 @@ +#ifndef __DEVICE_UTILS_HPP__ +#define __DEVICE_UTILS_HPP__ + +#include + +namespace Impl { + #if defined(_NVHPC_CUDA) || defined(__CUDACC__) + inline void synchronize() { + cudaDeviceSynchronize(); + } + + inline void setDevice(int rank) { + int count; + int id; + + cudaGetDeviceCount(&count); + cudaSetDevice(rank % count); + cudaGetDevice(&id); + printf("Process%d running on GPU%d\n", rank, id); + } + #elif defined(__HIPCC__) + #include + inline void synchronize() { + [[maybe_unused]] hipError_t err = hipDeviceSynchronize(); + } + + inline void setDevice(int rank) { + int count; + int id; + hipError_t err; + + err = hipGetDeviceCount(&count); + err = hipSetDevice(rank % count); + err = hipGetDevice(&id); + printf("Process%d running on GPU%d\n", rank, id); + } + + #else + inline void synchronize() {} + inline void setDevice(int rank) {} + #endif +}; + +#endif diff --git a/mini-apps/heat3d-mpi/thrust/heat3D.hpp b/mini-apps/heat3d-mpi/thrust/heat3D.hpp index 3404d1a..923bd37 100644 --- a/mini-apps/heat3d-mpi/thrust/heat3D.hpp +++ b/mini-apps/heat3d-mpi/thrust/heat3D.hpp @@ -74,6 +74,8 @@ void solve(const Config& conf, heat3d_functor(conf, x_mask, y_mask, z_mask, u, un)); timers[Heat]->end(); + std::swap(u, un); + timers[MainLoop]->end(); } } diff --git a/mini-apps/lbm2d-letkf/config.hpp b/mini-apps/lbm2d-letkf/config.hpp index 43d33b0..c2d2724 100644 --- a/mini-apps/lbm2d-letkf/config.hpp +++ b/mini-apps/lbm2d-letkf/config.hpp @@ -71,6 +71,7 @@ struct Settings { bool is_async_ = false; // In order to enable overlapping, in senders/receivers version of letkf bool is_bcast_on_host_ = false; // broadcast on device or host bool use_time_stamps_ = false; // for detailed analysis + bool disable_output_ = false; // for performance measurements double ly_epsilon_ = 1.e-8; // data assimilation parameter diff --git a/mini-apps/lbm2d-letkf/thrust/da_models.hpp b/mini-apps/lbm2d-letkf/thrust/da_models.hpp index 81a9edb..9cefb9a 100644 --- a/mini-apps/lbm2d-letkf/thrust/da_models.hpp +++ b/mini-apps/lbm2d-letkf/thrust/da_models.hpp @@ -38,9 +38,9 @@ class DA_Model { int nb_expected_files = conf_.settings_.nbiter_ / conf_.settings_.io_interval_; std::string variables[3] = {"rho", "u", "v"}; for(int it=0; itv_obs(), it); } + void load(std::unique_ptr& data_vars, const std::string variable, const int it) { + if(variable == "rho") { + from_file(data_vars->rho_obs(), it); + } else if(variable == "u") { + from_file(data_vars->u_obs(), it); + } else if(variable == "v") { + from_file(data_vars->v_obs(), it); + } + } + private: template void from_file(ViewType& value, const int step) { auto file_name = base_dir_name_ + "/" + value.name() + "_step" + Impl::zfill(step, 10) + ".dat"; - auto mdspan = value.mdspan(); + auto mdspan = value.host_mdspan(); Impl::from_binary(file_name, mdspan); + value.updateDevice(); } }; diff --git a/mini-apps/lbm2d-letkf/thrust/force.hpp b/mini-apps/lbm2d-letkf/thrust/force.hpp index add40bc..2f5c790 100644 --- a/mini-apps/lbm2d-letkf/thrust/force.hpp +++ b/mini-apps/lbm2d-letkf/thrust/force.hpp @@ -53,11 +53,11 @@ struct Force { const auto x = x_.mdspan(); const auto y = y_.mdspan(); const auto rand_pool = rand_pool_.mdspan(); - //const auto sub_rand_pool = stdex::submdspan(rand_pool, std::full_extent_t, std::full_extent_t, shift); + const auto sub_rand_pool = stdex::submdspan(rand_pool, std::full_extent, std::full_extent, shift); auto fx = fx_.mdspan(); auto fy = fy_.mdspan(); - auto force_lambda = [=](const int ix, const int iy) { + auto force_lambda = [=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) { const auto x_tmp = x(ix); const auto y_tmp = y(iy); value_type fx_tmp = 0.0, fy_tmp = 0.0; @@ -68,10 +68,10 @@ struct Force { const auto sine = sin(theta); const auto cosi = cos(theta); const value_type r[4] = { - rand_pool(n, 0, shift), - rand_pool(n, 1, shift), - rand_pool(n, 2, shift), - rand_pool(n, 3, shift), + sub_rand_pool(n, 0), + sub_rand_pool(n, 1), + sub_rand_pool(n, 2), + sub_rand_pool(n, 3) }; const auto amp_tmp = amp(n); @@ -183,6 +183,15 @@ struct Force { amp_(i) = force_amp.at(i); } + // deep copy to devices + kx_.updateDevice(); + ky_.updateDevice(); + amp_.updateDevice(); + x_.updateDevice(); + y_.updateDevice(); + rand_pool_.updateDevice(); + fx_.updateDevice(); + fy_.updateDevice(); } }; diff --git a/mini-apps/lbm2d-letkf/thrust/lbm2d.hpp b/mini-apps/lbm2d-letkf/thrust/lbm2d.hpp index bead738..db48def 100644 --- a/mini-apps/lbm2d-letkf/thrust/lbm2d.hpp +++ b/mini-apps/lbm2d-letkf/thrust/lbm2d.hpp @@ -95,7 +95,7 @@ class LBM2D : public Model { const auto _theta = theta.mdspan(); value_type rho_ref = static_cast(conf_.phys_.rho_ref_); - auto init_fluid_moments = [=](const int ix, const int iy) { + auto init_fluid_moments = [=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) { // fluid value_type u_tmp = 0.0; value_type v_tmp = 0.0; @@ -126,11 +126,11 @@ class LBM2D : public Model { }; auto max_operator = - [=](const auto& lhs, const auto& rhs) { return std::max(lhs, rhs); }; + [=] MDSPAN_FORCE_INLINE_FUNCTION (const auto& lhs, const auto& rhs) { return std::max(lhs, rhs); }; Impl::transform_reduce(policy2d, max_operator, max_speed, vmax); Impl::for_each(policy2d, - [=](const int ix, const int iy) { + [=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) { u(ix, iy) *= u_ref / vmax * p_amp; v(ix, iy) *= u_ref / vmax * p_amp; }); @@ -202,11 +202,10 @@ class LBM2D : public Model { // Save values calculated by this ensemble member // Save simulation results without noises - std::string sim_result_name = "calc"; auto rho = data_vars->rho(); auto u = data_vars->u(); auto v = data_vars->v(); - save_to_files(sim_result_name, rho, u, v, it); + save_to_files("calc", rho, u, v, it); // Save noisy results if(is_reference_) { @@ -344,7 +343,7 @@ class LBM2D : public Model { moment_type moments = {0, 0, 0, 0, 0, 0, 0, 0, 0}; auto moment_kernel = - [=](const int ix, const int iy) { + [=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) { auto tmp_rho = rho(ix, iy); auto tmp_u = u(ix, iy); auto tmp_v = v(ix, iy); @@ -372,10 +371,10 @@ class LBM2D : public Model { const double vel2 = tmp_u * tmp_u + tmp_v * tmp_v; return moment_type {momentum_x, momentum_y, energy, enstrophy, nus, mass, divu2, divu, vel2}; - }; + }; auto sum_operator = - [=] (const moment_type& left, const moment_type& right) { + [=] MDSPAN_FORCE_INLINE_FUNCTION (const moment_type& left, const moment_type& right) { return moment_type {std::get<0>(left) + std::get<0>(right), std::get<1>(left) + std::get<1>(right), std::get<2>(left) + std::get<2>(right), @@ -386,17 +385,16 @@ class LBM2D : public Model { std::get<7>(left) + std::get<7>(right), std::get<8>(left) + std::get<8>(right) }; - }; + }; Iterate_policy<2> policy2d({0, 0}, {nx, ny}); Impl::transform_reduce(policy2d, sum_operator, moment_kernel, moments); - /* [FIX THIS] transform reduce to get multiple max elements does not work correctly??? - using maximum_type = std::tuple; - maximum_type maximums = {0, 0, 0}; + using minmax_type = std::tuple; + minmax_type minmaxs = {0, 0, 0, 10000}; // Compute maximum - auto maximum_kernel = - [=](const int ix, const int iy) { + auto minmax_kernel = + [=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) { auto tmp_rho = rho(ix, iy); auto tmp_u = u(ix, iy); auto tmp_v = v(ix, iy); @@ -415,60 +413,24 @@ class LBM2D : public Model { auto maxdivu = std::abs(ux + vy); auto maxvel2 = tmp_u * tmp_u + tmp_v * tmp_v; - return maximum_type {maxdivu, maxvel2, tmp_rho}; - }; + return minmax_type {maxdivu, maxvel2, tmp_rho, tmp_rho}; + }; - auto max_operator = - [=] (const maximum_type& left, const maximum_type& right) { - return maximum_type {std::max( std::get<0>(left), std::get<0>(right) ), - std::max( std::get<1>(left), std::get<1>(right) ), - std::max( std::get<2>(left), std::get<2>(right) ) - }; - }; - Impl::transform_reduce(policy2d, max_operator, maximum_kernel, maximums); - - // Compute minimum - double rho_min = 9999; // some large number - auto minimum_kernel = - [=](const int ix, const int iy) { return rho(ix, iy); }; - - auto min_operator = - [=] (const auto& left, const auto& right) { return std::min(left, right); }; - Impl::transform_reduce(policy2d, min_operator, minimum_kernel, rho_min); - auto maxvel2 = std::get<0>(maximums); - auto maxdivu = std::get<1>(maximums); - auto rho_max = std::get<2>(maximums); - */ - - // To be removed - value_type maxdivu = 0; - value_type maxvel2 = 0; - value_type rho_max = 0; - value_type rho_min = 9999; + auto minmax_operator = + [=] MDSPAN_FORCE_INLINE_FUNCTION (const minmax_type& left, const minmax_type& right) { + return minmax_type {thrust::max( std::get<0>(left), std::get<0>(right) ), + thrust::max( std::get<1>(left), std::get<1>(right) ), + thrust::max( std::get<2>(left), std::get<2>(right) ), + thrust::min( std::get<3>(left), std::get<3>(right) ) + }; + }; + Impl::transform_reduce(policy2d, minmax_operator, minmax_kernel, minmaxs); - for(int iy=0; iy(minmaxs); + auto maxdivu = std::get<1>(minmaxs); + auto rho_max = std::get<2>(minmaxs); + auto rho_min = std::get<3>(minmaxs); - // derivatives - const int ixp1 = periodic(ix+1, nx); - const int ixm1 = periodic(ix-1, nx); - const int iyp1 = periodic(iy+1, ny); - const int iym1 = periodic(iy-1, ny); - - const value_type ux = (u(ixp1, iy) - u(ixm1, iy)) / (2*dx); - const value_type uy = (u(ix, iyp1) - u(ix, iym1)) / (2*dx); - const value_type vx = (v(ixp1, iy) - v(ixm1, iy)) / (2*dx); - const value_type vy = (v(ix, iyp1) - v(ix, iym1)) / (2*dx); - - maxdivu = std::max(maxdivu, std::abs(ux + vy)); - maxvel2 = std::max(maxvel2, tmp_u * tmp_u + tmp_v * tmp_v); - rho_max = std::max(rho_max, tmp_rho); - rho_min = std::min(rho_min, tmp_rho); - } - } auto momentum_x_total = std::get<0>(moments) / (nx * ny); auto momentum_y_total = std::get<1>(moments) / (nx * ny); auto energy = std::get<2>(moments) / (nx * ny); @@ -517,7 +479,7 @@ class LBM2D : public Model { Iterate_policy<2> policy2d({0, 0}, {nx, ny}); Impl::for_each(policy2d, - [=](const int ix, const int iy) { + [=] MDSPAN_FORCE_INLINE_FUNCTION (const int ix, const int iy) { noisy_value_tmp(ix, iy) = value_tmp(ix, iy) + error * noise_tmp(ix, iy); }); } @@ -549,10 +511,8 @@ class LBM2D : public Model { void to_file(std::string case_name, ViewType& value, const int it) { auto dir_name = directory_names_.at(case_name); value.updateSelf(); - - std::string file_name = dir_name + "/" + value.name() + "_step" - + Impl::zfill(it, 10) + ".dat"; - Impl::to_binary(file_name, value.mdspan()); + std::string file_name = dir_name + "/" + value.name() + "_step" + Impl::zfill(it, 10) + ".dat"; + Impl::to_binary(file_name, value.host_mdspan()); } }; diff --git a/mini-apps/lbm2d-letkf/thrust/solver.hpp b/mini-apps/lbm2d-letkf/thrust/solver.hpp index 0a2100e..7b96904 100644 --- a/mini-apps/lbm2d-letkf/thrust/solver.hpp +++ b/mini-apps/lbm2d-letkf/thrust/solver.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include "../timer.hpp" #include "../config.hpp" #include "../io_config.hpp" @@ -35,6 +36,7 @@ class Solver { // Initialize MPI mpi_conf_.initialize(argc, argv); + Impl::setDevice( mpi_conf_.rank() ); // Initialize Configuration from the input json file initialize_conf(filename, conf_); @@ -73,7 +75,9 @@ class Solver { timers_[TimerEnum::MainLoop]->begin(); da_model_->apply(data_vars_, it, timers_); - model_->diag(data_vars_, it, timers_); + if(!conf_.settings_.disable_output_) { + model_->diag(data_vars_, it, timers_); + } timers_[TimerEnum::LBMSolver]->begin(); model_->solve(data_vars_); @@ -161,6 +165,10 @@ class Solver { conf_.settings_.use_time_stamps_ = json_data["Settings"]["use_time_stamps"].get(); } + if(json_data["Settings"].contains("disable_output") ) { + conf_.settings_.disable_output_ = json_data["Settings"]["disable_output"].get(); + } + // IO settings io_conf_.base_dir_ = json_data["Settings"]["base_dir"].get(); io_conf_.case_name_ = json_data["Settings"]["case_name"].get(); diff --git a/mini-apps/lbm2d-letkf/thrust/types.hpp b/mini-apps/lbm2d-letkf/thrust/types.hpp index d729316..9c57f26 100644 --- a/mini-apps/lbm2d-letkf/thrust/types.hpp +++ b/mini-apps/lbm2d-letkf/thrust/types.hpp @@ -4,11 +4,11 @@ #include #include #include -#include +#include namespace stdex = std::experimental; -#if defined(_NVHPC_CUDA) || defined(__CUDACC__) +#if defined(_NVHPC_CUDA) || defined(__CUDACC__) || defined(__HIPCC__) #include #define SIMD_LOOP #define SIMD_WIDTH 1 From 6e8045fde1af697e4ee66cb42dba770e98463fb0 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Tue, 8 Aug 2023 15:04:05 +0900 Subject: [PATCH 2/4] add performance measurement mode --- mini-apps/lbm2d-letkf/executors/solver.hpp | 8 +++++++- mini-apps/lbm2d-letkf/stdpar/solver.hpp | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/mini-apps/lbm2d-letkf/executors/solver.hpp b/mini-apps/lbm2d-letkf/executors/solver.hpp index 5673ead..4418531 100644 --- a/mini-apps/lbm2d-letkf/executors/solver.hpp +++ b/mini-apps/lbm2d-letkf/executors/solver.hpp @@ -73,7 +73,9 @@ class Solver { timers_[TimerEnum::MainLoop]->begin(); da_model_->apply(data_vars_, it, timers_); - model_->diag(data_vars_, it, timers_); + if(!conf_.settings_.disable_output_) { + model_->diag(data_vars_, it, timers_); + } timers_[TimerEnum::LBMSolver]->begin(); model_->solve(data_vars_); @@ -169,6 +171,10 @@ class Solver { conf_.settings_.use_time_stamps_ = json_data["Settings"]["use_time_stamps"].get(); } + if(json_data["Settings"].contains("disable_output") ) { + conf_.settings_.disable_output_ = json_data["Settings"]["disable_output"].get(); + } + // IO settings io_conf_.base_dir_ = json_data["Settings"]["base_dir"].get(); io_conf_.case_name_ = json_data["Settings"]["case_name"].get(); diff --git a/mini-apps/lbm2d-letkf/stdpar/solver.hpp b/mini-apps/lbm2d-letkf/stdpar/solver.hpp index 0a2100e..6301892 100644 --- a/mini-apps/lbm2d-letkf/stdpar/solver.hpp +++ b/mini-apps/lbm2d-letkf/stdpar/solver.hpp @@ -73,7 +73,9 @@ class Solver { timers_[TimerEnum::MainLoop]->begin(); da_model_->apply(data_vars_, it, timers_); - model_->diag(data_vars_, it, timers_); + if(!conf_.settings_.disable_output_) { + model_->diag(data_vars_, it, timers_); + } timers_[TimerEnum::LBMSolver]->begin(); model_->solve(data_vars_); @@ -161,6 +163,10 @@ class Solver { conf_.settings_.use_time_stamps_ = json_data["Settings"]["use_time_stamps"].get(); } + if(json_data["Settings"].contains("disable_output") ) { + conf_.settings_.disable_output_ = json_data["Settings"]["disable_output"].get(); + } + // IO settings io_conf_.base_dir_ = json_data["Settings"]["base_dir"].get(); io_conf_.case_name_ = json_data["Settings"]["case_name"].get(); From 78b2662f3475801cbf9786ff163198d414a33108 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Tue, 8 Aug 2023 15:04:51 +0900 Subject: [PATCH 3/4] update default base_dir in input json files --- wk/letkf.json | 2 +- wk/letkf_256.json | 2 +- wk/letkf_256_time.json | 2 +- wk/letkf_512.json | 2 +- wk/letkf_512_time.json | 2 +- wk/letkf_async_256.json | 2 +- wk/letkf_async_256_time.json | 2 +- wk/letkf_async_512.json | 2 +- wk/letkf_async_512_time.json | 2 +- wk/nature.json | 2 +- wk/nature_256.json | 2 +- wk/nature_512.json | 2 +- wk/no_da.json | 2 +- wk/no_da_256.json | 2 +- wk/nudging.json | 2 +- wk/nudging_256.json | 2 +- 16 files changed, 16 insertions(+), 16 deletions(-) diff --git a/wk/letkf.json b/wk/letkf.json index dc41946..b251d0d 100644 --- a/wk/letkf.json +++ b/wk/letkf.json @@ -13,7 +13,7 @@ "obs_error_u": 0.1 }, "Settings": { - "base_dir": "/work/03/jh220030a/i18048/2023P3HPC/executor_testing/wk", + "base_dir": "./", "sim_type": "letkf", "case_name": "letkf", "in_case_name": "nature", diff --git a/wk/letkf_256.json b/wk/letkf_256.json index 6b0d6a1..e6b5e9f 100644 --- a/wk/letkf_256.json +++ b/wk/letkf_256.json @@ -13,7 +13,7 @@ "obs_error_u": 0.1 }, "Settings": { - "base_dir": "/work/03/jh220030a/i18048/2023P3HPC/executor_testing/wk", + "base_dir": "./", "sim_type": "letkf", "case_name": "letkf256", "in_case_name": "nature256", diff --git a/wk/letkf_256_time.json b/wk/letkf_256_time.json index aad6785..4765f46 100644 --- a/wk/letkf_256_time.json +++ b/wk/letkf_256_time.json @@ -13,7 +13,7 @@ "obs_error_u": 0.1 }, "Settings": { - "base_dir": "/work/03/jh220030a/i18048/2023P3HPC/executor_testing/wk", + "base_dir": "./", "sim_type": "letkf", "case_name": "letkf256", "in_case_name": "nature256", diff --git a/wk/letkf_512.json b/wk/letkf_512.json index cf9d309..b475b1b 100644 --- a/wk/letkf_512.json +++ b/wk/letkf_512.json @@ -13,7 +13,7 @@ "obs_error_u": 0.1 }, "Settings": { - "base_dir": "/work/03/jh220030a/i18048/2023P3HPC/executor_testing/wk", + "base_dir": "./", "sim_type": "letkf", "case_name": "letkf512", "in_case_name": "nature512", diff --git a/wk/letkf_512_time.json b/wk/letkf_512_time.json index d3d3eb3..51f7044 100644 --- a/wk/letkf_512_time.json +++ b/wk/letkf_512_time.json @@ -13,7 +13,7 @@ "obs_error_u": 0.1 }, "Settings": { - "base_dir": "/work/03/jh220030a/i18048/2023P3HPC/executor_testing/wk", + "base_dir": "./", "sim_type": "letkf", "case_name": "letkf512", "in_case_name": "nature512", diff --git a/wk/letkf_async_256.json b/wk/letkf_async_256.json index 8cd66fa..001cbfc 100644 --- a/wk/letkf_async_256.json +++ b/wk/letkf_async_256.json @@ -13,7 +13,7 @@ "obs_error_u": 0.1 }, "Settings": { - "base_dir": "/work/03/jh220030a/i18048/2023P3HPC/executor_testing/wk", + "base_dir": "./", "sim_type": "letkf", "case_name": "letkf256", "in_case_name": "nature256", diff --git a/wk/letkf_async_256_time.json b/wk/letkf_async_256_time.json index 5152cdc..2857666 100644 --- a/wk/letkf_async_256_time.json +++ b/wk/letkf_async_256_time.json @@ -13,7 +13,7 @@ "obs_error_u": 0.1 }, "Settings": { - "base_dir": "/work/03/jh220030a/i18048/2023P3HPC/executor_testing/wk", + "base_dir": "./", "sim_type": "letkf", "case_name": "letkf_async256", "in_case_name": "nature256", diff --git a/wk/letkf_async_512.json b/wk/letkf_async_512.json index 8b99959..aabeb76 100644 --- a/wk/letkf_async_512.json +++ b/wk/letkf_async_512.json @@ -13,7 +13,7 @@ "obs_error_u": 0.1 }, "Settings": { - "base_dir": "/work/03/jh220030a/i18048/2023P3HPC/executor_testing/wk", + "base_dir": "./", "sim_type": "letkf", "case_name": "letkf_async512", "in_case_name": "nature512", diff --git a/wk/letkf_async_512_time.json b/wk/letkf_async_512_time.json index bcad6bc..ab251e0 100644 --- a/wk/letkf_async_512_time.json +++ b/wk/letkf_async_512_time.json @@ -13,7 +13,7 @@ "obs_error_u": 0.1 }, "Settings": { - "base_dir": "/work/03/jh220030a/i18048/2023P3HPC/executor_testing/wk", + "base_dir": "./", "sim_type": "letkf", "case_name": "letkf_async512", "in_case_name": "nature512", diff --git a/wk/nature.json b/wk/nature.json index da721de..b935208 100644 --- a/wk/nature.json +++ b/wk/nature.json @@ -13,7 +13,7 @@ "obs_error_u": 0.1 }, "Settings": { - "base_dir": "/work/03/jh220030a/i18048/2023P3HPC/executor_testing/wk", + "base_dir": "./", "sim_type": "nature", "case_name": "nature", "nx": 512, diff --git a/wk/nature_256.json b/wk/nature_256.json index e1bba44..7a9581a 100644 --- a/wk/nature_256.json +++ b/wk/nature_256.json @@ -13,7 +13,7 @@ "obs_error_u": 0.1 }, "Settings": { - "base_dir": "/work/03/jh220030a/i18048/2023P3HPC/executor_testing/wk", + "base_dir": "./", "sim_type": "nature", "case_name": "nature256", "nx": 256, diff --git a/wk/nature_512.json b/wk/nature_512.json index 3fa87dd..04dd4c8 100644 --- a/wk/nature_512.json +++ b/wk/nature_512.json @@ -13,7 +13,7 @@ "obs_error_u": 0.1 }, "Settings": { - "base_dir": "/work/03/jh220030a/i18048/2023P3HPC/executor_testing/wk", + "base_dir": "./", "sim_type": "nature", "case_name": "nature512", "nx": 512, diff --git a/wk/no_da.json b/wk/no_da.json index 12e69db..8a083fc 100644 --- a/wk/no_da.json +++ b/wk/no_da.json @@ -13,7 +13,7 @@ "obs_error_u": 0.1 }, "Settings": { - "base_dir": "/work/03/jh220030a/i18048/2023P3HPC/executor_testing/wk", + "base_dir": "./", "sim_type": "no_da", "case_name": "no_da", "in_case_name": "nature", diff --git a/wk/no_da_256.json b/wk/no_da_256.json index 3d6fade..5bdb4cb 100644 --- a/wk/no_da_256.json +++ b/wk/no_da_256.json @@ -13,7 +13,7 @@ "obs_error_u": 0.1 }, "Settings": { - "base_dir": "/work/03/jh220030a/i18048/2023P3HPC/executor_testing/wk", + "base_dir": "./", "sim_type": "no_da", "case_name": "no_da256", "in_case_name": "nature256", diff --git a/wk/nudging.json b/wk/nudging.json index f1dd0d3..a814faa 100644 --- a/wk/nudging.json +++ b/wk/nudging.json @@ -13,7 +13,7 @@ "obs_error_u": 0.1 }, "Settings": { - "base_dir": "/work/03/jh220030a/i18048/2023P3HPC/executor_testing/wk", + "base_dir": "./", "sim_type": "nudging", "case_name": "nudging", "in_case_name": "nature", diff --git a/wk/nudging_256.json b/wk/nudging_256.json index eb7e8d1..979ec90 100644 --- a/wk/nudging_256.json +++ b/wk/nudging_256.json @@ -13,7 +13,7 @@ "obs_error_u": 0.1 }, "Settings": { - "base_dir": "/work/03/jh220030a/i18048/2023P3HPC/executor_testing/wk", + "base_dir": "./", "sim_type": "nudging", "case_name": "nudging256", "in_case_name": "nature256", From 15d6fae0acc659f60a462ed5aaf1ed405a10fc6e Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Tue, 8 Aug 2023 15:05:58 +0900 Subject: [PATCH 4/4] Update job scripts --- wk/sub_executors_heat3d_mpi_A100.sh | 3 ++ wk/sub_thrust_lbm2d_letkf_time_stamps_A100.sh | 54 +++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 wk/sub_thrust_lbm2d_letkf_time_stamps_A100.sh diff --git a/wk/sub_executors_heat3d_mpi_A100.sh b/wk/sub_executors_heat3d_mpi_A100.sh index 279a6e4..5ba1244 100644 --- a/wk/sub_executors_heat3d_mpi_A100.sh +++ b/wk/sub_executors_heat3d_mpi_A100.sh @@ -35,5 +35,8 @@ export UCX_MEMTYPE_CACHE=n export UCX_IB_GPU_DIRECT_RDMA=no export UCX_RNDV_FRAG_MEM_TYPE=cuda +mpiexec -machinefile $PJM_O_NODEINF -np $PJM_MPI_PROC -npernode 2 \ + ./wrapper.sh ../build/mini-apps/heat3d-mpi/thrust/heat3d-mpi-thrust --px 1 --py 1 --pz 2 --nx 512 --ny 512 --nz 256 --nbiter 1000 --freq_diag 0 + mpiexec -machinefile $PJM_O_NODEINF -np $PJM_MPI_PROC -npernode 2 \ ./wrapper.sh ../build/mini-apps/heat3d-mpi/executors/heat3d-mpi-executors --px 1 --py 1 --pz 2 --nx 512 --ny 512 --nz 256 --nbiter 1000 --freq_diag 0 diff --git a/wk/sub_thrust_lbm2d_letkf_time_stamps_A100.sh b/wk/sub_thrust_lbm2d_letkf_time_stamps_A100.sh new file mode 100644 index 0000000..6309d23 --- /dev/null +++ b/wk/sub_thrust_lbm2d_letkf_time_stamps_A100.sh @@ -0,0 +1,54 @@ +#!/bin/bash +#PJM -L "node=1" +#PJM -L "rscgrp=regular-a" +#PJM -L "elapse=60:00" +#PJM -s +#PJM -g jh220031a +#PJM --mpi proc=4 + +. /etc/profile.d/modules.sh # Initialize module command + +module purge + +# Load spack +export HOME=/work/jh220031a/i18048 +. $HOME/spack/share/spack/setup-env.sh + +spack load gcc@11.3.0 +spack load cmake@3.24.3%gcc@8.3.1 +module load /work/04/jh220031a/i18048/lib/nvidia/hpc_sdk23.3/modulefiles/nvhpc/23.3 +module list + +# Need GPUs to build the code appropriately +# So compile inside a batch job, wherein GPUs are visible +if [ ! -d "../build" ] +then + cd ../ + rm -rf build + mkdir build && cd build + cmake -DCMAKE_CXX_COMPILER=nvc++ -DBACKEND=CUDA .. + cmake --build . -j 8 + cd ../wk/ +fi + +export UCX_MEMTYPE_CACHE=n +export UCX_IB_GPU_DIRECT_RDMA=no +export UCX_RNDV_FRAG_MEM_TYPE=cuda + +mpiexec -machinefile $PJM_O_NODEINF -np 1 -npernode 1 \ + ../build/mini-apps/lbm2d-letkf/thrust/lbm2d-letkf-thrust --filename nature_256.json + +mpiexec -machinefile $PJM_O_NODEINF -np $PJM_MPI_PROC -npernode 4 \ + ../build/mini-apps/lbm2d-letkf/thrust/lbm2d-letkf-thrust --filename letkf_256.json + +###mpiexec -machinefile $PJM_O_NODEINF -np $PJM_MPI_PROC -npernode 4 \ +### ./wrapper.sh ../build/mini-apps/lbm2d-letkf/thrust/lbm2d-letkf-thrust --filename letkf_256_time.json +### +###mpiexec -machinefile $PJM_O_NODEINF -np 1 -npernode 1 \ +### ../build/mini-apps/lbm2d-letkf/thrust/lbm2d-letkf-thrust --filename nature_512.json +### +###mpiexec -machinefile $PJM_O_NODEINF -np $PJM_MPI_PROC -npernode 4 \ +### ./wrapper.sh ../build/mini-apps/lbm2d-letkf/thrust/lbm2d-letkf-thrust --filename letkf_512.json +### +###mpiexec -machinefile $PJM_O_NODEINF -np $PJM_MPI_PROC -npernode 4 \ +### ./wrapper.sh ../build/mini-apps/lbm2d-letkf/thrust/lbm2d-letkf-thrust --filename letkf_512_time.json