From 873e5801d7917726479c0376074d0b9169b12aff Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 20 Feb 2024 15:13:51 -0500 Subject: [PATCH 01/29] Add a __device__ function version of PCM Added a version of PCM that can be called as a device function from within a Riemann solver --- src/reconstruction/pcm_cuda.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/reconstruction/pcm_cuda.h b/src/reconstruction/pcm_cuda.h index 4a6585922..a5a6759ad 100644 --- a/src/reconstruction/pcm_cuda.h +++ b/src/reconstruction/pcm_cuda.h @@ -7,6 +7,9 @@ #include "../utils/basic_structs.h" #include "../utils/hydro_utilities.h" +#include "../utils/basic_structs.h" +#include "../utils/hydro_utilities.h" + namespace reconstruction { /*! From 423ca527aa9a3ab54789722d8544b8fd68225275 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 22 Feb 2024 15:49:50 -0500 Subject: [PATCH 02/29] Fuse PCM reconstruction and HLLC solver kernels --- src/integrators/VL_3D_cuda.cu | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index c2ea14d42..f3674a890 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -312,17 +312,14 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int cuda_utilities::AutomaticLaunchParams static const hllc_higher_order_launch_params( Calculate_HLLC_Fluxes_CUDA, n_cells); hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_HLLC_Fluxes_CUDA), - hllc_higher_order_launch_params.get_numBlocks(), - hllc_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Lx, Q_Rx, F_x, - nx, ny, nz, n_cells, gama, n_fields); + hllc_higher_order_launch_params.numBlocks(), hllc_higher_order_launch_params.threadsPerBlock(), 0, 0, + dev_conserved_half, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_cells, gama, n_fields); hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_HLLC_Fluxes_CUDA), - hllc_higher_order_launch_params.get_numBlocks(), - hllc_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Ly, Q_Ry, F_y, - nx, ny, nz, n_cells, gama, n_fields); + hllc_higher_order_launch_params.numBlocks(), hllc_higher_order_launch_params.threadsPerBlock(), 0, 0, + dev_conserved_half, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_cells, gama, n_fields); hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_HLLC_Fluxes_CUDA), - hllc_higher_order_launch_params.get_numBlocks(), - hllc_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Lz, Q_Rz, F_z, - nx, ny, nz, n_cells, gama, n_fields); + hllc_higher_order_launch_params.numBlocks(), hllc_higher_order_launch_params.threadsPerBlock(), 0, 0, + dev_conserved_half, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_cells, gama, n_fields); #endif // HLLC #ifdef HLL cuda_utilities::AutomaticLaunchParams static const hll_higher_order_launch_params( From a8e31462c456d61781016ee8d12f66f89e5c9ab9 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 26 Feb 2024 15:19:21 -0500 Subject: [PATCH 03/29] Fuse PCM reconstruction and Roe solver kernels --- src/integrators/VL_3D_cuda.cu | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index f3674a890..3da9e394e 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -156,14 +156,17 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int cuda_utilities::AutomaticLaunchParams static const roe_pcm_launch_params( Calculate_Roe_Fluxes_CUDA, n_cells); hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA), - roe_pcm_launch_params.get_numBlocks(), roe_pcm_launch_params.get_threadsPerBlock(), 0, 0, - dev_conserved, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_cells, gama, n_fields); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA), - roe_pcm_launch_params.get_numBlocks(), roe_pcm_launch_params.get_threadsPerBlock(), 0, 0, - dev_conserved, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_cells, gama, n_fields); - hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA), - roe_pcm_launch_params.get_numBlocks(), roe_pcm_launch_params.get_threadsPerBlock(), 0, 0, - dev_conserved, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_cells, gama, n_fields); + roe_pcm_launch_params.get_numBlocks(), + roe_pcm_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_cells, gama, + n_fields); + hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA), + roe_pcm_launch_params.get_numBlocks(), + roe_pcm_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_cells, gama, + n_fields); + hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA), + roe_pcm_launch_params.get_numBlocks(), + roe_pcm_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_cells, gama, + n_fields); #endif // ROE #ifdef HLLC cuda_utilities::AutomaticLaunchParams static const hllc_pcm_launch_params( @@ -297,16 +300,16 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int Calculate_Roe_Fluxes_CUDA, n_cells); hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA), roe_higher_order_launch_params.get_numBlocks(), - roe_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Lx, Q_Rx, F_x, - nx, ny, nz, n_cells, gama, n_fields); + roe_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_cells, gama, + n_fields); hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA), roe_higher_order_launch_params.get_numBlocks(), - roe_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Ly, Q_Ry, F_y, - nx, ny, nz, n_cells, gama, n_fields); + roe_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_cells, gama, + n_fields); hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA), roe_higher_order_launch_params.get_numBlocks(), - roe_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Lz, Q_Rz, F_z, - nx, ny, nz, n_cells, gama, n_fields); + roe_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_cells, gama, + n_fields); #endif // ROE #ifdef HLLC cuda_utilities::AutomaticLaunchParams static const hllc_higher_order_launch_params( From 627ddadb5ae34a06c14a7aaad1da6d47f2a28e31 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 26 Feb 2024 15:51:08 -0500 Subject: [PATCH 04/29] Remove standalone PCM kernels --- src/reconstruction/pcm_cuda.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/reconstruction/pcm_cuda.h b/src/reconstruction/pcm_cuda.h index a5a6759ad..2867a77c3 100644 --- a/src/reconstruction/pcm_cuda.h +++ b/src/reconstruction/pcm_cuda.h @@ -7,8 +7,17 @@ #include "../utils/basic_structs.h" #include "../utils/hydro_utilities.h" -#include "../utils/basic_structs.h" -#include "../utils/hydro_utilities.h" +__global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int n_cells, + int n_ghost, Real gamma, int n_fields); + +__global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, Real *dev_bounds_Rx, + Real *dev_bounds_Ly, Real *dev_bounds_Ry, int nx, int ny, int n_ghost, Real gamma, + int n_fields); + +__global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, Real *dev_bounds_Rx, + Real *dev_bounds_Ly, Real *dev_bounds_Ry, Real *dev_bounds_Lz, + Real *dev_bounds_Rz, int nx, int ny, int nz, int n_ghost, Real gamma, + int n_fields); namespace reconstruction { From ddd4560c77c02396f313287d0deb2a675f853baf Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 4 Mar 2024 15:11:56 -0500 Subject: [PATCH 05/29] Refactor Monotonize_Characteristic_Return_Primitive Refactored this function to put the limiter into its own function, rename it to better reflect what it actually does, and move the characteristic to primitive conversion outside the function --- src/reconstruction/plmc_cuda.cu | 40 ++++++--- src/reconstruction/ppmc_cuda.cu | 59 ++++++++++-- src/reconstruction/reconstruction_internals.h | 89 ++++++++----------- .../reconstruction_internals_tests.cu | 54 +++++------ 4 files changed, 136 insertions(+), 106 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 83004d0c7..9970e64f1 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -117,13 +117,27 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ // Apply monotonicity constraints to the differences in the characteristic variables and project the monotonized // difference in the characteristic variables back onto the primitive variables Stone Eqn 39 - hydro_utilities::Primitive del_m_i = reconstruction::Monotonize_Characteristic_Return_Primitive( - cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvectors, sound_speed, - sound_speed_squared, gamma); + reconstruction::Characteristic const del_a_m = reconstruction::Van_Leer_Limiter(del_a_L, del_a_R, del_a_C, del_a_G); + + // Project back into the primitive variables. + hydro_utilities::Primitive del_m = + Characteristic_To_Primitive(cell_i, del_a_m, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Limit the variables that aren't transformed by the characteristic projection +#ifdef DE + del_m.gas_energy_specific = Van_Leer_Limiter(del_L.gas_energy_specific, del_R.gas_energy_specific, + del_C.gas_energy_specific, del_G.gas_energy_specific); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + del_m.scalar_specific[i] = Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], + del_C.scalar_specific[i], del_G.scalar_specific[i]); + } +#endif // SCALAR // Compute the left and right interface values using the monotonized difference in the primitive variables - hydro_utilities::Primitive interface_L_iph = reconstruction::Calc_Interface_Linear(cell_i, del_m_i, 1.0); - hydro_utilities::Primitive interface_R_imh = reconstruction::Calc_Interface_Linear(cell_i, del_m_i, -1.0); + hydro_utilities::Primitive interface_L_iph = reconstruction::Calc_Interface_Linear(cell_i, del_m, 1.0); + hydro_utilities::Primitive interface_R_imh = reconstruction::Calc_Interface_Linear(cell_i, del_m, -1.0); #ifndef VL @@ -152,14 +166,14 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ interface_L_iph.pressure = interface_L_iph.pressure - qx * del_m_i.pressure; #ifdef DE - interface_R_imh.gas_energy_specific = interface_R_imh.gas_energy_specific + qx * del_m_i.gas_energy_specific; - interface_L_iph.gas_energy_specific = interface_L_iph.gas_energy_specific - qx * del_m_i.gas_energy_specific; + interface_R_imh.gas_energy_specific = interface_R_imh.gas_energy_specific + qx * del_m.gas_energy_specific; + interface_L_iph.gas_energy_specific = interface_L_iph.gas_energy_specific - qx * del_m.gas_energy_specific; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - interface_R_imh.scalar[i] = interface_R_imh.scalar[i] + qx * del_m_i.scalar[i]; - interface_L_iph.scalar[i] = interface_L_iph.scalar[i] - qx * del_m_i.scalar[i]; + interface_R_imh.scalar[i] = interface_R_imh.scalar[i] + qx * del_m.scalar[i]; + interface_L_iph.scalar[i] = interface_L_iph.scalar[i] - qx * del_m.scalar[i]; } #endif // SCALAR @@ -192,11 +206,11 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ sum_2 += lamdiff * del_m_i.velocity.y(); sum_3 += lamdiff * del_m_i.velocity.z(); #ifdef DE - sum_ge += lamdiff * del_m_i.gas_energy_specific; + sum_ge += lamdiff * del_m.gas_energy_specific; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += lamdiff * del_m_i.scalar[i]; + sum_scalar[i] += lamdiff * del_m.scalar[i]; } #endif // SCALAR } @@ -249,11 +263,11 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ sum_2 += lamdiff * del_m_i.velocity.y(); sum_3 += lamdiff * del_m_i.velocity.z(); #ifdef DE - sum_ge += lamdiff * del_m_i.gas_energy_specific; + sum_ge += lamdiff * del_m.gas_energy_specific; #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += lamdiff * del_m_i.scalar[i]; + sum_scalar[i] += lamdiff * del_m.scalar[i]; } #endif // SCALAR } diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index b610b9902..06f98a72f 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -124,11 +124,25 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun cell_im1, del_G, eigenvector, sound_speed, sound_speed * sound_speed, gamma); // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables + reconstruction::Characteristic const del_a_m_im1 = + reconstruction::Van_Leer_Limiter(del_a_L, del_a_R, del_a_C, del_a_G); + // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables // Stone Eqn 39 - hydro_utilities::Primitive const del_m_im1 = reconstruction::Monotonize_Characteristic_Return_Primitive( - cell_im1, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvector, sound_speed, - sound_speed * sound_speed, gamma); + hydro_utilities::Primitive del_m_im1 = + Characteristic_To_Primitive(cell_im1, del_a_m_im1, eigenvector, sound_speed, sound_speed * sound_speed, gamma); + + // Limit the variables that aren't transformed by the characteristic projection +#ifdef DE + del_m_im1.gas_energy_specific = Van_Leer_Limiter(del_L.gas_energy_specific, del_R.gas_energy_specific, + del_C.gas_energy_specific, del_G.gas_energy_specific); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + del_m_im1.scalar_specific[i] = Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], + del_C.scalar_specific[i], del_G.scalar_specific[i]); + } +#endif // SCALAR // ============= // Cell i slopes @@ -169,11 +183,24 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun sound_speed * sound_speed, gamma); // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables + reconstruction::Characteristic const del_a_m_i = reconstruction::Van_Leer_Limiter(del_a_L, del_a_R, del_a_C, del_a_G); + // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables // Stone Eqn 39 - hydro_utilities::Primitive del_m_i = reconstruction::Monotonize_Characteristic_Return_Primitive( - cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvector, sound_speed, - sound_speed * sound_speed, gamma); + hydro_utilities::Primitive del_m_i = + Characteristic_To_Primitive(cell_ip1, del_a_m_i, eigenvector, sound_speed, sound_speed * sound_speed, gamma); + + // Limit the variables that aren't transformed by the characteristic projection +#ifdef DE + del_m_i.gas_energy_specific = Van_Leer_Limiter(del_L.gas_energy_specific, del_R.gas_energy_specific, + del_C.gas_energy_specific, del_G.gas_energy_specific); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + del_m_i.scalar_specific[i] = Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], + del_C.scalar_specific[i], del_G.scalar_specific[i]); + } +#endif // SCALAR // =============== // Cell i+1 slopes @@ -214,11 +241,25 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun sound_speed * sound_speed, gamma); // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables + reconstruction::Characteristic const del_a_m_ip1 = + reconstruction::Van_Leer_Limiter(del_a_L, del_a_R, del_a_C, del_a_G); + // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables // Stone Eqn 39 - hydro_utilities::Primitive const del_m_ip1 = reconstruction::Monotonize_Characteristic_Return_Primitive( - cell_ip1, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvector, sound_speed, - sound_speed * sound_speed, gamma); + hydro_utilities::Primitive del_m_ip1 = + Characteristic_To_Primitive(cell_ip1, del_a_m_ip1, eigenvector, sound_speed, sound_speed * sound_speed, gamma); + + // Limit the variables that aren't transformed by the characteristic projection +#ifdef DE + del_m_ip1.gas_energy_specific = Van_Leer_Limiter(del_L.gas_energy_specific, del_R.gas_energy_specific, + del_C.gas_energy_specific, del_G.gas_energy_specific); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + del_m_ip1.scalar_specific[i] = Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], + del_C.scalar_specific[i], del_G.scalar_specific[i]); + } +#endif // SCALAR // Step 6 - Use parabolic interpolation to compute values at the left and right of each cell center Here, the // subscripts L and R refer to the left and right side of the ith cell center Stone Eqn 46 diff --git a/src/reconstruction/reconstruction_internals.h b/src/reconstruction/reconstruction_internals.h index 73e661246..daba8d4fe 100644 --- a/src/reconstruction/reconstruction_internals.h +++ b/src/reconstruction/reconstruction_internals.h @@ -562,72 +562,59 @@ hydro_utilities::Primitive __device__ __host__ __inline__ Characteristic_To_Prim // ===================================================================================================================== /*! - * \brief Monotonize the characteristic slopes and project back into the primitive slopes + * \brief Compute the limited slope using the Van Leer limiter + * + * \param[in] left The left slope + * \param[in] right The right slope + * \param[in] centered The centered slope + * \param[in] van_leer The Van Leer slope + * \return Real The limited slope + */ +Real __device__ __host__ __inline__ Van_Leer_Limiter(Real const &left, Real const &right, Real const ¢ered, + Real const &van_leer) +{ + if (left * right > 0.0) { + Real const lim_slope_a = 2.0 * fmin(fabs(left), fabs(right)); + Real const lim_slope_b = fmin(fabs(centered), fabs(van_leer)); + return copysign(fmin(lim_slope_a, lim_slope_b), centered); + } else { + return 0.0; + } +}; +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Limit the charactistic slopes. This is an overload that take reconstruction::Characteristic variables instead + * of Reals as arguments. Note that it does not limit the gas energy or scalars * - * \param[in] primitive The primitive variables - * \param[in] del_L The left primitive slopes - * \param[in] del_R The right primitive slopes - * \param[in] del_C The centered primitive slopes - * \param[in] del_G The Van Leer primitive slopes * \param[in] del_a_L The left characteristic slopes * \param[in] del_a_R The right characteristic slopes * \param[in] del_a_C The centered characteristic slopes * \param[in] del_a_G The Van Leer characteristic slopes - * \param[in] sound_speed The sound speed - * \param[in] sound_speed_squared The sound speed squared - * \param[in] gamma The adiabatic index - * \return hydro_utilities::Primitive The Monotonized primitive slopes + * \return Characteristic The limited characteristic slopes */ -hydro_utilities::Primitive __device__ __inline__ Monotonize_Characteristic_Return_Primitive( - hydro_utilities::Primitive const &primitive, hydro_utilities::Primitive const &del_L, - hydro_utilities::Primitive const &del_R, hydro_utilities::Primitive const &del_C, - hydro_utilities::Primitive const &del_G, Characteristic const &del_a_L, Characteristic const &del_a_R, - Characteristic const &del_a_C, Characteristic const &del_a_G, EigenVecs const &eigenvectors, - Real const &sound_speed, Real const &sound_speed_squared, Real const &gamma) +Characteristic __device__ __host__ __inline__ Van_Leer_Limiter(Characteristic const &del_a_L, + Characteristic const &del_a_R, + Characteristic const &del_a_C, + Characteristic const &del_a_G) { - // The function that will actually do the monotozation - auto Monotonize = [](Real const &left, Real const &right, Real const ¢ered, Real const &van_leer) -> Real { - if (left * right > 0.0) { - Real const lim_slope_a = 2.0 * fmin(fabs(left), fabs(right)); - Real const lim_slope_b = fmin(fabs(centered), fabs(van_leer)); - return copysign(fmin(lim_slope_a, lim_slope_b), centered); - } else { - return 0.0; - } - }; - // the monotonized difference in the characteristic variables Characteristic del_a_m; // Monotonize the slopes - del_a_m.a0 = Monotonize(del_a_L.a0, del_a_R.a0, del_a_C.a0, del_a_G.a0); - del_a_m.a1 = Monotonize(del_a_L.a1, del_a_R.a1, del_a_C.a1, del_a_G.a1); - del_a_m.a2 = Monotonize(del_a_L.a2, del_a_R.a2, del_a_C.a2, del_a_G.a2); - del_a_m.a3 = Monotonize(del_a_L.a3, del_a_R.a3, del_a_C.a3, del_a_G.a3); - del_a_m.a4 = Monotonize(del_a_L.a4, del_a_R.a4, del_a_C.a4, del_a_G.a4); + del_a_m.a0 = Van_Leer_Limiter(del_a_L.a0, del_a_R.a0, del_a_C.a0, del_a_G.a0); + del_a_m.a1 = Van_Leer_Limiter(del_a_L.a1, del_a_R.a1, del_a_C.a1, del_a_G.a1); + del_a_m.a2 = Van_Leer_Limiter(del_a_L.a2, del_a_R.a2, del_a_C.a2, del_a_G.a2); + del_a_m.a3 = Van_Leer_Limiter(del_a_L.a3, del_a_R.a3, del_a_C.a3, del_a_G.a3); + del_a_m.a4 = Van_Leer_Limiter(del_a_L.a4, del_a_R.a4, del_a_C.a4, del_a_G.a4); #ifdef MHD - del_a_m.a5 = Monotonize(del_a_L.a5, del_a_R.a5, del_a_C.a5, del_a_G.a5); - del_a_m.a6 = Monotonize(del_a_L.a6, del_a_R.a6, del_a_C.a6, del_a_G.a6); + del_a_m.a5 = Van_Leer_Limiter(del_a_L.a5, del_a_R.a5, del_a_C.a5, del_a_G.a5); + del_a_m.a6 = Van_Leer_Limiter(del_a_L.a6, del_a_R.a6, del_a_C.a6, del_a_G.a6); #endif // MHD - // Project into the primitive variables. Note the return by reference to preserve the values in the gas_energy and - // scalars - hydro_utilities::Primitive output = - Characteristic_To_Primitive(primitive, del_a_m, eigenvectors, sound_speed, sound_speed_squared, gamma); - -#ifdef DE - output.gas_energy_specific = Monotonize(del_L.gas_energy_specific, del_R.gas_energy_specific, - del_C.gas_energy_specific, del_G.gas_energy_specific); -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - output.scalar_specific[i] = Monotonize(del_L.scalar_specific[i], del_R.scalar_specific[i], del_C.scalar_specific[i], - del_G.scalar_specific[i]); - } -#endif // SCALAR - - return output; + return del_a_m; } // ===================================================================================================================== diff --git a/src/reconstruction/reconstruction_internals_tests.cu b/src/reconstruction/reconstruction_internals_tests.cu index 857a7d70f..be780a0cc 100644 --- a/src/reconstruction/reconstruction_internals_tests.cu +++ b/src/reconstruction/reconstruction_internals_tests.cu @@ -350,58 +350,46 @@ TEST(tALLReconstructionVanLeerSlope, CorrectInputExpectCorrectOutput) #endif // MHD } -__global__ void Test_Monotize_Characteristic_Return_Primitive( - hydro_utilities::Primitive const primitive, hydro_utilities::Primitive const del_L, - hydro_utilities::Primitive const del_R, hydro_utilities::Primitive const del_C, - hydro_utilities::Primitive const del_G, reconstruction::Characteristic const del_a_L, - reconstruction::Characteristic const del_a_R, reconstruction::Characteristic const del_a_C, - reconstruction::Characteristic const del_a_G, reconstruction::EigenVecs const eigenvectors, Real const sound_speed, - Real const sound_speed_squared, Real const gamma, hydro_utilities::Primitive *monotonized_slope) +TEST(tALLReconstructionVanLeerSlopeReal, CorrectInputExpectCorrectOutput) { - *monotonized_slope = reconstruction::Monotonize_Characteristic_Return_Primitive( - primitive, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvectors, sound_speed, - sound_speed_squared, gamma); + // Note that this check is performed without any margin for error since the function should return the hard coded zero + EXPECT_EQ(0, reconstruction::Van_Leer_Limiter(8, -12, 32, 128)) << "Test failed in the case of mixed sign slopes."; + + // These checks also have no margin of error since they should return bit exact values + EXPECT_EQ(-24, reconstruction::Van_Leer_Limiter(12, 17, -38, 128)) + << "Test failed in the case of selecting the left slope"; + EXPECT_EQ(-24, reconstruction::Van_Leer_Limiter(17, 12, -38, 128)) + << "Test failed in the case of selecting the right slope"; + EXPECT_EQ(-12, reconstruction::Van_Leer_Limiter(128, 38, -12, 17)) + << "Test failed in the case of selecting the centered slope"; + EXPECT_EQ(-12, reconstruction::Van_Leer_Limiter(128, 38, -17, 12)) + << "Test failed in the case of selecting the van leer slope"; } -TEST(tALLReconstructionMonotonizeCharacteristicReturnPrimitive, CorrectInputExpectCorrectOutput) +TEST(tALLReconstructionVanLeerSlopeCharacteristic, CorrectInputExpectCorrectOutput) { #ifdef MHD - hydro_utilities::Primitive const primitive{1, {2, 3, 4}, 5, {6, 7, 8}}; - hydro_utilities::Primitive const del_L{9, {10, 11, 12}, 13, {14, 15, 16}}; - hydro_utilities::Primitive const del_R{17, {18, 19, 20}, 21, {22, 23, 24}}; - hydro_utilities::Primitive const del_C{25, {26, 27, 28}, 29, {30, 31, 32}}; - hydro_utilities::Primitive const del_G{33, {34, 35, 36}, 37, {38, 39, 40}}; reconstruction::Characteristic const del_a_L{41, 42, 43, 44, 45, 46, 47}; reconstruction::Characteristic const del_a_R{48, 49, 50, 51, 52, 53, 54}; reconstruction::Characteristic const del_a_C{55, 56, 57, 58, 59, 60, 61}; reconstruction::Characteristic const del_a_G{62, 64, 65, 66, 67, 68, 69}; #else // MHD - hydro_utilities::Primitive const primitive{1, {2, 3, 4}, 5}; - hydro_utilities::Primitive const del_L{9, {10, 11, 12}, 13}; - hydro_utilities::Primitive const del_R{17, {18, 19, 20}, 21}; - hydro_utilities::Primitive const del_C{25, {26, 27, 28}, 29}; - hydro_utilities::Primitive const del_G{33, {34, 35, 36}, 37}; reconstruction::Characteristic const del_a_L{41, 42, 43, 44, 45}; reconstruction::Characteristic const del_a_R{48, 49, 50, 51, 52}; reconstruction::Characteristic const del_a_C{55, 56, 57, 58, 59}; reconstruction::Characteristic const del_a_G{62, 64, 65, 66, 67}; #endif // MHD - Real const sound_speed = 17.0, sound_speed_squared = sound_speed * sound_speed; - Real const gamma = 5. / 3.; - reconstruction::EigenVecs const eigenvectors{ - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, - }; // Get test data - cuda_utilities::DeviceVector dev_results(1); - hipLaunchKernelGGL(Test_Monotize_Characteristic_Return_Primitive, 1, 1, 0, 0, primitive, del_L, del_R, del_C, del_G, - del_a_L, del_a_R, del_a_C, del_a_G, eigenvectors, sound_speed, sound_speed_squared, gamma, - dev_results.data()); - GPU_Error_Check(); - cudaDeviceSynchronize(); - hydro_utilities::Primitive const host_results = dev_results.at(0); + reconstruction::Characteristic test_data = reconstruction::Van_Leer_Limiter(del_a_L, del_a_R, del_a_C, del_a_G); // Check results + reconstruction::Characteristic fiducial_data{55, 56, 57, 58, 59}; + testing_utilities::Check_Results(fiducial_data.a0, test_data.a0, "a0"); + testing_utilities::Check_Results(fiducial_data.a1, test_data.a1, "a1"); + testing_utilities::Check_Results(fiducial_data.a2, test_data.a2, "a2"); + testing_utilities::Check_Results(fiducial_data.a3, test_data.a3, "a3"); + testing_utilities::Check_Results(fiducial_data.a4, test_data.a4, "a4"); #ifdef MHD hydro_utilities::Primitive const fiducial_data{5046, {2934, -2526, -2828}, 1441532, {0.0, -69716, 72152}}; testing_utilities::Check_Results(fiducial_data.density, host_results.density, "density"); From eee20a504454942212fdbecdf6f3e1cdbb4376ac Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 4 Mar 2024 16:03:26 -0500 Subject: [PATCH 06/29] Move sound_speed into EigenVecs Sound speed plays a similar role to the other members of EigenVecs so including it makes sense rather than passing around extra arguments --- src/reconstruction/plmc_cuda.cu | 67 ++++---- src/reconstruction/ppmc_cuda.cu | 162 +++++++----------- src/reconstruction/reconstruction_internals.h | 53 +++--- .../reconstruction_internals_tests.cu | 153 +++++++++++------ 4 files changed, 226 insertions(+), 209 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 9970e64f1..10d783937 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -40,22 +40,18 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ // Set the field indices for the various directions int o1, o2, o3; - switch (dir) { - case 0: - o1 = grid_enum::momentum_x; - o2 = grid_enum::momentum_y; - o3 = grid_enum::momentum_z; - break; - case 1: - o1 = grid_enum::momentum_y; - o2 = grid_enum::momentum_z; - o3 = grid_enum::momentum_x; - break; - case 2: - o1 = grid_enum::momentum_z; - o2 = grid_enum::momentum_x; - o3 = grid_enum::momentum_y; - break; + if constexpr (dir == 0) { + o1 = grid_enum::momentum_x; + o2 = grid_enum::momentum_y; + o3 = grid_enum::momentum_z; + } else if constexpr (dir == 1) { + o1 = grid_enum::momentum_y; + o2 = grid_enum::momentum_z; + o3 = grid_enum::momentum_x; + } else if constexpr (dir == 2) { + o1 = grid_enum::momentum_z; + o2 = grid_enum::momentum_x; + o3 = grid_enum::momentum_y; } // load the 3-cell stencil into registers @@ -71,21 +67,11 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ hydro_utilities::Primitive const cell_ipo = hydro_utilities::Load_Cell_Primitive( dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, gamma); - // calculate the adiabatic sound speed in cell i - Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); - Real const sound_speed_squared = sound_speed * sound_speed; + // Compute the eigenvectors + reconstruction::EigenVecs const eigenvectors = reconstruction::Compute_Eigenvectors(cell_i, gamma); -// Compute the eigenvectors -#ifdef MHD - reconstruction::EigenVecs const eigenvectors = - reconstruction::Compute_Eigenvectors(cell_i, sound_speed, sound_speed_squared, gamma); -#else - reconstruction::EigenVecs eigenvectors; -#endif // MHD - - // Compute the left, right, centered, and van Leer differences of the - // primitive variables Note that here L and R refer to locations relative to - // the cell center + // Compute the left, right, centered, and van Leer differences of the primitive variables Note that here L and R refer + // to locations relative to the cell center // left hydro_utilities::Primitive const del_L = reconstruction::Compute_Slope(cell_imo, cell_i); @@ -104,24 +90,23 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A reconstruction::Characteristic const del_a_L = - reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvectors, sound_speed, sound_speed_squared, gamma); + reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvectors, gamma); reconstruction::Characteristic const del_a_R = - reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvectors, sound_speed, sound_speed_squared, gamma); + reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvectors, gamma); reconstruction::Characteristic const del_a_C = - reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvectors, sound_speed, sound_speed_squared, gamma); + reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvectors, gamma); reconstruction::Characteristic const del_a_G = - reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvectors, sound_speed, sound_speed_squared, gamma); + reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvectors, gamma); // Apply monotonicity constraints to the differences in the characteristic variables and project the monotonized // difference in the characteristic variables back onto the primitive variables Stone Eqn 39 reconstruction::Characteristic const del_a_m = reconstruction::Van_Leer_Limiter(del_a_L, del_a_R, del_a_C, del_a_G); // Project back into the primitive variables. - hydro_utilities::Primitive del_m = - Characteristic_To_Primitive(cell_i, del_a_m, eigenvectors, sound_speed, sound_speed_squared, gamma); + hydro_utilities::Primitive del_m = Characteristic_To_Primitive(cell_i, del_a_m, eigenvectors, gamma); // Limit the variables that aren't transformed by the characteristic projection #ifdef DE @@ -221,6 +206,10 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ (cell_i.density * del_m_i.velocity.x() / (2 * sound_speed) + del_m_i.pressure / (2 * sound_speed_squared)); sum_1 += lamdiff * (del_m_i.velocity.x() / 2.0 + del_m_i.pressure / (2 * sound_speed * cell_i.density)); sum_4 += lamdiff * (cell_i.density * del_m_i.velocity.x() * sound_speed / 2.0 + del_m_i.pressure / 2.0); + sum_0 += lamdiff * (cell_i.density * del_m.velocity.x / (2 * eigenvectors.sound_speed) + + del_m.pressure / (2 * eigenvectors.sound_speed * eigenvectors.sound_speed)); + sum_1 += lamdiff * (del_m.velocity.x / 2.0 + del_m.pressure / (2 * eigenvectors.sound_speed * cell_i.density)); + sum_4 += lamdiff * (cell_i.density * del_m.velocity.x * eigenvectors.sound_speed / 2.0 + del_m.pressure / 2.0); } // add the corrections to the initial guesses for the interface values @@ -255,11 +244,15 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ del_m_i.pressure / (2 * sound_speed_squared)); sum_1 += lamdiff * (del_m_i.velocity.x() / 2.0 - del_m_i.pressure / (2 * sound_speed * cell_i.density)); sum_4 += lamdiff * (-cell_i.density * del_m_i.velocity.x() * sound_speed / 2.0 + del_m_i.pressure / 2.0); + sum_0 += lamdiff * (-cell_i.density * del_m.velocity.x / (2 * eigenvectors.sound_speed) + + del_m.pressure / (2 * eigenvectors.sound_speed * eigenvectors.sound_speed)); + sum_1 += lamdiff * (del_m.velocity.x / 2.0 - del_m.pressure / (2 * eigenvectors.sound_speed * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_m.velocity.x * eigenvectors.sound_speed / 2.0 + del_m.pressure / 2.0); } if (lambda_0 <= 0) { Real lamdiff = lambda_m - lambda_0; - sum_0 += lamdiff * (del_m_i.density - del_m_i.pressure / (sound_speed_squared)); + sum_0 += lamdiff * (del_m_i.density - del_m_i.pressure / (eigenvectors.sound_speed * eigenvectors.sound_speed)); sum_2 += lamdiff * del_m_i.velocity.y(); sum_3 += lamdiff * del_m_i.velocity.z(); #ifdef DE diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 06f98a72f..977b1540f 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -37,22 +37,18 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // Set the field indices for the various directions int o1, o2, o3; - switch (dir) { - case 0: - o1 = grid_enum::momentum_x; - o2 = grid_enum::momentum_y; - o3 = grid_enum::momentum_z; - break; - case 1: - o1 = grid_enum::momentum_y; - o2 = grid_enum::momentum_z; - o3 = grid_enum::momentum_x; - break; - case 2: - o1 = grid_enum::momentum_z; - o2 = grid_enum::momentum_x; - o3 = grid_enum::momentum_y; - break; + if constexpr (dir == 0) { + o1 = grid_enum::momentum_x; + o2 = grid_enum::momentum_y; + o3 = grid_enum::momentum_z; + } else if constexpr (dir == 1) { + o1 = grid_enum::momentum_y; + o2 = grid_enum::momentum_z; + o3 = grid_enum::momentum_x; + } else if constexpr (dir == 2) { + o1 = grid_enum::momentum_z; + o2 = grid_enum::momentum_x; + o3 = grid_enum::momentum_y; } // load the 5-cell stencil into registers @@ -86,11 +82,8 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // Cell i-1 slopes // =============== - // calculate the adiabatic sound speed in cell im1 - Real sound_speed = hydro_utilities::Calc_Sound_Speed(cell_im1.pressure, cell_im1.density, gamma); - // this isn't actually used and the compiler should optimize it away but since this is the only reconstruction - // function that won't use it it was easier to add it here as an unused variable - reconstruction::EigenVecs eigenvector; + // Compute the eigenvectors for this cell + reconstruction::EigenVecs eigenvectors = reconstruction::Compute_Eigenvectors(cell_im1, gamma); // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables. Note that here L // and R refer to locations relative to the cell center Stone Eqn 36 @@ -111,17 +104,17 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // characteristic variables Stone Eqn 37 (del_a are differences in // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A - reconstruction::Characteristic del_a_L = reconstruction::Primitive_To_Characteristic( - cell_im1, del_L, eigenvector, sound_speed, sound_speed * sound_speed, gamma); + reconstruction::Characteristic del_a_L = + reconstruction::Primitive_To_Characteristic(cell_im1, del_L, eigenvectors, gamma); - reconstruction::Characteristic del_a_R = reconstruction::Primitive_To_Characteristic( - cell_im1, del_R, eigenvector, sound_speed, sound_speed * sound_speed, gamma); + reconstruction::Characteristic del_a_R = + reconstruction::Primitive_To_Characteristic(cell_im1, del_R, eigenvectors, gamma); - reconstruction::Characteristic del_a_C = reconstruction::Primitive_To_Characteristic( - cell_im1, del_C, eigenvector, sound_speed, sound_speed * sound_speed, gamma); + reconstruction::Characteristic del_a_C = + reconstruction::Primitive_To_Characteristic(cell_im1, del_C, eigenvectors, gamma); - reconstruction::Characteristic del_a_G = reconstruction::Primitive_To_Characteristic( - cell_im1, del_G, eigenvector, sound_speed, sound_speed * sound_speed, gamma); + reconstruction::Characteristic del_a_G = + reconstruction::Primitive_To_Characteristic(cell_im1, del_G, eigenvectors, gamma); // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables reconstruction::Characteristic const del_a_m_im1 = @@ -129,8 +122,7 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables // Stone Eqn 39 - hydro_utilities::Primitive del_m_im1 = - Characteristic_To_Primitive(cell_im1, del_a_m_im1, eigenvector, sound_speed, sound_speed * sound_speed, gamma); + hydro_utilities::Primitive del_m_im1 = Characteristic_To_Primitive(cell_im1, del_a_m_im1, eigenvectors, gamma); // Limit the variables that aren't transformed by the characteristic projection #ifdef DE @@ -148,8 +140,8 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // Cell i slopes // ============= - // calculate the adiabatic sound speed in cell i - sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); + // Compute the eigenvectors for this cell + eigenvectors = reconstruction::Compute_Eigenvectors(cell_i, gamma); // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables. Note that here L // and R refer to locations relative to the cell center Stone Eqn 36 @@ -170,25 +162,20 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // characteristic variables Stone Eqn 37 (del_a are differences in // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A - del_a_L = reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvector, sound_speed, - sound_speed * sound_speed, gamma); + del_a_L = reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvectors, gamma); - del_a_R = reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvector, sound_speed, - sound_speed * sound_speed, gamma); + del_a_R = reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvectors, gamma); - del_a_C = reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvector, sound_speed, - sound_speed * sound_speed, gamma); + del_a_C = reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvectors, gamma); - del_a_G = reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvector, sound_speed, - sound_speed * sound_speed, gamma); + del_a_G = reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvectors, gamma); // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables reconstruction::Characteristic const del_a_m_i = reconstruction::Van_Leer_Limiter(del_a_L, del_a_R, del_a_C, del_a_G); // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables // Stone Eqn 39 - hydro_utilities::Primitive del_m_i = - Characteristic_To_Primitive(cell_ip1, del_a_m_i, eigenvector, sound_speed, sound_speed * sound_speed, gamma); + hydro_utilities::Primitive del_m_i = Characteristic_To_Primitive(cell_ip1, del_a_m_i, eigenvectors, gamma); // Limit the variables that aren't transformed by the characteristic projection #ifdef DE @@ -206,8 +193,8 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // Cell i+1 slopes // =============== - // calculate the adiabatic sound speed in cell ipo - sound_speed = hydro_utilities::Calc_Sound_Speed(cell_ip1.pressure, cell_ip1.density, gamma); + // Compute the eigenvectors for this cell + eigenvectors = reconstruction::Compute_Eigenvectors(cell_ip1, gamma); // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables. Note that here L // and R refer to locations relative to the cell center Stone Eqn 36 @@ -228,17 +215,13 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // characteristic variables Stone Eqn 37 (del_a are differences in // characteristic variables, see Stone for notation) Use the eigenvectors // given in Stone 2008, Appendix A - del_a_L = reconstruction::Primitive_To_Characteristic(cell_ip1, del_L, eigenvector, sound_speed, - sound_speed * sound_speed, gamma); + del_a_L = reconstruction::Primitive_To_Characteristic(cell_ip1, del_L, eigenvectors, gamma); - del_a_R = reconstruction::Primitive_To_Characteristic(cell_ip1, del_R, eigenvector, sound_speed, - sound_speed * sound_speed, gamma); + del_a_R = reconstruction::Primitive_To_Characteristic(cell_ip1, del_R, eigenvectors, gamma); - del_a_C = reconstruction::Primitive_To_Characteristic(cell_ip1, del_C, eigenvector, sound_speed, - sound_speed * sound_speed, gamma); + del_a_C = reconstruction::Primitive_To_Characteristic(cell_ip1, del_C, eigenvectors, gamma); - del_a_G = reconstruction::Primitive_To_Characteristic(cell_ip1, del_G, eigenvector, sound_speed, - sound_speed * sound_speed, gamma); + del_a_G = reconstruction::Primitive_To_Characteristic(cell_ip1, del_G, eigenvectors, gamma); // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables reconstruction::Characteristic const del_a_m_ip1 = @@ -246,8 +229,7 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables // Stone Eqn 39 - hydro_utilities::Primitive del_m_ip1 = - Characteristic_To_Primitive(cell_ip1, del_a_m_ip1, eigenvector, sound_speed, sound_speed * sound_speed, gamma); + hydro_utilities::Primitive del_m_ip1 = Characteristic_To_Primitive(cell_ip1, del_a_m_ip1, eigenvectors, gamma); // Limit the variables that aren't transformed by the characteristic projection #ifdef DE @@ -309,7 +291,7 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // primitive variables using the cell-centered primitive variables // recalculate the adiabatic sound speed in cell i - sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); Real const lambda_m = cell_i.velocity.x() - sound_speed; Real const lambda_0 = cell_i.velocity.x(); @@ -600,22 +582,18 @@ __global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bo // Set the field indices for the various directions int o1, o2, o3; - switch (dir) { - case 0: - o1 = grid_enum::momentum_x; - o2 = grid_enum::momentum_y; - o3 = grid_enum::momentum_z; - break; - case 1: - o1 = grid_enum::momentum_y; - o2 = grid_enum::momentum_z; - o3 = grid_enum::momentum_x; - break; - case 2: - o1 = grid_enum::momentum_z; - o2 = grid_enum::momentum_x; - o3 = grid_enum::momentum_y; - break; + if constexpr (dir == 0) { + o1 = grid_enum::momentum_x; + o2 = grid_enum::momentum_y; + o3 = grid_enum::momentum_z; + } else if constexpr (dir == 1) { + o1 = grid_enum::momentum_y; + o2 = grid_enum::momentum_z; + o3 = grid_enum::momentum_x; + } else if constexpr (dir == 2) { + o1 = grid_enum::momentum_z; + o2 = grid_enum::momentum_x; + o3 = grid_enum::momentum_y; } // load the 5-cell stencil into registers @@ -643,36 +621,28 @@ __global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bo hydro_utilities::Primitive const cell_ip2 = hydro_utilities::Load_Cell_Primitive( dev_conserved, xid + 2 * int(dir == 0), yid + 2 * int(dir == 1), zid + 2 * int(dir == 2), nx, ny, n_cells, gamma); - // Convert to the characteristic variables - Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); - Real const sound_speed_squared = sound_speed * sound_speed; - -#ifdef MHD - reconstruction::EigenVecs eigenvectors = - reconstruction::Compute_Eigenvectors(cell_i, sound_speed, sound_speed_squared, gamma); -#else - reconstruction::EigenVecs eigenvectors; -#endif // MHD + // Compute the eigenvectors + reconstruction::EigenVecs const eigenvectors = reconstruction::Compute_Eigenvectors(cell_i, gamma); // Cell i - reconstruction::Characteristic const cell_i_characteristic = reconstruction::Primitive_To_Characteristic( - cell_i, cell_i, eigenvectors, sound_speed, sound_speed_squared, gamma); + reconstruction::Characteristic const cell_i_characteristic = + reconstruction::Primitive_To_Characteristic(cell_i, cell_i, eigenvectors, gamma); // Cell i-1 - reconstruction::Characteristic const cell_im1_characteristic = reconstruction::Primitive_To_Characteristic( - cell_i, cell_im1, eigenvectors, sound_speed, sound_speed_squared, gamma); + reconstruction::Characteristic const cell_im1_characteristic = + reconstruction::Primitive_To_Characteristic(cell_i, cell_im1, eigenvectors, gamma); // Cell i-2 - reconstruction::Characteristic const cell_im2_characteristic = reconstruction::Primitive_To_Characteristic( - cell_i, cell_im2, eigenvectors, sound_speed, sound_speed_squared, gamma); + reconstruction::Characteristic const cell_im2_characteristic = + reconstruction::Primitive_To_Characteristic(cell_i, cell_im2, eigenvectors, gamma); // Cell i+1 - reconstruction::Characteristic const cell_ip1_characteristic = reconstruction::Primitive_To_Characteristic( - cell_i, cell_ip1, eigenvectors, sound_speed, sound_speed_squared, gamma); + reconstruction::Characteristic const cell_ip1_characteristic = + reconstruction::Primitive_To_Characteristic(cell_i, cell_ip1, eigenvectors, gamma); // Cell i+2 - reconstruction::Characteristic const cell_ip2_characteristic = reconstruction::Primitive_To_Characteristic( - cell_i, cell_ip2, eigenvectors, sound_speed, sound_speed_squared, gamma); + reconstruction::Characteristic const cell_ip2_characteristic = + reconstruction::Primitive_To_Characteristic(cell_i, cell_ip2, eigenvectors, gamma); // Compute the interface states for each field reconstruction::Characteristic interface_R_imh_characteristic, interface_L_iph_characteristic; @@ -703,10 +673,10 @@ __global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bo #endif // MHD // Convert back to primitive variables - hydro_utilities::Primitive interface_L_iph = reconstruction::Characteristic_To_Primitive( - cell_i, interface_L_iph_characteristic, eigenvectors, sound_speed, sound_speed_squared, gamma); - hydro_utilities::Primitive interface_R_imh = reconstruction::Characteristic_To_Primitive( - cell_i, interface_R_imh_characteristic, eigenvectors, sound_speed, sound_speed_squared, gamma); + hydro_utilities::Primitive interface_L_iph = + reconstruction::Characteristic_To_Primitive(cell_i, interface_L_iph_characteristic, eigenvectors, gamma); + hydro_utilities::Primitive interface_R_imh = + reconstruction::Characteristic_To_Primitive(cell_i, interface_R_imh_characteristic, eigenvectors, gamma); // Compute the interfaces for the variables that don't have characteristics #ifdef DE diff --git a/src/reconstruction/reconstruction_internals.h b/src/reconstruction/reconstruction_internals.h index daba8d4fe..403f43fc9 100644 --- a/src/reconstruction/reconstruction_internals.h +++ b/src/reconstruction/reconstruction_internals.h @@ -58,6 +58,8 @@ enum Kind { // ===================================================================================================================== struct EigenVecs { + Real sound_speed; +#ifdef MHD Real magnetosonic_speed_fast, magnetosonic_speed_slow, magnetosonic_speed_fast_squared, magnetosonic_speed_slow_squared; Real alpha_fast, alpha_slow; @@ -69,6 +71,7 @@ struct EigenVecs { /// The primed values are used in the conversion from primitive to characteristic variables Real q_prime_fast, q_prime_slow; Real a_prime_fast, a_prime_slow; +#endif // MHD }; // ===================================================================================================================== @@ -362,17 +365,16 @@ hydro_utilities::Primitive __device__ __host__ __inline__ Compute_Van_Leer_Slope * \brief Compute the eigenvectors in the given cell * * \param[in] primitive The primitive variables in a particular cell - * \param[in] sound_speed The sound speed - * \param[in] sound_speed_squared The sound speed squared * \param[in] gamma The adiabatic index * \return EigenVecs */ -#ifdef MHD -EigenVecs __device__ __inline__ Compute_Eigenvectors(hydro_utilities::Primitive const &primitive, - Real const &sound_speed, Real const &sound_speed_squared, - Real const &gamma) +EigenVecs __device__ __inline__ Compute_Eigenvectors(hydro_utilities::Primitive const &primitive, Real const &gamma) { EigenVecs output; + + output.sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); + +#ifdef MHD // This is taken from Stone et al. 2008, appendix A. Equation numbers will be quoted as relevant // Compute wave speeds and their squares @@ -386,6 +388,8 @@ EigenVecs __device__ __inline__ Compute_Eigenvectors(hydro_utilities::Primitive output.magnetosonic_speed_fast_squared = output.magnetosonic_speed_fast * output.magnetosonic_speed_fast; output.magnetosonic_speed_slow_squared = output.magnetosonic_speed_slow * output.magnetosonic_speed_slow; + Real const sound_speed_squared = output.sound_speed * output.sound_speed; + // Compute Alphas (equation A16) if (Real const denom = (output.magnetosonic_speed_fast_squared - output.magnetosonic_speed_slow_squared), numerator_2 = (output.magnetosonic_speed_fast_squared - sound_speed_squared); @@ -416,14 +420,14 @@ EigenVecs __device__ __inline__ Compute_Eigenvectors(hydro_utilities::Primitive output.q_slow = output.sign * output.alpha_slow * output.magnetosonic_speed_slow; // Compute A(s) (equation A15) - output.a_fast = output.alpha_fast * sound_speed * sqrt(primitive.density); - output.a_slow = output.alpha_slow * sound_speed * sqrt(primitive.density); - output.a_prime_fast = 0.5 * output.alpha_fast / (sound_speed * sqrt(primitive.density)); - output.a_prime_slow = 0.5 * output.alpha_slow / (sound_speed * sqrt(primitive.density)); + output.a_fast = output.alpha_fast * output.sound_speed * sqrt(primitive.density); + output.a_slow = output.alpha_slow * output.sound_speed * sqrt(primitive.density); + output.a_prime_fast = 0.5 * output.alpha_fast / (output.sound_speed * sqrt(primitive.density)); + output.a_prime_slow = 0.5 * output.alpha_slow / (output.sound_speed * sqrt(primitive.density)); +#endif // MHD return output; } -#endif // MHD // ===================================================================================================================== // ===================================================================================================================== @@ -434,15 +438,12 @@ EigenVecs __device__ __inline__ Compute_Eigenvectors(hydro_utilities::Primitive * \param[in] primitive The primitive variables * \param[in] primitive_slope The primitive variables slopes * \param[in] EigenVecs The eigenvectors - * \param[in] sound_speed The speed of sound - * \param[in] sound_speed_squared The speed of sound squared * \param[in] gamma The adiabatic index * \return Characteristic */ Characteristic __device__ __inline__ Primitive_To_Characteristic(hydro_utilities::Primitive const &primitive, hydro_utilities::Primitive const &primitive_slope, - EigenVecs const &eigen, Real const &sound_speed, - Real const &sound_speed_squared, Real const &gamma) + EigenVecs const &eigen, Real const &gamma) { Characteristic output; @@ -469,7 +470,7 @@ Characteristic __device__ __inline__ Primitive_To_Characteristic(hydro_utilities eigen.q_prime_fast * (eigen.beta_y * primitive_slope.velocity.y() + eigen.beta_z * primitive_slope.velocity.z()) - eigen.a_prime_fast * (eigen.beta_y * primitive_slope.magnetic.y() + eigen.beta_z * primitive_slope.magnetic.z()); - output.a3 = primitive_slope.density - primitive_slope.pressure / sound_speed_squared; + output.a3 = primitive_slope.density - primitive_slope.pressure / (eigen.sound_speed * eigen.sound_speed); output.a4 = eigen.n_fs * eigen.alpha_slow * @@ -491,13 +492,13 @@ Characteristic __device__ __inline__ Primitive_To_Characteristic(hydro_utilities eigen.a_prime_slow * (eigen.beta_y * primitive_slope.magnetic.y() + eigen.beta_z * primitive_slope.magnetic.z()); #else // not MHD - output.a0 = -primitive.density * primitive_slope.velocity.x() / (2.0 * sound_speed) + - primitive_slope.pressure / (2.0 * sound_speed_squared); - output.a1 = primitive_slope.density - primitive_slope.pressure / (sound_speed_squared); + output.a0 = -primitive.density * primitive_slope.velocity.x() / (2.0 * eigen.sound_speed) + + primitive_slope.pressure / (2.0 * (eigen.sound_speed * eigen.sound_speed)); + output.a1 = primitive_slope.density - primitive_slope.pressure / ((eigen.sound_speed * eigen.sound_speed)); output.a2 = primitive_slope.velocity.y(); output.a3 = primitive_slope.velocity.z(); - output.a4 = primitive.density * primitive_slope.velocity.x() / (2.0 * sound_speed) + - primitive_slope.pressure / (2.0 * sound_speed_squared); + output.a4 = primitive.density * primitive_slope.velocity.x() / (2.0 * eigen.sound_speed) + + primitive_slope.pressure / (2.0 * (eigen.sound_speed * eigen.sound_speed)); #endif // MHD return output; @@ -512,14 +513,12 @@ Characteristic __device__ __inline__ Primitive_To_Characteristic(hydro_utilities * \param[in] primitive The primitive variables * \param[in] characteristic_slope The characteristic slopes * \param[in] eigen The eigenvectors - * \param[in] sound_speed The sound speed - * \param[in] sound_speed_squared The sound speed squared * \param[in] gamma The adiabatic index * \return hydro_utilities::Primitive The state in primitive variables */ hydro_utilities::Primitive __device__ __host__ __inline__ Characteristic_To_Primitive( hydro_utilities::Primitive const &primitive, Characteristic const &characteristic_slope, EigenVecs const &eigen, - Real const &sound_speed, Real const &sound_speed_squared, Real const &gamma) + Real const &gamma) { hydro_utilities::Primitive output; #ifdef MHD @@ -536,7 +535,7 @@ hydro_utilities::Primitive __device__ __host__ __inline__ Characteristic_To_Prim output.velocity.z() = eigen.beta_z * (eigen.q_slow * (characteristic_slope.a0 - characteristic_slope.a6) + eigen.q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) + eigen.beta_y * (characteristic_slope.a1 - characteristic_slope.a5); - output.pressure = primitive.density * sound_speed_squared * + output.pressure = primitive.density * (eigen.sound_speed * eigen.sound_speed) * (eigen.alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) + eigen.alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)); output.magnetic.y() = @@ -550,10 +549,10 @@ hydro_utilities::Primitive __device__ __host__ __inline__ Characteristic_To_Prim #else // not MHD output.density = characteristic_slope.a0 + characteristic_slope.a1 + characteristic_slope.a4; - output.velocity.x() = sound_speed / primitive.density * (characteristic_slope.a4 - characteristic_slope.a0); + output.velocity.x() = eigen.sound_speed / primitive.density * (characteristic_slope.a4 - characteristic_slope.a0); output.velocity.y() = characteristic_slope.a2; output.velocity.z() = characteristic_slope.a3; - output.pressure = sound_speed_squared * (characteristic_slope.a0 + characteristic_slope.a4); + output.pressure = (eigen.sound_speed * eigen.sound_speed) * (characteristic_slope.a0 + characteristic_slope.a4); #endif // MHD return output; diff --git a/src/reconstruction/reconstruction_internals_tests.cu b/src/reconstruction/reconstruction_internals_tests.cu index be780a0cc..6200cb5eb 100644 --- a/src/reconstruction/reconstruction_internals_tests.cu +++ b/src/reconstruction/reconstruction_internals_tests.cu @@ -22,32 +22,29 @@ #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" -#ifdef MHD __global__ void Test_Prim_2_Char(hydro_utilities::Primitive const primitive, hydro_utilities::Primitive const primitive_slope, - reconstruction::EigenVecs const eigenvectors, Real const gamma, Real const sound_speed, - Real const sound_speed_squared, reconstruction::Characteristic *characteristic_slope) + reconstruction::EigenVecs const eigenvectors, Real const gamma, + reconstruction::Characteristic *characteristic_slope) { - *characteristic_slope = reconstruction::Primitive_To_Characteristic(primitive, primitive_slope, eigenvectors, - sound_speed, sound_speed_squared, gamma); + *characteristic_slope = reconstruction::Primitive_To_Characteristic(primitive, primitive_slope, eigenvectors, gamma); } __global__ void Test_Char_2_Prim(hydro_utilities::Primitive const primitive, reconstruction::Characteristic const characteristic_slope, - reconstruction::EigenVecs const eigenvectors, Real const gamma, Real const sound_speed, - Real const sound_speed_squared, hydro_utilities::Primitive *primitive_slope) + reconstruction::EigenVecs const eigenvectors, Real const gamma, + hydro_utilities::Primitive *primitive_slope) { - *primitive_slope = reconstruction::Characteristic_To_Primitive(primitive, characteristic_slope, eigenvectors, - sound_speed, sound_speed_squared, gamma); + *primitive_slope = reconstruction::Characteristic_To_Primitive(primitive, characteristic_slope, eigenvectors, gamma); } -__global__ void Test_Compute_Eigenvectors(hydro_utilities::Primitive const primitive, Real const sound_speed, - Real const sound_speed_squared, Real const gamma, +__global__ void Test_Compute_Eigenvectors(hydro_utilities::Primitive const primitive, Real const gamma, reconstruction::EigenVecs *eigenvectors) { - *eigenvectors = reconstruction::Compute_Eigenvectors(primitive, sound_speed, sound_speed_squared, gamma); + *eigenvectors = reconstruction::Compute_Eigenvectors(primitive, gamma); } +#ifdef MHD TEST(tMHDReconstructionPrimitive2Characteristic, CorrectInputExpectCorrectOutput) { // Test parameters @@ -55,15 +52,12 @@ TEST(tMHDReconstructionPrimitive2Characteristic, CorrectInputExpectCorrectOutput hydro_utilities::Primitive const primitive{1, {2, 3, 4}, 5, {6, 7, 8}}; hydro_utilities::Primitive const primitive_slope{9, {10, 11, 12}, 13, {14, 15, 16}}; reconstruction::EigenVecs const eigenvectors{ - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 2.886751345948129, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, }; - Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); - Real const sound_speed_squared = sound_speed * sound_speed; // Run test cuda_utilities::DeviceVector dev_results(1); - hipLaunchKernelGGL(Test_Prim_2_Char, 1, 1, 0, 0, primitive, primitive_slope, eigenvectors, gamma, sound_speed, - sound_speed_squared, dev_results.data()); + hipLaunchKernelGGL(Test_Prim_2_Char, 1, 1, 0, 0, primitive, primitive_slope, eigenvectors, gamma, dev_results.data()); GPU_Error_Check(); cudaDeviceSynchronize(); reconstruction::Characteristic const host_results = dev_results.at(0); @@ -86,15 +80,13 @@ TEST(tMHDReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutput hydro_utilities::Primitive const primitive{1, {2, 3, 4}, 5, {6, 7, 8}}; reconstruction::Characteristic const characteristic_slope{17, 18, 19, 20, 21, 22, 23}; reconstruction::EigenVecs const eigenvectors{ - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 2.886751345948129, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, }; - Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); - Real const sound_speed_squared = sound_speed * sound_speed; // Run test cuda_utilities::DeviceVector dev_results(1); - hipLaunchKernelGGL(Test_Char_2_Prim, 1, 1, 0, 0, primitive, characteristic_slope, eigenvectors, gamma, sound_speed, - sound_speed_squared, dev_results.data()); + hipLaunchKernelGGL(Test_Char_2_Prim, 1, 1, 0, 0, primitive, characteristic_slope, eigenvectors, gamma, + dev_results.data()); GPU_Error_Check(); cudaDeviceSynchronize(); hydro_utilities::Primitive const host_results = dev_results.at(0); @@ -116,40 +108,35 @@ TEST(tMHDReconstructionComputeEigenvectors, CorrectInputExpectCorrectOutput) // Test parameters Real const &gamma = 5. / 3.; hydro_utilities::Primitive const primitive{1, {2, 3, 4}, 5, {6, 7, 8}}; - Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); - Real const sound_speed_squared = sound_speed * sound_speed; // Run test cuda_utilities::DeviceVector dev_results(1); - hipLaunchKernelGGL(Test_Compute_Eigenvectors, 1, 1, 0, 0, primitive, sound_speed, sound_speed_squared, gamma, - dev_results.data()); + hipLaunchKernelGGL(Test_Compute_Eigenvectors, 1, 1, 0, 0, primitive, gamma, dev_results.data()); GPU_Error_Check(); cudaDeviceSynchronize(); reconstruction::EigenVecs const host_results = dev_results.at(0); - // std::cout << to_string_exact(host_results.magnetosonic_speed_fast) << ","; - // std::cout << to_string_exact(host_results.magnetosonic_speed_slow) << ","; - // std::cout << to_string_exact(host_results.magnetosonic_speed_fast_squared) << ","; - // std::cout << to_string_exact(host_results.magnetosonic_speed_slow_squared) << ","; - // std::cout << to_string_exact(host_results.alpha_fast) << ","; - // std::cout << to_string_exact(host_results.alpha_slow) << ","; - // std::cout << to_string_exact(host_results.beta_y) << ","; - // std::cout << to_string_exact(host_results.beta_z) << ","; - // std::cout << to_string_exact(host_results.n_fs) << ","; - // std::cout << to_string_exact(host_results.sign) << ","; - // std::cout << to_string_exact(host_results.q_fast) << ","; - // std::cout << to_string_exact(host_results.q_slow) << ","; - // std::cout << to_string_exact(host_results.a_fast) << ","; - // std::cout << to_string_exact(host_results.a_slow) << ","; - // std::cout << to_string_exact(host_results.q_prime_fast) << ","; - // std::cout << to_string_exact(host_results.q_prime_slow) << ","; - // std::cout << to_string_exact(host_results.a_prime_fast) << ","; - // std::cout << to_string_exact(host_results.a_prime_slow) << "," << std::endl; + // Check results - reconstruction::EigenVecs const fiducial_results{ - 12.466068627219666, 1.3894122191714398, 155.40286701855041, 1.9304663147829049, 0.20425471836256681, - 0.97891777490585408, 0.65850460786851805, 0.75257669470687782, 0.059999999999999984, 1, - 2.546253336541183, 1.3601203180183106, 0.58963258314939582, 2.825892204282022, 0.15277520019247093, - 0.081607219081098623, 0.03537795498896374, 0.1695535322569213}; + reconstruction::EigenVecs const fiducial_results{2.886751345948129, + 12.466068627219666, + 1.3894122191714398, + 155.40286701855041, + 1.9304663147829049, + 0.20425471836256681, + 0.97891777490585408, + 0.65850460786851805, + 0.75257669470687782, + 0.059999999999999984, + 1, + 2.546253336541183, + 1.3601203180183106, + 0.58963258314939582, + 2.825892204282022, + 0.15277520019247093, + 0.081607219081098623, + 0.03537795498896374, + 0.1695535322569213}; + testing_utilities::Check_Results(fiducial_results.sound_speed, host_results.sound_speed, "sound_speed"); testing_utilities::Check_Results(fiducial_results.magnetosonic_speed_fast, host_results.magnetosonic_speed_fast, "magnetosonic_speed_fast"); testing_utilities::Check_Results(fiducial_results.magnetosonic_speed_slow, host_results.magnetosonic_speed_slow, @@ -173,6 +160,74 @@ TEST(tMHDReconstructionComputeEigenvectors, CorrectInputExpectCorrectOutput) testing_utilities::Check_Results(fiducial_results.a_prime_fast, host_results.a_prime_fast, "a_prime_fast"); testing_utilities::Check_Results(fiducial_results.a_prime_slow, host_results.a_prime_slow, "a_prime_slow"); } +#else // i.e. not MHD +TEST(tHYDROReconstructionPrimitive2Characteristic, CorrectInputExpectCorrectOutput) +{ + // Test parameters + Real const &gamma = 5. / 3.; + hydro_utilities::Primitive const primitive{1, {2, 3, 4}, 5}; + hydro_utilities::Primitive const primitive_slope{9, {10, 11, 12}, 13}; + reconstruction::EigenVecs const eigenvectors{2.886751345948129}; + + // Run test + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(Test_Prim_2_Char, 1, 1, 0, 0, primitive, primitive_slope, eigenvectors, gamma, dev_results.data()); + GPU_Error_Check(); + cudaDeviceSynchronize(); + reconstruction::Characteristic const host_results = dev_results.at(0); + + // Check results + reconstruction::Characteristic const fiducial_results{-0.95205080756887739, 7.4400000000000004, 11, 12, + 2.512050807568877}; + testing_utilities::Check_Results(fiducial_results.a0, host_results.a0, "a0"); + testing_utilities::Check_Results(fiducial_results.a1, host_results.a1, "a1"); + testing_utilities::Check_Results(fiducial_results.a2, host_results.a2, "a2"); + testing_utilities::Check_Results(fiducial_results.a3, host_results.a3, "a3"); + testing_utilities::Check_Results(fiducial_results.a4, host_results.a4, "a4"); +} + +TEST(tHYDROReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutput) +{ + // Test parameters + Real const &gamma = 5. / 3.; + hydro_utilities::Primitive const primitive{1, {2, 3, 4}, 5}; + reconstruction::Characteristic const characteristic_slope{17, 18, 19, 20, 21}; + reconstruction::EigenVecs const eigenvectors{2.886751345948129}; + + // Run test + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(Test_Char_2_Prim, 1, 1, 0, 0, primitive, characteristic_slope, eigenvectors, gamma, + dev_results.data()); + GPU_Error_Check(); + cudaDeviceSynchronize(); + hydro_utilities::Primitive const host_results = dev_results.at(0); + + // Check results + hydro_utilities::Primitive const fiducial_results{56, {11.547005383792516, 19, 20}, 316.66666666666674}; + testing_utilities::Check_Results(fiducial_results.density, host_results.density, "density"); + testing_utilities::Check_Results(fiducial_results.velocity.x, host_results.velocity.x, "velocity.x"); + testing_utilities::Check_Results(fiducial_results.velocity.y, host_results.velocity.y, "velocity.y"); + testing_utilities::Check_Results(fiducial_results.velocity.z, host_results.velocity.z, "velocity.z"); + testing_utilities::Check_Results(fiducial_results.pressure, host_results.pressure, "pressure"); +} + +TEST(tHYDROReconstructionComputeEigenvectors, CorrectInputExpectCorrectOutput) +{ + // Test parameters + Real const &gamma = 5. / 3.; + hydro_utilities::Primitive const primitive{1, {2, 3, 4}, 5}; + + // Run test + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(Test_Compute_Eigenvectors, 1, 1, 0, 0, primitive, gamma, dev_results.data()); + GPU_Error_Check(); + cudaDeviceSynchronize(); + reconstruction::EigenVecs const host_results = dev_results.at(0); + + // Check results + reconstruction::EigenVecs const fiducial_results{2.886751345948129}; + testing_utilities::Check_Results(fiducial_results.sound_speed, host_results.sound_speed, "sound_speed"); +} #endif // MHD TEST(tALLReconstructionThreadGuard, CorrectInputExpectCorrectOutput) From 068d5370603a7787183783cdf70835565300f49f Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 4 Mar 2024 16:44:51 -0500 Subject: [PATCH 07/29] Convert PLMC into a device function --- src/integrators/VL_1D_cuda.cu | 3 +- src/integrators/VL_2D_cuda.cu | 4 +- src/integrators/simple_1D_cuda.cu | 3 +- src/integrators/simple_2D_cuda.cu | 6 +- src/integrators/simple_3D_cuda.cu | 9 +- src/reconstruction/plmc_cuda.cu | 261 ++------------------------ src/reconstruction/plmc_cuda.h | 252 ++++++++++++++++++++++++- src/reconstruction/plmc_cuda_tests.cu | 12 +- 8 files changed, 280 insertions(+), 270 deletions(-) diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu index b487bc3c9..bdc6009bf 100644 --- a/src/integrators/VL_1D_cuda.cu +++ b/src/integrators/VL_1D_cuda.cu @@ -84,8 +84,7 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea // Step 4: Construct left and right interface values using updated conserved variables #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, - n_fields); + hipLaunchKernelGGL(PLMC_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); #endif #ifdef PLMP hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, diff --git a/src/integrators/VL_2D_cuda.cu b/src/integrators/VL_2D_cuda.cu index 3926e1de0..702ee0500 100644 --- a/src/integrators/VL_2D_cuda.cu +++ b/src/integrators/VL_2D_cuda.cu @@ -100,9 +100,9 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of #endif #ifdef PLMC hipLaunchKernelGGL(PLMC_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, - gama, n_fields); + gama); hipLaunchKernelGGL(PLMC_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, - gama, n_fields); + gama); #endif #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, diff --git a/src/integrators/simple_1D_cuda.cu b/src/integrators/simple_1D_cuda.cu index d0a8359ad..83711b002 100644 --- a/src/integrators/simple_1D_cuda.cu +++ b/src/integrators/simple_1D_cuda.cu @@ -59,8 +59,7 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, GPU_Error_Check(); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, - n_fields); + hipLaunchKernelGGL(PLMC_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); GPU_Error_Check(); #endif #ifdef PPMP diff --git a/src/integrators/simple_2D_cuda.cu b/src/integrators/simple_2D_cuda.cu index 48421c8c0..c590e127c 100644 --- a/src/integrators/simple_2D_cuda.cu +++ b/src/integrators/simple_2D_cuda.cu @@ -61,10 +61,8 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int gama, 1, n_fields); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, - n_fields); - hipLaunchKernelGGL(PLMC_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, - n_fields); + hipLaunchKernelGGL(PLMC_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); + hipLaunchKernelGGL(PLMC_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); #endif #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 0ada1f37f..bb2d473c0 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -94,12 +94,9 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, gama, 2, n_fields); #endif // PLMP #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda<0>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, - n_fields); - hipLaunchKernelGGL(PLMC_cuda<1>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, - n_fields); - hipLaunchKernelGGL(PLMC_cuda<2>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, - n_fields); + hipLaunchKernelGGL(PLMC_cuda<0>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); + hipLaunchKernelGGL(PLMC_cuda<1>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); + hipLaunchKernelGGL(PLMC_cuda<2>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama); #endif #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index 10d783937..c418d913d 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -8,7 +8,6 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../reconstruction/plmc_cuda.h" -#include "../reconstruction/reconstruction_internals.h" #include "../utils/cuda_utilities.h" #include "../utils/gpu.hpp" @@ -23,7 +22,7 @@ right boundary values for the interface calculated using plm. */ template __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, - int ny, int nz, Real dx, Real dt, Real gamma, int n_fields) + int ny, int nz, Real dx, Real dt, Real gamma) { // get a thread ID int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; @@ -35,8 +34,14 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ return; } - // Compute the total number of cells - int const n_cells = nx * ny * nz; + auto [interface_L_iph, interface_R_imh] = + reconstruction::PLMC_Reconstruction(dev_conserved, xid, yid, zid, nx, ny, nz, dx, dt, gamma); + + // apply minimum constraints + interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); + interface_L_iph.density = fmax(interface_L_iph.density, (Real)TINY_NUMBER); + interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER); + interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER); // Set the field indices for the various directions int o1, o2, o3; @@ -54,246 +59,8 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ o3 = grid_enum::momentum_y; } - // load the 3-cell stencil into registers - // cell i - hydro_utilities::Primitive const cell_i = - hydro_utilities::Load_Cell_Primitive(dev_conserved, xid, yid, zid, nx, ny, n_cells, gamma); - - // cell i-1. The equality checks the direction and will subtract one from the correct direction - hydro_utilities::Primitive const cell_imo = hydro_utilities::Load_Cell_Primitive( - dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, gamma); - - // cell i+1. The equality checks the direction and add one to the correct direction - hydro_utilities::Primitive const cell_ipo = hydro_utilities::Load_Cell_Primitive( - dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, gamma); - - // Compute the eigenvectors - reconstruction::EigenVecs const eigenvectors = reconstruction::Compute_Eigenvectors(cell_i, gamma); - - // Compute the left, right, centered, and van Leer differences of the primitive variables Note that here L and R refer - // to locations relative to the cell center - - // left - hydro_utilities::Primitive const del_L = reconstruction::Compute_Slope(cell_imo, cell_i); - - // right - hydro_utilities::Primitive const del_R = reconstruction::Compute_Slope(cell_i, cell_ipo); - - // centered - hydro_utilities::Primitive const del_C = reconstruction::Compute_Slope(cell_imo, cell_ipo, 0.5); - - // Van Leer - hydro_utilities::Primitive const del_G = reconstruction::Compute_Van_Leer_Slope(del_L, del_R); - - // Project the left, right, centered and van Leer differences onto the - // characteristic variables Stone Eqn 37 (del_a are differences in - // characteristic variables, see Stone for notation) Use the eigenvectors - // given in Stone 2008, Appendix A - reconstruction::Characteristic const del_a_L = - reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvectors, gamma); - - reconstruction::Characteristic const del_a_R = - reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvectors, gamma); - - reconstruction::Characteristic const del_a_C = - reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvectors, gamma); - - reconstruction::Characteristic const del_a_G = - reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvectors, gamma); - - // Apply monotonicity constraints to the differences in the characteristic variables and project the monotonized - // difference in the characteristic variables back onto the primitive variables Stone Eqn 39 - reconstruction::Characteristic const del_a_m = reconstruction::Van_Leer_Limiter(del_a_L, del_a_R, del_a_C, del_a_G); - - // Project back into the primitive variables. - hydro_utilities::Primitive del_m = Characteristic_To_Primitive(cell_i, del_a_m, eigenvectors, gamma); - - // Limit the variables that aren't transformed by the characteristic projection -#ifdef DE - del_m.gas_energy_specific = Van_Leer_Limiter(del_L.gas_energy_specific, del_R.gas_energy_specific, - del_C.gas_energy_specific, del_G.gas_energy_specific); -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_m.scalar_specific[i] = Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], - del_C.scalar_specific[i], del_G.scalar_specific[i]); - } -#endif // SCALAR - - // Compute the left and right interface values using the monotonized difference in the primitive variables - hydro_utilities::Primitive interface_L_iph = reconstruction::Calc_Interface_Linear(cell_i, del_m, 1.0); - hydro_utilities::Primitive interface_R_imh = reconstruction::Calc_Interface_Linear(cell_i, del_m, -1.0); - -#ifndef VL - - Real const dtodx = dt / dx; - - // Compute the eigenvalues of the linearized equations in the - // primitive variables using the cell-centered primitive variables - Real const lambda_m = cell_i.velocity.x() - sound_speed; - Real const lambda_0 = cell_i.velocity.x(); - Real const lambda_p = cell_i.velocity.x() + sound_speed; - - // Integrate linear interpolation function over domain of dependence - // defined by max(min) eigenvalue - Real qx = -0.5 * fmin(lambda_m, 0.0) * dtodx; - interface_R_imh.density = interface_R_imh.density + qx * del_m_i.density; - interface_R_imh.velocity.x() = interface_R_imh.velocity.x() + qx * del_m_i.velocity.x(); - interface_R_imh.velocity.y() = interface_R_imh.velocity.y() + qx * del_m_i.velocity.y(); - interface_R_imh.velocity.z() = interface_R_imh.velocity.z() + qx * del_m_i.velocity.z(); - interface_R_imh.pressure = interface_R_imh.pressure + qx * del_m_i.pressure; - - qx = 0.5 * fmax(lambda_p, 0.0) * dtodx; - interface_L_iph.density = interface_L_iph.density - qx * del_m_i.density; - interface_L_iph.velocity.x() = interface_L_iph.velocity.x() - qx * del_m_i.velocity.x(); - interface_L_iph.velocity.y() = interface_L_iph.velocity.y() - qx * del_m_i.velocity.y(); - interface_L_iph.velocity.z() = interface_L_iph.velocity.z() - qx * del_m_i.velocity.z(); - interface_L_iph.pressure = interface_L_iph.pressure - qx * del_m_i.pressure; - - #ifdef DE - interface_R_imh.gas_energy_specific = interface_R_imh.gas_energy_specific + qx * del_m.gas_energy_specific; - interface_L_iph.gas_energy_specific = interface_L_iph.gas_energy_specific - qx * del_m.gas_energy_specific; - #endif // DE - - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - interface_R_imh.scalar[i] = interface_R_imh.scalar[i] + qx * del_m.scalar[i]; - interface_L_iph.scalar[i] = interface_L_iph.scalar[i] - qx * del_m.scalar[i]; - } - #endif // SCALAR - - // Perform the characteristic tracing - // Stone Eqns 42 & 43 - - // left-hand interface value, i+1/2 - Real sum_0 = 0.0, sum_1 = 0.0, sum_2 = 0.0, sum_3 = 0.0, sum_4 = 0.0; - #ifdef DE - Real sum_ge = 0; - #endif // DE - #ifdef SCALAR - Real sum_scalar[NSCALARS]; - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] = 0.0; - } - #endif // SCALAR - if (lambda_m >= 0) { - Real lamdiff = lambda_p - lambda_m; - - sum_0 += lamdiff * (-cell_i.density * del_m_i.velocity.x() / (2 * sound_speed) + - del_m_i.pressure / (2 * sound_speed_squared)); - sum_1 += lamdiff * (del_m_i.velocity.x() / 2.0 - del_m_i.pressure / (2 * sound_speed * cell_i.density)); - sum_4 += lamdiff * (-cell_i.density * del_m_i.velocity.x() * sound_speed / 2.0 + del_m_i.pressure / 2.0); - } - if (lambda_0 >= 0) { - Real lamdiff = lambda_p - lambda_0; - - sum_0 += lamdiff * (del_m_i.density - del_m_i.pressure / (sound_speed_squared)); - sum_2 += lamdiff * del_m_i.velocity.y(); - sum_3 += lamdiff * del_m_i.velocity.z(); - #ifdef DE - sum_ge += lamdiff * del_m.gas_energy_specific; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += lamdiff * del_m.scalar[i]; - } - #endif // SCALAR - } - if (lambda_p >= 0) { - Real lamdiff = lambda_p - lambda_p; - - sum_0 += lamdiff * - (cell_i.density * del_m_i.velocity.x() / (2 * sound_speed) + del_m_i.pressure / (2 * sound_speed_squared)); - sum_1 += lamdiff * (del_m_i.velocity.x() / 2.0 + del_m_i.pressure / (2 * sound_speed * cell_i.density)); - sum_4 += lamdiff * (cell_i.density * del_m_i.velocity.x() * sound_speed / 2.0 + del_m_i.pressure / 2.0); - sum_0 += lamdiff * (cell_i.density * del_m.velocity.x / (2 * eigenvectors.sound_speed) + - del_m.pressure / (2 * eigenvectors.sound_speed * eigenvectors.sound_speed)); - sum_1 += lamdiff * (del_m.velocity.x / 2.0 + del_m.pressure / (2 * eigenvectors.sound_speed * cell_i.density)); - sum_4 += lamdiff * (cell_i.density * del_m.velocity.x * eigenvectors.sound_speed / 2.0 + del_m.pressure / 2.0); - } - - // add the corrections to the initial guesses for the interface values - interface_L_iph.density += 0.5 * dtodx * sum_0; - interface_L_iph.velocity.x() += 0.5 * dtodx * sum_1; - interface_L_iph.velocity.y() += 0.5 * dtodx * sum_2; - interface_L_iph.velocity.z() += 0.5 * dtodx * sum_3; - interface_L_iph.pressure += 0.5 * dtodx * sum_4; - #ifdef DE - interface_L_iph.gas_energy_specific += 0.5 * dtodx * sum_ge; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - interface_L_iph.scalar[i] += 0.5 * dtodx * sum_scalar[i]; - } - #endif // SCALAR - - // right-hand interface value, i-1/2 - sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; - #ifdef DE - sum_ge = 0; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] = 0; - } - #endif // SCALAR - if (lambda_m <= 0) { - Real lamdiff = lambda_m - lambda_m; - - sum_0 += lamdiff * (-cell_i.density * del_m_i.velocity.x() / (2 * sound_speed) + - del_m_i.pressure / (2 * sound_speed_squared)); - sum_1 += lamdiff * (del_m_i.velocity.x() / 2.0 - del_m_i.pressure / (2 * sound_speed * cell_i.density)); - sum_4 += lamdiff * (-cell_i.density * del_m_i.velocity.x() * sound_speed / 2.0 + del_m_i.pressure / 2.0); - sum_0 += lamdiff * (-cell_i.density * del_m.velocity.x / (2 * eigenvectors.sound_speed) + - del_m.pressure / (2 * eigenvectors.sound_speed * eigenvectors.sound_speed)); - sum_1 += lamdiff * (del_m.velocity.x / 2.0 - del_m.pressure / (2 * eigenvectors.sound_speed * cell_i.density)); - sum_4 += lamdiff * (-cell_i.density * del_m.velocity.x * eigenvectors.sound_speed / 2.0 + del_m.pressure / 2.0); - } - if (lambda_0 <= 0) { - Real lamdiff = lambda_m - lambda_0; - - sum_0 += lamdiff * (del_m_i.density - del_m_i.pressure / (eigenvectors.sound_speed * eigenvectors.sound_speed)); - sum_2 += lamdiff * del_m_i.velocity.y(); - sum_3 += lamdiff * del_m_i.velocity.z(); - #ifdef DE - sum_ge += lamdiff * del_m.gas_energy_specific; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += lamdiff * del_m.scalar[i]; - } - #endif // SCALAR - } - if (lambda_p <= 0) { - Real lamdiff = lambda_m - lambda_p; - - sum_0 += lamdiff * - (cell_i.density * del_m_i.velocity.x() / (2 * sound_speed) + del_m_i.pressure / (2 * sound_speed_squared)); - sum_1 += lamdiff * (del_m_i.velocity.x() / 2.0 + del_m_i.pressure / (2 * sound_speed * cell_i.density)); - sum_4 += lamdiff * (cell_i.density * del_m_i.velocity.x() * sound_speed / 2.0 + del_m_i.pressure / 2.0); - } - - // add the corrections - interface_R_imh.density += 0.5 * dtodx * sum_0; - interface_R_imh.velocity.x() += 0.5 * dtodx * sum_1; - interface_R_imh.velocity.y() += 0.5 * dtodx * sum_2; - interface_R_imh.velocity.z() += 0.5 * dtodx * sum_3; - interface_R_imh.pressure += 0.5 * dtodx * sum_4; - #ifdef DE - interface_R_imh.gas_energy_specific += 0.5 * dtodx * sum_ge; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - interface_R_imh.scalar[i] += 0.5 * dtodx * sum_scalar[i]; - } - #endif // SCALAR -#endif // CTU - - // apply minimum constraints - interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); - interface_L_iph.density = fmax(interface_L_iph.density, (Real)TINY_NUMBER); - interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER); - interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER); + // Compute the total number of cells + int const n_cells = nx * ny * nz; // Convert the left and right states in the primitive to the conserved variables send final values back from kernel // bounds_R refers to the right side of the i-1/2 interface @@ -307,10 +74,10 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ // Instantiate the relevant template specifications template __global__ __launch_bounds__(TPB) void PLMC_cuda<0>(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma, int n_fields); + Real dt, Real gamma); template __global__ __launch_bounds__(TPB) void PLMC_cuda<1>(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma, int n_fields); + Real dt, Real gamma); template __global__ __launch_bounds__(TPB) void PLMC_cuda<2>(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma, int n_fields); + Real dt, Real gamma); diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index d50f493a1..c7f34e187 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -7,6 +7,7 @@ #include "../global/global.h" #include "../grid/grid_enum.h" +#include "../reconstruction/reconstruction_internals.h" #include "../utils/hydro_utilities.h" #include "../utils/mhd_utilities.h" @@ -17,6 +18,255 @@ right boundary values for the interface calculated using plm. */ template __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, - int ny, int nz, Real dx, Real dt, Real gamma, int n_fields); + int ny, int nz, Real dx, Real dt, Real gamma); + +namespace reconstruction +{ +template +auto __device__ __inline__ PLMC_Reconstruction(Real *dev_conserved, int const xid, int const yid, int const zid, + int const nx, int const ny, int const nz, Real const dx, Real const dt, + Real const gamma) +{ + // Compute the total number of cells + int const n_cells = nx * ny * nz; + + // load the 3-cell stencil into registers + // cell i + hydro_utilities::Primitive const cell_i = + hydro_utilities::Load_Cell_Primitive(dev_conserved, xid, yid, zid, nx, ny, n_cells, gamma); + + // cell i-1. The equality checks the direction and will subtract one from the correct direction + hydro_utilities::Primitive const cell_imo = hydro_utilities::Load_Cell_Primitive( + dev_conserved, xid - int(direction == 0), yid - int(direction == 1), zid - int(direction == 2), nx, ny, n_cells, + gamma); + + // cell i+1. The equality checks the direction and add one to the correct direction + hydro_utilities::Primitive const cell_ipo = hydro_utilities::Load_Cell_Primitive( + dev_conserved, xid + int(direction == 0), yid + int(direction == 1), zid + int(direction == 2), nx, ny, n_cells, + gamma); + + // Compute the eigenvectors + reconstruction::EigenVecs const eigenvectors = reconstruction::Compute_Eigenvectors(cell_i, gamma); + + // Compute the left, right, centered, and van Leer differences of the primitive variables Note that here L and R refer + // to locations relative to the cell center + + // left + hydro_utilities::Primitive const del_L = reconstruction::Compute_Slope(cell_imo, cell_i); + + // right + hydro_utilities::Primitive const del_R = reconstruction::Compute_Slope(cell_i, cell_ipo); + + // centered + hydro_utilities::Primitive const del_C = reconstruction::Compute_Slope(cell_imo, cell_ipo, 0.5); + + // Van Leer + hydro_utilities::Primitive const del_G = reconstruction::Van_Leer_Slope(del_L, del_R); + + // Project the left, right, centered and van Leer differences onto the + // characteristic variables Stone Eqn 37 (del_a are differences in + // characteristic variables, see Stone for notation) Use the eigenvectors + // given in Stone 2008, Appendix A + reconstruction::Characteristic const del_a_L = + reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvectors, gamma); + + reconstruction::Characteristic const del_a_R = + reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvectors, gamma); + + reconstruction::Characteristic const del_a_C = + reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvectors, gamma); + + reconstruction::Characteristic const del_a_G = + reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvectors, gamma); + + // Apply monotonicity constraints to the differences in the characteristic variables and project the monotonized + // difference in the characteristic variables back onto the primitive variables Stone Eqn 39 + reconstruction::Characteristic const del_a_m = reconstruction::Van_Leer_Limiter(del_a_L, del_a_R, del_a_C, del_a_G); + + // Project back into the primitive variables. + hydro_utilities::Primitive del_m = Characteristic_To_Primitive(cell_i, del_a_m, eigenvectors, gamma); + + // Limit the variables that aren't transformed by the characteristic projection +#ifdef DE + del_m.gas_energy_specific = Van_Leer_Limiter(del_L.gas_energy_specific, del_R.gas_energy_specific, + del_C.gas_energy_specific, del_G.gas_energy_specific); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + del_m.scalar_specific[i] = Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], + del_C.scalar_specific[i], del_G.scalar_specific[i]); + } +#endif // SCALAR + + // Compute the left and right interface values using the monotonized difference in the primitive variables + hydro_utilities::Primitive interface_L_iph = reconstruction::Calc_Interface_Linear(cell_i, del_m, 1.0); + hydro_utilities::Primitive interface_R_imh = reconstruction::Calc_Interface_Linear(cell_i, del_m, -1.0); + + // Limit the interfaces + reconstruction::Plm_Limit_Interfaces(interface_L_iph, interface_R_imh, cell_imo, cell_i, cell_ipo); + +#ifndef VL + + Real const dtodx = dt / dx; + + // Compute the eigenvalues of the linearized equations in the + // primitive variables using the cell-centered primitive variables + Real const lambda_m = cell_i.velocity.x - eigenvectors.sound_speed; + Real const lambda_0 = cell_i.velocity.x; + Real const lambda_p = cell_i.velocity.x + eigenvectors.sound_speed; + + // Integrate linear interpolation function over domain of dependence + // defined by max(min) eigenvalue + Real qx = -0.5 * fmin(lambda_m, 0.0) * dtodx; + interface_R_imh.density = interface_R_imh.density + qx * del_m.density; + interface_R_imh.velocity.x = interface_R_imh.velocity.x + qx * del_m.velocity.x; + interface_R_imh.velocity.y = interface_R_imh.velocity.y + qx * del_m.velocity.y; + interface_R_imh.velocity.z = interface_R_imh.velocity.z + qx * del_m.velocity.z; + interface_R_imh.pressure = interface_R_imh.pressure + qx * del_m.pressure; + + qx = 0.5 * fmax(lambda_p, 0.0) * dtodx; + interface_L_iph.density = interface_L_iph.density - qx * del_m.density; + interface_L_iph.velocity.x = interface_L_iph.velocity.x - qx * del_m.velocity.x; + interface_L_iph.velocity.y = interface_L_iph.velocity.y - qx * del_m.velocity.y; + interface_L_iph.velocity.z = interface_L_iph.velocity.z - qx * del_m.velocity.z; + interface_L_iph.pressure = interface_L_iph.pressure - qx * del_m.pressure; + + #ifdef DE + interface_R_imh.gas_energy_specific = interface_R_imh.gas_energy_specific + qx * del_m.gas_energy_specific; + interface_L_iph.gas_energy_specific = interface_L_iph.gas_energy_specific - qx * del_m.gas_energy_specific; + #endif // DE + + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_R_imh.scalar[i] = interface_R_imh.scalar[i] + qx * del_m.scalar[i]; + interface_L_iph.scalar[i] = interface_L_iph.scalar[i] - qx * del_m.scalar[i]; + } + #endif // SCALAR + + // Perform the characteristic tracing + // Stone Eqns 42 & 43 + + // left-hand interface value, i+1/2 + Real sum_0 = 0.0, sum_1 = 0.0, sum_2 = 0.0, sum_3 = 0.0, sum_4 = 0.0; + #ifdef DE + Real sum_ge = 0; + #endif // DE + #ifdef SCALAR + Real sum_scalar[NSCALARS]; + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] = 0.0; + } + #endif // SCALAR + if (lambda_m >= 0) { + Real lamdiff = lambda_p - lambda_m; + + sum_0 += lamdiff * (-cell_i.density * del_m.velocity.x / (2 * eigenvectors.sound_speed) + + del_m.pressure / (2 * eigenvectors.sound_speed * eigenvectors.sound_speed)); + sum_1 += lamdiff * (del_m.velocity.x / 2.0 - del_m.pressure / (2 * eigenvectors.sound_speed * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_m.velocity.x * eigenvectors.sound_speed / 2.0 + del_m.pressure / 2.0); + } + if (lambda_0 >= 0) { + Real lamdiff = lambda_p - lambda_0; + + sum_0 += lamdiff * (del_m.density - del_m.pressure / (eigenvectors.sound_speed * eigenvectors.sound_speed)); + sum_2 += lamdiff * del_m.velocity.y; + sum_3 += lamdiff * del_m.velocity.z; + #ifdef DE + sum_ge += lamdiff * del_m.gas_energy_specific; + #endif // DE + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] += lamdiff * del_m.scalar[i]; + } + #endif // SCALAR + } + if (lambda_p >= 0) { + Real lamdiff = lambda_p - lambda_p; + + sum_0 += lamdiff * (cell_i.density * del_m.velocity.x / (2 * eigenvectors.sound_speed) + + del_m.pressure / (2 * eigenvectors.sound_speed * eigenvectors.sound_speed)); + sum_1 += lamdiff * (del_m.velocity.x / 2.0 + del_m.pressure / (2 * eigenvectors.sound_speed * cell_i.density)); + sum_4 += lamdiff * (cell_i.density * del_m.velocity.x * eigenvectors.sound_speed / 2.0 + del_m.pressure / 2.0); + } + + // add the corrections to the initial guesses for the interface values + interface_L_iph.density += 0.5 * dtodx * sum_0; + interface_L_iph.velocity.x += 0.5 * dtodx * sum_1; + interface_L_iph.velocity.y += 0.5 * dtodx * sum_2; + interface_L_iph.velocity.z += 0.5 * dtodx * sum_3; + interface_L_iph.pressure += 0.5 * dtodx * sum_4; + #ifdef DE + interface_L_iph.gas_energy_specific += 0.5 * dtodx * sum_ge; + #endif // DE + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_L_iph.scalar[i] += 0.5 * dtodx * sum_scalar[i]; + } + #endif // SCALAR + + // right-hand interface value, i-1/2 + sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; + #ifdef DE + sum_ge = 0; + #endif // DE + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] = 0; + } + #endif // SCALAR + if (lambda_m <= 0) { + Real lamdiff = lambda_m - lambda_m; + + sum_0 += lamdiff * (-cell_i.density * del_m.velocity.x / (2 * eigenvectors.sound_speed) + + del_m.pressure / (2 * eigenvectors.sound_speed * eigenvectors.sound_speed)); + sum_1 += lamdiff * (del_m.velocity.x / 2.0 - del_m.pressure / (2 * eigenvectors.sound_speed * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_m.velocity.x * eigenvectors.sound_speed / 2.0 + del_m.pressure / 2.0); + } + if (lambda_0 <= 0) { + Real lamdiff = lambda_m - lambda_0; + + sum_0 += lamdiff * (del_m.density - del_m.pressure / (eigenvectors.sound_speed * eigenvectors.sound_speed)); + sum_2 += lamdiff * del_m.velocity.y; + sum_3 += lamdiff * del_m.velocity.z; + #ifdef DE + sum_ge += lamdiff * del_m.gas_energy_specific; + #endif // DE + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] += lamdiff * del_m.scalar[i]; + } + #endif // SCALAR + } + if (lambda_p <= 0) { + Real lamdiff = lambda_m - lambda_p; + + sum_0 += lamdiff * (cell_i.density * del_m.velocity.x / (2 * eigenvectors.sound_speed) + + del_m.pressure / (2 * eigenvectors.sound_speed * eigenvectors.sound_speed)); + sum_1 += lamdiff * (del_m.velocity.x / 2.0 + del_m.pressure / (2 * eigenvectors.sound_speed * cell_i.density)); + sum_4 += lamdiff * (cell_i.density * del_m.velocity.x * eigenvectors.sound_speed / 2.0 + del_m.pressure / 2.0); + } + + // add the corrections + interface_R_imh.density += 0.5 * dtodx * sum_0; + interface_R_imh.velocity.x += 0.5 * dtodx * sum_1; + interface_R_imh.velocity.y += 0.5 * dtodx * sum_2; + interface_R_imh.velocity.z += 0.5 * dtodx * sum_3; + interface_R_imh.pressure += 0.5 * dtodx * sum_4; + #ifdef DE + interface_R_imh.gas_energy_specific += 0.5 * dtodx * sum_ge; + #endif // DE + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_R_imh.scalar[i] += 0.5 * dtodx * sum_scalar[i]; + } + #endif // SCALAR +#endif // CTU + + struct LocalReturnStruct { + hydro_utilities::Primitive left, right; + }; + return LocalReturnStruct{interface_L_iph, interface_R_imh}; +} +} // namespace reconstruction #endif // PLMC_CUDA_H diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu index 7763e9509..02f863c93 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plmc_cuda_tests.cu @@ -151,15 +151,15 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) switch (direction) { case 0: hipLaunchKernelGGL(PLMC_cuda<0>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), - dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, n_fields); + dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma); break; case 1: hipLaunchKernelGGL(PLMC_cuda<1>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), - dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, n_fields); + dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma); break; case 2: hipLaunchKernelGGL(PLMC_cuda<2>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), - dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, n_fields); + dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma); break; } GPU_Error_Check(); @@ -276,15 +276,15 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) switch (direction) { case 0: hipLaunchKernelGGL(PLMC_cuda<0>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), - dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, n_fields); + dev_interface_right.data(), nx, ny, nz, dx, dt, gamma); break; case 1: hipLaunchKernelGGL(PLMC_cuda<1>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), - dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, n_fields); + dev_interface_right.data(), nx, ny, nz, dx, dt, gamma); break; case 2: hipLaunchKernelGGL(PLMC_cuda<2>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), - dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, n_fields); + dev_interface_right.data(), nx, ny, nz, dx, dt, gamma); break; } GPU_Error_Check(); From 8735833ad7c9ae563f4c54db9206d29e11f9f4ee Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 5 Mar 2024 15:38:39 -0500 Subject: [PATCH 08/29] Add documentation, standard type of direction Doxygen for PCM, HLLD, and Reconstruction functions. Make all `direction` tparams of type uint instead of a mix of int, size_t, and uint. uint was chosen since direction should never be negative and size_t is misleading in that it looks like it should be an array index. --- src/reconstruction/pcm_cuda.h | 2 +- src/reconstruction/plmc_cuda.h | 2 +- src/reconstruction/reconstruction.h | 6 +++--- src/utils/hydro_utilities.h | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/reconstruction/pcm_cuda.h b/src/reconstruction/pcm_cuda.h index 2867a77c3..d1c593ff2 100644 --- a/src/reconstruction/pcm_cuda.h +++ b/src/reconstruction/pcm_cuda.h @@ -35,7 +35,7 @@ namespace reconstruction * \param[in] gamma The adiabatic index * \return reconstruction::InterfaceState The interface state at xid, yid, zid */ -template +template reconstruction::InterfaceState __device__ __host__ inline PCM_Reconstruction(Real const *dev_conserved, size_t const xid, size_t const yid, size_t const zid, size_t const nx, diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index c7f34e187..e3264b8c8 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -22,7 +22,7 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ namespace reconstruction { -template +template auto __device__ __inline__ PLMC_Reconstruction(Real *dev_conserved, int const xid, int const yid, int const zid, int const nx, int const ny, int const nz, Real const dx, Real const dt, Real const gamma) diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h index 0ea1211d9..6fd8531bd 100644 --- a/src/reconstruction/reconstruction.h +++ b/src/reconstruction/reconstruction.h @@ -14,7 +14,7 @@ namespace reconstruction { /*! - * \brief Compute the interface states + * \brief * * \tparam reconstruction_order What kind of reconstruction to use, PCM, PLMC, etc. This argument should always be a * member of the reconstruction::Kind enum, behaviour is undefined otherwise. @@ -30,8 +30,8 @@ namespace reconstruction * \param[out] left_interface * \param[out] right_interface */ -template -auto __device__ __host__ inline Reconstruct_Interface_States(Real const *dev_conserved, size_t const xid, +template +void __device__ __host__ inline Reconstruct_Interface_States(Real const *dev_conserved, size_t const xid, size_t const yid, size_t const zid, size_t const nx, size_t const ny, size_t const n_cells, Real const gamma, reconstruction::InterfaceState &left_interface, diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index 57e91b3ec..ec9759d72 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -229,7 +229,7 @@ inline __host__ __device__ Real Calc_Sound_Speed(Real const &P, Real const &d, R * \param[in] n_cells The total number of cells * \return Conserved The cell centered conserved variables in the cell at location xid, yid, zid. */ -template +template inline __host__ __device__ Conserved Load_Cell_Conserved(Real const *dev_conserved, size_t const xid, size_t const yid, size_t const zid, size_t const nx, size_t const ny, size_t const n_cells) @@ -412,7 +412,7 @@ __inline__ __host__ __device__ Conserved Primitive_2_Conserved(Primitive const & * \param[in] gamma The adiabatic index * \return Primitive The cell centered conserved variables in the cell at location xid, yid, zid. */ -template +template inline __host__ __device__ Primitive Load_Cell_Primitive(Real const *dev_conserved, size_t const xid, size_t const yid, size_t const zid, size_t const nx, size_t const ny, size_t const n_cells, Real const gamma) From cdfd4c0166cd872011089b290cad2f32e5483369 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 17 Apr 2024 15:12:56 -0400 Subject: [PATCH 09/29] Fix a few errors from bad merges --- src/integrators/VL_3D_cuda.cu | 6 +++--- src/reconstruction/plmc_cuda.h | 5 +---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 3da9e394e..5301baced 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -252,11 +252,11 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int #ifdef PLMC cuda_utilities::AutomaticLaunchParams static const plmc_vl_launch_params(PLMC_cuda<0>, n_cells); hipLaunchKernelGGL(PLMC_cuda<0>, plmc_vl_launch_params.get_numBlocks(), plmc_vl_launch_params.get_threadsPerBlock(), - 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, n_fields); + 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); hipLaunchKernelGGL(PLMC_cuda<1>, plmc_vl_launch_params.get_numBlocks(), plmc_vl_launch_params.get_threadsPerBlock(), - 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, n_fields); + 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); hipLaunchKernelGGL(PLMC_cuda<2>, plmc_vl_launch_params.get_numBlocks(), plmc_vl_launch_params.get_threadsPerBlock(), - 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, n_fields); + 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama); #endif // PLMC #ifdef PPMP cuda_utilities::AutomaticLaunchParams static const ppmp_launch_params(PPMP_cuda, n_cells); diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index e3264b8c8..517a79599 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -61,7 +61,7 @@ auto __device__ __inline__ PLMC_Reconstruction(Real *dev_conserved, int const xi hydro_utilities::Primitive const del_C = reconstruction::Compute_Slope(cell_imo, cell_ipo, 0.5); // Van Leer - hydro_utilities::Primitive const del_G = reconstruction::Van_Leer_Slope(del_L, del_R); + hydro_utilities::Primitive const del_G = reconstruction::Compute_Van_Leer_Slope(del_L, del_R); // Project the left, right, centered and van Leer differences onto the // characteristic variables Stone Eqn 37 (del_a are differences in @@ -102,9 +102,6 @@ auto __device__ __inline__ PLMC_Reconstruction(Real *dev_conserved, int const xi hydro_utilities::Primitive interface_L_iph = reconstruction::Calc_Interface_Linear(cell_i, del_m, 1.0); hydro_utilities::Primitive interface_R_imh = reconstruction::Calc_Interface_Linear(cell_i, del_m, -1.0); - // Limit the interfaces - reconstruction::Plm_Limit_Interfaces(interface_L_iph, interface_R_imh, cell_imo, cell_i, cell_ipo); - #ifndef VL Real const dtodx = dt / dx; From 88276448abb80ab0e6a8ffc1a84e84f4ad391bd1 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Tue, 5 Mar 2024 16:33:39 -0500 Subject: [PATCH 10/29] Move PLMC characteristic tracing into its own function --- src/reconstruction/plmc_cuda.h | 281 +++++++++++++++++---------------- 1 file changed, 145 insertions(+), 136 deletions(-) diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 517a79599..7172bf12b 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -22,95 +22,19 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ namespace reconstruction { -template -auto __device__ __inline__ PLMC_Reconstruction(Real *dev_conserved, int const xid, int const yid, int const zid, - int const nx, int const ny, int const nz, Real const dx, Real const dt, - Real const gamma) +void __device__ __host__ __inline__ PLMC_Characteristic_Evolution(hydro_utilities::Primitive const &cell_i, + hydro_utilities::Primitive const &del_m, + Real const dt, Real const dx, Real const sound_speed, + hydro_utilities::Primitive &interface_R_imh, + hydro_utilities::Primitive &interface_L_iph) { - // Compute the total number of cells - int const n_cells = nx * ny * nz; - - // load the 3-cell stencil into registers - // cell i - hydro_utilities::Primitive const cell_i = - hydro_utilities::Load_Cell_Primitive(dev_conserved, xid, yid, zid, nx, ny, n_cells, gamma); - - // cell i-1. The equality checks the direction and will subtract one from the correct direction - hydro_utilities::Primitive const cell_imo = hydro_utilities::Load_Cell_Primitive( - dev_conserved, xid - int(direction == 0), yid - int(direction == 1), zid - int(direction == 2), nx, ny, n_cells, - gamma); - - // cell i+1. The equality checks the direction and add one to the correct direction - hydro_utilities::Primitive const cell_ipo = hydro_utilities::Load_Cell_Primitive( - dev_conserved, xid + int(direction == 0), yid + int(direction == 1), zid + int(direction == 2), nx, ny, n_cells, - gamma); - - // Compute the eigenvectors - reconstruction::EigenVecs const eigenvectors = reconstruction::Compute_Eigenvectors(cell_i, gamma); - - // Compute the left, right, centered, and van Leer differences of the primitive variables Note that here L and R refer - // to locations relative to the cell center - - // left - hydro_utilities::Primitive const del_L = reconstruction::Compute_Slope(cell_imo, cell_i); - - // right - hydro_utilities::Primitive const del_R = reconstruction::Compute_Slope(cell_i, cell_ipo); - - // centered - hydro_utilities::Primitive const del_C = reconstruction::Compute_Slope(cell_imo, cell_ipo, 0.5); - - // Van Leer - hydro_utilities::Primitive const del_G = reconstruction::Compute_Van_Leer_Slope(del_L, del_R); - - // Project the left, right, centered and van Leer differences onto the - // characteristic variables Stone Eqn 37 (del_a are differences in - // characteristic variables, see Stone for notation) Use the eigenvectors - // given in Stone 2008, Appendix A - reconstruction::Characteristic const del_a_L = - reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvectors, gamma); - - reconstruction::Characteristic const del_a_R = - reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvectors, gamma); - - reconstruction::Characteristic const del_a_C = - reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvectors, gamma); - - reconstruction::Characteristic const del_a_G = - reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvectors, gamma); - - // Apply monotonicity constraints to the differences in the characteristic variables and project the monotonized - // difference in the characteristic variables back onto the primitive variables Stone Eqn 39 - reconstruction::Characteristic const del_a_m = reconstruction::Van_Leer_Limiter(del_a_L, del_a_R, del_a_C, del_a_G); - - // Project back into the primitive variables. - hydro_utilities::Primitive del_m = Characteristic_To_Primitive(cell_i, del_a_m, eigenvectors, gamma); - - // Limit the variables that aren't transformed by the characteristic projection -#ifdef DE - del_m.gas_energy_specific = Van_Leer_Limiter(del_L.gas_energy_specific, del_R.gas_energy_specific, - del_C.gas_energy_specific, del_G.gas_energy_specific); -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_m.scalar_specific[i] = Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], - del_C.scalar_specific[i], del_G.scalar_specific[i]); - } -#endif // SCALAR - - // Compute the left and right interface values using the monotonized difference in the primitive variables - hydro_utilities::Primitive interface_L_iph = reconstruction::Calc_Interface_Linear(cell_i, del_m, 1.0); - hydro_utilities::Primitive interface_R_imh = reconstruction::Calc_Interface_Linear(cell_i, del_m, -1.0); - -#ifndef VL - Real const dtodx = dt / dx; // Compute the eigenvalues of the linearized equations in the // primitive variables using the cell-centered primitive variables - Real const lambda_m = cell_i.velocity.x - eigenvectors.sound_speed; + Real const lambda_m = cell_i.velocity.x - sound_speed; Real const lambda_0 = cell_i.velocity.x; - Real const lambda_p = cell_i.velocity.x + eigenvectors.sound_speed; + Real const lambda_p = cell_i.velocity.x + sound_speed; // Integrate linear interpolation function over domain of dependence // defined by max(min) eigenvalue @@ -128,62 +52,62 @@ auto __device__ __inline__ PLMC_Reconstruction(Real *dev_conserved, int const xi interface_L_iph.velocity.z = interface_L_iph.velocity.z - qx * del_m.velocity.z; interface_L_iph.pressure = interface_L_iph.pressure - qx * del_m.pressure; - #ifdef DE +#ifdef DE interface_R_imh.gas_energy_specific = interface_R_imh.gas_energy_specific + qx * del_m.gas_energy_specific; interface_L_iph.gas_energy_specific = interface_L_iph.gas_energy_specific - qx * del_m.gas_energy_specific; - #endif // DE +#endif // DE - #ifdef SCALAR +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - interface_R_imh.scalar[i] = interface_R_imh.scalar[i] + qx * del_m.scalar[i]; - interface_L_iph.scalar[i] = interface_L_iph.scalar[i] - qx * del_m.scalar[i]; + interface_R_imh.scalar_specific[i] = interface_R_imh.scalar_specific[i] + qx * del_m.scalar_specific[i]; + interface_L_iph.scalar_specific[i] = interface_L_iph.scalar_specific[i] - qx * del_m.scalar_specific[i]; } - #endif // SCALAR +#endif // SCALAR // Perform the characteristic tracing // Stone Eqns 42 & 43 // left-hand interface value, i+1/2 Real sum_0 = 0.0, sum_1 = 0.0, sum_2 = 0.0, sum_3 = 0.0, sum_4 = 0.0; - #ifdef DE +#ifdef DE Real sum_ge = 0; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR Real sum_scalar[NSCALARS]; for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] = 0.0; } - #endif // SCALAR +#endif // SCALAR if (lambda_m >= 0) { Real lamdiff = lambda_p - lambda_m; - sum_0 += lamdiff * (-cell_i.density * del_m.velocity.x / (2 * eigenvectors.sound_speed) + - del_m.pressure / (2 * eigenvectors.sound_speed * eigenvectors.sound_speed)); - sum_1 += lamdiff * (del_m.velocity.x / 2.0 - del_m.pressure / (2 * eigenvectors.sound_speed * cell_i.density)); - sum_4 += lamdiff * (-cell_i.density * del_m.velocity.x * eigenvectors.sound_speed / 2.0 + del_m.pressure / 2.0); + sum_0 += lamdiff * (-cell_i.density * del_m.velocity.x / (2 * sound_speed) + + del_m.pressure / (2 * sound_speed * sound_speed)); + sum_1 += lamdiff * (del_m.velocity.x / 2.0 - del_m.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_m.velocity.x * sound_speed / 2.0 + del_m.pressure / 2.0); } if (lambda_0 >= 0) { Real lamdiff = lambda_p - lambda_0; - sum_0 += lamdiff * (del_m.density - del_m.pressure / (eigenvectors.sound_speed * eigenvectors.sound_speed)); + sum_0 += lamdiff * (del_m.density - del_m.pressure / (sound_speed * sound_speed)); sum_2 += lamdiff * del_m.velocity.y; sum_3 += lamdiff * del_m.velocity.z; - #ifdef DE +#ifdef DE sum_ge += lamdiff * del_m.gas_energy_specific; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += lamdiff * del_m.scalar[i]; + sum_scalar[i] += lamdiff * del_m.scalar_specific[i]; } - #endif // SCALAR +#endif // SCALAR } if (lambda_p >= 0) { Real lamdiff = lambda_p - lambda_p; - sum_0 += lamdiff * (cell_i.density * del_m.velocity.x / (2 * eigenvectors.sound_speed) + - del_m.pressure / (2 * eigenvectors.sound_speed * eigenvectors.sound_speed)); - sum_1 += lamdiff * (del_m.velocity.x / 2.0 + del_m.pressure / (2 * eigenvectors.sound_speed * cell_i.density)); - sum_4 += lamdiff * (cell_i.density * del_m.velocity.x * eigenvectors.sound_speed / 2.0 + del_m.pressure / 2.0); + sum_0 += lamdiff * + (cell_i.density * del_m.velocity.x / (2 * sound_speed) + del_m.pressure / (2 * sound_speed * sound_speed)); + sum_1 += lamdiff * (del_m.velocity.x / 2.0 + del_m.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (cell_i.density * del_m.velocity.x * sound_speed / 2.0 + del_m.pressure / 2.0); } // add the corrections to the initial guesses for the interface values @@ -192,55 +116,55 @@ auto __device__ __inline__ PLMC_Reconstruction(Real *dev_conserved, int const xi interface_L_iph.velocity.y += 0.5 * dtodx * sum_2; interface_L_iph.velocity.z += 0.5 * dtodx * sum_3; interface_L_iph.pressure += 0.5 * dtodx * sum_4; - #ifdef DE +#ifdef DE interface_L_iph.gas_energy_specific += 0.5 * dtodx * sum_ge; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - interface_L_iph.scalar[i] += 0.5 * dtodx * sum_scalar[i]; + interface_L_iph.scalar_specific[i] += 0.5 * dtodx * sum_scalar[i]; } - #endif // SCALAR +#endif // SCALAR // right-hand interface value, i-1/2 sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; - #ifdef DE +#ifdef DE sum_ge = 0; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { sum_scalar[i] = 0; } - #endif // SCALAR +#endif // SCALAR if (lambda_m <= 0) { Real lamdiff = lambda_m - lambda_m; - sum_0 += lamdiff * (-cell_i.density * del_m.velocity.x / (2 * eigenvectors.sound_speed) + - del_m.pressure / (2 * eigenvectors.sound_speed * eigenvectors.sound_speed)); - sum_1 += lamdiff * (del_m.velocity.x / 2.0 - del_m.pressure / (2 * eigenvectors.sound_speed * cell_i.density)); - sum_4 += lamdiff * (-cell_i.density * del_m.velocity.x * eigenvectors.sound_speed / 2.0 + del_m.pressure / 2.0); + sum_0 += lamdiff * (-cell_i.density * del_m.velocity.x / (2 * sound_speed) + + del_m.pressure / (2 * sound_speed * sound_speed)); + sum_1 += lamdiff * (del_m.velocity.x / 2.0 - del_m.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_m.velocity.x * sound_speed / 2.0 + del_m.pressure / 2.0); } if (lambda_0 <= 0) { Real lamdiff = lambda_m - lambda_0; - sum_0 += lamdiff * (del_m.density - del_m.pressure / (eigenvectors.sound_speed * eigenvectors.sound_speed)); + sum_0 += lamdiff * (del_m.density - del_m.pressure / (sound_speed * sound_speed)); sum_2 += lamdiff * del_m.velocity.y; sum_3 += lamdiff * del_m.velocity.z; - #ifdef DE +#ifdef DE sum_ge += lamdiff * del_m.gas_energy_specific; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += lamdiff * del_m.scalar[i]; + sum_scalar[i] += lamdiff * del_m.scalar_specific[i]; } - #endif // SCALAR +#endif // SCALAR } if (lambda_p <= 0) { Real lamdiff = lambda_m - lambda_p; - sum_0 += lamdiff * (cell_i.density * del_m.velocity.x / (2 * eigenvectors.sound_speed) + - del_m.pressure / (2 * eigenvectors.sound_speed * eigenvectors.sound_speed)); - sum_1 += lamdiff * (del_m.velocity.x / 2.0 + del_m.pressure / (2 * eigenvectors.sound_speed * cell_i.density)); - sum_4 += lamdiff * (cell_i.density * del_m.velocity.x * eigenvectors.sound_speed / 2.0 + del_m.pressure / 2.0); + sum_0 += lamdiff * + (cell_i.density * del_m.velocity.x / (2 * sound_speed) + del_m.pressure / (2 * sound_speed * sound_speed)); + sum_1 += lamdiff * (del_m.velocity.x / 2.0 + del_m.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (cell_i.density * del_m.velocity.x * sound_speed / 2.0 + del_m.pressure / 2.0); } // add the corrections @@ -249,15 +173,100 @@ auto __device__ __inline__ PLMC_Reconstruction(Real *dev_conserved, int const xi interface_R_imh.velocity.y += 0.5 * dtodx * sum_2; interface_R_imh.velocity.z += 0.5 * dtodx * sum_3; interface_R_imh.pressure += 0.5 * dtodx * sum_4; - #ifdef DE +#ifdef DE interface_R_imh.gas_energy_specific += 0.5 * dtodx * sum_ge; - #endif // DE - #ifdef SCALAR +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_R_imh.scalar_specific[i] += 0.5 * dtodx * sum_scalar[i]; + } +#endif // SCALAR +} + +template +auto __device__ __inline__ PLMC_Reconstruction(Real *dev_conserved, int const xid, int const yid, int const zid, + int const nx, int const ny, int const nz, Real const dx, Real const dt, + Real const gamma) +{ + // Compute the total number of cells + int const n_cells = nx * ny * nz; + + // load the 3-cell stencil into registers + // cell i + hydro_utilities::Primitive const cell_i = + hydro_utilities::Load_Cell_Primitive(dev_conserved, xid, yid, zid, nx, ny, n_cells, gamma); + + // cell i-1. The equality checks the direction and will subtract one from the correct direction + hydro_utilities::Primitive const cell_imo = hydro_utilities::Load_Cell_Primitive( + dev_conserved, xid - int(direction == 0), yid - int(direction == 1), zid - int(direction == 2), nx, ny, n_cells, + gamma); + + // cell i+1. The equality checks the direction and add one to the correct direction + hydro_utilities::Primitive const cell_ipo = hydro_utilities::Load_Cell_Primitive( + dev_conserved, xid + int(direction == 0), yid + int(direction == 1), zid + int(direction == 2), nx, ny, n_cells, + gamma); + + // Compute the eigenvectors + reconstruction::EigenVecs const eigenvectors = reconstruction::Compute_Eigenvectors(cell_i, gamma); + + // Compute the left, right, centered, and van Leer differences of the primitive variables Note that here L and R refer + // to locations relative to the cell center + + // left + hydro_utilities::Primitive const del_L = reconstruction::Compute_Slope(cell_imo, cell_i); + + // right + hydro_utilities::Primitive const del_R = reconstruction::Compute_Slope(cell_i, cell_ipo); + + // centered + hydro_utilities::Primitive const del_C = reconstruction::Compute_Slope(cell_imo, cell_ipo, 0.5); + + // Van Leer + hydro_utilities::Primitive const del_G = reconstruction::Compute_Van_Leer_Slope(del_L, del_R); + + // Project the left, right, centered and van Leer differences onto the + // characteristic variables Stone Eqn 37 (del_a are differences in + // characteristic variables, see Stone for notation) Use the eigenvectors + // given in Stone 2008, Appendix A + reconstruction::Characteristic const del_a_L = + reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvectors, gamma); + + reconstruction::Characteristic const del_a_R = + reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvectors, gamma); + + reconstruction::Characteristic const del_a_C = + reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvectors, gamma); + + reconstruction::Characteristic const del_a_G = + reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvectors, gamma); + + // Apply monotonicity constraints to the differences in the characteristic variables and project the monotonized + // difference in the characteristic variables back onto the primitive variables Stone Eqn 39 + reconstruction::Characteristic const del_a_m = reconstruction::Van_Leer_Limiter(del_a_L, del_a_R, del_a_C, del_a_G); + + // Project back into the primitive variables. + hydro_utilities::Primitive del_m = Characteristic_To_Primitive(cell_i, del_a_m, eigenvectors, gamma); + + // Limit the variables that aren't transformed by the characteristic projection +#ifdef DE + del_m.gas_energy_specific = Van_Leer_Limiter(del_L.gas_energy_specific, del_R.gas_energy_specific, + del_C.gas_energy_specific, del_G.gas_energy_specific); +#endif // DE +#ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - interface_R_imh.scalar[i] += 0.5 * dtodx * sum_scalar[i]; + del_m.scalar_specific[i] = Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], + del_C.scalar_specific[i], del_G.scalar_specific[i]); } - #endif // SCALAR -#endif // CTU +#endif // SCALAR + + // Compute the left and right interface values using the monotonized difference in the primitive variables + hydro_utilities::Primitive interface_L_iph = reconstruction::Calc_Interface_Linear(cell_i, del_m, 1.0); + hydro_utilities::Primitive interface_R_imh = reconstruction::Calc_Interface_Linear(cell_i, del_m, -1.0); + +// Do the characteristic tracing +#ifndef VL + PLMC_Characteristic_Evolution(cell_i, del_m, dt, dx, eigenvectors.sound_speed, interface_R_imh, interface_L_iph); +#endif // VL struct LocalReturnStruct { hydro_utilities::Primitive left, right; From 7ff04e1679cd78fa5c7b426922d7519b8737601c Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 7 Mar 2024 12:23:14 -0500 Subject: [PATCH 11/29] Add missing namespace qualification --- src/reconstruction/ppmc_cuda.cu | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 977b1540f..c13b78b91 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -126,13 +126,13 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // Limit the variables that aren't transformed by the characteristic projection #ifdef DE - del_m_im1.gas_energy_specific = Van_Leer_Limiter(del_L.gas_energy_specific, del_R.gas_energy_specific, - del_C.gas_energy_specific, del_G.gas_energy_specific); + del_m_im1.gas_energy_specific = reconstruction::Van_Leer_Limiter( + del_L.gas_energy_specific, del_R.gas_energy_specific, del_C.gas_energy_specific, del_G.gas_energy_specific); #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - del_m_im1.scalar_specific[i] = Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], - del_C.scalar_specific[i], del_G.scalar_specific[i]); + del_m_im1.scalar_specific[i] = reconstruction::Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], + del_C.scalar_specific[i], del_G.scalar_specific[i]); } #endif // SCALAR @@ -179,13 +179,13 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // Limit the variables that aren't transformed by the characteristic projection #ifdef DE - del_m_i.gas_energy_specific = Van_Leer_Limiter(del_L.gas_energy_specific, del_R.gas_energy_specific, - del_C.gas_energy_specific, del_G.gas_energy_specific); + del_m_i.gas_energy_specific = reconstruction::Van_Leer_Limiter(del_L.gas_energy_specific, del_R.gas_energy_specific, + del_C.gas_energy_specific, del_G.gas_energy_specific); #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - del_m_i.scalar_specific[i] = Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], - del_C.scalar_specific[i], del_G.scalar_specific[i]); + del_m_i.scalar_specific[i] = reconstruction::Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], + del_C.scalar_specific[i], del_G.scalar_specific[i]); } #endif // SCALAR @@ -233,13 +233,13 @@ __global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_boun // Limit the variables that aren't transformed by the characteristic projection #ifdef DE - del_m_ip1.gas_energy_specific = Van_Leer_Limiter(del_L.gas_energy_specific, del_R.gas_energy_specific, - del_C.gas_energy_specific, del_G.gas_energy_specific); + del_m_ip1.gas_energy_specific = reconstruction::Van_Leer_Limiter( + del_L.gas_energy_specific, del_R.gas_energy_specific, del_C.gas_energy_specific, del_G.gas_energy_specific); #endif // DE #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { - del_m_ip1.scalar_specific[i] = Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], - del_C.scalar_specific[i], del_G.scalar_specific[i]); + del_m_ip1.scalar_specific[i] = reconstruction::Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], + del_C.scalar_specific[i], del_G.scalar_specific[i]); } #endif // SCALAR From a9d5e4aa989f3676d02b60da5e225974bba163b8 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 18 Apr 2024 10:44:34 -0400 Subject: [PATCH 12/29] Fix bugs in Riemann_Thread_Guard These don't change much for most builds but they do allow higher order reconstructions to be fused later if needed --- src/reconstruction/reconstruction_internals.h | 5 +++-- src/reconstruction/reconstruction_internals_tests.cu | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/reconstruction/reconstruction_internals.h b/src/reconstruction/reconstruction_internals.h index 403f43fc9..f484397fe 100644 --- a/src/reconstruction/reconstruction_internals.h +++ b/src/reconstruction/reconstruction_internals.h @@ -247,10 +247,11 @@ bool __device__ __host__ __inline__ Riemann_Thread_Guard(size_t const nx, size_t if constexpr (reconstruction == reconstruction::Kind::pcm) { order = 1; } else if constexpr (reconstruction == reconstruction::Kind::plmc or reconstruction == reconstruction::Kind::plmp) { - order = 2; - } else if constexpr (reconstruction == reconstruction::Kind::ppmc or reconstruction == reconstruction::Kind::ppmp) { order = 3; + } else if constexpr (reconstruction == reconstruction::Kind::ppmc or reconstruction == reconstruction::Kind::ppmp) { + order = 4; } + bool out_of_bounds_thread = false; // X check if (nx > 1) { diff --git a/src/reconstruction/reconstruction_internals_tests.cu b/src/reconstruction/reconstruction_internals_tests.cu index 6200cb5eb..7177252f0 100644 --- a/src/reconstruction/reconstruction_internals_tests.cu +++ b/src/reconstruction/reconstruction_internals_tests.cu @@ -262,13 +262,13 @@ TEST(tALLReconstructionRiemannThreadGuard, CorrectInputExpectCorrectOutput) { // Test parameters int const order = 3; - int const nx = 6; - int const ny = 6; - int const nz = 6; + int const nx = 8; + int const ny = 8; + int const nz = 8; // fiducial data std::vector fiducial_vals(nx * ny * nz, 1); - fiducial_vals.at(86) = 0; + fiducial_vals.at(219) = 0; // loop through all values of the indices and check them for (int xid = 0; xid < nx; xid++) { From 8fa52ebc9b1a7bfbd07665d71e5e6d54921d3680 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 18 Apr 2024 11:26:46 -0400 Subject: [PATCH 13/29] Add doxygen comments to refactored PLMC code --- src/reconstruction/plmc_cuda.cu | 5 ----- src/reconstruction/plmc_cuda.h | 37 ++++++++++++++++++++++++++++----- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index c418d913d..dabe8c0ba 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -15,11 +15,6 @@ #include "../utils/hydro_utilities.h" #endif // DE -/*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real - *dev_bounds_R, int nx, int ny, int nz, Real dx, Real dt, Real - gamma, int dir) - * \brief When passed a stencil of conserved variables, returns the left and - right boundary values for the interface calculated using plm. */ template __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, Real dt, Real gamma) diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 7172bf12b..8e145f5ed 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -11,17 +11,28 @@ #include "../utils/hydro_utilities.h" #include "../utils/mhd_utilities.h" -/*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real - *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real - gamma, int dir) - * \brief When passed a stencil of conserved variables, returns the left and - right boundary values for the interface calculated using plm. */ +/*! + * \brief Performs second order reconstruction using limiting in the characteristic variables + * + * \tparam dir The direction that the solve is taking place in. 0=X, 1=Y, 2=Z + */ template __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, Real dt, Real gamma); namespace reconstruction { +/*! + * \brief Perform characteristic tracing/evolution on an interface + * + * \param[in] cell_i The cell state at cell i + * \param[in] del_m The limited slopes + * \param[in] dt The time step + * \param[in] dx The cell size in the direction of solve + * \param[in] sound_speed The sound speed in cell i + * \param interface_R_imh The R interface at i-1/2 + * \param interface_L_iph The L interface at i+1/2 + */ void __device__ __host__ __inline__ PLMC_Characteristic_Evolution(hydro_utilities::Primitive const &cell_i, hydro_utilities::Primitive const &del_m, Real const dt, Real const dx, Real const sound_speed, @@ -183,6 +194,22 @@ void __device__ __host__ __inline__ PLMC_Characteristic_Evolution(hydro_utilitie #endif // SCALAR } +/*! + * \brief This is the device function that actually does the piecewise linear reconstruction. + * + * \tparam direction The direction that the solve is taking place in. 0=X, 1=Y, 2=Z + * \param dev_conserved The conserved variable array + * \param xid The x index of the cell in the center of the stencil + * \param yid The y index of the cell in the center of the stencil + * \param zid The z index of the cell in the center of the stencil + * \param nx The number of cells in the x-direction + * \param ny The number of cells in the y-direction + * \param nz The number of cells in the z-direction + * \param dx The width of the cells in the direction of the solve + * \param dt The time step + * \param gamma The adiabatic index + * \return auto A local struct which returns the left interface at i+1/2 and the right interface at i-1/2 in that order. + */ template auto __device__ __inline__ PLMC_Reconstruction(Real *dev_conserved, int const xid, int const yid, int const zid, int const nx, int const ny, int const nz, Real const dx, Real const dt, From c08007c1cdd4c39f3cbaadca4f7068ffb9060284 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 18 Apr 2024 11:27:03 -0400 Subject: [PATCH 14/29] Move minimum value enforcement in PLMC into the proper function --- src/reconstruction/plmc_cuda.cu | 6 ------ src/reconstruction/plmc_cuda.h | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu index dabe8c0ba..d036e3eca 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plmc_cuda.cu @@ -32,12 +32,6 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ auto [interface_L_iph, interface_R_imh] = reconstruction::PLMC_Reconstruction(dev_conserved, xid, yid, zid, nx, ny, nz, dx, dt, gamma); - // apply minimum constraints - interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); - interface_L_iph.density = fmax(interface_L_iph.density, (Real)TINY_NUMBER); - interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER); - interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER); - // Set the field indices for the various directions int o1, o2, o3; if constexpr (dir == 0) { diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index 8e145f5ed..f296e310d 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -295,6 +295,12 @@ auto __device__ __inline__ PLMC_Reconstruction(Real *dev_conserved, int const xi PLMC_Characteristic_Evolution(cell_i, del_m, dt, dx, eigenvectors.sound_speed, interface_R_imh, interface_L_iph); #endif // VL + // apply minimum constraints + interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); + interface_L_iph.density = fmax(interface_L_iph.density, (Real)TINY_NUMBER); + interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER); + interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER); + struct LocalReturnStruct { hydro_utilities::Primitive left, right; }; From 7e2df9ad9bad435b2dbffc02870bf7c69aeebd4f Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 18 Apr 2024 12:47:24 -0400 Subject: [PATCH 15/29] Fix typo --- src/reconstruction/plmc_cuda.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index f296e310d..e50a6456e 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -208,7 +208,8 @@ void __device__ __host__ __inline__ PLMC_Characteristic_Evolution(hydro_utilitie * \param dx The width of the cells in the direction of the solve * \param dt The time step * \param gamma The adiabatic index - * \return auto A local struct which returns the left interface at i+1/2 and the right interface at i-1/2 in that order. + * \return auto A local struct which returns the left primitive interface at i+1/2 and the right primitive interface at + * i-1/2 in that order. */ template auto __device__ __inline__ PLMC_Reconstruction(Real *dev_conserved, int const xid, int const yid, int const zid, From 67fc328408274511948d6eae153c376c7c6fe932 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 18 Apr 2024 14:53:04 -0400 Subject: [PATCH 16/29] Add switch to PLMC to turn it into PLMP This is the first step in combining the two PLM methods into one with a switch for the limiting variables. Currently that switch exists but the kernels haven't been modified to use it yet. --- src/reconstruction/plmc_cuda.h | 14 ++++-- src/reconstruction/reconstruction_internals.h | 46 +++++++++++++++++++ 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h index e50a6456e..43408ab39 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plmc_cuda.h @@ -252,6 +252,7 @@ auto __device__ __inline__ PLMC_Reconstruction(Real *dev_conserved, int const xi // Van Leer hydro_utilities::Primitive const del_G = reconstruction::Compute_Van_Leer_Slope(del_L, del_R); +#ifdef PLMC // Project the left, right, centered and van Leer differences onto the // characteristic variables Stone Eqn 37 (del_a are differences in // characteristic variables, see Stone for notation) Use the eigenvectors @@ -275,17 +276,20 @@ auto __device__ __inline__ PLMC_Reconstruction(Real *dev_conserved, int const xi // Project back into the primitive variables. hydro_utilities::Primitive del_m = Characteristic_To_Primitive(cell_i, del_a_m, eigenvectors, gamma); - // Limit the variables that aren't transformed by the characteristic projection -#ifdef DE + // Limit the variables that aren't transformed by the characteristic projection + #ifdef DE del_m.gas_energy_specific = Van_Leer_Limiter(del_L.gas_energy_specific, del_R.gas_energy_specific, del_C.gas_energy_specific, del_G.gas_energy_specific); -#endif // DE -#ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { del_m.scalar_specific[i] = Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], del_C.scalar_specific[i], del_G.scalar_specific[i]); } -#endif // SCALAR + #endif // SCALAR +#else // PLMP + hydro_utilities::Primitive const del_m = reconstruction::Van_Leer_Limiter(del_L, del_R, del_C, del_G); +#endif // PLMC // Compute the left and right interface values using the monotonized difference in the primitive variables hydro_utilities::Primitive interface_L_iph = reconstruction::Calc_Interface_Linear(cell_i, del_m, 1.0); diff --git a/src/reconstruction/reconstruction_internals.h b/src/reconstruction/reconstruction_internals.h index f484397fe..5a2358dae 100644 --- a/src/reconstruction/reconstruction_internals.h +++ b/src/reconstruction/reconstruction_internals.h @@ -618,6 +618,52 @@ Characteristic __device__ __host__ __inline__ Van_Leer_Limiter(Characteristic co } // ===================================================================================================================== +// ===================================================================================================================== +/*! + * \brief Limit the primitive slopes. This is an overload that take reconstruction::Primitive variables instead + * of Reals as arguments. + * + * \param[in] del_L The left primitive slopes + * \param[in] del_R The right primitive slopes + * \param[in] del_C The centered primitive slopes + * \param[in] del_G The Van Leer primitive slopes + * \return hydro_utilities::Primitive The limited primitive slopes + */ +hydro_utilities::Primitive __device__ __host__ __inline__ Van_Leer_Limiter(hydro_utilities::Primitive const &del_L, + hydro_utilities::Primitive const &del_R, + hydro_utilities::Primitive const &del_C, + hydro_utilities::Primitive const &del_G) +{ + // the monotonized difference in the primitive variables + hydro_utilities::Primitive del_m; + + // Monotonize the slopes + del_m.density = Van_Leer_Limiter(del_L.density, del_R.density, del_C.density, del_G.density); + del_m.velocity.x = Van_Leer_Limiter(del_L.velocity.x, del_R.velocity.x, del_C.velocity.x, del_G.velocity.x); + del_m.velocity.y = Van_Leer_Limiter(del_L.velocity.y, del_R.velocity.y, del_C.velocity.y, del_G.velocity.y); + del_m.velocity.z = Van_Leer_Limiter(del_L.velocity.z, del_R.velocity.z, del_C.velocity.z, del_G.velocity.z); + del_m.pressure = Van_Leer_Limiter(del_L.pressure, del_R.pressure, del_C.pressure, del_G.pressure); + +#ifdef MHD + del_m.magnetic.y = Van_Leer_Limiter(del_L.magnetic.y, del_R.magnetic.y, del_C.magnetic.y, del_G.magnetic.y); + del_m.magnetic.z = Van_Leer_Limiter(del_L.magnetic.z, del_R.magnetic.z, del_C.magnetic.z, del_G.magnetic.z); +#endif // MHD + +#ifdef DE + del_m.gas_energy_specific = Van_Leer_Limiter(del_L.gas_energy_specific, del_R.gas_energy_specific, + del_C.gas_energy_specific, del_G.gas_energy_specific); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + del_m.scalar_specific[i] = Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], + del_C.scalar_specific[i], del_G.scalar_specific[i]); + } +#endif // SCALAR + + return del_m; +} +// ===================================================================================================================== + // ===================================================================================================================== /*! * \brief Monotonize the parabolic interface states From 284203fd05baeee673ff5ffc644a310862abeb13 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 18 Apr 2024 15:22:51 -0400 Subject: [PATCH 17/29] Merge PLMC & PLMP, MHD support in PLMP - Removed the PLMP kernel since PLMC can do both now - Renamed PLMC_cuda to PLM_cuda since it can now do limiting in either set of variables - Renamed plmc files to plm - The PLM kernel has MHD support for either type of limiting --- src/grid/grid3D.cpp | 14 +- src/integrators/VL_1D_cuda.cu | 13 +- src/integrators/VL_2D_cuda.cu | 17 +- src/integrators/VL_3D_cuda.cu | 30 +- src/integrators/simple_1D_cuda.cu | 14 +- src/integrators/simple_2D_cuda.cu | 17 +- src/integrators/simple_3D_cuda.cu | 21 +- .../{plmc_cuda.cu => plm_cuda.cu} | 30 +- .../{plmc_cuda.h => plm_cuda.h} | 18 +- .../{plmc_cuda_tests.cu => plm_cuda_tests.cu} | 18 +- src/reconstruction/plmp_cuda.cu | 365 ------------------ src/reconstruction/plmp_cuda.h | 24 -- src/utils/error_handling.cpp | 4 +- 13 files changed, 71 insertions(+), 514 deletions(-) rename src/reconstruction/{plmc_cuda.cu => plm_cuda.cu} (56%) rename src/reconstruction/{plmc_cuda.h => plm_cuda.h} (95%) rename src/reconstruction/{plmc_cuda_tests.cu => plm_cuda_tests.cu} (95%) delete mode 100644 src/reconstruction/plmp_cuda.cu delete mode 100644 src/reconstruction/plmp_cuda.h diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index c80c5a9fd..9e7e9d5cc 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -53,18 +53,12 @@ Grid3D::Grid3D(void) #ifdef PCM H.n_ghost = 2; #endif // PCM -#ifdef PLMP +#if defined(PLMP) or defined(PLMC) H.n_ghost = 3; -#endif // PLMP -#ifdef PLMC - H.n_ghost = 3; -#endif // PLMC -#ifdef PPMP - H.n_ghost = 4; -#endif // PPMP -#ifdef PPMC +#endif // PLMP or PLMC +#if defined(PPMP) or defined(PPMC) H.n_ghost = 4; -#endif // PPMC +#endif // PPMP or PLMC #ifdef GRAVITY H.n_ghost_potential_offset = H.n_ghost - N_GHOST_POTENTIAL; diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu index bdc6009bf..9995c5573 100644 --- a/src/integrators/VL_1D_cuda.cu +++ b/src/integrators/VL_1D_cuda.cu @@ -12,8 +12,7 @@ #include "../hydro/hydro_cuda.h" #include "../integrators/VL_1D_cuda.h" #include "../io/io.h" - #include "../reconstruction/plmc_cuda.h" - #include "../reconstruction/plmp_cuda.h" + #include "../reconstruction/plm_cuda.h" #include "../reconstruction/ppmc_cuda.h" #include "../reconstruction/ppmp_cuda.h" #include "../reconstruction/reconstruction.h" @@ -83,13 +82,9 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea GPU_Error_Check(); // Step 4: Construct left and right interface values using updated conserved variables - #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); - #endif - #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); - #endif + #if defined(PLMP) or defined(PLMC) + hipLaunchKernelGGL(PLM_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); + #endif // PLMP or PLMC #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); diff --git a/src/integrators/VL_2D_cuda.cu b/src/integrators/VL_2D_cuda.cu index 702ee0500..8cd98632a 100644 --- a/src/integrators/VL_2D_cuda.cu +++ b/src/integrators/VL_2D_cuda.cu @@ -10,8 +10,7 @@ #include "../global/global_cuda.h" #include "../hydro/hydro_cuda.h" #include "../integrators/VL_2D_cuda.h" - #include "../reconstruction/plmc_cuda.h" - #include "../reconstruction/plmp_cuda.h" + #include "../reconstruction/plm_cuda.h" #include "../reconstruction/ppmc_cuda.h" #include "../reconstruction/ppmp_cuda.h" #include "../reconstruction/reconstruction.h" @@ -92,18 +91,12 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of // Step 4: Construct left and right interface values using updated conserved // variables - #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, - dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, - dt, gama, 1, n_fields); - #endif - #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, + #if defined(PLMP) or defined(PLMC) + hipLaunchKernelGGL(PLM_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); - hipLaunchKernelGGL(PLMC_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, + hipLaunchKernelGGL(PLM_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); - #endif + #endif // PLMP or PLMC #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 5301baced..d8c8f5d78 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -17,8 +17,7 @@ #include "../io/io.h" #include "../mhd/ct_electric_fields.h" #include "../mhd/magnetic_update.h" - #include "../reconstruction/plmc_cuda.h" - #include "../reconstruction/plmp_cuda.h" + #include "../reconstruction/plm_cuda.h" #include "../reconstruction/ppmc_cuda.h" #include "../reconstruction/ppmp_cuda.h" #include "../riemann_solvers/exact_cuda.h" @@ -240,24 +239,15 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int #endif // MHD // Step 4: Construct left and right interface values using updated conserved variables - #ifdef PLMP - cuda_utilities::AutomaticLaunchParams static const plmp_launch_params(PLMP_cuda, n_cells); - hipLaunchKernelGGL(PLMP_cuda, plmp_launch_params.get_numBlocks(), plmp_launch_params.get_threadsPerBlock(), 0, 0, - dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMP_cuda, plmp_launch_params.get_numBlocks(), plmp_launch_params.get_threadsPerBlock(), 0, 0, - dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PLMP_cuda, plmp_launch_params.get_numBlocks(), plmp_launch_params.get_threadsPerBlock(), 0, 0, - dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif // PLMP - #ifdef PLMC - cuda_utilities::AutomaticLaunchParams static const plmc_vl_launch_params(PLMC_cuda<0>, n_cells); - hipLaunchKernelGGL(PLMC_cuda<0>, plmc_vl_launch_params.get_numBlocks(), plmc_vl_launch_params.get_threadsPerBlock(), - 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); - hipLaunchKernelGGL(PLMC_cuda<1>, plmc_vl_launch_params.get_numBlocks(), plmc_vl_launch_params.get_threadsPerBlock(), - 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); - hipLaunchKernelGGL(PLMC_cuda<2>, plmc_vl_launch_params.get_numBlocks(), plmc_vl_launch_params.get_threadsPerBlock(), - 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama); - #endif // PLMC + #if defined(PLMP) or defined(PLMC) + cuda_utilities::AutomaticLaunchParams static const plm_vl_launch_params(PLM_cuda<0>, n_cells); + hipLaunchKernelGGL(PLM_cuda<0>, plm_vl_launch_params.get_numBlocks(), plm_vl_launch_params.get_threadsPerBlock(), 0, + 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); + hipLaunchKernelGGL(PLM_cuda<1>, plm_vl_launch_params.get_numBlocks(), plm_vl_launch_params.get_threadsPerBlock(), 0, + 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); + hipLaunchKernelGGL(PLM_cuda<2>, plm_vl_launch_params.get_numBlocks(), plm_vl_launch_params.get_threadsPerBlock(), 0, + 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama); + #endif // PLMP or PLMC #ifdef PPMP cuda_utilities::AutomaticLaunchParams static const ppmp_launch_params(PPMP_cuda, n_cells); hipLaunchKernelGGL(PPMP_cuda, ppmp_launch_params.get_numBlocks(), ppmp_launch_params.get_threadsPerBlock(), 0, 0, diff --git a/src/integrators/simple_1D_cuda.cu b/src/integrators/simple_1D_cuda.cu index 83711b002..369c055af 100644 --- a/src/integrators/simple_1D_cuda.cu +++ b/src/integrators/simple_1D_cuda.cu @@ -10,8 +10,7 @@ #include "../hydro/hydro_cuda.h" #include "../integrators/simple_1D_cuda.h" #include "../io/io.h" -#include "../reconstruction/plmc_cuda.h" -#include "../reconstruction/plmp_cuda.h" +#include "../reconstruction/plm_cuda.h" #include "../reconstruction/ppmc_cuda.h" #include "../reconstruction/ppmp_cuda.h" #include "../reconstruction/reconstruction.h" @@ -53,15 +52,10 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, } // Step 1: Do the reconstruction -#ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, - 0, n_fields); - GPU_Error_Check(); -#endif -#ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); +#if defined(PLMP) or defined(PLMC) + hipLaunchKernelGGL(PLM_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); GPU_Error_Check(); -#endif +#endif // PLMP or PLMC #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); diff --git a/src/integrators/simple_2D_cuda.cu b/src/integrators/simple_2D_cuda.cu index c590e127c..426a9a2d9 100644 --- a/src/integrators/simple_2D_cuda.cu +++ b/src/integrators/simple_2D_cuda.cu @@ -8,8 +8,7 @@ #include "../global/global_cuda.h" #include "../hydro/hydro_cuda.h" #include "../integrators/simple_2D_cuda.h" -#include "../reconstruction/plmc_cuda.h" -#include "../reconstruction/plmp_cuda.h" +#include "../reconstruction/plm_cuda.h" #include "../reconstruction/ppmc_cuda.h" #include "../reconstruction/ppmp_cuda.h" #include "../reconstruction/reconstruction.h" @@ -54,16 +53,10 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int } // Step 1: Do the reconstruction -#ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, - gama, 1, n_fields); -#endif -#ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); - hipLaunchKernelGGL(PLMC_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); -#endif +#if defined(PLMP) or defined(PLMC) + hipLaunchKernelGGL(PLM_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); + hipLaunchKernelGGL(PLM_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); +#endif // PLMP or PLMC #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index bb2d473c0..2e834e370 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -12,8 +12,7 @@ #include "../hydro/hydro_cuda.h" #include "../integrators/simple_3D_cuda.h" #include "../io/io.h" - #include "../reconstruction/plmc_cuda.h" - #include "../reconstruction/plmp_cuda.h" + #include "../reconstruction/plm_cuda.h" #include "../reconstruction/ppmc_cuda.h" #include "../reconstruction/ppmp_cuda.h" #include "../reconstruction/reconstruction.h" @@ -85,19 +84,11 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, #endif // Step 1: Construct left and right interface values using updated conserved variables - #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, - gama, 1, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, - gama, 2, n_fields); - #endif // PLMP - #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda<0>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); - hipLaunchKernelGGL(PLMC_cuda<1>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); - hipLaunchKernelGGL(PLMC_cuda<2>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama); - #endif + #if defined(PLMP) or defined(PLMC) + hipLaunchKernelGGL(PLM_cuda<0>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); + hipLaunchKernelGGL(PLM_cuda<1>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); + hipLaunchKernelGGL(PLM_cuda<2>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama); + #endif // PLMP or PLMC #ifdef PPMP hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plm_cuda.cu similarity index 56% rename from src/reconstruction/plmc_cuda.cu rename to src/reconstruction/plm_cuda.cu index d036e3eca..078ab75fc 100644 --- a/src/reconstruction/plmc_cuda.cu +++ b/src/reconstruction/plm_cuda.cu @@ -1,13 +1,12 @@ -/*! \file plmc_cuda.cu - * \brief Definitions of the piecewise linear reconstruction functions with - limiting applied in the characteristic variables, as described - in Stone et al., 2008. */ +/*! \file plm_cuda.cu + * \brief Definitions of the piecewise linear reconstruction functions, as described in Stone et al., 2008. + */ #include #include "../global/global.h" #include "../global/global_cuda.h" -#include "../reconstruction/plmc_cuda.h" +#include "../reconstruction/plm_cuda.h" #include "../utils/cuda_utilities.h" #include "../utils/gpu.hpp" @@ -16,8 +15,8 @@ #endif // DE template -__global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, - int ny, int nz, Real dx, Real dt, Real gamma) +__global__ __launch_bounds__(TPB) void PLM_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real dx, Real dt, Real gamma) { // get a thread ID int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; @@ -30,7 +29,7 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ } auto [interface_L_iph, interface_R_imh] = - reconstruction::PLMC_Reconstruction(dev_conserved, xid, yid, zid, nx, ny, nz, dx, dt, gamma); + reconstruction::PLM_Reconstruction(dev_conserved, xid, yid, zid, nx, ny, nz, dx, dt, gamma); // Set the field indices for the various directions int o1, o2, o3; @@ -61,12 +60,9 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_ } // Instantiate the relevant template specifications -template __global__ __launch_bounds__(TPB) void PLMC_cuda<0>(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma); -template __global__ __launch_bounds__(TPB) void PLMC_cuda<1>(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma); -template __global__ __launch_bounds__(TPB) void PLMC_cuda<2>(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma); +template __global__ __launch_bounds__(TPB) void PLM_cuda<0>(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, + int nx, int ny, int nz, Real dx, Real dt, Real gamma); +template __global__ __launch_bounds__(TPB) void PLM_cuda<1>(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, + int nx, int ny, int nz, Real dx, Real dt, Real gamma); +template __global__ __launch_bounds__(TPB) void PLM_cuda<2>(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, + int nx, int ny, int nz, Real dx, Real dt, Real gamma); diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plm_cuda.h similarity index 95% rename from src/reconstruction/plmc_cuda.h rename to src/reconstruction/plm_cuda.h index 43408ab39..15b7bc411 100644 --- a/src/reconstruction/plmc_cuda.h +++ b/src/reconstruction/plm_cuda.h @@ -1,6 +1,6 @@ -/*! \file plmc_cuda.h - * \brief Declarations of the cuda plm kernels, characteristic reconstruction - * version. */ +/*! \file plm_cuda.h + * \brief Declarations of the cuda plm kernels + */ #ifndef PLMC_CUDA_H #define PLMC_CUDA_H @@ -12,13 +12,13 @@ #include "../utils/mhd_utilities.h" /*! - * \brief Performs second order reconstruction using limiting in the characteristic variables + * \brief Performs second order reconstruction using limiting in the characteristic or primitive variables * * \tparam dir The direction that the solve is taking place in. 0=X, 1=Y, 2=Z */ template -__global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, - int ny, int nz, Real dx, Real dt, Real gamma); +__global__ __launch_bounds__(TPB) void PLM_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real dx, Real dt, Real gamma); namespace reconstruction { @@ -212,9 +212,9 @@ void __device__ __host__ __inline__ PLMC_Characteristic_Evolution(hydro_utilitie * i-1/2 in that order. */ template -auto __device__ __inline__ PLMC_Reconstruction(Real *dev_conserved, int const xid, int const yid, int const zid, - int const nx, int const ny, int const nz, Real const dx, Real const dt, - Real const gamma) +auto __device__ __inline__ PLM_Reconstruction(Real *dev_conserved, int const xid, int const yid, int const zid, + int const nx, int const ny, int const nz, Real const dx, Real const dt, + Real const gamma) { // Compute the total number of cells int const n_cells = nx * ny * nz; diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plm_cuda_tests.cu similarity index 95% rename from src/reconstruction/plmc_cuda_tests.cu rename to src/reconstruction/plm_cuda_tests.cu index 02f863c93..a5673ec30 100644 --- a/src/reconstruction/plmc_cuda_tests.cu +++ b/src/reconstruction/plm_cuda_tests.cu @@ -1,6 +1,6 @@ /*! - * \file plmc_cuda_tests.cu - * \brief Tests for the contents of plmc_cuda.h and plmc_cuda.cu + * \file plm_cuda_tests.cu + * \brief Tests for the contents of plm_cuda.h and plm_cuda.cu * */ @@ -18,7 +18,7 @@ #include "../global/global.h" #include "../io/io.h" -#include "../reconstruction/plmc_cuda.h" +#include "../reconstruction/plm_cuda.h" #include "../utils/DeviceVector.h" #include "../utils/hydro_utilities.h" #include "../utils/testing_utilities.h" @@ -150,15 +150,15 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) // Launch kernel switch (direction) { case 0: - hipLaunchKernelGGL(PLMC_cuda<0>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + hipLaunchKernelGGL(PLM_cuda<0>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma); break; case 1: - hipLaunchKernelGGL(PLMC_cuda<1>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + hipLaunchKernelGGL(PLM_cuda<1>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma); break; case 2: - hipLaunchKernelGGL(PLMC_cuda<2>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + hipLaunchKernelGGL(PLM_cuda<2>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma); break; } @@ -275,15 +275,15 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) // Launch kernel switch (direction) { case 0: - hipLaunchKernelGGL(PLMC_cuda<0>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + hipLaunchKernelGGL(PLM_cuda<0>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), dev_interface_right.data(), nx, ny, nz, dx, dt, gamma); break; case 1: - hipLaunchKernelGGL(PLMC_cuda<1>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + hipLaunchKernelGGL(PLM_cuda<1>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), dev_interface_right.data(), nx, ny, nz, dx, dt, gamma); break; case 2: - hipLaunchKernelGGL(PLMC_cuda<2>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + hipLaunchKernelGGL(PLM_cuda<2>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), dev_interface_right.data(), nx, ny, nz, dx, dt, gamma); break; } diff --git a/src/reconstruction/plmp_cuda.cu b/src/reconstruction/plmp_cuda.cu deleted file mode 100644 index e8cfa0d09..000000000 --- a/src/reconstruction/plmp_cuda.cu +++ /dev/null @@ -1,365 +0,0 @@ -/*! \file plmp_cuda.cu - * \brief Definitions of the piecewise linear reconstruction functions for - with limiting in the primitive variables. */ - -#include - -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../reconstruction/plmp_cuda.h" -#include "../utils/gpu.hpp" - -#ifdef DE // PRESSURE_DE - #include "../utils/hydro_utilities.h" -#endif - -/*! \fn __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real - *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real - gamma, int dir, int n_fields) - * \brief When passed a stencil of conserved variables, returns the left and - right boundary values for the interface calculated using plm. */ -__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) -{ - int n_cells = nx * ny * nz; - int o1, o2, o3; - if (dir == 0) { - o1 = 1; - o2 = 2; - o3 = 3; - } - if (dir == 1) { - o1 = 2; - o2 = 3; - o3 = 1; - } - if (dir == 2) { - o1 = 3; - o2 = 1; - o3 = 2; - } - - // declare primitive variables in the stencil - Real d_i, vx_i, vy_i, vz_i, p_i; - Real d_imo, vx_imo, vy_imo, vz_imo, p_imo; - Real d_ipo, vx_ipo, vy_ipo, vz_ipo, p_ipo; - - // declare left and right interface values - Real d_L, vx_L, vy_L, vz_L, p_L; - Real d_R, vx_R, vy_R, vz_R, p_R; - - // declare conserved variables; - Real mx_L, my_L, mz_L, E_L; - Real mx_R, my_R, mz_R, E_R; - -#ifdef DE - Real ge_i, ge_imo, ge_ipo, ge_L, ge_R, dge_L, dge_R, E_kin, E, dge; -#endif // DE -#ifdef SCALAR - Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS]; - Real scalar_L[NSCALARS], scalar_R[NSCALARS], dscalar_L[NSCALARS], dscalar_R[NSCALARS]; -#endif // SCALAR - -#ifndef VL // Don't use velocities to reconstruct when using VL - Real dtodx = dt / dx; - Real dfl, dfr, mxfl, mxfr, myfl, myfr, mzfl, mzfr, Efl, Efr; - #ifdef DE - Real gefl, gefr; - #endif // DE - #ifdef SCALAR - Real scalarfl[NSCALARS], scalarfr[NSCALARS]; - #endif // SCALAR -#endif // VL - - // get a thread ID - int blockId = blockIdx.x + blockIdx.y * gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; - int id; - int zid = tid / (nx * ny); - int yid = (tid - zid * nx * ny) / nx; - int xid = tid - zid * nx * ny - yid * nx; - - int xs, xe, ys, ye, zs, ze; - if (dir == 0) { - xs = 1; - xe = nx - 2; - ys = 0; - ye = ny; - zs = 0; - ze = nz; - } - if (dir == 1) { - xs = 0; - xe = nx; - ys = 1; - ye = ny - 2; - zs = 0; - ze = nz; - } - if (dir == 2) { - xs = 0; - xe = nx; - ys = 0; - ye = ny; - zs = 1; - ze = nz - 2; - } - - if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) { - // load the 3-cell stencil into registers - // cell i - id = xid + yid * nx + zid * nx * ny; - d_i = dev_conserved[id]; - vx_i = dev_conserved[o1 * n_cells + id] / d_i; - vy_i = dev_conserved[o2 * n_cells + id] / d_i; - vz_i = dev_conserved[o3 * n_cells + id] / d_i; -#ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); -#else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); -#endif // PRESSURE_DE - p_i = fmax(p_i, (Real)TINY_NUMBER); -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_i[i] = dev_conserved[(5 + i) * n_cells + id] / d_i; - } -#endif // SCALAR -#ifdef DE - ge_i = dge / d_i; -#endif // DE - // cell i-1 - if (dir == 0) { - id = xid - 1 + yid * nx + zid * nx * ny; - } - if (dir == 1) { - id = xid + (yid - 1) * nx + zid * nx * ny; - } - if (dir == 2) { - id = xid + yid * nx + (zid - 1) * nx * ny; - } - d_imo = dev_conserved[id]; - vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; - vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; - vz_imo = dev_conserved[o3 * n_cells + id] / d_imo; -#ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); -#else - p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * - (gamma - 1.0); -#endif // PRESSURE_DE - p_imo = fmax(p_imo, (Real)TINY_NUMBER); -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_imo[i] = dev_conserved[(5 + i) * n_cells + id] / d_imo; - } -#endif // SCALAR -#ifdef DE - ge_imo = dge / d_imo; -#endif // DE - // cell i+1 - if (dir == 0) { - id = xid + 1 + yid * nx + zid * nx * ny; - } - if (dir == 1) { - id = xid + (yid + 1) * nx + zid * nx * ny; - } - if (dir == 2) { - id = xid + yid * nx + (zid + 1) * nx * ny; - } - d_ipo = dev_conserved[id]; - vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; - vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; - vz_ipo = dev_conserved[o3 * n_cells + id] / d_ipo; -#ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); -#else - p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * - (gamma - 1.0); -#endif // PRESSURE_DE - p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_ipo[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipo; - } -#endif // SCALAR -#ifdef DE - ge_ipo = dge / d_ipo; -#endif // DE - - // Calculate the interface values for each primitive variable - Interface_Values_PLM(d_imo, d_i, d_ipo, &d_L, &d_R); - Interface_Values_PLM(vx_imo, vx_i, vx_ipo, &vx_L, &vx_R); - Interface_Values_PLM(vy_imo, vy_i, vy_ipo, &vy_L, &vy_R); - Interface_Values_PLM(vz_imo, vz_i, vz_ipo, &vz_L, &vz_R); - Interface_Values_PLM(p_imo, p_i, p_ipo, &p_L, &p_R); -#ifdef DE - Interface_Values_PLM(ge_imo, ge_i, ge_ipo, &ge_L, &ge_R); -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - Interface_Values_PLM(scalar_imo[i], scalar_i[i], scalar_ipo[i], &scalar_L[i], &scalar_R[i]); - } -#endif // SCALAR - - // Apply mimimum constraints - d_L = fmax(d_L, (Real)TINY_NUMBER); - d_R = fmax(d_R, (Real)TINY_NUMBER); - p_L = fmax(p_L, (Real)TINY_NUMBER); - p_R = fmax(p_R, (Real)TINY_NUMBER); - - // calculate the conserved variables at each interface - mx_L = d_L * vx_L; - mx_R = d_R * vx_R; - my_L = d_L * vy_L; - my_R = d_R * vy_R; - mz_L = d_L * vz_L; - mz_R = d_R * vz_R; - E_L = p_L / (gamma - 1.0) + 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L); - E_R = p_R / (gamma - 1.0) + 0.5 * d_R * (vx_R * vx_R + vy_R * vy_R + vz_R * vz_R); -#ifdef DE - dge_L = d_L * ge_L; - dge_R = d_R * ge_R; -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dscalar_L[i] = d_L * scalar_L[i]; - dscalar_R[i] = d_R * scalar_R[i]; - } -#endif // SCALAR - -// #ifdef CTU -#ifndef VL // Don't use velocities to reconstruct when using VL - // calculate fluxes for each variable - dfl = mx_L; - dfr = mx_R; - mxfl = mx_L * vx_L + p_L; - mxfr = mx_R * vx_R + p_R; - myfl = mx_L * vy_L; - myfr = mx_R * vy_R; - mzfl = mx_L * vz_L; - mzfr = mx_R * vz_R; - Efl = (E_L + p_L) * vx_L; - Efr = (E_R + p_R) * vx_R; - #ifdef DE - gefl = dge_L * vx_L; - gefr = dge_R * vx_R; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalarfl[i] = dscalar_L[i] * vx_L; - scalarfr[i] = dscalar_R[i] * vx_R; - } - #endif // SCALAR - - // Evolve the boundary extrapolated values half a timestep. - d_L += 0.5 * (dtodx) * (dfl - dfr); - d_R += 0.5 * (dtodx) * (dfl - dfr); - mx_L += 0.5 * (dtodx) * (mxfl - mxfr); - mx_R += 0.5 * (dtodx) * (mxfl - mxfr); - my_L += 0.5 * (dtodx) * (myfl - myfr); - my_R += 0.5 * (dtodx) * (myfl - myfr); - mz_L += 0.5 * (dtodx) * (mzfl - mzfr); - mz_R += 0.5 * (dtodx) * (mzfl - mzfr); - E_L += 0.5 * (dtodx) * (Efl - Efr); - E_R += 0.5 * (dtodx) * (Efl - Efr); - #ifdef DE - dge_L += 0.5 * (dtodx) * (gefl - gefr); - dge_R += 0.5 * (dtodx) * (gefl - gefr); - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dscalar_L[i] += 0.5 * (dtodx) * (scalarfl[i] - scalarfr[i]); - dscalar_R[i] += 0.5 * (dtodx) * (scalarfl[i] - scalarfr[i]); - } - #endif // SCALAR - -#endif // NO VL - - // Convert the left and right states in the primitive to the conserved - // variables send final values back from kernel bounds_R refers to the right - // side of the i-1/2 interface - if (dir == 0) { - id = xid - 1 + yid * nx + zid * nx * ny; - } - if (dir == 1) { - id = xid + (yid - 1) * nx + zid * nx * ny; - } - if (dir == 2) { - id = xid + yid * nx + (zid - 1) * nx * ny; - } - dev_bounds_R[id] = d_L; - dev_bounds_R[o1 * n_cells + id] = mx_L; - dev_bounds_R[o2 * n_cells + id] = my_L; - dev_bounds_R[o3 * n_cells + id] = mz_L; - dev_bounds_R[4 * n_cells + id] = E_L; -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_bounds_R[(5 + i) * n_cells + id] = dscalar_L[i]; - } -#endif // SCALAR -#ifdef DE - dev_bounds_R[(n_fields - 1) * n_cells + id] = dge_L; -#endif // DE - // bounds_L refers to the left side of the i+1/2 interface - id = xid + yid * nx + zid * nx * ny; - dev_bounds_L[id] = d_R; - dev_bounds_L[o1 * n_cells + id] = mx_R; - dev_bounds_L[o2 * n_cells + id] = my_R; - dev_bounds_L[o3 * n_cells + id] = mz_R; - dev_bounds_L[4 * n_cells + id] = E_R; -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_bounds_L[(5 + i) * n_cells + id] = dscalar_R[i]; - } -#endif // SCALAR -#ifdef DE - dev_bounds_L[(n_fields - 1) * n_cells + id] = dge_R; -#endif // DE - } -} - -__device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, Real *q_L, Real *q_R) -{ - Real del_q_L, del_q_R, del_q_C, del_q_G; - Real lim_slope_a, lim_slope_b, del_q_m; - - // Compute the left, right, centered, and Van Leer differences of the - // primitive variables Note that here L and R refer to locations relative to - // the cell center - - // left - del_q_L = q_i - q_imo; - // right - del_q_R = q_ipo - q_i; - // centered - del_q_C = 0.5 * (q_ipo - q_imo); - // Van Leer - if (del_q_L * del_q_R > 0.0) { - del_q_G = 2.0 * del_q_L * del_q_R / (del_q_L + del_q_R); - } else { - del_q_G = 0.0; - } - - // Monotonize the differences - lim_slope_a = fmin(fabs(del_q_L), fabs(del_q_R)); - lim_slope_b = fmin(fabs(del_q_C), fabs(del_q_G)); - - // Minmod limiter - // del_q_m = sgn_CUDA(del_q_C)*fmin(2.0*lim_slope_a, fabs(del_q_C)); - - // Van Leer limiter - del_q_m = sgn_CUDA(del_q_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - - // Calculate the left and right interface values using the limited slopes - *q_L = q_i - 0.5 * del_q_m; - *q_R = q_i + 0.5 * del_q_m; -} diff --git a/src/reconstruction/plmp_cuda.h b/src/reconstruction/plmp_cuda.h deleted file mode 100644 index 34faa14df..000000000 --- a/src/reconstruction/plmp_cuda.h +++ /dev/null @@ -1,24 +0,0 @@ -/*! \file plmp_cuda.h - * \brief Declarations of the cuda plmp kernels. */ - -#ifndef PLMP_CUDA_H -#define PLMP_CUDA_H - -#include "../global/global.h" - -/*! \fn __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real - *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real - gamma, int dir, int n_fields) - * \brief When passed a stencil of conserved variables, returns the left and - right boundary values for the interface calculated using plmp. */ -__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); - -/*! \fn __device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, - Real *q_L, Real *q_R) - * \brief Calculates the left and right interface values for a cell using - linear reconstruction in the primitive variables with Van Leer or Minmod slope - limiting. */ -__device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, Real *q_L, Real *q_R); - -#endif // PLMP_CUDA_H diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index 629e31632..004254550 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -96,8 +96,8 @@ void Check_Configuration(Parameters const& P) #endif //! HLLD or EXACT or ROE or HLL or HLLC // May only use certain reconstructions - #if ((defined(PCM) + defined(PLMC) + defined(PPMC)) != 1) || defined(PLMP) || defined(PPMP) - #error "MHD only supports PCM, PLMC, and PPMC reconstruction" + #if ((defined(PCM) + defined(PLMP) + defined(PLMC) + defined(PPMC)) != 1) || defined(PPMP) + #error "MHD does not support PPMP reconstruction." #endif // Reconstruction check // must have HDF5 From 0f3f1a011c7f25be7bdb8e6d17ed7f8254f16cde Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 18 Apr 2024 15:54:51 -0400 Subject: [PATCH 18/29] Update PLMC tests to work with the new PLM kernel --- src/reconstruction/plm_cuda_tests.cu | 120 ++++++++++++++++++--------- 1 file changed, 82 insertions(+), 38 deletions(-) diff --git a/src/reconstruction/plm_cuda_tests.cu b/src/reconstruction/plm_cuda_tests.cu index a5673ec30..741583fbd 100644 --- a/src/reconstruction/plm_cuda_tests.cu +++ b/src/reconstruction/plm_cuda_tests.cu @@ -23,11 +23,11 @@ #include "../utils/hydro_utilities.h" #include "../utils/testing_utilities.h" -TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) +TEST(tHYDROPlmReconstructor, CorrectInputExpectCorrectOutput) { #ifndef VL - std::cerr << "Warning: The tHYDROPlmcReconstructor.CorrectInputExpectCorrectOutput only supports the Van Leer (VL) " - "integrator" + std::cerr << "Warning: The tHYDROPlmReconstructor.CorrectInputExpectCorrectOutput test only supports the Van Leer " + "(VL) integrator" << std::endl; return; #endif // VL @@ -86,40 +86,51 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) {345, 3.1670194578067843}, {349, 19.142817472509272}}}; - std::vector> fiducial_interface_right = - - {{{25, 3.8877922383184833}, - {26, 0.70033864721549188}, - {105, 1.594778794367564}, - {106, 3.0633780053857027}, - {185, 4.0069556576401011}, - {186, 2.1015872413794123}, - {265, 1.7883678016935782}, - {266, 3.9675148506537838}, - {345, 2.8032969746372531}, - {346, 21.091316282933843}}, - {{17, 0.43265217076853835}, - {33, 0.19457128219588618}, - {97, 3.2697645945288754}, - {113, 4.4286255636679313}, - {177, 0.07588397666718491}, - {193, 2.2851440769830953}, - {257, 0.91612950577699748}, - {273, 2.697375839048191}, - {337, 13.332201861384396}, - {353, 82.515887983144168}}, - {{5, 2.2863650183226212}, - {9, 1.686415421301841}, - {85, 0.72340346106443465}, - {89, 1.77925054463361}, - {165, 5.3997753452111859}, - {169, 1.4379190463124141}, - {245, 0.95177493689267167}, - {249, 0.46056494878491938}, - {325, 6.6889498465051398}, - {329, 1.6145084086614285}}} - - ; + std::vector> fiducial_interface_right = {{{25, 3.8877922383184833}, + {26, 0.70033864721549188}, + {105, 1.594778794367564}, + {106, 3.0633780053857027}, + {185, 4.0069556576401011}, + {186, 2.1015872413794123}, + {265, 1.7883678016935782}, + {266, 3.9675148506537838}, + {345, 2.8032969746372531}, + {346, 21.091316282933843}}, + {{17, 0.43265217076853835}, + {33, 0.19457128219588618}, + {97, 3.2697645945288754}, + {113, 4.4286255636679313}, + {177, 0.07588397666718491}, + {193, 2.2851440769830953}, + {257, 0.91612950577699748}, + {273, 2.697375839048191}, + {337, 13.332201861384396}, + {353, 82.515887983144168}}, + {{5, 2.2863650183226212}, + {9, 1.686415421301841}, + {85, 0.72340346106443465}, + {89, 1.77925054463361}, + {165, 5.3997753452111859}, + {169, 1.4379190463124141}, + {245, 0.95177493689267167}, + {249, 0.46056494878491938}, + {325, 6.6889498465051398}, + {329, 1.6145084086614285}}}; + +#ifdef PLMP + // Change the fiducial data for the PLMP version of this test. Not all elements need to be changed + fiducial_interface_left.at(2)[29] = 1.1600386335016277; + fiducial_interface_left.at(2)[109] = 4.0972120006692201; + fiducial_interface_left.at(2)[189] = 3.6963273197291162; + fiducial_interface_left.at(2)[269] = 0.31680991947688009; + fiducial_interface_left.at(2)[349] = 13.167815913968777; + + fiducial_interface_right.at(2)[9] = 2.2127922091020542; + fiducial_interface_right.at(2)[89] = 2.3346037361106178; + fiducial_interface_right.at(2)[169] = 1.8867332584893817; + fiducial_interface_right.at(2)[249] = 0.60431997809295868; + fiducial_interface_right.at(2)[329] = 2.1184410336202282; +#endif // PLMP // Loop over different directions for (size_t direction = 0; direction < 3; direction++) { @@ -193,7 +204,7 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) } } -TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) +TEST(tMHDPlmReconstructor, CorrectInputExpectCorrectOutput) { // Set up PRNG to use std::mt19937_64 prng(42); @@ -266,6 +277,39 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) {389, 1.9034519507895218}, }}; +#ifdef PLMP + // Change the fiducial data for the PLMP version of this test. Not all elements need to be changed + fiducial_interface_left.at(1)[21] = 0.74780807318015607; + fiducial_interface_left.at(1)[85] = 2.7423587304689869; + fiducial_interface_left.at(1)[149] = 2.0740041194407692; + fiducial_interface_left.at(1)[213] = 1.2020911406758266; + fiducial_interface_left.at(1)[277] = 14.779549963507595; + fiducial_interface_left.at(1)[341] = 2.1583975283044725; + fiducial_interface_left.at(1)[405] = 1.7965456420437496; + fiducial_interface_left.at(2)[21] = 0.2849255850177943; + fiducial_interface_left.at(2)[85] = 2.2425026758932325; + fiducial_interface_left.at(2)[149] = 2.2953938046062077; + fiducial_interface_left.at(2)[213] = 0.57593655962650969; + fiducial_interface_left.at(2)[277] = 26.253543495427323; + fiducial_interface_left.at(2)[341] = 1.7033818819502551; + fiducial_interface_left.at(2)[405] = 1.9082633079592932; + + fiducial_interface_right.at(1)[17] = 0.43265217076853835; + fiducial_interface_right.at(1)[81] = 2.8178764819733342; + fiducial_interface_right.at(1)[145] = 2.6588068794446604; + fiducial_interface_right.at(1)[209] = 0.69548238395352202; + fiducial_interface_right.at(1)[273] = 23.035559922784643; + fiducial_interface_right.at(1)[337] = 2.1583975283044725; + fiducial_interface_right.at(1)[401] = 1.6102181218567606; + fiducial_interface_right.at(2)[5] = 0.89553465893090012; + fiducial_interface_right.at(2)[69] = 2.068452470340659; + fiducial_interface_right.at(2)[133] = 0.77257393218171133; + fiducial_interface_right.at(2)[197] = 1.0689231838445987; + fiducial_interface_right.at(2)[261] = 7.3918889587333432; + fiducial_interface_right.at(2)[325] = 1.7033818819502551; + fiducial_interface_right.at(2)[389] = 1.8093240100745669; +#endif // PLMP + // Loop over different directions for (size_t direction = 0; direction < 3; direction++) { // Allocate device buffers From 8749c1d75c2d8e08b8d3fc6e342d48da4a88203e Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Thu, 18 Apr 2024 16:21:24 -0400 Subject: [PATCH 19/29] Move the eigenvector computation so it's not in PLMP Moved the computation of the eigenvectors so it's not unnecerarily done in PLMP --- src/reconstruction/plm_cuda.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/reconstruction/plm_cuda.h b/src/reconstruction/plm_cuda.h index 15b7bc411..2a1d3153a 100644 --- a/src/reconstruction/plm_cuda.h +++ b/src/reconstruction/plm_cuda.h @@ -29,17 +29,18 @@ namespace reconstruction * \param[in] del_m The limited slopes * \param[in] dt The time step * \param[in] dx The cell size in the direction of solve - * \param[in] sound_speed The sound speed in cell i + * \param[in] gamma The adiabatic index * \param interface_R_imh The R interface at i-1/2 * \param interface_L_iph The L interface at i+1/2 */ void __device__ __host__ __inline__ PLMC_Characteristic_Evolution(hydro_utilities::Primitive const &cell_i, hydro_utilities::Primitive const &del_m, - Real const dt, Real const dx, Real const sound_speed, + Real const dt, Real const dx, Real const gamma, hydro_utilities::Primitive &interface_R_imh, hydro_utilities::Primitive &interface_L_iph) { - Real const dtodx = dt / dx; + Real const dtodx = dt / dx; + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); // Compute the eigenvalues of the linearized equations in the // primitive variables using the cell-centered primitive variables @@ -234,9 +235,6 @@ auto __device__ __inline__ PLM_Reconstruction(Real *dev_conserved, int const xid dev_conserved, xid + int(direction == 0), yid + int(direction == 1), zid + int(direction == 2), nx, ny, n_cells, gamma); - // Compute the eigenvectors - reconstruction::EigenVecs const eigenvectors = reconstruction::Compute_Eigenvectors(cell_i, gamma); - // Compute the left, right, centered, and van Leer differences of the primitive variables Note that here L and R refer // to locations relative to the cell center @@ -253,6 +251,9 @@ auto __device__ __inline__ PLM_Reconstruction(Real *dev_conserved, int const xid hydro_utilities::Primitive const del_G = reconstruction::Compute_Van_Leer_Slope(del_L, del_R); #ifdef PLMC + // Compute the eigenvectors + reconstruction::EigenVecs const eigenvectors = reconstruction::Compute_Eigenvectors(cell_i, gamma); + // Project the left, right, centered and van Leer differences onto the // characteristic variables Stone Eqn 37 (del_a are differences in // characteristic variables, see Stone for notation) Use the eigenvectors @@ -297,7 +298,7 @@ auto __device__ __inline__ PLM_Reconstruction(Real *dev_conserved, int const xid // Do the characteristic tracing #ifndef VL - PLMC_Characteristic_Evolution(cell_i, del_m, dt, dx, eigenvectors.sound_speed, interface_R_imh, interface_L_iph); + PLMC_Characteristic_Evolution(cell_i, del_m, dt, dx, gamma, interface_R_imh, interface_L_iph); #endif // VL // apply minimum constraints From 170f8a8b5b62257f03a960a51b810654cad31417 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 24 Apr 2024 12:53:17 -0400 Subject: [PATCH 20/29] Rename PLMC_Characteristic_Tracing to PLM_... --- src/reconstruction/plm_cuda.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/reconstruction/plm_cuda.h b/src/reconstruction/plm_cuda.h index 2a1d3153a..c214661a9 100644 --- a/src/reconstruction/plm_cuda.h +++ b/src/reconstruction/plm_cuda.h @@ -33,11 +33,11 @@ namespace reconstruction * \param interface_R_imh The R interface at i-1/2 * \param interface_L_iph The L interface at i+1/2 */ -void __device__ __host__ __inline__ PLMC_Characteristic_Evolution(hydro_utilities::Primitive const &cell_i, - hydro_utilities::Primitive const &del_m, - Real const dt, Real const dx, Real const gamma, - hydro_utilities::Primitive &interface_R_imh, - hydro_utilities::Primitive &interface_L_iph) +void __device__ __host__ __inline__ PLM_Characteristic_Evolution(hydro_utilities::Primitive const &cell_i, + hydro_utilities::Primitive const &del_m, Real const dt, + Real const dx, Real const gamma, + hydro_utilities::Primitive &interface_R_imh, + hydro_utilities::Primitive &interface_L_iph) { Real const dtodx = dt / dx; Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); @@ -298,7 +298,7 @@ auto __device__ __inline__ PLM_Reconstruction(Real *dev_conserved, int const xid // Do the characteristic tracing #ifndef VL - PLMC_Characteristic_Evolution(cell_i, del_m, dt, dx, gamma, interface_R_imh, interface_L_iph); + PLM_Characteristic_Evolution(cell_i, del_m, dt, dx, gamma, interface_R_imh, interface_L_iph); #endif // VL // apply minimum constraints From 24b808267c06ea9085d9628f5d24d7366460854e Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 29 Apr 2024 15:21:11 -0400 Subject: [PATCH 21/29] Fix a bad rebase --- src/integrators/VL_3D_cuda.cu | 42 +++++----- src/reconstruction/plm_cuda.h | 80 +++++++++---------- src/reconstruction/reconstruction_internals.h | 14 ++-- .../reconstruction_internals_tests.cu | 23 +----- 4 files changed, 71 insertions(+), 88 deletions(-) diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index d8c8f5d78..952b69663 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -155,17 +155,14 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int cuda_utilities::AutomaticLaunchParams static const roe_pcm_launch_params( Calculate_Roe_Fluxes_CUDA, n_cells); hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA), - roe_pcm_launch_params.get_numBlocks(), - roe_pcm_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_cells, gama, - n_fields); + roe_pcm_launch_params.get_numBlocks(), roe_pcm_launch_params.get_threadsPerBlock(), 0, 0, + dev_conserved, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_cells, gama, n_fields); hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA), - roe_pcm_launch_params.get_numBlocks(), - roe_pcm_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_cells, gama, - n_fields); + roe_pcm_launch_params.get_numBlocks(), roe_pcm_launch_params.get_threadsPerBlock(), 0, 0, + dev_conserved, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_cells, gama, n_fields); hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA), - roe_pcm_launch_params.get_numBlocks(), - roe_pcm_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_cells, gama, - n_fields); + roe_pcm_launch_params.get_numBlocks(), roe_pcm_launch_params.get_threadsPerBlock(), 0, 0, + dev_conserved, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_cells, gama, n_fields); #endif // ROE #ifdef HLLC cuda_utilities::AutomaticLaunchParams static const hllc_pcm_launch_params( @@ -290,29 +287,32 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int Calculate_Roe_Fluxes_CUDA, n_cells); hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA), roe_higher_order_launch_params.get_numBlocks(), - roe_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_cells, gama, - n_fields); + roe_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Lx, Q_Rx, F_x, + nx, ny, nz, n_cells, gama, n_fields); hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA), roe_higher_order_launch_params.get_numBlocks(), - roe_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_cells, gama, - n_fields); + roe_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Ly, Q_Ry, F_y, + nx, ny, nz, n_cells, gama, n_fields); hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_Roe_Fluxes_CUDA), roe_higher_order_launch_params.get_numBlocks(), - roe_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_cells, gama, - n_fields); + roe_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Lz, Q_Rz, F_z, + nx, ny, nz, n_cells, gama, n_fields); #endif // ROE #ifdef HLLC cuda_utilities::AutomaticLaunchParams static const hllc_higher_order_launch_params( Calculate_HLLC_Fluxes_CUDA, n_cells); hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_HLLC_Fluxes_CUDA), - hllc_higher_order_launch_params.numBlocks(), hllc_higher_order_launch_params.threadsPerBlock(), 0, 0, - dev_conserved_half, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_cells, gama, n_fields); + hllc_higher_order_launch_params.get_numBlocks(), + hllc_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Lx, Q_Rx, F_x, + nx, ny, nz, n_cells, gama, n_fields); hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_HLLC_Fluxes_CUDA), - hllc_higher_order_launch_params.numBlocks(), hllc_higher_order_launch_params.threadsPerBlock(), 0, 0, - dev_conserved_half, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_cells, gama, n_fields); + hllc_higher_order_launch_params.get_numBlocks(), + hllc_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Ly, Q_Ry, F_y, + nx, ny, nz, n_cells, gama, n_fields); hipLaunchKernelGGL(HIP_KERNEL_NAME(Calculate_HLLC_Fluxes_CUDA), - hllc_higher_order_launch_params.numBlocks(), hllc_higher_order_launch_params.threadsPerBlock(), 0, 0, - dev_conserved_half, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_cells, gama, n_fields); + hllc_higher_order_launch_params.get_numBlocks(), + hllc_higher_order_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Lz, Q_Rz, F_z, + nx, ny, nz, n_cells, gama, n_fields); #endif // HLLC #ifdef HLL cuda_utilities::AutomaticLaunchParams static const hll_higher_order_launch_params( diff --git a/src/reconstruction/plm_cuda.h b/src/reconstruction/plm_cuda.h index c214661a9..cd6aced72 100644 --- a/src/reconstruction/plm_cuda.h +++ b/src/reconstruction/plm_cuda.h @@ -44,25 +44,25 @@ void __device__ __host__ __inline__ PLM_Characteristic_Evolution(hydro_utilities // Compute the eigenvalues of the linearized equations in the // primitive variables using the cell-centered primitive variables - Real const lambda_m = cell_i.velocity.x - sound_speed; - Real const lambda_0 = cell_i.velocity.x; - Real const lambda_p = cell_i.velocity.x + sound_speed; + Real const lambda_m = cell_i.velocity.x() - sound_speed; + Real const lambda_0 = cell_i.velocity.x(); + Real const lambda_p = cell_i.velocity.x() + sound_speed; // Integrate linear interpolation function over domain of dependence // defined by max(min) eigenvalue - Real qx = -0.5 * fmin(lambda_m, 0.0) * dtodx; - interface_R_imh.density = interface_R_imh.density + qx * del_m.density; - interface_R_imh.velocity.x = interface_R_imh.velocity.x + qx * del_m.velocity.x; - interface_R_imh.velocity.y = interface_R_imh.velocity.y + qx * del_m.velocity.y; - interface_R_imh.velocity.z = interface_R_imh.velocity.z + qx * del_m.velocity.z; - interface_R_imh.pressure = interface_R_imh.pressure + qx * del_m.pressure; - - qx = 0.5 * fmax(lambda_p, 0.0) * dtodx; - interface_L_iph.density = interface_L_iph.density - qx * del_m.density; - interface_L_iph.velocity.x = interface_L_iph.velocity.x - qx * del_m.velocity.x; - interface_L_iph.velocity.y = interface_L_iph.velocity.y - qx * del_m.velocity.y; - interface_L_iph.velocity.z = interface_L_iph.velocity.z - qx * del_m.velocity.z; - interface_L_iph.pressure = interface_L_iph.pressure - qx * del_m.pressure; + Real qx = -0.5 * fmin(lambda_m, 0.0) * dtodx; + interface_R_imh.density = interface_R_imh.density + qx * del_m.density; + interface_R_imh.velocity.x() = interface_R_imh.velocity.x() + qx * del_m.velocity.x(); + interface_R_imh.velocity.y() = interface_R_imh.velocity.y() + qx * del_m.velocity.y(); + interface_R_imh.velocity.z() = interface_R_imh.velocity.z() + qx * del_m.velocity.z(); + interface_R_imh.pressure = interface_R_imh.pressure + qx * del_m.pressure; + + qx = 0.5 * fmax(lambda_p, 0.0) * dtodx; + interface_L_iph.density = interface_L_iph.density - qx * del_m.density; + interface_L_iph.velocity.x() = interface_L_iph.velocity.x() - qx * del_m.velocity.x(); + interface_L_iph.velocity.y() = interface_L_iph.velocity.y() - qx * del_m.velocity.y(); + interface_L_iph.velocity.z() = interface_L_iph.velocity.z() - qx * del_m.velocity.z(); + interface_L_iph.pressure = interface_L_iph.pressure - qx * del_m.pressure; #ifdef DE interface_R_imh.gas_energy_specific = interface_R_imh.gas_energy_specific + qx * del_m.gas_energy_specific; @@ -93,17 +93,17 @@ void __device__ __host__ __inline__ PLM_Characteristic_Evolution(hydro_utilities if (lambda_m >= 0) { Real lamdiff = lambda_p - lambda_m; - sum_0 += lamdiff * (-cell_i.density * del_m.velocity.x / (2 * sound_speed) + + sum_0 += lamdiff * (-cell_i.density * del_m.velocity.x() / (2 * sound_speed) + del_m.pressure / (2 * sound_speed * sound_speed)); - sum_1 += lamdiff * (del_m.velocity.x / 2.0 - del_m.pressure / (2 * sound_speed * cell_i.density)); - sum_4 += lamdiff * (-cell_i.density * del_m.velocity.x * sound_speed / 2.0 + del_m.pressure / 2.0); + sum_1 += lamdiff * (del_m.velocity.x() / 2.0 - del_m.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_m.velocity.x() * sound_speed / 2.0 + del_m.pressure / 2.0); } if (lambda_0 >= 0) { Real lamdiff = lambda_p - lambda_0; sum_0 += lamdiff * (del_m.density - del_m.pressure / (sound_speed * sound_speed)); - sum_2 += lamdiff * del_m.velocity.y; - sum_3 += lamdiff * del_m.velocity.z; + sum_2 += lamdiff * del_m.velocity.y(); + sum_3 += lamdiff * del_m.velocity.z(); #ifdef DE sum_ge += lamdiff * del_m.gas_energy_specific; #endif // DE @@ -116,17 +116,17 @@ void __device__ __host__ __inline__ PLM_Characteristic_Evolution(hydro_utilities if (lambda_p >= 0) { Real lamdiff = lambda_p - lambda_p; - sum_0 += lamdiff * - (cell_i.density * del_m.velocity.x / (2 * sound_speed) + del_m.pressure / (2 * sound_speed * sound_speed)); - sum_1 += lamdiff * (del_m.velocity.x / 2.0 + del_m.pressure / (2 * sound_speed * cell_i.density)); - sum_4 += lamdiff * (cell_i.density * del_m.velocity.x * sound_speed / 2.0 + del_m.pressure / 2.0); + sum_0 += lamdiff * (cell_i.density * del_m.velocity.x() / (2 * sound_speed) + + del_m.pressure / (2 * sound_speed * sound_speed)); + sum_1 += lamdiff * (del_m.velocity.x() / 2.0 + del_m.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (cell_i.density * del_m.velocity.x() * sound_speed / 2.0 + del_m.pressure / 2.0); } // add the corrections to the initial guesses for the interface values interface_L_iph.density += 0.5 * dtodx * sum_0; - interface_L_iph.velocity.x += 0.5 * dtodx * sum_1; - interface_L_iph.velocity.y += 0.5 * dtodx * sum_2; - interface_L_iph.velocity.z += 0.5 * dtodx * sum_3; + interface_L_iph.velocity.x() += 0.5 * dtodx * sum_1; + interface_L_iph.velocity.y() += 0.5 * dtodx * sum_2; + interface_L_iph.velocity.z() += 0.5 * dtodx * sum_3; interface_L_iph.pressure += 0.5 * dtodx * sum_4; #ifdef DE interface_L_iph.gas_energy_specific += 0.5 * dtodx * sum_ge; @@ -150,17 +150,17 @@ void __device__ __host__ __inline__ PLM_Characteristic_Evolution(hydro_utilities if (lambda_m <= 0) { Real lamdiff = lambda_m - lambda_m; - sum_0 += lamdiff * (-cell_i.density * del_m.velocity.x / (2 * sound_speed) + + sum_0 += lamdiff * (-cell_i.density * del_m.velocity.x() / (2 * sound_speed) + del_m.pressure / (2 * sound_speed * sound_speed)); - sum_1 += lamdiff * (del_m.velocity.x / 2.0 - del_m.pressure / (2 * sound_speed * cell_i.density)); - sum_4 += lamdiff * (-cell_i.density * del_m.velocity.x * sound_speed / 2.0 + del_m.pressure / 2.0); + sum_1 += lamdiff * (del_m.velocity.x() / 2.0 - del_m.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_m.velocity.x() * sound_speed / 2.0 + del_m.pressure / 2.0); } if (lambda_0 <= 0) { Real lamdiff = lambda_m - lambda_0; sum_0 += lamdiff * (del_m.density - del_m.pressure / (sound_speed * sound_speed)); - sum_2 += lamdiff * del_m.velocity.y; - sum_3 += lamdiff * del_m.velocity.z; + sum_2 += lamdiff * del_m.velocity.y(); + sum_3 += lamdiff * del_m.velocity.z(); #ifdef DE sum_ge += lamdiff * del_m.gas_energy_specific; #endif // DE @@ -173,17 +173,17 @@ void __device__ __host__ __inline__ PLM_Characteristic_Evolution(hydro_utilities if (lambda_p <= 0) { Real lamdiff = lambda_m - lambda_p; - sum_0 += lamdiff * - (cell_i.density * del_m.velocity.x / (2 * sound_speed) + del_m.pressure / (2 * sound_speed * sound_speed)); - sum_1 += lamdiff * (del_m.velocity.x / 2.0 + del_m.pressure / (2 * sound_speed * cell_i.density)); - sum_4 += lamdiff * (cell_i.density * del_m.velocity.x * sound_speed / 2.0 + del_m.pressure / 2.0); + sum_0 += lamdiff * (cell_i.density * del_m.velocity.x() / (2 * sound_speed) + + del_m.pressure / (2 * sound_speed * sound_speed)); + sum_1 += lamdiff * (del_m.velocity.x() / 2.0 + del_m.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (cell_i.density * del_m.velocity.x() * sound_speed / 2.0 + del_m.pressure / 2.0); } // add the corrections interface_R_imh.density += 0.5 * dtodx * sum_0; - interface_R_imh.velocity.x += 0.5 * dtodx * sum_1; - interface_R_imh.velocity.y += 0.5 * dtodx * sum_2; - interface_R_imh.velocity.z += 0.5 * dtodx * sum_3; + interface_R_imh.velocity.x() += 0.5 * dtodx * sum_1; + interface_R_imh.velocity.y() += 0.5 * dtodx * sum_2; + interface_R_imh.velocity.z() += 0.5 * dtodx * sum_3; interface_R_imh.pressure += 0.5 * dtodx * sum_4; #ifdef DE interface_R_imh.gas_energy_specific += 0.5 * dtodx * sum_ge; diff --git a/src/reconstruction/reconstruction_internals.h b/src/reconstruction/reconstruction_internals.h index 5a2358dae..dbb624e04 100644 --- a/src/reconstruction/reconstruction_internals.h +++ b/src/reconstruction/reconstruction_internals.h @@ -638,15 +638,15 @@ hydro_utilities::Primitive __device__ __host__ __inline__ Van_Leer_Limiter(hydro hydro_utilities::Primitive del_m; // Monotonize the slopes - del_m.density = Van_Leer_Limiter(del_L.density, del_R.density, del_C.density, del_G.density); - del_m.velocity.x = Van_Leer_Limiter(del_L.velocity.x, del_R.velocity.x, del_C.velocity.x, del_G.velocity.x); - del_m.velocity.y = Van_Leer_Limiter(del_L.velocity.y, del_R.velocity.y, del_C.velocity.y, del_G.velocity.y); - del_m.velocity.z = Van_Leer_Limiter(del_L.velocity.z, del_R.velocity.z, del_C.velocity.z, del_G.velocity.z); - del_m.pressure = Van_Leer_Limiter(del_L.pressure, del_R.pressure, del_C.pressure, del_G.pressure); + del_m.density = Van_Leer_Limiter(del_L.density, del_R.density, del_C.density, del_G.density); + del_m.velocity.x() = Van_Leer_Limiter(del_L.velocity.x(), del_R.velocity.x(), del_C.velocity.x(), del_G.velocity.x()); + del_m.velocity.y() = Van_Leer_Limiter(del_L.velocity.y(), del_R.velocity.y(), del_C.velocity.y(), del_G.velocity.y()); + del_m.velocity.z() = Van_Leer_Limiter(del_L.velocity.z(), del_R.velocity.z(), del_C.velocity.z(), del_G.velocity.z()); + del_m.pressure = Van_Leer_Limiter(del_L.pressure, del_R.pressure, del_C.pressure, del_G.pressure); #ifdef MHD - del_m.magnetic.y = Van_Leer_Limiter(del_L.magnetic.y, del_R.magnetic.y, del_C.magnetic.y, del_G.magnetic.y); - del_m.magnetic.z = Van_Leer_Limiter(del_L.magnetic.z, del_R.magnetic.z, del_C.magnetic.z, del_G.magnetic.z); + del_m.magnetic.y() = Van_Leer_Limiter(del_L.magnetic.y(), del_R.magnetic.y(), del_C.magnetic.y(), del_G.magnetic.y()); + del_m.magnetic.z() = Van_Leer_Limiter(del_L.magnetic.z(), del_R.magnetic.z(), del_C.magnetic.z(), del_G.magnetic.z()); #endif // MHD #ifdef DE diff --git a/src/reconstruction/reconstruction_internals_tests.cu b/src/reconstruction/reconstruction_internals_tests.cu index 7177252f0..ec3c579d2 100644 --- a/src/reconstruction/reconstruction_internals_tests.cu +++ b/src/reconstruction/reconstruction_internals_tests.cu @@ -205,9 +205,9 @@ TEST(tHYDROReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutp // Check results hydro_utilities::Primitive const fiducial_results{56, {11.547005383792516, 19, 20}, 316.66666666666674}; testing_utilities::Check_Results(fiducial_results.density, host_results.density, "density"); - testing_utilities::Check_Results(fiducial_results.velocity.x, host_results.velocity.x, "velocity.x"); - testing_utilities::Check_Results(fiducial_results.velocity.y, host_results.velocity.y, "velocity.y"); - testing_utilities::Check_Results(fiducial_results.velocity.z, host_results.velocity.z, "velocity.z"); + testing_utilities::Check_Results(fiducial_results.velocity.x(), host_results.velocity.x(), "velocity.x"); + testing_utilities::Check_Results(fiducial_results.velocity.y(), host_results.velocity.y(), "velocity.y"); + testing_utilities::Check_Results(fiducial_results.velocity.z(), host_results.velocity.z(), "velocity.z"); testing_utilities::Check_Results(fiducial_results.pressure, host_results.pressure, "pressure"); } @@ -445,23 +445,6 @@ TEST(tALLReconstructionVanLeerSlopeCharacteristic, CorrectInputExpectCorrectOutp testing_utilities::Check_Results(fiducial_data.a2, test_data.a2, "a2"); testing_utilities::Check_Results(fiducial_data.a3, test_data.a3, "a3"); testing_utilities::Check_Results(fiducial_data.a4, test_data.a4, "a4"); -#ifdef MHD - hydro_utilities::Primitive const fiducial_data{5046, {2934, -2526, -2828}, 1441532, {0.0, -69716, 72152}}; - testing_utilities::Check_Results(fiducial_data.density, host_results.density, "density"); - testing_utilities::Check_Results(fiducial_data.velocity.x(), host_results.velocity.x(), "velocity.x()"); - testing_utilities::Check_Results(fiducial_data.velocity.y(), host_results.velocity.y(), "velocity.y()"); - testing_utilities::Check_Results(fiducial_data.velocity.z(), host_results.velocity.z(), "velocity.z()"); - testing_utilities::Check_Results(fiducial_data.pressure, host_results.pressure, "pressure"); - testing_utilities::Check_Results(fiducial_data.magnetic.y(), host_results.magnetic.y(), "magnetic.y()"); - testing_utilities::Check_Results(fiducial_data.magnetic.z(), host_results.magnetic.z(), "magnetic.z()"); -#else // MHD - hydro_utilities::Primitive const fiducial_data{170, {68, 57, 58}, 32946}; - testing_utilities::Check_Results(fiducial_data.density, host_results.density, "density"); - testing_utilities::Check_Results(fiducial_data.velocity.x(), host_results.velocity.x(), "velocity.x()"); - testing_utilities::Check_Results(fiducial_data.velocity.y(), host_results.velocity.y(), "velocity.y()"); - testing_utilities::Check_Results(fiducial_data.velocity.z(), host_results.velocity.z(), "velocity.z()"); - testing_utilities::Check_Results(fiducial_data.pressure, host_results.pressure, "pressure"); -#endif // MHD } TEST(tHYDROReconstructionMonotizeParabolicInterface, CorrectInputExpectCorrectOutput) From f79db634a9e0cb8ccc46ed36e4f5330204e46af1 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 26 Feb 2024 15:51:08 -0500 Subject: [PATCH 22/29] Remove standalone PCM kernels --- src/reconstruction/pcm_cuda.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/reconstruction/pcm_cuda.h b/src/reconstruction/pcm_cuda.h index d1c593ff2..7f22efb30 100644 --- a/src/reconstruction/pcm_cuda.h +++ b/src/reconstruction/pcm_cuda.h @@ -7,18 +7,6 @@ #include "../utils/basic_structs.h" #include "../utils/hydro_utilities.h" -__global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int n_cells, - int n_ghost, Real gamma, int n_fields); - -__global__ void PCM_Reconstruction_2D(Real *dev_conserved, Real *dev_bounds_Lx, Real *dev_bounds_Rx, - Real *dev_bounds_Ly, Real *dev_bounds_Ry, int nx, int ny, int n_ghost, Real gamma, - int n_fields); - -__global__ void PCM_Reconstruction_3D(Real *dev_conserved, Real *dev_bounds_Lx, Real *dev_bounds_Rx, - Real *dev_bounds_Ly, Real *dev_bounds_Ry, Real *dev_bounds_Lz, - Real *dev_bounds_Rz, int nx, int ny, int nz, int n_ghost, Real gamma, - int n_fields); - namespace reconstruction { /*! From 502f30a9f4d432359a88490e3b6c03e091e05848 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 4 Mar 2024 15:11:56 -0500 Subject: [PATCH 23/29] Refactor Monotonize_Characteristic_Return_Primitive Refactored this function to put the limiter into its own function, rename it to better reflect what it actually does, and move the characteristic to primitive conversion outside the function --- src/reconstruction/plmc_cuda.cu | 323 ++++++++++++++++++++++++++++++++ 1 file changed, 323 insertions(+) create mode 100644 src/reconstruction/plmc_cuda.cu diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu new file mode 100644 index 000000000..180f77905 --- /dev/null +++ b/src/reconstruction/plmc_cuda.cu @@ -0,0 +1,323 @@ +/*! \file plmc_cuda.cu + * \brief Definitions of the piecewise linear reconstruction functions with + limiting applied in the characteristic variables, as described + in Stone et al., 2008. */ + +#include + +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../reconstruction/plmc_cuda.h" +#include "../reconstruction/reconstruction_internals.h" +#include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" + +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" +#endif // DE + +/*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real + *dev_bounds_R, int nx, int ny, int nz, Real dx, Real dt, Real + gamma, int dir) + * \brief When passed a stencil of conserved variables, returns the left and + right boundary values for the interface calculated using plm. */ +template +__global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real dx, Real dt, Real gamma, int n_fields) +{ + // get a thread ID + int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); + + // Ensure that we are only operating on cells that will be used + if (reconstruction::Thread_Guard<2>(nx, ny, nz, xid, yid, zid)) { + return; + } + + // Compute the total number of cells + int const n_cells = nx * ny * nz; + + // Set the field indices for the various directions + int o1, o2, o3; + switch (dir) { + case 0: + o1 = grid_enum::momentum_x; + o2 = grid_enum::momentum_y; + o3 = grid_enum::momentum_z; + break; + case 1: + o1 = grid_enum::momentum_y; + o2 = grid_enum::momentum_z; + o3 = grid_enum::momentum_x; + break; + case 2: + o1 = grid_enum::momentum_z; + o2 = grid_enum::momentum_x; + o3 = grid_enum::momentum_y; + break; + } + + // load the 3-cell stencil into registers + // cell i + hydro_utilities::Primitive const cell_i = + hydro_utilities::Load_Cell_Primitive(dev_conserved, xid, yid, zid, nx, ny, n_cells, gamma); + + // cell i-1. The equality checks the direction and will subtract one from the correct direction + hydro_utilities::Primitive const cell_imo = hydro_utilities::Load_Cell_Primitive( + dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, gamma); + + // cell i+1. The equality checks the direction and add one to the correct direction + hydro_utilities::Primitive const cell_ipo = hydro_utilities::Load_Cell_Primitive( + dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, gamma); + + // calculate the adiabatic sound speed in cell i + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + +// Compute the eigenvectors +#ifdef MHD + reconstruction::EigenVecs const eigenvectors = + reconstruction::Compute_Eigenvectors(cell_i, sound_speed, sound_speed_squared, gamma); +#else + reconstruction::EigenVecs eigenvectors; +#endif // MHD + + // Compute the left, right, centered, and van Leer differences of the + // primitive variables Note that here L and R refer to locations relative to + // the cell center + + // left + hydro_utilities::Primitive const del_L = reconstruction::Compute_Slope(cell_imo, cell_i); + + // right + hydro_utilities::Primitive const del_R = reconstruction::Compute_Slope(cell_i, cell_ipo); + + // centered + hydro_utilities::Primitive const del_C = reconstruction::Compute_Slope(cell_imo, cell_ipo, 0.5); + + // Van Leer + hydro_utilities::Primitive const del_G = reconstruction::Compute_Van_Leer_Slope(del_L, del_R); + + // Project the left, right, centered and van Leer differences onto the + // characteristic variables Stone Eqn 37 (del_a are differences in + // characteristic variables, see Stone for notation) Use the eigenvectors + // given in Stone 2008, Appendix A + reconstruction::Characteristic const del_a_L = + reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvectors, sound_speed, sound_speed_squared, gamma); + + reconstruction::Characteristic const del_a_R = + reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvectors, sound_speed, sound_speed_squared, gamma); + + reconstruction::Characteristic const del_a_C = + reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvectors, sound_speed, sound_speed_squared, gamma); + + reconstruction::Characteristic const del_a_G = + reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Apply monotonicity constraints to the differences in the characteristic variables and project the monotonized + // difference in the characteristic variables back onto the primitive variables Stone Eqn 39 + reconstruction::Characteristic const del_a_m = reconstruction::Van_Leer_Limiter(del_a_L, del_a_R, del_a_C, del_a_G); + + // Project back into the primitive variables. + hydro_utilities::Primitive del_m = + Characteristic_To_Primitive(cell_i, del_a_m, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Limit the variables that aren't transformed by the characteristic projection +#ifdef DE + del_m.gas_energy_specific = Van_Leer_Limiter(del_L.gas_energy_specific, del_R.gas_energy_specific, + del_C.gas_energy_specific, del_G.gas_energy_specific); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + del_m.scalar_specific[i] = Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], + del_C.scalar_specific[i], del_G.scalar_specific[i]); + } +#endif // SCALAR + + // Compute the left and right interface values using the monotonized difference in the primitive variables + hydro_utilities::Primitive interface_L_iph = reconstruction::Calc_Interface_Linear(cell_i, del_m, 1.0); + hydro_utilities::Primitive interface_R_imh = reconstruction::Calc_Interface_Linear(cell_i, del_m, -1.0); + +#ifndef VL + + Real const dtodx = dt / dx; + + // Compute the eigenvalues of the linearized equations in the + // primitive variables using the cell-centered primitive variables + Real const lambda_m = cell_i.velocity.x - sound_speed; + Real const lambda_0 = cell_i.velocity.x; + Real const lambda_p = cell_i.velocity.x + sound_speed; + + // Integrate linear interpolation function over domain of dependence + // defined by max(min) eigenvalue + Real qx = -0.5 * fmin(lambda_m, 0.0) * dtodx; + interface_R_imh.density = interface_R_imh.density + qx * del_m.density; + interface_R_imh.velocity.x = interface_R_imh.velocity.x + qx * del_m.velocity.x; + interface_R_imh.velocity.y = interface_R_imh.velocity.y + qx * del_m.velocity.y; + interface_R_imh.velocity.z = interface_R_imh.velocity.z + qx * del_m.velocity.z; + interface_R_imh.pressure = interface_R_imh.pressure + qx * del_m.pressure; + + qx = 0.5 * fmax(lambda_p, 0.0) * dtodx; + interface_L_iph.density = interface_L_iph.density - qx * del_m.density; + interface_L_iph.velocity.x = interface_L_iph.velocity.x - qx * del_m.velocity.x; + interface_L_iph.velocity.y = interface_L_iph.velocity.y - qx * del_m.velocity.y; + interface_L_iph.velocity.z = interface_L_iph.velocity.z - qx * del_m.velocity.z; + interface_L_iph.pressure = interface_L_iph.pressure - qx * del_m.pressure; + + #ifdef DE + interface_R_imh.gas_energy_specific = interface_R_imh.gas_energy_specific + qx * del_m.gas_energy_specific; + interface_L_iph.gas_energy_specific = interface_L_iph.gas_energy_specific - qx * del_m.gas_energy_specific; + #endif // DE + + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_R_imh.scalar[i] = interface_R_imh.scalar[i] + qx * del_m.scalar[i]; + interface_L_iph.scalar[i] = interface_L_iph.scalar[i] - qx * del_m.scalar[i]; + } + #endif // SCALAR + + // Perform the characteristic tracing + // Stone Eqns 42 & 43 + + // left-hand interface value, i+1/2 + Real sum_0 = 0.0, sum_1 = 0.0, sum_2 = 0.0, sum_3 = 0.0, sum_4 = 0.0; + #ifdef DE + Real sum_ge = 0; + #endif // DE + #ifdef SCALAR + Real sum_scalar[NSCALARS]; + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] = 0.0; + } + #endif // SCALAR + if (lambda_m >= 0) { + Real lamdiff = lambda_p - lambda_m; + + sum_0 += + lamdiff * (-cell_i.density * del_m.velocity.x / (2 * sound_speed) + del_m.pressure / (2 * sound_speed_squared)); + sum_1 += lamdiff * (del_m.velocity.x / 2.0 - del_m.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_m.velocity.x * sound_speed / 2.0 + del_m.pressure / 2.0); + } + if (lambda_0 >= 0) { + Real lamdiff = lambda_p - lambda_0; + + sum_0 += lamdiff * (del_m.density - del_m.pressure / (sound_speed_squared)); + sum_2 += lamdiff * del_m.velocity.y; + sum_3 += lamdiff * del_m.velocity.z; + #ifdef DE + sum_ge += lamdiff * del_m.gas_energy_specific; + #endif // DE + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] += lamdiff * del_m.scalar[i]; + } + #endif // SCALAR + } + if (lambda_p >= 0) { + Real lamdiff = lambda_p - lambda_p; + + sum_0 += + lamdiff * (cell_i.density * del_m.velocity.x / (2 * sound_speed) + del_m.pressure / (2 * sound_speed_squared)); + sum_1 += lamdiff * (del_m.velocity.x / 2.0 + del_m.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (cell_i.density * del_m.velocity.x * sound_speed / 2.0 + del_m.pressure / 2.0); + } + + // add the corrections to the initial guesses for the interface values + interface_L_iph.density += 0.5 * dtodx * sum_0; + interface_L_iph.velocity.x += 0.5 * dtodx * sum_1; + interface_L_iph.velocity.y += 0.5 * dtodx * sum_2; + interface_L_iph.velocity.z += 0.5 * dtodx * sum_3; + interface_L_iph.pressure += 0.5 * dtodx * sum_4; + #ifdef DE + interface_L_iph.gas_energy_specific += 0.5 * dtodx * sum_ge; + #endif // DE + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_L_iph.scalar[i] += 0.5 * dtodx * sum_scalar[i]; + } + #endif // SCALAR + + // right-hand interface value, i-1/2 + sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; + #ifdef DE + sum_ge = 0; + #endif // DE + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] = 0; + } + #endif // SCALAR + if (lambda_m <= 0) { + Real lamdiff = lambda_m - lambda_m; + + sum_0 += + lamdiff * (-cell_i.density * del_m.velocity.x / (2 * sound_speed) + del_m.pressure / (2 * sound_speed_squared)); + sum_1 += lamdiff * (del_m.velocity.x / 2.0 - del_m.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_m.velocity.x * sound_speed / 2.0 + del_m.pressure / 2.0); + } + if (lambda_0 <= 0) { + Real lamdiff = lambda_m - lambda_0; + + sum_0 += lamdiff * (del_m.density - del_m.pressure / (sound_speed_squared)); + sum_2 += lamdiff * del_m.velocity.y; + sum_3 += lamdiff * del_m.velocity.z; + #ifdef DE + sum_ge += lamdiff * del_m.gas_energy_specific; + #endif // DE + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] += lamdiff * del_m.scalar[i]; + } + #endif // SCALAR + } + if (lambda_p <= 0) { + Real lamdiff = lambda_m - lambda_p; + + sum_0 += + lamdiff * (cell_i.density * del_m.velocity.x / (2 * sound_speed) + del_m.pressure / (2 * sound_speed_squared)); + sum_1 += lamdiff * (del_m.velocity.x / 2.0 + del_m.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (cell_i.density * del_m.velocity.x * sound_speed / 2.0 + del_m.pressure / 2.0); + } + + // add the corrections + interface_R_imh.density += 0.5 * dtodx * sum_0; + interface_R_imh.velocity.x += 0.5 * dtodx * sum_1; + interface_R_imh.velocity.y += 0.5 * dtodx * sum_2; + interface_R_imh.velocity.z += 0.5 * dtodx * sum_3; + interface_R_imh.pressure += 0.5 * dtodx * sum_4; + #ifdef DE + interface_R_imh.gas_energy_specific += 0.5 * dtodx * sum_ge; + #endif // DE + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_R_imh.scalar[i] += 0.5 * dtodx * sum_scalar[i]; + } + #endif // SCALAR +#endif // CTU + + // apply minimum constraints + interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); + interface_L_iph.density = fmax(interface_L_iph.density, (Real)TINY_NUMBER); + interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER); + interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER); + + // Convert the left and right states in the primitive to the conserved variables send final values back from kernel + // bounds_R refers to the right side of the i-1/2 interface + size_t id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + reconstruction::Write_Data(interface_L_iph, dev_bounds_L, dev_conserved, id, n_cells, o1, o2, o3, gamma); + + id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny); + reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma); +} + +// Instantiate the relevant template specifications +template __global__ __launch_bounds__(TPB) void PLMC_cuda<0>(Real *dev_conserved, Real *dev_bounds_L, + Real *dev_bounds_R, int nx, int ny, int nz, Real dx, + Real dt, Real gamma, int n_fields); +template __global__ __launch_bounds__(TPB) void PLMC_cuda<1>(Real *dev_conserved, Real *dev_bounds_L, + Real *dev_bounds_R, int nx, int ny, int nz, Real dx, + Real dt, Real gamma, int n_fields); +template __global__ __launch_bounds__(TPB) void PLMC_cuda<2>(Real *dev_conserved, Real *dev_bounds_L, + Real *dev_bounds_R, int nx, int ny, int nz, Real dx, + Real dt, Real gamma, int n_fields); From 35ce51263bee989737b75377abb263210f70207b Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 4 Mar 2024 16:03:26 -0500 Subject: [PATCH 24/29] Move sound_speed into EigenVecs Sound speed plays a similar role to the other members of EigenVecs so including it makes sense rather than passing around extra arguments --- src/reconstruction/plmc_cuda.cu | 323 -------------------------------- 1 file changed, 323 deletions(-) delete mode 100644 src/reconstruction/plmc_cuda.cu diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu deleted file mode 100644 index 180f77905..000000000 --- a/src/reconstruction/plmc_cuda.cu +++ /dev/null @@ -1,323 +0,0 @@ -/*! \file plmc_cuda.cu - * \brief Definitions of the piecewise linear reconstruction functions with - limiting applied in the characteristic variables, as described - in Stone et al., 2008. */ - -#include - -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../reconstruction/plmc_cuda.h" -#include "../reconstruction/reconstruction_internals.h" -#include "../utils/cuda_utilities.h" -#include "../utils/gpu.hpp" - -#ifdef DE // PRESSURE_DE - #include "../utils/hydro_utilities.h" -#endif // DE - -/*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real - *dev_bounds_R, int nx, int ny, int nz, Real dx, Real dt, Real - gamma, int dir) - * \brief When passed a stencil of conserved variables, returns the left and - right boundary values for the interface calculated using plm. */ -template -__global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, - int ny, int nz, Real dx, Real dt, Real gamma, int n_fields) -{ - // get a thread ID - int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; - int xid, yid, zid; - cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); - - // Ensure that we are only operating on cells that will be used - if (reconstruction::Thread_Guard<2>(nx, ny, nz, xid, yid, zid)) { - return; - } - - // Compute the total number of cells - int const n_cells = nx * ny * nz; - - // Set the field indices for the various directions - int o1, o2, o3; - switch (dir) { - case 0: - o1 = grid_enum::momentum_x; - o2 = grid_enum::momentum_y; - o3 = grid_enum::momentum_z; - break; - case 1: - o1 = grid_enum::momentum_y; - o2 = grid_enum::momentum_z; - o3 = grid_enum::momentum_x; - break; - case 2: - o1 = grid_enum::momentum_z; - o2 = grid_enum::momentum_x; - o3 = grid_enum::momentum_y; - break; - } - - // load the 3-cell stencil into registers - // cell i - hydro_utilities::Primitive const cell_i = - hydro_utilities::Load_Cell_Primitive(dev_conserved, xid, yid, zid, nx, ny, n_cells, gamma); - - // cell i-1. The equality checks the direction and will subtract one from the correct direction - hydro_utilities::Primitive const cell_imo = hydro_utilities::Load_Cell_Primitive( - dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, gamma); - - // cell i+1. The equality checks the direction and add one to the correct direction - hydro_utilities::Primitive const cell_ipo = hydro_utilities::Load_Cell_Primitive( - dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, gamma); - - // calculate the adiabatic sound speed in cell i - Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); - Real const sound_speed_squared = sound_speed * sound_speed; - -// Compute the eigenvectors -#ifdef MHD - reconstruction::EigenVecs const eigenvectors = - reconstruction::Compute_Eigenvectors(cell_i, sound_speed, sound_speed_squared, gamma); -#else - reconstruction::EigenVecs eigenvectors; -#endif // MHD - - // Compute the left, right, centered, and van Leer differences of the - // primitive variables Note that here L and R refer to locations relative to - // the cell center - - // left - hydro_utilities::Primitive const del_L = reconstruction::Compute_Slope(cell_imo, cell_i); - - // right - hydro_utilities::Primitive const del_R = reconstruction::Compute_Slope(cell_i, cell_ipo); - - // centered - hydro_utilities::Primitive const del_C = reconstruction::Compute_Slope(cell_imo, cell_ipo, 0.5); - - // Van Leer - hydro_utilities::Primitive const del_G = reconstruction::Compute_Van_Leer_Slope(del_L, del_R); - - // Project the left, right, centered and van Leer differences onto the - // characteristic variables Stone Eqn 37 (del_a are differences in - // characteristic variables, see Stone for notation) Use the eigenvectors - // given in Stone 2008, Appendix A - reconstruction::Characteristic const del_a_L = - reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvectors, sound_speed, sound_speed_squared, gamma); - - reconstruction::Characteristic const del_a_R = - reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvectors, sound_speed, sound_speed_squared, gamma); - - reconstruction::Characteristic const del_a_C = - reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvectors, sound_speed, sound_speed_squared, gamma); - - reconstruction::Characteristic const del_a_G = - reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvectors, sound_speed, sound_speed_squared, gamma); - - // Apply monotonicity constraints to the differences in the characteristic variables and project the monotonized - // difference in the characteristic variables back onto the primitive variables Stone Eqn 39 - reconstruction::Characteristic const del_a_m = reconstruction::Van_Leer_Limiter(del_a_L, del_a_R, del_a_C, del_a_G); - - // Project back into the primitive variables. - hydro_utilities::Primitive del_m = - Characteristic_To_Primitive(cell_i, del_a_m, eigenvectors, sound_speed, sound_speed_squared, gamma); - - // Limit the variables that aren't transformed by the characteristic projection -#ifdef DE - del_m.gas_energy_specific = Van_Leer_Limiter(del_L.gas_energy_specific, del_R.gas_energy_specific, - del_C.gas_energy_specific, del_G.gas_energy_specific); -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_m.scalar_specific[i] = Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], - del_C.scalar_specific[i], del_G.scalar_specific[i]); - } -#endif // SCALAR - - // Compute the left and right interface values using the monotonized difference in the primitive variables - hydro_utilities::Primitive interface_L_iph = reconstruction::Calc_Interface_Linear(cell_i, del_m, 1.0); - hydro_utilities::Primitive interface_R_imh = reconstruction::Calc_Interface_Linear(cell_i, del_m, -1.0); - -#ifndef VL - - Real const dtodx = dt / dx; - - // Compute the eigenvalues of the linearized equations in the - // primitive variables using the cell-centered primitive variables - Real const lambda_m = cell_i.velocity.x - sound_speed; - Real const lambda_0 = cell_i.velocity.x; - Real const lambda_p = cell_i.velocity.x + sound_speed; - - // Integrate linear interpolation function over domain of dependence - // defined by max(min) eigenvalue - Real qx = -0.5 * fmin(lambda_m, 0.0) * dtodx; - interface_R_imh.density = interface_R_imh.density + qx * del_m.density; - interface_R_imh.velocity.x = interface_R_imh.velocity.x + qx * del_m.velocity.x; - interface_R_imh.velocity.y = interface_R_imh.velocity.y + qx * del_m.velocity.y; - interface_R_imh.velocity.z = interface_R_imh.velocity.z + qx * del_m.velocity.z; - interface_R_imh.pressure = interface_R_imh.pressure + qx * del_m.pressure; - - qx = 0.5 * fmax(lambda_p, 0.0) * dtodx; - interface_L_iph.density = interface_L_iph.density - qx * del_m.density; - interface_L_iph.velocity.x = interface_L_iph.velocity.x - qx * del_m.velocity.x; - interface_L_iph.velocity.y = interface_L_iph.velocity.y - qx * del_m.velocity.y; - interface_L_iph.velocity.z = interface_L_iph.velocity.z - qx * del_m.velocity.z; - interface_L_iph.pressure = interface_L_iph.pressure - qx * del_m.pressure; - - #ifdef DE - interface_R_imh.gas_energy_specific = interface_R_imh.gas_energy_specific + qx * del_m.gas_energy_specific; - interface_L_iph.gas_energy_specific = interface_L_iph.gas_energy_specific - qx * del_m.gas_energy_specific; - #endif // DE - - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - interface_R_imh.scalar[i] = interface_R_imh.scalar[i] + qx * del_m.scalar[i]; - interface_L_iph.scalar[i] = interface_L_iph.scalar[i] - qx * del_m.scalar[i]; - } - #endif // SCALAR - - // Perform the characteristic tracing - // Stone Eqns 42 & 43 - - // left-hand interface value, i+1/2 - Real sum_0 = 0.0, sum_1 = 0.0, sum_2 = 0.0, sum_3 = 0.0, sum_4 = 0.0; - #ifdef DE - Real sum_ge = 0; - #endif // DE - #ifdef SCALAR - Real sum_scalar[NSCALARS]; - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] = 0.0; - } - #endif // SCALAR - if (lambda_m >= 0) { - Real lamdiff = lambda_p - lambda_m; - - sum_0 += - lamdiff * (-cell_i.density * del_m.velocity.x / (2 * sound_speed) + del_m.pressure / (2 * sound_speed_squared)); - sum_1 += lamdiff * (del_m.velocity.x / 2.0 - del_m.pressure / (2 * sound_speed * cell_i.density)); - sum_4 += lamdiff * (-cell_i.density * del_m.velocity.x * sound_speed / 2.0 + del_m.pressure / 2.0); - } - if (lambda_0 >= 0) { - Real lamdiff = lambda_p - lambda_0; - - sum_0 += lamdiff * (del_m.density - del_m.pressure / (sound_speed_squared)); - sum_2 += lamdiff * del_m.velocity.y; - sum_3 += lamdiff * del_m.velocity.z; - #ifdef DE - sum_ge += lamdiff * del_m.gas_energy_specific; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += lamdiff * del_m.scalar[i]; - } - #endif // SCALAR - } - if (lambda_p >= 0) { - Real lamdiff = lambda_p - lambda_p; - - sum_0 += - lamdiff * (cell_i.density * del_m.velocity.x / (2 * sound_speed) + del_m.pressure / (2 * sound_speed_squared)); - sum_1 += lamdiff * (del_m.velocity.x / 2.0 + del_m.pressure / (2 * sound_speed * cell_i.density)); - sum_4 += lamdiff * (cell_i.density * del_m.velocity.x * sound_speed / 2.0 + del_m.pressure / 2.0); - } - - // add the corrections to the initial guesses for the interface values - interface_L_iph.density += 0.5 * dtodx * sum_0; - interface_L_iph.velocity.x += 0.5 * dtodx * sum_1; - interface_L_iph.velocity.y += 0.5 * dtodx * sum_2; - interface_L_iph.velocity.z += 0.5 * dtodx * sum_3; - interface_L_iph.pressure += 0.5 * dtodx * sum_4; - #ifdef DE - interface_L_iph.gas_energy_specific += 0.5 * dtodx * sum_ge; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - interface_L_iph.scalar[i] += 0.5 * dtodx * sum_scalar[i]; - } - #endif // SCALAR - - // right-hand interface value, i-1/2 - sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; - #ifdef DE - sum_ge = 0; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] = 0; - } - #endif // SCALAR - if (lambda_m <= 0) { - Real lamdiff = lambda_m - lambda_m; - - sum_0 += - lamdiff * (-cell_i.density * del_m.velocity.x / (2 * sound_speed) + del_m.pressure / (2 * sound_speed_squared)); - sum_1 += lamdiff * (del_m.velocity.x / 2.0 - del_m.pressure / (2 * sound_speed * cell_i.density)); - sum_4 += lamdiff * (-cell_i.density * del_m.velocity.x * sound_speed / 2.0 + del_m.pressure / 2.0); - } - if (lambda_0 <= 0) { - Real lamdiff = lambda_m - lambda_0; - - sum_0 += lamdiff * (del_m.density - del_m.pressure / (sound_speed_squared)); - sum_2 += lamdiff * del_m.velocity.y; - sum_3 += lamdiff * del_m.velocity.z; - #ifdef DE - sum_ge += lamdiff * del_m.gas_energy_specific; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += lamdiff * del_m.scalar[i]; - } - #endif // SCALAR - } - if (lambda_p <= 0) { - Real lamdiff = lambda_m - lambda_p; - - sum_0 += - lamdiff * (cell_i.density * del_m.velocity.x / (2 * sound_speed) + del_m.pressure / (2 * sound_speed_squared)); - sum_1 += lamdiff * (del_m.velocity.x / 2.0 + del_m.pressure / (2 * sound_speed * cell_i.density)); - sum_4 += lamdiff * (cell_i.density * del_m.velocity.x * sound_speed / 2.0 + del_m.pressure / 2.0); - } - - // add the corrections - interface_R_imh.density += 0.5 * dtodx * sum_0; - interface_R_imh.velocity.x += 0.5 * dtodx * sum_1; - interface_R_imh.velocity.y += 0.5 * dtodx * sum_2; - interface_R_imh.velocity.z += 0.5 * dtodx * sum_3; - interface_R_imh.pressure += 0.5 * dtodx * sum_4; - #ifdef DE - interface_R_imh.gas_energy_specific += 0.5 * dtodx * sum_ge; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - interface_R_imh.scalar[i] += 0.5 * dtodx * sum_scalar[i]; - } - #endif // SCALAR -#endif // CTU - - // apply minimum constraints - interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); - interface_L_iph.density = fmax(interface_L_iph.density, (Real)TINY_NUMBER); - interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER); - interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER); - - // Convert the left and right states in the primitive to the conserved variables send final values back from kernel - // bounds_R refers to the right side of the i-1/2 interface - size_t id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); - reconstruction::Write_Data(interface_L_iph, dev_bounds_L, dev_conserved, id, n_cells, o1, o2, o3, gamma); - - id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny); - reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma); -} - -// Instantiate the relevant template specifications -template __global__ __launch_bounds__(TPB) void PLMC_cuda<0>(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma, int n_fields); -template __global__ __launch_bounds__(TPB) void PLMC_cuda<1>(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma, int n_fields); -template __global__ __launch_bounds__(TPB) void PLMC_cuda<2>(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma, int n_fields); From 761098aee68e03d2cbcdbe6635167beda6fb4b25 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Wed, 24 Apr 2024 16:04:00 -0400 Subject: [PATCH 25/29] Merge PPMC_CTU and PPMC_VL I removed PPMC_CTU and added a call to the characteristic tracing function to PPMC_VL and renamed it to PPMC_cuda --- src/integrators/VL_1D_cuda.cu | 2 +- src/integrators/VL_2D_cuda.cu | 6 +- src/integrators/VL_3D_cuda.cu | 14 +- src/integrators/simple_1D_cuda.cu | 2 +- src/integrators/simple_2D_cuda.cu | 4 +- src/integrators/simple_3D_cuda.cu | 6 +- src/reconstruction/ppmc_cuda.cu | 578 +------------------------- src/reconstruction/ppmc_cuda.h | 41 +- src/reconstruction/ppmc_cuda_tests.cu | 120 +----- 9 files changed, 49 insertions(+), 724 deletions(-) diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu index 9995c5573..421351fba 100644 --- a/src/integrators/VL_1D_cuda.cu +++ b/src/integrators/VL_1D_cuda.cu @@ -90,7 +90,7 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea gama, 0, n_fields); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_VL<0>, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama); + hipLaunchKernelGGL(PPMC_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); #endif GPU_Error_Check(); diff --git a/src/integrators/VL_2D_cuda.cu b/src/integrators/VL_2D_cuda.cu index 8cd98632a..c993d695c 100644 --- a/src/integrators/VL_2D_cuda.cu +++ b/src/integrators/VL_2D_cuda.cu @@ -104,8 +104,10 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of dt, gama, 1, n_fields); #endif // PPMP #ifdef PPMC - hipLaunchKernelGGL(PPMC_VL<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama); - hipLaunchKernelGGL(PPMC_VL<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama); + hipLaunchKernelGGL(PPMC_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, + gama); + hipLaunchKernelGGL(PPMC_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, + gama); #endif // PPMC GPU_Error_Check(); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 952b69663..c98d784c7 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -255,13 +255,13 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); #endif // PPMP #ifdef PPMC - cuda_utilities::AutomaticLaunchParams static const ppmc_vl_launch_params(PPMC_VL<0>, n_cells); - hipLaunchKernelGGL(PPMC_VL<0>, ppmc_vl_launch_params.get_numBlocks(), ppmc_vl_launch_params.get_threadsPerBlock(), 0, - 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama); - hipLaunchKernelGGL(PPMC_VL<1>, ppmc_vl_launch_params.get_numBlocks(), ppmc_vl_launch_params.get_threadsPerBlock(), 0, - 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama); - hipLaunchKernelGGL(PPMC_VL<2>, ppmc_vl_launch_params.get_numBlocks(), ppmc_vl_launch_params.get_threadsPerBlock(), 0, - 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, gama); + cuda_utilities::AutomaticLaunchParams static const ppmc_vl_launch_params(PPMC_cuda<0>, n_cells); + hipLaunchKernelGGL(PPMC_cuda<0>, ppmc_vl_launch_params.get_numBlocks(), ppmc_vl_launch_params.get_threadsPerBlock(), + 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); + hipLaunchKernelGGL(PPMC_cuda<1>, ppmc_vl_launch_params.get_numBlocks(), ppmc_vl_launch_params.get_threadsPerBlock(), + 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); + hipLaunchKernelGGL(PPMC_cuda<2>, ppmc_vl_launch_params.get_numBlocks(), ppmc_vl_launch_params.get_threadsPerBlock(), + 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama); #endif // PPMC GPU_Error_Check(); diff --git a/src/integrators/simple_1D_cuda.cu b/src/integrators/simple_1D_cuda.cu index 369c055af..3137a2f27 100644 --- a/src/integrators/simple_1D_cuda.cu +++ b/src/integrators/simple_1D_cuda.cu @@ -62,7 +62,7 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, GPU_Error_Check(); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_CTU<0>, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); + hipLaunchKernelGGL(PPMC_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); GPU_Error_Check(); #endif diff --git a/src/integrators/simple_2D_cuda.cu b/src/integrators/simple_2D_cuda.cu index 426a9a2d9..bdfe1e25e 100644 --- a/src/integrators/simple_2D_cuda.cu +++ b/src/integrators/simple_2D_cuda.cu @@ -64,8 +64,8 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int gama, 1, n_fields); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_CTU<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); - hipLaunchKernelGGL(PPMC_CTU<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); + hipLaunchKernelGGL(PPMC_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); + hipLaunchKernelGGL(PPMC_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); #endif GPU_Error_Check(); diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 2e834e370..671e61768 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -98,9 +98,9 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, gama, 2, n_fields); #endif // PPMP #ifdef PPMC - hipLaunchKernelGGL(PPMC_CTU<0>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); - hipLaunchKernelGGL(PPMC_CTU<1>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); - hipLaunchKernelGGL(PPMC_CTU<2>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama); + hipLaunchKernelGGL(PPMC_cuda<0>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); + hipLaunchKernelGGL(PPMC_cuda<1>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); + hipLaunchKernelGGL(PPMC_cuda<2>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama); GPU_Error_Check(); #endif // PPMC diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index c13b78b91..49eed45c5 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -16,556 +16,9 @@ #endif // ===================================================================================================================== -/*! - * \brief When passed a stencil of conserved variables, returns the left and - right boundary values for the interface calculated using ppm. */ template -__global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma) -{ - // get a thread ID - int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; - int xid, yid, zid; - cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); - - if (reconstruction::Thread_Guard<3>(nx, ny, nz, xid, yid, zid)) { - return; - } - - // Compute the total number of cells - int const n_cells = nx * ny * nz; - - // Set the field indices for the various directions - int o1, o2, o3; - if constexpr (dir == 0) { - o1 = grid_enum::momentum_x; - o2 = grid_enum::momentum_y; - o3 = grid_enum::momentum_z; - } else if constexpr (dir == 1) { - o1 = grid_enum::momentum_y; - o2 = grid_enum::momentum_z; - o3 = grid_enum::momentum_x; - } else if constexpr (dir == 2) { - o1 = grid_enum::momentum_z; - o2 = grid_enum::momentum_x; - o3 = grid_enum::momentum_y; - } - - // load the 5-cell stencil into registers - // cell i - hydro_utilities::Primitive const cell_i = - hydro_utilities::Load_Cell_Primitive(dev_conserved, xid, yid, zid, nx, ny, n_cells, gamma); - - // cell i-1. The equality checks check the direction and subtracts one from the direction - // im1 stands for "i minus 1" - hydro_utilities::Primitive const cell_im1 = hydro_utilities::Load_Cell_Primitive( - dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, gamma); - - // cell i+1. The equality checks check the direction and adds one to the direction - // ip1 stands for "i plus 1" - hydro_utilities::Primitive const cell_ip1 = hydro_utilities::Load_Cell_Primitive( - dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, gamma); - - // cell i-2. The equality checks check the direction and subtracts one from the direction - // im2 stands for "i minus 2" - hydro_utilities::Primitive const cell_im2 = hydro_utilities::Load_Cell_Primitive( - dev_conserved, xid - 2 * int(dir == 0), yid - 2 * int(dir == 1), zid - 2 * int(dir == 2), nx, ny, n_cells, gamma); - - // cell i+2. The equality checks check the direction and adds one to the direction - // ip2 stands for "i plus 2" - hydro_utilities::Primitive const cell_ip2 = hydro_utilities::Load_Cell_Primitive( - dev_conserved, xid + 2 * int(dir == 0), yid + 2 * int(dir == 1), zid + 2 * int(dir == 2), nx, ny, n_cells, gamma); - - // Steps 2 - 5 are repeated for cell i-1, i, and i+1 - - // =============== - // Cell i-1 slopes - // =============== - - // Compute the eigenvectors for this cell - reconstruction::EigenVecs eigenvectors = reconstruction::Compute_Eigenvectors(cell_im1, gamma); - - // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables. Note that here L - // and R refer to locations relative to the cell center Stone Eqn 36 - - // left - hydro_utilities::Primitive del_L = reconstruction::Compute_Slope(cell_im2, cell_im1); - - // right - hydro_utilities::Primitive del_R = reconstruction::Compute_Slope(cell_im1, cell_i); - - // centered - hydro_utilities::Primitive del_C = reconstruction::Compute_Slope(cell_im2, cell_i, 0.5); - - // Van Leer - hydro_utilities::Primitive del_G = reconstruction::Compute_Van_Leer_Slope(del_L, del_R); - - // Step 3 - Project the left, right, centered and van Leer differences onto the - // characteristic variables Stone Eqn 37 (del_a are differences in - // characteristic variables, see Stone for notation) Use the eigenvectors - // given in Stone 2008, Appendix A - reconstruction::Characteristic del_a_L = - reconstruction::Primitive_To_Characteristic(cell_im1, del_L, eigenvectors, gamma); - - reconstruction::Characteristic del_a_R = - reconstruction::Primitive_To_Characteristic(cell_im1, del_R, eigenvectors, gamma); - - reconstruction::Characteristic del_a_C = - reconstruction::Primitive_To_Characteristic(cell_im1, del_C, eigenvectors, gamma); - - reconstruction::Characteristic del_a_G = - reconstruction::Primitive_To_Characteristic(cell_im1, del_G, eigenvectors, gamma); - - // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables - reconstruction::Characteristic const del_a_m_im1 = - reconstruction::Van_Leer_Limiter(del_a_L, del_a_R, del_a_C, del_a_G); - - // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables - // Stone Eqn 39 - hydro_utilities::Primitive del_m_im1 = Characteristic_To_Primitive(cell_im1, del_a_m_im1, eigenvectors, gamma); - - // Limit the variables that aren't transformed by the characteristic projection -#ifdef DE - del_m_im1.gas_energy_specific = reconstruction::Van_Leer_Limiter( - del_L.gas_energy_specific, del_R.gas_energy_specific, del_C.gas_energy_specific, del_G.gas_energy_specific); -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_m_im1.scalar_specific[i] = reconstruction::Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], - del_C.scalar_specific[i], del_G.scalar_specific[i]); - } -#endif // SCALAR - - // ============= - // Cell i slopes - // ============= - - // Compute the eigenvectors for this cell - eigenvectors = reconstruction::Compute_Eigenvectors(cell_i, gamma); - - // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables. Note that here L - // and R refer to locations relative to the cell center Stone Eqn 36 - - // left - del_L = reconstruction::Compute_Slope(cell_im1, cell_i); - - // right - del_R = reconstruction::Compute_Slope(cell_i, cell_ip1); - - // centered - del_C = reconstruction::Compute_Slope(cell_im1, cell_ip1, 0.5); - - // Van Leer - del_G = reconstruction::Compute_Van_Leer_Slope(del_L, del_R); - - // Step 3 - Project the left, right, centered and van Leer differences onto the - // characteristic variables Stone Eqn 37 (del_a are differences in - // characteristic variables, see Stone for notation) Use the eigenvectors - // given in Stone 2008, Appendix A - del_a_L = reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvectors, gamma); - - del_a_R = reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvectors, gamma); - - del_a_C = reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvectors, gamma); - - del_a_G = reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvectors, gamma); - - // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables - reconstruction::Characteristic const del_a_m_i = reconstruction::Van_Leer_Limiter(del_a_L, del_a_R, del_a_C, del_a_G); - - // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables - // Stone Eqn 39 - hydro_utilities::Primitive del_m_i = Characteristic_To_Primitive(cell_ip1, del_a_m_i, eigenvectors, gamma); - - // Limit the variables that aren't transformed by the characteristic projection -#ifdef DE - del_m_i.gas_energy_specific = reconstruction::Van_Leer_Limiter(del_L.gas_energy_specific, del_R.gas_energy_specific, - del_C.gas_energy_specific, del_G.gas_energy_specific); -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_m_i.scalar_specific[i] = reconstruction::Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], - del_C.scalar_specific[i], del_G.scalar_specific[i]); - } -#endif // SCALAR - - // =============== - // Cell i+1 slopes - // =============== - - // Compute the eigenvectors for this cell - eigenvectors = reconstruction::Compute_Eigenvectors(cell_ip1, gamma); - - // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables. Note that here L - // and R refer to locations relative to the cell center Stone Eqn 36 - - // left - del_L = reconstruction::Compute_Slope(cell_i, cell_ip1); - - // right - del_R = reconstruction::Compute_Slope(cell_ip1, cell_ip2); - - // centered - del_C = reconstruction::Compute_Slope(cell_i, cell_ip2, 0.5); - - // Van Leer - del_G = reconstruction::Compute_Van_Leer_Slope(del_L, del_R); - - // Step 3 - Project the left, right, centered and van Leer differences onto the - // characteristic variables Stone Eqn 37 (del_a are differences in - // characteristic variables, see Stone for notation) Use the eigenvectors - // given in Stone 2008, Appendix A - del_a_L = reconstruction::Primitive_To_Characteristic(cell_ip1, del_L, eigenvectors, gamma); - - del_a_R = reconstruction::Primitive_To_Characteristic(cell_ip1, del_R, eigenvectors, gamma); - - del_a_C = reconstruction::Primitive_To_Characteristic(cell_ip1, del_C, eigenvectors, gamma); - - del_a_G = reconstruction::Primitive_To_Characteristic(cell_ip1, del_G, eigenvectors, gamma); - - // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables - reconstruction::Characteristic const del_a_m_ip1 = - reconstruction::Van_Leer_Limiter(del_a_L, del_a_R, del_a_C, del_a_G); - - // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables - // Stone Eqn 39 - hydro_utilities::Primitive del_m_ip1 = Characteristic_To_Primitive(cell_ip1, del_a_m_ip1, eigenvectors, gamma); - - // Limit the variables that aren't transformed by the characteristic projection -#ifdef DE - del_m_ip1.gas_energy_specific = reconstruction::Van_Leer_Limiter( - del_L.gas_energy_specific, del_R.gas_energy_specific, del_C.gas_energy_specific, del_G.gas_energy_specific); -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_m_ip1.scalar_specific[i] = reconstruction::Van_Leer_Limiter(del_L.scalar_specific[i], del_R.scalar_specific[i], - del_C.scalar_specific[i], del_G.scalar_specific[i]); - } -#endif // SCALAR - - // Step 6 - Use parabolic interpolation to compute values at the left and right of each cell center Here, the - // subscripts L and R refer to the left and right side of the ith cell center Stone Eqn 46 - hydro_utilities::Primitive interface_L_iph = - reconstruction::Calc_Interface_Parabolic(cell_ip1, cell_i, del_m_ip1, del_m_i); - - hydro_utilities::Primitive interface_R_imh = - reconstruction::Calc_Interface_Parabolic(cell_i, cell_im1, del_m_i, del_m_im1); - - // Step 7 - Apply further monotonicity constraints to ensure the values on the left and right side of cell center lie - // between neighboring cell-centered values Stone Eqns 47 - 53 - reconstruction::Monotonize_Parabolic_Interface(cell_i, cell_im1, cell_ip1, interface_L_iph, interface_R_imh); - - // This is the beginning of the characteristic tracing - // Step 8 - Compute the coefficients for the monotonized parabolic - // interpolation function - // Stone Eqn 54 - - del_m_i.density = interface_L_iph.density - interface_R_imh.density; - del_m_i.velocity.x() = interface_L_iph.velocity.x() - interface_R_imh.velocity.x(); - del_m_i.velocity.y() = interface_L_iph.velocity.y() - interface_R_imh.velocity.y(); - del_m_i.velocity.z() = interface_L_iph.velocity.z() - interface_R_imh.velocity.z(); - del_m_i.pressure = interface_L_iph.pressure - interface_R_imh.pressure; - - Real const d_6 = 6.0 * (cell_i.density - 0.5 * (interface_R_imh.density + interface_L_iph.density)); - Real const vx_6 = 6.0 * (cell_i.velocity.x() - 0.5 * (interface_R_imh.velocity.x() + interface_L_iph.velocity.x())); - Real const vy_6 = 6.0 * (cell_i.velocity.y() - 0.5 * (interface_R_imh.velocity.y() + interface_L_iph.velocity.y())); - Real const vz_6 = 6.0 * (cell_i.velocity.z() - 0.5 * (interface_R_imh.velocity.z() + interface_L_iph.velocity.z())); - Real const p_6 = 6.0 * (cell_i.pressure - 0.5 * (interface_R_imh.pressure + interface_L_iph.pressure)); - -#ifdef DE - del_m_i.gas_energy_specific = interface_L_iph.gas_energy_specific - interface_R_imh.gas_energy_specific; - Real const ge_6 = 6.0 * (cell_i.gas_energy_specific - - 0.5 * (interface_R_imh.gas_energy_specific + interface_L_iph.gas_energy_specific)); -#endif // DE - -#ifdef SCALAR - Real scalar_6[NSCALARS]; - for (int i = 0; i < NSCALARS; i++) { - del_m_i.scalar_specific[i] = interface_L_iph.scalar_specific[i] - interface_R_imh.scalar_specific[i]; - scalar_6[i] = 6.0 * (cell_i.scalar_specific[i] - - 0.5 * (interface_R_imh.scalar_specific[i] + interface_L_iph.scalar_specific[i])); - } -#endif // SCALAR - - // Compute the eigenvalues of the linearized equations in the - // primitive variables using the cell-centered primitive variables - - // recalculate the adiabatic sound speed in cell i - Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); - - Real const lambda_m = cell_i.velocity.x() - sound_speed; - Real const lambda_0 = cell_i.velocity.x(); - Real const lambda_p = cell_i.velocity.x() + sound_speed; - - // Step 9 - Compute the left and right interface values using monotonized - // parabolic interpolation - // Stone Eqns 55 & 56 - - // largest eigenvalue - Real const lambda_max = fmax(lambda_p, (Real)0); - // smallest eigenvalue - Real const lambda_min = fmin(lambda_m, (Real)0); - - // left interface value, i+1/2 - Real const dtodx = dt / dx; - interface_L_iph.density = - interface_L_iph.density - - lambda_max * (0.5 * dtodx) * (del_m_i.density - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * d_6); - interface_L_iph.velocity.x() = - interface_L_iph.velocity.x() - - lambda_max * (0.5 * dtodx) * (del_m_i.velocity.x() - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vx_6); - interface_L_iph.velocity.y() = - interface_L_iph.velocity.y() - - lambda_max * (0.5 * dtodx) * (del_m_i.velocity.y() - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vy_6); - interface_L_iph.velocity.z() = - interface_L_iph.velocity.z() - - lambda_max * (0.5 * dtodx) * (del_m_i.velocity.z() - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vz_6); - interface_L_iph.pressure = - interface_L_iph.pressure - - lambda_max * (0.5 * dtodx) * (del_m_i.pressure - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * p_6); - - // right interface value, i-1/2 - interface_R_imh.density = - interface_R_imh.density - - lambda_min * (0.5 * dtodx) * (del_m_i.density + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * d_6); - interface_R_imh.velocity.x() = - interface_R_imh.velocity.x() - - lambda_min * (0.5 * dtodx) * (del_m_i.velocity.x() + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vx_6); - interface_R_imh.velocity.y() = - interface_R_imh.velocity.y() - - lambda_min * (0.5 * dtodx) * (del_m_i.velocity.y() + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vy_6); - interface_R_imh.velocity.z() = - interface_R_imh.velocity.z() - - lambda_min * (0.5 * dtodx) * (del_m_i.velocity.z() + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vz_6); - interface_R_imh.pressure = - interface_R_imh.pressure - - lambda_min * (0.5 * dtodx) * (del_m_i.pressure + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * p_6); - -#ifdef DE - interface_L_iph.gas_energy_specific = - interface_L_iph.gas_energy_specific - - lambda_max * (0.5 * dtodx) * (del_m_i.gas_energy_specific - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * ge_6); - interface_R_imh.gas_energy_specific = - interface_R_imh.gas_energy_specific - - lambda_min * (0.5 * dtodx) * (del_m_i.gas_energy_specific + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * ge_6); -#endif // DE - -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - interface_L_iph.scalar_specific[i] = - interface_L_iph.scalar_specific[i] - - lambda_max * (0.5 * dtodx) * - (del_m_i.scalar_specific[i] - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * scalar_6[i]); - interface_R_imh.scalar_specific[i] = - interface_R_imh.scalar_specific[i] - - lambda_min * (0.5 * dtodx) * - (del_m_i.scalar_specific[i] + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * scalar_6[i]); - } -#endif // SCALAR - - // Step 10 - Perform the characteristic tracing - // Stone Eqns 57 - 60 - - // left-hand interface value, i+1/2 - Real sum_1 = 0, sum_2 = 0, sum_3 = 0, sum_4 = 0, sum_5 = 0; -#ifdef DE - Real sum_ge = 0; - Real chi_ge = 0; -#endif // DE -#ifdef SCALAR - Real chi_scalar[NSCALARS]; - Real sum_scalar[NSCALARS]; - for (Real &val : sum_scalar) { - val = 0; - } -#endif // SCALAR - - if (lambda_m >= 0) { - Real const A = (0.5 * dtodx) * (lambda_p - lambda_m); - Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_m * lambda_m); - - Real const chi_1 = A * (del_m_i.density - d_6) + B * d_6; - Real const chi_2 = A * (del_m_i.velocity.x() - vx_6) + B * vx_6; - Real const chi_3 = A * (del_m_i.velocity.y() - vy_6) + B * vy_6; - Real const chi_4 = A * (del_m_i.velocity.z() - vz_6) + B * vz_6; - Real const chi_5 = A * (del_m_i.pressure - p_6) + B * p_6; - - sum_1 += -0.5 * (cell_i.density * chi_2 / sound_speed - chi_5 / (sound_speed * sound_speed)); - sum_2 += 0.5 * (chi_2 - chi_5 / (sound_speed * cell_i.density)); - sum_5 += -0.5 * (cell_i.density * chi_2 * sound_speed - chi_5); - } - if (lambda_0 >= 0) { - Real const A = (0.5 * dtodx) * (lambda_p - lambda_0); - Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_0 * lambda_0); - - Real const chi_1 = A * (del_m_i.density - d_6) + B * d_6; - Real const chi_2 = A * (del_m_i.velocity.x() - vx_6) + B * vx_6; - Real const chi_3 = A * (del_m_i.velocity.y() - vy_6) + B * vy_6; - Real const chi_4 = A * (del_m_i.velocity.z() - vz_6) + B * vz_6; - Real const chi_5 = A * (del_m_i.pressure - p_6) + B * p_6; -#ifdef DE - chi_ge = A * (del_m_i.gas_energy_specific - ge_6) + B * ge_6; -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - chi_scalar[i] = A * (del_m_i.scalar_specific[i] - scalar_6[i]) + B * scalar_6[i]; - } -#endif // SCALAR - - sum_1 += chi_1 - chi_5 / (sound_speed * sound_speed); - sum_3 += chi_3; - sum_4 += chi_4; -#ifdef DE - sum_ge += chi_ge; -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += chi_scalar[i]; - } -#endif // SCALAR - } - if (lambda_p >= 0) { - Real const A = (0.5 * dtodx) * (lambda_p - lambda_p); - Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_p * lambda_p); - - Real const chi_1 = A * (del_m_i.density - d_6) + B * d_6; - Real const chi_2 = A * (del_m_i.velocity.x() - vx_6) + B * vx_6; - Real const chi_3 = A * (del_m_i.velocity.y() - vy_6) + B * vy_6; - Real const chi_4 = A * (del_m_i.velocity.z() - vz_6) + B * vz_6; - Real const chi_5 = A * (del_m_i.pressure - p_6) + B * p_6; - - sum_1 += 0.5 * (cell_i.density * chi_2 / sound_speed + chi_5 / (sound_speed * sound_speed)); - sum_2 += 0.5 * (chi_2 + chi_5 / (sound_speed * cell_i.density)); - sum_5 += 0.5 * (cell_i.density * chi_2 * sound_speed + chi_5); - } - - // add the corrections to the initial guesses for the interface values - interface_L_iph.density += sum_1; - interface_L_iph.velocity.x() += sum_2; - interface_L_iph.velocity.y() += sum_3; - interface_L_iph.velocity.z() += sum_4; - interface_L_iph.pressure += sum_5; -#ifdef DE - interface_L_iph.gas_energy_specific += sum_ge; -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - interface_L_iph.scalar_specific[i] += sum_scalar[i]; - } -#endif // SCALAR - - // right-hand interface value, i-1/2 - sum_1 = 0; - sum_2 = 0; - sum_3 = 0; - sum_4 = 0; - sum_5 = 0; -#ifdef DE - sum_ge = 0; -#endif // DE -#ifdef SCALAR - for (Real &val : sum_scalar) { - val = 0; - } -#endif // SCALAR - if (lambda_m <= 0) { - Real const C = (0.5 * dtodx) * (lambda_m - lambda_m); - Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_m * lambda_m); - - Real const chi_1 = C * (del_m_i.density + d_6) + D * d_6; - Real const chi_2 = C * (del_m_i.velocity.x() + vx_6) + D * vx_6; - Real const chi_3 = C * (del_m_i.velocity.y() + vy_6) + D * vy_6; - Real const chi_4 = C * (del_m_i.velocity.z() + vz_6) + D * vz_6; - Real const chi_5 = C * (del_m_i.pressure + p_6) + D * p_6; - - sum_1 += -0.5 * (cell_i.density * chi_2 / sound_speed - chi_5 / (sound_speed * sound_speed)); - sum_2 += 0.5 * (chi_2 - chi_5 / (sound_speed * cell_i.density)); - sum_5 += -0.5 * (cell_i.density * chi_2 * sound_speed - chi_5); - } - if (lambda_0 <= 0) { - Real const C = (0.5 * dtodx) * (lambda_m - lambda_0); - Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_0 * lambda_0); - - Real const chi_1 = C * (del_m_i.density + d_6) + D * d_6; - Real const chi_2 = C * (del_m_i.velocity.x() + vx_6) + D * vx_6; - Real const chi_3 = C * (del_m_i.velocity.y() + vy_6) + D * vy_6; - Real const chi_4 = C * (del_m_i.velocity.z() + vz_6) + D * vz_6; - Real const chi_5 = C * (del_m_i.pressure + p_6) + D * p_6; -#ifdef DE - chi_ge = C * (del_m_i.gas_energy_specific + ge_6) + D * ge_6; -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - chi_scalar[i] = C * (del_m_i.scalar_specific[i] + scalar_6[i]) + D * scalar_6[i]; - } -#endif // SCALAR - - sum_1 += chi_1 - chi_5 / (sound_speed * sound_speed); - sum_3 += chi_3; - sum_4 += chi_4; -#ifdef DE - sum_ge += chi_ge; -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - sum_scalar[i] += chi_scalar[i]; - } -#endif // SCALAR - } - if (lambda_p <= 0) { - Real const C = (0.5 * dtodx) * (lambda_m - lambda_p); - Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_p * lambda_p); - - Real const chi_1 = C * (del_m_i.density + d_6) + D * d_6; - Real const chi_2 = C * (del_m_i.velocity.x() + vx_6) + D * vx_6; - Real const chi_3 = C * (del_m_i.velocity.y() + vy_6) + D * vy_6; - Real const chi_4 = C * (del_m_i.velocity.z() + vz_6) + D * vz_6; - Real const chi_5 = C * (del_m_i.pressure + p_6) + D * p_6; - - sum_1 += 0.5 * (cell_i.density * chi_2 / sound_speed + chi_5 / (sound_speed * sound_speed)); - sum_2 += 0.5 * (chi_2 + chi_5 / (sound_speed * cell_i.density)); - sum_5 += 0.5 * (cell_i.density * chi_2 * sound_speed + chi_5); - } - - // add the corrections - interface_R_imh.density += sum_1; - interface_R_imh.velocity.x() += sum_2; - interface_R_imh.velocity.y() += sum_3; - interface_R_imh.velocity.z() += sum_4; - interface_R_imh.pressure += sum_5; -#ifdef DE - interface_R_imh.gas_energy_specific += sum_ge; -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - interface_R_imh.scalar_specific[i] += sum_scalar[i]; - } -#endif // SCALAR - - // This is the end of the characteristic tracing - - // enforce minimum values - interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); - interface_L_iph.density = fmax(interface_L_iph.density, (Real)TINY_NUMBER); - interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER); - interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER); - - // Step 11 - Send final values back from kernel - - // Convert the left and right states in the primitive to the conserved variables send final values back from kernel - // bounds_R refers to the right side of the i-1/2 interface - size_t id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); - reconstruction::Write_Data(interface_L_iph, dev_bounds_L, dev_conserved, id, n_cells, o1, o2, o3, gamma); - - id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny); - reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma); -} -// ===================================================================================================================== - -// ===================================================================================================================== -template -__global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, - int ny, int nz, Real gamma) +__global__ __launch_bounds__(TPB) void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real dx, Real dt, Real gamma) { // get a thread ID int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; @@ -694,6 +147,11 @@ __global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bo } #endif // SCALAR + // Do the characteristic tracing +#ifndef VL + PPM_Characteristic_Evolution(cell_i, dt, dx, gamma, interface_R_imh, interface_L_iph); +#endif // VL + // enforce minimum values interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); interface_L_iph.density = fmax(interface_L_iph.density, (Real)TINY_NUMBER); @@ -711,17 +169,13 @@ __global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bo reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma); } // Instantiate the relevant template specifications -template __global__ void PPMC_CTU<0>(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, - int nz, Real dx, Real dt, Real gamma); -template __global__ void PPMC_CTU<1>(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, - int nz, Real dx, Real dt, Real gamma); -template __global__ void PPMC_CTU<2>(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, - int nz, Real dx, Real dt, Real gamma); - -template __global__ __launch_bounds__(TPB) void PPMC_VL<0>(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, - int nx, int ny, int nz, Real gamma); -template __global__ __launch_bounds__(TPB) void PPMC_VL<1>(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, - int nx, int ny, int nz, Real gamma); -template __global__ __launch_bounds__(TPB) void PPMC_VL<2>(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, - int nx, int ny, int nz, Real gamma); +template __global__ __launch_bounds__(TPB) void PPMC_cuda<0>(Real *dev_conserved, Real *dev_bounds_L, + Real *dev_bounds_R, int nx, int ny, int nz, Real dx, + Real dt, Real gamma); +template __global__ __launch_bounds__(TPB) void PPMC_cuda<1>(Real *dev_conserved, Real *dev_bounds_L, + Real *dev_bounds_R, int nx, int ny, int nz, Real dx, + Real dt, Real gamma); +template __global__ __launch_bounds__(TPB) void PPMC_cuda<2>(Real *dev_conserved, Real *dev_bounds_L, + Real *dev_bounds_R, int nx, int ny, int nz, Real dx, + Real dt, Real gamma); // ===================================================================================================================== diff --git a/src/reconstruction/ppmc_cuda.h b/src/reconstruction/ppmc_cuda.h index a820b7e0b..23d1f28db 100644 --- a/src/reconstruction/ppmc_cuda.h +++ b/src/reconstruction/ppmc_cuda.h @@ -8,11 +8,13 @@ #include "../global/global.h" /*! - * \brief Computes the left and right interface states using PPM with limiting in the characteristic variables and - * characteristic tracing. Used for the CTU and SIMPLE integrators. This uses the PPM method described in - * Stone et al. 2008 "Athena: A New Code for Astrophysical MHD". Fundementally this method relies on a Van Leer limiter - * in the characteristic variables to monotonize the slopes followed by limiting the interface states using the limiter - * from Colella & Woodward 1984. + * \brief Computes the left and right interface states using PPM with limiting in the characteristic variables. This + * uses the PPM method described in Felker & Stone 2018 "A fourth-order accurate finite volume method for ideal MHD via + * upwind constrained transport". This method computes the 3rd order interface then applies a mixture of monoticity + * constraints from from Colella & Sekora 2008, McCorquodale & Colella 2011, and Colella et al. 2011. We found that this + * newer method and limiters was more stable, less oscillatory, and faster than the method described in Stone et al. + * 2008 which was previously used. The difference is most pronounced in the Brio & Wu shock tube where the PPM + * oscillations are much smaller using this method. * * \tparam dir The direction to reconstruct. 0=X, 1=Y, 2=Z * \param[in] dev_conserved The conserved variable array @@ -21,35 +23,10 @@ * \param[in] nx The number of cells in the X-direction * \param[in] ny The number of cells in the Y-direction * \param[in] nz The number of cells in the Z-direction - * \param[in] dx The length of the cells in the `dir` direction - * \param[in] dt The time step * \param[in] gamma The adiabatic index */ template -__global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma); - -/*! - * \brief Computes the left and right interface states using PPM with limiting in the characteristic variables. Used for - * the VL (Van Leer) integrators. This uses the PPM method described in - * Felker & Stone 2018 "A fourth-order accurate finite volume method for ideal MHD via upwind constrained transport". - * This method computes the 3rd order interface then applies a mixture of monoticity constraints from from Colella & - * Sekora 2008, McCorquodale & Colella 2011, and Colella et al. 2011; for details see the - * `reconstruction::PPM_Single_Variable` function. We found that this newer method and limiters was more stable, less - * oscillatory, and faster than the method described in Stone et al. 2008 which is used in PPMC_CTU. The difference is - * most pronounced in the Brio & Wu shock tube where the PPM oscillations are much smaller using this method. - * - * \tparam dir The direction to reconstruct. 0=X, 1=Y, 2=Z - * \param[in] dev_conserved The conserved variable array - * \param[out] dev_bounds_L The array of left interfaces - * \param[out] dev_bounds_R The array of right interfaces - * \param[in] nx The number of cells in the X-direction - * \param[in] ny The number of cells in the Y-direction - * \param[in] nz The number of cells in the Z-direction - * \param[in] gamma The adiabatic index - */ -template -__global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, - int ny, int nz, Real gamma); +__global__ __launch_bounds__(TPB) void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real dx, Real dt, Real gamma); #endif // PPMC_CUDA_H diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu index bcacb9869..03c4c9223 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -22,114 +22,6 @@ #include "../utils/hydro_utilities.h" #include "../utils/testing_utilities.h" -TEST(tHYDROPpmcCTUReconstructor, CorrectInputExpectCorrectOutput) -{ - // Set up PRNG to use - std::mt19937_64 prng(42); - std::uniform_real_distribution doubleRand(0.1, 5); - - // Mock up needed information - size_t const nx = 6; - size_t const ny = 6; - size_t const nz = 6; - size_t const n_fields = 5; - double const dx = doubleRand(prng); - double const dt = doubleRand(prng); - double const gamma = 5.0 / 3.0; - - // Setup host grid. Fill host grid with random values and randomly assign maximum value - std::vector host_grid(nx * ny * nz * n_fields); - for (double &val : host_grid) { - val = doubleRand(prng); - } - - // Allocating and copying to device - cuda_utilities::DeviceVector dev_grid(host_grid.size()); - dev_grid.cpyHostToDevice(host_grid); - - // Fiducial Data - std::vector> fiducial_interface_left = {{{86, 2.6558981128823214}, - {302, 0.84399195916314151}, - {518, 2.2002498722761787}, - {734, 1.764334292986655}, - {950, 3.3600925565746804}}, - {{86, 2.4950488327292639}, - {302, 0.79287723513518138}, - {518, 1.7614576990062414}, - {734, 1.8238574169157304}, - {950, 3.14294317122161}}, - {{86, 2.6558981128823214}, - {302, 0.84399195916314151}, - {518, 2.0109603398129137}, - {734, 1.764334292986655}, - {950, 3.2100231679403066}}}; - - std::vector> fiducial_interface_right = {{{85, 2.6558981128823214}, - {301, 0.84399195916314151}, - {517, 1.8381070277226794}, - {733, 1.764334292986655}, - {949, 3.0847691079841209}}, - {{80, 3.1281603739188069}, - {296, 0.99406757727427164}, - {512, 1.8732124042412865}, - {728, 1.6489758692176784}, - {944, 2.8820015278590443}}, - {{50, 2.6558981128823214}, - {266, 0.84399195916314151}, - {482, 2.0109603398129137}, - {698, 1.764334292986655}, - {914, 3.2100231679403066}}}; - - // Loop over different directions - for (size_t direction = 0; direction < 3; direction++) { - // Allocate device buffers - cuda_utilities::DeviceVector dev_interface_left(host_grid.size(), true); - cuda_utilities::DeviceVector dev_interface_right(host_grid.size(), true); - - // Launch kernel - switch (direction) { - case 0: - hipLaunchKernelGGL(PPMC_CTU<0>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), - dev_interface_right.data(), nx, ny, nz, dx, dt, gamma); - break; - case 1: - hipLaunchKernelGGL(PPMC_CTU<1>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), - dev_interface_right.data(), nx, ny, nz, dx, dt, gamma); - break; - case 2: - hipLaunchKernelGGL(PPMC_CTU<2>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), - dev_interface_right.data(), nx, ny, nz, dx, dt, gamma); - break; - } - GPU_Error_Check(); - GPU_Error_Check(cudaDeviceSynchronize()); - - // Perform Comparison - for (size_t i = 0; i < host_grid.size(); i++) { - // Check the left interface - double test_val = dev_interface_left.at(i); - double fiducial_val = - (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) - ? 0.0 - : fiducial_interface_left.at(direction)[i]; - - testing_utilities::Check_Results( - fiducial_val, test_val, - "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); - - // Check the right interface - test_val = dev_interface_right.at(i); - fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) - ? 0.0 - : fiducial_interface_right.at(direction)[i]; - - testing_utilities::Check_Results( - fiducial_val, test_val, - "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); - } - } -} - TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) { #ifdef DE @@ -241,16 +133,16 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) // Launch kernel switch (direction) { case 0: - hipLaunchKernelGGL(PPMC_VL<0>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), - dev_interface_right.data(), nx, ny, nz, gamma); + hipLaunchKernelGGL(PPMC_cuda<0>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx, ny, nz, 0, 0, gamma); break; case 1: - hipLaunchKernelGGL(PPMC_VL<1>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), - dev_interface_right.data(), nx, ny, nz, gamma); + hipLaunchKernelGGL(PPMC_cuda<1>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx, ny, nz, 0, 0, gamma); break; case 2: - hipLaunchKernelGGL(PPMC_VL<2>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), - dev_interface_right.data(), nx, ny, nz, gamma); + hipLaunchKernelGGL(PPMC_cuda<2>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx, ny, nz, 0, 0, gamma); break; } GPU_Error_Check(); From 3e851d119c4790d630117ce561f85c5c5d5b3734 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 26 Apr 2024 06:27:42 -0400 Subject: [PATCH 26/29] Remove now unused PPMC related device functions --- src/reconstruction/reconstruction_internals.h | 127 ------------------ 1 file changed, 127 deletions(-) diff --git a/src/reconstruction/reconstruction_internals.h b/src/reconstruction/reconstruction_internals.h index dbb624e04..56c8a03c4 100644 --- a/src/reconstruction/reconstruction_internals.h +++ b/src/reconstruction/reconstruction_internals.h @@ -664,81 +664,6 @@ hydro_utilities::Primitive __device__ __host__ __inline__ Van_Leer_Limiter(hydro } // ===================================================================================================================== -// ===================================================================================================================== -/*! - * \brief Monotonize the parabolic interface states - * - * \param[in] cell_i The state in cell i - * \param[in] cell_im1 The state in cell i-1 - * \param[in] cell_ip1 The state in cell i+1 - * \param[in,out] interface_L_iph The left interface state at i+1/2 - * \param[in,out] interface_R_imh The right interface state at i-1/2 - * \return hydro_utilities::Primitive - */ -void __device__ __host__ __inline__ Monotonize_Parabolic_Interface(hydro_utilities::Primitive const &cell_i, - hydro_utilities::Primitive const &cell_im1, - hydro_utilities::Primitive const &cell_ip1, - hydro_utilities::Primitive &interface_L_iph, - hydro_utilities::Primitive &interface_R_imh) -{ - // The function that will actually do the monotozation. Note the return by refernce of the interface state - auto Monotonize = [](Real const &state_i, Real const &state_im1, Real const &state_ip1, Real &interface_L, - Real &interface_R) { - // Some terms we need for the comparisons - Real const term_1 = 6.0 * (interface_L - interface_R) * (state_i - 0.5 * (interface_R + interface_L)); - Real const term_2 = pow(interface_L - interface_R, 2.0); - - // First monotonicity constraint. Equations 47-49 in Stone et al. 2008 - if ((interface_L - state_i) * (state_i - interface_R) <= 0.0) { - interface_L = state_i; - interface_R = state_i; - } - // Second monotonicity constraint. Equations 50 & 51 in Stone et al. 2008 - else if (term_1 > term_2) { - interface_R = 3.0 * state_i - 2.0 * interface_L; - } - // Third monotonicity constraint. Equations 52 & 53 in Stone et al. 2008 - else if (term_1 < -term_2) { - interface_L = 3.0 * state_i - 2.0 * interface_R; - } - - // Bound the interface to lie between adjacent cell centered values - interface_R = fmax(fmin(state_i, state_im1), interface_R); - interface_R = fmin(fmax(state_i, state_im1), interface_R); - interface_L = fmax(fmin(state_i, state_ip1), interface_L); - interface_L = fmin(fmax(state_i, state_ip1), interface_L); - }; - - // Monotonize each interface state - Monotonize(cell_i.density, cell_im1.density, cell_ip1.density, interface_L_iph.density, interface_R_imh.density); - Monotonize(cell_i.velocity.x(), cell_im1.velocity.x(), cell_ip1.velocity.x(), interface_L_iph.velocity.x(), - interface_R_imh.velocity.x()); - Monotonize(cell_i.velocity.y(), cell_im1.velocity.y(), cell_ip1.velocity.y(), interface_L_iph.velocity.y(), - interface_R_imh.velocity.y()); - Monotonize(cell_i.velocity.z(), cell_im1.velocity.z(), cell_ip1.velocity.z(), interface_L_iph.velocity.z(), - interface_R_imh.velocity.z()); - Monotonize(cell_i.pressure, cell_im1.pressure, cell_ip1.pressure, interface_L_iph.pressure, interface_R_imh.pressure); - -#ifdef MHD - Monotonize(cell_i.magnetic.y(), cell_im1.magnetic.y(), cell_ip1.magnetic.y(), interface_L_iph.magnetic.y(), - interface_R_imh.magnetic.y()); - Monotonize(cell_i.magnetic.z(), cell_im1.magnetic.z(), cell_ip1.magnetic.z(), interface_L_iph.magnetic.z(), - interface_R_imh.magnetic.z()); -#endif // MHD - -#ifdef DE - Monotonize(cell_i.gas_energy_specific, cell_im1.gas_energy_specific, cell_ip1.gas_energy_specific, - interface_L_iph.gas_energy_specific, interface_R_imh.gas_energy_specific); -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - Monotonize(cell_i.scalar_specific[i], cell_im1.scalar_specific[i], cell_ip1.scalar_specific[i], - interface_L_iph.scalar_specific[i], interface_R_imh.scalar_specific[i]); - } -#endif // SCALAR -} -// ===================================================================================================================== - // ===================================================================================================================== /*! * \brief Compute the interface state from the slope and cell centered state using linear interpolation @@ -779,58 +704,6 @@ hydro_utilities::Primitive __device__ __host__ __inline__ Calc_Interface_Linear( } // ===================================================================================================================== -// ===================================================================================================================== -/*! - * \brief Compute the interface state for the CTU version fo the reconstructor from the slope and cell centered state - * using parabolic interpolation - * - * \param[in] cell_i The state in cell i - * \param[in] cell_im1 The state in cell i-1 - * \param[in] slopes_i The slopes in cell i - * \param[in] slopes_im1 The slopes in cell i-1 - * \return hydro_utilities::Primitive The interface state - */ -hydro_utilities::Primitive __device__ __host__ __inline__ Calc_Interface_Parabolic( - hydro_utilities::Primitive const &cell_i, hydro_utilities::Primitive const &cell_im1, - hydro_utilities::Primitive const &slopes_i, hydro_utilities::Primitive const &slopes_im1) -{ - hydro_utilities::Primitive output; - - auto interface = [](Real const &state_i, Real const &state_im1, Real const &slope_i, Real const &slope_im1) -> Real { - return 0.5 * (state_i + state_im1) - (slope_i - slope_im1) / 6.0; - }; - - output.density = interface(cell_i.density, cell_im1.density, slopes_i.density, slopes_im1.density); - output.velocity.x() = - interface(cell_i.velocity.x(), cell_im1.velocity.x(), slopes_i.velocity.x(), slopes_im1.velocity.x()); - output.velocity.y() = - interface(cell_i.velocity.y(), cell_im1.velocity.y(), slopes_i.velocity.y(), slopes_im1.velocity.y()); - output.velocity.z() = - interface(cell_i.velocity.z(), cell_im1.velocity.z(), slopes_i.velocity.z(), slopes_im1.velocity.z()); - output.pressure = interface(cell_i.pressure, cell_im1.pressure, slopes_i.pressure, slopes_im1.pressure); - -#ifdef MHD - output.magnetic.y() = - interface(cell_i.magnetic.y(), cell_im1.magnetic.y(), slopes_i.magnetic.y(), slopes_im1.magnetic.y()); - output.magnetic.z() = - interface(cell_i.magnetic.z(), cell_im1.magnetic.z(), slopes_i.magnetic.z(), slopes_im1.magnetic.z()); -#endif // MHD - -#ifdef DE - output.gas_energy_specific = interface(cell_i.gas_energy_specific, cell_im1.gas_energy_specific, - slopes_i.gas_energy_specific, slopes_im1.gas_energy_specific); -#endif // DE -#ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - output.scalar_specific[i] = interface(cell_i.scalar_specific[i], cell_im1.scalar_specific[i], - slopes_i.scalar_specific[i], slopes_im1.scalar_specific[i]); - } -#endif // SCALAR - - return output; -} -// ===================================================================================================================== - // ===================================================================================================================== /*! * \brief Compute the PPM interface state for a given field/stencil. From d555bab6c92f07b7b084e958fa52c65d7b60ce87 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Fri, 26 Apr 2024 16:20:16 -0400 Subject: [PATCH 27/29] Merge PPMP and PPMC, add MHD support to PPMP --- src/integrators/VL_1D_cuda.cu | 11 +- src/integrators/VL_2D_cuda.cu | 15 +- src/integrators/VL_3D_cuda.cu | 28 +- src/integrators/simple_1D_cuda.cu | 12 +- src/integrators/simple_2D_cuda.cu | 15 +- src/integrators/simple_3D_cuda.cu | 19 +- src/reconstruction/plm_cuda.h | 6 +- .../{ppmc_cuda.cu => ppm_cuda.cu} | 65 +- src/reconstruction/ppm_cuda.h | 32 + .../{ppmc_cuda_tests.cu => ppm_cuda_tests.cu} | 87 +- src/reconstruction/ppmc_cuda.h | 32 - src/reconstruction/ppmp_cuda.cu | 775 ------------------ src/reconstruction/ppmp_cuda.h | 40 - src/reconstruction/reconstruction_internals.h | 101 +++ src/utils/error_handling.cpp | 5 - 15 files changed, 264 insertions(+), 979 deletions(-) rename src/reconstruction/{ppmc_cuda.cu => ppm_cuda.cu} (64%) create mode 100644 src/reconstruction/ppm_cuda.h rename src/reconstruction/{ppmc_cuda_tests.cu => ppm_cuda_tests.cu} (57%) delete mode 100644 src/reconstruction/ppmc_cuda.h delete mode 100644 src/reconstruction/ppmp_cuda.cu delete mode 100644 src/reconstruction/ppmp_cuda.h diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu index 421351fba..fe91819dc 100644 --- a/src/integrators/VL_1D_cuda.cu +++ b/src/integrators/VL_1D_cuda.cu @@ -13,8 +13,7 @@ #include "../integrators/VL_1D_cuda.h" #include "../io/io.h" #include "../reconstruction/plm_cuda.h" - #include "../reconstruction/ppmc_cuda.h" - #include "../reconstruction/ppmp_cuda.h" + #include "../reconstruction/ppm_cuda.h" #include "../reconstruction/reconstruction.h" #include "../riemann_solvers/exact_cuda.h" #include "../riemann_solvers/hllc_cuda.h" @@ -85,12 +84,8 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea #if defined(PLMP) or defined(PLMC) hipLaunchKernelGGL(PLM_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); #endif // PLMP or PLMC - #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); - #endif - #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); + #if defined(PPMP) or defined(PPMC) + hipLaunchKernelGGL(PPM_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); #endif GPU_Error_Check(); diff --git a/src/integrators/VL_2D_cuda.cu b/src/integrators/VL_2D_cuda.cu index c993d695c..e510d3e55 100644 --- a/src/integrators/VL_2D_cuda.cu +++ b/src/integrators/VL_2D_cuda.cu @@ -11,8 +11,7 @@ #include "../hydro/hydro_cuda.h" #include "../integrators/VL_2D_cuda.h" #include "../reconstruction/plm_cuda.h" - #include "../reconstruction/ppmc_cuda.h" - #include "../reconstruction/ppmp_cuda.h" + #include "../reconstruction/ppm_cuda.h" #include "../reconstruction/reconstruction.h" #include "../riemann_solvers/exact_cuda.h" #include "../riemann_solvers/hllc_cuda.h" @@ -97,16 +96,10 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of hipLaunchKernelGGL(PLM_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); #endif // PLMP or PLMC - #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, - dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, - dt, gama, 1, n_fields); - #endif // PPMP - #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, + #if defined(PPMP) or defined(PPMC) + hipLaunchKernelGGL(PPM_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); - hipLaunchKernelGGL(PPMC_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, + hipLaunchKernelGGL(PPM_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); #endif // PPMC GPU_Error_Check(); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index c98d784c7..d1fd47846 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -18,8 +18,7 @@ #include "../mhd/ct_electric_fields.h" #include "../mhd/magnetic_update.h" #include "../reconstruction/plm_cuda.h" - #include "../reconstruction/ppmc_cuda.h" - #include "../reconstruction/ppmp_cuda.h" + #include "../reconstruction/ppm_cuda.h" #include "../riemann_solvers/exact_cuda.h" #include "../riemann_solvers/hll_cuda.h" #include "../riemann_solvers/hllc_cuda.h" @@ -245,23 +244,14 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int hipLaunchKernelGGL(PLM_cuda<2>, plm_vl_launch_params.get_numBlocks(), plm_vl_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama); #endif // PLMP or PLMC - #ifdef PPMP - cuda_utilities::AutomaticLaunchParams static const ppmp_launch_params(PPMP_cuda, n_cells); - hipLaunchKernelGGL(PPMP_cuda, ppmp_launch_params.get_numBlocks(), ppmp_launch_params.get_threadsPerBlock(), 0, 0, - dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMP_cuda, ppmp_launch_params.get_numBlocks(), ppmp_launch_params.get_threadsPerBlock(), 0, 0, - dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PPMP_cuda, ppmp_launch_params.get_numBlocks(), ppmp_launch_params.get_threadsPerBlock(), 0, 0, - dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif // PPMP - #ifdef PPMC - cuda_utilities::AutomaticLaunchParams static const ppmc_vl_launch_params(PPMC_cuda<0>, n_cells); - hipLaunchKernelGGL(PPMC_cuda<0>, ppmc_vl_launch_params.get_numBlocks(), ppmc_vl_launch_params.get_threadsPerBlock(), - 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); - hipLaunchKernelGGL(PPMC_cuda<1>, ppmc_vl_launch_params.get_numBlocks(), ppmc_vl_launch_params.get_threadsPerBlock(), - 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); - hipLaunchKernelGGL(PPMC_cuda<2>, ppmc_vl_launch_params.get_numBlocks(), ppmc_vl_launch_params.get_threadsPerBlock(), - 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama); + #if defined(PPMP) or defined(PPMC) + cuda_utilities::AutomaticLaunchParams static const ppm_launch_params(PPM_cuda<0>, n_cells); + hipLaunchKernelGGL(PPM_cuda<0>, ppm_launch_params.get_numBlocks(), ppm_launch_params.get_threadsPerBlock(), 0, 0, + dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); + hipLaunchKernelGGL(PPM_cuda<1>, ppm_launch_params.get_numBlocks(), ppm_launch_params.get_threadsPerBlock(), 0, 0, + dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); + hipLaunchKernelGGL(PPM_cuda<2>, ppm_launch_params.get_numBlocks(), ppm_launch_params.get_threadsPerBlock(), 0, 0, + dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama); #endif // PPMC GPU_Error_Check(); diff --git a/src/integrators/simple_1D_cuda.cu b/src/integrators/simple_1D_cuda.cu index 3137a2f27..f83aff576 100644 --- a/src/integrators/simple_1D_cuda.cu +++ b/src/integrators/simple_1D_cuda.cu @@ -11,8 +11,7 @@ #include "../integrators/simple_1D_cuda.h" #include "../io/io.h" #include "../reconstruction/plm_cuda.h" -#include "../reconstruction/ppmc_cuda.h" -#include "../reconstruction/ppmp_cuda.h" +#include "../reconstruction/ppm_cuda.h" #include "../reconstruction/reconstruction.h" #include "../riemann_solvers/exact_cuda.h" #include "../riemann_solvers/hllc_cuda.h" @@ -56,13 +55,8 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, hipLaunchKernelGGL(PLM_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); GPU_Error_Check(); #endif // PLMP or PLMC -#ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, - 0, n_fields); - GPU_Error_Check(); -#endif -#ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); +#if defined(PPMP) or defined(PPMC) + hipLaunchKernelGGL(PPM_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); GPU_Error_Check(); #endif diff --git a/src/integrators/simple_2D_cuda.cu b/src/integrators/simple_2D_cuda.cu index bdfe1e25e..9873379c1 100644 --- a/src/integrators/simple_2D_cuda.cu +++ b/src/integrators/simple_2D_cuda.cu @@ -9,8 +9,7 @@ #include "../hydro/hydro_cuda.h" #include "../integrators/simple_2D_cuda.h" #include "../reconstruction/plm_cuda.h" -#include "../reconstruction/ppmc_cuda.h" -#include "../reconstruction/ppmp_cuda.h" +#include "../reconstruction/ppm_cuda.h" #include "../reconstruction/reconstruction.h" #include "../riemann_solvers/exact_cuda.h" #include "../riemann_solvers/hllc_cuda.h" @@ -57,15 +56,9 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int hipLaunchKernelGGL(PLM_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); hipLaunchKernelGGL(PLM_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); #endif // PLMP or PLMC -#ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, - gama, 1, n_fields); -#endif -#ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); - hipLaunchKernelGGL(PPMC_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); +#if defined(PPMP) or defined(PPMC) + hipLaunchKernelGGL(PPM_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); + hipLaunchKernelGGL(PPM_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); #endif GPU_Error_Check(); diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 671e61768..56c575b81 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -13,8 +13,7 @@ #include "../integrators/simple_3D_cuda.h" #include "../io/io.h" #include "../reconstruction/plm_cuda.h" - #include "../reconstruction/ppmc_cuda.h" - #include "../reconstruction/ppmp_cuda.h" + #include "../reconstruction/ppm_cuda.h" #include "../reconstruction/reconstruction.h" #include "../riemann_solvers/exact_cuda.h" #include "../riemann_solvers/hll_cuda.h" @@ -89,18 +88,10 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, hipLaunchKernelGGL(PLM_cuda<1>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); hipLaunchKernelGGL(PLM_cuda<2>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama); #endif // PLMP or PLMC - #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, - gama, 0, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, - gama, 1, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, - gama, 2, n_fields); - #endif // PPMP - #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda<0>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); - hipLaunchKernelGGL(PPMC_cuda<1>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); - hipLaunchKernelGGL(PPMC_cuda<2>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama); + #if defined(PPMP) or defined(PPMC) + hipLaunchKernelGGL(PPM_cuda<0>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama); + hipLaunchKernelGGL(PPM_cuda<1>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama); + hipLaunchKernelGGL(PPM_cuda<2>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama); GPU_Error_Check(); #endif // PPMC diff --git a/src/reconstruction/plm_cuda.h b/src/reconstruction/plm_cuda.h index cd6aced72..9eedd38dd 100644 --- a/src/reconstruction/plm_cuda.h +++ b/src/reconstruction/plm_cuda.h @@ -2,8 +2,8 @@ * \brief Declarations of the cuda plm kernels */ -#ifndef PLMC_CUDA_H -#define PLMC_CUDA_H +#ifndef PLM_CUDA_H +#define PLM_CUDA_H #include "../global/global.h" #include "../grid/grid_enum.h" @@ -314,4 +314,4 @@ auto __device__ __inline__ PLM_Reconstruction(Real *dev_conserved, int const xid } } // namespace reconstruction -#endif // PLMC_CUDA_H +#endif // PLM_CUDA_H diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppm_cuda.cu similarity index 64% rename from src/reconstruction/ppmc_cuda.cu rename to src/reconstruction/ppm_cuda.cu index 49eed45c5..6c974233a 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppm_cuda.cu @@ -1,4 +1,4 @@ -/*! \file ppmc_cuda.cu +/*! \file ppm_cuda.cu * \brief Functions definitions for the ppm kernels, using characteristic tracing. Written following Stone et al. 2008. */ @@ -6,7 +6,7 @@ #include "../global/global.h" #include "../global/global_cuda.h" -#include "../reconstruction/ppmc_cuda.h" +#include "../reconstruction/ppm_cuda.h" #include "../reconstruction/reconstruction_internals.h" #include "../utils/gpu.hpp" #include "../utils/hydro_utilities.h" @@ -17,8 +17,8 @@ // ===================================================================================================================== template -__global__ __launch_bounds__(TPB) void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, - int ny, int nz, Real dx, Real dt, Real gamma) +__global__ __launch_bounds__(TPB) void PPM_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real dx, Real dt, Real gamma) { // get a thread ID int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; @@ -74,6 +74,7 @@ __global__ __launch_bounds__(TPB) void PPMC_cuda(Real *dev_conserved, Real *dev_ hydro_utilities::Primitive const cell_ip2 = hydro_utilities::Load_Cell_Primitive( dev_conserved, xid + 2 * int(dir == 0), yid + 2 * int(dir == 1), zid + 2 * int(dir == 2), nx, ny, n_cells, gamma); +#ifdef PPMC // Compute the eigenvectors reconstruction::EigenVecs const eigenvectors = reconstruction::Compute_Eigenvectors(cell_i, gamma); @@ -98,32 +99,9 @@ __global__ __launch_bounds__(TPB) void PPMC_cuda(Real *dev_conserved, Real *dev_ reconstruction::Primitive_To_Characteristic(cell_i, cell_ip2, eigenvectors, gamma); // Compute the interface states for each field - reconstruction::Characteristic interface_R_imh_characteristic, interface_L_iph_characteristic; - - reconstruction::PPM_Single_Variable(cell_im2_characteristic.a0, cell_im1_characteristic.a0, cell_i_characteristic.a0, - cell_ip1_characteristic.a0, cell_ip2_characteristic.a0, - interface_L_iph_characteristic.a0, interface_R_imh_characteristic.a0); - reconstruction::PPM_Single_Variable(cell_im2_characteristic.a1, cell_im1_characteristic.a1, cell_i_characteristic.a1, - cell_ip1_characteristic.a1, cell_ip2_characteristic.a1, - interface_L_iph_characteristic.a1, interface_R_imh_characteristic.a1); - reconstruction::PPM_Single_Variable(cell_im2_characteristic.a2, cell_im1_characteristic.a2, cell_i_characteristic.a2, - cell_ip1_characteristic.a2, cell_ip2_characteristic.a2, - interface_L_iph_characteristic.a2, interface_R_imh_characteristic.a2); - reconstruction::PPM_Single_Variable(cell_im2_characteristic.a3, cell_im1_characteristic.a3, cell_i_characteristic.a3, - cell_ip1_characteristic.a3, cell_ip2_characteristic.a3, - interface_L_iph_characteristic.a3, interface_R_imh_characteristic.a3); - reconstruction::PPM_Single_Variable(cell_im2_characteristic.a4, cell_im1_characteristic.a4, cell_i_characteristic.a4, - cell_ip1_characteristic.a4, cell_ip2_characteristic.a4, - interface_L_iph_characteristic.a4, interface_R_imh_characteristic.a4); - -#ifdef MHD - reconstruction::PPM_Single_Variable(cell_im2_characteristic.a5, cell_im1_characteristic.a5, cell_i_characteristic.a5, - cell_ip1_characteristic.a5, cell_ip2_characteristic.a5, - interface_L_iph_characteristic.a5, interface_R_imh_characteristic.a5); - reconstruction::PPM_Single_Variable(cell_im2_characteristic.a6, cell_im1_characteristic.a6, cell_i_characteristic.a6, - cell_ip1_characteristic.a6, cell_ip2_characteristic.a6, - interface_L_iph_characteristic.a6, interface_R_imh_characteristic.a6); -#endif // MHD + auto const [interface_L_iph_characteristic, interface_R_imh_characteristic] = + reconstruction::PPM_Interfaces(cell_im2_characteristic, cell_im1_characteristic, cell_i_characteristic, + cell_ip1_characteristic, cell_ip2_characteristic); // Convert back to primitive variables hydro_utilities::Primitive interface_L_iph = @@ -132,20 +110,24 @@ __global__ __launch_bounds__(TPB) void PPMC_cuda(Real *dev_conserved, Real *dev_ reconstruction::Characteristic_To_Primitive(cell_i, interface_R_imh_characteristic, eigenvectors, gamma); // Compute the interfaces for the variables that don't have characteristics -#ifdef DE + #ifdef DE reconstruction::PPM_Single_Variable(cell_im2.gas_energy_specific, cell_im1.gas_energy_specific, cell_i.gas_energy_specific, cell_ip1.gas_energy_specific, cell_ip2.gas_energy_specific, interface_L_iph.gas_energy_specific, interface_R_imh.gas_energy_specific); -#endif // DE -#ifdef SCALAR + #endif // DE + #ifdef SCALAR for (int i = 0; i < NSCALARS; i++) { reconstruction::PPM_Single_Variable(cell_im2.scalar_specific[i], cell_im1.scalar_specific[i], cell_i.scalar_specific[i], cell_ip1.scalar_specific[i], cell_ip2.scalar_specific[i], interface_L_iph.scalar_specific[i], interface_R_imh.scalar_specific[i]); } -#endif // SCALAR + #endif // SCALAR +#else // PPMC + auto [interface_L_iph, interface_R_imh] = + reconstruction::PPM_Interfaces(cell_im2, cell_im1, cell_i, cell_ip1, cell_ip2); +#endif // PPMC // Do the characteristic tracing #ifndef VL @@ -169,13 +151,10 @@ __global__ __launch_bounds__(TPB) void PPMC_cuda(Real *dev_conserved, Real *dev_ reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma); } // Instantiate the relevant template specifications -template __global__ __launch_bounds__(TPB) void PPMC_cuda<0>(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma); -template __global__ __launch_bounds__(TPB) void PPMC_cuda<1>(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma); -template __global__ __launch_bounds__(TPB) void PPMC_cuda<2>(Real *dev_conserved, Real *dev_bounds_L, - Real *dev_bounds_R, int nx, int ny, int nz, Real dx, - Real dt, Real gamma); +template __global__ __launch_bounds__(TPB) void PPM_cuda<0>(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, + int nx, int ny, int nz, Real dx, Real dt, Real gamma); +template __global__ __launch_bounds__(TPB) void PPM_cuda<1>(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, + int nx, int ny, int nz, Real dx, Real dt, Real gamma); +template __global__ __launch_bounds__(TPB) void PPM_cuda<2>(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, + int nx, int ny, int nz, Real dx, Real dt, Real gamma); // ===================================================================================================================== diff --git a/src/reconstruction/ppm_cuda.h b/src/reconstruction/ppm_cuda.h new file mode 100644 index 000000000..c84936c5c --- /dev/null +++ b/src/reconstruction/ppm_cuda.h @@ -0,0 +1,32 @@ +/*! \file ppm_cuda.h + * \brief Declarations of the cuda ppm kernels, characteristic reconstruction + * version. */ + +#ifndef PPM_CUDA_H +#define PPM_CUDA_H + +#include "../global/global.h" + +/*! + * \brief Computes the left and right interface states using PPM with limiting in the primitive or characteristic + * variables. This uses the PPM method described in Felker & Stone 2018 "A fourth-order accurate finite volume method + * for ideal MHD via upwind constrained transport". This method computes the 3rd order interface then applies a mixture + * of monoticity constraints from from Colella & Sekora 2008, McCorquodale & Colella 2011, and Colella et al. 2011. We + * found that this newer method and limiters was more stable, less oscillatory, and faster than the method described in + * Stone et al. 2008 which was previously used. The difference is most pronounced in the Brio & Wu shock tube where the + * PPM oscillations are much smaller using this method. + * + * \tparam dir The direction to reconstruct. 0=X, 1=Y, 2=Z + * \param[in] dev_conserved The conserved variable array + * \param[out] dev_bounds_L The array of left interfaces + * \param[out] dev_bounds_R The array of right interfaces + * \param[in] nx The number of cells in the X-direction + * \param[in] ny The number of cells in the Y-direction + * \param[in] nz The number of cells in the Z-direction + * \param[in] gamma The adiabatic index + */ +template +__global__ __launch_bounds__(TPB) void PPM_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real dx, Real dt, Real gamma); + +#endif // PPM_CUDA_H diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppm_cuda_tests.cu similarity index 57% rename from src/reconstruction/ppmc_cuda_tests.cu rename to src/reconstruction/ppm_cuda_tests.cu index 03c4c9223..c43fc2d0f 100644 --- a/src/reconstruction/ppmc_cuda_tests.cu +++ b/src/reconstruction/ppm_cuda_tests.cu @@ -1,6 +1,6 @@ /*! - * \file ppmc_cuda_tests.cu - * \brief Tests for the contents of ppmc_cuda.h and ppmc_cuda.cu + * \file ppm_cuda_tests.cu + * \brief Tests for the contents of ppm_cuda.h and ppm_cuda.cu * */ @@ -17,12 +17,12 @@ // Local Includes #include "../global/global.h" #include "../io/io.h" -#include "../reconstruction/ppmc_cuda.h" +#include "../reconstruction/ppm_cuda.h" #include "../utils/DeviceVector.h" #include "../utils/hydro_utilities.h" #include "../utils/testing_utilities.h" -TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) +TEST(tALLPpmReconstructor, CorrectInputExpectCorrectOutput) { #ifdef DE /// This test doesn't support Dual Energy. It wouldn't be that hard to add support for DE but the DE parts of the @@ -58,6 +58,7 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) // Fiducial Data #ifdef MHD + #ifdef PPMC std::vector> fiducial_interface_left = {{{86, 3.6926886385390683}, {302, 2.3022467009220993}, {518, 2.3207781368125389}, @@ -101,7 +102,53 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) {914, 14.017699282483312}, {1130, 1.5292690020097823}, {1346, -0.12121484974901264}}}; -#else // not MHD + #else // PPMC + std::vector> fiducial_interface_left = {{{86, 3.1608646282711232}, + {302, 0.84444422521258167}, + {518, 1.2459789393105685}, + {734, 2.2721401574613527}, + {950, 7.7508629541568022}, + {1166, 0.54567382624989913}, + {1382, 3.5147238706385462}}, + {{86, 3.6292858956631076}, + {302, 1.8316886259802778}, + {518, 2.2809308293670103}, + {734, 3.6939841768696002}, + {950, 10.405768833830281}, + {1166, 3.5147238706385462}, + {1382, 1.234487908582131}}, + {{86, 3.1608646282711232}, + {302, 0.84444422521258167}, + {518, 1.9865377887960551}, + {734, 1.1540870822905045}, + {950, 4.8971025794015812}, + {1166, 1.234487908582131}, + {1382, 0.54567382624989913}}}; + + std::vector> fiducial_interface_right = {{{301, 0.84444422521258167}, + {85, 3.1608646282711232}, + {733, 2.2721401574613527}, + {517, 3.2701799807980008}, + {949, 10.497902459040514}, + {1165, 0.54567382624989913}, + {1381, 3.5147238706385462}}, + {{80, 2.245959460360242}, + {296, 0.33326844362749702}, + {512, 1.4115388872411132}, + {728, 0.72702830835784316}, + {944, 7.5422056995631559}, + {1160, 3.5147238706385462}, + {1376, 1.234487908582131}}, + {{50, 3.1608646282711232}, + {266, 0.84444422521258167}, + {482, 1.9865377887960551}, + {698, 4.1768690252280765}, + {914, 14.823997016980297}, + {1130, 1.234487908582131}, + {1346, 0.54567382624989913}}}; + #endif // PPMC +#else // not MHD + #ifdef PPMC std::vector> fiducial_interface_left = { {{86, 4.155160222900312}, {302, 1.1624633361407897}, {518, 1.6379195998743412}, {734, 2.9868746414179093}}, {{86, 4.1795874335665655}, {302, 2.1094239978455054}, {518, 2.6811988240843849}, {734, 4.2540957888954054}}, @@ -122,7 +169,29 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) {266, 0.75482470285166003}, {482, 1.7757096932649317}, {698, 3.6101832818706452}}}; -#endif // MHD + #else // PPMC + std::vector> fiducial_interface_left = { + {{86, 3.1608646282711232}, {302, 0.84444422521258167}, {518, 1.2459789393105685}, {734, 2.2721401574613527}}, + {{86, 3.6292858956631076}, {302, 1.8316886259802778}, {518, 2.2809308293670103}, {734, 3.6939841768696002}}, + {{86, 3.1608646282711232}, {302, 0.84444422521258167}, {518, 1.9865377887960551}, {734, 1.1540870822905045}}}; + + std::vector> fiducial_interface_right = {{{54, 3.4283787020401455}, + {85, 3.1608646282711232}, + {301, 0.84444422521258167}, + {517, 3.2701799807980008}, + {733, 2.2721401574613527}}, + {{54, 5.3122571267813665}, + {80, 2.245959460360242}, + {296, 0.33326844362749702}, + {512, 1.4115388872411132}, + {728, 0.72702830835784316}}, + {{50, 3.1608646282711232}, + {54, 3.2010935757366896}, + {266, 0.84444422521258167}, + {482, 1.9865377887960551}, + {698, 4.1768690252280765}}}; + #endif // PPMC +#endif // MHD // Loop over different directions for (size_t direction = 0; direction < 3; direction++) { @@ -133,15 +202,15 @@ TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) // Launch kernel switch (direction) { case 0: - hipLaunchKernelGGL(PPMC_cuda<0>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + hipLaunchKernelGGL(PPM_cuda<0>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), dev_interface_right.data(), nx, ny, nz, 0, 0, gamma); break; case 1: - hipLaunchKernelGGL(PPMC_cuda<1>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + hipLaunchKernelGGL(PPM_cuda<1>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), dev_interface_right.data(), nx, ny, nz, 0, 0, gamma); break; case 2: - hipLaunchKernelGGL(PPMC_cuda<2>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + hipLaunchKernelGGL(PPM_cuda<2>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), dev_interface_right.data(), nx, ny, nz, 0, 0, gamma); break; } diff --git a/src/reconstruction/ppmc_cuda.h b/src/reconstruction/ppmc_cuda.h deleted file mode 100644 index 23d1f28db..000000000 --- a/src/reconstruction/ppmc_cuda.h +++ /dev/null @@ -1,32 +0,0 @@ -/*! \file ppmc_cuda.h - * \brief Declarations of the cuda ppm kernels, characteristic reconstruction - * version. */ - -#ifndef PPMC_CUDA_H -#define PPMC_CUDA_H - -#include "../global/global.h" - -/*! - * \brief Computes the left and right interface states using PPM with limiting in the characteristic variables. This - * uses the PPM method described in Felker & Stone 2018 "A fourth-order accurate finite volume method for ideal MHD via - * upwind constrained transport". This method computes the 3rd order interface then applies a mixture of monoticity - * constraints from from Colella & Sekora 2008, McCorquodale & Colella 2011, and Colella et al. 2011. We found that this - * newer method and limiters was more stable, less oscillatory, and faster than the method described in Stone et al. - * 2008 which was previously used. The difference is most pronounced in the Brio & Wu shock tube where the PPM - * oscillations are much smaller using this method. - * - * \tparam dir The direction to reconstruct. 0=X, 1=Y, 2=Z - * \param[in] dev_conserved The conserved variable array - * \param[out] dev_bounds_L The array of left interfaces - * \param[out] dev_bounds_R The array of right interfaces - * \param[in] nx The number of cells in the X-direction - * \param[in] ny The number of cells in the Y-direction - * \param[in] nz The number of cells in the Z-direction - * \param[in] gamma The adiabatic index - */ -template -__global__ __launch_bounds__(TPB) void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, - int ny, int nz, Real dx, Real dt, Real gamma); - -#endif // PPMC_CUDA_H diff --git a/src/reconstruction/ppmp_cuda.cu b/src/reconstruction/ppmp_cuda.cu deleted file mode 100644 index 2038f215a..000000000 --- a/src/reconstruction/ppmp_cuda.cu +++ /dev/null @@ -1,775 +0,0 @@ -/*! \file ppmp_cuda.cu - * \brief Definitions of the piecewise parabolic reconstruction (Fryxell 2000) - functions with limiting in the primitive variables. */ - -#ifdef PPMP - - #include - - #include "../global/global.h" - #include "../global/global_cuda.h" - #include "../reconstruction/ppmp_cuda.h" - #include "../utils/gpu.hpp" - - #ifdef DE // PRESSURE_DE - #include "../utils/hydro_utilities.h" - #endif - -// #define STEEPENING -// #define FLATTENING -// Note: Errors when using FLATTENING, need to check the ghost cells - -/*! \fn __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real - *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int - n_fields) - * \brief When passed a stencil of conserved variables, returns the left and - right boundary values for the interface calculated using ppm with limiting in - the primitive variables. */ -__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) -{ - int n_cells = nx * ny * nz; - int o1, o2, o3; - if (dir == 0) { - o1 = 1; - o2 = 2; - o3 = 3; - } - if (dir == 1) { - o1 = 2; - o2 = 3; - o3 = 1; - } - if (dir == 2) { - o1 = 3; - o2 = 1; - o3 = 2; - } - - // declare primitive variables in the stencil - Real d_i, vx_i, vy_i, vz_i, p_i; - Real d_imo, vx_imo, vy_imo, vz_imo, p_imo; - Real d_ipo, vx_ipo, vy_ipo, vz_ipo, p_ipo; - Real d_imt, vx_imt, vy_imt, vz_imt, p_imt; - Real d_ipt, vx_ipt, vy_ipt, vz_ipt, p_ipt; - #ifdef FLATTENING - Real p_imth, p_ipth; - #endif // FLATTENING - - // declare left and right interface values - Real d_L, vx_L, vy_L, vz_L, p_L; - Real d_R, vx_R, vy_R, vz_R, p_R; - - // declare other variables - Real del_q_imo, del_q_i, del_q_ipo; - - #ifndef VL - // #ifdef CTU - Real cs, cl, cr; // sound speed in cell i, and at left and right boundaries - Real del_d, del_vx, del_vy, del_vz, del_p; // "slope" accross cell i - Real d_6, vx_6, vy_6, vz_6, p_6; - Real beta_m, beta_0, beta_p; - Real alpha_m, alpha_0, alpha_p; - Real lambda_m, lambda_0, lambda_p; // speed of characteristics - Real dL_m, vxL_m, pL_m; - Real dL_0, vyL_0, vzL_0, pL_0; - Real vxL_p, pL_p; - Real vxR_m, pR_m; - Real dR_0, vyR_0, vzR_0, pR_0; - Real dR_p, vxR_p, pR_p; - Real chi_L_m, chi_L_0, chi_L_p; - Real chi_R_m, chi_R_0, chi_R_p; - #endif // CTU - - #ifdef DE - Real ge_i, ge_imo, ge_ipo, ge_imt, ge_ipt, ge_L, ge_R, E_kin, E, dge; - #ifndef VL - // #ifdef CTU - Real del_ge, ge_6, geL_0, geR_0; - #endif // CTU - #endif // DE - - #ifdef SCALAR - Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS], scalar_imt[NSCALARS], scalar_ipt[NSCALARS]; - Real scalar_L[NSCALARS], scalar_R[NSCALARS]; - #ifndef VL - // #ifdef CTU - Real del_scalar[NSCALARS], scalar_6[NSCALARS], scalarL_0[NSCALARS], scalarR_0[NSCALARS]; - #endif // CTU - #endif // SCALAR - - // get a thread ID - int blockId = blockIdx.x + blockIdx.y * gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; - int id; - int zid = tid / (nx * ny); - int yid = (tid - zid * nx * ny) / nx; - int xid = tid - zid * nx * ny - yid * nx; - - int xs, xe, ys, ye, zs, ze; - - // - // if (dir == 0) { - // xs = 3; xe = nx-4; - // ys = 0; ye = ny; - // zs = 0; ze = nz; - // } - // if (dir == 1) { - // xs = 0; xe = nx; - // ys = 3; ye = ny-4; - // zs = 0; ze = nz; - // } - // if (dir == 2) { - // xs = 0; xe = nx; - // ys = 0; ye = ny; - // zs = 3; ze = nz-4; - // } - - // Ignore only the 2 ghost cells on each side ( instead of ignoring 3 ghost - // cells on each side ) - if (dir == 0) { - xs = 2; - xe = nx - 3; - ys = 0; - ye = ny; - zs = 0; - ze = nz; - } - if (dir == 1) { - xs = 0; - xe = nx; - ys = 2; - ye = ny - 3; - zs = 0; - ze = nz; - } - if (dir == 2) { - xs = 0; - xe = nx; - ys = 0; - ye = ny; - zs = 2; - ze = nz - 3; - } - - if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) { - // load the 5-cell stencil into registers - // cell i - id = xid + yid * nx + zid * nx * ny; - d_i = dev_conserved[id]; - vx_i = dev_conserved[o1 * n_cells + id] / d_i; - vy_i = dev_conserved[o2 * n_cells + id] / d_i; - vz_i = dev_conserved[o3 * n_cells + id] / d_i; - #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else - p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); - #endif // PRESSURE_DE - p_i = fmax(p_i, (Real)TINY_NUMBER); - #ifdef DE - ge_i = dge / d_i; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_i[i] = dev_conserved[(5 + i) * n_cells + id] / d_i; - } - #endif // SCALAR - // cell i-1 - if (dir == 0) id = xid - 1 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid - 1) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid - 1) * nx * ny; - d_imo = dev_conserved[id]; - vx_imo = dev_conserved[o1 * n_cells + id] / d_imo; - vy_imo = dev_conserved[o2 * n_cells + id] / d_imo; - vz_imo = dev_conserved[o3 * n_cells + id] / d_imo; - #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_imo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else - p_imo = (dev_conserved[4 * n_cells + id] - 0.5 * d_imo * (vx_imo * vx_imo + vy_imo * vy_imo + vz_imo * vz_imo)) * - (gamma - 1.0); - #endif // PRESSURE_DE - p_imo = fmax(p_imo, (Real)TINY_NUMBER); - #ifdef DE - ge_imo = dge / d_imo; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_imo[i] = dev_conserved[(5 + i) * n_cells + id] / d_imo; - } - #endif // SCALAR - // cell i+1 - if (dir == 0) id = xid + 1 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid + 1) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid + 1) * nx * ny; - d_ipo = dev_conserved[id]; - vx_ipo = dev_conserved[o1 * n_cells + id] / d_ipo; - vy_ipo = dev_conserved[o2 * n_cells + id] / d_ipo; - vz_ipo = dev_conserved[o3 * n_cells + id] / d_ipo; - #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_ipo = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else - p_ipo = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipo * (vx_ipo * vx_ipo + vy_ipo * vy_ipo + vz_ipo * vz_ipo)) * - (gamma - 1.0); - #endif // PRESSURE_DE - p_ipo = fmax(p_ipo, (Real)TINY_NUMBER); - #ifdef DE - ge_ipo = dge / d_ipo; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_ipo[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipo; - } - #endif // SCALAR - // cell i-2 - if (dir == 0) id = xid - 2 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid - 2) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid - 2) * nx * ny; - d_imt = dev_conserved[id]; - vx_imt = dev_conserved[o1 * n_cells + id] / d_imt; - vy_imt = dev_conserved[o2 * n_cells + id] / d_imt; - vz_imt = dev_conserved[o3 * n_cells + id] / d_imt; - #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_imt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else - p_imt = (dev_conserved[4 * n_cells + id] - 0.5 * d_imt * (vx_imt * vx_imt + vy_imt * vy_imt + vz_imt * vz_imt)) * - (gamma - 1.0); - #endif // PRESSURE_DE - p_imt = fmax(p_imt, (Real)TINY_NUMBER); - #ifdef DE - ge_imt = dge / d_imt; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_imt[i] = dev_conserved[(5 + i) * n_cells + id] / d_imt; - } - #endif // SCALAR - // cell i+2 - if (dir == 0) id = xid + 2 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid + 2) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid + 2) * nx * ny; - d_ipt = dev_conserved[id]; - vx_ipt = dev_conserved[o1 * n_cells + id] / d_ipt; - vy_ipt = dev_conserved[o2 * n_cells + id] / d_ipt; - vz_ipt = dev_conserved[o3 * n_cells + id] / d_ipt; - #ifdef DE // PRESSURE_DE - E = dev_conserved[4 * n_cells + id]; - E_kin = 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt); - dge = dev_conserved[(n_fields - 1) * n_cells + id]; - p_ipt = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); - #else - p_ipt = (dev_conserved[4 * n_cells + id] - 0.5 * d_ipt * (vx_ipt * vx_ipt + vy_ipt * vy_ipt + vz_ipt * vz_ipt)) * - (gamma - 1.0); - #endif // PRESSURE_DE - p_ipt = fmax(p_ipt, (Real)TINY_NUMBER); - #ifdef DE - ge_ipt = dge / d_ipt; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_ipt[i] = dev_conserved[(5 + i) * n_cells + id] / d_ipt; - } - #endif // SCALAR - #ifdef FLATTENING - // cell i-3 - if (dir == 0) id = xid - 3 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid - 3) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid - 3) * nx * ny; - p_imth = - (dev_conserved[4 * n_cells + id] - 0.5 * - (dev_conserved[o1 * n_cells + id] * dev_conserved[o1 * n_cells + id] + - dev_conserved[o2 * n_cells + id] * dev_conserved[o2 * n_cells + id] + - dev_conserved[o3 * n_cells + id] * dev_conserved[o3 * n_cells + id]) / - dev_conserved[id]) * - (gamma - 1.0); - p_imth = fmax(p_imth, (Real)TINY_NUMBER); - // cell i+3 - if (dir == 0) id = xid + 3 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid + 3) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid + 3) * nx * ny; - p_ipth = - (dev_conserved[4 * n_cells + id] - 0.5 * - (dev_conserved[o1 * n_cells + id] * dev_conserved[o1 * n_cells + id] + - dev_conserved[o2 * n_cells + id] * dev_conserved[o2 * n_cells + id] + - dev_conserved[o3 * n_cells + id] * dev_conserved[o3 * n_cells + id]) / - dev_conserved[id]) * - (gamma - 1.0); - p_ipth = fmax(p_imth, (Real)TINY_NUMBER); - #endif // FLATTENING - - // use ppm routines to set cell boundary values (see Fryxell Sec. 3.1.1) - - // Calculate the monotonized slopes for cells imo, i, ipo (density) - del_q_imo = Calculate_Slope(d_imt, d_imo, d_i); - del_q_i = Calculate_Slope(d_imo, d_i, d_ipo); - del_q_ipo = Calculate_Slope(d_i, d_ipo, d_ipt); - - // Calculate the interface values for density - Interface_Values_PPM(d_imo, d_i, d_ipo, del_q_imo, del_q_i, del_q_ipo, &d_L, &d_R); - - // Calculate the monotonized slopes for cells imo, i, ipo (x-velocity) - del_q_imo = Calculate_Slope(vx_imt, vx_imo, vx_i); - del_q_i = Calculate_Slope(vx_imo, vx_i, vx_ipo); - del_q_ipo = Calculate_Slope(vx_i, vx_ipo, vx_ipt); - - // Calculate the interface values for x-velocity - Interface_Values_PPM(vx_imo, vx_i, vx_ipo, del_q_imo, del_q_i, del_q_ipo, &vx_L, &vx_R); - - // Calculate the monotonized slopes for cells imo, i, ipo (y-velocity) - del_q_imo = Calculate_Slope(vy_imt, vy_imo, vy_i); - del_q_i = Calculate_Slope(vy_imo, vy_i, vy_ipo); - del_q_ipo = Calculate_Slope(vy_i, vy_ipo, vy_ipt); - - // Calculate the interface values for y-velocity - Interface_Values_PPM(vy_imo, vy_i, vy_ipo, del_q_imo, del_q_i, del_q_ipo, &vy_L, &vy_R); - - // Calculate the monotonized slopes for cells imo, i, ipo (z-velocity) - del_q_imo = Calculate_Slope(vz_imt, vz_imo, vz_i); - del_q_i = Calculate_Slope(vz_imo, vz_i, vz_ipo); - del_q_ipo = Calculate_Slope(vz_i, vz_ipo, vz_ipt); - - // Calculate the interface values for z-velocity - Interface_Values_PPM(vz_imo, vz_i, vz_ipo, del_q_imo, del_q_i, del_q_ipo, &vz_L, &vz_R); - - // Calculate the monotonized slopes for cells imo, i, ipo (pressure) - del_q_imo = Calculate_Slope(p_imt, p_imo, p_i); - del_q_i = Calculate_Slope(p_imo, p_i, p_ipo); - del_q_ipo = Calculate_Slope(p_i, p_ipo, p_ipt); - - // Calculate the interface values for pressure - Interface_Values_PPM(p_imo, p_i, p_ipo, del_q_imo, del_q_i, del_q_ipo, &p_L, &p_R); - - #ifdef DE - // Calculate the monotonized slopes for cells imo, i, ipo (internal energy) - del_q_imo = Calculate_Slope(ge_imt, ge_imo, ge_i); - del_q_i = Calculate_Slope(ge_imo, ge_i, ge_ipo); - del_q_ipo = Calculate_Slope(ge_i, ge_ipo, ge_ipt); - - // Calculate the interface values for internal energy - Interface_Values_PPM(ge_imo, ge_i, ge_ipo, del_q_imo, del_q_i, del_q_ipo, &ge_L, &ge_R); - #endif // DE - - #ifdef SCALAR - // Calculate the monotonized slopes for cells imo, i, ipo (passive scalars) - for (int i = 0; i < NSCALARS; i++) { - del_q_imo = Calculate_Slope(scalar_imt[i], scalar_imo[i], scalar_i[i]); - del_q_i = Calculate_Slope(scalar_imo[i], scalar_i[i], scalar_ipo[i]); - del_q_ipo = Calculate_Slope(scalar_i[i], scalar_ipo[i], scalar_ipt[i]); - - // Calculate the interface values for the passive scalars - Interface_Values_PPM(scalar_imo[i], scalar_i[i], scalar_ipo[i], del_q_imo, del_q_i, del_q_ipo, &scalar_L[i], - &scalar_R[i]); - } - #endif // SCALAR - - #ifdef STEEPENING - Real d2_rho_imo, d2_rho_ipo, eta_i; - // check for contact discontinuities & steepen if necessary (see Fryxell - // Sec 3.1.2) if condition 4 (Fryxell Eqn 37) (Colella Eqn 1.16.5) is true, - // check further conditions, otherwise do nothing - if ((fabs(d_ipo - d_imo) / fmin(d_ipo, d_imo)) > 0.01) { - // calculate the second derivative of the density in the imo and ipo cells - d2_rho_imo = calc_d2_rho(d_imt, d_imo, d_i, dx); - d2_rho_ipo = calc_d2_rho(d_i, d_ipo, d_ipt, dx); - // if condition 1 (Fryxell Eqn 38) (Colella Eqn 1.16.5) is true, check - // further conditions, otherwise do nothing - if ((d2_rho_imo * d2_rho_ipo) < 0) { - // calculate condition 5, pressure vs density jumps (Fryxell Eqn 39) - // (Colella Eqn 3.2) if c5 is true, set value of eta for discontinuity - // steepening - if ((fabs(p_ipo - p_imo) / fmin(p_ipo, p_imo)) < 0.1 * gamma * (fabs(d_ipo - d_imo) / fmin(d_ipo, d_imo))) { - // calculate first eta value (Fryxell Eqn 36) (Colella Eqn 1.16.5) - eta_i = calc_eta(d2_rho_imo, d2_rho_ipo, dx, d_imo, d_ipo); - // calculate steepening coefficient (Fryxell Eqn 40) (Colella - // Eqn 1.16) - eta_i = fmax(0, fmin(20 * (eta_i - 0.05), 1)); - - // calculate new left and right interface variables using monotonized - // slopes - del_q_imo = Calculate_Slope(d_imt, d_imo, d_i); - del_q_ipo = Calculate_Slope(d_i, d_ipo, d_ipt); - - // replace left and right interface values of density (Colella - // Eqn 1.14, 1.15) - d_L = d_L * (1 - eta_i) + (d_imo + 0.5 * del_q_imo) * eta_i; - d_R = d_R * (1 - eta_i) + (d_ipo - 0.5 * del_q_ipo) * eta_i; - } - } - } - #endif // STEEPENING - - #ifdef FLATTENING - Real F_imo, F_i, F_ipo; - // flatten shock fronts that are too narrow (see Fryxell Sec 3.1.3) - // calculate the shock steepness parameters (Fryxell Eqn 43) - // calculate the dimensionless flattening coefficients (Fryxell Eqn 45) - F_imo = fmax(0, fmin(1, 10 * (((p_i - p_imt) / (p_ipo - p_imth)) - 0.75))); - F_i = fmax(0, fmin(1, 10 * (((p_ipo - p_imo) / (p_ipt - p_imt)) - 0.75))); - F_ipo = fmax(0, fmin(1, 10 * (((p_ipt - p_i) / (p_ipth - p_imo)) - 0.75))); - // ensure that we are encountering a shock (Fryxell Eqns 46 & 47) - if (fabs(p_i - p_imt) / fmin(p_i, p_imt) < 1. / 3.) { - F_imo = 0; - } - if (fabs(p_ipo - p_imo) / fmin(p_ipo, p_imo) < 1. / 3.) { - F_i = 0; - } - if (fabs(p_ipt - p_i) / fmin(p_ipt, p_i) < 1. / 3.) { - F_ipo = 0; - } - if (vx_i - vx_imt > 0) { - F_imo = 0; - } - if (vx_ipo - vx_imo > 0) { - F_i = 0; - } - if (vx_ipt - vx_i > 0) { - F_ipo = 0; - } - // set the flattening coefficient (Fryxell Eqn 48) - if (p_ipo - p_imo < 0) { - F_i = fmax(F_i, F_ipo); - } else { - F_i = fmax(F_i, F_imo); - } - // modify the interface values - d_L = F_i * d_i + (1 - F_i) * d_L; - vx_L = F_i * vx_i + (1 - F_i) * vx_L; - vy_L = F_i * vy_i + (1 - F_i) * vy_L; - vz_L = F_i * vz_i + (1 - F_i) * vz_L; - p_L = F_i * p_i + (1 - F_i) * p_L; - #ifdef DE - ge_L = F_i * ge_i + (1 - F_i) * ge_L; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_L[i] = F_i * scalar_i[i] + (1 - F_i) * scalar_L[i]; - } - #endif // SCALAR - d_R = F_i * d_i + (1 - F_i) * d_R; - vx_R = F_i * vx_i + (1 - F_i) * vx_R; - vy_R = F_i * vy_i + (1 - F_i) * vy_R; - vz_R = F_i * vz_i + (1 - F_i) * vz_R; - p_R = F_i * p_i + (1 - F_i) * p_R; - #ifdef DE - ge_R = F_i * ge_i + (1 - F_i) * ge_R; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_R[i] = F_i * scalar_i[i] + (1 - F_i) * scalar_R[i]; - } - #endif // SCALAR - #endif // FLATTENING - - #ifndef VL - // #ifdef CTU - // compute sound speed in cell i - cs = sqrt(gamma * p_i / d_i); - - // compute a first guess at the left and right states by taking the average - // under the characteristic on each side that has the largest speed - - // recompute slope across cell for each variable Fryxell Eqn 29 - del_d = d_R - d_L; - del_vx = vx_R - vx_L; - del_vy = vy_R - vy_L; - del_vz = vz_R - vz_L; - del_p = p_R - p_L; - #ifdef DE - del_ge = ge_R - ge_L; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - del_scalar[i] = scalar_R[i] - scalar_L[i]; - } - #endif // SCALAR - - d_6 = 6.0 * (d_i - 0.5 * (d_L + d_R)); // Fryxell Eqn 30 - vx_6 = 6.0 * (vx_i - 0.5 * (vx_L + vx_R)); // Fryxell Eqn 30 - vy_6 = 6.0 * (vy_i - 0.5 * (vy_L + vy_R)); // Fryxell Eqn 30 - vz_6 = 6.0 * (vz_i - 0.5 * (vz_L + vz_R)); // Fryxell Eqn 30 - p_6 = 6.0 * (p_i - 0.5 * (p_L + p_R)); // Fryxell Eqn 30 - #ifdef DE - ge_6 = 6.0 * (ge_i - 0.5 * (ge_L + ge_R)); // Fryxell Eqn 30 - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_6[i] = 6.0 * (scalar_i[i] - 0.5 * (scalar_L[i] + scalar_R[i])); // Fryxell Eqn 30 - } - #endif // SCALAR - - // set speed of characteristics (v-c, v, v+c) using average values of v and - // c - lambda_m = vx_i - cs; - lambda_0 = vx_i; - lambda_p = vx_i + cs; - - // calculate betas (for left state guesses) - beta_m = fmax((lambda_m * dt / dx), 0.0); // Fryxell Eqn 59 - beta_0 = fmax((lambda_0 * dt / dx), 0.0); // Fryxell Eqn 59 - beta_p = fmax((lambda_p * dt / dx), 0.0); // Fryxell Eqn 59 - - // calculate alphas (for right state guesses) - alpha_m = fmax((-lambda_m * dt / dx), 0.0); // Fryxell Eqn 61 - alpha_0 = fmax((-lambda_0 * dt / dx), 0.0); // Fryxell Eqn 61 - alpha_p = fmax((-lambda_p * dt / dx), 0.0); // Fryxell Eqn 61 - - // average values under characteristics for left interface (Fryxell Eqn 60) - dL_m = d_L + 0.5 * alpha_m * (del_d + d_6 * (1 - (2. / 3.) * alpha_m)); - vxL_m = vx_L + 0.5 * alpha_m * (del_vx + vx_6 * (1 - (2. / 3.) * alpha_m)); - pL_m = p_L + 0.5 * alpha_m * (del_p + p_6 * (1 - (2. / 3.) * alpha_m)); - dL_0 = d_L + 0.5 * alpha_0 * (del_d + d_6 * (1 - (2. / 3.) * alpha_0)); - vyL_0 = vy_L + 0.5 * alpha_0 * (del_vy + vy_6 * (1 - (2. / 3.) * alpha_0)); - vzL_0 = vz_L + 0.5 * alpha_0 * (del_vz + vz_6 * (1 - (2. / 3.) * alpha_0)); - #ifdef DE - geL_0 = ge_L + 0.5 * alpha_0 * (del_ge + ge_6 * (1 - (2. / 3.) * alpha_0)); - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalarL_0[i] = scalar_L[i] + 0.5 * alpha_0 * (del_scalar[i] + scalar_6[i] * (1 - (2. / 3.) * alpha_0)); - } - #endif // SCALAR - pL_0 = p_L + 0.5 * alpha_0 * (del_p + p_6 * (1 - (2. / 3.) * alpha_0)); - vxL_p = vx_L + 0.5 * alpha_p * (del_vx + vx_6 * (1 - (2. / 3.) * alpha_p)); - pL_p = p_L + 0.5 * alpha_p * (del_p + p_6 * (1 - (2. / 3.) * alpha_p)); - - // average values under characteristics for right interface (Fryxell Eqn 58) - vxR_m = vx_R - 0.5 * beta_m * (del_vx - vx_6 * (1 - (2. / 3.) * beta_m)); - pR_m = p_R - 0.5 * beta_m * (del_p - p_6 * (1 - (2. / 3.) * beta_m)); - dR_0 = d_R - 0.5 * beta_0 * (del_d - d_6 * (1 - (2. / 3.) * beta_0)); - vyR_0 = vy_R - 0.5 * beta_0 * (del_vy - vy_6 * (1 - (2. / 3.) * beta_0)); - vzR_0 = vz_R - 0.5 * beta_0 * (del_vz - vz_6 * (1 - (2. / 3.) * beta_0)); - #ifdef DE - geR_0 = ge_R - 0.5 * beta_0 * (del_ge - ge_6 * (1 - (2. / 3.) * beta_0)); - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalarR_0[i] = scalar_R[i] - 0.5 * beta_0 * (del_scalar[i] - scalar_6[i] * (1 - (2. / 3.) * beta_0)); - } - #endif // SCALAR - pR_0 = p_R - 0.5 * beta_0 * (del_p - p_6 * (1 - (2. / 3.) * beta_0)); - dR_p = d_R - 0.5 * beta_p * (del_d - d_6 * (1 - (2. / 3.) * beta_p)); - vxR_p = vx_R - 0.5 * beta_p * (del_vx - vx_6 * (1 - (2. / 3.) * beta_p)); - pR_p = p_R - 0.5 * beta_p * (del_p - p_6 * (1 - (2. / 3.) * beta_p)); - - // as a first guess, use characteristics with the largest speeds - // for transverse velocities, use the 0 characteristic - // left - d_L = dL_m; - vx_L = vxL_m; - vy_L = vyL_0; - vz_L = vzL_0; - p_L = pL_m; - #ifdef DE - ge_L = geL_0; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_L[i] = scalarL_0[i]; - } - #endif // SCALAR - // right - d_R = dR_p; - vx_R = vxR_p; - vy_R = vyR_0; - vz_R = vzR_0; - p_R = pR_p; - #ifdef DE - ge_R = geR_0; - #endif // DE - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - scalar_R[i] = scalarR_0[i]; - } - #endif // SCALAR - - // correct these initial guesses by taking into account the number of - // characteristics on each side of the interface - - // calculate the 'guess' sound speeds - cl = sqrt(gamma * p_L / d_L); - cr = sqrt(gamma * p_R / d_L); - - // calculate the chi values (Fryxell Eqns 62 & 63) - chi_L_m = 1. / (2 * d_L * cl) * (vx_L - vxL_m - (p_L - pL_m) / (d_L * cl)); - chi_L_p = -1. / (2 * d_L * cl) * (vx_L - vxL_p + (p_L - pL_p) / (d_L * cl)); - chi_L_0 = (p_L - pL_0) / (d_L * d_L * cl * cl) + 1. / d_L - 1. / dL_0; - chi_R_m = 1. / (2 * d_R * cr) * (vx_R - vxR_m - (p_R - pR_m) / (d_R * cr)); - chi_R_p = -1. / (2 * d_R * cr) * (vx_R - vxR_p + (p_R - pR_p) / (d_R * cr)); - chi_R_0 = (p_R - pR_0) / (d_R * d_R * cr * cr) + 1. / d_R - 1. / dR_0; - - // set chi to 0 if characteristic velocity has the wrong sign (Fryxell Eqn - // 64) - if (lambda_m >= 0) { - chi_L_m = 0; - } - if (lambda_0 >= 0) { - chi_L_0 = 0; - } - if (lambda_p >= 0) { - chi_L_p = 0; - } - if (lambda_m <= 0) { - chi_R_m = 0; - } - if (lambda_0 <= 0) { - chi_R_0 = 0; - } - if (lambda_p <= 0) { - chi_R_p = 0; - } - - // use the chi values to correct the initial guesses and calculate final - // input states - p_L = p_L + (d_L * d_L * cl * cl) * (chi_L_p + chi_L_m); - vx_L = vx_L + d_L * cl * (chi_L_p - chi_L_m); - d_L = pow(((1.0 / d_L) - (chi_L_m + chi_L_0 + chi_L_p)), -1); - p_R = p_L + (d_R * d_R * cr * cr) * (chi_R_p + chi_R_m); - vx_R = vx_R + d_R * cr * (chi_R_p - chi_R_m); - d_R = pow(((1.0 / d_R) - (chi_R_m + chi_R_0 + chi_R_p)), -1); - #endif // CTU - - // Apply mimimum constraints - d_L = fmax(d_L, (Real)TINY_NUMBER); - d_R = fmax(d_R, (Real)TINY_NUMBER); - p_L = fmax(p_L, (Real)TINY_NUMBER); - p_R = fmax(p_R, (Real)TINY_NUMBER); - - // Convert the left and right states in the primitive to the conserved - // variables send final values back from kernel bounds_R refers to the right - // side of the i-1/2 interface - if (dir == 0) id = xid - 1 + yid * nx + zid * nx * ny; - if (dir == 1) id = xid + (yid - 1) * nx + zid * nx * ny; - if (dir == 2) id = xid + yid * nx + (zid - 1) * nx * ny; - dev_bounds_R[id] = d_L; - dev_bounds_R[o1 * n_cells + id] = d_L * vx_L; - dev_bounds_R[o2 * n_cells + id] = d_L * vy_L; - dev_bounds_R[o3 * n_cells + id] = d_L * vz_L; - dev_bounds_R[4 * n_cells + id] = p_L / (gamma - 1.0) + 0.5 * d_L * (vx_L * vx_L + vy_L * vy_L + vz_L * vz_L); - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_bounds_R[(5 + i) * n_cells + id] = d_L * scalar_L[i]; - } - #endif // SCALAR - #ifdef DE - dev_bounds_R[(n_fields - 1) * n_cells + id] = d_L * ge_L; - #endif // DE - // bounds_L refers to the left side of the i+1/2 interface - id = xid + yid * nx + zid * nx * ny; - dev_bounds_L[id] = d_R; - dev_bounds_L[o1 * n_cells + id] = d_R * vx_R; - dev_bounds_L[o2 * n_cells + id] = d_R * vy_R; - dev_bounds_L[o3 * n_cells + id] = d_R * vz_R; - dev_bounds_L[4 * n_cells + id] = p_R / (gamma - 1.0) + 0.5 * d_R * (vx_R * vx_R + vy_R * vy_R + vz_R * vz_R); - #ifdef SCALAR - for (int i = 0; i < NSCALARS; i++) { - dev_bounds_L[(5 + i) * n_cells + id] = d_R * scalar_R[i]; - } - #endif // SCALAR - #ifdef DE - dev_bounds_L[(n_fields - 1) * n_cells + id] = d_R * ge_R; - #endif // DE - } -} - -/*! \fn __device__ Real Calculate_Slope(Real q_imo, Real q_i, Real q_ipo) - * \brief Calculates the limited slope across a cell.*/ -__device__ Real Calculate_Slope(Real q_imo, Real q_i, Real q_ipo) -{ - Real del_q_L, del_q_R, del_q_C, del_q_G; - Real lim_slope_a, lim_slope_b, del_q_m; - - // Compute the left, right, and centered differences of the primitive - // variables Note that here L and R refer to locations relative to the cell - // center - - // left - del_q_L = q_i - q_imo; - // right - del_q_R = q_ipo - q_i; - // centered - del_q_C = 0.5 * (q_ipo - q_imo); - // Van Leer - if (del_q_L * del_q_R > 0.0) { - del_q_G = 2.0 * del_q_L * del_q_R / (del_q_L + del_q_R); - } else { - del_q_G = 0.0; - } - - // Monotonize the differences - lim_slope_a = fmin(fabs(del_q_L), fabs(del_q_R)); - lim_slope_b = fmin(fabs(del_q_C), fabs(del_q_G)); - - // Minmod limiter - // del_q_m = sgn_CUDA(del_q_C)*fmin(2.0*lim_slope_a, fabs(del_q_C)); - - // Van Leer limiter - del_q_m = sgn_CUDA(del_q_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); - - return del_q_m; -} - -/*! \fn __device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, - Real del_q_imo, Real del_q_i, Real del_q_ipo, Real *q_L, Real *q_R) - * \brief Calculates the left and right interface values for a cell using - parabolic reconstruction in the primitive variables with limited slopes - provided. Applies further monotonicity constraints.*/ -__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_q_imo, Real del_q_i, Real del_q_ipo, - Real *q_L, Real *q_R) -{ - // Calculate the left and right interface values using the limited slopes - *q_L = 0.5 * (q_i + q_imo) - (1.0 / 6.0) * (del_q_i - del_q_imo); - *q_R = 0.5 * (q_ipo + q_i) - (1.0 / 6.0) * (del_q_ipo - del_q_i); - - // Apply further monotonicity constraints to ensure interface values lie - // between neighboring cell-centered values - - // local maximum or minimum criterion (Fryxell Eqn 52, Fig 11) - if ((*q_R - q_i) * (q_i - *q_L) <= 0) *q_L = *q_R = q_i; - - // steep gradient criterion (Fryxell Eqn 53, Fig 12) - if (6.0 * (*q_R - *q_L) * (q_i - 0.5 * (*q_L + *q_R)) > (*q_R - *q_L) * (*q_R - *q_L)) { - *q_L = 3.0 * q_i - 2.0 * (*q_R); - } - if (6.0 * (*q_R - *q_L) * (q_i - 0.5 * (*q_L + *q_R)) < -(*q_R - *q_L) * (*q_R - *q_L)) { - *q_R = 3.0 * q_i - 2.0 * (*q_L); - } - - *q_L = fmax(fmin(q_i, q_imo), *q_L); - *q_L = fmin(fmax(q_i, q_imo), *q_L); - *q_R = fmax(fmin(q_i, q_ipo), *q_R); - *q_R = fmin(fmax(q_i, q_ipo), *q_R); -} - -/*! \fn calc_d2_rho - * \brief Returns the second derivative of rho across zone i. (Fryxell Eqn 35) - */ -__device__ Real calc_d2_rho(Real rho_imo, Real rho_i, Real rho_ipo, Real dx) -{ - return (1. / (6 * dx * dx)) * (rho_ipo - 2 * rho_i + rho_imo); -} - -/*! \fn calc_eta - * \brief Returns a dimensionless quantity relating the 1st and 3rd derivatives - See Fryxell Eqn 36. */ -__device__ Real calc_eta(Real d2rho_imo, Real d2rho_ipo, Real dx, Real rho_imo, Real rho_ipo) -{ - Real A, B; - - A = (d2rho_ipo - d2rho_imo) * dx * dx; - B = 1.0 / (rho_ipo - rho_imo); - - return -A * B; -} - -#endif // PPMP diff --git a/src/reconstruction/ppmp_cuda.h b/src/reconstruction/ppmp_cuda.h deleted file mode 100644 index 064d328fa..000000000 --- a/src/reconstruction/ppmp_cuda.h +++ /dev/null @@ -1,40 +0,0 @@ -/*! \file ppmp_cuda.h - * \brief Declarations of the cuda ppmp kernels. */ - -#ifndef PPMP_CUDA_H -#define PPMP_CUDA_H - -#include "../global/global.h" - -/*! \fn __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real - *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real - gamma, int dir, int n_fields) - * \brief When passed a stencil of conserved variables, returns the left and - right boundary values for the interface calculated using ppm with limiting in - the primitive variables. */ -__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, - int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); - -/*! \fn __device__ Real Calculate_Slope(Real q_imo, Real q_i, Real q_ipo) - * \brief Calculates the limited slope across a cell.*/ -__device__ Real Calculate_Slope(Real q_imo, Real q_i, Real q_ipo); - -/*! \fn __device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, - Real *q_L, Real *q_R) - * \brief Calculates the left and right interface values for a cell using - parabolic reconstruction in the primitive variables with limited slopes - provided. Applies further monotonicity constraints.*/ -__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_q_imo, Real del_q_i, Real del_q_ipo, - Real *q_L, Real *q_R); - -/*! \fn calc_d2_rho - * \brief Returns the second derivative of rho across zone i. (Fryxell Eqn 35) - */ -__device__ Real calc_d2_rho(Real rho_imo, Real rho_i, Real rho_ipo, Real dx); - -/*! \fn calc_eta - * \brief Returns a dimensionless quantity relating the 1st and 3rd derivatives - See Fryxell Eqn 36. */ -__device__ Real calc_eta(Real d2rho_imo, Real d2rho_ipo, Real dx, Real rho_imo, Real rho_ipo); - -#endif // PPMP_CUDA_H diff --git a/src/reconstruction/reconstruction_internals.h b/src/reconstruction/reconstruction_internals.h index 56c8a03c4..9893cc9f0 100644 --- a/src/reconstruction/reconstruction_internals.h +++ b/src/reconstruction/reconstruction_internals.h @@ -837,6 +837,107 @@ void __device__ __host__ __inline__ PPM_Single_Variable(Real const &cell_im2, Re } // ===================================================================================================================== +// ===================================================================================================================== +/*! + * \brief Compute the primitive PPM interfaces. Calls PPM_Single_Variable on each field + * + * \param[in] cell_im2 The state of the cell at i-2 + * \param[in] cell_im1 The state of the cell at i-1 + * \param[in] cell_i The state of the cell at i + * \param[in] cell_ip1 The state of the cell at i+1 + * \param[in] cell_ip2 The state of the cell at i+2 + * \return auto The left interface at i+1/2 and the right interface at i-1/2 in that order + */ +auto __device__ __host__ __inline__ PPM_Interfaces(hydro_utilities::Primitive const &cell_im2, + hydro_utilities::Primitive const &cell_im1, + hydro_utilities::Primitive const &cell_i, + hydro_utilities::Primitive const &cell_ip1, + hydro_utilities::Primitive const &cell_ip2) +{ + hydro_utilities::Primitive interface_R_imh, interface_L_iph; + + reconstruction::PPM_Single_Variable(cell_im2.density, cell_im1.density, cell_i.density, cell_ip1.density, + cell_ip2.density, interface_L_iph.density, interface_R_imh.density); + reconstruction::PPM_Single_Variable(cell_im2.velocity.x, cell_im1.velocity.x, cell_i.velocity.x, cell_ip1.velocity.x, + cell_ip2.velocity.x, interface_L_iph.velocity.x, interface_R_imh.velocity.x); + reconstruction::PPM_Single_Variable(cell_im2.velocity.y, cell_im1.velocity.y, cell_i.velocity.y, cell_ip1.velocity.y, + cell_ip2.velocity.y, interface_L_iph.velocity.y, interface_R_imh.velocity.y); + reconstruction::PPM_Single_Variable(cell_im2.velocity.z, cell_im1.velocity.z, cell_i.velocity.z, cell_ip1.velocity.z, + cell_ip2.velocity.z, interface_L_iph.velocity.z, interface_R_imh.velocity.z); + reconstruction::PPM_Single_Variable(cell_im2.pressure, cell_im1.pressure, cell_i.pressure, cell_ip1.pressure, + cell_ip2.pressure, interface_L_iph.pressure, interface_R_imh.pressure); + +#ifdef MHD + reconstruction::PPM_Single_Variable(cell_im2.magnetic.y, cell_im1.magnetic.y, cell_i.magnetic.y, cell_ip1.magnetic.y, + cell_ip2.magnetic.y, interface_L_iph.magnetic.y, interface_R_imh.magnetic.y); + reconstruction::PPM_Single_Variable(cell_im2.magnetic.z, cell_im1.magnetic.z, cell_i.magnetic.z, cell_ip1.magnetic.z, + cell_ip2.magnetic.z, interface_L_iph.magnetic.z, interface_R_imh.magnetic.z); +#endif // MHD + +#ifdef DE + reconstruction::PPM_Single_Variable(cell_im2.gas_energy_specific, cell_im1.gas_energy_specific, + cell_i.gas_energy_specific, cell_ip1.gas_energy_specific, + cell_ip2.gas_energy_specific, interface_L_iph.gas_energy_specific, + interface_R_imh.gas_energy_specific); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + reconstruction::PPM_Single_Variable(cell_im2.scalar_specific[i], cell_im1.scalar_specific[i], + cell_i.scalar_specific[i], cell_ip1.scalar_specific[i], + cell_ip2.scalar_specific[i], interface_L_iph.scalar_specific[i], + interface_R_imh.scalar_specific[i]); + } +#endif // DE + + struct LocalReturnStruct { + hydro_utilities::Primitive left, right; + }; + return LocalReturnStruct{interface_L_iph, interface_R_imh}; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Compute the characteristic PPM interfaces. Calls PPM_Single_Variable on each field + * + * \param[in] cell_im2 The state of the cell at i-2 + * \param[in] cell_im1 The state of the cell at i-1 + * \param[in] cell_i The state of the cell at i + * \param[in] cell_ip1 The state of the cell at i+1 + * \param[in] cell_ip2 The state of the cell at i+2 + * \return auto The left interface at i+1/2 and the right interface at i-1/2 in that order + */ +auto __device__ __host__ __inline__ PPM_Interfaces(Characteristic const &cell_im2, Characteristic const &cell_im1, + Characteristic const &cell_i, Characteristic const &cell_ip1, + Characteristic const &cell_ip2) +{ + Characteristic interface_R_imh, interface_L_iph; + + reconstruction::PPM_Single_Variable(cell_im2.a0, cell_im1.a0, cell_i.a0, cell_ip1.a0, cell_ip2.a0, interface_L_iph.a0, + interface_R_imh.a0); + reconstruction::PPM_Single_Variable(cell_im2.a1, cell_im1.a1, cell_i.a1, cell_ip1.a1, cell_ip2.a1, interface_L_iph.a1, + interface_R_imh.a1); + reconstruction::PPM_Single_Variable(cell_im2.a2, cell_im1.a2, cell_i.a2, cell_ip1.a2, cell_ip2.a2, interface_L_iph.a2, + interface_R_imh.a2); + reconstruction::PPM_Single_Variable(cell_im2.a3, cell_im1.a3, cell_i.a3, cell_ip1.a3, cell_ip2.a3, interface_L_iph.a3, + interface_R_imh.a3); + reconstruction::PPM_Single_Variable(cell_im2.a4, cell_im1.a4, cell_i.a4, cell_ip1.a4, cell_ip2.a4, interface_L_iph.a4, + interface_R_imh.a4); + +#ifdef MHD + reconstruction::PPM_Single_Variable(cell_im2.a5, cell_im1.a5, cell_i.a5, cell_ip1.a5, cell_ip2.a5, interface_L_iph.a5, + interface_R_imh.a5); + reconstruction::PPM_Single_Variable(cell_im2.a6, cell_im1.a6, cell_i.a6, cell_ip1.a6, cell_ip2.a6, interface_L_iph.a6, + interface_R_imh.a6); +#endif // MHD + + struct LocalReturnStruct { + Characteristic left, right; + }; + return LocalReturnStruct{interface_L_iph, interface_R_imh}; +} +// ===================================================================================================================== + // ===================================================================================================================== /*! * \brief Write the interface data to the appropriate arrays diff --git a/src/utils/error_handling.cpp b/src/utils/error_handling.cpp index 004254550..3bb88254e 100644 --- a/src/utils/error_handling.cpp +++ b/src/utils/error_handling.cpp @@ -95,11 +95,6 @@ void Check_Configuration(Parameters const& P) #error "MHD only supports the HLLD Riemann Solver" #endif //! HLLD or EXACT or ROE or HLL or HLLC - // May only use certain reconstructions - #if ((defined(PCM) + defined(PLMP) + defined(PLMC) + defined(PPMC)) != 1) || defined(PPMP) - #error "MHD does not support PPMP reconstruction." - #endif // Reconstruction check - // must have HDF5 #if defined(OUTPUT) and (not defined(HDF5)) #error "MHD only supports HDF5 output" From a028b12a1c7c9f3b5d985549b3f7e5ce3d7df562 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 29 Apr 2024 16:20:02 -0400 Subject: [PATCH 28/29] Fix bad rebase --- src/reconstruction/reconstruction_internals.h | 25 ++++--- .../reconstruction_internals_tests.cu | 73 ------------------- 2 files changed, 15 insertions(+), 83 deletions(-) diff --git a/src/reconstruction/reconstruction_internals.h b/src/reconstruction/reconstruction_internals.h index 9893cc9f0..54d0dc291 100644 --- a/src/reconstruction/reconstruction_internals.h +++ b/src/reconstruction/reconstruction_internals.h @@ -858,20 +858,25 @@ auto __device__ __host__ __inline__ PPM_Interfaces(hydro_utilities::Primitive co reconstruction::PPM_Single_Variable(cell_im2.density, cell_im1.density, cell_i.density, cell_ip1.density, cell_ip2.density, interface_L_iph.density, interface_R_imh.density); - reconstruction::PPM_Single_Variable(cell_im2.velocity.x, cell_im1.velocity.x, cell_i.velocity.x, cell_ip1.velocity.x, - cell_ip2.velocity.x, interface_L_iph.velocity.x, interface_R_imh.velocity.x); - reconstruction::PPM_Single_Variable(cell_im2.velocity.y, cell_im1.velocity.y, cell_i.velocity.y, cell_ip1.velocity.y, - cell_ip2.velocity.y, interface_L_iph.velocity.y, interface_R_imh.velocity.y); - reconstruction::PPM_Single_Variable(cell_im2.velocity.z, cell_im1.velocity.z, cell_i.velocity.z, cell_ip1.velocity.z, - cell_ip2.velocity.z, interface_L_iph.velocity.z, interface_R_imh.velocity.z); + reconstruction::PPM_Single_Variable(cell_im2.velocity.x(), cell_im1.velocity.x(), cell_i.velocity.x(), + cell_ip1.velocity.x(), cell_ip2.velocity.x(), interface_L_iph.velocity.x(), + interface_R_imh.velocity.x()); + reconstruction::PPM_Single_Variable(cell_im2.velocity.y(), cell_im1.velocity.y(), cell_i.velocity.y(), + cell_ip1.velocity.y(), cell_ip2.velocity.y(), interface_L_iph.velocity.y(), + interface_R_imh.velocity.y()); + reconstruction::PPM_Single_Variable(cell_im2.velocity.z(), cell_im1.velocity.z(), cell_i.velocity.z(), + cell_ip1.velocity.z(), cell_ip2.velocity.z(), interface_L_iph.velocity.z(), + interface_R_imh.velocity.z()); reconstruction::PPM_Single_Variable(cell_im2.pressure, cell_im1.pressure, cell_i.pressure, cell_ip1.pressure, cell_ip2.pressure, interface_L_iph.pressure, interface_R_imh.pressure); #ifdef MHD - reconstruction::PPM_Single_Variable(cell_im2.magnetic.y, cell_im1.magnetic.y, cell_i.magnetic.y, cell_ip1.magnetic.y, - cell_ip2.magnetic.y, interface_L_iph.magnetic.y, interface_R_imh.magnetic.y); - reconstruction::PPM_Single_Variable(cell_im2.magnetic.z, cell_im1.magnetic.z, cell_i.magnetic.z, cell_ip1.magnetic.z, - cell_ip2.magnetic.z, interface_L_iph.magnetic.z, interface_R_imh.magnetic.z); + reconstruction::PPM_Single_Variable(cell_im2.magnetic.y(), cell_im1.magnetic.y(), cell_i.magnetic.y(), + cell_ip1.magnetic.y(), cell_ip2.magnetic.y(), interface_L_iph.magnetic.y(), + interface_R_imh.magnetic.y()); + reconstruction::PPM_Single_Variable(cell_im2.magnetic.z(), cell_im1.magnetic.z(), cell_i.magnetic.z(), + cell_ip1.magnetic.z(), cell_ip2.magnetic.z(), interface_L_iph.magnetic.z(), + interface_R_imh.magnetic.z()); #endif // MHD #ifdef DE diff --git a/src/reconstruction/reconstruction_internals_tests.cu b/src/reconstruction/reconstruction_internals_tests.cu index ec3c579d2..6cdc6759f 100644 --- a/src/reconstruction/reconstruction_internals_tests.cu +++ b/src/reconstruction/reconstruction_internals_tests.cu @@ -447,37 +447,6 @@ TEST(tALLReconstructionVanLeerSlopeCharacteristic, CorrectInputExpectCorrectOutp testing_utilities::Check_Results(fiducial_data.a4, test_data.a4, "a4"); } -TEST(tHYDROReconstructionMonotizeParabolicInterface, CorrectInputExpectCorrectOutput) -{ - // Input Data - - hydro_utilities::Primitive const cell_i{1.4708046701, {9.5021020181, 3.7123503442, 4.6476103466}, 3.7096802847}; - hydro_utilities::Primitive const cell_im1{3.9547588941, {3.1552319951, 3.0209247624, 9.5841013261}, 2.2945188332}; - hydro_utilities::Primitive const cell_ip1{5.1973323534, {6.9132613767, 1.8397298636, 5.341960387}, 9.093498542}; - hydro_utilities::Primitive interface_L_iph{6.7787324804, {9.5389820358, 9.8522754567, 7.8305142852}, 2.450533435}; - hydro_utilities::Primitive interface_R_imh{4.8015193892, {5.9124263972, 8.7513040382, 8.3659359773}, 1.339777121}; - - // Get test data - reconstruction::Monotonize_Parabolic_Interface(cell_i, cell_im1, cell_ip1, interface_L_iph, interface_R_imh); - - // Check results - hydro_utilities::Primitive const fiducial_interface_L{ - 1.4708046700999999, {9.5021020181000004, 3.7123503441999999, 4.6476103465999996}, 3.7096802847000001}; - hydro_utilities::Primitive const fiducial_interface_R{ - 1.4708046700999999, {9.428341982700001, 3.7123503441999999, 4.6476103465999996}, 3.7096802847000001}; - testing_utilities::Check_Results(fiducial_interface_L.density, interface_L_iph.density, "density"); - testing_utilities::Check_Results(fiducial_interface_L.velocity.x(), interface_L_iph.velocity.x(), "velocity.x()"); - testing_utilities::Check_Results(fiducial_interface_L.velocity.y(), interface_L_iph.velocity.y(), "velocity.y()"); - testing_utilities::Check_Results(fiducial_interface_L.velocity.z(), interface_L_iph.velocity.z(), "velocity.z()"); - testing_utilities::Check_Results(fiducial_interface_L.pressure, interface_L_iph.pressure, "pressure"); - - testing_utilities::Check_Results(fiducial_interface_R.density, interface_R_imh.density, "density"); - testing_utilities::Check_Results(fiducial_interface_R.velocity.x(), interface_R_imh.velocity.x(), "velocity.x()"); - testing_utilities::Check_Results(fiducial_interface_R.velocity.y(), interface_R_imh.velocity.y(), "velocity.y()"); - testing_utilities::Check_Results(fiducial_interface_R.velocity.z(), interface_R_imh.velocity.z(), "velocity.z()"); - testing_utilities::Check_Results(fiducial_interface_R.pressure, interface_R_imh.pressure, "pressure"); -} - TEST(tALLReconstructionCalcInterfaceLinear, CorrectInputExpectCorrectOutput) { // Setup input data @@ -513,48 +482,6 @@ TEST(tALLReconstructionCalcInterfaceLinear, CorrectInputExpectCorrectOutput) #endif // MHD } -TEST(tALLReconstructionCalcInterfaceParabolic, CorrectInputExpectCorrectOutput) -{ - // Setup input data -#ifdef MHD - hydro_utilities::Primitive cell_i{1, {2, 3, 4}, 5, {6, 7, 8}}; - hydro_utilities::Primitive cell_im1{6, {7, 8, 9}, 10, {11, 12, 13}}; - hydro_utilities::Primitive slopes_i{14, {15, 16, 17}, 18, {19, 20, 21}}; - hydro_utilities::Primitive slopes_im1{22, {23, 24, 25}, 26, {27, 28, 29}}; -#else // MHD - hydro_utilities::Primitive cell_i{1, {2, 3, 4}, 5}; - hydro_utilities::Primitive cell_im1{6, {7, 8, 9}, 10}; - hydro_utilities::Primitive slopes_i{14, {15, 16, 17}, 18}; - hydro_utilities::Primitive slopes_im1{22, {23, 24, 25}, 26}; -#endif // MHD - - // Get test data - auto test_data = reconstruction::Calc_Interface_Parabolic(cell_i, cell_im1, slopes_i, slopes_im1); - - // Check results -#ifdef MHD - hydro_utilities::Primitive const fiducial_data{4.833333333333333, - {5.833333333333333, 6.833333333333333, 7.833333333333333}, - 8.8333333333333339, - {0.0, 10.833333333333334, 11.833333333333334}}; - testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density"); - testing_utilities::Check_Results(fiducial_data.velocity.x(), test_data.velocity.x(), "velocity.x()"); - testing_utilities::Check_Results(fiducial_data.velocity.y(), test_data.velocity.y(), "velocity.y()"); - testing_utilities::Check_Results(fiducial_data.velocity.z(), test_data.velocity.z(), "velocity.z()"); - testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); - testing_utilities::Check_Results(fiducial_data.magnetic.y(), test_data.magnetic.y(), "magnetic.y()"); - testing_utilities::Check_Results(fiducial_data.magnetic.z(), test_data.magnetic.z(), "magnetic.z()"); -#else // MHD - hydro_utilities::Primitive const fiducial_data{ - 4.833333333333333, {5.833333333333333, 6.833333333333333, 7.833333333333333}, 8.8333333333333339}; - testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density"); - testing_utilities::Check_Results(fiducial_data.velocity.x(), test_data.velocity.x(), "velocity.x()"); - testing_utilities::Check_Results(fiducial_data.velocity.y(), test_data.velocity.y(), "velocity.y()"); - testing_utilities::Check_Results(fiducial_data.velocity.z(), test_data.velocity.z(), "velocity.z()"); - testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); -#endif // MHD -} - TEST(tALLReconstructionPPMSingleVariable, CorrectInputExpectCorrectOutput) { // Set up PRNG to use From f07c6b799effd442c5933845083afcdbb9a940a7 Mon Sep 17 00:00:00 2001 From: Bob Caddy Date: Mon, 29 Apr 2024 16:58:30 -0400 Subject: [PATCH 29/29] Restore fuction that got lost in rebase --- src/reconstruction/ppm_cuda.cu | 296 +++++++++++++++++++++++++++++++++ 1 file changed, 296 insertions(+) diff --git a/src/reconstruction/ppm_cuda.cu b/src/reconstruction/ppm_cuda.cu index 6c974233a..02abaf9bb 100644 --- a/src/reconstruction/ppm_cuda.cu +++ b/src/reconstruction/ppm_cuda.cu @@ -15,6 +15,302 @@ #include "../utils/hydro_utilities.h" #endif +void __device__ __host__ PPM_Characteristic_Evolution(hydro_utilities::Primitive const cell_i, Real const dt, + Real const dx, Real const gamma, + hydro_utilities::Primitive &interface_R_imh, + hydro_utilities::Primitive &interface_L_iph) +{ + // This is the beginning of the characteristic tracing + // Step 8 - Compute the coefficients for the monotonized parabolic + // interpolation function + // Stone Eqn 54 + hydro_utilities::Primitive interface_slope; + interface_slope.density = interface_L_iph.density - interface_R_imh.density; + interface_slope.velocity.x() = interface_L_iph.velocity.x() - interface_R_imh.velocity.x(); + interface_slope.velocity.y() = interface_L_iph.velocity.y() - interface_R_imh.velocity.y(); + interface_slope.velocity.z() = interface_L_iph.velocity.z() - interface_R_imh.velocity.z(); + interface_slope.pressure = interface_L_iph.pressure - interface_R_imh.pressure; + + Real const d_6 = 6.0 * (cell_i.density - 0.5 * (interface_R_imh.density + interface_L_iph.density)); + Real const vx_6 = 6.0 * (cell_i.velocity.x() - 0.5 * (interface_R_imh.velocity.x() + interface_L_iph.velocity.x())); + Real const vy_6 = 6.0 * (cell_i.velocity.y() - 0.5 * (interface_R_imh.velocity.y() + interface_L_iph.velocity.y())); + Real const vz_6 = 6.0 * (cell_i.velocity.z() - 0.5 * (interface_R_imh.velocity.z() + interface_L_iph.velocity.z())); + Real const p_6 = 6.0 * (cell_i.pressure - 0.5 * (interface_R_imh.pressure + interface_L_iph.pressure)); + +#ifdef DE + interface_slope.gas_energy_specific = interface_L_iph.gas_energy_specific - interface_R_imh.gas_energy_specific; + Real const ge_6 = 6.0 * (cell_i.gas_energy_specific - + 0.5 * (interface_R_imh.gas_energy_specific + interface_L_iph.gas_energy_specific)); +#endif // DE + +#ifdef SCALAR + Real scalar_6[NSCALARS]; + for (int i = 0; i < NSCALARS; i++) { + interface_slope.scalar_specific[i] = interface_L_iph.scalar_specific[i] - interface_R_imh.scalar_specific[i]; + scalar_6[i] = 6.0 * (cell_i.scalar_specific[i] - + 0.5 * (interface_R_imh.scalar_specific[i] + interface_L_iph.scalar_specific[i])); + } +#endif // SCALAR + + // Compute the eigenvalues of the linearized equations in the + // primitive variables using the cell-centered primitive variables + + // recalculate the adiabatic sound speed in cell i + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); + + Real const lambda_m = cell_i.velocity.x() - sound_speed; + Real const lambda_0 = cell_i.velocity.x(); + Real const lambda_p = cell_i.velocity.x() + sound_speed; + + // Step 9 - Compute the left and right interface values using monotonized + // parabolic interpolation + // Stone Eqns 55 & 56 + + // largest eigenvalue + Real const lambda_max = fmax(lambda_p, (Real)0); + // smallest eigenvalue + Real const lambda_min = fmin(lambda_m, (Real)0); + + // left interface value, i+1/2 + Real const dtodx = dt / dx; + interface_L_iph.density = + interface_L_iph.density - + lambda_max * (0.5 * dtodx) * (interface_slope.density - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * d_6); + interface_L_iph.velocity.x() = + interface_L_iph.velocity.x() - + lambda_max * (0.5 * dtodx) * (interface_slope.velocity.x() - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vx_6); + interface_L_iph.velocity.y() = + interface_L_iph.velocity.y() - + lambda_max * (0.5 * dtodx) * (interface_slope.velocity.y() - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vy_6); + interface_L_iph.velocity.z() = + interface_L_iph.velocity.z() - + lambda_max * (0.5 * dtodx) * (interface_slope.velocity.z() - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vz_6); + interface_L_iph.pressure = + interface_L_iph.pressure - + lambda_max * (0.5 * dtodx) * (interface_slope.pressure - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * p_6); + + // right interface value, i-1/2 + interface_R_imh.density = + interface_R_imh.density - + lambda_min * (0.5 * dtodx) * (interface_slope.density + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * d_6); + interface_R_imh.velocity.x() = + interface_R_imh.velocity.x() - + lambda_min * (0.5 * dtodx) * (interface_slope.velocity.x() + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vx_6); + interface_R_imh.velocity.y() = + interface_R_imh.velocity.y() - + lambda_min * (0.5 * dtodx) * (interface_slope.velocity.y() + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vy_6); + interface_R_imh.velocity.z() = + interface_R_imh.velocity.z() - + lambda_min * (0.5 * dtodx) * (interface_slope.velocity.z() + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vz_6); + interface_R_imh.pressure = + interface_R_imh.pressure - + lambda_min * (0.5 * dtodx) * (interface_slope.pressure + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * p_6); + +#ifdef DE + interface_L_iph.gas_energy_specific = + interface_L_iph.gas_energy_specific - + lambda_max * (0.5 * dtodx) * + (interface_slope.gas_energy_specific - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * ge_6); + interface_R_imh.gas_energy_specific = + interface_R_imh.gas_energy_specific - + lambda_min * (0.5 * dtodx) * + (interface_slope.gas_energy_specific + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * ge_6); +#endif // DE + +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_L_iph.scalar_specific[i] = + interface_L_iph.scalar_specific[i] - + lambda_max * (0.5 * dtodx) * + (interface_slope.scalar_specific[i] - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * scalar_6[i]); + interface_R_imh.scalar_specific[i] = + interface_R_imh.scalar_specific[i] - + lambda_min * (0.5 * dtodx) * + (interface_slope.scalar_specific[i] + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * scalar_6[i]); + } +#endif // SCALAR + + // Step 10 - Perform the characteristic tracing + // Stone Eqns 57 - 60 + + // left-hand interface value, i+1/2 + Real sum_1 = 0, sum_2 = 0, sum_3 = 0, sum_4 = 0, sum_5 = 0; +#ifdef DE + Real sum_ge = 0; + Real chi_ge = 0; +#endif // DE +#ifdef SCALAR + Real chi_scalar[NSCALARS]; + Real sum_scalar[NSCALARS]; + for (Real &val : sum_scalar) { + val = 0; + } +#endif // SCALAR + + if (lambda_m >= 0) { + Real const A = (0.5 * dtodx) * (lambda_p - lambda_m); + Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_m * lambda_m); + + Real const chi_1 = A * (interface_slope.density - d_6) + B * d_6; + Real const chi_2 = A * (interface_slope.velocity.x() - vx_6) + B * vx_6; + Real const chi_3 = A * (interface_slope.velocity.y() - vy_6) + B * vy_6; + Real const chi_4 = A * (interface_slope.velocity.z() - vz_6) + B * vz_6; + Real const chi_5 = A * (interface_slope.pressure - p_6) + B * p_6; + + sum_1 += -0.5 * (cell_i.density * chi_2 / sound_speed - chi_5 / (sound_speed * sound_speed)); + sum_2 += 0.5 * (chi_2 - chi_5 / (sound_speed * cell_i.density)); + sum_5 += -0.5 * (cell_i.density * chi_2 * sound_speed - chi_5); + } + if (lambda_0 >= 0) { + Real const A = (0.5 * dtodx) * (lambda_p - lambda_0); + Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_0 * lambda_0); + + Real const chi_1 = A * (interface_slope.density - d_6) + B * d_6; + Real const chi_2 = A * (interface_slope.velocity.x() - vx_6) + B * vx_6; + Real const chi_3 = A * (interface_slope.velocity.y() - vy_6) + B * vy_6; + Real const chi_4 = A * (interface_slope.velocity.z() - vz_6) + B * vz_6; + Real const chi_5 = A * (interface_slope.pressure - p_6) + B * p_6; +#ifdef DE + chi_ge = A * (interface_slope.gas_energy_specific - ge_6) + B * ge_6; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + chi_scalar[i] = A * (interface_slope.scalar_specific[i] - scalar_6[i]) + B * scalar_6[i]; + } +#endif // SCALAR + + sum_1 += chi_1 - chi_5 / (sound_speed * sound_speed); + sum_3 += chi_3; + sum_4 += chi_4; +#ifdef DE + sum_ge += chi_ge; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] += chi_scalar[i]; + } +#endif // SCALAR + } + if (lambda_p >= 0) { + Real const A = (0.5 * dtodx) * (lambda_p - lambda_p); + Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_p * lambda_p); + + Real const chi_1 = A * (interface_slope.density - d_6) + B * d_6; + Real const chi_2 = A * (interface_slope.velocity.x() - vx_6) + B * vx_6; + Real const chi_3 = A * (interface_slope.velocity.y() - vy_6) + B * vy_6; + Real const chi_4 = A * (interface_slope.velocity.z() - vz_6) + B * vz_6; + Real const chi_5 = A * (interface_slope.pressure - p_6) + B * p_6; + + sum_1 += 0.5 * (cell_i.density * chi_2 / sound_speed + chi_5 / (sound_speed * sound_speed)); + sum_2 += 0.5 * (chi_2 + chi_5 / (sound_speed * cell_i.density)); + sum_5 += 0.5 * (cell_i.density * chi_2 * sound_speed + chi_5); + } + + // add the corrections to the initial guesses for the interface values + interface_L_iph.density += sum_1; + interface_L_iph.velocity.x() += sum_2; + interface_L_iph.velocity.y() += sum_3; + interface_L_iph.velocity.z() += sum_4; + interface_L_iph.pressure += sum_5; +#ifdef DE + interface_L_iph.gas_energy_specific += sum_ge; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_L_iph.scalar_specific[i] += sum_scalar[i]; + } +#endif // SCALAR + + // right-hand interface value, i-1/2 + sum_1 = 0; + sum_2 = 0; + sum_3 = 0; + sum_4 = 0; + sum_5 = 0; +#ifdef DE + sum_ge = 0; +#endif // DE +#ifdef SCALAR + for (Real &val : sum_scalar) { + val = 0; + } +#endif // SCALAR + if (lambda_m <= 0) { + Real const C = (0.5 * dtodx) * (lambda_m - lambda_m); + Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_m * lambda_m); + + Real const chi_1 = C * (interface_slope.density + d_6) + D * d_6; + Real const chi_2 = C * (interface_slope.velocity.x() + vx_6) + D * vx_6; + Real const chi_3 = C * (interface_slope.velocity.y() + vy_6) + D * vy_6; + Real const chi_4 = C * (interface_slope.velocity.z() + vz_6) + D * vz_6; + Real const chi_5 = C * (interface_slope.pressure + p_6) + D * p_6; + + sum_1 += -0.5 * (cell_i.density * chi_2 / sound_speed - chi_5 / (sound_speed * sound_speed)); + sum_2 += 0.5 * (chi_2 - chi_5 / (sound_speed * cell_i.density)); + sum_5 += -0.5 * (cell_i.density * chi_2 * sound_speed - chi_5); + } + if (lambda_0 <= 0) { + Real const C = (0.5 * dtodx) * (lambda_m - lambda_0); + Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_0 * lambda_0); + + Real const chi_1 = C * (interface_slope.density + d_6) + D * d_6; + Real const chi_2 = C * (interface_slope.velocity.x() + vx_6) + D * vx_6; + Real const chi_3 = C * (interface_slope.velocity.y() + vy_6) + D * vy_6; + Real const chi_4 = C * (interface_slope.velocity.z() + vz_6) + D * vz_6; + Real const chi_5 = C * (interface_slope.pressure + p_6) + D * p_6; +#ifdef DE + chi_ge = C * (interface_slope.gas_energy_specific + ge_6) + D * ge_6; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + chi_scalar[i] = C * (interface_slope.scalar_specific[i] + scalar_6[i]) + D * scalar_6[i]; + } +#endif // SCALAR + + sum_1 += chi_1 - chi_5 / (sound_speed * sound_speed); + sum_3 += chi_3; + sum_4 += chi_4; +#ifdef DE + sum_ge += chi_ge; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] += chi_scalar[i]; + } +#endif // SCALAR + } + if (lambda_p <= 0) { + Real const C = (0.5 * dtodx) * (lambda_m - lambda_p); + Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_p * lambda_p); + + Real const chi_1 = C * (interface_slope.density + d_6) + D * d_6; + Real const chi_2 = C * (interface_slope.velocity.x() + vx_6) + D * vx_6; + Real const chi_3 = C * (interface_slope.velocity.y() + vy_6) + D * vy_6; + Real const chi_4 = C * (interface_slope.velocity.z() + vz_6) + D * vz_6; + Real const chi_5 = C * (interface_slope.pressure + p_6) + D * p_6; + + sum_1 += 0.5 * (cell_i.density * chi_2 / sound_speed + chi_5 / (sound_speed * sound_speed)); + sum_2 += 0.5 * (chi_2 + chi_5 / (sound_speed * cell_i.density)); + sum_5 += 0.5 * (cell_i.density * chi_2 * sound_speed + chi_5); + } + + // add the corrections + interface_R_imh.density += sum_1; + interface_R_imh.velocity.x() += sum_2; + interface_R_imh.velocity.y() += sum_3; + interface_R_imh.velocity.z() += sum_4; + interface_R_imh.pressure += sum_5; +#ifdef DE + interface_R_imh.gas_energy_specific += sum_ge; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_R_imh.scalar_specific[i] += sum_scalar[i]; + } +#endif // SCALAR + + // This is the end of the characteristic tracing +} // ===================================================================================================================== template __global__ __launch_bounds__(TPB) void PPM_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx,