Skip to content

Commit

Permalink
Fixed performance issues on GPUs
Browse files Browse the repository at this point in the history
- Force-inline device functions (KOKKOS_FORCEINLINE_FUNCTION) and mark host helper templates inline
- Fixed thread ordering on quadrature points
  • Loading branch information
Rohit-Kakodkar committed Oct 4, 2024
1 parent 42a4240 commit 36637f9
Show file tree
Hide file tree
Showing 12 changed files with 182 additions and 160 deletions.
2 changes: 1 addition & 1 deletion include/algorithms/divergence.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ namespace algorithms {
template <typename MemberType, typename IteratorType, typename VectorFieldType,
typename QuadratureType, typename CallableType,
std::enable_if_t<(VectorFieldType::isChunkViewType), int> = 0>
NOINLINE KOKKOS_FUNCTION void divergence(
KOKKOS_FORCEINLINE_FUNCTION void divergence(
const MemberType &team, const IteratorType &iterator,
const specfem::compute::partial_derivatives &partial_derivatives,
const Kokkos::View<type_real *,
Expand Down
33 changes: 16 additions & 17 deletions include/algorithms/gradient.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ namespace algorithms {
template <typename MemberType, typename IteratorType, typename ViewType,
typename QuadratureType, typename CallbackFunctor,
std::enable_if_t<ViewType::isChunkViewType, int> = 0>
NOINLINE KOKKOS_FUNCTION void
KOKKOS_FORCEINLINE_FUNCTION void
gradient(const MemberType &team, const IteratorType &iterator,
const specfem::compute::partial_derivatives &partial_derivatives,
const QuadratureType &quadrature, const ViewType &f,
Expand Down Expand Up @@ -78,10 +78,10 @@ gradient(const MemberType &team, const IteratorType &iterator,
Kokkos::parallel_for(
Kokkos::TeamThreadRange(team, iterator.chunk_size()), [&](const int &i) {
const auto iterator_index = iterator(i);
const auto index = iterator_index.index;
const int ielement = iterator_index.ielement;
const int ix = index.ix;
const int iz = index.iz;
const auto &index = iterator_index.index;
const int &ielement = iterator_index.ielement;
const int &ix = index.ix;
const int &iz = index.iz;

datatype df_dxi[components] = { 0.0 };
datatype df_dgamma[components] = { 0.0 };
Expand Down Expand Up @@ -147,7 +147,7 @@ gradient(const MemberType &team, const IteratorType &iterator,
template <typename MemberType, typename IteratorType, typename ViewType,
typename QuadratureType, typename CallbackFunctor,
std::enable_if_t<ViewType::isChunkViewType, int> = 0>
NOINLINE KOKKOS_FUNCTION void
KOKKOS_FORCEINLINE_FUNCTION void
gradient(const MemberType &team, const IteratorType &iterator,
const specfem::compute::partial_derivatives &partial_derivatives,
const QuadratureType &quadrature, const ViewType &f, const ViewType &g,
Expand Down Expand Up @@ -188,10 +188,10 @@ gradient(const MemberType &team, const IteratorType &iterator,
Kokkos::parallel_for(
Kokkos::TeamThreadRange(team, iterator.chunk_size()), [=](const int &i) {
const auto iterator_index = iterator(i);
const auto index = iterator_index.index;
const int ielement = iterator_index.ielement;
const int ix = index.ix;
const int iz = index.iz;
const auto &index = iterator_index.index;
const int &ielement = iterator_index.ielement;
const int &ix = index.ix;
const int &iz = index.iz;

datatype df_dxi[components];
datatype df_dgamma[components];
Expand All @@ -210,13 +210,12 @@ gradient(const MemberType &team, const IteratorType &iterator,
}
}

const auto point_partial_derivatives = [&]() {
specfem::point::partial_derivatives<specfem::dimension::type::dim2,
false, using_simd>
result;
specfem::compute::load_on_device(index, partial_derivatives, result);
return result;
}();
specfem::point::partial_derivatives<specfem::dimension::type::dim2,
false, using_simd>
point_partial_derivatives;

specfem::compute::load_on_device(index, partial_derivatives,
point_partial_derivatives);

VectorPointViewType df;

Expand Down
2 changes: 1 addition & 1 deletion include/compute/boundaries/boundaries.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ template <typename IndexType, typename PointBoundaryType,
typename std::enable_if<PointBoundaryType::simd::using_simd ==
IndexType::using_simd,
int>::type = 0>
KOKKOS_INLINE_FUNCTION void
KOKKOS_FORCEINLINE_FUNCTION void
load_on_device(const IndexType &index,
const specfem::compute::boundaries &boundaries,
PointBoundaryType &boundary) {
Expand Down
8 changes: 4 additions & 4 deletions include/compute/boundaries/impl/acoustic_free_surface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ struct acoustic_free_surface {
const Kokkos::View<int *, Kokkos::HostSpace> &boundary_index_mapping,
std::vector<specfem::element::boundary_tag_container> &boundary_tag);

KOKKOS_INLINE_FUNCTION void
KOKKOS_FORCEINLINE_FUNCTION void
load_on_device(const specfem::point::index<dimension> &index,
specfem::point::boundary<boundary_tag, dimension, false>
&boundary) const {
Expand All @@ -55,7 +55,7 @@ struct acoustic_free_surface {
return;
}

KOKKOS_INLINE_FUNCTION void
KOKKOS_FORCEINLINE_FUNCTION void
load_on_device(const specfem::point::index<dimension> &index,
specfem::point::boundary<
specfem::element::boundary_tag::composite_stacey_dirichlet,
Expand All @@ -66,7 +66,7 @@ struct acoustic_free_surface {
return;
}

KOKKOS_INLINE_FUNCTION void load_on_device(
KOKKOS_FORCEINLINE_FUNCTION void load_on_device(
const specfem::point::simd_index<dimension> &index,
specfem::point::boundary<boundary_tag, dimension, true> &boundary) const {

Expand All @@ -87,7 +87,7 @@ struct acoustic_free_surface {
return;
}

KOKKOS_INLINE_FUNCTION void
KOKKOS_FORCEINLINE_FUNCTION void
load_on_device(const specfem::point::simd_index<dimension> &index,
specfem::point::boundary<
specfem::element::boundary_tag::composite_stacey_dirichlet,
Expand Down
8 changes: 4 additions & 4 deletions include/compute/boundaries/impl/stacey.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ struct stacey {
const Kokkos::View<int *, Kokkos::HostSpace> &boundary_index_mapping,
std::vector<specfem::element::boundary_tag_container> &boundary_tag);

KOKKOS_INLINE_FUNCTION void
KOKKOS_FORCEINLINE_FUNCTION void
load_on_device(const specfem::point::index<dimension> &index,
specfem::point::boundary<boundary_tag, dimension, false>
&boundary) const {
Expand All @@ -75,7 +75,7 @@ struct stacey {
return;
}

KOKKOS_INLINE_FUNCTION void
KOKKOS_FORCEINLINE_FUNCTION void
load_on_device(const specfem::point::index<dimension> &index,
specfem::point::boundary<
specfem::element::boundary_tag::composite_stacey_dirichlet,
Expand All @@ -91,7 +91,7 @@ struct stacey {
return;
}

KOKKOS_INLINE_FUNCTION void load_on_device(
KOKKOS_FORCEINLINE_FUNCTION void load_on_device(
const specfem::point::simd_index<dimension> &index,
specfem::point::boundary<boundary_tag, dimension, true> &boundary) const {

Expand Down Expand Up @@ -121,7 +121,7 @@ struct stacey {
.copy_from(&edge_weight(index.ispec, index.iz, index.ix), tag_type());
}

KOKKOS_INLINE_FUNCTION void
KOKKOS_FORCEINLINE_FUNCTION void
load_on_device(const specfem::point::simd_index<dimension> &index,
specfem::point::boundary<
specfem::element::boundary_tag::composite_stacey_dirichlet,
Expand Down
35 changes: 19 additions & 16 deletions include/compute/compute_partial_derivatives.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ struct partial_derivatives {
template <typename PointPartialDerivativesType,
typename std::enable_if_t<
PointPartialDerivativesType::simd::using_simd, int> = 0>
NOINLINE KOKKOS_FUNCTION void impl_load_on_device(
KOKKOS_FORCEINLINE_FUNCTION void impl_load_on_device(
const specfem::point::simd_index<PointPartialDerivativesType::dimension>
&index,
const specfem::compute::partial_derivatives &derivatives,
Expand Down Expand Up @@ -111,7 +111,7 @@ NOINLINE KOKKOS_FUNCTION void impl_load_on_device(
template <typename PointPartialDerivativesType,
typename std::enable_if_t<
!PointPartialDerivativesType::simd::using_simd, int> = 0>
NOINLINE KOKKOS_FUNCTION void impl_load_on_device(
KOKKOS_FORCEINLINE_FUNCTION void impl_load_on_device(
const specfem::point::index<PointPartialDerivativesType::dimension> &index,
const specfem::compute::partial_derivatives &derivatives,
PointPartialDerivativesType &partial_derivatives) {
Expand All @@ -135,10 +135,11 @@ NOINLINE KOKKOS_FUNCTION void impl_load_on_device(
template <typename PointPointPartialDerivativesType,
typename std::enable_if_t<
PointPointPartialDerivativesType::simd::using_simd, int> = 0>
void impl_load_on_host(const specfem::point::simd_index<
PointPointPartialDerivativesType::dimension> &index,
const specfem::compute::partial_derivatives &derivatives,
PointPointPartialDerivativesType &partial_derivatives) {
inline void
impl_load_on_host(const specfem::point::simd_index<
PointPointPartialDerivativesType::dimension> &index,
const specfem::compute::partial_derivatives &derivatives,
PointPointPartialDerivativesType &partial_derivatives) {

const int ispec = index.ispec;
const int nspec = derivatives.nspec;
Expand Down Expand Up @@ -171,7 +172,7 @@ void impl_load_on_host(const specfem::point::simd_index<
template <typename PointPartialDerivativesType,
typename std::enable_if_t<
!PointPartialDerivativesType::simd::using_simd, int> = 0>
void impl_load_on_host(
inline void impl_load_on_host(
const specfem::point::index<PointPartialDerivativesType::dimension> &index,
const specfem::compute::partial_derivatives &derivatives,
PointPartialDerivativesType &partial_derivatives) {
Expand All @@ -195,7 +196,7 @@ void impl_load_on_host(
template <typename PointPartialDerivativesType,
typename std::enable_if_t<
PointPartialDerivativesType::simd::using_simd, int> = 0>
void impl_store_on_host(
inline void impl_store_on_host(
const specfem::point::simd_index<PointPartialDerivativesType::dimension>
&index,
const specfem::compute::partial_derivatives &derivatives,
Expand Down Expand Up @@ -232,7 +233,7 @@ void impl_store_on_host(
template <typename PointPartialDerivativesType,
typename std::enable_if_t<
!PointPartialDerivativesType::simd::using_simd, int> = 0>
void impl_store_on_host(
inline void impl_store_on_host(
const specfem::point::index<PointPartialDerivativesType::dimension> &index,
const specfem::compute::partial_derivatives &derivatives,
const PointPartialDerivativesType &partial_derivatives) {
Expand Down Expand Up @@ -271,7 +272,7 @@ template <
typename std::enable_if_t<IndexType::using_simd ==
PointPartialDerivativesType::simd::using_simd,
int> = 0>
KOKKOS_INLINE_FUNCTION void
KOKKOS_FORCEINLINE_FUNCTION void
load_on_device(const IndexType &index,
const specfem::compute::partial_derivatives &derivatives,
PointPartialDerivativesType &partial_derivatives) {
Expand All @@ -297,9 +298,10 @@ template <
typename std::enable_if_t<IndexType::using_simd ==
PointPartialDerivativesType::simd::using_simd,
int> = 0>
void load_on_host(const IndexType &index,
const specfem::compute::partial_derivatives &derivatives,
PointPartialDerivativesType &partial_derivatives) {
inline void
load_on_host(const IndexType &index,
const specfem::compute::partial_derivatives &derivatives,
PointPartialDerivativesType &partial_derivatives) {
impl_load_on_host(index, derivatives, partial_derivatives);
}

Expand All @@ -322,9 +324,10 @@ template <
typename std::enable_if_t<IndexType::using_simd ==
PointPartialDerivativesType::simd::using_simd,
int> = 0>
void store_on_host(const IndexType &index,
const specfem::compute::partial_derivatives &derivatives,
const PointPartialDerivativesType &partial_derivatives) {
inline void
store_on_host(const IndexType &index,
const specfem::compute::partial_derivatives &derivatives,
const PointPartialDerivativesType &partial_derivatives) {
impl_store_on_host(index, derivatives, partial_derivatives);
}
} // namespace compute
Expand Down
Loading

0 comments on commit 36637f9

Please sign in to comment.