Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Main PR] Performance optimizations #488

Open
wants to merge 12 commits into
base: devel
Choose a base branch
from
2 changes: 1 addition & 1 deletion include/algorithms/divergence.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ namespace algorithms {
template <typename MemberType, typename IteratorType, typename VectorFieldType,
typename QuadratureType, typename CallableType,
std::enable_if_t<(VectorFieldType::isChunkViewType), int> = 0>
KOKKOS_FORCEINLINE_FUNCTION void divergence(
NOINLINE KOKKOS_FUNCTION void divergence(
const MemberType &team, const IteratorType &iterator,
const specfem::compute::partial_derivatives &partial_derivatives,
const Kokkos::View<type_real *,
Expand Down
4 changes: 2 additions & 2 deletions include/algorithms/gradient.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ namespace algorithms {
template <typename MemberType, typename IteratorType, typename ViewType,
typename QuadratureType, typename CallbackFunctor,
std::enable_if_t<ViewType::isChunkViewType, int> = 0>
KOKKOS_FORCEINLINE_FUNCTION void
NOINLINE KOKKOS_FUNCTION void
gradient(const MemberType &team, const IteratorType &iterator,
const specfem::compute::partial_derivatives &partial_derivatives,
const QuadratureType &quadrature, const ViewType &f,
Expand Down Expand Up @@ -147,7 +147,7 @@ gradient(const MemberType &team, const IteratorType &iterator,
template <typename MemberType, typename IteratorType, typename ViewType,
typename QuadratureType, typename CallbackFunctor,
std::enable_if_t<ViewType::isChunkViewType, int> = 0>
KOKKOS_FORCEINLINE_FUNCTION void
NOINLINE KOKKOS_FUNCTION void
gradient(const MemberType &team, const IteratorType &iterator,
const specfem::compute::partial_derivatives &partial_derivatives,
const QuadratureType &quadrature, const ViewType &f, const ViewType &g,
Expand Down
8 changes: 4 additions & 4 deletions include/compute/fields/data_access.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -1690,7 +1690,7 @@ template <
typename WavefieldType, typename ViewType,
typename std::enable_if_t<
ViewType::isPointFieldType && !ViewType::simd::using_simd, int> = 0>
KOKKOS_FORCEINLINE_FUNCTION void impl_atomic_add_on_device(
NOINLINE KOKKOS_FUNCTION void impl_atomic_add_on_device(
const specfem::point::index<ViewType::dimension> &index,
const ViewType &point_field, const WavefieldType &field) {

Expand All @@ -1707,7 +1707,7 @@ template <
typename WavefieldType, typename ViewType,
typename std::enable_if_t<
ViewType::isPointFieldType && ViewType::simd::using_simd, int> = 0>
KOKKOS_FORCEINLINE_FUNCTION void impl_atomic_add_on_device(
NOINLINE KOKKOS_FUNCTION void impl_atomic_add_on_device(
const specfem::point::simd_index<ViewType::dimension> &index,
const ViewType &point_field, const WavefieldType &field) {

Expand Down Expand Up @@ -1902,7 +1902,7 @@ template <
typename ViewType,
typename std::enable_if_t<
ViewType::isChunkFieldType && !ViewType::simd::using_simd, int> = 0>
KOKKOS_FORCEINLINE_FUNCTION void
NOINLINE KOKKOS_FUNCTION void
impl_load_on_device(const MemberType &team, const IteratorType &iterator,
const WavefieldType &field, ViewType &chunk_field) {

Expand Down Expand Up @@ -1980,7 +1980,7 @@ template <
typename ViewType,
typename std::enable_if_t<
ViewType::isChunkFieldType && ViewType::simd::using_simd, int> = 0>
KOKKOS_FORCEINLINE_FUNCTION void
NOINLINE KOKKOS_FUNCTION void
impl_load_on_device(const MemberType &team, const IteratorType &iterator,
const WavefieldType &field, ViewType &chunk_field) {

Expand Down
37 changes: 23 additions & 14 deletions include/compute/fields/impl/field_impl.tpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#ifndef _COMPUTE_FIELDS_IMPL_FIELD_IMPL_TPP_
#define _COMPUTE_FIELDS_IMPL_FIELD_IMPL_TPP_

#include "compute/fields/impl/field_impl.hpp"
#include "compute/element_types/element_types.hpp"
#include "compute/fields/impl/field_impl.hpp"
#include "parallel_configuration/chunk_config.hpp"
#include "kokkos_abstractions.h"
#include <Kokkos_Core.hpp>

Expand Down Expand Up @@ -35,19 +36,27 @@ specfem::compute::impl::field_impl<DimensionType, MediumTag>::field_impl(

// Count the total number of distinct global indices for the medium
int count = 0;

for (int ix = 0; ix < ngllx; ++ix) {
for (int iz = 0; iz < ngllz; ++iz) {
for (int ispec = 0; ispec < nspec; ++ispec) {
const auto medium = element_types.get_medium_tag(ispec);
if (medium == MediumTag) {
const int index = index_mapping(ispec, iz, ix); // get global index
// increase the count only if the global index is not already counted
/// static_cast<int>(medium::value) is the index of the medium in the
/// enum class
if (assembly_index_mapping(index) == -1) {
assembly_index_mapping(index) = count;
count++;
using simd = specfem::datatype::simd<type_real, true>;

constexpr int chunk_size = specfem::parallel_config::storage_chunk_size;
int nchunks = nspec / chunk_size;
int iloc = 0;
for (int ichunk = 0; ichunk < nchunks; ichunk++) {
for (int iz = 0; iz < ngllz; iz++) {
for (int ix = 0; ix < ngllx; ix++) {
for (int ielement = 0; ielement < chunk_size; ielement++) {
int ispec = ichunk * chunk_size + ielement;
const auto medium = element_types.get_medium_tag(ispec);
if (medium == MediumTag) {
const int index = index_mapping(ispec, iz, ix); // get global index
// number a global index only the first time it is encountered: an entry
// of -1 means it has not been counted yet
/// static_cast<int>(medium::value) is the index of the medium in
/// the enum class
if (assembly_index_mapping(index) == -1) {
assembly_index_mapping(index) = count;
count++;
}
}
}
}
Expand Down
Loading