Skip to content

Commit

Permalink
Merge pull request #487 from PrincetonUniversity/issue-481
Browse files Browse the repository at this point in the history
Global index mapping reordering
  • Loading branch information
Rohit-Kakodkar authored Feb 25, 2025
2 parents 8bb074b + 1ff5ffb commit 3176d35
Show file tree
Hide file tree
Showing 4 changed files with 233 additions and 190 deletions.
37 changes: 23 additions & 14 deletions include/compute/fields/impl/field_impl.tpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#ifndef _COMPUTE_FIELDS_IMPL_FIELD_IMPL_TPP_
#define _COMPUTE_FIELDS_IMPL_FIELD_IMPL_TPP_

#include "compute/fields/impl/field_impl.hpp"
#include "compute/element_types/element_types.hpp"
#include "compute/fields/impl/field_impl.hpp"
#include "parallel_configuration/chunk_config.hpp"
#include "kokkos_abstractions.h"
#include <Kokkos_Core.hpp>

Expand Down Expand Up @@ -35,19 +36,27 @@ specfem::compute::impl::field_impl<DimensionType, MediumTag>::field_impl(

// Count the total number of distinct global indices for the medium
int count = 0;

for (int ix = 0; ix < ngllx; ++ix) {
for (int iz = 0; iz < ngllz; ++iz) {
for (int ispec = 0; ispec < nspec; ++ispec) {
const auto medium = element_types.get_medium_tag(ispec);
if (medium == MediumTag) {
const int index = index_mapping(ispec, iz, ix); // get global index
// increase the count only if the global index is not already counted
/// static_cast<int>(medium::value) is the index of the medium in the
/// enum class
if (assembly_index_mapping(index) == -1) {
assembly_index_mapping(index) = count;
count++;
using simd = specfem::datatype::simd<type_real, true>;

constexpr int chunk_size = specfem::parallel_config::storage_chunk_size;
int nchunks = nspec / chunk_size;
int iloc = 0;
for (int ichunk = 0; ichunk < nchunks; ichunk++) {
for (int iz = 0; iz < ngllz; iz++) {
for (int ix = 0; ix < ngllx; ix++) {
for (int ielement = 0; ielement < chunk_size; ielement++) {
int ispec = ichunk * chunk_size + ielement;
const auto medium = element_types.get_medium_tag(ispec);
if (medium == MediumTag) {
const int index = index_mapping(ispec, iz, ix); // get global index
// increase the count only if the global index is not already
// counted
/// static_cast<int>(medium::value) is the index of the medium in
/// the enum class
if (assembly_index_mapping(index) == -1) {
assembly_index_mapping(index) = count;
count++;
}
}
}
}
Expand Down
278 changes: 140 additions & 138 deletions include/kokkos_kernels/impl/compute_stiffness_interaction.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,151 +105,153 @@ void specfem::kokkos_kernels::impl::compute_stiffness_interaction(
constexpr int simd_size = simd::size();

if constexpr (BoundaryTag == specfem::element::boundary_tag::stacey &&
WavefieldType == specfem::wavefield::simulation_field::backward) {

Kokkos::parallel_for(
"specfem::domain::impl::kernels::elements::compute_stiffness_"
"interaction",
static_cast<const typename ChunkPolicyType::policy_type &>(chunk_policy),
KOKKOS_LAMBDA(const typename ChunkPolicyType::member_type &team) {
for (int tile = 0; tile < ChunkPolicyType::tile_size * simd_size;
tile += ChunkPolicyType::chunk_size * simd_size) {
const int starting_element_index =
team.league_rank() * ChunkPolicyType::tile_size * simd_size +
tile;

if (starting_element_index >= nelements) {
break;
WavefieldType ==
specfem::wavefield::simulation_field::backward) {

Kokkos::parallel_for(
"specfem::domain::impl::kernels::elements::compute_stiffness_"
"interaction",
static_cast<const typename ChunkPolicyType::policy_type &>(
chunk_policy),
KOKKOS_LAMBDA(const typename ChunkPolicyType::member_type &team) {
for (int tile = 0; tile < ChunkPolicyType::tile_size * simd_size;
tile += ChunkPolicyType::chunk_size * simd_size) {
const int starting_element_index =
team.league_rank() * ChunkPolicyType::tile_size * simd_size +
tile;

if (starting_element_index >= nelements) {
break;
}

const auto iterator =
chunk_policy.league_iterator(starting_element_index);

Kokkos::parallel_for(
Kokkos::TeamThreadRange(team, iterator.chunk_size()),
[&](const int i) {
const auto iterator_index = iterator(i);
const auto index = iterator_index.index;

PointAccelerationType acceleration;
specfem::compute::load_on_device(
istep, index, boundary_values, acceleration);

specfem::compute::atomic_add_on_device(index, acceleration,
field);
});
}
});
} else {

Kokkos::parallel_for(
"specfem::kernels::impl::domain_kernels::compute_stiffness_interaction",
chunk_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_size)),
KOKKOS_LAMBDA(const typename ChunkPolicyType::member_type &team) {
ChunkElementFieldType element_field(team);
ElementQuadratureType element_quadrature(team);
ChunkStressIntegrandType stress_integrand(team);

specfem::compute::load_on_device(team, quadrature,
element_quadrature);
for (int tile = 0; tile < ChunkPolicyType::tile_size * simd_size;
tile += ChunkPolicyType::chunk_size * simd_size) {
const int starting_element_index =
team.league_rank() * ChunkPolicyType::tile_size * simd_size +
tile;

if (starting_element_index >= nelements) {
break;
}

const auto iterator =
chunk_policy.league_iterator(starting_element_index);
specfem::compute::load_on_device(team, iterator, field,
element_field);

team.team_barrier();

specfem::algorithms::gradient(
team, iterator, partial_derivatives,
element_quadrature.hprime_gll, element_field.displacement,
// Compute stresses using the gradients
[&](const typename ChunkPolicyType::iterator_type::index_type
&iterator_index,
const typename PointFieldDerivativesType::ViewType &du) {
const auto &index = iterator_index.index;

PointPartialDerivativesType point_partial_derivatives;
specfem::compute::load_on_device(index, partial_derivatives,
point_partial_derivatives);

PointPropertyType point_property;
specfem::compute::load_on_device(index, properties,
point_property);

PointFieldDerivativesType field_derivatives(du);

const auto point_stress = specfem::medium::compute_stress(
point_property, field_derivatives);

const auto F = point_stress * point_partial_derivatives;

const int &ielement = iterator_index.ielement;

for (int icomponent = 0; icomponent < components;
++icomponent) {
for (int idim = 0; idim < num_dimensions; ++idim) {
stress_integrand.F(ielement, index.iz, index.ix, idim,
icomponent) = F(idim, icomponent);
}
}
});

team.team_barrier();

specfem::algorithms::divergence(
team, iterator, partial_derivatives, wgll,
element_quadrature.hprime_wgll, stress_integrand.F,
[&, istep = istep](
const typename ChunkPolicyType::iterator_type::index_type
&iterator_index,
const typename PointAccelerationType::ViewType &result) {
const auto &index = iterator_index.index;
PointAccelerationType acceleration(result);

for (int icomponent = 0; icomponent < components;
++icomponent) {
acceleration.acceleration(icomponent) *=
static_cast<type_real>(-1.0);
}

const auto iterator =
chunk_policy.league_iterator(starting_element_index);

Kokkos::parallel_for(
Kokkos::TeamThreadRange(team, iterator.chunk_size()),
[&](const int i) {
const auto iterator_index = iterator(i);
const auto index = iterator_index.index;

PointAccelerationType acceleration;
specfem::compute::load_on_device(
istep, index, boundary_values, acceleration);

specfem::compute::atomic_add_on_device(index, acceleration,
field);
});
}
});
}
else {

Kokkos::parallel_for(
"specfem::kernels::impl::domain_kernels::compute_stiffness_interaction",
chunk_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_size)),
KOKKOS_LAMBDA(const typename ChunkPolicyType::member_type &team) {
ChunkElementFieldType element_field(team);
ElementQuadratureType element_quadrature(team);
ChunkStressIntegrandType stress_integrand(team);

specfem::compute::load_on_device(team, quadrature, element_quadrature);
for (int tile = 0; tile < ChunkPolicyType::tile_size * simd_size;
tile += ChunkPolicyType::chunk_size * simd_size) {
const int starting_element_index =
team.league_rank() * ChunkPolicyType::tile_size * simd_size +
tile;

if (starting_element_index >= nelements) {
break;
}

const auto iterator =
chunk_policy.league_iterator(starting_element_index);
specfem::compute::load_on_device(team, iterator, field,
element_field);

team.team_barrier();

specfem::algorithms::gradient(
team, iterator, partial_derivatives,
element_quadrature.hprime_gll, element_field.displacement,
// Compute stresses using the gradients
[&](const typename ChunkPolicyType::iterator_type::index_type
&iterator_index,
const typename PointFieldDerivativesType::ViewType &du) {
const auto &index = iterator_index.index;

PointPartialDerivativesType point_partial_derivatives;
specfem::compute::load_on_device(index, partial_derivatives,
point_partial_derivatives);

PointPropertyType point_property;
specfem::compute::load_on_device(index, properties,
point_property);

PointFieldDerivativesType field_derivatives(du);
PointPropertyType point_property;
specfem::compute::load_on_device(index, properties,
point_property);

const auto point_stress = specfem::medium::compute_stress(
point_property, field_derivatives);
PointVelocityType velocity;
specfem::compute::load_on_device(index, field, velocity);

const auto F = point_stress * point_partial_derivatives;
PointBoundaryType point_boundary;
specfem::compute::load_on_device(index, boundaries,
point_boundary);

const int &ielement = iterator_index.ielement;
specfem::boundary_conditions::apply_boundary_conditions(
point_boundary, point_property, velocity, acceleration);

for (int icomponent = 0; icomponent < components;
++icomponent) {
for (int idim = 0; idim < num_dimensions; ++idim) {
stress_integrand.F(ielement, index.iz, index.ix, idim,
icomponent) = F(idim, icomponent);
// Store forward boundary values for reconstruction during
// adjoint simulations. The function does nothing if the
// boundary tag is not stacey
if (wavefield ==
specfem::wavefield::simulation_field::forward) {
specfem::compute::store_on_device(
istep, index, acceleration, boundary_values);
}
}
});

team.team_barrier();

specfem::algorithms::divergence(
team, iterator, partial_derivatives, wgll,
element_quadrature.hprime_wgll, stress_integrand.F,
[&, istep = istep](
const typename ChunkPolicyType::iterator_type::index_type
&iterator_index,
const typename PointAccelerationType::ViewType &result) {
const auto &index = iterator_index.index;
PointAccelerationType acceleration(result);

for (int icomponent = 0; icomponent < components;
++icomponent) {
acceleration.acceleration(icomponent) *=
static_cast<type_real>(-1.0);
}

PointPropertyType point_property;
specfem::compute::load_on_device(index, properties,
point_property);

PointVelocityType velocity;
specfem::compute::load_on_device(index, field, velocity);

PointBoundaryType point_boundary;
specfem::compute::load_on_device(index, boundaries,
point_boundary);

specfem::boundary_conditions::apply_boundary_conditions(
point_boundary, point_property, velocity, acceleration);

// Store forward boundary values for reconstruction during
// adjoint simulations. The function does nothing if the
// boundary tag is not stacey
if (wavefield ==
specfem::wavefield::simulation_field::forward) {
specfem::compute::store_on_device(istep, index, acceleration,
boundary_values);
}

specfem::compute::atomic_add_on_device(index, acceleration,
field);
});
}
});

specfem::compute::atomic_add_on_device(index, acceleration,
field);
});
}
});
}

Kokkos::fence();
Expand Down
Loading

0 comments on commit 3176d35

Please sign in to comment.