Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rely on deep copies in SplineBuilder #511

Closed
wants to merge 21 commits into from
131 changes: 103 additions & 28 deletions include/ddc/kernels/splines/spline_builder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,12 +184,32 @@ class SplineBuilder

double m_dx; // average cell size for normalization of derivatives

std::pair<Kokkos::LayoutStride, Kokkos::LayoutStride> m_spline_tr_src_layouts;

// interpolator specific
std::unique_ptr<ddc::detail::SplinesLinearProblem<exec_space>> matrix;

/// Calculate offset so that the matrix is diagonally dominant
int compute_offset(interpolation_domain_type const& interpolation_domain);

/// Build a LayoutStride to be used by SplineBuilder::operator() for internal transposition
std::pair<Kokkos::LayoutStride, Kokkos::LayoutStride> build_spline_tr_src_layouts();

Kokkos::LayoutStride spline_tr_src_layout([[maybe_unused]] Kokkos::LayoutLeft layout) const
{
return std::get<0>(m_spline_tr_src_layouts);
}

Kokkos::LayoutStride spline_tr_src_layout([[maybe_unused]] Kokkos::LayoutRight layout) const
{
return std::get<1>(m_spline_tr_src_layouts);
}

Kokkos::LayoutStride spline_tr_src_layout(Kokkos::LayoutStride layout) const
{
return layout;
}

public:
/**
* @brief Build a SplineBuilder acting on batched_interpolation_domain.
Expand All @@ -215,6 +235,7 @@ class SplineBuilder
, m_offset(compute_offset(interpolation_domain()))
, m_dx((ddc::discrete_space<BSplines>().rmax() - ddc::discrete_space<BSplines>().rmin())
/ ddc::discrete_space<BSplines>().ncells())
, m_spline_tr_src_layouts(build_spline_tr_src_layouts())
{
static_assert(
((BcXmin == BoundCond::PERIODIC) == (BcXmax == BoundCond::PERIODIC)),
Expand Down Expand Up @@ -433,6 +454,50 @@ class SplineBuilder
void build_matrix_system();
};

template <
class ExecSpace,
class MemorySpace,
class BSplines,
class InterpolationMesh,
ddc::BoundCond BcXmin,
ddc::BoundCond BcXmax,
SplineSolver Solver,
class... IDimX>
std::pair<Kokkos::LayoutStride, Kokkos::LayoutStride> SplineBuilder<
ExecSpace,
MemorySpace,
BSplines,
InterpolationMesh,
BcXmin,
BcXmax,
Solver,
IDimX...>::build_spline_tr_src_layouts()
{
// Build the domain on which spline_tr_src is defined
batched_spline_domain_type spline_tr_src_domain(batched_spline_tr_domain());

std::pair<Kokkos::LayoutStride, Kokkos::LayoutStride> layouts;

// Convert LayoutRight and LayoutLeft to LayoutStride
std::array<std::size_t, sizeof...(IDimX)> dims_order_left {};
std::array<std::size_t, sizeof...(IDimX)> dims_order_right {};
for (int i = 0; i < sizeof...(IDimX); ++i) {
dims_order_left[i] = i;
dims_order_right[i] = sizeof...(IDimX) - i - 1;
}
std::array<std::size_t, sizeof...(IDimX)> extents {
static_cast<std::size_t>(spline_tr_src_domain.template extent<std::conditional_t<
std::is_same_v<IDimX, interpolation_mesh_type>,
bsplines_type,
IDimX>>())...};
std::get<0>(layouts) = Kokkos::LayoutStride::
order_dimensions(sizeof...(IDimX), dims_order_left.data(), extents.data());
std::get<1>(layouts) = Kokkos::LayoutStride::
order_dimensions(sizeof...(IDimX), dims_order_right.data(), extents.data());

return layouts;
}

template <
class ExecSpace,
class MemorySpace,
Expand Down Expand Up @@ -798,8 +863,6 @@ operator()(
.allocation_kokkos_view(),
vals.allocation_kokkos_view());



// Hermite boundary conditions at xmax, if any
// NOTE: For consistency with the linear system, the i-th derivative
// provided by the user must be multiplied by dx^i
Expand All @@ -822,42 +885,54 @@ operator()(
});
}

// Allocate and fill a transposed version of spline in order to get dimension of interest as last dimension (optimal for GPU, necessary for Ginkgo). Also select only relevant rows in case of periodic boundaries
auto const& offset_proxy = m_offset;
// Allocate a Chunk to receive a transposed version of spline in order to get dimension of interest
// as last dimension (optimal for GPU, necessary for Ginkgo)
ddc::Chunk spline_tr_alloc(
batched_spline_tr_domain(),
ddc::KokkosAllocator<double, memory_space>());
ddc::ChunkSpan spline_tr = spline_tr_alloc.span_view();
ddc::parallel_for_each(
"ddc_splines_transpose_rhs",
exec_space(),
batch_domain(),
KOKKOS_LAMBDA(typename batch_domain_type::discrete_element_type const j) {
for (std::size_t i = 0; i < nbasis_proxy; i++) {
spline_tr(ddc::DiscreteElement<bsplines_type>(i), j)
= spline(ddc::DiscreteElement<bsplines_type>(i + offset_proxy), j);
}
});
// Create a 2D Kokkos::View to manage spline_tr as a matrix
Kokkos::View<double**, Kokkos::LayoutRight, exec_space> bcoef_section(

// Select the source ChunkSpan to copy, ignore m_offset rows which should not be passed to linear solver
ddc::ChunkSpan spline_tr_src = spline[ddc::DiscreteDomain<bsplines_type>(
ddc::DiscreteElement<bsplines_type>(m_offset),
ddc::DiscreteVector<bsplines_type>(ddc::discrete_space<bsplines_type>().nbasis()))];

// Define a Kokkos::View to represent a dimensions-reordered version of spline_tr_src and permit the deep copies
// between spline_tr_src and spline_tr
Kokkos::View<
ddc::detail::mdspan_to_kokkos_element_t<double, sizeof...(IDimX)>,
Kokkos::LayoutStride,
exec_space>
spline_tr_src_view(
spline_tr_src.data_handle(),
spline_tr_src_layout(spline_tr_src.allocation_kokkos_view().layout()));
// Swap extents and strides
Kokkos::LayoutStride layout = spline_tr_src_view.layout();
int const index
= ddc::type_seq_rank_v<bsplines_type, ddc::to_type_seq_t<batched_spline_domain_type>>;
std::rotate(layout.dimension, layout.dimension + index, layout.dimension + index + 1);
std::rotate(layout.stride, layout.stride + index, layout.stride + index + 1);
spline_tr_src_view = Kokkos::View<
ddc::detail::mdspan_to_kokkos_element_t<double, sizeof...(IDimX)>,
Kokkos::LayoutStride,
exec_space>(spline_tr_src_view.data(), layout);

// Transpose spline_tr_src_view into spline_tr
Kokkos::deep_copy(spline_tr.allocation_kokkos_view(), spline_tr_src_view);

// Create a 2D Kokkos::View to see spline_tr as a matrix
Kokkos::View<double**, Kokkos::LayoutRight, exec_space> spline_tr_view(
spline_tr.data_handle(),
ddc::discrete_space<bsplines_type>().nbasis(),
batch_domain().size());
// Compute spline coef
matrix->solve(bcoef_section);
// Transpose back spline_tr into spline.
ddc::parallel_for_each(
"ddc_splines_transpose_back_rhs",
exec_space(),
batch_domain(),
KOKKOS_LAMBDA(typename batch_domain_type::discrete_element_type const j) {
for (std::size_t i = 0; i < nbasis_proxy; i++) {
spline(ddc::DiscreteElement<bsplines_type>(i + offset_proxy), j)
= spline_tr(ddc::DiscreteElement<bsplines_type>(i), j);
}
});
matrix->solve(spline_tr_view);

// Transpose back spline_tr into spline_tr_src_view
Kokkos::deep_copy(spline_tr_src_view, spline_tr.allocation_kokkos_view());

// Duplicate the lower spline coefficients to the upper side in case of periodic boundaries
auto const& offset_proxy = m_offset;
if (bsplines_type::is_periodic()) {
ddc::parallel_for_each(
"ddc_splines_periodic_rows_duplicate_rhs",
Expand Down