From 09a4c80488c97ba76558e99c001afb5b0f2c30ca Mon Sep 17 00:00:00 2001 From: blegouix Date: Wed, 6 Mar 2024 08:37:31 +0100 Subject: [PATCH 01/28] extract matrices from lapack branch --- include/ddc/kernels/splines.hpp | 7 + include/ddc/kernels/splines/matrix.hpp | 68 ++-- include/ddc/kernels/splines/matrix_banded.hpp | 163 +++++++++ .../kernels/splines/matrix_center_block.hpp | 185 ++++++++++ .../kernels/splines/matrix_corner_block.hpp | 333 ++++++++++++++++++ include/ddc/kernels/splines/matrix_dense.hpp | 123 +++++++ include/ddc/kernels/splines/matrix_maker.hpp | 84 +++++ .../ddc/kernels/splines/matrix_pds_banded.hpp | 190 ++++++++++ .../kernels/splines/matrix_pds_tridiag.hpp | 160 +++++++++ .../splines/matrix_periodic_banded.hpp | 230 ++++++++++++ include/ddc/kernels/splines/matrix_sparse.hpp | 28 +- include/ddc/kernels/splines/view.hpp | 133 ++----- tests/splines/matrix.cpp | 206 ++++++++++- 13 files changed, 1759 insertions(+), 151 deletions(-) create mode 100644 include/ddc/kernels/splines/matrix_banded.hpp create mode 100644 include/ddc/kernels/splines/matrix_center_block.hpp create mode 100644 include/ddc/kernels/splines/matrix_corner_block.hpp create mode 100644 include/ddc/kernels/splines/matrix_dense.hpp create mode 100644 include/ddc/kernels/splines/matrix_pds_banded.hpp create mode 100644 include/ddc/kernels/splines/matrix_pds_tridiag.hpp create mode 100644 include/ddc/kernels/splines/matrix_periodic_banded.hpp diff --git a/include/ddc/kernels/splines.hpp b/include/ddc/kernels/splines.hpp index 5e199752b..9ae8f1f39 100644 --- a/include/ddc/kernels/splines.hpp +++ b/include/ddc/kernels/splines.hpp @@ -9,7 +9,14 @@ #include "splines/knots_as_interpolation_points.hpp" #include "splines/math_tools.hpp" #include "splines/matrix.hpp" +#include "splines/matrix_banded.hpp" +#include "splines/matrix_center_block.hpp" +#include "splines/matrix_corner_block.hpp" +#include "splines/matrix_dense.hpp" #include "splines/matrix_maker.hpp" +#include "splines/matrix_pds_banded.hpp" +#include "splines/matrix_pds_tridiag.hpp" +#include "splines/matrix_periodic_banded.hpp" #include "splines/matrix_sparse.hpp" #include "splines/null_extrapolation_rule.hpp" #include "splines/periodic_extrapolation_rule.hpp" diff --git a/include/ddc/kernels/splines/matrix.hpp b/include/ddc/kernels/splines/matrix.hpp index f1495d103..8f5e00f1b 100644 --- a/include/ddc/kernels/splines/matrix.hpp +++ b/include/ddc/kernels/splines/matrix.hpp @@ -12,6 +12,7 @@ namespace ddc::detail { class Matrix { +protected: int m_n; public: @@ -19,9 +20,11 @@ class Matrix virtual ~Matrix() = default; + virtual void reset() const = 0; + virtual double get_element(int i, int j) const = 0; - virtual void set_element(int i, int j, double aij) = 0; + virtual void set_element(int i, int j, double aij) const = 0; virtual void factorize() { @@ -43,44 +46,26 @@ class Matrix virtual ddc::DSpan1D solve_inplace(ddc::DSpan1D const b) const { - assert(int(b.extent(0)) == m_n); - int const info = solve_inplace_method(b.data_handle(), 'N', 1); - - if (info < 0) { - std::cerr << -info << "-th argument had an illegal value" << std::endl; - // TODO: Add LOG_FATAL_ERROR - } + ddc::DSpan2D_left b_left(b.data_handle(), b.extent(0), 1); + solve_inplace(b_left); return b; } virtual ddc::DSpan1D solve_transpose_inplace(ddc::DSpan1D const b) const { - assert(int(b.extent(0)) == m_n); - int const info = solve_inplace_method(b.data_handle(), 'T', 1); - - if (info < 0) { - std::cerr << -info << "-th argument had an illegal value" << std::endl; - // TODO: Add LOG_FATAL_ERROR - } + ddc::DSpan2D_left b_left(b.data_handle(), b.extent(0), 1); + solve_transpose_inplace(b_left); return b; } - virtual ddc::DSpan2D solve_multiple_inplace(ddc::DSpan2D const bx) const + virtual ddc::DSpan2D_stride solve_inplace(ddc::DSpan2D_stride const bx) const { - assert(int(bx.extent(1)) == m_n); - int const info = solve_inplace_method(bx.data_handle(), 'N', bx.extent(0)); - - if (info < 0) { - std::cerr << -info << "-th argument had an illegal value" << std::endl; - // TODO: Add LOG_FATAL_ERROR - } - return bx; - } - - virtual ddc::DSpan2D solve_multiple_transpose_inplace(ddc::DSpan2D const bx) const - { - assert(int(bx.extent(1)) == m_n); - int const info = solve_inplace_method(bx.data_handle(), 'T', bx.extent(0)); + assert(int(bx.extent(0)) == m_n); + int const info = solve_inplace_method( + bx.data_handle(), + 'N', + bx.extent(1), + std::max(bx.stride(0), bx.stride(1))); if (info < 0) { std::cerr << -info << "-th argument had an illegal value" << std::endl; @@ -89,12 +74,14 @@ class Matrix return bx; } - template - Kokkos::View solve_batch_inplace( - Kokkos::View const bx) const + virtual ddc::DSpan2D_stride solve_transpose_inplace(ddc::DSpan2D_stride const bx) const { assert(int(bx.extent(0)) == m_n); - int const info = solve_inplace_method(bx.data(), 'N', bx.extent(1)); + int const info = solve_inplace_method( + bx.data_handle(), + 'T', + bx.extent(1), + std::max(bx.stride(0), bx.stride(1))); if (info < 0) { std::cerr << -info << "-th argument had an illegal value" << std::endl; @@ -103,7 +90,7 @@ class Matrix return bx; } - int get_size() const + int KOKKOS_INLINE_FUNCTION get_size() const { return m_n; } @@ -123,7 +110,16 @@ class Matrix protected: virtual int factorize_method() = 0; - virtual int solve_inplace_method(double* b, char transpose, int n_equations) const = 0; + virtual int solve_inplace_method( + double* const b, + char const transpose, + int const n_equations, + int const stride) const = 0; + + int solve_inplace_method(double* const b, char const transpose, int const n_equations) const + { + return solve_inplace_method(b, transpose, n_equations, get_size()); + }; }; } // namespace ddc::detail diff --git a/include/ddc/kernels/splines/matrix_banded.hpp b/include/ddc/kernels/splines/matrix_banded.hpp new file mode 100644 index 000000000..c080ceb00 --- /dev/null +++ b/include/ddc/kernels/splines/matrix_banded.hpp @@ -0,0 +1,163 @@ +#pragma once + +#include +#include +#include +#include + +#include "matrix.hpp" + +namespace ddc::detail { +extern "C" int dgbtrf_( + int const* m, + int const* n, + int const* kl, + int const* ku, + double* a_b, + int const* lda_b, + int* ipiv, + int* info); +extern "C" int dgbtrs_( + char const* trans, + int const* n, + int const* kl, + int const* ku, + int const* nrhs, + double* a_b, + int const* lda_b, + int* ipiv, + double* b, + int const* ldb, + int* info); + +template +class Matrix_Banded : public Matrix +{ +protected: + int const m_kl; // no. of subdiagonals + int const m_ku; // no. of superdiagonals + int const m_c; // no. of columns in q + Kokkos::View m_ipiv; // pivot indices + // TODO: double** + Kokkos::View m_q; // banded matrix representation + +public: + Matrix_Banded(int const mat_size, int const kl, int const ku) + : Matrix(mat_size) + , m_kl(kl) + , m_ku(ku) + , m_c(2 * kl + ku + 1) + , m_ipiv("ipiv", mat_size) + , m_q("q", m_c * mat_size) + { + assert(mat_size > 0); + assert(m_kl >= 0); + assert(m_ku >= 0); + assert(m_kl <= mat_size); + assert(m_ku <= mat_size); + + /* + * Given the linear system A*x=b, we assume that A is a square (n by n) + * with ku super-diagonals and kl sub-diagonals. + * All non-zero elements of A are stored in the rectangular matrix q, using + * the format required by DGBTRF (LAPACK): diagonals of A are rows of q. + * q has 2*kl rows for the subdiagonals, 1 row for the diagonal, and ku rows + * for the superdiagonals. (The kl additional rows are needed for pivoting.) + * The term A(i,j) of the full matrix is stored in q(i-j+2*kl+1,j). + */ + } + + void reset() const override + { + Kokkos::deep_copy(m_q, 0.); + } + + double get_element(int const i, int const j) const override + { + if (i >= std::max(0, j - m_ku) && i < std::min(get_size(), j + m_kl + 1)) { + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + return m_q(j * m_c + m_kl + m_ku + i - j); + } else { + // Inefficient, usage is strongly discouraged + double aij; + Kokkos::deep_copy( + Kokkos::View(&aij), + Kokkos::subview(m_q, j * m_c + m_kl + m_ku + i - j)); + return aij; + } + } else { + return 0.0; + } + } + + void set_element(int const i, int const j, double const aij) const override + { + if (i >= std::max(0, j - m_ku) && i < std::min(get_size(), j + m_kl + 1)) { + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + m_q(j * m_c + m_kl + m_ku + i - j) = aij; + } else { + // Inefficient, usage is strongly discouraged + Kokkos::deep_copy( + Kokkos::subview(m_q, j * m_c + m_kl + m_ku + i - j), + Kokkos::View(&aij)); + } + } else { + assert(std::fabs(aij) < 1e-20); + } + } + +protected: + int factorize_method() override + { + auto q_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_q); + auto ipiv_host = create_mirror_view(Kokkos::DefaultHostExecutionSpace(), m_ipiv); + int info; + int const n = get_size(); + dgbtrf_(&n, &n, &m_kl, &m_ku, q_host.data(), &m_c, ipiv_host.data(), &info); + Kokkos::deep_copy(m_q, q_host); + Kokkos::deep_copy(m_ipiv, ipiv_host); + return info; + } + int solve_inplace_method( + double* b, + char const transpose, + int const n_equations, + int const stride) const override + { + auto q_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_q); + auto ipiv_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_ipiv); + Kokkos::View + b_view(b, Kokkos::LayoutStride(get_size(), 1, n_equations, stride)); + auto b_host = create_mirror_view(Kokkos::DefaultHostExecutionSpace(), b_view); + for (int i = 0; i < n_equations; ++i) { + Kokkos::deep_copy( + Kokkos::subview(b_host, Kokkos::ALL, i), + Kokkos::subview(b_view, Kokkos::ALL, i)); + } + int info; + int const n = get_size(); + dgbtrs_(&transpose, + &n, + &m_kl, + &m_ku, + &n_equations, + q_host.data(), + &m_c, + ipiv_host.data(), + b_host.data(), + &stride, + &info); + for (int i = 0; i < n_equations; ++i) { + Kokkos::deep_copy( + Kokkos::subview(b_view, Kokkos::ALL, i), + Kokkos::subview(b_host, Kokkos::ALL, i)); + } + return info; + } +}; + +} // namespace ddc::detail diff --git a/include/ddc/kernels/splines/matrix_center_block.hpp b/include/ddc/kernels/splines/matrix_center_block.hpp new file mode 100644 index 000000000..c38316ad6 --- /dev/null +++ b/include/ddc/kernels/splines/matrix_center_block.hpp @@ -0,0 +1,185 @@ +#pragma once + +#include +#include + +#include + +#include "matrix_corner_block.hpp" +#include "view.hpp" + +namespace ddc::detail { +class Matrix; + +template +class Matrix_Center_Block : public Matrix_Corner_Block +{ + // Necessary because we inherit from a template class, otherwise we should use this-> everywhere + using Matrix_Corner_Block::get_size; + using Matrix_Corner_Block::solve_inplace; + using Matrix_Corner_Block::m_q_block; + using Matrix_Corner_Block::m_delta; + using Matrix_Corner_Block::m_Abm_1_gamma; + using Matrix_Corner_Block::m_lambda; + +protected: + int const m_top_block_size; + int const m_bottom_block_size; + int const m_bottom_block_index; + +public: + Matrix_Center_Block( + int const n, + int const m_top_block_size, + int const m_bottom_block_size, + std::unique_ptr q) + : Matrix_Corner_Block(n, m_top_block_size + m_bottom_block_size, std::move(q)) + , m_top_block_size(m_top_block_size) + , m_bottom_block_size(m_bottom_block_size) + , m_bottom_block_index(n - m_bottom_block_size) + { + } + + double get_element(int i, int j) const override + { + adjust_indexes(i, j); + return Matrix_Corner_Block::get_element(i, j); + } + void set_element(int i, int j, double aij) const override + { + adjust_indexes(i, j); + Matrix_Corner_Block::set_element(i, j, aij); + } + ddc::DSpan2D_stride solve_inplace(ddc::DSpan2D_stride const bx) const override + { + swap_array_to_corner(bx); + Matrix_Corner_Block::solve_inplace(bx); + swap_array_to_center(bx); + return bx; + } + + +protected: + void adjust_indexes(int& i, int& j) const + { + if (i < m_top_block_size) + i += m_q_block->get_size(); + else if (i < m_bottom_block_index) + i -= m_top_block_size; + + if (j < m_top_block_size) + j += m_q_block->get_size(); + else if (j < m_bottom_block_index) + j -= m_top_block_size; + } + ddc::DSpan2D_stride swap_array_to_corner(ddc::DSpan2D_stride const bx) const + { + auto bx_top = std::experimental::submdspan( + bx, + std::pair {0, m_top_block_size}, + std::experimental::full_extent); + auto bx_q = std::experimental::submdspan( + bx, + std::pair {m_top_block_size, m_top_block_size + m_q_block->get_size()}, + std::experimental::full_extent); + auto bx_top_dest = std::experimental::submdspan( + bx, + std::pair< + int, + int> {m_q_block->get_size(), m_top_block_size + m_q_block->get_size()}, + std::experimental::full_extent); + auto bx_q_dest = std::experimental::submdspan( + bx, + std::pair {0, m_q_block->get_size()}, + std::experimental::full_extent); + // Necessary to convert to Kokkos::View to deep_copy afterward, this is inlining of code present in ddc/details/kokkos.hpp, maybe we could make a dedicated function for it + auto bx_top_kokkos_layout = ddc::detail::build_kokkos_layout( + bx_top.extents(), + bx_top.mapping(), + std::make_index_sequence<2> {}); + auto bx_q_kokkos_layout = ddc::detail:: + build_kokkos_layout(bx_q.extents(), bx_q.mapping(), std::make_index_sequence<2> {}); + Kokkos::View< + ddc::detail::mdspan_to_kokkos_element_t, + Kokkos::LayoutStride, + typename ExecSpace::memory_space> + bx_top_view(bx_top.data_handle(), bx_top_kokkos_layout); + Kokkos::View< + ddc::detail::mdspan_to_kokkos_element_t, + Kokkos::LayoutStride, + typename ExecSpace::memory_space> + bx_q_view(bx_q.data_handle(), bx_q_kokkos_layout); + Kokkos::View< + ddc::detail::mdspan_to_kokkos_element_t, + Kokkos::LayoutStride, + typename ExecSpace::memory_space> + bx_top_dest_view(bx_top_dest.data_handle(), bx_top_kokkos_layout); + Kokkos::View< + ddc::detail::mdspan_to_kokkos_element_t, + Kokkos::LayoutStride, + typename ExecSpace::memory_space> + bx_q_dest_view(bx_q_dest.data_handle(), bx_q_kokkos_layout); + auto bx_q_buffer = Kokkos::create_mirror(ExecSpace(), bx_q_view); + + Kokkos::deep_copy(bx_q_buffer, bx_q_view); + Kokkos::deep_copy(bx_top_dest_view, bx_top_view); + Kokkos::deep_copy(bx_q_dest_view, bx_q_buffer); + return bx; + } + ddc::DSpan2D_stride swap_array_to_center(ddc::DSpan2D_stride const bx) const + { + auto bx_top = std::experimental::submdspan( + bx, + std::pair {0, m_top_block_size}, + std::experimental::full_extent); + auto bx_q = std::experimental::submdspan( + bx, + std::pair {m_top_block_size, m_top_block_size + m_q_block->get_size()}, + std::experimental::full_extent); + auto bx_top_src = std::experimental::submdspan( + bx, + std::pair< + int, + int> {m_q_block->get_size(), m_top_block_size + m_q_block->get_size()}, + std::experimental::full_extent); + auto bx_q_src = std::experimental::submdspan( + bx, + std::pair {0, m_q_block->get_size()}, + std::experimental::full_extent); + // Necessary to convert to Kokkos::View to deep_copy afterward, this is inlining of code present in ddc/details/kokkos.hpp, maybe we could make a dedicated function for it + auto bx_top_kokkos_layout = ddc::detail::build_kokkos_layout( + bx_top.extents(), + bx_top.mapping(), + std::make_index_sequence<2> {}); + auto bx_q_kokkos_layout = ddc::detail:: + build_kokkos_layout(bx_q.extents(), bx_q.mapping(), std::make_index_sequence<2> {}); + Kokkos::View< + ddc::detail::mdspan_to_kokkos_element_t, + Kokkos::LayoutStride, + typename ExecSpace::memory_space> + bx_top_view(bx_top.data_handle(), bx_top_kokkos_layout); + Kokkos::View< + ddc::detail::mdspan_to_kokkos_element_t, + Kokkos::LayoutStride, + typename ExecSpace::memory_space> + bx_q_view(bx_q.data_handle(), bx_q_kokkos_layout); + Kokkos::View< + ddc::detail::mdspan_to_kokkos_element_t, + Kokkos::LayoutStride, + typename ExecSpace::memory_space> + bx_top_src_view(bx_top_src.data_handle(), bx_top_kokkos_layout); + Kokkos::View< + ddc::detail::mdspan_to_kokkos_element_t, + Kokkos::LayoutStride, + typename ExecSpace::memory_space> + bx_q_src_view(bx_q_src.data_handle(), bx_q_kokkos_layout); + auto bx_q_buffer = Kokkos::create_mirror(ExecSpace(), bx_q_src_view); + + Kokkos::deep_copy(bx_q_buffer, bx_q_src_view); + Kokkos::deep_copy(bx_top_view, bx_top_src_view); + Kokkos::deep_copy(bx_q_view, bx_q_buffer); + return bx; + } +}; + +} // namespace ddc::detail diff --git a/include/ddc/kernels/splines/matrix_corner_block.hpp b/include/ddc/kernels/splines/matrix_corner_block.hpp new file mode 100644 index 000000000..854e99657 --- /dev/null +++ b/include/ddc/kernels/splines/matrix_corner_block.hpp @@ -0,0 +1,333 @@ +#pragma once + +#include +#include +#include + +#include + +#include + +#include "matrix.hpp" +#include "matrix_dense.hpp" +#include "view.hpp" + +namespace ddc::detail { + +template +class Matrix_Corner_Block : public Matrix +{ +protected: + int const m_k; // small block size + int const m_nb; // main block matrix size + //------------------------------------- + // + // q = | q_block | gamma | + // | lambda | delta | + // + //------------------------------------- + std::shared_ptr m_q_block; + std::shared_ptr> m_delta; + Kokkos::View m_Abm_1_gamma; + Kokkos::View m_lambda; + +public: + Matrix_Corner_Block(int const n, int const k, std::unique_ptr q) + : Matrix(n) + , m_k(k) + , m_nb(n - k) + , m_q_block(std::move(q)) + , m_delta(new Matrix_Dense(k)) + , m_Abm_1_gamma("Abm_1_gamma", m_nb, m_k) + , m_lambda("lambda", m_k, m_nb) + { + assert(n > 0); + assert(m_k >= 0); + assert(m_k <= get_size()); + assert(m_nb == m_q_block->get_size()); + } + + virtual void reset() const override + { + m_q_block->reset(); + m_delta->reset(); + Kokkos::deep_copy(m_Abm_1_gamma, 0.); + Kokkos::deep_copy(m_lambda, 0.); + } + + virtual double get_element(int const i, int const j) const override + { + assert(i >= 0); + assert(i < get_size()); + assert(j >= 0); + assert(i < get_size()); + if (i < m_nb && j < m_nb) { + return m_q_block->get_element(i, j); + } else if (i >= m_nb && j >= m_nb) { + return m_delta->get_element(i - m_nb, j - m_nb); + } else if (j >= m_nb) { + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + return m_Abm_1_gamma(i, j - m_nb); + } else { + // Inefficient, usage is strongly discouraged + double aij; + Kokkos::deep_copy( + Kokkos::View(&aij), + Kokkos::subview(m_Abm_1_gamma, i, j - m_nb)); + return aij; + } + } else { + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + return m_lambda(i - m_nb, j); + } else { + // Inefficient, usage is strongly discouraged + double aij; + Kokkos::deep_copy( + Kokkos::View(&aij), + Kokkos::subview(m_lambda, i - m_nb, j)); + return aij; + } + } + } + virtual void set_element(int const i, int const j, double const aij) const override + { + assert(i >= 0); + assert(i < get_size()); + assert(j >= 0); + assert(i < get_size()); + if (i < m_nb && j < m_nb) { + m_q_block->set_element(i, j, aij); + } else if (i >= m_nb && j >= m_nb) { + m_delta->set_element(i - m_nb, j - m_nb, aij); + } else if (j >= m_nb) { + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + m_Abm_1_gamma(i, j - m_nb) = aij; + } else { + // Inefficient, usage is strongly discouraged + Kokkos::deep_copy( + Kokkos::subview(m_Abm_1_gamma, i, j - m_nb), + Kokkos::View(&aij)); + } + } else { + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + m_lambda(i - m_nb, j) = aij; + } else { + // Inefficient, usage is strongly discouraged + Kokkos::deep_copy( + Kokkos::subview(m_lambda, i - m_nb, j), + Kokkos::View(&aij)); + } + } + } + virtual void factorize() override + { + m_q_block->factorize(); + m_q_block->solve_inplace( + detail::build_mdspan(m_Abm_1_gamma, std::make_index_sequence<2> {})); + + calculate_delta_to_factorize(); + + m_delta->factorize(); + } + +protected: + Matrix_Corner_Block( + int const n, + int const k, + std::unique_ptr q, + int const lambda_size1, + int const lambda_size2) + : Matrix(n) + , m_k(k) + , m_nb(n - k) + , m_q_block(std::move(q)) + , m_delta(new Matrix_Dense(k)) + , m_Abm_1_gamma("Abm_1_gamma", m_nb, m_k) + , m_lambda("lambda", lambda_size1, lambda_size2) + { + assert(n > 0); + assert(m_k >= 0); + assert(m_k <= n); + assert(m_nb == m_q_block->get_size()); + } + +public: + virtual void calculate_delta_to_factorize() + { + auto delta_proxy = *m_delta; + auto lambda_host = Kokkos:: + create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_lambda); + auto Abm_1_gamma_host = Kokkos:: + create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_Abm_1_gamma); + Kokkos::parallel_for( + "calculate_delta_to_factorize", + Kokkos::MDRangePolicy< + Kokkos::DefaultHostExecutionSpace, + Kokkos::Rank<2>>({0, 0}, {m_k, m_k}), + [&](const int i, const int j) { + double val = 0.0; + for (int l = 0; l < m_nb; ++l) { + val += lambda_host(i, l) * Abm_1_gamma_host(l, j); + } + delta_proxy.set_element(i, j, delta_proxy.get_element(i, j) - val); + }); + } + virtual ddc::DSpan2D_stride solve_lambda_section( + ddc::DSpan2D_stride const v, + ddc::DView2D_stride const u) const + { + Kokkos::parallel_for( + "solve_lambda_section", + Kokkos::TeamPolicy(v.extent(1), Kokkos::AUTO), + KOKKOS_CLASS_LAMBDA( + const typename Kokkos::TeamPolicy::member_type& teamMember) { + const int j = teamMember.league_rank(); + + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(teamMember, m_k), + [&](const int i) { + // Upper diagonals in lambda + for (int l = 0; l < m_nb; ++l) { + Kokkos::atomic_sub(&v(i, j), m_lambda(i, l) * u(l, j)); + } + }); + }); + return v; + } + virtual ddc::DSpan2D_stride solve_lambda_section_transpose( + ddc::DSpan2D_stride const u, + ddc::DView2D_stride const v) const + { + Kokkos::parallel_for( + "solve_lambda_section_transpose", + Kokkos::TeamPolicy(u.extent(1), Kokkos::AUTO), + KOKKOS_CLASS_LAMBDA( + const typename Kokkos::TeamPolicy::member_type& teamMember) { + const int j = teamMember.league_rank(); + + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(teamMember, m_nb), + [&](const int i) { + // Upper diagonals in lambda + for (int l = 0; l < m_k; ++l) { + Kokkos::atomic_sub(&u(i, j), m_lambda(l, i) * v(l, j)); + } + }); + }); + return u; + } + virtual ddc::DSpan2D_stride solve_gamma_section( + ddc::DSpan2D_stride const u, + ddc::DView2D_stride const v) const + { + Kokkos::parallel_for( + "solve_gamma_section", + Kokkos::TeamPolicy(u.extent(1), Kokkos::AUTO), + KOKKOS_CLASS_LAMBDA( + const typename Kokkos::TeamPolicy::member_type& teamMember) { + const int j = teamMember.league_rank(); + + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(teamMember, m_nb), + [&](const int i) { + // Upper diagonals in lambda + for (int l = 0; l < m_k; ++l) { + Kokkos::atomic_sub(&u(i, j), m_Abm_1_gamma(i, l) * v(l, j)); + } + }); + }); + return u; + } + virtual ddc::DSpan2D_stride solve_gamma_section_transpose( + ddc::DSpan2D_stride const v, + ddc::DView2D_stride const u) const + { + Kokkos::parallel_for( + "solve_gamma_section_transpose", + Kokkos::TeamPolicy(v.extent(1), Kokkos::AUTO), + KOKKOS_CLASS_LAMBDA( + const typename Kokkos::TeamPolicy::member_type& teamMember) { + const int j = teamMember.league_rank(); + + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(teamMember, m_k), + [&](const int i) { + // Upper diagonals in lambda + for (int l = 0; l < m_nb; ++l) { + Kokkos::atomic_sub(&v(i, j), m_Abm_1_gamma(l, i) * u(l, j)); + } + }); + }); + return v; + } + +protected: + virtual int factorize_method() override + { + return 0; + } + virtual int solve_inplace_method( + double* const b, + char const transpose, + int const n_equations, + int const stride) const override + { + std::experimental::layout_stride::mapping> + layout_mapping_u { + std::experimental::dextents< + std::size_t, + 2> {(std::size_t)m_nb, (std::size_t)n_equations}, + std::array {1, (std::size_t)stride}}; + std::experimental::layout_stride::mapping> + layout_mapping_v { + std::experimental::dextents< + std::size_t, + 2> {(std::size_t)m_k, (std::size_t)n_equations}, + std::array {1, (std::size_t)stride}}; + + + ddc::DSpan2D_stride const u(b, layout_mapping_u); + ddc::DSpan2D_stride const v(b + m_nb, layout_mapping_v); + + if (transpose == 'N') { + m_q_block->solve_inplace(u); + + solve_lambda_section(v, u); + + m_delta->solve_inplace(v); + + solve_gamma_section(u, v); + } else if (transpose == 'T') { + solve_gamma_section_transpose(v, u); + + m_delta->solve_transpose_inplace(v); + + solve_lambda_section_transpose(u, v); + + m_q_block->solve_transpose_inplace(u); + } else { + return -1; + } + return 0; + } +}; + +} // namespace ddc::detail diff --git a/include/ddc/kernels/splines/matrix_dense.hpp b/include/ddc/kernels/splines/matrix_dense.hpp new file mode 100644 index 000000000..4e1c71a46 --- /dev/null +++ b/include/ddc/kernels/splines/matrix_dense.hpp @@ -0,0 +1,123 @@ +#pragma once + +#include +#include + +#include "matrix.hpp" + +namespace ddc::detail { +extern "C" int dgetrf_(int const* m, int const* n, double* a, int const* lda, int* ipiv, int* info); +extern "C" int dgetrs_( + char const* trans, + int const* n, + int const* nrhs, + double* a, + int const* lda, + int* ipiv, + double* b, + int const* ldb, + int* info); + +template +class Matrix_Dense : public Matrix +{ +protected: + Kokkos::View m_a; + Kokkos::View m_ipiv; + +public: + explicit Matrix_Dense(int const mat_size) + : Matrix(mat_size) + , m_a("a", mat_size, mat_size) + , m_ipiv("ipiv", mat_size) + { + assert(mat_size > 0); + } + + void reset() const override + { + Kokkos::deep_copy(m_a, 0.); + } + + double get_element(int const i, int const j) const override + { + assert(i < get_size()); + assert(j < get_size()); + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + return m_a(i, j); + } else { + // Inefficient, usage is strongly discouraged + double aij; + Kokkos::deep_copy( + Kokkos::View(&aij), + Kokkos::subview(m_a, i, j)); + return aij; + } + } + + void set_element(int const i, int const j, double const aij) const override + { + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + m_a(i, j) = aij; + } else { + // Inefficient, usage is strongly discouraged + Kokkos::deep_copy( + Kokkos::subview(m_a, i, j), + Kokkos::View(&aij)); + } + } + +private: + int factorize_method() override + { + auto a_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_a); + auto ipiv_host = create_mirror_view(Kokkos::DefaultHostExecutionSpace(), m_ipiv); + int info; + int const n = get_size(); + dgetrf_(&n, &n, a_host.data(), &n, ipiv_host.data(), &info); + Kokkos::deep_copy(m_a, a_host); + Kokkos::deep_copy(m_ipiv, ipiv_host); + return info; + } + + int solve_inplace_method( + double* const b, + char const transpose, + int const n_equations, + int const stride) const override + { + auto a_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_a); + auto ipiv_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_ipiv); + Kokkos::View + b_view(b, Kokkos::LayoutStride(get_size(), 1, n_equations, stride)); + auto b_host = create_mirror_view(Kokkos::DefaultHostExecutionSpace(), b_view); + for (int i = 0; i < n_equations; ++i) { + Kokkos::deep_copy( + Kokkos::subview(b_host, Kokkos::ALL, i), + Kokkos::subview(b_view, Kokkos::ALL, i)); + } + int info; + int const n = get_size(); + dgetrs_(&transpose, + &n, + &n_equations, + a_host.data(), + &n, + ipiv_host.data(), + b_host.data(), + &stride, + &info); + for (int i = 0; i < n_equations; ++i) { + Kokkos::deep_copy( + Kokkos::subview(b_view, Kokkos::ALL, i), + Kokkos::subview(b_host, Kokkos::ALL, i)); + } + return info; + } +}; + +} // namespace ddc::detail diff --git a/include/ddc/kernels/splines/matrix_maker.hpp b/include/ddc/kernels/splines/matrix_maker.hpp index 1bccc6c84..c3ea262b1 100644 --- a/include/ddc/kernels/splines/matrix_maker.hpp +++ b/include/ddc/kernels/splines/matrix_maker.hpp @@ -3,6 +3,13 @@ #include #include +#include "matrix_banded.hpp" +#include "matrix_center_block.hpp" +#include "matrix_corner_block.hpp" +#include "matrix_dense.hpp" +#include "matrix_pds_banded.hpp" +#include "matrix_pds_tridiag.hpp" +#include "matrix_periodic_banded.hpp" #include "matrix_sparse.hpp" namespace ddc::detail { @@ -10,6 +17,83 @@ namespace ddc::detail { class MatrixMaker { public: + template + static std::unique_ptr make_new_dense(int const n) + { + return std::make_unique>(n); + } + + template + static std::unique_ptr make_new_banded( + int const n, + int const kl, + int const ku, + bool const pds) + { + if (kl == ku && kl == 1 && pds) { + return std::make_unique>(n); + } else if (2 * kl + 1 + ku >= n) { + return std::make_unique>(n); + } else if (kl == ku && pds) { + return std::make_unique>(n, kl); + } else { + return std::make_unique>(n, kl, ku); + } + } + + template + static std::unique_ptr make_new_periodic_banded( + int const n, + int const kl, + int const ku, + bool const pds) + { + int const border_size = std::max(kl, ku); + int const banded_size = n - border_size; + std::unique_ptr block_mat; + if (pds && kl == ku && kl == 1) { + block_mat = std::make_unique>(banded_size); + } else if ( + border_size * n + border_size * (border_size + 1) + (2 * kl + 1 + ku) * banded_size + >= n * n) { + return std::make_unique>(n); + } else if (kl == ku && pds) { + block_mat = std::make_unique>(banded_size, kl); + } else { + block_mat = std::make_unique>(banded_size, kl, ku); + } + return std::make_unique>(n, kl, ku, std::move(block_mat)); + } + + template + static std::unique_ptr make_new_block_with_banded_region( + int const n, + int const kl, + int const ku, + bool const pds, + int const block1_size, + int const block2_size = 0) + { + int const banded_size = n - block1_size - block2_size; + std::unique_ptr block_mat; + if (pds && kl == ku && kl == 1) { + block_mat = std::make_unique>(banded_size); + } else if (2 * kl + 1 + ku >= banded_size) { + return std::make_unique>(n); + } else if (kl == ku && pds) { + block_mat = std::make_unique>(banded_size, kl); + } else { + block_mat = std::make_unique>(banded_size, kl, ku); + } + if (block2_size == 0) { + return std::make_unique< + Matrix_Corner_Block>(n, block1_size, std::move(block_mat)); + } else { + return std::make_unique>(n, block1_size, block2_size, std::move(block_mat)); + } + } + template static std::unique_ptr make_new_sparse( int const n, diff --git a/include/ddc/kernels/splines/matrix_pds_banded.hpp b/include/ddc/kernels/splines/matrix_pds_banded.hpp new file mode 100644 index 000000000..944f161b5 --- /dev/null +++ b/include/ddc/kernels/splines/matrix_pds_banded.hpp @@ -0,0 +1,190 @@ +#pragma once + +#include +#include +#include + +#include + +#include "matrix.hpp" + +namespace ddc::detail { +extern "C" int dpbtrf_( + char const* uplo, + int const* n, + int const* kd, + double* ab, + int const* ldab, + int* info); +extern "C" int dpbtrs_( + char const* uplo, + int const* n, + int const* kd, + int const* nrhs, + double const* ab, + int const* ldab, + double* b, + int const* ldb, + int* info); + +template +class Matrix_PDS_Banded : public Matrix +{ + /* + * Represents a real symmetric positive definite matrix + * stored in a block format + * */ +protected: + int const m_kd; // no. of columns in q + Kokkos::View + m_q; // pds banded matrix representation + +public: + Matrix_PDS_Banded(int const mat_size, int const kd) + : Matrix(mat_size) + , m_kd(kd) + , m_q("q", kd + 1, mat_size) + { + } + + void reset() const override + { + Kokkos::deep_copy(m_q, 0.); + } + + double get_element(int i, int j) const override + { + if (i == j) { + KOKKOS_IF_ON_HOST( + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + return m_q(0, i); + } else { + // Inefficient, usage is strongly discouraged + double aij; + Kokkos::deep_copy( + Kokkos::View(&aij), + Kokkos::subview(m_q, 0, i)); + return aij; + }) + KOKKOS_IF_ON_DEVICE(return m_q(0, i);) + } + if (i > j) { + // inline swap i<->j + int tmp = i; + i = j; + j = tmp; + } + for (int k = 1; k < m_kd + 1; ++k) { + if (i + k == j) { + KOKKOS_IF_ON_HOST( + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + return m_q(k, i); + } else { + // Inefficient, usage is strongly discouraged + double aij; + Kokkos::deep_copy( + Kokkos::View(&aij), + Kokkos::subview(m_q, k, i)); + return aij; + }) + KOKKOS_IF_ON_DEVICE(return m_q(k, i);) + } + } + return 0.0; + } + void set_element(int i, int j, double const aij) const override + { + if (i == j) { + KOKKOS_IF_ON_HOST( + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + m_q(0, i) = aij; + } else { + // Inefficient, usage is strongly discouraged + Kokkos::deep_copy( + Kokkos::subview(m_q, 0, i), + Kokkos::View(&aij)); + }) + KOKKOS_IF_ON_DEVICE(m_q(0, i) = aij;) + return; + } + if (i > j) { + // inline swap i<->j + int tmp = i; + i = j; + j = tmp; + } + for (int k = 1; k < m_kd + 1; ++k) { + if (i + k == j) { + KOKKOS_IF_ON_HOST( + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + m_q(k, i) = aij; + } else { + // Inefficient, usage is strongly discouraged + Kokkos::deep_copy( + Kokkos::subview(m_q, k, i), + Kokkos::View(&aij)); + }) + KOKKOS_IF_ON_DEVICE(m_q(k, i) = aij;) + return; + } + } + assert(std::fabs(aij) < 1e-20); + return; + } + +protected: + int factorize_method() override + { + auto q_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_q); + int info; + char const uplo = 'L'; + int const n = get_size(); + int const ldab = m_kd + 1; + dpbtrf_(&uplo, &n, &m_kd, q_host.data(), &ldab, &info); + Kokkos::deep_copy(m_q, q_host); + return info; + } + + int solve_inplace_method(double* const b, char const, int const n_equations, int const stride) + const override + { + auto q_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_q); + Kokkos::View + b_view(b, Kokkos::LayoutStride(get_size(), 1, n_equations, stride)); + auto b_host = create_mirror_view(Kokkos::DefaultHostExecutionSpace(), b_view); + for (int i = 0; i < n_equations; ++i) { + Kokkos::deep_copy( + Kokkos::subview(b_host, Kokkos::ALL, i), + Kokkos::subview(b_view, Kokkos::ALL, i)); + } + int info; + char const uplo = 'L'; + int const n = get_size(); + int const ldab = m_kd + 1; + dpbtrs_(&uplo, + &n, + &m_kd, + &n_equations, + q_host.data(), + &ldab, + b_host.data(), + &stride, + &info); + for (int i = 0; i < n_equations; ++i) { + Kokkos::deep_copy( + Kokkos::subview(b_view, Kokkos::ALL, i), + Kokkos::subview(b_host, Kokkos::ALL, i)); + } + return info; + } +}; + +} // namespace ddc::detail diff --git a/include/ddc/kernels/splines/matrix_pds_tridiag.hpp b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp new file mode 100644 index 000000000..312617845 --- /dev/null +++ b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp @@ -0,0 +1,160 @@ +#pragma once + +#include +#include +#include + +#include + +#include "matrix.hpp" + +namespace ddc::detail { +extern "C" int dpttrf_(int const* n, double* d, double* e, int* info); +extern "C" int dpttrs_( + int const* n, + int const* nrhs, + double* d, + double* e, + double* b, + int const* ldb, + int* info); + +template +class Matrix_PDS_Tridiag : public Matrix +{ + /* + * Represents a real symmetric positive definite matrix + * stored in a block format + * */ +protected: + Kokkos::View m_d; // diagonal + Kokkos::View m_l; // lower diagonal + +public: + Matrix_PDS_Tridiag(int const mat_size) + : Matrix(mat_size) + , m_d("d", mat_size) + , m_l("l", mat_size - 1) + { + } + + void reset() const override + { + Kokkos::deep_copy(m_d, 0.); + Kokkos::deep_copy(m_l, 0.); + } + + double get_element(int i, int j) const override + { + if (i == j) { + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + return m_d(i); + } else { + // Inefficient, usage is strongly discouraged + double aij; + Kokkos::deep_copy( + Kokkos::View(&aij), + Kokkos::subview(m_d, i)); + return aij; + } + } + if (i > j) { + // inline swap i<->j + int tmp = i; + i = j; + j = tmp; + } + if (i + 1 == j) { + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + return m_l(i); + } else { + // Inefficient, usage is strongly discouraged + double aij; + Kokkos::deep_copy( + Kokkos::View(&aij), + Kokkos::subview(m_l, i)); + return aij; + } + } + return 0.0; + } + void set_element(int i, int j, double const aij) const override + { + if (i == j) { + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + m_d(i) = aij; + } else { + // Inefficient, usage is strongly discouraged + Kokkos::deep_copy( + Kokkos::subview(m_d, i), + Kokkos::View(&aij)); + } + return; + } + if (i > j) { + // inline swap i<->j + int tmp = i; + i = j; + j = tmp; + } + if (i + 1 != j) { + assert(std::fabs(aij) < 1e-20); + } else { + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + m_l(i) = aij; + } else { + // Inefficient, usage is strongly discouraged + Kokkos::deep_copy( + Kokkos::subview(m_l, i), + Kokkos::View(&aij)); + } + } + } + +protected: + int factorize_method() override + { + auto d_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_d); + auto l_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_l); + int info; + int const n = get_size(); + dpttrf_(&n, d_host.data(), l_host.data(), &info); + Kokkos::deep_copy(m_d, d_host); + Kokkos::deep_copy(m_l, l_host); + return info; + } + + int solve_inplace_method(double* const b, char const, int const n_equations, int const stride) + const override + { + auto d_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_d); + auto l_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_l); + Kokkos::View + b_view(b, Kokkos::LayoutStride(get_size(), 1, n_equations, stride)); + auto b_host = create_mirror_view(Kokkos::DefaultHostExecutionSpace(), b_view); + for (int i = 0; i < n_equations; ++i) { + Kokkos::deep_copy( + Kokkos::subview(b_host, Kokkos::ALL, i), + Kokkos::subview(b_view, Kokkos::ALL, i)); + } + int info; + int const n = get_size(); + dpttrs_(&n, &n_equations, d_host.data(), l_host.data(), b_host.data(), &stride, &info); + for (int i = 0; i < n_equations; ++i) { + Kokkos::deep_copy( + Kokkos::subview(b_view, Kokkos::ALL, i), + Kokkos::subview(b_host, Kokkos::ALL, i)); + } + return info; + } +}; + +} // namespace ddc::detail diff --git a/include/ddc/kernels/splines/matrix_periodic_banded.hpp b/include/ddc/kernels/splines/matrix_periodic_banded.hpp new file mode 100644 index 000000000..3125acfbf --- /dev/null +++ b/include/ddc/kernels/splines/matrix_periodic_banded.hpp @@ -0,0 +1,230 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include "matrix.hpp" +#include "matrix_corner_block.hpp" +#include "matrix_dense.hpp" +#include "view.hpp" + + +namespace ddc::detail { + +template +class Matrix_Periodic_Banded : public Matrix_Corner_Block +{ + // Necessary because we inherit from a template class, otherwise we should use this-> everywhere + using Matrix_Corner_Block::get_size; + using Matrix_Corner_Block::m_k; + using Matrix_Corner_Block::m_nb; + using Matrix_Corner_Block::m_q_block; + using Matrix_Corner_Block::m_delta; + using Matrix_Corner_Block::m_Abm_1_gamma; + using Matrix_Corner_Block::m_lambda; + +protected: + int const m_kl; // no. of subdiagonals + int const m_ku; // no. of superdiagonals + +public: + Matrix_Periodic_Banded(int const n, int const m_kl, int const m_ku, std::unique_ptr q) + : Matrix_Corner_Block( + n, + std::max(m_kl, m_ku), + std::move(q), + std::max(m_kl, m_ku), + std::max(m_kl, m_ku) + 1) + , m_kl(m_kl) + , m_ku(m_ku) + { + } + + double get_element(int const i, int j) const override + { + assert(i >= 0); + assert(i < get_size()); + assert(j >= 0); + assert(i < get_size()); + if (i >= m_nb && j < m_nb) { + int d = j - i; + if (d > get_size() / 2) + d -= get_size(); + if (d < -get_size() / 2) + d += get_size(); + + if (d < -m_kl || d > m_ku) + return 0.0; + if (d > 0) { + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + return m_lambda(i - m_nb, j); + } else { + // Inefficient, usage is strongly discouraged + double aij; + Kokkos::deep_copy( + Kokkos::View(&aij), + Kokkos::subview(m_lambda, i - m_nb, j)); + return aij; + } + } else { + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + return m_lambda(i - m_nb, j - m_nb + m_k + 1); + } else { + // Inefficient, usage is strongly discouraged + double aij; + Kokkos::deep_copy( + Kokkos::View(&aij), + Kokkos::subview(m_lambda, i - m_nb, j - m_nb + m_k + 1)); + return aij; + } + } + } else { + return Matrix_Corner_Block::get_element(i, j); + } + } + void set_element(int const i, int j, double const aij) const override + { + assert(i >= 0); + assert(i < get_size()); + assert(j >= 0); + assert(i < get_size()); + if (i >= m_nb && j < m_nb) { + int d = j - i; + if (d > get_size() / 2) + d -= get_size(); + if (d < -get_size() / 2) + d += get_size(); + + if (d < -m_kl || d > m_ku) { + assert(std::fabs(aij) < 1e-20); + return; + } + + if (d > 0) { + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + m_lambda(i - m_nb, j) = aij; + } else { + // Inefficient, usage is strongly discouraged + Kokkos::deep_copy( + Kokkos::subview(m_lambda, i - m_nb, j), + Kokkos::View(&aij)); + } + } else { + if constexpr (Kokkos::SpaceAccessibility< + Kokkos::DefaultHostExecutionSpace, + typename ExecSpace::memory_space>::accessible) { + m_lambda(i - m_nb, j - m_nb + m_k + 1) = aij; + } else { + // Inefficient, usage is strongly discouraged + Kokkos::deep_copy( + Kokkos::subview(m_lambda, i - m_nb, j - m_nb + m_k + 1), + Kokkos::View(&aij)); + } + } + } else { + Matrix_Corner_Block::set_element(i, j, aij); + } + } + +public: + void calculate_delta_to_factorize() override + { + auto delta_proxy = *m_delta; + auto lambda_host = Kokkos:: + create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_lambda); + auto Abm_1_gamma_host = Kokkos:: + create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_Abm_1_gamma); + Kokkos::parallel_for( + "calculate_delta_to_factorize", + Kokkos::MDRangePolicy< + Kokkos::DefaultHostExecutionSpace, + Kokkos::Rank<2>>({0, 0}, {m_k, m_k}), + [&](const int i, const int j) { + double val = 0.0; + // Upper diagonals in lambda, lower diagonals in Abm_1_gamma + for (int l = 0; l < i + 1; ++l) { + val += lambda_host(i, l) * Abm_1_gamma_host(l, j); + } + // Lower diagonals in lambda, upper diagonals in Abm_1_gamma + for (int l = i + 1; l < m_k + 1; ++l) { + int l_full = m_nb - 1 - m_k + l; + val += lambda_host(i, l) * Abm_1_gamma_host(l_full, j); + } + auto tmp = delta_proxy.get_element(i, j); + delta_proxy.set_element(i, j, tmp - val); + }); + } + + ddc::DSpan2D_stride solve_lambda_section( + ddc::DSpan2D_stride const v, + ddc::DView2D_stride const u) const override + { + Kokkos::parallel_for( + "solve_lambda_section", + Kokkos::TeamPolicy(v.extent(1), Kokkos::AUTO), + KOKKOS_CLASS_LAMBDA( + const typename Kokkos::TeamPolicy::member_type& teamMember) { + const int j = teamMember.league_rank(); + + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(teamMember, m_k), + [&](const int i) { + /// Upper diagonals in lambda + for (int l = 0; l <= i; ++l) { + Kokkos::atomic_sub(&v(i, j), m_lambda(i, l) * u(l, j)); + } + // Lower diagonals in lambda + for (int l = i + 1; l < m_k + 1; ++l) { + Kokkos::atomic_sub( + &v(i, j), + m_lambda(i, l) * u(m_nb - 1 - m_k + l, j)); + } + }); + }); + return v; + } + + ddc::DSpan2D_stride solve_lambda_section_transpose( + ddc::DSpan2D_stride const u, + ddc::DView2D_stride const v) const override + { + Kokkos::parallel_for( + "solve_lambda_section_transpose", + Kokkos::TeamPolicy(v.extent(1), Kokkos::AUTO), + KOKKOS_CLASS_LAMBDA( + const typename Kokkos::TeamPolicy::member_type& teamMember) { + const int j = teamMember.league_rank(); + + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(teamMember, m_k), + [&](const int i) { + /// Upper diagonals in lambda + for (int l = 0; l <= i; ++l) { + Kokkos::atomic_sub(&u(l, j), m_lambda(i, l) * v(i, j)); + } + // Lower diagonals in lambda + for (int l = i + 1; l < m_k + 1; ++l) { + Kokkos::atomic_sub( + &u(m_nb - 1 - m_k + l, j), + m_lambda(i, l) * v(i, j)); + } + }); + }); + return u; + } +}; + +} // namespace ddc::detail diff --git a/include/ddc/kernels/splines/matrix_sparse.hpp b/include/ddc/kernels/splines/matrix_sparse.hpp index f20cc3a6e..53bb3ac03 100644 --- a/include/ddc/kernels/splines/matrix_sparse.hpp +++ b/include/ddc/kernels/splines/matrix_sparse.hpp @@ -96,7 +96,7 @@ class Matrix_Sparse : public Matrix { using matrix_sparse_type = gko::matrix::Csr; -private: +protected: std::unique_ptr> m_matrix_dense; std::shared_ptr m_matrix_sparse; @@ -122,17 +122,23 @@ class Matrix_Sparse : public Matrix std::shared_ptr const gko_exec = create_gko_exec(); m_matrix_dense = gko::matrix::Dense< double>::create(gko_exec->get_master(), gko::dim<2>(mat_size, mat_size)); - m_matrix_dense->fill(0); m_matrix_sparse = matrix_sparse_type::create(gko_exec, gko::dim<2>(mat_size, mat_size)); } - virtual double get_element([[maybe_unused]] int i, [[maybe_unused]] int j) const override + void reset() const override + { + m_matrix_dense->fill(0); + m_matrix_sparse->clear(); + } + + double get_element([[maybe_unused]] int i, [[maybe_unused]] int j) const override { throw std::runtime_error("MatrixSparse::get_element() is not implemented because no API is " "provided by Ginkgo"); + return 0; } - virtual void set_element(int i, int j, double aij) override + void set_element(int i, int j, double aij) const override { m_matrix_dense->at(i, j) = aij; } @@ -173,16 +179,20 @@ class Matrix_Sparse : public Matrix return 0; } - virtual int solve_inplace_method(double* b, char transpose, int n_equations) const override + int solve_inplace_method( + double* const b, + char const transpose, + int const n_equations, + int const stride) const override { std::shared_ptr const gko_exec = m_solver->get_executor(); int const main_chunk_size = std::min(m_cols_per_chunk, n_equations); - Kokkos::View const - b_view(b, get_size(), n_equations); - Kokkos::View const - x_view("", get_size(), main_chunk_size); + Kokkos::View const + b_view(b, Kokkos::LayoutStride(get_size(), stride, n_equations, 1)); + Kokkos::View const + x_view("", Kokkos::LayoutStride(get_size(), stride, main_chunk_size, 1)); int const iend = (n_equations + main_chunk_size - 1) / main_chunk_size; for (int i = 0; i < iend; ++i) { diff --git a/include/ddc/kernels/splines/view.hpp b/include/ddc/kernels/splines/view.hpp index 5ad3fc4ef..fed9ab96c 100644 --- a/include/ddc/kernels/splines/view.hpp +++ b/include/ddc/kernels/splines/view.hpp @@ -6,94 +6,31 @@ #include -namespace ddc::detail { +namespace ddc { -template -struct ViewNDMaker; +template +using SpanND = std::experimental::mdspan>; template -struct ViewNDMaker -{ - using type = std::experimental::mdspan< - ElementType, - std::experimental::dextents, - std::experimental::layout_right>; -}; +using SpanND_left = std::experimental::mdspan< + ElementType, + std::experimental::dextents, + std::experimental::layout_left>; template -struct ViewNDMaker -{ - using type = std::experimental::mdspan< - ElementType, - std::experimental::dextents, - std::experimental::layout_stride>; -}; - -/// Note: We use the comma operator to fill the input parameters -/// -/// If Is=[1, 2], `subspan(s, i0, (Is, all)...)` will be expanded as -/// `subspan(s, i0, (1, all), (2, all))` which is equivalent to -/// `subspan(s, i0, all, all)` -template < - class ElementType, - class Extents, - class Layout, - class Accessor, - std::size_t I0, - std::size_t... Is> -std::ostream& stream_impl( - std::ostream& os, - std::experimental::mdspan const& s, - std::index_sequence) -{ - if constexpr (sizeof...(Is) > 0) { - os << '['; - for (std::size_t i0 = 0; i0 < s.extent(I0); ++i0) { - stream_impl( - os, - std::experimental:: - submdspan(s, i0, ((void)Is, std::experimental::full_extent)...), - std::make_index_sequence()); - } - os << ']'; - } else { - os << '['; - for (std::size_t i0 = 0; i0 < s.extent(I0) - 1; ++i0) { - os << s(i0) << ','; - } - os << s(s.extent(I0) - 1) << ']'; - } - return os; -} - - -// template -// struct IsContiguous; -// template <> -// struct IsContiguous -// { -// static constexpr bool val = false; -// }; -// template <> -// struct IsContiguous -// { -// static constexpr bool val = true; -// }; -// template -// struct IsContiguous> -// { -// static constexpr bool val = IsContiguous::val; -// }; - -} // namespace ddc::detail +using SpanND_stride = std::experimental::mdspan< + ElementType, + std::experimental::dextents, + std::experimental::layout_stride>; +template +using ViewND = SpanND; -namespace ddc { template -using SpanND = std::experimental::mdspan>; +using ViewND_left = SpanND_left; template -using ViewND = SpanND; +using ViewND_stride = SpanND_stride; template using Span1D = SpanND<1, ElementType>; @@ -101,46 +38,38 @@ using Span1D = SpanND<1, ElementType>; template using Span2D = SpanND<2, ElementType>; +template +using Span2D_left = SpanND_left<2, ElementType>; + +template +using Span2D_stride = SpanND_stride<2, ElementType>; + template using View1D = ViewND<1, ElementType>; template using View2D = ViewND<2, ElementType>; -template -Span1D as_span(std::array& arr) noexcept -{ - return Span1D(arr.data(), N); -} +template +using View2D_left = ViewND_left<2, ElementType>; -template -Span1D as_span(std::array const& arr) noexcept -{ - return Span1D(arr.data(), N); -} +template +using View2D_stride = ViewND_stride<2, ElementType>; using DSpan1D = ddc::Span1D; using DSpan2D = ddc::Span2D; -using CDSpan1D = ddc::Span1D; +using DSpan2D_left = ddc::Span2D_left; -using CDSpan2D = ddc::Span2D; +using DSpan2D_stride = ddc::Span2D_stride; using DView1D = View1D; using DView2D = View2D; -// template -// constexpr bool is_contiguous_v = detail::IsContiguous::val; - -/// Convenient function to dump a mdspan, it recursively prints all dimensions. -/// Disclaimer: use with caution for large arrays -template -std::ostream& operator<<( - std::ostream& os, - std::experimental::mdspan const& s) -{ - return ddc::detail::stream_impl(os, s, std::make_index_sequence()); -} +using DView2D_left = View2D_left; + +using DView2D_stride = View2D_stride; + } // namespace ddc diff --git a/tests/splines/matrix.cpp b/tests/splines/matrix.cpp index 14cbc07a9..95ba9435a 100644 --- a/tests/splines/matrix.cpp +++ b/tests/splines/matrix.cpp @@ -25,7 +25,6 @@ void fill_identity(ddc::DSpan2D mat) } } -/* void copy_matrix(ddc::DSpan2D copy, std::unique_ptr& mat) { assert(mat->get_size() == int(copy.extent(0))); @@ -37,7 +36,6 @@ void copy_matrix(ddc::DSpan2D copy, std::unique_ptr& mat) } } } -*/ void check_inverse(ddc::DSpan2D matrix, ddc::DSpan2D inv) { @@ -76,6 +74,205 @@ class MatrixSizesFixture : public testing::TestWithParam matrix = ddc::detail::MatrixMaker::make_new_banded< + Kokkos::DefaultHostExecutionSpace>(N, k, k, true); + matrix->reset(); + + for (std::size_t i(0); i < N; ++i) { + matrix->set_element(i, i, 2.0 * k); + for (std::size_t j(std::max(0, int(i) - int(k))); j < i; ++j) { + matrix->set_element(i, j, -1.0); + } + for (std::size_t j(i + 1); j < std::min(N, i + k + 1); ++j) { + matrix->set_element(i, j, -1.0); + } + } + std::vector val_ptr(N * N); + ddc::DSpan2D val(val_ptr.data(), N, N); + copy_matrix(val, matrix); + + std::vector inv_ptr(N * N); + ddc::DSpan2D inv(inv_ptr.data(), N, N); + fill_identity(inv); + matrix->factorize(); + matrix->solve_inplace(inv); + check_inverse(val, inv); +} + +TEST_P(MatrixSizesFixture, OffsetBanded) +{ + auto const [N, k] = GetParam(); + std::unique_ptr matrix = ddc::detail::MatrixMaker::make_new_banded< + Kokkos::DefaultHostExecutionSpace>(N, 0, 2 * k, true); + matrix->reset(); + + for (std::size_t i(0); i < N; ++i) { + for (std::size_t j(i); j < std::min(N, i + k); ++j) { + matrix->set_element(i, i, -1.0); + } + if (i + k < N) { + matrix->set_element(i, i + k, 2.0 * k); + } + for (std::size_t j(i + k + 1); j < std::min(N, i + k + 1); ++j) { + matrix->set_element(i, j, -1.0); + } + } + std::vector val_ptr(N * N); + ddc::DSpan2D val(val_ptr.data(), N, N); + copy_matrix(val, matrix); + + std::vector inv_ptr(N * N); + ddc::DSpan2D inv(inv_ptr.data(), N, N); + fill_identity(inv); + matrix->factorize(); + matrix->solve_inplace(inv); + check_inverse(val, inv); +} + +TEST_P(MatrixSizesFixture, PeriodicBanded) +{ + auto const [N, k] = GetParam(); + + for (std::ptrdiff_t s(-k); s < (std::ptrdiff_t)k + 1; ++s) { + if (s == 0) + continue; + + std::unique_ptr matrix + = ddc::detail::MatrixMaker::make_new_periodic_banded< + Kokkos::DefaultHostExecutionSpace>(N, k - s, k + s, false); + matrix->reset(); + for (std::size_t i(0); i < N; ++i) { + for (std::size_t j(0); j < N; ++j) { + std::ptrdiff_t diag = ddc::detail::modulo((int)(j - i), (int)N); + if (diag == s || diag == (std::ptrdiff_t)N + s) { + matrix->set_element(i, j, 0.5); + } else if ( + diag <= s + (std::ptrdiff_t)k + || diag >= (std::ptrdiff_t)N + s - (std::ptrdiff_t)k) { + matrix->set_element(i, j, -1.0 / k); + } + } + } + std::vector val_ptr(N * N); + ddc::DSpan2D val(val_ptr.data(), N, N); + copy_matrix(val, matrix); + + std::vector inv_ptr(N * N); + ddc::DSpan2D inv(inv_ptr.data(), N, N); + fill_identity(inv); + matrix->factorize(); + matrix->solve_inplace(inv); + check_inverse(val, inv); + } +} + +TEST_P(MatrixSizesFixture, PositiveDefiniteSymmetricTranspose) +{ + auto const [N, k] = GetParam(); + std::unique_ptr matrix = ddc::detail::MatrixMaker::make_new_banded< + Kokkos::DefaultHostExecutionSpace>(N, k, k, true); + matrix->reset(); + + for (std::size_t i(0); i < N; ++i) { + matrix->set_element(i, i, 2.0 * k); + for (std::size_t j(std::max(0, int(i) - int(k))); j < i; ++j) { + matrix->set_element(i, j, -1.0); + } + for (std::size_t j(i + 1); j < std::min(N, i + k + 1); ++j) { + matrix->set_element(i, j, -1.0); + } + } + std::vector val_ptr(N * N); + ddc::DSpan2D val(val_ptr.data(), N, N); + copy_matrix(val, matrix); + + std::vector inv_ptr(N * N); + ddc::DSpan2D inv(inv_ptr.data(), N, N); + fill_identity(inv); + matrix->factorize(); + for (std::size_t i(0); i < N; ++i) { + ddc::DSpan1D inv_line(inv_ptr.data() + i * N, N); + matrix->solve_transpose_inplace(inv_line); + } + check_inverse_transpose(val, inv); +} + +TEST_P(MatrixSizesFixture, OffsetBandedTranspose) +{ + auto const [N, k] = GetParam(); + std::unique_ptr matrix = ddc::detail::MatrixMaker::make_new_banded< + Kokkos::DefaultHostExecutionSpace>(N, 0, 2 * k, true); + matrix->reset(); + + for (std::size_t i(0); i < N; ++i) { + for (std::size_t j(i); j < std::min(N, i + k); ++j) { + matrix->set_element(i, i, -1.0); + } + if (i + k < N) { + matrix->set_element(i, i + k, 2.0 * k); + } + for (std::size_t j(i + k + 1); j < std::min(N, i + k + 1); ++j) { + matrix->set_element(i, j, -1.0); + } + } + std::vector val_ptr(N * N); + ddc::DSpan2D val(val_ptr.data(), N, N); + copy_matrix(val, matrix); + + std::vector inv_ptr(N * N); + ddc::DSpan2D inv(inv_ptr.data(), N, N); + fill_identity(inv); + matrix->factorize(); + for (std::size_t i(0); i < N; ++i) { + ddc::DSpan1D inv_line(inv_ptr.data() + i * N, N); + matrix->solve_transpose_inplace(inv_line); + } + check_inverse_transpose(val, inv); +} + +TEST_P(MatrixSizesFixture, PeriodicBandedTranspose) +{ + auto const [N, k] = GetParam(); + + for (std::ptrdiff_t s(-k); s < (std::ptrdiff_t)k + 1; ++s) { + if (s == 0) + continue; + + std::unique_ptr matrix + = ddc::detail::MatrixMaker::make_new_periodic_banded< + Kokkos::DefaultHostExecutionSpace>(N, k - s, k + s, false); + matrix->reset(); + for (std::size_t i(0); i < N; ++i) { + for (std::size_t j(0); j < N; ++j) { + int diag = ddc::detail::modulo((int)(j - i), (int)N); + if (diag == s || diag == (std::ptrdiff_t)N + s) { + matrix->set_element(i, j, 0.5); + } else if ( + diag <= s + (std::ptrdiff_t)k + || diag >= (std::ptrdiff_t)N + s - (std::ptrdiff_t)k) { + matrix->set_element(i, j, -1.0 / k); + } + } + } + std::vector val_ptr(N * N); + ddc::DSpan2D val(val_ptr.data(), N, N); + copy_matrix(val, matrix); + + std::vector inv_ptr(N * N); + ddc::DSpan2D inv(inv_ptr.data(), N, N); + fill_identity(inv); + matrix->factorize(); + for (std::size_t i(0); i < N; ++i) { + ddc::DSpan1D inv_line(inv_ptr.data() + i * N, N); + matrix->solve_transpose_inplace(inv_line); + } + check_inverse_transpose(val, inv); + } +} + TEST_P(MatrixSizesFixture, Sparse) { auto const [N, k] = GetParam(); @@ -83,6 +280,7 @@ TEST_P(MatrixSizesFixture, Sparse) std::unique_ptr matrix = ddc::detail::MatrixMaker::make_new_sparse(N); + matrix->reset(); std::vector val_ptr(N * N); ddc::DSpan2D val(val_ptr.data(), N, N); @@ -108,7 +306,7 @@ TEST_P(MatrixSizesFixture, Sparse) fill_identity(inv); inv_ptr.modify_host(); inv_ptr.sync_device(); - matrix->solve_multiple_inplace(ddc::DSpan2D(inv_ptr.d_view.data(), N, N)); + matrix->solve_inplace(ddc::DSpan2D(inv_ptr.d_view.data(), N, N)); inv_ptr.modify_device(); inv_ptr.sync_host(); @@ -117,7 +315,7 @@ TEST_P(MatrixSizesFixture, Sparse) fill_identity(inv_tr); inv_tr_ptr.modify_host(); inv_tr_ptr.sync_device(); - matrix->solve_multiple_transpose_inplace(ddc::DSpan2D(inv_tr_ptr.d_view.data(), N, N)); + matrix->solve_transpose_inplace(ddc::DSpan2D(inv_tr_ptr.d_view.data(), N, N)); inv_tr_ptr.modify_device(); inv_tr_ptr.sync_host(); From f62cde8824074058ec71a4a865f24b9eebd109bd Mon Sep 17 00:00:00 2001 From: blegouix Date: Wed, 6 Mar 2024 09:16:12 +0100 Subject: [PATCH 02/28] looks ok --- CMakeLists.txt | 11 +- cmake/3.18/CMakePushCheckState.cmake | 91 +++ cmake/3.18/CheckFunctionExists.cmake | 119 ++++ cmake/3.18/FindLAPACK.cmake | 555 ++++++++++++++++++ docker/doxygen/Dockerfile | 2 + docker/latest/Dockerfile | 2 + docker/oldest/Dockerfile | 2 + .../ddc/kernels/splines/spline_builder.hpp | 17 +- 8 files changed, 793 insertions(+), 6 deletions(-) create mode 100644 cmake/3.18/CMakePushCheckState.cmake create mode 100644 cmake/3.18/CheckFunctionExists.cmake create mode 100644 cmake/3.18/FindLAPACK.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 6f4238902..daa100119 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,8 +91,13 @@ elseif("${DDC_Kokkos_DEPENDENCY_POLICY}" STREQUAL "INSTALLED") find_package(Kokkos CONFIG REQUIRED) endif() +# Custom cmake modules +list( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ) +if("${CMAKE_VERSION}" VERSION_LESS 3.18) + list( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/3.18" ) +endif() + # FFTW -list( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ) # Maybe not specific to FFTW if("${DDC_BUILD_KERNELS_FFT}" AND NOT FFTW_FOUND) find_package( FFTW MODULE REQUIRED ) endif() @@ -241,6 +246,10 @@ if("${DDC_BUILD_KERNELS_SPLINES}") find_package(Ginkgo 1.7.0 EXACT REQUIRED) target_link_libraries(DDC INTERFACE Ginkgo::ginkgo) target_compile_definitions(DDC INTERFACE ginkgo_AVAIL) + + # Lapack + find_package(LAPACK REQUIRED COMPONENTS CXX) + target_link_libraries(DDC INTERFACE LAPACK::LAPACK) endif() ## The PDI wrapper diff --git a/cmake/3.18/CMakePushCheckState.cmake b/cmake/3.18/CMakePushCheckState.cmake new file mode 100644 index 000000000..3e519ee51 --- /dev/null +++ b/cmake/3.18/CMakePushCheckState.cmake @@ -0,0 +1,91 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +#[=======================================================================[.rst: +CMakePushCheckState +------------------- + + + +This module defines three macros: ``CMAKE_PUSH_CHECK_STATE()`` +``CMAKE_POP_CHECK_STATE()`` and ``CMAKE_RESET_CHECK_STATE()`` These macros can +be used to save, restore and reset (i.e., clear contents) the state of +the variables ``CMAKE_REQUIRED_FLAGS``, ``CMAKE_REQUIRED_DEFINITIONS``, +``CMAKE_REQUIRED_LINK_OPTIONS``, ``CMAKE_REQUIRED_LIBRARIES``, +``CMAKE_REQUIRED_INCLUDES`` and ``CMAKE_EXTRA_INCLUDE_FILES`` used by the +various Check-files coming with CMake, like e.g. ``check_function_exists()`` +etc. +The variable contents are pushed on a stack, pushing multiple times is +supported. This is useful e.g. when executing such tests in a Find-module, +where they have to be set, but after the Find-module has been executed they +should have the same value as they had before. + +``CMAKE_PUSH_CHECK_STATE()`` macro receives optional argument ``RESET``. +Whether it's specified, ``CMAKE_PUSH_CHECK_STATE()`` will set all +``CMAKE_REQUIRED_*`` variables to empty values, same as +``CMAKE_RESET_CHECK_STATE()`` call will do. + +Usage: + +.. code-block:: cmake + + cmake_push_check_state(RESET) + set(CMAKE_REQUIRED_DEFINITIONS -DSOME_MORE_DEF) + check_function_exists(...) + cmake_reset_check_state() + set(CMAKE_REQUIRED_DEFINITIONS -DANOTHER_DEF) + check_function_exists(...) + cmake_pop_check_state() +#]=======================================================================] + +macro(CMAKE_RESET_CHECK_STATE) + + set(CMAKE_EXTRA_INCLUDE_FILES) + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_DEFINITIONS) + set(CMAKE_REQUIRED_LINK_OPTIONS) + set(CMAKE_REQUIRED_LIBRARIES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_QUIET) + +endmacro() + +macro(CMAKE_PUSH_CHECK_STATE) + + if(NOT DEFINED _CMAKE_PUSH_CHECK_STATE_COUNTER) + set(_CMAKE_PUSH_CHECK_STATE_COUNTER 0) + endif() + + math(EXPR _CMAKE_PUSH_CHECK_STATE_COUNTER "${_CMAKE_PUSH_CHECK_STATE_COUNTER}+1") + + set(_CMAKE_EXTRA_INCLUDE_FILES_SAVE_${_CMAKE_PUSH_CHECK_STATE_COUNTER} ${CMAKE_EXTRA_INCLUDE_FILES}) + set(_CMAKE_REQUIRED_INCLUDES_SAVE_${_CMAKE_PUSH_CHECK_STATE_COUNTER} ${CMAKE_REQUIRED_INCLUDES}) + set(_CMAKE_REQUIRED_DEFINITIONS_SAVE_${_CMAKE_PUSH_CHECK_STATE_COUNTER} ${CMAKE_REQUIRED_DEFINITIONS}) + set(_CMAKE_REQUIRED_LINK_OPTIONS_SAVE_${_CMAKE_PUSH_CHECK_STATE_COUNTER} ${CMAKE_REQUIRED_LINK_OPTIONS}) + set(_CMAKE_REQUIRED_LIBRARIES_SAVE_${_CMAKE_PUSH_CHECK_STATE_COUNTER} ${CMAKE_REQUIRED_LIBRARIES}) + set(_CMAKE_REQUIRED_FLAGS_SAVE_${_CMAKE_PUSH_CHECK_STATE_COUNTER} ${CMAKE_REQUIRED_FLAGS}) + set(_CMAKE_REQUIRED_QUIET_SAVE_${_CMAKE_PUSH_CHECK_STATE_COUNTER} ${CMAKE_REQUIRED_QUIET}) + + if (${ARGC} GREATER 0 AND "${ARGV0}" STREQUAL "RESET") + cmake_reset_check_state() + endif() + +endmacro() + +macro(CMAKE_POP_CHECK_STATE) + +# don't pop more than we pushed + if("${_CMAKE_PUSH_CHECK_STATE_COUNTER}" GREATER "0") + + set(CMAKE_EXTRA_INCLUDE_FILES ${_CMAKE_EXTRA_INCLUDE_FILES_SAVE_${_CMAKE_PUSH_CHECK_STATE_COUNTER}}) + set(CMAKE_REQUIRED_INCLUDES ${_CMAKE_REQUIRED_INCLUDES_SAVE_${_CMAKE_PUSH_CHECK_STATE_COUNTER}}) + set(CMAKE_REQUIRED_DEFINITIONS ${_CMAKE_REQUIRED_DEFINITIONS_SAVE_${_CMAKE_PUSH_CHECK_STATE_COUNTER}}) + set(CMAKE_REQUIRED_LINK_OPTIONS ${_CMAKE_REQUIRED_LINK_OPTIONS_SAVE_${_CMAKE_PUSH_CHECK_STATE_COUNTER}}) + set(CMAKE_REQUIRED_LIBRARIES ${_CMAKE_REQUIRED_LIBRARIES_SAVE_${_CMAKE_PUSH_CHECK_STATE_COUNTER}}) + set(CMAKE_REQUIRED_FLAGS ${_CMAKE_REQUIRED_FLAGS_SAVE_${_CMAKE_PUSH_CHECK_STATE_COUNTER}}) + set(CMAKE_REQUIRED_QUIET ${_CMAKE_REQUIRED_QUIET_SAVE_${_CMAKE_PUSH_CHECK_STATE_COUNTER}}) + + math(EXPR _CMAKE_PUSH_CHECK_STATE_COUNTER "${_CMAKE_PUSH_CHECK_STATE_COUNTER}-1") + endif() + +endmacro() diff --git a/cmake/3.18/CheckFunctionExists.cmake b/cmake/3.18/CheckFunctionExists.cmake new file mode 100644 index 000000000..136da89d9 --- /dev/null +++ b/cmake/3.18/CheckFunctionExists.cmake @@ -0,0 +1,119 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +#[=======================================================================[.rst: +CheckFunctionExists +------------------- + +Check if a C function can be linked + +.. command:: check_function_exists + + .. code-block:: cmake + + check_function_exists( ) + + Checks that the ```` is provided by libraries on the system and store + the result in a ````, which will be created as an internal + cache variable. + +The following variables may be set before calling this macro to modify the +way the check is run: + +``CMAKE_REQUIRED_FLAGS`` + string of compile command line flags. +``CMAKE_REQUIRED_DEFINITIONS`` + a :ref:`;-list ` of macros to define (-DFOO=bar). +``CMAKE_REQUIRED_INCLUDES`` + a :ref:`;-list ` of header search paths to pass to + the compiler. +``CMAKE_REQUIRED_LINK_OPTIONS`` + a :ref:`;-list ` of options to add to the link command. +``CMAKE_REQUIRED_LIBRARIES`` + a :ref:`;-list ` of libraries to add to the link + command. See policy :policy:`CMP0075`. +``CMAKE_REQUIRED_QUIET`` + execute quietly without messages. + +.. note:: + + Prefer using :Module:`CheckSymbolExists` instead of this module, + for the following reasons: + + * ``check_function_exists()`` can't detect functions that are inlined + in headers or specified as a macro. + + * ``check_function_exists()`` can't detect anything in the 32-bit + versions of the Win32 API, because of a mismatch in calling conventions. + + * ``check_function_exists()`` only verifies linking, it does not verify + that the function is declared in system headers. +#]=======================================================================] + +include_guard(GLOBAL) + +macro(CHECK_FUNCTION_EXISTS FUNCTION VARIABLE) + if(NOT DEFINED "${VARIABLE}" OR "x${${VARIABLE}}" STREQUAL "x${VARIABLE}") + set(MACRO_CHECK_FUNCTION_DEFINITIONS + "-DCHECK_FUNCTION_EXISTS=${FUNCTION} ${CMAKE_REQUIRED_FLAGS}") + if(NOT CMAKE_REQUIRED_QUIET) + message(CHECK_START "Looking for ${FUNCTION}") + endif() + if(CMAKE_REQUIRED_LINK_OPTIONS) + set(CHECK_FUNCTION_EXISTS_ADD_LINK_OPTIONS + LINK_OPTIONS ${CMAKE_REQUIRED_LINK_OPTIONS}) + else() + set(CHECK_FUNCTION_EXISTS_ADD_LINK_OPTIONS) + endif() + if(CMAKE_REQUIRED_LIBRARIES) + set(CHECK_FUNCTION_EXISTS_ADD_LIBRARIES + LINK_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES}) + else() + set(CHECK_FUNCTION_EXISTS_ADD_LIBRARIES) + endif() + if(CMAKE_REQUIRED_INCLUDES) + set(CHECK_FUNCTION_EXISTS_ADD_INCLUDES + "-DINCLUDE_DIRECTORIES:STRING=${CMAKE_REQUIRED_INCLUDES}") + else() + set(CHECK_FUNCTION_EXISTS_ADD_INCLUDES) + endif() + + if(CMAKE_C_COMPILER_LOADED) + set(_cfe_source ${CMAKE_ROOT}/Modules/CheckFunctionExists.c) + elseif(CMAKE_CXX_COMPILER_LOADED) + set(_cfe_source ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CheckFunctionExists/CheckFunctionExists.cxx) + configure_file(${CMAKE_ROOT}/Modules/CheckFunctionExists.c "${_cfe_source}" COPYONLY) + else() + message(FATAL_ERROR "CHECK_FUNCTION_EXISTS needs either C or CXX language enabled") + endif() + + try_compile(${VARIABLE} + ${CMAKE_BINARY_DIR} + ${_cfe_source} + COMPILE_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS} + ${CHECK_FUNCTION_EXISTS_ADD_LINK_OPTIONS} + ${CHECK_FUNCTION_EXISTS_ADD_LIBRARIES} + CMAKE_FLAGS -DCOMPILE_DEFINITIONS:STRING=${MACRO_CHECK_FUNCTION_DEFINITIONS} + "${CHECK_FUNCTION_EXISTS_ADD_INCLUDES}" + OUTPUT_VARIABLE OUTPUT) + unset(_cfe_source) + + if(${VARIABLE}) + set(${VARIABLE} 1 CACHE INTERNAL "Have function ${FUNCTION}") + if(NOT CMAKE_REQUIRED_QUIET) + message(CHECK_PASS "found") + endif() + file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log + "Determining if the function ${FUNCTION} exists passed with the following output:\n" + "${OUTPUT}\n\n") + else() + if(NOT CMAKE_REQUIRED_QUIET) + message(CHECK_FAIL "not found") + endif() + set(${VARIABLE} "" CACHE INTERNAL "Have function ${FUNCTION}") + file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log + "Determining if the function ${FUNCTION} exists failed with the following output:\n" + "${OUTPUT}\n\n") + endif() + endif() +endmacro() diff --git a/cmake/3.18/FindLAPACK.cmake b/cmake/3.18/FindLAPACK.cmake new file mode 100644 index 000000000..31e7de68c --- /dev/null +++ b/cmake/3.18/FindLAPACK.cmake @@ -0,0 +1,555 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +#[=======================================================================[.rst: +FindLAPACK +---------- + +Find Linear Algebra PACKage (LAPACK) library + +This module finds an installed Fortran library that implements the +LAPACK linear-algebra interface (see http://www.netlib.org/lapack/). + +The approach follows that taken for the ``autoconf`` macro file, +``acx_lapack.m4`` (distributed at +http://ac-archive.sourceforge.net/ac-archive/acx_lapack.html). + +Input Variables +^^^^^^^^^^^^^^^ + +The following variables may be set to influence this module's behavior: + +``BLA_STATIC`` + if ``ON`` use static linkage + +``BLA_VENDOR`` + If set, checks only the specified vendor, if not set checks all the + possibilities. List of vendors valid in this module: + + * ``OpenBLAS`` + * ``FLAME`` + * ``Intel10_32`` (intel mkl v10 32 bit) + * ``Intel10_64lp`` (intel mkl v10+ 64 bit, threaded code, lp64 model) + * ``Intel10_64lp_seq`` (intel mkl v10+ 64 bit, sequential code, lp64 model) + * ``Intel10_64ilp`` (intel mkl v10+ 64 bit, threaded code, ilp64 model) + * ``Intel10_64ilp_seq`` (intel mkl v10+ 64 bit, sequential code, ilp64 model) + * ``Intel10_64_dyn`` (intel mkl v10+ 64 bit, single dynamic library) + * ``Intel`` (obsolete versions of mkl 32 and 64 bit) + * ``ACML`` + * ``Apple`` + * ``NAS`` + * ``Arm`` + * ``Arm_mp`` + * ``Arm_ilp64`` + * ``Arm_ilp64_mp`` + * ``Generic`` + +``BLA_F95`` + if ``ON`` tries to find the BLAS95/LAPACK95 interfaces + +Imported targets +^^^^^^^^^^^^^^^^ + +This module defines the following :prop_tgt:`IMPORTED` target: + +``LAPACK::LAPACK`` + The libraries to use for LAPACK, if found. + +Result Variables +^^^^^^^^^^^^^^^^ + +This module defines the following variables: + +``LAPACK_FOUND`` + library implementing the LAPACK interface is found +``LAPACK_LINKER_FLAGS`` + uncached list of required linker flags (excluding ``-l`` and ``-L``). +``LAPACK_LIBRARIES`` + uncached list of libraries (using full path name) to link against + to use LAPACK +``LAPACK95_LIBRARIES`` + uncached list of libraries (using full path name) to link against + to use LAPACK95 +``LAPACK95_FOUND`` + library implementing the LAPACK95 interface is found + +.. note:: + + C, CXX or Fortran must be enabled to detect a BLAS/LAPACK library. + C or CXX must be enabled to use Intel Math Kernel Library (MKL). + + For example, to use Intel MKL libraries and/or Intel compiler: + + .. code-block:: cmake + + set(BLA_VENDOR Intel10_64lp) + find_package(LAPACK) +#]=======================================================================] + +# Check the language being used +if(NOT (CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED OR CMAKE_Fortran_COMPILER_LOADED)) + if(LAPACK_FIND_REQUIRED) + message(FATAL_ERROR "FindLAPACK requires Fortran, C, or C++ to be enabled.") + else() + message(STATUS "Looking for LAPACK... - NOT found (Unsupported languages)") + return() + endif() +endif() + +if(CMAKE_Fortran_COMPILER_LOADED) + include(${CMAKE_CURRENT_LIST_DIR}/CheckFortranFunctionExists.cmake) +else() + include(${CMAKE_CURRENT_LIST_DIR}/CheckFunctionExists.cmake) +endif() +include(${CMAKE_CURRENT_LIST_DIR}/CMakePushCheckState.cmake) + +cmake_push_check_state() +set(CMAKE_REQUIRED_QUIET ${LAPACK_FIND_QUIETLY}) + +set(LAPACK_FOUND FALSE) +set(LAPACK95_FOUND FALSE) + +set(_lapack_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) +if(BLA_STATIC) + if(WIN32) + set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES}) + else() + set(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES}) + endif() +else() + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + # for ubuntu's libblas3gf and liblapack3gf packages + set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES} .so.3gf) + endif() +endif() + +# TODO: move this stuff to a separate module + +macro(CHECK_LAPACK_LIBRARIES LIBRARIES _prefix _name _flags _list _threadlibs _addlibdir _subdirs _blas) + # This macro checks for the existence of the combination of fortran libraries + # given by _list. If the combination is found, this macro checks (using the + # Check_Fortran_Function_Exists macro) whether can link against that library + # combination using the name of a routine given by _name using the linker + # flags given by _flags. If the combination of libraries is found and passes + # the link test, LIBRARIES is set to the list of complete library paths that + # have been found. Otherwise, LIBRARIES is set to FALSE. + + # N.B. _prefix is the prefix applied to the names of all cached variables that + # are generated internally and marked advanced by this macro. + # _addlibdir is a list of additional search paths. _subdirs is a list of path + # suffixes to be used by find_library(). + + set(_libraries_work TRUE) + set(${LIBRARIES}) + set(_combined_name) + + set(_extaddlibdir "${_addlibdir}") + if(WIN32) + list(APPEND _extaddlibdir ENV LIB) + elseif(APPLE) + list(APPEND _extaddlibdir ENV DYLD_LIBRARY_PATH) + else() + list(APPEND _extaddlibdir ENV LD_LIBRARY_PATH) + endif() + list(APPEND _extaddlibdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + + foreach(_library ${_list}) + if(_library MATCHES "^-Wl,--(start|end)-group$") + # Respect linker flags like --start/end-group (required by MKL) + set(${LIBRARIES} ${${LIBRARIES}} "${_library}") + else() + set(_combined_name ${_combined_name}_${_library}) + if(_libraries_work) + find_library(${_prefix}_${_library}_LIBRARY + NAMES ${_library} + PATHS ${_extaddlibdir} + PATH_SUFFIXES ${_subdirs} + ) + #message("DEBUG: find_library(${_library}) got ${${_prefix}_${_library}_LIBRARY}") + mark_as_advanced(${_prefix}_${_library}_LIBRARY) + set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) + set(_libraries_work ${${_prefix}_${_library}_LIBRARY}) + endif() + endif() + endforeach() + + if(_libraries_work) + # Test this combination of libraries. + set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}} ${_blas} ${_threadlibs}) + #message("DEBUG: CMAKE_REQUIRED_LIBRARIES = ${CMAKE_REQUIRED_LIBRARIES}") + if(CMAKE_Fortran_COMPILER_LOADED) + check_fortran_function_exists("${_name}" ${_prefix}${_combined_name}_WORKS) + else() + check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS) + endif() + set(CMAKE_REQUIRED_LIBRARIES) + set(_libraries_work ${${_prefix}${_combined_name}_WORKS}) + endif() + + if(_libraries_work) + if("${_list}${_blas}" STREQUAL "") + set(${LIBRARIES} "${LIBRARIES}-PLACEHOLDER-FOR-EMPTY-LIBRARIES") + else() + set(${LIBRARIES} ${${LIBRARIES}} ${_blas} ${_threadlibs}) + endif() + else() + set(${LIBRARIES} FALSE) + endif() + #message("DEBUG: ${LIBRARIES} = ${${LIBRARIES}}") +endmacro() + +set(LAPACK_LINKER_FLAGS) +set(LAPACK_LIBRARIES) +set(LAPACK95_LIBRARIES) + +if(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED) + find_package(BLAS) +else() + find_package(BLAS REQUIRED) +endif() + +if(BLAS_FOUND) + set(LAPACK_LINKER_FLAGS ${BLAS_LINKER_FLAGS}) + if(NOT $ENV{BLA_VENDOR} STREQUAL "") + set(BLA_VENDOR $ENV{BLA_VENDOR}) + else() + if(NOT BLA_VENDOR) + set(BLA_VENDOR "All") + endif() + endif() + + # LAPACK in the Intel MKL 10+ library? + if(BLA_VENDOR MATCHES "Intel" OR BLA_VENDOR STREQUAL "All") + if(NOT LAPACK_LIBRARIES) + if(CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED) + # System-specific settings + if(NOT WIN32) + set(LAPACK_mkl_LM "-lm") + set(LAPACK_mkl_LDL "-ldl") + endif() + + if(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED) + find_package(Threads) + else() + find_package(Threads REQUIRED) + endif() + + if(BLA_VENDOR MATCHES "_64ilp") + set(LAPACK_mkl_ILP_MODE "ilp64") + else() + set(LAPACK_mkl_ILP_MODE "lp64") + endif() + + set(LAPACK_SEARCH_LIBS "") + + if(BLA_F95) + set(LAPACK_mkl_SEARCH_SYMBOL "cheev_f95") + set(_LIBRARIES LAPACK95_LIBRARIES) + set(_BLAS_LIBRARIES ${BLAS95_LIBRARIES}) + + # old + list(APPEND LAPACK_SEARCH_LIBS + "mkl_lapack95") + # new >= 10.3 + list(APPEND LAPACK_SEARCH_LIBS + "mkl_intel_c") + list(APPEND LAPACK_SEARCH_LIBS + "mkl_lapack95_${LAPACK_mkl_ILP_MODE}") + else() + set(LAPACK_mkl_SEARCH_SYMBOL "cheev") + set(_LIBRARIES LAPACK_LIBRARIES) + set(_BLAS_LIBRARIES ${BLAS_LIBRARIES}) + + # old and new >= 10.3 + list(APPEND LAPACK_SEARCH_LIBS + "mkl_lapack") + endif() + + # MKL uses a multitude of partially platform-specific subdirectories: + if(BLA_VENDOR STREQUAL "Intel10_32") + set(LAPACK_mkl_ARCH_NAME "ia32") + else() + set(LAPACK_mkl_ARCH_NAME "intel64") + endif() + if(WIN32) + set(LAPACK_mkl_OS_NAME "win") + elseif(APPLE) + set(LAPACK_mkl_OS_NAME "mac") + else() + set(LAPACK_mkl_OS_NAME "lin") + endif() + if(DEFINED ENV{MKLROOT}) + file(TO_CMAKE_PATH "$ENV{MKLROOT}" LAPACK_mkl_MKLROOT) + # If MKLROOT points to the subdirectory 'mkl', use the parent directory instead + # so we can better detect other relevant libraries in 'compiler' or 'tbb': + get_filename_component(LAPACK_mkl_MKLROOT_LAST_DIR "${LAPACK_mkl_MKLROOT}" NAME) + if(LAPACK_mkl_MKLROOT_LAST_DIR STREQUAL "mkl") + get_filename_component(LAPACK_mkl_MKLROOT "${LAPACK_mkl_MKLROOT}" DIRECTORY) + endif() + endif() + set(LAPACK_mkl_LIB_PATH_SUFFIXES + "compiler/lib" "compiler/lib/${LAPACK_mkl_ARCH_NAME}_${LAPACK_mkl_OS_NAME}" + "mkl/lib" "mkl/lib/${LAPACK_mkl_ARCH_NAME}_${LAPACK_mkl_OS_NAME}" + "lib/${LAPACK_mkl_ARCH_NAME}_${LAPACK_mkl_OS_NAME}") + + # First try empty lapack libs + if(NOT ${_LIBRARIES}) + check_lapack_libraries( + ${_LIBRARIES} + LAPACK + ${LAPACK_mkl_SEARCH_SYMBOL} + "" + "" + "${CMAKE_THREAD_LIBS_INIT};${LAPACK_mkl_LM};${LAPACK_mkl_LDL}" + "${LAPACK_mkl_MKLROOT}" + "${LAPACK_mkl_LIB_PATH_SUFFIXES}" + "${_BLAS_LIBRARIES}" + ) + endif() + + # Then try the search libs + foreach(IT ${LAPACK_SEARCH_LIBS}) + string(REPLACE " " ";" SEARCH_LIBS ${IT}) + if(NOT ${_LIBRARIES}) + check_lapack_libraries( + ${_LIBRARIES} + LAPACK + ${LAPACK_mkl_SEARCH_SYMBOL} + "" + "${SEARCH_LIBS}" + "${CMAKE_THREAD_LIBS_INIT};${LAPACK_mkl_LM};${LAPACK_mkl_LDL}" + "${LAPACK_mkl_MKLROOT}" + "${LAPACK_mkl_LIB_PATH_SUFFIXES}" + "${_BLAS_LIBRARIES}" + ) + endif() + endforeach() + + unset(LAPACK_mkl_ILP_MODE) + unset(LAPACK_mkl_SEARCH_SYMBOL) + unset(LAPACK_mkl_LM) + unset(LAPACK_mkl_LDL) + unset(LAPACK_mkl_MKLROOT) + unset(LAPACK_mkl_ARCH_NAME) + unset(LAPACK_mkl_OS_NAME) + unset(LAPACK_mkl_LIB_PATH_SUFFIXES) + endif() + endif() + endif() + + # gotoblas? (http://www.tacc.utexas.edu/tacc-projects/gotoblas2) + if(BLA_VENDOR STREQUAL "Goto" OR BLA_VENDOR STREQUAL "All") + if(NOT LAPACK_LIBRARIES) + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "goto2" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + endif() + + # OpenBLAS? (http://www.openblas.net) + if(BLA_VENDOR STREQUAL "OpenBLAS" OR BLA_VENDOR STREQUAL "All") + if(NOT LAPACK_LIBRARIES) + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "openblas" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + endif() + + # ArmPL? (https://developer.arm.com/tools-and-software/server-and-hpc/compile/arm-compiler-for-linux/arm-performance-libraries) + if(BLA_VENDOR MATCHES "Arm" OR BLA_VENDOR STREQUAL "All") + + # Check for 64bit Integer support + if(BLA_VENDOR MATCHES "_ilp64") + set(LAPACK_armpl_LIB "armpl_ilp64") + else() + set(LAPACK_armpl_LIB "armpl_lp64") + endif() + + # Check for OpenMP support, VIA BLA_VENDOR of Arm_mp or Arm_ipl64_mp + if(BLA_VENDOR MATCHES "_mp") + set(LAPACK_armpl_LIB "${LAPACK_armpl_LIB}_mp") + endif() + + if(NOT LAPACK_LIBRARIES) + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "${LAPACK_armpl_LIB}" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + endif() + + # FLAME's blis library? (https://github.com/flame/blis) + if(BLA_VENDOR STREQUAL "FLAME" OR BLA_VENDOR STREQUAL "All") + if(NOT LAPACK_LIBRARIES) + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "flame" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + endif() + + # BLAS in acml library? + if(BLA_VENDOR MATCHES "ACML" OR BLA_VENDOR STREQUAL "All") + if(BLAS_LIBRARIES MATCHES ".+acml.+") + set(LAPACK_LIBRARIES ${BLAS_LIBRARIES}) + endif() + endif() + + # Apple LAPACK library? + if(BLA_VENDOR STREQUAL "Apple" OR BLA_VENDOR STREQUAL "All") + if(NOT LAPACK_LIBRARIES) + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "Accelerate" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + endif() + + # Apple NAS (vecLib) library? + if(BLA_VENDOR STREQUAL "NAS" OR BLA_VENDOR STREQUAL "All") + if(NOT LAPACK_LIBRARIES) + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "vecLib" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + endif() + + # Generic LAPACK library? + if(BLA_VENDOR STREQUAL "Generic" OR + BLA_VENDOR STREQUAL "ATLAS" OR + BLA_VENDOR STREQUAL "All") + if(NOT LAPACK_LIBRARIES) + check_lapack_libraries( + LAPACK_LIBRARIES + LAPACK + cheev + "" + "lapack" + "" + "" + "" + "${BLAS_LIBRARIES}" + ) + endif() + endif() +else() + message(STATUS "LAPACK requires BLAS") +endif() + +if(BLA_F95) + if(LAPACK95_LIBRARIES) + set(LAPACK95_FOUND TRUE) + else() + set(LAPACK95_FOUND FALSE) + endif() + if(NOT LAPACK_FIND_QUIETLY) + if(LAPACK95_FOUND) + message(STATUS "A library with LAPACK95 API found.") + else() + if(LAPACK_FIND_REQUIRED) + message(FATAL_ERROR + "A required library with LAPACK95 API not found. Please specify library location." + ) + else() + message(STATUS + "A library with LAPACK95 API not found. Please specify library location." + ) + endif() + endif() + endif() + set(LAPACK_FOUND "${LAPACK95_FOUND}") + set(LAPACK_LIBRARIES "${LAPACK95_LIBRARIES}") +else() + if(LAPACK_LIBRARIES) + set(LAPACK_FOUND TRUE) + else() + set(LAPACK_FOUND FALSE) + endif() + + if(NOT LAPACK_FIND_QUIETLY) + if(LAPACK_FOUND) + message(STATUS "A library with LAPACK API found.") + else() + if(LAPACK_FIND_REQUIRED) + message(FATAL_ERROR + "A required library with LAPACK API not found. Please specify library location." + ) + else() + message(STATUS + "A library with LAPACK API not found. Please specify library location." + ) + endif() + endif() + endif() +endif() + +# On compilers that implicitly link LAPACK (such as ftn, cc, and CC on Cray HPC machines) +# we used a placeholder for empty LAPACK_LIBRARIES to get through our logic above. +if(LAPACK_LIBRARIES STREQUAL "LAPACK_LIBRARIES-PLACEHOLDER-FOR-EMPTY-LIBRARIES") + set(LAPACK_LIBRARIES "") +endif() + +if(NOT TARGET LAPACK::LAPACK) + add_library(LAPACK::LAPACK INTERFACE IMPORTED) + set(_lapack_libs "${LAPACK_LIBRARIES}") + if(_lapack_libs AND TARGET BLAS::BLAS) + # remove the ${BLAS_LIBRARIES} from the interface and replace it + # with the BLAS::BLAS target + list(REMOVE_ITEM _lapack_libs "${BLAS_LIBRARIES}") + endif() + + if(_lapack_libs) + set_target_properties(LAPACK::LAPACK PROPERTIES + INTERFACE_LINK_LIBRARIES "${_lapack_libs}" + ) + endif() + unset(_lapack_libs) +endif() + +cmake_pop_check_state() +set(CMAKE_FIND_LIBRARY_SUFFIXES ${_lapack_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) diff --git a/docker/doxygen/Dockerfile b/docker/doxygen/Dockerfile index db051060e..8e15d2e94 100644 --- a/docker/doxygen/Dockerfile +++ b/docker/doxygen/Dockerfile @@ -28,6 +28,8 @@ RUN chmod +x /bin/bash_run \ pkg-config \ cmake-data \ libfftw3-dev \ + libopenblas-dev \ + liblapack-dev \ libpdi-dev \ doxygen \ doxygen-latex \ diff --git a/docker/latest/Dockerfile b/docker/latest/Dockerfile index 08d26435c..8059aaa57 100644 --- a/docker/latest/Dockerfile +++ b/docker/latest/Dockerfile @@ -37,6 +37,8 @@ RUN chmod +x /bin/bash_run \ cmake \ git \ libfftw3-dev \ + libopenblas-dev \ + liblapack-dev \ libhwloc-dev \ libpdi-dev \ pdidev-archive-keyring \ diff --git a/docker/oldest/Dockerfile b/docker/oldest/Dockerfile index 58654dd4b..c4aef391f 100644 --- a/docker/oldest/Dockerfile +++ b/docker/oldest/Dockerfile @@ -40,6 +40,8 @@ RUN chmod +x /bin/bash_run \ git \ libelf-dev \ libfftw3-dev \ + libopenblas-dev \ + liblapack-dev \ libhwloc-dev \ libnuma1 \ libpdi-dev \ diff --git a/include/ddc/kernels/splines/spline_builder.hpp b/include/ddc/kernels/splines/spline_builder.hpp index 09dfb538b..7ee274b6d 100644 --- a/include/ddc/kernels/splines/spline_builder.hpp +++ b/include/ddc/kernels/splines/spline_builder.hpp @@ -623,12 +623,19 @@ operator()( } }); // Create a 2D Kokkos::View to manage spline_tr as a matrix - Kokkos::View bcoef_section( - spline_tr.data_handle(), - ddc::discrete_space().nbasis(), - batch_domain().size()); + std::experimental::mdspan< + double, + std::experimental::extents< + size_t, + std::experimental::dynamic_extent, + std::experimental::dynamic_extent>, + std::experimental::layout_right> + bcoef_section( + spline_tr.data_handle(), + ddc::discrete_space().nbasis(), + batch_domain().size()); // Compute spline coef - matrix->solve_batch_inplace(bcoef_section); + matrix->solve_inplace(bcoef_section); // Transpose back spline_tr in spline ddc::parallel_for_each( exec_space(), From 6717f008a85ec570dc3b89be536a83967d2d07ce Mon Sep 17 00:00:00 2001 From: blegouix Date: Wed, 6 Mar 2024 09:28:40 +0100 Subject: [PATCH 03/28] align a comment --- include/ddc/kernels/splines/spline_builder.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ddc/kernels/splines/spline_builder.hpp b/include/ddc/kernels/splines/spline_builder.hpp index 7ee274b6d..c20f18804 100644 --- a/include/ddc/kernels/splines/spline_builder.hpp +++ b/include/ddc/kernels/splines/spline_builder.hpp @@ -622,7 +622,7 @@ operator()( = spline(ddc::DiscreteElement(i + offset_proxy), j); } }); - // Create a 2D Kokkos::View to manage spline_tr as a matrix + // Create a mdspan to manage spline_tr as a matrix std::experimental::mdspan< double, std::experimental::extents< From 7d7aed92c4fb53004a7f4c76cd6d0440e72f349e Mon Sep 17 00:00:00 2001 From: blegouix Date: Wed, 6 Mar 2024 13:13:15 +0100 Subject: [PATCH 04/28] forgotten reset --- include/ddc/kernels/splines/spline_builder.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/ddc/kernels/splines/spline_builder.hpp b/include/ddc/kernels/splines/spline_builder.hpp index c20f18804..36a0a7c7e 100644 --- a/include/ddc/kernels/splines/spline_builder.hpp +++ b/include/ddc/kernels/splines/spline_builder.hpp @@ -408,6 +408,7 @@ void SplineBuilder< ddc::discrete_space().nbasis(), cols_per_chunk, preconditionner_max_block_size); + matrix->reset(); build_matrix_system(); @@ -622,7 +623,7 @@ operator()( = spline(ddc::DiscreteElement(i + offset_proxy), j); } }); - // Create a mdspan to manage spline_tr as a matrix + // Create a 2D mdspan to manage spline_tr as a matrix std::experimental::mdspan< double, std::experimental::extents< From 8b49216284fd713f8c669e53502fb1e754ad499a Mon Sep 17 00:00:00 2001 From: blegouix Date: Thu, 7 Mar 2024 11:40:14 +0100 Subject: [PATCH 05/28] minor changes from Thomas' review --- include/ddc/kernels/splines/matrix.hpp | 4 ++-- include/ddc/kernels/splines/matrix_banded.hpp | 2 +- include/ddc/kernels/splines/matrix_center_block.hpp | 2 +- include/ddc/kernels/splines/matrix_corner_block.hpp | 2 +- include/ddc/kernels/splines/matrix_dense.hpp | 2 +- include/ddc/kernels/splines/matrix_pds_banded.hpp | 2 +- include/ddc/kernels/splines/matrix_pds_tridiag.hpp | 2 +- include/ddc/kernels/splines/matrix_periodic_banded.hpp | 2 +- include/ddc/kernels/splines/matrix_sparse.hpp | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/ddc/kernels/splines/matrix.hpp b/include/ddc/kernels/splines/matrix.hpp index 8f5e00f1b..70561e01e 100644 --- a/include/ddc/kernels/splines/matrix.hpp +++ b/include/ddc/kernels/splines/matrix.hpp @@ -24,7 +24,7 @@ class Matrix virtual double get_element(int i, int j) const = 0; - virtual void set_element(int i, int j, double aij) const = 0; + virtual void set_element(int i, int j, double aij) = 0; virtual void factorize() { @@ -90,7 +90,7 @@ class Matrix return bx; } - int KOKKOS_INLINE_FUNCTION get_size() const + int get_size() const { return m_n; } diff --git a/include/ddc/kernels/splines/matrix_banded.hpp b/include/ddc/kernels/splines/matrix_banded.hpp index c080ceb00..549b15f72 100644 --- a/include/ddc/kernels/splines/matrix_banded.hpp +++ b/include/ddc/kernels/splines/matrix_banded.hpp @@ -92,7 +92,7 @@ class Matrix_Banded : public Matrix } } - void set_element(int const i, int const j, double const aij) const override + void set_element(int const i, int const j, double const aij) override { if (i >= std::max(0, j - m_ku) && i < std::min(get_size(), j + m_kl + 1)) { if constexpr (Kokkos::SpaceAccessibility< diff --git a/include/ddc/kernels/splines/matrix_center_block.hpp b/include/ddc/kernels/splines/matrix_center_block.hpp index c38316ad6..6a3d39097 100644 --- a/include/ddc/kernels/splines/matrix_center_block.hpp +++ b/include/ddc/kernels/splines/matrix_center_block.hpp @@ -45,7 +45,7 @@ class Matrix_Center_Block : public Matrix_Corner_Block adjust_indexes(i, j); return Matrix_Corner_Block::get_element(i, j); } - void set_element(int i, int j, double aij) const override + void set_element(int i, int j, double aij) override { adjust_indexes(i, j); Matrix_Corner_Block::set_element(i, j, aij); diff --git a/include/ddc/kernels/splines/matrix_corner_block.hpp b/include/ddc/kernels/splines/matrix_corner_block.hpp index 854e99657..32c1d247b 100644 --- a/include/ddc/kernels/splines/matrix_corner_block.hpp +++ b/include/ddc/kernels/splines/matrix_corner_block.hpp @@ -93,7 +93,7 @@ class Matrix_Corner_Block : public Matrix } } } - virtual void set_element(int const i, int const j, double const aij) const override + virtual void set_element(int const i, int const j, double const aij) override { assert(i >= 0); assert(i < get_size()); diff --git a/include/ddc/kernels/splines/matrix_dense.hpp b/include/ddc/kernels/splines/matrix_dense.hpp index 4e1c71a46..c04cff490 100644 --- a/include/ddc/kernels/splines/matrix_dense.hpp +++ b/include/ddc/kernels/splines/matrix_dense.hpp @@ -57,7 +57,7 @@ class Matrix_Dense : public Matrix } } - void set_element(int const i, int const j, double const aij) const override + void set_element(int const i, int const j, double const aij) override { if constexpr (Kokkos::SpaceAccessibility< Kokkos::DefaultHostExecutionSpace, diff --git a/include/ddc/kernels/splines/matrix_pds_banded.hpp b/include/ddc/kernels/splines/matrix_pds_banded.hpp index 944f161b5..5fa303f5c 100644 --- a/include/ddc/kernels/splines/matrix_pds_banded.hpp +++ b/include/ddc/kernels/splines/matrix_pds_banded.hpp @@ -96,7 +96,7 @@ class Matrix_PDS_Banded : public Matrix } return 0.0; } - void set_element(int i, int j, double const aij) const override + void set_element(int i, int j, double const aij) override { if (i == j) { KOKKOS_IF_ON_HOST( diff --git a/include/ddc/kernels/splines/matrix_pds_tridiag.hpp b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp index 312617845..3e613d0ba 100644 --- a/include/ddc/kernels/splines/matrix_pds_tridiag.hpp +++ b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp @@ -82,7 +82,7 @@ class Matrix_PDS_Tridiag : public Matrix } return 0.0; } - void set_element(int i, int j, double const aij) const override + void set_element(int i, int j, double const aij) override { if (i == j) { if constexpr (Kokkos::SpaceAccessibility< diff --git a/include/ddc/kernels/splines/matrix_periodic_banded.hpp b/include/ddc/kernels/splines/matrix_periodic_banded.hpp index 3125acfbf..c9cad9cb5 100644 --- a/include/ddc/kernels/splines/matrix_periodic_banded.hpp +++ b/include/ddc/kernels/splines/matrix_periodic_banded.hpp @@ -91,7 +91,7 @@ class Matrix_Periodic_Banded : public Matrix_Corner_Block return Matrix_Corner_Block::get_element(i, j); } } - void set_element(int const i, int j, double const aij) const override + void set_element(int const i, int j, double const aij) override { assert(i >= 0); assert(i < get_size()); diff --git a/include/ddc/kernels/splines/matrix_sparse.hpp b/include/ddc/kernels/splines/matrix_sparse.hpp index 53bb3ac03..c8f8a675a 100644 --- a/include/ddc/kernels/splines/matrix_sparse.hpp +++ b/include/ddc/kernels/splines/matrix_sparse.hpp @@ -138,7 +138,7 @@ class Matrix_Sparse : public Matrix return 0; } - void set_element(int i, int j, double aij) const override + void set_element(int i, int j, double aij) override { m_matrix_dense->at(i, j) = aij; } From 354023aef87bc2ef9f356b11becc17c641e3dd78 Mon Sep 17 00:00:00 2001 From: blegouix Date: Thu, 7 Mar 2024 15:08:31 +0100 Subject: [PATCH 06/28] privatize m_n --- include/ddc/kernels/splines/matrix.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ddc/kernels/splines/matrix.hpp b/include/ddc/kernels/splines/matrix.hpp index 70561e01e..bafb52f6a 100644 --- a/include/ddc/kernels/splines/matrix.hpp +++ b/include/ddc/kernels/splines/matrix.hpp @@ -12,7 +12,7 @@ namespace ddc::detail { class Matrix { -protected: +private: int m_n; public: From 7854cce567609cdd8cf74b0b5b803b6297b4738f Mon Sep 17 00:00:00 2001 From: blegouix Date: Thu, 7 Mar 2024 15:20:52 +0100 Subject: [PATCH 07/28] remove reset --- include/ddc/kernels/splines/matrix.hpp | 2 -- include/ddc/kernels/splines/matrix_banded.hpp | 3 --- include/ddc/kernels/splines/matrix_corner_block.hpp | 5 ----- include/ddc/kernels/splines/matrix_dense.hpp | 3 --- include/ddc/kernels/splines/matrix_pds_banded.hpp | 4 ---- include/ddc/kernels/splines/matrix_pds_tridiag.hpp | 4 ---- include/ddc/kernels/splines/matrix_sparse.hpp | 4 ---- include/ddc/kernels/splines/spline_builder.hpp | 1 - tests/splines/matrix.cpp | 7 ------- 9 files changed, 33 deletions(-) diff --git a/include/ddc/kernels/splines/matrix.hpp b/include/ddc/kernels/splines/matrix.hpp index bafb52f6a..ef6102c95 100644 --- a/include/ddc/kernels/splines/matrix.hpp +++ b/include/ddc/kernels/splines/matrix.hpp @@ -20,8 +20,6 @@ class Matrix virtual ~Matrix() = default; - virtual void reset() const = 0; - virtual double get_element(int i, int j) const = 0; virtual void set_element(int i, int j, double aij) = 0; diff --git a/include/ddc/kernels/splines/matrix_banded.hpp b/include/ddc/kernels/splines/matrix_banded.hpp index 549b15f72..aa2552431 100644 --- a/include/ddc/kernels/splines/matrix_banded.hpp +++ b/include/ddc/kernels/splines/matrix_banded.hpp @@ -65,10 +65,7 @@ class Matrix_Banded : public Matrix * for the superdiagonals. (The kl additional rows are needed for pivoting.) * The term A(i,j) of the full matrix is stored in q(i-j+2*kl+1,j). */ - } - void reset() const override - { Kokkos::deep_copy(m_q, 0.); } diff --git a/include/ddc/kernels/splines/matrix_corner_block.hpp b/include/ddc/kernels/splines/matrix_corner_block.hpp index 32c1d247b..57a6e2755 100644 --- a/include/ddc/kernels/splines/matrix_corner_block.hpp +++ b/include/ddc/kernels/splines/matrix_corner_block.hpp @@ -45,12 +45,7 @@ class Matrix_Corner_Block : public Matrix assert(m_k >= 0); assert(m_k <= get_size()); assert(m_nb == m_q_block->get_size()); - } - virtual void reset() const override - { - m_q_block->reset(); - m_delta->reset(); Kokkos::deep_copy(m_Abm_1_gamma, 0.); Kokkos::deep_copy(m_lambda, 0.); } diff --git a/include/ddc/kernels/splines/matrix_dense.hpp b/include/ddc/kernels/splines/matrix_dense.hpp index c04cff490..ef2f80fc1 100644 --- a/include/ddc/kernels/splines/matrix_dense.hpp +++ b/include/ddc/kernels/splines/matrix_dense.hpp @@ -32,10 +32,7 @@ class Matrix_Dense : public Matrix , m_ipiv("ipiv", mat_size) { assert(mat_size > 0); - } - void reset() const override - { Kokkos::deep_copy(m_a, 0.); } diff --git a/include/ddc/kernels/splines/matrix_pds_banded.hpp b/include/ddc/kernels/splines/matrix_pds_banded.hpp index 5fa303f5c..dfe71bb85 100644 --- a/include/ddc/kernels/splines/matrix_pds_banded.hpp +++ b/include/ddc/kernels/splines/matrix_pds_banded.hpp @@ -44,10 +44,6 @@ class Matrix_PDS_Banded : public Matrix : Matrix(mat_size) , m_kd(kd) , m_q("q", kd + 1, mat_size) - { - } - - void reset() const override { Kokkos::deep_copy(m_q, 0.); } diff --git a/include/ddc/kernels/splines/matrix_pds_tridiag.hpp b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp index 3e613d0ba..a5f69d266 100644 --- a/include/ddc/kernels/splines/matrix_pds_tridiag.hpp +++ b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp @@ -35,10 +35,6 @@ class Matrix_PDS_Tridiag : public Matrix : Matrix(mat_size) , m_d("d", mat_size) , m_l("l", mat_size - 1) - { - } - - void reset() const override { Kokkos::deep_copy(m_d, 0.); Kokkos::deep_copy(m_l, 0.); diff --git a/include/ddc/kernels/splines/matrix_sparse.hpp b/include/ddc/kernels/splines/matrix_sparse.hpp index c8f8a675a..853c59860 100644 --- a/include/ddc/kernels/splines/matrix_sparse.hpp +++ b/include/ddc/kernels/splines/matrix_sparse.hpp @@ -123,10 +123,7 @@ class Matrix_Sparse : public Matrix m_matrix_dense = gko::matrix::Dense< double>::create(gko_exec->get_master(), gko::dim<2>(mat_size, mat_size)); m_matrix_sparse = matrix_sparse_type::create(gko_exec, gko::dim<2>(mat_size, mat_size)); - } - void reset() const override - { m_matrix_dense->fill(0); m_matrix_sparse->clear(); } @@ -148,7 +145,6 @@ class Matrix_Sparse : public Matrix // Remove zeros gko::matrix_data matrix_data(gko::dim<2>(get_size(), get_size())); m_matrix_dense->write(matrix_data); - m_matrix_dense.reset(); matrix_data.remove_zeros(); m_matrix_sparse->read(matrix_data); std::shared_ptr const gko_exec = m_matrix_sparse->get_executor(); diff --git a/include/ddc/kernels/splines/spline_builder.hpp b/include/ddc/kernels/splines/spline_builder.hpp index 36a0a7c7e..dd6464772 100644 --- a/include/ddc/kernels/splines/spline_builder.hpp +++ b/include/ddc/kernels/splines/spline_builder.hpp @@ -408,7 +408,6 @@ void SplineBuilder< ddc::discrete_space().nbasis(), cols_per_chunk, preconditionner_max_block_size); - matrix->reset(); build_matrix_system(); diff --git a/tests/splines/matrix.cpp b/tests/splines/matrix.cpp index 95ba9435a..7494605d8 100644 --- a/tests/splines/matrix.cpp +++ b/tests/splines/matrix.cpp @@ -79,7 +79,6 @@ TEST_P(MatrixSizesFixture, PositiveDefiniteSymmetric) auto const [N, k] = GetParam(); std::unique_ptr matrix = ddc::detail::MatrixMaker::make_new_banded< Kokkos::DefaultHostExecutionSpace>(N, k, k, true); - matrix->reset(); for (std::size_t i(0); i < N; ++i) { matrix->set_element(i, i, 2.0 * k); @@ -107,7 +106,6 @@ TEST_P(MatrixSizesFixture, OffsetBanded) auto const [N, k] = GetParam(); std::unique_ptr matrix = ddc::detail::MatrixMaker::make_new_banded< Kokkos::DefaultHostExecutionSpace>(N, 0, 2 * k, true); - matrix->reset(); for (std::size_t i(0); i < N; ++i) { for (std::size_t j(i); j < std::min(N, i + k); ++j) { @@ -143,7 +141,6 @@ TEST_P(MatrixSizesFixture, PeriodicBanded) std::unique_ptr matrix = ddc::detail::MatrixMaker::make_new_periodic_banded< Kokkos::DefaultHostExecutionSpace>(N, k - s, k + s, false); - matrix->reset(); for (std::size_t i(0); i < N; ++i) { for (std::size_t j(0); j < N; ++j) { std::ptrdiff_t diag = ddc::detail::modulo((int)(j - i), (int)N); @@ -174,7 +171,6 @@ TEST_P(MatrixSizesFixture, PositiveDefiniteSymmetricTranspose) auto const [N, k] = GetParam(); std::unique_ptr matrix = ddc::detail::MatrixMaker::make_new_banded< Kokkos::DefaultHostExecutionSpace>(N, k, k, true); - matrix->reset(); for (std::size_t i(0); i < N; ++i) { matrix->set_element(i, i, 2.0 * k); @@ -205,7 +201,6 @@ TEST_P(MatrixSizesFixture, OffsetBandedTranspose) auto const [N, k] = GetParam(); std::unique_ptr matrix = ddc::detail::MatrixMaker::make_new_banded< Kokkos::DefaultHostExecutionSpace>(N, 0, 2 * k, true); - matrix->reset(); for (std::size_t i(0); i < N; ++i) { for (std::size_t j(i); j < std::min(N, i + k); ++j) { @@ -244,7 +239,6 @@ TEST_P(MatrixSizesFixture, PeriodicBandedTranspose) std::unique_ptr matrix = ddc::detail::MatrixMaker::make_new_periodic_banded< Kokkos::DefaultHostExecutionSpace>(N, k - s, k + s, false); - matrix->reset(); for (std::size_t i(0); i < N; ++i) { for (std::size_t j(0); j < N; ++j) { int diag = ddc::detail::modulo((int)(j - i), (int)N); @@ -280,7 +274,6 @@ TEST_P(MatrixSizesFixture, Sparse) std::unique_ptr matrix = ddc::detail::MatrixMaker::make_new_sparse(N); - matrix->reset(); std::vector val_ptr(N * N); ddc::DSpan2D val(val_ptr.data(), N, N); From a2bcd08478b033e963d080cee4b113075653ab3b Mon Sep 17 00:00:00 2001 From: blegouix Date: Fri, 8 Mar 2024 16:31:54 +0100 Subject: [PATCH 08/28] wip (problem with assert) --- include/ddc/kernels/splines/matrix.hpp | 31 +++++-------------- include/ddc/kernels/splines/matrix_banded.hpp | 12 +++---- .../kernels/splines/matrix_corner_block.hpp | 14 ++++----- include/ddc/kernels/splines/matrix_dense.hpp | 12 +++---- .../ddc/kernels/splines/matrix_pds_banded.hpp | 9 ++++-- .../kernels/splines/matrix_pds_tridiag.hpp | 9 ++++-- include/ddc/kernels/splines/matrix_sparse.hpp | 12 +++---- 7 files changed, 44 insertions(+), 55 deletions(-) diff --git a/include/ddc/kernels/splines/matrix.hpp b/include/ddc/kernels/splines/matrix.hpp index ef6102c95..7bf13c9d4 100644 --- a/include/ddc/kernels/splines/matrix.hpp +++ b/include/ddc/kernels/splines/matrix.hpp @@ -44,26 +44,22 @@ class Matrix virtual ddc::DSpan1D solve_inplace(ddc::DSpan1D const b) const { - ddc::DSpan2D_left b_left(b.data_handle(), b.extent(0), 1); - solve_inplace(b_left); + ddc::DSpan2D b_2d(b.data_handle(), b.extent(0), 1); + solve_inplace(b_2d); return b; } virtual ddc::DSpan1D solve_transpose_inplace(ddc::DSpan1D const b) const { - ddc::DSpan2D_left b_left(b.data_handle(), b.extent(0), 1); - solve_transpose_inplace(b_left); + ddc::DSpan2D b_2d(b.data_handle(), b.extent(0), 1); + solve_transpose_inplace(b_2d); return b; } virtual ddc::DSpan2D_stride solve_inplace(ddc::DSpan2D_stride const bx) const { assert(int(bx.extent(0)) == m_n); - int const info = solve_inplace_method( - bx.data_handle(), - 'N', - bx.extent(1), - std::max(bx.stride(0), bx.stride(1))); + int const info = solve_inplace_method(bx, 'N'); if (info < 0) { std::cerr << -info << "-th argument had an illegal value" << std::endl; @@ -75,11 +71,7 @@ class Matrix virtual ddc::DSpan2D_stride solve_transpose_inplace(ddc::DSpan2D_stride const bx) const { assert(int(bx.extent(0)) == m_n); - int const info = solve_inplace_method( - bx.data_handle(), - 'T', - bx.extent(1), - std::max(bx.stride(0), bx.stride(1))); + int const info = solve_inplace_method(bx, 'T'); if (info < 0) { std::cerr << -info << "-th argument had an illegal value" << std::endl; @@ -108,16 +100,7 @@ class Matrix protected: virtual int factorize_method() = 0; - virtual int solve_inplace_method( - double* const b, - char const transpose, - int const n_equations, - int const stride) const = 0; - - int solve_inplace_method(double* const b, char const transpose, int const n_equations) const - { - return solve_inplace_method(b, transpose, n_equations, get_size()); - }; + virtual int solve_inplace_method(ddc::DSpan2D_stride const b, char const transpose) const = 0; }; } // namespace ddc::detail diff --git a/include/ddc/kernels/splines/matrix_banded.hpp b/include/ddc/kernels/splines/matrix_banded.hpp index aa2552431..6740f5d95 100644 --- a/include/ddc/kernels/splines/matrix_banded.hpp +++ b/include/ddc/kernels/splines/matrix_banded.hpp @@ -119,16 +119,16 @@ class Matrix_Banded : public Matrix Kokkos::deep_copy(m_ipiv, ipiv_host); return info; } - int solve_inplace_method( - double* b, - char const transpose, - int const n_equations, - int const stride) const override + int solve_inplace_method(ddc::DSpan2D_stride b, char const transpose) const override { + assert(b.stride(0) == 1 || b.extent(1) == 1); + int const n_equations = b.extent(1); + int const stride = b.stride(1); + auto q_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_q); auto ipiv_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_ipiv); Kokkos::View - b_view(b, Kokkos::LayoutStride(get_size(), 1, n_equations, stride)); + b_view(b.data_handle(), Kokkos::LayoutStride(get_size(), 1, n_equations, stride)); auto b_host = create_mirror_view(Kokkos::DefaultHostExecutionSpace(), b_view); for (int i = 0; i < n_equations; ++i) { Kokkos::deep_copy( diff --git a/include/ddc/kernels/splines/matrix_corner_block.hpp b/include/ddc/kernels/splines/matrix_corner_block.hpp index 57a6e2755..be34198f2 100644 --- a/include/ddc/kernels/splines/matrix_corner_block.hpp +++ b/include/ddc/kernels/splines/matrix_corner_block.hpp @@ -273,12 +273,12 @@ class Matrix_Corner_Block : public Matrix { return 0; } - virtual int solve_inplace_method( - double* const b, - char const transpose, - int const n_equations, - int const stride) const override + virtual int solve_inplace_method(ddc::DSpan2D_stride b, char const transpose) const override { + assert(b.stride(0) == 1 || b.extent(1) == 1); + int const n_equations = b.extent(1); + int const stride = b.stride(1); + std::experimental::layout_stride::mapping {1, (std::size_t)stride}}; - ddc::DSpan2D_stride const u(b, layout_mapping_u); - ddc::DSpan2D_stride const v(b + m_nb, layout_mapping_v); + ddc::DSpan2D_stride const u(b.data_handle(), layout_mapping_u); + ddc::DSpan2D_stride const v(b.data_handle() + m_nb, layout_mapping_v); if (transpose == 'N') { m_q_block->solve_inplace(u); diff --git a/include/ddc/kernels/splines/matrix_dense.hpp b/include/ddc/kernels/splines/matrix_dense.hpp index ef2f80fc1..86471a037 100644 --- a/include/ddc/kernels/splines/matrix_dense.hpp +++ b/include/ddc/kernels/splines/matrix_dense.hpp @@ -81,16 +81,16 @@ class Matrix_Dense : public Matrix return info; } - int solve_inplace_method( - double* const b, - char const transpose, - int const n_equations, - int const stride) const override + int solve_inplace_method(ddc::DSpan2D_stride b, char const transpose) const override { + assert(b.stride(0) == 1 || b.extent(1) == 1); + int const n_equations = b.extent(1); + int const stride = b.stride(1); + auto a_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_a); auto ipiv_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_ipiv); Kokkos::View - b_view(b, Kokkos::LayoutStride(get_size(), 1, n_equations, stride)); + b_view(b.data_handle(), Kokkos::LayoutStride(get_size(), 1, n_equations, stride)); auto b_host = create_mirror_view(Kokkos::DefaultHostExecutionSpace(), b_view); for (int i = 0; i < n_equations; ++i) { Kokkos::deep_copy( diff --git a/include/ddc/kernels/splines/matrix_pds_banded.hpp b/include/ddc/kernels/splines/matrix_pds_banded.hpp index dfe71bb85..4b2997110 100644 --- a/include/ddc/kernels/splines/matrix_pds_banded.hpp +++ b/include/ddc/kernels/splines/matrix_pds_banded.hpp @@ -149,12 +149,15 @@ class Matrix_PDS_Banded : public Matrix return info; } - int solve_inplace_method(double* const b, char const, int const n_equations, int const stride) - const override + int solve_inplace_method(ddc::DSpan2D_stride b, char const) const override { + assert(b.stride(0) == 1 || b.extent(1) == 1); + int const n_equations = b.extent(1); + int const stride = b.stride(1); + auto q_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_q); Kokkos::View - b_view(b, Kokkos::LayoutStride(get_size(), 1, n_equations, stride)); + b_view(b.data_handle(), Kokkos::LayoutStride(get_size(), 1, n_equations, stride)); auto b_host = create_mirror_view(Kokkos::DefaultHostExecutionSpace(), b_view); for (int i = 0; i < n_equations; ++i) { Kokkos::deep_copy( diff --git a/include/ddc/kernels/splines/matrix_pds_tridiag.hpp b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp index a5f69d266..dff5ef983 100644 --- a/include/ddc/kernels/splines/matrix_pds_tridiag.hpp +++ b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp @@ -128,13 +128,16 @@ class Matrix_PDS_Tridiag : public Matrix return info; } - int solve_inplace_method(double* const b, char const, int const n_equations, int const stride) - const override + int solve_inplace_method(ddc::DSpan2D_stride b, char const) const override { + assert(b.stride(0) == 1 || b.extent(1) == 1); + int const n_equations = b.extent(1); + int const stride = b.stride(1); + auto d_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_d); auto l_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_l); Kokkos::View - b_view(b, Kokkos::LayoutStride(get_size(), 1, n_equations, stride)); + b_view(b.data_handle(), Kokkos::LayoutStride(get_size(), 1, n_equations, stride)); auto b_host = create_mirror_view(Kokkos::DefaultHostExecutionSpace(), b_view); for (int i = 0; i < n_equations; ++i) { Kokkos::deep_copy( diff --git a/include/ddc/kernels/splines/matrix_sparse.hpp b/include/ddc/kernels/splines/matrix_sparse.hpp index 853c59860..8143e9cf6 100644 --- a/include/ddc/kernels/splines/matrix_sparse.hpp +++ b/include/ddc/kernels/splines/matrix_sparse.hpp @@ -175,18 +175,18 @@ class Matrix_Sparse : public Matrix return 0; } - int solve_inplace_method( - double* const b, - char const transpose, - int const n_equations, - int const stride) const override + int solve_inplace_method(ddc::DSpan2D_stride b, char const transpose) const override { + assert(b.stride(1) == 1 || b.extent(1) == 1); + int const n_equations = b.extent(1); + int const stride = b.stride(0); + std::shared_ptr const gko_exec = m_solver->get_executor(); int const main_chunk_size = std::min(m_cols_per_chunk, n_equations); Kokkos::View const - b_view(b, Kokkos::LayoutStride(get_size(), stride, n_equations, 1)); + b_view(b.data_handle(), Kokkos::LayoutStride(get_size(), stride, n_equations, 1)); Kokkos::View const x_view("", Kokkos::LayoutStride(get_size(), stride, main_chunk_size, 1)); From e5f32491dee8ecd7f8c34d426346a44994dcc941 Mon Sep 17 00:00:00 2001 From: blegouix Date: Fri, 8 Mar 2024 17:58:10 +0100 Subject: [PATCH 09/28] replace double* with strided mdspan in argument of solve_inplace_method and revert the "layout logic" of the tests --- include/ddc/kernels/splines/matrix.hpp | 4 +- include/ddc/kernels/splines/matrix_banded.hpp | 2 +- .../kernels/splines/matrix_corner_block.hpp | 3 +- include/ddc/kernels/splines/matrix_dense.hpp | 2 +- .../ddc/kernels/splines/matrix_pds_banded.hpp | 2 +- .../kernels/splines/matrix_pds_tridiag.hpp | 2 +- tests/splines/matrix.cpp | 38 +++++++++---------- 7 files changed, 27 insertions(+), 26 deletions(-) diff --git a/include/ddc/kernels/splines/matrix.hpp b/include/ddc/kernels/splines/matrix.hpp index 7bf13c9d4..c7c40517b 100644 --- a/include/ddc/kernels/splines/matrix.hpp +++ b/include/ddc/kernels/splines/matrix.hpp @@ -44,14 +44,14 @@ class Matrix virtual ddc::DSpan1D solve_inplace(ddc::DSpan1D const b) const { - ddc::DSpan2D b_2d(b.data_handle(), b.extent(0), 1); + ddc::DSpan2D_left b_2d(b.data_handle(), b.extent(0), 1); solve_inplace(b_2d); return b; } virtual ddc::DSpan1D solve_transpose_inplace(ddc::DSpan1D const b) const { - ddc::DSpan2D b_2d(b.data_handle(), b.extent(0), 1); + ddc::DSpan2D_left b_2d(b.data_handle(), b.extent(0), 1); solve_transpose_inplace(b_2d); return b; } diff --git a/include/ddc/kernels/splines/matrix_banded.hpp b/include/ddc/kernels/splines/matrix_banded.hpp index 6740f5d95..0d8d25376 100644 --- a/include/ddc/kernels/splines/matrix_banded.hpp +++ b/include/ddc/kernels/splines/matrix_banded.hpp @@ -121,7 +121,7 @@ class Matrix_Banded : public Matrix } int solve_inplace_method(ddc::DSpan2D_stride b, char const transpose) const override { - assert(b.stride(0) == 1 || b.extent(1) == 1); + assert(b.stride(0) == 1); int const n_equations = b.extent(1); int const stride = b.stride(1); diff --git a/include/ddc/kernels/splines/matrix_corner_block.hpp b/include/ddc/kernels/splines/matrix_corner_block.hpp index be34198f2..a8af6ce1a 100644 --- a/include/ddc/kernels/splines/matrix_corner_block.hpp +++ b/include/ddc/kernels/splines/matrix_corner_block.hpp @@ -275,7 +275,8 @@ class Matrix_Corner_Block : public Matrix } virtual int solve_inplace_method(ddc::DSpan2D_stride b, char const transpose) const override { - assert(b.stride(0) == 1 || b.extent(1) == 1); + std::cout << b.stride(0) << "\n"; + assert(b.stride(0) == 1); int const n_equations = b.extent(1); int const stride = b.stride(1); diff --git a/include/ddc/kernels/splines/matrix_dense.hpp b/include/ddc/kernels/splines/matrix_dense.hpp index 86471a037..22d3ce1b1 100644 --- a/include/ddc/kernels/splines/matrix_dense.hpp +++ b/include/ddc/kernels/splines/matrix_dense.hpp @@ -83,7 +83,7 @@ class Matrix_Dense : public Matrix int solve_inplace_method(ddc::DSpan2D_stride b, char const transpose) const override { - assert(b.stride(0) == 1 || b.extent(1) == 1); + assert(b.stride(0) == 1); int const n_equations = b.extent(1); int const stride = b.stride(1); diff --git a/include/ddc/kernels/splines/matrix_pds_banded.hpp b/include/ddc/kernels/splines/matrix_pds_banded.hpp index 4b2997110..72d50993a 100644 --- a/include/ddc/kernels/splines/matrix_pds_banded.hpp +++ b/include/ddc/kernels/splines/matrix_pds_banded.hpp @@ -151,7 +151,7 @@ class Matrix_PDS_Banded : public Matrix int solve_inplace_method(ddc::DSpan2D_stride b, char const) const override { - assert(b.stride(0) == 1 || b.extent(1) == 1); + assert(b.stride(0) == 1); int const n_equations = b.extent(1); int const stride = b.stride(1); diff --git a/include/ddc/kernels/splines/matrix_pds_tridiag.hpp b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp index dff5ef983..09f676ea7 100644 --- a/include/ddc/kernels/splines/matrix_pds_tridiag.hpp +++ b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp @@ -130,7 +130,7 @@ class Matrix_PDS_Tridiag : public Matrix int solve_inplace_method(ddc::DSpan2D_stride b, char const) const override { - assert(b.stride(0) == 1 || b.extent(1) == 1); + assert(b.stride(0) == 1); int const n_equations = b.extent(1); int const stride = b.stride(1); diff --git a/tests/splines/matrix.cpp b/tests/splines/matrix.cpp index 7494605d8..bcf53e8cd 100644 --- a/tests/splines/matrix.cpp +++ b/tests/splines/matrix.cpp @@ -15,7 +15,7 @@ namespace { -void fill_identity(ddc::DSpan2D mat) +void fill_identity(ddc::DSpan2D_stride mat) { assert(mat.extent(0) == mat.extent(1)); for (std::size_t i(0); i < mat.extent(0); ++i) { @@ -25,7 +25,7 @@ void fill_identity(ddc::DSpan2D mat) } } -void copy_matrix(ddc::DSpan2D copy, std::unique_ptr& mat) +void copy_matrix(ddc::DSpan2D_stride copy, std::unique_ptr& mat) { assert(mat->get_size() == int(copy.extent(0))); assert(mat->get_size() == int(copy.extent(1))); @@ -37,7 +37,7 @@ void copy_matrix(ddc::DSpan2D copy, std::unique_ptr& mat) } } -void check_inverse(ddc::DSpan2D matrix, ddc::DSpan2D inv) +void check_inverse(ddc::DSpan2D_stride matrix, ddc::DSpan2D_stride inv) { double TOL = 1e-10; std::size_t N = matrix.extent(0); @@ -46,14 +46,14 @@ void check_inverse(ddc::DSpan2D matrix, ddc::DSpan2D inv) for (std::size_t j(0); j < N; ++j) { double id_val = 0.0; for (std::size_t k(0); k < N; ++k) { - id_val += matrix(i, k) * inv(j, k); + id_val += matrix(i, k) * inv(k, j); } EXPECT_NEAR(id_val, static_cast(i == j), TOL); } } } -void check_inverse_transpose(ddc::DSpan2D matrix, ddc::DSpan2D inv) +void check_inverse_transpose(ddc::DSpan2D_stride matrix, ddc::DSpan2D_stride inv) { double TOL = 1e-10; std::size_t N = matrix.extent(0); @@ -62,7 +62,7 @@ void check_inverse_transpose(ddc::DSpan2D matrix, ddc::DSpan2D inv) for (std::size_t j(0); j < N; ++j) { double id_val = 0.0; for (std::size_t k(0); k < N; ++k) { - id_val += matrix(i, k) * inv(k, j); + id_val += matrix(i, k) * inv(j, k); } EXPECT_NEAR(id_val, static_cast(i == j), TOL); } @@ -90,11 +90,11 @@ TEST_P(MatrixSizesFixture, PositiveDefiniteSymmetric) } } std::vector val_ptr(N * N); - ddc::DSpan2D val(val_ptr.data(), N, N); + ddc::DSpan2D_left val(val_ptr.data(), N, N); copy_matrix(val, matrix); std::vector inv_ptr(N * N); - ddc::DSpan2D inv(inv_ptr.data(), N, N); + ddc::DSpan2D_left inv(inv_ptr.data(), N, N); fill_identity(inv); matrix->factorize(); matrix->solve_inplace(inv); @@ -119,11 +119,11 @@ TEST_P(MatrixSizesFixture, OffsetBanded) } } std::vector val_ptr(N * N); - ddc::DSpan2D val(val_ptr.data(), N, N); + ddc::DSpan2D_left val(val_ptr.data(), N, N); copy_matrix(val, matrix); std::vector inv_ptr(N * N); - ddc::DSpan2D inv(inv_ptr.data(), N, N); + ddc::DSpan2D_left inv(inv_ptr.data(), N, N); fill_identity(inv); matrix->factorize(); matrix->solve_inplace(inv); @@ -154,11 +154,11 @@ TEST_P(MatrixSizesFixture, PeriodicBanded) } } std::vector val_ptr(N * N); - ddc::DSpan2D val(val_ptr.data(), N, N); + ddc::DSpan2D_left val(val_ptr.data(), N, N); copy_matrix(val, matrix); std::vector inv_ptr(N * N); - ddc::DSpan2D inv(inv_ptr.data(), N, N); + ddc::DSpan2D_left inv(inv_ptr.data(), N, N); fill_identity(inv); matrix->factorize(); matrix->solve_inplace(inv); @@ -182,11 +182,11 @@ TEST_P(MatrixSizesFixture, PositiveDefiniteSymmetricTranspose) } } std::vector val_ptr(N * N); - ddc::DSpan2D val(val_ptr.data(), N, N); + ddc::DSpan2D_left val(val_ptr.data(), N, N); copy_matrix(val, matrix); std::vector inv_ptr(N * N); - ddc::DSpan2D inv(inv_ptr.data(), N, N); + ddc::DSpan2D_left inv(inv_ptr.data(), N, N); fill_identity(inv); matrix->factorize(); for (std::size_t i(0); i < N; ++i) { @@ -214,11 +214,11 @@ TEST_P(MatrixSizesFixture, OffsetBandedTranspose) } } std::vector val_ptr(N * N); - ddc::DSpan2D val(val_ptr.data(), N, N); + ddc::DSpan2D_left val(val_ptr.data(), N, N); copy_matrix(val, matrix); std::vector inv_ptr(N * N); - ddc::DSpan2D inv(inv_ptr.data(), N, N); + ddc::DSpan2D_left inv(inv_ptr.data(), N, N); fill_identity(inv); matrix->factorize(); for (std::size_t i(0); i < N; ++i) { @@ -252,11 +252,11 @@ TEST_P(MatrixSizesFixture, PeriodicBandedTranspose) } } std::vector val_ptr(N * N); - ddc::DSpan2D val(val_ptr.data(), N, N); + ddc::DSpan2D_left val(val_ptr.data(), N, N); copy_matrix(val, matrix); std::vector inv_ptr(N * N); - ddc::DSpan2D inv(inv_ptr.data(), N, N); + ddc::DSpan2D_left inv(inv_ptr.data(), N, N); fill_identity(inv); matrix->factorize(); for (std::size_t i(0); i < N; ++i) { @@ -276,7 +276,7 @@ TEST_P(MatrixSizesFixture, Sparse) = ddc::detail::MatrixMaker::make_new_sparse(N); std::vector val_ptr(N * N); - ddc::DSpan2D val(val_ptr.data(), N, N); + ddc::DSpan2D_left val(val_ptr.data(), N, N); for (std::size_t i(0); i < N; ++i) { for (std::size_t j(0); j < N; ++j) { if (i == j) { From 40d16f26799f7b2ee999d0bce068ce85b19fdb20 Mon Sep 17 00:00:00 2001 From: blegouix Date: Fri, 8 Mar 2024 18:02:54 +0100 Subject: [PATCH 10/28] remove debug line --- include/ddc/kernels/splines/matrix_corner_block.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/ddc/kernels/splines/matrix_corner_block.hpp b/include/ddc/kernels/splines/matrix_corner_block.hpp index a8af6ce1a..f01423e7c 100644 --- a/include/ddc/kernels/splines/matrix_corner_block.hpp +++ b/include/ddc/kernels/splines/matrix_corner_block.hpp @@ -275,7 +275,6 @@ class Matrix_Corner_Block : public Matrix } virtual int solve_inplace_method(ddc::DSpan2D_stride b, char const transpose) const override { - std::cout << b.stride(0) << "\n"; assert(b.stride(0) == 1); int const n_equations = b.extent(1); int const stride = b.stride(1); From 14b241bd88722e0377a7d648175eb32d65684797 Mon Sep 17 00:00:00 2001 From: Baptiste Legouix Date: Fri, 8 Mar 2024 18:04:54 +0100 Subject: [PATCH 11/28] Update include/ddc/kernels/splines/matrix_banded.hpp Co-authored-by: Thomas Padioleau --- include/ddc/kernels/splines/matrix_banded.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/ddc/kernels/splines/matrix_banded.hpp b/include/ddc/kernels/splines/matrix_banded.hpp index 0d8d25376..a88b48aba 100644 --- a/include/ddc/kernels/splines/matrix_banded.hpp +++ b/include/ddc/kernels/splines/matrix_banded.hpp @@ -57,14 +57,14 @@ class Matrix_Banded : public Matrix assert(m_ku <= mat_size); /* - * Given the linear system A*x=b, we assume that A is a square (n by n) - * with ku super-diagonals and kl sub-diagonals. - * All non-zero elements of A are stored in the rectangular matrix q, using - * the format required by DGBTRF (LAPACK): diagonals of A are rows of q. - * q has 2*kl rows for the subdiagonals, 1 row for the diagonal, and ku rows - * for the superdiagonals. (The kl additional rows are needed for pivoting.) - * The term A(i,j) of the full matrix is stored in q(i-j+2*kl+1,j). - */ + * Given the linear system A*x=b, we assume that A is a square (n by n) + * with ku super-diagonals and kl sub-diagonals. + * All non-zero elements of A are stored in the rectangular matrix q, using + * the format required by DGBTRF (LAPACK): diagonals of A are rows of q. + * q has 2*kl rows for the subdiagonals, 1 row for the diagonal, and ku rows + * for the superdiagonals. (The kl additional rows are needed for pivoting.) + * The term A(i,j) of the full matrix is stored in q(i-j+2*kl+1,j). + */ Kokkos::deep_copy(m_q, 0.); } From 44533fbca0f32db766f000033f2c74fe009240e5 Mon Sep 17 00:00:00 2001 From: blegouix Date: Tue, 12 Mar 2024 11:02:05 +0100 Subject: [PATCH 12/28] /!\ matrixes data moved to host --- include/ddc/kernels/splines/matrix_banded.hpp | 40 ++-------- .../kernels/splines/matrix_corner_block.hpp | 75 +++++-------------- include/ddc/kernels/splines/matrix_dense.hpp | 40 ++-------- .../ddc/kernels/splines/matrix_pds_banded.hpp | 73 ++---------------- .../kernels/splines/matrix_pds_tridiag.hpp | 62 ++------------- .../splines/matrix_periodic_banded.hpp | 66 +++------------- 6 files changed, 60 insertions(+), 296 deletions(-) diff --git a/include/ddc/kernels/splines/matrix_banded.hpp b/include/ddc/kernels/splines/matrix_banded.hpp index a88b48aba..7b5bd7417 100644 --- a/include/ddc/kernels/splines/matrix_banded.hpp +++ b/include/ddc/kernels/splines/matrix_banded.hpp @@ -37,9 +37,9 @@ class Matrix_Banded : public Matrix int const m_kl; // no. of subdiagonals int const m_ku; // no. of superdiagonals int const m_c; // no. of columns in q - Kokkos::View m_ipiv; // pivot indices + Kokkos::View m_ipiv; // pivot indices // TODO: double** - Kokkos::View m_q; // banded matrix representation + Kokkos::View m_q; // banded matrix representation public: Matrix_Banded(int const mat_size, int const kl, int const ku) @@ -72,18 +72,7 @@ class Matrix_Banded : public Matrix double get_element(int const i, int const j) const override { if (i >= std::max(0, j - m_ku) && i < std::min(get_size(), j + m_kl + 1)) { - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - return m_q(j * m_c + m_kl + m_ku + i - j); - } else { - // Inefficient, usage is strongly discouraged - double aij; - Kokkos::deep_copy( - Kokkos::View(&aij), - Kokkos::subview(m_q, j * m_c + m_kl + m_ku + i - j)); - return aij; - } + return m_q(j * m_c + m_kl + m_ku + i - j); } else { return 0.0; } @@ -92,16 +81,7 @@ class Matrix_Banded : public Matrix void set_element(int const i, int const j, double const aij) override { if (i >= std::max(0, j - m_ku) && i < std::min(get_size(), j + m_kl + 1)) { - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - m_q(j * m_c + m_kl + m_ku + i - j) = aij; - } else { - // Inefficient, usage is strongly discouraged - Kokkos::deep_copy( - Kokkos::subview(m_q, j * m_c + m_kl + m_ku + i - j), - Kokkos::View(&aij)); - } + m_q(j * m_c + m_kl + m_ku + i - j) = aij; } else { assert(std::fabs(aij) < 1e-20); } @@ -110,13 +90,9 @@ class Matrix_Banded : public Matrix protected: int factorize_method() override { - auto q_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_q); - auto ipiv_host = create_mirror_view(Kokkos::DefaultHostExecutionSpace(), m_ipiv); int info; int const n = get_size(); - dgbtrf_(&n, &n, &m_kl, &m_ku, q_host.data(), &m_c, ipiv_host.data(), &info); - Kokkos::deep_copy(m_q, q_host); - Kokkos::deep_copy(m_ipiv, ipiv_host); + dgbtrf_(&n, &n, &m_kl, &m_ku, m_q.data(), &m_c, m_ipiv.data(), &info); return info; } int solve_inplace_method(ddc::DSpan2D_stride b, char const transpose) const override @@ -125,8 +101,6 @@ class Matrix_Banded : public Matrix int const n_equations = b.extent(1); int const stride = b.stride(1); - auto q_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_q); - auto ipiv_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_ipiv); Kokkos::View b_view(b.data_handle(), Kokkos::LayoutStride(get_size(), 1, n_equations, stride)); auto b_host = create_mirror_view(Kokkos::DefaultHostExecutionSpace(), b_view); @@ -142,9 +116,9 @@ class Matrix_Banded : public Matrix &m_kl, &m_ku, &n_equations, - q_host.data(), + m_q.data(), &m_c, - ipiv_host.data(), + m_ipiv.data(), b_host.data(), &stride, &info); diff --git a/include/ddc/kernels/splines/matrix_corner_block.hpp b/include/ddc/kernels/splines/matrix_corner_block.hpp index f01423e7c..49b8f487c 100644 --- a/include/ddc/kernels/splines/matrix_corner_block.hpp +++ b/include/ddc/kernels/splines/matrix_corner_block.hpp @@ -28,8 +28,8 @@ class Matrix_Corner_Block : public Matrix //------------------------------------- std::shared_ptr m_q_block; std::shared_ptr> m_delta; - Kokkos::View m_Abm_1_gamma; - Kokkos::View m_lambda; + Kokkos::View m_Abm_1_gamma; + Kokkos::View m_lambda; public: Matrix_Corner_Block(int const n, int const k, std::unique_ptr q) @@ -61,31 +61,9 @@ class Matrix_Corner_Block : public Matrix } else if (i >= m_nb && j >= m_nb) { return m_delta->get_element(i - m_nb, j - m_nb); } else if (j >= m_nb) { - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - return m_Abm_1_gamma(i, j - m_nb); - } else { - // Inefficient, usage is strongly discouraged - double aij; - Kokkos::deep_copy( - Kokkos::View(&aij), - Kokkos::subview(m_Abm_1_gamma, i, j - m_nb)); - return aij; - } + return m_Abm_1_gamma(i, j - m_nb); } else { - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - return m_lambda(i - m_nb, j); - } else { - // Inefficient, usage is strongly discouraged - double aij; - Kokkos::deep_copy( - Kokkos::View(&aij), - Kokkos::subview(m_lambda, i - m_nb, j)); - return aij; - } + return m_lambda(i - m_nb, j); } } virtual void set_element(int const i, int const j, double const aij) override @@ -99,27 +77,9 @@ class Matrix_Corner_Block : public Matrix } else if (i >= m_nb && j >= m_nb) { m_delta->set_element(i - m_nb, j - m_nb, aij); } else if (j >= m_nb) { - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - m_Abm_1_gamma(i, j - m_nb) = aij; - } else { - // Inefficient, usage is strongly discouraged - Kokkos::deep_copy( - Kokkos::subview(m_Abm_1_gamma, i, j - m_nb), - Kokkos::View(&aij)); - } + m_Abm_1_gamma(i, j - m_nb) = aij; } else { - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - m_lambda(i - m_nb, j) = aij; - } else { - // Inefficient, usage is strongly discouraged - Kokkos::deep_copy( - Kokkos::subview(m_lambda, i - m_nb, j), - Kokkos::View(&aij)); - } + m_lambda(i - m_nb, j) = aij; } } virtual void factorize() override @@ -158,10 +118,6 @@ class Matrix_Corner_Block : public Matrix virtual void calculate_delta_to_factorize() { auto delta_proxy = *m_delta; - auto lambda_host = Kokkos:: - create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_lambda); - auto Abm_1_gamma_host = Kokkos:: - create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_Abm_1_gamma); Kokkos::parallel_for( "calculate_delta_to_factorize", Kokkos::MDRangePolicy< @@ -170,7 +126,7 @@ class Matrix_Corner_Block : public Matrix [&](const int i, const int j) { double val = 0.0; for (int l = 0; l < m_nb; ++l) { - val += lambda_host(i, l) * Abm_1_gamma_host(l, j); + val += m_lambda(i, l) * m_Abm_1_gamma(l, j); } delta_proxy.set_element(i, j, delta_proxy.get_element(i, j) - val); }); @@ -179,6 +135,7 @@ class Matrix_Corner_Block : public Matrix ddc::DSpan2D_stride const v, ddc::DView2D_stride const u) const { + auto lambda_device = create_mirror_view_and_copy(ExecSpace(), m_lambda); Kokkos::parallel_for( "solve_lambda_section", Kokkos::TeamPolicy(v.extent(1), Kokkos::AUTO), @@ -192,7 +149,7 @@ class Matrix_Corner_Block : public Matrix [&](const int i) { // Upper diagonals in lambda for (int l = 0; l < m_nb; ++l) { - Kokkos::atomic_sub(&v(i, j), m_lambda(i, l) * u(l, j)); + Kokkos::atomic_sub(&v(i, j), lambda_device(i, l) * u(l, j)); } }); }); @@ -202,6 +159,7 @@ class Matrix_Corner_Block : public Matrix ddc::DSpan2D_stride const u, ddc::DView2D_stride const v) const { + auto lambda_device = create_mirror_view_and_copy(ExecSpace(), m_lambda); Kokkos::parallel_for( "solve_lambda_section_transpose", Kokkos::TeamPolicy(u.extent(1), Kokkos::AUTO), @@ -215,7 +173,7 @@ class Matrix_Corner_Block : public Matrix [&](const int i) { // Upper diagonals in lambda for (int l = 0; l < m_k; ++l) { - Kokkos::atomic_sub(&u(i, j), m_lambda(l, i) * v(l, j)); + Kokkos::atomic_sub(&u(i, j), lambda_device(l, i) * v(l, j)); } }); }); @@ -225,6 +183,7 @@ class Matrix_Corner_Block : public Matrix ddc::DSpan2D_stride const u, ddc::DView2D_stride const v) const { + auto Abm_1_gamma_device = create_mirror_view_and_copy(ExecSpace(), m_Abm_1_gamma); Kokkos::parallel_for( "solve_gamma_section", Kokkos::TeamPolicy(u.extent(1), Kokkos::AUTO), @@ -238,7 +197,9 @@ class Matrix_Corner_Block : public Matrix [&](const int i) { // Upper diagonals in lambda for (int l = 0; l < m_k; ++l) { - Kokkos::atomic_sub(&u(i, j), m_Abm_1_gamma(i, l) * v(l, j)); + Kokkos::atomic_sub( + &u(i, j), + Abm_1_gamma_device(i, l) * v(l, j)); } }); }); @@ -248,6 +209,7 @@ class Matrix_Corner_Block : public Matrix ddc::DSpan2D_stride const v, ddc::DView2D_stride const u) const { + auto Abm_1_gamma_device = create_mirror_view_and_copy(ExecSpace(), m_Abm_1_gamma); Kokkos::parallel_for( "solve_gamma_section_transpose", Kokkos::TeamPolicy(v.extent(1), Kokkos::AUTO), @@ -261,7 +223,9 @@ class Matrix_Corner_Block : public Matrix [&](const int i) { // Upper diagonals in lambda for (int l = 0; l < m_nb; ++l) { - Kokkos::atomic_sub(&v(i, j), m_Abm_1_gamma(l, i) * u(l, j)); + Kokkos::atomic_sub( + &v(i, j), + Abm_1_gamma_device(l, i) * u(l, j)); } }); }); @@ -298,7 +262,6 @@ class Matrix_Corner_Block : public Matrix 2> {(std::size_t)m_k, (std::size_t)n_equations}, std::array {1, (std::size_t)stride}}; - ddc::DSpan2D_stride const u(b.data_handle(), layout_mapping_u); ddc::DSpan2D_stride const v(b.data_handle() + m_nb, layout_mapping_v); diff --git a/include/ddc/kernels/splines/matrix_dense.hpp b/include/ddc/kernels/splines/matrix_dense.hpp index 22d3ce1b1..e6eddce73 100644 --- a/include/ddc/kernels/splines/matrix_dense.hpp +++ b/include/ddc/kernels/splines/matrix_dense.hpp @@ -22,8 +22,8 @@ template class Matrix_Dense : public Matrix { protected: - Kokkos::View m_a; - Kokkos::View m_ipiv; + Kokkos::View m_a; + Kokkos::View m_ipiv; public: explicit Matrix_Dense(int const mat_size) @@ -40,44 +40,20 @@ class Matrix_Dense : public Matrix { assert(i < get_size()); assert(j < get_size()); - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - return m_a(i, j); - } else { - // Inefficient, usage is strongly discouraged - double aij; - Kokkos::deep_copy( - Kokkos::View(&aij), - Kokkos::subview(m_a, i, j)); - return aij; - } + return m_a(i, j); } void set_element(int const i, int const j, double const aij) override { - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - m_a(i, j) = aij; - } else { - // Inefficient, usage is strongly discouraged - Kokkos::deep_copy( - Kokkos::subview(m_a, i, j), - Kokkos::View(&aij)); - } + m_a(i, j) = aij; } private: int factorize_method() override { - auto a_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_a); - auto ipiv_host = create_mirror_view(Kokkos::DefaultHostExecutionSpace(), m_ipiv); int info; int const n = get_size(); - dgetrf_(&n, &n, a_host.data(), &n, ipiv_host.data(), &info); - Kokkos::deep_copy(m_a, a_host); - Kokkos::deep_copy(m_ipiv, ipiv_host); + dgetrf_(&n, &n, m_a.data(), &n, m_ipiv.data(), &info); return info; } @@ -87,8 +63,6 @@ class Matrix_Dense : public Matrix int const n_equations = b.extent(1); int const stride = b.stride(1); - auto a_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_a); - auto ipiv_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_ipiv); Kokkos::View b_view(b.data_handle(), Kokkos::LayoutStride(get_size(), 1, n_equations, stride)); auto b_host = create_mirror_view(Kokkos::DefaultHostExecutionSpace(), b_view); @@ -102,9 +76,9 @@ class Matrix_Dense : public Matrix dgetrs_(&transpose, &n, &n_equations, - a_host.data(), + m_a.data(), &n, - ipiv_host.data(), + m_ipiv.data(), b_host.data(), &stride, &info); diff --git a/include/ddc/kernels/splines/matrix_pds_banded.hpp b/include/ddc/kernels/splines/matrix_pds_banded.hpp index 72d50993a..679088504 100644 --- a/include/ddc/kernels/splines/matrix_pds_banded.hpp +++ b/include/ddc/kernels/splines/matrix_pds_banded.hpp @@ -36,7 +36,7 @@ class Matrix_PDS_Banded : public Matrix * */ protected: int const m_kd; // no. of columns in q - Kokkos::View + Kokkos::View m_q; // pds banded matrix representation public: @@ -51,20 +51,7 @@ class Matrix_PDS_Banded : public Matrix double get_element(int i, int j) const override { if (i == j) { - KOKKOS_IF_ON_HOST( - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - return m_q(0, i); - } else { - // Inefficient, usage is strongly discouraged - double aij; - Kokkos::deep_copy( - Kokkos::View(&aij), - Kokkos::subview(m_q, 0, i)); - return aij; - }) - KOKKOS_IF_ON_DEVICE(return m_q(0, i);) + return m_q(0, i); } if (i > j) { // inline swap i<->j @@ -74,20 +61,7 @@ class Matrix_PDS_Banded : public Matrix } for (int k = 1; k < m_kd + 1; ++k) { if (i + k == j) { - KOKKOS_IF_ON_HOST( - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - return m_q(k, i); - } else { - // Inefficient, usage is strongly discouraged - double aij; - Kokkos::deep_copy( - Kokkos::View(&aij), - Kokkos::subview(m_q, k, i)); - return aij; - }) - KOKKOS_IF_ON_DEVICE(return m_q(k, i);) + return m_q(k, i); } } return 0.0; @@ -95,18 +69,7 @@ class Matrix_PDS_Banded : public Matrix void set_element(int i, int j, double const aij) override { if (i == j) { - KOKKOS_IF_ON_HOST( - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - m_q(0, i) = aij; - } else { - // Inefficient, usage is strongly discouraged - Kokkos::deep_copy( - Kokkos::subview(m_q, 0, i), - Kokkos::View(&aij)); - }) - KOKKOS_IF_ON_DEVICE(m_q(0, i) = aij;) + m_q(0, i) = aij; return; } if (i > j) { @@ -117,18 +80,7 @@ class Matrix_PDS_Banded : public Matrix } for (int k = 1; k < m_kd + 1; ++k) { if (i + k == j) { - KOKKOS_IF_ON_HOST( - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - m_q(k, i) = aij; - } else { - // Inefficient, usage is strongly discouraged - Kokkos::deep_copy( - Kokkos::subview(m_q, k, i), - Kokkos::View(&aij)); - }) - KOKKOS_IF_ON_DEVICE(m_q(k, i) = aij;) + m_q(k, i) = aij; return; } } @@ -139,13 +91,11 @@ class Matrix_PDS_Banded : public Matrix protected: int factorize_method() override { - auto q_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_q); int info; char const uplo = 'L'; int const n = get_size(); int const ldab = m_kd + 1; - dpbtrf_(&uplo, &n, &m_kd, q_host.data(), &ldab, &info); - Kokkos::deep_copy(m_q, q_host); + dpbtrf_(&uplo, &n, &m_kd, m_q.data(), &ldab, &info); return info; } @@ -155,7 +105,6 @@ class Matrix_PDS_Banded : public Matrix int const n_equations = b.extent(1); int const stride = b.stride(1); - auto q_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_q); Kokkos::View b_view(b.data_handle(), Kokkos::LayoutStride(get_size(), 1, n_equations, stride)); auto b_host = create_mirror_view(Kokkos::DefaultHostExecutionSpace(), b_view); @@ -168,15 +117,7 @@ class Matrix_PDS_Banded : public Matrix char const uplo = 'L'; int const n = get_size(); int const ldab = m_kd + 1; - dpbtrs_(&uplo, - &n, - &m_kd, - &n_equations, - q_host.data(), - &ldab, - b_host.data(), - &stride, - &info); + dpbtrs_(&uplo, &n, &m_kd, &n_equations, m_q.data(), &ldab, b_host.data(), &stride, &info); for (int i = 0; i < n_equations; ++i) { Kokkos::deep_copy( Kokkos::subview(b_view, Kokkos::ALL, i), diff --git a/include/ddc/kernels/splines/matrix_pds_tridiag.hpp b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp index 09f676ea7..c6ea87287 100644 --- a/include/ddc/kernels/splines/matrix_pds_tridiag.hpp +++ b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp @@ -27,8 +27,8 @@ class Matrix_PDS_Tridiag : public Matrix * stored in a block format * */ protected: - Kokkos::View m_d; // diagonal - Kokkos::View m_l; // lower diagonal + Kokkos::View m_d; // diagonal + Kokkos::View m_l; // lower diagonal public: Matrix_PDS_Tridiag(int const mat_size) @@ -43,18 +43,7 @@ class Matrix_PDS_Tridiag : public Matrix double get_element(int i, int j) const override { if (i == j) { - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - return m_d(i); - } else { - // Inefficient, usage is strongly discouraged - double aij; - Kokkos::deep_copy( - Kokkos::View(&aij), - Kokkos::subview(m_d, i)); - return aij; - } + return m_d(i); } if (i > j) { // inline swap i<->j @@ -63,34 +52,14 @@ class Matrix_PDS_Tridiag : public Matrix j = tmp; } if (i + 1 == j) { - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - return m_l(i); - } else { - // Inefficient, usage is strongly discouraged - double aij; - Kokkos::deep_copy( - Kokkos::View(&aij), - Kokkos::subview(m_l, i)); - return aij; - } + return m_l(i); } return 0.0; } void set_element(int i, int j, double const aij) override { if (i == j) { - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - m_d(i) = aij; - } else { - // Inefficient, usage is strongly discouraged - Kokkos::deep_copy( - Kokkos::subview(m_d, i), - Kokkos::View(&aij)); - } + m_d(i) = aij; return; } if (i > j) { @@ -102,29 +71,16 @@ class Matrix_PDS_Tridiag : public Matrix if (i + 1 != j) { assert(std::fabs(aij) < 1e-20); } else { - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - m_l(i) = aij; - } else { - // Inefficient, usage is strongly discouraged - Kokkos::deep_copy( - Kokkos::subview(m_l, i), - Kokkos::View(&aij)); - } + m_l(i) = aij; } } protected: int factorize_method() override { - auto d_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_d); - auto l_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_l); int info; int const n = get_size(); - dpttrf_(&n, d_host.data(), l_host.data(), &info); - Kokkos::deep_copy(m_d, d_host); - Kokkos::deep_copy(m_l, l_host); + dpttrf_(&n, m_d.data(), m_l.data(), &info); return info; } @@ -134,8 +90,6 @@ class Matrix_PDS_Tridiag : public Matrix int const n_equations = b.extent(1); int const stride = b.stride(1); - auto d_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_d); - auto l_host = create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_l); Kokkos::View b_view(b.data_handle(), Kokkos::LayoutStride(get_size(), 1, n_equations, stride)); auto b_host = create_mirror_view(Kokkos::DefaultHostExecutionSpace(), b_view); @@ -146,7 +100,7 @@ class Matrix_PDS_Tridiag : public Matrix } int info; int const n = get_size(); - dpttrs_(&n, &n_equations, d_host.data(), l_host.data(), b_host.data(), &stride, &info); + dpttrs_(&n, &n_equations, m_d.data(), m_l.data(), b_host.data(), &stride, &info); for (int i = 0; i < n_equations; ++i) { Kokkos::deep_copy( Kokkos::subview(b_view, Kokkos::ALL, i), diff --git a/include/ddc/kernels/splines/matrix_periodic_banded.hpp b/include/ddc/kernels/splines/matrix_periodic_banded.hpp index c9cad9cb5..6411c9dc1 100644 --- a/include/ddc/kernels/splines/matrix_periodic_banded.hpp +++ b/include/ddc/kernels/splines/matrix_periodic_banded.hpp @@ -61,31 +61,9 @@ class Matrix_Periodic_Banded : public Matrix_Corner_Block if (d < -m_kl || d > m_ku) return 0.0; if (d > 0) { - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - return m_lambda(i - m_nb, j); - } else { - // Inefficient, usage is strongly discouraged - double aij; - Kokkos::deep_copy( - Kokkos::View(&aij), - Kokkos::subview(m_lambda, i - m_nb, j)); - return aij; - } + return m_lambda(i - m_nb, j); } else { - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - return m_lambda(i - m_nb, j - m_nb + m_k + 1); - } else { - // Inefficient, usage is strongly discouraged - double aij; - Kokkos::deep_copy( - Kokkos::View(&aij), - Kokkos::subview(m_lambda, i - m_nb, j - m_nb + m_k + 1)); - return aij; - } + return m_lambda(i - m_nb, j - m_nb + m_k + 1); } } else { return Matrix_Corner_Block::get_element(i, j); @@ -110,27 +88,9 @@ class Matrix_Periodic_Banded : public Matrix_Corner_Block } if (d > 0) { - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - m_lambda(i - m_nb, j) = aij; - } else { - // Inefficient, usage is strongly discouraged - Kokkos::deep_copy( - Kokkos::subview(m_lambda, i - m_nb, j), - Kokkos::View(&aij)); - } + m_lambda(i - m_nb, j) = aij; } else { - if constexpr (Kokkos::SpaceAccessibility< - Kokkos::DefaultHostExecutionSpace, - typename ExecSpace::memory_space>::accessible) { - m_lambda(i - m_nb, j - m_nb + m_k + 1) = aij; - } else { - // Inefficient, usage is strongly discouraged - Kokkos::deep_copy( - Kokkos::subview(m_lambda, i - m_nb, j - m_nb + m_k + 1), - Kokkos::View(&aij)); - } + m_lambda(i - m_nb, j - m_nb + m_k + 1) = aij; } } else { Matrix_Corner_Block::set_element(i, j, aij); @@ -141,10 +101,6 @@ class Matrix_Periodic_Banded : public Matrix_Corner_Block void calculate_delta_to_factorize() override { auto delta_proxy = *m_delta; - auto lambda_host = Kokkos:: - create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_lambda); - auto Abm_1_gamma_host = Kokkos:: - create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), m_Abm_1_gamma); Kokkos::parallel_for( "calculate_delta_to_factorize", Kokkos::MDRangePolicy< @@ -154,12 +110,12 @@ class Matrix_Periodic_Banded : public Matrix_Corner_Block double val = 0.0; // Upper diagonals in lambda, lower diagonals in Abm_1_gamma for (int l = 0; l < i + 1; ++l) { - val += lambda_host(i, l) * Abm_1_gamma_host(l, j); + val += m_lambda(i, l) * m_Abm_1_gamma(l, j); } // Lower diagonals in lambda, upper diagonals in Abm_1_gamma for (int l = i + 1; l < m_k + 1; ++l) { int l_full = m_nb - 1 - m_k + l; - val += lambda_host(i, l) * Abm_1_gamma_host(l_full, j); + val += m_lambda(i, l) * m_Abm_1_gamma(l_full, j); } auto tmp = delta_proxy.get_element(i, j); delta_proxy.set_element(i, j, tmp - val); @@ -170,6 +126,7 @@ class Matrix_Periodic_Banded : public Matrix_Corner_Block ddc::DSpan2D_stride const v, ddc::DView2D_stride const u) const override { + auto lambda_device = create_mirror_view_and_copy(ExecSpace(), m_lambda); Kokkos::parallel_for( "solve_lambda_section", Kokkos::TeamPolicy(v.extent(1), Kokkos::AUTO), @@ -183,13 +140,13 @@ class Matrix_Periodic_Banded : public Matrix_Corner_Block [&](const int i) { /// Upper diagonals in lambda for (int l = 0; l <= i; ++l) { - Kokkos::atomic_sub(&v(i, j), m_lambda(i, l) * u(l, j)); + Kokkos::atomic_sub(&v(i, j), lambda_device(i, l) * u(l, j)); } // Lower diagonals in lambda for (int l = i + 1; l < m_k + 1; ++l) { Kokkos::atomic_sub( &v(i, j), - m_lambda(i, l) * u(m_nb - 1 - m_k + l, j)); + lambda_device(i, l) * u(m_nb - 1 - m_k + l, j)); } }); }); @@ -200,6 +157,7 @@ class Matrix_Periodic_Banded : public Matrix_Corner_Block ddc::DSpan2D_stride const u, ddc::DView2D_stride const v) const override { + auto lambda_device = create_mirror_view_and_copy(ExecSpace(), m_lambda); Kokkos::parallel_for( "solve_lambda_section_transpose", Kokkos::TeamPolicy(v.extent(1), Kokkos::AUTO), @@ -213,13 +171,13 @@ class Matrix_Periodic_Banded : public Matrix_Corner_Block [&](const int i) { /// Upper diagonals in lambda for (int l = 0; l <= i; ++l) { - Kokkos::atomic_sub(&u(l, j), m_lambda(i, l) * v(i, j)); + Kokkos::atomic_sub(&u(l, j), lambda_device(i, l) * v(i, j)); } // Lower diagonals in lambda for (int l = i + 1; l < m_k + 1; ++l) { Kokkos::atomic_sub( &u(m_nb - 1 - m_k + l, j), - m_lambda(i, l) * v(i, j)); + lambda_device(i, l) * v(i, j)); } }); }); From d2a165bfc02bf8110944133035d2f6a15970fb12 Mon Sep 17 00:00:00 2001 From: Baptiste Legouix Date: Tue, 12 Mar 2024 12:27:44 +0100 Subject: [PATCH 13/28] Update include/ddc/kernels/splines/matrix_center_block.hpp Co-authored-by: Thomas Padioleau --- include/ddc/kernels/splines/matrix_center_block.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/ddc/kernels/splines/matrix_center_block.hpp b/include/ddc/kernels/splines/matrix_center_block.hpp index 6a3d39097..aa18b5198 100644 --- a/include/ddc/kernels/splines/matrix_center_block.hpp +++ b/include/ddc/kernels/splines/matrix_center_block.hpp @@ -45,6 +45,7 @@ class Matrix_Center_Block : public Matrix_Corner_Block adjust_indexes(i, j); return Matrix_Corner_Block::get_element(i, j); } + void set_element(int i, int j, double aij) override { adjust_indexes(i, j); From 7e4c6c71baf138a0379b5780ee327d7fb65d9841 Mon Sep 17 00:00:00 2001 From: Baptiste Legouix Date: Tue, 12 Mar 2024 12:28:12 +0100 Subject: [PATCH 14/28] Update include/ddc/kernels/splines/matrix_center_block.hpp Co-authored-by: Thomas Padioleau --- include/ddc/kernels/splines/matrix_center_block.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/ddc/kernels/splines/matrix_center_block.hpp b/include/ddc/kernels/splines/matrix_center_block.hpp index aa18b5198..1146c1891 100644 --- a/include/ddc/kernels/splines/matrix_center_block.hpp +++ b/include/ddc/kernels/splines/matrix_center_block.hpp @@ -51,6 +51,7 @@ class Matrix_Center_Block : public Matrix_Corner_Block adjust_indexes(i, j); Matrix_Corner_Block::set_element(i, j, aij); } + ddc::DSpan2D_stride solve_inplace(ddc::DSpan2D_stride const bx) const override { swap_array_to_corner(bx); From ac0a0df2404cafd3b74fe6e3f1905b68e9796551 Mon Sep 17 00:00:00 2001 From: blegouix Date: Tue, 12 Mar 2024 15:41:04 +0100 Subject: [PATCH 15/28] remove KOKKOS_CLASS_LAMBDA --- .../kernels/splines/matrix_corner_block.hpp | 32 ++++++++++++------- .../splines/matrix_periodic_banded.hpp | 20 +++++++----- 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/include/ddc/kernels/splines/matrix_corner_block.hpp b/include/ddc/kernels/splines/matrix_corner_block.hpp index 49b8f487c..ea71de63e 100644 --- a/include/ddc/kernels/splines/matrix_corner_block.hpp +++ b/include/ddc/kernels/splines/matrix_corner_block.hpp @@ -136,19 +136,21 @@ class Matrix_Corner_Block : public Matrix ddc::DView2D_stride const u) const { auto lambda_device = create_mirror_view_and_copy(ExecSpace(), m_lambda); + auto nb_proxy = m_nb; + auto k_proxy = m_k; Kokkos::parallel_for( "solve_lambda_section", Kokkos::TeamPolicy(v.extent(1), Kokkos::AUTO), - KOKKOS_CLASS_LAMBDA( + KOKKOS_LAMBDA( const typename Kokkos::TeamPolicy::member_type& teamMember) { const int j = teamMember.league_rank(); Kokkos::parallel_for( - Kokkos::TeamThreadRange(teamMember, m_k), + Kokkos::TeamThreadRange(teamMember, k_proxy), [&](const int i) { // Upper diagonals in lambda - for (int l = 0; l < m_nb; ++l) { + for (int l = 0; l < nb_proxy; ++l) { Kokkos::atomic_sub(&v(i, j), lambda_device(i, l) * u(l, j)); } }); @@ -160,19 +162,21 @@ class Matrix_Corner_Block : public Matrix ddc::DView2D_stride const v) const { auto lambda_device = create_mirror_view_and_copy(ExecSpace(), m_lambda); + auto nb_proxy = m_nb; + auto k_proxy = m_k; Kokkos::parallel_for( "solve_lambda_section_transpose", Kokkos::TeamPolicy(u.extent(1), Kokkos::AUTO), - KOKKOS_CLASS_LAMBDA( + KOKKOS_LAMBDA( const typename Kokkos::TeamPolicy::member_type& teamMember) { const int j = teamMember.league_rank(); Kokkos::parallel_for( - Kokkos::TeamThreadRange(teamMember, m_nb), + Kokkos::TeamThreadRange(teamMember, nb_proxy), [&](const int i) { // Upper diagonals in lambda - for (int l = 0; l < m_k; ++l) { + for (int l = 0; l < k_proxy; ++l) { Kokkos::atomic_sub(&u(i, j), lambda_device(l, i) * v(l, j)); } }); @@ -184,19 +188,21 @@ class Matrix_Corner_Block : public Matrix ddc::DView2D_stride const v) const { auto Abm_1_gamma_device = create_mirror_view_and_copy(ExecSpace(), m_Abm_1_gamma); + auto nb_proxy = m_nb; + auto k_proxy = m_k; Kokkos::parallel_for( "solve_gamma_section", Kokkos::TeamPolicy(u.extent(1), Kokkos::AUTO), - KOKKOS_CLASS_LAMBDA( + KOKKOS_LAMBDA( const typename Kokkos::TeamPolicy::member_type& teamMember) { const int j = teamMember.league_rank(); Kokkos::parallel_for( - Kokkos::TeamThreadRange(teamMember, m_nb), + Kokkos::TeamThreadRange(teamMember, nb_proxy), [&](const int i) { // Upper diagonals in lambda - for (int l = 0; l < m_k; ++l) { + for (int l = 0; l < k_proxy; ++l) { Kokkos::atomic_sub( &u(i, j), Abm_1_gamma_device(i, l) * v(l, j)); @@ -210,19 +216,21 @@ class Matrix_Corner_Block : public Matrix ddc::DView2D_stride const u) const { auto Abm_1_gamma_device = create_mirror_view_and_copy(ExecSpace(), m_Abm_1_gamma); + auto nb_proxy = m_nb; + auto k_proxy = m_k; Kokkos::parallel_for( "solve_gamma_section_transpose", Kokkos::TeamPolicy(v.extent(1), Kokkos::AUTO), - KOKKOS_CLASS_LAMBDA( + KOKKOS_LAMBDA( const typename Kokkos::TeamPolicy::member_type& teamMember) { const int j = teamMember.league_rank(); Kokkos::parallel_for( - Kokkos::TeamThreadRange(teamMember, m_k), + Kokkos::TeamThreadRange(teamMember, k_proxy), [&](const int i) { // Upper diagonals in lambda - for (int l = 0; l < m_nb; ++l) { + for (int l = 0; l < nb_proxy; ++l) { Kokkos::atomic_sub( &v(i, j), Abm_1_gamma_device(l, i) * u(l, j)); diff --git a/include/ddc/kernels/splines/matrix_periodic_banded.hpp b/include/ddc/kernels/splines/matrix_periodic_banded.hpp index 6411c9dc1..7de1a8d98 100644 --- a/include/ddc/kernels/splines/matrix_periodic_banded.hpp +++ b/include/ddc/kernels/splines/matrix_periodic_banded.hpp @@ -127,26 +127,28 @@ class Matrix_Periodic_Banded : public Matrix_Corner_Block ddc::DView2D_stride const u) const override { auto lambda_device = create_mirror_view_and_copy(ExecSpace(), m_lambda); + auto nb_proxy = m_nb; + auto k_proxy = m_k; Kokkos::parallel_for( "solve_lambda_section", Kokkos::TeamPolicy(v.extent(1), Kokkos::AUTO), - KOKKOS_CLASS_LAMBDA( + KOKKOS_LAMBDA( const typename Kokkos::TeamPolicy::member_type& teamMember) { const int j = teamMember.league_rank(); Kokkos::parallel_for( - Kokkos::TeamThreadRange(teamMember, m_k), + Kokkos::TeamThreadRange(teamMember, k_proxy), [&](const int i) { /// Upper diagonals in lambda for (int l = 0; l <= i; ++l) { Kokkos::atomic_sub(&v(i, j), lambda_device(i, l) * u(l, j)); } // Lower diagonals in lambda - for (int l = i + 1; l < m_k + 1; ++l) { + for (int l = i + 1; l < k_proxy + 1; ++l) { Kokkos::atomic_sub( &v(i, j), - lambda_device(i, l) * u(m_nb - 1 - m_k + l, j)); + lambda_device(i, l) * u(nb_proxy - 1 - k_proxy + l, j)); } }); }); @@ -158,25 +160,27 @@ class Matrix_Periodic_Banded : public Matrix_Corner_Block ddc::DView2D_stride const v) const override { auto lambda_device = create_mirror_view_and_copy(ExecSpace(), m_lambda); + auto nb_proxy = m_nb; + auto k_proxy = m_k; Kokkos::parallel_for( "solve_lambda_section_transpose", Kokkos::TeamPolicy(v.extent(1), Kokkos::AUTO), - KOKKOS_CLASS_LAMBDA( + KOKKOS_LAMBDA( const typename Kokkos::TeamPolicy::member_type& teamMember) { const int j = teamMember.league_rank(); Kokkos::parallel_for( - Kokkos::TeamThreadRange(teamMember, m_k), + Kokkos::TeamThreadRange(teamMember, k_proxy), [&](const int i) { /// Upper diagonals in lambda for (int l = 0; l <= i; ++l) { Kokkos::atomic_sub(&u(l, j), lambda_device(i, l) * v(i, j)); } // Lower diagonals in lambda - for (int l = i + 1; l < m_k + 1; ++l) { + for (int l = i + 1; l < k_proxy + 1; ++l) { Kokkos::atomic_sub( - &u(m_nb - 1 - m_k + l, j), + &u(nb_proxy - 1 - k_proxy + l, j), lambda_device(i, l) * v(i, j)); } }); From 2d5e7b588863653effed6f5242636738f91d5d81 Mon Sep 17 00:00:00 2001 From: Baptiste Legouix Date: Wed, 13 Mar 2024 13:07:14 +0100 Subject: [PATCH 16/28] Update CMakeLists.txt Co-authored-by: Thomas Padioleau --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index daa100119..68a489ee7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -248,7 +248,7 @@ if("${DDC_BUILD_KERNELS_SPLINES}") target_compile_definitions(DDC INTERFACE ginkgo_AVAIL) # Lapack - find_package(LAPACK REQUIRED COMPONENTS CXX) + find_package(LAPACK REQUIRED) target_link_libraries(DDC INTERFACE LAPACK::LAPACK) endif() From c0553b85eb7c96b229a1752e4bf56cc0d4ecc2e5 Mon Sep 17 00:00:00 2001 From: Baptiste Legouix Date: Fri, 15 Mar 2024 08:20:14 +0100 Subject: [PATCH 17/28] Update include/ddc/kernels/splines/matrix.hpp Co-authored-by: Thomas Padioleau --- include/ddc/kernels/splines/matrix.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ddc/kernels/splines/matrix.hpp b/include/ddc/kernels/splines/matrix.hpp index c7c40517b..dbeb84a7f 100644 --- a/include/ddc/kernels/splines/matrix.hpp +++ b/include/ddc/kernels/splines/matrix.hpp @@ -100,7 +100,7 @@ class Matrix protected: virtual int factorize_method() = 0; - virtual int solve_inplace_method(ddc::DSpan2D_stride const b, char const transpose) const = 0; + virtual int solve_inplace_method(ddc::DSpan2D_stride b, char transpose) const = 0; }; } // namespace ddc::detail From 1bf5b2dcea130cf520cbb3bbc9ca6890a6ae3575 Mon Sep 17 00:00:00 2001 From: Baptiste Legouix Date: Fri, 15 Mar 2024 08:30:09 +0100 Subject: [PATCH 18/28] Update include/ddc/kernels/splines/matrix_sparse.hpp Co-authored-by: Thomas Padioleau --- include/ddc/kernels/splines/matrix_sparse.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/ddc/kernels/splines/matrix_sparse.hpp b/include/ddc/kernels/splines/matrix_sparse.hpp index 8143e9cf6..13b5fb7fe 100644 --- a/include/ddc/kernels/splines/matrix_sparse.hpp +++ b/include/ddc/kernels/splines/matrix_sparse.hpp @@ -132,7 +132,6 @@ class Matrix_Sparse : public Matrix { throw std::runtime_error("MatrixSparse::get_element() is not implemented because no API is " "provided by Ginkgo"); - return 0; } void set_element(int i, int j, double aij) override From ddf030e256075ca4a6a8c2666ebdb7e0818f8530 Mon Sep 17 00:00:00 2001 From: Baptiste Legouix Date: Fri, 15 Mar 2024 08:33:24 +0100 Subject: [PATCH 19/28] Update include/ddc/kernels/splines/matrix_sparse.hpp --- include/ddc/kernels/splines/matrix_sparse.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/ddc/kernels/splines/matrix_sparse.hpp b/include/ddc/kernels/splines/matrix_sparse.hpp index 13b5fb7fe..a6db40088 100644 --- a/include/ddc/kernels/splines/matrix_sparse.hpp +++ b/include/ddc/kernels/splines/matrix_sparse.hpp @@ -125,7 +125,6 @@ class Matrix_Sparse : public Matrix m_matrix_sparse = matrix_sparse_type::create(gko_exec, gko::dim<2>(mat_size, mat_size)); m_matrix_dense->fill(0); - m_matrix_sparse->clear(); } double get_element([[maybe_unused]] int i, [[maybe_unused]] int j) const override From bb79f3f67e9d5e5b8614ca0a17592bdaea0101f0 Mon Sep 17 00:00:00 2001 From: blegouix Date: Fri, 15 Mar 2024 08:58:05 +0100 Subject: [PATCH 20/28] remove atomics --- .../ddc/kernels/splines/matrix_corner_block.hpp | 12 ++++-------- .../ddc/kernels/splines/matrix_periodic_banded.hpp | 14 ++++++-------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/include/ddc/kernels/splines/matrix_corner_block.hpp b/include/ddc/kernels/splines/matrix_corner_block.hpp index ea71de63e..001c18641 100644 --- a/include/ddc/kernels/splines/matrix_corner_block.hpp +++ b/include/ddc/kernels/splines/matrix_corner_block.hpp @@ -151,7 +151,7 @@ class Matrix_Corner_Block : public Matrix [&](const int i) { // Upper diagonals in lambda for (int l = 0; l < nb_proxy; ++l) { - Kokkos::atomic_sub(&v(i, j), lambda_device(i, l) * u(l, j)); + v(i, j) -= lambda_device(i, l) * u(l, j); } }); }); @@ -177,7 +177,7 @@ class Matrix_Corner_Block : public Matrix [&](const int i) { // Upper diagonals in lambda for (int l = 0; l < k_proxy; ++l) { - Kokkos::atomic_sub(&u(i, j), lambda_device(l, i) * v(l, j)); + u(i, j) -= lambda_device(l, i) * v(l, j); } }); }); @@ -203,9 +203,7 @@ class Matrix_Corner_Block : public Matrix [&](const int i) { // Upper diagonals in lambda for (int l = 0; l < k_proxy; ++l) { - Kokkos::atomic_sub( - &u(i, j), - Abm_1_gamma_device(i, l) * v(l, j)); + u(i, j) -= Abm_1_gamma_device(i, l) * v(l, j); } }); }); @@ -231,9 +229,7 @@ class Matrix_Corner_Block : public Matrix [&](const int i) { // Upper diagonals in lambda for (int l = 0; l < nb_proxy; ++l) { - Kokkos::atomic_sub( - &v(i, j), - Abm_1_gamma_device(l, i) * u(l, j)); + v(i, j) -= Abm_1_gamma_device(l, i) * u(l, j); } }); }); diff --git a/include/ddc/kernels/splines/matrix_periodic_banded.hpp b/include/ddc/kernels/splines/matrix_periodic_banded.hpp index 7de1a8d98..a37019ef9 100644 --- a/include/ddc/kernels/splines/matrix_periodic_banded.hpp +++ b/include/ddc/kernels/splines/matrix_periodic_banded.hpp @@ -142,13 +142,12 @@ class Matrix_Periodic_Banded : public Matrix_Corner_Block [&](const int i) { /// Upper diagonals in lambda for (int l = 0; l <= i; ++l) { - Kokkos::atomic_sub(&v(i, j), lambda_device(i, l) * u(l, j)); + v(i, j) -= lambda_device(i, l) * u(l, j); } // Lower diagonals in lambda for (int l = i + 1; l < k_proxy + 1; ++l) { - Kokkos::atomic_sub( - &v(i, j), - lambda_device(i, l) * u(nb_proxy - 1 - k_proxy + l, j)); + v(i, j) -= lambda_device(i, l) + * u(nb_proxy - 1 - k_proxy + l, j); } }); }); @@ -175,13 +174,12 @@ class Matrix_Periodic_Banded : public Matrix_Corner_Block [&](const int i) { /// Upper diagonals in lambda for (int l = 0; l <= i; ++l) { - Kokkos::atomic_sub(&u(l, j), lambda_device(i, l) * v(i, j)); + u(l, j) -= lambda_device(i, l) * v(i, j); } // Lower diagonals in lambda for (int l = i + 1; l < k_proxy + 1; ++l) { - Kokkos::atomic_sub( - &u(nb_proxy - 1 - k_proxy + l, j), - lambda_device(i, l) * v(i, j)); + u(nb_proxy - 1 - k_proxy + l, j) + -= lambda_device(i, l) * v(i, j); } }); }); From cc0b104e60d0a7d2fb9f841e0d9742474258ec2c Mon Sep 17 00:00:00 2001 From: blegouix Date: Fri, 15 Mar 2024 10:42:53 +0100 Subject: [PATCH 21/28] matrix tests on GPU --- tests/splines/matrix.cpp | 180 ++++++++++++++------------------------- 1 file changed, 65 insertions(+), 115 deletions(-) diff --git a/tests/splines/matrix.cpp b/tests/splines/matrix.cpp index bcf53e8cd..96fc71fc7 100644 --- a/tests/splines/matrix.cpp +++ b/tests/splines/matrix.cpp @@ -62,6 +62,7 @@ void check_inverse_transpose(ddc::DSpan2D_stride matrix, ddc::DSpan2D_stride inv for (std::size_t j(0); j < N; ++j) { double id_val = 0.0; for (std::size_t k(0); k < N; ++k) { + std::cout << inv(j, k) << " "; id_val += matrix(i, k) * inv(j, k); } EXPECT_NEAR(id_val, static_cast(i == j), TOL); @@ -78,7 +79,7 @@ TEST_P(MatrixSizesFixture, PositiveDefiniteSymmetric) { auto const [N, k] = GetParam(); std::unique_ptr matrix = ddc::detail::MatrixMaker::make_new_banded< - Kokkos::DefaultHostExecutionSpace>(N, k, k, true); + Kokkos::DefaultExecutionSpace>(N, k, k, true); for (std::size_t i(0); i < N; ++i) { matrix->set_element(i, i, 2.0 * k); @@ -93,19 +94,35 @@ TEST_P(MatrixSizesFixture, PositiveDefiniteSymmetric) ddc::DSpan2D_left val(val_ptr.data(), N, N); copy_matrix(val, matrix); - std::vector inv_ptr(N * N); - ddc::DSpan2D_left inv(inv_ptr.data(), N, N); - fill_identity(inv); matrix->factorize(); - matrix->solve_inplace(inv); + + Kokkos::DualView inv_ptr("inv_ptr", N * N); + ddc::DSpan2D_left inv(inv_ptr.h_view.data(), N, N); + fill_identity(inv); + inv_ptr.modify_host(); + inv_ptr.sync_device(); + matrix->solve_inplace(ddc::DSpan2D_left(inv_ptr.d_view.data(), N, N)); + inv_ptr.modify_device(); + inv_ptr.sync_host(); + + Kokkos::DualView inv_tr_ptr("inv_tr_ptr", N * N); + ddc::DSpan2D_left inv_tr(inv_tr_ptr.h_view.data(), N, N); + fill_identity(inv_tr); + inv_tr_ptr.modify_host(); + inv_tr_ptr.sync_device(); + matrix->solve_transpose_inplace(ddc::DSpan2D_left(inv_tr_ptr.d_view.data(), N, N)); + inv_tr_ptr.modify_device(); + inv_tr_ptr.sync_host(); + check_inverse(val, inv); + check_inverse_transpose(val, inv_tr); } TEST_P(MatrixSizesFixture, OffsetBanded) { auto const [N, k] = GetParam(); std::unique_ptr matrix = ddc::detail::MatrixMaker::make_new_banded< - Kokkos::DefaultHostExecutionSpace>(N, 0, 2 * k, true); + Kokkos::DefaultExecutionSpace>(N, 0, 2 * k, true); for (std::size_t i(0); i < N; ++i) { for (std::size_t j(i); j < std::min(N, i + k); ++j) { @@ -118,117 +135,36 @@ TEST_P(MatrixSizesFixture, OffsetBanded) matrix->set_element(i, j, -1.0); } } - std::vector val_ptr(N * N); - ddc::DSpan2D_left val(val_ptr.data(), N, N); - copy_matrix(val, matrix); - std::vector inv_ptr(N * N); - ddc::DSpan2D_left inv(inv_ptr.data(), N, N); - fill_identity(inv); - matrix->factorize(); - matrix->solve_inplace(inv); - check_inverse(val, inv); -} - -TEST_P(MatrixSizesFixture, PeriodicBanded) -{ - auto const [N, k] = GetParam(); - - for (std::ptrdiff_t s(-k); s < (std::ptrdiff_t)k + 1; ++s) { - if (s == 0) - continue; - - std::unique_ptr matrix - = ddc::detail::MatrixMaker::make_new_periodic_banded< - Kokkos::DefaultHostExecutionSpace>(N, k - s, k + s, false); - for (std::size_t i(0); i < N; ++i) { - for (std::size_t j(0); j < N; ++j) { - std::ptrdiff_t diag = ddc::detail::modulo((int)(j - i), (int)N); - if (diag == s || diag == (std::ptrdiff_t)N + s) { - matrix->set_element(i, j, 0.5); - } else if ( - diag <= s + (std::ptrdiff_t)k - || diag >= (std::ptrdiff_t)N + s - (std::ptrdiff_t)k) { - matrix->set_element(i, j, -1.0 / k); - } - } - } - std::vector val_ptr(N * N); - ddc::DSpan2D_left val(val_ptr.data(), N, N); - copy_matrix(val, matrix); - - std::vector inv_ptr(N * N); - ddc::DSpan2D_left inv(inv_ptr.data(), N, N); - fill_identity(inv); - matrix->factorize(); - matrix->solve_inplace(inv); - check_inverse(val, inv); - } -} - -TEST_P(MatrixSizesFixture, PositiveDefiniteSymmetricTranspose) -{ - auto const [N, k] = GetParam(); - std::unique_ptr matrix = ddc::detail::MatrixMaker::make_new_banded< - Kokkos::DefaultHostExecutionSpace>(N, k, k, true); - - for (std::size_t i(0); i < N; ++i) { - matrix->set_element(i, i, 2.0 * k); - for (std::size_t j(std::max(0, int(i) - int(k))); j < i; ++j) { - matrix->set_element(i, j, -1.0); - } - for (std::size_t j(i + 1); j < std::min(N, i + k + 1); ++j) { - matrix->set_element(i, j, -1.0); - } - } std::vector val_ptr(N * N); ddc::DSpan2D_left val(val_ptr.data(), N, N); copy_matrix(val, matrix); - std::vector inv_ptr(N * N); - ddc::DSpan2D_left inv(inv_ptr.data(), N, N); - fill_identity(inv); matrix->factorize(); - for (std::size_t i(0); i < N; ++i) { - ddc::DSpan1D inv_line(inv_ptr.data() + i * N, N); - matrix->solve_transpose_inplace(inv_line); - } - check_inverse_transpose(val, inv); -} -TEST_P(MatrixSizesFixture, OffsetBandedTranspose) -{ - auto const [N, k] = GetParam(); - std::unique_ptr matrix = ddc::detail::MatrixMaker::make_new_banded< - Kokkos::DefaultHostExecutionSpace>(N, 0, 2 * k, true); + Kokkos::DualView inv_ptr("inv_ptr", N * N); + ddc::DSpan2D_left inv(inv_ptr.h_view.data(), N, N); + fill_identity(inv); + inv_ptr.modify_host(); + inv_ptr.sync_device(); + matrix->solve_inplace(ddc::DSpan2D_left(inv_ptr.d_view.data(), N, N)); + inv_ptr.modify_device(); + inv_ptr.sync_host(); - for (std::size_t i(0); i < N; ++i) { - for (std::size_t j(i); j < std::min(N, i + k); ++j) { - matrix->set_element(i, i, -1.0); - } - if (i + k < N) { - matrix->set_element(i, i + k, 2.0 * k); - } - for (std::size_t j(i + k + 1); j < std::min(N, i + k + 1); ++j) { - matrix->set_element(i, j, -1.0); - } - } - std::vector val_ptr(N * N); - ddc::DSpan2D_left val(val_ptr.data(), N, N); - copy_matrix(val, matrix); + Kokkos::DualView inv_tr_ptr("inv_tr_ptr", N * N); + ddc::DSpan2D_left inv_tr(inv_tr_ptr.h_view.data(), N, N); + fill_identity(inv_tr); + inv_tr_ptr.modify_host(); + inv_tr_ptr.sync_device(); + matrix->solve_transpose_inplace(ddc::DSpan2D_left(inv_tr_ptr.d_view.data(), N, N)); + inv_tr_ptr.modify_device(); + inv_tr_ptr.sync_host(); - std::vector inv_ptr(N * N); - ddc::DSpan2D_left inv(inv_ptr.data(), N, N); - fill_identity(inv); - matrix->factorize(); - for (std::size_t i(0); i < N; ++i) { - ddc::DSpan1D inv_line(inv_ptr.data() + i * N, N); - matrix->solve_transpose_inplace(inv_line); - } - check_inverse_transpose(val, inv); + check_inverse(val, inv); + check_inverse_transpose(val, inv_tr); } -TEST_P(MatrixSizesFixture, PeriodicBandedTranspose) +TEST_P(MatrixSizesFixture, PeriodicBanded) { auto const [N, k] = GetParam(); @@ -238,7 +174,7 @@ TEST_P(MatrixSizesFixture, PeriodicBandedTranspose) std::unique_ptr matrix = ddc::detail::MatrixMaker::make_new_periodic_banded< - Kokkos::DefaultHostExecutionSpace>(N, k - s, k + s, false); + Kokkos::DefaultExecutionSpace>(N, k - s, k + s, false); for (std::size_t i(0); i < N; ++i) { for (std::size_t j(0); j < N; ++j) { int diag = ddc::detail::modulo((int)(j - i), (int)N); @@ -251,19 +187,33 @@ TEST_P(MatrixSizesFixture, PeriodicBandedTranspose) } } } + std::vector val_ptr(N * N); ddc::DSpan2D_left val(val_ptr.data(), N, N); copy_matrix(val, matrix); - std::vector inv_ptr(N * N); - ddc::DSpan2D_left inv(inv_ptr.data(), N, N); - fill_identity(inv); matrix->factorize(); - for (std::size_t i(0); i < N; ++i) { - ddc::DSpan1D inv_line(inv_ptr.data() + i * N, N); - matrix->solve_transpose_inplace(inv_line); - } - check_inverse_transpose(val, inv); + + Kokkos::DualView inv_ptr("inv_ptr", N * N); + ddc::DSpan2D_left inv(inv_ptr.h_view.data(), N, N); + fill_identity(inv); + inv_ptr.modify_host(); + inv_ptr.sync_device(); + matrix->solve_inplace(ddc::DSpan2D_left(inv_ptr.d_view.data(), N, N)); + inv_ptr.modify_device(); + inv_ptr.sync_host(); + + Kokkos::DualView inv_tr_ptr("inv_tr_ptr", N * N); + ddc::DSpan2D_left inv_tr(inv_tr_ptr.h_view.data(), N, N); + fill_identity(inv_tr); + inv_tr_ptr.modify_host(); + inv_tr_ptr.sync_device(); + matrix->solve_transpose_inplace(ddc::DSpan2D_left(inv_tr_ptr.d_view.data(), N, N)); + inv_tr_ptr.modify_device(); + inv_tr_ptr.sync_host(); + + check_inverse(val, inv); + check_inverse_transpose(val, inv_tr); } } From 64e0f195f42ea75e795b97dd5af2c0ce5ec53380 Mon Sep 17 00:00:00 2001 From: blegouix Date: Fri, 15 Mar 2024 11:37:58 +0100 Subject: [PATCH 22/28] fix, revert the nesting of MatrixPeriodicBanded::lambda_transpose to avoid race condition after atomics removal --- .../kernels/splines/matrix_periodic_banded.hpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/include/ddc/kernels/splines/matrix_periodic_banded.hpp b/include/ddc/kernels/splines/matrix_periodic_banded.hpp index a37019ef9..79e90e27b 100644 --- a/include/ddc/kernels/splines/matrix_periodic_banded.hpp +++ b/include/ddc/kernels/splines/matrix_periodic_banded.hpp @@ -168,18 +168,17 @@ class Matrix_Periodic_Banded : public Matrix_Corner_Block const typename Kokkos::TeamPolicy::member_type& teamMember) { const int j = teamMember.league_rank(); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(teamMember, k_proxy), + Kokkos::TeamThreadRange(teamMember, k_proxy + 1), [&](const int i) { - /// Upper diagonals in lambda - for (int l = 0; l <= i; ++l) { - u(l, j) -= lambda_device(i, l) * v(i, j); - } // Lower diagonals in lambda - for (int l = i + 1; l < k_proxy + 1; ++l) { - u(nb_proxy - 1 - k_proxy + l, j) - -= lambda_device(i, l) * v(i, j); + for (int l = 0; l < i; ++l) { + u(nb_proxy - 1 - k_proxy + i, j) + -= lambda_device(l, i) * v(l, j); + } + /// Upper diagonals in lambda + for (int l = i; l < k_proxy; ++l) { + u(i, j) -= lambda_device(l, i) * v(l, j); } }); }); From 36323c7792a1fd1d57e2b9547b993dbd5b5cedc5 Mon Sep 17 00:00:00 2001 From: blegouix Date: Fri, 15 Mar 2024 11:40:31 +0100 Subject: [PATCH 23/28] remove debug line --- tests/splines/matrix.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/splines/matrix.cpp b/tests/splines/matrix.cpp index 96fc71fc7..5e7ad97e9 100644 --- a/tests/splines/matrix.cpp +++ b/tests/splines/matrix.cpp @@ -62,7 +62,6 @@ void check_inverse_transpose(ddc::DSpan2D_stride matrix, ddc::DSpan2D_stride inv for (std::size_t j(0); j < N; ++j) { double id_val = 0.0; for (std::size_t k(0); k < N; ++k) { - std::cout << inv(j, k) << " "; id_val += matrix(i, k) * inv(j, k); } EXPECT_NEAR(id_val, static_cast(i == j), TOL); From 3afad7a0eb757eafb4a2a7d30f141ae23d791e01 Mon Sep 17 00:00:00 2001 From: blegouix Date: Fri, 29 Mar 2024 08:29:21 +0100 Subject: [PATCH 24/28] reuse headers --- include/ddc/kernels/splines/matrix_banded.hpp | 4 ++++ include/ddc/kernels/splines/matrix_center_block.hpp | 4 ++++ include/ddc/kernels/splines/matrix_corner_block.hpp | 4 ++++ include/ddc/kernels/splines/matrix_dense.hpp | 4 ++++ include/ddc/kernels/splines/matrix_pds_banded.hpp | 4 ++++ include/ddc/kernels/splines/matrix_pds_tridiag.hpp | 4 ++++ include/ddc/kernels/splines/matrix_periodic_banded.hpp | 4 ++++ 7 files changed, 28 insertions(+) diff --git a/include/ddc/kernels/splines/matrix_banded.hpp b/include/ddc/kernels/splines/matrix_banded.hpp index 7b5bd7417..44a64404b 100644 --- a/include/ddc/kernels/splines/matrix_banded.hpp +++ b/include/ddc/kernels/splines/matrix_banded.hpp @@ -1,3 +1,7 @@ +// Copyright (C) The DDC development team, see COPYRIGHT.md file +// +// SPDX-License-Identifier: MIT + #pragma once #include diff --git a/include/ddc/kernels/splines/matrix_center_block.hpp b/include/ddc/kernels/splines/matrix_center_block.hpp index 1146c1891..703dfe9c4 100644 --- a/include/ddc/kernels/splines/matrix_center_block.hpp +++ b/include/ddc/kernels/splines/matrix_center_block.hpp @@ -1,3 +1,7 @@ +// Copyright (C) The DDC development team, see COPYRIGHT.md file +// +// SPDX-License-Identifier: MIT + #pragma once #include diff --git a/include/ddc/kernels/splines/matrix_corner_block.hpp b/include/ddc/kernels/splines/matrix_corner_block.hpp index 001c18641..e9c1f0025 100644 --- a/include/ddc/kernels/splines/matrix_corner_block.hpp +++ b/include/ddc/kernels/splines/matrix_corner_block.hpp @@ -1,3 +1,7 @@ +// Copyright (C) The DDC development team, see COPYRIGHT.md file +// +// SPDX-License-Identifier: MIT + #pragma once #include diff --git a/include/ddc/kernels/splines/matrix_dense.hpp b/include/ddc/kernels/splines/matrix_dense.hpp index e6eddce73..cc789d33a 100644 --- a/include/ddc/kernels/splines/matrix_dense.hpp +++ b/include/ddc/kernels/splines/matrix_dense.hpp @@ -1,3 +1,7 @@ +// Copyright (C) The DDC development team, see COPYRIGHT.md file +// +// SPDX-License-Identifier: MIT + #pragma once #include diff --git a/include/ddc/kernels/splines/matrix_pds_banded.hpp b/include/ddc/kernels/splines/matrix_pds_banded.hpp index 679088504..f0e4d1450 100644 --- a/include/ddc/kernels/splines/matrix_pds_banded.hpp +++ b/include/ddc/kernels/splines/matrix_pds_banded.hpp @@ -1,3 +1,7 @@ +// Copyright (C) The DDC development team, see COPYRIGHT.md file +// +// SPDX-License-Identifier: MIT + #pragma once #include diff --git a/include/ddc/kernels/splines/matrix_pds_tridiag.hpp b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp index c6ea87287..13f1af4e0 100644 --- a/include/ddc/kernels/splines/matrix_pds_tridiag.hpp +++ b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp @@ -1,3 +1,7 @@ +// Copyright (C) The DDC development team, see COPYRIGHT.md file +// +// SPDX-License-Identifier: MIT + #pragma once #include diff --git a/include/ddc/kernels/splines/matrix_periodic_banded.hpp b/include/ddc/kernels/splines/matrix_periodic_banded.hpp index 79e90e27b..12653baa4 100644 --- a/include/ddc/kernels/splines/matrix_periodic_banded.hpp +++ b/include/ddc/kernels/splines/matrix_periodic_banded.hpp @@ -1,3 +1,7 @@ +// Copyright (C) The DDC development team, see COPYRIGHT.md file +// +// SPDX-License-Identifier: MIT + #pragma once #include From e961cff5138182f35a19581206147f7efd55a239 Mon Sep 17 00:00:00 2001 From: blegouix Date: Fri, 29 Mar 2024 08:38:24 +0100 Subject: [PATCH 25/28] reuse cmake --- cmake/3.18/CMakePushCheckState.cmake | 4 ++++ cmake/3.18/CheckFunctionExists.cmake | 4 ++++ cmake/3.18/FindLAPACK.cmake | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/cmake/3.18/CMakePushCheckState.cmake b/cmake/3.18/CMakePushCheckState.cmake index 3e519ee51..d51ca6237 100644 --- a/cmake/3.18/CMakePushCheckState.cmake +++ b/cmake/3.18/CMakePushCheckState.cmake @@ -1,3 +1,7 @@ +# Copyright (C) Copyright 2000-2024 Kitware, Inc. and Contributors +# +# SPDX-License-Identifier: BSD-3-Clause + # Distributed under the OSI-approved BSD 3-Clause License. See accompanying # file Copyright.txt or https://cmake.org/licensing for details. diff --git a/cmake/3.18/CheckFunctionExists.cmake b/cmake/3.18/CheckFunctionExists.cmake index 136da89d9..8f5015377 100644 --- a/cmake/3.18/CheckFunctionExists.cmake +++ b/cmake/3.18/CheckFunctionExists.cmake @@ -1,3 +1,7 @@ +# Copyright (C) Copyright 2000-2024 Kitware, Inc. and Contributors +# +# SPDX-License-Identifier: BSD-3-Clause + # Distributed under the OSI-approved BSD 3-Clause License. See accompanying # file Copyright.txt or https://cmake.org/licensing for details. diff --git a/cmake/3.18/FindLAPACK.cmake b/cmake/3.18/FindLAPACK.cmake index 31e7de68c..e516e58e7 100644 --- a/cmake/3.18/FindLAPACK.cmake +++ b/cmake/3.18/FindLAPACK.cmake @@ -1,3 +1,7 @@ +# Copyright (C) Copyright 2000-2024 Kitware, Inc. and Contributors +# +# SPDX-License-Identifier: BSD-3-Clause + # Distributed under the OSI-approved BSD 3-Clause License. See accompanying # file Copyright.txt or https://cmake.org/licensing for details. From 8624b0fea7abf605c77baf9a96301d21fb6359bc Mon Sep 17 00:00:00 2001 From: Baptiste Legouix Date: Mon, 15 Apr 2024 14:47:39 +0200 Subject: [PATCH 26/28] Update include/ddc/kernels/splines/matrix_banded.hpp --- include/ddc/kernels/splines/matrix_banded.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/ddc/kernels/splines/matrix_banded.hpp b/include/ddc/kernels/splines/matrix_banded.hpp index 44a64404b..4f019511a 100644 --- a/include/ddc/kernels/splines/matrix_banded.hpp +++ b/include/ddc/kernels/splines/matrix_banded.hpp @@ -42,7 +42,6 @@ class Matrix_Banded : public Matrix int const m_ku; // no. of superdiagonals int const m_c; // no. of columns in q Kokkos::View m_ipiv; // pivot indices - // TODO: double** Kokkos::View m_q; // banded matrix representation public: From 0feb2b83ce771b902b6ad31466a76ac3242c4e0b Mon Sep 17 00:00:00 2001 From: blegouix Date: Mon, 15 Apr 2024 14:59:29 +0200 Subject: [PATCH 27/28] camel case --- include/ddc/kernels/splines/matrix_banded.hpp | 4 +-- .../kernels/splines/matrix_center_block.hpp | 24 ++++++------- .../kernels/splines/matrix_corner_block.hpp | 12 +++---- include/ddc/kernels/splines/matrix_dense.hpp | 4 +-- include/ddc/kernels/splines/matrix_maker.hpp | 34 +++++++++---------- .../ddc/kernels/splines/matrix_pds_banded.hpp | 4 +-- .../kernels/splines/matrix_pds_tridiag.hpp | 4 +-- .../splines/matrix_periodic_banded.hpp | 24 ++++++------- include/ddc/kernels/splines/matrix_sparse.hpp | 4 +-- 9 files changed, 57 insertions(+), 57 deletions(-) diff --git a/include/ddc/kernels/splines/matrix_banded.hpp b/include/ddc/kernels/splines/matrix_banded.hpp index 4f019511a..df7f105bb 100644 --- a/include/ddc/kernels/splines/matrix_banded.hpp +++ b/include/ddc/kernels/splines/matrix_banded.hpp @@ -35,7 +35,7 @@ extern "C" int dgbtrs_( int* info); template -class Matrix_Banded : public Matrix +class MatrixBanded : public Matrix { protected: int const m_kl; // no. of subdiagonals @@ -45,7 +45,7 @@ class Matrix_Banded : public Matrix Kokkos::View m_q; // banded matrix representation public: - Matrix_Banded(int const mat_size, int const kl, int const ku) + MatrixBanded(int const mat_size, int const kl, int const ku) : Matrix(mat_size) , m_kl(kl) , m_ku(ku) diff --git a/include/ddc/kernels/splines/matrix_center_block.hpp b/include/ddc/kernels/splines/matrix_center_block.hpp index 703dfe9c4..a611754dd 100644 --- a/include/ddc/kernels/splines/matrix_center_block.hpp +++ b/include/ddc/kernels/splines/matrix_center_block.hpp @@ -16,15 +16,15 @@ namespace ddc::detail { class Matrix; template -class Matrix_Center_Block : public Matrix_Corner_Block +class MatrixCenterBlock : public MatrixCornerBlock { // Necessary because we inherit from a template class, otherwise we should use this-> everywhere - using Matrix_Corner_Block::get_size; - using Matrix_Corner_Block::solve_inplace; - using Matrix_Corner_Block::m_q_block; - using Matrix_Corner_Block::m_delta; - using Matrix_Corner_Block::m_Abm_1_gamma; - using Matrix_Corner_Block::m_lambda; + using MatrixCornerBlock::get_size; + using MatrixCornerBlock::solve_inplace; + using MatrixCornerBlock::m_q_block; + using MatrixCornerBlock::m_delta; + using MatrixCornerBlock::m_Abm_1_gamma; + using MatrixCornerBlock::m_lambda; protected: int const m_top_block_size; @@ -32,12 +32,12 @@ class Matrix_Center_Block : public Matrix_Corner_Block int const m_bottom_block_index; public: - Matrix_Center_Block( + MatrixCenterBlock( int const n, int const m_top_block_size, int const m_bottom_block_size, std::unique_ptr q) - : Matrix_Corner_Block(n, m_top_block_size + m_bottom_block_size, std::move(q)) + : MatrixCornerBlock(n, m_top_block_size + m_bottom_block_size, std::move(q)) , m_top_block_size(m_top_block_size) , m_bottom_block_size(m_bottom_block_size) , m_bottom_block_index(n - m_bottom_block_size) @@ -47,19 +47,19 @@ class Matrix_Center_Block : public Matrix_Corner_Block double get_element(int i, int j) const override { adjust_indexes(i, j); - return Matrix_Corner_Block::get_element(i, j); + return MatrixCornerBlock::get_element(i, j); } void set_element(int i, int j, double aij) override { adjust_indexes(i, j); - Matrix_Corner_Block::set_element(i, j, aij); + MatrixCornerBlock::set_element(i, j, aij); } ddc::DSpan2D_stride solve_inplace(ddc::DSpan2D_stride const bx) const override { swap_array_to_corner(bx); - Matrix_Corner_Block::solve_inplace(bx); + MatrixCornerBlock::solve_inplace(bx); swap_array_to_center(bx); return bx; } diff --git a/include/ddc/kernels/splines/matrix_corner_block.hpp b/include/ddc/kernels/splines/matrix_corner_block.hpp index e9c1f0025..f31ee547a 100644 --- a/include/ddc/kernels/splines/matrix_corner_block.hpp +++ b/include/ddc/kernels/splines/matrix_corner_block.hpp @@ -19,7 +19,7 @@ namespace ddc::detail { template -class Matrix_Corner_Block : public Matrix +class MatrixCornerBlock : public Matrix { protected: int const m_k; // small block size @@ -31,17 +31,17 @@ class Matrix_Corner_Block : public Matrix // //------------------------------------- std::shared_ptr m_q_block; - std::shared_ptr> m_delta; + std::shared_ptr> m_delta; Kokkos::View m_Abm_1_gamma; Kokkos::View m_lambda; public: - Matrix_Corner_Block(int const n, int const k, std::unique_ptr q) + MatrixCornerBlock(int const n, int const k, std::unique_ptr q) : Matrix(n) , m_k(k) , m_nb(n - k) , m_q_block(std::move(q)) - , m_delta(new Matrix_Dense(k)) + , m_delta(new MatrixDense(k)) , m_Abm_1_gamma("Abm_1_gamma", m_nb, m_k) , m_lambda("lambda", m_k, m_nb) { @@ -98,7 +98,7 @@ class Matrix_Corner_Block : public Matrix } protected: - Matrix_Corner_Block( + MatrixCornerBlock( int const n, int const k, std::unique_ptr q, @@ -108,7 +108,7 @@ class Matrix_Corner_Block : public Matrix , m_k(k) , m_nb(n - k) , m_q_block(std::move(q)) - , m_delta(new Matrix_Dense(k)) + , m_delta(new MatrixDense(k)) , m_Abm_1_gamma("Abm_1_gamma", m_nb, m_k) , m_lambda("lambda", lambda_size1, lambda_size2) { diff --git a/include/ddc/kernels/splines/matrix_dense.hpp b/include/ddc/kernels/splines/matrix_dense.hpp index cc789d33a..16999d028 100644 --- a/include/ddc/kernels/splines/matrix_dense.hpp +++ b/include/ddc/kernels/splines/matrix_dense.hpp @@ -23,14 +23,14 @@ extern "C" int dgetrs_( int* info); template -class Matrix_Dense : public Matrix +class MatrixDense : public Matrix { protected: Kokkos::View m_a; Kokkos::View m_ipiv; public: - explicit Matrix_Dense(int const mat_size) + explicit MatrixDense(int const mat_size) : Matrix(mat_size) , m_a("a", mat_size, mat_size) , m_ipiv("ipiv", mat_size) diff --git a/include/ddc/kernels/splines/matrix_maker.hpp b/include/ddc/kernels/splines/matrix_maker.hpp index 2a6c72584..a901331bd 100644 --- a/include/ddc/kernels/splines/matrix_maker.hpp +++ b/include/ddc/kernels/splines/matrix_maker.hpp @@ -24,7 +24,7 @@ class MatrixMaker template static std::unique_ptr make_new_dense(int const n) { - return std::make_unique>(n); + return std::make_unique>(n); } template @@ -35,13 +35,13 @@ class MatrixMaker bool const pds) { if (kl == ku && kl == 1 && pds) { - return std::make_unique>(n); + return std::make_unique>(n); } else if (2 * kl + 1 + ku >= n) { - return std::make_unique>(n); + return std::make_unique>(n); } else if (kl == ku && pds) { - return std::make_unique>(n, kl); + return std::make_unique>(n, kl); } else { - return std::make_unique>(n, kl, ku); + return std::make_unique>(n, kl, ku); } } @@ -56,17 +56,17 @@ class MatrixMaker int const banded_size = n - border_size; std::unique_ptr block_mat; if (pds && kl == ku && kl == 1) { - block_mat = std::make_unique>(banded_size); + block_mat = std::make_unique>(banded_size); } else if ( border_size * n + border_size * (border_size + 1) + (2 * kl + 1 + ku) * banded_size >= n * n) { - return std::make_unique>(n); + return std::make_unique>(n); } else if (kl == ku && pds) { - block_mat = std::make_unique>(banded_size, kl); + block_mat = std::make_unique>(banded_size, kl); } else { - block_mat = std::make_unique>(banded_size, kl, ku); + block_mat = std::make_unique>(banded_size, kl, ku); } - return std::make_unique>(n, kl, ku, std::move(block_mat)); + return std::make_unique>(n, kl, ku, std::move(block_mat)); } template @@ -81,19 +81,19 @@ class MatrixMaker int const banded_size = n - block1_size - block2_size; std::unique_ptr block_mat; if (pds && kl == ku && kl == 1) { - block_mat = std::make_unique>(banded_size); + block_mat = std::make_unique>(banded_size); } else if (2 * kl + 1 + ku >= banded_size) { - return std::make_unique>(n); + return std::make_unique>(n); } else if (kl == ku && pds) { - block_mat = std::make_unique>(banded_size, kl); + block_mat = std::make_unique>(banded_size, kl); } else { - block_mat = std::make_unique>(banded_size, kl, ku); + block_mat = std::make_unique>(banded_size, kl, ku); } if (block2_size == 0) { return std::make_unique< - Matrix_Corner_Block>(n, block1_size, std::move(block_mat)); + MatrixCornerBlock>(n, block1_size, std::move(block_mat)); } else { - return std::make_unique>(n, block1_size, block2_size, std::move(block_mat)); } } @@ -105,7 +105,7 @@ class MatrixMaker std::optional preconditionner_max_block_size = std::nullopt) { return std::make_unique< - Matrix_Sparse>(n, cols_per_chunk, preconditionner_max_block_size); + MatrixSparse>(n, cols_per_chunk, preconditionner_max_block_size); } }; diff --git a/include/ddc/kernels/splines/matrix_pds_banded.hpp b/include/ddc/kernels/splines/matrix_pds_banded.hpp index f0e4d1450..36f244d04 100644 --- a/include/ddc/kernels/splines/matrix_pds_banded.hpp +++ b/include/ddc/kernels/splines/matrix_pds_banded.hpp @@ -32,7 +32,7 @@ extern "C" int dpbtrs_( int* info); template -class Matrix_PDS_Banded : public Matrix +class MatrixPDSBanded : public Matrix { /* * Represents a real symmetric positive definite matrix @@ -44,7 +44,7 @@ class Matrix_PDS_Banded : public Matrix m_q; // pds banded matrix representation public: - Matrix_PDS_Banded(int const mat_size, int const kd) + MatrixPDSBanded(int const mat_size, int const kd) : Matrix(mat_size) , m_kd(kd) , m_q("q", kd + 1, mat_size) diff --git a/include/ddc/kernels/splines/matrix_pds_tridiag.hpp b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp index 13f1af4e0..a4cc9082b 100644 --- a/include/ddc/kernels/splines/matrix_pds_tridiag.hpp +++ b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp @@ -24,7 +24,7 @@ extern "C" int dpttrs_( int* info); template -class Matrix_PDS_Tridiag : public Matrix +class MatrixPDSTridiag : public Matrix { /* * Represents a real symmetric positive definite matrix @@ -35,7 +35,7 @@ class Matrix_PDS_Tridiag : public Matrix Kokkos::View m_l; // lower diagonal public: - Matrix_PDS_Tridiag(int const mat_size) + MatrixPDSTridiag(int const mat_size) : Matrix(mat_size) , m_d("d", mat_size) , m_l("l", mat_size - 1) diff --git a/include/ddc/kernels/splines/matrix_periodic_banded.hpp b/include/ddc/kernels/splines/matrix_periodic_banded.hpp index 12653baa4..217b4069d 100644 --- a/include/ddc/kernels/splines/matrix_periodic_banded.hpp +++ b/include/ddc/kernels/splines/matrix_periodic_banded.hpp @@ -21,24 +21,24 @@ namespace ddc::detail { template -class Matrix_Periodic_Banded : public Matrix_Corner_Block +class MatrixPeriodicBanded : public MatrixCornerBlock { // Necessary because we inherit from a template class, otherwise we should use this-> everywhere - using Matrix_Corner_Block::get_size; - using Matrix_Corner_Block::m_k; - using Matrix_Corner_Block::m_nb; - using Matrix_Corner_Block::m_q_block; - using Matrix_Corner_Block::m_delta; - using Matrix_Corner_Block::m_Abm_1_gamma; - using Matrix_Corner_Block::m_lambda; + using MatrixCornerBlock::get_size; + using MatrixCornerBlock::m_k; + using MatrixCornerBlock::m_nb; + using MatrixCornerBlock::m_q_block; + using MatrixCornerBlock::m_delta; + using MatrixCornerBlock::m_Abm_1_gamma; + using MatrixCornerBlock::m_lambda; protected: int const m_kl; // no. of subdiagonals int const m_ku; // no. of superdiagonals public: - Matrix_Periodic_Banded(int const n, int const m_kl, int const m_ku, std::unique_ptr q) - : Matrix_Corner_Block( + MatrixPeriodicBanded(int const n, int const m_kl, int const m_ku, std::unique_ptr q) + : MatrixCornerBlock( n, std::max(m_kl, m_ku), std::move(q), @@ -70,7 +70,7 @@ class Matrix_Periodic_Banded : public Matrix_Corner_Block return m_lambda(i - m_nb, j - m_nb + m_k + 1); } } else { - return Matrix_Corner_Block::get_element(i, j); + return MatrixCornerBlock::get_element(i, j); } } void set_element(int const i, int j, double const aij) override @@ -97,7 +97,7 @@ class Matrix_Periodic_Banded : public Matrix_Corner_Block m_lambda(i - m_nb, j - m_nb + m_k + 1) = aij; } } else { - Matrix_Corner_Block::set_element(i, j, aij); + MatrixCornerBlock::set_element(i, j, aij); } } diff --git a/include/ddc/kernels/splines/matrix_sparse.hpp b/include/ddc/kernels/splines/matrix_sparse.hpp index dc07ceebf..f31453d6e 100644 --- a/include/ddc/kernels/splines/matrix_sparse.hpp +++ b/include/ddc/kernels/splines/matrix_sparse.hpp @@ -95,7 +95,7 @@ unsigned int default_preconditionner_max_block_size() noexcept // Matrix class for sparse storage and iterative solve template -class Matrix_Sparse : public Matrix +class MatrixSparse : public Matrix { using matrix_sparse_type = gko::matrix::Csr; #ifdef KOKKOS_ENABLE_OPENMP @@ -122,7 +122,7 @@ class Matrix_Sparse : public Matrix public: // Constructor - explicit Matrix_Sparse( + explicit MatrixSparse( const int mat_size, std::optional cols_per_chunk = std::nullopt, std::optional preconditionner_max_block_size = std::nullopt) From 3e68db61dea6ac3abb74e3b230c59be8a9ffc7d1 Mon Sep 17 00:00:00 2001 From: blegouix Date: Wed, 15 May 2024 18:37:07 +0200 Subject: [PATCH 28/28] LAPACKE --- include/ddc/kernels/splines/matrix_banded.hpp | 52 +++++++------------ include/ddc/kernels/splines/matrix_dense.hpp | 30 ++++------- .../ddc/kernels/splines/matrix_pds_banded.hpp | 32 ++++-------- .../kernels/splines/matrix_pds_tridiag.hpp | 22 ++++---- 4 files changed, 50 insertions(+), 86 deletions(-) diff --git a/include/ddc/kernels/splines/matrix_banded.hpp b/include/ddc/kernels/splines/matrix_banded.hpp index df7f105bb..f80f6c509 100644 --- a/include/ddc/kernels/splines/matrix_banded.hpp +++ b/include/ddc/kernels/splines/matrix_banded.hpp @@ -9,30 +9,11 @@ #include #include +#include + #include "matrix.hpp" namespace ddc::detail { -extern "C" int dgbtrf_( - int const* m, - int const* n, - int const* kl, - int const* ku, - double* a_b, - int const* lda_b, - int* ipiv, - int* info); -extern "C" int dgbtrs_( - char const* trans, - int const* n, - int const* kl, - int const* ku, - int const* nrhs, - double* a_b, - int const* lda_b, - int* ipiv, - double* b, - int const* ldb, - int* info); template class MatrixBanded : public Matrix @@ -93,9 +74,16 @@ class MatrixBanded : public Matrix protected: int factorize_method() override { - int info; int const n = get_size(); - dgbtrf_(&n, &n, &m_kl, &m_ku, m_q.data(), &m_c, m_ipiv.data(), &info); + int const info = LAPACKE_dgbtrf( + LAPACK_COL_MAJOR, + n, + n, + m_kl, + m_ku, + m_q.data(), + m_c, + m_ipiv.data()); return info; } int solve_inplace_method(ddc::DSpan2D_stride b, char const transpose) const override @@ -112,19 +100,19 @@ class MatrixBanded : public Matrix Kokkos::subview(b_host, Kokkos::ALL, i), Kokkos::subview(b_view, Kokkos::ALL, i)); } - int info; int const n = get_size(); - dgbtrs_(&transpose, - &n, - &m_kl, - &m_ku, - &n_equations, + int const info = LAPACKE_dgbtrs( + LAPACK_COL_MAJOR, + transpose, + n, + m_kl, + m_ku, + n_equations, m_q.data(), - &m_c, + m_c, m_ipiv.data(), b_host.data(), - &stride, - &info); + stride); for (int i = 0; i < n_equations; ++i) { Kokkos::deep_copy( Kokkos::subview(b_view, Kokkos::ALL, i), diff --git a/include/ddc/kernels/splines/matrix_dense.hpp b/include/ddc/kernels/splines/matrix_dense.hpp index 16999d028..010ebc123 100644 --- a/include/ddc/kernels/splines/matrix_dense.hpp +++ b/include/ddc/kernels/splines/matrix_dense.hpp @@ -7,20 +7,11 @@ #include #include +#include + #include "matrix.hpp" namespace ddc::detail { -extern "C" int dgetrf_(int const* m, int const* n, double* a, int const* lda, int* ipiv, int* info); -extern "C" int dgetrs_( - char const* trans, - int const* n, - int const* nrhs, - double* a, - int const* lda, - int* ipiv, - double* b, - int const* ldb, - int* info); template class MatrixDense : public Matrix @@ -55,9 +46,8 @@ class MatrixDense : public Matrix private: int factorize_method() override { - int info; int const n = get_size(); - dgetrf_(&n, &n, m_a.data(), &n, m_ipiv.data(), &info); + int const info = LAPACKE_dgetrf(LAPACK_COL_MAJOR, n, n, m_a.data(), n, m_ipiv.data()); return info; } @@ -75,17 +65,17 @@ class MatrixDense : public Matrix Kokkos::subview(b_host, Kokkos::ALL, i), Kokkos::subview(b_view, Kokkos::ALL, i)); } - int info; int const n = get_size(); - dgetrs_(&transpose, - &n, - &n_equations, + int const info = LAPACKE_dgetrs( + LAPACK_COL_MAJOR, + transpose, + n, + n_equations, m_a.data(), - &n, + n, m_ipiv.data(), b_host.data(), - &stride, - &info); + stride); for (int i = 0; i < n_equations; ++i) { Kokkos::deep_copy( Kokkos::subview(b_view, Kokkos::ALL, i), diff --git a/include/ddc/kernels/splines/matrix_pds_banded.hpp b/include/ddc/kernels/splines/matrix_pds_banded.hpp index 36f244d04..15957544f 100644 --- a/include/ddc/kernels/splines/matrix_pds_banded.hpp +++ b/include/ddc/kernels/splines/matrix_pds_banded.hpp @@ -13,23 +13,6 @@ #include "matrix.hpp" namespace ddc::detail { -extern "C" int dpbtrf_( - char const* uplo, - int const* n, - int const* kd, - double* ab, - int const* ldab, - int* info); -extern "C" int dpbtrs_( - char const* uplo, - int const* n, - int const* kd, - int const* nrhs, - double const* ab, - int const* ldab, - double* b, - int const* ldb, - int* info); template class MatrixPDSBanded : public Matrix @@ -95,11 +78,10 @@ class MatrixPDSBanded : public Matrix protected: int factorize_method() override { - int info; char const uplo = 'L'; int const n = get_size(); int const ldab = m_kd + 1; - dpbtrf_(&uplo, &n, &m_kd, m_q.data(), &ldab, &info); + int const info = LAPACKE_dpbtrf(LAPACK_COL_MAJOR, uplo, n, m_kd, m_q.data(), ldab); return info; } @@ -117,11 +99,19 @@ class MatrixPDSBanded : public Matrix Kokkos::subview(b_host, Kokkos::ALL, i), Kokkos::subview(b_view, Kokkos::ALL, i)); } - int info; char const uplo = 'L'; int const n = get_size(); int const ldab = m_kd + 1; - dpbtrs_(&uplo, &n, &m_kd, &n_equations, m_q.data(), &ldab, b_host.data(), &stride, &info); + int const info = LAPACKE_dpbtrs( + LAPACK_COL_MAJOR, + uplo, + n, + m_kd, + n_equations, + m_q.data(), + ldab, + b_host.data(), + stride); for (int i = 0; i < n_equations; ++i) { Kokkos::deep_copy( Kokkos::subview(b_view, Kokkos::ALL, i), diff --git a/include/ddc/kernels/splines/matrix_pds_tridiag.hpp b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp index a4cc9082b..3c9c641e7 100644 --- a/include/ddc/kernels/splines/matrix_pds_tridiag.hpp +++ b/include/ddc/kernels/splines/matrix_pds_tridiag.hpp @@ -13,15 +13,6 @@ #include "matrix.hpp" namespace ddc::detail { -extern "C" int dpttrf_(int const* n, double* d, double* e, int* info); -extern "C" int dpttrs_( - int const* n, - int const* nrhs, - double* d, - double* e, - double* b, - int const* ldb, - int* info); template class MatrixPDSTridiag : public Matrix @@ -82,9 +73,8 @@ class MatrixPDSTridiag : public Matrix protected: int factorize_method() override { - int info; int const n = get_size(); - dpttrf_(&n, m_d.data(), m_l.data(), &info); + int const info = LAPACKE_dpttrf(n, m_d.data(), m_l.data()); return info; } @@ -102,9 +92,15 @@ class MatrixPDSTridiag : public Matrix Kokkos::subview(b_host, Kokkos::ALL, i), Kokkos::subview(b_view, Kokkos::ALL, i)); } - int info; int const n = get_size(); - dpttrs_(&n, &n_equations, m_d.data(), m_l.data(), b_host.data(), &stride, &info); + int const info = LAPACKE_dpttrs( + LAPACK_COL_MAJOR, + n, + n_equations, + m_d.data(), + m_l.data(), + b_host.data(), + stride); for (int i = 0; i < n_equations; ++i) { Kokkos::deep_copy( Kokkos::subview(b_view, Kokkos::ALL, i),