From e5742611cfbc447f56bd53dd71311d2ecb88c38e Mon Sep 17 00:00:00 2001
From: Hossein Moein
Date: Sat, 14 Dec 2024 09:55:12 -0500
Subject: [PATCH] Implemented svd() in Matrix

---
 docs/HTML/Matrix.html              | 279 +++++++++++++-----
 include/DataFrame/DataFrame.h      |  28 ++
 include/DataFrame/DataFrameTypes.h |  22 ++
 include/DataFrame/Utils/Matrix.h   |  45 ++-
 include/DataFrame/Utils/Matrix.tcc | 448 +++++++++++++++++++++++++++++
 test/matrix_tester.cc              | 159 +++++-----
 6 files changed, 841 insertions(+), 140 deletions(-)

diff --git a/docs/HTML/Matrix.html b/docs/HTML/Matrix.html
index 71da2c9b..09a66a67 100644
--- a/docs/HTML/Matrix.html
+++ b/docs/HTML/Matrix.html
@@ -39,20 +39,23 @@
@@ -61,12 +64,71 @@
 T: Matrix data type
 MO: Matrix memory orientation
 Signature    Description    Public Member Functions

 enum class  matrix_orient : unsigned char  {
 
-    column_major = 1,  // Data is laid out column by column
-    row_major = 2,     // Data is laid out row by row
+    column_major = 1, // Data is laid out col by col
+    row_major = 2,    // Data is laid out row by row
 };
 
 // -----------------------
 
-template<typename T, matrix_orient MO = matrix_orient::column_major>
+template<typename T,
+         matrix_orient MO =
+             matrix_orient::column_major>
 class   Matrix;
 		
+
Matrix() = default;
+Matrix(size_type rows,
+       size_type cols,
+       const_reference def_v = T());
+Matrix(const Matrix &) = default;
+Matrix(Matrix &&) = default;
+~Matrix() = default;
+Matrix &operator = (const Matrix &) = default;
+Matrix &operator = (Matrix &&) = default;
+
+void clear() noexcept;
+void swap(Matrix &rhs) noexcept;
+bool empty() const noexcept;
+void reserve(size_type rows, size_type cols);
+
+size_type rows() const noexcept;
+size_type cols() const noexcept;
+
+static constexpr matrix_orient orientation();
+
+void resize(size_type rows,
+            size_type cols,
+            const_reference def_v = T());
+
+reference at(size_type r, size_type c);
+const_reference at(size_type r, size_type c) const;
+reference operator() (size_type r, size_type c);
+const_reference operator() (size_type r, size_type c) const;
+
+template<typename I>
+void set_column(I col_data, size_type col);
+
+template<typename I>
+void set_row(I row_data, size_type row);
+
+bool is_square() const noexcept;
+bool is_symmetric() const noexcept;
+
+trans_result_t transpose() const noexcept;
+Matrix transpose2() const noexcept;
+
+Matrix inverse() const;
+
+Matrix covariance(bool is_unbiased = true) const;
+
+template<typename MA1, typename MA2>
+void eigen_space(MA1 &eigenvalues,
+                 MA2 &eigenvectors,
+                 bool sort_values) const;
+
+template<typename MA1, typename MA2, typename MA3>
+void svd(MA1 &U,
+         MA2 &S,
+         MA3 &V,
+         bool full_size = true) const;
+
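A small usage sketch for the set_column()/set_row() members listed above (illustrative, not part of the patch): it assumes the setters copy rows() and cols() elements, respectively, from the passed iterator, which the signatures suggest but the page does not spell out.

#include <DataFrame/Utils/Matrix.h>

#include <cassert>
#include <vector>

using namespace hmdf;

int main(int, char *[])  {

    using col_dmat_t = Matrix<double, matrix_orient::column_major>;

    col_dmat_t                  mat { 3, 3, 0.0 };
    const std::vector<double>   col_data { 1.0, 2.0, 3.0 };
    const std::vector<double>   row_data { 4.0, 5.0, 6.0 };

    // Assumed semantics: copy rows() values down column 1,
    // then cols() values across row 2.
    //
    mat.set_column(col_data.begin(), 1);
    mat.set_row(row_data.begin(), 2);

    // Expected layout under the assumed semantics
    //
    assert(mat(0, 1) == 1.0);
    assert(mat(1, 1) == 2.0);
    assert(mat(2, 0) == 4.0);
    assert(mat(2, 1) == 5.0);   // row 2 overwrote column 1's last entry
    assert(mat(2, 2) == 6.0);

    return (0);
}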
using namespace hmdf;
 
-// -----------------------------------------------------------------------------
+// ----------------------------------------------------------------------------
 
 using row_mat_t = Matrix<std::size_t, matrix_orient::row_major>;
 using col_mat_t = Matrix<std::size_t, matrix_orient::column_major>;
@@ -74,7 +136,7 @@
 static constexpr long   ROWS = 5;
 static constexpr long   COLS = 6;
 
-// -----------------------------------------------------------------------------
+// ----------------------------------------------------------------------------
 
 int main(int, char *[]) {
 
@@ -98,7 +160,7 @@
     assert((row_matb(1, 1) == col_matb(1, 1) && row_matb(1, 1) == 81));
     assert((row_matb(2, 1) == col_matb(2, 1) && row_matb(2, 1) == 126));
     assert((row_matb(2, 2) == col_matb(2, 2) && row_matb(2, 2) == 150));
-    
+
     row_mat_t   row_mat { ROWS, COLS };
     col_mat_t   col_mat { ROWS, COLS };
 
@@ -155,8 +217,7 @@
     assert(empty_mat.empty());
     assert(empty_mat.rows() == 0);
     assert(empty_mat.cols() == 0);
-    for (auto citer = empty_mat.row_cbegin();
-             citer != empty_mat.row_cend(); ++citer)
+    for (auto citer = empty_mat.row_cbegin(); citer != empty_mat.row_cend(); ++citer)
         assert(*citer == value++);
 
     auto    col_iter1 = col_mat.col_begin();
@@ -188,93 +249,95 @@
         for (long c = 0; c < tran_mat.cols(); ++c)
             assert(tran_mat(r, c) == col_mat(c, r));
 
-    //
     // Test arithmetic functions
     //
+    {
+        auto    sum_mat = col_mat + row_mat;
 
-    auto    sum_mat = col_mat + row_mat;
-
-    assert(sum_mat(0, 0) == 0);
-    assert(sum_mat(4, 5) == 58);
-    assert(sum_mat(1, 1) == 13);
-    assert(sum_mat(3, 4) == 45);
+        assert(sum_mat(0, 0) == 0);
+        assert(sum_mat(4, 5) == 58);
+        assert(sum_mat(1, 1) == 13);
+        assert(sum_mat(3, 4) == 45);
 
-    sum_mat += col_mat;
-    assert(sum_mat(0, 0) == 0);
-    assert(sum_mat(4, 5) == 87);
-    assert(sum_mat(1, 1) == 19);
-    assert(sum_mat(3, 4) == 68);
+        sum_mat += col_mat;
+        assert(sum_mat(0, 0) == 0);
+        assert(sum_mat(4, 5) == 87);
+        assert(sum_mat(1, 1) == 19);
+        assert(sum_mat(3, 4) == 68);
 
-    row_mat_t   lhs_mat { ROWS, COLS };
-    col_mat_t   rhs_mat { COLS, COLS };
+        row_mat_t   lhs_mat { ROWS, COLS };
+        col_mat_t   rhs_mat { COLS, COLS };
 
-    value = 0;
-    for (long r = 0; r < lhs_mat.rows(); ++r)
-        for (long c = 0; c < lhs_mat.cols(); ++c)
-            lhs_mat(r, c) = value++;
-    value = 0;
-    for (long c = 0; c < rhs_mat.cols(); ++c)
-        for (long r = 0; r < rhs_mat.rows(); ++r)
-            rhs_mat(r, c) = value++;
+        value = 0;
+        for (long r = 0; r < lhs_mat.rows(); ++r)
+            for (long c = 0; c < lhs_mat.cols(); ++c)
+                lhs_mat(r, c) = value++;
+        value = 0;
+        for (long c = 0; c < rhs_mat.cols(); ++c)
+            for (long r = 0; r < rhs_mat.rows(); ++r)
+                rhs_mat(r, c) = value++;
 
-    auto    multi_mat = lhs_mat * rhs_mat;
+        auto    multi_mat = lhs_mat * rhs_mat;
 
-    assert(multi_mat(0, 0) == 55);
-    assert(multi_mat(4, 5) == 5185);
-    assert(multi_mat(1, 1) == 451);
-    assert(multi_mat(3, 4) == 3277);
+        assert(multi_mat(0, 0) == 55);
+        assert(multi_mat(4, 5) == 5185);
+        assert(multi_mat(1, 1) == 451);
+        assert(multi_mat(3, 4) == 3277);
 
-    col_mat_t   big_lhs_mat { 100, 100 };
-    col_mat_t   big_rhs_mat { 100, 100 };
+        col_mat_t   big_lhs_mat { 100, 100 };
+        col_mat_t   big_rhs_mat { 100, 100 };
 
-    for (long c = 0; c < 100; ++c)
-        for (long r = 0; r < 100; ++r)  {
-            big_lhs_mat(r, c) = c + 1;
-            big_rhs_mat(r, c) = c + 1;
-        }
+        for (long c = 0; c < 100; ++c)
+            for (long r = 0; r < 100; ++r)  {
+                big_lhs_mat(r, c) = c + 1;
+                big_rhs_mat(r, c) = c + 1;
+            }
 
-    auto    big_multi_mat = big_lhs_mat * big_rhs_mat;
+        auto    big_multi_mat = big_lhs_mat * big_rhs_mat;
 
-    assert(big_multi_mat(0, 0) == 5050);
-    assert(big_multi_mat(99, 99) == 505000);
-    assert(big_multi_mat(98, 2) == 15150);
-    assert(big_multi_mat(2, 5) == 30300);
+        assert(big_multi_mat(0, 0) == 5050);
+        assert(big_multi_mat(99, 99) == 505000);
+        assert(big_multi_mat(98, 2) == 15150);
+        assert(big_multi_mat(2, 5) == 30300);
+    }
 
+    // Test Inverse
     //
-    // Test inverse
-    //
+    {
+        using row_dmat_t = Matrix<double, matrix_orient::row_major>;
 
-    using row_dmat_t = Matrix<double, matrix_orient::row_major>;
+        row_dmat_t  mat2 { 3, 3 };
 
-    row_dmat_t  mat2 { 3, 3 };
+        mat2(0, 0) = 2.0;
+        mat2(0, 1) = 3.0;
+        mat2(0, 2) = 2.0;
 
-    mat2(0, 0) = 2.0;
-    mat2(0, 1) = 3.0;
-    mat2(0, 2) = 2.0;
+        mat2(1, 0) = 3.0;
+        mat2(1, 1) = 2.0;
+        mat2(1, 2) = 3.0;
 
-    mat2(1, 0) = 3.0;
-    mat2(1, 1) = 2.0;
-    mat2(1, 2) = 3.0;
+        mat2(2, 0) = 4.0;
+        mat2(2, 1) = 2.0;
+        mat2(2, 2) = 2.0;
 
-    mat2(2, 0) = 4.0;
-    mat2(2, 1) = 2.0;
-    mat2(2, 2) = 2.0;
+        row_dmat_t  mat2_inv = mat2.inverse();
+        auto        mat3 = mat2 * mat2_inv;
 
-    row_dmat_t  mat2_inv = mat2.inverse();
-    auto        mat3 = mat2 * mat2_inv;
+        // It must result in the identity matrix
+        //
+        assert((std::fabs(mat3(0, 0) - 1.0) < 0.00000001));
+        assert((std::fabs(mat3(1, 1) - 1.0) < 0.00000001));
+        assert((std::fabs(mat3(2, 2) - 1.0) < 0.00000001));
+        assert((std::fabs(mat3(0, 1) - 0.0) < 0.00000001));
+        assert((std::fabs(mat3(0, 2) - 0.0) < 0.00000001));
+        assert((std::fabs(mat3(1, 0) - 0.0) < 0.00000001));
+        assert((std::fabs(mat3(1, 2) - 0.0) < 0.00000001));
+        assert((std::fabs(mat3(2, 0) - 0.0) < 0.00000001));
+        assert((std::fabs(mat3(2, 1) - 0.0) < 0.00000001));
+    }
 
-    // It must result to identity matrix
+    // Test Eigen space
     //
-    assert((std::fabs(mat3(0, 0) - 1.0) < 0.00000001));
-    assert((std::fabs(mat3(1, 1) - 1.0) < 0.00000001));
-    assert((std::fabs(mat3(2, 2) - 1.0) < 0.00000001));
-    assert((std::fabs(mat3(0, 1) - 0.0) < 0.00000001));
-    assert((std::fabs(mat3(0, 2) - 0.0) < 0.00000001));
-    assert((std::fabs(mat3(1, 0) - 0.0) < 0.00000001));
-    assert((std::fabs(mat3(1, 2) - 0.0) < 0.00000001));
-    assert((std::fabs(mat3(2, 0) - 0.0) < 0.00000001));
-    assert((std::fabs(mat3(2, 1) - 0.0) < 0.00000001));
-
     {
         using col_dmat_t = Matrix<double, matrix_orient::column_major>;
 
@@ -331,6 +394,74 @@
         assert((std::fabs(eigenvecs(9, 9) - -0.51616) < 0.00001));
     }
 
+    // Test Covariance matrix
+    //
+    {
+        using col_dmat_t = Matrix<double, matrix_orient::column_major>;
+
+        col_dmat_t  col_mat { 5, 4 };
+
+        col_mat(0, 0) = 4.0;
+        col_mat(0, 1) = 2.0;
+        col_mat(0, 2) = 0.6;
+        col_mat(0, 3) = 3.0;
+
+        col_mat(1, 0) = 4.2;
+        col_mat(1, 1) = 2.1;
+        col_mat(1, 2) = 0.59;
+        col_mat(1, 3) = 3.2;
+
+        col_mat(2, 0) = 3.9;
+        col_mat(2, 1) = 2.0;
+        col_mat(2, 2) = 0.58;
+        col_mat(2, 3) = 2.89;
+
+        col_mat(3, 0) = 4.3;
+        col_mat(3, 1) = 2.1;
+        col_mat(3, 2) = 0.62;
+        col_mat(3, 3) = 3.298;
+
+        col_mat(4, 0) = 4.1;
+        col_mat(4, 1) = 2.2;
+        col_mat(4, 2) = 0.63;
+        col_mat(4, 3) = 3.098;
+
+        const auto  cov = col_mat.covariance(true);
+
+        assert(cov.cols() == 4);
+        assert(cov.rows() == 4);
+        assert((std::fabs(cov(0, 0) - 0.025) < 0.001));
+        assert((std::fabs(cov(0, 3) - 0.0254) < 0.0001));
+        assert((std::fabs(cov(2, 3) - 0.001789) < 0.000001));
+        assert((std::fabs(cov(3, 1) - 0.00763) < 0.00001));
+        assert((std::fabs(cov(3, 3) - 0.0258172) < 0.0000001));
+    }
+
+    // Test SVD decomposition
+    //
+    {
+        using col_dmat_t = Matrix<double, matrix_orient::column_major>;
+
+        col_dmat_t  col_mat { 8, 4 };
+        std::size_t value { 0 };
+
+        for (long r = 0; r < col_mat.rows(); ++r)
+            for (long c = 0; c < col_mat.cols(); ++c)
+                col_mat(r, c) = double(++value);
+
+        col_dmat_t  U;
+        col_dmat_t  S;
+        col_dmat_t  V;
+
+        col_mat.svd (U, S, V);
+
+        const auto  col_mat2 = U * S * V.transpose();
+
+        for (long r = 0; r < col_mat2.rows(); ++r)
+            for (long c = 0; c < col_mat2.cols(); ++c)
+                assert((std::fabs(col_mat(r, c) - col_mat2(r, c)) < 0.0001));
+    }
+
     return (0);
 }
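A note on the full_size flag of svd() (an illustrative sketch, not taken from the patch): the S.resize(...) call in the Matrix.tcc hunk further below suggests that with full_size == false the singular values come back as a single-column matrix, so a reconstruction has to rebuild the diagonal first.

#include <DataFrame/Utils/Matrix.h>

#include <cassert>
#include <cmath>

using namespace hmdf;

int main(int, char *[])  {

    using col_dmat_t = Matrix<double, matrix_orient::column_major>;

    col_dmat_t  col_mat { 8, 4 };
    std::size_t value { 0 };

    for (long r = 0; r < col_mat.rows(); ++r)
        for (long c = 0; c < col_mat.cols(); ++c)
            col_mat(r, c) = double(++value);

    col_dmat_t  U;
    col_dmat_t  S;  // assumed shape with full_size == false: 4 x 1
    col_dmat_t  V;

    col_mat.svd(U, S, V, false);

    // Rebuild the diagonal matrix of singular values before
    // reconstructing the original.
    //
    col_dmat_t  S_diag { S.rows(), S.rows(), 0.0 };

    for (long i = 0; i < S.rows(); ++i)
        S_diag(i, i) = S(i, 0);

    const auto  rebuilt = U * S_diag * V.transpose();

    for (long r = 0; r < rebuilt.rows(); ++r)
        for (long c = 0; c < rebuilt.cols(); ++c)
            assert((std::fabs(col_mat(r, c) - rebuilt(r, c)) < 0.0001));

    return (0);
}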
 
diff --git a/include/DataFrame/DataFrame.h b/include/DataFrame/DataFrame.h index b5c0619b..685f18f9 100644 --- a/include/DataFrame/DataFrame.h +++ b/include/DataFrame/DataFrame.h @@ -3759,6 +3759,34 @@ class DataFrame : public ThreadGranularity { std::vector &&col_names, normalization_type norm_type = normalization_type::none) const; + + + + + + + + + + + // Principal Component Analysis (PCA) + // + template + EigenSpace + prin_comp_analysis(std::vector &&col_names, + const PCAParams params = { }) const; + + + + + + + + + + + + // This function returns a DataFrame indexed by std::string that provides // a few statistics about the columns of the calling DataFrame. // The statistics are: diff --git a/include/DataFrame/DataFrameTypes.h b/include/DataFrame/DataFrameTypes.h index e284e6b8..8c4b9b93 100644 --- a/include/DataFrame/DataFrameTypes.h +++ b/include/DataFrame/DataFrameTypes.h @@ -717,6 +717,28 @@ struct StationaryTestParams { // ---------------------------------------------------------------------------- +enum class pca_method : unsigned char { + + eigen = 1, // Eigen decomposition of the covariance matrix + svd = 2, // Singular Value Decomposition of the data matrix +}; + +struct PCAParams { + + pca_method method { pca_method::eigen }; + normalization_type norm_type { normalization_type::z_score }; + + // if populated, number of eigen components kept. + // + std::size_t num_comp_kept { 0 }; + + // if populated, percentage of eigen components kept -- 0.9 means 90%. + // + double pct_comp_kept { 0.9 }; +}; + +// ---------------------------------------------------------------------------- + template struct RandGenParams { diff --git a/include/DataFrame/Utils/Matrix.h b/include/DataFrame/Utils/Matrix.h index bb805af2..78645b29 100644 --- a/include/DataFrame/Utils/Matrix.h +++ b/include/DataFrame/Utils/Matrix.h @@ -137,7 +137,7 @@ class Matrix { // Matrix covariance(bool is_unbiased = true) const; - + // Let A be an nXn matrix. The number l is an eigenvalue of A if there // exists a non-zero vector v such that // Av = lv @@ -209,6 +209,40 @@ class Matrix { MA2 &eigenvectors, bool sort_values) const; + // In linear algebra, the Singular Value Decomposition (SVD) is an + // important factorization of a rectangular real or complex matrix, + // with several applications in signal processing and statistics. + // Applications which employ the SVD include computing the + // pseudoinverse, matrix approximation, and determining the rank, + // range and null space of a matrix. + // + // Suppose M is an mXn matrix whose entries come from the field K, + // which is either the field of real numbers or the field of complex + // numbers. Then there exists a factorization of the form + // M = U*Σ*~V + // + // where U is an mXm unitary matrix over K, the matrix Σ is mXn + // with nonnegative numbers on the diagonal (as defined for a + // rectangular matrix) and zeros off the diagonal, and ~V denotes the + // conjugate transpose of V (transpose of V in case of real matrices), + // an nXn unitary matrix over K. Such a factorization is called a + // Singular Value Decomposition of M. + // + // -- The matrix V thus contains a set of orthonormal "input" or + // "analysing" basis vector directions for M + // -- The matrix U contains a set of orthonormal "output" basis vector + // directions for M + // -- The matrix Σ contains the singular values, which can be thought + // of as scalar "gain controls" by which each corresponding input + // is multiplied to give a corresponding output. 
+ // + // A common convention is to order the values Σi,i in non-increasing + // fashion. In this case, the diagonal matrix Σ is uniquely determined + // by M (though the matrices U and V are not). + // + template + inline void svd(MA1 &U, MA2 &S, MA3 &V, bool full_size = true) const; + private: static constexpr size_type NOPOS_ = static_cast(-9); @@ -1453,6 +1487,15 @@ operator * (const Matrix &lhs, const Matrix &rhs) { return (result); } +// ---------------------------------------------------------------------------- + +template +struct EigenSpace { + + Matrix eigen_vals { }; + Matrix eigen_vecs { }; +}; + } // namespace hmdf // ---------------------------------------------------------------------------- diff --git a/include/DataFrame/Utils/Matrix.tcc b/include/DataFrame/Utils/Matrix.tcc index 4b31667c..1d31ee90 100644 --- a/include/DataFrame/Utils/Matrix.tcc +++ b/include/DataFrame/Utils/Matrix.tcc @@ -1262,6 +1262,454 @@ covariance(bool is_unbiased) const { return (result); } +// ---------------------------------------------------------------------------- + +template +template +void Matrix:: +svd(MA1 &U, MA2 &S, MA3 &V, bool full_size) const { + + const size_type min_dem = std::min(rows(), cols()); + + if (min_dem < 3) + throw DataFrameError("Matrix::svd(): MAtrix is too small"); + + Matrix self_tmp = *this; + MA1 u_tmp(rows(), min_dem); + std::vector s_tmp(std::min(rows() + 1, cols())); + MA3 v_tmp(cols(), cols()); + Matrix imagi(1, cols()); // Imaginary part + std::vector sandbox(rows()); + const size_type min_col_cnt { std::min(rows() - 1, cols()) }; + const size_type max_row_cnt { + std::max(0L, std::min(cols() - 2, rows())) }; + + // Reduce A to bidiagonal form, storing the diagonal elements + // in s and the super-diagonal elements in e. + // + for (size_type c = 0; c < std::max(min_col_cnt, max_row_cnt); ++c) { + if (c < min_col_cnt) { + // Compute the transformation for the k-th column and + // place the k-th diagonal in S (0, c). + // Compute 2-norm of k-th column without under / overflow. + // + s_tmp[c] = 0; + for (size_type r = c; r < rows(); ++r) + s_tmp [c] = hypot(s_tmp[c], self_tmp(r, c)); + + if (s_tmp [c] != value_type(0)) { + if (self_tmp(c, c) < value_type(0)) + s_tmp[c] = -s_tmp[c]; + + for (size_type r = c; r < rows(); ++r) + self_tmp(r, c) /= s_tmp[c]; + + self_tmp(c, c) += 1; + } + s_tmp[c] = -s_tmp[c]; + } + for (size_type cc = c + 1; cc < cols(); ++cc) { + if (c < min_col_cnt && s_tmp [c] != value_type(0)) { + // Apply the transformation. + // + value_type t { 0 }; + + for (size_type r = c; r < rows(); ++r) + t += self_tmp(r, c) * self_tmp(r, cc); + + t /= -self_tmp(c, c); + for (size_type r = c; r < rows(); ++r) + self_tmp(r, cc) += t * self_tmp(r, c); + + } + + // Place the k-th row of A into e for the + // subsequent calculation of the row transformation. + // + imagi(0, cc) = self_tmp(c, cc); + } + if (c < min_col_cnt) + // Place the transformation in U for subsequent back + // multiplication. + // + for (size_type r = c; r < rows(); ++r) + u_tmp(r, c) = self_tmp(r, c); + + if (c < max_row_cnt) { + + // Compute the k-th row transformation and place the + // k-th super-diagonal in imagi (0, c). + // Compute 2-norm without under / overflow. 
+ // + imagi(0, c) = 0; + for (size_type cc = c + 1; cc < cols(); ++cc) + imagi(0, c) = hypot(imagi(0, c), imagi(0, cc)); + + if (imagi(0, c) != value_type(0)) { + if (imagi(0, c + 1) < value_type(0)) + imagi(0, c) = -imagi(0, c); + + for (size_type cc = c + 1; cc < cols(); ++cc) + imagi(0, cc) /= imagi(0, c); + + imagi(0, c + 1) += value_type(1); + } + imagi(0, c) = -imagi(0, c); + + if (c + 1 < rows() && imagi(0, c) != value_type(0)) { + + // Apply the transformation. + // + for (size_type r = c + 1; r < rows(); ++r) + sandbox[r] = 0; + + for (size_type cc = c + 1; cc < cols(); ++cc) + for (size_type r = c + 1; r < rows(); ++r) + sandbox[r] += imagi(0, cc) * self_tmp(r, cc); + + for (size_type cc = c + 1; cc < cols(); ++cc) { + const value_type t { -imagi(0, cc) / imagi(0, c + 1) }; + + for (size_type r = c + 1; r < rows(); ++r) + self_tmp(r, cc) += t * sandbox[r]; + } + } + + // Place the transformation in V for subsequent + // back multiplication. + // + for (size_type cc = c + 1; cc < cols(); ++cc) + v_tmp(cc, c) = imagi(0, cc); + } + } + + // Set up the final bidiagonal matrix of order p. + // + size_type p { std::min(cols(), rows() + 1) }; + + if (min_col_cnt < cols()) + s_tmp[min_col_cnt] = self_tmp(min_col_cnt, min_col_cnt); + + if (rows() < p) + s_tmp[p - 1] = 0; + + if ((max_row_cnt + 1) < p) + imagi(0, max_row_cnt) = self_tmp(max_row_cnt, p - 1); + + imagi(0, p - 1) = 0; + + for (size_type c = min_col_cnt; c < min_dem; ++c) { + for (size_type r = 0; r < rows(); ++r) + u_tmp(r, c) = 0; + u_tmp(c, c) = 1; + } + + for (size_type c = min_col_cnt - 1; c >= 0; --c) { + if (s_tmp[c] != value_type(0)) { + for (size_type cc = c + 1; cc < min_dem; ++cc) { + value_type t { 0 }; + + for (size_type r = c; r < rows(); ++r) + t += u_tmp(r, c) * u_tmp(r, cc); + + t /= -u_tmp(c, c); + for (size_type r = c; r < rows(); ++r) + u_tmp(r, cc) += t * u_tmp(r, c); + } + for (size_type r = c; r < rows(); ++r ) + u_tmp(r, c) = -u_tmp(r, c); + + u_tmp(c, c) += value_type(1); + for (size_type r = 0; r < c - 1; ++r) + u_tmp (r, c) = 0; + } + else { + for (size_type r = 0; r < rows(); ++r) + u_tmp(r, c) = 0; + u_tmp(c, c) = 1; + } + } + + for (size_type c = cols() - 1; c >= 0; --c) { + if ((c < max_row_cnt) && (imagi(0, c) != value_type(0))) + for (size_type cc = c + 1; cc < min_dem; ++cc) { + value_type t { 0 }; + + for (size_type r = c + 1; r < cols(); ++r) + t += v_tmp(r, c) * v_tmp(r, cc); + + t /= -v_tmp(c + 1, c); + for (size_type r = c + 1; r < cols(); ++r) + v_tmp(r, cc) += t * v_tmp(r, c); + } + + for (size_type r = 0; r < cols(); ++r) + v_tmp(r, c) = 0; + v_tmp(c, c) = 1; + } + + const size_type pp { p - 1 }; + + // Main iteration loop for the singular values. + // + while (p > value_type(0)) { + size_type c; + size_type kase; + + // Here is where a test for too many iterations would go. + // + // This section of the routine inspects for + // negligible elements in the s and imagi arrays. On + // completion the variables kase and c are set as follows. + // + // case == 1 --> if s (p) and imagi (0, c - 1) are negligible and k < p + // case == 2 --> if s (c) is negligible and c < p + // case == 3 --> if imagi (0, c - 1) is negligible, c < p, and + // s (c), ..., s (p) are not negligible (qr step). + // case == 4 --> if e (p - 1) is negligible (convergence). 
+ // + for (c = p - 2; c >= -1; --c) { + if (c == -1) + break; + + if (std::fabs(imagi (0, c)) <= + EPSILON_ * (std::fabs(s_tmp[c]) + + std::fabs(s_tmp[c + 1]))) { + imagi(0, c) = 0; + break; + } + } + if (c == p - 2) + kase = 4; + + else { + size_type ks; + + for (ks = p - 1; ks >= c; --ks) { + if (ks == c) + break; + + const value_type t { + ks != p ? std::fabs(imagi (0, ks)) : value_type(0) + + ks != c + value_type(1) + ? std::fabs(imagi(0, ks - 1)) + : value_type(0) }; + + if (std::fabs(s_tmp[ks]) <= (EPSILON_ * t)) { + s_tmp[ks] = 0; + break; + } + } + if (ks == c) + kase = 3; + else if (ks == p - 1) + kase = 1; + else { + kase = 2; + c = ks; + } + } + c += 1; + + // Perform the task indicated by kase. + // + switch (kase) { + // Deflate negligible s (p). + // + case 1: + { + value_type f { imagi(0, p - 2) }; + + imagi(0, p - 2) = 0; + for (size_type cc = p - 2; cc >= c; --cc) { + value_type t { hypot(s_tmp[cc], f) }; + value_type cs { s_tmp[cc] / t }; + value_type sn { f / t }; + + s_tmp [cc] = t; + if (cc != c) { + f = -sn * imagi(0, cc - 1); + imagi(0, cc - 1) *= cs; + } + + for (size_type r = 0; r < cols(); ++r) { + t = cs * v_tmp(r, cc) + sn * v_tmp(r, p - 1); + v_tmp(r, p - 1) = + -sn * v_tmp(r, cc) + cs * v_tmp(r, p - 1); + v_tmp(r, cc) = t; + } + } + } + break; + + // Split at negligible s (c). + // + case 2: + { + value_type f { imagi(0, c - 1) }; + + imagi (0, c - 1) = value_type(0.0); + for (size_type cc = c; cc < p; ++cc) { + value_type t { hypot(s_tmp[cc], f) }; + value_type cs { s_tmp[cc] / t }; + value_type sn { f / t }; + + s_tmp[cc] = t; + f = -sn * imagi(0, cc); + imagi(0, cc) *= cs; + + for (size_type r = 0; r < rows(); ++r) { + t = cs * u_tmp(r, cc) + sn * u_tmp(r, c - 1); + u_tmp(r, c - 1) = + -sn * u_tmp(r, cc) + cs * u_tmp(r, c - 1); + u_tmp(r, cc) = t; + } + } + } + break; + + // Perform one qr step. + // + case 3: + { + + // Calculate the shift. + // + const value_type scale { + std::max ( + std::max ( + std::max ( + std::max (std::fabs(s_tmp [p - 1]), + std::fabs(s_tmp [p - 2])), + std::fabs(imagi(0, p - 2))), + std::fabs(s_tmp[c])), + std::fabs(imagi(0, c))) }; + const value_type sp { s_tmp[p - 1] / scale }; + const value_type spm1 { s_tmp[p - 2] / scale }; + const value_type epm1 { imagi(0, p - 2) / scale }; + const value_type sk { s_tmp[c] / scale }; + const value_type ek { imagi(0, c) / scale }; + const value_type b { + ((spm1 + sp) * (spm1 - sp) + epm1 * epm1) / value_type(2) }; + const value_type dd { (sp * epm1) * (sp * epm1) }; + value_type shift { 0 }; + + if (b != value_type(0) || dd != value_type(0)) { + shift = b < value_type(0) + ? -std::sqrt(b * b + dd) + : std::sqrt(b * b + dd); + shift = dd / (b + shift); + } + + value_type f { (sk + sp) * (sk - sp) + shift }; + value_type g { sk * ek }; + + // Chase zeros. 
+ // + for (size_type cc = c; cc < p - 1; ++cc) { + value_type t { hypot(f, g) }; + value_type cs { f / t }; + value_type sn { g / t }; + + if (cc != c) + imagi(0, cc - 1) = t; + + f = cs * s_tmp[cc] + sn * imagi(0, cc); + imagi(0, cc) = cs * imagi(0, cc) - sn * s_tmp[cc]; + g = sn * s_tmp[cc + 1]; + s_tmp[cc + 1] *= cs; + + for (size_type r = 0; r < cols(); ++r) { + t = cs * v_tmp(r, cc) + sn * v_tmp(r, cc + 1); + v_tmp (r, cc + 1) = + -sn * v_tmp(r, cc) + cs * v_tmp(r, cc + 1); + v_tmp(r, cc) = t; + } + + t = hypot(f, g); + cs = f / t; + sn = g / t; + s_tmp[cc] = t; + f = cs * imagi(0, cc) + sn * s_tmp[cc + 1]; + s_tmp[cc + 1] = -sn * imagi(0, cc) + cs * s_tmp[cc + 1]; + g = sn * imagi(0, cc + 1); + imagi(0, cc + 1) = cs * imagi(0, cc + 1); + + if (cc < rows () - 1) + for (size_type r = 0; r < rows (); ++r) { + t = cs * u_tmp(r, cc) + sn * u_tmp(r, cc + 1); + u_tmp (r, cc + 1) = + -sn * u_tmp(r, cc) + cs * u_tmp(r, cc + 1); + u_tmp(r, cc) = t; + } + } + + imagi(0, p - 2) = f; + } + break; + + // Convergence. + // + case 4: + { + // Make the singular values positive. + // + if (s_tmp[c] <= value_type(0)) { + s_tmp[c] = + s_tmp [c] < value_type(0) ? -s_tmp[c] : value_type(0); + + for (size_type r = 0; r <= pp; ++r) + v_tmp(r, c) = -v_tmp(r, c); + } + + // Order the singular values. + // + while (c < pp) { + if (s_tmp[c] >= s_tmp[c + 1]) + break; + + value_type t { s_tmp[c] }; + + s_tmp[c] = s_tmp[c + 1]; + s_tmp[c + 1] = t; + + if (c < cols() - 1) + for (size_type r = 0; r < cols(); ++r) { + t = v_tmp(r, c + 1); + v_tmp(r, c + 1) = v_tmp(r, c); + v_tmp(r, c) = t; + } + + if (c < rows() - 1) + for (size_type r = 0; r < rows(); ++r) { + t = u_tmp(r, c + 1); + u_tmp(r, c + 1) = u_tmp(r, c); + u_tmp(r, c) = t; + } + + c += 1; + } + + p -= 1; + } + break; + } + } + + U.swap(u_tmp); + + S.resize(s_tmp.size(), full_size ? s_tmp.size() : 1); + + size_type row_count = 0; + + for (auto citer = s_tmp.begin(); citer != s_tmp.end(); ++citer, ++row_count) + S(row_count, full_size ? 
row_count : 0) = *citer; + + V.swap(v_tmp); + + return; +} + } // namespace hmdf // ---------------------------------------------------------------------------- diff --git a/test/matrix_tester.cc b/test/matrix_tester.cc index c1e51934..3255b544 100644 --- a/test/matrix_tester.cc +++ b/test/matrix_tester.cc @@ -154,93 +154,95 @@ int main(int, char *[]) { for (long c = 0; c < tran_mat.cols(); ++c) assert(tran_mat(r, c) == col_mat(c, r)); - // // Test arithmetic functions // + { + auto sum_mat = col_mat + row_mat; - auto sum_mat = col_mat + row_mat; - - assert(sum_mat(0, 0) == 0); - assert(sum_mat(4, 5) == 58); - assert(sum_mat(1, 1) == 13); - assert(sum_mat(3, 4) == 45); + assert(sum_mat(0, 0) == 0); + assert(sum_mat(4, 5) == 58); + assert(sum_mat(1, 1) == 13); + assert(sum_mat(3, 4) == 45); - sum_mat += col_mat; - assert(sum_mat(0, 0) == 0); - assert(sum_mat(4, 5) == 87); - assert(sum_mat(1, 1) == 19); - assert(sum_mat(3, 4) == 68); + sum_mat += col_mat; + assert(sum_mat(0, 0) == 0); + assert(sum_mat(4, 5) == 87); + assert(sum_mat(1, 1) == 19); + assert(sum_mat(3, 4) == 68); - row_mat_t lhs_mat { ROWS, COLS }; - col_mat_t rhs_mat { COLS, COLS }; + row_mat_t lhs_mat { ROWS, COLS }; + col_mat_t rhs_mat { COLS, COLS }; - value = 0; - for (long r = 0; r < lhs_mat.rows(); ++r) - for (long c = 0; c < lhs_mat.cols(); ++c) - lhs_mat(r, c) = value++; - value = 0; - for (long c = 0; c < rhs_mat.cols(); ++c) - for (long r = 0; r < rhs_mat.rows(); ++r) - rhs_mat(r, c) = value++; + value = 0; + for (long r = 0; r < lhs_mat.rows(); ++r) + for (long c = 0; c < lhs_mat.cols(); ++c) + lhs_mat(r, c) = value++; + value = 0; + for (long c = 0; c < rhs_mat.cols(); ++c) + for (long r = 0; r < rhs_mat.rows(); ++r) + rhs_mat(r, c) = value++; - auto multi_mat = lhs_mat * rhs_mat; + auto multi_mat = lhs_mat * rhs_mat; - assert(multi_mat(0, 0) == 55); - assert(multi_mat(4, 5) == 5185); - assert(multi_mat(1, 1) == 451); - assert(multi_mat(3, 4) == 3277); + assert(multi_mat(0, 0) == 55); + assert(multi_mat(4, 5) == 5185); + assert(multi_mat(1, 1) == 451); + assert(multi_mat(3, 4) == 3277); - col_mat_t big_lhs_mat { 100, 100 }; - col_mat_t big_rhs_mat { 100, 100 }; + col_mat_t big_lhs_mat { 100, 100 }; + col_mat_t big_rhs_mat { 100, 100 }; - for (long c = 0; c < 100; ++c) - for (long r = 0; r < 100; ++r) { - big_lhs_mat(r, c) = c + 1; - big_rhs_mat(r, c) = c + 1; - } + for (long c = 0; c < 100; ++c) + for (long r = 0; r < 100; ++r) { + big_lhs_mat(r, c) = c + 1; + big_rhs_mat(r, c) = c + 1; + } - auto big_multi_mat = big_lhs_mat * big_rhs_mat; + auto big_multi_mat = big_lhs_mat * big_rhs_mat; - assert(big_multi_mat(0, 0) == 5050); - assert(big_multi_mat(99, 99) == 505000); - assert(big_multi_mat(98, 2) == 15150); - assert(big_multi_mat(2, 5) == 30300); + assert(big_multi_mat(0, 0) == 5050); + assert(big_multi_mat(99, 99) == 505000); + assert(big_multi_mat(98, 2) == 15150); + assert(big_multi_mat(2, 5) == 30300); + } + // Test Inverse // - // Test inverse - // + { + using row_dmat_t = Matrix; - using row_dmat_t = Matrix; + row_dmat_t mat2 { 3, 3 }; - row_dmat_t mat2 { 3, 3 }; + mat2(0, 0) = 2.0; + mat2(0, 1) = 3.0; + mat2(0, 2) = 2.0; - mat2(0, 0) = 2.0; - mat2(0, 1) = 3.0; - mat2(0, 2) = 2.0; + mat2(1, 0) = 3.0; + mat2(1, 1) = 2.0; + mat2(1, 2) = 3.0; - mat2(1, 0) = 3.0; - mat2(1, 1) = 2.0; - mat2(1, 2) = 3.0; + mat2(2, 0) = 4.0; + mat2(2, 1) = 2.0; + mat2(2, 2) = 2.0; - mat2(2, 0) = 4.0; - mat2(2, 1) = 2.0; - mat2(2, 2) = 2.0; + row_dmat_t mat2_inv = mat2.inverse(); + auto mat3 = mat2 * mat2_inv; - row_dmat_t mat2_inv 
= mat2.inverse(); - auto mat3 = mat2 * mat2_inv; + // It must result to identity matrix + // + assert((std::fabs(mat3(0, 0) - 1.0) < 0.00000001)); + assert((std::fabs(mat3(1, 1) - 1.0) < 0.00000001)); + assert((std::fabs(mat3(2, 2) - 1.0) < 0.00000001)); + assert((std::fabs(mat3(0, 1) - 0.0) < 0.00000001)); + assert((std::fabs(mat3(0, 2) - 0.0) < 0.00000001)); + assert((std::fabs(mat3(1, 0) - 0.0) < 0.00000001)); + assert((std::fabs(mat3(1, 2) - 0.0) < 0.00000001)); + assert((std::fabs(mat3(2, 0) - 0.0) < 0.00000001)); + assert((std::fabs(mat3(2, 1) - 0.0) < 0.00000001)); + } - // It must result to identity matrix + // Test Eigen space // - assert((std::fabs(mat3(0, 0) - 1.0) < 0.00000001)); - assert((std::fabs(mat3(1, 1) - 1.0) < 0.00000001)); - assert((std::fabs(mat3(2, 2) - 1.0) < 0.00000001)); - assert((std::fabs(mat3(0, 1) - 0.0) < 0.00000001)); - assert((std::fabs(mat3(0, 2) - 0.0) < 0.00000001)); - assert((std::fabs(mat3(1, 0) - 0.0) < 0.00000001)); - assert((std::fabs(mat3(1, 2) - 0.0) < 0.00000001)); - assert((std::fabs(mat3(2, 0) - 0.0) < 0.00000001)); - assert((std::fabs(mat3(2, 1) - 0.0) < 0.00000001)); - { using col_dmat_t = Matrix; @@ -297,6 +299,8 @@ int main(int, char *[]) { assert((std::fabs(eigenvecs(9, 9) - -0.51616) < 0.00001)); } + // Test Covariance matrix + // { using col_dmat_t = Matrix; @@ -338,6 +342,31 @@ int main(int, char *[]) { assert((std::fabs(cov(3, 3) - 0.0258172) < 0.0000001)); } + // Test SVD decomposition + // + { + using col_dmat_t = Matrix; + + col_dmat_t col_mat { 8, 4 }; + std::size_t value { 0 }; + + for (long r = 0; r < col_mat.rows(); ++r) + for (long c = 0; c < col_mat.cols(); ++c) + col_mat(r, c) = double(++value); + + col_dmat_t U; + col_dmat_t S; + col_dmat_t V; + + col_mat.svd (U, S, V); + + const auto col_mat2 = U * S * V.transpose(); + + for (long r = 0; r < col_mat2.rows(); ++r) + for (long c = 0; c < col_mat2.cols(); ++c) + assert((std::fabs(col_mat(r, c) - col_mat2(r, c)) < 0.0001)); + } + return (0); }
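Finally, a sketch of how the new svd() could back the pca_method::svd option declared in PCAParams (illustrative only; it uses just the Matrix API added in this patch, and the fixed rank cut-off k stands in for the num_comp_kept / pct_comp_kept logic, which is not implemented here).

#include <DataFrame/Utils/Matrix.h>

#include <cassert>

using namespace hmdf;

int main(int, char *[])  {

    using col_dmat_t = Matrix<double, matrix_orient::column_major>;

    col_dmat_t  data { 8, 4 };

    for (long r = 0; r < data.rows(); ++r)
        for (long c = 0; c < data.cols(); ++c)
            data(r, c) = double(r * data.cols() + c + 1);

    col_dmat_t  U;
    col_dmat_t  S;
    col_dmat_t  V;

    data.svd(U, S, V);  // full_size == true, so S is square and diagonal

    // Keep only the first k singular values -- k = 2 is arbitrary here,
    // standing in for PCAParams::num_comp_kept.
    //
    const long  k = 2;

    for (long i = k; i < S.rows(); ++i)
        S(i, i) = 0.0;

    const auto  reduced = U * S * V.transpose();  // rank-k approximation

    assert(reduced.rows() == data.rows());
    assert(reduced.cols() == data.cols());

    return (0);
}

In the DataFrame itself, prin_comp_analysis() presumably wires this Matrix-level step together with the normalization_type and the PCAParams fields shown above; the sketch covers only the decomposition and truncation.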