Merge pull request #1 from tvanderbruggen/pairwise_accumulate

Pairwise accumulate
tvanderbruggen · Feb 21, 2019 · 904505f · 904505f
2 parents e4650a1 + 669234b
commit 904505f
Show file tree

Hide file tree

Showing 3 changed files with 159 additions and 89 deletions.
diff --git a/scicpp/core/functional.hpp b/scicpp/core/functional.hpp
@@ -205,12 +205,93 @@ filter_reduce(const Array &a, BinaryOp op, T init, UnaryPredicate filter) {
     return filter_reduce(a.cbegin(), a.cend(), op, init, filter);
 }
 
+//---------------------------------------------------------------------------------
+// reduce
+//---------------------------------------------------------------------------------
+
+template <class Array, class BinaryOp, typename T = typename Array::value_type>
+[[nodiscard]] constexpr scicpp_pure auto
+reduce(const Array &a, BinaryOp op, T init) {
+    return filter_reduce(a, op, init, filters::all);
+}
+
+//---------------------------------------------------------------------------------
+// pairwise_accumulate
+//
+// Implements pairwise recursion
+// https://en.wikipedia.org/wiki/Pairwise_summation
+//
+//  | BLOCK | BLOCK | BLOCK | BLOCK | BLOCK | BLOCK | BLOCK | BLOCK |
+//      |       |       |       |       |       |       |       |
+//     acc     acc     acc     acc     acc     acc     acc     acc
+//      |_______|       |_______|       |_______|       |_______|
+//          |               |               |               |
+//       combine         combine         combine         combine
+//          |_______________|               |_______________|
+//                  |_______________________________|
+//                                  |
+//                               combine
+//                                  |
+//                                Result
+//---------------------------------------------------------------------------------
+
+template <signed_size_t PW_BLOCKSIZE,
+          class InputIt,
+          class AccumulateOp,
+          class CombineOp>
+[[nodiscard]] constexpr auto pairwise_accumulate(InputIt first,
+                                                 InputIt last,
+                                                 AccumulateOp accop,
+                                                 CombineOp combop) {
+    const auto size = std::distance(first, last);
+
+    if (size <= PW_BLOCKSIZE) {
+        return accop(first, last);
+    } else {
+        return combop(pairwise_accumulate<PW_BLOCKSIZE>(
+                          first, first + size / 2, accop, combop),
+                      pairwise_accumulate<PW_BLOCKSIZE>(
+                          first + size / 2, last, accop, combop));
+    }
+}
+
+template <signed_size_t PW_BLOCKSIZE,
+          class InputItLhs,
+          class InputItRhs,
+          class AccumulateOp,
+          class CombineOp>
+[[nodiscard]] constexpr auto pairwise_accumulate(InputItLhs first1,
+                                                 InputItLhs last1,
+                                                 InputItRhs first2,
+                                                 InputItRhs last2,
+                                                 AccumulateOp accop,
+                                                 CombineOp combop) {
+    const auto size = std::distance(first1, last1);
+    scicpp_require(size == std::distance(first2, last2));
+
+    if (size <= PW_BLOCKSIZE) {
+        return accop(first1, last1, first2, last2);
+    } else {
+        return combop(pairwise_accumulate<PW_BLOCKSIZE>(first1,
+                                                        first1 + size / 2,
+                                                        first2,
+                                                        first2 + size / 2,
+                                                        accop,
+                                                        combop),
+                      pairwise_accumulate<PW_BLOCKSIZE>(first1 + size / 2,
+                                                        last1,
+                                                        first2 + size / 2,
+                                                        last2,
+                                                        accop,
+                                                        combop));
+    }
+}
+
 //---------------------------------------------------------------------------------
 // filter_reduce_associative
 //
 // Use pairwise recursion for improved precision in associative operations.
 //
-// https://en.wikipedia.org/wiki/Pairwise_summation
 // https://github.com/numpy/numpy/pull/3685
 // https://github.com/JuliaLang/julia/pull/4039
 //---------------------------------------------------------------------------------
@@ -223,26 +304,25 @@ template <class InputIt,
 filter_reduce_associative(InputIt first,
                           InputIt last,
                           AssociativeBinaryOp op,
-                          T init,
-                          UnaryPredicate filter) {
+                          UnaryPredicate filter,
+                          T id_elt = T{0}) {
     if constexpr (std::is_integral_v<T>) {
         // No precision problem for integers, as long as you don't overflow ...
-        return filter_reduce(first, last, op, init, filter);
+        return filter_reduce(first, last, op, id_elt, filter);
     } else {
         static_assert(std::is_floating_point_v<T> || meta::is_complex_v<T>);
 
-        constexpr long PW_BLOCKSIZE = 64;
-        const auto size = std::distance(first, last);
-
-        if (size <= PW_BLOCKSIZE) {
-            return filter_reduce(first, last, op, init, filter);
-        } else {
-            const auto [res1, cnt1] = filter_reduce_associative(
-                first, first + size / 2, op, init, filter);
-            const auto [res2, cnt2] = filter_reduce_associative(
-                first + size / 2, last, op, init, filter);
-            return std::make_tuple(op(res1, res2), cnt1 + cnt2);
-        }
+        return pairwise_accumulate<64>(
+            first,
+            last,
+            [&](auto f, auto l) {
+                return filter_reduce(f, l, op, id_elt, filter);
+            },
+            [&](const auto res1, const auto res2) {
+                const auto [x1, n1] = res1;
+                const auto [x2, n2] = res2;
+                return std::make_tuple(op(x1, x2), n1 + n2);
+            });
     }
 }
 
@@ -251,18 +331,8 @@ template <class Array,
           class AssociativeBinaryOp,
           typename T = typename Array::value_type>
 [[nodiscard]] constexpr scicpp_pure auto filter_reduce_associative(
-    const Array &a, AssociativeBinaryOp op, T init, UnaryPredicate filter) {
-    return filter_reduce_associative(a.cbegin(), a.cend(), op, init, filter);
-}
-
-//---------------------------------------------------------------------------------
-// reduce
-//---------------------------------------------------------------------------------
-
-template <class Array, class BinaryOp, typename T = typename Array::value_type>
-[[nodiscard]] constexpr scicpp_pure auto
-reduce(const Array &a, BinaryOp op, T init) {
-    return filter_reduce(a, op, init, filters::all);
+    const Array &a, AssociativeBinaryOp op, UnaryPredicate filter) {
+    return filter_reduce_associative(a.cbegin(), a.cend(), op, filter);
 }
 
 //---------------------------------------------------------------------------------

diff --git a/scicpp/core/numeric.hpp b/scicpp/core/numeric.hpp
@@ -25,8 +25,7 @@ namespace scicpp {
 
 template <class InputIt, class Predicate>
 constexpr auto sum(InputIt first, InputIt last, Predicate filter) {
-    using T = typename std::iterator_traits<InputIt>::value_type;
-    return filter_reduce_associative(first, last, std::plus<>(), T{0}, filter);
+    return filter_reduce_associative(first, last, std::plus<>(), filter);
 }
 
 template <class InputIt>
@@ -53,12 +52,11 @@ auto nansum(const Array &f) {
 // prod
 //---------------------------------------------------------------------------------
 
-template <class InputIt,
-          class Predicate,
-          typename T = typename std::iterator_traits<InputIt>::value_type>
+template <class InputIt, class Predicate>
 constexpr auto prod(InputIt first, InputIt last, Predicate filter) {
+    using T = typename std::iterator_traits<InputIt>::value_type;
     return filter_reduce_associative(
-        first, last, std::multiplies<>(), T{1}, filter);
+        first, last, std::multiplies<>(), filter, T{1});
 }
 
 template <class InputIt>
@@ -213,31 +211,30 @@ auto diff(const std::vector<T> &a, int n = 1) {
 //---------------------------------------------------------------------------------
 
 template <class InputItLhs, class InputItRhs, class ProductOp>
-constexpr auto scicpp_pure inner(InputItLhs first1,
-                                 InputItLhs last1,
-                                 InputItRhs first2,
-                                 InputItRhs last2,
-                                 ProductOp op) {
+constexpr auto inner(InputItLhs first1,
+                     InputItLhs last1,
+                     InputItRhs first2,
+                     InputItRhs last2,
+                     ProductOp op) {
     using T = typename std::common_type_t<
         typename std::iterator_traits<InputItLhs>::value_type,
         typename std::iterator_traits<InputItRhs>::value_type>;
 
-    constexpr long PW_BLOCKSIZE = 64;
-    const auto size = std::distance(first1, last1);
-    scicpp_require(size == std::distance(first2, last2));
-
-    if (size <= PW_BLOCKSIZE) {
-        auto res = T{0};
-
-        for (; first1 != last1; ++first1, ++first2) {
-            res += op(*first1, *first2);
-        }
-
-        return res;
-    } else {
-        return inner(first1, first1 + size / 2, first2, first2 + size / 2, op) +
-               inner(first1 + size / 2, last1, first2 + size / 2, last2, op);
-    }
+    return pairwise_accumulate<64>(
+        first1,
+        last1,
+        first2,
+        last2,
+        [&](auto f1, auto l1, auto f2, [[maybe_unused]] auto l2) scicpp_pure {
+            auto res = T{0};
+
+            for (; f1 != l1; ++f1, ++f2) {
+                res += op(*f1, *f2);
+            }
+
+            return res;
+        },
+        std::plus<>());
 }
 
 template <class InputIt>

diff --git a/scicpp/core/stats.hpp b/scicpp/core/stats.hpp
@@ -11,6 +11,7 @@
 #include <cmath>
 #include <limits>
 #include <numeric>
+#include <tuple>
 
 namespace scicpp::stats {
 
@@ -119,40 +120,42 @@ template <class InputIt,
           class Predicate,
           typename T = typename std::iterator_traits<InputIt>::value_type>
 constexpr auto variance_helper(InputIt first, InputIt last, Predicate filter) {
-    constexpr long PW_BLOCKSIZE = 64;
-    const auto size = std::distance(first, last);
-
-    if (size <= PW_BLOCKSIZE) {
-        const T m0 = mean(first, last, filter);
-
-        const auto [res, cnt] =
-            filter_reduce(first,
-                          last,
-                          [m0](auto r, auto v) {
-                              const T diff = v - m0;
-                              return r + diff * diff;
-                              // Benchmark: this is slower on both GCC and Clang
-                              // (and not constexpr)
-                              // return std::fma(diff, diff, r);
-                          },
-                          T{0},
-                          filter);
-
-        return std::make_tuple(m0, res / T(cnt), cnt);
-    } else {
-        const auto [m1, var1, n1] =
-            variance_helper(first, first + size / 2, filter);
-        const auto [m2, var2, n2] =
-            variance_helper(first + size / 2, last, filter);
-        // Combine variances
-        // https://www.emathzone.com/tutorials/basic-statistics/combined-variance.html
-        const auto n_c = n1 + n2;
-        const auto r = T{1} / T(n_c);
-        const auto m_c = r * (T(n1) * m1 + T(n2) * m2);
-        const auto var_c = r * (T(n1) * (var1 + (m1 - m_c) * (m1 - m_c)) +
-                                T(n2) * (var2 + (m2 - m_c) * (m2 - m_c)));
-        return std::make_tuple(m_c, var_c, n_c);
-    }
+    return pairwise_accumulate<64>(
+        first,
+        last,
+        [&](auto f, auto l) {
+            const T m0 = mean(f, l, filter);
+
+            // Cannot use structure binding directly here with GCC => bug ??
+            // const auto [res, cnt] =
+            const auto t = filter_reduce(f,
+                                         l,
+                                         [m0](auto r, auto v) {
+                                             const T diff = v - m0;
+                                             return r + diff * diff;
+                                             // Benchmark: this is slower on both GCC and Clang
+                                             // (and also not constexpr)
+                                             // return std::fma(diff, diff, r);
+                                         },
+                                         T{0},
+                                         filter);
+
+            const auto [res, cnt] = t;
+            return std::make_tuple(m0, res / T(cnt), cnt);
+        },
+        [&](const auto res1, const auto res2) {
+            // Combine variances
+            // https://www.emathzone.com/tutorials/basic-statistics/combined-variance.html
+            const auto [m1, var1, n1] = res1;
+            const auto [m2, var2, n2] = res2;
+
+            const auto n_c = n1 + n2;
+            const auto r = T{1} / T(n_c);
+            const auto m_c = r * (T(n1) * m1 + T(n2) * m2);
+            const auto var_c = r * (T(n1) * (var1 + (m1 - m_c) * (m1 - m_c)) +
+                                    T(n2) * (var2 + (m2 - m_c) * (m2 - m_c)));
+            return std::make_tuple(m_c, var_c, n_c);
+        });
 }
 
 } // namespace detail