Skip to content

Commit

Permalink
Merge pull request #1 from tvanderbruggen/pairwise_accumulate
Browse files Browse the repository at this point in the history
Pairwise accumulate
  • Loading branch information
tvanderbruggen authored Feb 21, 2019
2 parents e4650a1 + 669234b commit 904505f
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 89 deletions.
126 changes: 98 additions & 28 deletions scicpp/core/functional.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,12 +205,93 @@ filter_reduce(const Array &a, BinaryOp op, T init, UnaryPredicate filter) {
return filter_reduce(a.cbegin(), a.cend(), op, init, filter);
}

//---------------------------------------------------------------------------------
// reduce
//---------------------------------------------------------------------------------

template <class Array, class BinaryOp, typename T = typename Array::value_type>
[[nodiscard]] constexpr scicpp_pure auto
reduce(const Array &a, BinaryOp op, T init) {
return filter_reduce(a, op, init, filters::all);
}

//---------------------------------------------------------------------------------
// pairwise_accumulate
//
// Implements pairwise recursion
// https://en.wikipedia.org/wiki/Pairwise_summation
//
// | BLOCK | BLOCK | BLOCK | BLOCK | BLOCK | BLOCK | BLOCK | BLOCK |
// | | | | | | | |
// acc acc acc acc acc acc acc acc
// |_______| |_______| |_______| |_______|
// | | | |
// combine combine combine combine
// |_______________| |_______________|
// |_______________________________|
// |
// combine
// |
// Result
//---------------------------------------------------------------------------------

template <signed_size_t PW_BLOCKSIZE,
class InputIt,
class AccumulateOp,
class CombineOp>
[[nodiscard]] constexpr auto pairwise_accumulate(InputIt first,
InputIt last,
AccumulateOp accop,
CombineOp combop) {
const auto size = std::distance(first, last);

if (size <= PW_BLOCKSIZE) {
return accop(first, last);
} else {
return combop(pairwise_accumulate<PW_BLOCKSIZE>(
first, first + size / 2, accop, combop),
pairwise_accumulate<PW_BLOCKSIZE>(
first + size / 2, last, accop, combop));
}
}

template <signed_size_t PW_BLOCKSIZE,
class InputItLhs,
class InputItRhs,
class AccumulateOp,
class CombineOp>
[[nodiscard]] constexpr auto pairwise_accumulate(InputItLhs first1,
InputItLhs last1,
InputItRhs first2,
InputItRhs last2,
AccumulateOp accop,
CombineOp combop) {
const auto size = std::distance(first1, last1);
scicpp_require(size == std::distance(first2, last2));

if (size <= PW_BLOCKSIZE) {
return accop(first1, last1, first2, last2);
} else {
return combop(pairwise_accumulate<PW_BLOCKSIZE>(first1,
first1 + size / 2,
first2,
first2 + size / 2,
accop,
combop),
pairwise_accumulate<PW_BLOCKSIZE>(first1 + size / 2,
last1,
first2 + size / 2,
last2,
accop,
combop));
}
}

//---------------------------------------------------------------------------------
// filter_reduce_associative
//
// Use pairwise recursion for improved precision in associative operations.
//
// https://en.wikipedia.org/wiki/Pairwise_summation
// https://github.com/numpy/numpy/pull/3685
// https://github.com/JuliaLang/julia/pull/4039
//---------------------------------------------------------------------------------
Expand All @@ -223,26 +304,25 @@ template <class InputIt,
filter_reduce_associative(InputIt first,
InputIt last,
AssociativeBinaryOp op,
T init,
UnaryPredicate filter) {
UnaryPredicate filter,
T id_elt = T{0}) {
if constexpr (std::is_integral_v<T>) {
// No precision problem for integers, as long as you don't overflow ...
return filter_reduce(first, last, op, init, filter);
return filter_reduce(first, last, op, id_elt, filter);
} else {
static_assert(std::is_floating_point_v<T> || meta::is_complex_v<T>);

constexpr long PW_BLOCKSIZE = 64;
const auto size = std::distance(first, last);

if (size <= PW_BLOCKSIZE) {
return filter_reduce(first, last, op, init, filter);
} else {
const auto [res1, cnt1] = filter_reduce_associative(
first, first + size / 2, op, init, filter);
const auto [res2, cnt2] = filter_reduce_associative(
first + size / 2, last, op, init, filter);
return std::make_tuple(op(res1, res2), cnt1 + cnt2);
}
return pairwise_accumulate<64>(
first,
last,
[&](auto f, auto l) {
return filter_reduce(f, l, op, id_elt, filter);
},
[&](const auto res1, const auto res2) {
const auto [x1, n1] = res1;
const auto [x2, n2] = res2;
return std::make_tuple(op(x1, x2), n1 + n2);
});
}
}

Expand All @@ -251,18 +331,8 @@ template <class Array,
class AssociativeBinaryOp,
typename T = typename Array::value_type>
[[nodiscard]] constexpr scicpp_pure auto filter_reduce_associative(
const Array &a, AssociativeBinaryOp op, T init, UnaryPredicate filter) {
return filter_reduce_associative(a.cbegin(), a.cend(), op, init, filter);
}

//---------------------------------------------------------------------------------
// reduce
//---------------------------------------------------------------------------------

template <class Array, class BinaryOp, typename T = typename Array::value_type>
[[nodiscard]] constexpr scicpp_pure auto
reduce(const Array &a, BinaryOp op, T init) {
return filter_reduce(a, op, init, filters::all);
const Array &a, AssociativeBinaryOp op, UnaryPredicate filter) {
return filter_reduce_associative(a.cbegin(), a.cend(), op, filter);
}

//---------------------------------------------------------------------------------
Expand Down
51 changes: 24 additions & 27 deletions scicpp/core/numeric.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ namespace scicpp {

template <class InputIt, class Predicate>
constexpr auto sum(InputIt first, InputIt last, Predicate filter) {
using T = typename std::iterator_traits<InputIt>::value_type;
return filter_reduce_associative(first, last, std::plus<>(), T{0}, filter);
return filter_reduce_associative(first, last, std::plus<>(), filter);
}

template <class InputIt>
Expand All @@ -53,12 +52,11 @@ auto nansum(const Array &f) {
// prod
//---------------------------------------------------------------------------------

template <class InputIt,
class Predicate,
typename T = typename std::iterator_traits<InputIt>::value_type>
template <class InputIt, class Predicate>
constexpr auto prod(InputIt first, InputIt last, Predicate filter) {
using T = typename std::iterator_traits<InputIt>::value_type;
return filter_reduce_associative(
first, last, std::multiplies<>(), T{1}, filter);
first, last, std::multiplies<>(), filter, T{1});
}

template <class InputIt>
Expand Down Expand Up @@ -213,31 +211,30 @@ auto diff(const std::vector<T> &a, int n = 1) {
//---------------------------------------------------------------------------------

template <class InputItLhs, class InputItRhs, class ProductOp>
constexpr auto scicpp_pure inner(InputItLhs first1,
InputItLhs last1,
InputItRhs first2,
InputItRhs last2,
ProductOp op) {
constexpr auto inner(InputItLhs first1,
InputItLhs last1,
InputItRhs first2,
InputItRhs last2,
ProductOp op) {
using T = typename std::common_type_t<
typename std::iterator_traits<InputItLhs>::value_type,
typename std::iterator_traits<InputItRhs>::value_type>;

constexpr long PW_BLOCKSIZE = 64;
const auto size = std::distance(first1, last1);
scicpp_require(size == std::distance(first2, last2));

if (size <= PW_BLOCKSIZE) {
auto res = T{0};

for (; first1 != last1; ++first1, ++first2) {
res += op(*first1, *first2);
}

return res;
} else {
return inner(first1, first1 + size / 2, first2, first2 + size / 2, op) +
inner(first1 + size / 2, last1, first2 + size / 2, last2, op);
}
return pairwise_accumulate<64>(
first1,
last1,
first2,
last2,
[&](auto f1, auto l1, auto f2, [[maybe_unused]] auto l2) scicpp_pure {
auto res = T{0};

for (; f1 != l1; ++f1, ++f2) {
res += op(*f1, *f2);
}

return res;
},
std::plus<>());
}

template <class InputIt>
Expand Down
71 changes: 37 additions & 34 deletions scicpp/core/stats.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <cmath>
#include <limits>
#include <numeric>
#include <tuple>

namespace scicpp::stats {

Expand Down Expand Up @@ -119,40 +120,42 @@ template <class InputIt,
class Predicate,
typename T = typename std::iterator_traits<InputIt>::value_type>
constexpr auto variance_helper(InputIt first, InputIt last, Predicate filter) {
constexpr long PW_BLOCKSIZE = 64;
const auto size = std::distance(first, last);

if (size <= PW_BLOCKSIZE) {
const T m0 = mean(first, last, filter);

const auto [res, cnt] =
filter_reduce(first,
last,
[m0](auto r, auto v) {
const T diff = v - m0;
return r + diff * diff;
// Benchmark: this is slower on both GCC and Clang
// (and not constexpr)
// return std::fma(diff, diff, r);
},
T{0},
filter);

return std::make_tuple(m0, res / T(cnt), cnt);
} else {
const auto [m1, var1, n1] =
variance_helper(first, first + size / 2, filter);
const auto [m2, var2, n2] =
variance_helper(first + size / 2, last, filter);
// Combine variances
// https://www.emathzone.com/tutorials/basic-statistics/combined-variance.html
const auto n_c = n1 + n2;
const auto r = T{1} / T(n_c);
const auto m_c = r * (T(n1) * m1 + T(n2) * m2);
const auto var_c = r * (T(n1) * (var1 + (m1 - m_c) * (m1 - m_c)) +
T(n2) * (var2 + (m2 - m_c) * (m2 - m_c)));
return std::make_tuple(m_c, var_c, n_c);
}
return pairwise_accumulate<64>(
first,
last,
[&](auto f, auto l) {
const T m0 = mean(f, l, filter);

// Cannot use structure binding directly here with GCC => bug ??
// const auto [res, cnt] =
const auto t = filter_reduce(f,
l,
[m0](auto r, auto v) {
const T diff = v - m0;
return r + diff * diff;
// Benchmark: this is slower on both GCC and Clang
// (and also not constexpr)
// return std::fma(diff, diff, r);
},
T{0},
filter);

const auto [res, cnt] = t;
return std::make_tuple(m0, res / T(cnt), cnt);
},
[&](const auto res1, const auto res2) {
// Combine variances
// https://www.emathzone.com/tutorials/basic-statistics/combined-variance.html
const auto [m1, var1, n1] = res1;
const auto [m2, var2, n2] = res2;

const auto n_c = n1 + n2;
const auto r = T{1} / T(n_c);
const auto m_c = r * (T(n1) * m1 + T(n2) * m2);
const auto var_c = r * (T(n1) * (var1 + (m1 - m_c) * (m1 - m_c)) +
T(n2) * (var2 + (m2 - m_c) * (m2 - m_c)));
return std::make_tuple(m_c, var_c, n_c);
});
}

} // namespace detail
Expand Down

0 comments on commit 904505f

Please sign in to comment.