Skip to content

Commit

Permalink
Merge pull request #200 from hvdijk/math
Browse files Browse the repository at this point in the history
Fix exact multiplication of doubles, fix undefined behavior in is_odd.
  • Loading branch information
hvdijk authored Nov 13, 2023
2 parents dae3d36 + 2099483 commit 98388a1
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,19 @@ inline typename TypeTraits<T>::SignedType is_odd(const T &x) {
const SignedType unbiasedExp = abacus::internal::logb_unsafe(x);

// We're using the exponent to shift, we can only shift by [0, <bits in type>)
const SignedType validExp = (unbiasedExp >= 0) & (unbiasedExp < numBits);
// Scalar relational comparisons return +1 for true, vector relational
// comparisons return -1. -1 has all bits set so is useful as a mask, so make
// sure we get that even for scalars.
const SignedType validExp =
TypeTraits<T>::num_elements == 1
? -((unbiasedExp >= 0) & (unbiasedExp < numBits))
: (unbiasedExp >= 0) & (unbiasedExp < numBits);

// Shifting our mantissa by the exponent means that the hidden bit now holds
// the least significant bit of the integer component of the float. If this
// is set it means that the number is odd.
const SignedType hiddenBitMasked =
(mantissa << unbiasedExp) & mantissaHiddenBit;
(mantissa << (validExp & unbiasedExp)) & mantissaHiddenBit;

// Truncates any remaining fractional bits, since we're only interested in
// the last bit of integer component.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@
#include <abacus/abacus_detail_cast.h>
#include <abacus/abacus_type_traits.h>

// Perform exact multiplication of two floating point values.
//
// After hi = multiply_exact(x, y, &lo), unless overflow occurred, hi = x * y
// according to floating point rules, and hi + lo = x * y according to
// mathematical rules.
//
// Dekker, T.J. A floating-point technique for extending the available
// precision. Numer. Math. 18, 224–242 (1971).
// https://doi.org/10.1007/BF01397083

namespace {
template <typename E>
struct multiply_exact_helper {
Expand All @@ -31,8 +41,9 @@ struct multiply_exact_helper {

UnsignedType C = 1;
// shift is the number of mantissa bits plus 1 for the implicit bit, then
// divided by two as we're splitting `x` into two parts.
const SignedType shift = (FPShape<E>::Mantissa() + 1) / 2;
// divided by two as we're splitting `x` into two parts, rounded up as
// described in section 6.3 with reference to 5.7.
const SignedType shift = FPShape<E>::Mantissa() / 2 + 1;
C = (C << shift) + 1;
const T gamma = x * abacus::detail::cast::convert<E>(C);
const T delta = x - gamma;
Expand All @@ -41,37 +52,6 @@ struct multiply_exact_helper {
}
};

#ifdef __CA_BUILTINS_HALF_SUPPORT

// This is very similar to the templated function, however because half's have
// an odd (11 including hidden bit) number of bits in the mantissa it doesn't
// quite fit in.
// Potentially you could change the line in the template
// const SignedType shift = (FPShape<E>::Mantissa() + 1) / 2;
// to
// const SignedType shift = (FPShape<E>::Mantissa() + 2) / 2;
// and it should theoretically be the same for 32/64 bit, and also work for 16
// bit. However I don't have the tests for that so for the time being we're just
// specializing the 16 bit version:
template <>
struct multiply_exact_helper<abacus_half> {
template <typename T>
static void split(const T &x, T *x_hi, T *x_lo) {
// Derived from the 'Handbook of Floating Point Arithmetic',
// section 4.4 (page 132)

T C = 64.0f16; // 2^6
T gamma = C * x;
T delta = x - gamma;
T xh = gamma + delta;

*x_lo = x - xh;
*x_hi = xh;
}
};

#endif

template <>
struct multiply_exact_helper<abacus_float> {
template <typename T>
Expand All @@ -96,8 +76,6 @@ struct multiply_exact_helper<abacus_float> {

namespace abacus {
namespace internal {
// See paper on error free transformation of the product of two floating point
// numbers(https://doi.org/10.1007/BF01397083) where this algorithm taken from
template <typename T>
inline T multiply_exact(const T x, const T y, T *out_remainder) {
// TODO
Expand Down

0 comments on commit 98388a1

Please sign in to comment.