diff --git a/modules/compiler/builtins/abacus/include/abacus/internal/is_odd.h b/modules/compiler/builtins/abacus/include/abacus/internal/is_odd.h index 9fd11d1be..eaba9a337 100644 --- a/modules/compiler/builtins/abacus/include/abacus/internal/is_odd.h +++ b/modules/compiler/builtins/abacus/include/abacus/internal/is_odd.h @@ -45,13 +45,19 @@ inline typename TypeTraits::SignedType is_odd(const T &x) { const SignedType unbiasedExp = abacus::internal::logb_unsafe(x); // We're using the exponent to shift, we can only shift by [0, ) - const SignedType validExp = (unbiasedExp >= 0) & (unbiasedExp < numBits); + // Scalar relational comparisons return +1 for true, vector relational + // comparisons return -1. -1 has all bits set so is useful as a mask, so make + // sure we get that even for scalars. + const SignedType validExp = + TypeTraits::num_elements == 1 + ? -((unbiasedExp >= 0) & (unbiasedExp < numBits)) + : (unbiasedExp >= 0) & (unbiasedExp < numBits); // Shifting our mantissa by the exponent means that the hidden bit now holds // the least significant bit of the integer component of the float. If this // is set it means that the number is odd. const SignedType hiddenBitMasked = - (mantissa << unbiasedExp) & mantissaHiddenBit; + (mantissa << (validExp & unbiasedExp)) & mantissaHiddenBit; // Truncates any remaining fractional bits, since we're only interested in // the last bit of integer component. diff --git a/modules/compiler/builtins/abacus/include/abacus/internal/multiply_exact.h b/modules/compiler/builtins/abacus/include/abacus/internal/multiply_exact.h index a3413d700..0f977ebbe 100644 --- a/modules/compiler/builtins/abacus/include/abacus/internal/multiply_exact.h +++ b/modules/compiler/builtins/abacus/include/abacus/internal/multiply_exact.h @@ -21,6 +21,16 @@ #include #include +// Perform exact multiplication of two floating point values. +// +// After hi = multiply_exact(x, y, &lo), unless overflow occurred, hi = x * y +// according to floating point rules, and hi + lo = x * y according to +// mathematical rules. +// +// Dekker, T.J. A floating-point technique for extending the available +// precision. Numer. Math. 18, 224–242 (1971). +// https://doi.org/10.1007/BF01397083 + namespace { template struct multiply_exact_helper { @@ -31,8 +41,9 @@ struct multiply_exact_helper { UnsignedType C = 1; // shift is the number of mantissa bits plus 1 for the implicit bit, then - // divided by two as we're splitting `x` into two parts. - const SignedType shift = (FPShape::Mantissa() + 1) / 2; + // divided by two as we're splitting `x` into two parts, rounded up as + // described in section 6.3 with reference to 5.7. + const SignedType shift = FPShape::Mantissa() / 2 + 1; C = (C << shift) + 1; const T gamma = x * abacus::detail::cast::convert(C); const T delta = x - gamma; @@ -41,37 +52,6 @@ struct multiply_exact_helper { } }; -#ifdef __CA_BUILTINS_HALF_SUPPORT - -// This is very similar to the templated function, however because half's have -// an odd (11 including hidden bit) number of bits in the mantissa it doesn't -// quite fit in. -// Potentially you could change the line in the template -// const SignedType shift = (FPShape::Mantissa() + 1) / 2; -// to -// const SignedType shift = (FPShape::Mantissa() + 2) / 2; -// and it should theoretically be the same for 32/64 bit, and also work for 16 -// bit. However I don't have the tests for that so for the time being we're just -// specializing the 16 bit version: -template <> -struct multiply_exact_helper { - template - static void split(const T &x, T *x_hi, T *x_lo) { - // Derived from the 'Handbook of Floating Point Arithmetic', - // section 4.4 (page 132) - - T C = 64.0f16; // 2^6 - T gamma = C * x; - T delta = x - gamma; - T xh = gamma + delta; - - *x_lo = x - xh; - *x_hi = xh; - } -}; - -#endif - template <> struct multiply_exact_helper { template @@ -96,8 +76,6 @@ struct multiply_exact_helper { namespace abacus { namespace internal { -// See paper on error free transformation of the product of two floating point -// numbers(https://doi.org/10.1007/BF01397083) where this algorithm taken from template inline T multiply_exact(const T x, const T y, T *out_remainder) { // TODO