Merge pull request #200 from hvdijk/math

Fix exact multiplication of doubles, fix undefined behavior in is_odd.
uxlfoundation · Nov 13, 2023 · 98388a1 · 98388a1
2 parents dae3d36 + 2099483
commit 98388a1
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 37 deletions.
diff --git a/modules/compiler/builtins/abacus/include/abacus/internal/is_odd.h b/modules/compiler/builtins/abacus/include/abacus/internal/is_odd.h
@@ -45,13 +45,19 @@ inline typename TypeTraits<T>::SignedType is_odd(const T &x) {
   const SignedType unbiasedExp = abacus::internal::logb_unsafe(x);
 
   // We're using the exponent to shift, we can only shift by [0, <bits in type>)
-  const SignedType validExp = (unbiasedExp >= 0) & (unbiasedExp < numBits);
+  // Scalar relational comparisons return +1 for true, vector relational
+  // comparisons return -1. -1 has all bits set so is useful as a mask, so make
+  // sure we get that even for scalars.
+  const SignedType validExp =
+      TypeTraits<T>::num_elements == 1
+          ? -((unbiasedExp >= 0) & (unbiasedExp < numBits))
+          : (unbiasedExp >= 0) & (unbiasedExp < numBits);
 
   // Shifting our mantissa by the exponent means that the hidden bit now holds
   // the least significant bit of the integer component of the float. If this
   // is set it means that the number is odd.
   const SignedType hiddenBitMasked =
-      (mantissa << unbiasedExp) & mantissaHiddenBit;
+      (mantissa << (validExp & unbiasedExp)) & mantissaHiddenBit;
 
   // Truncates any remaining fractional bits, since we're only interested in
   // the last bit of integer component.

diff --git a/modules/compiler/builtins/abacus/include/abacus/internal/multiply_exact.h b/modules/compiler/builtins/abacus/include/abacus/internal/multiply_exact.h
@@ -21,6 +21,16 @@
 #include <abacus/abacus_detail_cast.h>
 #include <abacus/abacus_type_traits.h>
 
+// Perform exact multiplication of two floating point values.
+//
+// After hi = multiply_exact(x, y, &lo), unless overflow occurred, hi = x * y
+// according to floating point rules, and hi + lo = x * y according to
+// mathematical rules.
+//
+// Dekker, T.J. A floating-point technique for extending the available
+// precision. Numer. Math. 18, 224–242 (1971).
+// https://doi.org/10.1007/BF01397083
+
 namespace {
 template <typename E>
 struct multiply_exact_helper {
@@ -31,8 +41,9 @@ struct multiply_exact_helper {
 
     UnsignedType C = 1;
     // shift is the number of mantissa bits plus 1 for the implicit bit, then
-    // divided by two as we're splitting `x` into two parts.
-    const SignedType shift = (FPShape<E>::Mantissa() + 1) / 2;
+    // divided by two as we're splitting `x` into two parts, rounded up as
+    // described in section 6.3 with reference to 5.7.
+    const SignedType shift = FPShape<E>::Mantissa() / 2 + 1;
     C = (C << shift) + 1;
     const T gamma = x * abacus::detail::cast::convert<E>(C);
     const T delta = x - gamma;
@@ -41,37 +52,6 @@ struct multiply_exact_helper {
   }
 };
 
-#ifdef __CA_BUILTINS_HALF_SUPPORT
-
-// This is very similar to the templated function, however because half's have
-// an odd (11 including hidden bit) number of bits in the mantissa it doesn't
-// quite fit in.
-// Potentially you could change the line in the template
-// const SignedType shift = (FPShape<E>::Mantissa() + 1) / 2;
-// to
-// const SignedType shift = (FPShape<E>::Mantissa() + 2) / 2;
-// and it should theoretically be the same for 32/64 bit, and also work for 16
-// bit. However I don't have the tests for that so for the time being we're just
-// specializing the 16 bit version:
-template <>
-struct multiply_exact_helper<abacus_half> {
-  template <typename T>
-  static void split(const T &x, T *x_hi, T *x_lo) {
-    // Derived from the 'Handbook of Floating Point Arithmetic',
-    // section 4.4 (page 132)
-
-    T C = 64.0f16;  // 2^6
-    T gamma = C * x;
-    T delta = x - gamma;
-    T xh = gamma + delta;
-
-    *x_lo = x - xh;
-    *x_hi = xh;
-  }
-};
-
-#endif
-
 template <>
 struct multiply_exact_helper<abacus_float> {
   template <typename T>
@@ -96,8 +76,6 @@ struct multiply_exact_helper<abacus_float> {
 
 namespace abacus {
 namespace internal {
-// See paper on error free transformation of the product of two floating point
-// numbers(https://doi.org/10.1007/BF01397083) where this algorithm taken from
 template <typename T>
 inline T multiply_exact(const T x, const T y, T *out_remainder) {
   // TODO