A working implementation.

mockingbirdnest · May 31, 2024 · ac3623c · ac3623c
1 parent 571d445
commit ac3623c
Show file tree

Hide file tree

Showing 2 changed files with 99 additions and 63 deletions.
diff --git a/.gitignore b/.gitignore
@@ -123,3 +123,6 @@ unity/
 
 # Visual Studio Code
 .vscode/
+
+# Nuget
+.nuget/
diff --git a/benchmarks/elementary_functions_experiments_benchmark.cpp b/benchmarks/elementary_functions_experiments_benchmark.cpp
@@ -7,6 +7,7 @@
 
 #include "benchmark/benchmark.h"
 #include "numerics/double_precision.hpp"
+#include "numerics/scale_b.hpp"
 #include "quantities/elementary_functions.hpp"
 #include "quantities/numbers.hpp"  // 🧙 For π.
 
@@ -15,8 +16,11 @@ namespace functions {
 
 // TODO(phl): The polynomials in this file should use class
 // |PolynomialInMonomialBasis|.
+// TODO(phl): Study the effect of rounding the polynomial coefficients to
+// machine numbers.
 
 using namespace principia::numerics::_double_precision;
+using namespace principia::numerics::_scale_b;
 using namespace principia::quantities::_elementary_functions;
 
 using Value = double;
@@ -52,17 +56,14 @@ class TableSpacingImplementation {
       accurate_values_;
 };
 
-template<Argument max_table_spacing, std::int64_t number_of_tables>
 class MultiTableImplementation {
  public:
-  void Initialize();
+  static constexpr double max_table_spacing = 2.0 / 1024.0;
+  static constexpr std::int64_t number_of_tables = 9;
 
-  Value Sin(Argument x);
-  Value Cos(Argument x);
-
- private:
-  // ArcSin(2^-(n + 1)) for n in [0, 8] rounded towards positive infinity.
-  static constexpr std::array<double, number_of_tables> cutoffs_{
+  // ArcSin(2^-(n + 1)) for n in [0, 8] rounded towards positive infinity.  The
+  // entry at index n has n leading zeroes in the mantissa of its sinus.
+  static constexpr std::array<double, number_of_tables> cutoffs{
       0x1.0C152382D7366p-1,
       0x1.02BE9CE0B87CEp-2,
       0x1.00ABE0C129E1Fp-3,
@@ -73,6 +74,24 @@ class MultiTableImplementation {
       0x1.00002AAABDDDFp-8,
       0x1.00000AAAABDDEp-9};
 
+  // The spacing between arguments above each cutoff.
+  static constexpr std::array<double, number_of_tables> table_spacings{
+      ScaleB(max_table_spacing, 0),
+      ScaleB(max_table_spacing, -1),
+      ScaleB(max_table_spacing, -2),
+      ScaleB(max_table_spacing, -3),
+      ScaleB(max_table_spacing, -4),
+      ScaleB(max_table_spacing, -5),
+      ScaleB(max_table_spacing, -6),
+      ScaleB(max_table_spacing, -7),
+      ScaleB(max_table_spacing, -8)};
+
+  void Initialize();
+
+  Value Sin(Argument x);
+  Value Cos(Argument x);
+
+ private:
   // Despite the name these are not accurate values, but for the purposes of
   // benchmarking it doesn't matter.
   struct AccurateValues {
@@ -81,12 +100,14 @@ class MultiTableImplementation {
     Value cos_x;
   };
 
-  //TODO(phl)templatize
   Value SinPolynomial(Argument x);
-  Value CosPolynomial(Argument x);
+  // |i| is the index of the binade in |cutoffs_|,
+  Value CosPolynomial(std::int64_t i, Argument x);
 
+  // Because the interval [π / 6, π / 4] is shorter than the next, the maximum
+  // value is reached between the first two cutoffs.
   static constexpr std::int64_t table_size =
-      static_cast<std::int64_t>((2 * x_min - x_min) / max_table_spacing);
+      static_cast<std::int64_t>((cutoffs[0] - cutoffs[1]) / table_spacings[1]);
 
   std::array<std::int64_t, number_of_tables> one_over_table_spacings_;
   std::array<std::array<AccurateValues, table_size>, number_of_tables>
@@ -140,7 +161,8 @@ Value TableSpacingImplementation<table_spacing>::Cos(Argument const x) {
 }
 
 template<Argument table_spacing>
-Value TableSpacingImplementation<table_spacing>::SinPolynomial(Argument const x) {
+Value TableSpacingImplementation<table_spacing>::SinPolynomial(
+    Argument const x) {
   if constexpr (table_spacing == 2.0 / 256.0) {
     // 71 bits.
     return -0.166666666666666666666421797625 +
@@ -153,7 +175,8 @@ Value TableSpacingImplementation<table_spacing>::SinPolynomial(Argument const x)
 }
 
 template<Argument table_spacing>
-Value TableSpacingImplementation<table_spacing>::CosPolynomial(Argument const x) {
+Value TableSpacingImplementation<table_spacing>::CosPolynomial(
+    Argument const x) {
   if constexpr (table_spacing == 2.0 / 256.0) {
     // 77 bits.
     return -0.499999999999999999999999974543 +
@@ -166,43 +189,37 @@ Value TableSpacingImplementation<table_spacing>::CosPolynomial(Argument const x)
   }
 }
 
-template<Argument max_table_spacing, std::int64_t number_of_tables>
-void MultiTableImplementation<max_table_spacing, number_of_tables>::
-Initialize() {
+void MultiTableImplementation::Initialize() {
   Argument current_x_max = x_max;
-  Argument current_x_min = cutoffs_[0];
-  Argument current_table_spacing = max_table_spacing;
+  Argument current_x_min;
   for (std::int64_t i = 0; i < number_of_tables; ++i) {
-    one_over_table_spacings_[i] = 1.0 / current_table_spacing;
-    Argument x = current_x_min;
+    current_x_min = cutoffs[i];
+    one_over_table_spacings_[i] = 1.0 / table_spacings[i];
+    Argument x = current_x_min + table_spacings[i] / 2;
     for (std::int64_t j = 0; j < table_size && x < current_x_max; ++j) {
       accurate_values_[i][j] = {.x = x,
                                 .sin_x = std::sin(x),
                                 .cos_x = std::cos(x)};
-      x += current_table_spacing;
+      x += table_spacings[i];
     }
     current_x_max = current_x_min;
-    current_x_min /= 2;
-    current_table_spacing /= 2;
   }
 }
 
-template<Argument max_table_spacing, std::int64_t number_of_tables>
 FORCE_INLINE(inline)
-Value MultiTableImplementation<max_table_spacing, number_of_tables>::
-Sin(Argument const x) {
+Value MultiTableImplementation::Sin(Argument const x) {
   // Because the intervals are unequal, this loop does on average 2.28
   // comparisons, which is better than a binary tree.
   std::int64_t i = -1;
-  for (std::int64_t k = 0; k < cutoffs_.size(); ++k) {
-    if (cutoffs_[k] <= x) {
+  for (std::int64_t k = 0; k < cutoffs.size(); ++k) {
+    if (cutoffs[k] <= x) {
       i = k;
       break;
     }
   }
 
-  Argument const x_minus_cutoff = x - cutoffs_[i]
-  auto const j = static_cast<std::int64_t>((x_minus_cutoff) *
+  Argument const x_minus_cutoff = x - cutoffs[i];
+  auto const j = static_cast<std::int64_t>(x_minus_cutoff *
                                            one_over_table_spacings_[i]);
   auto const& accurate_values = accurate_values_[i][j];
   auto const& x₀ = accurate_values.x;
@@ -212,19 +229,25 @@ Sin(Argument const x) {
   auto const h² = h * h;
   auto const h³ = h² * h;
   auto const sin_x₀_plus_h_cos_x₀ = TwoProductAdd(cos_x₀, h, sin_x₀);
-  return sin_x₀_plus_h_cos_x₀.value +
-         ((sin_x₀ * h² * CosPolynomial(h²) + cos_x₀ * h³ * SinPolynomial(h²)) +
-          sin_x₀_plus_h_cos_x₀.error);
+  return sin_x₀_plus_h_cos_x₀.value + ((sin_x₀ * h² * CosPolynomial(i, h²) +
+                                        cos_x₀ * h³ * SinPolynomial(h²)) +
+                                       sin_x₀_plus_h_cos_x₀.error);
 }
 
-template<Argument max_table_spacing, std::int64_t number_of_tables>
 FORCE_INLINE(inline)
-Value MultiTableImplementation<max_table_spacing, number_of_tables>::
-Cos(Argument const x) {
-  int x_exponent;
-  auto const x_mantissa = std::frexp(x, &x_exponent);
-  auto const i = number_of_tables + x_exponent;
-  auto const j = static_cast<std::int64_t>((x_mantissa - 0.5) *
+Value MultiTableImplementation::Cos(Argument const x) {
+  // Because the intervals are unequal, this loop does on average 2.28
+  // comparisons, which is better than a binary tree.
+  std::int64_t i = -1;
+  for (std::int64_t k = 0; k < cutoffs.size(); ++k) {
+    if (cutoffs[k] <= x) {
+      i = k;
+      break;
+    }
+  }
+
+  Argument const x_minus_cutoff = x - cutoffs[i];
+  auto const j = static_cast<std::int64_t>(x_minus_cutoff *
                                            one_over_table_spacings_[i]);
   auto const& accurate_values = accurate_values_[i][j];
   auto const& x₀ = accurate_values.x;
@@ -234,25 +257,33 @@ Cos(Argument const x) {
   auto const h² = h * h;
   auto const h³ = h² * h;
   auto const cos_x₀_minus_h_sin_x₀ = TwoProductNegatedAdd(sin_x₀, h, cos_x₀);
-  return cos_x₀_minus_h_sin_x₀.value +
-         ((cos_x₀ * h² * CosPolynomial(h²) - sin_x₀ * h³ * SinPolynomial(h²)) +
-          cos_x₀_minus_h_sin_x₀.error);
+  return cos_x₀_minus_h_sin_x₀.value + ((cos_x₀ * h² * CosPolynomial(i, h²) -
+                                         sin_x₀ * h³ * SinPolynomial(h²)) +
+                                        cos_x₀_minus_h_sin_x₀.error);
 }
 
-template<Argument max_table_spacing, std::int64_t number_of_tables>
-Value MultiTableImplementation<max_table_spacing, number_of_tables>::
-SinPolynomial(Argument const x) {
+Value MultiTableImplementation::SinPolynomial(Argument const x) {
   // 85 bits.
   return -0.166666666666666666666666651721 +
          0.00833333316093951937646271666739 * x;
 }
 
-template<Argument max_table_spacing, std::int64_t number_of_tables>
-Value MultiTableImplementation<max_table_spacing, number_of_tables>::
-CosPolynomial(Argument const x) {
-  // 72 bits.
-  return -0.499999999999999999999872434553 +
-          0.0416666654823785864634569932662 * x;
+Value MultiTableImplementation::CosPolynomial(std::int64_t const i,
+                                              Argument const x) {
+  // i == 1 goes first because it is the largest argument interval.
+  if (i == 1) {
+    // 78 bits.
+    return -0.499999999999999999999998006790 +
+           0.0416666663705946726372008045758 * x;
+  } else if (i == 0) {
+    // 72 bits.
+    return -0.499999999999999999999872434553 +
+           0.0416666654823785864634569932662 * x;
+  } else {
+    // 84 bits.
+    return -0.499999999999999999999999968856 +
+           0.0416666665926486697856340784849 * x;
+  }
 }
 
 template<Argument table_spacing>
@@ -313,13 +344,14 @@ void BM_ExperimentCosTableSpacing(benchmark::State& state) {
   }
 }
 
-template<Argument max_table_spacing, std::int64_t number_of_tables>
 void BM_ExperimentSinMultiTable(benchmark::State& state) {
   std::mt19937_64 random(42);
   std::uniform_real_distribution<> uniformly_at(
-      std::scalbln(x_min, 1 - number_of_tables), x_max);
+      MultiTableImplementation::cutoffs
+          [MultiTableImplementation::number_of_tables - 1],
+      x_max);
 
-  MultiTableImplementation<max_table_spacing, number_of_tables> implementation;
+  MultiTableImplementation implementation;
   implementation.Initialize();
 
   Argument a[number_of_iterations];
@@ -336,20 +368,22 @@ void BM_ExperimentSinMultiTable(benchmark::State& state) {
       // The implementation is not accurate, but let's check that it's not
       // broken.
       auto const absolute_error = Abs(v[i] - std::sin(a[i]));
-      CHECK_LT(absolute_error, 5.6e-17);
+      //LOG(ERROR)<<absolute_error;
+      CHECK_LT(absolute_error, 1.2e-16);
 #endif
     }
     benchmark::DoNotOptimize(v);
   }
 }
 
-template<Argument max_table_spacing, std::int64_t number_of_tables>
 void BM_ExperimentCosMultiTable(benchmark::State& state) {
   std::mt19937_64 random(42);
   std::uniform_real_distribution<> uniformly_at(
-      std::scalbln(x_min, 1 - number_of_tables), x_max);
+      MultiTableImplementation::cutoffs
+          [MultiTableImplementation::number_of_tables - 1],
+      x_max);
 
-  MultiTableImplementation<max_table_spacing, number_of_tables> implementation;
+  MultiTableImplementation implementation;
   implementation.Initialize();
 
   Argument a[number_of_iterations];
@@ -366,6 +400,7 @@ void BM_ExperimentCosMultiTable(benchmark::State& state) {
       // The implementation is not accurate, but let's check that it's not
       // broken.
       auto const absolute_error = Abs(v[i] - std::cos(a[i]));
+      //LOG(ERROR)<<absolute_error;
       CHECK_LT(absolute_error, 1.2e-16);
 #endif
     }
@@ -381,10 +416,8 @@ BENCHMARK_TEMPLATE(BM_ExperimentCosTableSpacing, 2.0 / 256.0)
     ->Unit(benchmark::kNanosecond);
 BENCHMARK_TEMPLATE(BM_ExperimentCosTableSpacing, 2.0 / 1024.0)
     ->Unit(benchmark::kNanosecond);
-BENCHMARK_TEMPLATE(BM_ExperimentSinMultiTable, 2.0 / 1024.0, 9)
-    ->Unit(benchmark::kNanosecond);
-BENCHMARK_TEMPLATE(BM_ExperimentCosMultiTable, 2.0 / 256.0, 9)
-    ->Unit(benchmark::kNanosecond);
+BENCHMARK(BM_ExperimentSinMultiTable)->Unit(benchmark::kNanosecond);
+BENCHMARK(BM_ExperimentCosMultiTable)->Unit(benchmark::kNanosecond);
 
 }  // namespace functions
 }  // namespace principia
-Original file line number
+Diff line change
@@ Expand Up / @@ -123,3 +123,6 @@ unity/ @@
     # Visual Studio Code
     .vscode/
+    # Nuget
+    .nuget/