Skip to content

Commit

Permalink
Merge pull request mockingbirdnest#4096 from pleroy/TheRightTables
Browse files Browse the repository at this point in the history
Let's generate the tables that we actually need for the Gal algorithm
  • Loading branch information
pleroy authored Sep 15, 2024
2 parents 6d143b1 + 0470466 commit 0864ed9
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 86 deletions.
17 changes: 7 additions & 10 deletions functions/accurate_table_generator_body.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,8 +365,9 @@ absl::StatusOr<cpp_rational> StehléZimmermannSimultaneousSearch(
// Preliminary: shift and rescale the functions and the polynomials:
// Fᵢ = N fᵢ(t / N)
// Pᵢ = N pᵢ(t / N)
// We don't reify Pᵢ as that would require an extra call to `Compose`.
// Instead, we directly compute P̃ᵢ below.
std::array<AccurateFunction, 2> F;
std::array<std::optional<AccuratePolynomial<cpp_rational, 2>>, 2> P;
for (std::int64_t i = 0; i < functions.size(); ++i) {
F[i] = [&functions, i, N, &starting_argument](cpp_rational const& t) {
// Here |t| ≤ T.
Expand All @@ -385,12 +386,6 @@ absl::StatusOr<cpp_rational> StehléZimmermannSimultaneousSearch(
return starting_argument + cpp_rational(status_or_t.value(), N);
}

AccuratePolynomial<cpp_rational, 1> const shift_and_rescale(
{starting_argument, cpp_rational(1, N)});
for (std::int64_t i = 0; i < polynomials.size(); ++i) {
P[i] = N * Compose(polynomials[i], shift_and_rescale);
}

// Step 2: compute ε. We use the remainders provided by the clients. Note
// that we could save on the number of evaluations by providing both bounds to
// a single call.
Expand All @@ -413,9 +408,11 @@ absl::StatusOr<cpp_rational> StehléZimmermannSimultaneousSearch(

// Step 3, second part: compute P̃
std::array<std::optional<AccuratePolynomial<cpp_int, 2>>, 2> P̃;
AccuratePolynomial<cpp_rational, 1> const Tτ({0, T});
for (std::int64_t i = 0; i < P.size(); ++i) {
auto const composition_coefficients = Compose(C * *P[i], Tτ).coefficients();
AccuratePolynomial<cpp_rational, 1> const shift_and_rescale(
{starting_argument, cpp_rational(T, N)});
for (std::int64_t i = 0; i < polynomials.size(); ++i) {
auto const composition_coefficients =
Compose(C * (N * polynomials[i]), shift_and_rescale).coefficients();
AccuratePolynomial<cpp_int, 2>::Coefficients P̃_coefficients;
for_all_of(composition_coefficients, P̃_coefficients)
.loop([](auto const& composition_coefficient, auto& P̃_coefficient) {
Expand Down
136 changes: 67 additions & 69 deletions functions/accurate_table_generator_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -430,77 +430,75 @@ TEST_F(AccurateTableGeneratorTest, StehléZimmermannMultisearchSinCos15) {
}

TEST_F(AccurateTableGeneratorTest, DISABLED_SECULAR_SinCos18) {
for (std::int64_t n = 0; n < 1; ++n) {
Logger logger(TEMP_DIR / absl::StrCat("sin_cos_18_", n, ".wl"),
/*make_unique=*/false);

// Process the binade [1 / 2^(n + 1), 1 / 2^n[ (except that for n = 0 the
// upper bound is π / 4).
double const lower_bound = std::asin(1.0 / (1 << (n + 1)));
double const upper_bound = n == 0 ? π / 4 : std::asin(1.0 / (1 << n));

double const h = 1.0 / (1 << (n + 10));
double const h_over_2 = h / 2.0;

std::vector<cpp_rational> starting_arguments;
std::vector<std::array<AccuratePolynomial<cpp_rational, 2>, 2>> polynomials;
std::vector<std::array<AccurateFunction, 2>> remainders;
for (std::int64_t i = std::floor(lower_bound / h_over_2);
i <= std::ceil(upper_bound / h_over_2);
++i) {
// The arguments are odd multiples of h/2.
if (i % 2 == 1) {
double const x₀ = i * h_over_2;
if (lower_bound <= x₀ && x₀ < upper_bound) {
AccuratePolynomial<cpp_rational, 2> const sin_taylor2(
{cpp_rational(Sin(x₀)),
cpp_rational(Cos(x₀)),
-cpp_rational(Sin(x₀)) / 2},
x₀);
AccuratePolynomial<cpp_rational, 2> const cos_taylor2(
{cpp_rational(Cos(x₀)),
-cpp_rational(Sin(x₀)),
-cpp_rational(Cos(x₀) / 2)},
x₀);

// The remainders don't need to be extremely precise, so for speed
// they are computed using double.
auto const remainder_sin_taylor2 =
[x₀ = static_cast<double>(cpp_rational(x₀))](
cpp_rational const& x) {
auto const Δx = static_cast<double>(x) - x₀;
auto const Δx³ = Δx * Δx * Δx;
return -Δx³ * -std::cos(std::min(x₀ + Δx, x₀)) / Factorial(3);
};
auto const remainder_cos_taylor2 =
[x₀ = static_cast<double>(cpp_rational(x₀))](
cpp_rational const& x) {
auto const Δx = static_cast<double>(x) - x₀;
auto const Δx³ = Δx * Δx * Δx;
return Δx³ * std::sin(std::max(x₀ + Δx, x₀)) / Factorial(3);
};

starting_arguments.push_back(x₀);
polynomials.push_back({sin_taylor2, cos_taylor2});
remainders.push_back({remainder_sin_taylor2, remainder_cos_taylor2});
}
}
}
Logger logger(TEMP_DIR / absl::StrCat("sin_cos_18.wl"),
/*make_unique=*/false);

// The radius of each interval.
double const h = 1.0 / (1 << 10);

// The centre of the interval with index `i`.
auto const centre = [h](std::int64_t const i) { return 2 * i * h; };

// The index of the first interval, which starts at `h` with a centre at
// `2 * h`.
std::int64_t const i_min = 1;

// The index of the last interval, which goes a bit beyond π / 4.
std::int64_t i_max = std::ceil(π / (8 * h) - 0.5);

// Check that the last interval straddles π / 4.
CHECK_LT(centre(i_max) - h, π / 4);
CHECK_LT(π / 4, centre(i_max) + h);

std::vector<cpp_rational> starting_arguments;
std::vector<std::array<AccuratePolynomial<cpp_rational, 2>, 2>> polynomials;
std::vector<std::array<AccurateFunction, 2>> remainders;
for (std::int64_t i = i_min; i <= i_max; ++i) {
double const x₀ = centre(i);
AccuratePolynomial<cpp_rational, 2> const sin_taylor2(
{cpp_rational(Sin(x₀)),
cpp_rational(Cos(x₀)),
-cpp_rational(Sin(x₀)) / 2},
x₀);
AccuratePolynomial<cpp_rational, 2> const cos_taylor2(
{cpp_rational(Cos(x₀)),
-cpp_rational(Sin(x₀)),
-cpp_rational(Cos(x₀) / 2)},
x₀);

StehléZimmermannSimultaneousStreamingMultisearch<18>(
{Sin, Cos},
polynomials,
remainders,
starting_arguments,
[n, &logger](std::int64_t const index,
absl::StatusOr<cpp_rational> status_or_x) {
auto const& x = status_or_x.value();
logger.Set(
absl::StrCat("accurateTables[", n, ",", index, "]"),
std::tuple{static_cast<cpp_bin_float_50>(x), Sin(x), Cos(x)});
logger.FlushAndClear();
});
// The remainders don't need to be extremely precise, so for speed
// they are computed using double.
auto const remainder_sin_taylor2 =
[x₀ = static_cast<double>(cpp_rational(x₀))](cpp_rational const& x) {
auto const Δx = static_cast<double>(x) - x₀;
auto const Δx³ = Δx * Δx * Δx;
return -Δx³ * -std::cos(std::min(x₀ + Δx, x₀)) / Factorial(3);
};
auto const remainder_cos_taylor2 =
[x₀ = static_cast<double>(cpp_rational(x₀))](cpp_rational const& x) {
auto const Δx = static_cast<double>(x) - x₀;
auto const Δx³ = Δx * Δx * Δx;
return Δx³ * std::sin(std::max(x₀ + Δx, x₀)) / Factorial(3);
};

starting_arguments.push_back(x₀);
polynomials.push_back({sin_taylor2, cos_taylor2});
remainders.push_back({remainder_sin_taylor2, remainder_cos_taylor2});
}

StehléZimmermannSimultaneousStreamingMultisearch<18>(
{Sin, Cos},
polynomials,
remainders,
starting_arguments,
[i_min, &logger](std::int64_t const index,
absl::StatusOr<cpp_rational> status_or_x) {
auto const& x = status_or_x.value();
logger.Set(
absl::StrCat("accurateTables[", index + i_min, "]"),
std::tuple{static_cast<cpp_bin_float_50>(x), Sin(x), Cos(x)});
logger.FlushAndClear();
});
}

#endif
Expand Down
14 changes: 7 additions & 7 deletions numerics/polynomial_in_monomial_basis_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ TEST_F(PolynomialInMonomialBasisTest, Monoid) {
-4 * Ampere / Kelvin}, 3 * Kelvin);
P2A const p2a({1 * Kelvin,
3 * Kelvin / Second,
-8 * Kelvin / Second / Second}, t0);
-8 * Kelvin / Second / Second}, t0 + 4 * Second);
P2V const p2v({1 * Kelvin,
3 * Kelvin / Second,
-8 * Kelvin / Second / Second});
Expand All @@ -303,36 +303,36 @@ TEST_F(PolynomialInMonomialBasisTest, Monoid) {
{
auto const actual_a = pa(t0 + 0 * Second);
auto const actual_v = pv(0 * Second);
EXPECT_THAT(actual_a, AlmostEquals(2 * Ampere, 0));
EXPECT_THAT(actual_a, AlmostEquals(-2'627'098 * Ampere, 0));
EXPECT_THAT(actual_v, AlmostEquals(2 * Ampere, 0));
}
{
auto const actual_a = pa(t0 + 1 * Second);
auto const actual_v = pv(1 * Second);
EXPECT_THAT(actual_a, AlmostEquals(2 * Ampere, 0));
EXPECT_THAT(actual_a, AlmostEquals(-492'478 * Ampere, 0));
EXPECT_THAT(actual_v, AlmostEquals(2 * Ampere, 0));
}
{
auto const actual_a = pa(t0 - 1 * Second);
auto const actual_v = pv(-1 * Second);
EXPECT_THAT(actual_a, AlmostEquals(-658 * Ampere, 0));
EXPECT_THAT(actual_a, AlmostEquals(-9'662'098 * Ampere, 0));
EXPECT_THAT(actual_v, AlmostEquals(-658 * Ampere, 0));
}
{
auto const actual_a = pa(t0 + 2 * Second);
auto const actual_v = pv(2 * Second);
EXPECT_THAT(actual_a, AlmostEquals(-13648 * Ampere, 0));
EXPECT_THAT(actual_a, AlmostEquals(-46'396 * Ampere, 0));
EXPECT_THAT(actual_v, AlmostEquals(-13648 * Ampere, 0));
}
{
auto const actual_a = pa(t0 - 2 * Second);
auto const actual_v = pv(-2 * Second);
EXPECT_THAT(actual_a, AlmostEquals(-46396 * Ampere, 0));
EXPECT_THAT(actual_a, AlmostEquals(-28'092'328 * Ampere, 0));
EXPECT_THAT(actual_v, AlmostEquals(-46396 * Ampere, 0));
}
{
auto const actual = Compose(p1a, p2a)(t0 + 1 * Second);
EXPECT_THAT(actual, AlmostEquals(30 * Ampere, 0));
EXPECT_THAT(actual, AlmostEquals(334 * Ampere, 0));
}
}

Expand Down

0 comments on commit 0864ed9

Please sign in to comment.