// Patch summary (unified diff touching five files, collapsed onto long lines):
//  - numerics/sin_cos.hpp / numerics/sin_cos.cpp: declare, define and export
//    `StaticInitialization()`, which assigns the function pointers `cos` and
//    `sin` depending on `UseHardwareFMA`.  The non-template `Sin(Argument)` and
//    `Cos(Argument)` now simply call through those pointers.  Both pointers are
//    initialised to `nullptr`, so `StaticInitialization()` has to run before
//    the first call to `Sin` or `Cos`.
//  - benchmarks/elementary_functions_benchmark.cpp (in
//    `BM_EvaluateElementaryFunction`), functions/sin_cos_test.cpp (in
//    `SetUpTestCase`) and nanobenchmarks/main.cpp (first statement of `Main`)
//    each add a call to `sin_cos::StaticInitialization()`;
//    nanobenchmarks/main.cpp also adds the include of "numerics/sin_cos.hpp".
//  - The implementations of `Sin` and `Cos` gain a `template` header, and the
//    duplicated `OSACA_IF(UseHardwareFMA)` branches inside them are removed;
//    the test on `UseHardwareFMA` is now made in `StaticInitialization()`.
//  - `static` is dropped from `number_of_iterations` and from the four
//    constants in `namespace masks`.
// NOTE(review): template parameter/argument lists look stripped from this copy
// of the patch (`template void ...`, `std::unique_ptr logger`,
// `OSACA_FUNCTION_BEGIN(θ, )`, the unchanged-looking -/+ pair for
// `OSACA_ANALYSED_FUNCTION_TEMPLATE_PARAMETERS`), which is presumably why both
// branches of `StaticInitialization()` read identically here — confirm against
// the original patch before applying.
diff --git a/benchmarks/elementary_functions_benchmark.cpp b/benchmarks/elementary_functions_benchmark.cpp index f0f119c87f..3c907e6780 100644 --- a/benchmarks/elementary_functions_benchmark.cpp +++ b/benchmarks/elementary_functions_benchmark.cpp @@ -17,14 +17,17 @@ namespace functions { using namespace principia::benchmarks::_metric; using namespace principia::numerics::_sin_cos; +namespace sin_cos = principia::numerics::_sin_cos; -static constexpr std::int64_t number_of_iterations = 1000; +constexpr std::int64_t number_of_iterations = 1000; template void BM_EvaluateElementaryFunction(benchmark::State& state) { using Value = double; using Argument = double; + sin_cos::StaticInitialization(); + std::mt19937_64 random(42); std::uniform_real_distribution<> uniformly_at(-2 * π, 2 * π); diff --git a/functions/sin_cos_test.cpp b/functions/sin_cos_test.cpp index 40f7b5b819..b96bcda3b9 100644 --- a/functions/sin_cos_test.cpp +++ b/functions/sin_cos_test.cpp @@ -26,9 +26,14 @@ using namespace principia::functions::_multiprecision; using namespace principia::numerics::_next; using namespace principia::numerics::_sin_cos; using namespace principia::testing_utilities::_almost_equals; +namespace sin_cos = principia::numerics::_sin_cos; class SinCosTest : public ::testing::Test { protected: + static void SetUpTestCase() { + sin_cos::StaticInitialization(); + } + double a_ = 1.0; }; diff --git a/nanobenchmarks/main.cpp b/nanobenchmarks/main.cpp index ce1fbf1d8e..7431d4a65d 100644 --- a/nanobenchmarks/main.cpp +++ b/nanobenchmarks/main.cpp @@ -24,6 +24,7 @@ #include "nanobenchmarks/function_registry.hpp" #include "nanobenchmarks/microarchitectures.hpp" #include "nanobenchmarks/performance_settings_controller.hpp" +#include "numerics/sin_cos.hpp" #include "testing_utilities/statistics.hpp" @@ -86,7 +87,9 @@ using namespace principia::mathematica::_mathematica; using namespace principia::nanobenchmarks::_function_registry; using namespace 
principia::nanobenchmarks::_microarchitectures; using namespace principia::nanobenchmarks::_performance_settings_controller; +using namespace principia::numerics::_sin_cos; using namespace principia::testing_utilities::_statistics; +namespace sin_cos = principia::numerics::_sin_cos; struct LatencyDistributionTable { double min; @@ -209,6 +212,7 @@ std::size_t FormattedWidth(std::string const& s) { } void Main() { + sin_cos::StaticInitialization(); std::regex const name_matcher(absl::GetFlag(FLAGS_benchmark_filter)); auto controller = PerformanceSettingsController::New(); std::unique_ptr logger; diff --git a/numerics/sin_cos.cpp b/numerics/sin_cos.cpp index 04f81d24ac..a4cb6f197b 100644 --- a/numerics/sin_cos.cpp +++ b/numerics/sin_cos.cpp @@ -29,7 +29,7 @@ using namespace principia::quantities::_elementary_functions; #define OSACA_ANALYSED_FUNCTION Cos #define OSACA_ANALYSED_FUNCTION_NAMESPACE -#define OSACA_ANALYSED_FUNCTION_TEMPLATE_PARAMETERS +#define OSACA_ANALYSED_FUNCTION_TEMPLATE_PARAMETERS #define UNDER_OSACA_HYPOTHESES(expression) \ [&] { \ constexpr bool UseHardwareFMA = true; \ @@ -74,14 +74,24 @@ constexpr Argument mantissa_reduce_shifter = template using Polynomial1 = HornerEvaluator; +// Pointers used for indirect calls, set by `StaticInitialization`. +Value (__cdecl *cos)(Argument θ) = nullptr; +Value (__cdecl *sin)(Argument θ) = nullptr; + +// Forward declarations needed by the OSACA macros. 
+template +Value __cdecl Sin(Argument θ); +template +Value __cdecl Cos(Argument θ); + namespace masks { -static const __m128d sign_bit = +__m128d const sign_bit = _mm_castsi128_pd(_mm_cvtsi64_si128(0x8000'0000'0000'0000)); -static const __m128d exponent_bits = +__m128d const exponent_bits = _mm_castsi128_pd(_mm_cvtsi64_si128(0x7ff0'0000'0000'0000)); -static const __m128d mantissa_bits = +__m128d const mantissa_bits = _mm_castsi128_pd(_mm_cvtsi64_si128(0x000f'ffff'ffff'ffff)); -static const __m128d mantissa_index_bits = +__m128d const mantissa_index_bits = _mm_castsi128_pd(_mm_cvtsi64_si128(0x0000'0000'0000'01ff)); } // namespace masks @@ -298,25 +308,17 @@ Value CosImplementation(DoublePrecision const θ_reduced) { return DetectDangerousRounding(cos_x₀_minus_h_sin_x₀.value, polynomial_term); } +template Value __cdecl Sin(Argument θ) { - OSACA_FUNCTION_BEGIN(θ); + OSACA_FUNCTION_BEGIN(θ, ); DoublePrecision θ_reduced; std::int64_t quadrant; double value; - OSACA_IF(UseHardwareFMA) { - Reduce(θ, θ_reduced, quadrant); - OSACA_IF(quadrant & 0b1) { - value = CosImplementation(θ_reduced); - } else { - value = SinImplementation(θ_reduced); - } + Reduce(θ, θ_reduced, quadrant); + OSACA_IF(quadrant & 0b1) { + value = CosImplementation(θ_reduced); } else { - Reduce(θ, θ_reduced, quadrant); - OSACA_IF(quadrant & 0b1) { - value = CosImplementation(θ_reduced); - } else { - value = SinImplementation(θ_reduced); - } + value = SinImplementation(θ_reduced); } OSACA_IF(value != value) { OSACA_RETURN(cr_sin(θ)); @@ -327,26 +329,17 @@ Value __cdecl Sin(Argument θ) { } } +template Value __cdecl Cos(Argument θ) { - OSACA_FUNCTION_BEGIN(θ); + OSACA_FUNCTION_BEGIN(θ, ); DoublePrecision θ_reduced; std::int64_t quadrant; double value; - OSACA_IF(UseHardwareFMA) { - Reduce(θ, θ_reduced, quadrant); - OSACA_IF(quadrant & 0b1) { - value = SinImplementation(θ_reduced); - } else { - value = CosImplementation(θ_reduced); - } + Reduce(θ, θ_reduced, quadrant); + OSACA_IF(quadrant & 0b1) { + value = 
SinImplementation(θ_reduced); } else { - Reduce(θ, θ_reduced, quadrant); - OSACA_IF(quadrant & 0b1) { - value = SinImplementation(θ_reduced); - } else { - value = CosImplementation(θ_reduced); - } + value = CosImplementation(θ_reduced); } OSACA_IF(value != value) { OSACA_RETURN(cr_cos(θ)); @@ -357,6 +350,24 @@ Value __cdecl Cos(Argument θ) { } } +void StaticInitialization() { + if (UseHardwareFMA) { + cos = &Cos; + sin = &Sin; + } else { + cos = &Cos; + sin = &Sin; + } +} + +Value __cdecl Sin(Argument const θ) { + return sin(θ); +} + +Value __cdecl Cos(Argument const θ) { + return cos(θ); +} + } // namespace internal } // namespace _sin_cos } // namespace numerics diff --git a/numerics/sin_cos.hpp b/numerics/sin_cos.hpp index b33a11a7c1..b098aa34e1 100644 --- a/numerics/sin_cos.hpp +++ b/numerics/sin_cos.hpp @@ -7,6 +7,8 @@ namespace numerics { namespace _sin_cos { namespace internal { +void StaticInitialization(); + double __cdecl Sin(double x); double __cdecl Cos(double x); @@ -14,6 +16,7 @@ double __cdecl Cos(double x); using internal::Cos; using internal::Sin; +using internal::StaticInitialization; } // namespace _sin_cos } // namespace numerics