Skip to content

Commit

Permalink
Merge pull request #4164 from pleroy/IndirectCall
Browse files Browse the repository at this point in the history
Use indirect calls for Sin and Cos
  • Loading branch information
pleroy authored Feb 5, 2025
2 parents 44f1ac9 + 307df07 commit 61922c4
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 35 deletions.
5 changes: 4 additions & 1 deletion benchmarks/elementary_functions_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,17 @@ namespace functions {

using namespace principia::benchmarks::_metric;
using namespace principia::numerics::_sin_cos;
namespace sin_cos = principia::numerics::_sin_cos;

static constexpr std::int64_t number_of_iterations = 1000;
constexpr std::int64_t number_of_iterations = 1000;

template<Metric metric, double (__cdecl *fn)(double)>
void BM_EvaluateElementaryFunction(benchmark::State& state) {
using Value = double;
using Argument = double;

sin_cos::StaticInitialization();

std::mt19937_64 random(42);
std::uniform_real_distribution<> uniformly_at(-2 * π, 2 * π);

Expand Down
5 changes: 5 additions & 0 deletions functions/sin_cos_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,14 @@ using namespace principia::functions::_multiprecision;
using namespace principia::numerics::_next;
using namespace principia::numerics::_sin_cos;
using namespace principia::testing_utilities::_almost_equals;
namespace sin_cos = principia::numerics::_sin_cos;

// Fixture for the tests in this file.
class SinCosTest : public ::testing::Test {
protected:
// Fixture-level googletest hook.  Calls the sin_cos static initialization,
// which sets the pointers that the non-template `Sin` and `Cos` call through.
static void SetUpTestCase() {
sin_cos::StaticInitialization();
}

// NOTE(review): the uses of `a_` lie outside this hunk; its role cannot be
// determined from here.
double a_ = 1.0;
};

Expand Down
4 changes: 4 additions & 0 deletions nanobenchmarks/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "nanobenchmarks/function_registry.hpp"
#include "nanobenchmarks/microarchitectures.hpp"
#include "nanobenchmarks/performance_settings_controller.hpp"
#include "numerics/sin_cos.hpp"
#include "testing_utilities/statistics.hpp"


Expand Down Expand Up @@ -86,7 +87,9 @@ using namespace principia::mathematica::_mathematica;
using namespace principia::nanobenchmarks::_function_registry;
using namespace principia::nanobenchmarks::_microarchitectures;
using namespace principia::nanobenchmarks::_performance_settings_controller;
using namespace principia::numerics::_sin_cos;
using namespace principia::testing_utilities::_statistics;
namespace sin_cos = principia::numerics::_sin_cos;

struct LatencyDistributionTable {
double min;
Expand Down Expand Up @@ -209,6 +212,7 @@ std::size_t FormattedWidth(std::string const& s) {
}

void Main() {
sin_cos::StaticInitialization();
std::regex const name_matcher(absl::GetFlag(FLAGS_benchmark_filter));
auto controller = PerformanceSettingsController::New();
std::unique_ptr<Logger> logger;
Expand Down
79 changes: 45 additions & 34 deletions numerics/sin_cos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ using namespace principia::quantities::_elementary_functions;

#define OSACA_ANALYSED_FUNCTION Cos
#define OSACA_ANALYSED_FUNCTION_NAMESPACE
#define OSACA_ANALYSED_FUNCTION_TEMPLATE_PARAMETERS
#define OSACA_ANALYSED_FUNCTION_TEMPLATE_PARAMETERS <FMAPolicy::Force>
#define UNDER_OSACA_HYPOTHESES(expression) \
[&] { \
constexpr bool UseHardwareFMA = true; \
Expand Down Expand Up @@ -74,14 +74,24 @@ constexpr Argument mantissa_reduce_shifter =
// Shorthand for a `HornerEvaluator` taking an `Argument` and producing a
// `Value`, with the FMA policy left as a template parameter.
// NOTE(review): the literal 1 is presumably the polynomial degree — confirm
// against the declaration of `HornerEvaluator`.
template<FMAPolicy fma_policy>
using Polynomial1 = HornerEvaluator<Value, Argument, 1, fma_policy>;

// Pointers used for indirect calls, set by `StaticInitialization`.  They are
// null until `StaticInitialization` has run, and the non-template `Sin` and
// `Cos` call through them without any null check, so initialization must
// happen before the first such call.
Value (__cdecl *cos)(Argument θ) = nullptr;
Value (__cdecl *sin)(Argument θ) = nullptr;

// Forward declarations needed by the OSACA macros.  These are the
// policy-parameterized implementations; `StaticInitialization` takes the
// address of the `FMAPolicy::Force` or `FMAPolicy::Disallow` instantiation of
// each and stores it in the `sin` and `cos` pointers.
template<FMAPolicy fma_policy>
Value __cdecl Sin(Argument θ);
template<FMAPolicy fma_policy>
Value __cdecl Cos(Argument θ);

namespace masks {
static const __m128d sign_bit =
__m128d const sign_bit =
_mm_castsi128_pd(_mm_cvtsi64_si128(0x8000'0000'0000'0000));
static const __m128d exponent_bits =
__m128d const exponent_bits =
_mm_castsi128_pd(_mm_cvtsi64_si128(0x7ff0'0000'0000'0000));
static const __m128d mantissa_bits =
__m128d const mantissa_bits =
_mm_castsi128_pd(_mm_cvtsi64_si128(0x000f'ffff'ffff'ffff));
static const __m128d mantissa_index_bits =
__m128d const mantissa_index_bits =
_mm_castsi128_pd(_mm_cvtsi64_si128(0x0000'0000'0000'01ff));
} // namespace masks

Expand Down Expand Up @@ -298,25 +308,17 @@ Value CosImplementation(DoublePrecision<Argument> const θ_reduced) {
return DetectDangerousRounding(cos_x₀_minus_h_sin_x₀.value, polynomial_term);
}

template<FMAPolicy fma_policy>
Value __cdecl Sin(Argument θ) {
OSACA_FUNCTION_BEGIN(θ);
OSACA_FUNCTION_BEGIN(θ, <fma_policy>);
DoublePrecision<Argument> θ_reduced;
std::int64_t quadrant;
double value;
OSACA_IF(UseHardwareFMA) {
Reduce<FMAPolicy::Force, /*preserve_sign=*/true>(θ, θ_reduced, quadrant);
OSACA_IF(quadrant & 0b1) {
value = CosImplementation<FMAPolicy::Force>(θ_reduced);
} else {
value = SinImplementation<FMAPolicy::Force>(θ_reduced);
}
Reduce<fma_policy, /*preserve_sign=*/true>(θ, θ_reduced, quadrant);
OSACA_IF(quadrant & 0b1) {
value = CosImplementation<fma_policy>(θ_reduced);
} else {
Reduce<FMAPolicy::Disallow, /*preserve_sign=*/true>(θ, θ_reduced, quadrant);
OSACA_IF(quadrant & 0b1) {
value = CosImplementation<FMAPolicy::Disallow>(θ_reduced);
} else {
value = SinImplementation<FMAPolicy::Disallow>(θ_reduced);
}
value = SinImplementation<fma_policy>(θ_reduced);
}
OSACA_IF(value != value) {
OSACA_RETURN(cr_sin(θ));
Expand All @@ -327,26 +329,17 @@ Value __cdecl Sin(Argument θ) {
}
}

template<FMAPolicy fma_policy>
Value __cdecl Cos(Argument θ) {
OSACA_FUNCTION_BEGIN(θ);
OSACA_FUNCTION_BEGIN(θ, <fma_policy>);
DoublePrecision<Argument> θ_reduced;
std::int64_t quadrant;
double value;
OSACA_IF(UseHardwareFMA) {
Reduce<FMAPolicy::Force, /*preserve_sign=*/false>(θ, θ_reduced, quadrant);
OSACA_IF(quadrant & 0b1) {
value = SinImplementation<FMAPolicy::Force>(θ_reduced);
} else {
value = CosImplementation<FMAPolicy::Force>(θ_reduced);
}
Reduce<fma_policy, /*preserve_sign=*/false>(θ, θ_reduced, quadrant);
OSACA_IF(quadrant & 0b1) {
value = SinImplementation<fma_policy>(θ_reduced);
} else {
Reduce<FMAPolicy::Disallow,
/*preserve_sign=*/false>(θ, θ_reduced, quadrant);
OSACA_IF(quadrant & 0b1) {
value = SinImplementation<FMAPolicy::Disallow>(θ_reduced);
} else {
value = CosImplementation<FMAPolicy::Disallow>(θ_reduced);
}
value = CosImplementation<fma_policy>(θ_reduced);
}
OSACA_IF(value != value) {
OSACA_RETURN(cr_cos(θ));
Expand All @@ -357,6 +350,24 @@ Value __cdecl Cos(Argument θ) {
}
}

// Points the `cos` and `sin` dispatch pointers at the instantiations that
// match `UseHardwareFMA`: the `FMAPolicy::Force` ones when it is true, the
// `FMAPolicy::Disallow` ones otherwise.  The non-template `Sin` and `Cos` call
// through these pointers, so this must run before either of them is used.
void StaticInitialization() {
  if (!UseHardwareFMA) {
    // No hardware FMA: use the implementations that never emit FMA.
    cos = &Cos<FMAPolicy::Disallow>;
    sin = &Sin<FMAPolicy::Disallow>;
    return;
  }
  cos = &Cos<FMAPolicy::Force>;
  sin = &Sin<FMAPolicy::Force>;
}

// Public entry point: forwards to whichever implementation
// `StaticInitialization` stored in the `sin` pointer.  The pointer starts out
// null and is not checked here, so `StaticInitialization` must have been called
// beforehand.
Value __cdecl Sin(Argument const θ) {
  return (*sin)(θ);
}

// Public entry point: forwards to whichever implementation
// `StaticInitialization` stored in the `cos` pointer.  The pointer starts out
// null and is not checked here, so `StaticInitialization` must have been called
// beforehand.
Value __cdecl Cos(Argument const θ) {
  return (*cos)(θ);
}

} // namespace internal
} // namespace _sin_cos
} // namespace numerics
Expand Down
3 changes: 3 additions & 0 deletions numerics/sin_cos.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@ namespace numerics {
namespace _sin_cos {
namespace internal {

// Selects the implementations that `Sin` and `Cos` dispatch to.  Must be
// called before the first call to `Sin` or `Cos`: they call through pointers
// that are null until this function has run.
void StaticInitialization();

// Sine and cosine of `x`, computed by the implementation chosen in
// `StaticInitialization`.
double __cdecl Sin(double x);
double __cdecl Cos(double x);

} // namespace internal

using internal::Cos;
using internal::Sin;
using internal::StaticInitialization;

} // namespace _sin_cos
} // namespace numerics
Expand Down

0 comments on commit 61922c4

Please sign in to comment.