From 6065459c5cacfb1f09b333b8938aa01cd3be6a78 Mon Sep 17 00:00:00 2001 From: pleroy Date: Sun, 12 Jan 2025 00:03:00 +0100 Subject: [PATCH 1/5] Do not preserve the sign during reduction if evaluating a Cos. --- numerics/sin_cos.cpp | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/numerics/sin_cos.cpp b/numerics/sin_cos.cpp index 4d7086d439..3be8c4b72d 100644 --- a/numerics/sin_cos.cpp +++ b/numerics/sin_cos.cpp @@ -151,7 +151,7 @@ inline double DetectDangerousRounding(double const x, double const Δx) { } } -template +template inline void Reduce(Argument const θ, DoublePrecision& θ_reduced, std::int64_t& quadrant) { @@ -167,14 +167,19 @@ inline void Reduce(Argument const θ, // vicinity of π / 4 to the vicinity of -π / 4 with appropriate adjustment // of the quadrant. __m128d const sign = _mm_and_pd(masks::sign_bit, _mm_set_sd(θ)); - double const n_shifted = - FusedMultiplyAdd(abs_θ, (2 / π), mantissa_reduce_shifter); - double const n_double = _mm_cvtsd_f64( - _mm_xor_pd(_mm_set_sd(n_shifted - mantissa_reduce_shifter), sign)); + double n_double = + FusedMultiplyAdd(abs_θ, (2 / π), mantissa_reduce_shifter) - + mantissa_reduce_shifter; + Argument value; + if constexpr (preserve_sign) { + n_double = _mm_cvtsd_f64(_mm_xor_pd(_mm_set_sd(n_double), sign)); + value = FusedNegatedMultiplyAdd(n_double, π_over_2_high, θ); + } else { + value = + FusedNegatedMultiplyAdd(n_double, π_over_2_high, abs_θ); + } std::int64_t const n = _mm_cvtsd_si64(_mm_set_sd(n_double)); - Argument const value = - FusedNegatedMultiplyAdd(n_double, π_over_2_high, θ); Argument const error = n_double * π_over_2_low; θ_reduced = QuickTwoDifference(value, error); // TODO(phl): Error analysis needed to find the right bounds. @@ -294,14 +299,14 @@ Value __cdecl Sin(Argument θ) { std::int64_t quadrant; double value; OSACA_IF(UseHardwareFMA) { - Reduce(θ, θ_reduced, quadrant); + Reduce(θ, θ_reduced, quadrant); OSACA_IF(quadrant & 0b1) { value = CosImplementation(θ_reduced); } else { value = SinImplementation(θ_reduced); } } else { - Reduce(θ, θ_reduced, quadrant); + Reduce(θ, θ_reduced, quadrant); OSACA_IF(quadrant & 0b1) { value = CosImplementation(θ_reduced); } else { @@ -323,14 +328,15 @@ Value __cdecl Cos(Argument θ) { std::int64_t quadrant; double value; OSACA_IF(UseHardwareFMA) { - Reduce(θ, θ_reduced, quadrant); + Reduce(θ, θ_reduced, quadrant); OSACA_IF(quadrant & 0b1) { value = SinImplementation(θ_reduced); } else { value = CosImplementation(θ_reduced); } } else { - Reduce(θ, θ_reduced, quadrant); + Reduce(θ, θ_reduced, quadrant); OSACA_IF(quadrant & 0b1) { value = SinImplementation(θ_reduced); } else { From 55c7c8e4bf8be1712ac1101562534d5852d2de77 Mon Sep 17 00:00:00 2001 From: pleroy Date: Sun, 12 Jan 2025 18:03:55 +0100 Subject: [PATCH 2/5] More symmetry. --- numerics/sin_cos.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/numerics/sin_cos.cpp b/numerics/sin_cos.cpp index 4d7086d439..15a4fcbde2 100644 --- a/numerics/sin_cos.cpp +++ b/numerics/sin_cos.cpp @@ -169,9 +169,10 @@ inline void Reduce(Argument const θ, __m128d const sign = _mm_and_pd(masks::sign_bit, _mm_set_sd(θ)); double const n_shifted = FusedMultiplyAdd(abs_θ, (2 / π), mantissa_reduce_shifter); - double const n_double = _mm_cvtsd_f64( - _mm_xor_pd(_mm_set_sd(n_shifted - mantissa_reduce_shifter), sign)); - std::int64_t const n = _mm_cvtsd_si64(_mm_set_sd(n_double)); + __m128d const n_128d = + _mm_xor_pd(_mm_set_sd(n_shifted - mantissa_reduce_shifter), sign); + double const n_double = _mm_cvtsd_f64(n_128d); + std::int64_t const n = _mm_cvtsd_si64(n_128d); Argument const value = FusedNegatedMultiplyAdd(n_double, π_over_2_high, θ); From 99509024987ed3b81cbc92768ece279342c6beb8 Mon Sep 17 00:00:00 2001 From: pleroy Date: Sun, 26 Jan 2025 16:21:20 +0100 Subject: [PATCH 3/5] Redundant computation. --- numerics/sin_cos.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/numerics/sin_cos.cpp b/numerics/sin_cos.cpp index 3be8c4b72d..a52499a2a1 100644 --- a/numerics/sin_cos.cpp +++ b/numerics/sin_cos.cpp @@ -171,14 +171,16 @@ inline void Reduce(Argument const θ, FusedMultiplyAdd(abs_θ, (2 / π), mantissa_reduce_shifter) - mantissa_reduce_shifter; Argument value; + std::int64_t n; if constexpr (preserve_sign) { n_double = _mm_cvtsd_f64(_mm_xor_pd(_mm_set_sd(n_double), sign)); + n = _mm_cvtsd_si64(_mm_set_sd(n_double)); value = FusedNegatedMultiplyAdd(n_double, π_over_2_high, θ); } else { + n = _mm_cvtsd_si64(_mm_set_sd(n_double)); value = FusedNegatedMultiplyAdd(n_double, π_over_2_high, abs_θ); } - std::int64_t const n = _mm_cvtsd_si64(_mm_set_sd(n_double)); Argument const error = n_double * π_over_2_low; θ_reduced = QuickTwoDifference(value, error); From c5019e3a5ef882c276baad25a9bd27cdd6822a96 Mon Sep 17 00:00:00 2001 From: pleroy Date: Mon, 27 Jan 2025 22:13:34 +0100 Subject: [PATCH 4/5] Comment. --- numerics/sin_cos.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/numerics/sin_cos.cpp b/numerics/sin_cos.cpp index a52499a2a1..90af3e5ae1 100644 --- a/numerics/sin_cos.cpp +++ b/numerics/sin_cos.cpp @@ -170,6 +170,9 @@ inline void Reduce(Argument const θ, double n_double = FusedMultiplyAdd(abs_θ, (2 / π), mantissa_reduce_shifter) - mantissa_reduce_shifter; + + // Don't move the computation of `n` before the loop, it generates some + // extra moves. Argument value; std::int64_t n; if constexpr (preserve_sign) { From 1dc06346c23fa48b34d10155b5d76dabdd5cb932 Mon Sep 17 00:00:00 2001 From: pleroy Date: Mon, 27 Jan 2025 22:18:26 +0100 Subject: [PATCH 5/5] Comment. --- numerics/sin_cos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numerics/sin_cos.cpp b/numerics/sin_cos.cpp index 90af3e5ae1..04f81d24ac 100644 --- a/numerics/sin_cos.cpp +++ b/numerics/sin_cos.cpp @@ -171,8 +171,8 @@ inline void Reduce(Argument const θ, FusedMultiplyAdd(abs_θ, (2 / π), mantissa_reduce_shifter) - mantissa_reduce_shifter; - // Don't move the computation of `n` before the loop, it generates some - // extra moves. + // Don't move the computation of `n` after the if, it generates some extra + // moves. Argument value; std::int64_t n; if constexpr (preserve_sign) {