Skip to content
This repository was archived by the owner on Aug 2, 2024. It is now read-only.

Commit cc5b33c

Browse files
committed
Optimize several instructions in plane translation
1 parent f8a2a90 commit cc5b33c

File tree

1 file changed

+5
-4
lines changed

1 file changed

+5
-4
lines changed

public/klein/detail/x86/x86_sandwich.hpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -179,10 +179,11 @@ namespace detail
179179
// a1*b1 + a2*b2 + a3*b3 stored in the low component of tmp
180180
__m128 tmp = hi_dp(a, b);
181181

182-
// Scale by 2
183-
float b0;
184-
_mm_store_ss(&b0, b);
185-
tmp = _mm_mul_ps(tmp, _mm_set_ps(0.f, 0.f, 0.f, 2.f / b0));
182+
__m128 inv_b = rcp_nr1(b);
183+
// Double the low lane of inv_b so that it holds 2 / b0
184+
inv_b = _mm_add_ss(inv_b, inv_b);
185+
inv_b = _mm_and_ps(inv_b, _mm_castsi128_ps(_mm_set_epi32(0, 0, 0, -1)));
186+
tmp = _mm_mul_ss(tmp, inv_b);
186187

187188
// Add to the plane
188189
return _mm_add_ps(a, tmp);

0 commit comments

Comments
 (0)