Fix rounding in SSE conversion functions (#25130)

juj · web-flow · commit e57726cccd5b · 2025-09-02T22:38:21.000Z
The value 3.6 was not a particularly interesting float for the tests, so it has been replaced with the more interesting 1022.99998194495 from #25129. Fixes: #25129
diff --git a/system/include/compat/emmintrin.h b/system/include/compat/emmintrin.h
@@ -385,7 +385,7 @@ _mm_cvtpd_epi32(__m128d __a)
   {
     double e = __a[i];
     int x = lrint(e);
-    if ((x != 0 || fabs(e) < 2.0) && !isnan(e) && e <= INT_MAX && e >= INT_MIN)
+    if (e <= INT_MAX && e >= INT_MIN && (x != 0 || fabs(e) < 2.0))
       m[i] = x;
     else
       m[i] = (int)0x80000000;
@@ -399,7 +399,7 @@ _mm_cvtsd_si32(__m128d __a)
   // TODO: OPTIMIZE!
   double e = __a[0];
   int x = lrint(e);
-  if ((x != 0 || fabs(e) < 2.0) && !isnan(e) && e <= INT_MAX && e >= INT_MIN)
+  if (e <= INT_MAX && e >= INT_MIN && (x != 0 || fabs(e) < 2.0))
     return x;
   else
     return (int)0x80000000;
@@ -433,11 +433,11 @@ _mm_cvttpd_epi32(__m128d __a)
   int m[2];
   for(int i = 0; i < 2; ++i)
   {
-    float elem = __a[i];
-    if ((lrint(elem) != 0 || fabs(elem) < 2.0) && !isnanf(elem) && elem <= INT_MAX && elem >= INT_MIN)
+    double elem = __a[i];
+    if (elem < 2147483648.0 && elem >= -2147483648.0 && (lrint(elem) != 0 || fabs(elem) < 2.0))
       // Use the trapping instruction here since we have explicit bounds checks
       // above.
-      m[i] = __builtin_wasm_trunc_s_i32_f32(elem);
+      m[i] = __builtin_wasm_trunc_s_i32_f64(elem);
     else
       m[i] = (int)0x80000000;
   }
@@ -448,11 +448,11 @@ static __inline__ int __attribute__((__always_inline__, __nodebug__))
 _mm_cvttsd_si32(__m128d __a)
 {
   // TODO: OPTIMIZE!
-  float elem = __a[0];
-  if ((lrint(elem) != 0 || fabs(elem) < 2.0) && !isnanf(elem) && elem <= INT_MAX && elem >= INT_MIN)
+  double elem = __a[0];
+  if (elem < 2147483648.0 && elem >= -2147483648.0 && (lrint(elem) != 0 || fabs(elem) < 2.0))
     // Use the trapping instruction here since we have explicit bounds checks
     // above.
-    return __builtin_wasm_trunc_s_i32_f32(elem);
+    return __builtin_wasm_trunc_s_i32_f64(elem);
   else
     return (int)0x80000000;
 }
@@ -1010,7 +1010,7 @@ _mm_cvtsd_si64(__m128d __a)
   double e = __a[0];
   if (isnan(e) || isinf(e)) return 0x8000000000000000LL;
   long long x = llrint(e);
-  if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabs(e) < 2.f) && e <= LLONG_MAX && e >= LLONG_MIN)
+  if (e <= LLONG_MAX && e >= LLONG_MIN && (x != 0 || fabs(e) < 2.f))
     return x;
   else
     return 0x8000000000000000LL;
@@ -1023,10 +1023,10 @@ _mm_cvttsd_si64(__m128d __a)
   double e = __a[0];
   if (isnan(e) || isinf(e) || e > LLONG_MAX || e < LLONG_MIN) return 0x8000000000000000LL;
   long long x = llrint(e);
-  if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabs(e) < 2.f))
+  if (x != 0 || fabs(e) < 2.f)
     // Use the trapping instruction here since we have explicit bounds checks
     // above
-    return __builtin_wasm_trunc_s_i64_f32(e);
+    return __builtin_wasm_trunc_s_i64_f64(e);
   else
     return 0x8000000000000000LL;
 }
@@ -1049,7 +1049,7 @@ _mm_cvtps_epi32(__m128 __a)
   {
     double e = __a[i];
     int x = lrint(e);
-    if ((x != 0 || fabs(e) < 2.0) && !isnan(e) && e <= INT_MAX && e >= INT_MIN)
+    if (e <= INT_MAX && e >= INT_MIN && (x != 0 || fabs(e) < 2.0))
       u.x[i] = x;
     else
       u.x[i] = (int)0x80000000;
@@ -1068,8 +1068,7 @@ _mm_cvttps_epi32(__m128 __a)
   for(int i = 0; i < 4; ++i)
   {
     float e = __a[i];
-    int x = lrint(e);
-    if ((x != 0 || fabs(e) < 2.0) && !isnanf(e) && e <= INT_MAX && e >= INT_MIN)
+    if (e < 2147483648.0f && e >= -2147483648.0f && (lrint(e) != 0 || fabs(e) < 2.0))
       // Use the trapping instruction here since we have explicit bounds checks
       // above.
       u.x[i] = __builtin_wasm_trunc_s_i32_f32(e);
diff --git a/system/include/compat/xmmintrin.h b/system/include/compat/xmmintrin.h
@@ -597,9 +597,8 @@ _mm_cvtsi32_ss(__m128 __a, int __b)
 static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) _mm_cvtss_si32(__m128 __a)
 {
   float e = ((__f32x4)__a)[0];
-  int x = lrint(e);
-  if ((x != 0 || fabsf(e)) < 2.f && !isnan(e) && e <= INT_MAX && e >= INT_MIN)
-    return x;
+  if (e < 2147483648.0f && e >= -2147483648.0f && (lrint(e) != 0 || fabsf(e) < 2.f))
+    return lrint(e);
   else
     return (int)0x80000000;
 }
@@ -608,8 +607,7 @@ static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SL
 static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) _mm_cvttss_si32(__m128 __a)
 {
   float e = ((__f32x4)__a)[0];
-  int x = lrint(e);
-  if ((x != 0 || fabsf(e) < 2.f) && !isnanf(e) && e <= INT_MAX && e >= INT_MIN)
+  if (e < 2147483648.0f && e >= -2147483648.0f && (lrint(e) != 0 || fabsf(e) < 2.f))
     return (int)e;
   else
     return (int)0x80000000;
@@ -629,7 +627,7 @@ _mm_cvtss_si64(__m128 __a)
 {
   float e = ((__f32x4)__a)[0];
   long long x = llrintf(e);
-  if ((x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(e) < 2.f)) && !isnanf(e) && e <= LLONG_MAX && e >= LLONG_MIN)
+  if (e <= LLONG_MAX && e >= LLONG_MIN && (x != 0 || fabsf(e) < 2.f))
     return x;
   else
     return 0x8000000000000000LL;
@@ -640,7 +638,7 @@ _mm_cvttss_si64(__m128 __a)
 {
   float e = ((__f32x4)__a)[0];
   long long x = llrintf(e);
-  if (x != 0xFFFFFFFF00000000ULL && (x != 0 || fabsf(e) < 2.f) && !isnanf(e) && e <= LLONG_MAX && e >= LLONG_MIN)
+  if (e <= LLONG_MAX && e >= LLONG_MIN && (x != 0 || fabsf(e) < 2.f))
     return (long long)e;
   else
     return 0x8000000000000000LL;
diff --git a/test/sse/test_sse.h b/test/sse/test_sse.h
@@ -48,6 +48,10 @@ __attribute__((aligned(32)))
 float interesting_floats_[] = {
   -INFINITY,
   -FLT_MAX,
+  -4294967296.0f, // https://github.com/emscripten-core/emscripten/pull/25130
+  -2147483648.1f,
+  -2147483648.0f,
+  -2147483647.9f,
   -2.5f,
   -1.5f,
   -1.4f,
@@ -63,10 +67,14 @@ float interesting_floats_[] = {
   0.5f,
   0.8f,
   1.0f,
-  1.5f,
+  1.5f, // Test different half-way values to see if banker's rounding happens.
   2.5f,
   3.5f,
-  3.6f,
+  1022.99998194495f, // https://github.com/emscripten-core/emscripten/issues/25129
+  2147483647.9f, // https://github.com/emscripten-core/emscripten/pull/25130
+  2147483648.0f,
+  2147483648.1f,
+  4294967296.0f,
   FLT_MAX,
   INFINITY,
   NAN,
@@ -83,6 +91,10 @@ __attribute__((aligned(32)))
 double interesting_doubles_[] = {
   -INFINITY,
   -FLT_MAX,
+  -4294967296.0, // https://github.com/emscripten-core/emscripten/pull/25130
+  -2147483648.1,
+  -2147483648.0,
+  -2147483647.9,
   -2.5,
   -1.5,
   -1.4,
@@ -98,10 +110,14 @@ double interesting_doubles_[] = {
   0.5,
   0.8,
   1.0,
-  1.5,
+  1.5, // Test different half-way values to see if banker's rounding happens.
   2.5,
   3.5,
-  3.6,
+  1022.99998194495, // https://github.com/emscripten-core/emscripten/issues/25129
+  2147483647.9,
+  2147483648.0, // https://github.com/emscripten-core/emscripten/pull/25130
+  2147483648.1,
+  4294967296.0,
   FLT_MAX,
   INFINITY,
   NAN,