From e67120fc2f2cb9a355c7b717508134699aa016f1 Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Mon, 1 Jul 2024 14:24:03 +0200 Subject: [PATCH] Fix incorrect reduction operations in avx512f --- crates/core_arch/src/x86/avx512f.rs | 33 +++++++++-------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 8c88d3aa2f..3d16a409a6 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -31571,7 +31571,7 @@ pub unsafe fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 { simd_reduce_max(simd_select_bitmask( k, a.as_i32x16(), - _mm512_undefined_epi32().as_i32x16(), + i32x16::splat(i32::MIN), )) } @@ -31592,11 +31592,7 @@ pub unsafe fn _mm512_reduce_max_epi64(a: __m512i) -> i64 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 { - simd_reduce_max(simd_select_bitmask( - k, - a.as_i64x8(), - _mm512_set1_epi64(0).as_i64x8(), - )) + simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) } /// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a. @@ -31619,7 +31615,7 @@ pub unsafe fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 { simd_reduce_max(simd_select_bitmask( k, a.as_u32x16(), - _mm512_undefined_epi32().as_u32x16(), + _mm512_setzero_si512().as_u32x16(), )) } @@ -31643,7 +31639,7 @@ pub unsafe fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 { simd_reduce_max(simd_select_bitmask( k, a.as_u64x8(), - _mm512_set1_epi64(0).as_u64x8(), + _mm512_setzero_si512().as_u64x8(), )) } @@ -31718,7 +31714,7 @@ pub unsafe fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 { simd_reduce_min(simd_select_bitmask( k, a.as_i32x16(), - _mm512_undefined_epi32().as_i32x16(), + i32x16::splat(0x7fffffff), )) } @@ -31742,7 +31738,7 @@ pub unsafe fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 { simd_reduce_min(simd_select_bitmask( k, a.as_i64x8(), - _mm512_set1_epi64(0).as_i64x8(), + i64x8::splat(0x7fffffff_ffffffff), )) } @@ -31766,7 +31762,7 @@ pub unsafe fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 { simd_reduce_min(simd_select_bitmask( k, a.as_u32x16(), - _mm512_undefined_epi32().as_u32x16(), + u32x16::splat(0xffffffff), )) } @@ -31790,7 +31786,7 @@ pub unsafe fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 { simd_reduce_min(simd_select_bitmask( k, a.as_u64x8(), - _mm512_set1_epi64(0).as_u64x8(), + u64x8::splat(0xffffffff_ffffffff), )) } @@ -31862,11 +31858,7 @@ pub unsafe fn _mm512_reduce_and_epi32(a: __m512i) -> i32 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 { - simd_reduce_and(simd_select_bitmask( - k, - a.as_i32x16(), - _mm512_set1_epi32(0xFF).as_i32x16(), - )) + simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1))) } /// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a. @@ -31886,12 +31878,7 @@ pub unsafe fn _mm512_reduce_and_epi64(a: __m512i) -> i64 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 { - simd_reduce_and(simd_select_bitmask( - k, - a.as_i64x8(), - _mm512_set1_epi64(1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7) - .as_i64x8(), - )) + simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1))) } /// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.