Skip to content

Commit

Permalink
Fix incorrect reduction operations in avx512f
Browse files Browse the repository at this point in the history
  • Loading branch information
TDecking committed Jul 2, 2024
1 parent d5ba463 commit a277aab
Showing 1 changed file with 10 additions and 23 deletions.
33 changes: 10 additions & 23 deletions crates/core_arch/src/x86/avx512f.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31571,7 +31571,7 @@ pub unsafe fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
simd_reduce_max(simd_select_bitmask(
k,
a.as_i32x16(),
_mm512_undefined_epi32().as_i32x16(),
i32x16::splat(i32::MIN),
))
}

Expand All @@ -31592,11 +31592,7 @@ pub unsafe fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
// NOTE(review): this listing is a diff rendered without +/- markers. The
// multi-line call directly below appears to be the text REMOVED by the commit
// and the single-line call after it the text ADDED -- confirm against the commit.
//
// Old form: lanes not selected by `k` are filled with 0 (presumably
// `simd_select_bitmask` takes the third operand where the mask bit is clear).
// With a signed max, that 0 wins whenever every selected lane is negative.
simd_reduce_max(simd_select_bitmask(
k,
a.as_i64x8(),
_mm512_set1_epi64(0).as_i64x8(),
))
// New form: unselected lanes are filled with i64::MIN, the identity element
// for a signed maximum, so they can never exceed a selected lane.
simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN)))
}

/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
Expand All @@ -31619,7 +31615,7 @@ pub unsafe fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
simd_reduce_max(simd_select_bitmask(
k,
a.as_u32x16(),
_mm512_undefined_epi32().as_u32x16(),
_mm512_setzero_si512().as_u32x16(),
))
}

Expand All @@ -31643,7 +31639,7 @@ pub unsafe fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
simd_reduce_max(simd_select_bitmask(
k,
a.as_u64x8(),
_mm512_set1_epi64(0).as_u64x8(),
_mm512_setzero_si512().as_u64x8(),
))
}

Expand Down Expand Up @@ -31718,7 +31714,7 @@ pub unsafe fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
simd_reduce_min(simd_select_bitmask(
k,
a.as_i32x16(),
_mm512_undefined_epi32().as_i32x16(),
i32x16::splat(i32::MAX),
))
}

Expand All @@ -31742,7 +31738,7 @@ pub unsafe fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
simd_reduce_min(simd_select_bitmask(
k,
a.as_i64x8(),
_mm512_set1_epi64(0).as_i64x8(),
i64x8::splat(i64::MAX),
))
}

Expand All @@ -31766,7 +31762,7 @@ pub unsafe fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
simd_reduce_min(simd_select_bitmask(
k,
a.as_u32x16(),
_mm512_undefined_epi32().as_u32x16(),
u32x16::splat(u32::MAX),
))
}

Expand All @@ -31790,7 +31786,7 @@ pub unsafe fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
simd_reduce_min(simd_select_bitmask(
k,
a.as_u64x8(),
_mm512_set1_epi64(0).as_u64x8(),
u64x8::splat(u64::MAX),
))
}

Expand Down Expand Up @@ -31862,11 +31858,7 @@ pub unsafe fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
// NOTE(review): this listing is a diff rendered without +/- markers. The
// multi-line call directly below appears to be the text REMOVED by the commit
// and the single-line call after it the text ADDED -- confirm against the commit.
//
// Old form: lanes not selected by `k` are filled with 0xFF (presumably
// `simd_select_bitmask` takes the third operand where the mask bit is clear).
// 0xFF has only the low 8 bits set, so AND-ing it in clears bits 8..=31 of the
// result whenever at least one lane is masked off.
simd_reduce_and(simd_select_bitmask(
k,
a.as_i32x16(),
_mm512_set1_epi32(0xFF).as_i32x16(),
))
// New form: unselected lanes are filled with -1 (all bits set), the identity
// element for bitwise AND, so they leave the result unchanged.
simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1)))
}

/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
Expand All @@ -31886,12 +31878,7 @@ pub unsafe fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
// NOTE(review): this listing is a diff rendered without +/- markers. The
// multi-line call directly below appears to be the text REMOVED by the commit
// and the single-line call after it the text ADDED -- confirm against the commit.
//
// Old form: the fill value ORs together bits 0 through 7, i.e. 0xFF, so only
// the low 8 bits are set. Presumably `simd_select_bitmask` takes the third
// operand where the mask bit is clear; AND-ing that fill in clears bits 8..=63
// of the result whenever at least one lane is masked off.
simd_reduce_and(simd_select_bitmask(
k,
a.as_i64x8(),
_mm512_set1_epi64(1 << 0 | 1 << 1 | 1 << 2 | 1 << 3 | 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7)
.as_i64x8(),
))
// New form: unselected lanes are filled with -1 (all 64 bits set), the
// identity element for bitwise AND, so they leave the result unchanged.
simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1)))
}

/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
Expand Down

0 comments on commit a277aab

Please sign in to comment.