Skip to content

Commit 445451e

Browse files
authored
Add unsigned_abs to i8,i16,i32,i64 to provide well defined behavior for abs (#152)
* add unsigned version of abs for clearer code when handling corner cases
* fix typo
* fixed wasm typo
* fix neon cast
* add pub(crate) to unsigned types
* fixed i8x16
* x86 doesn't have abs i64
* typo
* fix 64 bit on intel
* fix abs
* remove trait accidentally added during merge
* replace map with explicit array after reading warning about bad debug perf
1 parent 4875c14 commit 445451e

18 files changed

+318
-18
lines changed

src/i16x8_.rs

+43-1
Original file line numberDiff line numberDiff line change
@@ -696,10 +696,52 @@ impl i16x8 {
696696
} else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
697697
unsafe {Self { neon: vabsq_s16(self.neon) }}
698698
} else {
699-
self.is_negative().blend(self.neg(), self)
699+
let arr: [i16; 8] = cast(self);
700+
cast(
701+
[
702+
arr[0].wrapping_abs(),
703+
arr[1].wrapping_abs(),
704+
arr[2].wrapping_abs(),
705+
arr[3].wrapping_abs(),
706+
arr[4].wrapping_abs(),
707+
arr[5].wrapping_abs(),
708+
arr[6].wrapping_abs(),
709+
arr[7].wrapping_abs(),
710+
])
711+
}
712+
}
713+
}
714+
715+
#[inline]
716+
#[must_use]
717+
pub fn unsigned_abs(self) -> u16x8 {
718+
pick! {
719+
if #[cfg(target_feature="sse2")] {
720+
let mask = shr_imm_i16_m128i::<15>(self.sse);
721+
u16x8 { sse: bitxor_m128i(add_i16_m128i(self.sse, mask), mask) }
722+
} else if #[cfg(target_feature="ssse3")] {
723+
u16x8 { sse: abs_i16_m128i(self.sse) }
724+
} else if #[cfg(target_feature="simd128")] {
725+
u16x8 { simd: i16x8_abs(self.simd) }
726+
} else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
727+
unsafe {u16x8 { neon: vreinterpretq_u16_s16(vabsq_s16(self.neon)) }}
728+
} else {
729+
let arr: [i16; 8] = cast(self);
730+
cast(
731+
[
732+
arr[0].unsigned_abs(),
733+
arr[1].unsigned_abs(),
734+
arr[2].unsigned_abs(),
735+
arr[3].unsigned_abs(),
736+
arr[4].unsigned_abs(),
737+
arr[5].unsigned_abs(),
738+
arr[6].unsigned_abs(),
739+
arr[7].unsigned_abs(),
740+
])
700741
}
701742
}
702743
}
744+
703745
#[inline]
704746
#[must_use]
705747
pub fn max(self, rhs: Self) -> Self {

src/i32x4_.rs

+22
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,28 @@ impl i32x4 {
439439
}
440440
}
441441

442+
#[inline]
443+
#[must_use]
444+
pub fn unsigned_abs(self) -> u32x4 {
445+
pick! {
446+
if #[cfg(target_feature="ssse3")] {
447+
u32x4 { sse: abs_i32_m128i(self.sse) }
448+
} else if #[cfg(target_feature="simd128")] {
449+
u32x4 { simd: i32x4_abs(self.simd) }
450+
} else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
451+
unsafe {u32x4 { neon: vreinterpretq_u32_s32(vabsq_s32(self.neon)) }}
452+
} else {
453+
let arr: [i32; 4] = cast(self);
454+
cast([
455+
arr[0].unsigned_abs(),
456+
arr[1].unsigned_abs(),
457+
arr[2].unsigned_abs(),
458+
arr[3].unsigned_abs(),
459+
])
460+
}
461+
}
462+
}
463+
442464
/// horizontal add of all the elements of the vector
443465
#[inline]
444466
#[must_use]

src/i32x8_.rs

+16
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,22 @@ impl i32x8 {
369369
}
370370
}
371371
}
372+
373+
#[inline]
374+
#[must_use]
375+
pub fn unsigned_abs(self) -> u32x8 {
376+
pick! {
377+
if #[cfg(target_feature="avx2")] {
378+
u32x8 { avx2: abs_i32_m256i(self.avx2) }
379+
} else {
380+
u32x8 {
381+
a : self.a.unsigned_abs(),
382+
b : self.b.unsigned_abs(),
383+
}
384+
}
385+
}
386+
}
387+
372388
#[inline]
373389
#[must_use]
374390
pub fn max(self, rhs: Self) -> Self {

src/i64x2_.rs

+40
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,46 @@ impl i64x2 {
398398
}
399399
}
400400

401+
#[inline]
402+
#[must_use]
403+
pub fn abs(self) -> Self {
404+
pick! {
405+
// x86 doesn't have this builtin
406+
if #[cfg(target_feature="simd128")] {
407+
Self { simd: i64x2_abs(self.simd) }
408+
} else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
409+
unsafe {Self { neon: vabsq_s64(self.neon) }}
410+
} else {
411+
let arr: [i64; 2] = cast(self);
412+
cast(
413+
[
414+
arr[0].wrapping_abs(),
415+
arr[1].wrapping_abs(),
416+
])
417+
}
418+
}
419+
}
420+
421+
#[inline]
422+
#[must_use]
423+
pub fn unsigned_abs(self) -> u64x2 {
424+
pick! {
425+
// x86 doesn't have this builtin
426+
if #[cfg(target_feature="simd128")] {
427+
u64x2 { simd: i64x2_abs(self.simd) }
428+
} else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
429+
unsafe {u64x2 { neon: vreinterpretq_u64_s64(vabsq_s64(self.neon)) }}
430+
} else {
431+
let arr: [i64; 2] = cast(self);
432+
cast(
433+
[
434+
arr[0].unsigned_abs(),
435+
arr[1].unsigned_abs(),
436+
])
437+
}
438+
}
439+
}
440+
401441
#[inline]
402442
#[must_use]
403443
pub fn round_float(self) -> f64x2 {

src/i64x4_.rs

+46
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,52 @@ impl i64x4 {
308308
}
309309
}
310310

311+
#[inline]
312+
#[must_use]
313+
pub fn abs(self) -> Self {
314+
pick! {
315+
if #[cfg(target_feature="avx2")] {
316+
// avx x86 doesn't have this builtin
317+
let arr: [i64; 4] = cast(self);
318+
cast(
319+
[
320+
arr[0].wrapping_abs(),
321+
arr[1].wrapping_abs(),
322+
arr[2].wrapping_abs(),
323+
arr[3].wrapping_abs(),
324+
])
325+
} else {
326+
Self {
327+
a : self.a.abs(),
328+
b : self.b.abs(),
329+
}
330+
}
331+
}
332+
}
333+
334+
#[inline]
335+
#[must_use]
336+
pub fn unsigned_abs(self) -> u64x4 {
337+
pick! {
338+
if #[cfg(target_feature="avx2")] {
339+
// avx x86 doesn't have this builtin
340+
let arr: [i64; 4] = cast(self);
341+
cast(
342+
[
343+
arr[0].unsigned_abs(),
344+
arr[1].unsigned_abs(),
345+
arr[2].unsigned_abs(),
346+
arr[3].unsigned_abs(),
347+
])
348+
} else {
349+
u64x4 {
350+
a : self.a.unsigned_abs(),
351+
b : self.b.unsigned_abs(),
352+
}
353+
}
354+
}
355+
}
356+
311357
#[inline]
312358
#[must_use]
313359
pub fn round_float(self) -> f64x4 {

src/i8x16_.rs

+36
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,42 @@ impl i8x16 {
527527
}
528528
}
529529
}
530+
531+
#[inline]
532+
#[must_use]
533+
pub fn unsigned_abs(self) -> u8x16 {
534+
pick! {
535+
if #[cfg(target_feature="ssse3")] {
536+
u8x16 { sse: abs_i8_m128i(self.sse) }
537+
} else if #[cfg(target_feature="simd128")] {
538+
u8x16 { simd: i8x16_abs(self.simd) }
539+
} else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
540+
unsafe { u8x16 { neon: vreinterpretq_u8_s8(vabsq_s8(self.neon)) }}
541+
} else {
542+
let arr: [i8; 16] = cast(self);
543+
cast(
544+
[
545+
arr[0].unsigned_abs(),
546+
arr[1].unsigned_abs(),
547+
arr[2].unsigned_abs(),
548+
arr[3].unsigned_abs(),
549+
arr[4].unsigned_abs(),
550+
arr[5].unsigned_abs(),
551+
arr[6].unsigned_abs(),
552+
arr[7].unsigned_abs(),
553+
arr[8].unsigned_abs(),
554+
arr[9].unsigned_abs(),
555+
arr[10].unsigned_abs(),
556+
arr[11].unsigned_abs(),
557+
arr[12].unsigned_abs(),
558+
arr[13].unsigned_abs(),
559+
arr[14].unsigned_abs(),
560+
arr[15].unsigned_abs(),
561+
])
562+
}
563+
}
564+
}
565+
530566
#[inline]
531567
#[must_use]
532568
pub fn max(self, rhs: Self) -> Self {

src/u16x8_.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@ pick! {
44
if #[cfg(target_feature="sse2")] {
55
#[derive(Default, Clone, Copy, PartialEq, Eq)]
66
#[repr(C, align(16))]
7-
pub struct u16x8 { sse: m128i }
7+
pub struct u16x8 { pub(crate) sse: m128i }
88
} else if #[cfg(target_feature="simd128")] {
99
use core::arch::wasm32::*;
1010

1111
#[derive(Clone, Copy)]
1212
#[repr(transparent)]
13-
pub struct u16x8 { simd: v128 }
13+
pub struct u16x8 { pub(crate) simd: v128 }
1414

1515
impl Default for u16x8 {
1616
fn default() -> Self {
@@ -29,7 +29,7 @@ pick! {
2929
use core::arch::aarch64::*;
3030
#[repr(C)]
3131
#[derive(Copy, Clone)]
32-
pub struct u16x8 { neon : uint16x8_t }
32+
pub struct u16x8 { pub(crate) neon : uint16x8_t }
3333

3434
impl Default for u16x8 {
3535
#[inline]
@@ -51,7 +51,7 @@ pick! {
5151
} else {
5252
#[derive(Default, Clone, Copy, PartialEq, Eq)]
5353
#[repr(C, align(16))]
54-
pub struct u16x8 { arr: [u16;8] }
54+
pub struct u16x8 { pub(crate) arr: [u16;8] }
5555
}
5656
}
5757

src/u32x4_.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@ pick! {
44
if #[cfg(target_feature="sse2")] {
55
#[derive(Default, Clone, Copy, PartialEq, Eq)]
66
#[repr(C, align(16))]
7-
pub struct u32x4 { sse: m128i }
7+
pub struct u32x4 { pub(crate) sse: m128i }
88
} else if #[cfg(target_feature="simd128")] {
99
use core::arch::wasm32::*;
1010

1111
#[derive(Clone, Copy)]
1212
#[repr(transparent)]
13-
pub struct u32x4 { simd: v128 }
13+
pub struct u32x4 { pub(crate) simd: v128 }
1414

1515
impl Default for u32x4 {
1616
fn default() -> Self {
@@ -29,7 +29,7 @@ pick! {
2929
use core::arch::aarch64::*;
3030
#[repr(C)]
3131
#[derive(Copy, Clone)]
32-
pub struct u32x4 { neon : uint32x4_t }
32+
pub struct u32x4 { pub(crate) neon : uint32x4_t }
3333

3434
impl Default for u32x4 {
3535
#[inline]

src/u32x8_.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ pick! {
44
if #[cfg(target_feature="avx2")] {
55
#[derive(Default, Clone, Copy, PartialEq, Eq)]
66
#[repr(C, align(32))]
7-
pub struct u32x8 { avx2: m256i }
7+
pub struct u32x8 { pub(crate) avx2: m256i }
88
} else {
99
#[derive(Default, Clone, Copy, PartialEq, Eq)]
1010
#[repr(C, align(32))]
11-
pub struct u32x8 { a : u32x4, b : u32x4 }
11+
pub struct u32x8 { pub(crate) a : u32x4, pub(crate) b : u32x4 }
1212
}
1313
}
1414

src/u64x2_.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@ pick! {
44
if #[cfg(target_feature="sse2")] {
55
#[derive(Default, Clone, Copy, PartialEq, Eq)]
66
#[repr(C, align(16))]
7-
pub struct u64x2 { sse: m128i }
7+
pub struct u64x2 { pub(crate) sse: m128i }
88
} else if #[cfg(target_feature="simd128")] {
99
use core::arch::wasm32::*;
1010

1111
#[derive(Clone, Copy)]
1212
#[repr(transparent)]
13-
pub struct u64x2 { simd: v128 }
13+
pub struct u64x2 { pub(crate) simd: v128 }
1414

1515
impl Default for u64x2 {
1616
fn default() -> Self {
@@ -29,7 +29,7 @@ pick! {
2929
use core::arch::aarch64::*;
3030
#[repr(C)]
3131
#[derive(Copy, Clone)]
32-
pub struct u64x2 { neon : uint64x2_t }
32+
pub struct u64x2 { pub(crate) neon : uint64x2_t }
3333

3434
impl Default for u64x2 {
3535
#[inline]

src/u64x4_.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ pick! {
44
if #[cfg(target_feature="avx2")] {
55
#[derive(Default, Clone, Copy, PartialEq, Eq)]
66
#[repr(C, align(32))]
7-
pub struct u64x4 { avx2: m256i }
7+
pub struct u64x4 { pub(crate) avx2: m256i }
88
} else {
99
#[derive(Default, Clone, Copy, PartialEq, Eq)]
1010
#[repr(C, align(32))]
11-
pub struct u64x4 { a : u64x2, b : u64x2 }
11+
pub struct u64x4 { pub(crate) a : u64x2, pub(crate) b : u64x2 }
1212
}
1313
}
1414

src/u8x16_.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ pick! {
1010

1111
#[derive(Clone, Copy)]
1212
#[repr(transparent)]
13-
pub struct u8x16 { simd: v128 }
13+
pub struct u8x16 { pub(crate) simd: v128 }
1414

1515
impl Default for u8x16 {
1616
fn default() -> Self {
@@ -29,7 +29,7 @@ pick! {
2929
use core::arch::aarch64::*;
3030
#[repr(C)]
3131
#[derive(Copy, Clone)]
32-
pub struct u8x16 { neon : uint8x16_t }
32+
pub struct u8x16 { pub(crate) neon : uint8x16_t }
3333

3434
impl Default for u8x16 {
3535
#[inline]
@@ -51,7 +51,7 @@ pick! {
5151
} else {
5252
#[derive(Default, Clone, Copy, PartialEq, Eq)]
5353
#[repr(C, align(16))]
54-
pub struct u8x16 { arr: [u8;16] }
54+
pub struct u8x16 { pub(crate) arr: [u8;16] }
5555
}
5656
}
5757

0 commit comments

Comments
 (0)