diff --git a/CHANGELOG.md b/CHANGELOG.md index d2ac0d0c..048b9bcd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ Note: In this file, do not use the hard wrap in the middle of a sentence for com ## [Unreleased] +- Add `compare_exchange`, `compare_exchange_weak`, and `fetch_update`. + - Support x86_64 128-bit atomics when the `cmpxchg16b` target feature is enabled at compile-time. ## [0.2.8] - 2022-06-21 diff --git a/README.md b/README.md index f02f100c..70addaeb 100644 --- a/README.md +++ b/README.md @@ -19,24 +19,24 @@ This crate provides a way to soundly perform such operations. Currently, x86, x86_64, ARM (v6-m, v7+), AArch64, RISC-V, MIPS32r2, MIPS64r2, PowerPC, and s390x are supported. -| target_arch | primitives | [load]/[store] | [swap] | -| --------------------------------- | --------------------------------------------------- |:--------------:|:------:| -| x86 | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| x86_64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| x86_64 (+cmpxchg16b) | i128,u128 | ✓ | ✓ | -| arm (v6-m, v7+) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| arm (v7-a) | i64,u64 | ✓ | ✓ | -| aarch64 \[2] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | -| riscv32 | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| riscv64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓\[1] | -| mips \[3] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| mips64 \[3] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| powerpc \[3] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| powerpc64 \[3] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| powerpc64 (le or pwr8+) \[3] \[4] | i128,u128 | ✓ | ✓ | -| s390x \[3] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | - -\[1] ARM's atomic swap is not available on v6-m (thumbv6m). RISC-V's atomic swap is not available on targets without the A (or G) extension such as riscv32i, riscv32imc, etc.
+| target_arch | primitives | load/store | RMW | +| --------------------------------- | --------------------------------------------------- |:----------:|:-----:| +| x86 | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| x86_64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| x86_64 (+cmpxchg16b) | i128,u128 | ✓ | ✓ | +| arm (v6-m, v7+) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| arm (v7-a) | i64,u64 | ✓ | ✓ | +| aarch64 \[2] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | +| riscv32 | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| riscv64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓\[1] | +| mips \[3] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| mips64 \[3] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| powerpc \[3] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| powerpc64 \[3] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| powerpc64 (le or pwr8+) \[3] \[4] | i128,u128 | ✓ | ✓ | +| s390x \[3] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | + +\[1] ARM's atomic RMW operations are not available on v6-m (thumbv6m). RISC-V's atomic RMW operations are not available on targets without the A (or G) extension such as riscv32i, riscv32imc, etc.
\[2] If target features such as `lse` and `lse2` are enabled at compile-time, more efficient instructions are used.
\[3] Requires nightly due to `#![feature(asm_experimental_arch)]`.
\[4] target-cpu `pwr8`, `pwr9`, or `pwr10`.
@@ -48,9 +48,6 @@ Feel free to submit an issue if your target is not supported yet. - [portable-atomic]: Portable atomic types including support for 128-bit atomics, atomic float, etc. - [atomic-memcpy]: Byte-wise atomic memcpy. -[load]: https://docs.rs/atomic-maybe-uninit/latest/atomic_maybe_uninit/struct.AtomicMaybeUninit.html#method.load -[store]: https://docs.rs/atomic-maybe-uninit/latest/atomic_maybe_uninit/struct.AtomicMaybeUninit.html#method.store -[swap]: https://docs.rs/atomic-maybe-uninit/latest/atomic_maybe_uninit/struct.AtomicMaybeUninit.html#method.swap [atomic-memcpy]: https://github.com/taiki-e/atomic-memcpy [portable-atomic]: https://github.com/taiki-e/portable-atomic [undefined-behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html diff --git a/src/arch/aarch64.rs b/src/arch/aarch64.rs index bc08c472..5a811daf 100644 --- a/src/arch/aarch64.rs +++ b/src/arch/aarch64.rs @@ -6,9 +6,9 @@ // - portable-atomic https://github.com/taiki-e/portable-atomic // // Generated asm: -// - aarch64 https://godbolt.org/z/hhKxn4b6j -// - aarch64 (+lse) https://godbolt.org/z/acK5a1rxb -// - aarch64 (+lse,+lse2) https://godbolt.org/z/oe3754brh +// - aarch64 https://godbolt.org/z/z841Wjz67 +// - aarch64 (+lse) https://godbolt.org/z/fzz4E1K6h +// - aarch64 (+lse,+lse2) https://godbolt.org/z/6G5x748Yj use core::{ arch::asm, @@ -16,7 +16,7 @@ use core::{ sync::atomic::Ordering, }; -use crate::raw::{AtomicLoad, AtomicStore, AtomicSwap}; +use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; #[cfg(target_pointer_width = "32")] macro_rules! ptr_modifier { @@ -173,6 +173,166 @@ macro_rules! 
atomic { } } } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let success = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + let mut r: i32; + #[cfg(any(target_feature = "lse", atomic_maybe_uninit_target_feature = "lse"))] + macro_rules! atomic_cmpxchg { + ($acquire:tt, $release:tt) => {{ + asm!( + // load from old/new to old_tmp/new_tmp + concat!("ldr", $asm_suffix, " {old_tmp", $val_modifier, "}, [{old", ptr_modifier!(), "}]"), + concat!("ldr", $asm_suffix, " {new_tmp", $val_modifier, "}, [{new", ptr_modifier!(), "}]"), + // cas writes the current value to the first register, + // so copy the `old`'s value for later comparison. 
+ concat!("mov {out_tmp", $val_modifier, "}, {old_tmp", $val_modifier, "}"), + // (atomic) compare and exchange + // Refs: https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/CASA--CASAL--CAS--CASL--CASAL--CAS--CASL + concat!("cas", $acquire, $release, $asm_suffix, " {out_tmp", $val_modifier, "}, {new_tmp", $val_modifier, "}, [{dst", ptr_modifier!(), "}]"), + concat!("cmp {out_tmp", $val_modifier, "}, {old_tmp", $val_modifier, "}"), + // store tmp to out + concat!("str", $asm_suffix, " {out_tmp", $val_modifier, "}, [{out", ptr_modifier!(), "}]"), + "cset {r:w}, eq", + dst = inout(reg) dst => _, + old = in(reg) old, + old_tmp = lateout(reg) _, + new = inout(reg) new => _, + new_tmp = lateout(reg) _, + out = inout(reg) out => _, + out_tmp = lateout(reg) _, + r = lateout(reg) r, + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + r != 0 + }}; + } + #[cfg(not(any(target_feature = "lse", atomic_maybe_uninit_target_feature = "lse")))] + macro_rules! 
atomic_cmpxchg { + ($acquire:tt, $release:tt) => {{ + asm!( + // load from old/new to old_tmp/new_tmp + concat!("ldr", $asm_suffix, " {new_tmp", $val_modifier, "}, [{new", ptr_modifier!(), "}]"), + concat!("ldr", $asm_suffix, " {old_tmp", $val_modifier, "}, [{old", ptr_modifier!(), "}]"), + // (atomic) compare and exchange + "2:", + concat!("ld", $acquire, "xr", $asm_suffix, " {out_tmp", $val_modifier, "}, [{dst", ptr_modifier!(), "}]"), + concat!("cmp {out_tmp", $val_modifier, "}, {old_tmp", $val_modifier, "}"), + "b.ne 3f", + concat!("st", $release, "xr", $asm_suffix, " {r:w}, {new_tmp", $val_modifier, "}, [{dst", ptr_modifier!(), "}]"), + // 0 if the store was successful, 1 if no store was performed + "cbnz {r:w}, 2b", + "b 4f", + "3:", + "mov {r:w}, #1", + "clrex", + "4:", + // store out_tmp to out + concat!("str", $asm_suffix, " {out_tmp", $val_modifier, "}, [{out", ptr_modifier!(), "}]"), + dst = inout(reg) dst => _, + old = in(reg) old, + old_tmp = lateout(reg) _, + new = inout(reg) new => _, + new_tmp = lateout(reg) _, + out = inout(reg) out => _, + out_tmp = lateout(reg) _, + r = lateout(reg) r, + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + r == 0 + }}; + } + match success { + Ordering::Relaxed => atomic_cmpxchg!("", ""), + Ordering::Acquire => atomic_cmpxchg!("a", ""), + Ordering::Release => atomic_cmpxchg!("", "l"), + // AcqRel and SeqCst compare_exchange are equivalent. 
+ Ordering::AcqRel | Ordering::SeqCst => atomic_cmpxchg!("a", "l"), + _ => unreachable_unchecked!("{:?}", success), + } + } + } + #[cfg(not(any(target_feature = "lse", atomic_maybe_uninit_target_feature = "lse")))] + #[inline] + unsafe fn atomic_compare_exchange_weak( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let success = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange_weak`. + unsafe { + let r: i32; + macro_rules! atomic_cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + // load from old/new to old_tmp/new_tmp + concat!("ldr", $asm_suffix, " {new_tmp", $val_modifier, "}, [{new", ptr_modifier!(), "}]"), + concat!("ldr", $asm_suffix, " {old_tmp", $val_modifier, "}, [{old", ptr_modifier!(), "}]"), + // (atomic) compare and exchange + concat!("ld", $acquire, "xr", $asm_suffix, " {out_tmp", $val_modifier, "}, [{dst", ptr_modifier!(), "}]"), + concat!("cmp {out_tmp", $val_modifier, "}, {old_tmp", $val_modifier, "}"), + "b.ne 3f", + concat!("st", $release, "xr", $asm_suffix, " {r:w}, {new_tmp", $val_modifier, "}, [{dst", ptr_modifier!(), "}]"), + "b 4f", + "3:", + "mov {r:w}, #1", + "clrex", + "4:", + // store out_tmp to out + concat!("str", $asm_suffix, " {out_tmp", $val_modifier, "}, [{out", ptr_modifier!(), "}]"), + dst = inout(reg) dst => _, + old = in(reg) old, + old_tmp = lateout(reg) _, + new = inout(reg) new => _, + new_tmp = lateout(reg) _, + out = inout(reg) out => _, + out_tmp = lateout(reg) _, + r = lateout(reg) r, + options(nostack), + ) + }; + } + match success { + 
Ordering::Relaxed => atomic_cmpxchg!("", ""), + Ordering::Acquire => atomic_cmpxchg!("a", ""), + Ordering::Release => atomic_cmpxchg!("", "l"), + // AcqRel and SeqCst compare_exchange are equivalent. + Ordering::AcqRel | Ordering::SeqCst => atomic_cmpxchg!("a", "l"), + _ => unreachable_unchecked!("{:?}", success), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + r == 0 + } + } + } }; } @@ -414,6 +574,124 @@ macro_rules! atomic128 { } } } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let success = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + let r: i32; + #[cfg(any(target_feature = "lse", atomic_maybe_uninit_target_feature = "lse"))] + macro_rules! atomic_cmpxchg { + ($acquire:tt, $release:tt) => {{ + asm!( + // load from old/new to x6-x7/x4-x5 pairs + concat!("ldp x6, x7, [{old", ptr_modifier!(), "}]"), + concat!("ldp x4, x5, [{new", ptr_modifier!(), "}]"), + // casp writes the current value to the first register pair, + // so copy the `old`'s value for later comparison. 
+ "mov x8, x6", + "mov x9, x7", + // (atomic) compare and exchange + // Refs: https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/CASPA--CASPAL--CASP--CASPL--CASPAL--CASP--CASPL + concat!("casp", $acquire, $release, " x8, x9, x4, x5, [{dst", ptr_modifier!(), "}]"), + "eor {tmp_hi}, x9, x7", + "eor {tmp_lo}, x8, x6", + "orr {tmp_hi}, {tmp_lo}, {tmp_hi}", + // store tmp to out + concat!("stp x8, x9, [{out", ptr_modifier!(), "}]"), + "cmp {tmp_hi}, #0", + "cset {r:w}, eq", + dst = inout(reg) dst => _, + old = in(reg) old, + new = inout(reg) new => _, + out = inout(reg) out => _, + r = lateout(reg) r, + tmp_hi = lateout(reg) _, + tmp_lo = lateout(reg) _, + // must be allocated to even/odd register pair + lateout("x6") _, + lateout("x7") _, + // must be allocated to even/odd register pair + lateout("x4") _, + lateout("x5") _, + // must be allocated to even/odd register pair + lateout("x8") _, + lateout("x9") _, + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + r != 0 + }}; + } + #[cfg(not(any(target_feature = "lse", atomic_maybe_uninit_target_feature = "lse")))] + macro_rules! 
atomic_cmpxchg { + ($acquire:tt, $release:tt) => {{ + asm!( + // load from old/new to old/new pair + concat!("ldp {new_lo}, {new_hi}, [{new", ptr_modifier!(), "}]"), + concat!("ldp {old_lo}, {old_hi}, [{old", ptr_modifier!(), "}]"), + // (atomic) compare and exchange + "2:", + concat!("ld", $acquire, "xp {out_lo}, {out_hi}, [{dst", ptr_modifier!(), "}]"), + "cmp {out_lo}, {old_lo}", + "cset {r:w}, ne", + "cmp {out_hi}, {old_hi}", + "cinc {r:w}, {r:w}, ne", + "cbz {r:w}, 3f", + concat!("st", $release, "xp {r:w}, {out_lo}, {out_hi}, [{dst", ptr_modifier!(), "}]"), + // 0 if the store was successful, 1 if no store was performed + "cbnz {r:w}, 2b", + "mov {r:w}, #1", + "b 4f", + "3:", + concat!("st", $release, "xp {r:w}, {new_lo}, {new_hi}, [{dst", ptr_modifier!(), "}]"), + // 0 if the store was successful, 1 if no store was performed + "cbnz {r:w}, 2b", + "4:", + // store out_tmp to out + concat!("stp {out_lo}, {out_hi}, [{out", ptr_modifier!(), "}]"), + dst = inout(reg) dst => _, + old = in(reg) old, + old_hi = lateout(reg) _, + old_lo = lateout(reg) _, + new = inout(reg) new => _, + new_hi = lateout(reg) _, + new_lo = lateout(reg) _, + out = inout(reg) out => _, + out_hi = lateout(reg) _, + out_lo = lateout(reg) _, + r = lateout(reg) r, + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + r == 0 + }}; + } + match success { + Ordering::Relaxed => atomic_cmpxchg!("", ""), + Ordering::Acquire => atomic_cmpxchg!("a", ""), + Ordering::Release => atomic_cmpxchg!("", "l"), + // AcqRel and SeqCst compare_exchange are equivalent. 
+ Ordering::AcqRel | Ordering::SeqCst => atomic_cmpxchg!("a", "l"), + _ => unreachable_unchecked!("{:?}", success), + } + } + } + } }; } diff --git a/src/arch/arm.rs b/src/arch/arm.rs index ee7559d3..0e18f796 100644 --- a/src/arch/arm.rs +++ b/src/arch/arm.rs @@ -1,8 +1,8 @@ // Generated asm: -// - armv7-a https://godbolt.org/z/T9cnP84EM -// - armv7-r https://godbolt.org/z/x8sheaP1Y -// - armv6-m https://godbolt.org/z/Pen5ej4fj -// - armv7-m https://godbolt.org/z/YY54PP3jz +// - armv7-a https://godbolt.org/z/5d77z7q9M +// - armv7-r https://godbolt.org/z/3zrv8M156 +// - armv7-m https://godbolt.org/z/reWzr1Khn +// - armv6-m https://godbolt.org/z/dYTovPcWT use core::{ arch::asm, @@ -11,7 +11,7 @@ use core::{ }; #[cfg(any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7"))] -use crate::raw::AtomicSwap; +use crate::raw::{AtomicCompareExchange, AtomicSwap}; use crate::raw::{AtomicLoad, AtomicStore}; #[cfg(not(any(target_feature = "mclass", atomic_maybe_uninit_target_feature = "mclass")))] @@ -156,6 +156,291 @@ macro_rules! atomic { } } } + #[cfg(any(target_feature = "v7", atomic_maybe_uninit_target_feature = "v7"))] + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + use core::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release, SeqCst}; + let mut r: i32; + macro_rules! 
cmpxchg_store_relaxed { + ($acquire_success:expr, $acquire_failure:expr) => { + asm!( + concat!("ldr", $asm_suffix, " {old_tmp}, [{old}]"), + concat!("ldr", $asm_suffix, " {new_tmp}, [{new}]"), + "2:", + concat!("ldrex", $asm_suffix, " {out_tmp}, [{dst}]"), + "cmp {out_tmp}, {old_tmp}", + "bne 3f", + concat!("strex", $asm_suffix, " {r}, {new_tmp}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "bne 2b", + $acquire_success, + "b 4f", + "3:", + "mov {r}, #1", + "clrex", + $acquire_failure, + "4:", + // store out_tmp to out + concat!("str", $asm_suffix, " {out_tmp}, [{out}]"), + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = inout(reg) new => _, + out = inout(reg) out => _, + out_tmp = lateout(reg) _, + old_tmp = lateout(reg) _, + new_tmp = lateout(reg) _, + options(nostack), + ) + }; + } + macro_rules! cmpxchg_release { + ($acquire_failure:expr) => { + asm!( + concat!("ldr", $asm_suffix, " {old_tmp}, [{old}]"), + concat!("ldr", $asm_suffix, " {new_tmp}, [{new}]"), + concat!("ldrex", $asm_suffix, " {out_tmp}, [{dst}]"), + "cmp {out_tmp}, {old_tmp}", + "bne 3f", + asm_dmb!(), // release + "2:", + concat!("strex", $asm_suffix, " {r}, {new_tmp}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "beq 4f", + concat!("ldrex", $asm_suffix, " {out_tmp}, [{dst}]"), + "cmp {out_tmp}, {old_tmp}", + "beq 2b", + "3:", + "mov {r}, #1", + "clrex", + $acquire_failure, + "4:", + // store out_tmp to out + concat!("str", $asm_suffix, " {out_tmp}, [{out}]"), + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = inout(reg) new => _, + out = inout(reg) out => _, + out_tmp = lateout(reg) _, + old_tmp = lateout(reg) _, + new_tmp = lateout(reg) _, + options(nostack), + ) + }; + } + macro_rules! 
cmpxchg_acqrel { + ($acquire_failure:expr) => { + asm!( + concat!("ldr", $asm_suffix, " {old_tmp}, [{old}]"), + concat!("ldr", $asm_suffix, " {new_tmp}, [{new}]"), + concat!("ldrex", $asm_suffix, " {out_tmp}, [{dst}]"), + "cmp {out_tmp}, {old_tmp}", + "bne 3f", + asm_dmb!(), // release + "2:", + concat!("strex", $asm_suffix, " {r}, {new_tmp}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "beq 4f", + concat!("ldrex", $asm_suffix, " {out_tmp}, [{dst}]"), + "cmp {out_tmp}, {old_tmp}", + "beq 2b", + "3:", + "mov {r}, #1", + "clrex", + $acquire_failure, + "b 5f", + "4:", + asm_dmb!(), // acquire_success + "5:", + // store out_tmp to out + concat!("str", $asm_suffix, " {out_tmp}, [{out}]"), + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = inout(reg) new => _, + out = inout(reg) out => _, + out_tmp = lateout(reg) _, + old_tmp = lateout(reg) _, + new_tmp = lateout(reg) _, + options(nostack), + ) + }; + } + match (success, failure) { + (Relaxed, Relaxed) => cmpxchg_store_relaxed!("", ""), + (Relaxed, Acquire) => cmpxchg_store_relaxed!("", asm_dmb!()), + (Acquire, Relaxed) => cmpxchg_store_relaxed!(asm_dmb!(), ""), + (Acquire, Acquire) => cmpxchg_store_relaxed!(asm_dmb!(), asm_dmb!()), + (Release, Relaxed) => cmpxchg_release!(""), + (Release, Acquire) => cmpxchg_release!(asm_dmb!()), + // AcqRel and SeqCst compare_exchange are equivalent. 
+ (AcqRel | SeqCst, Relaxed) => cmpxchg_acqrel!(""), + (AcqRel | SeqCst, _) => cmpxchg_acqrel!(asm_dmb!()), + // TODO: upgrade success to SeqCst for now + (_, SeqCst) => cmpxchg_acqrel!(asm_dmb!()), + _ => unreachable_unchecked!("{:?}", (success, failure)), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + r == 0 + } + } + #[inline] + unsafe fn atomic_compare_exchange_weak( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + use core::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release, SeqCst}; + let mut r: i32; + macro_rules! cmpxchg { + ($acquire:expr, $release:expr) => { + asm!( + concat!("ldr", $asm_suffix, " {old_tmp}, [{old}]"), + concat!("ldr", $asm_suffix, " {new_tmp}, [{new}]"), + concat!("ldrex", $asm_suffix, " {out_tmp}, [{dst}]"), + "cmp {out_tmp}, {old_tmp}", + "bne 3f", + $release, + concat!("strex", $asm_suffix, " {r}, {new_tmp}, [{dst}]"), + "b 4f", + "3:", + "mov {r}, #1", + "clrex", + "4:", + $acquire, + // store out_tmp to out + concat!("str", $asm_suffix, " {out_tmp}, [{out}]"), + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = inout(reg) new => _, + out = inout(reg) out => _, + out_tmp = lateout(reg) _, + old_tmp = lateout(reg) _, + new_tmp = lateout(reg) _, + options(nostack), + ) + }; + } + macro_rules! 
cmpxchg_fail_load_relaxed { + ($release:expr) => { + asm!( + concat!("ldr", $asm_suffix, " {old_tmp}, [{old}]"), + concat!("ldr", $asm_suffix, " {new_tmp}, [{new}]"), + concat!("ldrex", $asm_suffix, " {out_tmp}, [{dst}]"), + "cmp {out_tmp}, {old_tmp}", + "bne 3f", + $release, + concat!("strex", $asm_suffix, " {r}, {new_tmp}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "beq 4f", + "b 5f", + "3:", + "mov {r}, #1", + "clrex", + "b 5f", + "4:", + asm_dmb!(), // acquire_success + "5:", + // store out_tmp to out + concat!("str", $asm_suffix, " {out_tmp}, [{out}]"), + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = inout(reg) new => _, + out = inout(reg) out => _, + out_tmp = lateout(reg) _, + old_tmp = lateout(reg) _, + new_tmp = lateout(reg) _, + options(nostack), + ) + }; + } + macro_rules! cmpxchg_success_load_relaxed { + ($release:expr) => { + asm!( + concat!("ldr", $asm_suffix, " {old_tmp}, [{old}]"), + concat!("ldr", $asm_suffix, " {new_tmp}, [{new}]"), + concat!("ldrex", $asm_suffix, " {out_tmp}, [{dst}]"), + "cmp {out_tmp}, {old_tmp}", + "bne 3f", + $release, + concat!("strex", $asm_suffix, " {r}, {new_tmp}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "beq 5f", + "b 4f", + "3:", + "mov {r}, #1", + "clrex", + "4:", + asm_dmb!(), // acquire_failure + "5:", + // store out_tmp to out + concat!("str", $asm_suffix, " {out_tmp}, [{out}]"), + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = inout(reg) new => _, + out = inout(reg) out => _, + out_tmp = lateout(reg) _, + old_tmp = lateout(reg) _, + new_tmp = lateout(reg) _, + options(nostack), + ) + }; + } + match (success, failure) { + (Relaxed, Relaxed) => cmpxchg!("", ""), + (Relaxed, Acquire) => cmpxchg_success_load_relaxed!(""), + (Acquire, Relaxed) => cmpxchg_fail_load_relaxed!(""), + (Acquire, Acquire) => cmpxchg!(asm_dmb!(), ""), + (Release, Relaxed) => 
cmpxchg!("", asm_dmb!()), + (Release, Acquire) => cmpxchg_success_load_relaxed!(asm_dmb!()), + // AcqRel and SeqCst compare_exchange are equivalent. + (AcqRel | SeqCst, Relaxed) => cmpxchg_fail_load_relaxed!(asm_dmb!()), + (AcqRel | SeqCst, _) => cmpxchg!(asm_dmb!(), asm_dmb!()), + // TODO: upgrade success to SeqCst for now + (_, SeqCst) => cmpxchg!(asm_dmb!(), asm_dmb!()), + _ => unreachable_unchecked!("{:?}", (success, failure)), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + r == 0 + } + } + } }; } @@ -323,6 +608,349 @@ macro_rules! atomic64 { } } } + #[cfg(any(target_feature = "aclass", atomic_maybe_uninit_target_feature = "aclass"))] + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + use core::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release, SeqCst}; + let mut r: i32; + macro_rules! 
cmpxchg_store_relaxed { + ($acquire_success:expr, $acquire_failure:expr) => { + asm!( + "ldrd r2, r3, [{old}]", + "ldrd r8, r9, [{new}]", + "2:", + "ldrexd r4, r5, [{dst}]", + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "bne 3f", + "strexd {r}, r8, r9, [{dst}]", + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "bne 2b", + $acquire_success, + "b 4f", + "3:", + "mov {r}, #1", + "clrex", + $acquire_failure, + "4:", + // store r4-r5 pair to out + "strd r4, r5, [{out}]", + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = inout(reg) new => _, + out = inout(reg) out => _, + tmp = out(reg) _, + // old pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // out pair - must be even-numbered and not R14 + lateout("r4") _, + lateout("r5") _, + // new pair - must be even-numbered and not R14 + lateout("r8") _, + lateout("r9") _, + options(nostack), + ) + }; + } + macro_rules! cmpxchg_release { + ($acquire_failure:expr) => { + asm!( + "ldrd r2, r3, [{old}]", + "ldrd r8, r9, [{new}]", + "ldrexd r4, r5, [{dst}]", + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "bne 3f", + asm_dmb!(), // release + "2:", + "strexd {r}, r8, r9, [{dst}]", + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "beq 4f", + "ldrexd r4, r5, [{dst}]", + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "beq 2b", + "3:", + "mov {r}, #1", + "clrex", + $acquire_failure, + "4:", + // store r4-r5 pair to out + "strd r4, r5, [{out}]", + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = inout(reg) new => _, + out = inout(reg) out => _, + tmp = out(reg) _, + // old pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // out pair - must be even-numbered and not R14 + lateout("r4") _, + lateout("r5") _, + // new pair - must be even-numbered and not R14 + lateout("r8") _, + lateout("r9") _, + 
options(nostack), + ) + }; + } + macro_rules! cmpxchg_acqrel { + ($acquire_failure:expr) => { + asm!( + "ldrd r2, r3, [{old}]", + "ldrd r8, r9, [{new}]", + "ldrexd r4, r5, [{dst}]", + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "bne 3f", + asm_dmb!(), // release + "2:", + "strexd {r}, r8, r9, [{dst}]", + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "beq 4f", + "ldrexd r4, r5, [{dst}]", + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "beq 2b", + "3:", + "mov {r}, #1", + "clrex", + $acquire_failure, + "b 5f", + "4:", + asm_dmb!(), // acquire_success + "5:", + // store r4-r5 pair to out + "strd r4, r5, [{out}]", + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = inout(reg) new => _, + out = inout(reg) out => _, + tmp = out(reg) _, + // old pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // out pair - must be even-numbered and not R14 + lateout("r4") _, + lateout("r5") _, + // new pair - must be even-numbered and not R14 + lateout("r8") _, + lateout("r9") _, + options(nostack), + ) + }; + } + match (success, failure) { + (Relaxed, Relaxed) => cmpxchg_store_relaxed!("", ""), + (Relaxed, Acquire) => cmpxchg_store_relaxed!("", asm_dmb!()), + (Acquire, Relaxed) => cmpxchg_store_relaxed!(asm_dmb!(), ""), + (Acquire, Acquire) => cmpxchg_store_relaxed!(asm_dmb!(), asm_dmb!()), + (Release, Relaxed) => cmpxchg_release!(""), + (Release, Acquire) => cmpxchg_release!(asm_dmb!()), + // AcqRel and SeqCst compare_exchange are equivalent. 
+ (AcqRel | SeqCst, Relaxed) => cmpxchg_acqrel!(""), + (AcqRel | SeqCst, _) => cmpxchg_acqrel!(asm_dmb!()), + // TODO: upgrade success to SeqCst for now + (_, SeqCst) => cmpxchg_acqrel!(asm_dmb!()), + _ => unreachable_unchecked!("{:?}", (success, failure)), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + r == 0 + } + } + #[inline] + unsafe fn atomic_compare_exchange_weak( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + use core::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release, SeqCst}; + let mut r: i32; + macro_rules! cmpxchg { + ($acquire:expr, $release:expr) => { + asm!( + "ldrd r2, r3, [{old}]", + "ldrd r8, r9, [{new}]", + "ldrexd r4, r5, [{dst}]", + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "bne 3f", + $release, + "strexd {r}, r8, r9, [{dst}]", + "b 4f", + "3:", + "mov {r}, #1", + "clrex", + "4:", + $acquire, + // store r4-r5 pair to out + "strd r4, r5, [{out}]", + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = inout(reg) new => _, + out = inout(reg) out => _, + tmp = out(reg) _, + // old pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // out pair - must be even-numbered and not R14 + lateout("r4") _, + lateout("r5") _, + // new pair - must be even-numbered and not R14 + lateout("r8") _, + lateout("r9") _, + options(nostack), + ) + }; + } + macro_rules! 
cmpxchg_fail_load_relaxed { + ($release:expr) => { + asm!( + "ldrd r2, r3, [{old}]", + "ldrd r8, r9, [{new}]", + "ldrexd r4, r5, [{dst}]", + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "bne 3f", + $release, + "strexd {r}, r8, r9, [{dst}]", + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "beq 4f", + "b 5f", + "3:", + "mov {r}, #1", + "clrex", + "b 5f", + "4:", + asm_dmb!(), // acquire_success + "5:", + // store r4-r5 pair to out + "strd r4, r5, [{out}]", + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = inout(reg) new => _, + out = inout(reg) out => _, + tmp = out(reg) _, + // old pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // out pair - must be even-numbered and not R14 + lateout("r4") _, + lateout("r5") _, + // new pair - must be even-numbered and not R14 + lateout("r8") _, + lateout("r9") _, + options(nostack), + ) + }; + } + macro_rules! cmpxchg_success_load_relaxed { + ($release:expr) => { + asm!( + "ldrd r2, r3, [{old}]", + "ldrd r8, r9, [{new}]", + "ldrexd r4, r5, [{dst}]", + "eor {tmp}, r5, r3", + "eor {r}, r4, r2", + "orrs {r}, {r}, {tmp}", + "bne 3f", + $release, + "strexd {r}, r8, r9, [{dst}]", + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "beq 5f", + "b 4f", + "3:", + "mov {r}, #1", + "clrex", + "4:", + asm_dmb!(), // acquire_failure + "5:", + // store r4-r5 pair to out + "strd r4, r5, [{out}]", + dst = inout(reg) dst => _, + r = lateout(reg) r, + old = in(reg) old, + new = inout(reg) new => _, + out = inout(reg) out => _, + tmp = out(reg) _, + // old pair - must be even-numbered and not R14 + out("r2") _, + out("r3") _, + // out pair - must be even-numbered and not R14 + lateout("r4") _, + lateout("r5") _, + // new pair - must be even-numbered and not R14 + lateout("r8") _, + lateout("r9") _, + options(nostack), + ) + }; + } + match (success, failure) { + (Relaxed, Relaxed) => cmpxchg!("", ""), + 
(Relaxed, Acquire) => cmpxchg_success_load_relaxed!(""), + (Acquire, Relaxed) => cmpxchg_fail_load_relaxed!(""), + (Acquire, Acquire) => cmpxchg!(asm_dmb!(), ""), + (Release, Relaxed) => cmpxchg!("", asm_dmb!()), + (Release, Acquire) => cmpxchg_success_load_relaxed!(asm_dmb!()), + // AcqRel and SeqCst compare_exchange are equivalent. + (AcqRel | SeqCst, Relaxed) => cmpxchg_fail_load_relaxed!(asm_dmb!()), + (AcqRel | SeqCst, _) => cmpxchg!(asm_dmb!(), asm_dmb!()), + // TODO: upgrade success to SeqCst for now + (_, SeqCst) => cmpxchg!(asm_dmb!(), asm_dmb!()), + _ => unreachable_unchecked!("{:?}", (success, failure)), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + r == 0 + } + } + } }; } diff --git a/src/arch/armv8m.rs b/src/arch/armv8m.rs index db2abb13..aca2b7f3 100644 --- a/src/arch/armv8m.rs +++ b/src/arch/armv8m.rs @@ -1,6 +1,6 @@ // Generated asm: -// - armv8-m baseline https://godbolt.org/z/xsPrKj88T -// - armv8-m mainline https://godbolt.org/z/zjbddd9Es +// - armv8-m baseline https://godbolt.org/z/crb989Te3 +// - armv8-m mainline https://godbolt.org/z/PrezjhsY6 use core::{ arch::asm, @@ -8,7 +8,7 @@ use core::{ sync::atomic::Ordering, }; -use crate::raw::{AtomicLoad, AtomicStore, AtomicSwap}; +use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; macro_rules! atomic { ($int_type:ident, $asm_suffix:tt) => { @@ -133,6 +133,135 @@ macro_rules! 
atomic { } } } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let success = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + let mut r: i32; + macro_rules! atomic_cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + // load from old/new to old_tmp/new_tmp + concat!("ldr", $asm_suffix, " {old_tmp}, [{old}]"), + concat!("ldr", $asm_suffix, " {new_tmp}, [{new}]"), + "2:", + // load from dst to out_tmp + concat!("ld", $acquire, "ex", $asm_suffix, " {out_tmp}, [{dst}]"), + "cmp {out_tmp}, {old_tmp}", + "bne 3f", + // store val to dst + concat!("st", $release, "ex", $asm_suffix, " {r}, {new_tmp}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cmp {r}, #0", + "bne 2b", + "b 4f", + "3:", + "movs {r}, #1", + "4:", + // store out_tmp to out + concat!("str", $asm_suffix, " {out_tmp}, [{out}]"), + dst = inout(reg) dst => _, + old = in(reg) old, + new = inout(reg) new => _, + out = inout(reg) out => _, + r = lateout(reg) r, + out_tmp = lateout(reg) _, + old_tmp = lateout(reg) _, + new_tmp = lateout(reg) _, + options(nostack), + ) + }; + } + match success { + Ordering::Relaxed => atomic_cmpxchg!("r", "r"), + Ordering::Acquire => atomic_cmpxchg!("a", "r"), + Ordering::Release => atomic_cmpxchg!("r", "l"), + // AcqRel and SeqCst swaps are equivalent. 
+ Ordering::AcqRel | Ordering::SeqCst => atomic_cmpxchg!("a", "l"), + _ => unreachable_unchecked!("{:?}", success), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + r == 0 + } + } + #[inline] + unsafe fn atomic_compare_exchange_weak( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let success = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange_weak`. + unsafe { + let mut r: i32; + macro_rules! atomic_cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + // load from old/new to old_tmp/new_tmp + concat!("ldr", $asm_suffix, " {old_tmp}, [{old}]"), + concat!("ldr", $asm_suffix, " {new_tmp}, [{new}]"), + // load from dst to out_tmp + concat!("ld", $acquire, "ex", $asm_suffix, " {out_tmp}, [{dst}]"), + "cmp {out_tmp}, {old_tmp}", + "bne 3f", + // store val to dst + concat!("st", $release, "ex", $asm_suffix, " {r}, {new_tmp}, [{dst}]"), + "b 4f", + "3:", + "clrex", + "movs {r}, #1", + "4:", + // store out_tmp to out + concat!("str", $asm_suffix, " {out_tmp}, [{out}]"), + dst = inout(reg) dst => _, + old = in(reg) old, + new = inout(reg) new => _, + out = inout(reg) out => _, + r = lateout(reg) r, + out_tmp = lateout(reg) _, + old_tmp = lateout(reg) _, + new_tmp = lateout(reg) _, + options(nostack), + ) + }; + } + match success { + Ordering::Relaxed => atomic_cmpxchg!("r", "r"), + Ordering::Acquire => atomic_cmpxchg!("a", "r"), + Ordering::Release => atomic_cmpxchg!("r", "l"), + // AcqRel and SeqCst swaps are equivalent. 
+ Ordering::AcqRel | Ordering::SeqCst => atomic_cmpxchg!("a", "l"), + _ => unreachable_unchecked!("{:?}", success), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + // 0 if the store was successful, 1 if no store was performed + r == 0 + } + } + } }; } diff --git a/src/arch/mips.rs b/src/arch/mips.rs index edbeff3d..0ad36dc4 100644 --- a/src/arch/mips.rs +++ b/src/arch/mips.rs @@ -1,8 +1,8 @@ // Generated asm: -// - mips https://godbolt.org/z/fExesqqWb -// - mipsel https://godbolt.org/z/q8rrhb4MT -// - mips64 https://godbolt.org/z/7jhvvWxEx -// - mips64el https://godbolt.org/z/dK5n56qd8 +// - mips https://godbolt.org/z/877bMEs75 +// - mipsel https://godbolt.org/z/P3dqK1Pos +// - mips64 https://godbolt.org/z/W8EaaaYeo +// - mips64el https://godbolt.org/z/n35shYWEE use core::{ arch::asm, @@ -10,7 +10,7 @@ use core::{ sync::atomic::Ordering, }; -use crate::raw::{AtomicLoad, AtomicStore, AtomicSwap}; +use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; #[cfg(target_endian = "little")] macro_rules! if_be { @@ -213,6 +213,74 @@ macro_rules! atomic { } } } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let success = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + let mut r: usize; + macro_rules! 
atomic_cmpxchg { + ($acquire:expr, $release:expr) => { + asm!( + ".set noat", + // load from old/new to old_tmp/new_tmp + concat!("l", $asm_suffix, " {old_tmp}, 0({old})"), + concat!("l", $asm_suffix, " {new_tmp}, 0({new})"), + $release, // release fence + "2:", + // load from dst to out_tmp + concat!("ll", $asm_ll_suffix, " {out_tmp}, 0({dst})"), + "bne {out_tmp}, {old_tmp}, 3f", + "move {r}, {new_tmp}", + // store new to dst + concat!("sc", $asm_ll_suffix, " {r}, 0({dst})"), + // 1 if the store was successful, 0 if no store was performed + "beqz {r}, 2b", + "3:", + $acquire, // acquire fence + "xor {new_tmp}, {out_tmp}, {old_tmp}", + // store out_tmp to out + concat!("s", $asm_suffix, " {out_tmp}, 0({out})"), + "sltiu {r}, {new_tmp}, 1", + ".set at", + dst = inout(reg) dst => _, + old = in(reg) old, + new = inout(reg) new => _, + out = inout(reg) out => _, + r = lateout(reg) r, + out_tmp = lateout(reg) _, + old_tmp = out(reg) _, + new_tmp = lateout(reg) _, + options(nostack), + ) + }; + } + match success { + Ordering::Relaxed => atomic_cmpxchg!("", ""), + Ordering::Acquire => atomic_cmpxchg!("sync", ""), + Ordering::Release => atomic_cmpxchg!("", "sync"), + // AcqRel and SeqCst swaps are equivalent. + Ordering::AcqRel | Ordering::SeqCst => atomic_cmpxchg!("sync", "sync"), + _ => unreachable_unchecked!("{:?}", success), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + r != 0 + } + } + } }; } @@ -295,6 +363,93 @@ macro_rules! 
atomic8 { } } } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let success = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + let mut r: usize; + macro_rules! atomic_cmpxchg { + ($acquire:expr, $release:expr) => { + asm!( + // Implement sub-word atomic operations using word-sized LL/SC loop. + // Based on assemblies generated by rustc/LLVM. + // Refs: + // - https://github.com/llvm/llvm-project/blob/03c066ab134f02289df1b61db00294c1da579f9c/llvm/lib/CodeGen/AtomicExpandPass.cpp#L677 + // - https://github.com/llvm/llvm-project/blob/03c066ab134f02289df1b61db00294c1da579f9c/llvm/test/CodeGen/Mips/atomic.ll + ".set noat", + concat!(daddiu!(), " $3, $zero, -4"), + "lbu $2, 0($6)", // new + "lb {tmp}, 0($5)", // old + "ori $5, $zero, 255", + $release, + "and $3, $4, $3", + "andi $4, $4, 3", + if_be!("xori $4, $4, 3"), + "sll $4, $4, 3", + "andi $8, {tmp}, 255", + "andi $2, $2, 255", + "sllv $5, $5, $4", + "sllv $8, $8, $4", + "sllv $9, $2, $4", + "nor $6, $zero, $5", + "2:", + "ll $10, 0($3)", + "and $11, $10, $5", + "bne $11, $8, 3f", + "and $10, $10, $6", + "or $10, $10, $9", + "sc $10, 0($3)", + "beqz $10, 2b", + "3:", + "srlv $2, $11, $4", + "seb $2, $2", + $acquire, + "xor {tmp}, $2, {tmp}", + "sb $2, 0($7)", + "sltiu $2, {tmp}, 1", + ".set at", + tmp = out(reg) _, + out("$2") r, + out("$3") _, // dst (aligned) + inout("$4") dst => _, + inout("$5") old => _, + inout("$6") new => _, + 
in("$7") out, + out("$8") _, + out("$9") _, + out("$10") _, + out("$11") _, + options(nostack), + ) + }; + } + match success { + Ordering::Relaxed => atomic_cmpxchg!("", ""), + Ordering::Acquire => atomic_cmpxchg!("sync", ""), + Ordering::Release => atomic_cmpxchg!("", "sync"), + // AcqRel and SeqCst swaps are equivalent. + Ordering::AcqRel | Ordering::SeqCst => atomic_cmpxchg!("sync", "sync"), + _ => unreachable_unchecked!("{:?}", success), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + r != 0 + } + } + } }; } @@ -377,6 +532,93 @@ macro_rules! atomic16 { } } } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let success = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + let mut r: usize; + macro_rules! atomic_cmpxchg { + ($acquire:expr, $release:expr) => { + asm!( + // Implement sub-word atomic operations using word-sized LL/SC loop. + // Based on assemblies generated by rustc/LLVM. 
+ // Refs: + // - https://github.com/llvm/llvm-project/blob/03c066ab134f02289df1b61db00294c1da579f9c/llvm/lib/CodeGen/AtomicExpandPass.cpp#L677 + // - https://github.com/llvm/llvm-project/blob/03c066ab134f02289df1b61db00294c1da579f9c/llvm/test/CodeGen/Mips/atomic.ll + ".set noat", + concat!(daddiu!(), " $3, $zero, -4"), + "lhu $2, 0($6)", // new + "lh {tmp}, 0($5)", // old + "ori $5, $zero, 65535", + $release, + "and $3, $4, $3", + "andi $4, $4, 3", + if_be!("xori $4, $4, 2"), + "sll $4, $4, 3", + "andi $8, {tmp}, 65535", + "andi $2, $2, 65535", + "sllv $5, $5, $4", + "sllv $8, $8, $4", + "sllv $9, $2, $4", + "nor $6, $zero, $5", + "2:", + "ll $10, 0($3)", + "and $11, $10, $5", + "bne $11, $8, 3f", + "and $10, $10, $6", + "or $10, $10, $9", + "sc $10, 0($3)", + "beqz $10, 2b", + "3:", + "srlv $2, $11, $4", + "seh $2, $2", + $acquire, + "xor {tmp}, $2, {tmp}", + "sh $2, 0($7)", + "sltiu $2, {tmp}, 1", + ".set at", + tmp = out(reg) _, + out("$2") r, + out("$3") _, // dst (aligned) + inout("$4") dst => _, + inout("$5") old => _, + inout("$6") new => _, + in("$7") out, + out("$8") _, + out("$9") _, + out("$10") _, + out("$11") _, + options(nostack), + ) + }; + } + match success { + Ordering::Relaxed => atomic_cmpxchg!("", ""), + Ordering::Acquire => atomic_cmpxchg!("sync", ""), + Ordering::Release => atomic_cmpxchg!("", "sync"), + // AcqRel and SeqCst swaps are equivalent. 
+ Ordering::AcqRel | Ordering::SeqCst => atomic_cmpxchg!("sync", "sync"), + _ => unreachable_unchecked!("{:?}", success), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + r != 0 + } + } + } }; } diff --git a/src/arch/powerpc.rs b/src/arch/powerpc.rs index 00f5ca82..a9cf44fa 100644 --- a/src/arch/powerpc.rs +++ b/src/arch/powerpc.rs @@ -3,10 +3,10 @@ // - https://www.ibm.com/docs/en/aix/7.3?topic=aix-assembler-language-reference // // Generated asm: -// - powerpc https://godbolt.org/z/en6cvhq9r -// - powerpc64 https://godbolt.org/z/oav69v8fc -// - powerpc64 (pwr8) https://godbolt.org/z/8Tc5qM13T -// - powerpc64le https://godbolt.org/z/MK3j5evjh +// - powerpc https://godbolt.org/z/ja7s8hdTs +// - powerpc64 https://godbolt.org/z/eqvzzKcTa +// - powerpc64 (pwr8) https://godbolt.org/z/5ssdef73T +// - powerpc64le https://godbolt.org/z/o8xxzajqY use core::{ arch::asm, @@ -14,8 +14,16 @@ use core::{ sync::atomic::Ordering, }; -use crate::raw::{AtomicLoad, AtomicStore, AtomicSwap}; +use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; +macro_rules! if_d { + ("d", $then:expr, $else:expr) => { + $then + }; + ($asm_suffix:tt, $then:expr, $else:expr) => { + $else + }; +} #[cfg(target_arch = "powerpc64")] #[cfg(any(target_endian = "little", atomic_maybe_uninit_pwr8))] #[cfg(target_endian = "big")] @@ -191,7 +199,7 @@ macro_rules! atomic_load_store { #[rustfmt::skip] macro_rules! atomic { - ($int_type:ident, $ld_suffix:tt, $asm_suffix:tt) => { + ($int_type:ident, $ld_suffix:tt, $asm_suffix:tt, $cmp_suffix:tt) => { atomic_load_store!($int_type, $ld_suffix, $asm_suffix); impl AtomicSwap for $int_type { #[inline] @@ -245,6 +253,76 @@ macro_rules! 
atomic { } } } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let success = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + let mut r: usize; + macro_rules! atomic_cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + // load from old/new to old_tmp/new_tmp pairs + concat!("l", $ld_suffix, " {old_tmp}, 0({old})"), + concat!("l", $ld_suffix, " {new_tmp}, 0({new})"), + // (atomic) compare and exchange + $release, + "2:", + concat!("l", $asm_suffix, "arx {out_tmp}, 0, {dst}"), + concat!("cmp", $cmp_suffix, " {old_tmp}, {out_tmp}"), + "bne %cr0, 3f", + concat!("st", $asm_suffix, "cx. {new_tmp}, 0, {dst}"), + "bne %cr0, 2b", + "b 4f", + "3:", + concat!("st", $asm_suffix, "cx. 
{out_tmp}, 0, {dst}"), + "4:", + "xor {r}, {out_tmp}, {old_tmp}", + $acquire, + // store out_tmp pair to out + concat!("st", $asm_suffix, " {out_tmp}, 0({out})"), + concat!("cntlz", $cmp_suffix, " {r}, {r}"), + if_d!($asm_suffix, "rldicl {r}, {r}, 58, 63", "srwi {r}, {r}, 5"), + dst = inout(reg) dst => _, + old = in(reg) old, + old_tmp = lateout(reg) _, + new = inout(reg) new => _, + new_tmp = lateout(reg) _, + out = inout(reg) out => _, + out_tmp = lateout(reg) _, + r = lateout(reg) r, + out("r0") _, + out("cr0") _, + options(nostack), + ) + }; + } + match success { + Ordering::Relaxed => atomic_cmpxchg!("", ""), + Ordering::Acquire => atomic_cmpxchg!("lwsync", ""), + Ordering::Release => atomic_cmpxchg!("", "lwsync"), + Ordering::AcqRel => atomic_cmpxchg!("lwsync", "lwsync"), + Ordering::SeqCst => atomic_cmpxchg!("lwsync", "sync"), + _ => unreachable_unchecked!("{:?}", success), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + r != 0 + } + } + } }; } @@ -358,6 +436,148 @@ macro_rules! atomic8 { } } } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let success = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + let mut r: usize; + // Implement sub-word atomic operations using word-sized LL/SC loop. + // Based on assemblies generated by rustc/LLVM. 
+ // Refs: + // - https://github.com/llvm/llvm-project/blob/03c066ab134f02289df1b61db00294c1da579f9c/llvm/lib/CodeGen/AtomicExpandPass.cpp#L677 + // - https://github.com/llvm/llvm-project/blob/03c066ab134f02289df1b61db00294c1da579f9c/llvm/test/CodeGen/PowerPC/atomics.ll + #[cfg(target_arch = "powerpc64")] + macro_rules! atomic_cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + "lbz %r4, 0(%r4)", + "lbz %r5, 0(%r5)", + "rlwinm %r7, %r3, 3, 27, 28", + "li %r8, 255", + "xori %r7, %r7, 24", + "rldicr %r3, %r3, 0, 61", + "slw %r8, %r8, %r7", + "slw %r10, %r4, %r7", + "slw %r5, %r5, %r7", + "and %r10, %r10, %r8", + $release, + "and %r9, %r5, %r8", + "2:", + "lwarx %r11, 0, %r3", + "and %r5, %r11, %r8", + "cmpw %r5, %r10", + "bne %cr0, 3f", + "andc %r11, %r11, %r8", + "or %r11, %r11, %r9", + "stwcx. %r11, 0, %r3", + "bne %cr0, 2b", + "b 4f", + "3:", + "stwcx. %r11, 0, %r3", + "4:", + "srw %r5, %r5, %r7", + $acquire, + "xor %r3, %r5, %r4", + "cntlzw %r3, %r3", + "srwi %r3, %r3, 5", + "stb %r5, 0(%r6)", + out("r0") _, + inout("r3") dst => r, + inout("r4") old => _, + inout("r5") new => _, + in("r6") out, + out("r7") _, + out("r8") _, + out("r9") _, + out("r10") _, + out("r11") _, + out("cr0") _, + options(nostack), + ) + }; + } + #[cfg(target_arch = "powerpc")] + macro_rules! atomic_cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + "lbz %r4, 0(%r4)", + "lbz %r8, 0(%r5)", + "rlwinm %r9, %r3, 3, 27, 28", + "li %r7, 255", + "rlwinm %r5, %r3, 0, 0, 29", + "xori %r3, %r9, 24", + "slw %r7, %r7, %r3", + "slw %r8, %r8, %r3", + "slw %r9, %r4, %r3", + "and %r8, %r8, %r7", + "and %r10, %r9, %r7", + $release, + "2:", + "lwarx %r11, 0, %r5", + "and %r9, %r11, %r7", + "cmpw %r9, %r10", + "bne %cr0, 3f", + "andc %r11, %r11, %r7", + "or %r11, %r11, %r8", + "stwcx. %r11, 0, %r5", + "bne %cr0, 2b", + "b 4f", + "3:", + "stwcx. 
%r11, 0, %r5", + "4:", + "srw %r5, %r9, %r3", + "li %r3, 0", + "cmpw 5, 4", + "li %r4, 1", + "bc 12, 2, 5f", + "b 6f", + "5:", + "addi %r3, %r4, 0", + "6:", + $acquire, + "stb %r5, 0(%r6)", + out("r0") _, + inout("r3") dst => r, + inout("r4") old => _, + inout("r5") new => _, + in("r6") out, + out("r7") _, + out("r8") _, + out("r9") _, + out("r10") _, + out("r11") _, + out("cr0") _, + options(nostack), + ) + }; + } + match success { + Ordering::Relaxed => atomic_cmpxchg!("", ""), + Ordering::Acquire => atomic_cmpxchg!("lwsync", ""), + Ordering::Release => atomic_cmpxchg!("", "lwsync"), + Ordering::AcqRel => atomic_cmpxchg!("lwsync", "lwsync"), + Ordering::SeqCst => atomic_cmpxchg!("lwsync", "sync"), + _ => unreachable_unchecked!("{:?}", success), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + r != 0 + } + } + } }; } @@ -473,21 +693,165 @@ macro_rules! atomic16 { } } } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let success = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + let mut r: usize; + // Implement sub-word atomic operations using word-sized LL/SC loop. + // Based on assemblies generated by rustc/LLVM. 
+ // Refs: + // - https://github.com/llvm/llvm-project/blob/03c066ab134f02289df1b61db00294c1da579f9c/llvm/lib/CodeGen/AtomicExpandPass.cpp#L677 + // - https://github.com/llvm/llvm-project/blob/03c066ab134f02289df1b61db00294c1da579f9c/llvm/test/CodeGen/PowerPC/atomics.ll + #[cfg(target_arch = "powerpc64")] + macro_rules! atomic_cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + "lhz %r4, 0(%r4)", + "lhz %r10, 0(%r5)", + "li %r7, 0", + "rlwinm %r8, %r3, 3, 27, 27", + "ori %r9, %r7, 65535", + "xori %r7, %r8, 16", + "rldicr %r5, %r3, 0, 61", + "slw %r8, %r9, %r7", + "slw %r3, %r10, %r7", + "slw %r10, %r4, %r7", + $release, + "and %r9, %r3, %r8", + "and %r10, %r10, %r8", + "2:", + "lwarx %r11, 0, %r5", + "and %r3, %r11, %r8", + "cmpw %r3, %r10", + "bne %cr0, 3f", + "andc %r11, %r11, %r8", + "or %r11, %r11, %r9", + "stwcx. %r11, 0, %r5", + "bne %cr0, 2b", + "b 4f", + "3:", + "stwcx. %r11, 0, %r5", + "4:", + "srw %r5, %r3, %r7", + $acquire, + "xor %r3, %r5, %r4", + "cntlzw %r3, %r3", + "srwi %r3, %r3, 5", + "sth %r5, 0(%r6)", + out("r0") _, + inout("r3") dst => r, + inout("r4") old => _, + inout("r5") new => _, + in("r6") out, + out("r7") _, + out("r8") _, + out("r9") _, + out("r10") _, + out("r11") _, + out("cr0") _, + options(nostack), + ) + }; + } + #[cfg(target_arch = "powerpc")] + macro_rules! atomic_cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + "lhz %r4, 0(%r4)", + "lhz %r8, 0(%r5)", + "li %r7, 0", + "rlwinm %r5, %r3, 3, 27, 27", + "ori %r7, %r7, 65535", + "xori %r5, %r5, 16", + "slw %r7, %r7, %r5", + "slw %r8, %r8, %r5", + "slw %r9, %r4, %r5", + "rlwinm %r3, %r3, 0, 0, 29", + "and %r8, %r8, %r7", + "and %r10, %r9, %r7", + $release, + "2:", + "lwarx %r11, 0, %r3", + "and %r9, %r11, %r7", + "cmpw %r9, %r10", + "bne %cr0, 3f", + "andc %r11, %r11, %r7", + "or %r11, %r11, %r8", + "stwcx. %r11, 0, %r3", + "bne %cr0, 2b", + "b 4f", + "3:", + "stwcx. 
%r11, 0, %r3", + "4:", + "srw %r5, %r9, %r5", + "li %r3, 0", + "cmpw %r5, %r4", + "li %r4, 1", + "bc 12, 2, 5f", + "b 6f", + "5:", + "addi %r3, %r4, 0", + "6:", + $acquire, + "sth %r5, 0(%r6)", + out("r0") _, + inout("r3") dst => r, + inout("r4") old => _, + inout("r5") new => _, + in("r6") out, + out("r7") _, + out("r8") _, + out("r9") _, + out("r10") _, + out("r11") _, + out("cr0") _, + options(nostack), + ) + }; + } + match success { + Ordering::Relaxed => atomic_cmpxchg!("", ""), + Ordering::Acquire => atomic_cmpxchg!("lwsync", ""), + Ordering::Release => atomic_cmpxchg!("", "lwsync"), + Ordering::AcqRel => atomic_cmpxchg!("lwsync", "lwsync"), + Ordering::SeqCst => atomic_cmpxchg!("lwsync", "sync"), + _ => unreachable_unchecked!("{:?}", success), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + r != 0 + } + } + } }; } #[cfg(target_arch = "powerpc64")] #[cfg(any(target_endian = "little", atomic_maybe_uninit_pwr8))] -atomic!(i8, "bz", "b"); +atomic!(i8, "bz", "b", "w"); #[cfg(target_arch = "powerpc64")] #[cfg(any(target_endian = "little", atomic_maybe_uninit_pwr8))] -atomic!(u8, "bz", "b"); +atomic!(u8, "bz", "b", "w"); #[cfg(target_arch = "powerpc64")] #[cfg(any(target_endian = "little", atomic_maybe_uninit_pwr8))] -atomic!(i16, "hz", "h"); +atomic!(i16, "hz", "h", "w"); #[cfg(target_arch = "powerpc64")] #[cfg(any(target_endian = "little", atomic_maybe_uninit_pwr8))] -atomic!(u16, "hz", "h"); +atomic!(u16, "hz", "h", "w"); #[cfg(target_endian = "big")] #[cfg(not(all(target_arch = "powerpc64", atomic_maybe_uninit_pwr8)))] atomic8!(i8, "bz", "b"); @@ -500,20 +864,20 @@ atomic16!(i16, "hz", "h"); #[cfg(target_endian = "big")] #[cfg(not(all(target_arch = "powerpc64", atomic_maybe_uninit_pwr8)))] atomic16!(u16, "hz", "h"); -atomic!(i32, "wz", "w"); -atomic!(u32, "wz", "w"); +atomic!(i32, "wz", "w", "w"); +atomic!(u32, "wz", "w", "w"); #[cfg(target_arch = "powerpc64")] -atomic!(i64, "d", "d"); +atomic!(i64, "d", "d", "d"); #[cfg(target_arch = "powerpc64")] 
-atomic!(u64, "d", "d"); +atomic!(u64, "d", "d", "d"); #[cfg(target_pointer_width = "32")] -atomic!(isize, "wz", "w"); +atomic!(isize, "wz", "w", "w"); #[cfg(target_pointer_width = "32")] -atomic!(usize, "wz", "w"); +atomic!(usize, "wz", "w", "w"); #[cfg(target_pointer_width = "64")] -atomic!(isize, "d", "d"); +atomic!(isize, "d", "d", "d"); #[cfg(target_pointer_width = "64")] -atomic!(usize, "d", "d"); +atomic!(usize, "d", "d", "d"); // https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445 // https://github.com/llvm/llvm-project/blob/2ba5d820e2b0e5016ec706e324060a329f9a83a3/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll @@ -709,6 +1073,91 @@ macro_rules! atomic128 { } } } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let success = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + let mut r: usize; + macro_rules! atomic_cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + // load from old/new to r4-r5/r6-r7 pairs + concat!("ld %r4, ", p128h!(), "({old})"), + concat!("ld %r5, ", p128l!(), "({old})"), + concat!("ld %r6, ", p128h!(), "({new})"), + concat!("ld %r7, ", p128l!(), "({new})"), + // (atomic) compare and exchange + $release, + "2:", + "lqarx %r8, 0, {dst}", + "xor %r11, %r9, %r5", + "xor %r10, %r8, %r4", + "or. %r11, %r11, %r10", + "bne %cr0, 3f", + "mr %r11, %r7", + "mr %r10, %r6", + "stqcx. 
%r10, 0, {dst}", + "bne %cr0, 2b", + "b 4f", + "3:", + "stqcx. %r8, 0, {dst}", + "4:", + $acquire, + // store r8-r9 pair to out + concat!("std %r8, ", p128h!(), "({out})"), + concat!("std %r9, ", p128l!(), "({out})"), + "xor %r11, %r9, %r5", + "xor %r10, %r8, %r4", + "or. %r11, %r11, %r10", + "cntlzd %r11, %r11", + "rldicl %r11, %r11, 58, 63", + dst = inout(reg) dst => _, + old = in(reg) old, + new = inout(reg) new => _, + out = inout(reg) out => _, + out("r0") _, + // lq loads value into even/odd pair of specified register and subsequent register. + // We cannot use r1 and r2, so starting with r4. + out("r4") _, // old (hi) + out("r5") _, // old (lo) + out("r6") _, // new (hi) + out("r7") _, // new (lo) + out("r8") _, // out (hi) + out("r9") _, // out (lo) + out("r10") _, + out("r11") r, + out("cr0") _, + options(nostack), + ) + }; + } + match success { + Ordering::Relaxed => atomic_cmpxchg!("", ""), + Ordering::Acquire => atomic_cmpxchg!("lwsync", ""), + Ordering::Release => atomic_cmpxchg!("", "lwsync"), + Ordering::AcqRel => atomic_cmpxchg!("lwsync", "lwsync"), + Ordering::SeqCst => atomic_cmpxchg!("lwsync", "sync"), + _ => unreachable_unchecked!("{:?}", success), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + r != 0 + } + } + } }; } diff --git a/src/arch/riscv.rs b/src/arch/riscv.rs index 1bedc0e0..2048248f 100644 --- a/src/arch/riscv.rs +++ b/src/arch/riscv.rs @@ -5,8 +5,8 @@ // - portable-atomic https://github.com/taiki-e/portable-atomic // // Generated asm: -// - riscv64gc https://godbolt.org/z/Ge5ozMEf3 -// - riscv32imac https://godbolt.org/z/s7PYcjnh4 +// - riscv64gc https://godbolt.org/z/cfvqr9eTh +// - riscv32imac https://godbolt.org/z/jKdTPW16v use core::{ arch::asm, @@ -15,9 +15,24 @@ use core::{ }; #[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] -use crate::raw::AtomicSwap; +use crate::raw::{AtomicCompareExchange, AtomicSwap}; use crate::raw::{AtomicLoad, AtomicStore}; +#[cfg(any(target_feature = "a", 
atomic_maybe_uninit_target_feature = "a"))] +#[cfg(target_arch = "riscv32")] +macro_rules! if_64 { + ($($tt:tt)*) => { + "" + }; +} +#[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] +#[cfg(target_arch = "riscv64")] +macro_rules! if_64 { + ($($tt:tt)*) => { + $($tt)* + }; +} + #[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] #[cfg(target_arch = "riscv32")] macro_rules! addiw { @@ -233,6 +248,68 @@ macro_rules! atomic { } } } + #[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let success = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + let mut r: usize; + macro_rules! 
atomic_cmpxchg { + ($acquire:expr, $release:expr) => { + asm!( + // load old/new to old_tmp/new_tmp + concat!("l", $asm_suffix, " {old_tmp}, 0({old})"), + concat!("l", $asm_suffix, " {new_tmp}, 0({new})"), + // (atomic) compare and exchange + "2:", + concat!("lr.", $asm_suffix, $acquire, " {out_tmp}, 0({dst})"), + "bne {out_tmp}, {old_tmp}, 3f", + concat!("sc.", $asm_suffix, $release, " {r}, {new_tmp}, 0({dst})"), + "bnez {r}, 2b", + "3:", + "xor {r}, {out_tmp}, {old_tmp}", + "seqz {r}, {r}", + // store out_tmp to out + concat!("s", $asm_suffix, " {out_tmp}, 0({out})"), + dst = inout(reg) dst => _, + old = in(reg) old, + old_tmp = lateout(reg) _, + new = inout(reg) new => _, + new_tmp = lateout(reg) _, + out = inout(reg) out => _, + out_tmp = lateout(reg) _, + r = out(reg) r, + options(nostack), + ) + }; + } + match success { + Ordering::Relaxed => atomic_cmpxchg!("", ""), + Ordering::Acquire => atomic_cmpxchg!(".aq", ""), + Ordering::Release => atomic_cmpxchg!("", ".rl"), + Ordering::AcqRel => atomic_cmpxchg!(".aq", ".rl"), + Ordering::SeqCst => atomic_cmpxchg!(".aqrl", ".aqrl"), + _ => unreachable_unchecked!("{:?}", success), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + r != 0 + } + } + } }; } @@ -305,6 +382,86 @@ macro_rules! 
atomic8 { } } } + #[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let success = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + let mut r: usize; + macro_rules! atomic_cmpxchg { + ($acquire:expr, $release:expr) => { + asm!( + // Implement sub-word atomic operations using word-sized LL/SC loop. + // Based on assemblies generated by rustc/LLVM. 
+ // Refs: + // - https://github.com/llvm/llvm-project/blob/03c066ab134f02289df1b61db00294c1da579f9c/llvm/lib/CodeGen/AtomicExpandPass.cpp#L677 + // - https://github.com/llvm/llvm-project/blob/03c066ab134f02289df1b61db00294c1da579f9c/llvm/test/CodeGen/RISCV/atomic-rmw.ll + "lbu a1, 0(a1)", + "lbu a2, 0(a2)", + // create aligned address and masks + // https://github.com/llvm/llvm-project/blob/03c066ab134f02289df1b61db00294c1da579f9c/llvm/lib/CodeGen/AtomicExpandPass.cpp#L677 + "andi a6, a0, -4", + concat!(slliw!(), " a0, a0, 3"), + "li a5, 255", + concat!(sllw!(), " a5, a5, a0"), + concat!(sllw!(), " a7, a1, a0"), + concat!(sllw!(), " a2, a2, a0"), + // (atomic) compare and exchange (LR/SC loop) + "2:", + concat!("lr.w", $acquire, " a4, 0(a6)"), + "and a1, a4, a5", + "bne a1, a7, 3f", + "xor a1, a4, a2", + "and a1, a1, a5", + "xor a1, a1, a4", + concat!("sc.w", $release, " a1, a1, 0(a6)"), + "bnez a1, 2b", + "3:", + concat!(srlw!(), " a1, a4, a0"), + "and a0, a4, a5", + if_64!("sext.w a0, a0"), + "xor a0, a7, a0", + "seqz a0, a0", + "sb a1, 0({out})", + out = inout(reg) out => _, + inout("a0") dst => r, + inout("a1") old => _, + inout("a2") new => _, + out("a4") _, + out("a5") _, + out("a6") _, + out("a7") _, + options(nostack), + ) + }; + } + match success { + Ordering::Relaxed => atomic_cmpxchg!("", ""), + Ordering::Acquire => atomic_cmpxchg!(".aq", ""), + Ordering::Release => atomic_cmpxchg!("", ".rl"), + Ordering::AcqRel => atomic_cmpxchg!(".aq", ".rl"), + Ordering::SeqCst => atomic_cmpxchg!(".aqrl", ".aqrl"), + _ => unreachable_unchecked!("{:?}", success), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + r != 0 + } + } + } }; } @@ -378,6 +535,87 @@ macro_rules! 
atomic16 { } } } + #[cfg(any(target_feature = "a", atomic_maybe_uninit_target_feature = "a"))] + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + let success = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + let mut r: usize; + macro_rules! atomic_cmpxchg { + ($acquire:expr, $release:expr) => { + asm!( + // Implement sub-word atomic operations using word-sized LL/SC loop. + // Based on assemblies generated by rustc/LLVM. 
+ // Refs: + // - https://github.com/llvm/llvm-project/blob/03c066ab134f02289df1b61db00294c1da579f9c/llvm/lib/CodeGen/AtomicExpandPass.cpp#L677 + // - https://github.com/llvm/llvm-project/blob/03c066ab134f02289df1b61db00294c1da579f9c/llvm/test/CodeGen/RISCV/atomic-rmw.ll + "lhu a1, 0(a1)", + "lhu a2, 0(a2)", + // create aligned address and masks + // https://github.com/llvm/llvm-project/blob/03c066ab134f02289df1b61db00294c1da579f9c/llvm/lib/CodeGen/AtomicExpandPass.cpp#L677 + "andi a6, a0, -4", + concat!(slliw!(), " a0, a0, 3"), + "lui a5, 16", + concat!(addiw!(), " a5, a5, -1"), + concat!(sllw!(), " a5, a5, a0"), + concat!(sllw!(), " a7, a1, a0"), + concat!(sllw!(), " a2, a2, a0"), + // (atomic) compare and exchange (LR/SC loop) + "2:", + concat!("lr.w", $acquire, " a4, 0(a6)"), + "and a1, a4, a5", + "bne a1, a7, 3f", + "xor a1, a4, a2", + "and a1, a1, a5", + "xor a1, a1, a4", + concat!("sc.w", $release, " a1, a1, 0(a6)"), + "bnez a1, 2b", + "3:", + concat!(srlw!(), " a1, a4, a0"), + "and a0, a4, a5", + if_64!("sext.w a0, a0"), + "xor a0, a7, a0", + "seqz a0, a0", + "sh a1, 0({out})", + out = inout(reg) out => _, + inout("a0") dst => r, + inout("a1") old => _, + inout("a2") new => _, + out("a4") _, + out("a5") _, + out("a6") _, + out("a7") _, + options(nostack), + ) + }; + } + match success { + Ordering::Relaxed => atomic_cmpxchg!("", ""), + Ordering::Acquire => atomic_cmpxchg!(".aq", ""), + Ordering::Release => atomic_cmpxchg!("", ".rl"), + Ordering::AcqRel => atomic_cmpxchg!(".aq", ".rl"), + Ordering::SeqCst => atomic_cmpxchg!(".aqrl", ".aqrl"), + _ => unreachable_unchecked!("{:?}", success), + } + debug_assert!(r == 0 || r == 1, "r={}", r); + r != 0 + } + } + } }; } diff --git a/src/arch/s390x.rs b/src/arch/s390x.rs index cdf0e74d..1cb70946 100644 --- a/src/arch/s390x.rs +++ b/src/arch/s390x.rs @@ -2,7 +2,7 @@ // - https://www.ibm.com/support/pages/zarchitecture-reference-summary // // Generated asm: -// - s390x https://godbolt.org/z/3n97znqd3 +// - s390x 
https://godbolt.org/z/c3s5dT15P use core::{ arch::asm, @@ -10,7 +10,7 @@ use core::{ sync::atomic::Ordering, }; -use crate::raw::{AtomicLoad, AtomicStore, AtomicSwap}; +use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; macro_rules! atomic_load_store { ($int_type:ident, $asm_suffix:tt, $st_suffix:tt) => { @@ -125,6 +125,51 @@ macro_rules! atomic { } } } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + let mut r: usize; + // compare_exchange is always SeqCst. + asm!( + // load from old/new to r0/tmp + concat!("l", $asm_suffix, " %r0, 0({old})"), + concat!("l", $asm_suffix, " {tmp}, 0({new})"), + // (atomic) compare and exchange + concat!("cs", $asm_suffix, " %r0, {tmp}, 0({dst})"), + // store result to r as true(1) or false(0) + "ipm {tmp}", + "afi {tmp}, -268435456", + "risbg {r}, {tmp}, 63, 191, 33", + // store r0 to out + concat!("st", $st_suffix, " %r0, 0({out})"), + dst = inout(reg) dst => _, + old = in(reg) old, + new = inout(reg) new => _, + tmp = lateout(reg) _, + out = inout(reg) out => _, + r = lateout(reg) r, + out("r0") _, + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + r != 0 + } + } + } }; } @@ -177,6 +222,67 @@ macro_rules! 
atomic8 { } } } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + let mut r: usize; + // compare_exchange is always SeqCst. + asm!( + // Implement sub-word atomic operations using word-sized CAS loop. + // Based on assemblies generated by rustc/LLVM. + // Refs: + // - https://github.com/llvm/llvm-project/blob/03c066ab134f02289df1b61db00294c1da579f9c/llvm/lib/CodeGen/AtomicExpandPass.cpp#L677 + "llc %r0, 0(%r3)", + "lb %r1, 0(%r4)", + "risbg %r3, %r2, 0, 189, 0", + "l %r4, 0(%r3)", + "sll %r2, 3", + "lcr %r14, %r2", + "2:", + "rll %r13, %r4, 8(%r2)", + "risbg %r1, %r13, 32, 55, 0", + "llcr %r13, %r13", + "cr %r13, %r0", + "jlh 3f", + "rll %r12, %r1, -8(%r14)", + "cs %r4, %r12, 0(%r3)", + "jl 2b", + "3:", + // store result to r2 as true(1) or false(0) + "ipm %r0", + "afi %r0, -268435456", + "risbg %r2, %r0, 63, 191, 33", + "stc %r13, 0(%r5)", + out("r0") _, + out("r1") _, + inout("r2") dst => r, + inout("r3") old => _, + inout("r4") new => _, + in("r5") out, + out("r12") _, + out("r13") _, + out("r14") _, + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + r != 0 + } + } + } }; } @@ -229,6 +335,67 @@ macro_rules! 
atomic16 { } } } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + let mut r: usize; + // compare_exchange is always SeqCst. + asm!( + // Implement sub-word atomic operations using word-sized CAS loop. + // Based on assemblies generated by rustc/LLVM. + // Refs: + // - https://github.com/llvm/llvm-project/blob/03c066ab134f02289df1b61db00294c1da579f9c/llvm/lib/CodeGen/AtomicExpandPass.cpp#L677 + "llh %r0, 0(%r3)", + "lh %r1, 0(%r4)", + "risbg %r3, %r2, 0, 189, 0", + "l %r4, 0(%r3)", + "sll %r2, 3", + "lcr %r14, %r2", + "2:", + "rll %r13, %r4, 16(%r2)", + "risbg %r1, %r13, 32, 47, 0", + "llhr %r13, %r13", + "cr %r13, %r0", + "jlh 3f", + "rll %r12, %r1, -16(%r14)", + "cs %r4, %r12, 0(%r3)", + "jl 2b", + "3:", + // store result to r2 as true(1) or false(0) + "ipm %r0", + "afi %r0, -268435456", + "risbg %r2, %r0, 63, 191, 33", + "sth %r13, 0(%r5)", + out("r0") _, + out("r1") _, + inout("r2") dst => r, + inout("r3") old => _, + inout("r4") new => _, + in("r5") out, + out("r12") _, + out("r13") _, + out("r14") _, + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + r != 0 + } + } + } }; } @@ -365,6 +532,57 @@ macro_rules! 
atomic128 { } } } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + unsafe { + let mut r: usize; + // compare_exchange is always SeqCst. + asm!( + // load from old/new to r0-r1/r12-r13 pairs + "lg %r1, 8({old})", + "lg %r0, 0({old})", + "lg %r13, 8({new})", + "lg %r12, 0({new})", + // (atomic) compare and exchange + "cdsg %r0, %r12, 0({dst})", + // store result to r as true(1) or false(0) + "ipm {r}", + "afi {r}, -268435456", + "risbg {r}, {r}, 63, 191, 33", + // store r0-r1 pair to out + "stg %r1, 8({out})", + "stg %r0, 0({out})", + dst = in(reg) dst, + old = in(reg) old, + new = in(reg) new, + out = inout(reg) out => _, + r = lateout(reg) r, + // lpq loads value into even/odd pair of specified register and subsequent register. 
+ out("r0") _, // old (hi) -> out (hi) + out("r1") _, // old (lo) -> out (lo) + out("r12") _, // new (hi) + out("r13") _, // new (hi) + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + r != 0 + } + } + } }; } diff --git a/src/arch/x86.rs b/src/arch/x86.rs index cf8d2583..d70e33db 100644 --- a/src/arch/x86.rs +++ b/src/arch/x86.rs @@ -1,6 +1,6 @@ // Generated asm: -// - x86_64 https://godbolt.org/z/vMMbTGeM1 -// - x86_64 (+cmpxchg16b) https://godbolt.org/z/hr8o9rPP3 +// - x86_64 https://godbolt.org/z/aW7vsqr9h +// - x86_64 (+cmpxchg16b) https://godbolt.org/z/jdYrfddjf use core::{ arch::asm, @@ -8,7 +8,7 @@ use core::{ sync::atomic::Ordering, }; -use crate::raw::{AtomicLoad, AtomicStore, AtomicSwap}; +use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap}; #[cfg(target_pointer_width = "32")] macro_rules! ptr_modifier { @@ -24,7 +24,7 @@ macro_rules! ptr_modifier { } macro_rules! atomic { - ($int_type:ident, $val_reg:tt, $val_modifier:tt, $ptr_size:tt) => { + ($int_type:ident, $val_reg:tt, $val_modifier:tt, $ptr_size:tt, $cmpxchg_cmp_reg:tt) => { impl AtomicLoad for $int_type { #[inline] unsafe fn atomic_load( @@ -125,37 +125,85 @@ macro_rules! atomic { } } } + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + + // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange`. + // + // Refs: https://www.felixcloutier.com/x86/cmpxchg + unsafe { + let r: u8; + // compare_exchange is always SeqCst. 
+ asm!( + // load from old/new to $cmpxchg_cmp_reg/tmp_new + concat!("mov ", $cmpxchg_cmp_reg, ", ", $ptr_size, " ptr [{old", ptr_modifier!(), "}]"), + concat!("mov {tmp_new", $val_modifier, "}, ", $ptr_size, " ptr [{new", ptr_modifier!(), "}]"), + // (atomic) compare and exchange + // - Compare $cmpxchg_cmp_reg with dst. + // - If equal, ZF is set and tmp_new is loaded into dst. + // - Else, clear ZF and load dst into $cmpxchg_cmp_reg. + concat!("lock cmpxchg ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {tmp_new", $val_modifier, "}"), + // load ZF to dl + "sete {r}", + // store $cmpxchg_cmp_reg to out + concat!("mov ", $ptr_size, " ptr [{out", ptr_modifier!(), "}], ", $cmpxchg_cmp_reg, ""), + dst = in(reg) dst, + old = in(reg) old, + new = in(reg) new, + out = in(reg) out, + tmp_new = out($val_reg) _, + r = out(reg_byte) r, + out($cmpxchg_cmp_reg) _, + options(nostack), + ); + debug_assert!(r == 0 || r == 1, "r={}", r); + r != 0 + } + } + } }; } -atomic!(i8, reg_byte, "", "byte"); -atomic!(u8, reg_byte, "", "byte"); -atomic!(i16, reg, ":x", "word"); -atomic!(u16, reg, ":x", "word"); -atomic!(i32, reg, ":e", "dword"); -atomic!(u32, reg, ":e", "dword"); +atomic!(i8, reg_byte, "", "byte", "al"); +atomic!(u8, reg_byte, "", "byte", "al"); +atomic!(i16, reg, ":x", "word", "ax"); +atomic!(u16, reg, ":x", "word", "ax"); +atomic!(i32, reg, ":e", "dword", "eax"); +atomic!(u32, reg, ":e", "dword", "eax"); #[cfg(target_arch = "x86_64")] -atomic!(i64, reg, "", "qword"); +atomic!(i64, reg, "", "qword", "rax"); #[cfg(target_arch = "x86_64")] -atomic!(u64, reg, "", "qword"); +atomic!(u64, reg, "", "qword", "rax"); #[cfg(target_pointer_width = "32")] -atomic!(isize, reg, ":e", "dword"); +atomic!(isize, reg, ":e", "dword", "eax"); #[cfg(target_pointer_width = "32")] -atomic!(usize, reg, ":e", "dword"); +atomic!(usize, reg, ":e", "dword", "eax"); #[cfg(target_pointer_width = "64")] -atomic!(isize, reg, "", "qword"); +atomic!(isize, reg, "", "qword", "rax"); 
#[cfg(target_pointer_width = "64")] -atomic!(usize, reg, "", "qword"); +atomic!(usize, reg, "", "qword", "rax"); #[cfg(target_arch = "x86_64")] macro_rules! atomic128 { ($int_type:ident) => { #[cfg(target_pointer_width = "32")] - atomic128!($int_type, "edi", "esi", "r8d"); + atomic128!($int_type, "edi", "esi", "r8d", "edx"); #[cfg(target_pointer_width = "64")] - atomic128!($int_type, "rdi", "rsi", "r8"); + atomic128!($int_type, "rdi", "rsi", "r8", "rdx"); }; - ($int_type:ident, $rdi:tt, $rsi:tt, $r8:tt) => { + ($int_type:ident, $rdi:tt, $rsi:tt, $r8:tt, $rdx:tt) => { #[cfg(any(target_feature = "cmpxchg16b", atomic_maybe_uninit_target_feature = "cmpxchg16b"))] impl AtomicLoad for $int_type { #[inline] @@ -313,6 +361,69 @@ macro_rules! atomic128 { } } } + #[cfg(any(target_feature = "cmpxchg16b", atomic_maybe_uninit_target_feature = "cmpxchg16b"))] + impl AtomicCompareExchange for $int_type { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: *const MaybeUninit, + new: *const MaybeUninit, + out: *mut MaybeUninit, + _success: Ordering, + _failure: Ordering, + ) -> bool { + debug_assert!(dst as usize % mem::size_of::<$int_type>() == 0); + debug_assert!(old as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(new as usize % mem::align_of::<$int_type>() == 0); + debug_assert!(out as usize % mem::align_of::<$int_type>() == 0); + + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. + // cfg guarantees that the CPU supports cmpxchg16b. + // + // If the value at `dst` (destination operand) and rdx:rax are equal, the + // 128-bit value in rcx:rbx is stored in the `dst`, otherwise the value at + // `dst` is loaded to rdx:rax. + // + // The ZF flag is set if the value at `dst` and rdx:rax are equal, + // otherwise it is cleared. Other flags are unaffected. 
+ // + // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b + unsafe { + let mut r: u64; + // compare_exchange is always SeqCst. + asm!( + // rbx is reserved by LLVM + "xchg {rbx_tmp}, rbx", + concat!("mov rax, qword ptr [", $rsi, "]"), + concat!("mov rsi, qword ptr [", $rsi, " + 8]"), + concat!("mov rbx, qword ptr [", $rdx, "]"), + concat!("mov rcx, qword ptr [", $rdx, " + 8]"), + "mov rdx, rsi", + // (atomic) compare and exchange + concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), + "sete cl", + // store previous value to out + concat!("mov qword ptr [", $r8, "], rax"), + concat!("mov qword ptr [", $r8, " + 8], rdx"), + // restore rbx + "mov rbx, {rbx_tmp}", + rbx_tmp = out(reg) _, + out("rax") _, + out("rcx") r, + lateout("rdx") _, + lateout("rsi") _, + in($rdi) dst, + in($rsi) old, + in($rdx) new, + in($r8) out, + options(nostack), + ); + debug_assert!(r as u8 == 0 || r as u8 == 1, "r={}", r as u8); + r as u8 != 0 + } + } + } }; } diff --git a/src/lib.rs b/src/lib.rs index 503dd64c..1497d21c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,24 +11,24 @@ This crate provides a way to soundly perform such operations. Currently, x86, x86_64, ARM (v6-m, v7+), AArch64, RISC-V, MIPS32r2, MIPS64r2, PowerPC, and s390x are supported. 
-| target_arch | primitives | [load]/[store] | [swap] | -| --------------------------------- | --------------------------------------------------- |:--------------:|:------:| -| x86 | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| x86_64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| x86_64 (+cmpxchg16b) | i128,u128 | ✓ | ✓ | -| arm (v6-m, v7+) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| arm (v7-a) | i64,u64 | ✓ | ✓ | -| aarch64 \[2] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | -| riscv32 | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| riscv64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓\[1] | -| mips \[3] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| mips64 \[3] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| powerpc \[3] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| powerpc64 \[3] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| powerpc64 (le or pwr8+) \[3] \[4] | i128,u128 | ✓ | ✓ | -| s390x \[3] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | - -\[1] ARM's atomic swap is not available on v6-m (thumbv6m). RISC-V's atomic swap is not available on targets without the A (or G) extension such as riscv32i, riscv32imc, etc.
+| target_arch | primitives | load/store | RMW | +| --------------------------------- | --------------------------------------------------- |:----------:|:-----:| +| x86 | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| x86_64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| x86_64 (+cmpxchg16b) | i128,u128 | ✓ | ✓ | +| arm (v6-m, v7+) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| arm (v7-a) | i64,u64 | ✓ | ✓ | +| aarch64 \[2] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | +| riscv32 | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| riscv64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓\[1] | +| mips \[3] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| mips64 \[3] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| powerpc \[3] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| powerpc64 \[3] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| powerpc64 (le or pwr8+) \[3] \[4] | i128,u128 | ✓ | ✓ | +| s390x \[3] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | + +\[1] ARM's atomic RMW operations are not available on v6-m (thumbv6m). RISC-V's atomic RMW operations are not available on targets without the A (or G) extension such as riscv32i, riscv32imc, etc.
\[2] If target features such as `lse` and `lse2` are enabled at compile-time, more efficient instructions are used.
\[3] Requires nightly due to `#![feature(asm_experimental_arch)]`.
\[4] target-cpu `pwr8`, `pwr9`, or `pwr10`.
@@ -40,9 +40,6 @@ Feel free to submit an issue if your target is not supported yet. - [portable-atomic]: Portable atomic types including support for 128-bit atomics, atomic float, etc. - [atomic-memcpy]: Byte-wise atomic memcpy. -[load]: https://docs.rs/atomic-maybe-uninit/latest/atomic_maybe_uninit/struct.AtomicMaybeUninit.html#method.load -[store]: https://docs.rs/atomic-maybe-uninit/latest/atomic_maybe_uninit/struct.AtomicMaybeUninit.html#method.store -[swap]: https://docs.rs/atomic-maybe-uninit/latest/atomic_maybe_uninit/struct.AtomicMaybeUninit.html#method.swap [atomic-memcpy]: https://github.com/taiki-e/atomic-memcpy [portable-atomic]: https://github.com/taiki-e/portable-atomic [undefined-behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html @@ -74,7 +71,13 @@ Feel free to submit an issue if your target is not supported yet. clippy::transmute_undefined_repr, clippy::undocumented_unsafe_blocks )] -#![allow(clippy::doc_markdown, clippy::module_inception, clippy::type_complexity)] +#![allow( + clippy::doc_markdown, + clippy::missing_errors_doc, + clippy::module_inception, + clippy::too_many_lines, + clippy::type_complexity +)] #![cfg_attr( not(any( target_arch = "x86", @@ -110,7 +113,7 @@ use core::{ sync::atomic::Ordering, }; -use crate::raw::{AtomicLoad, AtomicStore, AtomicSwap, Primitive}; +use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap, Primitive}; /// A potentially uninitialized integer type which can be safely shared between threads. /// @@ -328,6 +331,327 @@ impl AtomicMaybeUninit { unsafe { T::atomic_swap(self.v.get(), &val, &mut out, order) } out } + + /// Stores a value into the atomic integer if the current value is the same as + /// the `current` value. + /// + /// The return value is a result indicating whether the new value was written and + /// containing the previous value. On success this value is guaranteed to be equal to + /// `current`. 
+ /// + /// `compare_exchange` takes two [`Ordering`] arguments to describe the memory + /// ordering of this operation. `success` describes the required ordering for the + /// read-modify-write operation that takes place if the comparison with `current` succeeds. + /// `failure` describes the required ordering for the load operation that takes place when + /// the comparison fails. Using [`Acquire`] as success ordering makes the store part + /// of this operation [`Relaxed`], and using [`Release`] makes the successful load + /// [`Relaxed`]. The failure ordering can only be [`SeqCst`], [`Acquire`] or [`Relaxed`]. + /// + /// # Panics + /// + /// Panics if `failure` is [`Release`], [`AcqRel`]. + /// + /// # Notes + /// + /// Comparison of two values containing uninitialized bytes may fail even if + /// they are equivalent as Rust's type, because their contents are not frozen + /// until a pointer to the value containing uninitialized bytes is passed to `asm!`. + /// + /// For example, the following example could be an infinite loop: + /// + /// ```no_run + /// use std::{ + /// mem::{self, MaybeUninit}, + /// sync::atomic::Ordering, + /// }; + /// + /// use atomic_maybe_uninit::AtomicMaybeUninit; + /// + /// #[derive(Clone, Copy, PartialEq, Eq)] + /// #[repr(C, align(4))] + /// struct Test(u8, u16); + /// + /// unsafe { + /// let x = mem::transmute::<_, MaybeUninit>(Test(0, 0)); + /// let v = AtomicMaybeUninit::new(x); + /// while v + /// .compare_exchange( + /// mem::transmute::<_, MaybeUninit>(Test(0, 0)), + /// mem::transmute::<_, MaybeUninit>(Test(1, 0)), + /// Ordering::AcqRel, + /// Ordering::Acquire, + /// ) + /// .is_err() + /// {} + /// } + /// ``` + /// + /// To work around this problem, you need to use a helper like the following. 
+ /// + /// ``` + /// # if cfg!(valgrind) { return; } + /// # use std::{ + /// # mem::{self, MaybeUninit}, + /// # sync::atomic::Ordering, + /// # }; + /// # use atomic_maybe_uninit::AtomicMaybeUninit; + /// # #[derive(Clone, Copy, PartialEq, Eq)] + /// # #[repr(C, align(4))] + /// # struct Test(u8, u16); + /// // Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.10/crossbeam-utils/src/atomic/atomic_cell.rs#L1081-L1110 + /// unsafe fn atomic_compare_exchange( + /// v: &AtomicMaybeUninit, + /// mut current: Test, + /// new: Test, + /// ) -> Result { + /// let mut current_raw = mem::transmute::<_, MaybeUninit>(current); + /// let new_raw = mem::transmute::<_, MaybeUninit>(new); + /// loop { + /// match v.compare_exchange_weak(current_raw, new_raw, Ordering::AcqRel, Ordering::Acquire) + /// { + /// Ok(_) => break Ok(current), + /// Err(previous_raw) => { + /// let previous = mem::transmute_copy(&previous_raw); + /// + /// if !Test::eq(&previous, ¤t) { + /// break Err(previous); + /// } + /// + /// // The compare-exchange operation has failed and didn't store `new`. The + /// // failure is either spurious, or `previous` was semantically equal to + /// // `current` but not byte-equal. Let's retry with `previous` as the new + /// // `current`. + /// current = previous; + /// current_raw = previous_raw; + /// } + /// } + /// } + /// } + /// + /// unsafe { + /// let x = mem::transmute::<_, MaybeUninit>(Test(0, 0)); + /// let v = AtomicMaybeUninit::new(x); + /// while atomic_compare_exchange(&v, Test(0, 0), Test(1, 0)).is_err() {} + /// } + /// ``` + /// + /// See [crossbeam-rs/crossbeam#315](https://github.com/crossbeam-rs/crossbeam/issues/315) for more details. + /// + /// Also, Valgrind reports "Conditional jump or move depends on uninitialized value(s)" + /// error if there is such comparison. 
+ /// + /// # Examples + /// + /// ``` + /// use std::{mem::MaybeUninit, sync::atomic::Ordering}; + /// + /// use atomic_maybe_uninit::AtomicMaybeUninit; + /// + /// unsafe { + /// let v = AtomicMaybeUninit::from(5_i32); + /// + /// assert_eq!( + /// v.compare_exchange( + /// MaybeUninit::new(5), + /// MaybeUninit::new(10), + /// Ordering::Acquire, + /// Ordering::Relaxed + /// ) + /// .unwrap() + /// .assume_init(), + /// 5 + /// ); + /// assert_eq!(v.load(Ordering::Relaxed).assume_init(), 10); + /// + /// assert_eq!( + /// v.compare_exchange( + /// MaybeUninit::new(6), + /// MaybeUninit::new(12), + /// Ordering::SeqCst, + /// Ordering::Acquire + /// ) + /// .unwrap_err() + /// .assume_init(), + /// 10 + /// ); + /// assert_eq!(v.load(Ordering::Relaxed).assume_init(), 10); + /// } + /// ``` + #[inline] + pub fn compare_exchange( + &self, + current: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> Result, MaybeUninit> + where + T: AtomicCompareExchange, + { + utils::assert_compare_exchange_ordering(success, failure); + let mut out = MaybeUninit::::uninit(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + let res = unsafe { + T::atomic_compare_exchange(self.v.get(), ¤t, &new, &mut out, success, failure) + }; + if res { + Ok(out) + } else { + Err(out) + } + } + + /// Stores a value into the atomic integer if the current value is the same as + /// the `current` value. + /// + /// This function is allowed to spuriously fail even when the comparison succeeds, + /// which can result in more efficient code on some platforms. The return value + /// is a result indicating whether the new value was written and containing + /// the previous value. + /// + /// `compare_exchange_weak` takes two [`Ordering`] arguments to describe the memory + /// ordering of this operation. 
`success` describes the required ordering for the + /// read-modify-write operation that takes place if the comparison with `current` succeeds. + /// `failure` describes the required ordering for the load operation that takes place when + /// the comparison fails. Using [`Acquire`] as success ordering makes the store part + /// of this operation [`Relaxed`], and using [`Release`] makes the successful load + /// [`Relaxed`]. The failure ordering can only be [`SeqCst`], [`Acquire`] or [`Relaxed`]. + /// + /// # Panics + /// + /// Panics if `failure` is [`Release`], [`AcqRel`]. + /// + /// # Notes + /// + /// Comparison of two values containing uninitialized bytes may fail even if + /// they are equivalent as Rust's type, because their contents are not frozen + /// until a pointer to the value containing uninitialized bytes is passed to `asm!`. + /// + /// See [`compare_exchange`](Self::compare_exchange) for details. + /// + /// # Examples + /// + /// ``` + /// use std::{mem::MaybeUninit, sync::atomic::Ordering}; + /// + /// use atomic_maybe_uninit::AtomicMaybeUninit; + /// + /// let v = AtomicMaybeUninit::from(5_i32); + /// + /// unsafe { + /// let mut old = v.load(Ordering::Relaxed); + /// loop { + /// let new = old.assume_init() * 2; + /// match v.compare_exchange_weak( + /// old, + /// MaybeUninit::new(new), + /// Ordering::SeqCst, + /// Ordering::Relaxed, + /// ) { + /// Ok(_) => break, + /// Err(x) => old = x, + /// } + /// } + /// } + /// ``` + #[inline] + pub fn compare_exchange_weak( + &self, + current: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> Result, MaybeUninit> + where + T: AtomicCompareExchange, + { + utils::assert_compare_exchange_ordering(success, failure); + let mut out = MaybeUninit::::uninit(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. 
+ let res = unsafe { + T::atomic_compare_exchange_weak( + self.v.get(), + ¤t, + &new, + &mut out, + success, + failure, + ) + }; + if res { + Ok(out) + } else { + Err(out) + } + } + + /// Fetches the value, and applies a function to it that returns an optional + /// new value. Returns a `Result` of `Ok(previous_value)` if the function returned `Some(_)`, else + /// `Err(previous_value)`. + /// + /// Note: This may call the function multiple times if the value has been changed from other threads in + /// the meantime, as long as the function returns `Some(_)`, but the function will have been applied + /// only once to the stored value. + /// + /// `fetch_update` takes two [`Ordering`] arguments to describe the memory ordering of this operation. + /// The first describes the required ordering for when the operation finally succeeds while the second + /// describes the required ordering for loads. These correspond to the success and failure orderings of + /// [`compare_exchange`](Self::compare_exchange) respectively. + /// + /// Using [`Acquire`] as success ordering makes the store part + /// of this operation [`Relaxed`], and using [`Release`] makes the final successful load + /// [`Relaxed`]. The (failed) load ordering can only be [`SeqCst`], [`Acquire`] or [`Relaxed`]. + /// + /// # Panics + /// + /// Panics if `fetch_order` is [`Release`], [`AcqRel`]. 
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::{mem::MaybeUninit, sync::atomic::Ordering};
+    ///
+    /// use atomic_maybe_uninit::AtomicMaybeUninit;
+    ///
+    /// unsafe {
+    ///     let v = AtomicMaybeUninit::from(5_i32);
+    ///     assert_eq!(
+    ///         v.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |_| None).unwrap_err().assume_init(),
+    ///         5
+    ///     );
+    ///     assert_eq!(
+    ///         v.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |x| Some(MaybeUninit::new(
+    ///             x.assume_init() + 1
+    ///         )))
+    ///         .unwrap()
+    ///         .assume_init(),
+    ///         5
+    ///     );
+    ///     assert_eq!(v.load(Ordering::SeqCst).assume_init(), 6);
+    /// }
+    /// ```
+    #[inline]
+    pub fn fetch_update<F>(
+        &self,
+        set_order: Ordering,
+        fetch_order: Ordering,
+        mut f: F,
+    ) -> Result<MaybeUninit<T>, MaybeUninit<T>>
+    where
+        F: FnMut(MaybeUninit<T>) -> Option<MaybeUninit<T>>,
+        T: AtomicCompareExchange,
+    {
+        let mut prev = self.load(fetch_order);
+        while let Some(next) = f(prev) {
+            match self.compare_exchange_weak(prev, next, set_order, fetch_order) {
+                x @ Ok(_) => return x,
+                Err(next_prev) => prev = next_prev,
+            }
+        }
+        Err(prev)
+    }
 }
 
 macro_rules! int {
diff --git a/src/raw.rs b/src/raw.rs
index 9d5269c5..ed3677fc 100644
--- a/src/raw.rs
+++ b/src/raw.rs
@@ -7,6 +7,8 @@ use core::{
 };
 use core::{mem::MaybeUninit, sync::atomic::Ordering};
 
+// TODO: merge traits and rename to AtomicLoadStore and AtomicRMW?
+
 /// Primitive types that may support atomic operations.
 ///
 /// This trait is sealed and cannot be implemented for types outside of `atomic-maybe-uninit`.
@@ -120,3 +122,123 @@ pub trait AtomicSwap: AtomicLoad + AtomicStore {
         order: Ordering,
     );
 }
+
+/// Atomic compare and exchange.
+///
+/// This trait is sealed and cannot be implemented for types outside of `atomic-maybe-uninit`.
+pub trait AtomicCompareExchange: AtomicLoad + AtomicStore {
+    /// Stores a value from `new` into `dst` if the current value is the same as
+    /// the value at `current`, writes the previous value to `out`.
+    ///
+    /// The return value is a result indicating whether the new value was written and
+    /// containing the previous value. On success the value at `out` is guaranteed to be equal to
+    /// the value at `current`.
+    ///
+    /// `atomic_compare_exchange` takes two [`Ordering`] arguments to describe the memory
+    /// ordering of this operation. `success` describes the required ordering for the
+    /// read-modify-write operation that takes place if the comparison with `current` succeeds.
+    /// `failure` describes the required ordering for the load operation that takes place when
+    /// the comparison fails. Using [`Acquire`] as success ordering makes the store part
+    /// of this operation [`Relaxed`], and using [`Release`] makes the successful load
+    /// [`Relaxed`]. The failure ordering can only be [`SeqCst`], [`Acquire`] or [`Relaxed`].
+    ///
+    /// # Safety
+    ///
+    /// Behavior is undefined if any of the following conditions are violated:
+    ///
+    /// - `dst` must be valid for both reads and writes.
+    /// - `dst` must be properly aligned **to the size of `Self`**.
+    ///   (For example, if `Self` is `u128`, `dst` must be aligned to 16-byte even if the alignment of `u128` is 8-byte.)
+    /// - `dst` must go through [`UnsafeCell::get`].
+    /// - `dst` must *not* overlap with `current`, `new`, or `out`.
+    /// - `current` must be valid for reads.
+    /// - `current` must be properly aligned.
+    /// - `new` must be valid for reads.
+    /// - `new` must be properly aligned.
+    /// - `out` must be valid for writes.
+    /// - `out` must be properly aligned.
+    /// - `success` must be [`SeqCst`], [`AcqRel`], [`Acquire`], [`Release`], or [`Relaxed`].
+    /// - `failure` must be [`SeqCst`], [`Acquire`], or [`Relaxed`].
+    ///
+    /// The rules for the validity of pointer follow [the rules applied to
+    /// functions exposed by the standard library's `ptr` module][validity],
+    /// except that concurrent atomic operations on `dst` are allowed.
+    ///
+    /// [validity]: core::ptr#safety
+    ///
+    /// # Notes
+    ///
+    /// Comparison of two values containing uninitialized bytes may fail even if
+    /// they are equivalent as Rust's type, because their contents are not frozen
+    /// until a pointer to the value containing uninitialized bytes is passed to `asm!`.
+    ///
+    /// See [`AtomicMaybeUninit::compare_exchange`](crate::AtomicMaybeUninit::compare_exchange) for details.
+    unsafe fn atomic_compare_exchange(
+        dst: *mut MaybeUninit<Self>,
+        current: *const MaybeUninit<Self>,
+        new: *const MaybeUninit<Self>,
+        out: *mut MaybeUninit<Self>,
+        success: Ordering,
+        failure: Ordering,
+    ) -> bool;
+
+    /// Stores a value from `new` into `dst` if the current value is the same as
+    /// the value at `current`, writes the previous value to `out`.
+    ///
+    /// This function is allowed to spuriously fail even when the comparison succeeds,
+    /// which can result in more efficient code on some platforms. The return value
+    /// is a result indicating whether the new value was written and containing
+    /// the previous value.
+    ///
+    /// `atomic_compare_exchange_weak` takes two [`Ordering`] arguments to describe the memory
+    /// ordering of this operation. `success` describes the required ordering for the
+    /// read-modify-write operation that takes place if the comparison with `current` succeeds.
+    /// `failure` describes the required ordering for the load operation that takes place when
+    /// the comparison fails. Using [`Acquire`] as success ordering makes the store part
+    /// of this operation [`Relaxed`], and using [`Release`] makes the successful load
+    /// [`Relaxed`]. The failure ordering can only be [`SeqCst`], [`Acquire`] or [`Relaxed`].
+    ///
+    /// # Safety
+    ///
+    /// Behavior is undefined if any of the following conditions are violated:
+    ///
+    /// - `dst` must be valid for both reads and writes.
+    /// - `dst` must be properly aligned **to the size of `Self`**.
+    ///   (For example, if `Self` is `u128`, `dst` must be aligned to 16-byte even if the alignment of `u128` is 8-byte.)
+    /// - `dst` must go through [`UnsafeCell::get`].
+    /// - `dst` must *not* overlap with `current`, `new`, or `out`.
+    /// - `current` must be valid for reads.
+    /// - `current` must be properly aligned.
+    /// - `new` must be valid for reads.
+    /// - `new` must be properly aligned.
+    /// - `out` must be valid for writes.
+    /// - `out` must be properly aligned.
+    /// - `success` must be [`SeqCst`], [`AcqRel`], [`Acquire`], [`Release`], or [`Relaxed`].
+    /// - `failure` must be [`SeqCst`], [`Acquire`], or [`Relaxed`].
+    ///
+    /// The rules for the validity of pointer follow [the rules applied to
+    /// functions exposed by the standard library's `ptr` module][validity],
+    /// except that concurrent atomic operations on `dst` are allowed.
+    ///
+    /// [validity]: core::ptr#safety
+    ///
+    /// # Notes
+    ///
+    /// Comparison of two values containing uninitialized bytes may fail even if
+    /// they are equivalent as Rust's type, because their contents are not frozen
+    /// until a pointer to the value containing uninitialized bytes is passed to `asm!`.
+    ///
+    /// See [`AtomicMaybeUninit::compare_exchange`](crate::AtomicMaybeUninit::compare_exchange) for details.
+    #[inline]
+    unsafe fn atomic_compare_exchange_weak(
+        dst: *mut MaybeUninit<Self>,
+        current: *const MaybeUninit<Self>,
+        new: *const MaybeUninit<Self>,
+        out: *mut MaybeUninit<Self>,
+        success: Ordering,
+        failure: Ordering,
+    ) -> bool {
+        // SAFETY: the caller must uphold the safety contract for `atomic_compare_exchange_weak`.
+        unsafe { Self::atomic_compare_exchange(dst, current, new, out, success, failure) }
+    }
+}
diff --git a/src/tests/helper.rs b/src/tests/helper.rs
index 0d52e56b..ed636b55 100644
--- a/src/tests/helper.rs
+++ b/src/tests/helper.rs
@@ -18,7 +18,7 @@ macro_rules!
test_atomic { #[allow(clippy::undocumented_unsafe_blocks)] mod [] { __test_atomic!(load_store, $int_type); - __test_atomic!(swap, $int_type); + __test_atomic!(rmw, $int_type); } } }; @@ -128,7 +128,7 @@ macro_rules! __test_atomic { } } }; - (swap, $int_type:ident) => { + (rmw, $int_type:ident) => { #[test] fn swap() { unsafe { @@ -145,6 +145,135 @@ macro_rules! __test_atomic { } } } + #[test] + fn compare_exchange() { + unsafe { + let a = AtomicMaybeUninit::<$int_type>::new(MaybeUninit::new(5)); + test_compare_exchange_ordering(|success, failure| { + a.compare_exchange(MaybeUninit::new(5), MaybeUninit::new(5), success, failure) + }); + for (success, failure) in compare_exchange_orderings() { + let a = AtomicMaybeUninit::<$int_type>::new(MaybeUninit::new(5)); + assert_eq!( + a.compare_exchange( + MaybeUninit::new(5), + MaybeUninit::new(10), + success, + failure + ) + .unwrap() + .assume_init(), + 5 + ); + assert_eq!(a.load(Ordering::Relaxed).assume_init(), 10); + assert_eq!( + a.compare_exchange( + MaybeUninit::new(6), + MaybeUninit::new(12), + success, + failure + ) + .unwrap_err() + .assume_init(), + 10 + ); + assert_eq!(a.load(Ordering::Relaxed).assume_init(), 10); + + if !cfg!(valgrind) { + let mut u = MaybeUninit::uninit(); + let a = AtomicMaybeUninit::<$int_type>::new(u); + while let Err(e) = + a.compare_exchange(u, MaybeUninit::new(10), success, failure) + { + u = e; + } + assert_eq!(a.load(Ordering::Relaxed).assume_init(), 10); + assert_eq!( + a.compare_exchange( + MaybeUninit::new(10), + MaybeUninit::uninit(), + success, + failure + ) + .unwrap() + .assume_init(), + 10 + ); + let _v = a.load(Ordering::Relaxed); + } + } + } + } + #[test] + fn compare_exchange_weak() { + unsafe { + let a = AtomicMaybeUninit::<$int_type>::new(MaybeUninit::new(5)); + test_compare_exchange_ordering(|success, failure| { + a.compare_exchange_weak( + MaybeUninit::new(5), + MaybeUninit::new(5), + success, + failure, + ) + }); + for (success, failure) in compare_exchange_orderings() 
{ + let a = AtomicMaybeUninit::<$int_type>::new(MaybeUninit::new(4)); + assert_eq!( + a.compare_exchange_weak( + MaybeUninit::new(6), + MaybeUninit::new(8), + success, + failure + ) + .unwrap_err() + .assume_init(), + 4 + ); + let mut old = a.load(Ordering::Relaxed); + loop { + let new = MaybeUninit::new(old.assume_init() * 2); + match a.compare_exchange_weak(old, new, success, failure) { + Ok(_) => break, + Err(x) => old = x, + } + } + assert_eq!(a.load(Ordering::Relaxed).assume_init(), 8); + } + } + } + #[test] + fn fetch_update() { + unsafe { + let a = AtomicMaybeUninit::<$int_type>::new(MaybeUninit::new(7)); + test_compare_exchange_ordering(|set, fetch| { + a.fetch_update(set, fetch, |x| Some(x)) + }); + for (success, failure) in compare_exchange_orderings() { + let a = AtomicMaybeUninit::<$int_type>::new(MaybeUninit::new(7)); + assert_eq!( + a.fetch_update(success, failure, |_| None).unwrap_err().assume_init(), + 7 + ); + assert_eq!( + a.fetch_update(success, failure, |x| Some(MaybeUninit::new( + x.assume_init() + 1 + ))) + .unwrap() + .assume_init(), + 7 + ); + assert_eq!( + a.fetch_update(success, failure, |x| Some(MaybeUninit::new( + x.assume_init() + 1 + ))) + .unwrap() + .assume_init(), + 8 + ); + assert_eq!(a.load(Ordering::Relaxed).assume_init(), 9); + } + } + } #[cfg(not(all(valgrind, target_arch = "aarch64")))] // TODO: flaky ::quickcheck::quickcheck! { fn quickcheck_swap(x: $int_type, y: $int_type) -> bool { @@ -158,6 +287,83 @@ macro_rules! 
__test_atomic { } true } + fn quickcheck_compare_exchange(x: $int_type, y: $int_type) -> bool { + unsafe { + let z = loop { + let z = fastrand::$int_type(..); + if z != y { + break z; + } + }; + for (success, failure) in compare_exchange_orderings() { + let a = AtomicMaybeUninit::<$int_type>::new(MaybeUninit::new(x)); + assert_eq!( + a.compare_exchange( + MaybeUninit::new(x), + MaybeUninit::new(y), + success, + failure + ) + .unwrap() + .assume_init(), + x + ); + assert_eq!(a.load(Ordering::Relaxed).assume_init(), y); + assert_eq!( + a.compare_exchange( + MaybeUninit::new(z), + MaybeUninit::new(z), + success, + failure + ) + .unwrap_err() + .assume_init(), + y + ); + assert_eq!(a.load(Ordering::Relaxed).assume_init(), y); + assert_eq!( + a.compare_exchange( + MaybeUninit::new(y), + MaybeUninit::uninit(), + success, + failure + ) + .unwrap() + .assume_init(), + y + ); + let _v = a.load(Ordering::Relaxed); + } + } + true + } + fn quickcheck_fetch_update(x: $int_type, y: $int_type) -> bool { + unsafe { + let z = loop { + let z = fastrand::$int_type(..); + if z != y { + break z; + } + }; + for (success, failure) in compare_exchange_orderings() { + let a = AtomicMaybeUninit::<$int_type>::new(MaybeUninit::new(x)); + assert_eq!( + a.fetch_update(success, failure, |_| Some(MaybeUninit::new(y))) + .unwrap() + .assume_init(), + x + ); + assert_eq!( + a.fetch_update(success, failure, |_| Some(MaybeUninit::new(z))) + .unwrap() + .assume_init(), + y + ); + assert_eq!(a.load(Ordering::Relaxed).assume_init(), z); + } + } + true + } } #[test] fn stress_swap() { @@ -224,6 +430,79 @@ macro_rules! 
__test_atomic { .unwrap(); } } + #[test] + fn stress_compare_exchange() { + unsafe { + let iterations = + if cfg!(valgrind) && cfg!(debug_assertions) { 5_000 } else { 25_000 }; + let threads = if cfg!(debug_assertions) { 2 } else { fastrand::usize(2..=8) }; + let data1 = &(0..threads) + .map(|_| (0..iterations).map(|_| fastrand::$int_type(..)).collect::>()) + .collect::>(); + let data2 = &(0..threads) + .map(|_| (0..iterations).map(|_| fastrand::$int_type(..)).collect::>()) + .collect::>(); + let set = &data1 + .iter() + .flat_map(|v| v.iter().copied()) + .chain(data2.iter().flat_map(|v| v.iter().copied())) + .collect::>(); + let a = + &AtomicMaybeUninit::<$int_type>::from(data2[0][fastrand::usize(0..iterations)]); + std::eprintln!("threads={}", threads); + let now = &std::time::Instant::now(); + thread::scope(|s| { + for thread in 0..threads { + if thread % 2 == 0 { + s.spawn(move |_| { + let now = *now; + for i in 0..iterations { + a.store( + MaybeUninit::new(data1[thread][i]), + rand_store_ordering(), + ); + } + std::eprintln!("store end={:?}", now.elapsed()); + }); + } else { + s.spawn(|_| { + let now = *now; + let mut v = vec![0; iterations]; + for i in 0..iterations { + v[i] = a.load(rand_load_ordering()).assume_init(); + } + std::eprintln!("load end={:?}", now.elapsed()); + for v in v { + assert!(set.contains(&v), "v={}", v); + } + }); + } + s.spawn(move |_| { + let now = *now; + let mut v = vec![data2[0][0]; iterations]; + for i in 0..iterations { + let old = if i % 2 == 0 { + MaybeUninit::new(fastrand::$int_type(..)) + } else { + a.load(Ordering::Relaxed) + }; + let new = MaybeUninit::new(data2[thread][i]); + let o = rand_compare_exchange_ordering(); + match a.compare_exchange(old, new, o.0, o.1) { + Ok(r) => assert_eq!(old.assume_init(), r.assume_init()), + Err(r) => v[i] = r.assume_init(), + } + } + std::eprintln!("compare_exchange end={:?}", now.elapsed()); + for v in v { + assert!(set.contains(&v), "v={}", v); + } + }); + } + }) + .unwrap(); + } + } }; 
}
@@ -404,6 +683,55 @@ pub(crate) fn test_swap_ordering<T>(f: impl Fn(Ordering) -> T)
         f(order);
     }
 }
+pub(crate) fn compare_exchange_orderings() -> [(Ordering, Ordering); 15] {
+    [
+        (Ordering::Relaxed, Ordering::Relaxed),
+        (Ordering::Relaxed, Ordering::Acquire),
+        (Ordering::Relaxed, Ordering::SeqCst),
+        (Ordering::Acquire, Ordering::Relaxed),
+        (Ordering::Acquire, Ordering::Acquire),
+        (Ordering::Acquire, Ordering::SeqCst),
+        (Ordering::Release, Ordering::Relaxed),
+        (Ordering::Release, Ordering::Acquire),
+        (Ordering::Release, Ordering::SeqCst),
+        (Ordering::AcqRel, Ordering::Relaxed),
+        (Ordering::AcqRel, Ordering::Acquire),
+        (Ordering::AcqRel, Ordering::SeqCst),
+        (Ordering::SeqCst, Ordering::Relaxed),
+        (Ordering::SeqCst, Ordering::Acquire),
+        (Ordering::SeqCst, Ordering::SeqCst),
+    ]
+}
+pub(crate) fn rand_compare_exchange_ordering() -> (Ordering, Ordering) {
+    compare_exchange_orderings()[fastrand::usize(0..compare_exchange_orderings().len())]
+}
+pub(crate) fn test_compare_exchange_ordering<T>(
+    f: impl Fn(Ordering, Ordering) -> T,
+) {
+    for &(success, failure) in &compare_exchange_orderings() {
+        f(success, failure);
+    }
+
+    if skip_should_panic_test() {
+        return;
+    }
+    for &order in &swap_orderings() {
+        let msg = assert_panic(|| f(order, Ordering::AcqRel));
+        assert!(
+            msg == "there is no such thing as an acquire/release failure ordering"
+                || msg == "there is no such thing as an acquire/release load",
+            "{}",
+            msg
+        );
+        let msg = assert_panic(|| f(order, Ordering::Release));
+        assert!(
+            msg == "there is no such thing as a release failure ordering"
+                || msg == "there is no such thing as a release load",
+            "{}",
+            msg
+        );
+    }
+}
 fn skip_should_panic_test() -> bool {
     // Miri's panic handling is slow
     // MSAN false positive: https://gist.github.com/taiki-e/dd6269a8ffec46284fdc764a4849f884
diff --git a/src/utils.rs b/src/utils.rs
index d373dc87..79a4b969 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -65,3 +65,34 @@ pub(crate) fn assert_swap_ordering(order: Ordering) {
         _ => unreachable!("{:?}", order),
     }
 }
+
+// https://github.com/rust-lang/rust/pull/98383
+#[inline]
+pub(crate) fn assert_compare_exchange_ordering(success: Ordering, failure: Ordering) {
+    match success {
+        Ordering::AcqRel
+        | Ordering::Acquire
+        | Ordering::Relaxed
+        | Ordering::Release
+        | Ordering::SeqCst => {}
+        _ => unreachable!("{:?}", success),
+    }
+    match failure {
+        Ordering::Acquire | Ordering::Relaxed | Ordering::SeqCst => {}
+        Ordering::Release => panic!("there is no such thing as a release failure ordering"),
+        Ordering::AcqRel => panic!("there is no such thing as an acquire/release failure ordering"),
+        _ => unreachable!("{:?}", failure),
+    }
+}
+
+// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0418r2.html
+#[allow(dead_code)]
+#[inline]
+pub(crate) fn upgrade_success_ordering(success: Ordering, failure: Ordering) -> Ordering {
+    match (success, failure) {
+        (Ordering::Relaxed, Ordering::Acquire) => Ordering::Acquire,
+        (Ordering::Release, Ordering::Acquire) => Ordering::AcqRel,
+        (_, Ordering::SeqCst) => Ordering::SeqCst,
+        _ => success,
+    }
+}
diff --git a/tests/cortex-m/src/main.rs b/tests/cortex-m/src/main.rs
index 07b0acc2..ad4d74b6 100644
--- a/tests/cortex-m/src/main.rs
+++ b/tests/cortex-m/src/main.rs
@@ -54,6 +54,98 @@ macro_rules!
__test_atomic { } } } + compare_exchange(); + fn compare_exchange() { + #[cfg(target_has_atomic = "ptr")] + unsafe { + for (success, failure) in compare_exchange_orderings() { + let a = AtomicMaybeUninit::<$int_type>::new(MaybeUninit::new(5)); + assert_eq!( + a.compare_exchange( + MaybeUninit::new(5), + MaybeUninit::new(10), + success, + failure + ) + .unwrap() + .assume_init(), + 5 + ); + assert_eq!(a.load(Ordering::Relaxed).assume_init(), 10); + assert_eq!( + a.compare_exchange( + MaybeUninit::new(6), + MaybeUninit::new(12), + success, + failure + ) + .unwrap_err() + .assume_init(), + 10 + ); + assert_eq!(a.load(Ordering::Relaxed).assume_init(), 10); + } + } + } + compare_exchange_weak(); + fn compare_exchange_weak() { + #[cfg(target_has_atomic = "ptr")] + unsafe { + for (success, failure) in compare_exchange_orderings() { + let a = AtomicMaybeUninit::<$int_type>::new(MaybeUninit::new(4)); + assert_eq!( + a.compare_exchange_weak( + MaybeUninit::new(6), + MaybeUninit::new(8), + success, + failure + ) + .unwrap_err() + .assume_init(), + 4 + ); + let mut old = a.load(Ordering::Relaxed); + loop { + let new = MaybeUninit::new(old.assume_init() * 2); + match a.compare_exchange_weak(old, new, success, failure) { + Ok(_) => break, + Err(x) => old = x, + } + } + assert_eq!(a.load(Ordering::Relaxed).assume_init(), 8); + } + } + } + fetch_update(); + fn fetch_update() { + #[cfg(target_has_atomic = "ptr")] + unsafe { + for (success, failure) in compare_exchange_orderings() { + let a = AtomicMaybeUninit::<$int_type>::new(MaybeUninit::new(7)); + assert_eq!( + a.fetch_update(success, failure, |_| None).unwrap_err().assume_init(), + 7 + ); + assert_eq!( + a.fetch_update(success, failure, |x| Some(MaybeUninit::new( + x.assume_init() + 1 + ))) + .unwrap() + .assume_init(), + 7 + ); + assert_eq!( + a.fetch_update(success, failure, |x| Some(MaybeUninit::new( + x.assume_init() + 1 + ))) + .unwrap() + .assume_init(), + 8 + ); + assert_eq!(a.load(Ordering::Relaxed).assume_init(), 9); + 
} + } + } }; } @@ -67,6 +159,26 @@ fn store_orderings() -> [Ordering; 3] { fn swap_orderings() -> [Ordering; 5] { [Ordering::Relaxed, Ordering::Release, Ordering::Acquire, Ordering::AcqRel, Ordering::SeqCst] } +#[cfg(target_has_atomic = "ptr")] +fn compare_exchange_orderings() -> [(Ordering, Ordering); 15] { + [ + (Ordering::Relaxed, Ordering::Relaxed), + (Ordering::Relaxed, Ordering::Acquire), + (Ordering::Relaxed, Ordering::SeqCst), + (Ordering::Acquire, Ordering::Relaxed), + (Ordering::Acquire, Ordering::Acquire), + (Ordering::Acquire, Ordering::SeqCst), + (Ordering::Release, Ordering::Relaxed), + (Ordering::Release, Ordering::Acquire), + (Ordering::Release, Ordering::SeqCst), + (Ordering::AcqRel, Ordering::Relaxed), + (Ordering::AcqRel, Ordering::Acquire), + (Ordering::AcqRel, Ordering::SeqCst), + (Ordering::SeqCst, Ordering::Relaxed), + (Ordering::SeqCst, Ordering::Acquire), + (Ordering::SeqCst, Ordering::SeqCst), + ] +} #[entry] fn main() -> ! {