From 1dbed76e9eeb25cc350e8545c8f3e6fd0ba68cc1 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Mon, 5 Dec 2022 06:36:59 +0900 Subject: [PATCH 1/3] Add add/sub/and/or/xor methods that do not return previous value --- CHANGELOG.md | 4 + src/imp/atomic128/intrinsics.rs | 1 + src/imp/atomic128/macros.rs | 2 + src/imp/core_atomic.rs | 12 + src/imp/fallback/imp.rs | 2 + src/imp/interrupt/mod.rs | 72 ++++- src/imp/interrupt/msp430.rs | 2 + src/imp/msp430.rs | 191 ++++++++++++ src/lib.rs | 503 ++++++++++++++++++++++++++++++++ src/tests/helper.rs | 104 +++++++ src/utils.rs | 45 +++ 11 files changed, 937 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 50c9f2f9..658f1917 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,10 @@ Note: In this file, do not use the hard wrap in the middle of a sentence for com ## [Unreleased] +- Add `Atomic{I,U}*::{add,sub,and,or,xor}` and `AtomicBool::{and,or,xor}` methods. + + They are equivalent to the corresponding `fetch_*` methods, but do not return the previous value. They are intended for optimization on platforms that implement atomics using inline assembly, such as the MSP430. + - Various improvements to `portable_atomic_unsafe_assume_single_core` cfg. ([#44](https://github.com/taiki-e/portable-atomic/pull/44)) - Support disabling FIQs on pre-v6 ARM under `portable_atomic_disable_fiq` cfg. diff --git a/src/imp/atomic128/intrinsics.rs b/src/imp/atomic128/intrinsics.rs index 37895e8d..db144f0e 100644 --- a/src/imp/atomic128/intrinsics.rs +++ b/src/imp/atomic128/intrinsics.rs @@ -415,6 +415,7 @@ macro_rules! atomic128 { // SAFETY: any data races are prevented by atomic intrinsics. unsafe impl Sync for $atomic_type {} + no_fetch_ops_impl!($atomic_type, $int_type); impl $atomic_type { #[inline] pub(crate) const fn new(v: $int_type) -> Self { diff --git a/src/imp/atomic128/macros.rs b/src/imp/atomic128/macros.rs index e5b1d761..972a3bb1 100644 --- a/src/imp/atomic128/macros.rs +++ b/src/imp/atomic128/macros.rs @@ -10,6 +10,7 @@ macro_rules! atomic128 { // SAFETY: any data races are prevented by atomic intrinsics. unsafe impl Sync for $atomic_type {} + no_fetch_ops_impl!($atomic_type, $int_type); impl $atomic_type { #[inline] pub(crate) const fn new(v: $int_type) -> Self { @@ -216,6 +217,7 @@ macro_rules! atomic128 { // SAFETY: any data races are prevented by atomic intrinsics. unsafe impl Sync for $atomic_type {} + no_fetch_ops_impl!($atomic_type, $int_type); impl $atomic_type { #[inline] pub(crate) const fn new(v: $int_type) -> Self { diff --git a/src/imp/core_atomic.rs b/src/imp/core_atomic.rs index 9631c500..d0991ad9 100644 --- a/src/imp/core_atomic.rs +++ b/src/imp/core_atomic.rs @@ -41,6 +41,9 @@ impl AtomicBool { } #[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))] #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))] +no_fetch_ops_impl!(AtomicBool, bool); +#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))] +#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))] impl AtomicBool { #[inline] #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] @@ -171,6 +174,15 @@ macro_rules! 
atomic_int { pub(crate) struct $atomic_type { inner: core::sync::atomic::$atomic_type, } + #[cfg_attr( + portable_atomic_no_cfg_target_has_atomic, + cfg(not(portable_atomic_no_atomic_cas)) + )] + #[cfg_attr( + not(portable_atomic_no_cfg_target_has_atomic), + cfg(target_has_atomic = "ptr") + )] + no_fetch_ops_impl!($atomic_type, $int_type); impl $atomic_type { #[inline] pub(crate) const fn new(v: $int_type) -> Self { diff --git a/src/imp/fallback/imp.rs b/src/imp/fallback/imp.rs index 6e5da46c..b81c9a24 100644 --- a/src/imp/fallback/imp.rs +++ b/src/imp/fallback/imp.rs @@ -130,6 +130,8 @@ macro_rules! atomic { // SAFETY: any data races are prevented by the lock and atomic operation. unsafe impl Sync for $atomic_type {} + #[cfg(any(test, not(portable_atomic_cmpxchg16b_dynamic)))] + no_fetch_ops_impl!($atomic_type, $int_type); impl $atomic_type { #[cfg(any(test, not(portable_atomic_cmpxchg16b_dynamic)))] #[inline] diff --git a/src/imp/interrupt/mod.rs b/src/imp/interrupt/mod.rs index f46895b6..82c5a30b 100644 --- a/src/imp/interrupt/mod.rs +++ b/src/imp/interrupt/mod.rs @@ -23,7 +23,8 @@ // [^avr2]: https://github.com/llvm/llvm-project/blob/llvmorg-15.0.0/llvm/test/CodeGen/AVR/atomics/load16.ll#L5 // On some platforms, atomic load/store can be implemented in a more efficient -// way than disabling interrupts. +// way than disabling interrupts. On MSP430, some RMWs that do not return the +// previous value can also be optimized. // // Note: On single-core systems, it is okay to use critical session-based // CAS together with atomic load/store. The load/store will not be @@ -246,6 +247,33 @@ impl AtomicBool { } } +#[cfg(not(target_arch = "msp430"))] +no_fetch_ops_impl!(AtomicBool, bool); +#[cfg(target_arch = "msp430")] +impl AtomicBool { + #[inline] + pub(crate) fn and(&self, val: bool, order: Ordering) { + // SAFETY: Self and atomic::AtomicBool have the same layout, + unsafe { + (*(self as *const Self as *const atomic::AtomicBool)).and(val, order); + } + } + #[inline] + pub(crate) fn or(&self, val: bool, order: Ordering) { + // SAFETY: Self and atomic::AtomicBool have the same layout, + unsafe { + (*(self as *const Self as *const atomic::AtomicBool)).or(val, order); + } + } + #[inline] + pub(crate) fn xor(&self, val: bool, order: Ordering) { + // SAFETY: Self and atomic::AtomicBool have the same layout, + unsafe { + (*(self as *const Self as *const atomic::AtomicBool)).xor(val, order); + } + } +} + #[cfg_attr(target_pointer_width = "16", repr(C, align(2)))] #[cfg_attr(target_pointer_width = "32", repr(C, align(4)))] #[cfg_attr(target_pointer_width = "64", repr(C, align(8)))] @@ -462,10 +490,52 @@ macro_rules! 
atomic_int { } } } + + #[cfg(not(target_arch = "msp430"))] + no_fetch_ops_impl!($atomic_type, $int_type); + #[cfg(target_arch = "msp430")] + impl $atomic_type { + #[inline] + pub(crate) fn add(&self, val: $int_type, order: Ordering) { + // SAFETY: Self and atomic::$atomic_type have the same layout, + unsafe { + (*(self as *const Self as *const atomic::$atomic_type)).add(val, order); + } + } + #[inline] + pub(crate) fn sub(&self, val: $int_type, order: Ordering) { + // SAFETY: Self and atomic::$atomic_type have the same layout, + unsafe { + (*(self as *const Self as *const atomic::$atomic_type)).sub(val, order); + } + } + #[inline] + pub(crate) fn and(&self, val: $int_type, order: Ordering) { + // SAFETY: Self and atomic::$atomic_type have the same layout, + unsafe { + (*(self as *const Self as *const atomic::$atomic_type)).and(val, order); + } + } + #[inline] + pub(crate) fn or(&self, val: $int_type, order: Ordering) { + // SAFETY: Self and atomic::$atomic_type have the same layout, + unsafe { + (*(self as *const Self as *const atomic::$atomic_type)).or(val, order); + } + } + #[inline] + pub(crate) fn xor(&self, val: $int_type, order: Ordering) { + // SAFETY: Self and atomic::$atomic_type have the same layout, + unsafe { + (*(self as *const Self as *const atomic::$atomic_type)).xor(val, order); + } + } + } }; (load_store_critical_session, $atomic_type:ident, $int_type:ident, $align:expr) => { atomic_int!(base, $atomic_type, $int_type, $align); atomic_int!(cas, $atomic_type, $int_type); + no_fetch_ops_impl!($atomic_type, $int_type); impl $atomic_type { #[inline] #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] diff --git a/src/imp/interrupt/msp430.rs b/src/imp/interrupt/msp430.rs index 89b516c6..a81ac1e6 100644 --- a/src/imp/interrupt/msp430.rs +++ b/src/imp/interrupt/msp430.rs @@ -1,4 +1,6 @@ // Adapted from https://github.com/rust-embedded/msp430. +// +// See also src/imp/msp430.rs. #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; diff --git a/src/imp/msp430.rs b/src/imp/msp430.rs index e5f48c7b..8961c7b1 100644 --- a/src/imp/msp430.rs +++ b/src/imp/msp430.rs @@ -4,6 +4,9 @@ // Including https://github.com/pftbest/msp430-atomic/pull/4 for a compile error fix. // Including https://github.com/pftbest/msp430-atomic/pull/5 for a soundness bug fix. // +// Operations not supported here are provided by disabling interrupts. +// See also src/imp/interrupt/msp430.rs. +// // Note: Ordering is always SeqCst. #[cfg(not(portable_atomic_no_asm))] @@ -106,6 +109,39 @@ impl AtomicBool { u8::atomic_store(self.v.get(), val as u8); } } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn and(&self, val: bool, order: Ordering) { + crate::utils::assert_swap_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + u8::atomic_and(self.v.get(), val as u8); + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn or(&self, val: bool, order: Ordering) { + crate::utils::assert_swap_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. 
+ unsafe { + u8::atomic_or(self.v.get(), val as u8); + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn xor(&self, val: bool, order: Ordering) { + crate::utils::assert_swap_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + u8::atomic_xor(self.v.get(), val as u8); + } + } } #[repr(transparent)] @@ -235,6 +271,61 @@ macro_rules! atomic_int { $int_type::atomic_store(self.v.get(), val); } } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn add(&self, val: $int_type, order: Ordering) { + crate::utils::assert_swap_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + $int_type::atomic_add(self.v.get(), val); + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn sub(&self, val: $int_type, order: Ordering) { + crate::utils::assert_swap_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + $int_type::atomic_sub(self.v.get(), val); + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn and(&self, val: $int_type, order: Ordering) { + crate::utils::assert_swap_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + $int_type::atomic_and(self.v.get(), val); + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn or(&self, val: $int_type, order: Ordering) { + crate::utils::assert_swap_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + $int_type::atomic_or(self.v.get(), val); + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn xor(&self, val: $int_type, order: Ordering) { + crate::utils::assert_swap_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + $int_type::atomic_xor(self.v.get(), val); + } + } } impl AtomicOperations for $int_type { @@ -277,6 +368,101 @@ macro_rules! atomic_int { ); } } + + #[inline] + unsafe fn atomic_add(dst: *mut Self, val: Self) { + // SAFETY: the caller must uphold the safety contract for `atomic_add`. + unsafe { + #[cfg(not(portable_atomic_no_asm))] + asm!( + concat!("add", $asm_suffix, " {val}, 0({dst})"), + dst = in(reg) dst, + val = in(reg) val, + options(nostack), + ); + #[cfg(portable_atomic_no_asm)] + llvm_asm!( + concat!("add", $asm_suffix, " $1, $0") + :: "*m"(dst), "ir"(val) : "memory" : "volatile" + ); + } + } + + #[inline] + unsafe fn atomic_sub(dst: *mut Self, val: Self) { + // SAFETY: the caller must uphold the safety contract for `atomic_sub`. 
+ unsafe { + #[cfg(not(portable_atomic_no_asm))] + asm!( + concat!("sub", $asm_suffix, " {val}, 0({dst})"), + dst = in(reg) dst, + val = in(reg) val, + options(nostack), + ); + #[cfg(portable_atomic_no_asm)] + llvm_asm!( + concat!("sub", $asm_suffix, " $1, $0") + :: "*m"(dst), "ir"(val) : "memory" : "volatile" + ); + } + } + + #[inline] + unsafe fn atomic_and(dst: *mut Self, val: Self) { + // SAFETY: the caller must uphold the safety contract for `atomic_and`. + unsafe { + #[cfg(not(portable_atomic_no_asm))] + asm!( + concat!("and", $asm_suffix, " {val}, 0({dst})"), + dst = in(reg) dst, + val = in(reg) val, + options(nostack), + ); + #[cfg(portable_atomic_no_asm)] + llvm_asm!( + concat!("and", $asm_suffix, " $1, $0") + :: "*m"(dst), "ir"(val) : "memory" : "volatile" + ); + } + } + + #[inline] + unsafe fn atomic_or(dst: *mut Self, val: Self) { + // SAFETY: the caller must uphold the safety contract for `atomic_or`. + unsafe { + #[cfg(not(portable_atomic_no_asm))] + asm!( + concat!("bis", $asm_suffix, " {val}, 0({dst})"), + dst = in(reg) dst, + val = in(reg) val, + options(nostack), + ); + #[cfg(portable_atomic_no_asm)] + llvm_asm!( + concat!("bis", $asm_suffix, " $1, $0") + :: "*m"(dst), "ir"(val) : "memory" : "volatile" + ); + } + } + + #[inline] + unsafe fn atomic_xor(dst: *mut Self, val: Self) { + // SAFETY: the caller must uphold the safety contract for `atomic_xor`. + unsafe { + #[cfg(not(portable_atomic_no_asm))] + asm!( + concat!("xor", $asm_suffix, " {val}, 0({dst})"), + dst = in(reg) dst, + val = in(reg) val, + options(nostack), + ); + #[cfg(portable_atomic_no_asm)] + llvm_asm!( + concat!("xor", $asm_suffix, " $1, $0") + :: "*m"(dst), "ir"(val) : "memory" : "volatile" + ); + } + } } } } @@ -291,4 +477,9 @@ atomic_int!(usize, AtomicUsize, ".w"); trait AtomicOperations: Sized { unsafe fn atomic_load(src: *const Self) -> Self; unsafe fn atomic_store(dst: *mut Self, val: Self); + unsafe fn atomic_add(dst: *mut Self, val: Self); + unsafe fn atomic_sub(dst: *mut Self, val: Self); + unsafe fn atomic_and(dst: *mut Self, val: Self); + unsafe fn atomic_or(dst: *mut Self, val: Self); + unsafe fn atomic_xor(dst: *mut Self, val: Self); } diff --git a/src/lib.rs b/src/lib.rs index ab3e7d85..bad86292 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -818,6 +818,67 @@ impl AtomicBool { self.inner.fetch_and(val, order) } + /// Logical "and" with a boolean value. + /// + /// Performs a logical "and" operation on the current value and the argument `val`, and sets + /// the new value to the result. + /// + /// Unlike `fetch_and`, this does not return the previous value. + /// + /// `and` takes an [`Ordering`] argument which describes the memory ordering + /// of this operation. All ordering modes are possible. Note that using + /// [`Acquire`] makes the store part of this operation [`Relaxed`], and + /// using [`Release`] makes the load part [`Relaxed`]. + /// + /// This function may generate more efficient code than `fetch_and` on some platforms. + /// + /// - x86: `lock and` instead of `cmpxchg` loop + /// - MSP430: `and` instead of disabling interrupts + /// + /// Note: On x86, the use of either function should not usually + /// affect the generated code, because LLVM can properly optimize the case + /// where the result is unused. 
+ /// + /// # Examples + /// + /// ``` + /// use portable_atomic::{AtomicBool, Ordering}; + /// + /// let foo = AtomicBool::new(true); + /// foo.and(false, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), false); + /// + /// let foo = AtomicBool::new(true); + /// foo.and(true, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), true); + /// + /// let foo = AtomicBool::new(false); + /// foo.and(false, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), false); + /// ``` + #[cfg_attr( + portable_atomic_no_cfg_target_has_atomic, + cfg(any( + not(portable_atomic_no_atomic_cas), + portable_atomic_unsafe_assume_single_core, + target_arch = "avr", + target_arch = "msp430" + )) + )] + #[cfg_attr( + not(portable_atomic_no_cfg_target_has_atomic), + cfg(any( + target_has_atomic = "ptr", + portable_atomic_unsafe_assume_single_core, + target_arch = "avr", + target_arch = "msp430" + )) + )] + #[inline] + pub fn and(&self, val: bool, order: Ordering) { + self.inner.and(val, order); + } + /// Logical "nand" with a boolean value. /// /// Performs a logical "nand" operation on the current value and the argument `val`, and sets @@ -923,6 +984,67 @@ impl AtomicBool { self.inner.fetch_or(val, order) } + /// Logical "or" with a boolean value. + /// + /// Performs a logical "or" operation on the current value and the argument `val`, and sets the + /// new value to the result. + /// + /// Unlike `fetch_or`, this does not return the previous value. + /// + /// `or` takes an [`Ordering`] argument which describes the memory ordering + /// of this operation. All ordering modes are possible. Note that using + /// [`Acquire`] makes the store part of this operation [`Relaxed`], and + /// using [`Release`] makes the load part [`Relaxed`]. + /// + /// This function may generate more efficient code than `fetch_or` on some platforms. + /// + /// - x86: `lock or` instead of `cmpxchg` loop + /// - MSP430: `bis` instead of disabling interrupts + /// + /// Note: On x86, the use of either function should not usually + /// affect the generated code, because LLVM can properly optimize the case + /// where the result is unused. + /// + /// # Examples + /// + /// ``` + /// use portable_atomic::{AtomicBool, Ordering}; + /// + /// let foo = AtomicBool::new(true); + /// foo.or(false, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), true); + /// + /// let foo = AtomicBool::new(true); + /// foo.or(true, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), true); + /// + /// let foo = AtomicBool::new(false); + /// foo.or(false, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), false); + /// ``` + #[cfg_attr( + portable_atomic_no_cfg_target_has_atomic, + cfg(any( + not(portable_atomic_no_atomic_cas), + portable_atomic_unsafe_assume_single_core, + target_arch = "avr", + target_arch = "msp430" + )) + )] + #[cfg_attr( + not(portable_atomic_no_cfg_target_has_atomic), + cfg(any( + target_has_atomic = "ptr", + portable_atomic_unsafe_assume_single_core, + target_arch = "avr", + target_arch = "msp430" + )) + )] + #[inline] + pub fn or(&self, val: bool, order: Ordering) { + self.inner.or(val, order); + } + /// Logical "xor" with a boolean value. /// /// Performs a logical "xor" operation on the current value and the argument `val`, and sets @@ -975,6 +1097,67 @@ impl AtomicBool { self.inner.fetch_xor(val, order) } + /// Logical "xor" with a boolean value. 
+ /// + /// Performs a logical "xor" operation on the current value and the argument `val`, and sets + /// the new value to the result. + /// + /// Unlike `fetch_xor`, this does not return the previous value. + /// + /// `xor` takes an [`Ordering`] argument which describes the memory ordering + /// of this operation. All ordering modes are possible. Note that using + /// [`Acquire`] makes the store part of this operation [`Relaxed`], and + /// using [`Release`] makes the load part [`Relaxed`]. + /// + /// This function may generate more efficient code than `fetch_xor` on some platforms. + /// + /// - x86: `lock xor` instead of `cmpxchg` loop + /// - MSP430: `xor` instead of disabling interrupts + /// + /// Note: On x86, the use of either function should not usually + /// affect the generated code, because LLVM can properly optimize the case + /// where the result is unused. + /// + /// # Examples + /// + /// ``` + /// use portable_atomic::{AtomicBool, Ordering}; + /// + /// let foo = AtomicBool::new(true); + /// foo.xor(false, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), true); + /// + /// let foo = AtomicBool::new(true); + /// foo.xor(true, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), false); + /// + /// let foo = AtomicBool::new(false); + /// foo.xor(false, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), false); + /// ``` + #[cfg_attr( + portable_atomic_no_cfg_target_has_atomic, + cfg(any( + not(portable_atomic_no_atomic_cas), + portable_atomic_unsafe_assume_single_core, + target_arch = "avr", + target_arch = "msp430" + )) + )] + #[cfg_attr( + not(portable_atomic_no_cfg_target_has_atomic), + cfg(any( + target_has_atomic = "ptr", + portable_atomic_unsafe_assume_single_core, + target_arch = "avr", + target_arch = "msp430" + )) + )] + #[inline] + pub fn xor(&self, val: bool, order: Ordering) { + self.inner.xor(val, order); + } + /// Logical "not" with a boolean value. /// /// Performs a logical "not" operation on the current value, and sets @@ -1023,6 +1206,63 @@ impl AtomicBool { self.fetch_xor(true, order) } + /// Logical "not" with a boolean value. + /// + /// Performs a logical "not" operation on the current value, and sets + /// the new value to the result. + /// + /// Unlike `fetch_not`, this does not return the previous value. + /// + /// `not` takes an [`Ordering`] argument which describes the memory ordering + /// of this operation. All ordering modes are possible. Note that using + /// [`Acquire`] makes the store part of this operation [`Relaxed`], and + /// using [`Release`] makes the load part [`Relaxed`]. + /// + /// This function may generate more efficient code than `fetch_not` on some platforms. + /// + /// - x86: `lock xor` instead of `cmpxchg` loop + /// - MSP430: `xor` instead of disabling interrupts + /// + /// Note: On x86, the use of either function should not usually + /// affect the generated code, because LLVM can properly optimize the case + /// where the result is unused. 
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let foo = AtomicBool::new(true);
+    /// foo.not(Ordering::SeqCst);
+    /// assert_eq!(foo.load(Ordering::SeqCst), false);
+    ///
+    /// let foo = AtomicBool::new(false);
+    /// foo.not(Ordering::SeqCst);
+    /// assert_eq!(foo.load(Ordering::SeqCst), true);
+    /// ```
+    #[cfg_attr(
+        portable_atomic_no_cfg_target_has_atomic,
+        cfg(any(
+            not(portable_atomic_no_atomic_cas),
+            portable_atomic_unsafe_assume_single_core,
+            target_arch = "avr",
+            target_arch = "msp430"
+        ))
+    )]
+    #[cfg_attr(
+        not(portable_atomic_no_cfg_target_has_atomic),
+        cfg(any(
+            target_has_atomic = "ptr",
+            portable_atomic_unsafe_assume_single_core,
+            target_arch = "avr",
+            target_arch = "msp430"
+        ))
+    )]
+    #[inline]
+    pub fn not(&self, order: Ordering) {
+        self.xor(true, order);
+    }
+
     // TODO: Add as_mut_ptr once it is stable on std atomic types.
     // https://github.com/rust-lang/rust/issues/66893
 
@@ -2539,6 +2779,55 @@ assert_eq!(foo.load(Ordering::SeqCst), 10);
         }
     }
 
+    doc_comment! {
+        concat!("Adds to the current value.
+
+This operation wraps around on overflow.
+
+Unlike `fetch_add`, this does not return the previous value.
+
+`add` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This function may generate more efficient code than `fetch_add` on some platforms.
+
+- MSP430: `add` instead of disabling interrupts
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0);
+foo.add(10, Ordering::SeqCst);
+assert_eq!(foo.load(Ordering::SeqCst), 10);
+```"),
+        #[cfg_attr(
+            portable_atomic_no_cfg_target_has_atomic,
+            cfg(any(
+                not(portable_atomic_no_atomic_cas),
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[cfg_attr(
+            not(portable_atomic_no_cfg_target_has_atomic),
+            cfg(any(
+                target_has_atomic = "ptr",
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[inline]
+        pub fn add(&self, val: $int_type, order: Ordering) {
+            self.inner.add(val, order);
+        }
+    }
+
     doc_comment! {
         concat!("Subtracts from the current value, returning the previous value.
 
@@ -2582,6 +2871,55 @@ assert_eq!(foo.load(Ordering::SeqCst), 10);
         }
     }
 
+    doc_comment! {
+        concat!("Subtracts from the current value.
+
+This operation wraps around on overflow.
+
+Unlike `fetch_sub`, this does not return the previous value.
+
+`sub` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This function may generate more efficient code than `fetch_sub` on some platforms.
+
+- MSP430: `sub` instead of disabling interrupts
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(20);
+foo.sub(10, Ordering::SeqCst);
+assert_eq!(foo.load(Ordering::SeqCst), 10);
+```"),
+        #[cfg_attr(
+            portable_atomic_no_cfg_target_has_atomic,
+            cfg(any(
+                not(portable_atomic_no_atomic_cas),
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[cfg_attr(
+            not(portable_atomic_no_cfg_target_has_atomic),
+            cfg(any(
+                target_has_atomic = "ptr",
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[inline]
+        pub fn sub(&self, val: $int_type, order: Ordering) {
+            self.inner.sub(val, order);
+        }
+    }
+
     doc_comment! {
         concat!("Bitwise \"and\" with the current value.
 
@@ -2628,6 +2966,61 @@ assert_eq!(foo.load(Ordering::SeqCst), 0b100001);
         }
     }
 
+    doc_comment! {
+        concat!("Bitwise \"and\" with the current value.
+
+Performs a bitwise \"and\" operation on the current value and the argument `val`, and
+sets the new value to the result.
+
+Unlike `fetch_and`, this does not return the previous value.
+
+`and` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This function may generate more efficient code than `fetch_and` on some platforms.
+
+- x86: `lock and` instead of `cmpxchg` loop
+- MSP430: `and` instead of disabling interrupts
+
+Note: On x86, the use of either function should not usually
+affect the generated code, because LLVM can properly optimize the case
+where the result is unused.
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0b101101);
+foo.and(0b110011, Ordering::SeqCst);
+assert_eq!(foo.load(Ordering::SeqCst), 0b100001);
+```"),
+        #[cfg_attr(
+            portable_atomic_no_cfg_target_has_atomic,
+            cfg(any(
+                not(portable_atomic_no_atomic_cas),
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[cfg_attr(
+            not(portable_atomic_no_cfg_target_has_atomic),
+            cfg(any(
+                target_has_atomic = "ptr",
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[inline]
+        pub fn and(&self, val: $int_type, order: Ordering) {
+            self.inner.and(val, order);
+        }
+    }
+
     doc_comment! {
         concat!("Bitwise \"nand\" with the current value.
 
@@ -2720,6 +3113,61 @@ assert_eq!(foo.load(Ordering::SeqCst), 0b111111);
         }
     }
 
+    doc_comment! {
+        concat!("Bitwise \"or\" with the current value.
+
+Performs a bitwise \"or\" operation on the current value and the argument `val`, and
+sets the new value to the result.
+
+Unlike `fetch_or`, this does not return the previous value.
+
+`or` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This function may generate more efficient code than `fetch_or` on some platforms.
+
+- x86: `lock or` instead of `cmpxchg` loop
+- MSP430: `bis` instead of disabling interrupts
+
+Note: On x86, the use of either function should not usually
+affect the generated code, because LLVM can properly optimize the case
+where the result is unused.
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0b101101);
+foo.or(0b110011, Ordering::SeqCst);
+assert_eq!(foo.load(Ordering::SeqCst), 0b111111);
+```"),
+        #[cfg_attr(
+            portable_atomic_no_cfg_target_has_atomic,
+            cfg(any(
+                not(portable_atomic_no_atomic_cas),
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[cfg_attr(
+            not(portable_atomic_no_cfg_target_has_atomic),
+            cfg(any(
+                target_has_atomic = "ptr",
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[inline]
+        pub fn or(&self, val: $int_type, order: Ordering) {
+            self.inner.or(val, order);
+        }
+    }
+
     doc_comment! {
         concat!("Bitwise \"xor\" with the current value.
 
@@ -2766,6 +3214,61 @@ assert_eq!(foo.load(Ordering::SeqCst), 0b011110);
         }
     }
 
+    doc_comment! {
+        concat!("Bitwise \"xor\" with the current value.
+
+Performs a bitwise \"xor\" operation on the current value and the argument `val`, and
+sets the new value to the result.
+
+Unlike `fetch_xor`, this does not return the previous value.
+
+`xor` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This function may generate more efficient code than `fetch_xor` on some platforms.
+
+- x86: `lock xor` instead of `cmpxchg` loop
+- MSP430: `xor` instead of disabling interrupts
+
+Note: On x86, the use of either function should not usually
+affect the generated code, because LLVM can properly optimize the case
+where the result is unused.
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0b101101);
+foo.xor(0b110011, Ordering::SeqCst);
+assert_eq!(foo.load(Ordering::SeqCst), 0b011110);
+```"),
+        #[cfg_attr(
+            portable_atomic_no_cfg_target_has_atomic,
+            cfg(any(
+                not(portable_atomic_no_atomic_cas),
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[cfg_attr(
+            not(portable_atomic_no_cfg_target_has_atomic),
+            cfg(any(
+                target_has_atomic = "ptr",
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[inline]
+        pub fn xor(&self, val: $int_type, order: Ordering) {
+            self.inner.xor(val, order);
+        }
+    }
+
     doc_comment! {
         concat!("Fetches the value, and applies a function to it that returns an optional
 new value. Returns a `Result` of `Ok(previous_value)` if the function returned `Some(_)`, else
diff --git a/src/tests/helper.rs b/src/tests/helper.rs
index ec7cf563..91a8e7eb 100644
--- a/src/tests/helper.rs
+++ b/src/tests/helper.rs
@@ -280,6 +280,19 @@ macro_rules! 
__test_atomic_int { } } #[test] + fn add() { + let a = <$atomic_type>::new(0); + test_swap_ordering(|order| a.add(0, order)); + for order in swap_orderings() { + let a = <$atomic_type>::new(0); + a.add(10, order); + assert_eq!(a.load(Ordering::Relaxed), 10); + let a = <$atomic_type>::new($int_type::MAX); + a.add(1, order); + assert_eq!(a.load(Ordering::Relaxed), $int_type::MAX.wrapping_add(1)); + } + } + #[test] fn fetch_sub() { let a = <$atomic_type>::new(20); test_swap_ordering(|order| a.fetch_sub(0, order)); @@ -293,6 +306,19 @@ macro_rules! __test_atomic_int { } } #[test] + fn sub() { + let a = <$atomic_type>::new(20); + test_swap_ordering(|order| a.sub(0, order)); + for order in swap_orderings() { + let a = <$atomic_type>::new(20); + a.sub(10, order); + assert_eq!(a.load(Ordering::Relaxed), 10); + let a = <$atomic_type>::new($int_type::MIN); + a.sub(1, order); + assert_eq!(a.load(Ordering::Relaxed), $int_type::MIN.wrapping_sub(1)); + } + } + #[test] fn fetch_and() { let a = <$atomic_type>::new(0b101101); test_swap_ordering(|order| a.fetch_and(0b101101, order)); @@ -303,6 +329,16 @@ macro_rules! __test_atomic_int { } } #[test] + fn and() { + let a = <$atomic_type>::new(0b101101); + test_swap_ordering(|order| a.and(0b101101, order)); + for order in swap_orderings() { + let a = <$atomic_type>::new(0b101101); + a.and(0b110011, order); + assert_eq!(a.load(Ordering::Relaxed), 0b100001); + } + } + #[test] fn fetch_nand() { let a = <$atomic_type>::new(0x13); test_swap_ordering(|order| a.fetch_nand(0x31, order)); @@ -323,6 +359,16 @@ macro_rules! __test_atomic_int { } } #[test] + fn or() { + let a = <$atomic_type>::new(0b101101); + test_swap_ordering(|order| a.or(0, order)); + for order in swap_orderings() { + let a = <$atomic_type>::new(0b101101); + a.or(0b110011, order); + assert_eq!(a.load(Ordering::Relaxed), 0b111111); + } + } + #[test] fn fetch_xor() { let a = <$atomic_type>::new(0b101101); test_swap_ordering(|order| a.fetch_xor(0, order)); @@ -333,6 +379,16 @@ macro_rules! __test_atomic_int { } } #[test] + fn xor() { + let a = <$atomic_type>::new(0b101101); + test_swap_ordering(|order| a.xor(0, order)); + for order in swap_orderings() { + let a = <$atomic_type>::new(0b101101); + a.xor(0b110011, order); + assert_eq!(a.load(Ordering::Relaxed), 0b011110); + } + } + #[test] fn fetch_max() { let a = <$atomic_type>::new(23); test_swap_ordering(|order| a.fetch_max(23, order)); @@ -897,6 +953,22 @@ macro_rules! __test_atomic_bool { } } #[test] + fn and() { + let a = <$atomic_type>::new(true); + test_swap_ordering(|order| a.and(true, order)); + for order in swap_orderings() { + let a = <$atomic_type>::new(true); + a.and(false, order); + assert_eq!(a.load(Ordering::Relaxed), false); + let a = <$atomic_type>::new(true); + a.and(true, order); + assert_eq!(a.load(Ordering::Relaxed), true); + let a = <$atomic_type>::new(false); + a.and(false, order); + assert_eq!(a.load(Ordering::Relaxed), false); + } + } + #[test] fn fetch_nand() { let a = <$atomic_type>::new(true); test_swap_ordering(|order| assert_eq!(a.fetch_nand(false, order), true)); @@ -930,6 +1002,22 @@ macro_rules! 
__test_atomic_bool { } } #[test] + fn or() { + let a = <$atomic_type>::new(true); + test_swap_ordering(|order| a.or(false, order)); + for order in swap_orderings() { + let a = <$atomic_type>::new(true); + a.or(false, order); + assert_eq!(a.load(Ordering::Relaxed), true); + let a = <$atomic_type>::new(true); + a.or(true, order); + assert_eq!(a.load(Ordering::Relaxed), true); + let a = <$atomic_type>::new(false); + a.or(false, order); + assert_eq!(a.load(Ordering::Relaxed), false); + } + } + #[test] fn fetch_xor() { let a = <$atomic_type>::new(true); test_swap_ordering(|order| assert_eq!(a.fetch_xor(false, order), true)); @@ -945,6 +1033,22 @@ macro_rules! __test_atomic_bool { assert_eq!(a.load(Ordering::Relaxed), false); } } + #[test] + fn xor() { + let a = <$atomic_type>::new(true); + test_swap_ordering(|order| a.xor(false, order)); + for order in swap_orderings() { + let a = <$atomic_type>::new(true); + a.xor(false, order); + assert_eq!(a.load(Ordering::Relaxed), true); + let a = <$atomic_type>::new(true); + a.xor(true, order); + assert_eq!(a.load(Ordering::Relaxed), false); + let a = <$atomic_type>::new(false); + a.xor(false, order); + assert_eq!(a.load(Ordering::Relaxed), false); + } + } ::quickcheck::quickcheck! { fn quickcheck_swap(x: bool, y: bool) -> bool { for order in swap_orderings() { diff --git a/src/utils.rs b/src/utils.rs index 662b57f3..b77739f5 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -186,6 +186,51 @@ macro_rules! ifunc { }}; } +// We do not provide `nand` because it cannot be optimized on neither x86 nor MSP430. +// https://godbolt.org/z/x88voWGov +macro_rules! no_fetch_ops_impl { + ($atomic_type:ident, bool) => { + impl $atomic_type { + #[inline] + pub(crate) fn and(&self, val: bool, order: Ordering) { + self.fetch_and(val, order); + } + #[inline] + pub(crate) fn or(&self, val: bool, order: Ordering) { + self.fetch_or(val, order); + } + #[inline] + pub(crate) fn xor(&self, val: bool, order: Ordering) { + self.fetch_xor(val, order); + } + } + }; + ($atomic_type:ident, $value_type:ident) => { + impl $atomic_type { + #[inline] + pub(crate) fn add(&self, val: $value_type, order: Ordering) { + self.fetch_add(val, order); + } + #[inline] + pub(crate) fn sub(&self, val: $value_type, order: Ordering) { + self.fetch_sub(val, order); + } + #[inline] + pub(crate) fn and(&self, val: $value_type, order: Ordering) { + self.fetch_and(val, order); + } + #[inline] + pub(crate) fn or(&self, val: $value_type, order: Ordering) { + self.fetch_or(val, order); + } + #[inline] + pub(crate) fn xor(&self, val: $value_type, order: Ordering) { + self.fetch_xor(val, order); + } + } + }; +} + pub(crate) struct NoRefUnwindSafe(UnsafeCell<()>); // SAFETY: this is a marker type and we'll never access the value. unsafe impl Sync for NoRefUnwindSafe {} From 5fdde5429cd902676cda8056a0c414912d34dcf5 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Tue, 18 Oct 2022 00:29:37 +0900 Subject: [PATCH 2/3] interrupt: Optimize restore on AVR --- src/imp/interrupt/avr.rs | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/imp/interrupt/avr.rs b/src/imp/interrupt/avr.rs index cb91283a..5181bcd5 100644 --- a/src/imp/interrupt/avr.rs +++ b/src/imp/interrupt/avr.rs @@ -4,11 +4,11 @@ use core::arch::asm; #[derive(Clone, Copy)] -pub(super) struct WasEnabled(bool); +pub(super) struct State(u8); /// Disables interrupts and returns the previous interrupt state. 
 #[inline]
-pub(super) fn disable() -> WasEnabled {
+pub(super) fn disable() -> State {
     let sreg: u8;
     // SAFETY: reading the status register (SREG) and disabling interrupts are safe.
     // (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions)
@@ -25,28 +25,23 @@ pub(super) fn disable() -> WasEnabled {
         );
         #[cfg(portable_atomic_no_asm)]
         {
-            llvm_asm!("in $0,0x3F" :"=r"(sreg) ::: "volatile");
+            llvm_asm!("in $0, 0x3F" : "=r"(sreg) ::: "volatile");
             llvm_asm!("cli" ::: "memory" : "volatile");
         }
     }
-    // I (Global Interrupt Enable) bit (1 << 7)
-    WasEnabled(sreg & 0x80 != 0)
+    State(sreg)
 }
 
 /// Restores the previous interrupt state.
 #[inline]
-pub(super) unsafe fn restore(WasEnabled(was_enabled): WasEnabled) {
-    if was_enabled {
-        // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`,
-        // and we've checked that interrupts were enabled before disabling interrupts.
-        unsafe {
-            // Do not use `nomem` and `readonly` because prevent preceding memory accesses from being reordered after interrupts are enabled.
-            // Do not use `preserves_flags` because SEI modifies the I bit of the status register (SREG).
-            // Refs: https://ww1.microchip.com/downloads/en/DeviceDoc/AVR-InstructionSet-Manual-DS40002198.pdf#page=127
-            #[cfg(not(portable_atomic_no_asm))]
-            asm!("sei", options(nostack));
-            #[cfg(portable_atomic_no_asm)]
-            llvm_asm!("sei" ::: "memory" : "volatile");
-        }
+pub(super) unsafe fn restore(State(sreg): State) {
+    // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`,
+    unsafe {
+        // Do not use `nomem` and `readonly` because prevent preceding memory accesses from being reordered after interrupts are enabled.
+        // Do not use `preserves_flags` because OUT modifies the status register (SREG).
+        #[cfg(not(portable_atomic_no_asm))]
+        asm!("out 0x3F, {0}", in(reg) sreg, options(nostack));
+        #[cfg(portable_atomic_no_asm)]
+        llvm_asm!("out 0x3F, $0" :: "r"(sreg) : "memory" : "volatile");
     }
 }

From edbca26b7363fdad9442afa643076469873b8219 Mon Sep 17 00:00:00 2001
From: Taiki Endo
Date: Fri, 9 Dec 2022 22:48:58 +0900
Subject: [PATCH 3/3] interrupt: Optimize restore on MSP430

---
 src/imp/interrupt/armv4t.rs |  2 ++
 src/imp/interrupt/avr.rs    |  2 ++
 src/imp/interrupt/msp430.rs | 36 +++++++++++++++++++-----------------
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/src/imp/interrupt/armv4t.rs b/src/imp/interrupt/armv4t.rs
index 4010946c..0ef044a7 100644
--- a/src/imp/interrupt/armv4t.rs
+++ b/src/imp/interrupt/armv4t.rs
@@ -51,6 +51,8 @@ pub(super) fn disable() -> State {
 pub(super) unsafe fn restore(State(cpsr): State) {
     // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`,
     unsafe {
+        // This clobbers the entire CPSR. See msp430.rs for why this is safe.
+        //
         // Do not use `nomem` and `readonly` because prevent preceding memory accesses from being reordered after interrupts are enabled.
         asm!("msr cpsr_c, {0}", in(reg) cpsr, options(nostack));
     }
diff --git a/src/imp/interrupt/avr.rs b/src/imp/interrupt/avr.rs
index 5181bcd5..19725eab 100644
--- a/src/imp/interrupt/avr.rs
+++ b/src/imp/interrupt/avr.rs
@@ -37,6 +37,8 @@ pub(super) fn disable() -> State {
 pub(super) unsafe fn restore(State(sreg): State) {
     // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`,
     unsafe {
+        // This clobbers the entire status register. See msp430.rs for why this is safe.
+ // // Do not use `nomem` and `readonly` because prevent preceding memory accesses from being reordered after interrupts are enabled. // Do not use `preserves_flags` because OUT modifies the status register (SREG). #[cfg(not(portable_atomic_no_asm))] diff --git a/src/imp/interrupt/msp430.rs b/src/imp/interrupt/msp430.rs index 89b516c6..35e0017d 100644 --- a/src/imp/interrupt/msp430.rs +++ b/src/imp/interrupt/msp430.rs @@ -6,11 +6,11 @@ use core::arch::asm; pub(super) use super::super::msp430 as atomic; #[derive(Clone, Copy)] -pub(super) struct WasEnabled(bool); +pub(super) struct State(u16); /// Disables interrupts and returns the previous interrupt state. #[inline] -pub(super) fn disable() -> WasEnabled { +pub(super) fn disable() -> State { let r: u16; // SAFETY: reading the status register and disabling interrupts are safe. // (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions) @@ -31,24 +31,26 @@ pub(super) fn disable() -> WasEnabled { llvm_asm!("dint { nop" ::: "memory" : "volatile"); } } - // GIE (global interrupt enable) bit (1 << 3) - WasEnabled(r & 0x8 != 0) + State(r) } /// Restores the previous interrupt state. #[inline] -pub(super) unsafe fn restore(WasEnabled(was_enabled): WasEnabled) { - if was_enabled { - // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`, - // and we've checked that interrupts were enabled before disabling interrupts. - unsafe { - // Do not use `nomem` and `readonly` because prevent preceding memory accesses from being reordered after interrupts are enabled. - // Do not use `preserves_flags` because EINT modifies the GIE (global interrupt enable) bit of the status register. - // Refs: http://mspgcc.sourceforge.net/manual/x951.html - #[cfg(not(portable_atomic_no_asm))] - asm!("nop {{ eint {{ nop", options(nostack)); - #[cfg(portable_atomic_no_asm)] - llvm_asm!("nop { eint { nop" ::: "memory" : "volatile"); - } +pub(super) unsafe fn restore(State(r): State) { + // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`, + unsafe { + // This clobbers the entire status register, but we never explicitly modify + // flags within a critical session, and the only flags that may be changed + // within a critical session are the arithmetic flags that are changed as + // a side effect of arithmetic operations, etc., which LLVM recognizes, + // so it is safe to clobber them here. + // See also the discussion at https://github.com/taiki-e/portable-atomic/pull/40. + // + // Do not use `nomem` and `readonly` because prevent preceding memory accesses from being reordered after interrupts are enabled. + // Do not use `preserves_flags` because MOV modifies the status register. + #[cfg(not(portable_atomic_no_asm))] + asm!("nop {{ mov {0}, R2 {{ nop", in(reg) r, options(nostack)); + #[cfg(portable_atomic_no_asm)] + llvm_asm!("nop { mov $0, R2 { nop" :: "r"(r) : "memory" : "volatile"); } }
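
A minimal usage sketch of the API added in patch 1/3, for illustration only (the static names are made up for this example; `AtomicU8`/`AtomicBool` are existing portable-atomic types, and the new methods are available on targets covered by the cfg attributes shown in lib.rs):

```rust
use portable_atomic::{AtomicBool, AtomicU8, Ordering};

static COUNTER: AtomicU8 = AtomicU8::new(0);
static DIRTY: AtomicBool = AtomicBool::new(false);

fn record_event() {
    // Same effect as `fetch_add`/`fetch_or`, but the previous value is discarded,
    // which is what lets single-instruction backends such as the MSP430 one avoid
    // disabling interrupts just to return the old value.
    COUNTER.add(1, Ordering::SeqCst);
    DIRTY.or(true, Ordering::SeqCst);
}
```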