From 1dbed76e9eeb25cc350e8545c8f3e6fd0ba68cc1 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Mon, 5 Dec 2022 06:36:59 +0900 Subject: [PATCH 1/3] Add add/sub/and/or/xor methods that do not return previous value --- CHANGELOG.md | 4 + src/imp/atomic128/intrinsics.rs | 1 + src/imp/atomic128/macros.rs | 2 + src/imp/core_atomic.rs | 12 + src/imp/fallback/imp.rs | 2 + src/imp/interrupt/mod.rs | 72 ++++- src/imp/interrupt/msp430.rs | 2 + src/imp/msp430.rs | 191 ++++++++++++ src/lib.rs | 503 ++++++++++++++++++++++++++++++++ src/tests/helper.rs | 104 +++++++ src/utils.rs | 45 +++ 11 files changed, 937 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 50c9f2f9..658f1917 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,10 @@ Note: In this file, do not use the hard wrap in the middle of a sentence for com ## [Unreleased] +- Add `Atomic{I,U}*::{add,sub,and,or,xor}` and `AtomicBool::{and,or,xor}` methods. + + They are equivalent to the corresponding `fetch_*` methods, but do not return the previous value. They are intended for optimization on platforms that implement atomics using inline assembly, such as the MSP430. + - Various improvements to `portable_atomic_unsafe_assume_single_core` cfg. ([#44](https://github.com/taiki-e/portable-atomic/pull/44)) - Support disabling FIQs on pre-v6 ARM under `portable_atomic_disable_fiq` cfg. diff --git a/src/imp/atomic128/intrinsics.rs b/src/imp/atomic128/intrinsics.rs index 37895e8d..db144f0e 100644 --- a/src/imp/atomic128/intrinsics.rs +++ b/src/imp/atomic128/intrinsics.rs @@ -415,6 +415,7 @@ macro_rules! atomic128 { // SAFETY: any data races are prevented by atomic intrinsics. unsafe impl Sync for $atomic_type {} + no_fetch_ops_impl!($atomic_type, $int_type); impl $atomic_type { #[inline] pub(crate) const fn new(v: $int_type) -> Self { diff --git a/src/imp/atomic128/macros.rs b/src/imp/atomic128/macros.rs index e5b1d761..972a3bb1 100644 --- a/src/imp/atomic128/macros.rs +++ b/src/imp/atomic128/macros.rs @@ -10,6 +10,7 @@ macro_rules! atomic128 { // SAFETY: any data races are prevented by atomic intrinsics. unsafe impl Sync for $atomic_type {} + no_fetch_ops_impl!($atomic_type, $int_type); impl $atomic_type { #[inline] pub(crate) const fn new(v: $int_type) -> Self { @@ -216,6 +217,7 @@ macro_rules! atomic128 { // SAFETY: any data races are prevented by atomic intrinsics. unsafe impl Sync for $atomic_type {} + no_fetch_ops_impl!($atomic_type, $int_type); impl $atomic_type { #[inline] pub(crate) const fn new(v: $int_type) -> Self { diff --git a/src/imp/core_atomic.rs b/src/imp/core_atomic.rs index 9631c500..d0991ad9 100644 --- a/src/imp/core_atomic.rs +++ b/src/imp/core_atomic.rs @@ -41,6 +41,9 @@ impl AtomicBool { } #[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))] #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))] +no_fetch_ops_impl!(AtomicBool, bool); +#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))] +#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))] impl AtomicBool { #[inline] #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] @@ -171,6 +174,15 @@ macro_rules! 
atomic_int { pub(crate) struct $atomic_type { inner: core::sync::atomic::$atomic_type, } + #[cfg_attr( + portable_atomic_no_cfg_target_has_atomic, + cfg(not(portable_atomic_no_atomic_cas)) + )] + #[cfg_attr( + not(portable_atomic_no_cfg_target_has_atomic), + cfg(target_has_atomic = "ptr") + )] + no_fetch_ops_impl!($atomic_type, $int_type); impl $atomic_type { #[inline] pub(crate) const fn new(v: $int_type) -> Self { diff --git a/src/imp/fallback/imp.rs b/src/imp/fallback/imp.rs index 6e5da46c..b81c9a24 100644 --- a/src/imp/fallback/imp.rs +++ b/src/imp/fallback/imp.rs @@ -130,6 +130,8 @@ macro_rules! atomic { // SAFETY: any data races are prevented by the lock and atomic operation. unsafe impl Sync for $atomic_type {} + #[cfg(any(test, not(portable_atomic_cmpxchg16b_dynamic)))] + no_fetch_ops_impl!($atomic_type, $int_type); impl $atomic_type { #[cfg(any(test, not(portable_atomic_cmpxchg16b_dynamic)))] #[inline] diff --git a/src/imp/interrupt/mod.rs b/src/imp/interrupt/mod.rs index f46895b6..82c5a30b 100644 --- a/src/imp/interrupt/mod.rs +++ b/src/imp/interrupt/mod.rs @@ -23,7 +23,8 @@ // [^avr2]: https://github.com/llvm/llvm-project/blob/llvmorg-15.0.0/llvm/test/CodeGen/AVR/atomics/load16.ll#L5 // On some platforms, atomic load/store can be implemented in a more efficient -// way than disabling interrupts. +// way than disabling interrupts. On MSP430, some RMWs that do not return the +// previous value can also be optimized. // // Note: On single-core systems, it is okay to use critical session-based // CAS together with atomic load/store. The load/store will not be @@ -246,6 +247,33 @@ impl AtomicBool { } } +#[cfg(not(target_arch = "msp430"))] +no_fetch_ops_impl!(AtomicBool, bool); +#[cfg(target_arch = "msp430")] +impl AtomicBool { + #[inline] + pub(crate) fn and(&self, val: bool, order: Ordering) { + // SAFETY: Self and atomic::AtomicBool have the same layout, + unsafe { + (*(self as *const Self as *const atomic::AtomicBool)).and(val, order); + } + } + #[inline] + pub(crate) fn or(&self, val: bool, order: Ordering) { + // SAFETY: Self and atomic::AtomicBool have the same layout, + unsafe { + (*(self as *const Self as *const atomic::AtomicBool)).or(val, order); + } + } + #[inline] + pub(crate) fn xor(&self, val: bool, order: Ordering) { + // SAFETY: Self and atomic::AtomicBool have the same layout, + unsafe { + (*(self as *const Self as *const atomic::AtomicBool)).xor(val, order); + } + } +} + #[cfg_attr(target_pointer_width = "16", repr(C, align(2)))] #[cfg_attr(target_pointer_width = "32", repr(C, align(4)))] #[cfg_attr(target_pointer_width = "64", repr(C, align(8)))] @@ -462,10 +490,52 @@ macro_rules! 
atomic_int { } } } + + #[cfg(not(target_arch = "msp430"))] + no_fetch_ops_impl!($atomic_type, $int_type); + #[cfg(target_arch = "msp430")] + impl $atomic_type { + #[inline] + pub(crate) fn add(&self, val: $int_type, order: Ordering) { + // SAFETY: Self and atomic::$atomic_type have the same layout, + unsafe { + (*(self as *const Self as *const atomic::$atomic_type)).add(val, order); + } + } + #[inline] + pub(crate) fn sub(&self, val: $int_type, order: Ordering) { + // SAFETY: Self and atomic::$atomic_type have the same layout, + unsafe { + (*(self as *const Self as *const atomic::$atomic_type)).sub(val, order); + } + } + #[inline] + pub(crate) fn and(&self, val: $int_type, order: Ordering) { + // SAFETY: Self and atomic::$atomic_type have the same layout, + unsafe { + (*(self as *const Self as *const atomic::$atomic_type)).and(val, order); + } + } + #[inline] + pub(crate) fn or(&self, val: $int_type, order: Ordering) { + // SAFETY: Self and atomic::$atomic_type have the same layout, + unsafe { + (*(self as *const Self as *const atomic::$atomic_type)).or(val, order); + } + } + #[inline] + pub(crate) fn xor(&self, val: $int_type, order: Ordering) { + // SAFETY: Self and atomic::$atomic_type have the same layout, + unsafe { + (*(self as *const Self as *const atomic::$atomic_type)).xor(val, order); + } + } + } }; (load_store_critical_session, $atomic_type:ident, $int_type:ident, $align:expr) => { atomic_int!(base, $atomic_type, $int_type, $align); atomic_int!(cas, $atomic_type, $int_type); + no_fetch_ops_impl!($atomic_type, $int_type); impl $atomic_type { #[inline] #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] diff --git a/src/imp/interrupt/msp430.rs b/src/imp/interrupt/msp430.rs index 89b516c6..a81ac1e6 100644 --- a/src/imp/interrupt/msp430.rs +++ b/src/imp/interrupt/msp430.rs @@ -1,4 +1,6 @@ // Adapted from https://github.com/rust-embedded/msp430. +// +// See also src/imp/msp430.rs. #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; diff --git a/src/imp/msp430.rs b/src/imp/msp430.rs index e5f48c7b..8961c7b1 100644 --- a/src/imp/msp430.rs +++ b/src/imp/msp430.rs @@ -4,6 +4,9 @@ // Including https://github.com/pftbest/msp430-atomic/pull/4 for a compile error fix. // Including https://github.com/pftbest/msp430-atomic/pull/5 for a soundness bug fix. // +// Operations not supported here are provided by disabling interrupts. +// See also src/imp/interrupt/msp430.rs. +// // Note: Ordering is always SeqCst. #[cfg(not(portable_atomic_no_asm))] @@ -106,6 +109,39 @@ impl AtomicBool { u8::atomic_store(self.v.get(), val as u8); } } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn and(&self, val: bool, order: Ordering) { + crate::utils::assert_swap_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + u8::atomic_and(self.v.get(), val as u8); + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn or(&self, val: bool, order: Ordering) { + crate::utils::assert_swap_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. 
+ unsafe { + u8::atomic_or(self.v.get(), val as u8); + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn xor(&self, val: bool, order: Ordering) { + crate::utils::assert_swap_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + u8::atomic_xor(self.v.get(), val as u8); + } + } } #[repr(transparent)] @@ -235,6 +271,61 @@ macro_rules! atomic_int { $int_type::atomic_store(self.v.get(), val); } } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn add(&self, val: $int_type, order: Ordering) { + crate::utils::assert_swap_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + $int_type::atomic_add(self.v.get(), val); + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn sub(&self, val: $int_type, order: Ordering) { + crate::utils::assert_swap_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + $int_type::atomic_sub(self.v.get(), val); + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn and(&self, val: $int_type, order: Ordering) { + crate::utils::assert_swap_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + $int_type::atomic_and(self.v.get(), val); + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn or(&self, val: $int_type, order: Ordering) { + crate::utils::assert_swap_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + $int_type::atomic_or(self.v.get(), val); + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn xor(&self, val: $int_type, order: Ordering) { + crate::utils::assert_swap_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + $int_type::atomic_xor(self.v.get(), val); + } + } } impl AtomicOperations for $int_type { @@ -277,6 +368,101 @@ macro_rules! atomic_int { ); } } + + #[inline] + unsafe fn atomic_add(dst: *mut Self, val: Self) { + // SAFETY: the caller must uphold the safety contract for `atomic_add`. + unsafe { + #[cfg(not(portable_atomic_no_asm))] + asm!( + concat!("add", $asm_suffix, " {val}, 0({dst})"), + dst = in(reg) dst, + val = in(reg) val, + options(nostack), + ); + #[cfg(portable_atomic_no_asm)] + llvm_asm!( + concat!("add", $asm_suffix, " $1, $0") + :: "*m"(dst), "ir"(val) : "memory" : "volatile" + ); + } + } + + #[inline] + unsafe fn atomic_sub(dst: *mut Self, val: Self) { + // SAFETY: the caller must uphold the safety contract for `atomic_sub`. 
+ unsafe { + #[cfg(not(portable_atomic_no_asm))] + asm!( + concat!("sub", $asm_suffix, " {val}, 0({dst})"), + dst = in(reg) dst, + val = in(reg) val, + options(nostack), + ); + #[cfg(portable_atomic_no_asm)] + llvm_asm!( + concat!("sub", $asm_suffix, " $1, $0") + :: "*m"(dst), "ir"(val) : "memory" : "volatile" + ); + } + } + + #[inline] + unsafe fn atomic_and(dst: *mut Self, val: Self) { + // SAFETY: the caller must uphold the safety contract for `atomic_and`. + unsafe { + #[cfg(not(portable_atomic_no_asm))] + asm!( + concat!("and", $asm_suffix, " {val}, 0({dst})"), + dst = in(reg) dst, + val = in(reg) val, + options(nostack), + ); + #[cfg(portable_atomic_no_asm)] + llvm_asm!( + concat!("and", $asm_suffix, " $1, $0") + :: "*m"(dst), "ir"(val) : "memory" : "volatile" + ); + } + } + + #[inline] + unsafe fn atomic_or(dst: *mut Self, val: Self) { + // SAFETY: the caller must uphold the safety contract for `atomic_or`. + unsafe { + #[cfg(not(portable_atomic_no_asm))] + asm!( + concat!("bis", $asm_suffix, " {val}, 0({dst})"), + dst = in(reg) dst, + val = in(reg) val, + options(nostack), + ); + #[cfg(portable_atomic_no_asm)] + llvm_asm!( + concat!("bis", $asm_suffix, " $1, $0") + :: "*m"(dst), "ir"(val) : "memory" : "volatile" + ); + } + } + + #[inline] + unsafe fn atomic_xor(dst: *mut Self, val: Self) { + // SAFETY: the caller must uphold the safety contract for `atomic_xor`. + unsafe { + #[cfg(not(portable_atomic_no_asm))] + asm!( + concat!("xor", $asm_suffix, " {val}, 0({dst})"), + dst = in(reg) dst, + val = in(reg) val, + options(nostack), + ); + #[cfg(portable_atomic_no_asm)] + llvm_asm!( + concat!("xor", $asm_suffix, " $1, $0") + :: "*m"(dst), "ir"(val) : "memory" : "volatile" + ); + } + } } } } @@ -291,4 +477,9 @@ atomic_int!(usize, AtomicUsize, ".w"); trait AtomicOperations: Sized { unsafe fn atomic_load(src: *const Self) -> Self; unsafe fn atomic_store(dst: *mut Self, val: Self); + unsafe fn atomic_add(dst: *mut Self, val: Self); + unsafe fn atomic_sub(dst: *mut Self, val: Self); + unsafe fn atomic_and(dst: *mut Self, val: Self); + unsafe fn atomic_or(dst: *mut Self, val: Self); + unsafe fn atomic_xor(dst: *mut Self, val: Self); } diff --git a/src/lib.rs b/src/lib.rs index ab3e7d85..bad86292 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -818,6 +818,67 @@ impl AtomicBool { self.inner.fetch_and(val, order) } + /// Logical "and" with a boolean value. + /// + /// Performs a logical "and" operation on the current value and the argument `val`, and sets + /// the new value to the result. + /// + /// Unlike `fetch_and`, this does not return the previous value. + /// + /// `and` takes an [`Ordering`] argument which describes the memory ordering + /// of this operation. All ordering modes are possible. Note that using + /// [`Acquire`] makes the store part of this operation [`Relaxed`], and + /// using [`Release`] makes the load part [`Relaxed`]. + /// + /// This function may generate more efficient code than `fetch_and` on some platforms. + /// + /// - x86: `lock and` instead of `cmpxchg` loop + /// - MSP430: `and` instead of disabling interrupts + /// + /// Note: On x86, the use of either function should not usually + /// affect the generated code, because LLVM can properly optimize the case + /// where the result is unused. 
+ /// + /// # Examples + /// + /// ``` + /// use portable_atomic::{AtomicBool, Ordering}; + /// + /// let foo = AtomicBool::new(true); + /// foo.and(false, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), false); + /// + /// let foo = AtomicBool::new(true); + /// foo.and(true, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), true); + /// + /// let foo = AtomicBool::new(false); + /// foo.and(false, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), false); + /// ``` + #[cfg_attr( + portable_atomic_no_cfg_target_has_atomic, + cfg(any( + not(portable_atomic_no_atomic_cas), + portable_atomic_unsafe_assume_single_core, + target_arch = "avr", + target_arch = "msp430" + )) + )] + #[cfg_attr( + not(portable_atomic_no_cfg_target_has_atomic), + cfg(any( + target_has_atomic = "ptr", + portable_atomic_unsafe_assume_single_core, + target_arch = "avr", + target_arch = "msp430" + )) + )] + #[inline] + pub fn and(&self, val: bool, order: Ordering) { + self.inner.and(val, order); + } + /// Logical "nand" with a boolean value. /// /// Performs a logical "nand" operation on the current value and the argument `val`, and sets @@ -923,6 +984,67 @@ impl AtomicBool { self.inner.fetch_or(val, order) } + /// Logical "or" with a boolean value. + /// + /// Performs a logical "or" operation on the current value and the argument `val`, and sets the + /// new value to the result. + /// + /// Unlike `fetch_or`, this does not return the previous value. + /// + /// `or` takes an [`Ordering`] argument which describes the memory ordering + /// of this operation. All ordering modes are possible. Note that using + /// [`Acquire`] makes the store part of this operation [`Relaxed`], and + /// using [`Release`] makes the load part [`Relaxed`]. + /// + /// This function may generate more efficient code than `fetch_or` on some platforms. + /// + /// - x86: `lock or` instead of `cmpxchg` loop + /// - MSP430: `bis` instead of disabling interrupts + /// + /// Note: On x86, the use of either function should not usually + /// affect the generated code, because LLVM can properly optimize the case + /// where the result is unused. + /// + /// # Examples + /// + /// ``` + /// use portable_atomic::{AtomicBool, Ordering}; + /// + /// let foo = AtomicBool::new(true); + /// foo.or(false, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), true); + /// + /// let foo = AtomicBool::new(true); + /// foo.or(true, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), true); + /// + /// let foo = AtomicBool::new(false); + /// foo.or(false, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), false); + /// ``` + #[cfg_attr( + portable_atomic_no_cfg_target_has_atomic, + cfg(any( + not(portable_atomic_no_atomic_cas), + portable_atomic_unsafe_assume_single_core, + target_arch = "avr", + target_arch = "msp430" + )) + )] + #[cfg_attr( + not(portable_atomic_no_cfg_target_has_atomic), + cfg(any( + target_has_atomic = "ptr", + portable_atomic_unsafe_assume_single_core, + target_arch = "avr", + target_arch = "msp430" + )) + )] + #[inline] + pub fn or(&self, val: bool, order: Ordering) { + self.inner.or(val, order); + } + /// Logical "xor" with a boolean value. /// /// Performs a logical "xor" operation on the current value and the argument `val`, and sets @@ -975,6 +1097,67 @@ impl AtomicBool { self.inner.fetch_xor(val, order) } + /// Logical "xor" with a boolean value. 
+ /// + /// Performs a logical "xor" operation on the current value and the argument `val`, and sets + /// the new value to the result. + /// + /// Unlike `fetch_xor`, this does not return the previous value. + /// + /// `xor` takes an [`Ordering`] argument which describes the memory ordering + /// of this operation. All ordering modes are possible. Note that using + /// [`Acquire`] makes the store part of this operation [`Relaxed`], and + /// using [`Release`] makes the load part [`Relaxed`]. + /// + /// This function may generate more efficient code than `fetch_xor` on some platforms. + /// + /// - x86: `lock xor` instead of `cmpxchg` loop + /// - MSP430: `xor` instead of disabling interrupts + /// + /// Note: On x86, the use of either function should not usually + /// affect the generated code, because LLVM can properly optimize the case + /// where the result is unused. + /// + /// # Examples + /// + /// ``` + /// use portable_atomic::{AtomicBool, Ordering}; + /// + /// let foo = AtomicBool::new(true); + /// foo.xor(false, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), true); + /// + /// let foo = AtomicBool::new(true); + /// foo.xor(true, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), false); + /// + /// let foo = AtomicBool::new(false); + /// foo.xor(false, Ordering::SeqCst); + /// assert_eq!(foo.load(Ordering::SeqCst), false); + /// ``` + #[cfg_attr( + portable_atomic_no_cfg_target_has_atomic, + cfg(any( + not(portable_atomic_no_atomic_cas), + portable_atomic_unsafe_assume_single_core, + target_arch = "avr", + target_arch = "msp430" + )) + )] + #[cfg_attr( + not(portable_atomic_no_cfg_target_has_atomic), + cfg(any( + target_has_atomic = "ptr", + portable_atomic_unsafe_assume_single_core, + target_arch = "avr", + target_arch = "msp430" + )) + )] + #[inline] + pub fn xor(&self, val: bool, order: Ordering) { + self.inner.xor(val, order); + } + /// Logical "not" with a boolean value. /// /// Performs a logical "not" operation on the current value, and sets @@ -1023,6 +1206,63 @@ impl AtomicBool { self.fetch_xor(true, order) } + /// Logical "not" with a boolean value. + /// + /// Performs a logical "not" operation on the current value, and sets + /// the new value to the result. + /// + /// Unlike `fetch_not`, this does not return the previous value. + /// + /// `not` takes an [`Ordering`] argument which describes the memory ordering + /// of this operation. All ordering modes are possible. Note that using + /// [`Acquire`] makes the store part of this operation [`Relaxed`], and + /// using [`Release`] makes the load part [`Relaxed`]. + /// + /// This function may generate more efficient code than `fetch_not` on some platforms. + /// + /// - x86: `lock xor` instead of `cmpxchg` loop + /// - MSP430: `xor` instead of disabling interrupts + /// + /// Note: On x86, the use of either function should not usually + /// affect the generated code, because LLVM can properly optimize the case + /// where the result is unused. 
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let foo = AtomicBool::new(true);
+    /// foo.not(Ordering::SeqCst);
+    /// assert_eq!(foo.load(Ordering::SeqCst), false);
+    ///
+    /// let foo = AtomicBool::new(false);
+    /// foo.not(Ordering::SeqCst);
+    /// assert_eq!(foo.load(Ordering::SeqCst), true);
+    /// ```
+    #[cfg_attr(
+        portable_atomic_no_cfg_target_has_atomic,
+        cfg(any(
+            not(portable_atomic_no_atomic_cas),
+            portable_atomic_unsafe_assume_single_core,
+            target_arch = "avr",
+            target_arch = "msp430"
+        ))
+    )]
+    #[cfg_attr(
+        not(portable_atomic_no_cfg_target_has_atomic),
+        cfg(any(
+            target_has_atomic = "ptr",
+            portable_atomic_unsafe_assume_single_core,
+            target_arch = "avr",
+            target_arch = "msp430"
+        ))
+    )]
+    #[inline]
+    pub fn not(&self, order: Ordering) {
+        self.xor(true, order);
+    }
+
     // TODO: Add as_mut_ptr once it is stable on std atomic types.
     // https://github.com/rust-lang/rust/issues/66893
 
@@ -2539,6 +2779,55 @@ assert_eq!(foo.load(Ordering::SeqCst), 10);
         }
     }
 
+    doc_comment! {
+        concat!("Adds to the current value.
+
+This operation wraps around on overflow.
+
+Unlike `fetch_add`, this does not return the previous value.
+
+`add` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This function may generate more efficient code than `fetch_add` on some platforms.
+
+- MSP430: `add` instead of disabling interrupts
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0);
+foo.add(10, Ordering::SeqCst);
+assert_eq!(foo.load(Ordering::SeqCst), 10);
+```"),
+        #[cfg_attr(
+            portable_atomic_no_cfg_target_has_atomic,
+            cfg(any(
+                not(portable_atomic_no_atomic_cas),
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[cfg_attr(
+            not(portable_atomic_no_cfg_target_has_atomic),
+            cfg(any(
+                target_has_atomic = "ptr",
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[inline]
+        pub fn add(&self, val: $int_type, order: Ordering) {
+            self.inner.add(val, order);
+        }
+    }
+
     doc_comment! {
         concat!("Subtracts from the current value, returning the previous value.
 
@@ -2582,6 +2871,55 @@ assert_eq!(foo.load(Ordering::SeqCst), 10);
         }
     }
 
+    doc_comment! {
+        concat!("Subtracts from the current value.
+
+This operation wraps around on overflow.
+
+Unlike `fetch_sub`, this does not return the previous value.
+
+`sub` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This function may generate more efficient code than `fetch_sub` on some platforms.
+
+- MSP430: `sub` instead of disabling interrupts
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(20);
+foo.sub(10, Ordering::SeqCst);
+assert_eq!(foo.load(Ordering::SeqCst), 10);
+```"),
+        #[cfg_attr(
+            portable_atomic_no_cfg_target_has_atomic,
+            cfg(any(
+                not(portable_atomic_no_atomic_cas),
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[cfg_attr(
+            not(portable_atomic_no_cfg_target_has_atomic),
+            cfg(any(
+                target_has_atomic = "ptr",
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[inline]
+        pub fn sub(&self, val: $int_type, order: Ordering) {
+            self.inner.sub(val, order);
+        }
+    }
+
     doc_comment! {
         concat!("Bitwise \"and\" with the current value.
 
@@ -2628,6 +2966,61 @@ assert_eq!(foo.load(Ordering::SeqCst), 0b100001);
         }
     }
 
+    doc_comment! {
+        concat!("Bitwise \"and\" with the current value.
+
+Performs a bitwise \"and\" operation on the current value and the argument `val`, and
+sets the new value to the result.
+
+Unlike `fetch_and`, this does not return the previous value.
+
+`and` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This function may generate more efficient code than `fetch_and` on some platforms.
+
+- x86: `lock and` instead of `cmpxchg` loop
+- MSP430: `and` instead of disabling interrupts
+
+Note: On x86, the use of either function should not usually
+affect the generated code, because LLVM can properly optimize the case
+where the result is unused.
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0b101101);
+foo.and(0b110011, Ordering::SeqCst);
+assert_eq!(foo.load(Ordering::SeqCst), 0b100001);
+```"),
+        #[cfg_attr(
+            portable_atomic_no_cfg_target_has_atomic,
+            cfg(any(
+                not(portable_atomic_no_atomic_cas),
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[cfg_attr(
+            not(portable_atomic_no_cfg_target_has_atomic),
+            cfg(any(
+                target_has_atomic = "ptr",
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[inline]
+        pub fn and(&self, val: $int_type, order: Ordering) {
+            self.inner.and(val, order);
+        }
+    }
+
     doc_comment! {
         concat!("Bitwise \"nand\" with the current value.
 
@@ -2720,6 +3113,61 @@ assert_eq!(foo.load(Ordering::SeqCst), 0b111111);
         }
     }
 
+    doc_comment! {
+        concat!("Bitwise \"or\" with the current value.
+
+Performs a bitwise \"or\" operation on the current value and the argument `val`, and
+sets the new value to the result.
+
+Unlike `fetch_or`, this does not return the previous value.
+
+`or` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This function may generate more efficient code than `fetch_or` on some platforms.
+
+- x86: `lock or` instead of `cmpxchg` loop
+- MSP430: `bis` instead of disabling interrupts
+
+Note: On x86, the use of either function should not usually
+affect the generated code, because LLVM can properly optimize the case
+where the result is unused.
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0b101101);
+foo.or(0b110011, Ordering::SeqCst);
+assert_eq!(foo.load(Ordering::SeqCst), 0b111111);
+```"),
+        #[cfg_attr(
+            portable_atomic_no_cfg_target_has_atomic,
+            cfg(any(
+                not(portable_atomic_no_atomic_cas),
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[cfg_attr(
+            not(portable_atomic_no_cfg_target_has_atomic),
+            cfg(any(
+                target_has_atomic = "ptr",
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[inline]
+        pub fn or(&self, val: $int_type, order: Ordering) {
+            self.inner.or(val, order);
+        }
+    }
+
     doc_comment! {
         concat!("Bitwise \"xor\" with the current value.
 
@@ -2766,6 +3214,61 @@ assert_eq!(foo.load(Ordering::SeqCst), 0b011110);
         }
     }
 
+    doc_comment! {
+        concat!("Bitwise \"xor\" with the current value.
+
+Performs a bitwise \"xor\" operation on the current value and the argument `val`, and
+sets the new value to the result.
+
+Unlike `fetch_xor`, this does not return the previous value.
+
+`xor` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This function may generate more efficient code than `fetch_xor` on some platforms.
+
+- x86: `lock xor` instead of `cmpxchg` loop
+- MSP430: `xor` instead of disabling interrupts
+
+Note: On x86, the use of either function should not usually
+affect the generated code, because LLVM can properly optimize the case
+where the result is unused.
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0b101101);
+foo.xor(0b110011, Ordering::SeqCst);
+assert_eq!(foo.load(Ordering::SeqCst), 0b011110);
+```"),
+        #[cfg_attr(
+            portable_atomic_no_cfg_target_has_atomic,
+            cfg(any(
+                not(portable_atomic_no_atomic_cas),
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[cfg_attr(
+            not(portable_atomic_no_cfg_target_has_atomic),
+            cfg(any(
+                target_has_atomic = "ptr",
+                portable_atomic_unsafe_assume_single_core,
+                target_arch = "avr",
+                target_arch = "msp430"
+            ))
+        )]
+        #[inline]
+        pub fn xor(&self, val: $int_type, order: Ordering) {
+            self.inner.xor(val, order);
+        }
+    }
+
     doc_comment! {
         concat!("Fetches the value, and applies a function to it that returns an optional
 new value. Returns a `Result` of `Ok(previous_value)` if the function returned `Some(_)`, else
diff --git a/src/tests/helper.rs b/src/tests/helper.rs
index ec7cf563..91a8e7eb 100644
--- a/src/tests/helper.rs
+++ b/src/tests/helper.rs
@@ -280,6 +280,19 @@ macro_rules! 
__test_atomic_int { } } #[test] + fn add() { + let a = <$atomic_type>::new(0); + test_swap_ordering(|order| a.add(0, order)); + for order in swap_orderings() { + let a = <$atomic_type>::new(0); + a.add(10, order); + assert_eq!(a.load(Ordering::Relaxed), 10); + let a = <$atomic_type>::new($int_type::MAX); + a.add(1, order); + assert_eq!(a.load(Ordering::Relaxed), $int_type::MAX.wrapping_add(1)); + } + } + #[test] fn fetch_sub() { let a = <$atomic_type>::new(20); test_swap_ordering(|order| a.fetch_sub(0, order)); @@ -293,6 +306,19 @@ macro_rules! __test_atomic_int { } } #[test] + fn sub() { + let a = <$atomic_type>::new(20); + test_swap_ordering(|order| a.sub(0, order)); + for order in swap_orderings() { + let a = <$atomic_type>::new(20); + a.sub(10, order); + assert_eq!(a.load(Ordering::Relaxed), 10); + let a = <$atomic_type>::new($int_type::MIN); + a.sub(1, order); + assert_eq!(a.load(Ordering::Relaxed), $int_type::MIN.wrapping_sub(1)); + } + } + #[test] fn fetch_and() { let a = <$atomic_type>::new(0b101101); test_swap_ordering(|order| a.fetch_and(0b101101, order)); @@ -303,6 +329,16 @@ macro_rules! __test_atomic_int { } } #[test] + fn and() { + let a = <$atomic_type>::new(0b101101); + test_swap_ordering(|order| a.and(0b101101, order)); + for order in swap_orderings() { + let a = <$atomic_type>::new(0b101101); + a.and(0b110011, order); + assert_eq!(a.load(Ordering::Relaxed), 0b100001); + } + } + #[test] fn fetch_nand() { let a = <$atomic_type>::new(0x13); test_swap_ordering(|order| a.fetch_nand(0x31, order)); @@ -323,6 +359,16 @@ macro_rules! __test_atomic_int { } } #[test] + fn or() { + let a = <$atomic_type>::new(0b101101); + test_swap_ordering(|order| a.or(0, order)); + for order in swap_orderings() { + let a = <$atomic_type>::new(0b101101); + a.or(0b110011, order); + assert_eq!(a.load(Ordering::Relaxed), 0b111111); + } + } + #[test] fn fetch_xor() { let a = <$atomic_type>::new(0b101101); test_swap_ordering(|order| a.fetch_xor(0, order)); @@ -333,6 +379,16 @@ macro_rules! __test_atomic_int { } } #[test] + fn xor() { + let a = <$atomic_type>::new(0b101101); + test_swap_ordering(|order| a.xor(0, order)); + for order in swap_orderings() { + let a = <$atomic_type>::new(0b101101); + a.xor(0b110011, order); + assert_eq!(a.load(Ordering::Relaxed), 0b011110); + } + } + #[test] fn fetch_max() { let a = <$atomic_type>::new(23); test_swap_ordering(|order| a.fetch_max(23, order)); @@ -897,6 +953,22 @@ macro_rules! __test_atomic_bool { } } #[test] + fn and() { + let a = <$atomic_type>::new(true); + test_swap_ordering(|order| a.and(true, order)); + for order in swap_orderings() { + let a = <$atomic_type>::new(true); + a.and(false, order); + assert_eq!(a.load(Ordering::Relaxed), false); + let a = <$atomic_type>::new(true); + a.and(true, order); + assert_eq!(a.load(Ordering::Relaxed), true); + let a = <$atomic_type>::new(false); + a.and(false, order); + assert_eq!(a.load(Ordering::Relaxed), false); + } + } + #[test] fn fetch_nand() { let a = <$atomic_type>::new(true); test_swap_ordering(|order| assert_eq!(a.fetch_nand(false, order), true)); @@ -930,6 +1002,22 @@ macro_rules! 
__test_atomic_bool { } } #[test] + fn or() { + let a = <$atomic_type>::new(true); + test_swap_ordering(|order| a.or(false, order)); + for order in swap_orderings() { + let a = <$atomic_type>::new(true); + a.or(false, order); + assert_eq!(a.load(Ordering::Relaxed), true); + let a = <$atomic_type>::new(true); + a.or(true, order); + assert_eq!(a.load(Ordering::Relaxed), true); + let a = <$atomic_type>::new(false); + a.or(false, order); + assert_eq!(a.load(Ordering::Relaxed), false); + } + } + #[test] fn fetch_xor() { let a = <$atomic_type>::new(true); test_swap_ordering(|order| assert_eq!(a.fetch_xor(false, order), true)); @@ -945,6 +1033,22 @@ macro_rules! __test_atomic_bool { assert_eq!(a.load(Ordering::Relaxed), false); } } + #[test] + fn xor() { + let a = <$atomic_type>::new(true); + test_swap_ordering(|order| a.xor(false, order)); + for order in swap_orderings() { + let a = <$atomic_type>::new(true); + a.xor(false, order); + assert_eq!(a.load(Ordering::Relaxed), true); + let a = <$atomic_type>::new(true); + a.xor(true, order); + assert_eq!(a.load(Ordering::Relaxed), false); + let a = <$atomic_type>::new(false); + a.xor(false, order); + assert_eq!(a.load(Ordering::Relaxed), false); + } + } ::quickcheck::quickcheck! { fn quickcheck_swap(x: bool, y: bool) -> bool { for order in swap_orderings() { diff --git a/src/utils.rs b/src/utils.rs index 662b57f3..b77739f5 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -186,6 +186,51 @@ macro_rules! ifunc { }}; } +// We do not provide `nand` because it cannot be optimized on neither x86 nor MSP430. +// https://godbolt.org/z/x88voWGov +macro_rules! no_fetch_ops_impl { + ($atomic_type:ident, bool) => { + impl $atomic_type { + #[inline] + pub(crate) fn and(&self, val: bool, order: Ordering) { + self.fetch_and(val, order); + } + #[inline] + pub(crate) fn or(&self, val: bool, order: Ordering) { + self.fetch_or(val, order); + } + #[inline] + pub(crate) fn xor(&self, val: bool, order: Ordering) { + self.fetch_xor(val, order); + } + } + }; + ($atomic_type:ident, $value_type:ident) => { + impl $atomic_type { + #[inline] + pub(crate) fn add(&self, val: $value_type, order: Ordering) { + self.fetch_add(val, order); + } + #[inline] + pub(crate) fn sub(&self, val: $value_type, order: Ordering) { + self.fetch_sub(val, order); + } + #[inline] + pub(crate) fn and(&self, val: $value_type, order: Ordering) { + self.fetch_and(val, order); + } + #[inline] + pub(crate) fn or(&self, val: $value_type, order: Ordering) { + self.fetch_or(val, order); + } + #[inline] + pub(crate) fn xor(&self, val: $value_type, order: Ordering) { + self.fetch_xor(val, order); + } + } + }; +} + pub(crate) struct NoRefUnwindSafe(UnsafeCell<()>); // SAFETY: this is a marker type and we'll never access the value. unsafe impl Sync for NoRefUnwindSafe {} From 5fdde5429cd902676cda8056a0c414912d34dcf5 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Tue, 18 Oct 2022 00:29:37 +0900 Subject: [PATCH 2/3] interrupt: Optimize restore on AVR --- src/imp/interrupt/avr.rs | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/imp/interrupt/avr.rs b/src/imp/interrupt/avr.rs index cb91283a..5181bcd5 100644 --- a/src/imp/interrupt/avr.rs +++ b/src/imp/interrupt/avr.rs @@ -4,11 +4,11 @@ use core::arch::asm; #[derive(Clone, Copy)] -pub(super) struct WasEnabled(bool); +pub(super) struct State(u8); /// Disables interrupts and returns the previous interrupt state. 
 #[inline]
-pub(super) fn disable() -> WasEnabled {
+pub(super) fn disable() -> State {
     let sreg: u8;
     // SAFETY: reading the status register (SREG) and disabling interrupts are safe.
     // (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions)
@@ -25,28 +25,23 @@ pub(super) fn disable() -> WasEnabled {
         );
         #[cfg(portable_atomic_no_asm)]
         {
-            llvm_asm!("in $0,0x3F" :"=r"(sreg) ::: "volatile");
+            llvm_asm!("in $0, 0x3F" : "=r"(sreg) ::: "volatile");
             llvm_asm!("cli" ::: "memory" : "volatile");
         }
     }
-    // I (Global Interrupt Enable) bit (1 << 7)
-    WasEnabled(sreg & 0x80 != 0)
+    State(sreg)
 }
 
 /// Restores the previous interrupt state.
 #[inline]
-pub(super) unsafe fn restore(WasEnabled(was_enabled): WasEnabled) {
-    if was_enabled {
-        // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`,
-        // and we've checked that interrupts were enabled before disabling interrupts.
-        unsafe {
-            // Do not use `nomem` and `readonly` because prevent preceding memory accesses from being reordered after interrupts are enabled.
-            // Do not use `preserves_flags` because SEI modifies the I bit of the status register (SREG).
-            // Refs: https://ww1.microchip.com/downloads/en/DeviceDoc/AVR-InstructionSet-Manual-DS40002198.pdf#page=127
-            #[cfg(not(portable_atomic_no_asm))]
-            asm!("sei", options(nostack));
-            #[cfg(portable_atomic_no_asm)]
-            llvm_asm!("sei" ::: "memory" : "volatile");
-        }
+pub(super) unsafe fn restore(State(sreg): State) {
+    // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`,
+    unsafe {
+        // Do not use `nomem` and `readonly` because prevent preceding memory accesses from being reordered after interrupts are enabled.
+        // Do not use `preserves_flags` because OUT modifies the status register (SREG).
+        #[cfg(not(portable_atomic_no_asm))]
+        asm!("out 0x3F, {0}", in(reg) sreg, options(nostack));
+        #[cfg(portable_atomic_no_asm)]
+        llvm_asm!("out 0x3F, $0" :: "r"(sreg) : "memory" : "volatile");
     }
 }

From edbca26b7363fdad9442afa643076469873b8219 Mon Sep 17 00:00:00 2001
From: Taiki Endo
Date: Fri, 9 Dec 2022 22:48:58 +0900
Subject: [PATCH 3/3] interrupt: Optimize restore on MSP430

---
 src/imp/interrupt/armv4t.rs |  2 ++
 src/imp/interrupt/avr.rs    |  2 ++
 src/imp/interrupt/msp430.rs | 36 +++++++++++++++++++-----------------
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/src/imp/interrupt/armv4t.rs b/src/imp/interrupt/armv4t.rs
index 4010946c..0ef044a7 100644
--- a/src/imp/interrupt/armv4t.rs
+++ b/src/imp/interrupt/armv4t.rs
@@ -51,6 +51,8 @@ pub(super) fn disable() -> State {
 pub(super) unsafe fn restore(State(cpsr): State) {
     // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`,
     unsafe {
+        // This clobbers the entire CPSR. See msp430.rs for why this is safe.
+        //
         // Do not use `nomem` and `readonly` because prevent preceding memory accesses from being reordered after interrupts are enabled.
         asm!("msr cpsr_c, {0}", in(reg) cpsr, options(nostack));
     }
diff --git a/src/imp/interrupt/avr.rs b/src/imp/interrupt/avr.rs
index 5181bcd5..19725eab 100644
--- a/src/imp/interrupt/avr.rs
+++ b/src/imp/interrupt/avr.rs
@@ -37,6 +37,8 @@ pub(super) fn disable() -> State {
 pub(super) unsafe fn restore(State(sreg): State) {
     // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`,
     unsafe {
+        // This clobbers the entire status register. See msp430.rs for why this is safe.
+ // // Do not use `nomem` and `readonly` because prevent preceding memory accesses from being reordered after interrupts are enabled. // Do not use `preserves_flags` because OUT modifies the status register (SREG). #[cfg(not(portable_atomic_no_asm))] diff --git a/src/imp/interrupt/msp430.rs b/src/imp/interrupt/msp430.rs index 89b516c6..35e0017d 100644 --- a/src/imp/interrupt/msp430.rs +++ b/src/imp/interrupt/msp430.rs @@ -6,11 +6,11 @@ use core::arch::asm; pub(super) use super::super::msp430 as atomic; #[derive(Clone, Copy)] -pub(super) struct WasEnabled(bool); +pub(super) struct State(u16); /// Disables interrupts and returns the previous interrupt state. #[inline] -pub(super) fn disable() -> WasEnabled { +pub(super) fn disable() -> State { let r: u16; // SAFETY: reading the status register and disabling interrupts are safe. // (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions) @@ -31,24 +31,26 @@ pub(super) fn disable() -> WasEnabled { llvm_asm!("dint { nop" ::: "memory" : "volatile"); } } - // GIE (global interrupt enable) bit (1 << 3) - WasEnabled(r & 0x8 != 0) + State(r) } /// Restores the previous interrupt state. #[inline] -pub(super) unsafe fn restore(WasEnabled(was_enabled): WasEnabled) { - if was_enabled { - // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`, - // and we've checked that interrupts were enabled before disabling interrupts. - unsafe { - // Do not use `nomem` and `readonly` because prevent preceding memory accesses from being reordered after interrupts are enabled. - // Do not use `preserves_flags` because EINT modifies the GIE (global interrupt enable) bit of the status register. - // Refs: http://mspgcc.sourceforge.net/manual/x951.html - #[cfg(not(portable_atomic_no_asm))] - asm!("nop {{ eint {{ nop", options(nostack)); - #[cfg(portable_atomic_no_asm)] - llvm_asm!("nop { eint { nop" ::: "memory" : "volatile"); - } +pub(super) unsafe fn restore(State(r): State) { + // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`, + unsafe { + // This clobbers the entire status register, but we never explicitly modify + // flags within a critical session, and the only flags that may be changed + // within a critical session are the arithmetic flags that are changed as + // a side effect of arithmetic operations, etc., which LLVM recognizes, + // so it is safe to clobber them here. + // See also the discussion at https://github.com/taiki-e/portable-atomic/pull/40. + // + // Do not use `nomem` and `readonly` because prevent preceding memory accesses from being reordered after interrupts are enabled. + // Do not use `preserves_flags` because MOV modifies the status register. + #[cfg(not(portable_atomic_no_asm))] + asm!("nop {{ mov {0}, R2 {{ nop", in(reg) r, options(nostack)); + #[cfg(portable_atomic_no_asm)] + llvm_asm!("nop { mov $0, R2 { nop" :: "r"(r) : "memory" : "volatile"); } }
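
A minimal usage sketch of the API added in patch 1/3, for illustration only (the static names are made up for this example; `AtomicU8`/`AtomicBool` are existing portable-atomic types, and the new methods are available on targets covered by the cfg attributes shown in lib.rs):

```rust
use portable_atomic::{AtomicBool, AtomicU8, Ordering};

static COUNTER: AtomicU8 = AtomicU8::new(0);
static DIRTY: AtomicBool = AtomicBool::new(false);

fn record_event() {
    // Same effect as `fetch_add`/`fetch_or`, but the previous value is discarded,
    // which is what lets single-instruction backends such as the MSP430 one avoid
    // disabling interrupts just to return the old value.
    COUNTER.add(1, Ordering::SeqCst);
    DIRTY.or(true, Ordering::SeqCst);
}
```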