From 2a707eedb459369687ffdb2183ee82fabaa5d97a Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Thu, 13 Mar 2025 11:09:24 -0700 Subject: [PATCH] race: Relax `compare_exchange` success ordering from `AcqRel` to `Release`. See the analogous change in https://github.com/rust-lang/rust/pull/131746 and the discussion in https://github.com/matklad/once_cell/issues/220. What is the effect of this change? Not much, because before we ever execute the `compare_exchange`, we do a load with `Ordering::Acquire`; the `compare_exchange` is in the `#[cold]` path already. Thus, this just mostly clarifies our expectations. See the non-doc comment added under the module's doc comment for the reasoning. How does this change the code gen? Consider this analogous example: ```diff #[no_mangle] fn foo1(y: &mut i32) -> bool { - let r = X.compare_exchange(0, 1, Ordering::AcqRel, Ordering::Acquire).is_ok(); + let r = X.compare_exchange(0, 1, Ordering::Release, Ordering::Acquire).is_ok(); r } ``` On x86_64, there is no change. Here is the generated code before and after: ``` foo1: mov rcx, qword ptr [rip + example::X::h9e1b81da80078af7@GOTPCREL] mov edx, 1 xor eax, eax lock cmpxchg dword ptr [rcx], edx sete al ret example::X::h9e1b81da80078af7: .zero 4 ``` On AArch64, regardless of whether atomics are outlined or not, there is no change. Here is the generated code with inlined atomics: ``` foo1: adrp x8, :got:example::X::h40b04fb69d714de3 ldr x8, [x8, :got_lo12:example::X::h40b04fb69d714de3] .LBB0_1: ldaxr w9, [x8] cbnz w9, .LBB0_4 mov w0, #1 stlxr w9, w0, [x8] cbnz w9, .LBB0_1 ret .LBB0_4: mov w0, wzr clrex ret example::X::h40b04fb69d714de3: .zero 4 ``` For 32-bit ARMv7, with inlined atomics, the resulting diff in the object code is: ```diff @@ -10,14 +10,13 @@ mov r0, #1 strex r2, r0, [r1] cmp r2, #0 - beq .LBB0_5 + bxeq lr ldrex r0, [r1] cmp r0, #0 beq .LBB0_2 .LBB0_4: - mov r0, #0 clrex -.LBB0_5: + mov r0, #0 dmb ish bx lr .LCPI0_0: @@ -54,4 +53,3 @@ example::X::h47e2038445e1c648: .zero 4 ``` --- CHANGELOG.md | 3 +++ Cargo.lock.msrv | 2 +- Cargo.toml | 2 +- src/race.rs | 20 ++++++++++++++------ 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f80d90d..92e2d4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 1.21.2 +- Relax success ordering from AcqRel to Release in `race`: [#278](https://github.com/matklad/once_cell/pull/278). + ## 1.21.1 - Reduce MSRV to 1.65: [#277](https://github.com/matklad/once_cell/pull/277). diff --git a/Cargo.lock.msrv b/Cargo.lock.msrv index 3dc9f9e..9c90a43 100644 --- a/Cargo.lock.msrv +++ b/Cargo.lock.msrv @@ -43,7 +43,7 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "once_cell" -version = "1.21.1" +version = "1.21.2" dependencies = [ "critical-section", "parking_lot_core", diff --git a/Cargo.toml b/Cargo.toml index 0ae49a1..b395717 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "once_cell" -version = "1.21.1" +version = "1.21.2" authors = ["Aleksey Kladov "] license = "MIT OR Apache-2.0" edition = "2021" diff --git a/src/race.rs b/src/race.rs index 838b841..27014d6 100644 --- a/src/race.rs +++ b/src/race.rs @@ -19,6 +19,14 @@ //! `Acquire` and `Release` have very little performance overhead on most //! architectures versus `Relaxed`. +// The "atomic orderings" section of the documentation above promises +// "happens-before" semantics. This drives the choice of orderings in the uses +// of `compare_exchange` below. On success, the value was zero/null, so there +// was nothing to acquire (there is never any `Ordering::Release` store of 0). +// On failure, the value was nonzero, so it was initialized previously (perhaps +// on another thread) using `Ordering::Release`, so we must use +// `Ordering::Acquire` to ensure that store "happens-before" this load. + #[cfg(not(feature = "portable-atomic"))] use core::sync::atomic; #[cfg(feature = "portable-atomic")] @@ -98,7 +106,7 @@ impl OnceNonZeroUsize { #[inline] pub fn set(&self, value: NonZeroUsize) -> Result<(), ()> { let exchange = - self.inner.compare_exchange(0, value.get(), Ordering::AcqRel, Ordering::Acquire); + self.inner.compare_exchange(0, value.get(), Ordering::Release, Ordering::Acquire); match exchange { Ok(_) => Ok(()), Err(_) => Err(()), @@ -144,7 +152,7 @@ impl OnceNonZeroUsize { #[inline(never)] fn init(&self, f: impl FnOnce() -> Result) -> Result { let mut val = f()?.get(); - let exchange = self.inner.compare_exchange(0, val, Ordering::AcqRel, Ordering::Acquire); + let exchange = self.inner.compare_exchange(0, val, Ordering::Release, Ordering::Acquire); if let Err(old) = exchange { val = old; } @@ -258,7 +266,7 @@ impl<'a, T> OnceRef<'a, T> { pub fn set(&self, value: &'a T) -> Result<(), ()> { let ptr = value as *const T as *mut T; let exchange = - self.inner.compare_exchange(ptr::null_mut(), ptr, Ordering::AcqRel, Ordering::Acquire); + self.inner.compare_exchange(ptr::null_mut(), ptr, Ordering::Release, Ordering::Acquire); match exchange { Ok(_) => Ok(()), Err(_) => Err(()), @@ -301,7 +309,7 @@ impl<'a, T> OnceRef<'a, T> { let exchange = self.inner.compare_exchange( ptr::null_mut(), ptr, - Ordering::AcqRel, + Ordering::Release, Ordering::Acquire, ); if let Err(old) = exchange { @@ -396,7 +404,7 @@ mod once_box { let exchange = self.inner.compare_exchange( ptr::null_mut(), ptr, - Ordering::AcqRel, + Ordering::Release, Ordering::Acquire, ); if exchange.is_err() { @@ -442,7 +450,7 @@ mod once_box { let exchange = self.inner.compare_exchange( ptr::null_mut(), ptr, - Ordering::AcqRel, + Ordering::Release, Ordering::Acquire, ); if let Err(old) = exchange {