10 changes: 5 additions & 5 deletions .github/workflows/ci.yml
@@ -43,6 +43,8 @@ jobs:
target: armv5te-unknown-linux-gnueabi
- rust: nightly
target: i686-unknown-linux-gnu
- rust: nightly
target: riscv64gc-unknown-linux-gnu
- rust: nightly
target: s390x-unknown-linux-gnu
runs-on: ${{ matrix.os || 'ubuntu-latest' }}
@@ -66,14 +68,12 @@ jobs:
env:
CARGO_PROFILE_RELEASE_CODEGEN_UNITS: 1
CARGO_PROFILE_RELEASE_LTO: fat
- run: cargo hack build -vv --workspace --ignore-private --feature-powerset --optional-deps --no-dev-deps
- run: cargo hack build -vv --workspace --ignore-private --feature-powerset --optional-deps --no-dev-deps
env:
RUSTFLAGS: ${{ env.RUSTFLAGS }} --cfg atomic_memcpy_unsafe_volatile
- run: tools/build.sh
if: matrix.target == ''
- run: cargo minimal-versions build -vv --workspace --all-features --ignore-private
- run: cargo minimal-versions build -vv --workspace --ignore-private
if: startsWith(matrix.rust, 'nightly')
- run: cargo minimal-versions build -vv --workspace --all-features --ignore-private
if: startsWith(matrix.rust, 'nightly') && (matrix.target == '' || startsWith(matrix.target, 'aarch64') || startsWith(matrix.target, 'i586') || startsWith(matrix.target, 'i686') || startsWith(matrix.target, 'riscv'))

msrv:
runs-on: ubuntu-latest
5 changes: 5 additions & 0 deletions Cargo.toml
@@ -12,6 +12,8 @@ Byte-wise atomic memcpy.
"""

[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]
targets = ["x86_64-unknown-linux-gnu"]

[workspace]
@@ -28,6 +30,9 @@ members = [
# Note that it is useless to enable this in most cases.
inline-always = []

[dependencies]
atomic-maybe-uninit = { version = "0.2.10", optional = true }

[target.'cfg(target_os = "none")'.dependencies]
portable-atomic = { version = "0.3.6", default-features = false }

11 changes: 11 additions & 0 deletions README.md
@@ -21,6 +21,17 @@ See [P1478R1][p1478r1] for more.
- If the type being copied contains pointers, it is not compatible with strict provenance because the copy performs ptr-to-int transmutes.
- If the type being copied contains uninitialized bytes (e.g., padding), [it is undefined behavior because the copy goes through integers][undefined-behavior] (see the sketch below). This problem will probably not be resolved until something like `AtomicMaybeUninit` is supported.
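
For illustration, here is a hypothetical type (not part of this crate) whose layout contains a padding byte; copying it with the plain byte-wise copy, which goes through integer loads and stores, would read that uninitialized byte and is therefore undefined behavior:

```rust
// Hypothetical example type, for illustration only.
#[repr(C)]
struct WithPadding {
    a: u8,  // offset 0
    // offset 1: one padding byte so that `b` is 2-byte aligned
    b: u16, // offset 2
}
// size_of::<WithPadding>() == 4, but the byte at offset 1 is never
// initialized, so it must not be read through an integer type.
```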

## Optional features

- **`atomic-maybe-uninit`**

Support copying types that contain uninitialized bytes (e.g., padding) with `atomic_load_maybe_uninit` and `atomic_store_maybe_uninit`. A short usage sketch follows the notes below.

Note:
- This feature is only available on some platforms. See [the platform support section of the atomic-maybe-uninit documentation](https://github.com/taiki-e/atomic-maybe-uninit#platform-support) for more.
- Enabling this feature increases the MSRV to Rust 1.59.
- This feature is not compatible with [Miri](https://github.com/rust-lang/miri/issues/1045) or [Sanitizer](https://github.com/google/sanitizers/issues/192) as of 2022-03-11, because it uses inline assembly.
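
A rough usage sketch (the `Padded` type below is hypothetical; this assumes the `atomic-maybe-uninit` feature is enabled and the target is supported by atomic-maybe-uninit):

```rust
use std::{cell::UnsafeCell, sync::atomic::Ordering};

// Hypothetical type with 3 bytes of padding between `a` and `b`.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq)]
struct Padded {
    a: u8,
    b: u32,
}

let v = UnsafeCell::new(Padded { a: 1, b: 2 });
// SAFETY: `v.get()` is properly aligned and there are no concurrent
// non-atomic operations or atomic operations of different granularity.
unsafe {
    atomic_memcpy::atomic_store_maybe_uninit(v.get(), Padded { a: 3, b: 4 }, Ordering::Release);
    let loaded = atomic_memcpy::atomic_load_maybe_uninit(v.get(), Ordering::Acquire);
    // SAFETY: there were no concurrent write operations during the load.
    assert_eq!(loaded.assume_init(), Padded { a: 3, b: 4 });
}
```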

## Related Projects

- [portable-atomic]: Portable atomic types including support for 128-bit atomics, atomic float, etc. Using byte-wise atomic memcpy to implement Seqlock, which is used in the fallback implementation.
313 changes: 313 additions & 0 deletions src/lib.rs
@@ -66,6 +66,7 @@ See [P1478R1][p1478r1] for more.
clippy::single_match_else,
clippy::too_many_lines
)]
#![cfg_attr(docsrs, feature(doc_cfg))]

// This crate should work on targets with power-of-two pointer widths,
// but it is not clear how it will work on targets without them.
@@ -206,6 +207,87 @@ pub unsafe fn atomic_store<T>(dst: *mut T, val: T, order: Ordering) {
}
}

/// Byte-wise atomic load.
///
/// # Safety
///
/// Apart from allowing types that contain uninitialized bytes, this function
/// has the same safety requirements as [`atomic_load`].
/// See [the documentation of `atomic_load`](atomic_load#safety) for more.
///
/// # Panics
///
/// Panics if `order` is [`Release`](Ordering::Release) or [`AcqRel`](Ordering::AcqRel).
///
/// # Examples
///
/// ```rust
/// use std::{cell::UnsafeCell, sync::atomic::Ordering};
///
/// let v = UnsafeCell::new([0_u8; 64]);
/// let result = unsafe { atomic_memcpy::atomic_load_maybe_uninit(v.get(), Ordering::Acquire) };
/// // SAFETY: there were no concurrent write operations during the load.
/// assert_eq!(unsafe { result.assume_init() }, [0; 64]);
/// ```
#[cfg(feature = "atomic-maybe-uninit")]
#[cfg_attr(docsrs, doc(cfg(feature = "atomic-maybe-uninit")))]
#[cfg_attr(feature = "inline-always", inline(always))]
#[cfg_attr(not(feature = "inline-always"), inline)]
pub unsafe fn atomic_load_maybe_uninit<T>(
src: *const T,
order: Ordering,
) -> core::mem::MaybeUninit<T> {
assert_load_ordering(order);
// SAFETY: the caller must uphold the safety contract for `atomic_load_maybe_uninit`.
let val = unsafe { maybe_uninit::atomic_load(src) };
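// The loads in `maybe_uninit::atomic_load` are all Relaxed; for Acquire/SeqCst,
// the trailing fence below provides the requested ordering for the load as a whole.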
match order {
Ordering::Relaxed => { /* no-op */ }
_ => atomic::fence(order),
}
val
}

/// Byte-wise atomic store.
///
/// # Safety
///
/// Apart from allowing types that contain uninitialized bytes, this function
/// has the same safety requirements as [`atomic_store`].
/// See [the documentation of `atomic_store`](atomic_store#safety) for more.
///
/// # Panics
///
/// Panics if `order` is [`Acquire`](Ordering::Acquire) or [`AcqRel`](Ordering::AcqRel).
///
/// # Examples
///
/// ```rust
/// use std::{cell::UnsafeCell, sync::atomic::Ordering};
///
/// let v = UnsafeCell::new([0_u8; 64]);
/// unsafe {
/// atomic_memcpy::atomic_store_maybe_uninit(v.get(), [1; 64], Ordering::Release);
/// }
/// let result = unsafe { atomic_memcpy::atomic_load_maybe_uninit(v.get(), Ordering::Acquire) };
/// // SAFETY: there were no concurrent write operations during the load.
/// assert_eq!(unsafe { result.assume_init() }, [1; 64]);
/// ```
#[cfg(feature = "atomic-maybe-uninit")]
#[cfg_attr(docsrs, doc(cfg(feature = "atomic-maybe-uninit")))]
#[cfg_attr(feature = "inline-always", inline(always))]
#[cfg_attr(not(feature = "inline-always"), inline)]
pub unsafe fn atomic_store_maybe_uninit<T>(dst: *mut T, val: T, order: Ordering) {
assert_store_ordering(order);
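// The stores in `maybe_uninit::atomic_store` are all Relaxed; for Release/SeqCst,
// the leading fence below provides the requested ordering for the store as a whole.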
match order {
Ordering::Relaxed => { /* no-op */ }
_ => atomic::fence(order),
}
// SAFETY: the caller must uphold the safety contract for `atomic_store_maybe_uninit`.
unsafe {
maybe_uninit::atomic_store(dst, val);
}
}

// https://github.com/rust-lang/rust/blob/7b68106ffb71f853ea32f0e0dc0785d9d647cbbf/library/core/src/sync/atomic.rs#L2624
#[cfg_attr(feature = "inline-always", inline(always))]
#[cfg_attr(not(feature = "inline-always"), inline)]
@@ -230,6 +312,237 @@ fn assert_store_ordering(order: Ordering) {
}
}

#[cfg(feature = "atomic-maybe-uninit")]
mod maybe_uninit {
use core::{
mem::{self, ManuallyDrop, MaybeUninit},
ops::Range,
sync::atomic::Ordering,
};

use atomic_maybe_uninit::raw::{AtomicLoad, AtomicStore};

#[cfg_attr(feature = "inline-always", inline(always))]
#[cfg_attr(not(feature = "inline-always"), inline)]
pub(crate) unsafe fn atomic_load<T>(src: *const T) -> MaybeUninit<T> {
// Safety requirements guaranteed by the caller:
// - `src` is valid for atomic reads.
// - `src` is properly aligned for `T`.
// - `src` must go through `UnsafeCell::get`.
// - there are no concurrent non-atomic write operations.
// - there are no concurrent atomic write operations of different granularity.
// Note that the safety of the code in this function relies on these guarantees,
// whether or not they are explicitly mentioned in each safety comment.
debug_assert!(!src.is_null());
debug_assert!(src as usize % mem::align_of::<T>() == 0);

let mut result = MaybeUninit::<T>::uninit();

if mem::size_of::<T>() == 0 {
return result;
}

// If the alignment of `T` is greater than or equal to that of usize,
// we can read it in usize-sized chunks.
if mem::align_of::<T>() >= mem::align_of::<usize>() {
let src = src as *const MaybeUninit<usize>;
let dst = result.as_mut_ptr() as *mut MaybeUninit<usize>;
for i in range(0..mem::size_of::<T>() / mem::size_of::<usize>()) {
// SAFETY:
// - the caller must guarantee that `src` is properly aligned for `T`.
// - `T` has an alignment greater than or equal to usize.
// - the number of remaining bytes is greater than or equal to `size_of::<usize>()`.
unsafe {
usize::atomic_load(src.add(i), dst.add(i), Ordering::Relaxed);
}
}
return result;
}

// If usize is larger than u32 and the alignment of `T` is greater than or
// equal to that of u32, we can read it in u32-sized chunks.
if mem::size_of::<usize>() > 4 && mem::align_of::<T>() >= mem::align_of::<u32>() {
let src = src as *const MaybeUninit<u32>;
let dst = result.as_mut_ptr() as *mut MaybeUninit<u32>;
for i in range(0..mem::size_of::<T>() / mem::size_of::<u32>()) {
// SAFETY:
// - the caller must guarantee that `src` is properly aligned for `T`.
// - `T` has an alignment greater than or equal to u32.
// - the number of remaining bytes is greater than or equal to `size_of::<u32>()`.
unsafe {
u32::atomic_load(src.add(i), dst.add(i), Ordering::Relaxed);
}
}
return result;
}

// If usize is larger than u16 and the alignment of `T` is greater than or
// equal to that of u16, we can read it in u16-sized chunks.
if mem::size_of::<usize>() > 2 && mem::align_of::<T>() >= mem::align_of::<u16>() {
let src = src as *const MaybeUninit<u16>;
let dst = result.as_mut_ptr() as *mut MaybeUninit<u16>;
for i in range(0..mem::size_of::<T>() / mem::size_of::<u16>()) {
// SAFETY:
// - the caller must guarantee that `src` is properly aligned for `T`.
// - `T` has an alignment greater than or equal to u16.
// - the number of remaining bytes is greater than or equal to `size_of::<u16>()`.
unsafe {
u16::atomic_load(src.add(i), dst.add(i), Ordering::Relaxed);
}
}
return result;
}

// Otherwise, we read it per byte.
let src = src as *const MaybeUninit<u8>;
let dst = result.as_mut_ptr() as *mut MaybeUninit<u8>;
for i in range(0..mem::size_of::<T>()) {
// SAFETY:
// - the number of remaining bytes is greater than or equal to 1.
unsafe {
u8::atomic_load(src.add(i), dst.add(i), Ordering::Relaxed);
}
}
result
}

#[cfg_attr(feature = "inline-always", inline(always))]
#[cfg_attr(not(feature = "inline-always"), inline)]
pub(crate) unsafe fn atomic_store<T>(dst: *mut T, val: T) {
// Safety requirements guaranteed by the caller:
// - `dst` is valid for atomic writes.
// - `dst` is properly aligned for `T`.
// - `dst` must go through `UnsafeCell::get`.
// - there are no concurrent non-atomic operations.
// - there are no concurrent atomic operations of different granularity.
// - if there are concurrent atomic write operations, `T` is valid for all bit patterns.
// Note that the safety of the code in this function relies on these guarantees,
// whether or not they are explicitly mentioned in each safety comment.
debug_assert!(!dst.is_null());
debug_assert!(dst as usize % mem::align_of::<T>() == 0);

// In atomic_store, a panic *after* the first store operation is unsound,
// because `dst` may be left holding an invalid bit pattern.
//
// Our code is written very carefully so as not to panic, but we
// use an additional guard just in case.
//
// Note:
// - If the compiler can prove at compile time that no panic can
// occur, this guard is removed (as with no-panic).
// - atomic_load does not modify the data, so it does not have this requirement.
// - If an invalid ordering is passed, the panic happens *before* the
// first store operation, so that case is fine.
let guard = PanicGuard;

let val = ManuallyDrop::new(val); // Do not drop `val`.

if mem::size_of::<T>() == 0 {
mem::forget(guard);
return;
}

// If the alignment of `T` is greater than or equal to that of usize,
// we can write it in usize-sized chunks.
if mem::align_of::<T>() >= mem::align_of::<usize>() {
let src = (&*val as *const T).cast::<MaybeUninit<usize>>();
let dst = dst.cast::<MaybeUninit<usize>>();
for i in range(0..mem::size_of::<T>() / mem::size_of::<usize>()) {
// SAFETY:
// - the caller must guarantee that `dst` is properly aligned for `T`.
// - `T` has an alignment greater than or equal to usize.
// - the number of remaining bytes is greater than or equal to `size_of::<usize>()`.
unsafe {
usize::atomic_store(dst.add(i), src.add(i), Ordering::Relaxed);
}
}
mem::forget(guard);
return;
}

// If usize is larger than u32 and the alignment of `T` is greater than or
// equal to that of u32, we can write it in u32-sized chunks.
if mem::size_of::<usize>() > 4 && mem::align_of::<T>() >= mem::align_of::<u32>() {
let src = (&*val as *const T).cast::<MaybeUninit<u32>>();
let dst = dst.cast::<MaybeUninit<u32>>();
for i in range(0..mem::size_of::<T>() / mem::size_of::<u32>()) {
// SAFETY:
// - the caller must guarantee that `dst` is properly aligned for `T`.
// - `T` has an alignment greater than or equal to u32.
// - the number of remaining bytes is greater than or equal to `size_of::<u32>()`.
unsafe {
u32::atomic_store(dst.add(i), src.add(i), Ordering::Relaxed);
}
}
mem::forget(guard);
return;
}

// If usize is larger than u16 and the alignment of `T` is greater than or
// equal to that of u16, we can write it in u16-sized chunks.
if mem::size_of::<usize>() > 2 && mem::align_of::<T>() >= mem::align_of::<u16>() {
let src = (&*val as *const T).cast::<MaybeUninit<u16>>();
let dst = dst.cast::<MaybeUninit<u16>>();
for i in range(0..mem::size_of::<T>() / mem::size_of::<u16>()) {
// SAFETY:
// - the caller must guarantee that `dst` is properly aligned for `T`.
// - `T` has an alignment greater than or equal to u16.
// - the number of remaining bytes is greater than or equal to `size_of::<u16>()`.
unsafe {
u16::atomic_store(dst.add(i), src.add(i), Ordering::Relaxed);
}
}
mem::forget(guard);
return;
}

// Otherwise, we write it per byte.
let src = (&*val as *const T).cast::<MaybeUninit<u8>>();
let dst = dst.cast::<MaybeUninit<u8>>();
for i in range(0..mem::size_of::<T>()) {
// SAFETY:
// - the caller must guarantee that `dst` is properly aligned for `T`.
// - the number of remaining bytes is greater than or equal to 1.
unsafe {
u8::atomic_store(dst.add(i), src.add(i), Ordering::Relaxed);
}
}
mem::forget(guard);
}

// This allows read_volatile and atomic_load to be lowered to exactly the
// same assembly on little-endian platforms such as aarch64 and riscv64.
#[cfg_attr(feature = "inline-always", inline(always))]
#[cfg_attr(not(feature = "inline-always"), inline)]
#[cfg(target_endian = "little")]
fn range<T>(r: Range<T>) -> core::iter::Rev<Range<T>>
where
Range<T>: DoubleEndedIterator,
{
r.rev()
}
#[cfg_attr(feature = "inline-always", inline(always))]
#[cfg_attr(not(feature = "inline-always"), inline)]
#[cfg(target_endian = "big")]
fn range<T>(r: Range<T>) -> Range<T>
where
Range<T>: DoubleEndedIterator,
{
r
}

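// Dropping this guard aborts via a double panic. The guard is forgotten
// (`mem::forget`) on every successful return path of `atomic_store` above,
// so the abort only happens if a panic unwinds past the first store.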
struct PanicGuard;

impl Drop for PanicGuard {
fn drop(&mut self) {
// This crate supports no-std environments, so we cannot use std::process::abort.
// Instead, we rely on the fact that a double panic is converted to an abort.
panic!("abort");
}
}
}

/// There is `cfg(atomic_memcpy_unsafe_volatile)` to force the use of volatile
/// read/write instead of atomic load/store.
/// Note that the use of `--cfg atomic_memcpy_unsafe_volatile` is