diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 97d2d1e..1a44a1c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,6 +43,8 @@ jobs: target: armv5te-unknown-linux-gnueabi - rust: nightly target: i686-unknown-linux-gnu + - rust: nightly + target: riscv64gc-unknown-linux-gnu - rust: nightly target: s390x-unknown-linux-gnu runs-on: ${{ matrix.os || 'ubuntu-latest' }} @@ -66,14 +68,12 @@ jobs: env: CARGO_PROFILE_RELEASE_CODEGEN_UNITS: 1 CARGO_PROFILE_RELEASE_LTO: fat - - run: cargo hack build -vv --workspace --ignore-private --feature-powerset --optional-deps --no-dev-deps - - run: cargo hack build -vv --workspace --ignore-private --feature-powerset --optional-deps --no-dev-deps - env: - RUSTFLAGS: ${{ env.RUSTFLAGS }} --cfg atomic_memcpy_unsafe_volatile - run: tools/build.sh if: matrix.target == '' - - run: cargo minimal-versions build -vv --workspace --all-features --ignore-private + - run: cargo minimal-versions build -vv --workspace --ignore-private if: startsWith(matrix.rust, 'nightly') + - run: cargo minimal-versions build -vv --workspace --all-features --ignore-private + if: startsWith(matrix.rust, 'nightly') && (matrix.target == '' || startsWith(matrix.target, 'aarch64') || startsWith(matrix.target, 'i586') || startsWith(matrix.target, 'i686') || startsWith(matrix.target, 'riscv')) msrv: runs-on: ubuntu-latest diff --git a/Cargo.toml b/Cargo.toml index 80b5354..c01527e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,8 @@ Byte-wise atomic memcpy. """ [package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] targets = ["x86_64-unknown-linux-gnu"] [workspace] @@ -28,6 +30,9 @@ members = [ # Note that it is useless to enable this in most cases. 
inline-always = [] +[dependencies] +atomic-maybe-uninit = { version = "0.2.10", optional = true } + [target.'cfg(target_os = "none")'.dependencies] portable-atomic = { version = "0.3.6", default-features = false } diff --git a/README.md b/README.md index f351013..001055e 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,17 @@ See [P1478R1][p1478r1] for more. - If the type being copied contains pointers it is not compatible with strict provenance because the copy does ptr-to-int transmutes. - If the type being copied contains uninitialized bytes (e.g., padding) [it is undefined behavior because the copy goes through integers][undefined-behavior]. This problem will probably not be resolved until something like `AtomicMaybeUninit` is supported. +## Optional features + +- **`atomic-maybe-uninit`** + + Support copying types containing uninitialized bytes. + + Note: + - This feature is only available on some platforms. See [the documentation of the atomic-maybe-uninit crate](https://github.com/taiki-e/atomic-maybe-uninit#platform-support) for more. + - Enabling this feature increases the MSRV to Rust 1.59. + - This feature is not compatible with [Miri](https://github.com/rust-lang/miri/issues/1045) or [Sanitizer](https://github.com/google/sanitizers/issues/192) as of 2022-03-11 since it uses inline assembly. + ## Related Projects - [portable-atomic]: Portable atomic types including support for 128-bit atomics, atomic float, etc. Using byte-wise atomic memcpy to implement Seqlock, which is used in the fallback implementation. diff --git a/src/lib.rs b/src/lib.rs index 5b112d5..55bfe62 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -66,6 +66,7 @@ See [P1478R1][p1478r1] for more. clippy::single_match_else, clippy::too_many_lines )] +#![cfg_attr(docsrs, feature(doc_cfg))] // This crate should work on targets with power-of-two pointer widths, // but it is not clear how it will work on targets without them. 
@@ -206,6 +207,87 @@ pub unsafe fn atomic_store(dst: *mut T, val: T, order: Ordering) { } } +/// Byte-wise atomic load. +/// +/// # Safety +/// +/// Except that types containing uninitialized bytes are allowed, this function +/// has the same safety requirements as [`atomic_load`]. +/// See [the documentation of `atomic_load`](atomic_load#safety) for more. +/// +/// # Panics +/// +/// Panics if `order` is [`Release`](Ordering::Release) or [`AcqRel`](Ordering::AcqRel). +/// +/// # Examples +/// +/// ```rust +/// use std::{cell::UnsafeCell, sync::atomic::Ordering}; +/// +/// let v = UnsafeCell::new([0_u8; 64]); +/// let result = unsafe { atomic_memcpy::atomic_load_maybe_uninit(v.get(), Ordering::Acquire) }; +/// // SAFETY: there was no concurrent write operations during load. +/// assert_eq!(unsafe { result.assume_init() }, [0; 64]); +/// ``` +#[cfg(feature = "atomic-maybe-uninit")] +#[cfg_attr(docsrs, doc(cfg(feature = "atomic-maybe-uninit")))] +#[cfg_attr(feature = "inline-always", inline(always))] +#[cfg_attr(not(feature = "inline-always"), inline)] +pub unsafe fn atomic_load_maybe_uninit( + src: *const T, + order: Ordering, +) -> core::mem::MaybeUninit { + assert_load_ordering(order); + // SAFETY: the caller must uphold the safety contract for `atomic_load_maybe_uninit`. + let val = unsafe { maybe_uninit::atomic_load(src) }; + match order { + Ordering::Relaxed => { /* no-op */ } + _ => atomic::fence(order), + } + val +} + +/// Byte-wise atomic store. +/// +/// # Safety +/// +/// Except that types containing uninitialized bytes are allowed, this function +/// has the same safety requirements as [`atomic_store`]. +/// See [the documentation of `atomic_store`](atomic_store#safety) for more. +/// +/// # Panics +/// +/// Panics if `order` is [`Acquire`](Ordering::Acquire) or [`AcqRel`](Ordering::AcqRel). 
+/// +/// # Examples +/// +/// ```rust +/// use std::{cell::UnsafeCell, sync::atomic::Ordering}; +/// +/// let v = UnsafeCell::new([0_u8; 64]); +/// unsafe { +/// atomic_memcpy::atomic_store_maybe_uninit(v.get(), [1; 64], Ordering::Release); +/// } +/// let result = unsafe { atomic_memcpy::atomic_load_maybe_uninit(v.get(), Ordering::Acquire) }; +/// // SAFETY: there was no concurrent write operations during load. +/// assert_eq!(unsafe { result.assume_init() }, [1; 64]); +/// ``` +#[cfg(feature = "atomic-maybe-uninit")] +#[cfg_attr(docsrs, doc(cfg(feature = "atomic-maybe-uninit")))] +#[cfg_attr(feature = "inline-always", inline(always))] +#[cfg_attr(not(feature = "inline-always"), inline)] +pub unsafe fn atomic_store_maybe_uninit(dst: *mut T, val: T, order: Ordering) { + assert_store_ordering(order); + match order { + Ordering::Relaxed => { /* no-op */ } + _ => atomic::fence(order), + } + // SAFETY: the caller must uphold the safety contract for `atomic_store_maybe_uninit`. + unsafe { + maybe_uninit::atomic_store(dst, val); + } +} + // https://github.com/rust-lang/rust/blob/7b68106ffb71f853ea32f0e0dc0785d9d647cbbf/library/core/src/sync/atomic.rs#L2624 #[cfg_attr(feature = "inline-always", inline(always))] #[cfg_attr(not(feature = "inline-always"), inline)] @@ -230,6 +312,237 @@ fn assert_store_ordering(order: Ordering) { } } +#[cfg(feature = "atomic-maybe-uninit")] +mod maybe_uninit { + use core::{ + mem::{self, ManuallyDrop, MaybeUninit}, + ops::Range, + sync::atomic::Ordering, + }; + + use atomic_maybe_uninit::raw::{AtomicLoad, AtomicStore}; + + #[cfg_attr(feature = "inline-always", inline(always))] + #[cfg_attr(not(feature = "inline-always"), inline)] + pub(crate) unsafe fn atomic_load(src: *const T) -> MaybeUninit { + // Safety requirements guaranteed by the caller: + // - `src` is valid for atomic reads. + // - `src` is properly aligned for `T`. + // - `src` go through `UnsafeCell::get`. + // - there are no concurrent non-atomic write operations. 
+ // - there are no concurrent atomic write operations of different granularity. + // Note that the safety of the code in this function relies on these guarantees, + // whether or not they are explicitly mentioned in the each safety comment. + debug_assert!(!src.is_null()); + debug_assert!(src as usize % mem::align_of::() == 0); + + let mut result = MaybeUninit::::uninit(); + + if mem::size_of::() == 0 { + return result; + } + + // If the alignment of `T` is greater than or equal to usize, + // we can read it as a chunk of usize. + if mem::align_of::() >= mem::align_of::() { + let src = src as *const MaybeUninit; + let dst = result.as_mut_ptr() as *mut MaybeUninit; + for i in range(0..mem::size_of::() / mem::size_of::()) { + // SAFETY: + // - the caller must guarantee that `src` is properly aligned for `T`. + // - `T` has an alignment greater than or equal to usize. + // - the remaining bytes is greater than or equal to `size_of::()`. + unsafe { + usize::atomic_load(src.add(i), dst.add(i), Ordering::Relaxed); + } + } + return result; + } + + // If the alignment of `T` is greater than or equal to u32, + // we can read it as a chunk of u32. + if mem::size_of::() > 4 && mem::align_of::() >= mem::align_of::() { + let src = src as *const MaybeUninit; + let dst = result.as_mut_ptr() as *mut MaybeUninit; + for i in range(0..mem::size_of::() / mem::size_of::()) { + // SAFETY: + // - the caller must guarantee that `src` is properly aligned for `T`. + // - `T` has an alignment greater than or equal to u32. + // - the remaining bytes is greater than or equal to `size_of::()`. + unsafe { + u32::atomic_load(src.add(i), dst.add(i), Ordering::Relaxed); + } + } + return result; + } + + // If the alignment of `T` is greater than or equal to u16, + // we can read it as a chunk of u16. 
+ if mem::size_of::() > 2 && mem::align_of::() >= mem::align_of::() { + let src = src as *const MaybeUninit; + let dst = result.as_mut_ptr() as *mut MaybeUninit; + for i in range(0..mem::size_of::() / mem::size_of::()) { + // SAFETY: + // - the caller must guarantee that `src` is properly aligned for `T`. + // - `T` has an alignment greater than or equal to u16. + // - the remaining bytes is greater than or equal to `size_of::()`. + unsafe { + u16::atomic_load(src.add(i), dst.add(i), Ordering::Relaxed); + } + } + return result; + } + + // Otherwise, we read it per byte. + let src = src as *const MaybeUninit; + let dst = result.as_mut_ptr() as *mut MaybeUninit; + for i in range(0..mem::size_of::()) { + // SAFETY: + // - the remaining bytes is greater than or equal to 1. + unsafe { + u8::atomic_load(src.add(i), dst.add(i), Ordering::Relaxed); + } + } + result + } + + #[cfg_attr(feature = "inline-always", inline(always))] + #[cfg_attr(not(feature = "inline-always"), inline)] + pub(crate) unsafe fn atomic_store(dst: *mut T, val: T) { + // Safety requirements guaranteed by the caller: + // - `dst` is valid for atomic writes. + // - `dst` is properly aligned for `T`. + // - `dst` go through `UnsafeCell::get`. + // - there are no concurrent non-atomic operations. + // - there are no concurrent atomic operations of different granularity. + // - if there are concurrent atomic write operations, `T` is valid for all bit patterns. + // Note that the safety of the code in this function relies on these guarantees, + // whether or not they are explicitly mentioned in the each safety comment. + debug_assert!(!dst.is_null()); + debug_assert!(dst as usize % mem::align_of::() == 0); + + // In atomic_store, the panic *after* the first store operation is unsound + // because dst may become an invalid bit pattern. + // + // Our code is written very carefully so as not to cause panic, but we + // will use additional guards just in case. 
+ // + // Note: + // - If the compiler can understand at compile time that panic will + // never occur, this guard will be removed (as with no-panic). + // - atomic_load does not modify the data, so it does not have this requirement. + // - If an invalid ordering is passed, it will be panic *before* the + // first store operation, so is fine. + let guard = PanicGuard; + + let val = ManuallyDrop::new(val); // Do not drop `val`. + + if mem::size_of::() == 0 { + mem::forget(guard); + return; + } + + // If the alignment of `T` is greater than or equal to usize, + // we can write it as a chunk of usize. + if mem::align_of::() >= mem::align_of::() { + let src = (&*val as *const T).cast::>(); + let dst = dst.cast::>(); + for i in range(0..mem::size_of::() / mem::size_of::()) { + // SAFETY: + // - the caller must guarantee that `dst` is properly aligned for `T`. + // - `T` has an alignment greater than or equal to usize. + // - the remaining bytes is greater than or equal to `size_of::()`. + unsafe { + usize::atomic_store(dst.add(i), src.add(i), Ordering::Relaxed); + } + } + mem::forget(guard); + return; + } + + // If the alignment of `T` is greater than or equal to u32, + // we can write it as a chunk of u32. + if mem::size_of::() > 4 && mem::align_of::() >= mem::align_of::() { + let src = (&*val as *const T).cast::>(); + let dst = dst.cast::>(); + for i in range(0..mem::size_of::() / mem::size_of::()) { + // SAFETY: + // - the caller must guarantee that `dst` is properly aligned for `T`. + // - `T` has an alignment greater than or equal to u32. + // - the remaining bytes is greater than or equal to `size_of::()`. + unsafe { + u32::atomic_store(dst.add(i), src.add(i), Ordering::Relaxed); + } + } + mem::forget(guard); + return; + } + + // If the alignment of `T` is greater than or equal to u16, + // we can write it as a chunk of u16. 
+ if mem::size_of::() > 2 && mem::align_of::() >= mem::align_of::() { + let src = (&*val as *const T).cast::>(); + let dst = dst.cast::>(); + for i in range(0..mem::size_of::() / mem::size_of::()) { + // SAFETY: + // - the caller must guarantee that `dst` is properly aligned for `T`. + // - `T` has an alignment greater than or equal to u16. + // - the remaining bytes is greater than or equal to `size_of::()`. + unsafe { + u16::atomic_store(dst.add(i), src.add(i), Ordering::Relaxed); + } + } + mem::forget(guard); + return; + } + + // Otherwise, we write it per byte. + let src = (&*val as *const T).cast::>(); + let dst = dst.cast::>(); + for i in range(0..mem::size_of::()) { + // SAFETY: + // - the caller must guarantee that `dst` is valid for atomic writes. + // - byte-sized accesses have no alignment requirement beyond 1. + // - the remaining bytes is greater than or equal to 1. + unsafe { + u8::atomic_store(dst.add(i), src.add(i), Ordering::Relaxed); + } + } + mem::forget(guard); + } + + // This allows read_volatile and atomic_load to be lowered to exactly the + // same assembly on little endian platforms such as aarch64, riscv64. + #[cfg_attr(feature = "inline-always", inline(always))] + #[cfg_attr(not(feature = "inline-always"), inline)] + #[cfg(target_endian = "little")] + fn range(r: Range) -> core::iter::Rev> + where + Range: DoubleEndedIterator, + { + r.rev() + } + #[cfg_attr(feature = "inline-always", inline(always))] + #[cfg_attr(not(feature = "inline-always"), inline)] + #[cfg(target_endian = "big")] + fn range(r: Range) -> Range + where + Range: DoubleEndedIterator, + { + r + } + + struct PanicGuard; + + impl Drop for PanicGuard { + fn drop(&mut self) { + // This crate supports no-std environment, so we cannot use std::process::abort. + // Instead, it uses the nature of double panics being converted to an abort. 
+ panic!("abort"); + } + } +} + /// There is `cfg(atomic_memcpy_unsafe_volatile)` to force the use of volatile /// read/write instead of atomic load/store. /// Note that the use of `--cfg atomic_memcpy_unsafe_volatile` is diff --git a/tests/asm-test/Cargo.toml b/tests/asm-test/Cargo.toml index 3cafb6c..feb0338 100644 --- a/tests/asm-test/Cargo.toml +++ b/tests/asm-test/Cargo.toml @@ -10,6 +10,7 @@ doc = false [features] default = ["anyhow", "duct", "fs-err", "indexmap", "lexopt", "rustc-demangle", "syn"] +atomic-maybe-uninit = ["atomic-memcpy/atomic-maybe-uninit"] [dependencies] atomic-memcpy = { path = "../..", features = ["inline-always"] } diff --git a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align1 b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align1 index 8a99d2e..eff9d1a 100644 --- a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align1 @@ -1,182 +1,389 @@ asm_test::atomic_memcpy_load_align1::acquire: sub sp, sp, #64 - add x9, x0, #7 - and x10, x9, #0xfffffffffffffff8 - sub x9, x10, x0 - cmp x9, #65 - b.hs .LBB0_5 - cbz x9, .LBB0_6 - add x11, x0, #64 - sub x12, x0, x10 - mov x13, sp - mov x14, x0 -.LBB0_3: - ldrb w15, [x14], #1 - adds x12, x12, #1 - strb w15, [x13], #1 - b.lo .LBB0_3 - sub x10, x11, x10 - cmp x10, #7 - b.hi .LBB0_7 - b .LBB0_9 -.LBB0_5: - ldrb w9, [x0] - strb w9, [sp] - ldrb w9, [x0, #1] - strb w9, [sp, #1] - ldrb w9, [x0, #2] - strb w9, [sp, #2] - ldrb w9, [x0, #3] - strb w9, [sp, #3] - ldrb w9, [x0, #4] - strb w9, [sp, #4] - ldrb w9, [x0, #5] - strb w9, [sp, #5] - ldrb w9, [x0, #6] - strb w9, [sp, #6] - ldrb w9, [x0, #7] - strb w9, [sp, #7] - ldrb w9, [x0, #8] - strb w9, [sp, #8] - ldrb w9, [x0, #9] - strb w9, [sp, #9] - ldrb w9, [x0, #10] - strb w9, [sp, #10] - ldrb w9, [x0, #11] - strb w9, [sp, #11] - ldrb w9, [x0, #12] - strb w9, [sp, #12] - ldrb w9, [x0, #13] - strb w9, [sp, #13] - ldrb w9, [x0, #14] - 
strb w9, [sp, #14] - ldrb w9, [x0, #15] - strb w9, [sp, #15] - ldrb w9, [x0, #16] - strb w9, [sp, #16] - ldrb w9, [x0, #17] - strb w9, [sp, #17] - ldrb w9, [x0, #18] - strb w9, [sp, #18] - ldrb w9, [x0, #19] - strb w9, [sp, #19] - ldrb w9, [x0, #20] - strb w9, [sp, #20] - ldrb w9, [x0, #21] - strb w9, [sp, #21] - ldrb w9, [x0, #22] - strb w9, [sp, #22] - ldrb w9, [x0, #23] - strb w9, [sp, #23] - ldrb w9, [x0, #24] - strb w9, [sp, #24] - ldrb w9, [x0, #25] - strb w9, [sp, #25] - ldrb w9, [x0, #26] - strb w9, [sp, #26] - ldrb w9, [x0, #27] - strb w9, [sp, #27] - ldrb w9, [x0, #28] - strb w9, [sp, #28] - ldrb w9, [x0, #29] - strb w9, [sp, #29] - ldrb w9, [x0, #30] - strb w9, [sp, #30] - ldrb w9, [x0, #31] - strb w9, [sp, #31] - ldrb w9, [x0, #32] - strb w9, [sp, #32] - ldrb w9, [x0, #33] - strb w9, [sp, #33] - ldrb w9, [x0, #34] - strb w9, [sp, #34] - ldrb w9, [x0, #35] - strb w9, [sp, #35] - ldrb w9, [x0, #36] - strb w9, [sp, #36] - ldrb w9, [x0, #37] - strb w9, [sp, #37] - ldrb w9, [x0, #38] - strb w9, [sp, #38] - ldrb w9, [x0, #39] - strb w9, [sp, #39] - ldrb w9, [x0, #40] - strb w9, [sp, #40] - ldrb w9, [x0, #41] - strb w9, [sp, #41] - ldrb w9, [x0, #42] - strb w9, [sp, #42] - ldrb w9, [x0, #43] - strb w9, [sp, #43] - ldrb w9, [x0, #44] - strb w9, [sp, #44] - ldrb w9, [x0, #45] - strb w9, [sp, #45] - ldrb w9, [x0, #46] - strb w9, [sp, #46] - ldrb w9, [x0, #47] - strb w9, [sp, #47] - ldrb w9, [x0, #48] - strb w9, [sp, #48] - ldrb w9, [x0, #49] - strb w9, [sp, #49] - ldrb w9, [x0, #50] - strb w9, [sp, #50] - ldrb w9, [x0, #51] - strb w9, [sp, #51] - ldrb w9, [x0, #52] - strb w9, [sp, #52] - ldrb w9, [x0, #53] - strb w9, [sp, #53] - ldrb w9, [x0, #54] - strb w9, [sp, #54] - ldrb w9, [x0, #55] - strb w9, [sp, #55] - ldrb w9, [x0, #56] - strb w9, [sp, #56] - ldrb w9, [x0, #57] - strb w9, [sp, #57] - ldrb w9, [x0, #58] - strb w9, [sp, #58] - ldrb w9, [x0, #59] - strb w9, [sp, #59] - ldrb w9, [x0, #60] - strb w9, [sp, #60] - ldrb w9, [x0, #61] - strb w9, [sp, #61] - ldrb 
w9, [x0, #62] - strb w9, [sp, #62] - ldrb w9, [x0, #63] - ldp q0, q1, [sp] - strb w9, [sp, #63] - ldp q2, q3, [sp, #32] - stp q0, q1, [x8] - stp q2, q3, [x8, #32] - dmb ishld - add sp, sp, #64 - ret -.LBB0_6: - mov w10, #64 -.LBB0_7: - mov x11, sp -.LBB0_8: - ldr x12, [x0, x9] - sub x10, x10, #8 - cmp x10, #7 - str x12, [x11, x9] - add x9, x9, #8 - b.hi .LBB0_8 -.LBB0_9: - cbz x10, .LBB0_12 - mov x11, sp - add x11, x11, x9 - add x9, x0, x9 -.LBB0_11: - ldrb w12, [x9], #1 - subs x10, x10, #1 - strb w12, [x11], #1 - b.ne .LBB0_11 -.LBB0_12: + mov x9, sp + add x10, x0, #63 + add x11, x9, #63 + add x13, x9, #62 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #62 + add x15, x9, #61 + //APP + ldrb w11, [x12] + strb w11, [x13] + //NO_APP + add x14, x0, #61 + add x10, x0, #60 + add x11, x9, #60 + //APP + ldrb w12, [x14] + strb w12, [x15] + //NO_APP + add x13, x9, #59 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #59 + add x15, x9, #58 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #58 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x11, x9, #57 + add x10, x0, #57 + add x13, x9, #56 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #56 + add x15, x9, #55 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #55 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x10, x0, #54 + add x11, x9, #54 + add x13, x9, #53 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #53 + add x15, x9, #52 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #52 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x11, x9, #51 + add x10, x0, #51 + add x13, x9, #50 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #50 + add x15, x9, #49 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #49 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x10, x0, #48 + add x11, x9, #48 + add 
x13, x9, #47 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #47 + add x15, x9, #46 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #46 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x11, x9, #45 + add x10, x0, #45 + add x13, x9, #44 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #44 + add x15, x9, #43 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #43 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x10, x0, #42 + add x11, x9, #42 + add x13, x9, #41 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #41 + add x15, x9, #40 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #40 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x11, x9, #39 + add x10, x0, #39 + add x13, x9, #38 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #38 + add x15, x9, #37 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #37 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x10, x0, #36 + add x11, x9, #36 + add x13, x9, #35 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #35 + add x15, x9, #34 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #34 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x11, x9, #33 + add x10, x0, #33 + add x13, x9, #32 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #32 + add x15, x9, #31 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #31 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x10, x0, #30 + add x11, x9, #30 + add x13, x9, #29 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #29 + add x15, x9, #28 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #28 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x11, x9, #27 + add x10, x0, #27 + add x13, x9, #26 + //APP + ldrb w10, [x10] + strb 
w10, [x11] + //NO_APP + add x12, x0, #26 + add x15, x9, #25 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #25 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x10, x0, #24 + add x11, x9, #24 + add x13, x9, #23 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #23 + add x15, x9, #22 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #22 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x11, x9, #21 + add x10, x0, #21 + add x13, x9, #20 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #20 + add x15, x9, #19 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #19 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x10, x0, #18 + add x11, x9, #18 + add x13, x9, #17 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #17 + add x15, x9, #16 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #16 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x11, x9, #15 + add x10, x0, #15 + add x13, x9, #14 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #14 + add x15, x9, #13 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #13 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x10, x0, #12 + add x11, x9, #12 + add x13, x9, #11 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #11 + add x15, x9, #10 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #10 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x11, x9, #9 + add x10, x0, #9 + add x13, x9, #8 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #8 + orr x15, x9, #0x7 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #7 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x10, x0, #6 + orr x11, x9, #0x6 + mov w12, #5 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + orr x12, x9, x12 + add x13, x0, 
#5 + orr x15, x9, #0x4 + //APP + ldrb w10, [x13] + strb w10, [x12] + //NO_APP + add x14, x0, #4 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + add x10, x0, #3 + orr x11, x9, #0x3 + orr x13, x9, #0x2 + //APP + ldrb w10, [x10] + strb w10, [x11] + //NO_APP + add x12, x0, #2 + orr x15, x9, #0x1 + //APP + ldrb w10, [x12] + strb w10, [x13] + //NO_APP + add x14, x0, #1 + //APP + ldrb w10, [x14] + strb w10, [x15] + //NO_APP + //APP + ldrb w10, [x0] + strb w10, [x9] + //NO_APP ldp q0, q1, [sp] ldp q2, q3, [sp, #32] stp q0, q1, [x8] diff --git a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align16 b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align16 index a77dad4..9a1da2f 100644 --- a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align16 @@ -1,17 +1,58 @@ asm_test::atomic_memcpy_load_align16::acquire: - ldr x9, [x0, #56] - ldr x10, [x0, #48] - ldr x11, [x0, #40] - ldr x12, [x0, #32] - ldr x13, [x0, #24] - ldr x14, [x0, #16] - ldr x15, [x0, #8] - ldr x16, [x0] - stp x12, x11, [x8, #32] - stp x14, x13, [x8, #16] - stp x10, x9, [x8, #48] - stp x16, x15, [x8] + sub sp, sp, #64 + mov x9, sp + add x10, x0, #56 + add x11, x9, #56 + add x12, x0, #48 + add x13, x9, #48 + //APP + ldr x10, [x10] + str x10, [x11] + //NO_APP + add x15, x9, #40 + //APP + ldr x12, [x12] + str x12, [x13] + //NO_APP + add x14, x0, #40 + add x10, x0, #32 + add x11, x9, #32 + //APP + ldr x12, [x14] + str x12, [x15] + //NO_APP + //APP + ldr x10, [x10] + str x10, [x11] + //NO_APP + add x12, x0, #24 + add x10, x9, #24 + add x13, x9, #16 + //APP + ldr x12, [x12] + str x12, [x10] + //NO_APP + add x11, x0, #16 + orr x15, x9, #0x8 + //APP + ldr x10, [x11] + str x10, [x13] + //NO_APP + add x14, x0, #8 + //APP + ldr x10, [x14] + str x10, [x15] + //NO_APP + //APP + ldr x10, [x0] + str x10, [x9] + //NO_APP + ldp q0, q1, [sp] + ldp q2, q3, [sp, #32] + stp q0, q1, [x8] + stp q2, 
q3, [x8, #32] dmb ishld + add sp, sp, #64 ret asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: ldr x9, [x0, #56] diff --git a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align2 b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align2 index b5fc4c8..bbaa0ae 100644 --- a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align2 @@ -1,112 +1,196 @@ asm_test::atomic_memcpy_load_align2::acquire: sub sp, sp, #64 - add x9, x0, #7 - and x10, x9, #0xfffffffffffffff8 - sub x9, x10, x0 - cmp x9, #64 - b.hi .LBB4_5 - cbz x9, .LBB4_6 - add x11, x0, #64 - sub x12, x0, x10 - mov x13, sp - mov x14, x0 -.LBB4_3: - ldrb w15, [x14], #1 - adds x12, x12, #1 - strb w15, [x13], #1 - b.lo .LBB4_3 - sub x10, x11, x10 - cmp x10, #7 - b.hi .LBB4_7 - b .LBB4_9 -.LBB4_5: - ldrh w9, [x0, #62] - strh w9, [sp, #62] - ldrh w9, [x0, #60] - strh w9, [sp, #60] - ldrh w9, [x0, #58] - strh w9, [sp, #58] - ldrh w9, [x0, #56] - strh w9, [sp, #56] - ldrh w9, [x0, #54] - strh w9, [sp, #54] - ldrh w9, [x0, #52] - strh w9, [sp, #52] - ldrh w9, [x0, #50] - strh w9, [sp, #50] - ldrh w9, [x0, #48] - strh w9, [sp, #48] - ldrh w9, [x0, #46] - strh w9, [sp, #46] - ldrh w9, [x0, #44] - strh w9, [sp, #44] - ldrh w9, [x0, #42] - strh w9, [sp, #42] - ldrh w9, [x0, #40] - strh w9, [sp, #40] - ldrh w9, [x0, #38] - strh w9, [sp, #38] - ldrh w9, [x0, #36] - strh w9, [sp, #36] - ldrh w9, [x0, #34] - strh w9, [sp, #34] - ldrh w9, [x0, #32] - strh w9, [sp, #32] - ldrh w9, [x0, #30] - strh w9, [sp, #30] - ldrh w9, [x0, #28] - strh w9, [sp, #28] - ldrh w9, [x0, #26] - strh w9, [sp, #26] - ldrh w9, [x0, #24] - strh w9, [sp, #24] - ldrh w9, [x0, #22] - strh w9, [sp, #22] - ldrh w9, [x0, #20] - strh w9, [sp, #20] - ldrh w9, [x0, #18] - strh w9, [sp, #18] - ldrh w9, [x0, #16] - strh w9, [sp, #16] - ldrh w9, [x0, #14] - strh w9, [sp, #14] - ldrh w9, [x0, #12] - strh w9, [sp, #12] - ldrh w9, 
[x0, #10] - strh w9, [sp, #10] - ldrh w9, [x0, #8] - strh w9, [sp, #8] - ldrh w9, [x0, #6] - strh w9, [sp, #6] - ldrh w9, [x0, #4] - strh w9, [sp, #4] - ldrh w9, [x0, #2] - strh w9, [sp, #2] - ldrh w9, [x0] - strh w9, [sp] - b .LBB4_12 -.LBB4_6: - mov w10, #64 -.LBB4_7: - mov x11, sp -.LBB4_8: - ldr x12, [x0, x9] - sub x10, x10, #8 - cmp x10, #7 - str x12, [x11, x9] - add x9, x9, #8 - b.hi .LBB4_8 -.LBB4_9: - cbz x10, .LBB4_12 - mov x11, sp - add x11, x11, x9 - add x9, x0, x9 -.LBB4_11: - ldrb w12, [x9], #1 - subs x10, x10, #1 - strb w12, [x11], #1 - b.ne .LBB4_11 -.LBB4_12: + mov x9, sp + add x10, x0, #62 + add x11, x9, #62 + add x12, x0, #60 + add x13, x9, #60 + //APP + ldrh w10, [x10] + strh w10, [x11] + //NO_APP + add x15, x9, #58 + //APP + ldrh w12, [x12] + strh w12, [x13] + //NO_APP + add x14, x0, #58 + add x10, x0, #56 + add x11, x9, #56 + //APP + ldrh w12, [x14] + strh w12, [x15] + //NO_APP + //APP + ldrh w10, [x10] + strh w10, [x11] + //NO_APP + add x12, x0, #54 + add x10, x9, #54 + add x13, x9, #52 + //APP + ldrh w12, [x12] + strh w12, [x10] + //NO_APP + add x11, x0, #52 + add x15, x9, #50 + //APP + ldrh w10, [x11] + strh w10, [x13] + //NO_APP + add x14, x0, #50 + //APP + ldrh w10, [x14] + strh w10, [x15] + //NO_APP + add x10, x0, #48 + add x11, x9, #48 + add x13, x9, #46 + //APP + ldrh w10, [x10] + strh w10, [x11] + //NO_APP + add x12, x0, #46 + add x15, x9, #44 + //APP + ldrh w10, [x12] + strh w10, [x13] + //NO_APP + add x14, x0, #44 + //APP + ldrh w10, [x14] + strh w10, [x15] + //NO_APP + add x11, x9, #42 + add x10, x0, #42 + add x13, x9, #40 + //APP + ldrh w10, [x10] + strh w10, [x11] + //NO_APP + add x12, x0, #40 + add x15, x9, #38 + //APP + ldrh w10, [x12] + strh w10, [x13] + //NO_APP + add x14, x0, #38 + //APP + ldrh w10, [x14] + strh w10, [x15] + //NO_APP + add x10, x0, #36 + add x11, x9, #36 + add x13, x9, #34 + //APP + ldrh w10, [x10] + strh w10, [x11] + //NO_APP + add x12, x0, #34 + add x15, x9, #32 + //APP + ldrh w10, [x12] + strh w10, [x13] + 
//NO_APP + add x14, x0, #32 + //APP + ldrh w10, [x14] + strh w10, [x15] + //NO_APP + add x11, x9, #30 + add x10, x0, #30 + add x13, x9, #28 + //APP + ldrh w10, [x10] + strh w10, [x11] + //NO_APP + add x12, x0, #28 + add x15, x9, #26 + //APP + ldrh w10, [x12] + strh w10, [x13] + //NO_APP + add x14, x0, #26 + //APP + ldrh w10, [x14] + strh w10, [x15] + //NO_APP + add x10, x0, #24 + add x11, x9, #24 + add x13, x9, #22 + //APP + ldrh w10, [x10] + strh w10, [x11] + //NO_APP + add x12, x0, #22 + add x15, x9, #20 + //APP + ldrh w10, [x12] + strh w10, [x13] + //NO_APP + add x14, x0, #20 + //APP + ldrh w10, [x14] + strh w10, [x15] + //NO_APP + add x11, x9, #18 + add x10, x0, #18 + add x13, x9, #16 + //APP + ldrh w10, [x10] + strh w10, [x11] + //NO_APP + add x12, x0, #16 + add x15, x9, #14 + //APP + ldrh w10, [x12] + strh w10, [x13] + //NO_APP + add x14, x0, #14 + //APP + ldrh w10, [x14] + strh w10, [x15] + //NO_APP + add x10, x0, #12 + add x11, x9, #12 + add x13, x9, #10 + //APP + ldrh w10, [x10] + strh w10, [x11] + //NO_APP + add x12, x0, #10 + add x15, x9, #8 + //APP + ldrh w10, [x12] + strh w10, [x13] + //NO_APP + add x14, x0, #8 + //APP + ldrh w10, [x14] + strh w10, [x15] + //NO_APP + orr x11, x9, #0x6 + add x10, x0, #6 + orr x13, x9, #0x4 + //APP + ldrh w10, [x10] + strh w10, [x11] + //NO_APP + add x12, x0, #4 + orr x15, x9, #0x2 + //APP + ldrh w10, [x12] + strh w10, [x13] + //NO_APP + add x14, x0, #2 + //APP + ldrh w10, [x14] + strh w10, [x15] + //NO_APP + //APP + ldrh w10, [x0] + strh w10, [x9] + //NO_APP ldp q0, q1, [sp] ldp q2, q3, [sp, #32] stp q0, q1, [x8] diff --git a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align4 b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align4 index c2066d1..706f153 100644 --- a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align4 @@ -1,72 +1,100 @@ asm_test::atomic_memcpy_load_align4::acquire: sub sp, 
sp, #64 - add x9, x0, #7 - and x10, x9, #0xfffffffffffffff8 - sub x9, x10, x0 - cmp x9, #64 - b.hi .LBB8_5 - cbz x9, .LBB8_6 - add x11, x0, #64 - sub x12, x0, x10 - mov x13, sp - mov x14, x0 -.LBB8_3: - ldrb w15, [x14], #1 - adds x12, x12, #1 - strb w15, [x13], #1 - b.lo .LBB8_3 - sub x10, x11, x10 - cmp x10, #7 - b.hi .LBB8_7 - b .LBB8_9 -.LBB8_5: - ldr w10, [x0, #60] - ldr w9, [x0, #56] - stp w9, w10, [sp, #56] - ldr w10, [x0, #52] - ldr w9, [x0, #48] - stp w9, w10, [sp, #48] - ldr w10, [x0, #44] - ldr w9, [x0, #40] - stp w9, w10, [sp, #40] - ldr w10, [x0, #36] - ldr w9, [x0, #32] - stp w9, w10, [sp, #32] - ldr w10, [x0, #28] - ldr w9, [x0, #24] - stp w9, w10, [sp, #24] - ldr w10, [x0, #20] - ldr w9, [x0, #16] - stp w9, w10, [sp, #16] - ldr w10, [x0, #12] - ldr w9, [x0, #8] - stp w9, w10, [sp, #8] - ldr w10, [x0, #4] - ldr w9, [x0] - stp w9, w10, [sp] - b .LBB8_12 -.LBB8_6: - mov w10, #64 -.LBB8_7: - mov x11, sp -.LBB8_8: - ldr x12, [x0, x9] - sub x10, x10, #8 - cmp x10, #7 - str x12, [x11, x9] - add x9, x9, #8 - b.hi .LBB8_8 -.LBB8_9: - cbz x10, .LBB8_12 - mov x11, sp - add x11, x11, x9 - add x9, x0, x9 -.LBB8_11: - ldrb w12, [x9], #1 - subs x10, x10, #1 - strb w12, [x11], #1 - b.ne .LBB8_11 -.LBB8_12: + mov x9, sp + add x10, x0, #60 + add x11, x9, #60 + add x13, x9, #56 + //APP + ldr w10, [x10] + str w10, [x11] + //NO_APP + add x12, x0, #56 + add x15, x9, #52 + //APP + ldr w11, [x12] + str w11, [x13] + //NO_APP + add x14, x0, #52 + add x10, x0, #48 + add x11, x9, #48 + //APP + ldr w12, [x14] + str w12, [x15] + //NO_APP + add x13, x9, #44 + //APP + ldr w10, [x10] + str w10, [x11] + //NO_APP + add x12, x0, #44 + add x15, x9, #40 + //APP + ldr w10, [x12] + str w10, [x13] + //NO_APP + add x14, x0, #40 + //APP + ldr w10, [x14] + str w10, [x15] + //NO_APP + add x11, x9, #36 + add x10, x0, #36 + add x13, x9, #32 + //APP + ldr w10, [x10] + str w10, [x11] + //NO_APP + add x12, x0, #32 + add x15, x9, #28 + //APP + ldr w10, [x12] + str w10, [x13] + //NO_APP + add x14, x0, 
#28 + //APP + ldr w10, [x14] + str w10, [x15] + //NO_APP + add x10, x0, #24 + add x11, x9, #24 + add x13, x9, #20 + //APP + ldr w10, [x10] + str w10, [x11] + //NO_APP + add x12, x0, #20 + add x15, x9, #16 + //APP + ldr w10, [x12] + str w10, [x13] + //NO_APP + add x14, x0, #16 + //APP + ldr w10, [x14] + str w10, [x15] + //NO_APP + add x11, x9, #12 + add x10, x0, #12 + add x13, x9, #8 + //APP + ldr w10, [x10] + str w10, [x11] + //NO_APP + add x12, x0, #8 + orr x15, x9, #0x4 + //APP + ldr w10, [x12] + str w10, [x13] + //NO_APP + add x14, x0, #4 + //APP + ldr w10, [x14] + str w10, [x15] + //NO_APP + //APP + ldr w10, [x0] + str w10, [x9] + //NO_APP ldp q0, q1, [sp] ldp q2, q3, [sp, #32] stp q0, q1, [x8] diff --git a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align8 b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align8 index abb3c28..092a84b 100644 --- a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_load_align8 @@ -1,17 +1,58 @@ asm_test::atomic_memcpy_load_align8::acquire: - ldr x9, [x0, #56] - ldr x10, [x0, #48] - ldr x11, [x0, #40] - ldr x12, [x0, #32] - ldr x13, [x0, #24] - ldr x14, [x0, #16] - ldr x15, [x0, #8] - ldr x16, [x0] - stp x12, x11, [x8, #32] - stp x14, x13, [x8, #16] - stp x10, x9, [x8, #48] - stp x16, x15, [x8] + sub sp, sp, #64 + mov x9, sp + add x10, x0, #56 + add x11, x9, #56 + add x12, x0, #48 + add x13, x9, #48 + //APP + ldr x10, [x10] + str x10, [x11] + //NO_APP + add x15, x9, #40 + //APP + ldr x12, [x12] + str x12, [x13] + //NO_APP + add x14, x0, #40 + add x10, x0, #32 + add x11, x9, #32 + //APP + ldr x12, [x14] + str x12, [x15] + //NO_APP + //APP + ldr x10, [x10] + str x10, [x11] + //NO_APP + add x12, x0, #24 + add x10, x9, #24 + add x13, x9, #16 + //APP + ldr x12, [x12] + str x12, [x10] + //NO_APP + add x11, x0, #16 + add x15, x9, #8 + //APP + ldr x10, [x11] + str x10, [x13] + //NO_APP + add x14, x0, #8 + //APP + ldr 
x10, [x14] + str x10, [x15] + //NO_APP + //APP + ldr x10, [x0] + str x10, [x9] + //NO_APP + ldp q0, q1, [sp] + ldp q2, q3, [sp, #32] + stp q0, q1, [x8] + stp q2, q3, [x8, #32] dmb ishld + add sp, sp, #64 ret asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: ldr x9, [x0, #56] diff --git a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align1 b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align1 index 595bc25..0502348 100644 --- a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align1 @@ -1,184 +1,402 @@ asm_test::atomic_memcpy_store_align1::release: + sub sp, sp, #128 ldp q0, q1, [x1] - add x8, x0, #7 - and x9, x8, #0xfffffffffffffff8 + add x8, sp, #64 + add x9, x0, #63 + add x10, x8, #63 + add x11, x0, #62 + add x12, x8, #62 + add x13, x0, #61 + add x14, x8, #61 ldp q2, q3, [x1, #32] - stp q0, q1, [sp, #-128]! + stp q0, q1, [sp] stp q2, q3, [sp, #32] - sub x8, x9, x0 dmb ish - cmp x8, #65 ldp q0, q1, [sp] - ldp q2, q3, [sp, #32] stp q0, q1, [sp, #64] - stp q2, q3, [sp, #96] - b.hs .LBB2_5 - cbz x8, .LBB2_6 - add x10, x0, #64 - sub x11, x0, x9 - add x12, sp, #64 - mov x13, x0 -.LBB2_3: - ldrb w14, [x12], #1 - adds x11, x11, #1 - strb w14, [x13], #1 - b.lo .LBB2_3 - sub x9, x10, x9 - cmp x9, #7 - b.hi .LBB2_7 - b .LBB2_9 -.LBB2_5: - ldrb w8, [sp, #64] + ldp q2, q0, [sp, #32] + stp q2, q0, [sp, #96] + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + //APP + ldrb w9, [x12] + strb w9, [x11] + //NO_APP + add x12, x8, #59 + //APP + ldrb w10, [x14] + strb w10, [x13] + //NO_APP + add x9, x0, #60 + add x10, x8, #60 + add x11, x0, #59 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + add x13, x0, #58 + //APP + ldrb w9, [x12] + strb w9, [x11] + //NO_APP + add x14, x8, #58 + //APP + ldrb w9, [x14] + strb w9, [x13] + //NO_APP + add x10, x8, #57 + add x9, x0, #57 + add x11, x0, #56 + //APP + ldrb w10, [x10] + strb w10, 
[x9] + //NO_APP + add x12, x8, #56 + add x13, x0, #55 + //APP + ldrb w9, [x12] + strb w9, [x11] + //NO_APP + add x14, x8, #55 + //APP + ldrb w9, [x14] + strb w9, [x13] + //NO_APP + add x9, x0, #54 + add x10, x8, #54 + add x11, x0, #53 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + add x12, x8, #53 + add x13, x0, #52 + //APP + ldrb w9, [x12] + strb w9, [x11] + //NO_APP + add x14, x8, #52 + //APP + ldrb w9, [x14] + strb w9, [x13] + //NO_APP + add x10, x8, #51 + add x9, x0, #51 + add x11, x0, #50 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + add x12, x8, #50 + add x13, x0, #49 + //APP + ldrb w10, [x12] + strb w10, [x11] + //NO_APP + add x14, x8, #49 + add x9, x8, #48 + add x10, x0, #48 + //APP + ldrb w11, [x14] + strb w11, [x13] + //NO_APP + add x11, x0, #47 + add x12, x8, #47 + //APP + ldrb w9, [x9] + strb w9, [x10] + //NO_APP + add x13, x8, #32 + add x9, x0, #46 + add x10, x8, #46 + //APP + ldrb w12, [x12] + strb w12, [x11] + //NO_APP + add x14, x8, #31 + add x11, x0, #45 + add x12, x8, #45 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + add x9, x0, #44 + add x10, x8, #44 + //APP + ldrb w12, [x12] + strb w12, [x11] + //NO_APP + add x11, x0, #43 + add x12, x8, #43 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + add x9, x0, #42 + add x10, x8, #42 + //APP + ldrb w12, [x12] + strb w12, [x11] + //NO_APP + add x11, x0, #41 + add x12, x8, #41 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + add x9, x0, #40 + add x10, x8, #40 + //APP + ldrb w12, [x12] + strb w12, [x11] + //NO_APP + add x11, x0, #39 + add x12, x8, #39 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + add x9, x0, #38 + add x10, x8, #38 + //APP + ldrb w12, [x12] + strb w12, [x11] + //NO_APP + add x11, x0, #37 + add x12, x8, #37 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + add x9, x0, #36 + add x10, x8, #36 + //APP + ldrb w12, [x12] + strb w12, [x11] + //NO_APP + add x11, x0, #35 + add x12, x8, #35 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + 
add x9, x0, #34 + add x10, x8, #34 + //APP + ldrb w12, [x12] + strb w12, [x11] + //NO_APP + add x11, x0, #33 + add x12, x8, #33 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + add x9, x0, #32 + //APP + ldrb w12, [x12] + strb w12, [x11] + //NO_APP + add x10, x0, #31 + //APP + ldrb w11, [x13] + strb w11, [x9] + //NO_APP + //APP + ldrb w9, [x14] + strb w9, [x10] + //NO_APP + add x11, x0, #29 + add x9, x0, #30 + add x10, x8, #30 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + add x12, x8, #29 + add x13, x0, #28 + //APP + ldrb w9, [x12] + strb w9, [x11] + //NO_APP + add x14, x8, #28 + //APP + ldrb w9, [x14] + strb w9, [x13] + //NO_APP + add x9, x0, #27 + add x10, x8, #27 + add x11, x0, #26 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + add x12, x8, #26 + add x13, x0, #25 + //APP + ldrb w9, [x12] + strb w9, [x11] + //NO_APP + add x14, x8, #25 + //APP + ldrb w9, [x14] + strb w9, [x13] + //NO_APP + add x10, x8, #24 + add x9, x0, #24 + add x11, x0, #23 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + add x12, x8, #23 + add x13, x0, #22 + //APP + ldrb w9, [x12] + strb w9, [x11] + //NO_APP + add x14, x8, #22 + //APP + ldrb w9, [x14] + strb w9, [x13] + //NO_APP + add x9, x0, #21 + add x10, x8, #21 + add x11, x0, #20 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + add x12, x8, #20 + add x13, x0, #19 + //APP + ldrb w9, [x12] + strb w9, [x11] + //NO_APP + add x14, x8, #19 + //APP + ldrb w9, [x14] + strb w9, [x13] + //NO_APP + add x10, x8, #18 + add x9, x0, #18 + add x11, x0, #17 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + add x12, x8, #17 + add x14, x0, #16 + //APP + ldrb w9, [x12] + strb w9, [x11] + //NO_APP + add x13, x8, #16 + //APP + ldrb w9, [x13] + strb w9, [x14] + //NO_APP + add x9, x0, #15 + orr x10, x8, #0xf + add x11, x0, #14 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + orr x12, x8, #0xe + mov w13, #13 + add x14, x0, #13 + //APP + ldrb w9, [x12] + strb w9, [x11] + //NO_APP + orr x13, x8, x13 + //APP + ldrb 
w9, [x13] + strb w9, [x14] + //NO_APP + add x9, x0, #12 + orr x10, x8, #0xc + add x11, x0, #11 + mov w12, #11 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + orr x12, x8, x12 + mov w13, #10 + add x14, x0, #10 + //APP + ldrb w9, [x12] + strb w9, [x11] + //NO_APP + orr x13, x8, x13 + //APP + ldrb w9, [x13] + strb w9, [x14] + //NO_APP + add x10, x0, #9 + mov w9, #9 + add x11, x0, #8 + orr x9, x8, x9 + orr x12, x8, #0x8 + //APP + ldrb w9, [x9] + strb w9, [x10] + //NO_APP + add x13, x0, #7 + //APP + ldrb w9, [x12] + strb w9, [x11] + //NO_APP + orr x14, x8, #0x7 + //APP + ldrb w9, [x14] + strb w9, [x13] + //NO_APP + orr x10, x8, #0x6 + add x9, x0, #6 + mov w11, #5 + add x12, x0, #5 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + orr x11, x8, x11 + add x13, x0, #4 + //APP + ldrb w9, [x11] + strb w9, [x12] + //NO_APP + orr x14, x8, #0x4 + //APP + ldrb w9, [x14] + strb w9, [x13] + //NO_APP + orr x10, x8, #0x3 + add x9, x0, #3 + add x11, x0, #2 + //APP + ldrb w10, [x10] + strb w10, [x9] + //NO_APP + orr x12, x8, #0x2 + add x13, x0, #1 + //APP + ldrb w9, [x12] + strb w9, [x11] + //NO_APP + orr x14, x8, #0x1 + //APP + ldrb w9, [x14] + strb w9, [x13] + //NO_APP + //APP + ldrb w8, [x8] strb w8, [x0] - ldrb w8, [sp, #65] - strb w8, [x0, #1] - ldrb w8, [sp, #66] - strb w8, [x0, #2] - ldrb w8, [sp, #67] - strb w8, [x0, #3] - ldrb w8, [sp, #68] - strb w8, [x0, #4] - ldrb w8, [sp, #69] - strb w8, [x0, #5] - ldrb w8, [sp, #70] - strb w8, [x0, #6] - ldrb w8, [sp, #71] - strb w8, [x0, #7] - ldrb w8, [sp, #72] - strb w8, [x0, #8] - ldrb w8, [sp, #73] - strb w8, [x0, #9] - ldrb w8, [sp, #74] - strb w8, [x0, #10] - ldrb w8, [sp, #75] - strb w8, [x0, #11] - ldrb w8, [sp, #76] - strb w8, [x0, #12] - ldrb w8, [sp, #77] - strb w8, [x0, #13] - ldrb w8, [sp, #78] - strb w8, [x0, #14] - ldrb w8, [sp, #79] - strb w8, [x0, #15] - ldrb w8, [sp, #80] - strb w8, [x0, #16] - ldrb w8, [sp, #81] - strb w8, [x0, #17] - ldrb w8, [sp, #82] - strb w8, [x0, #18] - ldrb w8, [sp, #83] - strb w8, 
[x0, #19] - ldrb w8, [sp, #84] - strb w8, [x0, #20] - ldrb w8, [sp, #85] - strb w8, [x0, #21] - ldrb w8, [sp, #86] - strb w8, [x0, #22] - ldrb w8, [sp, #87] - strb w8, [x0, #23] - ldrb w8, [sp, #88] - strb w8, [x0, #24] - ldrb w8, [sp, #89] - strb w8, [x0, #25] - ldrb w8, [sp, #90] - strb w8, [x0, #26] - ldrb w8, [sp, #91] - strb w8, [x0, #27] - ldrb w8, [sp, #92] - strb w8, [x0, #28] - ldrb w8, [sp, #93] - strb w8, [x0, #29] - ldrb w8, [sp, #94] - strb w8, [x0, #30] - ldrb w8, [sp, #95] - strb w8, [x0, #31] - ldrb w8, [sp, #96] - strb w8, [x0, #32] - ldrb w8, [sp, #97] - strb w8, [x0, #33] - ldrb w8, [sp, #98] - strb w8, [x0, #34] - ldrb w8, [sp, #99] - strb w8, [x0, #35] - ldrb w8, [sp, #100] - strb w8, [x0, #36] - ldrb w8, [sp, #101] - strb w8, [x0, #37] - ldrb w8, [sp, #102] - strb w8, [x0, #38] - ldrb w8, [sp, #103] - strb w8, [x0, #39] - ldrb w8, [sp, #104] - strb w8, [x0, #40] - ldrb w8, [sp, #105] - strb w8, [x0, #41] - ldrb w8, [sp, #106] - strb w8, [x0, #42] - ldrb w8, [sp, #107] - strb w8, [x0, #43] - ldrb w8, [sp, #108] - strb w8, [x0, #44] - ldrb w8, [sp, #109] - strb w8, [x0, #45] - ldrb w8, [sp, #110] - strb w8, [x0, #46] - ldrb w8, [sp, #111] - strb w8, [x0, #47] - ldrb w8, [sp, #112] - strb w8, [x0, #48] - ldrb w8, [sp, #113] - strb w8, [x0, #49] - ldrb w8, [sp, #114] - strb w8, [x0, #50] - ldrb w8, [sp, #115] - strb w8, [x0, #51] - ldrb w8, [sp, #116] - strb w8, [x0, #52] - ldrb w8, [sp, #117] - strb w8, [x0, #53] - ldrb w8, [sp, #118] - strb w8, [x0, #54] - ldrb w8, [sp, #119] - strb w8, [x0, #55] - ldrb w8, [sp, #120] - strb w8, [x0, #56] - ldrb w8, [sp, #121] - strb w8, [x0, #57] - ldrb w8, [sp, #122] - strb w8, [x0, #58] - ldrb w8, [sp, #123] - strb w8, [x0, #59] - ldrb w8, [sp, #124] - strb w8, [x0, #60] - ldrb w8, [sp, #125] - strb w8, [x0, #61] - ldrb w8, [sp, #126] - strb w8, [x0, #62] - ldrb w8, [sp, #127] - strb w8, [x0, #63] - b .LBB2_12 -.LBB2_6: - mov w9, #64 -.LBB2_7: - add x10, sp, #64 -.LBB2_8: - ldr x11, [x10, x8] - sub x9, x9, #8 
- cmp x9, #7 - str x11, [x0, x8] - add x8, x8, #8 - b.hi .LBB2_8 -.LBB2_9: - cbz x9, .LBB2_12 - add x11, sp, #64 - add x10, x0, x8 - add x8, x11, x8 -.LBB2_11: - ldrb w11, [x8], #1 - subs x9, x9, #1 - strb w11, [x10], #1 - b.ne .LBB2_11 -.LBB2_12: + //NO_APP add sp, sp, #128 ret asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: diff --git a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align16 b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align16 index 96a045d..2accfc6 100644 --- a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align16 @@ -1,17 +1,62 @@ asm_test::atomic_memcpy_store_align16::release: - ldp x8, x9, [x1] - ldp x10, x11, [x1, #16] - ldp x12, x13, [x1, #48] - ldp x14, x15, [x1, #32] + sub sp, sp, #128 + ldp q0, q1, [x1] + add x8, sp, #64 + add x10, x0, #56 + add x11, x8, #56 + add x9, x8, #48 + add x12, x0, #48 + add x13, x0, #40 + add x14, x8, #40 + ldp q2, q3, [x1, #32] + stp q0, q1, [sp] + stp q2, q3, [sp, #32] dmb ish - str x13, [x0, #56] - str x12, [x0, #48] - str x15, [x0, #40] - str x14, [x0, #32] - str x11, [x0, #24] - str x10, [x0, #16] - str x9, [x0, #8] + ldp q0, q1, [sp] + ldp q2, q3, [sp, #32] + stp q0, q1, [sp, #64] + stp q2, q3, [sp, #96] + //APP + ldr x11, [x11] + str x11, [x10] + //NO_APP + //APP + ldr x9, [x9] + str x9, [x12] + //NO_APP + add x10, x8, #32 + add x9, x0, #32 + //APP + ldr x12, [x14] + str x12, [x13] + //NO_APP + add x12, x0, #24 + add x13, x8, #24 + //APP + ldr x10, [x10] + str x10, [x9] + //NO_APP + add x11, x8, #16 + add x9, x0, #16 + //APP + ldr x13, [x13] + str x13, [x12] + //NO_APP + add x10, x0, #8 + //APP + ldr x11, [x11] + str x11, [x9] + //NO_APP + orr x14, x8, #0x8 + //APP + ldr x9, [x14] + str x9, [x10] + //NO_APP + //APP + ldr x8, [x8] str x8, [x0] + //NO_APP + add sp, sp, #128 ret asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: sub 
sp, sp, #64 diff --git a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align2 b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align2 index 1f46a2a..a317b5e 100644 --- a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align2 @@ -1,120 +1,205 @@ asm_test::atomic_memcpy_store_align2::release: ldp q0, q1, [x1] - add x8, x0, #7 - and x9, x8, #0xfffffffffffffff8 ldp q2, q3, [x1, #32] stp q0, q1, [sp, #-128]! stp q2, q3, [sp, #32] - sub x8, x9, x0 + add x8, sp, #64 dmb ish - cmp x8, #64 + add x9, x0, #62 ldp q0, q1, [sp] - ldp q2, q3, [sp, #32] + add x10, x8, #62 + add x11, x0, #60 + add x12, x8, #60 + add x13, x0, #58 + add x14, x8, #46 stp q0, q1, [sp, #64] - stp q2, q3, [sp, #96] - b.hi .LBB6_5 - cbz x8, .LBB6_6 - add x10, x0, #64 - sub x11, x0, x9 - add x12, sp, #64 - mov x13, x0 -.LBB6_3: - ldrb w14, [x12], #1 - adds x11, x11, #1 - strb w14, [x13], #1 - b.lo .LBB6_3 - sub x9, x10, x9 - cmp x9, #7 - b.hi .LBB6_7 - b .LBB6_9 -.LBB6_5: - ldrh w8, [sp, #126] - strh w8, [x0, #62] - ldrh w8, [sp, #124] - strh w8, [x0, #60] - ldrh w8, [sp, #122] - strh w8, [x0, #58] - ldrh w8, [sp, #120] - strh w8, [x0, #56] - ldrh w8, [sp, #118] - strh w8, [x0, #54] - ldrh w8, [sp, #116] - strh w8, [x0, #52] - ldrh w8, [sp, #114] - strh w8, [x0, #50] - ldrh w8, [sp, #112] - strh w8, [x0, #48] - ldrh w8, [sp, #110] - strh w8, [x0, #46] - ldrh w8, [sp, #108] - strh w8, [x0, #44] - ldrh w8, [sp, #106] - strh w8, [x0, #42] - ldrh w8, [sp, #104] - strh w8, [x0, #40] - ldrh w8, [sp, #102] - strh w8, [x0, #38] - ldrh w8, [sp, #100] - strh w8, [x0, #36] - ldrh w8, [sp, #98] - strh w8, [x0, #34] - ldrh w8, [sp, #96] - strh w8, [x0, #32] - ldrh w8, [sp, #94] - strh w8, [x0, #30] - ldrh w8, [sp, #92] - strh w8, [x0, #28] - ldrh w8, [sp, #90] - strh w8, [x0, #26] - ldrh w8, [sp, #88] - strh w8, [x0, #24] - ldrh w8, [sp, #86] - strh w8, [x0, #22] - ldrh w8, [sp, #84] - 
strh w8, [x0, #20] - ldrh w8, [sp, #82] - strh w8, [x0, #18] - ldrh w8, [sp, #80] - strh w8, [x0, #16] - ldrh w8, [sp, #78] - strh w8, [x0, #14] - ldrh w8, [sp, #76] - strh w8, [x0, #12] - ldrh w8, [sp, #74] - strh w8, [x0, #10] - ldrh w8, [sp, #72] - strh w8, [x0, #8] - ldrh w8, [sp, #70] - strh w8, [x0, #6] - ldrh w8, [sp, #68] - strh w8, [x0, #4] - ldrh w8, [sp, #66] - strh w8, [x0, #2] - ldrh w8, [sp, #64] + ldp q2, q0, [sp, #32] + stp q2, q0, [sp, #96] + //APP + ldrh w10, [x10] + strh w10, [x9] + //NO_APP + add x9, x8, #58 + //APP + ldrh w10, [x12] + strh w10, [x11] + //NO_APP + add x10, x0, #56 + add x11, x8, #56 + //APP + ldrh w9, [x9] + strh w9, [x13] + //NO_APP + add x12, x8, #54 + add x9, x0, #54 + //APP + ldrh w11, [x11] + strh w11, [x10] + //NO_APP + add x10, x0, #52 + add x11, x8, #52 + //APP + ldrh w12, [x12] + strh w12, [x9] + //NO_APP + add x13, x8, #48 + add x9, x0, #50 + add x12, x8, #50 + //APP + ldrh w11, [x11] + strh w11, [x10] + //NO_APP + add x10, x0, #48 + //APP + ldrh w12, [x12] + strh w12, [x9] + //NO_APP + add x11, x0, #46 + //APP + ldrh w9, [x13] + strh w9, [x10] + //NO_APP + //APP + ldrh w9, [x14] + strh w9, [x11] + //NO_APP + add x10, x8, #44 + add x9, x0, #44 + add x11, x0, #42 + //APP + ldrh w10, [x10] + strh w10, [x9] + //NO_APP + add x12, x8, #42 + add x13, x0, #40 + //APP + ldrh w9, [x12] + strh w9, [x11] + //NO_APP + add x14, x8, #40 + //APP + ldrh w9, [x14] + strh w9, [x13] + //NO_APP + add x9, x0, #38 + add x10, x8, #38 + add x11, x0, #36 + //APP + ldrh w10, [x10] + strh w10, [x9] + //NO_APP + add x12, x8, #36 + add x13, x0, #34 + //APP + ldrh w10, [x12] + strh w10, [x11] + //NO_APP + add x14, x8, #34 + add x9, x8, #32 + add x10, x0, #32 + //APP + ldrh w11, [x14] + strh w11, [x13] + //NO_APP + add x12, x8, #30 + add x11, x0, #30 + //APP + ldrh w9, [x9] + strh w9, [x10] + //NO_APP + add x9, x0, #28 + add x10, x8, #28 + //APP + ldrh w12, [x12] + strh w12, [x11] + //NO_APP + add x13, x8, #16 + add x11, x0, #26 + add x12, x8, #26 + 
//APP + ldrh w10, [x10] + strh w10, [x9] + //NO_APP + orr x14, x8, #0xe + add x9, x0, #24 + add x10, x8, #24 + //APP + ldrh w12, [x12] + strh w12, [x11] + //NO_APP + add x11, x0, #22 + add x12, x8, #22 + //APP + ldrh w10, [x10] + strh w10, [x9] + //NO_APP + add x9, x0, #20 + add x10, x8, #20 + //APP + ldrh w12, [x12] + strh w12, [x11] + //NO_APP + add x11, x0, #18 + add x12, x8, #18 + //APP + ldrh w10, [x10] + strh w10, [x9] + //NO_APP + add x9, x0, #16 + //APP + ldrh w12, [x12] + strh w12, [x11] + //NO_APP + add x10, x0, #14 + //APP + ldrh w11, [x13] + strh w11, [x9] + //NO_APP + //APP + ldrh w9, [x14] + strh w9, [x10] + //NO_APP + mov w11, #10 + add x9, x0, #12 + orr x10, x8, #0xc + add x12, x0, #10 + //APP + ldrh w10, [x10] + strh w10, [x9] + //NO_APP + orr x11, x8, x11 + add x13, x0, #8 + //APP + ldrh w9, [x11] + strh w9, [x12] + //NO_APP + orr x14, x8, #0x8 + //APP + ldrh w9, [x14] + strh w9, [x13] + //NO_APP + orr x10, x8, #0x6 + add x9, x0, #6 + add x11, x0, #4 + //APP + ldrh w10, [x10] + strh w10, [x9] + //NO_APP + orr x12, x8, #0x4 + add x13, x0, #2 + //APP + ldrh w9, [x12] + strh w9, [x11] + //NO_APP + orr x14, x8, #0x2 + //APP + ldrh w9, [x14] + strh w9, [x13] + //NO_APP + //APP + ldrh w8, [x8] strh w8, [x0] - b .LBB6_12 -.LBB6_6: - mov w9, #64 -.LBB6_7: - add x10, sp, #64 -.LBB6_8: - ldr x11, [x10, x8] - sub x9, x9, #8 - cmp x9, #7 - str x11, [x0, x8] - add x8, x8, #8 - b.hi .LBB6_8 -.LBB6_9: - cbz x9, .LBB6_12 - add x11, sp, #64 - add x10, x0, x8 - add x8, x11, x8 -.LBB6_11: - ldrb w11, [x8], #1 - subs x9, x9, #1 - strb w11, [x10], #1 - b.ne .LBB6_11 -.LBB6_12: + //NO_APP add sp, sp, #128 ret asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: diff --git a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align4 b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align4 index bf792e8..a62c615 100644 --- a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align4 +++ 
b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align4 @@ -1,88 +1,109 @@ asm_test::atomic_memcpy_store_align4::release: + sub sp, sp, #128 ldp q0, q1, [x1] - add x8, x0, #7 - and x9, x8, #0xfffffffffffffff8 + add x8, sp, #64 + add x9, x0, #60 + add x10, x8, #60 + add x11, x0, #56 + add x12, x8, #56 + add x13, x0, #52 + add x14, x8, #52 ldp q2, q3, [x1, #32] - stp q0, q1, [sp, #-128]! + stp q0, q1, [sp] stp q2, q3, [sp, #32] - sub x8, x9, x0 dmb ish - cmp x8, #64 ldp q0, q1, [sp] - ldp q2, q3, [sp, #32] stp q0, q1, [sp, #64] - stp q2, q3, [sp, #96] - b.hi .LBB10_5 - cbz x8, .LBB10_6 - add x10, x0, #64 - sub x11, x0, x9 - add x12, sp, #64 - mov x13, x0 -.LBB10_3: - ldrb w14, [x12], #1 - adds x11, x11, #1 - strb w14, [x13], #1 - b.lo .LBB10_3 - sub x9, x10, x9 - cmp x9, #7 - b.hi .LBB10_7 - b .LBB10_9 -.LBB10_5: - ldr w8, [sp, #124] - str w8, [x0, #60] - ldr w8, [sp, #120] - str w8, [x0, #56] - ldr w8, [sp, #116] - str w8, [x0, #52] - ldr w8, [sp, #112] - str w8, [x0, #48] - ldr w8, [sp, #108] - str w8, [x0, #44] - ldr w8, [sp, #104] - str w8, [x0, #40] - ldr w8, [sp, #100] - str w8, [x0, #36] - ldr w8, [sp, #96] - str w8, [x0, #32] - ldr w8, [sp, #92] - str w8, [x0, #28] - ldr w8, [sp, #88] - str w8, [x0, #24] - ldr w8, [sp, #84] - str w8, [x0, #20] - ldr w8, [sp, #80] - str w8, [x0, #16] - ldr w8, [sp, #76] - str w8, [x0, #12] - ldr w8, [sp, #72] - str w8, [x0, #8] - ldr w8, [sp, #68] - str w8, [x0, #4] - ldr w8, [sp, #64] + ldp q2, q0, [sp, #32] + stp q2, q0, [sp, #96] + //APP + ldr w10, [x10] + str w10, [x9] + //NO_APP + //APP + ldr w10, [x12] + str w10, [x11] + //NO_APP + add x9, x8, #48 + add x10, x0, #48 + //APP + ldr w11, [x14] + str w11, [x13] + //NO_APP + add x11, x0, #44 + add x12, x8, #44 + //APP + ldr w9, [x9] + str w9, [x10] + //NO_APP + add x13, x8, #32 + add x9, x0, #40 + add x10, x8, #40 + //APP + ldr w12, [x12] + str w12, [x11] + //NO_APP + add x14, x8, #28 + add x11, x0, #36 + add x12, x8, #36 + //APP + ldr w10, [x10] + str w10, 
[x9] + //NO_APP + add x9, x0, #32 + //APP + ldr w12, [x12] + str w12, [x11] + //NO_APP + add x10, x0, #28 + //APP + ldr w11, [x13] + str w11, [x9] + //NO_APP + //APP + ldr w9, [x14] + str w9, [x10] + //NO_APP + add x11, x0, #20 + add x9, x0, #24 + add x10, x8, #24 + //APP + ldr w10, [x10] + str w10, [x9] + //NO_APP + add x12, x8, #20 + add x14, x0, #16 + //APP + ldr w9, [x12] + str w9, [x11] + //NO_APP + add x13, x8, #16 + //APP + ldr w9, [x13] + str w9, [x14] + //NO_APP + add x9, x0, #12 + orr x10, x8, #0xc + add x11, x0, #8 + //APP + ldr w10, [x10] + str w10, [x9] + //NO_APP + orr x12, x8, #0x8 + add x13, x0, #4 + //APP + ldr w9, [x12] + str w9, [x11] + //NO_APP + orr x14, x8, #0x4 + //APP + ldr w9, [x14] + str w9, [x13] + //NO_APP + //APP + ldr w8, [x8] str w8, [x0] - b .LBB10_12 -.LBB10_6: - mov w9, #64 -.LBB10_7: - add x10, sp, #64 -.LBB10_8: - ldr x11, [x10, x8] - sub x9, x9, #8 - cmp x9, #7 - str x11, [x0, x8] - add x8, x8, #8 - b.hi .LBB10_8 -.LBB10_9: - cbz x9, .LBB10_12 - add x11, sp, #64 - add x10, x0, x8 - add x8, x11, x8 -.LBB10_11: - ldrb w11, [x8], #1 - subs x9, x9, #1 - strb w11, [x10], #1 - b.ne .LBB10_11 -.LBB10_12: + //NO_APP add sp, sp, #128 ret asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: diff --git a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align8 b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align8 index 7897280..4fe924e 100644 --- a/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/aarch64-unknown-linux-gnu/atomic_memcpy_store_align8 @@ -1,17 +1,62 @@ asm_test::atomic_memcpy_store_align8::release: - ldp x8, x9, [x1] - ldp x10, x11, [x1, #16] - ldp x12, x13, [x1, #48] - ldp x14, x15, [x1, #32] + sub sp, sp, #128 + ldp q0, q1, [x1] + add x8, sp, #64 + add x10, x0, #56 + add x11, x8, #56 + add x9, x8, #48 + add x12, x0, #48 + add x13, x0, #40 + add x14, x8, #40 + ldp q2, q3, [x1, #32] + stp q0, q1, [sp] + stp q2, q3, [sp, #32] dmb 
ish - str x13, [x0, #56] - str x12, [x0, #48] - str x15, [x0, #40] - str x14, [x0, #32] - str x11, [x0, #24] - str x10, [x0, #16] - str x9, [x0, #8] + ldp q0, q1, [sp] + ldp q2, q3, [sp, #32] + stp q0, q1, [sp, #64] + stp q2, q3, [sp, #96] + //APP + ldr x11, [x11] + str x11, [x10] + //NO_APP + //APP + ldr x9, [x9] + str x9, [x12] + //NO_APP + add x10, x8, #32 + add x9, x0, #32 + //APP + ldr x12, [x14] + str x12, [x13] + //NO_APP + add x12, x0, #24 + add x13, x8, #24 + //APP + ldr x10, [x10] + str x10, [x9] + //NO_APP + add x11, x8, #16 + add x9, x0, #16 + //APP + ldr x13, [x13] + str x13, [x12] + //NO_APP + add x10, x0, #8 + //APP + ldr x11, [x11] + str x11, [x9] + //NO_APP + orr x14, x8, #0x8 + //APP + ldr x9, [x14] + str x9, [x10] + //NO_APP + //APP + ldr x8, [x8] str x8, [x0] + //NO_APP + add sp, sp, #128 ret asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: sub sp, sp, #64 diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align1 b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align1 index 2ef5038..0988e56 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align1 @@ -1,407 +1,239 @@ asm_test::atomic_memcpy_load_align1::acquire: push {r4, r5, r6, lr} sub sp, sp, #32 - add r2, r1, #3 - bic r12, r2, #3 - sub r3, r12, r1 - cmp r3, #33 - bhs .LBB0_6 - cmp r3, #0 - beq .LBB0_7 - sub r2, r1, r12 - add lr, r1, #32 - mov r4, sp - mov r5, r1 -.LBB0_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB0_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB0_8 - cmp r2, #0 - bne .LBB0_11 - b .LBB0_13 -.LBB0_6: - ldrb r2, [r1] - strb r2, [sp] - ldrb r2, [r1, #1] - strb r2, [sp, #1] - ldrb r2, [r1, #2] - strb r2, [sp, #2] - ldrb r2, [r1, #3] - strb r2, [sp, #3] - ldrb r2, [r1, #4] - strb r2, [sp, #4] - ldrb r2, [r1, #5] - strb r2, [sp, #5] - ldrb r2, [r1, #6] - strb r2, [sp, #6] - ldrb r2, 
[r1, #7] - strb r2, [sp, #7] - ldrb r2, [r1, #8] - strb r2, [sp, #8] - ldrb r2, [r1, #9] - strb r2, [sp, #9] - ldrb r2, [r1, #10] - strb r2, [sp, #10] - ldrb r2, [r1, #11] - strb r2, [sp, #11] - ldrb r2, [r1, #12] - strb r2, [sp, #12] - ldrb r2, [r1, #13] - strb r2, [sp, #13] - ldrb r2, [r1, #14] - strb r2, [sp, #14] - ldrb r2, [r1, #15] - strb r2, [sp, #15] - ldrb r2, [r1, #16] - strb r2, [sp, #16] - ldrb r2, [r1, #17] - strb r2, [sp, #17] - ldrb r2, [r1, #18] - strb r2, [sp, #18] - ldrb r2, [r1, #19] - strb r2, [sp, #19] - ldrb r2, [r1, #20] - strb r2, [sp, #20] - ldrb r2, [r1, #21] - strb r2, [sp, #21] - ldrb r2, [r1, #22] - strb r2, [sp, #22] - ldrb r2, [r1, #23] - strb r2, [sp, #23] - ldrb r2, [r1, #24] - strb r2, [sp, #24] - ldrb r2, [r1, #25] - strb r2, [sp, #25] - ldrb r2, [r1, #26] - strb r2, [sp, #26] - ldrb r2, [r1, #27] - strb r2, [sp, #27] - ldrb r2, [r1, #28] - strb r2, [sp, #28] - ldrb r2, [r1, #29] - strb r2, [sp, #29] - ldrb r2, [r1, #30] - strb r2, [sp, #30] - ldrb r1, [r1, #31] - strb r1, [sp, #31] - b .LBB0_13 -.LBB0_7: - mov r2, #32 -.LBB0_8: - sub r3, r1, r12 - mov r12, sp -.LBB0_9: - ldr r6, [r1, -r3] - sub r5, r12, r3 - sub r2, r2, #4 - strb r6, [r12, -r3] - sub r3, r3, #4 - cmp r2, #3 - lsr r4, r6, #24 - strb r4, [r5, #3] - lsr r4, r6, #16 - lsr r6, r6, #8 - strb r4, [r5, #2] - strb r6, [r5, #1] - bhi .LBB0_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB0_13 -.LBB0_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB0_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB0_12 -.LBB0_13: + mov r2, r0 + mov r3, r1 + add r4, r1, #31 mov r1, sp - mov r2, #32 - bl memcpy + add r5, r1, #31 mov r0, #0 - mcr p15, #0, r0, c7, c10, #5 - add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB2_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 + @APP + ldrb r4, [r4] + strb r4, [r5] + @NO_APP + mov r6, #0 + add r5, r3, #30 + add r4, r1, #30 + mov r0, #0 + @APP + ldrb r5, [r5] strb r5, [r4] - add r4, r4, #1 - blo .LBB2_3 - sub r6, r3, r1 - cmp 
r6, #4 - blo .LBB2_8 -.LBB2_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB2_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB2_6 - rsb r0, r0, #0 -.LBB2_8: - cmp r6, #0 - beq .LBB2_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB2_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB2_10 -.LBB2_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB2_12: - ldrb r0, [sp, #32] - strb r0, [r7] - ldrb r0, [sp, #33] - strb r0, [r7, #1] - ldrb r0, [sp, #34] - strb r0, [r7, #2] - ldrb r0, [sp, #35] - strb r0, [r7, #3] - ldrb r0, [sp, #36] - strb r0, [r7, #4] - ldrb r0, [sp, #37] - strb r0, [r7, #5] - ldrb r0, [sp, #38] - strb r0, [r7, #6] - ldrb r0, [sp, #39] - strb r0, [r7, #7] - ldrb r0, [sp, #40] - strb r0, [r7, #8] - ldrb r0, [sp, #41] - strb r0, [r7, #9] - ldrb r0, [sp, #42] - strb r0, [r7, #10] - ldrb r0, [sp, #43] - strb r0, [r7, #11] - ldrb r0, [sp, #44] - strb r0, [r7, #12] - ldrb r0, [sp, #45] - strb r0, [r7, #13] - ldrb r0, [sp, #46] - strb r0, [r7, #14] - ldrb r0, [sp, #47] - strb r0, [r7, #15] - ldrb r0, [sp, #48] - strb r0, [r7, #16] - ldrb r0, [sp, #49] - strb r0, [r7, #17] - ldrb r0, [sp, #50] - strb r0, [r7, #18] - ldrb r0, [sp, #51] - strb r0, [r7, #19] - ldrb r0, [sp, #52] - strb r0, [r7, #20] - ldrb r0, [sp, #53] - strb r0, [r7, #21] - ldrb r0, [sp, #54] - strb r0, [r7, #22] - ldrb r0, [sp, #55] - strb r0, [r7, #23] - ldrb r0, [sp, #56] - strb r0, [r7, #24] - ldrb r0, [sp, #57] - strb r0, [r7, #25] - ldrb r0, [sp, #58] - strb r0, [r7, #26] - ldrb r0, [sp, #59] - strb r0, [r7, #27] - ldrb r0, [sp, #60] - strb r0, [r7, #28] - ldrb r0, [sp, #61] - strb r0, [r7, #29] - ldrb r0, [sp, #62] - strb r0, [r7, #30] - ldrb r0, [sp, #63] - strb r0, [r7, #31] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB4_3: - 
ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r12, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r5, r12, r3 - sub r2, r2, #4 - strb r6, [r12, -r3] - sub r3, r3, #4 - cmp r2, #3 - lsr r4, r6, #24 - strb r4, [r5, #3] - lsr r4, r6, #16 - lsr r6, r6, #8 - strb r4, [r5, #2] - strb r6, [r5, #1] - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: - mov r1, sp + @NO_APP + add r5, r3, #29 + add r4, r1, #29 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #28 + add r4, r1, #28 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #27 + add r4, r1, #27 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #26 + add r4, r1, #26 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #25 + add r4, r1, #25 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #24 + add r4, r1, #24 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #23 + 
add r4, r1, #23 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #22 + add r4, r1, #22 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #21 + add r4, r1, #21 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #20 + add r4, r1, #20 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #19 + add r4, r1, #19 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #18 + add r4, r1, #18 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #17 + add r4, r1, #17 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #16 + add r4, r1, #16 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #15 + add r4, r1, #15 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #14 + add r4, r1, #14 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #13 + add r4, r1, #13 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #12 + add r4, r1, #12 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #11 + add r4, r1, #11 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #10 + add r4, r1, #10 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #9 + add r4, r1, #9 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #8 + add r4, r1, #8 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #7 + orr r4, r1, #7 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #6 + orr r4, r1, #6 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #5 + orr r4, r1, #5 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #4 + orr r4, r1, #4 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #3 + orr r4, r1, #3 + mov r0, #0 + @APP + ldrb r5, [r5] + 
strb r5, [r4] + @NO_APP + add r5, r3, #2 + orr r4, r1, #2 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #1 + orr r4, r1, #1 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + mov r5, r1 + mov r0, #0 + @APP + ldrb r3, [r3] + strb r3, [r5] + @NO_APP + mov r0, r2 mov r2, #32 bl memcpy - mov r0, #0 - mcr p15, #0, r0, c7, c10, #5 + mcr p15, #0, r6, c7, c10, #5 add sp, sp, #32 pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} sub sp, sp, 
#76 @@ -511,275 +343,3 @@ asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: mcr p15, #0, r0, c7, c10, #5 add sp, sp, #76 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -.LBB2_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB2_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB2_8 -.LBB2_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB2_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB2_6 - rsb r0, r0, #0 -.LBB2_8: - cmp r6, #0 - beq .LBB2_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB2_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB2_10 -.LBB2_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB2_12: - ldrb r0, [sp, #32] - strb r0, [r7] - ldrb r0, [sp, #33] - strb r0, [r7, #1] - ldrb r0, [sp, #34] - strb r0, [r7, #2] - ldrb r0, [sp, #35] - strb r0, [r7, #3] - ldrb r0, [sp, #36] - strb r0, [r7, #4] - ldrb r0, [sp, #37] - strb r0, [r7, #5] - ldrb r0, [sp, #38] - strb r0, [r7, #6] - ldrb r0, [sp, #39] - strb r0, [r7, #7] - ldrb r0, [sp, #40] - strb r0, [r7, #8] - ldrb r0, [sp, #41] - strb r0, [r7, #9] - ldrb r0, [sp, #42] - strb r0, [r7, #10] - ldrb r0, [sp, #43] - strb r0, [r7, #11] - ldrb r0, [sp, #44] - strb r0, [r7, #12] - ldrb r0, [sp, #45] - strb r0, [r7, #13] - ldrb r0, [sp, #46] - strb r0, [r7, #14] - ldrb r0, [sp, #47] - strb r0, [r7, #15] - ldrb r0, [sp, #48] - strb r0, [r7, #16] - ldrb r0, [sp, #49] - strb r0, [r7, #17] - ldrb r0, [sp, #50] - strb r0, [r7, #18] - ldrb r0, [sp, #51] - strb r0, [r7, #19] - ldrb r0, [sp, #52] - strb r0, [r7, #20] - ldrb r0, [sp, #53] - strb r0, [r7, #21] - ldrb r0, [sp, #54] - strb r0, [r7, #22] - ldrb r0, [sp, #55] - strb r0, [r7, #23] - ldrb r0, [sp, #56] - strb r0, [r7, #24] - ldrb r0, [sp, #57] - strb r0, [r7, #25] - ldrb r0, 
[sp, #58] - strb r0, [r7, #26] - ldrb r0, [sp, #59] - strb r0, [r7, #27] - ldrb r0, [sp, #60] - strb r0, [r7, #28] - ldrb r0, [sp, #61] - strb r0, [r7, #29] - ldrb r0, [sp, #62] - strb r0, [r7, #30] - ldrb r0, [sp, #63] - strb r0, [r7, #31] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r12, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r5, r12, r3 - sub r2, r2, #4 - strb r6, [r12, -r3] - sub r3, r3, #4 - cmp r2, #3 - lsr r4, r6, #24 - strb r4, [r5, #3] - lsr r4, r6, #16 - lsr r6, r6, #8 - strb r4, [r5, #2] - strb r6, [r5, #1] - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: - mov r1, sp - mov r2, #32 - bl memcpy - mov r0, #0 - mcr p15, #0, r0, c7, c10, #5 - add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - sub r3, 
r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align16 b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align16 index 1a6af41..937a77f 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align16 @@ -1,23 +1,73 @@ asm_test::atomic_memcpy_load_align16::acquire: - push {r4, r5, r6, lr} - ldr r12, [r1, #28] - ldr lr, [r1, #24] - ldr r2, [r1, #20] - ldr r3, [r1, #16] - ldr r4, [r1, #12] - ldr r5, [r1, #8] - ldr r6, [r1, #4] - ldr r1, [r1] - str r5, [r0, #8] - stm r0, {r1, r6} - str r4, [r0, #12] - str r3, [r0, #16] - str r2, [r0, #20] - str lr, [r0, #24] - str r12, [r0, #28] + push {r4, r5, r11, lr} + add r11, 
sp, #8 + sub sp, sp, #32 + bic sp, sp, #15 + mov r5, r0 + mov r3, sp + add r4, r3, #28 mov r0, #0 - mcr p15, #0, r0, c7, c10, #5 - pop {r4, r5, r6, pc} + add lr, r1, #28 + @APP + ldr r2, [lr] + str r2, [r4] + @NO_APP + mov r12, #0 + add r2, r1, #24 + add r4, r3, #24 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #20 + add r4, r3, #20 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #16 + add r4, r3, #16 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #12 + orr r4, r3, #12 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #8 + orr r4, r3, #8 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #4 + orr r4, r3, #4 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + mov r2, r3 + mov r0, #0 + @APP + ldr r1, [r1] + str r1, [r2] + @NO_APP + ldm r3!, {r0, r1, r2, r4} + stm r5!, {r0, r1, r2, r4} + ldm r3, {r0, r1, r2, r4} + stm r5, {r0, r1, r2, r4} + mcr p15, #0, r12, c7, c10, #5 + sub sp, r11, #8 + pop {r4, r5, r11, pc} asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: push {r4, r5, r6, r7, r8, r9, r11, lr} ldrd r8, r9, [r1, #24] diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align2 b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align2 index 71e9bf5..3aed785 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align2 @@ -1,180 +1,127 @@ asm_test::atomic_memcpy_load_align2::acquire: push {r4, r5, r6, lr} sub sp, sp, #32 - add r2, r1, #3 - bic r12, r2, #3 - sub r3, r12, r1 - cmp r3, #32 - bhi .LBB4_6 - cmp r3, #0 - beq .LBB4_7 - sub r2, r1, r12 - add lr, r1, #32 - mov r4, sp - mov r5, r1 -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - 
ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r12, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r5, r12, r3 - sub r2, r2, #4 - strb r6, [r12, -r3] - sub r3, r3, #4 - cmp r2, #3 - lsr r4, r6, #24 - strb r4, [r5, #3] - lsr r4, r6, #16 - lsr r6, r6, #8 - strb r4, [r5, #2] - strb r6, [r5, #1] - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: + mov r2, r0 + mov r3, r1 + add r4, r1, #30 mov r1, sp + add r5, r1, #30 + mov r0, #0 + @APP + ldrh r4, [r4] + strh r4, [r5] + @NO_APP + mov r6, #0 + add r5, r3, #28 + add r4, r1, #28 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #26 + add r4, r1, #26 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #24 + add r4, r1, #24 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #22 + add r4, r1, #22 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #20 + add r4, r1, #20 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #18 + add r4, r1, #18 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #16 + add r4, r1, #16 + mov r0, #0 + @APP + ldrh r5, [r5] + 
strh r5, [r4] + @NO_APP + add r5, r3, #14 + add r4, r1, #14 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #12 + add r4, r1, #12 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #10 + add r4, r1, #10 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #8 + add r4, r1, #8 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #6 + orr r4, r1, #6 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #4 + orr r4, r1, #4 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #2 + orr r4, r1, #2 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + mov r5, r1 + mov r0, #0 + @APP + ldrh r3, [r3] + strh r3, [r5] + @NO_APP + mov r0, r2 mov r2, #32 bl memcpy - mov r0, #0 - mcr p15, #0, r0, c7, c10, #5 + mcr p15, #0, r6, c7, c10, #5 add sp, sp, #32 pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, 
[sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: ldrh r2, [r1, #30] strh r2, [r0, #30] @@ -211,80 +158,3 @@ asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: mov r0, #0 mcr p15, #0, r0, c7, c10, #5 bx lr -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add 
sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align4 b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align4 index 1f82454..8ae87a1 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align4 @@ -1,23 +1,71 @@ asm_test::atomic_memcpy_load_align4::acquire: - push {r4, r5, r6, lr} - ldr r12, [r1, #28] - ldr lr, [r1, #24] - ldr r2, [r1, #20] - ldr r3, [r1, #16] - ldr r4, [r1, #12] - ldr r5, [r1, #8] - ldr r6, [r1, #4] - ldr r1, [r1] - str r5, [r0, #8] - stm r0, {r1, r6} - str r4, [r0, #12] - str r3, [r0, #16] - str r2, [r0, #20] - str lr, [r0, #24] - str r12, [r0, #28] + push {r4, r5, r11, lr} + sub sp, sp, #32 + mov r5, r0 + mov r3, sp + add r4, r3, #28 mov r0, #0 - mcr p15, #0, r0, c7, c10, #5 - pop {r4, r5, r6, pc} + add lr, r1, #28 + @APP + ldr r2, [lr] + str r2, [r4] + @NO_APP + mov r12, #0 + add r2, r1, #24 + add r4, r3, #24 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #20 + add r4, r3, #20 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #16 + add r4, r3, #16 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #12 + add r4, r3, #12 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #8 + add r4, r3, #8 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #4 + orr r4, r3, #4 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + mov r2, r3 + mov r0, #0 + @APP + ldr r1, [r1] + str r1, [r2] + @NO_APP + ldm r3!, {r0, r1, r2, r4} + stm r5!, {r0, r1, r2, r4} + ldm r3, {r0, r1, r2, r4} + stm r5, {r0, r1, r2, r4} + mcr p15, #0, r12, c7, c10, #5 + add sp, sp, #32 + pop {r4, r5, r11, pc} asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: push {r4, r5, r6, lr} ldr r12, [r1, #28] diff --git 
a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align8 b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align8 index 2b074db..bd36def 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_load_align8 @@ -1,23 +1,71 @@ asm_test::atomic_memcpy_load_align8::acquire: - push {r4, r5, r6, lr} - ldr r12, [r1, #28] - ldr lr, [r1, #24] - ldr r2, [r1, #20] - ldr r3, [r1, #16] - ldr r4, [r1, #12] - ldr r5, [r1, #8] - ldr r6, [r1, #4] - ldr r1, [r1] - str r5, [r0, #8] - stm r0, {r1, r6} - str r4, [r0, #12] - str r3, [r0, #16] - str r2, [r0, #20] - str lr, [r0, #24] - str r12, [r0, #28] + push {r4, r5, r11, lr} + sub sp, sp, #32 + mov r5, r0 + mov r3, sp + add r4, r3, #28 mov r0, #0 - mcr p15, #0, r0, c7, c10, #5 - pop {r4, r5, r6, pc} + add lr, r1, #28 + @APP + ldr r2, [lr] + str r2, [r4] + @NO_APP + mov r12, #0 + add r2, r1, #24 + add r4, r3, #24 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #20 + add r4, r3, #20 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #16 + add r4, r3, #16 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #12 + add r4, r3, #12 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #8 + add r4, r3, #8 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #4 + orr r4, r3, #4 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + mov r2, r3 + mov r0, #0 + @APP + ldr r1, [r1] + str r1, [r2] + @NO_APP + ldm r3!, {r0, r1, r2, r4} + stm r5!, {r0, r1, r2, r4} + ldm r3, {r0, r1, r2, r4} + stm r5, {r0, r1, r2, r4} + mcr p15, #0, r12, c7, c10, #5 + add sp, sp, #32 + pop {r4, r5, r11, pc} asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: push {r4, r5, r6, r7, r8, r9, r11, lr} ldrd r8, r9, [r1, #24] diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align1 
b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align1 index a5e149b..8a068b4 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align1 @@ -1,302 +1,243 @@ asm_test::atomic_memcpy_store_align1::release: - push {r4, r5, r6, r7, r8, lr} + push {r4, r5, r6, lr} sub sp, sp, #64 - mov r8, sp - mov r7, r0 - mov r0, r8 - mov r2, #32 - mov r6, #32 - bl memcpy - mov r0, #0 - add r2, sp, #32 - mcr p15, #0, r0, c7, c10, #5 - ldm r8!, {r1, r3, r4, r5} - mov r0, r2 - stm r0!, {r1, r3, r4, r5} - ldm r8, {r1, r3, r4, r5} - stm r0, {r1, r3, r4, r5} - add r0, r7, #3 - bic r1, r0, #3 - sub r0, r1, r7 - cmp r0, #33 - bhs .LBB2_12 - cmp r0, #0 - beq .LBB2_5 - sub r6, r7, r1 - add r3, r7, #32 - mov r4, r7 -.LBB2_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB2_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB2_8 -.LBB2_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB2_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB2_6 - rsb r0, r0, #0 -.LBB2_8: - cmp r6, #0 - beq .LBB2_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB2_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB2_10 -.LBB2_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB2_12: - ldrb r0, [sp, #32] - strb r0, [r7] - ldrb r0, [sp, #33] - strb r0, [r7, #1] - ldrb r0, [sp, #34] - strb r0, [r7, #2] - ldrb r0, [sp, #35] - strb r0, [r7, #3] - ldrb r0, [sp, #36] - strb r0, [r7, #4] - ldrb r0, [sp, #37] - strb r0, [r7, #5] - ldrb r0, [sp, #38] - strb r0, [r7, #6] - ldrb r0, [sp, #39] - strb r0, [r7, #7] - ldrb r0, [sp, #40] - strb r0, [r7, #8] - ldrb r0, [sp, #41] - strb r0, [r7, #9] - ldrb r0, [sp, #42] - strb r0, [r7, #10] - ldrb 
r0, [sp, #43] - strb r0, [r7, #11] - ldrb r0, [sp, #44] - strb r0, [r7, #12] - ldrb r0, [sp, #45] - strb r0, [r7, #13] - ldrb r0, [sp, #46] - strb r0, [r7, #14] - ldrb r0, [sp, #47] - strb r0, [r7, #15] - ldrb r0, [sp, #48] - strb r0, [r7, #16] - ldrb r0, [sp, #49] - strb r0, [r7, #17] - ldrb r0, [sp, #50] - strb r0, [r7, #18] - ldrb r0, [sp, #51] - strb r0, [r7, #19] - ldrb r0, [sp, #52] - strb r0, [r7, #20] - ldrb r0, [sp, #53] - strb r0, [r7, #21] - ldrb r0, [sp, #54] - strb r0, [r7, #22] - ldrb r0, [sp, #55] - strb r0, [r7, #23] - ldrb r0, [sp, #56] - strb r0, [r7, #24] - ldrb r0, [sp, #57] - strb r0, [r7, #25] - ldrb r0, [sp, #58] - strb r0, [r7, #26] - ldrb r0, [sp, #59] - strb r0, [r7, #27] - ldrb r0, [sp, #60] - strb r0, [r7, #28] - ldrb r0, [sp, #61] - strb r0, [r7, #29] - ldrb r0, [sp, #62] - strb r0, [r7, #30] - ldrb r0, [sp, #63] - strb r0, [r7, #31] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r12, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r5, r12, r3 - sub r2, r2, #4 - strb r6, [r12, -r3] - sub r3, r3, #4 - cmp r2, #3 - lsr r4, r6, #24 - strb r4, [r5, 
#3] - lsr r4, r6, #16 - lsr r6, r6, #8 - strb r4, [r5, #2] - strb r6, [r5, #1] - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: - mov r1, sp + mov r4, r0 + mov r0, r6 mov r2, #32 bl memcpy mov r0, #0 + add r12, sp, #32 mcr p15, #0, r0, c7, c10, #5 - add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] + ldm r6!, {r1, r2, r3, r5} + mov r0, r12 + stm r0!, {r1, r2, r3, r5} + ldm r6, {r1, r2, r3, r5} + stm r0, {r1, r2, r3, r5} + add r1, r4, #31 + 
add r2, r12, #31 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #30 + add r2, r12, #30 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #29 + add r2, r12, #29 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #28 + add r2, r12, #28 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #27 + add r2, r12, #27 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #26 + add r2, r12, #26 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #25 + add r2, r12, #25 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #24 + add r2, r12, #24 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #23 + add r2, r12, #23 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #22 + add r2, r12, #22 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #21 + add r2, r12, #21 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #20 + add r2, r12, #20 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #19 + add r2, r12, #19 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #18 + add r2, r12, #18 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #17 + add r2, r12, #17 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #16 + add r2, r12, #16 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #15 + add r2, r12, #15 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #14 + add r2, r12, #14 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #13 + add r2, r12, #13 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #12 + add r2, r12, #12 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #11 + add r2, r12, #11 + 
mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #10 + add r2, r12, #10 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #9 + add r2, r12, #9 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #8 + add r2, r12, #8 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #7 + orr r2, r12, #7 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #6 + orr r2, r12, #6 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #5 + orr r2, r12, #5 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #4 + orr r2, r12, #4 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #3 + orr r2, r12, #3 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #2 + orr r2, r12, #2 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #1 + orr r2, r12, #1 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + mov r0, #0 + @APP + ldrb r1, [r12] + strb r1, [r4] + @NO_APP add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} + pop {r4, r5, r6, pc} asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: push {r4, r5, r11, lr} sub sp, sp, #32 @@ -313,166 +254,3 @@ asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: bl memcpy add sp, sp, #32 pop {r4, r5, r11, pc} -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] 
- strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r12, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r5, r12, r3 - sub r2, r2, #4 - strb r6, [r12, -r3] - sub r3, r3, #4 - cmp r2, #3 - lsr r4, r6, #24 - strb r4, [r5, #3] - lsr r4, r6, #16 - lsr r6, r6, #8 - strb r4, [r5, #2] - strb r6, [r5, #1] - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: - mov r1, sp - mov r2, #32 - bl memcpy - mov r0, #0 - mcr p15, #0, r0, c7, c10, #5 - add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - 
strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align16 b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align16 index 08efa6d..edc2621 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align16 @@ -1,20 +1,79 @@ asm_test::atomic_memcpy_store_align16::release: - push {r4, r5, r6, r7, r11, lr} - add r6, r1, #8 - ldm r1, {r12, lr} - mov r7, #0 - ldm r6, {r2, r3, r4, r5, r6} - ldr r1, [r1, #28] - mcr p15, #0, r7, c7, c10, #5 - str r1, [r0, #28] - str r6, [r0, #24] - str r5, [r0, #20] - str r4, [r0, #16] - str r3, [r0, #12] - str r2, [r0, #8] - str lr, [r0, #4] - str r12, [r0] - pop {r4, r5, r6, r7, r11, pc} + push {r4, r5, r11, lr} + add r11, sp, #8 + sub sp, sp, #64 + bic sp, sp, #15 + mov r12, r0 + ldm r1!, {r0, r2, r4, r5} + mov lr, sp + mov r3, lr + stm r3!, {r0, r2, r4, r5} + ldm r1, {r0, r2, r4, r5} + add r1, sp, #32 + stm r3, {r0, r2, r4, r5} + mov r0, #0 + mcr p15, #0, r0, c7, c10, #5 + mov r0, r1 + ldm lr!, {r2, r3, r4, r5} + stm r0!, {r2, r3, r4, r5} + ldm lr, {r2, r3, r4, r5} + stm r0, {r2, r3, r4, r5} + add r2, r12, #28 + add r3, r1, #28 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #24 + add r3, r1, #24 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #20 + add r3, r1, #20 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #16 + add r3, r1, #16 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #12 + orr r3, r1, #12 + mov r0, #0 + @APP + ldr r3, [r3] + str 
r3, [r2] + @NO_APP + add r2, r12, #8 + orr r3, r1, #8 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #4 + orr r3, r1, #4 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + mov r0, #0 + @APP + ldr r1, [r1] + str r1, [r12] + @NO_APP + sub sp, r11, #8 + pop {r4, r5, r11, pc} asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: push {r4, r5, r11, lr} add r11, sp, #8 diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align2 b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align2 index c7e67ef..008ceff 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align2 @@ -1,107 +1,131 @@ asm_test::atomic_memcpy_store_align2::release: - push {r4, r5, r6, r7, r8, lr} + push {r4, r5, r6, lr} sub sp, sp, #64 - mov r8, sp - mov r7, r0 - mov r0, r8 + mov r5, sp + mov r6, r0 + mov r0, r5 mov r2, #32 - mov r6, #32 bl memcpy mov r0, #0 - add r2, sp, #32 + add r12, sp, #32 mcr p15, #0, r0, c7, c10, #5 - ldm r8!, {r1, r3, r4, r5} - mov r0, r2 - stm r0!, {r1, r3, r4, r5} - ldm r8, {r1, r3, r4, r5} - stm r0, {r1, r3, r4, r5} - add r0, r7, #3 - bic r1, r0, #3 - sub r0, r1, r7 - cmp r0, #32 - bhi .LBB6_12 - cmp r0, #0 - beq .LBB6_5 - sub r6, r7, r1 - add r3, r7, #32 - mov r4, r7 -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - 
add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] + ldm r5!, {r1, r2, r3, r4} + mov r0, r12 + stm r0!, {r1, r2, r3, r4} + ldm r5, {r1, r2, r3, r4} + stm r0, {r1, r2, r3, r4} + add r1, r6, #30 + add r2, r12, #30 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #28 + add r2, r12, #28 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #26 + add r2, r12, #26 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #24 + add r2, r12, #24 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #22 + add r2, r12, #22 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #20 + add r2, r12, #20 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #18 + add r2, r12, #18 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #16 + add r2, r12, #16 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #14 + add r2, r12, #14 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #12 + add r2, r12, #12 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #10 + add r2, r12, #10 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #8 + add 
r2, r12, #8 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #6 + orr r2, r12, #6 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #4 + orr r2, r12, #4 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #2 + orr r2, r12, #2 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + mov r0, #0 + @APP + ldrh r1, [r12] + strh r1, [r6] + @NO_APP add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} + pop {r4, r5, r6, pc} asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: push {r4, r5, r11, lr} sub sp, sp, #32 diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align4 b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align4 index 2efd9a8..57908ae 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align4 @@ -1,20 +1,77 @@ asm_test::atomic_memcpy_store_align4::release: - push {r4, r5, r6, r7, r11, lr} - add r6, r1, #8 - ldm r1, {r12, lr} - mov r7, #0 - ldm r6, {r2, r3, r4, r5, r6} - ldr r1, [r1, #28] - mcr p15, #0, r7, c7, c10, #5 - str r1, [r0, #28] - str r6, [r0, #24] - str r5, [r0, #20] - str r4, [r0, #16] - str r3, [r0, #12] - str r2, [r0, #8] - str lr, [r0, #4] - str r12, [r0] - pop {r4, r5, r6, r7, r11, pc} + push {r4, r5, r11, lr} + sub sp, sp, #64 + mov r12, r0 + ldm r1!, {r0, r2, r4, r5} + mov lr, sp + mov r3, lr + stm r3!, {r0, r2, r4, r5} + ldm r1, {r0, r2, r4, r5} + add r1, sp, #32 + stm r3, {r0, r2, r4, r5} + mov r0, #0 + mcr p15, #0, r0, c7, c10, #5 + mov r0, r1 + ldm lr!, {r2, r3, r4, r5} + stm r0!, {r2, r3, r4, r5} + ldm lr, {r2, r3, r4, r5} + stm r0, {r2, r3, r4, r5} + add r2, r12, #28 + add r3, r1, #28 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #24 + add r3, r1, #24 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #20 + add r3, r1, #20 + mov r0, #0 + @APP + 
ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #16 + add r3, r1, #16 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #12 + add r3, r1, #12 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #8 + add r3, r1, #8 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #4 + orr r3, r1, #4 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + mov r0, #0 + @APP + ldr r1, [r1] + str r1, [r12] + @NO_APP + add sp, sp, #64 + pop {r4, r5, r11, pc} asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: push {r4, r5, r11, lr} sub sp, sp, #32 diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align8 b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align8 index fde7eef..311ca1a 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabi/atomic_memcpy_store_align8 @@ -1,20 +1,77 @@ asm_test::atomic_memcpy_store_align8::release: - push {r4, r5, r6, r7, r11, lr} - add r6, r1, #8 - ldm r1, {r12, lr} - mov r7, #0 - ldm r6, {r2, r3, r4, r5, r6} - ldr r1, [r1, #28] - mcr p15, #0, r7, c7, c10, #5 - str r1, [r0, #28] - str r6, [r0, #24] - str r5, [r0, #20] - str r4, [r0, #16] - str r3, [r0, #12] - str r2, [r0, #8] - str lr, [r0, #4] - str r12, [r0] - pop {r4, r5, r6, r7, r11, pc} + push {r4, r5, r11, lr} + sub sp, sp, #64 + mov r12, r0 + ldm r1!, {r0, r2, r4, r5} + mov lr, sp + mov r3, lr + stm r3!, {r0, r2, r4, r5} + ldm r1, {r0, r2, r4, r5} + add r1, sp, #32 + stm r3, {r0, r2, r4, r5} + mov r0, #0 + mcr p15, #0, r0, c7, c10, #5 + mov r0, r1 + ldm lr!, {r2, r3, r4, r5} + stm r0!, {r2, r3, r4, r5} + ldm lr, {r2, r3, r4, r5} + stm r0, {r2, r3, r4, r5} + add r2, r12, #28 + add r3, r1, #28 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #24 + add r3, r1, #24 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #20 + add r3, r1, 
#20 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #16 + add r3, r1, #16 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #12 + add r3, r1, #12 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #8 + add r3, r1, #8 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #4 + orr r3, r1, #4 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + mov r0, #0 + @APP + ldr r1, [r1] + str r1, [r12] + @NO_APP + add sp, sp, #64 + pop {r4, r5, r11, pc} asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: push {r4, r5, r11, lr} sub sp, sp, #32 diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align1 b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align1 index 2ef5038..0988e56 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align1 @@ -1,407 +1,239 @@ asm_test::atomic_memcpy_load_align1::acquire: push {r4, r5, r6, lr} sub sp, sp, #32 - add r2, r1, #3 - bic r12, r2, #3 - sub r3, r12, r1 - cmp r3, #33 - bhs .LBB0_6 - cmp r3, #0 - beq .LBB0_7 - sub r2, r1, r12 - add lr, r1, #32 - mov r4, sp - mov r5, r1 -.LBB0_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB0_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB0_8 - cmp r2, #0 - bne .LBB0_11 - b .LBB0_13 -.LBB0_6: - ldrb r2, [r1] - strb r2, [sp] - ldrb r2, [r1, #1] - strb r2, [sp, #1] - ldrb r2, [r1, #2] - strb r2, [sp, #2] - ldrb r2, [r1, #3] - strb r2, [sp, #3] - ldrb r2, [r1, #4] - strb r2, [sp, #4] - ldrb r2, [r1, #5] - strb r2, [sp, #5] - ldrb r2, [r1, #6] - strb r2, [sp, #6] - ldrb r2, [r1, #7] - strb r2, [sp, #7] - ldrb r2, [r1, #8] - strb r2, [sp, #8] - ldrb r2, [r1, #9] - strb r2, [sp, #9] - ldrb r2, [r1, #10] - strb r2, [sp, #10] - ldrb r2, [r1, #11] - strb r2, [sp, #11] - ldrb r2, [r1, #12] - strb r2, [sp, #12] - ldrb 
r2, [r1, #13] - strb r2, [sp, #13] - ldrb r2, [r1, #14] - strb r2, [sp, #14] - ldrb r2, [r1, #15] - strb r2, [sp, #15] - ldrb r2, [r1, #16] - strb r2, [sp, #16] - ldrb r2, [r1, #17] - strb r2, [sp, #17] - ldrb r2, [r1, #18] - strb r2, [sp, #18] - ldrb r2, [r1, #19] - strb r2, [sp, #19] - ldrb r2, [r1, #20] - strb r2, [sp, #20] - ldrb r2, [r1, #21] - strb r2, [sp, #21] - ldrb r2, [r1, #22] - strb r2, [sp, #22] - ldrb r2, [r1, #23] - strb r2, [sp, #23] - ldrb r2, [r1, #24] - strb r2, [sp, #24] - ldrb r2, [r1, #25] - strb r2, [sp, #25] - ldrb r2, [r1, #26] - strb r2, [sp, #26] - ldrb r2, [r1, #27] - strb r2, [sp, #27] - ldrb r2, [r1, #28] - strb r2, [sp, #28] - ldrb r2, [r1, #29] - strb r2, [sp, #29] - ldrb r2, [r1, #30] - strb r2, [sp, #30] - ldrb r1, [r1, #31] - strb r1, [sp, #31] - b .LBB0_13 -.LBB0_7: - mov r2, #32 -.LBB0_8: - sub r3, r1, r12 - mov r12, sp -.LBB0_9: - ldr r6, [r1, -r3] - sub r5, r12, r3 - sub r2, r2, #4 - strb r6, [r12, -r3] - sub r3, r3, #4 - cmp r2, #3 - lsr r4, r6, #24 - strb r4, [r5, #3] - lsr r4, r6, #16 - lsr r6, r6, #8 - strb r4, [r5, #2] - strb r6, [r5, #1] - bhi .LBB0_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB0_13 -.LBB0_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB0_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB0_12 -.LBB0_13: + mov r2, r0 + mov r3, r1 + add r4, r1, #31 mov r1, sp - mov r2, #32 - bl memcpy + add r5, r1, #31 mov r0, #0 - mcr p15, #0, r0, c7, c10, #5 - add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB2_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 + @APP + ldrb r4, [r4] + strb r4, [r5] + @NO_APP + mov r6, #0 + add r5, r3, #30 + add r4, r1, #30 + mov r0, #0 + @APP + ldrb r5, [r5] strb r5, [r4] - add r4, r4, #1 - blo .LBB2_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB2_8 -.LBB2_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB2_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, 
r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB2_6 - rsb r0, r0, #0 -.LBB2_8: - cmp r6, #0 - beq .LBB2_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB2_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB2_10 -.LBB2_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB2_12: - ldrb r0, [sp, #32] - strb r0, [r7] - ldrb r0, [sp, #33] - strb r0, [r7, #1] - ldrb r0, [sp, #34] - strb r0, [r7, #2] - ldrb r0, [sp, #35] - strb r0, [r7, #3] - ldrb r0, [sp, #36] - strb r0, [r7, #4] - ldrb r0, [sp, #37] - strb r0, [r7, #5] - ldrb r0, [sp, #38] - strb r0, [r7, #6] - ldrb r0, [sp, #39] - strb r0, [r7, #7] - ldrb r0, [sp, #40] - strb r0, [r7, #8] - ldrb r0, [sp, #41] - strb r0, [r7, #9] - ldrb r0, [sp, #42] - strb r0, [r7, #10] - ldrb r0, [sp, #43] - strb r0, [r7, #11] - ldrb r0, [sp, #44] - strb r0, [r7, #12] - ldrb r0, [sp, #45] - strb r0, [r7, #13] - ldrb r0, [sp, #46] - strb r0, [r7, #14] - ldrb r0, [sp, #47] - strb r0, [r7, #15] - ldrb r0, [sp, #48] - strb r0, [r7, #16] - ldrb r0, [sp, #49] - strb r0, [r7, #17] - ldrb r0, [sp, #50] - strb r0, [r7, #18] - ldrb r0, [sp, #51] - strb r0, [r7, #19] - ldrb r0, [sp, #52] - strb r0, [r7, #20] - ldrb r0, [sp, #53] - strb r0, [r7, #21] - ldrb r0, [sp, #54] - strb r0, [r7, #22] - ldrb r0, [sp, #55] - strb r0, [r7, #23] - ldrb r0, [sp, #56] - strb r0, [r7, #24] - ldrb r0, [sp, #57] - strb r0, [r7, #25] - ldrb r0, [sp, #58] - strb r0, [r7, #26] - ldrb r0, [sp, #59] - strb r0, [r7, #27] - ldrb r0, [sp, #60] - strb r0, [r7, #28] - ldrb r0, [sp, #61] - strb r0, [r7, #29] - ldrb r0, [sp, #62] - strb r0, [r7, #30] - ldrb r0, [sp, #63] - strb r0, [r7, #31] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, 
#28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r12, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r5, r12, r3 - sub r2, r2, #4 - strb r6, [r12, -r3] - sub r3, r3, #4 - cmp r2, #3 - lsr r4, r6, #24 - strb r4, [r5, #3] - lsr r4, r6, #16 - lsr r6, r6, #8 - strb r4, [r5, #2] - strb r6, [r5, #1] - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: - mov r1, sp + @NO_APP + add r5, r3, #29 + add r4, r1, #29 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #28 + add r4, r1, #28 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #27 + add r4, r1, #27 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #26 + add r4, r1, #26 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #25 + add r4, r1, #25 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #24 + add r4, r1, #24 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #23 + add r4, r1, #23 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #22 + add r4, r1, #22 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #21 + add r4, r1, #21 + mov r0, #0 + @APP + ldrb 
r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #20 + add r4, r1, #20 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #19 + add r4, r1, #19 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #18 + add r4, r1, #18 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #17 + add r4, r1, #17 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #16 + add r4, r1, #16 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #15 + add r4, r1, #15 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #14 + add r4, r1, #14 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #13 + add r4, r1, #13 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #12 + add r4, r1, #12 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #11 + add r4, r1, #11 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #10 + add r4, r1, #10 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #9 + add r4, r1, #9 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #8 + add r4, r1, #8 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #7 + orr r4, r1, #7 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #6 + orr r4, r1, #6 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #5 + orr r4, r1, #5 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #4 + orr r4, r1, #4 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #3 + orr r4, r1, #3 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #2 + orr r4, r1, #2 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + add r5, r3, #1 + orr r4, r1, #1 + mov r0, #0 + @APP + ldrb r5, [r5] + strb r5, [r4] + @NO_APP + mov r5, r1 + mov r0, 
#0 + @APP + ldrb r3, [r3] + strb r3, [r5] + @NO_APP + mov r0, r2 mov r2, #32 bl memcpy - mov r0, #0 - mcr p15, #0, r0, c7, c10, #5 + mcr p15, #0, r6, c7, c10, #5 add sp, sp, #32 pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} sub sp, sp, #76 @@ -511,275 +343,3 @@ asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: mcr p15, #0, r0, c7, c10, #5 add sp, sp, #76 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -.LBB2_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, 
[r4] - add r4, r4, #1 - blo .LBB2_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB2_8 -.LBB2_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB2_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB2_6 - rsb r0, r0, #0 -.LBB2_8: - cmp r6, #0 - beq .LBB2_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB2_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB2_10 -.LBB2_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB2_12: - ldrb r0, [sp, #32] - strb r0, [r7] - ldrb r0, [sp, #33] - strb r0, [r7, #1] - ldrb r0, [sp, #34] - strb r0, [r7, #2] - ldrb r0, [sp, #35] - strb r0, [r7, #3] - ldrb r0, [sp, #36] - strb r0, [r7, #4] - ldrb r0, [sp, #37] - strb r0, [r7, #5] - ldrb r0, [sp, #38] - strb r0, [r7, #6] - ldrb r0, [sp, #39] - strb r0, [r7, #7] - ldrb r0, [sp, #40] - strb r0, [r7, #8] - ldrb r0, [sp, #41] - strb r0, [r7, #9] - ldrb r0, [sp, #42] - strb r0, [r7, #10] - ldrb r0, [sp, #43] - strb r0, [r7, #11] - ldrb r0, [sp, #44] - strb r0, [r7, #12] - ldrb r0, [sp, #45] - strb r0, [r7, #13] - ldrb r0, [sp, #46] - strb r0, [r7, #14] - ldrb r0, [sp, #47] - strb r0, [r7, #15] - ldrb r0, [sp, #48] - strb r0, [r7, #16] - ldrb r0, [sp, #49] - strb r0, [r7, #17] - ldrb r0, [sp, #50] - strb r0, [r7, #18] - ldrb r0, [sp, #51] - strb r0, [r7, #19] - ldrb r0, [sp, #52] - strb r0, [r7, #20] - ldrb r0, [sp, #53] - strb r0, [r7, #21] - ldrb r0, [sp, #54] - strb r0, [r7, #22] - ldrb r0, [sp, #55] - strb r0, [r7, #23] - ldrb r0, [sp, #56] - strb r0, [r7, #24] - ldrb r0, [sp, #57] - strb r0, [r7, #25] - ldrb r0, [sp, #58] - strb r0, [r7, #26] - ldrb r0, [sp, #59] - strb r0, [r7, #27] - ldrb r0, [sp, #60] - strb r0, [r7, #28] - ldrb r0, [sp, #61] - strb r0, [r7, #29] - ldrb r0, [sp, #62] - strb r0, [r7, #30] - ldrb r0, [sp, #63] - strb r0, [r7, #31] - 
add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r12, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r5, r12, r3 - sub r2, r2, #4 - strb r6, [r12, -r3] - sub r3, r3, #4 - cmp r2, #3 - lsr r4, r6, #24 - strb r4, [r5, #3] - lsr r4, r6, #16 - lsr r6, r6, #8 - strb r4, [r5, #2] - strb r6, [r5, #1] - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: - mov r1, sp - mov r2, #32 - bl memcpy - mov r0, #0 - mcr p15, #0, r0, c7, c10, #5 - add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi 
.LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align16 b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align16 index 4b8f777..d2226c6 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align16 @@ -1,23 +1,73 @@ asm_test::atomic_memcpy_load_align16::acquire: - push {r4, r5, r6, lr} - ldr r12, [r1, #28] - ldr lr, [r1, #24] - ldr r2, [r1, #20] - ldr r3, [r1, #16] - ldr r4, [r1, #12] - ldr r5, [r1, #8] - ldr r6, [r1, #4] - ldr r1, [r1] - str r5, [r0, #8] - stm r0, {r1, r6} - str r4, [r0, #12] - str r3, [r0, #16] - str r2, [r0, #20] - str lr, [r0, #24] - str r12, [r0, #28] + push {r4, r5, r11, lr} + add r11, sp, #8 + sub sp, sp, #32 + bic sp, sp, #15 + mov r5, r0 + mov r3, sp + add r4, r3, #28 mov r0, #0 - mcr p15, #0, r0, c7, c10, #5 - pop {r4, r5, r6, pc} + add lr, r1, #28 + @APP + ldr r2, [lr] + str r2, [r4] + @NO_APP + mov r12, #0 + 
add r2, r1, #24 + add r4, r3, #24 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #20 + add r4, r3, #20 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #16 + add r4, r3, #16 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #12 + orr r4, r3, #12 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #8 + orr r4, r3, #8 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #4 + orr r4, r3, #4 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + mov r2, r3 + mov r0, #0 + @APP + ldr r1, [r1] + str r1, [r2] + @NO_APP + ldm r3!, {r0, r1, r2, r4} + stm r5!, {r0, r1, r2, r4} + ldm r3, {r0, r1, r2, r4} + stm r5, {r0, r1, r2, r4} + mcr p15, #0, r12, c7, c10, #5 + sub sp, r11, #8 + pop {r4, r5, r11, pc} asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: push {r4, r5, r6, r7, r8, r9, r11, lr} ldrd r8, r9, [r1] diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align2 b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align2 index 606e6c5..962e8c8 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align2 @@ -1,180 +1,127 @@ asm_test::atomic_memcpy_load_align2::acquire: push {r4, r5, r6, lr} sub sp, sp, #32 - add r2, r1, #3 - bic r12, r2, #3 - sub r3, r12, r1 - cmp r3, #32 - bhi .LBB4_6 - cmp r3, #0 - beq .LBB4_7 - sub r2, r1, r12 - add lr, r1, #32 - mov r4, sp - mov r5, r1 -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - 
strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r12, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r5, r12, r3 - sub r2, r2, #4 - strb r6, [r12, -r3] - sub r3, r3, #4 - cmp r2, #3 - lsr r4, r6, #24 - strb r4, [r5, #3] - lsr r4, r6, #16 - lsr r6, r6, #8 - strb r4, [r5, #2] - strb r6, [r5, #1] - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: + mov r2, r0 + mov r3, r1 + add r4, r1, #30 mov r1, sp + add r5, r1, #30 + mov r0, #0 + @APP + ldrh r4, [r4] + strh r4, [r5] + @NO_APP + mov r6, #0 + add r5, r3, #28 + add r4, r1, #28 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #26 + add r4, r1, #26 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #24 + add r4, r1, #24 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #22 + add r4, r1, #22 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #20 + add r4, r1, #20 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #18 + add r4, r1, #18 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #16 + add r4, r1, #16 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #14 + add r4, r1, #14 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #12 + add r4, r1, #12 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, 
#10 + add r4, r1, #10 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #8 + add r4, r1, #8 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #6 + orr r4, r1, #6 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #4 + orr r4, r1, #4 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + add r5, r3, #2 + orr r4, r1, #2 + mov r0, #0 + @APP + ldrh r5, [r5] + strh r5, [r4] + @NO_APP + mov r5, r1 + mov r0, #0 + @APP + ldrh r3, [r3] + strh r3, [r5] + @NO_APP + mov r0, r2 mov r2, #32 bl memcpy - mov r0, #0 - mcr p15, #0, r0, c7, c10, #5 + mcr p15, #0, r6, c7, c10, #5 add sp, sp, #32 pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, 
#4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: push {r4, r5, r11, lr} ldrh r2, [r1, #28] @@ -212,80 +159,3 @@ asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: mov r0, #0 mcr p15, #0, r0, c7, c10, #5 pop {r4, r5, r11, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align4 b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align4 index 
3c2bf50..42b79b3 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align4 @@ -1,23 +1,71 @@ asm_test::atomic_memcpy_load_align4::acquire: - push {r4, r5, r6, lr} - ldr r12, [r1, #28] - ldr lr, [r1, #24] - ldr r2, [r1, #20] - ldr r3, [r1, #16] - ldr r4, [r1, #12] - ldr r5, [r1, #8] - ldr r6, [r1, #4] - ldr r1, [r1] - str r5, [r0, #8] - stm r0, {r1, r6} - str r4, [r0, #12] - str r3, [r0, #16] - str r2, [r0, #20] - str lr, [r0, #24] - str r12, [r0, #28] + push {r4, r5, r11, lr} + sub sp, sp, #32 + mov r5, r0 + mov r3, sp + add r4, r3, #28 mov r0, #0 - mcr p15, #0, r0, c7, c10, #5 - pop {r4, r5, r6, pc} + add lr, r1, #28 + @APP + ldr r2, [lr] + str r2, [r4] + @NO_APP + mov r12, #0 + add r2, r1, #24 + add r4, r3, #24 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #20 + add r4, r3, #20 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #16 + add r4, r3, #16 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #12 + add r4, r3, #12 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #8 + add r4, r3, #8 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #4 + orr r4, r3, #4 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + mov r2, r3 + mov r0, #0 + @APP + ldr r1, [r1] + str r1, [r2] + @NO_APP + ldm r3!, {r0, r1, r2, r4} + stm r5!, {r0, r1, r2, r4} + ldm r3, {r0, r1, r2, r4} + stm r5, {r0, r1, r2, r4} + mcr p15, #0, r12, c7, c10, #5 + add sp, sp, #32 + pop {r4, r5, r11, pc} asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: push {r4, r5, r6, lr} ldr lr, [r1, #20] diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align8 b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align8 index 630e8fa..ce3bda7 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align8 
+++ b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_load_align8 @@ -1,23 +1,71 @@ asm_test::atomic_memcpy_load_align8::acquire: - push {r4, r5, r6, lr} - ldr r12, [r1, #28] - ldr lr, [r1, #24] - ldr r2, [r1, #20] - ldr r3, [r1, #16] - ldr r4, [r1, #12] - ldr r5, [r1, #8] - ldr r6, [r1, #4] - ldr r1, [r1] - str r5, [r0, #8] - stm r0, {r1, r6} - str r4, [r0, #12] - str r3, [r0, #16] - str r2, [r0, #20] - str lr, [r0, #24] - str r12, [r0, #28] + push {r4, r5, r11, lr} + sub sp, sp, #32 + mov r5, r0 + mov r3, sp + add r4, r3, #28 mov r0, #0 - mcr p15, #0, r0, c7, c10, #5 - pop {r4, r5, r6, pc} + add lr, r1, #28 + @APP + ldr r2, [lr] + str r2, [r4] + @NO_APP + mov r12, #0 + add r2, r1, #24 + add r4, r3, #24 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #20 + add r4, r3, #20 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #16 + add r4, r3, #16 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #12 + add r4, r3, #12 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #8 + add r4, r3, #8 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + add r2, r1, #4 + orr r4, r3, #4 + mov r0, #0 + @APP + ldr r2, [r2] + str r2, [r4] + @NO_APP + mov r2, r3 + mov r0, #0 + @APP + ldr r1, [r1] + str r1, [r2] + @NO_APP + ldm r3!, {r0, r1, r2, r4} + stm r5!, {r0, r1, r2, r4} + ldm r3, {r0, r1, r2, r4} + stm r5, {r0, r1, r2, r4} + mcr p15, #0, r12, c7, c10, #5 + add sp, sp, #32 + pop {r4, r5, r11, pc} asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: push {r4, r5, r6, r7, r8, r9, r11, lr} ldrd r8, r9, [r1] diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align1 b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align1 index a5e149b..8a068b4 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align1 @@ 
-1,302 +1,243 @@ asm_test::atomic_memcpy_store_align1::release: - push {r4, r5, r6, r7, r8, lr} + push {r4, r5, r6, lr} sub sp, sp, #64 - mov r8, sp - mov r7, r0 - mov r0, r8 - mov r2, #32 - mov r6, #32 - bl memcpy - mov r0, #0 - add r2, sp, #32 - mcr p15, #0, r0, c7, c10, #5 - ldm r8!, {r1, r3, r4, r5} - mov r0, r2 - stm r0!, {r1, r3, r4, r5} - ldm r8, {r1, r3, r4, r5} - stm r0, {r1, r3, r4, r5} - add r0, r7, #3 - bic r1, r0, #3 - sub r0, r1, r7 - cmp r0, #33 - bhs .LBB2_12 - cmp r0, #0 - beq .LBB2_5 - sub r6, r7, r1 - add r3, r7, #32 - mov r4, r7 -.LBB2_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB2_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB2_8 -.LBB2_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB2_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB2_6 - rsb r0, r0, #0 -.LBB2_8: - cmp r6, #0 - beq .LBB2_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB2_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB2_10 -.LBB2_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB2_12: - ldrb r0, [sp, #32] - strb r0, [r7] - ldrb r0, [sp, #33] - strb r0, [r7, #1] - ldrb r0, [sp, #34] - strb r0, [r7, #2] - ldrb r0, [sp, #35] - strb r0, [r7, #3] - ldrb r0, [sp, #36] - strb r0, [r7, #4] - ldrb r0, [sp, #37] - strb r0, [r7, #5] - ldrb r0, [sp, #38] - strb r0, [r7, #6] - ldrb r0, [sp, #39] - strb r0, [r7, #7] - ldrb r0, [sp, #40] - strb r0, [r7, #8] - ldrb r0, [sp, #41] - strb r0, [r7, #9] - ldrb r0, [sp, #42] - strb r0, [r7, #10] - ldrb r0, [sp, #43] - strb r0, [r7, #11] - ldrb r0, [sp, #44] - strb r0, [r7, #12] - ldrb r0, [sp, #45] - strb r0, [r7, #13] - ldrb r0, [sp, #46] - strb r0, [r7, #14] - ldrb r0, [sp, #47] - strb r0, [r7, #15] - ldrb r0, [sp, #48] - strb r0, [r7, #16] - ldrb r0, [sp, 
#49] - strb r0, [r7, #17] - ldrb r0, [sp, #50] - strb r0, [r7, #18] - ldrb r0, [sp, #51] - strb r0, [r7, #19] - ldrb r0, [sp, #52] - strb r0, [r7, #20] - ldrb r0, [sp, #53] - strb r0, [r7, #21] - ldrb r0, [sp, #54] - strb r0, [r7, #22] - ldrb r0, [sp, #55] - strb r0, [r7, #23] - ldrb r0, [sp, #56] - strb r0, [r7, #24] - ldrb r0, [sp, #57] - strb r0, [r7, #25] - ldrb r0, [sp, #58] - strb r0, [r7, #26] - ldrb r0, [sp, #59] - strb r0, [r7, #27] - ldrb r0, [sp, #60] - strb r0, [r7, #28] - ldrb r0, [sp, #61] - strb r0, [r7, #29] - ldrb r0, [sp, #62] - strb r0, [r7, #30] - ldrb r0, [sp, #63] - strb r0, [r7, #31] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r12, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r5, r12, r3 - sub r2, r2, #4 - strb r6, [r12, -r3] - sub r3, r3, #4 - cmp r2, #3 - lsr r4, r6, #24 - strb r4, [r5, #3] - lsr r4, r6, #16 - lsr r6, r6, #8 - strb r4, [r5, #2] - strb r6, [r5, #1] - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - 
subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: - mov r1, sp + mov r4, r0 + mov r0, r6 mov r2, #32 bl memcpy mov r0, #0 + add r12, sp, #32 mcr p15, #0, r0, c7, c10, #5 - add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] + ldm r6!, {r1, r2, r3, r5} + mov r0, r12 + stm r0!, {r1, r2, r3, r5} + ldm r6, {r1, r2, r3, r5} + stm r0, {r1, r2, r3, r5} + add r1, r4, #31 + add r2, r12, #31 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #30 + add r2, r12, #30 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #29 + add r2, r12, #29 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, 
[r1] + @NO_APP + add r1, r4, #28 + add r2, r12, #28 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #27 + add r2, r12, #27 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #26 + add r2, r12, #26 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #25 + add r2, r12, #25 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #24 + add r2, r12, #24 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #23 + add r2, r12, #23 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #22 + add r2, r12, #22 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #21 + add r2, r12, #21 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #20 + add r2, r12, #20 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #19 + add r2, r12, #19 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #18 + add r2, r12, #18 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #17 + add r2, r12, #17 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #16 + add r2, r12, #16 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #15 + add r2, r12, #15 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #14 + add r2, r12, #14 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #13 + add r2, r12, #13 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #12 + add r2, r12, #12 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #11 + add r2, r12, #11 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #10 + add r2, r12, #10 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #9 + add r2, r12, #9 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add 
r1, r4, #8 + add r2, r12, #8 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #7 + orr r2, r12, #7 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #6 + orr r2, r12, #6 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #5 + orr r2, r12, #5 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #4 + orr r2, r12, #4 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #3 + orr r2, r12, #3 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #2 + orr r2, r12, #2 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + add r1, r4, #1 + orr r2, r12, #1 + mov r0, #0 + @APP + ldrb r2, [r2] + strb r2, [r1] + @NO_APP + mov r0, #0 + @APP + ldrb r1, [r12] + strb r1, [r4] + @NO_APP add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} + pop {r4, r5, r6, pc} asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: push {r4, r5, r11, lr} sub sp, sp, #32 @@ -313,166 +254,3 @@ asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: bl memcpy add sp, sp, #32 pop {r4, r5, r11, pc} -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b 
.LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r12, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r5, r12, r3 - sub r2, r2, #4 - strb r6, [r12, -r3] - sub r3, r3, #4 - cmp r2, #3 - lsr r4, r6, #24 - strb r4, [r5, #3] - lsr r4, r6, #16 - lsr r6, r6, #8 - strb r4, [r5, #2] - strb r6, [r5, #1] - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: - mov r1, sp - mov r2, #32 - bl memcpy - mov r0, #0 - mcr p15, #0, r0, c7, c10, #5 - add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, 
[r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align16 b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align16 index 08efa6d..edc2621 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align16 @@ -1,20 +1,79 @@ asm_test::atomic_memcpy_store_align16::release: - push {r4, r5, r6, r7, r11, lr} - add r6, r1, #8 - ldm r1, {r12, lr} - mov r7, #0 - ldm r6, {r2, r3, r4, r5, r6} - ldr r1, [r1, #28] - mcr p15, #0, r7, c7, c10, #5 - str r1, [r0, #28] - str r6, [r0, #24] - str r5, [r0, #20] - str r4, [r0, #16] - str r3, [r0, #12] - str r2, [r0, #8] - str lr, [r0, #4] - str r12, [r0] - pop {r4, r5, r6, r7, r11, pc} + push {r4, r5, r11, lr} + add r11, sp, #8 + sub sp, sp, #64 + bic sp, sp, #15 + mov r12, r0 + ldm r1!, {r0, r2, r4, r5} + mov lr, sp + mov r3, lr + stm r3!, {r0, r2, r4, r5} + ldm r1, {r0, r2, r4, r5} + add r1, sp, #32 + stm r3, {r0, r2, r4, r5} + mov r0, #0 + mcr p15, #0, r0, c7, c10, #5 + mov r0, r1 + ldm lr!, {r2, r3, r4, r5} + stm r0!, {r2, r3, r4, r5} + ldm lr, {r2, r3, r4, r5} + stm r0, {r2, r3, r4, r5} + add r2, r12, #28 + add r3, r1, #28 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #24 + add r3, r1, #24 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #20 + add r3, r1, #20 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #16 + add r3, r1, #16 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #12 + orr r3, r1, #12 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #8 + orr r3, r1, #8 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #4 + orr r3, r1, #4 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + mov r0, #0 + @APP + ldr r1, [r1] + str 
r1, [r12] + @NO_APP + sub sp, r11, #8 + pop {r4, r5, r11, pc} asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: push {r4, r5, r11, lr} add r11, sp, #8 diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align2 b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align2 index c7e67ef..008ceff 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align2 @@ -1,107 +1,131 @@ asm_test::atomic_memcpy_store_align2::release: - push {r4, r5, r6, r7, r8, lr} + push {r4, r5, r6, lr} sub sp, sp, #64 - mov r8, sp - mov r7, r0 - mov r0, r8 + mov r5, sp + mov r6, r0 + mov r0, r5 mov r2, #32 - mov r6, #32 bl memcpy mov r0, #0 - add r2, sp, #32 + add r12, sp, #32 mcr p15, #0, r0, c7, c10, #5 - ldm r8!, {r1, r3, r4, r5} - mov r0, r2 - stm r0!, {r1, r3, r4, r5} - ldm r8, {r1, r3, r4, r5} - stm r0, {r1, r3, r4, r5} - add r0, r7, #3 - bic r1, r0, #3 - sub r0, r1, r7 - cmp r0, #32 - bhi .LBB6_12 - cmp r0, #0 - beq .LBB6_5 - sub r6, r7, r1 - add r3, r7, #32 - mov r4, r7 -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - sub r3, r1, r0 - ldrb r2, [r1, -r0] - sub r6, r6, #4 - ldrb r5, [r3, #1] - cmp r6, #3 - ldrb r4, [r3, #2] - ldrb r3, [r3, #3] - orr r2, r2, r5, lsl, #8 - orr r3, r4, r3, lsl, #8 - orr r2, r2, r3, lsl, #16 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, 
#56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] + ldm r5!, {r1, r2, r3, r4} + mov r0, r12 + stm r0!, {r1, r2, r3, r4} + ldm r5, {r1, r2, r3, r4} + stm r0, {r1, r2, r3, r4} + add r1, r6, #30 + add r2, r12, #30 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #28 + add r2, r12, #28 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #26 + add r2, r12, #26 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #24 + add r2, r12, #24 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #22 + add r2, r12, #22 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #20 + add r2, r12, #20 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #18 + add r2, r12, #18 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #16 + add r2, r12, #16 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #14 + add r2, r12, #14 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #12 + add r2, r12, #12 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #10 + add r2, r12, #10 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #8 + add r2, r12, #8 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #6 + orr r2, r12, #6 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add r1, r6, #4 + orr r2, r12, #4 + mov r0, #0 + @APP + ldrh r2, [r2] 
+ strh r2, [r1] + @NO_APP + add r1, r6, #2 + orr r2, r12, #2 + mov r0, #0 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + mov r0, #0 + @APP + ldrh r1, [r12] + strh r1, [r6] + @NO_APP add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} + pop {r4, r5, r6, pc} asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: push {r4, r5, r11, lr} sub sp, sp, #32 diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align4 b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align4 index 2efd9a8..57908ae 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align4 @@ -1,20 +1,77 @@ asm_test::atomic_memcpy_store_align4::release: - push {r4, r5, r6, r7, r11, lr} - add r6, r1, #8 - ldm r1, {r12, lr} - mov r7, #0 - ldm r6, {r2, r3, r4, r5, r6} - ldr r1, [r1, #28] - mcr p15, #0, r7, c7, c10, #5 - str r1, [r0, #28] - str r6, [r0, #24] - str r5, [r0, #20] - str r4, [r0, #16] - str r3, [r0, #12] - str r2, [r0, #8] - str lr, [r0, #4] - str r12, [r0] - pop {r4, r5, r6, r7, r11, pc} + push {r4, r5, r11, lr} + sub sp, sp, #64 + mov r12, r0 + ldm r1!, {r0, r2, r4, r5} + mov lr, sp + mov r3, lr + stm r3!, {r0, r2, r4, r5} + ldm r1, {r0, r2, r4, r5} + add r1, sp, #32 + stm r3, {r0, r2, r4, r5} + mov r0, #0 + mcr p15, #0, r0, c7, c10, #5 + mov r0, r1 + ldm lr!, {r2, r3, r4, r5} + stm r0!, {r2, r3, r4, r5} + ldm lr, {r2, r3, r4, r5} + stm r0, {r2, r3, r4, r5} + add r2, r12, #28 + add r3, r1, #28 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #24 + add r3, r1, #24 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #20 + add r3, r1, #20 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #16 + add r3, r1, #16 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #12 + add r3, r1, #12 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + 
add r2, r12, #8 + add r3, r1, #8 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #4 + orr r3, r1, #4 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + mov r0, #0 + @APP + ldr r1, [r1] + str r1, [r12] + @NO_APP + add sp, sp, #64 + pop {r4, r5, r11, pc} asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: push {r4, r5, r11, lr} sub sp, sp, #32 diff --git a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align8 b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align8 index fde7eef..311ca1a 100644 --- a/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/arm-unknown-linux-gnueabihf/atomic_memcpy_store_align8 @@ -1,20 +1,77 @@ asm_test::atomic_memcpy_store_align8::release: - push {r4, r5, r6, r7, r11, lr} - add r6, r1, #8 - ldm r1, {r12, lr} - mov r7, #0 - ldm r6, {r2, r3, r4, r5, r6} - ldr r1, [r1, #28] - mcr p15, #0, r7, c7, c10, #5 - str r1, [r0, #28] - str r6, [r0, #24] - str r5, [r0, #20] - str r4, [r0, #16] - str r3, [r0, #12] - str r2, [r0, #8] - str lr, [r0, #4] - str r12, [r0] - pop {r4, r5, r6, r7, r11, pc} + push {r4, r5, r11, lr} + sub sp, sp, #64 + mov r12, r0 + ldm r1!, {r0, r2, r4, r5} + mov lr, sp + mov r3, lr + stm r3!, {r0, r2, r4, r5} + ldm r1, {r0, r2, r4, r5} + add r1, sp, #32 + stm r3, {r0, r2, r4, r5} + mov r0, #0 + mcr p15, #0, r0, c7, c10, #5 + mov r0, r1 + ldm lr!, {r2, r3, r4, r5} + stm r0!, {r2, r3, r4, r5} + ldm lr, {r2, r3, r4, r5} + stm r0, {r2, r3, r4, r5} + add r2, r12, #28 + add r3, r1, #28 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #24 + add r3, r1, #24 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #20 + add r3, r1, #20 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #16 + add r3, r1, #16 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #12 + add r3, r1, #12 + mov r0, #0 + @APP + ldr r3, 
[r3] + str r3, [r2] + @NO_APP + add r2, r12, #8 + add r3, r1, #8 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + add r2, r12, #4 + orr r3, r1, #4 + mov r0, #0 + @APP + ldr r3, [r3] + str r3, [r2] + @NO_APP + mov r0, #0 + @APP + ldr r1, [r1] + str r1, [r12] + @NO_APP + add sp, sp, #64 + pop {r4, r5, r11, pc} asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: push {r4, r5, r11, lr} sub sp, sp, #32 diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align1 b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align1 index 3e6d297..de7e896 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align1 @@ -1,377 +1,204 @@ asm_test::atomic_memcpy_load_align1::acquire: - push {r4, r5, r6, lr} + push {r11, lr} sub sp, sp, #32 - add r2, r1, #3 - bic r12, r2, #3 - sub r3, r12, r1 - cmp r3, #33 - bhs .LBB0_6 - cmp r3, #0 - beq .LBB0_7 - sub r2, r1, r12 - add lr, r1, #32 - mov r4, sp - mov r5, r1 -.LBB0_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB0_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB0_8 - cmp r2, #0 - bne .LBB0_11 - b .LBB0_13 -.LBB0_6: - ldrb r2, [r1] - strb r2, [sp] - ldrb r2, [r1, #1] - strb r2, [sp, #1] - ldrb r2, [r1, #2] - strb r2, [sp, #2] - ldrb r2, [r1, #3] - strb r2, [sp, #3] - ldrb r2, [r1, #4] - strb r2, [sp, #4] - ldrb r2, [r1, #5] - strb r2, [sp, #5] - ldrb r2, [r1, #6] - strb r2, [sp, #6] - ldrb r2, [r1, #7] - strb r2, [sp, #7] - ldrb r2, [r1, #8] - strb r2, [sp, #8] - ldrb r2, [r1, #9] - strb r2, [sp, #9] - ldrb r2, [r1, #10] - strb r2, [sp, #10] - ldrb r2, [r1, #11] - strb r2, [sp, #11] - ldrb r2, [r1, #12] - strb r2, [sp, #12] - ldrb r2, [r1, #13] - strb r2, [sp, #13] - ldrb r2, [r1, #14] - strb r2, [sp, #14] - ldrb r2, [r1, #15] - strb r2, [sp, #15] - ldrb r2, [r1, #16] - strb r2, [sp, #16] - ldrb r2, [r1, #17] - strb r2, [sp, #17] - ldrb 
r2, [r1, #18] - strb r2, [sp, #18] - ldrb r2, [r1, #19] - strb r2, [sp, #19] - ldrb r2, [r1, #20] - strb r2, [sp, #20] - ldrb r2, [r1, #21] - strb r2, [sp, #21] - ldrb r2, [r1, #22] - strb r2, [sp, #22] - ldrb r2, [r1, #23] - strb r2, [sp, #23] - ldrb r2, [r1, #24] - strb r2, [sp, #24] - ldrb r2, [r1, #25] - strb r2, [sp, #25] - ldrb r2, [r1, #26] - strb r2, [sp, #26] - ldrb r2, [r1, #27] - strb r2, [sp, #27] - ldrb r2, [r1, #28] - strb r2, [sp, #28] - ldrb r2, [r1, #29] - strb r2, [sp, #29] - ldrb r2, [r1, #30] - strb r2, [sp, #30] - ldrb r1, [r1, #31] - strb r1, [sp, #31] - b .LBB0_13 -.LBB0_7: - mov r2, #32 -.LBB0_8: - sub r3, r1, r12 - mov r5, sp -.LBB0_9: - ldr r6, [r1, -r3] - sub r2, r2, #4 - cmp r2, #3 - str r6, [r5, -r3] - sub r3, r3, #4 - bhi .LBB0_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB0_13 -.LBB0_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB0_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB0_12 -.LBB0_13: + mov lr, r1 + add r12, r1, #31 mov r1, sp + add r3, r1, #31 + @APP + ldrb r2, [r12] + strb r2, [r3] + @NO_APP + add r2, lr, #30 + add r3, r1, #30 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #29 + add r3, r1, #29 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #28 + add r3, r1, #28 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #27 + add r3, r1, #27 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #26 + add r3, r1, #26 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #25 + add r3, r1, #25 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #24 + add r3, r1, #24 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #23 + add r3, r1, #23 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #22 + add r3, r1, #22 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #21 + add r3, r1, #21 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #20 + add r3, r1, #20 + @APP + ldrb r2, 
[r2] + strb r2, [r3] + @NO_APP + add r2, lr, #19 + add r3, r1, #19 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #18 + add r3, r1, #18 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #17 + add r3, r1, #17 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #16 + add r3, r1, #16 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #15 + add r3, r1, #15 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #14 + add r3, r1, #14 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #13 + add r3, r1, #13 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #12 + add r3, r1, #12 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #11 + add r3, r1, #11 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #10 + add r3, r1, #10 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #9 + add r3, r1, #9 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #8 + add r3, r1, #8 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #7 + orr r3, r1, #7 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #6 + orr r3, r1, #6 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #5 + orr r3, r1, #5 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #4 + orr r3, r1, #4 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #3 + orr r3, r1, #3 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #2 + orr r3, r1, #2 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #1 + orr r3, r1, #1 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + mov r2, r1 + @APP + ldrb r3, [lr] + strb r3, [r2] + @NO_APP mov r2, #32 bl memcpy dmb ish add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB2_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB2_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB2_8 -.LBB2_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB2_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, 
#3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB2_6 - rsb r0, r0, #0 -.LBB2_8: - cmp r6, #0 - beq .LBB2_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB2_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB2_10 -.LBB2_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB2_12: - ldrb r0, [sp, #32] - strb r0, [r7] - ldrb r0, [sp, #33] - strb r0, [r7, #1] - ldrb r0, [sp, #34] - strb r0, [r7, #2] - ldrb r0, [sp, #35] - strb r0, [r7, #3] - ldrb r0, [sp, #36] - strb r0, [r7, #4] - ldrb r0, [sp, #37] - strb r0, [r7, #5] - ldrb r0, [sp, #38] - strb r0, [r7, #6] - ldrb r0, [sp, #39] - strb r0, [r7, #7] - ldrb r0, [sp, #40] - strb r0, [r7, #8] - ldrb r0, [sp, #41] - strb r0, [r7, #9] - ldrb r0, [sp, #42] - strb r0, [r7, #10] - ldrb r0, [sp, #43] - strb r0, [r7, #11] - ldrb r0, [sp, #44] - strb r0, [r7, #12] - ldrb r0, [sp, #45] - strb r0, [r7, #13] - ldrb r0, [sp, #46] - strb r0, [r7, #14] - ldrb r0, [sp, #47] - strb r0, [r7, #15] - ldrb r0, [sp, #48] - strb r0, [r7, #16] - ldrb r0, [sp, #49] - strb r0, [r7, #17] - ldrb r0, [sp, #50] - strb r0, [r7, #18] - ldrb r0, [sp, #51] - strb r0, [r7, #19] - ldrb r0, [sp, #52] - strb r0, [r7, #20] - ldrb r0, [sp, #53] - strb r0, [r7, #21] - ldrb r0, [sp, #54] - strb r0, [r7, #22] - ldrb r0, [sp, #55] - strb r0, [r7, #23] - ldrb r0, [sp, #56] - strb r0, [r7, #24] - ldrb r0, [sp, #57] - strb r0, [r7, #25] - ldrb r0, [sp, #58] - strb r0, [r7, #26] - ldrb r0, [sp, #59] - strb r0, [r7, #27] - ldrb r0, [sp, #60] - strb r0, [r7, #28] - ldrb r0, [sp, #61] - strb r0, [r7, #29] - ldrb r0, [sp, #62] - strb r0, [r7, #30] - ldrb r0, [sp, #63] - strb r0, [r7, #31] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, 
#26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r5, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r2, r2, #4 - cmp r2, #3 - str r6, [r5, -r3] - sub r3, r3, #4 - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: - mov r1, sp - mov r2, #32 - bl memcpy - dmb ish - add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh 
r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} + pop {r11, pc} asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} sub sp, sp, #76 @@ -480,253 +307,3 @@ asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: dmb ish add sp, sp, #76 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -.LBB2_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB2_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB2_8 -.LBB2_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB2_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB2_6 - rsb r0, r0, #0 -.LBB2_8: - cmp r6, #0 - beq .LBB2_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB2_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB2_10 -.LBB2_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB2_12: - ldrb r0, [sp, #32] - strb r0, [r7] - ldrb r0, [sp, #33] - strb r0, [r7, #1] - ldrb r0, [sp, #34] - strb r0, [r7, #2] - ldrb r0, [sp, #35] - strb r0, [r7, #3] - ldrb r0, [sp, #36] - strb r0, [r7, #4] - ldrb r0, [sp, #37] - strb r0, [r7, #5] - ldrb r0, [sp, #38] - strb r0, [r7, #6] - ldrb r0, [sp, #39] - strb r0, [r7, #7] - ldrb r0, [sp, #40] - strb r0, [r7, #8] - ldrb r0, [sp, #41] - strb r0, [r7, #9] - ldrb r0, [sp, #42] - strb r0, [r7, #10] - ldrb r0, [sp, #43] - strb r0, [r7, #11] - ldrb r0, [sp, #44] - strb r0, [r7, #12] - ldrb r0, [sp, #45] - strb r0, [r7, #13] - ldrb r0, [sp, #46] - strb r0, [r7, #14] - ldrb r0, [sp, #47] - strb r0, [r7, #15] - ldrb r0, [sp, #48] - strb r0, [r7, #16] - ldrb r0, [sp, #49] - strb r0, [r7, #17] - ldrb r0, [sp, #50] - strb r0, [r7, #18] - ldrb r0, [sp, #51] 
- strb r0, [r7, #19] - ldrb r0, [sp, #52] - strb r0, [r7, #20] - ldrb r0, [sp, #53] - strb r0, [r7, #21] - ldrb r0, [sp, #54] - strb r0, [r7, #22] - ldrb r0, [sp, #55] - strb r0, [r7, #23] - ldrb r0, [sp, #56] - strb r0, [r7, #24] - ldrb r0, [sp, #57] - strb r0, [r7, #25] - ldrb r0, [sp, #58] - strb r0, [r7, #26] - ldrb r0, [sp, #59] - strb r0, [r7, #27] - ldrb r0, [sp, #60] - strb r0, [r7, #28] - ldrb r0, [sp, #61] - strb r0, [r7, #29] - ldrb r0, [sp, #62] - strb r0, [r7, #30] - ldrb r0, [sp, #63] - strb r0, [r7, #31] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r5, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r2, r2, #4 - cmp r2, #3 - str r6, [r5, -r3] - sub r3, r3, #4 - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: - mov r1, sp - mov r2, #32 - bl memcpy - dmb ish - add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo 
.LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align16 b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align16 index b663bd9..83d0ffd 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align16 @@ -1,22 +1,63 @@ asm_test::atomic_memcpy_load_align16::acquire: - push {r4, r5, r6, lr} - ldr r12, [r1, #28] - ldr lr, [r1, #24] - ldr r2, [r1, #20] - ldr r3, [r1, #16] - ldr r4, [r1, #12] - ldr r5, [r1, #8] - ldr r6, [r1, #4] + push {r11, lr} + mov r11, sp + sub sp, sp, #40 + bfc sp, #0, #4 + mov lr, sp + add r3, lr, #28 + add r12, r1, #28 + @APP + ldr r2, [r12] + str r2, [r3] + @NO_APP + add r2, r1, #24 + add r3, lr, #24 + @APP + ldr r2, [r2] + 
str r2, [r3] + @NO_APP + add r2, r1, #20 + add r3, lr, #20 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #16 + add r3, lr, #16 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #12 + orr r3, lr, #12 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #8 + orr r3, lr, #8 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #4 + orr r3, lr, #4 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + mov r2, lr + @APP ldr r1, [r1] - str r5, [r0, #8] - stm r0, {r1, r6} - str r4, [r0, #12] - str r3, [r0, #16] - str r2, [r0, #20] - str lr, [r0, #24] - str r12, [r0, #28] + str r1, [r2] + @NO_APP + ldm lr!, {r1, r2, r3, r12} + stm r0!, {r1, r2, r3, r12} + ldm lr, {r1, r2, r3, r12} + stm r0, {r1, r2, r3, r12} dmb ish - pop {r4, r5, r6, pc} + mov sp, r11 + pop {r11, pc} asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: push {r4, r5, r6, r7, r8, r9, r11, lr} ldrd r8, r9, [r1, #24] diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align2 b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align2 index f629c24..51fda96 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align2 @@ -1,165 +1,108 @@ asm_test::atomic_memcpy_load_align2::acquire: - push {r4, r5, r6, lr} + push {r11, lr} sub sp, sp, #32 - add r2, r1, #3 - bic r12, r2, #3 - sub r3, r12, r1 - cmp r3, #32 - bhi .LBB4_6 - cmp r3, #0 - beq .LBB4_7 - sub r2, r1, r12 - add lr, r1, #32 - mov r4, sp - mov r5, r1 -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] 
- strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r5, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r2, r2, #4 - cmp r2, #3 - str r6, [r5, -r3] - sub r3, r3, #4 - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: + mov lr, r1 + add r12, r1, #30 mov r1, sp + add r3, r1, #30 + @APP + ldrh r2, [r12] + strh r2, [r3] + @NO_APP + add r2, lr, #28 + add r3, r1, #28 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #26 + add r3, r1, #26 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #24 + add r3, r1, #24 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #22 + add r3, r1, #22 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #20 + add r3, r1, #20 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #18 + add r3, r1, #18 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #16 + add r3, r1, #16 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #14 + add r3, r1, #14 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #12 + add r3, r1, #12 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #10 + add r3, r1, #10 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #8 + add r3, r1, #8 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #6 + orr r3, r1, #6 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #4 + orr r3, r1, #4 + @APP + ldrh 
r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #2 + orr r3, r1, #2 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + mov r2, r1 + @APP + ldrh r3, [lr] + strh r3, [r2] + @NO_APP mov r2, #32 bl memcpy dmb ish add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} + pop {r11, pc} asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: ldrh r2, [r1, #30] strh r2, [r0, #30] @@ -195,73 +138,3 @@ asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: strh r1, [r0] dmb ish bx lr -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - 
cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align4 b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align4 index 11d66df..fe09454 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align4 @@ -1,22 +1,61 @@ asm_test::atomic_memcpy_load_align4::acquire: - push {r4, r5, r6, lr} - ldr r12, [r1, #28] - ldr lr, [r1, #24] - ldr r2, [r1, #20] - ldr r3, [r1, #16] - ldr r4, [r1, #12] - ldr r5, [r1, #8] - ldr r6, [r1, #4] + push {r11, lr} + sub sp, sp, #32 + mov lr, sp + add r3, lr, #28 + add r12, r1, #28 + @APP + ldr r2, [r12] + str r2, [r3] + @NO_APP + add r2, r1, #24 + add r3, lr, #24 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #20 + add r3, lr, #20 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #16 + add r3, lr, #16 + @APP + ldr r2, [r2] + str r2, [r3] + 
@NO_APP + add r2, r1, #12 + add r3, lr, #12 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #8 + add r3, lr, #8 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #4 + orr r3, lr, #4 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + mov r2, lr + @APP ldr r1, [r1] - str r5, [r0, #8] - stm r0, {r1, r6} - str r4, [r0, #12] - str r3, [r0, #16] - str r2, [r0, #20] - str lr, [r0, #24] - str r12, [r0, #28] + str r1, [r2] + @NO_APP + ldm lr!, {r1, r2, r3, r12} + stm r0!, {r1, r2, r3, r12} + ldm lr, {r1, r2, r3, r12} + stm r0, {r1, r2, r3, r12} dmb ish - pop {r4, r5, r6, pc} + add sp, sp, #32 + pop {r11, pc} asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: push {r4, r5, r6, lr} ldr r12, [r1, #28] diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align8 b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align8 index 746522d..51266e2 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_load_align8 @@ -1,22 +1,61 @@ asm_test::atomic_memcpy_load_align8::acquire: - push {r4, r5, r6, lr} - ldr r12, [r1, #28] - ldr lr, [r1, #24] - ldr r2, [r1, #20] - ldr r3, [r1, #16] - ldr r4, [r1, #12] - ldr r5, [r1, #8] - ldr r6, [r1, #4] + push {r11, lr} + sub sp, sp, #32 + mov lr, sp + add r3, lr, #28 + add r12, r1, #28 + @APP + ldr r2, [r12] + str r2, [r3] + @NO_APP + add r2, r1, #24 + add r3, lr, #24 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #20 + add r3, lr, #20 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #16 + add r3, lr, #16 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #12 + add r3, lr, #12 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #8 + add r3, lr, #8 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #4 + orr r3, lr, #4 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + mov r2, lr + @APP ldr r1, [r1] - str r5, [r0, #8] - stm 
r0, {r1, r6} - str r4, [r0, #12] - str r3, [r0, #16] - str r2, [r0, #20] - str lr, [r0, #24] - str r12, [r0, #28] + str r1, [r2] + @NO_APP + ldm lr!, {r1, r2, r3, r12} + stm r0!, {r1, r2, r3, r12} + ldm lr, {r1, r2, r3, r12} + stm r0, {r1, r2, r3, r12} dmb ish - pop {r4, r5, r6, pc} + add sp, sp, #32 + pop {r11, pc} asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: push {r4, r5, r6, r7, r8, r9, r11, lr} ldrd r8, r9, [r1, #24] diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align1 b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align1 index ea2f3c4..9be42ed 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align1 @@ -1,279 +1,210 @@ asm_test::atomic_memcpy_store_align1::release: - push {r4, r5, r6, r7, r8, lr} + push {r4, r5, r6, lr} sub sp, sp, #64 - mov r8, sp - mov r7, r0 - mov r0, r8 - mov r2, #32 - mov r6, #32 - bl memcpy - dmb ish - add r2, sp, #32 - ldm r8!, {r1, r3, r4, r5} - mov r0, r2 - stm r0!, {r1, r3, r4, r5} - ldm r8, {r1, r3, r4, r5} - stm r0, {r1, r3, r4, r5} - add r0, r7, #3 - bic r1, r0, #3 - sub r0, r1, r7 - cmp r0, #33 - bhs .LBB2_12 - cmp r0, #0 - beq .LBB2_5 - sub r6, r7, r1 - add r3, r7, #32 - mov r4, r7 -.LBB2_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB2_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB2_8 -.LBB2_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB2_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB2_6 - rsb r0, r0, #0 -.LBB2_8: - cmp r6, #0 - beq .LBB2_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB2_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB2_10 -.LBB2_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB2_12: - ldrb r0, [sp, #32] - strb r0, [r7] - ldrb r0, [sp, #33] - strb r0, [r7, #1] - ldrb r0, [sp, #34] - strb 
r0, [r7, #2] - ldrb r0, [sp, #35] - strb r0, [r7, #3] - ldrb r0, [sp, #36] - strb r0, [r7, #4] - ldrb r0, [sp, #37] - strb r0, [r7, #5] - ldrb r0, [sp, #38] - strb r0, [r7, #6] - ldrb r0, [sp, #39] - strb r0, [r7, #7] - ldrb r0, [sp, #40] - strb r0, [r7, #8] - ldrb r0, [sp, #41] - strb r0, [r7, #9] - ldrb r0, [sp, #42] - strb r0, [r7, #10] - ldrb r0, [sp, #43] - strb r0, [r7, #11] - ldrb r0, [sp, #44] - strb r0, [r7, #12] - ldrb r0, [sp, #45] - strb r0, [r7, #13] - ldrb r0, [sp, #46] - strb r0, [r7, #14] - ldrb r0, [sp, #47] - strb r0, [r7, #15] - ldrb r0, [sp, #48] - strb r0, [r7, #16] - ldrb r0, [sp, #49] - strb r0, [r7, #17] - ldrb r0, [sp, #50] - strb r0, [r7, #18] - ldrb r0, [sp, #51] - strb r0, [r7, #19] - ldrb r0, [sp, #52] - strb r0, [r7, #20] - ldrb r0, [sp, #53] - strb r0, [r7, #21] - ldrb r0, [sp, #54] - strb r0, [r7, #22] - ldrb r0, [sp, #55] - strb r0, [r7, #23] - ldrb r0, [sp, #56] - strb r0, [r7, #24] - ldrb r0, [sp, #57] - strb r0, [r7, #25] - ldrb r0, [sp, #58] - strb r0, [r7, #26] - ldrb r0, [sp, #59] - strb r0, [r7, #27] - ldrb r0, [sp, #60] - strb r0, [r7, #28] - ldrb r0, [sp, #61] - strb r0, [r7, #29] - ldrb r0, [sp, #62] - strb r0, [r7, #30] - ldrb r0, [sp, #63] - strb r0, [r7, #31] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, 
[sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r5, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r2, r2, #4 - cmp r2, #3 - str r6, [r5, -r3] - sub r3, r3, #4 - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: - mov r1, sp + mov r4, r0 + mov r0, r6 mov r2, #32 bl memcpy dmb ish - add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] + add r12, sp, #32 + ldm r6!, {r0, r2, r3, r5} + mov r1, r12 + stm r1!, {r0, r2, r3, r5} + ldm r6, {r0, r2, r3, r5} + stm r1, {r0, r2, 
r3, r5} + add r0, r4, #31 + add r1, r12, #31 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #30 + add r1, r12, #30 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #29 + add r1, r12, #29 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #28 + add r1, r12, #28 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #27 + add r1, r12, #27 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #26 + add r1, r12, #26 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #25 + add r1, r12, #25 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #24 + add r1, r12, #24 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #23 + add r1, r12, #23 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #22 + add r1, r12, #22 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #21 + add r1, r12, #21 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #20 + add r1, r12, #20 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #19 + add r1, r12, #19 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #18 + add r1, r12, #18 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #17 + add r1, r12, #17 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #16 + add r1, r12, #16 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #15 + add r1, r12, #15 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #14 + add r1, r12, #14 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #13 + add r1, r12, #13 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #12 + add r1, r12, #12 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #11 + add r1, r12, #11 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #10 + add r1, r12, #10 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #9 + add r1, r12, #9 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, 
#8 + add r1, r12, #8 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #7 + orr r1, r12, #7 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #6 + orr r1, r12, #6 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #5 + orr r1, r12, #5 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #4 + orr r1, r12, #4 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #3 + orr r1, r12, #3 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #2 + orr r1, r12, #2 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #1 + orr r1, r12, #1 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + @APP + ldrb r0, [r12] + strb r0, [r4] + @NO_APP add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} + pop {r4, r5, r6, pc} asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: push {r4, r5, r11, lr} sub sp, sp, #32 @@ -289,151 +220,3 @@ asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: bl memcpy add sp, sp, #32 pop {r4, r5, r11, pc} -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r5, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r2, r2, #4 - cmp 
r2, #3 - str r6, [r5, -r3] - sub r3, r3, #4 - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: - mov r1, sp - mov r2, #32 - bl memcpy - dmb ish - add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align16 b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align16 index c726285..04017ea 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align16 
@@ -1,19 +1,69 @@ asm_test::atomic_memcpy_store_align16::release: - push {r4, r5, r6, lr} - add r6, r1, #8 - ldm r1, {r12, lr} - ldm r6, {r2, r3, r4, r5, r6} - ldr r1, [r1, #28] + push {r4, r5, r11, lr} + add r11, sp, #8 + sub sp, sp, #64 + bfc sp, #0, #4 + ldm r1!, {r2, r4, r5, lr} + mov r12, sp + mov r3, r12 + stm r3!, {r2, r4, r5, lr} + ldm r1, {r2, r4, r5, lr} + stm r3, {r2, r4, r5, lr} + add lr, sp, #32 dmb ish - str r1, [r0, #28] - str r6, [r0, #24] - str r5, [r0, #20] - str r4, [r0, #16] - str r3, [r0, #12] - str r2, [r0, #8] - str lr, [r0, #4] - str r12, [r0] - pop {r4, r5, r6, pc} + ldm r12!, {r1, r3, r4, r5} + mov r2, lr + stm r2!, {r1, r3, r4, r5} + ldm r12, {r1, r3, r4, r5} + stm r2, {r1, r3, r4, r5} + add r1, r0, #28 + add r2, lr, #28 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #24 + add r2, lr, #24 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #20 + add r2, lr, #20 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #16 + add r2, lr, #16 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #12 + orr r2, lr, #12 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #8 + orr r2, lr, #8 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #4 + orr r2, lr, #4 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + @APP + ldr r1, [lr] + str r1, [r0] + @NO_APP + sub sp, r11, #8 + pop {r4, r5, r11, pc} asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: push {r4, r5, r11, lr} add r11, sp, #8 diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align2 b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align2 index 62e3bf5..fd07abe 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align2 @@ -1,99 +1,114 @@ asm_test::atomic_memcpy_store_align2::release: - push {r4, r5, r6, r7, r8, lr} + push {r4, r5, r6, lr} sub sp, sp, #64 - mov r8, sp 
- mov r7, r0 - mov r0, r8 + mov r6, sp + mov r4, r0 + mov r0, r6 mov r2, #32 - mov r6, #32 bl memcpy dmb ish - add r2, sp, #32 - ldm r8!, {r1, r3, r4, r5} - mov r0, r2 - stm r0!, {r1, r3, r4, r5} - ldm r8, {r1, r3, r4, r5} - stm r0, {r1, r3, r4, r5} - add r0, r7, #3 - bic r1, r0, #3 - sub r0, r1, r7 - cmp r0, #32 - bhi .LBB6_12 - cmp r0, #0 - beq .LBB6_5 - sub r6, r7, r1 - add r3, r7, #32 - mov r4, r7 -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] + add r12, sp, #32 + ldm r6!, {r0, r2, r3, r5} + mov r1, r12 + stm r1!, {r0, r2, r3, r5} + ldm r6, {r0, r2, r3, r5} + stm r1, {r0, r2, r3, r5} + add r0, r4, #30 + add r1, r12, #30 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #28 + add r1, r12, #28 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #26 + add r1, r12, #26 + @APP + 
ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #24 + add r1, r12, #24 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #22 + add r1, r12, #22 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #20 + add r1, r12, #20 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #18 + add r1, r12, #18 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #16 + add r1, r12, #16 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #14 + add r1, r12, #14 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #12 + add r1, r12, #12 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #10 + add r1, r12, #10 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #8 + add r1, r12, #8 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #6 + orr r1, r12, #6 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #4 + orr r1, r12, #4 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #2 + orr r1, r12, #2 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + @APP + ldrh r0, [r12] + strh r0, [r4] + @NO_APP add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} + pop {r4, r5, r6, pc} asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: push {r4, r5, r11, lr} sub sp, sp, #32 diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align4 b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align4 index 9dcd77e..bdf4317 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align4 @@ -1,19 +1,67 @@ asm_test::atomic_memcpy_store_align4::release: - push {r4, r5, r6, lr} - add r6, r1, #8 - ldm r1, {r12, lr} - ldm r6, {r2, r3, r4, r5, r6} - ldr r1, [r1, #28] + push {r4, r5, r11, lr} + sub sp, sp, #64 + ldm r1!, {r2, r4, r5, lr} + mov r12, sp + mov r3, r12 + stm r3!, {r2, r4, r5, lr} + ldm r1, {r2, r4, r5, lr} + stm r3, {r2, r4, r5, 
lr} + add lr, sp, #32 dmb ish - str r1, [r0, #28] - str r6, [r0, #24] - str r5, [r0, #20] - str r4, [r0, #16] - str r3, [r0, #12] - str r2, [r0, #8] - str lr, [r0, #4] - str r12, [r0] - pop {r4, r5, r6, pc} + ldm r12!, {r1, r3, r4, r5} + mov r2, lr + stm r2!, {r1, r3, r4, r5} + ldm r12, {r1, r3, r4, r5} + stm r2, {r1, r3, r4, r5} + add r1, r0, #28 + add r2, lr, #28 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #24 + add r2, lr, #24 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #20 + add r2, lr, #20 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #16 + add r2, lr, #16 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #12 + add r2, lr, #12 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #8 + add r2, lr, #8 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #4 + orr r2, lr, #4 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + @APP + ldr r1, [lr] + str r1, [r0] + @NO_APP + add sp, sp, #64 + pop {r4, r5, r11, pc} asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: push {r4, r5, r11, lr} sub sp, sp, #32 diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align8 b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align8 index 4d4db31..16607ba 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabi/atomic_memcpy_store_align8 @@ -1,19 +1,67 @@ asm_test::atomic_memcpy_store_align8::release: - push {r4, r5, r6, lr} - add r6, r1, #8 - ldm r1, {r12, lr} - ldm r6, {r2, r3, r4, r5, r6} - ldr r1, [r1, #28] + push {r4, r5, r11, lr} + sub sp, sp, #64 + ldm r1!, {r2, r4, r5, lr} + mov r12, sp + mov r3, r12 + stm r3!, {r2, r4, r5, lr} + ldm r1, {r2, r4, r5, lr} + stm r3, {r2, r4, r5, lr} + add lr, sp, #32 dmb ish - str r1, [r0, #28] - str r6, [r0, #24] - str r5, [r0, #20] - str r4, [r0, #16] - str r3, [r0, #12] - str r2, [r0, #8] - str lr, [r0, #4] - str r12, [r0] - 
pop {r4, r5, r6, pc} + ldm r12!, {r1, r3, r4, r5} + mov r2, lr + stm r2!, {r1, r3, r4, r5} + ldm r12, {r1, r3, r4, r5} + stm r2, {r1, r3, r4, r5} + add r1, r0, #28 + add r2, lr, #28 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #24 + add r2, lr, #24 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #20 + add r2, lr, #20 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #16 + add r2, lr, #16 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #12 + add r2, lr, #12 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #8 + add r2, lr, #8 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #4 + orr r2, lr, #4 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + @APP + ldr r1, [lr] + str r1, [r0] + @NO_APP + add sp, sp, #64 + pop {r4, r5, r11, pc} asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: push {r4, r5, r11, lr} sub sp, sp, #32 diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align1 b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align1 index 3e6d297..de7e896 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align1 @@ -1,377 +1,204 @@ asm_test::atomic_memcpy_load_align1::acquire: - push {r4, r5, r6, lr} + push {r11, lr} sub sp, sp, #32 - add r2, r1, #3 - bic r12, r2, #3 - sub r3, r12, r1 - cmp r3, #33 - bhs .LBB0_6 - cmp r3, #0 - beq .LBB0_7 - sub r2, r1, r12 - add lr, r1, #32 - mov r4, sp - mov r5, r1 -.LBB0_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB0_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB0_8 - cmp r2, #0 - bne .LBB0_11 - b .LBB0_13 -.LBB0_6: - ldrb r2, [r1] - strb r2, [sp] - ldrb r2, [r1, #1] - strb r2, [sp, #1] - ldrb r2, [r1, #2] - strb r2, [sp, #2] - ldrb r2, [r1, #3] - strb r2, [sp, #3] - ldrb r2, [r1, #4] - strb r2, [sp, #4] - ldrb r2, [r1, #5] - strb r2, [sp, #5] - ldrb 
r2, [r1, #6] - strb r2, [sp, #6] - ldrb r2, [r1, #7] - strb r2, [sp, #7] - ldrb r2, [r1, #8] - strb r2, [sp, #8] - ldrb r2, [r1, #9] - strb r2, [sp, #9] - ldrb r2, [r1, #10] - strb r2, [sp, #10] - ldrb r2, [r1, #11] - strb r2, [sp, #11] - ldrb r2, [r1, #12] - strb r2, [sp, #12] - ldrb r2, [r1, #13] - strb r2, [sp, #13] - ldrb r2, [r1, #14] - strb r2, [sp, #14] - ldrb r2, [r1, #15] - strb r2, [sp, #15] - ldrb r2, [r1, #16] - strb r2, [sp, #16] - ldrb r2, [r1, #17] - strb r2, [sp, #17] - ldrb r2, [r1, #18] - strb r2, [sp, #18] - ldrb r2, [r1, #19] - strb r2, [sp, #19] - ldrb r2, [r1, #20] - strb r2, [sp, #20] - ldrb r2, [r1, #21] - strb r2, [sp, #21] - ldrb r2, [r1, #22] - strb r2, [sp, #22] - ldrb r2, [r1, #23] - strb r2, [sp, #23] - ldrb r2, [r1, #24] - strb r2, [sp, #24] - ldrb r2, [r1, #25] - strb r2, [sp, #25] - ldrb r2, [r1, #26] - strb r2, [sp, #26] - ldrb r2, [r1, #27] - strb r2, [sp, #27] - ldrb r2, [r1, #28] - strb r2, [sp, #28] - ldrb r2, [r1, #29] - strb r2, [sp, #29] - ldrb r2, [r1, #30] - strb r2, [sp, #30] - ldrb r1, [r1, #31] - strb r1, [sp, #31] - b .LBB0_13 -.LBB0_7: - mov r2, #32 -.LBB0_8: - sub r3, r1, r12 - mov r5, sp -.LBB0_9: - ldr r6, [r1, -r3] - sub r2, r2, #4 - cmp r2, #3 - str r6, [r5, -r3] - sub r3, r3, #4 - bhi .LBB0_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB0_13 -.LBB0_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB0_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB0_12 -.LBB0_13: + mov lr, r1 + add r12, r1, #31 mov r1, sp + add r3, r1, #31 + @APP + ldrb r2, [r12] + strb r2, [r3] + @NO_APP + add r2, lr, #30 + add r3, r1, #30 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #29 + add r3, r1, #29 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #28 + add r3, r1, #28 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #27 + add r3, r1, #27 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #26 + add r3, r1, #26 + @APP + ldrb r2, [r2] + strb r2, 
[r3] + @NO_APP + add r2, lr, #25 + add r3, r1, #25 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #24 + add r3, r1, #24 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #23 + add r3, r1, #23 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #22 + add r3, r1, #22 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #21 + add r3, r1, #21 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #20 + add r3, r1, #20 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #19 + add r3, r1, #19 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #18 + add r3, r1, #18 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #17 + add r3, r1, #17 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #16 + add r3, r1, #16 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #15 + add r3, r1, #15 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #14 + add r3, r1, #14 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #13 + add r3, r1, #13 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #12 + add r3, r1, #12 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #11 + add r3, r1, #11 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #10 + add r3, r1, #10 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #9 + add r3, r1, #9 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #8 + add r3, r1, #8 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #7 + orr r3, r1, #7 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #6 + orr r3, r1, #6 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #5 + orr r3, r1, #5 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #4 + orr r3, r1, #4 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #3 + orr r3, r1, #3 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #2 + orr r3, r1, #2 + @APP + 
ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add r2, lr, #1 + orr r3, r1, #1 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + mov r2, r1 + @APP + ldrb r3, [lr] + strb r3, [r2] + @NO_APP mov r2, #32 bl memcpy dmb ish add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB2_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB2_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB2_8 -.LBB2_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB2_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB2_6 - rsb r0, r0, #0 -.LBB2_8: - cmp r6, #0 - beq .LBB2_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB2_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB2_10 -.LBB2_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB2_12: - ldrb r0, [sp, #32] - strb r0, [r7] - ldrb r0, [sp, #33] - strb r0, [r7, #1] - ldrb r0, [sp, #34] - strb r0, [r7, #2] - ldrb r0, [sp, #35] - strb r0, [r7, #3] - ldrb r0, [sp, #36] - strb r0, [r7, #4] - ldrb r0, [sp, #37] - strb r0, [r7, #5] - ldrb r0, [sp, #38] - strb r0, [r7, #6] - ldrb r0, [sp, #39] - strb r0, [r7, #7] - ldrb r0, [sp, #40] - strb r0, [r7, #8] - ldrb r0, [sp, #41] - strb r0, [r7, #9] - ldrb r0, [sp, #42] - strb r0, [r7, #10] - ldrb r0, [sp, #43] - strb r0, [r7, #11] - ldrb r0, [sp, #44] - strb r0, [r7, #12] - ldrb r0, [sp, #45] - strb r0, [r7, #13] - ldrb r0, [sp, #46] - strb r0, [r7, #14] - ldrb r0, [sp, #47] - strb r0, [r7, #15] - ldrb r0, [sp, #48] - strb r0, [r7, #16] - ldrb r0, [sp, #49] - strb r0, [r7, #17] - ldrb r0, [sp, #50] - strb r0, [r7, #18] - ldrb r0, [sp, #51] - strb r0, [r7, #19] - ldrb r0, [sp, #52] - strb r0, [r7, #20] - ldrb r0, [sp, #53] - strb r0, [r7, #21] - ldrb r0, [sp, #54] - strb r0, [r7, #22] - ldrb r0, [sp, #55] - strb r0, [r7, #23] - ldrb r0, [sp, #56] - strb r0, [r7, #24] - ldrb r0, [sp, #57] - strb r0, [r7, #25] - ldrb r0, [sp, #58] - strb r0, [r7, #26] - ldrb r0, [sp, #59] - strb r0, [r7, #27] - ldrb r0, 
[sp, #60] - strb r0, [r7, #28] - ldrb r0, [sp, #61] - strb r0, [r7, #29] - ldrb r0, [sp, #62] - strb r0, [r7, #30] - ldrb r0, [sp, #63] - strb r0, [r7, #31] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r5, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r2, r2, #4 - cmp r2, #3 - str r6, [r5, -r3] - sub r3, r3, #4 - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: - mov r1, sp - mov r2, #32 - bl memcpy - dmb ish - add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb 
r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} + pop {r11, pc} asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} sub sp, sp, #76 @@ -480,253 +307,3 @@ asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: dmb ish add sp, sp, #76 pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} -.LBB2_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB2_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB2_8 -.LBB2_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB2_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB2_6 - rsb r0, r0, #0 -.LBB2_8: - cmp r6, #0 - beq .LBB2_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB2_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB2_10 -.LBB2_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB2_12: - ldrb r0, [sp, #32] - strb r0, [r7] - ldrb r0, [sp, #33] - strb r0, [r7, #1] - ldrb r0, [sp, #34] - strb r0, [r7, #2] - ldrb r0, [sp, #35] - strb r0, [r7, #3] - ldrb r0, [sp, #36] - strb r0, [r7, #4] - ldrb r0, [sp, #37] - strb r0, [r7, #5] - ldrb r0, [sp, #38] - strb r0, [r7, #6] - ldrb r0, [sp, #39] - 
strb r0, [r7, #7] - ldrb r0, [sp, #40] - strb r0, [r7, #8] - ldrb r0, [sp, #41] - strb r0, [r7, #9] - ldrb r0, [sp, #42] - strb r0, [r7, #10] - ldrb r0, [sp, #43] - strb r0, [r7, #11] - ldrb r0, [sp, #44] - strb r0, [r7, #12] - ldrb r0, [sp, #45] - strb r0, [r7, #13] - ldrb r0, [sp, #46] - strb r0, [r7, #14] - ldrb r0, [sp, #47] - strb r0, [r7, #15] - ldrb r0, [sp, #48] - strb r0, [r7, #16] - ldrb r0, [sp, #49] - strb r0, [r7, #17] - ldrb r0, [sp, #50] - strb r0, [r7, #18] - ldrb r0, [sp, #51] - strb r0, [r7, #19] - ldrb r0, [sp, #52] - strb r0, [r7, #20] - ldrb r0, [sp, #53] - strb r0, [r7, #21] - ldrb r0, [sp, #54] - strb r0, [r7, #22] - ldrb r0, [sp, #55] - strb r0, [r7, #23] - ldrb r0, [sp, #56] - strb r0, [r7, #24] - ldrb r0, [sp, #57] - strb r0, [r7, #25] - ldrb r0, [sp, #58] - strb r0, [r7, #26] - ldrb r0, [sp, #59] - strb r0, [r7, #27] - ldrb r0, [sp, #60] - strb r0, [r7, #28] - ldrb r0, [sp, #61] - strb r0, [r7, #29] - ldrb r0, [sp, #62] - strb r0, [r7, #30] - ldrb r0, [sp, #63] - strb r0, [r7, #31] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r5, sp 
-.LBB4_9: - ldr r6, [r1, -r3] - sub r2, r2, #4 - cmp r2, #3 - str r6, [r5, -r3] - sub r3, r3, #4 - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: - mov r1, sp - mov r2, #32 - bl memcpy - dmb ish - add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align16 b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align16 index d014a2b..214017e 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align16 +++ 
b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align16 @@ -1,22 +1,63 @@ asm_test::atomic_memcpy_load_align16::acquire: - push {r4, r5, r6, lr} - ldr r12, [r1, #28] - ldr lr, [r1, #24] - ldr r2, [r1, #20] - ldr r3, [r1, #16] - ldr r4, [r1, #12] - ldr r5, [r1, #8] - ldr r6, [r1, #4] + push {r11, lr} + mov r11, sp + sub sp, sp, #40 + bfc sp, #0, #4 + mov lr, sp + add r3, lr, #28 + add r12, r1, #28 + @APP + ldr r2, [r12] + str r2, [r3] + @NO_APP + add r2, r1, #24 + add r3, lr, #24 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #20 + add r3, lr, #20 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #16 + add r3, lr, #16 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #12 + orr r3, lr, #12 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #8 + orr r3, lr, #8 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #4 + orr r3, lr, #4 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + mov r2, lr + @APP ldr r1, [r1] - str r5, [r0, #8] - stm r0, {r1, r6} - str r4, [r0, #12] - str r3, [r0, #16] - str r2, [r0, #20] - str lr, [r0, #24] - str r12, [r0, #28] + str r1, [r2] + @NO_APP + ldm lr!, {r1, r2, r3, r12} + stm r0!, {r1, r2, r3, r12} + ldm lr, {r1, r2, r3, r12} + stm r0, {r1, r2, r3, r12} dmb ish - pop {r4, r5, r6, pc} + mov sp, r11 + pop {r11, pc} asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: push {r4, r5, r6, r7, r8, r9, r11, lr} ldrd r8, r9, [r1] diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align2 b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align2 index 1e8c2b9..0459c03 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align2 @@ -1,165 +1,108 @@ asm_test::atomic_memcpy_load_align2::acquire: - push {r4, r5, r6, lr} + push {r11, lr} sub sp, sp, #32 - add r2, r1, #3 - bic r12, r2, #3 - sub r3, r12, r1 - cmp 
r3, #32 - bhi .LBB4_6 - cmp r3, #0 - beq .LBB4_7 - sub r2, r1, r12 - add lr, r1, #32 - mov r4, sp - mov r5, r1 -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r5, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r2, r2, #4 - cmp r2, #3 - str r6, [r5, -r3] - sub r3, r3, #4 - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: + mov lr, r1 + add r12, r1, #30 mov r1, sp + add r3, r1, #30 + @APP + ldrh r2, [r12] + strh r2, [r3] + @NO_APP + add r2, lr, #28 + add r3, r1, #28 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #26 + add r3, r1, #26 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #24 + add r3, r1, #24 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #22 + add r3, r1, #22 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #20 + add r3, r1, #20 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #18 + add r3, r1, #18 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #16 + 
add r3, r1, #16 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #14 + add r3, r1, #14 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #12 + add r3, r1, #12 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #10 + add r3, r1, #10 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #8 + add r3, r1, #8 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #6 + orr r3, r1, #6 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #4 + orr r3, r1, #4 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add r2, lr, #2 + orr r3, r1, #2 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + mov r2, r1 + @APP + ldrh r3, [lr] + strh r3, [r2] + @NO_APP mov r2, #32 bl memcpy dmb ish add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] 
- strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} + pop {r11, pc} asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: push {r4, r5, r11, lr} ldrh r2, [r1, #28] @@ -196,73 +139,3 @@ asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: strh r3, [r0] dmb ish pop {r4, r5, r11, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align4 b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align4 index 96bf361..90d629e 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align4 @@ -1,22 +1,61 @@ 
asm_test::atomic_memcpy_load_align4::acquire: - push {r4, r5, r6, lr} - ldr r12, [r1, #28] - ldr lr, [r1, #24] - ldr r2, [r1, #20] - ldr r3, [r1, #16] - ldr r4, [r1, #12] - ldr r5, [r1, #8] - ldr r6, [r1, #4] + push {r11, lr} + sub sp, sp, #32 + mov lr, sp + add r3, lr, #28 + add r12, r1, #28 + @APP + ldr r2, [r12] + str r2, [r3] + @NO_APP + add r2, r1, #24 + add r3, lr, #24 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #20 + add r3, lr, #20 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #16 + add r3, lr, #16 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #12 + add r3, lr, #12 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #8 + add r3, lr, #8 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #4 + orr r3, lr, #4 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + mov r2, lr + @APP ldr r1, [r1] - str r5, [r0, #8] - stm r0, {r1, r6} - str r4, [r0, #12] - str r3, [r0, #16] - str r2, [r0, #20] - str lr, [r0, #24] - str r12, [r0, #28] + str r1, [r2] + @NO_APP + ldm lr!, {r1, r2, r3, r12} + stm r0!, {r1, r2, r3, r12} + ldm lr, {r1, r2, r3, r12} + stm r0, {r1, r2, r3, r12} dmb ish - pop {r4, r5, r6, pc} + add sp, sp, #32 + pop {r11, pc} asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: push {r4, r5, r6, lr} ldr lr, [r1, #20] diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align8 b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align8 index dacaece..d9e9278 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_load_align8 @@ -1,22 +1,61 @@ asm_test::atomic_memcpy_load_align8::acquire: - push {r4, r5, r6, lr} - ldr r12, [r1, #28] - ldr lr, [r1, #24] - ldr r2, [r1, #20] - ldr r3, [r1, #16] - ldr r4, [r1, #12] - ldr r5, [r1, #8] - ldr r6, [r1, #4] + push {r11, lr} + sub sp, sp, #32 + mov lr, sp + add r3, lr, #28 + add r12, r1, #28 + @APP + ldr 
r2, [r12] + str r2, [r3] + @NO_APP + add r2, r1, #24 + add r3, lr, #24 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #20 + add r3, lr, #20 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #16 + add r3, lr, #16 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #12 + add r3, lr, #12 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #8 + add r3, lr, #8 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add r2, r1, #4 + orr r3, lr, #4 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + mov r2, lr + @APP ldr r1, [r1] - str r5, [r0, #8] - stm r0, {r1, r6} - str r4, [r0, #12] - str r3, [r0, #16] - str r2, [r0, #20] - str lr, [r0, #24] - str r12, [r0, #28] + str r1, [r2] + @NO_APP + ldm lr!, {r1, r2, r3, r12} + stm r0!, {r1, r2, r3, r12} + ldm lr, {r1, r2, r3, r12} + stm r0, {r1, r2, r3, r12} dmb ish - pop {r4, r5, r6, pc} + add sp, sp, #32 + pop {r11, pc} asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: push {r4, r5, r6, r7, r8, r9, r11, lr} ldrd r8, r9, [r1] diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align1 b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align1 index ea2f3c4..9be42ed 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align1 @@ -1,279 +1,210 @@ asm_test::atomic_memcpy_store_align1::release: - push {r4, r5, r6, r7, r8, lr} + push {r4, r5, r6, lr} sub sp, sp, #64 - mov r8, sp - mov r7, r0 - mov r0, r8 - mov r2, #32 - mov r6, #32 - bl memcpy - dmb ish - add r2, sp, #32 - ldm r8!, {r1, r3, r4, r5} - mov r0, r2 - stm r0!, {r1, r3, r4, r5} - ldm r8, {r1, r3, r4, r5} - stm r0, {r1, r3, r4, r5} - add r0, r7, #3 - bic r1, r0, #3 - sub r0, r1, r7 - cmp r0, #33 - bhs .LBB2_12 - cmp r0, #0 - beq .LBB2_5 - sub r6, r7, r1 - add r3, r7, #32 - mov r4, r7 -.LBB2_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo 
.LBB2_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB2_8 -.LBB2_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB2_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB2_6 - rsb r0, r0, #0 -.LBB2_8: - cmp r6, #0 - beq .LBB2_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB2_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB2_10 -.LBB2_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB2_12: - ldrb r0, [sp, #32] - strb r0, [r7] - ldrb r0, [sp, #33] - strb r0, [r7, #1] - ldrb r0, [sp, #34] - strb r0, [r7, #2] - ldrb r0, [sp, #35] - strb r0, [r7, #3] - ldrb r0, [sp, #36] - strb r0, [r7, #4] - ldrb r0, [sp, #37] - strb r0, [r7, #5] - ldrb r0, [sp, #38] - strb r0, [r7, #6] - ldrb r0, [sp, #39] - strb r0, [r7, #7] - ldrb r0, [sp, #40] - strb r0, [r7, #8] - ldrb r0, [sp, #41] - strb r0, [r7, #9] - ldrb r0, [sp, #42] - strb r0, [r7, #10] - ldrb r0, [sp, #43] - strb r0, [r7, #11] - ldrb r0, [sp, #44] - strb r0, [r7, #12] - ldrb r0, [sp, #45] - strb r0, [r7, #13] - ldrb r0, [sp, #46] - strb r0, [r7, #14] - ldrb r0, [sp, #47] - strb r0, [r7, #15] - ldrb r0, [sp, #48] - strb r0, [r7, #16] - ldrb r0, [sp, #49] - strb r0, [r7, #17] - ldrb r0, [sp, #50] - strb r0, [r7, #18] - ldrb r0, [sp, #51] - strb r0, [r7, #19] - ldrb r0, [sp, #52] - strb r0, [r7, #20] - ldrb r0, [sp, #53] - strb r0, [r7, #21] - ldrb r0, [sp, #54] - strb r0, [r7, #22] - ldrb r0, [sp, #55] - strb r0, [r7, #23] - ldrb r0, [sp, #56] - strb r0, [r7, #24] - ldrb r0, [sp, #57] - strb r0, [r7, #25] - ldrb r0, [sp, #58] - strb r0, [r7, #26] - ldrb r0, [sp, #59] - strb r0, [r7, #27] - ldrb r0, [sp, #60] - strb r0, [r7, #28] - ldrb r0, [sp, #61] - strb r0, [r7, #29] - ldrb r0, [sp, #62] - strb r0, [r7, #30] - ldrb r0, [sp, #63] - strb r0, [r7, #31] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs 
.LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r5, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r2, r2, #4 - cmp r2, #3 - str r6, [r5, -r3] - sub r3, r3, #4 - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: - mov r1, sp + mov r4, r0 + mov r0, r6 mov r2, #32 bl memcpy dmb ish - add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, 
#22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] + add r12, sp, #32 + ldm r6!, {r0, r2, r3, r5} + mov r1, r12 + stm r1!, {r0, r2, r3, r5} + ldm r6, {r0, r2, r3, r5} + stm r1, {r0, r2, r3, r5} + add r0, r4, #31 + add r1, r12, #31 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #30 + add r1, r12, #30 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #29 + add r1, r12, #29 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #28 + add r1, r12, #28 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #27 + add r1, r12, #27 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #26 + add r1, r12, #26 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #25 + add r1, r12, #25 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #24 + add r1, r12, #24 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #23 + add r1, r12, #23 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #22 + add r1, r12, #22 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #21 + add r1, r12, #21 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #20 + add r1, r12, #20 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #19 + add r1, r12, #19 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #18 + add r1, r12, #18 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #17 + add r1, r12, #17 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #16 + add r1, r12, #16 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #15 + 
add r1, r12, #15 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #14 + add r1, r12, #14 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #13 + add r1, r12, #13 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #12 + add r1, r12, #12 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #11 + add r1, r12, #11 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #10 + add r1, r12, #10 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #9 + add r1, r12, #9 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #8 + add r1, r12, #8 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #7 + orr r1, r12, #7 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #6 + orr r1, r12, #6 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #5 + orr r1, r12, #5 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #4 + orr r1, r12, #4 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #3 + orr r1, r12, #3 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #2 + orr r1, r12, #2 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + add r0, r4, #1 + orr r1, r12, #1 + @APP + ldrb r1, [r1] + strb r1, [r0] + @NO_APP + @APP + ldrb r0, [r12] + strb r0, [r4] + @NO_APP add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} + pop {r4, r5, r6, pc} asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: push {r4, r5, r11, lr} sub sp, sp, #32 @@ -289,151 +220,3 @@ asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: bl memcpy add sp, sp, #32 pop {r4, r5, r11, pc} -.LBB4_3: - ldrb r6, [r5] - adds r2, r2, #1 - strb r6, [r4], #1 - add r5, r5, #1 - blo .LBB4_3 - sub r2, lr, r12 - cmp r2, #4 - bhs .LBB4_8 - cmp r2, #0 - bne .LBB4_11 - b .LBB4_13 -.LBB4_6: - ldrh r2, [r1, #30] - strh r2, [sp, #30] - ldrh r2, [r1, #28] - strh r2, [sp, #28] - ldrh r2, [r1, #26] - strh r2, [sp, #26] - ldrh r2, [r1, #24] - strh r2, [sp, #24] - ldrh r2, [r1, #22] - 
strh r2, [sp, #22] - ldrh r2, [r1, #20] - strh r2, [sp, #20] - ldrh r2, [r1, #18] - strh r2, [sp, #18] - ldrh r2, [r1, #16] - strh r2, [sp, #16] - ldrh r2, [r1, #14] - strh r2, [sp, #14] - ldrh r2, [r1, #12] - strh r2, [sp, #12] - ldrh r2, [r1, #10] - strh r2, [sp, #10] - ldrh r2, [r1, #8] - strh r2, [sp, #8] - ldrh r2, [r1, #6] - strh r2, [sp, #6] - ldrh r2, [r1, #4] - strh r2, [sp, #4] - ldrh r2, [r1, #2] - strh r2, [sp, #2] - ldrh r1, [r1] - strh r1, [sp] - b .LBB4_13 -.LBB4_7: - mov r2, #32 -.LBB4_8: - sub r3, r1, r12 - mov r5, sp -.LBB4_9: - ldr r6, [r1, -r3] - sub r2, r2, #4 - cmp r2, #3 - str r6, [r5, -r3] - sub r3, r3, #4 - bhi .LBB4_9 - rsb r3, r3, #0 - cmp r2, #0 - beq .LBB4_13 -.LBB4_11: - add r1, r1, r3 - mov r6, sp - add r3, r6, r3 -.LBB4_12: - ldrb r6, [r1] - add r1, r1, #1 - strb r6, [r3], #1 - subs r2, r2, #1 - bne .LBB4_12 -.LBB4_13: - mov r1, sp - mov r2, #32 - bl memcpy - dmb ish - add sp, sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, 
[sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align16 b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align16 index c726285..04017ea 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align16 @@ -1,19 +1,69 @@ asm_test::atomic_memcpy_store_align16::release: - push {r4, r5, r6, lr} - add r6, r1, #8 - ldm r1, {r12, lr} - ldm r6, {r2, r3, r4, r5, r6} - ldr r1, [r1, #28] + push {r4, r5, r11, lr} + add r11, sp, #8 + sub sp, sp, #64 + bfc sp, #0, #4 + ldm r1!, {r2, r4, r5, lr} + mov r12, sp + mov r3, r12 + stm r3!, {r2, r4, r5, lr} + ldm r1, {r2, r4, r5, lr} + stm r3, {r2, r4, r5, lr} + add lr, sp, #32 dmb ish - str r1, [r0, #28] - str r6, [r0, #24] - str r5, [r0, #20] - str r4, [r0, #16] - str r3, [r0, #12] - str r2, [r0, #8] - str lr, [r0, #4] - str r12, [r0] - pop {r4, r5, r6, pc} + ldm r12!, {r1, r3, r4, r5} + mov r2, lr + stm r2!, {r1, r3, r4, r5} + ldm r12, {r1, r3, r4, r5} + stm r2, {r1, r3, r4, r5} + add r1, r0, #28 + add r2, lr, #28 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #24 + add r2, lr, #24 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #20 + add r2, lr, #20 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #16 + add r2, lr, #16 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #12 + orr r2, lr, #12 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #8 + orr r2, lr, #8 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #4 + orr r2, lr, #4 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + @APP + ldr r1, [lr] + str r1, [r0] + @NO_APP + sub sp, r11, #8 + pop {r4, r5, r11, pc} 
asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: push {r4, r5, r11, lr} add r11, sp, #8 diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align2 b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align2 index 62e3bf5..fd07abe 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align2 @@ -1,99 +1,114 @@ asm_test::atomic_memcpy_store_align2::release: - push {r4, r5, r6, r7, r8, lr} + push {r4, r5, r6, lr} sub sp, sp, #64 - mov r8, sp - mov r7, r0 - mov r0, r8 + mov r6, sp + mov r4, r0 + mov r0, r6 mov r2, #32 - mov r6, #32 bl memcpy dmb ish - add r2, sp, #32 - ldm r8!, {r1, r3, r4, r5} - mov r0, r2 - stm r0!, {r1, r3, r4, r5} - ldm r8, {r1, r3, r4, r5} - stm r0, {r1, r3, r4, r5} - add r0, r7, #3 - bic r1, r0, #3 - sub r0, r1, r7 - cmp r0, #32 - bhi .LBB6_12 - cmp r0, #0 - beq .LBB6_5 - sub r6, r7, r1 - add r3, r7, #32 - mov r4, r7 -.LBB6_3: - ldrb r5, [r2], #1 - adds r6, r6, #1 - strb r5, [r4] - add r4, r4, #1 - blo .LBB6_3 - sub r6, r3, r1 - cmp r6, #4 - blo .LBB6_8 -.LBB6_5: - sub r0, r7, r1 - add r1, sp, #32 -.LBB6_6: - ldr r2, [r1, -r0] - sub r6, r6, #4 - cmp r6, #3 - str r2, [r7, -r0] - sub r0, r0, #4 - bhi .LBB6_6 - rsb r0, r0, #0 -.LBB6_8: - cmp r6, #0 - beq .LBB6_11 - add r1, sp, #32 - add r1, r1, r0 - add r0, r7, r0 -.LBB6_10: - ldrb r2, [r1], #1 - subs r6, r6, #1 - strb r2, [r0] - add r0, r0, #1 - bne .LBB6_10 -.LBB6_11: - add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} -.LBB6_12: - ldrh r0, [sp, #62] - strh r0, [r7, #30] - ldrh r0, [sp, #60] - strh r0, [r7, #28] - ldrh r0, [sp, #58] - strh r0, [r7, #26] - ldrh r0, [sp, #56] - strh r0, [r7, #24] - ldrh r0, [sp, #54] - strh r0, [r7, #22] - ldrh r0, [sp, #52] - strh r0, [r7, #20] - ldrh r0, [sp, #50] - strh r0, [r7, #18] - ldrh r0, [sp, #48] - strh r0, [r7, #16] - ldrh r0, [sp, #46] - strh r0, [r7, #14] - ldrh r0, [sp, #44] - 
strh r0, [r7, #12] - ldrh r0, [sp, #42] - strh r0, [r7, #10] - ldrh r0, [sp, #40] - strh r0, [r7, #8] - ldrh r0, [sp, #38] - strh r0, [r7, #6] - ldrh r0, [sp, #36] - strh r0, [r7, #4] - ldrh r0, [sp, #34] - strh r0, [r7, #2] - ldrh r0, [sp, #32] - strh r0, [r7] + add r12, sp, #32 + ldm r6!, {r0, r2, r3, r5} + mov r1, r12 + stm r1!, {r0, r2, r3, r5} + ldm r6, {r0, r2, r3, r5} + stm r1, {r0, r2, r3, r5} + add r0, r4, #30 + add r1, r12, #30 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #28 + add r1, r12, #28 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #26 + add r1, r12, #26 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #24 + add r1, r12, #24 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #22 + add r1, r12, #22 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #20 + add r1, r12, #20 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #18 + add r1, r12, #18 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #16 + add r1, r12, #16 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #14 + add r1, r12, #14 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #12 + add r1, r12, #12 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #10 + add r1, r12, #10 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #8 + add r1, r12, #8 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #6 + orr r1, r12, #6 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #4 + orr r1, r12, #4 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + add r0, r4, #2 + orr r1, r12, #2 + @APP + ldrh r1, [r1] + strh r1, [r0] + @NO_APP + @APP + ldrh r0, [r12] + strh r0, [r4] + @NO_APP add sp, sp, #64 - pop {r4, r5, r6, r7, r8, pc} + pop {r4, r5, r6, pc} asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: push {r4, r5, r11, lr} sub sp, sp, #32 diff --git 
a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align4 b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align4 index 9dcd77e..bdf4317 100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align4 @@ -1,19 +1,67 @@ asm_test::atomic_memcpy_store_align4::release: - push {r4, r5, r6, lr} - add r6, r1, #8 - ldm r1, {r12, lr} - ldm r6, {r2, r3, r4, r5, r6} - ldr r1, [r1, #28] + push {r4, r5, r11, lr} + sub sp, sp, #64 + ldm r1!, {r2, r4, r5, lr} + mov r12, sp + mov r3, r12 + stm r3!, {r2, r4, r5, lr} + ldm r1, {r2, r4, r5, lr} + stm r3, {r2, r4, r5, lr} + add lr, sp, #32 dmb ish - str r1, [r0, #28] - str r6, [r0, #24] - str r5, [r0, #20] - str r4, [r0, #16] - str r3, [r0, #12] - str r2, [r0, #8] - str lr, [r0, #4] - str r12, [r0] - pop {r4, r5, r6, pc} + ldm r12!, {r1, r3, r4, r5} + mov r2, lr + stm r2!, {r1, r3, r4, r5} + ldm r12, {r1, r3, r4, r5} + stm r2, {r1, r3, r4, r5} + add r1, r0, #28 + add r2, lr, #28 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #24 + add r2, lr, #24 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #20 + add r2, lr, #20 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #16 + add r2, lr, #16 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #12 + add r2, lr, #12 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #8 + add r2, lr, #8 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #4 + orr r2, lr, #4 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + @APP + ldr r1, [lr] + str r1, [r0] + @NO_APP + add sp, sp, #64 + pop {r4, r5, r11, pc} asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: push {r4, r5, r11, lr} sub sp, sp, #32 diff --git a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align8 b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align8 index 4d4db31..16607ba 
100644 --- a/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/armv7-unknown-linux-gnueabihf/atomic_memcpy_store_align8 @@ -1,19 +1,67 @@ asm_test::atomic_memcpy_store_align8::release: - push {r4, r5, r6, lr} - add r6, r1, #8 - ldm r1, {r12, lr} - ldm r6, {r2, r3, r4, r5, r6} - ldr r1, [r1, #28] + push {r4, r5, r11, lr} + sub sp, sp, #64 + ldm r1!, {r2, r4, r5, lr} + mov r12, sp + mov r3, r12 + stm r3!, {r2, r4, r5, lr} + ldm r1, {r2, r4, r5, lr} + stm r3, {r2, r4, r5, lr} + add lr, sp, #32 dmb ish - str r1, [r0, #28] - str r6, [r0, #24] - str r5, [r0, #20] - str r4, [r0, #16] - str r3, [r0, #12] - str r2, [r0, #8] - str lr, [r0, #4] - str r12, [r0] - pop {r4, r5, r6, pc} + ldm r12!, {r1, r3, r4, r5} + mov r2, lr + stm r2!, {r1, r3, r4, r5} + ldm r12, {r1, r3, r4, r5} + stm r2, {r1, r3, r4, r5} + add r1, r0, #28 + add r2, lr, #28 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #24 + add r2, lr, #24 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #20 + add r2, lr, #20 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #16 + add r2, lr, #16 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #12 + add r2, lr, #12 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #8 + add r2, lr, #8 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add r1, r0, #4 + orr r2, lr, #4 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + @APP + ldr r1, [lr] + str r1, [r0] + @NO_APP + add sp, sp, #64 + pop {r4, r5, r11, pc} asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: push {r4, r5, r11, lr} sub sp, sp, #32 diff --git a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align1 b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align1 index ea78824..ed6642f 100644 --- a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align1 @@ -1,158 +1,218 @@ 
asm_test::atomic_memcpy_load_align1::acquire: - push ebp - push ebx - push edi push esi - sub esp, 36 - mov eax, dword, ptr, [esp, +, 56] - mov ebp, dword, ptr, [esp, +, 60] - lea edi, [ebp, +, 3] - and edi, -4 - mov esi, edi - sub esi, ebp - cmp esi, 33 - jae .LBB0_1 - test esi, esi - je .LBB0_4 - lea edx, [ebp, +, 32] - xor ebx, ebx -.LBB0_12: - movzx ecx, byte, ptr, [ebp, +, ebx] - mov byte, ptr, [esp, +, ebx], cl - inc ebx - cmp esi, ebx - jne .LBB0_12 - sub edx, edi - cmp edx, 4 - jae .LBB0_5 - jmp .LBB0_7 -.LBB0_1: - mov dl, byte, ptr, [ebp] - mov byte, ptr, [esp], dl - mov dl, byte, ptr, [ebp, +, 1] - mov byte, ptr, [esp, +, 1], dl - mov dl, byte, ptr, [ebp, +, 2] - mov byte, ptr, [esp, +, 2], dl - mov dl, byte, ptr, [ebp, +, 3] - mov byte, ptr, [esp, +, 3], dl - mov dl, byte, ptr, [ebp, +, 4] - mov byte, ptr, [esp, +, 4], dl - mov dl, byte, ptr, [ebp, +, 5] - mov byte, ptr, [esp, +, 5], dl - mov dl, byte, ptr, [ebp, +, 6] - mov byte, ptr, [esp, +, 6], dl - mov dl, byte, ptr, [ebp, +, 7] - mov byte, ptr, [esp, +, 7], dl - mov dl, byte, ptr, [ebp, +, 8] - mov byte, ptr, [esp, +, 8], dl - mov dl, byte, ptr, [ebp, +, 9] - mov byte, ptr, [esp, +, 9], dl - mov dl, byte, ptr, [ebp, +, 10] - mov byte, ptr, [esp, +, 10], dl - mov dl, byte, ptr, [ebp, +, 11] - mov byte, ptr, [esp, +, 11], dl - mov dl, byte, ptr, [ebp, +, 12] - mov byte, ptr, [esp, +, 12], dl - mov dl, byte, ptr, [ebp, +, 13] - mov byte, ptr, [esp, +, 13], dl - mov dl, byte, ptr, [ebp, +, 14] - mov byte, ptr, [esp, +, 14], dl - mov dl, byte, ptr, [ebp, +, 15] - mov byte, ptr, [esp, +, 15], dl - mov dl, byte, ptr, [ebp, +, 16] - mov byte, ptr, [esp, +, 16], dl - mov dl, byte, ptr, [ebp, +, 17] - mov byte, ptr, [esp, +, 17], dl - mov dl, byte, ptr, [ebp, +, 18] - mov byte, ptr, [esp, +, 18], dl - mov dl, byte, ptr, [ebp, +, 19] - mov byte, ptr, [esp, +, 19], dl - mov dl, byte, ptr, [ebp, +, 20] - mov byte, ptr, [esp, +, 20], dl - mov dl, byte, ptr, [ebp, +, 21] - mov byte, ptr, [esp, +, 21], dl - mov 
dl, byte, ptr, [ebp, +, 22] - mov byte, ptr, [esp, +, 22], dl - mov dl, byte, ptr, [ebp, +, 23] - mov byte, ptr, [esp, +, 23], dl - mov dl, byte, ptr, [ebp, +, 24] - mov byte, ptr, [esp, +, 24], dl - mov dl, byte, ptr, [ebp, +, 25] - mov byte, ptr, [esp, +, 25], dl - mov dl, byte, ptr, [ebp, +, 26] - mov byte, ptr, [esp, +, 26], dl - mov dl, byte, ptr, [ebp, +, 27] - mov byte, ptr, [esp, +, 27], dl - mov dl, byte, ptr, [ebp, +, 28] - mov byte, ptr, [esp, +, 28], dl - mov dl, byte, ptr, [ebp, +, 29] - mov byte, ptr, [esp, +, 29], dl - mov dl, byte, ptr, [ebp, +, 30] - mov byte, ptr, [esp, +, 30], dl - mov cl, byte, ptr, [ebp, +, 31] - mov byte, ptr, [esp, +, 31], cl - mov ecx, dword, ptr, [esp, +, 24] - mov dword, ptr, [eax, +, 24], ecx - mov ecx, dword, ptr, [esp, +, 20] - mov dword, ptr, [eax, +, 20], ecx - mov ecx, dword, ptr, [esp, +, 16] - mov dword, ptr, [eax, +, 16], ecx - mov ecx, dword, ptr, [esp, +, 12] - mov dword, ptr, [eax, +, 12], ecx - mov ecx, dword, ptr, [esp, +, 8] - mov dword, ptr, [eax, +, 8], ecx + sub esp, 32 + mov eax, dword, ptr, [esp, +, 40] + mov ecx, dword, ptr, [esp, +, 44] + lea edx, [ecx, +, 31] + lea esi, [esp, +, 31] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 30] + lea esi, [esp, +, 30] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 29] + lea esi, [esp, +, 29] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 28] + lea esi, [esp, +, 28] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 27] + lea esi, [esp, +, 27] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 26] + lea esi, [esp, +, 26] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 25] + lea esi, [esp, +, 25] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 24] + lea esi, [esp, +, 24] 
+ #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 23] + lea esi, [esp, +, 23] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 22] + lea esi, [esp, +, 22] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 21] + lea esi, [esp, +, 21] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 20] + lea esi, [esp, +, 20] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 19] + lea esi, [esp, +, 19] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 18] + lea esi, [esp, +, 18] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 17] + lea esi, [esp, +, 17] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 16] + lea esi, [esp, +, 16] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 15] + lea esi, [esp, +, 15] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 14] + lea esi, [esp, +, 14] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 13] + lea esi, [esp, +, 13] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 12] + lea esi, [esp, +, 12] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 11] + lea esi, [esp, +, 11] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 10] + lea esi, [esp, +, 10] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 9] + lea esi, [esp, +, 9] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 8] + lea esi, [esp, +, 8] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 7] + lea 
esi, [esp, +, 7] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 6] + lea esi, [esp, +, 6] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 5] + lea esi, [esp, +, 5] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 4] + lea esi, [esp, +, 4] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 3] + lea esi, [esp, +, 3] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 2] + lea esi, [esp, +, 2] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 1] + lea esi, [esp, +, 1] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + mov edx, esp + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP mov ecx, dword, ptr, [esp] mov edx, dword, ptr, [esp, +, 4] - mov dword, ptr, [eax, +, 4], edx mov dword, ptr, [eax], ecx - mov ecx, dword, ptr, [esp, +, 28] - mov dword, ptr, [eax, +, 28], ecx - jmp .LBB0_2 -.LBB0_4: - mov edx, 32 -.LBB0_5: - mov ecx, dword, ptr, [ebp, +, esi] - mov dword, ptr, [esp, +, esi], ecx - add edx, -4 - add esi, 4 - cmp edx, 3 - ja .LBB0_5 -.LBB0_7: - test edx, edx - je .LBB0_10 - lea edi, [esp, +, esi] - add ebp, esi - xor esi, esi -.LBB0_9: - movzx ecx, byte, ptr, [ebp, +, esi] - mov byte, ptr, [edi, +, esi], cl - inc esi - cmp edx, esi - jne .LBB0_9 -.LBB0_10: - mov ecx, dword, ptr, [esp, +, 28] - mov dword, ptr, [eax, +, 28], ecx - mov ecx, dword, ptr, [esp, +, 24] - mov dword, ptr, [eax, +, 24], ecx - mov ecx, dword, ptr, [esp, +, 20] - mov dword, ptr, [eax, +, 20], ecx - mov ecx, dword, ptr, [esp, +, 16] - mov dword, ptr, [eax, +, 16], ecx - mov ecx, dword, ptr, [esp, +, 12] - mov dword, ptr, [eax, +, 12], ecx + mov dword, ptr, [eax, +, 4], edx mov ecx, dword, ptr, [esp, +, 8] mov dword, ptr, [eax, +, 8], ecx - mov ecx, dword, ptr, [esp] - mov edx, dword, ptr, [esp, +, 4] - 
mov dword, ptr, [eax, +, 4], edx - mov dword, ptr, [eax], ecx -.LBB0_2: + mov ecx, dword, ptr, [esp, +, 12] + mov dword, ptr, [eax, +, 12], ecx + mov ecx, dword, ptr, [esp, +, 16] + mov dword, ptr, [eax, +, 16], ecx + mov ecx, dword, ptr, [esp, +, 20] + mov dword, ptr, [eax, +, 20], ecx + mov ecx, dword, ptr, [esp, +, 24] + mov dword, ptr, [eax, +, 24], ecx + mov ecx, dword, ptr, [esp, +, 28] + mov dword, ptr, [eax, +, 28], ecx #MEMBARRIER - add esp, 36 + add esp, 32 pop esi - pop edi - pop ebx - pop ebp ret 4 asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: push ebx diff --git a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align16 b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align16 index 8ba3102..8acabed 100644 --- a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align16 @@ -1,37 +1,74 @@ asm_test::atomic_memcpy_load_align16::acquire: - push ebp - push ebx - push edi push esi - sub esp, 8 - mov eax, dword, ptr, [esp, +, 32] - mov ecx, dword, ptr, [eax, +, 28] - mov dword, ptr, [esp, +, 4], ecx - mov ecx, dword, ptr, [eax, +, 24] - mov dword, ptr, [esp], ecx - mov esi, dword, ptr, [eax, +, 20] - mov edi, dword, ptr, [eax, +, 16] - mov ebx, dword, ptr, [eax, +, 12] - mov ebp, dword, ptr, [eax, +, 8] - mov edx, dword, ptr, [eax, +, 4] - mov ecx, dword, ptr, [eax] - mov eax, dword, ptr, [esp, +, 28] + sub esp, 40 + mov eax, dword, ptr, [esp, +, 48] + mov ecx, dword, ptr, [esp, +, 52] + lea edx, [ecx, +, 28] + lea esi, [esp, +, 28] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 24] + lea esi, [esp, +, 24] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 20] + lea esi, [esp, +, 20] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 16] + lea esi, [esp, +, 16] + #APP + mov edx, dword, ptr, [edx] + 
mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 12] + lea esi, [esp, +, 12] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 8] + lea esi, [esp, +, 8] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 4] + lea esi, [esp, +, 4] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + mov edx, esp + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + mov ecx, dword, ptr, [esp] + mov edx, dword, ptr, [esp, +, 4] mov dword, ptr, [eax], ecx mov dword, ptr, [eax, +, 4], edx - mov dword, ptr, [eax, +, 8], ebp - mov dword, ptr, [eax, +, 12], ebx - mov dword, ptr, [eax, +, 16], edi - mov dword, ptr, [eax, +, 20], esi - mov ecx, dword, ptr, [esp] + mov ecx, dword, ptr, [esp, +, 8] + mov dword, ptr, [eax, +, 8], ecx + mov ecx, dword, ptr, [esp, +, 12] + mov dword, ptr, [eax, +, 12], ecx + mov ecx, dword, ptr, [esp, +, 16] + mov dword, ptr, [eax, +, 16], ecx + mov ecx, dword, ptr, [esp, +, 20] + mov dword, ptr, [eax, +, 20], ecx + mov ecx, dword, ptr, [esp, +, 24] mov dword, ptr, [eax, +, 24], ecx - mov ecx, dword, ptr, [esp, +, 4] + mov ecx, dword, ptr, [esp, +, 28] mov dword, ptr, [eax, +, 28], ecx #MEMBARRIER - add esp, 8 + add esp, 40 pop esi - pop edi - pop ebx - pop ebp ret 4 asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: push ebp diff --git a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align2 b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align2 index 48dea42..f1643ab 100644 --- a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align2 @@ -1,109 +1,122 @@ asm_test::atomic_memcpy_load_align2::acquire: - push ebp - push ebx - push edi push esi - sub esp, 36 - mov eax, dword, ptr, [esp, +, 56] - mov ebp, dword, ptr, [esp, +, 60] - lea edi, [ebp, +, 3] - and edi, -4 - mov esi, edi - sub esi, ebp - 
cmp esi, 32 - ja .LBB4_4 - test esi, esi - je .LBB4_2 - lea edx, [ebp, +, 32] - xor ebx, ebx -.LBB4_11: - movzx ecx, byte, ptr, [ebp, +, ebx] - mov byte, ptr, [esp, +, ebx], cl - inc ebx - cmp esi, ebx - jne .LBB4_11 - sub edx, edi - cmp edx, 4 - jae .LBB4_3 - jmp .LBB4_7 -.LBB4_4: - movzx edx, word, ptr, [ebp, +, 30] - mov word, ptr, [esp, +, 30], dx - movzx edx, word, ptr, [ebp, +, 28] - mov word, ptr, [esp, +, 28], dx - movzx edx, word, ptr, [ebp, +, 26] - mov word, ptr, [esp, +, 26], dx - movzx edx, word, ptr, [ebp, +, 24] - mov word, ptr, [esp, +, 24], dx - movzx edx, word, ptr, [ebp, +, 22] - mov word, ptr, [esp, +, 22], dx - movzx edx, word, ptr, [ebp, +, 20] - mov word, ptr, [esp, +, 20], dx - movzx edx, word, ptr, [ebp, +, 18] - mov word, ptr, [esp, +, 18], dx - movzx edx, word, ptr, [ebp, +, 16] - mov word, ptr, [esp, +, 16], dx - movzx edx, word, ptr, [ebp, +, 14] - mov word, ptr, [esp, +, 14], dx - movzx edx, word, ptr, [ebp, +, 12] - mov word, ptr, [esp, +, 12], dx - movzx edx, word, ptr, [ebp, +, 10] - mov word, ptr, [esp, +, 10], dx - movzx edx, word, ptr, [ebp, +, 8] - mov word, ptr, [esp, +, 8], dx - movzx edx, word, ptr, [ebp, +, 6] - mov word, ptr, [esp, +, 6], dx - movzx edx, word, ptr, [ebp, +, 4] - mov word, ptr, [esp, +, 4], dx - movzx edx, word, ptr, [ebp, +, 2] - mov word, ptr, [esp, +, 2], dx - movzx ecx, word, ptr, [ebp] - mov word, ptr, [esp], cx - jmp .LBB4_5 -.LBB4_2: - mov edx, 32 -.LBB4_3: - mov ecx, dword, ptr, [ebp, +, esi] - mov dword, ptr, [esp, +, esi], ecx - add edx, -4 - add esi, 4 - cmp edx, 3 - ja .LBB4_3 -.LBB4_7: - test edx, edx - je .LBB4_5 - lea edi, [esp, +, esi] - add ebp, esi - xor esi, esi -.LBB4_9: - movzx ecx, byte, ptr, [ebp, +, esi] - mov byte, ptr, [edi, +, esi], cl - inc esi - cmp edx, esi - jne .LBB4_9 -.LBB4_5: - mov ecx, dword, ptr, [esp, +, 28] - mov dword, ptr, [eax, +, 28], ecx - mov ecx, dword, ptr, [esp, +, 24] - mov dword, ptr, [eax, +, 24], ecx - mov ecx, dword, ptr, [esp, +, 20] - mov dword, ptr, 
[eax, +, 20], ecx - mov ecx, dword, ptr, [esp, +, 16] - mov dword, ptr, [eax, +, 16], ecx - mov ecx, dword, ptr, [esp, +, 12] - mov dword, ptr, [eax, +, 12], ecx - mov ecx, dword, ptr, [esp, +, 8] - mov dword, ptr, [eax, +, 8], ecx + sub esp, 32 + mov eax, dword, ptr, [esp, +, 40] + mov ecx, dword, ptr, [esp, +, 44] + lea edx, [ecx, +, 30] + lea esi, [esp, +, 30] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 28] + lea esi, [esp, +, 28] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 26] + lea esi, [esp, +, 26] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 24] + lea esi, [esp, +, 24] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 22] + lea esi, [esp, +, 22] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 20] + lea esi, [esp, +, 20] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 18] + lea esi, [esp, +, 18] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 16] + lea esi, [esp, +, 16] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 14] + lea esi, [esp, +, 14] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 12] + lea esi, [esp, +, 12] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 10] + lea esi, [esp, +, 10] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 8] + lea esi, [esp, +, 8] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 6] + lea esi, [esp, +, 6] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 4] + lea esi, [esp, +, 4] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 
2] + lea esi, [esp, +, 2] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + mov edx, esp + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP mov ecx, dword, ptr, [esp] mov edx, dword, ptr, [esp, +, 4] - mov dword, ptr, [eax, +, 4], edx mov dword, ptr, [eax], ecx + mov dword, ptr, [eax, +, 4], edx + mov ecx, dword, ptr, [esp, +, 8] + mov dword, ptr, [eax, +, 8], ecx + mov ecx, dword, ptr, [esp, +, 12] + mov dword, ptr, [eax, +, 12], ecx + mov ecx, dword, ptr, [esp, +, 16] + mov dword, ptr, [eax, +, 16], ecx + mov ecx, dword, ptr, [esp, +, 20] + mov dword, ptr, [eax, +, 20], ecx + mov ecx, dword, ptr, [esp, +, 24] + mov dword, ptr, [eax, +, 24], ecx + mov ecx, dword, ptr, [esp, +, 28] + mov dword, ptr, [eax, +, 28], ecx #MEMBARRIER - add esp, 36 + add esp, 32 pop esi - pop edi - pop ebx - pop ebp ret 4 asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: push ebp diff --git a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align4 b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align4 index 6d76271..5daba2e 100644 --- a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align4 @@ -1,37 +1,74 @@ asm_test::atomic_memcpy_load_align4::acquire: - push ebp - push ebx - push edi push esi - sub esp, 8 - mov eax, dword, ptr, [esp, +, 32] - mov ecx, dword, ptr, [eax, +, 28] - mov dword, ptr, [esp, +, 4], ecx - mov ecx, dword, ptr, [eax, +, 24] - mov dword, ptr, [esp], ecx - mov esi, dword, ptr, [eax, +, 20] - mov edi, dword, ptr, [eax, +, 16] - mov ebx, dword, ptr, [eax, +, 12] - mov ebp, dword, ptr, [eax, +, 8] - mov edx, dword, ptr, [eax, +, 4] - mov ecx, dword, ptr, [eax] - mov eax, dword, ptr, [esp, +, 28] + sub esp, 32 + mov eax, dword, ptr, [esp, +, 40] + mov ecx, dword, ptr, [esp, +, 44] + lea edx, [ecx, +, 28] + lea esi, [esp, +, 28] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + 
#NO_APP + lea edx, [ecx, +, 24] + lea esi, [esp, +, 24] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 20] + lea esi, [esp, +, 20] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 16] + lea esi, [esp, +, 16] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 12] + lea esi, [esp, +, 12] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 8] + lea esi, [esp, +, 8] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 4] + lea esi, [esp, +, 4] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + mov edx, esp + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + mov ecx, dword, ptr, [esp] + mov edx, dword, ptr, [esp, +, 4] mov dword, ptr, [eax], ecx mov dword, ptr, [eax, +, 4], edx - mov dword, ptr, [eax, +, 8], ebp - mov dword, ptr, [eax, +, 12], ebx - mov dword, ptr, [eax, +, 16], edi - mov dword, ptr, [eax, +, 20], esi - mov ecx, dword, ptr, [esp] + mov ecx, dword, ptr, [esp, +, 8] + mov dword, ptr, [eax, +, 8], ecx + mov ecx, dword, ptr, [esp, +, 12] + mov dword, ptr, [eax, +, 12], ecx + mov ecx, dword, ptr, [esp, +, 16] + mov dword, ptr, [eax, +, 16], ecx + mov ecx, dword, ptr, [esp, +, 20] + mov dword, ptr, [eax, +, 20], ecx + mov ecx, dword, ptr, [esp, +, 24] mov dword, ptr, [eax, +, 24], ecx - mov ecx, dword, ptr, [esp, +, 4] + mov ecx, dword, ptr, [esp, +, 28] mov dword, ptr, [eax, +, 28], ecx #MEMBARRIER - add esp, 8 + add esp, 32 pop esi - pop edi - pop ebx - pop ebp ret 4 asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: push ebp diff --git a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align8 b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align8 index 1c48fbb..1b69c8d 100644 --- 
a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_load_align8 @@ -1,37 +1,74 @@ asm_test::atomic_memcpy_load_align8::acquire: - push ebp - push ebx - push edi push esi - sub esp, 8 - mov eax, dword, ptr, [esp, +, 32] - mov ecx, dword, ptr, [eax, +, 28] - mov dword, ptr, [esp, +, 4], ecx - mov ecx, dword, ptr, [eax, +, 24] - mov dword, ptr, [esp], ecx - mov esi, dword, ptr, [eax, +, 20] - mov edi, dword, ptr, [eax, +, 16] - mov ebx, dword, ptr, [eax, +, 12] - mov ebp, dword, ptr, [eax, +, 8] - mov edx, dword, ptr, [eax, +, 4] - mov ecx, dword, ptr, [eax] - mov eax, dword, ptr, [esp, +, 28] + sub esp, 32 + mov eax, dword, ptr, [esp, +, 40] + mov ecx, dword, ptr, [esp, +, 44] + lea edx, [ecx, +, 28] + lea esi, [esp, +, 28] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 24] + lea esi, [esp, +, 24] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 20] + lea esi, [esp, +, 20] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 16] + lea esi, [esp, +, 16] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 12] + lea esi, [esp, +, 12] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 8] + lea esi, [esp, +, 8] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 4] + lea esi, [esp, +, 4] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + mov edx, esp + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + mov ecx, dword, ptr, [esp] + mov edx, dword, ptr, [esp, +, 4] mov dword, ptr, [eax], ecx mov dword, ptr, [eax, +, 4], edx - mov dword, ptr, [eax, +, 8], ebp - mov dword, ptr, [eax, +, 12], ebx - mov dword, ptr, [eax, +, 16], edi - mov dword, ptr, [eax, +, 20], esi - mov ecx, dword, 
ptr, [esp] + mov ecx, dword, ptr, [esp, +, 8] + mov dword, ptr, [eax, +, 8], ecx + mov ecx, dword, ptr, [esp, +, 12] + mov dword, ptr, [eax, +, 12], ecx + mov ecx, dword, ptr, [esp, +, 16] + mov dword, ptr, [eax, +, 16], ecx + mov ecx, dword, ptr, [esp, +, 20] + mov dword, ptr, [eax, +, 20], ecx + mov ecx, dword, ptr, [esp, +, 24] mov dword, ptr, [eax, +, 24], ecx - mov ecx, dword, ptr, [esp, +, 4] + mov ecx, dword, ptr, [esp, +, 28] mov dword, ptr, [eax, +, 28], ecx #MEMBARRIER - add esp, 8 + add esp, 32 pop esi - pop edi - pop ebx - pop ebp ret 4 asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: push ebp diff --git a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align1 b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align1 index 03662aa..af57eea 100644 --- a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align1 @@ -1,155 +1,232 @@ asm_test::atomic_memcpy_store_align1::release: - push ebx - push edi - push esi - sub esp, 64 - mov eax, dword, ptr, [esp, +, 80] - mov ecx, dword, ptr, [esp, +, 84] + sub esp, 68 + mov eax, dword, ptr, [esp, +, 72] + mov ecx, dword, ptr, [esp, +, 76] mov edx, dword, ptr, [ecx, +, 28] - mov dword, ptr, [esp, +, 60], edx + mov dword, ptr, [esp, +, 64], edx mov edx, dword, ptr, [ecx, +, 24] - mov dword, ptr, [esp, +, 56], edx + mov dword, ptr, [esp, +, 60], edx mov edx, dword, ptr, [ecx, +, 20] - mov dword, ptr, [esp, +, 52], edx + mov dword, ptr, [esp, +, 56], edx mov edx, dword, ptr, [ecx, +, 16] - mov dword, ptr, [esp, +, 48], edx + mov dword, ptr, [esp, +, 52], edx mov edx, dword, ptr, [ecx, +, 12] - mov dword, ptr, [esp, +, 44], edx + mov dword, ptr, [esp, +, 48], edx mov edx, dword, ptr, [ecx, +, 8] - mov dword, ptr, [esp, +, 40], edx + mov dword, ptr, [esp, +, 44], edx mov edx, dword, ptr, [ecx] mov ecx, dword, ptr, [ecx, +, 4] - mov dword, ptr, [esp, +, 36], ecx - mov dword, ptr, [esp, +, 32], 
edx + mov dword, ptr, [esp, +, 40], ecx + mov dword, ptr, [esp, +, 36], edx #MEMBARRIER - mov ecx, dword, ptr, [esp, +, 60] + mov ecx, dword, ptr, [esp, +, 64] mov dword, ptr, [esp, +, 28], ecx - mov ecx, dword, ptr, [esp, +, 56] + mov ecx, dword, ptr, [esp, +, 60] mov dword, ptr, [esp, +, 24], ecx - mov ecx, dword, ptr, [esp, +, 52] + mov ecx, dword, ptr, [esp, +, 56] mov dword, ptr, [esp, +, 20], ecx - mov ecx, dword, ptr, [esp, +, 48] + mov ecx, dword, ptr, [esp, +, 52] mov dword, ptr, [esp, +, 16], ecx - mov ecx, dword, ptr, [esp, +, 44] + mov ecx, dword, ptr, [esp, +, 48] mov dword, ptr, [esp, +, 12], ecx - mov ecx, dword, ptr, [esp, +, 40] + mov ecx, dword, ptr, [esp, +, 44] mov dword, ptr, [esp, +, 8], ecx - mov ecx, dword, ptr, [esp, +, 32] - mov edx, dword, ptr, [esp, +, 36] + mov ecx, dword, ptr, [esp, +, 36] + mov edx, dword, ptr, [esp, +, 40] mov dword, ptr, [esp, +, 4], edx mov dword, ptr, [esp], ecx - lea esi, [eax, +, 3] - and esi, -4 - mov edx, esi - sub edx, eax - cmp edx, 33 - jae .LBB2_1 - test edx, edx - je .LBB2_4 - lea ecx, [eax, +, 32] - xor edi, edi -.LBB2_11: - movzx ebx, byte, ptr, [esp, +, edi] - mov byte, ptr, [eax, +, edi], bl - inc edi - cmp edx, edi - jne .LBB2_11 - sub ecx, esi - cmp ecx, 4 - jae .LBB2_5 - jmp .LBB2_7 -.LBB2_1: - mov cl, byte, ptr, [esp] + lea ecx, [eax, +, 31] + lea edx, [esp, +, 31] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 30] + lea edx, [esp, +, 30] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 29] + lea edx, [esp, +, 29] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [esp, +, 28] + lea edx, [eax, +, 28] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [eax, +, 27] + lea edx, [esp, +, 27] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 26] + lea edx, [esp, +, 26] + #APP + mov dl, byte, ptr, [edx] + mov byte, 
ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 25] + lea edx, [esp, +, 25] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [esp, +, 24] + lea edx, [eax, +, 24] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [eax, +, 23] + lea edx, [esp, +, 23] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 22] + lea edx, [esp, +, 22] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 21] + lea edx, [esp, +, 21] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [esp, +, 20] + lea edx, [eax, +, 20] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [eax, +, 19] + lea edx, [esp, +, 19] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 18] + lea edx, [esp, +, 18] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 17] + lea edx, [esp, +, 17] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [esp, +, 16] + lea edx, [eax, +, 16] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [eax, +, 15] + lea edx, [esp, +, 15] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 14] + lea edx, [esp, +, 14] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 13] + lea edx, [esp, +, 13] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [esp, +, 12] + lea edx, [eax, +, 12] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [eax, +, 11] + lea edx, [esp, +, 11] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 10] + lea edx, [esp, +, 10] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 9] + lea edx, [esp, +, 9] + #APP + mov dl, byte, 
ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [esp, +, 8] + lea edx, [eax, +, 8] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [eax, +, 7] + lea edx, [esp, +, 7] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 6] + lea edx, [esp, +, 6] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 5] + lea edx, [esp, +, 5] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [esp, +, 4] + lea edx, [eax, +, 4] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [eax, +, 3] + lea edx, [esp, +, 3] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 2] + lea edx, [esp, +, 2] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 1] + lea edx, [esp, +, 1] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + mov ecx, esp + #APP + mov cl, byte, ptr, [ecx] mov byte, ptr, [eax], cl - mov cl, byte, ptr, [esp, +, 1] - mov byte, ptr, [eax, +, 1], cl - mov cl, byte, ptr, [esp, +, 2] - mov byte, ptr, [eax, +, 2], cl - mov cl, byte, ptr, [esp, +, 3] - mov byte, ptr, [eax, +, 3], cl - mov cl, byte, ptr, [esp, +, 4] - mov byte, ptr, [eax, +, 4], cl - mov cl, byte, ptr, [esp, +, 5] - mov byte, ptr, [eax, +, 5], cl - mov cl, byte, ptr, [esp, +, 6] - mov byte, ptr, [eax, +, 6], cl - mov cl, byte, ptr, [esp, +, 7] - mov byte, ptr, [eax, +, 7], cl - mov cl, byte, ptr, [esp, +, 8] - mov byte, ptr, [eax, +, 8], cl - mov cl, byte, ptr, [esp, +, 9] - mov byte, ptr, [eax, +, 9], cl - mov cl, byte, ptr, [esp, +, 10] - mov byte, ptr, [eax, +, 10], cl - mov cl, byte, ptr, [esp, +, 11] - mov byte, ptr, [eax, +, 11], cl - mov cl, byte, ptr, [esp, +, 12] - mov byte, ptr, [eax, +, 12], cl - mov cl, byte, ptr, [esp, +, 13] - mov byte, ptr, [eax, +, 13], cl - mov cl, byte, ptr, [esp, +, 14] - mov byte, ptr, [eax, +, 14], cl - mov 
cl, byte, ptr, [esp, +, 15] - mov byte, ptr, [eax, +, 15], cl - mov cl, byte, ptr, [esp, +, 16] - mov byte, ptr, [eax, +, 16], cl - mov cl, byte, ptr, [esp, +, 17] - mov byte, ptr, [eax, +, 17], cl - mov cl, byte, ptr, [esp, +, 18] - mov byte, ptr, [eax, +, 18], cl - mov cl, byte, ptr, [esp, +, 19] - mov byte, ptr, [eax, +, 19], cl - mov cl, byte, ptr, [esp, +, 20] - mov byte, ptr, [eax, +, 20], cl - mov cl, byte, ptr, [esp, +, 21] - mov byte, ptr, [eax, +, 21], cl - mov cl, byte, ptr, [esp, +, 22] - mov byte, ptr, [eax, +, 22], cl - mov cl, byte, ptr, [esp, +, 23] - mov byte, ptr, [eax, +, 23], cl - mov cl, byte, ptr, [esp, +, 24] - mov byte, ptr, [eax, +, 24], cl - mov cl, byte, ptr, [esp, +, 25] - mov byte, ptr, [eax, +, 25], cl - mov cl, byte, ptr, [esp, +, 26] - mov byte, ptr, [eax, +, 26], cl - mov cl, byte, ptr, [esp, +, 27] - mov byte, ptr, [eax, +, 27], cl - mov cl, byte, ptr, [esp, +, 28] - mov byte, ptr, [eax, +, 28], cl - mov cl, byte, ptr, [esp, +, 29] - mov byte, ptr, [eax, +, 29], cl - mov cl, byte, ptr, [esp, +, 30] - mov byte, ptr, [eax, +, 30], cl - mov cl, byte, ptr, [esp, +, 31] - mov byte, ptr, [eax, +, 31], cl - jmp .LBB2_2 -.LBB2_4: - mov ecx, 32 -.LBB2_5: - mov esi, dword, ptr, [esp, +, edx] - mov dword, ptr, [eax, +, edx], esi - add ecx, -4 - add edx, 4 - cmp ecx, 3 - ja .LBB2_5 -.LBB2_7: - test ecx, ecx - je .LBB2_2 - add eax, edx - add edx, esp - xor esi, esi -.LBB2_9: - movzx ebx, byte, ptr, [edx, +, esi] - mov byte, ptr, [eax, +, esi], bl - inc esi - cmp ecx, esi - jne .LBB2_9 -.LBB2_2: - add esp, 64 - pop esi - pop edi - pop ebx + #NO_APP + add esp, 68 ret asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: sub esp, 32 diff --git a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align16 b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align16 index 4e82d88..b20d5e7 100644 --- a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align16 +++ 
b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align16 @@ -1,37 +1,88 @@ asm_test::atomic_memcpy_store_align16::release: - push ebp - push ebx - push edi - push esi - sub esp, 8 - mov ecx, dword, ptr, [esp, +, 32] - mov eax, dword, ptr, [ecx] - mov dword, ptr, [esp, +, 4], eax - mov eax, dword, ptr, [ecx, +, 4] - mov dword, ptr, [esp], eax - mov esi, dword, ptr, [ecx, +, 8] - mov edi, dword, ptr, [ecx, +, 12] - mov ebx, dword, ptr, [ecx, +, 16] - mov ebp, dword, ptr, [ecx, +, 20] + sub esp, 76 + mov eax, dword, ptr, [esp, +, 80] + mov ecx, dword, ptr, [esp, +, 84] + mov edx, dword, ptr, [ecx, +, 28] + mov dword, ptr, [esp, +, 28], edx mov edx, dword, ptr, [ecx, +, 24] - mov ecx, dword, ptr, [ecx, +, 28] + mov dword, ptr, [esp, +, 24], edx + mov edx, dword, ptr, [ecx, +, 20] + mov dword, ptr, [esp, +, 20], edx + mov edx, dword, ptr, [ecx, +, 16] + mov dword, ptr, [esp, +, 16], edx + mov edx, dword, ptr, [ecx, +, 12] + mov dword, ptr, [esp, +, 12], edx + mov edx, dword, ptr, [ecx, +, 8] + mov dword, ptr, [esp, +, 8], edx + mov edx, dword, ptr, [ecx] + mov ecx, dword, ptr, [ecx, +, 4] + mov dword, ptr, [esp, +, 4], ecx + mov dword, ptr, [esp], edx #MEMBARRIER - mov eax, dword, ptr, [esp, +, 28] - mov dword, ptr, [eax, +, 28], ecx - mov dword, ptr, [eax, +, 24], edx - mov dword, ptr, [eax, +, 20], ebp - mov dword, ptr, [eax, +, 16], ebx - mov dword, ptr, [eax, +, 12], edi - mov dword, ptr, [eax, +, 8], esi + mov ecx, dword, ptr, [esp, +, 28] + mov dword, ptr, [esp, +, 60], ecx + mov ecx, dword, ptr, [esp, +, 24] + mov dword, ptr, [esp, +, 56], ecx + mov ecx, dword, ptr, [esp, +, 20] + mov dword, ptr, [esp, +, 52], ecx + mov ecx, dword, ptr, [esp, +, 16] + mov dword, ptr, [esp, +, 48], ecx + mov ecx, dword, ptr, [esp, +, 12] + mov dword, ptr, [esp, +, 44], ecx + mov ecx, dword, ptr, [esp, +, 8] + mov dword, ptr, [esp, +, 40], ecx mov ecx, dword, ptr, [esp] - mov dword, ptr, [eax, +, 4], ecx - mov ecx, dword, ptr, [esp, +, 4] + mov edx, dword, ptr, [esp, 
+, 4] + mov dword, ptr, [esp, +, 36], edx + mov dword, ptr, [esp, +, 32], ecx + lea ecx, [esp, +, 60] + lea edx, [eax, +, 28] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 56] + lea edx, [eax, +, 24] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 52] + lea edx, [eax, +, 20] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 48] + lea edx, [eax, +, 16] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 44] + lea edx, [eax, +, 12] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 40] + lea edx, [eax, +, 8] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 36] + lea edx, [eax, +, 4] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 32] + #APP + mov ecx, dword, ptr, [ecx] mov dword, ptr, [eax], ecx - add esp, 8 - pop esi - pop edi - pop ebx - pop ebp + #NO_APP + add esp, 76 ret asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: push esi diff --git a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align2 b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align2 index fd523ab..c28e8c6 100644 --- a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align2 @@ -1,123 +1,136 @@ asm_test::atomic_memcpy_store_align2::release: - push ebx - push edi - push esi - sub esp, 64 - mov eax, dword, ptr, [esp, +, 80] - mov ecx, dword, ptr, [esp, +, 84] + sub esp, 68 + mov eax, dword, ptr, [esp, +, 72] + mov ecx, dword, ptr, [esp, +, 76] mov edx, dword, ptr, [ecx, +, 28] - mov dword, ptr, [esp, +, 60], edx + mov dword, ptr, [esp, +, 64], edx mov edx, dword, ptr, [ecx, +, 24] - mov dword, ptr, [esp, +, 56], edx + mov dword, ptr, [esp, +, 60], edx 
mov edx, dword, ptr, [ecx, +, 20] - mov dword, ptr, [esp, +, 52], edx + mov dword, ptr, [esp, +, 56], edx mov edx, dword, ptr, [ecx, +, 16] - mov dword, ptr, [esp, +, 48], edx + mov dword, ptr, [esp, +, 52], edx mov edx, dword, ptr, [ecx, +, 12] - mov dword, ptr, [esp, +, 44], edx + mov dword, ptr, [esp, +, 48], edx mov edx, dword, ptr, [ecx, +, 8] - mov dword, ptr, [esp, +, 40], edx + mov dword, ptr, [esp, +, 44], edx mov edx, dword, ptr, [ecx] mov ecx, dword, ptr, [ecx, +, 4] - mov dword, ptr, [esp, +, 36], ecx - mov dword, ptr, [esp, +, 32], edx + mov dword, ptr, [esp, +, 40], ecx + mov dword, ptr, [esp, +, 36], edx #MEMBARRIER - mov ecx, dword, ptr, [esp, +, 60] + mov ecx, dword, ptr, [esp, +, 64] mov dword, ptr, [esp, +, 28], ecx - mov ecx, dword, ptr, [esp, +, 56] + mov ecx, dword, ptr, [esp, +, 60] mov dword, ptr, [esp, +, 24], ecx - mov ecx, dword, ptr, [esp, +, 52] + mov ecx, dword, ptr, [esp, +, 56] mov dword, ptr, [esp, +, 20], ecx - mov ecx, dword, ptr, [esp, +, 48] + mov ecx, dword, ptr, [esp, +, 52] mov dword, ptr, [esp, +, 16], ecx - mov ecx, dword, ptr, [esp, +, 44] + mov ecx, dword, ptr, [esp, +, 48] mov dword, ptr, [esp, +, 12], ecx - mov ecx, dword, ptr, [esp, +, 40] + mov ecx, dword, ptr, [esp, +, 44] mov dword, ptr, [esp, +, 8], ecx - mov ecx, dword, ptr, [esp, +, 32] - mov edx, dword, ptr, [esp, +, 36] + mov ecx, dword, ptr, [esp, +, 36] + mov edx, dword, ptr, [esp, +, 40] mov dword, ptr, [esp, +, 4], edx mov dword, ptr, [esp], ecx - lea esi, [eax, +, 3] - and esi, -4 - mov edx, esi - sub edx, eax - cmp edx, 32 - ja .LBB6_10 - test edx, edx - je .LBB6_2 - lea ecx, [eax, +, 32] - xor edi, edi -.LBB6_9: - movzx ebx, byte, ptr, [esp, +, edi] - mov byte, ptr, [eax, +, edi], bl - inc edi - cmp edx, edi - jne .LBB6_9 - sub ecx, esi - cmp ecx, 4 - jae .LBB6_3 - jmp .LBB6_5 -.LBB6_10: - movzx ecx, word, ptr, [esp, +, 30] - mov word, ptr, [eax, +, 30], cx - movzx ecx, word, ptr, [esp, +, 28] - mov word, ptr, [eax, +, 28], cx - movzx ecx, word, ptr, 
[esp, +, 26] - mov word, ptr, [eax, +, 26], cx - movzx ecx, word, ptr, [esp, +, 24] - mov word, ptr, [eax, +, 24], cx - movzx ecx, word, ptr, [esp, +, 22] - mov word, ptr, [eax, +, 22], cx - movzx ecx, word, ptr, [esp, +, 20] - mov word, ptr, [eax, +, 20], cx - movzx ecx, word, ptr, [esp, +, 18] - mov word, ptr, [eax, +, 18], cx - movzx ecx, word, ptr, [esp, +, 16] - mov word, ptr, [eax, +, 16], cx - movzx ecx, word, ptr, [esp, +, 14] - mov word, ptr, [eax, +, 14], cx - movzx ecx, word, ptr, [esp, +, 12] - mov word, ptr, [eax, +, 12], cx - movzx ecx, word, ptr, [esp, +, 10] - mov word, ptr, [eax, +, 10], cx - movzx ecx, word, ptr, [esp, +, 8] - mov word, ptr, [eax, +, 8], cx - movzx ecx, word, ptr, [esp, +, 6] - mov word, ptr, [eax, +, 6], cx - movzx ecx, word, ptr, [esp, +, 4] - mov word, ptr, [eax, +, 4], cx - movzx ecx, word, ptr, [esp, +, 2] - mov word, ptr, [eax, +, 2], cx - movzx ecx, word, ptr, [esp] + lea ecx, [eax, +, 30] + lea edx, [esp, +, 30] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + lea ecx, [esp, +, 28] + lea edx, [eax, +, 28] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [eax, +, 26] + lea edx, [esp, +, 26] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + lea ecx, [esp, +, 24] + lea edx, [eax, +, 24] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [eax, +, 22] + lea edx, [esp, +, 22] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + lea ecx, [esp, +, 20] + lea edx, [eax, +, 20] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [eax, +, 18] + lea edx, [esp, +, 18] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + lea ecx, [esp, +, 16] + lea edx, [eax, +, 16] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [eax, +, 14] + lea edx, [esp, +, 14] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + lea 
ecx, [esp, +, 12] + lea edx, [eax, +, 12] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [eax, +, 10] + lea edx, [esp, +, 10] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + lea ecx, [esp, +, 8] + lea edx, [eax, +, 8] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [eax, +, 6] + lea edx, [esp, +, 6] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + lea ecx, [esp, +, 4] + lea edx, [eax, +, 4] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [eax, +, 2] + lea edx, [esp, +, 2] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + mov ecx, esp + #APP + mov cx, word, ptr, [ecx] mov word, ptr, [eax], cx - jmp .LBB6_11 -.LBB6_2: - mov ecx, 32 -.LBB6_3: - mov esi, dword, ptr, [esp, +, edx] - mov dword, ptr, [eax, +, edx], esi - add ecx, -4 - add edx, 4 - cmp ecx, 3 - ja .LBB6_3 -.LBB6_5: - test ecx, ecx - je .LBB6_11 - add eax, edx - add edx, esp - xor esi, esi -.LBB6_7: - movzx ebx, byte, ptr, [edx, +, esi] - mov byte, ptr, [eax, +, esi], bl - inc esi - cmp ecx, esi - jne .LBB6_7 -.LBB6_11: - add esp, 64 - pop esi - pop edi - pop ebx + #NO_APP + add esp, 68 ret asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: sub esp, 32 diff --git a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align4 b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align4 index f6b0896..a3a5b46 100644 --- a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align4 @@ -1,37 +1,88 @@ asm_test::atomic_memcpy_store_align4::release: - push ebp - push ebx - push edi - push esi - sub esp, 8 - mov ecx, dword, ptr, [esp, +, 32] - mov eax, dword, ptr, [ecx] - mov dword, ptr, [esp, +, 4], eax - mov eax, dword, ptr, [ecx, +, 4] - mov dword, ptr, [esp], eax - mov esi, dword, ptr, [ecx, +, 8] - mov edi, dword, ptr, 
[ecx, +, 12] - mov ebx, dword, ptr, [ecx, +, 16] - mov ebp, dword, ptr, [ecx, +, 20] + sub esp, 68 + mov eax, dword, ptr, [esp, +, 72] + mov ecx, dword, ptr, [esp, +, 76] + mov edx, dword, ptr, [ecx, +, 28] + mov dword, ptr, [esp, +, 64], edx mov edx, dword, ptr, [ecx, +, 24] - mov ecx, dword, ptr, [ecx, +, 28] + mov dword, ptr, [esp, +, 60], edx + mov edx, dword, ptr, [ecx, +, 20] + mov dword, ptr, [esp, +, 56], edx + mov edx, dword, ptr, [ecx, +, 16] + mov dword, ptr, [esp, +, 52], edx + mov edx, dword, ptr, [ecx, +, 12] + mov dword, ptr, [esp, +, 48], edx + mov edx, dword, ptr, [ecx, +, 8] + mov dword, ptr, [esp, +, 44], edx + mov edx, dword, ptr, [ecx] + mov ecx, dword, ptr, [ecx, +, 4] + mov dword, ptr, [esp, +, 40], ecx + mov dword, ptr, [esp, +, 36], edx #MEMBARRIER - mov eax, dword, ptr, [esp, +, 28] - mov dword, ptr, [eax, +, 28], ecx - mov dword, ptr, [eax, +, 24], edx - mov dword, ptr, [eax, +, 20], ebp - mov dword, ptr, [eax, +, 16], ebx - mov dword, ptr, [eax, +, 12], edi - mov dword, ptr, [eax, +, 8], esi - mov ecx, dword, ptr, [esp] - mov dword, ptr, [eax, +, 4], ecx - mov ecx, dword, ptr, [esp, +, 4] + mov ecx, dword, ptr, [esp, +, 64] + mov dword, ptr, [esp, +, 28], ecx + mov ecx, dword, ptr, [esp, +, 60] + mov dword, ptr, [esp, +, 24], ecx + mov ecx, dword, ptr, [esp, +, 56] + mov dword, ptr, [esp, +, 20], ecx + mov ecx, dword, ptr, [esp, +, 52] + mov dword, ptr, [esp, +, 16], ecx + mov ecx, dword, ptr, [esp, +, 48] + mov dword, ptr, [esp, +, 12], ecx + mov ecx, dword, ptr, [esp, +, 44] + mov dword, ptr, [esp, +, 8], ecx + mov ecx, dword, ptr, [esp, +, 36] + mov edx, dword, ptr, [esp, +, 40] + mov dword, ptr, [esp, +, 4], edx + mov dword, ptr, [esp], ecx + lea ecx, [esp, +, 28] + lea edx, [eax, +, 28] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 24] + lea edx, [eax, +, 24] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 20] + lea edx, [eax, +, 20] + 
#APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 16] + lea edx, [eax, +, 16] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 12] + lea edx, [eax, +, 12] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 8] + lea edx, [eax, +, 8] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 4] + lea edx, [eax, +, 4] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + mov ecx, esp + #APP + mov ecx, dword, ptr, [ecx] mov dword, ptr, [eax], ecx - add esp, 8 - pop esi - pop edi - pop ebx - pop ebp + #NO_APP + add esp, 68 ret asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: sub esp, 32 diff --git a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align8 b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align8 index c08384b..d17083a 100644 --- a/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/i586-unknown-linux-gnu/atomic_memcpy_store_align8 @@ -1,37 +1,88 @@ asm_test::atomic_memcpy_store_align8::release: - push ebp - push ebx - push edi - push esi - sub esp, 8 - mov ecx, dword, ptr, [esp, +, 32] - mov eax, dword, ptr, [ecx] - mov dword, ptr, [esp, +, 4], eax - mov eax, dword, ptr, [ecx, +, 4] - mov dword, ptr, [esp], eax - mov esi, dword, ptr, [ecx, +, 8] - mov edi, dword, ptr, [ecx, +, 12] - mov ebx, dword, ptr, [ecx, +, 16] - mov ebp, dword, ptr, [ecx, +, 20] + sub esp, 68 + mov eax, dword, ptr, [esp, +, 72] + mov ecx, dword, ptr, [esp, +, 76] + mov edx, dword, ptr, [ecx, +, 28] + mov dword, ptr, [esp, +, 28], edx mov edx, dword, ptr, [ecx, +, 24] - mov ecx, dword, ptr, [ecx, +, 28] + mov dword, ptr, [esp, +, 24], edx + mov edx, dword, ptr, [ecx, +, 20] + mov dword, ptr, [esp, +, 20], edx + mov edx, dword, ptr, [ecx, +, 16] + mov dword, ptr, [esp, +, 16], edx + mov edx, dword, ptr, [ecx, 
+, 12] + mov dword, ptr, [esp, +, 12], edx + mov edx, dword, ptr, [ecx, +, 8] + mov dword, ptr, [esp, +, 8], edx + mov edx, dword, ptr, [ecx] + mov ecx, dword, ptr, [ecx, +, 4] + mov dword, ptr, [esp, +, 4], ecx + mov dword, ptr, [esp], edx #MEMBARRIER - mov eax, dword, ptr, [esp, +, 28] - mov dword, ptr, [eax, +, 28], ecx - mov dword, ptr, [eax, +, 24], edx - mov dword, ptr, [eax, +, 20], ebp - mov dword, ptr, [eax, +, 16], ebx - mov dword, ptr, [eax, +, 12], edi - mov dword, ptr, [eax, +, 8], esi + mov ecx, dword, ptr, [esp, +, 28] + mov dword, ptr, [esp, +, 60], ecx + mov ecx, dword, ptr, [esp, +, 24] + mov dword, ptr, [esp, +, 56], ecx + mov ecx, dword, ptr, [esp, +, 20] + mov dword, ptr, [esp, +, 52], ecx + mov ecx, dword, ptr, [esp, +, 16] + mov dword, ptr, [esp, +, 48], ecx + mov ecx, dword, ptr, [esp, +, 12] + mov dword, ptr, [esp, +, 44], ecx + mov ecx, dword, ptr, [esp, +, 8] + mov dword, ptr, [esp, +, 40], ecx mov ecx, dword, ptr, [esp] - mov dword, ptr, [eax, +, 4], ecx - mov ecx, dword, ptr, [esp, +, 4] + mov edx, dword, ptr, [esp, +, 4] + mov dword, ptr, [esp, +, 36], edx + mov dword, ptr, [esp, +, 32], ecx + lea ecx, [esp, +, 60] + lea edx, [eax, +, 28] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 56] + lea edx, [eax, +, 24] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 52] + lea edx, [eax, +, 20] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 48] + lea edx, [eax, +, 16] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 44] + lea edx, [eax, +, 12] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 40] + lea edx, [eax, +, 8] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [esp, +, 36] + lea edx, [eax, +, 4] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + 
lea ecx, [esp, +, 32] + #APP + mov ecx, dword, ptr, [ecx] mov dword, ptr, [eax], ecx - add esp, 8 - pop esi - pop edi - pop ebx - pop ebp + #NO_APP + add esp, 68 ret asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: push esi diff --git a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align1 b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align1 index b376ec1..f87f5be 100644 --- a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align1 @@ -1,142 +1,210 @@ asm_test::atomic_memcpy_load_align1::acquire: - push ebp - push ebx - push edi push esi - sub esp, 36 - mov ebp, dword, ptr, [esp, +, 60] - mov eax, dword, ptr, [esp, +, 56] - lea edi, [ebp, +, 3] - and edi, -4 - mov esi, edi - sub esi, ebp - cmp esi, 33 - jae .LBB0_1 - test esi, esi - je .LBB0_4 - lea edx, [ebp, +, 32] - xor ebx, ebx -.LBB0_12: - movzx ecx, byte, ptr, [ebp, +, ebx] - mov byte, ptr, [esp, +, ebx], cl - inc ebx - cmp esi, ebx - jne .LBB0_12 - sub edx, edi - cmp edx, 4 - jae .LBB0_5 - jmp .LBB0_7 -.LBB0_1: - mov dl, byte, ptr, [ebp] - mov byte, ptr, [esp], dl - mov dl, byte, ptr, [ebp, +, 1] - mov byte, ptr, [esp, +, 1], dl - mov dl, byte, ptr, [ebp, +, 2] - mov byte, ptr, [esp, +, 2], dl - mov dl, byte, ptr, [ebp, +, 3] - mov byte, ptr, [esp, +, 3], dl - mov dl, byte, ptr, [ebp, +, 4] - mov byte, ptr, [esp, +, 4], dl - mov dl, byte, ptr, [ebp, +, 5] - mov byte, ptr, [esp, +, 5], dl - mov dl, byte, ptr, [ebp, +, 6] - mov byte, ptr, [esp, +, 6], dl - mov dl, byte, ptr, [ebp, +, 7] - mov byte, ptr, [esp, +, 7], dl - mov dl, byte, ptr, [ebp, +, 8] - mov byte, ptr, [esp, +, 8], dl - mov dl, byte, ptr, [ebp, +, 9] - mov byte, ptr, [esp, +, 9], dl - mov dl, byte, ptr, [ebp, +, 10] - mov byte, ptr, [esp, +, 10], dl - mov dl, byte, ptr, [ebp, +, 11] - mov byte, ptr, [esp, +, 11], dl - mov dl, byte, ptr, [ebp, +, 12] - mov byte, ptr, [esp, +, 12], dl - mov dl, byte, ptr, [ebp, 
+, 13] - mov byte, ptr, [esp, +, 13], dl - mov dl, byte, ptr, [ebp, +, 14] - mov byte, ptr, [esp, +, 14], dl - mov dl, byte, ptr, [ebp, +, 15] - mov byte, ptr, [esp, +, 15], dl - mov dl, byte, ptr, [ebp, +, 16] - mov byte, ptr, [esp, +, 16], dl - mov dl, byte, ptr, [ebp, +, 17] - mov byte, ptr, [esp, +, 17], dl - mov dl, byte, ptr, [ebp, +, 18] - mov byte, ptr, [esp, +, 18], dl - mov dl, byte, ptr, [ebp, +, 19] - mov byte, ptr, [esp, +, 19], dl - mov dl, byte, ptr, [ebp, +, 20] - mov byte, ptr, [esp, +, 20], dl - mov dl, byte, ptr, [ebp, +, 21] - mov byte, ptr, [esp, +, 21], dl - mov dl, byte, ptr, [ebp, +, 22] - mov byte, ptr, [esp, +, 22], dl - mov dl, byte, ptr, [ebp, +, 23] - mov byte, ptr, [esp, +, 23], dl - mov dl, byte, ptr, [ebp, +, 24] - mov byte, ptr, [esp, +, 24], dl - mov dl, byte, ptr, [ebp, +, 25] - mov byte, ptr, [esp, +, 25], dl - mov dl, byte, ptr, [ebp, +, 26] - mov byte, ptr, [esp, +, 26], dl - mov dl, byte, ptr, [ebp, +, 27] - mov byte, ptr, [esp, +, 27], dl - mov dl, byte, ptr, [ebp, +, 28] - mov byte, ptr, [esp, +, 28], dl - mov dl, byte, ptr, [ebp, +, 29] - mov byte, ptr, [esp, +, 29], dl - mov dl, byte, ptr, [ebp, +, 30] - mov byte, ptr, [esp, +, 30], dl - mov cl, byte, ptr, [ebp, +, 31] - mov byte, ptr, [esp, +, 31], cl + sub esp, 32 + mov ecx, dword, ptr, [esp, +, 44] + lea esi, [esp, +, 31] + mov eax, dword, ptr, [esp, +, 40] + lea edx, [ecx, +, 31] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 30] + lea esi, [esp, +, 30] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 29] + lea esi, [esp, +, 29] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 28] + lea esi, [esp, +, 28] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 27] + lea esi, [esp, +, 27] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 26] + lea esi, [esp, +, 26] + 
#APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 25] + lea esi, [esp, +, 25] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 24] + lea esi, [esp, +, 24] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 23] + lea esi, [esp, +, 23] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 22] + lea esi, [esp, +, 22] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 21] + lea esi, [esp, +, 21] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 20] + lea esi, [esp, +, 20] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 19] + lea esi, [esp, +, 19] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 18] + lea esi, [esp, +, 18] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 17] + lea esi, [esp, +, 17] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 16] + lea esi, [esp, +, 16] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 15] + lea esi, [esp, +, 15] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 14] + lea esi, [esp, +, 14] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 13] + lea esi, [esp, +, 13] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 12] + lea esi, [esp, +, 12] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 11] + lea esi, [esp, +, 11] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 10] + lea esi, [esp, +, 10] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 9] + 
lea esi, [esp, +, 9] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 8] + lea esi, [esp, +, 8] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 7] + lea esi, [esp, +, 7] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 6] + lea esi, [esp, +, 6] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 5] + lea esi, [esp, +, 5] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 4] + lea esi, [esp, +, 4] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 3] + lea esi, [esp, +, 3] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 2] + lea esi, [esp, +, 2] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [ecx, +, 1] + lea esi, [esp, +, 1] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + mov edx, esp + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP movsd xmm1, qword, ptr, [esp, +, 8] - movsd xmm0, qword, ptr, [esp, +, 16] - movsd xmm2, qword, ptr, [esp] + movsd xmm0, qword, ptr, [esp] + movsd xmm2, qword, ptr, [esp, +, 16] movsd qword, ptr, [eax, +, 8], xmm1 movsd xmm1, qword, ptr, [esp, +, 24] - movsd qword, ptr, [eax, +, 16], xmm0 - movsd qword, ptr, [eax], xmm2 + movsd qword, ptr, [eax], xmm0 + movsd qword, ptr, [eax, +, 16], xmm2 movsd qword, ptr, [eax, +, 24], xmm1 - jmp .LBB0_2 -.LBB0_4: - mov edx, 32 -.LBB0_5: - mov ecx, dword, ptr, [ebp, +, esi] - add edx, -4 - mov dword, ptr, [esp, +, esi], ecx - add esi, 4 - cmp edx, 3 - ja .LBB0_5 -.LBB0_7: - test edx, edx - je .LBB0_10 - lea edi, [esp, +, esi] - add ebp, esi - xor esi, esi -.LBB0_9: - movzx ecx, byte, ptr, [ebp, +, esi] - mov byte, ptr, [edi, +, esi], cl - inc esi - cmp edx, esi - jne .LBB0_9 -.LBB0_10: - movsd xmm1, qword, ptr, [esp, +, 16] - movsd xmm0, 
qword, ptr, [esp, +, 24] - movsd xmm2, qword, ptr, [esp] - movsd qword, ptr, [eax, +, 16], xmm1 - movsd xmm1, qword, ptr, [esp, +, 8] - movsd qword, ptr, [eax, +, 24], xmm0 - movsd qword, ptr, [eax], xmm2 - movsd qword, ptr, [eax, +, 8], xmm1 -.LBB0_2: #MEMBARRIER - add esp, 36 + add esp, 32 pop esi - pop edi - pop ebx - pop ebp ret 4 asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: push ebx diff --git a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align16 b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align16 index 154dfb5..2ec778d 100644 --- a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align16 @@ -1,37 +1,62 @@ asm_test::atomic_memcpy_load_align16::acquire: - push ebp - push ebx - push edi push esi - sub esp, 8 - mov eax, dword, ptr, [esp, +, 32] - mov ecx, dword, ptr, [eax, +, 28] - mov dword, ptr, [esp, +, 4], ecx - mov ecx, dword, ptr, [eax, +, 24] - mov dword, ptr, [esp], ecx - mov esi, dword, ptr, [eax, +, 20] - mov edi, dword, ptr, [eax, +, 16] - mov ebx, dword, ptr, [eax, +, 12] - mov ebp, dword, ptr, [eax, +, 8] - mov edx, dword, ptr, [eax, +, 4] - mov ecx, dword, ptr, [eax] - mov eax, dword, ptr, [esp, +, 28] - mov dword, ptr, [eax], ecx - mov dword, ptr, [eax, +, 4], edx - mov dword, ptr, [eax, +, 8], ebp - mov dword, ptr, [eax, +, 12], ebx - mov dword, ptr, [eax, +, 16], edi - mov dword, ptr, [eax, +, 20], esi - mov esi, dword, ptr, [esp] - mov edx, dword, ptr, [esp, +, 4] - mov dword, ptr, [eax, +, 24], esi - mov dword, ptr, [eax, +, 28], edx + sub esp, 40 + mov ecx, dword, ptr, [esp, +, 52] + lea esi, [esp, +, 28] + mov eax, dword, ptr, [esp, +, 48] + lea edx, [ecx, +, 28] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 24] + lea esi, [esp, +, 24] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 20] + lea esi, [esp, +, 
20] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 16] + lea esi, [esp, +, 16] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 12] + lea esi, [esp, +, 12] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 8] + lea esi, [esp, +, 8] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 4] + lea esi, [esp, +, 4] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + mov edx, esp + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + movaps xmm0, xmmword, ptr, [esp] + movaps xmm1, xmmword, ptr, [esp, +, 16] + movaps xmmword, ptr, [eax], xmm0 + movaps xmmword, ptr, [eax, +, 16], xmm1 #MEMBARRIER - add esp, 8 + add esp, 40 pop esi - pop edi - pop ebx - pop ebp ret 4 asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: push ebp diff --git a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align2 b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align2 index 41460d6..d21bde4 100644 --- a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align2 @@ -1,101 +1,114 @@ asm_test::atomic_memcpy_load_align2::acquire: - push ebp - push ebx - push edi push esi - sub esp, 36 - mov ebp, dword, ptr, [esp, +, 60] - mov eax, dword, ptr, [esp, +, 56] - lea edi, [ebp, +, 3] - and edi, -4 - mov esi, edi - sub esi, ebp - cmp esi, 32 - ja .LBB4_4 - test esi, esi - je .LBB4_2 - lea edx, [ebp, +, 32] - xor ebx, ebx -.LBB4_11: - movzx ecx, byte, ptr, [ebp, +, ebx] - mov byte, ptr, [esp, +, ebx], cl - inc ebx - cmp esi, ebx - jne .LBB4_11 - sub edx, edi - cmp edx, 4 - jae .LBB4_3 - jmp .LBB4_7 -.LBB4_4: - movzx edx, word, ptr, [ebp, +, 30] - mov word, ptr, [esp, +, 30], dx - movzx edx, word, ptr, [ebp, +, 28] - mov word, ptr, [esp, +, 28], dx - movzx edx, 
word, ptr, [ebp, +, 26] - mov word, ptr, [esp, +, 26], dx - movzx edx, word, ptr, [ebp, +, 24] - mov word, ptr, [esp, +, 24], dx - movzx edx, word, ptr, [ebp, +, 22] - mov word, ptr, [esp, +, 22], dx - movzx edx, word, ptr, [ebp, +, 20] - mov word, ptr, [esp, +, 20], dx - movzx edx, word, ptr, [ebp, +, 18] - mov word, ptr, [esp, +, 18], dx - movzx edx, word, ptr, [ebp, +, 16] - mov word, ptr, [esp, +, 16], dx - movzx edx, word, ptr, [ebp, +, 14] - mov word, ptr, [esp, +, 14], dx - movzx edx, word, ptr, [ebp, +, 12] - mov word, ptr, [esp, +, 12], dx - movzx edx, word, ptr, [ebp, +, 10] - mov word, ptr, [esp, +, 10], dx - movzx edx, word, ptr, [ebp, +, 8] - mov word, ptr, [esp, +, 8], dx - movzx edx, word, ptr, [ebp, +, 6] - mov word, ptr, [esp, +, 6], dx - movzx edx, word, ptr, [ebp, +, 4] - mov word, ptr, [esp, +, 4], dx - movzx edx, word, ptr, [ebp, +, 2] - mov word, ptr, [esp, +, 2], dx - movzx ecx, word, ptr, [ebp] - mov word, ptr, [esp], cx - jmp .LBB4_5 -.LBB4_2: - mov edx, 32 -.LBB4_3: - mov ecx, dword, ptr, [ebp, +, esi] - add edx, -4 - mov dword, ptr, [esp, +, esi], ecx - add esi, 4 - cmp edx, 3 - ja .LBB4_3 -.LBB4_7: - test edx, edx - je .LBB4_5 - lea edi, [esp, +, esi] - add ebp, esi - xor esi, esi -.LBB4_9: - movzx ecx, byte, ptr, [ebp, +, esi] - mov byte, ptr, [edi, +, esi], cl - inc esi - cmp edx, esi - jne .LBB4_9 -.LBB4_5: - movsd xmm1, qword, ptr, [esp, +, 16] - movsd xmm0, qword, ptr, [esp, +, 24] - movsd xmm2, qword, ptr, [esp] - movsd qword, ptr, [eax, +, 16], xmm1 + sub esp, 32 + mov ecx, dword, ptr, [esp, +, 44] + lea esi, [esp, +, 30] + mov eax, dword, ptr, [esp, +, 40] + lea edx, [ecx, +, 30] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 28] + lea esi, [esp, +, 28] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 26] + lea esi, [esp, +, 26] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 24] + lea esi, [esp, +, 24] 
+ #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 22] + lea esi, [esp, +, 22] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 20] + lea esi, [esp, +, 20] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 18] + lea esi, [esp, +, 18] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 16] + lea esi, [esp, +, 16] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 14] + lea esi, [esp, +, 14] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 12] + lea esi, [esp, +, 12] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 10] + lea esi, [esp, +, 10] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 8] + lea esi, [esp, +, 8] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 6] + lea esi, [esp, +, 6] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 4] + lea esi, [esp, +, 4] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [ecx, +, 2] + lea esi, [esp, +, 2] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + mov edx, esp + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP movsd xmm1, qword, ptr, [esp, +, 8] - movsd qword, ptr, [eax, +, 24], xmm0 - movsd qword, ptr, [eax], xmm2 + movsd xmm0, qword, ptr, [esp] + movsd xmm2, qword, ptr, [esp, +, 16] movsd qword, ptr, [eax, +, 8], xmm1 + movsd xmm1, qword, ptr, [esp, +, 24] + movsd qword, ptr, [eax], xmm0 + movsd qword, ptr, [eax, +, 16], xmm2 + movsd qword, ptr, [eax, +, 24], xmm1 #MEMBARRIER - add esp, 36 + add esp, 32 pop esi - pop edi - pop ebx - pop ebp ret 4 asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: push ebp diff --git 
a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align4 b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align4 index 43f7e29..0a634f9 100644 --- a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align4 @@ -1,37 +1,66 @@ asm_test::atomic_memcpy_load_align4::acquire: - push ebp - push ebx - push edi push esi - sub esp, 8 - mov eax, dword, ptr, [esp, +, 32] - mov ecx, dword, ptr, [eax, +, 28] - mov dword, ptr, [esp, +, 4], ecx - mov ecx, dword, ptr, [eax, +, 24] - mov dword, ptr, [esp], ecx - mov esi, dword, ptr, [eax, +, 20] - mov edi, dword, ptr, [eax, +, 16] - mov ebx, dword, ptr, [eax, +, 12] - mov ebp, dword, ptr, [eax, +, 8] - mov edx, dword, ptr, [eax, +, 4] - mov ecx, dword, ptr, [eax] - mov eax, dword, ptr, [esp, +, 28] - mov dword, ptr, [eax], ecx - mov dword, ptr, [eax, +, 4], edx - mov dword, ptr, [eax, +, 8], ebp - mov dword, ptr, [eax, +, 12], ebx - mov dword, ptr, [eax, +, 16], edi - mov dword, ptr, [eax, +, 20], esi - mov esi, dword, ptr, [esp] - mov edx, dword, ptr, [esp, +, 4] - mov dword, ptr, [eax, +, 24], esi - mov dword, ptr, [eax, +, 28], edx + sub esp, 32 + mov ecx, dword, ptr, [esp, +, 44] + lea esi, [esp, +, 28] + mov eax, dword, ptr, [esp, +, 40] + lea edx, [ecx, +, 28] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 24] + lea esi, [esp, +, 24] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 20] + lea esi, [esp, +, 20] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 16] + lea esi, [esp, +, 16] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 12] + lea esi, [esp, +, 12] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 8] + lea esi, [esp, +, 8] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, 
[esi], edx + #NO_APP + lea edx, [ecx, +, 4] + lea esi, [esp, +, 4] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + mov edx, esp + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + movsd xmm1, qword, ptr, [esp, +, 8] + movsd xmm0, qword, ptr, [esp] + movsd xmm2, qword, ptr, [esp, +, 16] + movsd qword, ptr, [eax, +, 8], xmm1 + movsd xmm1, qword, ptr, [esp, +, 24] + movsd qword, ptr, [eax], xmm0 + movsd qword, ptr, [eax, +, 16], xmm2 + movsd qword, ptr, [eax, +, 24], xmm1 #MEMBARRIER - add esp, 8 + add esp, 32 pop esi - pop edi - pop ebx - pop ebp ret 4 asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: push ebp diff --git a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align8 b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align8 index 4bdcc1b..8e6c10a 100644 --- a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_load_align8 @@ -1,37 +1,66 @@ asm_test::atomic_memcpy_load_align8::acquire: - push ebp - push ebx - push edi push esi - sub esp, 8 - mov eax, dword, ptr, [esp, +, 32] - mov ecx, dword, ptr, [eax, +, 28] - mov dword, ptr, [esp, +, 4], ecx - mov ecx, dword, ptr, [eax, +, 24] - mov dword, ptr, [esp], ecx - mov esi, dword, ptr, [eax, +, 20] - mov edi, dword, ptr, [eax, +, 16] - mov ebx, dword, ptr, [eax, +, 12] - mov ebp, dword, ptr, [eax, +, 8] - mov edx, dword, ptr, [eax, +, 4] - mov ecx, dword, ptr, [eax] - mov eax, dword, ptr, [esp, +, 28] - mov dword, ptr, [eax], ecx - mov dword, ptr, [eax, +, 4], edx - mov dword, ptr, [eax, +, 8], ebp - mov dword, ptr, [eax, +, 12], ebx - mov dword, ptr, [eax, +, 16], edi - mov dword, ptr, [eax, +, 20], esi - mov esi, dword, ptr, [esp] - mov edx, dword, ptr, [esp, +, 4] - mov dword, ptr, [eax, +, 24], esi - mov dword, ptr, [eax, +, 28], edx + sub esp, 32 + mov ecx, dword, ptr, [esp, +, 44] + lea esi, [esp, +, 28] + mov eax, dword, ptr, [esp, +, 
40] + lea edx, [ecx, +, 28] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 24] + lea esi, [esp, +, 24] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 20] + lea esi, [esp, +, 20] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 16] + lea esi, [esp, +, 16] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 12] + lea esi, [esp, +, 12] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 8] + lea esi, [esp, +, 8] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [ecx, +, 4] + lea esi, [esp, +, 4] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + mov edx, esp + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + movsd xmm1, qword, ptr, [esp, +, 8] + movsd xmm0, qword, ptr, [esp] + movsd xmm2, qword, ptr, [esp, +, 16] + movsd qword, ptr, [eax, +, 8], xmm1 + movsd xmm1, qword, ptr, [esp, +, 24] + movsd qword, ptr, [eax], xmm0 + movsd qword, ptr, [eax, +, 16], xmm2 + movsd qword, ptr, [eax, +, 24], xmm1 #MEMBARRIER - add esp, 8 + add esp, 32 pop esi - pop edi - pop ebx - pop ebp ret 4 asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: push ebp diff --git a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align1 b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align1 index efec32c..75fc5c0 100644 --- a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align1 @@ -5,18 +5,18 @@ asm_test::atomic_memcpy_store_align1::release: sub esp, 64 mov ecx, dword, ptr, [esp, +, 84] mov eax, dword, ptr, [esp, +, 80] + lea ebx, [esp, +, 31] + lea esi, [esp, +, 24] + lea edx, [esp, +, 16] movsd xmm1, qword, ptr, [ecx, +, 16] movsd xmm0, qword, ptr, [ecx, 
+, 24] movsd xmm3, qword, ptr, [ecx] - lea esi, [eax, +, 3] - and esi, -4 - mov edx, esi - sub edx, eax + lea edi, [eax, +, 31] movsd qword, ptr, [esp, +, 48], xmm1 movsd xmm1, qword, ptr, [ecx, +, 8] movsd qword, ptr, [esp, +, 56], xmm0 movsd qword, ptr, [esp, +, 32], xmm3 - cmp edx, 33 + lea ecx, [esp, +, 8] movsd qword, ptr, [esp, +, 40], xmm1 #MEMBARRIER movsd xmm2, qword, ptr, [esp, +, 56] @@ -27,109 +27,192 @@ asm_test::atomic_memcpy_store_align1::release: movsd xmm1, qword, ptr, [esp, +, 40] movsd qword, ptr, [esp, +, 8], xmm1 movsd qword, ptr, [esp], xmm2 - jae .LBB2_1 - test edx, edx - je .LBB2_4 - lea ecx, [eax, +, 32] - xor edi, edi -.LBB2_11: - movzx ebx, byte, ptr, [esp, +, edi] - mov byte, ptr, [eax, +, edi], bl - inc edi - cmp edx, edi - jne .LBB2_11 - sub ecx, esi - cmp ecx, 4 - jae .LBB2_5 - jmp .LBB2_7 -.LBB2_1: - mov cl, byte, ptr, [esp] + #APP + mov bl, byte, ptr, [ebx] + mov byte, ptr, [edi], bl + #NO_APP + lea edi, [eax, +, 30] + lea ebx, [esp, +, 30] + #APP + mov bl, byte, ptr, [ebx] + mov byte, ptr, [edi], bl + #NO_APP + lea edi, [eax, +, 29] + lea ebx, [esp, +, 29] + #APP + mov bl, byte, ptr, [ebx] + mov byte, ptr, [edi], bl + #NO_APP + lea edi, [eax, +, 28] + lea ebx, [esp, +, 28] + #APP + mov bl, byte, ptr, [ebx] + mov byte, ptr, [edi], bl + #NO_APP + lea edi, [eax, +, 27] + lea ebx, [esp, +, 27] + #APP + mov bl, byte, ptr, [ebx] + mov byte, ptr, [edi], bl + #NO_APP + lea edi, [eax, +, 26] + lea ebx, [esp, +, 26] + #APP + mov bl, byte, ptr, [ebx] + mov byte, ptr, [edi], bl + #NO_APP + lea edi, [eax, +, 25] + lea ebx, [esp, +, 25] + #APP + mov bl, byte, ptr, [ebx] + mov byte, ptr, [edi], bl + #NO_APP + lea edi, [eax, +, 24] + #APP + mov bl, byte, ptr, [esi] + mov byte, ptr, [edi], bl + #NO_APP + lea esi, [eax, +, 23] + lea edi, [esp, +, 23] + #APP + mov bl, byte, ptr, [edi] + mov byte, ptr, [esi], bl + #NO_APP + lea esi, [eax, +, 22] + lea edi, [esp, +, 22] + #APP + mov bl, byte, ptr, [edi] + mov byte, ptr, [esi], bl + #NO_APP + lea esi, 
[eax, +, 21] + lea edi, [esp, +, 21] + #APP + mov bl, byte, ptr, [edi] + mov byte, ptr, [esi], bl + #NO_APP + lea esi, [eax, +, 20] + lea edi, [esp, +, 20] + #APP + mov bl, byte, ptr, [edi] + mov byte, ptr, [esi], bl + #NO_APP + lea esi, [eax, +, 19] + lea edi, [esp, +, 19] + #APP + mov bl, byte, ptr, [edi] + mov byte, ptr, [esi], bl + #NO_APP + lea esi, [eax, +, 18] + lea edi, [esp, +, 18] + #APP + mov bl, byte, ptr, [edi] + mov byte, ptr, [esi], bl + #NO_APP + lea esi, [eax, +, 17] + lea edi, [esp, +, 17] + #APP + mov bl, byte, ptr, [edi] + mov byte, ptr, [esi], bl + #NO_APP + lea esi, [eax, +, 16] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [esi], dl + #NO_APP + lea edx, [eax, +, 15] + lea esi, [esp, +, 15] + #APP + mov bl, byte, ptr, [esi] + mov byte, ptr, [edx], bl + #NO_APP + lea edx, [eax, +, 14] + lea esi, [esp, +, 14] + #APP + mov bl, byte, ptr, [esi] + mov byte, ptr, [edx], bl + #NO_APP + lea edx, [eax, +, 13] + lea esi, [esp, +, 13] + #APP + mov bl, byte, ptr, [esi] + mov byte, ptr, [edx], bl + #NO_APP + lea edx, [eax, +, 12] + lea esi, [esp, +, 12] + #APP + mov bl, byte, ptr, [esi] + mov byte, ptr, [edx], bl + #NO_APP + lea edx, [eax, +, 11] + lea esi, [esp, +, 11] + #APP + mov bl, byte, ptr, [esi] + mov byte, ptr, [edx], bl + #NO_APP + lea edx, [eax, +, 10] + lea esi, [esp, +, 10] + #APP + mov bl, byte, ptr, [esi] + mov byte, ptr, [edx], bl + #NO_APP + lea edx, [eax, +, 9] + lea esi, [esp, +, 9] + #APP + mov bl, byte, ptr, [esi] + mov byte, ptr, [edx], bl + #NO_APP + lea edx, [eax, +, 8] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [eax, +, 7] + lea edx, [esp, +, 7] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 6] + lea edx, [esp, +, 6] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 5] + lea edx, [esp, +, 5] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 4] + lea edx, [esp, +, 
4] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 3] + lea edx, [esp, +, 3] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 2] + lea edx, [esp, +, 2] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [eax, +, 1] + lea edx, [esp, +, 1] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + mov ecx, esp + #APP + mov cl, byte, ptr, [ecx] mov byte, ptr, [eax], cl - mov cl, byte, ptr, [esp, +, 1] - mov byte, ptr, [eax, +, 1], cl - mov cl, byte, ptr, [esp, +, 2] - mov byte, ptr, [eax, +, 2], cl - mov cl, byte, ptr, [esp, +, 3] - mov byte, ptr, [eax, +, 3], cl - mov cl, byte, ptr, [esp, +, 4] - mov byte, ptr, [eax, +, 4], cl - mov cl, byte, ptr, [esp, +, 5] - mov byte, ptr, [eax, +, 5], cl - mov cl, byte, ptr, [esp, +, 6] - mov byte, ptr, [eax, +, 6], cl - mov cl, byte, ptr, [esp, +, 7] - mov byte, ptr, [eax, +, 7], cl - mov cl, byte, ptr, [esp, +, 8] - mov byte, ptr, [eax, +, 8], cl - mov cl, byte, ptr, [esp, +, 9] - mov byte, ptr, [eax, +, 9], cl - mov cl, byte, ptr, [esp, +, 10] - mov byte, ptr, [eax, +, 10], cl - mov cl, byte, ptr, [esp, +, 11] - mov byte, ptr, [eax, +, 11], cl - mov cl, byte, ptr, [esp, +, 12] - mov byte, ptr, [eax, +, 12], cl - mov cl, byte, ptr, [esp, +, 13] - mov byte, ptr, [eax, +, 13], cl - mov cl, byte, ptr, [esp, +, 14] - mov byte, ptr, [eax, +, 14], cl - mov cl, byte, ptr, [esp, +, 15] - mov byte, ptr, [eax, +, 15], cl - mov cl, byte, ptr, [esp, +, 16] - mov byte, ptr, [eax, +, 16], cl - mov cl, byte, ptr, [esp, +, 17] - mov byte, ptr, [eax, +, 17], cl - mov cl, byte, ptr, [esp, +, 18] - mov byte, ptr, [eax, +, 18], cl - mov cl, byte, ptr, [esp, +, 19] - mov byte, ptr, [eax, +, 19], cl - mov cl, byte, ptr, [esp, +, 20] - mov byte, ptr, [eax, +, 20], cl - mov cl, byte, ptr, [esp, +, 21] - mov byte, ptr, [eax, +, 21], cl - mov cl, byte, ptr, [esp, +, 22] - mov byte, ptr, [eax, +, 22], cl - mov cl, byte, ptr, 
[esp, +, 23] - mov byte, ptr, [eax, +, 23], cl - mov cl, byte, ptr, [esp, +, 24] - mov byte, ptr, [eax, +, 24], cl - mov cl, byte, ptr, [esp, +, 25] - mov byte, ptr, [eax, +, 25], cl - mov cl, byte, ptr, [esp, +, 26] - mov byte, ptr, [eax, +, 26], cl - mov cl, byte, ptr, [esp, +, 27] - mov byte, ptr, [eax, +, 27], cl - mov cl, byte, ptr, [esp, +, 28] - mov byte, ptr, [eax, +, 28], cl - mov cl, byte, ptr, [esp, +, 29] - mov byte, ptr, [eax, +, 29], cl - mov cl, byte, ptr, [esp, +, 30] - mov byte, ptr, [eax, +, 30], cl - mov cl, byte, ptr, [esp, +, 31] - mov byte, ptr, [eax, +, 31], cl - jmp .LBB2_2 -.LBB2_4: - mov ecx, 32 -.LBB2_5: - mov esi, dword, ptr, [esp, +, edx] - add ecx, -4 - mov dword, ptr, [eax, +, edx], esi - add edx, 4 - cmp ecx, 3 - ja .LBB2_5 -.LBB2_7: - test ecx, ecx - je .LBB2_2 - add eax, edx - add edx, esp - xor esi, esi -.LBB2_9: - movzx ebx, byte, ptr, [edx, +, esi] - mov byte, ptr, [eax, +, esi], bl - inc esi - cmp ecx, esi - jne .LBB2_9 -.LBB2_2: + #NO_APP add esp, 64 pop esi pop edi diff --git a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align16 b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align16 index 0613d06..1799a51 100644 --- a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align16 @@ -1,37 +1,66 @@ asm_test::atomic_memcpy_store_align16::release: - push ebp - push ebx - push edi push esi - sub esp, 8 - mov ecx, dword, ptr, [esp, +, 32] - mov eax, dword, ptr, [ecx] - mov esi, dword, ptr, [ecx, +, 8] - mov edi, dword, ptr, [ecx, +, 12] - mov ebx, dword, ptr, [ecx, +, 16] - mov ebp, dword, ptr, [ecx, +, 20] - mov edx, dword, ptr, [ecx, +, 24] - mov dword, ptr, [esp, +, 4], eax - mov eax, dword, ptr, [ecx, +, 4] - mov ecx, dword, ptr, [ecx, +, 28] - mov dword, ptr, [esp], eax - mov eax, dword, ptr, [esp, +, 28] + sub esp, 72 + mov ecx, dword, ptr, [esp, +, 84] + mov eax, dword, ptr, [esp, +, 80] + lea esi, [esp, +, 
28] + movaps xmm0, xmmword, ptr, [ecx] + movaps xmm1, xmmword, ptr, [ecx, +, 16] + lea edx, [eax, +, 28] + lea ecx, [esp, +, 16] + movaps xmmword, ptr, [esp, +, 48], xmm1 + movaps xmmword, ptr, [esp, +, 32], xmm0 #MEMBARRIER - mov dword, ptr, [eax, +, 28], ecx - mov dword, ptr, [eax, +, 24], edx - mov dword, ptr, [eax, +, 20], ebp - mov dword, ptr, [eax, +, 16], ebx - mov dword, ptr, [eax, +, 12], edi - mov dword, ptr, [eax, +, 8], esi - mov ecx, dword, ptr, [esp] - mov dword, ptr, [eax, +, 4], ecx - mov ecx, dword, ptr, [esp, +, 4] + movaps xmm2, xmmword, ptr, [esp, +, 32] + movaps xmm1, xmmword, ptr, [esp, +, 48] + movaps xmmword, ptr, [esp, +, 16], xmm1 + movaps xmmword, ptr, [esp], xmm2 + #APP + mov esi, dword, ptr, [esi] + mov dword, ptr, [edx], esi + #NO_APP + lea edx, [eax, +, 24] + lea esi, [esp, +, 24] + #APP + mov esi, dword, ptr, [esi] + mov dword, ptr, [edx], esi + #NO_APP + lea edx, [eax, +, 20] + lea esi, [esp, +, 20] + #APP + mov esi, dword, ptr, [esi] + mov dword, ptr, [edx], esi + #NO_APP + lea edx, [eax, +, 16] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [eax, +, 12] + lea edx, [esp, +, 12] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [ecx], edx + #NO_APP + lea ecx, [eax, +, 8] + lea edx, [esp, +, 8] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [ecx], edx + #NO_APP + lea ecx, [eax, +, 4] + lea edx, [esp, +, 4] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [ecx], edx + #NO_APP + mov ecx, esp + #APP + mov ecx, dword, ptr, [ecx] mov dword, ptr, [eax], ecx - add esp, 8 + #NO_APP + add esp, 72 pop esi - pop edi - pop ebx - pop ebp ret asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: sub esp, 44 diff --git a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align2 b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align2 index c8bb485..9bf7839 100644 --- a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align2 +++ 
b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align2 @@ -5,18 +5,18 @@ asm_test::atomic_memcpy_store_align2::release: sub esp, 64 mov ecx, dword, ptr, [esp, +, 84] mov eax, dword, ptr, [esp, +, 80] + lea ebx, [esp, +, 30] + lea esi, [esp, +, 24] + lea edx, [esp, +, 16] movsd xmm1, qword, ptr, [ecx, +, 16] movsd xmm0, qword, ptr, [ecx, +, 24] movsd xmm3, qword, ptr, [ecx] - lea esi, [eax, +, 3] - and esi, -4 - mov edx, esi - sub edx, eax + lea edi, [eax, +, 30] movsd qword, ptr, [esp, +, 48], xmm1 movsd xmm1, qword, ptr, [ecx, +, 8] movsd qword, ptr, [esp, +, 56], xmm0 movsd qword, ptr, [esp, +, 32], xmm3 - cmp edx, 32 + lea ecx, [esp, +, 8] movsd qword, ptr, [esp, +, 40], xmm1 #MEMBARRIER movsd xmm2, qword, ptr, [esp, +, 56] @@ -27,77 +27,96 @@ asm_test::atomic_memcpy_store_align2::release: movsd xmm1, qword, ptr, [esp, +, 40] movsd qword, ptr, [esp, +, 8], xmm1 movsd qword, ptr, [esp], xmm2 - ja .LBB6_10 - test edx, edx - je .LBB6_2 - lea ecx, [eax, +, 32] - xor edi, edi -.LBB6_9: - movzx ebx, byte, ptr, [esp, +, edi] - mov byte, ptr, [eax, +, edi], bl - inc edi - cmp edx, edi - jne .LBB6_9 - sub ecx, esi - cmp ecx, 4 - jae .LBB6_3 - jmp .LBB6_5 -.LBB6_10: - movzx ecx, word, ptr, [esp, +, 30] - mov word, ptr, [eax, +, 30], cx - movzx ecx, word, ptr, [esp, +, 28] - mov word, ptr, [eax, +, 28], cx - movzx ecx, word, ptr, [esp, +, 26] - mov word, ptr, [eax, +, 26], cx - movzx ecx, word, ptr, [esp, +, 24] - mov word, ptr, [eax, +, 24], cx - movzx ecx, word, ptr, [esp, +, 22] - mov word, ptr, [eax, +, 22], cx - movzx ecx, word, ptr, [esp, +, 20] - mov word, ptr, [eax, +, 20], cx - movzx ecx, word, ptr, [esp, +, 18] - mov word, ptr, [eax, +, 18], cx - movzx ecx, word, ptr, [esp, +, 16] - mov word, ptr, [eax, +, 16], cx - movzx ecx, word, ptr, [esp, +, 14] - mov word, ptr, [eax, +, 14], cx - movzx ecx, word, ptr, [esp, +, 12] - mov word, ptr, [eax, +, 12], cx - movzx ecx, word, ptr, [esp, +, 10] - mov word, ptr, [eax, +, 10], cx - movzx ecx, word, ptr, 
[esp, +, 8] - mov word, ptr, [eax, +, 8], cx - movzx ecx, word, ptr, [esp, +, 6] - mov word, ptr, [eax, +, 6], cx - movzx ecx, word, ptr, [esp, +, 4] - mov word, ptr, [eax, +, 4], cx - movzx ecx, word, ptr, [esp, +, 2] - mov word, ptr, [eax, +, 2], cx - movzx ecx, word, ptr, [esp] + #APP + mov bx, word, ptr, [ebx] + mov word, ptr, [edi], bx + #NO_APP + lea edi, [eax, +, 28] + lea ebx, [esp, +, 28] + #APP + mov bx, word, ptr, [ebx] + mov word, ptr, [edi], bx + #NO_APP + lea edi, [eax, +, 26] + lea ebx, [esp, +, 26] + #APP + mov bx, word, ptr, [ebx] + mov word, ptr, [edi], bx + #NO_APP + lea edi, [eax, +, 24] + #APP + mov si, word, ptr, [esi] + mov word, ptr, [edi], si + #NO_APP + lea esi, [eax, +, 22] + lea edi, [esp, +, 22] + #APP + mov di, word, ptr, [edi] + mov word, ptr, [esi], di + #NO_APP + lea esi, [eax, +, 20] + lea edi, [esp, +, 20] + #APP + mov di, word, ptr, [edi] + mov word, ptr, [esi], di + #NO_APP + lea esi, [eax, +, 18] + lea edi, [esp, +, 18] + #APP + mov di, word, ptr, [edi] + mov word, ptr, [esi], di + #NO_APP + lea esi, [eax, +, 16] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [esi], dx + #NO_APP + lea edx, [eax, +, 14] + lea esi, [esp, +, 14] + #APP + mov si, word, ptr, [esi] + mov word, ptr, [edx], si + #NO_APP + lea edx, [eax, +, 12] + lea esi, [esp, +, 12] + #APP + mov si, word, ptr, [esi] + mov word, ptr, [edx], si + #NO_APP + lea edx, [eax, +, 10] + lea esi, [esp, +, 10] + #APP + mov si, word, ptr, [esi] + mov word, ptr, [edx], si + #NO_APP + lea edx, [eax, +, 8] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [eax, +, 6] + lea edx, [esp, +, 6] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + lea ecx, [eax, +, 4] + lea edx, [esp, +, 4] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + lea ecx, [eax, +, 2] + lea edx, [esp, +, 2] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + mov ecx, esp + #APP + mov cx, word, ptr, [ecx] mov word, 
ptr, [eax], cx - jmp .LBB6_11 -.LBB6_2: - mov ecx, 32 -.LBB6_3: - mov esi, dword, ptr, [esp, +, edx] - add ecx, -4 - mov dword, ptr, [eax, +, edx], esi - add edx, 4 - cmp ecx, 3 - ja .LBB6_3 -.LBB6_5: - test ecx, ecx - je .LBB6_11 - add eax, edx - add edx, esp - xor esi, esi -.LBB6_7: - movzx ebx, byte, ptr, [edx, +, esi] - mov byte, ptr, [eax, +, esi], bl - inc esi - cmp ecx, esi - jne .LBB6_7 -.LBB6_11: + #NO_APP add esp, 64 pop esi pop edi diff --git a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align4 b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align4 index e272d23..841c5f3 100644 --- a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align4 @@ -1,37 +1,78 @@ asm_test::atomic_memcpy_store_align4::release: - push ebp push ebx push edi push esi - sub esp, 8 - mov ecx, dword, ptr, [esp, +, 32] - mov eax, dword, ptr, [ecx] - mov esi, dword, ptr, [ecx, +, 8] - mov edi, dword, ptr, [ecx, +, 12] - mov ebx, dword, ptr, [ecx, +, 16] - mov ebp, dword, ptr, [ecx, +, 20] - mov edx, dword, ptr, [ecx, +, 24] - mov dword, ptr, [esp, +, 4], eax - mov eax, dword, ptr, [ecx, +, 4] - mov ecx, dword, ptr, [ecx, +, 28] - mov dword, ptr, [esp], eax - mov eax, dword, ptr, [esp, +, 28] + sub esp, 64 + mov ecx, dword, ptr, [esp, +, 84] + mov eax, dword, ptr, [esp, +, 80] + lea ebx, [esp, +, 28] + lea esi, [esp, +, 24] + lea edx, [esp, +, 16] + movsd xmm1, qword, ptr, [ecx, +, 16] + movsd xmm0, qword, ptr, [ecx, +, 24] + movsd xmm3, qword, ptr, [ecx] + lea edi, [eax, +, 28] + movsd qword, ptr, [esp, +, 48], xmm1 + movsd xmm1, qword, ptr, [ecx, +, 8] + movsd qword, ptr, [esp, +, 56], xmm0 + movsd qword, ptr, [esp, +, 32], xmm3 + lea ecx, [esp, +, 8] + movsd qword, ptr, [esp, +, 40], xmm1 #MEMBARRIER - mov dword, ptr, [eax, +, 28], ecx - mov dword, ptr, [eax, +, 24], edx - mov dword, ptr, [eax, +, 20], ebp - mov dword, ptr, [eax, +, 16], ebx - mov dword, ptr, [eax, 
+, 12], edi - mov dword, ptr, [eax, +, 8], esi - mov ecx, dword, ptr, [esp] - mov dword, ptr, [eax, +, 4], ecx - mov ecx, dword, ptr, [esp, +, 4] + movsd xmm2, qword, ptr, [esp, +, 56] + movsd xmm1, qword, ptr, [esp, +, 48] + movsd qword, ptr, [esp, +, 24], xmm2 + movsd qword, ptr, [esp, +, 16], xmm1 + movsd xmm2, qword, ptr, [esp, +, 32] + movsd xmm1, qword, ptr, [esp, +, 40] + movsd qword, ptr, [esp, +, 8], xmm1 + movsd qword, ptr, [esp], xmm2 + #APP + mov ebx, dword, ptr, [ebx] + mov dword, ptr, [edi], ebx + #NO_APP + lea edi, [eax, +, 24] + #APP + mov esi, dword, ptr, [esi] + mov dword, ptr, [edi], esi + #NO_APP + lea esi, [eax, +, 20] + lea edi, [esp, +, 20] + #APP + mov edi, dword, ptr, [edi] + mov dword, ptr, [esi], edi + #NO_APP + lea esi, [eax, +, 16] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [eax, +, 12] + lea esi, [esp, +, 12] + #APP + mov esi, dword, ptr, [esi] + mov dword, ptr, [edx], esi + #NO_APP + lea edx, [eax, +, 8] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [eax, +, 4] + lea edx, [esp, +, 4] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [ecx], edx + #NO_APP + mov ecx, esp + #APP + mov ecx, dword, ptr, [ecx] mov dword, ptr, [eax], ecx - add esp, 8 + #NO_APP + add esp, 64 pop esi pop edi pop ebx - pop ebp ret asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: sub esp, 32 diff --git a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align8 b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align8 index a360495..a92bacd 100644 --- a/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/i686-unknown-linux-gnu/atomic_memcpy_store_align8 @@ -1,37 +1,78 @@ asm_test::atomic_memcpy_store_align8::release: - push ebp push ebx push edi push esi - sub esp, 8 - mov ecx, dword, ptr, [esp, +, 32] - mov eax, dword, ptr, [ecx] - mov esi, dword, ptr, [ecx, +, 8] - mov edi, dword, ptr, [ecx, 
+, 12] - mov ebx, dword, ptr, [ecx, +, 16] - mov ebp, dword, ptr, [ecx, +, 20] - mov edx, dword, ptr, [ecx, +, 24] - mov dword, ptr, [esp, +, 4], eax - mov eax, dword, ptr, [ecx, +, 4] - mov ecx, dword, ptr, [ecx, +, 28] - mov dword, ptr, [esp], eax - mov eax, dword, ptr, [esp, +, 28] + sub esp, 64 + mov ecx, dword, ptr, [esp, +, 84] + mov eax, dword, ptr, [esp, +, 80] + lea ebx, [esp, +, 28] + lea esi, [esp, +, 24] + lea edx, [esp, +, 16] + movsd xmm1, qword, ptr, [ecx, +, 16] + movsd xmm0, qword, ptr, [ecx, +, 24] + movsd xmm3, qword, ptr, [ecx] + lea edi, [eax, +, 28] + movsd qword, ptr, [esp, +, 48], xmm1 + movsd xmm1, qword, ptr, [ecx, +, 8] + movsd qword, ptr, [esp, +, 56], xmm0 + movsd qword, ptr, [esp, +, 32], xmm3 + lea ecx, [esp, +, 8] + movsd qword, ptr, [esp, +, 40], xmm1 #MEMBARRIER - mov dword, ptr, [eax, +, 28], ecx - mov dword, ptr, [eax, +, 24], edx - mov dword, ptr, [eax, +, 20], ebp - mov dword, ptr, [eax, +, 16], ebx - mov dword, ptr, [eax, +, 12], edi - mov dword, ptr, [eax, +, 8], esi - mov ecx, dword, ptr, [esp] - mov dword, ptr, [eax, +, 4], ecx - mov ecx, dword, ptr, [esp, +, 4] + movsd xmm2, qword, ptr, [esp, +, 56] + movsd xmm1, qword, ptr, [esp, +, 48] + movsd qword, ptr, [esp, +, 24], xmm2 + movsd qword, ptr, [esp, +, 16], xmm1 + movsd xmm2, qword, ptr, [esp, +, 32] + movsd xmm1, qword, ptr, [esp, +, 40] + movsd qword, ptr, [esp, +, 8], xmm1 + movsd qword, ptr, [esp], xmm2 + #APP + mov ebx, dword, ptr, [ebx] + mov dword, ptr, [edi], ebx + #NO_APP + lea edi, [eax, +, 24] + #APP + mov esi, dword, ptr, [esi] + mov dword, ptr, [edi], esi + #NO_APP + lea esi, [eax, +, 20] + lea edi, [esp, +, 20] + #APP + mov edi, dword, ptr, [edi] + mov dword, ptr, [esi], edi + #NO_APP + lea esi, [eax, +, 16] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [esi], edx + #NO_APP + lea edx, [eax, +, 12] + lea esi, [esp, +, 12] + #APP + mov esi, dword, ptr, [esi] + mov dword, ptr, [edx], esi + #NO_APP + lea edx, [eax, +, 8] + #APP + mov ecx, dword, ptr, 
[ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [eax, +, 4] + lea edx, [esp, +, 4] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [ecx], edx + #NO_APP + mov ecx, esp + #APP + mov ecx, dword, ptr, [ecx] mov dword, ptr, [eax], ecx - add esp, 8 + #NO_APP + add esp, 64 pop esi pop edi pop ebx - pop ebp ret asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: sub esp, 36 diff --git a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align1 b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align1 index ece3ecf..17009b9 100644 --- a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align1 @@ -1,182 +1,248 @@ asm_test::atomic_memcpy_load_align1::acquire: - addiu $sp, $sp, -32 - addiu $1, $5, 3 - addiu $2, $zero, -4 - and $3, $1, $2 - subu $2, $3, $5 - sltiu $1, $2, 33 - beqz $1, $BB0_6 - nop - beqz $2, $BB0_7 - nop - addiu $6, $sp, 0 - subu $8, $5, $3 - addiu $7, $5, 32 - move $9, $5 -$BB0_3: - lb $1, 0($9) - addiu $9, $9, 1 - sb $1, 0($6) - addiu $1, $8, 1 - addiu $6, $6, 1 - sltu $10, $1, $8 - beqz $10, $BB0_3 - move $8, $1 - subu $3, $7, $3 - sltiu $1, $3, 4 - bnez $1, $BB0_10 - nop - b $BB0_8 - nop -$BB0_6: - lb $1, 0($5) - sb $1, 0($sp) - lb $1, 1($5) - sb $1, 1($sp) - lb $1, 2($5) - sb $1, 2($sp) - lb $1, 3($5) - sb $1, 3($sp) - lb $1, 4($5) - sb $1, 4($sp) - lb $1, 5($5) - sb $1, 5($sp) - lb $1, 6($5) - sb $1, 6($sp) - lb $1, 7($5) - sb $1, 7($sp) - lb $1, 8($5) - sb $1, 8($sp) - lb $1, 9($5) - sb $1, 9($sp) - lb $1, 10($5) - sb $1, 10($sp) - lb $1, 11($5) - sb $1, 11($sp) - lb $1, 12($5) - sb $1, 12($sp) - lb $1, 13($5) - sb $1, 13($sp) - lb $1, 14($5) - sb $1, 14($sp) - lb $1, 15($5) - sb $1, 15($sp) - lb $1, 16($5) - sb $1, 16($sp) - lb $1, 17($5) - sb $1, 17($sp) - lb $1, 18($5) - sb $1, 18($sp) - lb $1, 19($5) - sb $1, 19($sp) - lb $1, 20($5) - sb $1, 20($sp) - lb $1, 21($5) - sb $1, 21($sp) - lb $1, 22($5) - sb $1, 22($sp) 
- lb $1, 23($5) - sb $1, 23($sp) - lb $1, 24($5) - sb $1, 24($sp) - lb $1, 25($5) - sb $1, 25($sp) - lb $1, 26($5) - sb $1, 26($sp) - lb $1, 27($5) - sb $1, 27($sp) - lb $1, 28($5) - sb $1, 28($sp) - lb $1, 29($5) - sb $1, 29($sp) - lb $1, 30($5) - sb $1, 30($sp) - lb $1, 31($5) - sb $1, 31($sp) - lw $1, 12($sp) - lw $2, 20($sp) - lw $3, 24($sp) - lw $5, 4($sp) - lw $6, 8($sp) + addiu $sp, $sp, -80 + sw $ra, 76($sp) + sw $fp, 72($sp) + sw $23, 68($sp) + sw $22, 64($sp) + sw $21, 60($sp) + sw $20, 56($sp) + sw $19, 52($sp) + sw $18, 48($sp) + sw $17, 44($sp) + sw $16, 40($sp) + addiu $1, $sp, 8 + #APP + lb $2, 0($5) + sb $2, 0($1) + #NO_APP + addiu $10, $5, 5 + addiu $9, $5, 6 + addiu $13, $5, 7 + addiu $gp, $5, 8 + addiu $12, $5, 9 + addiu $25, $5, 10 + addiu $24, $5, 11 + addiu $15, $5, 12 + addiu $14, $5, 13 + addiu $17, $5, 14 + addiu $19, $5, 15 + addiu $16, $5, 16 + addiu $18, $5, 17 + addiu $7, $5, 28 + addiu $8, $5, 27 + addiu $20, $5, 26 + addiu $21, $5, 25 + addiu $22, $5, 24 + addiu $23, $5, 23 + addiu $fp, $5, 22 + addiu $ra, $5, 21 + addiu $2, $5, 1 + ori $3, $1, 1 + #APP + lb $6, 0($2) + sb $6, 0($3) + #NO_APP + ori $11, $1, 5 + addiu $2, $5, 2 + ori $3, $1, 2 + #APP + lb $6, 0($2) + sb $6, 0($3) + #NO_APP + addiu $2, $5, 3 + ori $3, $1, 3 + #APP + lb $6, 0($2) + sb $6, 0($3) + #NO_APP + addiu $2, $5, 4 + ori $3, $1, 4 + #APP + lb $6, 0($2) + sb $6, 0($3) + #NO_APP + addiu $2, $5, 31 + addiu $6, $5, 29 + sw $2, 4($sp) + addiu $2, $5, 30 + sw $2, 0($sp) + addiu $2, $5, 20 + #APP + lb $3, 0($10) + sb $3, 0($11) + #NO_APP + ori $10, $1, 6 + #APP + lb $11, 0($9) + sb $11, 0($10) + #NO_APP + ori $10, $1, 7 + addiu $9, $1, 9 + addiu $3, $5, 19 + addiu $5, $5, 18 + #APP + lb $11, 0($13) + sb $11, 0($10) + #NO_APP + addiu $10, $1, 8 + #APP + lb $13, 0($gp) + sb $13, 0($10) + #NO_APP + addiu $10, $1, 14 + addiu $11, $1, 15 + #APP + lb $gp, 0($12) + sb $gp, 0($9) + #NO_APP + addiu $12, $1, 10 + addiu $9, $1, 12 + addiu $13, $1, 13 + #APP + lb $gp, 0($25) + sb 
$gp, 0($12) + #NO_APP + addiu $12, $1, 11 + addiu $25, $1, 17 + #APP + lb $gp, 0($24) + sb $gp, 0($12) + #NO_APP + addiu $12, $1, 16 + addiu $24, $1, 31 + #APP + lb $gp, 0($15) + sb $gp, 0($9) + #NO_APP + addiu $9, $1, 30 + addiu $15, $1, 29 + #APP + lb $gp, 0($14) + sb $gp, 0($13) + #NO_APP + addiu $13, $1, 28 + addiu $14, $1, 27 + #APP + lb $gp, 0($17) + sb $gp, 0($10) + #NO_APP + addiu $10, $1, 26 + #APP + lb $17, 0($19) + sb $17, 0($11) + #NO_APP + addiu $gp, $1, 25 + addiu $11, $1, 24 + #APP + lb $19, 0($16) + sb $19, 0($12) + #NO_APP + addiu $17, $1, 23 + addiu $12, $1, 22 + addiu $16, $1, 21 + #APP + lb $19, 0($18) + sb $19, 0($25) + #NO_APP + addiu $25, $1, 20 + addiu $18, $1, 19 + addiu $1, $1, 18 + #APP + lb $19, 0($5) + sb $19, 0($1) + #NO_APP + #APP + lb $1, 0($3) + sb $1, 0($18) + #NO_APP + #APP + lb $1, 0($2) + sb $1, 0($25) + #NO_APP + #APP + lb $1, 0($ra) + sb $1, 0($16) + #NO_APP + #APP + lb $1, 0($fp) + sb $1, 0($12) + #NO_APP + #APP + lb $1, 0($23) + sb $1, 0($17) + #NO_APP + #APP + lb $1, 0($22) + sb $1, 0($11) + #NO_APP + #APP + lb $1, 0($21) + sb $1, 0($gp) + #NO_APP + #APP + lb $1, 0($20) + sb $1, 0($10) + #NO_APP + #APP + lb $1, 0($8) + sb $1, 0($14) + #NO_APP + #APP + lb $1, 0($7) + sb $1, 0($13) + #NO_APP + #APP + lb $1, 0($6) + sb $1, 0($15) + #NO_APP + lw $2, 0($sp) + #APP + lb $1, 0($2) + sb $1, 0($9) + #NO_APP + lw $2, 4($sp) + #APP + lb $1, 0($2) + sb $1, 0($24) + #NO_APP + lw $1, 8($sp) + lw $2, 24($sp) + lw $3, 20($sp) + lw $5, 12($sp) + lw $6, 28($sp) lw $7, 16($sp) - lw $8, 28($sp) - lw $9, 0($sp) - swl $3, 24($4) - swl $2, 20($4) - swl $7, 16($4) - swl $1, 12($4) - swl $6, 8($4) + lw $8, 36($sp) + lw $9, 32($sp) + swl $1, 0($4) swl $5, 4($4) - swl $9, 0($4) + swl $7, 8($4) + swl $3, 12($4) + swl $2, 16($4) + swl $6, 20($4) + swl $9, 24($4) swl $8, 28($4) - swr $3, 27($4) - swr $2, 23($4) - swr $7, 19($4) - swr $1, 15($4) - swr $6, 11($4) + swr $2, 19($4) + swr $1, 3($4) swr $5, 7($4) - swr $9, 3($4) + swr $7, 11($4) + swr $3, 
15($4) + swr $6, 23($4) + swr $9, 27($4) swr $8, 31($4) sync + lw $16, 40($sp) + lw $17, 44($sp) + lw $18, 48($sp) + lw $19, 52($sp) + lw $20, 56($sp) + lw $21, 60($sp) + lw $22, 64($sp) + lw $23, 68($sp) + lw $fp, 72($sp) + lw $ra, 76($sp) move $2, $4 jr $ra - addiu $sp, $sp, 32 -$BB0_7: - addiu $3, $zero, 32 -$BB0_8: - addiu $6, $sp, 0 -$BB0_9: - addu $7, $5, $2 - addu $1, $6, $2 - addiu $3, $3, -4 - lw $7, 0($7) - swl $7, 0($1) - swr $7, 3($1) - sltiu $1, $3, 4 - beqz $1, $BB0_9 - addiu $2, $2, 4 -$BB0_10: - beqz $3, $BB0_13 - nop - addiu $1, $sp, 0 - addu $5, $5, $2 - addu $2, $1, $2 -$BB0_12: - lb $1, 0($5) - addiu $5, $5, 1 - addiu $3, $3, -1 - sb $1, 0($2) - bnez $3, $BB0_12 - addiu $2, $2, 1 -$BB0_13: - lw $1, 28($sp) - lw $2, 12($sp) - lw $3, 16($sp) - lw $5, 24($sp) - lw $6, 8($sp) - lw $7, 20($sp) - lw $8, 0($sp) - lw $9, 4($sp) - swl $1, 28($4) - swl $5, 24($4) - swl $7, 20($4) - swl $3, 16($4) - swl $2, 12($4) - swl $6, 8($4) - swl $9, 4($4) - swl $8, 0($4) - swr $1, 31($4) - swr $5, 27($4) - swr $7, 23($4) - swr $3, 19($4) - swr $2, 15($4) - swr $6, 11($4) - swr $9, 7($4) - swr $8, 3($4) - sync - move $2, $4 - jr $ra - addiu $sp, $sp, 32 + addiu $sp, $sp, 80 $func_end0: asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: addiu $sp, $sp, -64 diff --git a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align16 b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align16 index 89dd7c7..850a2ee 100644 --- a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align16 @@ -1,24 +1,80 @@ asm_test::atomic_memcpy_load_align16::acquire: - lw $1, 0($5) - lw $2, 4($5) - lw $3, 8($5) - lw $6, 12($5) - lw $7, 16($5) - lw $8, 20($5) - lw $9, 24($5) - lw $5, 28($5) - sw $5, 28($4) - sw $9, 24($4) - sw $8, 20($4) - sw $7, 16($4) - sw $6, 12($4) + addiu $sp, $sp, -48 + sw $ra, 44($sp) + sw $fp, 40($sp) + move $fp, $sp + addiu $1, $zero, -16 + and $sp, 
$sp, $1 + addiu $1, $sp, 0 + #APP + lw $2, 0($5) + sw $2, 0($1) + #NO_APP + addiu $2, $5, 4 + ori $3, $1, 4 + #APP + lw $6, 0($2) + sw $6, 0($3) + #NO_APP + addiu $7, $1, 24 + addiu $2, $5, 8 + ori $3, $1, 8 + #APP + lw $6, 0($2) + sw $6, 0($3) + #NO_APP + addiu $2, $5, 12 + ori $3, $1, 12 + #APP + lw $6, 0($2) + sw $6, 0($3) + #NO_APP + addiu $2, $5, 16 + addiu $3, $1, 16 + #APP + lw $6, 0($2) + sw $6, 0($3) + #NO_APP + addiu $3, $1, 28 + addiu $2, $5, 28 + addiu $6, $5, 24 + addiu $5, $5, 20 + addiu $1, $1, 20 + #APP + lw $8, 0($5) + sw $8, 0($1) + #NO_APP + #APP + lw $1, 0($6) + sw $1, 0($7) + #NO_APP + #APP + lw $1, 0($2) + sw $1, 0($3) + #NO_APP + lw $1, 16($sp) + lw $2, 12($sp) + lw $3, 8($sp) + lw $5, 4($sp) + lw $6, 0($sp) + sw $1, 16($4) + lw $1, 20($sp) + sw $2, 12($4) + sw $6, 0($4) + sw $5, 4($4) sw $3, 8($4) - sw $2, 4($4) move $2, $4 - sw $1, 0($4) + sw $1, 20($4) + lw $1, 24($sp) + sw $1, 24($4) + lw $1, 28($sp) + sw $1, 28($4) sync + move $sp, $fp + lw $fp, 40($sp) + lw $ra, 44($sp) jr $ra - nop + addiu $sp, $sp, 48 $func_end16: asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: lw $1, 4($5) diff --git a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align2 b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align2 index b293f27..3a21700 100644 --- a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align2 @@ -1,123 +1,140 @@ asm_test::atomic_memcpy_load_align2::acquire: - addiu $sp, $sp, -32 - addiu $1, $5, 3 - addiu $2, $zero, -4 - and $3, $1, $2 - subu $2, $3, $5 - sltiu $1, $2, 33 - beqz $1, $BB4_6 - nop - beqz $2, $BB4_7 - nop - addiu $6, $sp, 0 - subu $8, $5, $3 - addiu $7, $5, 32 - move $9, $5 -$BB4_3: - lb $1, 0($9) - addiu $9, $9, 1 - sb $1, 0($6) - addiu $1, $8, 1 - addiu $6, $6, 1 - sltu $10, $1, $8 - beqz $10, $BB4_3 - move $8, $1 - subu $3, $7, $3 - sltiu $1, $3, 4 - bnez $1, $BB4_10 - nop - b $BB4_8 - nop 
-$BB4_6: - lh $1, 0($5) - sh $1, 0($sp) - lh $1, 2($5) - sh $1, 2($sp) - lh $1, 4($5) - sh $1, 4($sp) - lh $1, 6($5) - sh $1, 6($sp) - lh $1, 8($5) - sh $1, 8($sp) - lh $1, 10($5) - sh $1, 10($sp) - lh $1, 12($5) - sh $1, 12($sp) - lh $1, 14($5) - sh $1, 14($sp) - lh $1, 16($5) - sh $1, 16($sp) - lh $1, 18($5) - sh $1, 18($sp) - lh $1, 20($5) - sh $1, 20($sp) - lh $1, 22($5) - sh $1, 22($sp) - lh $1, 24($5) - sh $1, 24($sp) - lh $1, 26($5) - sh $1, 26($sp) - lh $1, 28($5) - sh $1, 28($sp) - lh $1, 30($5) - b $BB4_13 - sh $1, 30($sp) -$BB4_7: - addiu $3, $zero, 32 -$BB4_8: - addiu $6, $sp, 0 -$BB4_9: - addu $7, $5, $2 - addu $1, $6, $2 - addiu $3, $3, -4 - lw $7, 0($7) - swl $7, 0($1) - swr $7, 3($1) - sltiu $1, $3, 4 - beqz $1, $BB4_9 - addiu $2, $2, 4 -$BB4_10: - beqz $3, $BB4_13 - nop + addiu $sp, $sp, -56 + sw $21, 52($sp) + sw $20, 48($sp) + sw $19, 44($sp) + sw $18, 40($sp) + sw $17, 36($sp) + sw $16, 32($sp) addiu $1, $sp, 0 - addu $5, $5, $2 - addu $2, $1, $2 -$BB4_12: - lb $1, 0($5) - addiu $5, $5, 1 - addiu $3, $3, -1 - sb $1, 0($2) - bnez $3, $BB4_12 - addiu $2, $2, 1 -$BB4_13: - lw $1, 28($sp) - lw $2, 12($sp) - lw $3, 16($sp) - lw $5, 24($sp) - lw $6, 8($sp) - lw $7, 20($sp) - lw $8, 0($sp) - lw $9, 4($sp) - swl $1, 28($4) - swl $5, 24($4) - swl $7, 20($4) - swl $3, 16($4) - swl $2, 12($4) - swl $6, 8($4) - swl $9, 4($4) - swl $8, 0($4) - swr $2, 15($4) - swr $1, 31($4) - swr $5, 27($4) - swr $7, 23($4) - swr $3, 19($4) - swr $6, 11($4) - swr $9, 7($4) - swr $8, 3($4) + #APP + lh $2, 0($5) + sh $2, 0($1) + #NO_APP + addiu $8, $5, 26 + addiu $10, $5, 24 + addiu $12, $5, 22 + addiu $14, $5, 20 + addiu $24, $5, 18 + addiu $gp, $5, 16 + addiu $17, $5, 14 + addiu $19, $5, 12 + addiu $2, $5, 2 + ori $3, $1, 2 + #APP + lh $6, 0($2) + sh $6, 0($3) + #NO_APP + addiu $7, $1, 28 + addiu $9, $1, 26 + addiu $11, $1, 24 + addiu $13, $1, 22 + addiu $15, $1, 20 + addiu $25, $1, 18 + addiu $16, $1, 16 + addiu $18, $1, 14 + addiu $20, $1, 12 + addiu $2, $5, 4 + ori $3, 
$1, 4 + #APP + lh $6, 0($2) + sh $6, 0($3) + #NO_APP + addiu $2, $5, 6 + ori $3, $1, 6 + #APP + lh $6, 0($2) + sh $6, 0($3) + #NO_APP + addiu $2, $5, 8 + addiu $3, $1, 8 + #APP + lh $6, 0($2) + sh $6, 0($3) + #NO_APP + addiu $3, $1, 30 + addiu $2, $5, 30 + addiu $6, $5, 28 + addiu $5, $5, 10 + addiu $1, $1, 10 + #APP + lh $21, 0($5) + sh $21, 0($1) + #NO_APP + #APP + lh $1, 0($19) + sh $1, 0($20) + #NO_APP + #APP + lh $1, 0($17) + sh $1, 0($18) + #NO_APP + #APP + lh $1, 0($gp) + sh $1, 0($16) + #NO_APP + #APP + lh $1, 0($24) + sh $1, 0($25) + #NO_APP + #APP + lh $1, 0($14) + sh $1, 0($15) + #NO_APP + #APP + lh $1, 0($12) + sh $1, 0($13) + #NO_APP + #APP + lh $1, 0($10) + sh $1, 0($11) + #NO_APP + #APP + lh $1, 0($8) + sh $1, 0($9) + #NO_APP + #APP + lh $1, 0($6) + sh $1, 0($7) + #NO_APP + #APP + lh $1, 0($2) + sh $1, 0($3) + #NO_APP + lw $1, 0($sp) + lw $2, 16($sp) + lw $3, 12($sp) + lw $5, 4($sp) + lw $6, 20($sp) + lw $7, 8($sp) + lw $8, 28($sp) + lw $9, 24($sp) + swl $1, 0($4) + swl $5, 4($4) + swl $7, 8($4) + swl $3, 12($4) + swl $2, 16($4) + swl $6, 20($4) + swl $9, 24($4) + swl $8, 28($4) + swr $2, 19($4) + swr $1, 3($4) + swr $5, 7($4) + swr $7, 11($4) + swr $3, 15($4) + swr $6, 23($4) + swr $9, 27($4) + swr $8, 31($4) sync + lw $16, 32($sp) + lw $17, 36($sp) + lw $18, 40($sp) + lw $19, 44($sp) + lw $20, 48($sp) + lw $21, 52($sp) move $2, $4 jr $ra - addiu $sp, $sp, 32 + addiu $sp, $sp, 56 $func_end4: asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: lhu $1, 0($5) diff --git a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align4 b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align4 index f8a7a13..c3ad056 100644 --- a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align4 @@ -1,24 +1,72 @@ asm_test::atomic_memcpy_load_align4::acquire: - lw $1, 0($5) - lw $2, 4($5) - lw $3, 8($5) - lw $6, 12($5) - lw $7, 16($5) - lw $8, 20($5) - 
lw $9, 24($5) - lw $5, 28($5) - sw $5, 28($4) - sw $9, 24($4) - sw $8, 20($4) - sw $7, 16($4) - sw $6, 12($4) + addiu $sp, $sp, -32 + addiu $1, $sp, 0 + #APP + lw $2, 0($5) + sw $2, 0($1) + #NO_APP + addiu $2, $5, 4 + ori $3, $1, 4 + #APP + lw $6, 0($2) + sw $6, 0($3) + #NO_APP + addiu $7, $1, 24 + addiu $2, $5, 8 + addiu $3, $1, 8 + #APP + lw $6, 0($2) + sw $6, 0($3) + #NO_APP + addiu $2, $5, 12 + addiu $3, $1, 12 + #APP + lw $6, 0($2) + sw $6, 0($3) + #NO_APP + addiu $2, $5, 16 + addiu $3, $1, 16 + #APP + lw $6, 0($2) + sw $6, 0($3) + #NO_APP + addiu $3, $1, 28 + addiu $2, $5, 28 + addiu $6, $5, 24 + addiu $5, $5, 20 + addiu $1, $1, 20 + #APP + lw $8, 0($5) + sw $8, 0($1) + #NO_APP + #APP + lw $1, 0($6) + sw $1, 0($7) + #NO_APP + #APP + lw $1, 0($2) + sw $1, 0($3) + #NO_APP + lw $1, 16($sp) + lw $2, 12($sp) + lw $3, 8($sp) + lw $5, 4($sp) + lw $6, 0($sp) + sw $1, 16($4) + lw $1, 20($sp) + sw $2, 12($4) + sw $6, 0($4) + sw $5, 4($4) sw $3, 8($4) - sw $2, 4($4) move $2, $4 - sw $1, 0($4) + sw $1, 20($4) + lw $1, 24($sp) + sw $1, 24($4) + lw $1, 28($sp) + sw $1, 28($4) sync jr $ra - nop + addiu $sp, $sp, 32 $func_end8: asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: lw $1, 0($5) diff --git a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align8 b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align8 index 170c954..50386c4 100644 --- a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_load_align8 @@ -1,24 +1,72 @@ asm_test::atomic_memcpy_load_align8::acquire: - lw $1, 0($5) - lw $2, 4($5) - lw $3, 8($5) - lw $6, 12($5) - lw $7, 16($5) - lw $8, 20($5) - lw $9, 24($5) - lw $5, 28($5) - sw $5, 28($4) - sw $9, 24($4) - sw $8, 20($4) - sw $7, 16($4) - sw $6, 12($4) + addiu $sp, $sp, -32 + addiu $1, $sp, 0 + #APP + lw $2, 0($5) + sw $2, 0($1) + #NO_APP + addiu $2, $5, 4 + ori $3, $1, 4 + #APP + lw $6, 0($2) + sw $6, 0($3) + #NO_APP + addiu $7, $1, 
24 + addiu $2, $5, 8 + addiu $3, $1, 8 + #APP + lw $6, 0($2) + sw $6, 0($3) + #NO_APP + addiu $2, $5, 12 + addiu $3, $1, 12 + #APP + lw $6, 0($2) + sw $6, 0($3) + #NO_APP + addiu $2, $5, 16 + addiu $3, $1, 16 + #APP + lw $6, 0($2) + sw $6, 0($3) + #NO_APP + addiu $3, $1, 28 + addiu $2, $5, 28 + addiu $6, $5, 24 + addiu $5, $5, 20 + addiu $1, $1, 20 + #APP + lw $8, 0($5) + sw $8, 0($1) + #NO_APP + #APP + lw $1, 0($6) + sw $1, 0($7) + #NO_APP + #APP + lw $1, 0($2) + sw $1, 0($3) + #NO_APP + lw $1, 16($sp) + lw $2, 12($sp) + lw $3, 8($sp) + lw $5, 4($sp) + lw $6, 0($sp) + sw $1, 16($4) + lw $1, 20($sp) + sw $2, 12($4) + sw $6, 0($4) + sw $5, 4($4) sw $3, 8($4) - sw $2, 4($4) move $2, $4 - sw $1, 0($4) + sw $1, 20($4) + lw $1, 24($sp) + sw $1, 24($4) + lw $1, 28($sp) + sw $1, 28($4) sync jr $ra - nop + addiu $sp, $sp, 32 $func_end12: asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: lw $1, 4($5) diff --git a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align1 b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align1 index f9f54c2..3854ec3 100644 --- a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align1 @@ -1,169 +1,263 @@ asm_test::atomic_memcpy_store_align1::release: - addiu $sp, $sp, -64 + addiu $sp, $sp, -112 + sw $ra, 108($sp) + sw $fp, 104($sp) + sw $23, 100($sp) + sw $22, 96($sp) + sw $21, 92($sp) + sw $20, 88($sp) + sw $19, 84($sp) + sw $18, 80($sp) + sw $17, 76($sp) + sw $16, 72($sp) lwl $1, 20($5) lwl $2, 16($5) lwl $3, 12($5) lwl $6, 8($5) lwl $7, 24($5) lwl $8, 28($5) + addiu $25, $4, 1 + addiu $24, $4, 2 + addiu $11, $4, 3 + addiu $13, $4, 4 + addiu $18, $4, 5 + addiu $17, $4, 6 + addiu $16, $4, 7 + addiu $gp, $4, 8 + addiu $23, $4, 9 + addiu $22, $4, 10 + addiu $21, $4, 11 + addiu $20, $4, 12 + addiu $19, $4, 13 + addiu $ra, $4, 15 + addiu $fp, $4, 16 + addiu $9, $4, 25 lwr $1, 23($5) lwr $6, 11($5) lwr $3, 15($5) lwr 
$2, 19($5) lwr $7, 27($5) lwr $8, 31($5) - sw $1, 20($sp) + sw $1, 28($sp) lwl $1, 4($5) - sw $8, 28($sp) - sw $7, 24($sp) - sw $2, 16($sp) - sw $3, 12($sp) - sw $6, 8($sp) + sw $8, 36($sp) + sw $7, 32($sp) + sw $2, 24($sp) + sw $3, 20($sp) + sw $6, 16($sp) + addiu $8, $4, 26 lwr $1, 7($5) - sw $1, 4($sp) + sw $1, 12($sp) lwl $1, 0($5) lwr $1, 3($5) - sw $1, 0($sp) + sw $1, 8($sp) sync lw $1, 16($sp) lw $2, 20($sp) lw $3, 24($sp) - lw $5, 0($sp) - lw $6, 4($sp) - lw $7, 8($sp) - lw $8, 12($sp) - lw $9, 28($sp) - sw $2, 52($sp) + lw $5, 28($sp) + lw $6, 32($sp) + lw $7, 36($sp) sw $1, 48($sp) - addiu $1, $4, 3 - addiu $2, $zero, -4 + lw $1, 12($sp) + sw $7, 68($sp) + sw $6, 64($sp) + sw $5, 60($sp) sw $3, 56($sp) - sw $9, 60($sp) - sw $8, 44($sp) - sw $7, 40($sp) - sw $6, 36($sp) - and $3, $1, $2 - subu $2, $3, $4 - sltiu $1, $2, 33 - beqz $1, $BB2_6 - sw $5, 32($sp) - beqz $2, $BB2_7 - nop - addiu $5, $sp, 32 - subu $7, $4, $3 - addiu $6, $4, 32 - move $8, $4 -$BB2_3: - lbu $1, 0($5) - addiu $5, $5, 1 + sw $2, 52($sp) + addiu $3, $4, 31 + addiu $5, $4, 29 + addiu $6, $4, 28 + addiu $7, $4, 27 + sw $1, 44($sp) + lw $1, 8($sp) + sw $1, 40($sp) + addiu $1, $sp, 40 + #APP + lbu $2, 0($1) + sb $2, 0($4) + #NO_APP + sw $3, 4($sp) + addiu $3, $4, 30 + ori $10, $1, 1 + ori $12, $1, 2 + ori $15, $1, 3 + ori $14, $1, 4 + sw $3, 0($sp) + addiu $2, $4, 14 + #APP + lbu $3, 0($10) + sb $3, 0($25) + #NO_APP + addiu $10, $4, 24 + #APP + lbu $25, 0($12) + sb $25, 0($24) + #NO_APP + addiu $3, $4, 23 + addiu $12, $4, 22 + addiu $24, $4, 21 + #APP + lbu $25, 0($15) + sb $25, 0($11) + #NO_APP + addiu $11, $4, 20 + addiu $15, $4, 19 + #APP + lbu $25, 0($14) + sb $25, 0($13) + #NO_APP + ori $14, $1, 5 + addiu $13, $4, 18 + addiu $4, $4, 17 + #APP + lbu $25, 0($14) + sb $25, 0($18) + #NO_APP + addiu $14, $1, 8 + ori $25, $1, 6 + #APP + lbu $18, 0($25) + sb $18, 0($17) + #NO_APP + addiu $17, $1, 14 + ori $25, $1, 7 + #APP + lbu $18, 0($25) + sb $18, 0($16) + #NO_APP + addiu $16, $1, 12 + 
#APP + lbu $18, 0($14) + sb $18, 0($gp) + #NO_APP + addiu $gp, $1, 9 + addiu $14, $1, 11 + addiu $25, $1, 13 + #APP + lbu $18, 0($gp) + sb $18, 0($23) + #NO_APP + addiu $gp, $1, 10 + #APP + lbu $23, 0($gp) + sb $23, 0($22) + #NO_APP + addiu $gp, $1, 15 + addiu $18, $1, 16 + addiu $22, $1, 31 + #APP + lbu $23, 0($14) + sb $23, 0($21) + #NO_APP + addiu $14, $1, 30 + addiu $21, $1, 29 + #APP + lbu $23, 0($16) + sb $23, 0($20) + #NO_APP + addiu $16, $1, 28 + addiu $20, $1, 27 + #APP + lbu $23, 0($25) + sb $23, 0($19) + #NO_APP + addiu $25, $1, 26 + addiu $19, $1, 25 + #APP + lbu $23, 0($17) + sb $23, 0($2) + #NO_APP + addiu $2, $1, 24 + addiu $17, $1, 23 + #APP + lbu $23, 0($gp) + sb $23, 0($ra) + #NO_APP + addiu $gp, $1, 22 + #APP + lbu $ra, 0($18) + sb $ra, 0($fp) + #NO_APP + addiu $fp, $1, 17 + addiu $23, $1, 21 + addiu $18, $1, 20 + #APP + lbu $ra, 0($fp) + sb $ra, 0($4) + #NO_APP + addiu $4, $1, 19 + addiu $1, $1, 18 + #APP + lbu $fp, 0($1) + sb $fp, 0($13) + #NO_APP + #APP + lbu $1, 0($4) + sb $1, 0($15) + #NO_APP + #APP + lbu $1, 0($18) + sb $1, 0($11) + #NO_APP + #APP + lbu $1, 0($23) + sb $1, 0($24) + #NO_APP + #APP + lbu $1, 0($gp) + sb $1, 0($12) + #NO_APP + #APP + lbu $1, 0($17) + sb $1, 0($3) + #NO_APP + #APP + lbu $1, 0($2) + sb $1, 0($10) + #NO_APP + #APP + lbu $1, 0($19) + sb $1, 0($9) + #NO_APP + #APP + lbu $1, 0($25) sb $1, 0($8) - addiu $1, $7, 1 - addiu $8, $8, 1 - sltu $9, $1, $7 - beqz $9, $BB2_3 - move $7, $1 - subu $3, $6, $3 - sltiu $1, $3, 4 - bnez $1, $BB2_10 - nop - b $BB2_8 - nop -$BB2_6: - lbu $1, 32($sp) - sb $1, 0($4) - lbu $1, 33($sp) - sb $1, 1($4) - lbu $1, 34($sp) - sb $1, 2($4) - lbu $1, 35($sp) - sb $1, 3($4) - lbu $1, 36($sp) - sb $1, 4($4) - lbu $1, 37($sp) - sb $1, 5($4) - lbu $1, 38($sp) - sb $1, 6($4) - lbu $1, 39($sp) - sb $1, 7($4) - lbu $1, 40($sp) - sb $1, 8($4) - lbu $1, 41($sp) - sb $1, 9($4) - lbu $1, 42($sp) - sb $1, 10($4) - lbu $1, 43($sp) - sb $1, 11($4) - lbu $1, 44($sp) - sb $1, 12($4) - lbu $1, 45($sp) - sb $1, 
13($4) - lbu $1, 46($sp) - sb $1, 14($4) - lbu $1, 47($sp) - sb $1, 15($4) - lbu $1, 48($sp) - sb $1, 16($4) - lbu $1, 49($sp) - sb $1, 17($4) - lbu $1, 50($sp) - sb $1, 18($4) - lbu $1, 51($sp) - sb $1, 19($4) - lbu $1, 52($sp) - sb $1, 20($4) - lbu $1, 53($sp) - sb $1, 21($4) - lbu $1, 54($sp) - sb $1, 22($4) - lbu $1, 55($sp) - sb $1, 23($4) - lbu $1, 56($sp) - sb $1, 24($4) - lbu $1, 57($sp) - sb $1, 25($4) - lbu $1, 58($sp) - sb $1, 26($4) - lbu $1, 59($sp) - sb $1, 27($4) - lbu $1, 60($sp) - sb $1, 28($4) - lbu $1, 61($sp) - sb $1, 29($4) - lbu $1, 62($sp) - sb $1, 30($4) - lbu $1, 63($sp) - b $BB2_13 - sb $1, 31($4) -$BB2_7: - addiu $3, $zero, 32 -$BB2_8: - addiu $5, $sp, 32 -$BB2_9: - addu $1, $5, $2 - addiu $3, $3, -4 - lwl $6, 0($1) - lwr $6, 3($1) - addu $1, $4, $2 - sw $6, 0($1) - sltiu $1, $3, 4 - beqz $1, $BB2_9 - addiu $2, $2, 4 -$BB2_10: - beqz $3, $BB2_13 - nop - addiu $1, $sp, 32 - addu $5, $1, $2 - addu $2, $4, $2 -$BB2_12: - lbu $1, 0($5) - addiu $5, $5, 1 - addiu $3, $3, -1 + #NO_APP + #APP + lbu $1, 0($20) + sb $1, 0($7) + #NO_APP + #APP + lbu $1, 0($16) + sb $1, 0($6) + #NO_APP + #APP + lbu $1, 0($21) + sb $1, 0($5) + #NO_APP + lw $2, 0($sp) + #APP + lbu $1, 0($14) + sb $1, 0($2) + #NO_APP + lw $2, 4($sp) + #APP + lbu $1, 0($22) sb $1, 0($2) - bnez $3, $BB2_12 - addiu $2, $2, 1 -$BB2_13: + #NO_APP + lw $16, 72($sp) + lw $17, 76($sp) + lw $18, 80($sp) + lw $19, 84($sp) + lw $20, 88($sp) + lw $21, 92($sp) + lw $22, 96($sp) + lw $23, 100($sp) + lw $fp, 104($sp) + lw $ra, 108($sp) jr $ra - addiu $sp, $sp, 64 + addiu $sp, $sp, 112 $func_end2: asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: addiu $sp, $sp, -32 diff --git a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align16 b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align16 index 258e3fb..b7a132e 100644 --- a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align16 +++ 
b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align16 @@ -1,22 +1,95 @@ asm_test::atomic_memcpy_store_align16::release: - lw $1, 28($5) - lw $2, 24($5) + addiu $sp, $sp, -80 + sw $ra, 76($sp) + sw $fp, 72($sp) + move $fp, $sp + addiu $1, $zero, -16 + and $sp, $sp, $1 + lw $1, 12($5) + lw $2, 16($5) lw $3, 20($5) - lw $6, 16($5) - lw $7, 12($5) - lw $8, 8($5) - lw $9, 4($5) - lw $5, 0($5) + lw $6, 24($5) + lw $7, 28($5) + addiu $9, $4, 16 + addiu $11, $4, 12 + sw $1, 12($sp) + lw $1, 8($5) + sw $7, 28($sp) + sw $6, 24($sp) + sw $3, 20($sp) + sw $2, 16($sp) + sw $1, 8($sp) + lw $1, 4($5) + sw $1, 4($sp) + lw $1, 0($5) + sw $1, 0($sp) sync - sw $5, 0($4) - sw $9, 4($4) - sw $8, 8($4) - sw $7, 12($4) - sw $6, 16($4) - sw $3, 20($4) - sw $2, 24($4) + lw $1, 8($sp) + lw $2, 12($sp) + lw $3, 16($sp) + lw $5, 20($sp) + lw $6, 24($sp) + lw $7, 28($sp) + sw $1, 40($sp) + lw $1, 4($sp) + sw $7, 60($sp) + sw $6, 56($sp) + sw $5, 52($sp) + sw $3, 48($sp) + sw $2, 44($sp) + addiu $7, $4, 20 + sw $1, 36($sp) + lw $1, 0($sp) + sw $1, 32($sp) + addiu $1, $sp, 32 + #APP + lw $2, 0($1) + sw $2, 0($4) + #NO_APP + ori $3, $1, 4 + addiu $6, $1, 24 + addiu $8, $1, 20 + addiu $10, $1, 16 + ori $12, $1, 12 + addiu $2, $4, 4 + #APP + lw $5, 0($3) + sw $5, 0($2) + #NO_APP + addiu $3, $1, 28 + ori $1, $1, 8 + addiu $2, $4, 28 + addiu $5, $4, 24 + addiu $4, $4, 8 + #APP + lw $13, 0($1) + sw $13, 0($4) + #NO_APP + #APP + lw $1, 0($12) + sw $1, 0($11) + #NO_APP + #APP + lw $1, 0($10) + sw $1, 0($9) + #NO_APP + #APP + lw $1, 0($8) + sw $1, 0($7) + #NO_APP + #APP + lw $1, 0($6) + sw $1, 0($5) + #NO_APP + #APP + lw $1, 0($3) + sw $1, 0($2) + #NO_APP + move $sp, $fp + lw $fp, 72($sp) + lw $ra, 76($sp) jr $ra - sw $1, 28($4) + addiu $sp, $sp, 80 $func_end18: asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: addiu $sp, $sp, -48 diff --git a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align2 
b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align2 index 5830ac1..688092f 100644 --- a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align2 @@ -1,137 +1,167 @@ asm_test::atomic_memcpy_store_align2::release: - addiu $sp, $sp, -64 + addiu $sp, $sp, -112 + sw $ra, 108($sp) + sw $fp, 104($sp) + sw $23, 100($sp) + sw $22, 96($sp) + sw $21, 92($sp) + sw $20, 88($sp) + sw $19, 84($sp) + sw $18, 80($sp) + sw $17, 76($sp) + sw $16, 72($sp) lwl $1, 20($5) lwl $2, 16($5) lwl $3, 12($5) lwl $6, 8($5) lwl $7, 24($5) lwl $8, 28($5) + addiu $9, $4, 24 + addiu $11, $4, 22 + addiu $13, $4, 20 + addiu $15, $4, 18 + addiu $25, $4, 16 + addiu $16, $4, 14 + addiu $18, $4, 12 + addiu $20, $4, 10 + addiu $22, $4, 8 + addiu $23, $4, 6 + addiu $fp, $4, 4 lwr $1, 23($5) lwr $6, 11($5) lwr $3, 15($5) lwr $2, 19($5) lwr $7, 27($5) lwr $8, 31($5) - sw $1, 20($sp) + sw $1, 28($sp) lwl $1, 4($5) - sw $8, 28($sp) - sw $7, 24($sp) - sw $2, 16($sp) - sw $3, 12($sp) - sw $6, 8($sp) + sw $8, 36($sp) + sw $7, 32($sp) + sw $2, 24($sp) + sw $3, 20($sp) + sw $6, 16($sp) lwr $1, 7($5) - sw $1, 4($sp) + sw $1, 12($sp) lwl $1, 0($5) lwr $1, 3($5) - sw $1, 0($sp) + sw $1, 8($sp) sync lw $1, 16($sp) lw $2, 20($sp) lw $3, 24($sp) - lw $5, 0($sp) - lw $6, 4($sp) - lw $7, 8($sp) - lw $8, 12($sp) - lw $9, 28($sp) - sw $2, 52($sp) + lw $5, 28($sp) + lw $6, 32($sp) + lw $7, 36($sp) sw $1, 48($sp) - addiu $1, $4, 3 - addiu $2, $zero, -4 + lw $1, 12($sp) + sw $7, 68($sp) + sw $5, 60($sp) + sw $6, 64($sp) sw $3, 56($sp) - sw $9, 60($sp) - sw $8, 44($sp) - sw $7, 40($sp) - sw $6, 36($sp) - and $3, $1, $2 - subu $2, $3, $4 - sltiu $1, $2, 33 - beqz $1, $BB6_6 - sw $5, 32($sp) - beqz $2, $BB6_7 - nop - addiu $5, $sp, 32 - subu $7, $4, $3 - addiu $6, $4, 32 - move $8, $4 -$BB6_3: - lbu $1, 0($5) - addiu $5, $5, 1 - sb $1, 0($8) - addiu $1, $7, 1 - addiu $8, $8, 1 - sltu $9, $1, $7 - beqz $9, $BB6_3 - move $7, $1 
- subu $3, $6, $3 - sltiu $1, $3, 4 - bnez $1, $BB6_10 - nop - b $BB6_8 - nop -$BB6_6: - lhu $1, 32($sp) - sh $1, 0($4) - lhu $1, 34($sp) - sh $1, 2($4) - lhu $1, 36($sp) - sh $1, 4($4) - lhu $1, 38($sp) - sh $1, 6($4) - lhu $1, 40($sp) - sh $1, 8($4) - lhu $1, 42($sp) - sh $1, 10($4) - lhu $1, 44($sp) - sh $1, 12($4) - lhu $1, 46($sp) - sh $1, 14($4) - lhu $1, 48($sp) - sh $1, 16($4) - lhu $1, 50($sp) - sh $1, 18($4) - lhu $1, 52($sp) - sh $1, 20($4) - lhu $1, 54($sp) - sh $1, 22($4) - lhu $1, 56($sp) - sh $1, 24($4) - lhu $1, 58($sp) - sh $1, 26($4) - lhu $1, 60($sp) - sh $1, 28($4) - lhu $1, 62($sp) - b $BB6_13 - sh $1, 30($4) -$BB6_7: - addiu $3, $zero, 32 -$BB6_8: - addiu $5, $sp, 32 -$BB6_9: - addu $1, $5, $2 - addiu $3, $3, -4 - lwl $6, 0($1) - lwr $6, 3($1) - addu $1, $4, $2 - sw $6, 0($1) - sltiu $1, $3, 4 - beqz $1, $BB6_9 - addiu $2, $2, 4 -$BB6_10: - beqz $3, $BB6_13 - nop - addiu $1, $sp, 32 - addu $5, $1, $2 - addu $2, $4, $2 -$BB6_12: - lbu $1, 0($5) - addiu $5, $5, 1 - addiu $3, $3, -1 - sb $1, 0($2) - bnez $3, $BB6_12 - addiu $2, $2, 1 -$BB6_13: + sw $2, 52($sp) + addiu $5, $4, 28 + addiu $7, $4, 26 + sw $1, 44($sp) + lw $1, 8($sp) + sw $1, 40($sp) + addiu $1, $sp, 40 + #APP + lhu $2, 0($1) + sh $2, 0($4) + #NO_APP + addiu $6, $1, 28 + addiu $8, $1, 26 + addiu $10, $1, 24 + addiu $12, $1, 22 + addiu $14, $1, 20 + addiu $24, $1, 18 + addiu $gp, $1, 16 + addiu $17, $1, 14 + addiu $19, $1, 12 + addiu $21, $1, 10 + addiu $ra, $1, 8 + addiu $2, $4, 30 + addiu $4, $4, 2 + sw $2, 4($sp) + addiu $2, $1, 30 + sw $2, 0($sp) + ori $2, $1, 2 + #APP + lhu $3, 0($2) + sh $3, 0($4) + #NO_APP + ori $2, $1, 6 + ori $1, $1, 4 + #APP + lhu $3, 0($1) + sh $3, 0($fp) + #NO_APP + #APP + lhu $1, 0($2) + sh $1, 0($23) + #NO_APP + #APP + lhu $1, 0($ra) + sh $1, 0($22) + #NO_APP + #APP + lhu $1, 0($21) + sh $1, 0($20) + #NO_APP + #APP + lhu $1, 0($19) + sh $1, 0($18) + #NO_APP + #APP + lhu $1, 0($17) + sh $1, 0($16) + #NO_APP + #APP + lhu $1, 0($gp) + sh $1, 0($25) + 
#NO_APP + #APP + lhu $1, 0($24) + sh $1, 0($15) + #NO_APP + #APP + lhu $1, 0($14) + sh $1, 0($13) + #NO_APP + #APP + lhu $1, 0($12) + sh $1, 0($11) + #NO_APP + #APP + lhu $1, 0($10) + sh $1, 0($9) + #NO_APP + #APP + lhu $1, 0($8) + sh $1, 0($7) + #NO_APP + #APP + lhu $1, 0($6) + sh $1, 0($5) + #NO_APP + lw $2, 4($sp) + lw $3, 0($sp) + #APP + lhu $1, 0($3) + sh $1, 0($2) + #NO_APP + lw $16, 72($sp) + lw $17, 76($sp) + lw $18, 80($sp) + lw $19, 84($sp) + lw $20, 88($sp) + lw $21, 92($sp) + lw $22, 96($sp) + lw $23, 100($sp) + lw $fp, 104($sp) + lw $ra, 108($sp) jr $ra - addiu $sp, $sp, 64 + addiu $sp, $sp, 112 $func_end6: asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: addiu $sp, $sp, -32 diff --git a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align4 b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align4 index 9a7a4f6..c7777ef 100644 --- a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align4 @@ -1,22 +1,87 @@ asm_test::atomic_memcpy_store_align4::release: - lw $1, 28($5) - lw $2, 24($5) + addiu $sp, $sp, -64 + lw $1, 12($5) + lw $2, 16($5) lw $3, 20($5) - lw $6, 16($5) - lw $7, 12($5) - lw $8, 8($5) - lw $9, 4($5) - lw $5, 0($5) + lw $6, 24($5) + lw $7, 28($5) + addiu $9, $4, 16 + addiu $11, $4, 12 + sw $1, 12($sp) + lw $1, 8($5) + sw $7, 28($sp) + sw $6, 24($sp) + sw $3, 20($sp) + sw $2, 16($sp) + sw $1, 8($sp) + lw $1, 4($5) + sw $1, 4($sp) + lw $1, 0($5) + sw $1, 0($sp) sync - sw $5, 0($4) - sw $9, 4($4) - sw $8, 8($4) - sw $7, 12($4) - sw $6, 16($4) - sw $3, 20($4) - sw $2, 24($4) + lw $1, 8($sp) + lw $2, 12($sp) + lw $3, 16($sp) + lw $5, 20($sp) + lw $6, 24($sp) + lw $7, 28($sp) + sw $1, 40($sp) + lw $1, 4($sp) + sw $7, 60($sp) + sw $6, 56($sp) + sw $5, 52($sp) + sw $3, 48($sp) + sw $2, 44($sp) + addiu $7, $4, 20 + sw $1, 36($sp) + lw $1, 0($sp) + sw $1, 32($sp) + addiu $1, $sp, 32 + #APP + lw $2, 0($1) + sw $2, 0($4) 
+ #NO_APP + ori $3, $1, 4 + addiu $6, $1, 24 + addiu $8, $1, 20 + addiu $10, $1, 16 + addiu $12, $1, 12 + addiu $2, $4, 4 + #APP + lw $5, 0($3) + sw $5, 0($2) + #NO_APP + addiu $3, $1, 28 + addiu $1, $1, 8 + addiu $2, $4, 28 + addiu $5, $4, 24 + addiu $4, $4, 8 + #APP + lw $13, 0($1) + sw $13, 0($4) + #NO_APP + #APP + lw $1, 0($12) + sw $1, 0($11) + #NO_APP + #APP + lw $1, 0($10) + sw $1, 0($9) + #NO_APP + #APP + lw $1, 0($8) + sw $1, 0($7) + #NO_APP + #APP + lw $1, 0($6) + sw $1, 0($5) + #NO_APP + #APP + lw $1, 0($3) + sw $1, 0($2) + #NO_APP jr $ra - sw $1, 28($4) + addiu $sp, $sp, 64 $func_end10: asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: addiu $sp, $sp, -32 diff --git a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align8 b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align8 index 92c258d..919c836 100644 --- a/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/mips-unknown-linux-gnu/atomic_memcpy_store_align8 @@ -1,22 +1,87 @@ asm_test::atomic_memcpy_store_align8::release: - lw $1, 28($5) - lw $2, 24($5) + addiu $sp, $sp, -64 + lw $1, 12($5) + lw $2, 16($5) lw $3, 20($5) - lw $6, 16($5) - lw $7, 12($5) - lw $8, 8($5) - lw $9, 4($5) - lw $5, 0($5) + lw $6, 24($5) + lw $7, 28($5) + addiu $9, $4, 16 + addiu $11, $4, 12 + sw $1, 12($sp) + lw $1, 8($5) + sw $7, 28($sp) + sw $6, 24($sp) + sw $3, 20($sp) + sw $2, 16($sp) + sw $1, 8($sp) + lw $1, 4($5) + sw $1, 4($sp) + lw $1, 0($5) + sw $1, 0($sp) sync - sw $5, 0($4) - sw $9, 4($4) - sw $8, 8($4) - sw $7, 12($4) - sw $6, 16($4) - sw $3, 20($4) - sw $2, 24($4) + lw $1, 8($sp) + lw $2, 12($sp) + lw $3, 16($sp) + lw $5, 20($sp) + lw $6, 24($sp) + lw $7, 28($sp) + sw $1, 40($sp) + lw $1, 4($sp) + sw $7, 60($sp) + sw $6, 56($sp) + sw $5, 52($sp) + sw $3, 48($sp) + sw $2, 44($sp) + addiu $7, $4, 20 + sw $1, 36($sp) + lw $1, 0($sp) + sw $1, 32($sp) + addiu $1, $sp, 32 + #APP + lw $2, 0($1) + sw $2, 0($4) + #NO_APP + ori $3, 
$1, 4 + addiu $6, $1, 24 + addiu $8, $1, 20 + addiu $10, $1, 16 + addiu $12, $1, 12 + addiu $2, $4, 4 + #APP + lw $5, 0($3) + sw $5, 0($2) + #NO_APP + addiu $3, $1, 28 + addiu $1, $1, 8 + addiu $2, $4, 28 + addiu $5, $4, 24 + addiu $4, $4, 8 + #APP + lw $13, 0($1) + sw $13, 0($4) + #NO_APP + #APP + lw $1, 0($12) + sw $1, 0($11) + #NO_APP + #APP + lw $1, 0($10) + sw $1, 0($9) + #NO_APP + #APP + lw $1, 0($8) + sw $1, 0($7) + #NO_APP + #APP + lw $1, 0($6) + sw $1, 0($5) + #NO_APP + #APP + lw $1, 0($3) + sw $1, 0($2) + #NO_APP jr $ra - sw $1, 28($4) + addiu $sp, $sp, 64 $func_end14: asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: addiu $sp, $sp, -32 diff --git a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align1 b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align1 index edae0b8..c656440 100644 --- a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align1 @@ -1,98 +1,18 @@ asm_test::atomic_memcpy_load_align1::acquire: daddiu $sp, $sp, -64 - daddiu $1, $5, 7 - daddiu $2, $zero, -8 - and $1, $1, $2 - dsubu $2, $1, $5 - sltiu $1, $2, 65 - beqz $1, .LBB0_6 - nop - beqz $2, .LBB0_9 - nop - daddiu $3, $zero, 0 - daddiu $6, $sp, 0 -.LBB0_3: - daddu $7, $5, $3 - daddu $1, $6, $3 - daddiu $3, $3, 1 - lb $7, 0($7) - bne $2, $3, .LBB0_3 - sb $7, 0($1) - daddiu $1, $zero, 64 - dsubu $3, $1, $3 - sltiu $1, $3, 8 - bnez $1, .LBB0_12 - nop - b .LBB0_10 - nop -.LBB0_6: daddiu $2, $zero, 0 daddiu $3, $sp, 0 daddiu $6, $zero, 64 -.LBB0_7: - daddu $7, $5, $2 - daddu $1, $3, $2 +.LBB0_1: + daddu $1, $5, $2 + daddu $7, $3, $2 + #APP + lb $8, 0($1) + sb $8, 0($7) + #NO_APP daddiu $2, $2, 1 - lb $7, 0($7) - bne $2, $6, .LBB0_7 - sb $7, 0($1) - ld $1, 56($sp) - ld $2, 24($sp) - ld $3, 32($sp) - ld $5, 48($sp) - ld $6, 16($sp) - ld $7, 40($sp) - ld $8, 0($sp) - ld $9, 8($sp) - sdl $1, 56($4) - sdl $5, 48($4) - sdl $7, 40($4) - 
sdl $3, 32($4) - sdl $2, 24($4) - sdl $6, 16($4) - sdl $9, 8($4) - sdl $8, 0($4) - sdr $1, 63($4) - sdr $5, 55($4) - sdr $7, 47($4) - sdr $3, 39($4) - sdr $2, 31($4) - sdr $6, 23($4) - sdr $9, 15($4) - sdr $8, 7($4) - sync - move $2, $4 - jr $ra - daddiu $sp, $sp, 64 -.LBB0_9: - daddiu $3, $zero, 64 - daddiu $2, $zero, 0 -.LBB0_10: - daddiu $6, $sp, 0 -.LBB0_11: - daddu $7, $5, $2 - daddu $1, $6, $2 - daddiu $3, $3, -8 - ld $7, 0($7) - sdl $7, 0($1) - sdr $7, 7($1) - sltiu $1, $3, 8 - beqz $1, .LBB0_11 - daddiu $2, $2, 8 -.LBB0_12: - beqz $3, .LBB0_15 + bne $2, $6, .LBB0_1 nop - daddiu $1, $sp, 0 - daddu $5, $5, $2 - daddu $2, $1, $2 -.LBB0_14: - lb $1, 0($5) - daddiu $5, $5, 1 - daddiu $3, $3, -1 - sb $1, 0($2) - bnez $3, .LBB0_14 - daddiu $2, $2, 1 -.LBB0_15: ld $1, 56($sp) ld $2, 24($sp) ld $3, 32($sp) @@ -109,11 +29,11 @@ asm_test::atomic_memcpy_load_align1::acquire: sdl $6, 16($4) sdl $9, 8($4) sdl $8, 0($4) + sdr $2, 31($4) sdr $1, 63($4) sdr $5, 55($4) sdr $7, 47($4) sdr $3, 39($4) - sdr $2, 31($4) sdr $6, 23($4) sdr $9, 15($4) sdr $8, 7($4) diff --git a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align16 b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align16 index 7eb7afd..761f03e 100644 --- a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align16 @@ -1,24 +1,72 @@ asm_test::atomic_memcpy_load_align16::acquire: - ld $1, 0($5) - ld $2, 8($5) - ld $3, 16($5) - ld $6, 24($5) - ld $7, 32($5) - ld $8, 40($5) - ld $9, 48($5) - ld $5, 56($5) - sd $5, 56($4) - sd $9, 48($4) - sd $8, 40($4) - sd $7, 32($4) - sd $6, 24($4) + daddiu $sp, $sp, -64 + daddiu $1, $sp, 0 + #APP + ld $2, 0($5) + sd $2, 0($1) + #NO_APP + daddiu $2, $5, 8 + ori $3, $1, 8 + #APP + ld $6, 0($2) + sd $6, 0($3) + #NO_APP + daddiu $7, $1, 48 + daddiu $2, $5, 16 + daddiu $3, $1, 16 + #APP + ld $6, 0($2) + sd $6, 0($3) + #NO_APP + daddiu $2, $5, 24 + 
daddiu $3, $1, 24 + #APP + ld $6, 0($2) + sd $6, 0($3) + #NO_APP + daddiu $2, $5, 32 + daddiu $3, $1, 32 + #APP + ld $6, 0($2) + sd $6, 0($3) + #NO_APP + daddiu $3, $1, 56 + daddiu $2, $5, 56 + daddiu $6, $5, 48 + daddiu $5, $5, 40 + daddiu $1, $1, 40 + #APP + ld $8, 0($5) + sd $8, 0($1) + #NO_APP + #APP + ld $1, 0($6) + sd $1, 0($7) + #NO_APP + #APP + ld $1, 0($2) + sd $1, 0($3) + #NO_APP + ld $1, 32($sp) + ld $2, 24($sp) + ld $3, 16($sp) + ld $5, 8($sp) + ld $6, 0($sp) + sd $1, 32($4) + ld $1, 40($sp) + sd $2, 24($4) + sd $6, 0($4) + sd $5, 8($4) sd $3, 16($4) - sd $2, 8($4) move $2, $4 - sd $1, 0($4) + sd $1, 40($4) + ld $1, 48($sp) + sd $1, 48($4) + ld $1, 56($sp) + sd $1, 56($4) sync jr $ra - nop + daddiu $sp, $sp, 64 asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: ld $1, 0($5) ld $2, 8($5) diff --git a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align2 b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align2 index 63ad372..0b51fd6 100644 --- a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align2 @@ -1,149 +1,250 @@ asm_test::atomic_memcpy_load_align2::acquire: - daddiu $sp, $sp, -64 - daddiu $1, $5, 7 - daddiu $2, $zero, -8 - and $1, $1, $2 - dsubu $2, $1, $5 - sltiu $1, $2, 65 - beqz $1, .LBB4_6 - nop - beqz $2, .LBB4_7 - daddiu $3, $sp, 0 - daddiu $6, $zero, 0 -.LBB4_3: - daddu $7, $5, $6 - daddu $1, $3, $6 - daddiu $6, $6, 1 - lb $7, 0($7) - bne $2, $6, .LBB4_3 - sb $7, 0($1) - daddiu $1, $zero, 64 - dsubu $6, $1, $6 - sltiu $1, $6, 8 - bnez $1, .LBB4_9 - nop - b .LBB4_8 - nop -.LBB4_6: - lh $1, 0($5) - sh $1, 0($sp) - lh $1, 2($5) - sh $1, 2($sp) - lh $1, 4($5) - sh $1, 4($sp) - lh $1, 6($5) - sh $1, 6($sp) - lh $1, 8($5) - sh $1, 8($sp) - lh $1, 10($5) - sh $1, 10($sp) - lh $1, 12($5) - sh $1, 12($sp) - lh $1, 14($5) - sh $1, 14($sp) - lh $1, 16($5) - sh $1, 16($sp) - lh $1, 18($5) - sh $1, 
18($sp) - lh $1, 20($5) - sh $1, 20($sp) - lh $1, 22($5) - sh $1, 22($sp) - lh $1, 24($5) - sh $1, 24($sp) - lh $1, 26($5) - sh $1, 26($sp) - lh $1, 28($5) - sh $1, 28($sp) - lh $1, 30($5) - sh $1, 30($sp) - lh $1, 32($5) - sh $1, 32($sp) - lh $1, 34($5) - sh $1, 34($sp) - lh $1, 36($5) - sh $1, 36($sp) - lh $1, 38($5) - sh $1, 38($sp) - lh $1, 40($5) - sh $1, 40($sp) - lh $1, 42($5) - sh $1, 42($sp) - lh $1, 44($5) - sh $1, 44($sp) - lh $1, 46($5) - sh $1, 46($sp) - lh $1, 48($5) - sh $1, 48($sp) - lh $1, 50($5) - sh $1, 50($sp) - lh $1, 52($5) - sh $1, 52($sp) - lh $1, 54($5) - sh $1, 54($sp) - lh $1, 56($5) - sh $1, 56($sp) - lh $1, 58($5) - sh $1, 58($sp) - lh $1, 60($5) - sh $1, 60($sp) - lh $1, 62($5) - b .LBB4_12 - sh $1, 62($sp) -.LBB4_7: - daddiu $6, $zero, 64 - daddiu $2, $zero, 0 -.LBB4_8: - daddu $7, $5, $2 - daddu $1, $3, $2 - daddiu $6, $6, -8 - ld $7, 0($7) - sdl $7, 0($1) - sdr $7, 7($1) - sltiu $1, $6, 8 - beqz $1, .LBB4_8 - daddiu $2, $2, 8 -.LBB4_9: - beqz $6, .LBB4_12 - nop - daddu $5, $5, $2 - daddu $2, $3, $2 -.LBB4_11: - lb $1, 0($5) - daddiu $5, $5, 1 - daddiu $6, $6, -1 - sb $1, 0($2) - bnez $6, .LBB4_11 - daddiu $2, $2, 1 -.LBB4_12: - ld $1, 56($sp) - ld $2, 24($sp) - ld $3, 32($sp) - ld $5, 48($sp) - ld $6, 16($sp) + daddiu $sp, $sp, -176 + sd $ra, 168($sp) + sd $fp, 160($sp) + sd $gp, 152($sp) + sd $23, 144($sp) + sd $22, 136($sp) + sd $21, 128($sp) + sd $20, 120($sp) + sd $19, 112($sp) + sd $18, 104($sp) + sd $17, 96($sp) + sd $16, 88($sp) + daddiu $1, $sp, 24 + #APP + lh $2, 0($5) + sh $2, 0($1) + #NO_APP + daddiu $10, $5, 10 + daddiu $9, $5, 12 + daddiu $13, $5, 14 + daddiu $16, $5, 16 + daddiu $12, $5, 18 + daddiu $25, $5, 20 + daddiu $24, $5, 22 + daddiu $15, $5, 24 + daddiu $14, $5, 26 + daddiu $18, $5, 28 + daddiu $20, $5, 30 + daddiu $17, $5, 32 + daddiu $19, $5, 34 + daddiu $7, $5, 56 + daddiu $8, $5, 54 + daddiu $21, $5, 52 + daddiu $22, $5, 50 + daddiu $23, $5, 48 + daddiu $gp, $5, 46 + daddiu $fp, $5, 44 + daddiu $ra, $5, 42 
+ daddiu $2, $5, 2 + ori $3, $1, 2 + #APP + lh $6, 0($2) + sh $6, 0($3) + #NO_APP + daddiu $11, $1, 10 + daddiu $2, $5, 4 + ori $3, $1, 4 + #APP + lh $6, 0($2) + sh $6, 0($3) + #NO_APP + daddiu $2, $5, 6 + ori $3, $1, 6 + #APP + lh $6, 0($2) + sh $6, 0($3) + #NO_APP + daddiu $2, $5, 8 + daddiu $3, $1, 8 + #APP + lh $6, 0($2) + sh $6, 0($3) + #NO_APP + daddiu $2, $5, 62 + daddiu $6, $5, 58 + sd $2, 16($sp) + daddiu $2, $5, 60 + sd $2, 8($sp) + daddiu $2, $5, 40 + #APP + lh $3, 0($10) + sh $3, 0($11) + #NO_APP + daddiu $10, $1, 12 + #APP + lh $11, 0($9) + sh $11, 0($10) + #NO_APP + daddiu $10, $1, 14 + daddiu $9, $1, 18 + daddiu $3, $5, 38 + daddiu $5, $5, 36 + #APP + lh $11, 0($13) + sh $11, 0($10) + #NO_APP + daddiu $10, $1, 16 + #APP + lh $13, 0($16) + sh $13, 0($10) + #NO_APP + daddiu $10, $1, 28 + daddiu $11, $1, 30 + #APP + lh $16, 0($12) + sh $16, 0($9) + #NO_APP + daddiu $12, $1, 20 + daddiu $9, $1, 24 + daddiu $13, $1, 26 + #APP + lh $16, 0($25) + sh $16, 0($12) + #NO_APP + daddiu $12, $1, 22 + daddiu $25, $1, 34 + #APP + lh $16, 0($24) + sh $16, 0($12) + #NO_APP + daddiu $12, $1, 32 + daddiu $24, $1, 62 + #APP + lh $16, 0($15) + sh $16, 0($9) + #NO_APP + daddiu $9, $1, 60 + daddiu $15, $1, 58 + #APP + lh $16, 0($14) + sh $16, 0($13) + #NO_APP + daddiu $13, $1, 56 + daddiu $14, $1, 54 + #APP + lh $16, 0($18) + sh $16, 0($10) + #NO_APP + daddiu $10, $1, 52 + #APP + lh $18, 0($20) + sh $18, 0($11) + #NO_APP + daddiu $16, $1, 50 + daddiu $11, $1, 48 + #APP + lh $20, 0($17) + sh $20, 0($12) + #NO_APP + daddiu $18, $1, 46 + daddiu $12, $1, 44 + daddiu $17, $1, 42 + #APP + lh $20, 0($19) + sh $20, 0($25) + #NO_APP + daddiu $25, $1, 40 + daddiu $19, $1, 38 + daddiu $1, $1, 36 + #APP + lh $20, 0($5) + sh $20, 0($1) + #NO_APP + #APP + lh $1, 0($3) + sh $1, 0($19) + #NO_APP + #APP + lh $1, 0($2) + sh $1, 0($25) + #NO_APP + #APP + lh $1, 0($ra) + sh $1, 0($17) + #NO_APP + #APP + lh $1, 0($fp) + sh $1, 0($12) + #NO_APP + #APP + lh $1, 0($gp) + sh $1, 0($18) + #NO_APP + 
#APP + lh $1, 0($23) + sh $1, 0($11) + #NO_APP + #APP + lh $1, 0($22) + sh $1, 0($16) + #NO_APP + #APP + lh $1, 0($21) + sh $1, 0($10) + #NO_APP + #APP + lh $1, 0($8) + sh $1, 0($14) + #NO_APP + #APP + lh $1, 0($7) + sh $1, 0($13) + #NO_APP + #APP + lh $1, 0($6) + sh $1, 0($15) + #NO_APP + ld $2, 8($sp) + #APP + lh $1, 0($2) + sh $1, 0($9) + #NO_APP + ld $2, 16($sp) + #APP + lh $1, 0($2) + sh $1, 0($24) + #NO_APP + ld $1, 24($sp) + ld $2, 56($sp) + ld $3, 48($sp) + ld $5, 32($sp) + ld $6, 64($sp) ld $7, 40($sp) - ld $8, 0($sp) - ld $9, 8($sp) - sdl $1, 56($4) - sdl $5, 48($4) - sdl $7, 40($4) - sdl $3, 32($4) - sdl $2, 24($4) - sdl $6, 16($4) - sdl $9, 8($4) - sdl $8, 0($4) - sdr $2, 31($4) - sdr $1, 63($4) - sdr $5, 55($4) - sdr $7, 47($4) - sdr $3, 39($4) - sdr $6, 23($4) - sdr $9, 15($4) - sdr $8, 7($4) + ld $8, 80($sp) + ld $9, 72($sp) + sdl $1, 0($4) + sdl $5, 8($4) + sdl $7, 16($4) + sdl $3, 24($4) + sdl $2, 32($4) + sdl $6, 40($4) + sdl $9, 48($4) + sdl $8, 56($4) + sdr $2, 39($4) + sdr $1, 7($4) + sdr $5, 15($4) + sdr $7, 23($4) + sdr $3, 31($4) + sdr $6, 47($4) + sdr $9, 55($4) + sdr $8, 63($4) sync + ld $16, 88($sp) + ld $17, 96($sp) + ld $18, 104($sp) + ld $19, 112($sp) + ld $20, 120($sp) + ld $21, 128($sp) + ld $22, 136($sp) + ld $23, 144($sp) + ld $gp, 152($sp) + ld $fp, 160($sp) + ld $ra, 168($sp) move $2, $4 jr $ra - daddiu $sp, $sp, 64 + daddiu $sp, $sp, 176 asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: daddiu $sp, $sp, -112 sd $ra, 104($sp) diff --git a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align4 b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align4 index 7e498d1..826e030 100644 --- a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align4 @@ -1,117 +1,142 @@ asm_test::atomic_memcpy_load_align4::acquire: - daddiu $sp, $sp, -64 - daddiu $1, $5, 7 - daddiu $2, $zero, -8 - and $1, 
$1, $2 - dsubu $2, $1, $5 - sltiu $1, $2, 65 - beqz $1, .LBB8_6 - nop - beqz $2, .LBB8_7 - daddiu $3, $sp, 0 - daddiu $6, $zero, 0 -.LBB8_3: - daddu $7, $5, $6 - daddu $1, $3, $6 - daddiu $6, $6, 1 - lb $7, 0($7) - bne $2, $6, .LBB8_3 - sb $7, 0($1) - daddiu $1, $zero, 64 - dsubu $6, $1, $6 - sltiu $1, $6, 8 - bnez $1, .LBB8_9 - nop - b .LBB8_8 - nop -.LBB8_6: - lw $1, 0($5) - sw $1, 0($sp) - lw $1, 4($5) - sw $1, 4($sp) - lw $1, 8($5) - sw $1, 8($sp) - lw $1, 12($5) - sw $1, 12($sp) - lw $1, 16($5) - sw $1, 16($sp) - lw $1, 20($5) - sw $1, 20($sp) - lw $1, 24($5) - sw $1, 24($sp) - lw $1, 28($5) - sw $1, 28($sp) - lw $1, 32($5) - sw $1, 32($sp) - lw $1, 36($5) - sw $1, 36($sp) - lw $1, 40($5) - sw $1, 40($sp) - lw $1, 44($5) - sw $1, 44($sp) - lw $1, 48($5) - sw $1, 48($sp) - lw $1, 52($5) - sw $1, 52($sp) - lw $1, 56($5) - sw $1, 56($sp) - lw $1, 60($5) - b .LBB8_12 - sw $1, 60($sp) -.LBB8_7: - daddiu $6, $zero, 64 - daddiu $2, $zero, 0 -.LBB8_8: - daddu $7, $5, $2 - daddu $1, $3, $2 - daddiu $6, $6, -8 - ld $7, 0($7) - sdl $7, 0($1) - sdr $7, 7($1) - sltiu $1, $6, 8 - beqz $1, .LBB8_8 - daddiu $2, $2, 8 -.LBB8_9: - beqz $6, .LBB8_12 - nop - daddu $5, $5, $2 - daddu $2, $3, $2 -.LBB8_11: - lb $1, 0($5) - daddiu $5, $5, 1 - daddiu $6, $6, -1 - sb $1, 0($2) - bnez $6, .LBB8_11 - daddiu $2, $2, 1 -.LBB8_12: - ld $1, 56($sp) - ld $2, 24($sp) + daddiu $sp, $sp, -128 + sd $22, 120($sp) + sd $21, 112($sp) + sd $20, 104($sp) + sd $19, 96($sp) + sd $18, 88($sp) + sd $17, 80($sp) + sd $16, 72($sp) + daddiu $1, $sp, 8 + #APP + lw $2, 0($5) + sw $2, 0($1) + #NO_APP + daddiu $8, $5, 52 + daddiu $10, $5, 48 + daddiu $12, $5, 44 + daddiu $14, $5, 40 + daddiu $24, $5, 36 + daddiu $16, $5, 32 + daddiu $18, $5, 28 + daddiu $20, $5, 24 + daddiu $2, $5, 4 + ori $3, $1, 4 + #APP + lw $6, 0($2) + sw $6, 0($3) + #NO_APP + daddiu $7, $1, 56 + daddiu $9, $1, 52 + daddiu $11, $1, 48 + daddiu $13, $1, 44 + daddiu $15, $1, 40 + daddiu $25, $1, 36 + daddiu $17, $1, 32 + daddiu $19, $1, 28 + 
daddiu $21, $1, 24 + daddiu $2, $5, 8 + daddiu $3, $1, 8 + #APP + lw $6, 0($2) + sw $6, 0($3) + #NO_APP + daddiu $2, $5, 12 + daddiu $3, $1, 12 + #APP + lw $6, 0($2) + sw $6, 0($3) + #NO_APP + daddiu $2, $5, 16 + daddiu $3, $1, 16 + #APP + lw $6, 0($2) + sw $6, 0($3) + #NO_APP + daddiu $3, $1, 60 + daddiu $2, $5, 60 + daddiu $6, $5, 56 + daddiu $5, $5, 20 + daddiu $1, $1, 20 + #APP + lw $22, 0($5) + sw $22, 0($1) + #NO_APP + #APP + lw $1, 0($20) + sw $1, 0($21) + #NO_APP + #APP + lw $1, 0($18) + sw $1, 0($19) + #NO_APP + #APP + lw $1, 0($16) + sw $1, 0($17) + #NO_APP + #APP + lw $1, 0($24) + sw $1, 0($25) + #NO_APP + #APP + lw $1, 0($14) + sw $1, 0($15) + #NO_APP + #APP + lw $1, 0($12) + sw $1, 0($13) + #NO_APP + #APP + lw $1, 0($10) + sw $1, 0($11) + #NO_APP + #APP + lw $1, 0($8) + sw $1, 0($9) + #NO_APP + #APP + lw $1, 0($6) + sw $1, 0($7) + #NO_APP + #APP + lw $1, 0($2) + sw $1, 0($3) + #NO_APP + ld $1, 8($sp) + ld $2, 40($sp) ld $3, 32($sp) - ld $5, 48($sp) - ld $6, 16($sp) - ld $7, 40($sp) - ld $8, 0($sp) - ld $9, 8($sp) - sdl $1, 56($4) - sdl $5, 48($4) - sdl $7, 40($4) - sdl $3, 32($4) - sdl $2, 24($4) - sdl $6, 16($4) - sdl $9, 8($4) - sdl $8, 0($4) - sdr $2, 31($4) - sdr $1, 63($4) - sdr $5, 55($4) - sdr $7, 47($4) - sdr $3, 39($4) - sdr $6, 23($4) - sdr $9, 15($4) - sdr $8, 7($4) + ld $5, 16($sp) + ld $6, 48($sp) + ld $7, 24($sp) + ld $8, 64($sp) + ld $9, 56($sp) + sdl $1, 0($4) + sdl $5, 8($4) + sdl $7, 16($4) + sdl $3, 24($4) + sdl $2, 32($4) + sdl $6, 40($4) + sdl $9, 48($4) + sdl $8, 56($4) + sdr $2, 39($4) + sdr $1, 7($4) + sdr $5, 15($4) + sdr $7, 23($4) + sdr $3, 31($4) + sdr $6, 47($4) + sdr $9, 55($4) + sdr $8, 63($4) sync + ld $16, 72($sp) + ld $17, 80($sp) + ld $18, 88($sp) + ld $19, 96($sp) + ld $20, 104($sp) + ld $21, 112($sp) + ld $22, 120($sp) move $2, $4 jr $ra - daddiu $sp, $sp, 64 + daddiu $sp, $sp, 128 asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: lw $1, 0($5) lw $2, 4($5) diff --git 
a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align8 b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align8 index 3ab699c..aa7944f 100644 --- a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_load_align8 @@ -1,24 +1,72 @@ asm_test::atomic_memcpy_load_align8::acquire: - ld $1, 0($5) - ld $2, 8($5) - ld $3, 16($5) - ld $6, 24($5) - ld $7, 32($5) - ld $8, 40($5) - ld $9, 48($5) - ld $5, 56($5) - sd $5, 56($4) - sd $9, 48($4) - sd $8, 40($4) - sd $7, 32($4) - sd $6, 24($4) + daddiu $sp, $sp, -64 + daddiu $1, $sp, 0 + #APP + ld $2, 0($5) + sd $2, 0($1) + #NO_APP + daddiu $2, $5, 8 + daddiu $3, $1, 8 + #APP + ld $6, 0($2) + sd $6, 0($3) + #NO_APP + daddiu $7, $1, 48 + daddiu $2, $5, 16 + daddiu $3, $1, 16 + #APP + ld $6, 0($2) + sd $6, 0($3) + #NO_APP + daddiu $2, $5, 24 + daddiu $3, $1, 24 + #APP + ld $6, 0($2) + sd $6, 0($3) + #NO_APP + daddiu $2, $5, 32 + daddiu $3, $1, 32 + #APP + ld $6, 0($2) + sd $6, 0($3) + #NO_APP + daddiu $3, $1, 56 + daddiu $2, $5, 56 + daddiu $6, $5, 48 + daddiu $5, $5, 40 + daddiu $1, $1, 40 + #APP + ld $8, 0($5) + sd $8, 0($1) + #NO_APP + #APP + ld $1, 0($6) + sd $1, 0($7) + #NO_APP + #APP + ld $1, 0($2) + sd $1, 0($3) + #NO_APP + ld $1, 32($sp) + ld $2, 24($sp) + ld $3, 16($sp) + ld $5, 8($sp) + ld $6, 0($sp) + sd $1, 32($4) + ld $1, 40($sp) + sd $2, 24($4) + sd $6, 0($4) + sd $5, 8($4) sd $3, 16($4) - sd $2, 8($4) move $2, $4 - sd $1, 0($4) + sd $1, 40($4) + ld $1, 48($sp) + sd $1, 48($4) + ld $1, 56($sp) + sd $1, 56($4) sync jr $ra - nop + daddiu $sp, $sp, 64 asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: ld $1, 0($5) ld $2, 8($5) diff --git a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align1 b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align1 index c2003c3..05fdfa0 100644 --- 
a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align1 @@ -25,88 +25,35 @@ asm_test::atomic_memcpy_store_align1::release: ldr $1, 7($5) sd $1, 0($sp) sync - ld $1, 0($sp) - ld $2, 8($sp) - ld $3, 16($sp) - ld $5, 24($sp) - ld $6, 32($sp) - ld $7, 40($sp) - ld $8, 48($sp) - ld $9, 56($sp) - sd $2, 72($sp) - sd $1, 64($sp) - daddiu $1, $4, 7 - daddiu $2, $zero, -8 - sd $9, 120($sp) - sd $8, 112($sp) - sd $7, 104($sp) - sd $6, 96($sp) - sd $5, 88($sp) - and $1, $1, $2 - dsubu $2, $1, $4 - sltiu $1, $2, 65 - beqz $1, .LBB2_6 - sd $3, 80($sp) - beqz $2, .LBB2_9 - nop - daddiu $3, $zero, 0 - daddiu $5, $sp, 64 -.LBB2_3: - daddu $6, $5, $3 - daddu $1, $4, $3 - daddiu $3, $3, 1 - lbu $6, 0($6) - bne $2, $3, .LBB2_3 - sb $6, 0($1) - daddiu $1, $zero, 64 - dsubu $3, $1, $3 - sltiu $1, $3, 8 - bnez $1, .LBB2_12 - nop - b .LBB2_10 - nop -.LBB2_6: + ld $1, 16($sp) + ld $2, 24($sp) + ld $3, 32($sp) + ld $5, 40($sp) + ld $6, 48($sp) + ld $7, 56($sp) + sd $1, 80($sp) + ld $1, 8($sp) + sd $5, 104($sp) + sd $3, 96($sp) + sd $2, 88($sp) daddiu $2, $zero, 0 daddiu $3, $sp, 64 daddiu $5, $zero, 64 -.LBB2_7: - daddu $6, $3, $2 + sd $7, 120($sp) + sd $6, 112($sp) + sd $1, 72($sp) + ld $1, 0($sp) + sd $1, 64($sp) +.LBB2_1: daddu $1, $4, $2 + daddu $6, $3, $2 + #APP + lbu $7, 0($6) + sb $7, 0($1) + #NO_APP daddiu $2, $2, 1 - lbu $6, 0($6) - bne $2, $5, .LBB2_7 - sb $6, 0($1) - b .LBB2_15 + bne $2, $5, .LBB2_1 nop -.LBB2_9: - daddiu $3, $zero, 64 - daddiu $2, $zero, 0 -.LBB2_10: - daddiu $5, $sp, 64 -.LBB2_11: - daddu $1, $5, $2 - daddiu $3, $3, -8 - ldl $6, 0($1) - ldr $6, 7($1) - daddu $1, $4, $2 - sd $6, 0($1) - sltiu $1, $3, 8 - beqz $1, .LBB2_11 - daddiu $2, $2, 8 -.LBB2_12: - beqz $3, .LBB2_15 - nop - daddiu $1, $sp, 64 - daddu $5, $1, $2 - daddu $2, $4, $2 -.LBB2_14: - lbu $1, 0($5) - daddiu $5, $5, 1 - daddiu $3, $3, -1 - sb $1, 0($2) - bnez $3, .LBB2_14 - daddiu $2, $2, 1 
-.LBB2_15: jr $ra daddiu $sp, $sp, 128 asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: diff --git a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align16 b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align16 index 7141113..4343d46 100644 --- a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align16 @@ -1,22 +1,87 @@ asm_test::atomic_memcpy_store_align16::release: - ld $1, 56($5) - ld $2, 48($5) + daddiu $sp, $sp, -128 + ld $1, 24($5) + ld $2, 32($5) ld $3, 40($5) - ld $6, 32($5) - ld $7, 24($5) - ld $8, 16($5) - ld $9, 8($5) - ld $5, 0($5) + ld $6, 48($5) + ld $7, 56($5) + daddiu $9, $4, 32 + daddiu $11, $4, 24 + sd $1, 24($sp) + ld $1, 16($5) + sd $7, 56($sp) + sd $6, 48($sp) + sd $3, 40($sp) + sd $2, 32($sp) + sd $1, 16($sp) + ld $1, 8($5) + sd $1, 8($sp) + ld $1, 0($5) + sd $1, 0($sp) sync - sd $5, 0($4) - sd $9, 8($4) - sd $8, 16($4) - sd $7, 24($4) - sd $6, 32($4) - sd $3, 40($4) - sd $2, 48($4) + ld $1, 16($sp) + ld $2, 24($sp) + ld $3, 32($sp) + ld $5, 40($sp) + ld $6, 48($sp) + ld $7, 56($sp) + sd $1, 80($sp) + ld $1, 8($sp) + sd $7, 120($sp) + sd $6, 112($sp) + sd $5, 104($sp) + sd $3, 96($sp) + sd $2, 88($sp) + daddiu $7, $4, 40 + sd $1, 72($sp) + ld $1, 0($sp) + sd $1, 64($sp) + daddiu $1, $sp, 64 + #APP + ld $2, 0($1) + sd $2, 0($4) + #NO_APP + ori $3, $1, 8 + daddiu $6, $1, 48 + daddiu $8, $1, 40 + daddiu $10, $1, 32 + daddiu $12, $1, 24 + daddiu $2, $4, 8 + #APP + ld $5, 0($3) + sd $5, 0($2) + #NO_APP + daddiu $3, $1, 56 + daddiu $1, $1, 16 + daddiu $2, $4, 56 + daddiu $5, $4, 48 + daddiu $4, $4, 16 + #APP + ld $13, 0($1) + sd $13, 0($4) + #NO_APP + #APP + ld $1, 0($12) + sd $1, 0($11) + #NO_APP + #APP + ld $1, 0($10) + sd $1, 0($9) + #NO_APP + #APP + ld $1, 0($8) + sd $1, 0($7) + #NO_APP + #APP + ld $1, 0($6) + sd $1, 0($5) + #NO_APP + #APP + ld $1, 0($3) + sd $1, 0($2) + #NO_APP 
jr $ra - sd $1, 56($4) + daddiu $sp, $sp, 128 asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: daddiu $sp, $sp, -64 sync diff --git a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align2 b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align2 index e097853..624e039 100644 --- a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align2 @@ -1,163 +1,265 @@ asm_test::atomic_memcpy_store_align2::release: - daddiu $sp, $sp, -128 + daddiu $sp, $sp, -240 + sd $ra, 232($sp) + sd $fp, 224($sp) + sd $gp, 216($sp) + sd $23, 208($sp) + sd $22, 200($sp) + sd $21, 192($sp) + sd $20, 184($sp) + sd $19, 176($sp) + sd $18, 168($sp) + sd $17, 160($sp) + sd $16, 152($sp) ldl $1, 40($5) ldl $2, 32($5) ldl $3, 24($5) ldl $6, 16($5) ldl $7, 48($5) ldl $8, 56($5) + daddiu $25, $4, 2 + daddiu $24, $4, 4 + daddiu $11, $4, 6 + daddiu $13, $4, 8 + daddiu $19, $4, 10 + daddiu $18, $4, 12 + daddiu $17, $4, 14 + daddiu $16, $4, 16 + daddiu $gp, $4, 18 + daddiu $23, $4, 20 + daddiu $22, $4, 22 + daddiu $21, $4, 24 + daddiu $20, $4, 26 + daddiu $ra, $4, 30 + daddiu $fp, $4, 32 + daddiu $9, $4, 50 ldr $1, 47($5) ldr $6, 23($5) ldr $3, 31($5) ldr $2, 39($5) ldr $7, 55($5) ldr $8, 63($5) - sd $1, 40($sp) + sd $1, 64($sp) ldl $1, 8($5) - sd $8, 56($sp) - sd $7, 48($sp) - sd $2, 32($sp) - sd $3, 24($sp) - sd $6, 16($sp) + sd $8, 80($sp) + sd $7, 72($sp) + sd $2, 56($sp) + sd $3, 48($sp) + sd $6, 40($sp) + daddiu $8, $4, 52 ldr $1, 15($5) - sd $1, 8($sp) + sd $1, 32($sp) ldl $1, 0($5) ldr $1, 7($5) - sd $1, 0($sp) + sd $1, 24($sp) sync - ld $1, 0($sp) + ld $1, 40($sp) + ld $2, 48($sp) + ld $3, 56($sp) + ld $5, 64($sp) + ld $6, 72($sp) + ld $7, 80($sp) + sd $1, 104($sp) + ld $1, 32($sp) + sd $7, 144($sp) + sd $6, 136($sp) + sd $5, 128($sp) + sd $3, 120($sp) + sd $2, 112($sp) + daddiu $3, $4, 62 + daddiu $5, $4, 58 + daddiu $6, $4, 56 + 
daddiu $7, $4, 54 + sd $1, 96($sp) + ld $1, 24($sp) + sd $1, 88($sp) + daddiu $1, $sp, 88 + #APP + lhu $2, 0($1) + sh $2, 0($4) + #NO_APP + sd $3, 16($sp) + daddiu $3, $4, 60 + ori $10, $1, 2 + ori $12, $1, 4 + ori $15, $1, 6 + daddiu $14, $1, 8 + sd $3, 8($sp) + daddiu $2, $4, 28 + #APP + lhu $3, 0($10) + sh $3, 0($25) + #NO_APP + daddiu $10, $4, 48 + #APP + lhu $25, 0($12) + sh $25, 0($24) + #NO_APP + daddiu $3, $4, 46 + daddiu $12, $4, 44 + daddiu $24, $4, 42 + #APP + lhu $25, 0($15) + sh $25, 0($11) + #NO_APP + daddiu $11, $4, 40 + daddiu $15, $4, 38 + #APP + lhu $25, 0($14) + sh $25, 0($13) + #NO_APP + daddiu $14, $1, 10 + daddiu $13, $4, 36 + daddiu $4, $4, 34 + #APP + lhu $25, 0($14) + sh $25, 0($19) + #NO_APP + daddiu $14, $1, 16 + daddiu $25, $1, 12 + #APP + lhu $19, 0($25) + sh $19, 0($18) + #NO_APP + daddiu $18, $1, 28 + daddiu $25, $1, 14 + #APP + lhu $19, 0($25) + sh $19, 0($17) + #NO_APP + daddiu $17, $1, 24 + #APP + lhu $19, 0($14) + sh $19, 0($16) + #NO_APP + daddiu $16, $1, 18 + daddiu $14, $1, 22 + daddiu $25, $1, 26 + #APP + lhu $19, 0($16) + sh $19, 0($gp) + #NO_APP + daddiu $16, $1, 20 + #APP + lhu $gp, 0($16) + sh $gp, 0($23) + #NO_APP + daddiu $16, $1, 30 + daddiu $19, $1, 32 + daddiu $23, $1, 62 + #APP + lhu $gp, 0($14) + sh $gp, 0($22) + #NO_APP + daddiu $14, $1, 60 + daddiu $22, $1, 58 + #APP + lhu $gp, 0($17) + sh $gp, 0($21) + #NO_APP + daddiu $17, $1, 56 + daddiu $21, $1, 54 + #APP + lhu $gp, 0($25) + sh $gp, 0($20) + #NO_APP + daddiu $25, $1, 52 + daddiu $20, $1, 50 + #APP + lhu $gp, 0($18) + sh $gp, 0($2) + #NO_APP + daddiu $2, $1, 48 + daddiu $18, $1, 46 + #APP + lhu $gp, 0($16) + sh $gp, 0($ra) + #NO_APP + daddiu $16, $1, 44 + #APP + lhu $ra, 0($19) + sh $ra, 0($fp) + #NO_APP + daddiu $fp, $1, 34 + daddiu $gp, $1, 42 + daddiu $19, $1, 40 + #APP + lhu $ra, 0($fp) + sh $ra, 0($4) + #NO_APP + daddiu $4, $1, 38 + daddiu $1, $1, 36 + #APP + lhu $fp, 0($1) + sh $fp, 0($13) + #NO_APP + #APP + lhu $1, 0($4) + sh $1, 0($15) + #NO_APP + #APP 
+ lhu $1, 0($19) + sh $1, 0($11) + #NO_APP + #APP + lhu $1, 0($gp) + sh $1, 0($24) + #NO_APP + #APP + lhu $1, 0($16) + sh $1, 0($12) + #NO_APP + #APP + lhu $1, 0($18) + sh $1, 0($3) + #NO_APP + #APP + lhu $1, 0($2) + sh $1, 0($10) + #NO_APP + #APP + lhu $1, 0($20) + sh $1, 0($9) + #NO_APP + #APP + lhu $1, 0($25) + sh $1, 0($8) + #NO_APP + #APP + lhu $1, 0($21) + sh $1, 0($7) + #NO_APP + #APP + lhu $1, 0($17) + sh $1, 0($6) + #NO_APP + #APP + lhu $1, 0($22) + sh $1, 0($5) + #NO_APP ld $2, 8($sp) - ld $3, 16($sp) - ld $5, 24($sp) - ld $6, 32($sp) - ld $7, 40($sp) - ld $8, 48($sp) - ld $9, 56($sp) - sd $2, 72($sp) - sd $1, 64($sp) - daddiu $1, $4, 7 - daddiu $2, $zero, -8 - sd $9, 120($sp) - sd $8, 112($sp) - sd $7, 104($sp) - sd $6, 96($sp) - sd $5, 88($sp) - and $1, $1, $2 - dsubu $2, $1, $4 - sltiu $1, $2, 65 - beqz $1, .LBB6_6 - sd $3, 80($sp) - beqz $2, .LBB6_7 - daddiu $5, $sp, 64 - daddiu $3, $zero, 0 -.LBB6_3: - daddu $6, $5, $3 - daddu $1, $4, $3 - daddiu $3, $3, 1 - lbu $6, 0($6) - bne $2, $3, .LBB6_3 - sb $6, 0($1) - daddiu $1, $zero, 64 - dsubu $3, $1, $3 - sltiu $1, $3, 8 - bnez $1, .LBB6_9 - nop - b .LBB6_8 - nop -.LBB6_6: - lhu $1, 64($sp) - sh $1, 0($4) - lhu $1, 66($sp) - sh $1, 2($4) - lhu $1, 68($sp) - sh $1, 4($4) - lhu $1, 70($sp) - sh $1, 6($4) - lhu $1, 72($sp) - sh $1, 8($4) - lhu $1, 74($sp) - sh $1, 10($4) - lhu $1, 76($sp) - sh $1, 12($4) - lhu $1, 78($sp) - sh $1, 14($4) - lhu $1, 80($sp) - sh $1, 16($4) - lhu $1, 82($sp) - sh $1, 18($4) - lhu $1, 84($sp) - sh $1, 20($4) - lhu $1, 86($sp) - sh $1, 22($4) - lhu $1, 88($sp) - sh $1, 24($4) - lhu $1, 90($sp) - sh $1, 26($4) - lhu $1, 92($sp) - sh $1, 28($4) - lhu $1, 94($sp) - sh $1, 30($4) - lhu $1, 96($sp) - sh $1, 32($4) - lhu $1, 98($sp) - sh $1, 34($4) - lhu $1, 100($sp) - sh $1, 36($4) - lhu $1, 102($sp) - sh $1, 38($4) - lhu $1, 104($sp) - sh $1, 40($4) - lhu $1, 106($sp) - sh $1, 42($4) - lhu $1, 108($sp) - sh $1, 44($4) - lhu $1, 110($sp) - sh $1, 46($4) - lhu $1, 112($sp) - sh $1, 
48($4) - lhu $1, 114($sp) - sh $1, 50($4) - lhu $1, 116($sp) - sh $1, 52($4) - lhu $1, 118($sp) - sh $1, 54($4) - lhu $1, 120($sp) - sh $1, 56($4) - lhu $1, 122($sp) - sh $1, 58($4) - lhu $1, 124($sp) - sh $1, 60($4) - lhu $1, 126($sp) - b .LBB6_12 - sh $1, 62($4) -.LBB6_7: - daddiu $3, $zero, 64 - daddiu $2, $zero, 0 -.LBB6_8: - daddu $1, $5, $2 - daddiu $3, $3, -8 - ldl $6, 0($1) - ldr $6, 7($1) - daddu $1, $4, $2 - sd $6, 0($1) - sltiu $1, $3, 8 - beqz $1, .LBB6_8 - daddiu $2, $2, 8 -.LBB6_9: - beqz $3, .LBB6_12 - nop - daddu $5, $5, $2 - daddu $2, $4, $2 -.LBB6_11: - lbu $1, 0($5) - daddiu $5, $5, 1 - daddiu $3, $3, -1 - sb $1, 0($2) - bnez $3, .LBB6_11 - daddiu $2, $2, 1 -.LBB6_12: + #APP + lhu $1, 0($14) + sh $1, 0($2) + #NO_APP + ld $2, 16($sp) + #APP + lhu $1, 0($23) + sh $1, 0($2) + #NO_APP + ld $16, 152($sp) + ld $17, 160($sp) + ld $18, 168($sp) + ld $19, 176($sp) + ld $20, 184($sp) + ld $21, 192($sp) + ld $22, 200($sp) + ld $23, 208($sp) + ld $gp, 216($sp) + ld $fp, 224($sp) + ld $ra, 232($sp) jr $ra - daddiu $sp, $sp, 128 + daddiu $sp, $sp, 240 asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: daddiu $sp, $sp, -64 sync diff --git a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align4 b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align4 index 7d2f7bb..5e8a20a 100644 --- a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align4 @@ -1,131 +1,169 @@ asm_test::atomic_memcpy_store_align4::release: - daddiu $sp, $sp, -128 + daddiu $sp, $sp, -240 + sd $ra, 232($sp) + sd $fp, 224($sp) + sd $gp, 216($sp) + sd $23, 208($sp) + sd $22, 200($sp) + sd $21, 192($sp) + sd $20, 184($sp) + sd $19, 176($sp) + sd $18, 168($sp) + sd $17, 160($sp) + sd $16, 152($sp) ldl $1, 40($5) ldl $2, 32($5) ldl $3, 24($5) ldl $6, 16($5) ldl $7, 48($5) ldl $8, 56($5) + daddiu $9, $4, 48 + daddiu $11, $4, 44 + daddiu $13, 
$4, 40 + daddiu $15, $4, 36 + daddiu $25, $4, 32 + daddiu $17, $4, 28 + daddiu $19, $4, 24 + daddiu $21, $4, 20 + daddiu $23, $4, 16 + daddiu $gp, $4, 12 + daddiu $fp, $4, 8 ldr $1, 47($5) ldr $6, 23($5) ldr $3, 31($5) ldr $2, 39($5) ldr $7, 55($5) ldr $8, 63($5) - sd $1, 40($sp) + sd $1, 64($sp) ldl $1, 8($5) - sd $8, 56($sp) - sd $7, 48($sp) - sd $2, 32($sp) - sd $3, 24($sp) - sd $6, 16($sp) + sd $8, 80($sp) + sd $7, 72($sp) + sd $2, 56($sp) + sd $3, 48($sp) + sd $6, 40($sp) ldr $1, 15($5) - sd $1, 8($sp) + sd $1, 32($sp) ldl $1, 0($5) ldr $1, 7($5) - sd $1, 0($sp) + sd $1, 24($sp) sync - ld $1, 0($sp) - ld $2, 8($sp) - ld $3, 16($sp) - ld $5, 24($sp) - ld $6, 32($sp) - ld $7, 40($sp) - ld $8, 48($sp) - ld $9, 56($sp) - sd $2, 72($sp) - sd $1, 64($sp) - daddiu $1, $4, 7 - daddiu $2, $zero, -8 - sd $9, 120($sp) - sd $8, 112($sp) - sd $7, 104($sp) - sd $6, 96($sp) - sd $5, 88($sp) - and $1, $1, $2 - dsubu $2, $1, $4 - sltiu $1, $2, 65 - beqz $1, .LBB10_6 - sd $3, 80($sp) - beqz $2, .LBB10_7 - daddiu $5, $sp, 64 - daddiu $3, $zero, 0 -.LBB10_3: - daddu $6, $5, $3 - daddu $1, $4, $3 - daddiu $3, $3, 1 - lbu $6, 0($6) - bne $2, $3, .LBB10_3 - sb $6, 0($1) - daddiu $1, $zero, 64 - dsubu $3, $1, $3 - sltiu $1, $3, 8 - bnez $1, .LBB10_9 - nop - b .LBB10_8 - nop -.LBB10_6: - lw $1, 64($sp) - sw $1, 0($4) - lw $1, 68($sp) - sw $1, 4($4) - lw $1, 72($sp) - sw $1, 8($4) - lw $1, 76($sp) - sw $1, 12($4) - lw $1, 80($sp) - sw $1, 16($4) - lw $1, 84($sp) - sw $1, 20($4) - lw $1, 88($sp) - sw $1, 24($4) - lw $1, 92($sp) - sw $1, 28($4) - lw $1, 96($sp) - sw $1, 32($4) - lw $1, 100($sp) - sw $1, 36($4) - lw $1, 104($sp) - sw $1, 40($4) - lw $1, 108($sp) - sw $1, 44($4) - lw $1, 112($sp) - sw $1, 48($4) - lw $1, 116($sp) - sw $1, 52($4) - lw $1, 120($sp) - sw $1, 56($4) - lw $1, 124($sp) - b .LBB10_12 - sw $1, 60($4) -.LBB10_7: - daddiu $3, $zero, 64 - daddiu $2, $zero, 0 -.LBB10_8: - daddu $1, $5, $2 - daddiu $3, $3, -8 - ldl $6, 0($1) - ldr $6, 7($1) - daddu $1, $4, $2 - sd $6, 
0($1) - sltiu $1, $3, 8 - beqz $1, .LBB10_8 - daddiu $2, $2, 8 -.LBB10_9: - beqz $3, .LBB10_12 - nop - daddu $5, $5, $2 - daddu $2, $4, $2 -.LBB10_11: - lbu $1, 0($5) - daddiu $5, $5, 1 - daddiu $3, $3, -1 - sb $1, 0($2) - bnez $3, .LBB10_11 - daddiu $2, $2, 1 -.LBB10_12: + ld $1, 40($sp) + ld $2, 48($sp) + ld $3, 56($sp) + ld $5, 64($sp) + ld $6, 72($sp) + ld $7, 80($sp) + sd $1, 104($sp) + ld $1, 32($sp) + sd $7, 144($sp) + sd $5, 128($sp) + sd $6, 136($sp) + sd $3, 120($sp) + sd $2, 112($sp) + daddiu $5, $4, 56 + daddiu $7, $4, 52 + sd $1, 96($sp) + ld $1, 24($sp) + sd $1, 88($sp) + daddiu $1, $sp, 88 + #APP + lw $2, 0($1) + sw $2, 0($4) + #NO_APP + daddiu $6, $1, 56 + daddiu $8, $1, 52 + daddiu $10, $1, 48 + daddiu $12, $1, 44 + daddiu $14, $1, 40 + daddiu $24, $1, 36 + daddiu $16, $1, 32 + daddiu $18, $1, 28 + daddiu $20, $1, 24 + daddiu $22, $1, 20 + daddiu $ra, $1, 16 + daddiu $2, $4, 60 + daddiu $4, $4, 4 + sd $2, 16($sp) + daddiu $2, $1, 60 + sd $2, 8($sp) + ori $2, $1, 4 + #APP + lw $3, 0($2) + sw $3, 0($4) + #NO_APP + daddiu $2, $1, 12 + daddiu $1, $1, 8 + #APP + lw $3, 0($1) + sw $3, 0($fp) + #NO_APP + #APP + lw $1, 0($2) + sw $1, 0($gp) + #NO_APP + #APP + lw $1, 0($ra) + sw $1, 0($23) + #NO_APP + #APP + lw $1, 0($22) + sw $1, 0($21) + #NO_APP + #APP + lw $1, 0($20) + sw $1, 0($19) + #NO_APP + #APP + lw $1, 0($18) + sw $1, 0($17) + #NO_APP + #APP + lw $1, 0($16) + sw $1, 0($25) + #NO_APP + #APP + lw $1, 0($24) + sw $1, 0($15) + #NO_APP + #APP + lw $1, 0($14) + sw $1, 0($13) + #NO_APP + #APP + lw $1, 0($12) + sw $1, 0($11) + #NO_APP + #APP + lw $1, 0($10) + sw $1, 0($9) + #NO_APP + #APP + lw $1, 0($8) + sw $1, 0($7) + #NO_APP + #APP + lw $1, 0($6) + sw $1, 0($5) + #NO_APP + ld $2, 16($sp) + ld $3, 8($sp) + #APP + lw $1, 0($3) + sw $1, 0($2) + #NO_APP + ld $16, 152($sp) + ld $17, 160($sp) + ld $18, 168($sp) + ld $19, 176($sp) + ld $20, 184($sp) + ld $21, 192($sp) + ld $22, 200($sp) + ld $23, 208($sp) + ld $gp, 216($sp) + ld $fp, 224($sp) + ld $ra, 
232($sp) jr $ra - daddiu $sp, $sp, 128 + daddiu $sp, $sp, 240 asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: daddiu $sp, $sp, -64 sync diff --git a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align8 b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align8 index 545dc92..649287f 100644 --- a/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/mips64-unknown-linux-gnuabi64/atomic_memcpy_store_align8 @@ -1,22 +1,87 @@ asm_test::atomic_memcpy_store_align8::release: - ld $1, 56($5) - ld $2, 48($5) + daddiu $sp, $sp, -128 + ld $1, 24($5) + ld $2, 32($5) ld $3, 40($5) - ld $6, 32($5) - ld $7, 24($5) - ld $8, 16($5) - ld $9, 8($5) - ld $5, 0($5) + ld $6, 48($5) + ld $7, 56($5) + daddiu $9, $4, 32 + daddiu $11, $4, 24 + sd $1, 24($sp) + ld $1, 16($5) + sd $7, 56($sp) + sd $6, 48($sp) + sd $3, 40($sp) + sd $2, 32($sp) + sd $1, 16($sp) + ld $1, 8($5) + sd $1, 8($sp) + ld $1, 0($5) + sd $1, 0($sp) sync - sd $5, 0($4) - sd $9, 8($4) - sd $8, 16($4) - sd $7, 24($4) - sd $6, 32($4) - sd $3, 40($4) - sd $2, 48($4) + ld $1, 16($sp) + ld $2, 24($sp) + ld $3, 32($sp) + ld $5, 40($sp) + ld $6, 48($sp) + ld $7, 56($sp) + sd $1, 80($sp) + ld $1, 8($sp) + sd $7, 120($sp) + sd $6, 112($sp) + sd $5, 104($sp) + sd $3, 96($sp) + sd $2, 88($sp) + daddiu $7, $4, 40 + sd $1, 72($sp) + ld $1, 0($sp) + sd $1, 64($sp) + daddiu $1, $sp, 64 + #APP + ld $2, 0($1) + sd $2, 0($4) + #NO_APP + daddiu $3, $1, 8 + daddiu $6, $1, 48 + daddiu $8, $1, 40 + daddiu $10, $1, 32 + daddiu $12, $1, 24 + daddiu $2, $4, 8 + #APP + ld $5, 0($3) + sd $5, 0($2) + #NO_APP + daddiu $3, $1, 56 + daddiu $1, $1, 16 + daddiu $2, $4, 56 + daddiu $5, $4, 48 + daddiu $4, $4, 16 + #APP + ld $13, 0($1) + sd $13, 0($4) + #NO_APP + #APP + ld $1, 0($12) + sd $1, 0($11) + #NO_APP + #APP + ld $1, 0($10) + sd $1, 0($9) + #NO_APP + #APP + ld $1, 0($8) + sd $1, 0($7) + #NO_APP + #APP + ld $1, 0($6) + sd $1, 0($5) + #NO_APP + 
#APP + ld $1, 0($3) + sd $1, 0($2) + #NO_APP jr $ra - sd $1, 56($4) + daddiu $sp, $sp, 128 asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: daddiu $sp, $sp, -64 sync diff --git a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align1 b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align1 index 30b2234..4f9ac43 100644 --- a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align1 @@ -1,98 +1,18 @@ asm_test::atomic_memcpy_load_align1::acquire: daddiu $sp, $sp, -64 - daddiu $1, $5, 7 - daddiu $2, $zero, -8 - and $1, $1, $2 - dsubu $2, $1, $5 - sltiu $1, $2, 65 - beqz $1, .LBB0_6 - nop - beqz $2, .LBB0_9 - nop - daddiu $3, $zero, 0 - daddiu $6, $sp, 0 -.LBB0_3: - daddu $7, $5, $3 - daddu $1, $6, $3 - daddiu $3, $3, 1 - lb $7, 0($7) - bne $2, $3, .LBB0_3 - sb $7, 0($1) - daddiu $1, $zero, 64 - dsubu $3, $1, $3 - sltiu $1, $3, 8 - bnez $1, .LBB0_12 - nop - b .LBB0_10 - nop -.LBB0_6: - daddiu $2, $zero, 0 + daddiu $2, $zero, 63 daddiu $3, $sp, 0 - daddiu $6, $zero, 64 -.LBB0_7: - daddu $7, $5, $2 - daddu $1, $3, $2 - daddiu $2, $2, 1 - lb $7, 0($7) - bne $2, $6, .LBB0_7 - sb $7, 0($1) - ld $1, 56($sp) - ld $2, 24($sp) - ld $3, 32($sp) - ld $5, 48($sp) - ld $6, 16($sp) - ld $7, 40($sp) - ld $8, 0($sp) - ld $9, 8($sp) - sdl $1, 63($4) - sdl $5, 55($4) - sdl $7, 47($4) - sdl $3, 39($4) - sdl $2, 31($4) - sdl $6, 23($4) - sdl $9, 15($4) - sdl $8, 7($4) - sdr $1, 56($4) - sdr $5, 48($4) - sdr $7, 40($4) - sdr $3, 32($4) - sdr $2, 24($4) - sdr $6, 16($4) - sdr $9, 8($4) - sdr $8, 0($4) - sync - move $2, $4 - jr $ra - daddiu $sp, $sp, 64 -.LBB0_9: - daddiu $3, $zero, 64 - daddiu $2, $zero, 0 -.LBB0_10: - daddiu $6, $sp, 0 -.LBB0_11: - daddu $7, $5, $2 - daddu $1, $6, $2 - daddiu $3, $3, -8 - ld $7, 0($7) - sdl $7, 7($1) - sdr $7, 0($1) - sltiu $1, $3, 8 - beqz $1, .LBB0_11 - daddiu $2, $2, 8 -.LBB0_12: - beqz $3, 
.LBB0_15 + daddiu $6, $zero, -1 +.LBB0_1: + daddu $1, $5, $2 + daddu $7, $3, $2 + #APP + lb $8, 0($1) + sb $8, 0($7) + #NO_APP + daddiu $2, $2, -1 + bne $2, $6, .LBB0_1 nop - daddiu $1, $sp, 0 - daddu $5, $5, $2 - daddu $2, $1, $2 -.LBB0_14: - lb $1, 0($5) - daddiu $5, $5, 1 - daddiu $3, $3, -1 - sb $1, 0($2) - bnez $3, .LBB0_14 - daddiu $2, $2, 1 -.LBB0_15: ld $1, 56($sp) ld $2, 24($sp) ld $3, 32($sp) @@ -109,11 +29,11 @@ asm_test::atomic_memcpy_load_align1::acquire: sdl $6, 23($4) sdl $9, 15($4) sdl $8, 7($4) + sdr $2, 24($4) sdr $1, 56($4) sdr $5, 48($4) sdr $7, 40($4) sdr $3, 32($4) - sdr $2, 24($4) sdr $6, 16($4) sdr $9, 8($4) sdr $8, 0($4) diff --git a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align16 b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align16 index 8c1bf3c..042e605 100644 --- a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align16 @@ -1,24 +1,72 @@ asm_test::atomic_memcpy_load_align16::acquire: - ld $1, 56($5) - ld $2, 48($5) - ld $3, 40($5) - ld $6, 32($5) - ld $7, 24($5) - ld $8, 16($5) - ld $9, 8($5) - ld $5, 0($5) - sd $1, 56($4) - sd $2, 48($4) + daddiu $sp, $sp, -64 + daddiu $2, $sp, 0 + daddiu $1, $5, 56 + daddiu $3, $2, 56 + #APP + ld $6, 0($1) + sd $6, 0($3) + #NO_APP + daddiu $1, $5, 48 + daddiu $7, $2, 16 + daddiu $3, $2, 48 + #APP + ld $6, 0($1) + sd $6, 0($3) + #NO_APP + daddiu $1, $5, 40 + daddiu $3, $2, 40 + #APP + ld $6, 0($1) + sd $6, 0($3) + #NO_APP + daddiu $1, $5, 32 + daddiu $3, $2, 32 + #APP + ld $6, 0($1) + sd $6, 0($3) + #NO_APP + daddiu $1, $5, 24 + daddiu $3, $2, 24 + #APP + ld $6, 0($1) + sd $6, 0($3) + #NO_APP + daddiu $1, $5, 8 + daddiu $6, $5, 16 + #APP + ld $8, 0($6) + sd $8, 0($7) + #NO_APP + ori $3, $2, 8 + #APP + ld $6, 0($1) + sd $6, 0($3) + #NO_APP + #APP + ld $1, 0($5) + sd $1, 0($2) + #NO_APP + ld $1, 32($sp) + ld $2, 24($sp) + ld $3, 16($sp) + ld $5, 
8($sp) + ld $6, 0($sp) + sd $1, 32($4) + ld $1, 40($sp) + sd $2, 24($4) + sd $6, 0($4) + sd $5, 8($4) + sd $3, 16($4) move $2, $4 - sd $3, 40($4) - sd $6, 32($4) - sd $7, 24($4) - sd $8, 16($4) - sd $9, 8($4) - sd $5, 0($4) + sd $1, 40($4) + ld $1, 48($sp) + sd $1, 48($4) + ld $1, 56($sp) + sd $1, 56($4) sync jr $ra - nop + daddiu $sp, $sp, 64 asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: ld $1, 0($5) ld $2, 8($5) diff --git a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align2 b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align2 index 174f60e..9c88a4d 100644 --- a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align2 @@ -1,149 +1,250 @@ asm_test::atomic_memcpy_load_align2::acquire: - daddiu $sp, $sp, -64 - daddiu $1, $5, 7 - daddiu $2, $zero, -8 - and $1, $1, $2 - dsubu $2, $1, $5 - sltiu $1, $2, 65 - beqz $1, .LBB4_6 - nop - beqz $2, .LBB4_7 - daddiu $3, $sp, 0 - daddiu $6, $zero, 0 -.LBB4_3: - daddu $7, $5, $6 - daddu $1, $3, $6 - daddiu $6, $6, 1 - lb $7, 0($7) - bne $2, $6, .LBB4_3 - sb $7, 0($1) - daddiu $1, $zero, 64 - dsubu $6, $1, $6 - sltiu $1, $6, 8 - bnez $1, .LBB4_9 - nop - b .LBB4_8 - nop -.LBB4_6: - lh $1, 62($5) - sh $1, 62($sp) - lh $1, 60($5) - sh $1, 60($sp) - lh $1, 58($5) - sh $1, 58($sp) - lh $1, 56($5) - sh $1, 56($sp) - lh $1, 54($5) - sh $1, 54($sp) - lh $1, 52($5) - sh $1, 52($sp) - lh $1, 50($5) - sh $1, 50($sp) - lh $1, 48($5) - sh $1, 48($sp) - lh $1, 46($5) - sh $1, 46($sp) - lh $1, 44($5) - sh $1, 44($sp) - lh $1, 42($5) - sh $1, 42($sp) - lh $1, 40($5) - sh $1, 40($sp) - lh $1, 38($5) - sh $1, 38($sp) - lh $1, 36($5) - sh $1, 36($sp) - lh $1, 34($5) - sh $1, 34($sp) - lh $1, 32($5) - sh $1, 32($sp) - lh $1, 30($5) - sh $1, 30($sp) - lh $1, 28($5) - sh $1, 28($sp) - lh $1, 26($5) - sh $1, 26($sp) - lh $1, 24($5) - sh $1, 24($sp) - lh $1, 22($5) - sh $1, 
22($sp) - lh $1, 20($5) - sh $1, 20($sp) - lh $1, 18($5) - sh $1, 18($sp) - lh $1, 16($5) - sh $1, 16($sp) - lh $1, 14($5) - sh $1, 14($sp) - lh $1, 12($5) - sh $1, 12($sp) - lh $1, 10($5) - sh $1, 10($sp) - lh $1, 8($5) - sh $1, 8($sp) - lh $1, 6($5) - sh $1, 6($sp) - lh $1, 4($5) - sh $1, 4($sp) - lh $1, 2($5) - sh $1, 2($sp) - lh $1, 0($5) - b .LBB4_12 - sh $1, 0($sp) -.LBB4_7: - daddiu $6, $zero, 64 - daddiu $2, $zero, 0 -.LBB4_8: - daddu $7, $5, $2 - daddu $1, $3, $2 - daddiu $6, $6, -8 - ld $7, 0($7) - sdl $7, 7($1) - sdr $7, 0($1) - sltiu $1, $6, 8 - beqz $1, .LBB4_8 - daddiu $2, $2, 8 -.LBB4_9: - beqz $6, .LBB4_12 - nop - daddu $5, $5, $2 - daddu $2, $3, $2 -.LBB4_11: - lb $1, 0($5) - daddiu $5, $5, 1 - daddiu $6, $6, -1 - sb $1, 0($2) - bnez $6, .LBB4_11 - daddiu $2, $2, 1 -.LBB4_12: - ld $1, 56($sp) - ld $2, 24($sp) - ld $3, 32($sp) - ld $5, 48($sp) - ld $6, 16($sp) + daddiu $sp, $sp, -176 + sd $ra, 168($sp) + sd $fp, 160($sp) + sd $gp, 152($sp) + sd $23, 144($sp) + sd $22, 136($sp) + sd $21, 128($sp) + sd $20, 120($sp) + sd $19, 112($sp) + sd $18, 104($sp) + sd $17, 96($sp) + sd $16, 88($sp) + daddiu $1, $sp, 24 + daddiu $2, $5, 62 + daddiu $ra, $5, 2 + daddiu $8, $5, 52 + daddiu $14, $5, 46 + daddiu $12, $5, 44 + daddiu $10, $5, 42 + daddiu $18, $5, 40 + daddiu $16, $5, 38 + daddiu $24, $5, 36 + daddiu $gp, $5, 34 + daddiu $22, $5, 32 + daddiu $20, $5, 30 + daddiu $3, $1, 62 + #APP + lh $6, 0($2) + sh $6, 0($3) + #NO_APP + daddiu $2, $5, 60 + daddiu $9, $1, 52 + daddiu $7, $1, 50 + daddiu $15, $1, 46 + daddiu $13, $1, 44 + daddiu $11, $1, 42 + daddiu $19, $1, 40 + daddiu $17, $1, 38 + daddiu $25, $1, 36 + daddiu $fp, $1, 34 + daddiu $23, $1, 32 + daddiu $21, $1, 30 + daddiu $3, $1, 60 + #APP + lh $6, 0($2) + sh $6, 0($3) + #NO_APP + daddiu $2, $5, 58 + daddiu $3, $1, 58 + #APP + lh $6, 0($2) + sh $6, 0($3) + #NO_APP + daddiu $2, $5, 56 + daddiu $3, $1, 56 + #APP + lh $6, 0($2) + sh $6, 0($3) + #NO_APP + daddiu $2, $5, 54 + daddiu $3, $1, 54 + #APP + lh 
$6, 0($2) + sh $6, 0($3) + #NO_APP + sd $ra, 16($sp) + ori $ra, $1, 2 + daddiu $2, $5, 48 + sd $ra, 8($sp) + daddiu $6, $5, 50 + daddiu $3, $1, 48 + #APP + lh $ra, 0($8) + sh $ra, 0($9) + #NO_APP + daddiu $8, $5, 4 + ori $9, $1, 4 + #APP + lh $ra, 0($6) + sh $ra, 0($7) + #NO_APP + daddiu $6, $5, 6 + ori $7, $1, 6 + #APP + lh $ra, 0($2) + sh $ra, 0($3) + #NO_APP + daddiu $2, $5, 8 + daddiu $3, $1, 8 + #APP + lh $ra, 0($14) + sh $ra, 0($15) + #NO_APP + daddiu $14, $5, 10 + daddiu $15, $1, 10 + #APP + lh $ra, 0($12) + sh $ra, 0($13) + #NO_APP + daddiu $12, $5, 12 + daddiu $13, $1, 12 + #APP + lh $ra, 0($10) + sh $ra, 0($11) + #NO_APP + daddiu $10, $5, 14 + daddiu $11, $1, 14 + #APP + lh $ra, 0($18) + sh $ra, 0($19) + #NO_APP + daddiu $18, $5, 16 + daddiu $19, $1, 16 + #APP + lh $ra, 0($16) + sh $ra, 0($17) + #NO_APP + daddiu $16, $5, 18 + daddiu $17, $1, 18 + #APP + lh $ra, 0($24) + sh $ra, 0($25) + #NO_APP + daddiu $24, $5, 20 + daddiu $25, $1, 20 + #APP + lh $ra, 0($gp) + sh $ra, 0($fp) + #NO_APP + daddiu $gp, $5, 22 + daddiu $fp, $1, 22 + #APP + lh $ra, 0($22) + sh $ra, 0($23) + #NO_APP + daddiu $22, $5, 24 + daddiu $23, $1, 24 + #APP + lh $ra, 0($20) + sh $ra, 0($21) + #NO_APP + daddiu $20, $5, 28 + daddiu $21, $1, 28 + #APP + lh $ra, 0($20) + sh $ra, 0($21) + #NO_APP + daddiu $20, $5, 26 + daddiu $21, $1, 26 + #APP + lh $ra, 0($20) + sh $ra, 0($21) + #NO_APP + #APP + lh $20, 0($22) + sh $20, 0($23) + #NO_APP + #APP + lh $20, 0($gp) + sh $20, 0($fp) + #NO_APP + #APP + lh $20, 0($24) + sh $20, 0($25) + #NO_APP + #APP + lh $24, 0($16) + sh $24, 0($17) + #NO_APP + #APP + lh $24, 0($18) + sh $24, 0($19) + #NO_APP + #APP + lh $24, 0($10) + sh $24, 0($11) + #NO_APP + #APP + lh $10, 0($12) + sh $10, 0($13) + #NO_APP + #APP + lh $10, 0($14) + sh $10, 0($15) + #NO_APP + #APP + lh $10, 0($2) + sh $10, 0($3) + #NO_APP + #APP + lh $2, 0($6) + sh $2, 0($7) + #NO_APP + #APP + lh $2, 0($8) + sh $2, 0($9) + #NO_APP + ld $3, 16($sp) + ld $6, 8($sp) + #APP + lh $2, 0($3) + sh $2, 
0($6) + #NO_APP + #APP + lh $2, 0($5) + sh $2, 0($1) + #NO_APP + ld $1, 24($sp) + ld $2, 56($sp) + ld $3, 48($sp) + ld $5, 32($sp) + ld $6, 64($sp) ld $7, 40($sp) - ld $8, 0($sp) - ld $9, 8($sp) - sdl $1, 63($4) - sdl $5, 55($4) - sdl $7, 47($4) - sdl $3, 39($4) - sdl $2, 31($4) - sdl $6, 23($4) - sdl $9, 15($4) - sdl $8, 7($4) - sdr $2, 24($4) - sdr $1, 56($4) - sdr $5, 48($4) - sdr $7, 40($4) - sdr $3, 32($4) - sdr $6, 16($4) - sdr $9, 8($4) - sdr $8, 0($4) + ld $8, 80($sp) + ld $9, 72($sp) + sdl $1, 7($4) + sdl $5, 15($4) + sdl $7, 23($4) + sdl $3, 31($4) + sdl $2, 39($4) + sdl $6, 47($4) + sdl $9, 55($4) + sdl $8, 63($4) + sdr $2, 32($4) + sdr $1, 0($4) + sdr $5, 8($4) + sdr $7, 16($4) + sdr $3, 24($4) + sdr $6, 40($4) + sdr $9, 48($4) + sdr $8, 56($4) sync + ld $16, 88($sp) + ld $17, 96($sp) + ld $18, 104($sp) + ld $19, 112($sp) + ld $20, 120($sp) + ld $21, 128($sp) + ld $22, 136($sp) + ld $23, 144($sp) + ld $gp, 152($sp) + ld $fp, 160($sp) + ld $ra, 168($sp) move $2, $4 jr $ra - daddiu $sp, $sp, 64 + daddiu $sp, $sp, 176 asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: daddiu $sp, $sp, -112 sd $ra, 104($sp) diff --git a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align4 b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align4 index 2e08c60..b964a3b 100644 --- a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align4 @@ -1,117 +1,142 @@ asm_test::atomic_memcpy_load_align4::acquire: - daddiu $sp, $sp, -64 - daddiu $1, $5, 7 - daddiu $2, $zero, -8 - and $1, $1, $2 - dsubu $2, $1, $5 - sltiu $1, $2, 65 - beqz $1, .LBB8_6 - nop - beqz $2, .LBB8_7 - daddiu $3, $sp, 0 - daddiu $6, $zero, 0 -.LBB8_3: - daddu $7, $5, $6 - daddu $1, $3, $6 - daddiu $6, $6, 1 - lb $7, 0($7) - bne $2, $6, .LBB8_3 - sb $7, 0($1) - daddiu $1, $zero, 64 - dsubu $6, $1, $6 - sltiu $1, $6, 8 - bnez $1, .LBB8_9 - nop - b 
.LBB8_8 - nop -.LBB8_6: - lw $1, 60($5) - sw $1, 60($sp) - lw $1, 56($5) - sw $1, 56($sp) - lw $1, 52($5) - sw $1, 52($sp) - lw $1, 48($5) - sw $1, 48($sp) - lw $1, 44($5) - sw $1, 44($sp) - lw $1, 40($5) - sw $1, 40($sp) - lw $1, 36($5) - sw $1, 36($sp) - lw $1, 32($5) - sw $1, 32($sp) - lw $1, 28($5) - sw $1, 28($sp) - lw $1, 24($5) - sw $1, 24($sp) - lw $1, 20($5) - sw $1, 20($sp) - lw $1, 16($5) - sw $1, 16($sp) - lw $1, 12($5) - sw $1, 12($sp) - lw $1, 8($5) - sw $1, 8($sp) - lw $1, 4($5) - sw $1, 4($sp) + daddiu $sp, $sp, -128 + sd $22, 120($sp) + sd $21, 112($sp) + sd $20, 104($sp) + sd $19, 96($sp) + sd $18, 88($sp) + sd $17, 80($sp) + sd $16, 72($sp) + daddiu $2, $sp, 8 + daddiu $1, $5, 60 + daddiu $20, $5, 40 + daddiu $18, $5, 36 + daddiu $16, $5, 32 + daddiu $24, $5, 28 + daddiu $14, $5, 24 + daddiu $12, $5, 20 + daddiu $10, $5, 16 + daddiu $8, $5, 12 + daddiu $3, $2, 60 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + daddiu $1, $5, 56 + daddiu $21, $2, 40 + daddiu $19, $2, 36 + daddiu $17, $2, 32 + daddiu $25, $2, 28 + daddiu $15, $2, 24 + daddiu $13, $2, 20 + daddiu $11, $2, 16 + daddiu $9, $2, 12 + daddiu $7, $2, 8 + daddiu $3, $2, 56 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + daddiu $1, $5, 52 + daddiu $3, $2, 52 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + daddiu $1, $5, 48 + daddiu $3, $2, 48 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + daddiu $1, $5, 44 + daddiu $3, $2, 44 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + daddiu $1, $5, 4 + #APP + lw $22, 0($20) + sw $22, 0($21) + #NO_APP + daddiu $6, $5, 8 + ori $3, $2, 4 + #APP + lw $20, 0($18) + sw $20, 0($19) + #NO_APP + #APP + lw $18, 0($16) + sw $18, 0($17) + #NO_APP + #APP + lw $16, 0($24) + sw $16, 0($25) + #NO_APP + #APP + lw $24, 0($14) + sw $24, 0($15) + #NO_APP + #APP + lw $14, 0($12) + sw $14, 0($13) + #NO_APP + #APP + lw $12, 0($10) + sw $12, 0($11) + #NO_APP + #APP + lw $10, 0($8) + sw $10, 0($9) + #NO_APP + #APP + lw $8, 0($6) + sw $8, 0($7) + #NO_APP + #APP + lw $6, 
0($1) + sw $6, 0($3) + #NO_APP + #APP lw $1, 0($5) - b .LBB8_12 - sw $1, 0($sp) -.LBB8_7: - daddiu $6, $zero, 64 - daddiu $2, $zero, 0 -.LBB8_8: - daddu $7, $5, $2 - daddu $1, $3, $2 - daddiu $6, $6, -8 - ld $7, 0($7) - sdl $7, 7($1) - sdr $7, 0($1) - sltiu $1, $6, 8 - beqz $1, .LBB8_8 - daddiu $2, $2, 8 -.LBB8_9: - beqz $6, .LBB8_12 - nop - daddu $5, $5, $2 - daddu $2, $3, $2 -.LBB8_11: - lb $1, 0($5) - daddiu $5, $5, 1 - daddiu $6, $6, -1 - sb $1, 0($2) - bnez $6, .LBB8_11 - daddiu $2, $2, 1 -.LBB8_12: - ld $1, 56($sp) - ld $2, 24($sp) + sw $1, 0($2) + #NO_APP + ld $1, 8($sp) + ld $2, 40($sp) ld $3, 32($sp) - ld $5, 48($sp) - ld $6, 16($sp) - ld $7, 40($sp) - ld $8, 0($sp) - ld $9, 8($sp) - sdl $1, 63($4) - sdl $5, 55($4) - sdl $7, 47($4) - sdl $3, 39($4) - sdl $2, 31($4) - sdl $6, 23($4) - sdl $9, 15($4) - sdl $8, 7($4) - sdr $2, 24($4) - sdr $1, 56($4) - sdr $5, 48($4) - sdr $7, 40($4) - sdr $3, 32($4) - sdr $6, 16($4) - sdr $9, 8($4) - sdr $8, 0($4) + ld $5, 16($sp) + ld $6, 48($sp) + ld $7, 24($sp) + ld $8, 64($sp) + ld $9, 56($sp) + sdl $1, 7($4) + sdl $5, 15($4) + sdl $7, 23($4) + sdl $3, 31($4) + sdl $2, 39($4) + sdl $6, 47($4) + sdl $9, 55($4) + sdl $8, 63($4) + sdr $2, 32($4) + sdr $1, 0($4) + sdr $5, 8($4) + sdr $7, 16($4) + sdr $3, 24($4) + sdr $6, 40($4) + sdr $9, 48($4) + sdr $8, 56($4) sync + ld $16, 72($sp) + ld $17, 80($sp) + ld $18, 88($sp) + ld $19, 96($sp) + ld $20, 104($sp) + ld $21, 112($sp) + ld $22, 120($sp) move $2, $4 jr $ra - daddiu $sp, $sp, 64 + daddiu $sp, $sp, 128 asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: lw $1, 0($5) lw $2, 4($5) diff --git a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align8 b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align8 index 6971ae6..2ad2d0f 100644 --- a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_load_align8 @@ -1,24 +1,72 @@ 
asm_test::atomic_memcpy_load_align8::acquire: - ld $1, 56($5) - ld $2, 48($5) - ld $3, 40($5) - ld $6, 32($5) - ld $7, 24($5) - ld $8, 16($5) - ld $9, 8($5) - ld $5, 0($5) - sd $1, 56($4) - sd $2, 48($4) + daddiu $sp, $sp, -64 + daddiu $2, $sp, 0 + daddiu $1, $5, 56 + daddiu $3, $2, 56 + #APP + ld $6, 0($1) + sd $6, 0($3) + #NO_APP + daddiu $1, $5, 48 + daddiu $7, $2, 16 + daddiu $3, $2, 48 + #APP + ld $6, 0($1) + sd $6, 0($3) + #NO_APP + daddiu $1, $5, 40 + daddiu $3, $2, 40 + #APP + ld $6, 0($1) + sd $6, 0($3) + #NO_APP + daddiu $1, $5, 32 + daddiu $3, $2, 32 + #APP + ld $6, 0($1) + sd $6, 0($3) + #NO_APP + daddiu $1, $5, 24 + daddiu $3, $2, 24 + #APP + ld $6, 0($1) + sd $6, 0($3) + #NO_APP + daddiu $1, $5, 8 + daddiu $6, $5, 16 + #APP + ld $8, 0($6) + sd $8, 0($7) + #NO_APP + daddiu $3, $2, 8 + #APP + ld $6, 0($1) + sd $6, 0($3) + #NO_APP + #APP + ld $1, 0($5) + sd $1, 0($2) + #NO_APP + ld $1, 32($sp) + ld $2, 24($sp) + ld $3, 16($sp) + ld $5, 8($sp) + ld $6, 0($sp) + sd $1, 32($4) + ld $1, 40($sp) + sd $2, 24($4) + sd $6, 0($4) + sd $5, 8($4) + sd $3, 16($4) move $2, $4 - sd $3, 40($4) - sd $6, 32($4) - sd $7, 24($4) - sd $8, 16($4) - sd $9, 8($4) - sd $5, 0($4) + sd $1, 40($4) + ld $1, 48($sp) + sd $1, 48($4) + ld $1, 56($sp) + sd $1, 56($4) sync jr $ra - nop + daddiu $sp, $sp, 64 asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: ld $1, 0($5) ld $2, 8($5) diff --git a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align1 b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align1 index 0555d2f..c06a555 100644 --- a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align1 @@ -25,88 +25,35 @@ asm_test::atomic_memcpy_store_align1::release: ldr $1, 0($5) sd $1, 0($sp) sync + ld $1, 16($sp) + ld $2, 24($sp) + ld $3, 32($sp) + ld $5, 40($sp) + ld $6, 48($sp) + ld $7, 56($sp) + sd $1, 80($sp) + ld $1, 
8($sp) + sd $5, 104($sp) + sd $3, 96($sp) + sd $2, 88($sp) + daddiu $2, $zero, 63 + daddiu $3, $sp, 64 + daddiu $5, $zero, -1 + sd $7, 120($sp) + sd $6, 112($sp) + sd $1, 72($sp) ld $1, 0($sp) - ld $2, 8($sp) - ld $3, 16($sp) - ld $5, 24($sp) - ld $6, 32($sp) - ld $7, 40($sp) - ld $8, 48($sp) - ld $9, 56($sp) - sd $2, 72($sp) sd $1, 64($sp) - daddiu $1, $4, 7 - daddiu $2, $zero, -8 - sd $9, 120($sp) - sd $8, 112($sp) - sd $7, 104($sp) - sd $6, 96($sp) - sd $5, 88($sp) - and $1, $1, $2 - dsubu $2, $1, $4 - sltiu $1, $2, 65 - beqz $1, .LBB2_6 - sd $3, 80($sp) - beqz $2, .LBB2_9 - nop - daddiu $3, $zero, 0 - daddiu $5, $sp, 64 -.LBB2_3: - daddu $6, $5, $3 - daddu $1, $4, $3 - daddiu $3, $3, 1 - lbu $6, 0($6) - bne $2, $3, .LBB2_3 - sb $6, 0($1) - daddiu $1, $zero, 64 - dsubu $3, $1, $3 - sltiu $1, $3, 8 - bnez $1, .LBB2_12 - nop - b .LBB2_10 - nop -.LBB2_6: - daddiu $2, $zero, 0 - daddiu $3, $sp, 64 - daddiu $5, $zero, 64 -.LBB2_7: - daddu $6, $3, $2 +.LBB2_1: daddu $1, $4, $2 - daddiu $2, $2, 1 - lbu $6, 0($6) - bne $2, $5, .LBB2_7 - sb $6, 0($1) - b .LBB2_15 - nop -.LBB2_9: - daddiu $3, $zero, 64 - daddiu $2, $zero, 0 -.LBB2_10: - daddiu $5, $sp, 64 -.LBB2_11: - daddu $1, $5, $2 - daddiu $3, $3, -8 - ldl $6, 7($1) - ldr $6, 0($1) - daddu $1, $4, $2 - sd $6, 0($1) - sltiu $1, $3, 8 - beqz $1, .LBB2_11 - daddiu $2, $2, 8 -.LBB2_12: - beqz $3, .LBB2_15 + daddu $6, $3, $2 + #APP + lbu $7, 0($6) + sb $7, 0($1) + #NO_APP + daddiu $2, $2, -1 + bne $2, $5, .LBB2_1 nop - daddiu $1, $sp, 64 - daddu $5, $1, $2 - daddu $2, $4, $2 -.LBB2_14: - lbu $1, 0($5) - daddiu $5, $5, 1 - daddiu $3, $3, -1 - sb $1, 0($2) - bnez $3, .LBB2_14 - daddiu $2, $2, 1 -.LBB2_15: jr $ra daddiu $sp, $sp, 128 asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: diff --git a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align16 b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align16 index 35b6d27..ecfda64 100644 --- 
a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align16 @@ -1,22 +1,87 @@ asm_test::atomic_memcpy_store_align16::release: + daddiu $sp, $sp, -128 + ld $1, 24($5) + ld $2, 32($5) + ld $3, 40($5) + ld $6, 48($5) + ld $7, 56($5) + daddiu $11, $4, 40 + daddiu $9, $4, 32 + sd $1, 24($sp) + ld $1, 16($5) + sd $7, 56($sp) + sd $6, 48($sp) + sd $3, 40($sp) + sd $2, 32($sp) + sd $1, 16($sp) + ld $1, 8($5) + sd $1, 8($sp) ld $1, 0($5) - ld $2, 8($5) - ld $3, 16($5) - ld $6, 24($5) - ld $7, 32($5) - ld $8, 40($5) - ld $9, 48($5) - ld $5, 56($5) + sd $1, 0($sp) sync - sd $5, 56($4) - sd $9, 48($4) - sd $8, 40($4) - sd $7, 32($4) - sd $6, 24($4) - sd $3, 16($4) - sd $2, 8($4) - jr $ra + ld $1, 8($sp) + ld $2, 16($sp) + ld $3, 24($sp) + ld $5, 32($sp) + ld $6, 40($sp) + ld $7, 48($sp) + ld $8, 56($sp) + sd $1, 72($sp) + ld $1, 0($sp) + sd $7, 112($sp) + sd $2, 80($sp) + daddiu $2, $sp, 64 + sd $8, 120($sp) + sd $6, 104($sp) + sd $3, 88($sp) + sd $5, 96($sp) + daddiu $7, $4, 24 + daddiu $3, $2, 56 + daddiu $12, $2, 40 + daddiu $10, $2, 32 + daddiu $8, $2, 24 + daddiu $6, $2, 16 + sd $1, 64($sp) + daddiu $1, $4, 56 + #APP + ld $5, 0($3) + sd $5, 0($1) + #NO_APP + daddiu $1, $4, 48 + daddiu $3, $2, 48 + #APP + ld $5, 0($3) + sd $5, 0($1) + #NO_APP + daddiu $1, $4, 8 + ori $3, $2, 8 + #APP + ld $13, 0($12) + sd $13, 0($11) + #NO_APP + daddiu $5, $4, 16 + #APP + ld $11, 0($10) + sd $11, 0($9) + #NO_APP + #APP + ld $9, 0($8) + sd $9, 0($7) + #NO_APP + #APP + ld $7, 0($6) + sd $7, 0($5) + #NO_APP + #APP + ld $5, 0($3) + sd $5, 0($1) + #NO_APP + #APP + ld $1, 0($2) sd $1, 0($4) + #NO_APP + jr $ra + daddiu $sp, $sp, 128 asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: daddiu $sp, $sp, -64 sync diff --git a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align2 
b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align2 index e579ebd..3fd29d5 100644 --- a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align2 @@ -1,163 +1,269 @@ asm_test::atomic_memcpy_store_align2::release: - daddiu $sp, $sp, -128 + daddiu $sp, $sp, -256 + sd $ra, 248($sp) + sd $fp, 240($sp) + sd $gp, 232($sp) + sd $23, 224($sp) + sd $22, 216($sp) + sd $21, 208($sp) + sd $20, 200($sp) + sd $19, 192($sp) + sd $18, 184($sp) + sd $17, 176($sp) + sd $16, 168($sp) ldl $1, 47($5) ldl $2, 39($5) ldl $3, 31($5) ldl $6, 23($5) ldl $7, 55($5) ldl $8, 63($5) + daddiu $ra, $4, 32 + daddiu $9, $4, 60 + daddiu $14, $4, 52 + daddiu $12, $4, 50 + daddiu $17, $4, 48 + daddiu $25, $4, 46 + daddiu $21, $4, 44 + daddiu $19, $4, 42 + daddiu $fp, $4, 40 + daddiu $23, $4, 38 ldr $1, 40($5) ldr $6, 16($5) ldr $3, 24($5) ldr $2, 32($5) ldr $7, 48($5) ldr $8, 56($5) - sd $1, 40($sp) + sd $1, 80($sp) ldl $1, 15($5) - sd $8, 56($sp) - sd $7, 48($sp) - sd $2, 32($sp) - sd $3, 24($sp) - sd $6, 16($sp) + sd $8, 96($sp) + sd $7, 88($sp) + sd $2, 72($sp) + sd $3, 64($sp) + sd $6, 56($sp) ldr $1, 8($5) - sd $1, 8($sp) + sd $1, 48($sp) ldl $1, 7($5) ldr $1, 0($5) - sd $1, 0($sp) + sd $1, 40($sp) sync - ld $1, 0($sp) - ld $2, 8($sp) - ld $3, 16($sp) + ld $1, 48($sp) + ld $2, 56($sp) + ld $3, 64($sp) + ld $5, 72($sp) + ld $6, 80($sp) + ld $7, 88($sp) + ld $8, 96($sp) + sd $1, 112($sp) + ld $1, 40($sp) + sd $7, 152($sp) + sd $2, 120($sp) + sd $8, 160($sp) + sd $6, 144($sp) + sd $5, 136($sp) + sd $3, 128($sp) + daddiu $2, $4, 62 + daddiu $7, $4, 56 + sd $1, 104($sp) + daddiu $1, $sp, 104 + daddiu $3, $1, 62 + #APP + lhu $5, 0($3) + sh $5, 0($2) + #NO_APP + sd $ra, 16($sp) + daddiu $ra, $1, 32 + daddiu $2, $4, 58 + daddiu $10, $1, 60 + daddiu $8, $1, 58 + daddiu $15, $1, 56 + daddiu $6, $1, 54 + daddiu $13, $1, 50 + daddiu $11, $1, 48 + daddiu $16, $1, 46 + daddiu $24, 
$1, 44 + daddiu $20, $1, 42 + daddiu $18, $1, 40 + daddiu $gp, $1, 38 + daddiu $22, $1, 36 + sd $ra, 8($sp) + daddiu $5, $4, 54 + daddiu $3, $1, 52 + #APP + lhu $ra, 0($10) + sh $ra, 0($9) + #NO_APP + daddiu $9, $4, 34 + daddiu $10, $1, 34 + #APP + lhu $ra, 0($8) + sh $ra, 0($2) + #NO_APP + daddiu $2, $4, 2 + daddiu $8, $4, 36 + sd $2, 32($sp) + ori $2, $1, 2 + #APP + lhu $ra, 0($15) + sh $ra, 0($7) + #NO_APP + sd $2, 24($sp) + daddiu $15, $4, 4 + #APP + lhu $ra, 0($6) + sh $ra, 0($5) + #NO_APP + daddiu $6, $4, 6 + ori $5, $1, 4 + #APP + lhu $ra, 0($3) + sh $ra, 0($14) + #NO_APP + daddiu $14, $4, 8 + ori $3, $1, 6 + #APP + lhu $ra, 0($13) + sh $ra, 0($12) + #NO_APP + daddiu $13, $4, 10 + daddiu $12, $1, 8 + #APP + lhu $ra, 0($11) + sh $ra, 0($17) + #NO_APP + daddiu $17, $4, 12 + daddiu $11, $1, 10 + #APP + lhu $ra, 0($16) + sh $ra, 0($25) + #NO_APP + daddiu $16, $4, 14 + daddiu $25, $1, 12 + #APP + lhu $ra, 0($24) + sh $ra, 0($21) + #NO_APP + daddiu $21, $4, 16 + daddiu $24, $1, 14 + #APP + lhu $ra, 0($20) + sh $ra, 0($19) + #NO_APP + daddiu $20, $4, 18 + daddiu $19, $1, 16 + #APP + lhu $ra, 0($18) + sh $ra, 0($fp) + #NO_APP + daddiu $fp, $4, 20 + daddiu $18, $1, 18 + #APP + lhu $ra, 0($gp) + sh $ra, 0($23) + #NO_APP + daddiu $gp, $4, 22 + daddiu $23, $1, 20 + #APP + lhu $ra, 0($22) + sh $ra, 0($8) + #NO_APP + daddiu $22, $4, 24 + daddiu $8, $1, 22 + #APP + lhu $ra, 0($10) + sh $ra, 0($9) + #NO_APP + daddiu $10, $4, 26 + daddiu $9, $1, 24 + ld $2, 16($sp) + ld $7, 8($sp) + #APP + lhu $ra, 0($7) + sh $ra, 0($2) + #NO_APP + daddiu $ra, $4, 30 + daddiu $2, $1, 30 + #APP + lhu $7, 0($2) + sh $7, 0($ra) + #NO_APP + daddiu $2, $4, 28 + daddiu $7, $1, 28 + #APP + lhu $ra, 0($7) + sh $ra, 0($2) + #NO_APP + daddiu $2, $1, 26 + #APP + lhu $7, 0($2) + sh $7, 0($10) + #NO_APP + #APP + lhu $2, 0($9) + sh $2, 0($22) + #NO_APP + #APP + lhu $2, 0($8) + sh $2, 0($gp) + #NO_APP + #APP + lhu $2, 0($23) + sh $2, 0($fp) + #NO_APP + #APP + lhu $2, 0($18) + sh $2, 0($20) + #NO_APP + #APP 
+ lhu $2, 0($19) + sh $2, 0($21) + #NO_APP + #APP + lhu $2, 0($24) + sh $2, 0($16) + #NO_APP + #APP + lhu $2, 0($25) + sh $2, 0($17) + #NO_APP + #APP + lhu $2, 0($11) + sh $2, 0($13) + #NO_APP + #APP + lhu $2, 0($12) + sh $2, 0($14) + #NO_APP + #APP + lhu $2, 0($3) + sh $2, 0($6) + #NO_APP + #APP + lhu $2, 0($5) + sh $2, 0($15) + #NO_APP + ld $3, 32($sp) ld $5, 24($sp) - ld $6, 32($sp) - ld $7, 40($sp) - ld $8, 48($sp) - ld $9, 56($sp) - sd $2, 72($sp) - sd $1, 64($sp) - daddiu $1, $4, 7 - daddiu $2, $zero, -8 - sd $9, 120($sp) - sd $8, 112($sp) - sd $7, 104($sp) - sd $6, 96($sp) - sd $5, 88($sp) - and $1, $1, $2 - dsubu $2, $1, $4 - sltiu $1, $2, 65 - beqz $1, .LBB6_6 - sd $3, 80($sp) - beqz $2, .LBB6_7 - daddiu $5, $sp, 64 - daddiu $3, $zero, 0 -.LBB6_3: - daddu $6, $5, $3 - daddu $1, $4, $3 - daddiu $3, $3, 1 - lbu $6, 0($6) - bne $2, $3, .LBB6_3 - sb $6, 0($1) - daddiu $1, $zero, 64 - dsubu $3, $1, $3 - sltiu $1, $3, 8 - bnez $1, .LBB6_9 - nop - b .LBB6_8 - nop -.LBB6_6: - lhu $1, 126($sp) - sh $1, 62($4) - lhu $1, 124($sp) - sh $1, 60($4) - lhu $1, 122($sp) - sh $1, 58($4) - lhu $1, 120($sp) - sh $1, 56($4) - lhu $1, 118($sp) - sh $1, 54($4) - lhu $1, 116($sp) - sh $1, 52($4) - lhu $1, 114($sp) - sh $1, 50($4) - lhu $1, 112($sp) - sh $1, 48($4) - lhu $1, 110($sp) - sh $1, 46($4) - lhu $1, 108($sp) - sh $1, 44($4) - lhu $1, 106($sp) - sh $1, 42($4) - lhu $1, 104($sp) - sh $1, 40($4) - lhu $1, 102($sp) - sh $1, 38($4) - lhu $1, 100($sp) - sh $1, 36($4) - lhu $1, 98($sp) - sh $1, 34($4) - lhu $1, 96($sp) - sh $1, 32($4) - lhu $1, 94($sp) - sh $1, 30($4) - lhu $1, 92($sp) - sh $1, 28($4) - lhu $1, 90($sp) - sh $1, 26($4) - lhu $1, 88($sp) - sh $1, 24($4) - lhu $1, 86($sp) - sh $1, 22($4) - lhu $1, 84($sp) - sh $1, 20($4) - lhu $1, 82($sp) - sh $1, 18($4) - lhu $1, 80($sp) - sh $1, 16($4) - lhu $1, 78($sp) - sh $1, 14($4) - lhu $1, 76($sp) - sh $1, 12($4) - lhu $1, 74($sp) - sh $1, 10($4) - lhu $1, 72($sp) - sh $1, 8($4) - lhu $1, 70($sp) - sh $1, 6($4) - lhu $1, 
68($sp) - sh $1, 4($4) - lhu $1, 66($sp) - sh $1, 2($4) - lhu $1, 64($sp) - b .LBB6_12 - sh $1, 0($4) -.LBB6_7: - daddiu $3, $zero, 64 - daddiu $2, $zero, 0 -.LBB6_8: - daddu $1, $5, $2 - daddiu $3, $3, -8 - ldl $6, 7($1) - ldr $6, 0($1) - daddu $1, $4, $2 - sd $6, 0($1) - sltiu $1, $3, 8 - beqz $1, .LBB6_8 - daddiu $2, $2, 8 -.LBB6_9: - beqz $3, .LBB6_12 - nop - daddu $5, $5, $2 - daddu $2, $4, $2 -.LBB6_11: - lbu $1, 0($5) - daddiu $5, $5, 1 - daddiu $3, $3, -1 - sb $1, 0($2) - bnez $3, .LBB6_11 - daddiu $2, $2, 1 -.LBB6_12: + #APP + lhu $2, 0($5) + sh $2, 0($3) + #NO_APP + #APP + lhu $2, 0($1) + sh $2, 0($4) + #NO_APP + ld $16, 168($sp) + ld $17, 176($sp) + ld $18, 184($sp) + ld $19, 192($sp) + ld $20, 200($sp) + ld $21, 208($sp) + ld $22, 216($sp) + ld $23, 224($sp) + ld $gp, 232($sp) + ld $fp, 240($sp) + ld $ra, 248($sp) jr $ra - daddiu $sp, $sp, 128 + daddiu $sp, $sp, 256 asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: daddiu $sp, $sp, -64 sync diff --git a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align4 b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align4 index 1af06e0..2fce26f 100644 --- a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align4 @@ -1,131 +1,169 @@ asm_test::atomic_memcpy_store_align4::release: - daddiu $sp, $sp, -128 + daddiu $sp, $sp, -240 + sd $ra, 232($sp) + sd $fp, 224($sp) + sd $gp, 216($sp) + sd $23, 208($sp) + sd $22, 200($sp) + sd $21, 192($sp) + sd $20, 184($sp) + sd $19, 176($sp) + sd $18, 168($sp) + sd $17, 160($sp) + sd $16, 152($sp) ldl $1, 47($5) ldl $2, 39($5) ldl $3, 31($5) ldl $6, 23($5) ldl $7, 55($5) ldl $8, 63($5) + daddiu $ra, $4, 56 + daddiu $fp, $4, 52 + daddiu $23, $4, 48 + daddiu $21, $4, 44 + daddiu $19, $4, 40 + daddiu $17, $4, 36 + daddiu $25, $4, 32 + daddiu $15, $4, 28 + daddiu $13, $4, 24 + daddiu $11, $4, 20 + daddiu $9, $4, 16 
ldr $1, 40($5) ldr $6, 16($5) ldr $3, 24($5) ldr $2, 32($5) ldr $7, 48($5) ldr $8, 56($5) - sd $1, 40($sp) + sd $1, 64($sp) ldl $1, 15($5) - sd $8, 56($sp) - sd $7, 48($sp) - sd $2, 32($sp) - sd $3, 24($sp) - sd $6, 16($sp) + sd $8, 80($sp) + sd $7, 72($sp) + sd $2, 56($sp) + sd $3, 48($sp) + sd $6, 40($sp) ldr $1, 8($5) - sd $1, 8($sp) + sd $1, 32($sp) ldl $1, 7($5) ldr $1, 0($5) - sd $1, 0($sp) + sd $1, 24($sp) sync - ld $1, 0($sp) - ld $2, 8($sp) + ld $1, 32($sp) + ld $2, 40($sp) + ld $3, 48($sp) + ld $5, 56($sp) + ld $6, 64($sp) + ld $7, 72($sp) + ld $8, 80($sp) + sd $1, 96($sp) + ld $1, 24($sp) + sd $7, 136($sp) + sd $2, 104($sp) + daddiu $2, $sp, 88 + sd $8, 144($sp) + sd $6, 128($sp) + sd $3, 112($sp) + sd $5, 120($sp) + daddiu $7, $4, 12 + daddiu $3, $2, 60 + daddiu $gp, $2, 48 + daddiu $22, $2, 44 + daddiu $20, $2, 40 + daddiu $18, $2, 36 + daddiu $16, $2, 32 + daddiu $24, $2, 28 + daddiu $14, $2, 24 + daddiu $12, $2, 20 + daddiu $10, $2, 16 + daddiu $8, $2, 12 + daddiu $6, $2, 8 + sd $1, 88($sp) + daddiu $1, $4, 60 + #APP + lw $5, 0($3) + sw $5, 0($1) + #NO_APP + daddiu $1, $4, 4 + sd $1, 16($sp) + ori $1, $2, 4 + daddiu $5, $4, 8 + sd $1, 8($sp) + daddiu $1, $2, 56 + #APP + lw $3, 0($1) + sw $3, 0($ra) + #NO_APP + daddiu $1, $2, 52 + #APP + lw $3, 0($1) + sw $3, 0($fp) + #NO_APP + #APP + lw $1, 0($gp) + sw $1, 0($23) + #NO_APP + #APP + lw $1, 0($22) + sw $1, 0($21) + #NO_APP + #APP + lw $1, 0($20) + sw $1, 0($19) + #NO_APP + #APP + lw $1, 0($18) + sw $1, 0($17) + #NO_APP + #APP + lw $1, 0($16) + sw $1, 0($25) + #NO_APP + #APP + lw $1, 0($24) + sw $1, 0($15) + #NO_APP + #APP + lw $1, 0($14) + sw $1, 0($13) + #NO_APP + #APP + lw $1, 0($12) + sw $1, 0($11) + #NO_APP + #APP + lw $1, 0($10) + sw $1, 0($9) + #NO_APP + #APP + lw $1, 0($8) + sw $1, 0($7) + #NO_APP + #APP + lw $1, 0($6) + sw $1, 0($5) + #NO_APP ld $3, 16($sp) - ld $5, 24($sp) - ld $6, 32($sp) - ld $7, 40($sp) - ld $8, 48($sp) - ld $9, 56($sp) - sd $2, 72($sp) - sd $1, 64($sp) - daddiu $1, $4, 7 - 
daddiu $2, $zero, -8 - sd $9, 120($sp) - sd $8, 112($sp) - sd $7, 104($sp) - sd $6, 96($sp) - sd $5, 88($sp) - and $1, $1, $2 - dsubu $2, $1, $4 - sltiu $1, $2, 65 - beqz $1, .LBB10_6 - sd $3, 80($sp) - beqz $2, .LBB10_7 - daddiu $5, $sp, 64 - daddiu $3, $zero, 0 -.LBB10_3: - daddu $6, $5, $3 - daddu $1, $4, $3 - daddiu $3, $3, 1 - lbu $6, 0($6) - bne $2, $3, .LBB10_3 - sb $6, 0($1) - daddiu $1, $zero, 64 - dsubu $3, $1, $3 - sltiu $1, $3, 8 - bnez $1, .LBB10_9 - nop - b .LBB10_8 - nop -.LBB10_6: - lw $1, 124($sp) - sw $1, 60($4) - lw $1, 120($sp) - sw $1, 56($4) - lw $1, 116($sp) - sw $1, 52($4) - lw $1, 112($sp) - sw $1, 48($4) - lw $1, 108($sp) - sw $1, 44($4) - lw $1, 104($sp) - sw $1, 40($4) - lw $1, 100($sp) - sw $1, 36($4) - lw $1, 96($sp) - sw $1, 32($4) - lw $1, 92($sp) - sw $1, 28($4) - lw $1, 88($sp) - sw $1, 24($4) - lw $1, 84($sp) - sw $1, 20($4) - lw $1, 80($sp) - sw $1, 16($4) - lw $1, 76($sp) - sw $1, 12($4) - lw $1, 72($sp) - sw $1, 8($4) - lw $1, 68($sp) - sw $1, 4($4) - lw $1, 64($sp) - b .LBB10_12 + ld $5, 8($sp) + #APP + lw $1, 0($5) + sw $1, 0($3) + #NO_APP + #APP + lw $1, 0($2) sw $1, 0($4) -.LBB10_7: - daddiu $3, $zero, 64 - daddiu $2, $zero, 0 -.LBB10_8: - daddu $1, $5, $2 - daddiu $3, $3, -8 - ldl $6, 7($1) - ldr $6, 0($1) - daddu $1, $4, $2 - sd $6, 0($1) - sltiu $1, $3, 8 - beqz $1, .LBB10_8 - daddiu $2, $2, 8 -.LBB10_9: - beqz $3, .LBB10_12 - nop - daddu $5, $5, $2 - daddu $2, $4, $2 -.LBB10_11: - lbu $1, 0($5) - daddiu $5, $5, 1 - daddiu $3, $3, -1 - sb $1, 0($2) - bnez $3, .LBB10_11 - daddiu $2, $2, 1 -.LBB10_12: + #NO_APP + ld $16, 152($sp) + ld $17, 160($sp) + ld $18, 168($sp) + ld $19, 176($sp) + ld $20, 184($sp) + ld $21, 192($sp) + ld $22, 200($sp) + ld $23, 208($sp) + ld $gp, 216($sp) + ld $fp, 224($sp) + ld $ra, 232($sp) jr $ra - daddiu $sp, $sp, 128 + daddiu $sp, $sp, 240 asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: daddiu $sp, $sp, -64 sync diff --git 
a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align8 b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align8 index aadb63d..f2b8f31 100644 --- a/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/mips64el-unknown-linux-gnuabi64/atomic_memcpy_store_align8 @@ -1,22 +1,87 @@ asm_test::atomic_memcpy_store_align8::release: + daddiu $sp, $sp, -128 + ld $1, 24($5) + ld $2, 32($5) + ld $3, 40($5) + ld $6, 48($5) + ld $7, 56($5) + daddiu $11, $4, 40 + daddiu $9, $4, 32 + sd $1, 24($sp) + ld $1, 16($5) + sd $7, 56($sp) + sd $6, 48($sp) + sd $3, 40($sp) + sd $2, 32($sp) + sd $1, 16($sp) + ld $1, 8($5) + sd $1, 8($sp) ld $1, 0($5) - ld $2, 8($5) - ld $3, 16($5) - ld $6, 24($5) - ld $7, 32($5) - ld $8, 40($5) - ld $9, 48($5) - ld $5, 56($5) + sd $1, 0($sp) sync - sd $5, 56($4) - sd $9, 48($4) - sd $8, 40($4) - sd $7, 32($4) - sd $6, 24($4) - sd $3, 16($4) - sd $2, 8($4) - jr $ra + ld $1, 8($sp) + ld $2, 16($sp) + ld $3, 24($sp) + ld $5, 32($sp) + ld $6, 40($sp) + ld $7, 48($sp) + ld $8, 56($sp) + sd $1, 72($sp) + ld $1, 0($sp) + sd $7, 112($sp) + sd $2, 80($sp) + daddiu $2, $sp, 64 + sd $8, 120($sp) + sd $6, 104($sp) + sd $3, 88($sp) + sd $5, 96($sp) + daddiu $7, $4, 24 + daddiu $3, $2, 56 + daddiu $12, $2, 40 + daddiu $10, $2, 32 + daddiu $8, $2, 24 + daddiu $6, $2, 16 + sd $1, 64($sp) + daddiu $1, $4, 56 + #APP + ld $5, 0($3) + sd $5, 0($1) + #NO_APP + daddiu $1, $4, 48 + daddiu $3, $2, 48 + #APP + ld $5, 0($3) + sd $5, 0($1) + #NO_APP + daddiu $1, $4, 8 + daddiu $3, $2, 8 + #APP + ld $13, 0($12) + sd $13, 0($11) + #NO_APP + daddiu $5, $4, 16 + #APP + ld $11, 0($10) + sd $11, 0($9) + #NO_APP + #APP + ld $9, 0($8) + sd $9, 0($7) + #NO_APP + #APP + ld $7, 0($6) + sd $7, 0($5) + #NO_APP + #APP + ld $5, 0($3) + sd $5, 0($1) + #NO_APP + #APP + ld $1, 0($2) sd $1, 0($4) + #NO_APP + jr $ra + daddiu $sp, $sp, 128 asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: 
daddiu $sp, $sp, -64 sync diff --git a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align1 b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align1 index 9efb2ae..01b85dd 100644 --- a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align1 @@ -1,182 +1,248 @@ asm_test::atomic_memcpy_load_align1::acquire: - addiu $sp, $sp, -32 - addiu $1, $5, 3 - addiu $2, $zero, -4 - and $3, $1, $2 - subu $2, $3, $5 - sltiu $1, $2, 33 - beqz $1, $BB0_6 - nop - beqz $2, $BB0_7 - nop - addiu $6, $sp, 0 - subu $8, $5, $3 - addiu $7, $5, 32 - move $9, $5 -$BB0_3: - lb $1, 0($9) - addiu $9, $9, 1 - sb $1, 0($6) - addiu $1, $8, 1 - addiu $6, $6, 1 - sltu $10, $1, $8 - beqz $10, $BB0_3 - move $8, $1 - subu $3, $7, $3 - sltiu $1, $3, 4 - bnez $1, $BB0_10 - nop - b $BB0_8 - nop -$BB0_6: - lb $1, 0($5) - sb $1, 0($sp) - lb $1, 1($5) - sb $1, 1($sp) - lb $1, 2($5) - sb $1, 2($sp) - lb $1, 3($5) - sb $1, 3($sp) - lb $1, 4($5) - sb $1, 4($sp) - lb $1, 5($5) - sb $1, 5($sp) - lb $1, 6($5) - sb $1, 6($sp) - lb $1, 7($5) - sb $1, 7($sp) - lb $1, 8($5) - sb $1, 8($sp) - lb $1, 9($5) - sb $1, 9($sp) - lb $1, 10($5) - sb $1, 10($sp) - lb $1, 11($5) - sb $1, 11($sp) - lb $1, 12($5) - sb $1, 12($sp) - lb $1, 13($5) - sb $1, 13($sp) - lb $1, 14($5) - sb $1, 14($sp) - lb $1, 15($5) - sb $1, 15($sp) - lb $1, 16($5) - sb $1, 16($sp) - lb $1, 17($5) - sb $1, 17($sp) - lb $1, 18($5) - sb $1, 18($sp) - lb $1, 19($5) - sb $1, 19($sp) - lb $1, 20($5) - sb $1, 20($sp) - lb $1, 21($5) - sb $1, 21($sp) - lb $1, 22($5) - sb $1, 22($sp) - lb $1, 23($5) - sb $1, 23($sp) - lb $1, 24($5) - sb $1, 24($sp) - lb $1, 25($5) - sb $1, 25($sp) - lb $1, 26($5) - sb $1, 26($sp) - lb $1, 27($5) - sb $1, 27($sp) - lb $1, 28($5) - sb $1, 28($sp) - lb $1, 29($5) - sb $1, 29($sp) - lb $1, 30($5) - sb $1, 30($sp) - lb $1, 31($5) - sb $1, 31($sp) - lw $1, 12($sp) - lw $2, 20($sp) - lw $3, 24($sp) - lw $5, 4($sp) - 
lw $6, 8($sp) + addiu $sp, $sp, -80 + sw $ra, 76($sp) + sw $fp, 72($sp) + sw $23, 68($sp) + sw $22, 64($sp) + sw $21, 60($sp) + sw $20, 56($sp) + sw $19, 52($sp) + sw $18, 48($sp) + sw $17, 44($sp) + sw $16, 40($sp) + addiu $1, $sp, 8 + addiu $2, $5, 31 + addiu $ra, $5, 1 + addiu $8, $5, 26 + addiu $14, $5, 23 + addiu $12, $5, 22 + addiu $10, $5, 21 + addiu $17, $5, 20 + addiu $gp, $5, 19 + addiu $24, $5, 18 + addiu $23, $5, 17 + addiu $21, $5, 16 + addiu $19, $5, 15 + addiu $3, $1, 31 + #APP + lb $6, 0($2) + sb $6, 0($3) + #NO_APP + addiu $2, $5, 30 + addiu $9, $1, 26 + addiu $7, $1, 25 + addiu $15, $1, 23 + addiu $13, $1, 22 + addiu $11, $1, 21 + addiu $18, $1, 20 + addiu $16, $1, 19 + addiu $25, $1, 18 + addiu $fp, $1, 17 + addiu $22, $1, 16 + addiu $20, $1, 15 + addiu $3, $1, 30 + #APP + lb $6, 0($2) + sb $6, 0($3) + #NO_APP + addiu $2, $5, 29 + addiu $3, $1, 29 + #APP + lb $6, 0($2) + sb $6, 0($3) + #NO_APP + addiu $2, $5, 28 + addiu $3, $1, 28 + #APP + lb $6, 0($2) + sb $6, 0($3) + #NO_APP + addiu $2, $5, 27 + addiu $3, $1, 27 + #APP + lb $6, 0($2) + sb $6, 0($3) + #NO_APP + sw $ra, 4($sp) + ori $ra, $1, 1 + addiu $2, $5, 24 + sw $ra, 0($sp) + addiu $6, $5, 25 + addiu $3, $1, 24 + #APP + lb $ra, 0($8) + sb $ra, 0($9) + #NO_APP + addiu $8, $5, 2 + ori $9, $1, 2 + #APP + lb $ra, 0($6) + sb $ra, 0($7) + #NO_APP + addiu $6, $5, 3 + ori $7, $1, 3 + #APP + lb $ra, 0($2) + sb $ra, 0($3) + #NO_APP + addiu $2, $5, 4 + ori $3, $1, 4 + #APP + lb $ra, 0($14) + sb $ra, 0($15) + #NO_APP + addiu $14, $5, 5 + ori $15, $1, 5 + #APP + lb $ra, 0($12) + sb $ra, 0($13) + #NO_APP + addiu $12, $5, 6 + ori $13, $1, 6 + #APP + lb $ra, 0($10) + sb $ra, 0($11) + #NO_APP + addiu $10, $5, 7 + ori $11, $1, 7 + #APP + lb $ra, 0($17) + sb $ra, 0($18) + #NO_APP + addiu $17, $5, 8 + addiu $18, $1, 8 + #APP + lb $ra, 0($gp) + sb $ra, 0($16) + #NO_APP + addiu $gp, $5, 9 + addiu $16, $1, 9 + #APP + lb $ra, 0($24) + sb $ra, 0($25) + #NO_APP + addiu $24, $5, 10 + addiu $25, $1, 10 + #APP + lb $ra, 
0($23) + sb $ra, 0($fp) + #NO_APP + addiu $23, $5, 11 + addiu $fp, $1, 11 + #APP + lb $ra, 0($21) + sb $ra, 0($22) + #NO_APP + addiu $21, $5, 12 + addiu $22, $1, 12 + #APP + lb $ra, 0($19) + sb $ra, 0($20) + #NO_APP + addiu $19, $5, 14 + addiu $20, $1, 14 + #APP + lb $ra, 0($19) + sb $ra, 0($20) + #NO_APP + addiu $19, $5, 13 + addiu $20, $1, 13 + #APP + lb $ra, 0($19) + sb $ra, 0($20) + #NO_APP + #APP + lb $19, 0($21) + sb $19, 0($22) + #NO_APP + #APP + lb $19, 0($23) + sb $19, 0($fp) + #NO_APP + #APP + lb $19, 0($24) + sb $19, 0($25) + #NO_APP + #APP + lb $24, 0($gp) + sb $24, 0($16) + #NO_APP + #APP + lb $24, 0($17) + sb $24, 0($18) + #NO_APP + #APP + lb $24, 0($10) + sb $24, 0($11) + #NO_APP + #APP + lb $10, 0($12) + sb $10, 0($13) + #NO_APP + #APP + lb $10, 0($14) + sb $10, 0($15) + #NO_APP + #APP + lb $10, 0($2) + sb $10, 0($3) + #NO_APP + #APP + lb $2, 0($6) + sb $2, 0($7) + #NO_APP + #APP + lb $2, 0($8) + sb $2, 0($9) + #NO_APP + lw $3, 4($sp) + lw $6, 0($sp) + #APP + lb $2, 0($3) + sb $2, 0($6) + #NO_APP + #APP + lb $2, 0($5) + sb $2, 0($1) + #NO_APP + lw $1, 8($sp) + lw $2, 24($sp) + lw $3, 20($sp) + lw $5, 12($sp) + lw $6, 28($sp) lw $7, 16($sp) - lw $8, 28($sp) - lw $9, 0($sp) - swl $3, 27($4) - swl $2, 23($4) - swl $7, 19($4) - swl $1, 15($4) - swl $6, 11($4) + lw $8, 36($sp) + lw $9, 32($sp) + swl $1, 3($4) swl $5, 7($4) - swl $9, 3($4) + swl $7, 11($4) + swl $3, 15($4) + swl $2, 19($4) + swl $6, 23($4) + swl $9, 27($4) swl $8, 31($4) - swr $3, 24($4) - swr $2, 20($4) - swr $7, 16($4) - swr $1, 12($4) - swr $6, 8($4) + swr $2, 16($4) + swr $1, 0($4) swr $5, 4($4) - swr $9, 0($4) + swr $7, 8($4) + swr $3, 12($4) + swr $6, 20($4) + swr $9, 24($4) swr $8, 28($4) sync + lw $16, 40($sp) + lw $17, 44($sp) + lw $18, 48($sp) + lw $19, 52($sp) + lw $20, 56($sp) + lw $21, 60($sp) + lw $22, 64($sp) + lw $23, 68($sp) + lw $fp, 72($sp) + lw $ra, 76($sp) move $2, $4 jr $ra - addiu $sp, $sp, 32 -$BB0_7: - addiu $3, $zero, 32 -$BB0_8: - addiu $6, $sp, 0 -$BB0_9: - 
addu $7, $5, $2 - addu $1, $6, $2 - addiu $3, $3, -4 - lw $7, 0($7) - swl $7, 3($1) - swr $7, 0($1) - sltiu $1, $3, 4 - beqz $1, $BB0_9 - addiu $2, $2, 4 -$BB0_10: - beqz $3, $BB0_13 - nop - addiu $1, $sp, 0 - addu $5, $5, $2 - addu $2, $1, $2 -$BB0_12: - lb $1, 0($5) - addiu $5, $5, 1 - addiu $3, $3, -1 - sb $1, 0($2) - bnez $3, $BB0_12 - addiu $2, $2, 1 -$BB0_13: - lw $1, 28($sp) - lw $2, 12($sp) - lw $3, 16($sp) - lw $5, 24($sp) - lw $6, 8($sp) - lw $7, 20($sp) - lw $8, 0($sp) - lw $9, 4($sp) - swl $1, 31($4) - swl $5, 27($4) - swl $7, 23($4) - swl $3, 19($4) - swl $2, 15($4) - swl $6, 11($4) - swl $9, 7($4) - swl $8, 3($4) - swr $1, 28($4) - swr $5, 24($4) - swr $7, 20($4) - swr $3, 16($4) - swr $2, 12($4) - swr $6, 8($4) - swr $9, 4($4) - swr $8, 0($4) - sync - move $2, $4 - jr $ra - addiu $sp, $sp, 32 + addiu $sp, $sp, 80 $func_end0: asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: addiu $sp, $sp, -64 diff --git a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align16 b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align16 index b199e46..3a5734c 100644 --- a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align16 @@ -1,24 +1,80 @@ asm_test::atomic_memcpy_load_align16::acquire: - lw $1, 28($5) - lw $2, 24($5) - lw $3, 20($5) - lw $6, 16($5) - lw $7, 12($5) - lw $8, 8($5) - lw $9, 4($5) - lw $5, 0($5) - sw $1, 28($4) - sw $2, 24($4) + addiu $sp, $sp, -48 + sw $ra, 44($sp) + sw $fp, 40($sp) + move $fp, $sp + addiu $1, $zero, -16 + and $sp, $sp, $1 + addiu $2, $sp, 0 + addiu $1, $5, 28 + addiu $3, $2, 28 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + addiu $1, $5, 24 + ori $7, $2, 8 + addiu $3, $2, 24 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + addiu $1, $5, 20 + addiu $3, $2, 20 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + addiu $1, $5, 16 + addiu $3, $2, 16 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + addiu 
$1, $5, 12 + ori $3, $2, 12 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + addiu $1, $5, 4 + addiu $6, $5, 8 + #APP + lw $8, 0($6) + sw $8, 0($7) + #NO_APP + ori $3, $2, 4 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + #APP + lw $1, 0($5) + sw $1, 0($2) + #NO_APP + lw $1, 16($sp) + lw $2, 12($sp) + lw $3, 8($sp) + lw $5, 4($sp) + lw $6, 0($sp) + sw $1, 16($4) + lw $1, 20($sp) + sw $2, 12($4) + sw $6, 0($4) + sw $5, 4($4) + sw $3, 8($4) move $2, $4 - sw $3, 20($4) - sw $6, 16($4) - sw $7, 12($4) - sw $8, 8($4) - sw $9, 4($4) - sw $5, 0($4) + sw $1, 20($4) + lw $1, 24($sp) + sw $1, 24($4) + lw $1, 28($sp) + sw $1, 28($4) sync + move $sp, $fp + lw $fp, 40($sp) + lw $ra, 44($sp) jr $ra - nop + addiu $sp, $sp, 48 $func_end16: asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: lw $1, 4($5) diff --git a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align2 b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align2 index 0139b53..2a32c0f 100644 --- a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align2 @@ -1,123 +1,140 @@ asm_test::atomic_memcpy_load_align2::acquire: - addiu $sp, $sp, -32 - addiu $1, $5, 3 - addiu $2, $zero, -4 - and $3, $1, $2 - subu $2, $3, $5 - sltiu $1, $2, 33 - beqz $1, $BB4_6 - nop - beqz $2, $BB4_7 - nop - addiu $6, $sp, 0 - subu $8, $5, $3 - addiu $7, $5, 32 - move $9, $5 -$BB4_3: - lb $1, 0($9) - addiu $9, $9, 1 - sb $1, 0($6) - addiu $1, $8, 1 - addiu $6, $6, 1 - sltu $10, $1, $8 - beqz $10, $BB4_3 - move $8, $1 - subu $3, $7, $3 - sltiu $1, $3, 4 - bnez $1, $BB4_10 - nop - b $BB4_8 - nop -$BB4_6: - lh $1, 30($5) - sh $1, 30($sp) - lh $1, 28($5) - sh $1, 28($sp) - lh $1, 26($5) - sh $1, 26($sp) - lh $1, 24($5) - sh $1, 24($sp) - lh $1, 22($5) - sh $1, 22($sp) - lh $1, 20($5) - sh $1, 20($sp) - lh $1, 18($5) - sh $1, 18($sp) - lh $1, 16($5) - sh $1, 16($sp) - lh $1, 14($5) - sh $1, 14($sp) - lh $1, 12($5) - 
sh $1, 12($sp) - lh $1, 10($5) - sh $1, 10($sp) - lh $1, 8($5) - sh $1, 8($sp) - lh $1, 6($5) - sh $1, 6($sp) - lh $1, 4($5) - sh $1, 4($sp) - lh $1, 2($5) - sh $1, 2($sp) + addiu $sp, $sp, -56 + sw $21, 52($sp) + sw $20, 48($sp) + sw $19, 44($sp) + sw $18, 40($sp) + sw $17, 36($sp) + sw $16, 32($sp) + addiu $2, $sp, 0 + addiu $1, $5, 30 + addiu $19, $5, 20 + addiu $17, $5, 18 + addiu $gp, $5, 16 + addiu $24, $5, 14 + addiu $14, $5, 12 + addiu $12, $5, 10 + addiu $10, $5, 8 + addiu $8, $5, 6 + addiu $3, $2, 30 + #APP + lh $6, 0($1) + sh $6, 0($3) + #NO_APP + addiu $1, $5, 28 + addiu $20, $2, 20 + addiu $18, $2, 18 + addiu $16, $2, 16 + addiu $25, $2, 14 + addiu $15, $2, 12 + addiu $13, $2, 10 + addiu $11, $2, 8 + ori $9, $2, 6 + ori $7, $2, 4 + addiu $3, $2, 28 + #APP + lh $6, 0($1) + sh $6, 0($3) + #NO_APP + addiu $1, $5, 26 + addiu $3, $2, 26 + #APP + lh $6, 0($1) + sh $6, 0($3) + #NO_APP + addiu $1, $5, 24 + addiu $3, $2, 24 + #APP + lh $6, 0($1) + sh $6, 0($3) + #NO_APP + addiu $1, $5, 22 + addiu $3, $2, 22 + #APP + lh $6, 0($1) + sh $6, 0($3) + #NO_APP + addiu $1, $5, 2 + #APP + lh $21, 0($19) + sh $21, 0($20) + #NO_APP + addiu $6, $5, 4 + ori $3, $2, 2 + #APP + lh $19, 0($17) + sh $19, 0($18) + #NO_APP + #APP + lh $17, 0($gp) + sh $17, 0($16) + #NO_APP + #APP + lh $gp, 0($24) + sh $gp, 0($25) + #NO_APP + #APP + lh $24, 0($14) + sh $24, 0($15) + #NO_APP + #APP + lh $14, 0($12) + sh $14, 0($13) + #NO_APP + #APP + lh $12, 0($10) + sh $12, 0($11) + #NO_APP + #APP + lh $10, 0($8) + sh $10, 0($9) + #NO_APP + #APP + lh $8, 0($6) + sh $8, 0($7) + #NO_APP + #APP + lh $6, 0($1) + sh $6, 0($3) + #NO_APP + #APP lh $1, 0($5) - b $BB4_13 - sh $1, 0($sp) -$BB4_7: - addiu $3, $zero, 32 -$BB4_8: - addiu $6, $sp, 0 -$BB4_9: - addu $7, $5, $2 - addu $1, $6, $2 - addiu $3, $3, -4 - lw $7, 0($7) - swl $7, 3($1) - swr $7, 0($1) - sltiu $1, $3, 4 - beqz $1, $BB4_9 - addiu $2, $2, 4 -$BB4_10: - beqz $3, $BB4_13 - nop - addiu $1, $sp, 0 - addu $5, $5, $2 - addu $2, $1, $2 -$BB4_12: - 
lb $1, 0($5) - addiu $5, $5, 1 - addiu $3, $3, -1 - sb $1, 0($2) - bnez $3, $BB4_12 - addiu $2, $2, 1 -$BB4_13: - lw $1, 28($sp) - lw $2, 12($sp) - lw $3, 16($sp) - lw $5, 24($sp) - lw $6, 8($sp) - lw $7, 20($sp) - lw $8, 0($sp) - lw $9, 4($sp) - swl $1, 31($4) - swl $5, 27($4) - swl $7, 23($4) - swl $3, 19($4) - swl $2, 15($4) - swl $6, 11($4) - swl $9, 7($4) - swl $8, 3($4) - swr $2, 12($4) - swr $1, 28($4) - swr $5, 24($4) - swr $7, 20($4) - swr $3, 16($4) - swr $6, 8($4) - swr $9, 4($4) - swr $8, 0($4) + sh $1, 0($2) + #NO_APP + lw $1, 0($sp) + lw $2, 16($sp) + lw $3, 12($sp) + lw $5, 4($sp) + lw $6, 20($sp) + lw $7, 8($sp) + lw $8, 28($sp) + lw $9, 24($sp) + swl $1, 3($4) + swl $5, 7($4) + swl $7, 11($4) + swl $3, 15($4) + swl $2, 19($4) + swl $6, 23($4) + swl $9, 27($4) + swl $8, 31($4) + swr $2, 16($4) + swr $1, 0($4) + swr $5, 4($4) + swr $7, 8($4) + swr $3, 12($4) + swr $6, 20($4) + swr $9, 24($4) + swr $8, 28($4) sync + lw $16, 32($sp) + lw $17, 36($sp) + lw $18, 40($sp) + lw $19, 44($sp) + lw $20, 48($sp) + lw $21, 52($sp) move $2, $4 jr $ra - addiu $sp, $sp, 32 + addiu $sp, $sp, 56 $func_end4: asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: lhu $1, 0($5) diff --git a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align4 b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align4 index 0df379b..10ba556 100644 --- a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align4 @@ -1,24 +1,72 @@ asm_test::atomic_memcpy_load_align4::acquire: - lw $1, 28($5) - lw $2, 24($5) - lw $3, 20($5) - lw $6, 16($5) - lw $7, 12($5) - lw $8, 8($5) - lw $9, 4($5) - lw $5, 0($5) - sw $1, 28($4) - sw $2, 24($4) + addiu $sp, $sp, -32 + addiu $2, $sp, 0 + addiu $1, $5, 28 + addiu $3, $2, 28 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + addiu $1, $5, 24 + addiu $7, $2, 8 + addiu $3, $2, 24 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + addiu 
$1, $5, 20 + addiu $3, $2, 20 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + addiu $1, $5, 16 + addiu $3, $2, 16 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + addiu $1, $5, 12 + addiu $3, $2, 12 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + addiu $1, $5, 4 + addiu $6, $5, 8 + #APP + lw $8, 0($6) + sw $8, 0($7) + #NO_APP + ori $3, $2, 4 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + #APP + lw $1, 0($5) + sw $1, 0($2) + #NO_APP + lw $1, 16($sp) + lw $2, 12($sp) + lw $3, 8($sp) + lw $5, 4($sp) + lw $6, 0($sp) + sw $1, 16($4) + lw $1, 20($sp) + sw $2, 12($4) + sw $6, 0($4) + sw $5, 4($4) + sw $3, 8($4) move $2, $4 - sw $3, 20($4) - sw $6, 16($4) - sw $7, 12($4) - sw $8, 8($4) - sw $9, 4($4) - sw $5, 0($4) + sw $1, 20($4) + lw $1, 24($sp) + sw $1, 24($4) + lw $1, 28($sp) + sw $1, 28($4) sync jr $ra - nop + addiu $sp, $sp, 32 $func_end8: asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: lw $1, 0($5) diff --git a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align8 b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align8 index eca3556..fbb15ae 100644 --- a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_load_align8 @@ -1,24 +1,72 @@ asm_test::atomic_memcpy_load_align8::acquire: - lw $1, 28($5) - lw $2, 24($5) - lw $3, 20($5) - lw $6, 16($5) - lw $7, 12($5) - lw $8, 8($5) - lw $9, 4($5) - lw $5, 0($5) - sw $1, 28($4) - sw $2, 24($4) + addiu $sp, $sp, -32 + addiu $2, $sp, 0 + addiu $1, $5, 28 + addiu $3, $2, 28 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + addiu $1, $5, 24 + addiu $7, $2, 8 + addiu $3, $2, 24 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + addiu $1, $5, 20 + addiu $3, $2, 20 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + addiu $1, $5, 16 + addiu $3, $2, 16 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + addiu $1, $5, 12 + addiu $3, $2, 12 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + addiu $1, $5, 4 + 
addiu $6, $5, 8 + #APP + lw $8, 0($6) + sw $8, 0($7) + #NO_APP + ori $3, $2, 4 + #APP + lw $6, 0($1) + sw $6, 0($3) + #NO_APP + #APP + lw $1, 0($5) + sw $1, 0($2) + #NO_APP + lw $1, 16($sp) + lw $2, 12($sp) + lw $3, 8($sp) + lw $5, 4($sp) + lw $6, 0($sp) + sw $1, 16($4) + lw $1, 20($sp) + sw $2, 12($4) + sw $6, 0($4) + sw $5, 4($4) + sw $3, 8($4) move $2, $4 - sw $3, 20($4) - sw $6, 16($4) - sw $7, 12($4) - sw $8, 8($4) - sw $9, 4($4) - sw $5, 0($4) + sw $1, 20($4) + lw $1, 24($sp) + sw $1, 24($4) + lw $1, 28($sp) + sw $1, 28($4) sync jr $ra - nop + addiu $sp, $sp, 32 $func_end12: asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: lw $1, 4($5) diff --git a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align1 b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align1 index 9283f73..4495233 100644 --- a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align1 @@ -1,169 +1,267 @@ asm_test::atomic_memcpy_store_align1::release: - addiu $sp, $sp, -64 + addiu $sp, $sp, -120 + sw $ra, 116($sp) + sw $fp, 112($sp) + sw $23, 108($sp) + sw $22, 104($sp) + sw $21, 100($sp) + sw $20, 96($sp) + sw $19, 92($sp) + sw $18, 88($sp) + sw $17, 84($sp) + sw $16, 80($sp) lwl $1, 23($5) lwl $2, 19($5) lwl $3, 15($5) lwl $6, 11($5) lwl $7, 27($5) lwl $8, 31($5) + addiu $ra, $4, 16 + addiu $9, $4, 30 + addiu $14, $4, 26 + addiu $12, $4, 25 + addiu $16, $4, 24 + addiu $25, $4, 23 + addiu $20, $4, 22 + addiu $18, $4, 21 + addiu $fp, $4, 20 + addiu $22, $4, 19 lwr $1, 20($5) lwr $6, 8($5) lwr $3, 12($5) lwr $2, 16($5) lwr $7, 24($5) lwr $8, 28($5) - sw $1, 20($sp) + sw $1, 36($sp) lwl $1, 7($5) - sw $8, 28($sp) - sw $7, 24($sp) - sw $2, 16($sp) - sw $3, 12($sp) - sw $6, 8($sp) + sw $8, 44($sp) + sw $7, 40($sp) + sw $2, 32($sp) + sw $3, 28($sp) + sw $6, 24($sp) lwr $1, 4($5) - sw $1, 4($sp) + sw $1, 20($sp) lwl $1, 3($5) lwr $1, 0($5) - sw $1, 0($sp) + sw $1, 
16($sp) sync + lw $1, 20($sp) + lw $2, 24($sp) + lw $3, 28($sp) + lw $5, 32($sp) + lw $6, 36($sp) + lw $7, 40($sp) + lw $8, 44($sp) + sw $1, 52($sp) lw $1, 16($sp) - lw $2, 20($sp) - lw $3, 24($sp) - lw $5, 0($sp) - lw $6, 4($sp) - lw $7, 8($sp) - lw $8, 12($sp) - lw $9, 28($sp) - sw $2, 52($sp) + sw $7, 72($sp) + sw $2, 56($sp) + sw $8, 76($sp) + sw $6, 68($sp) + sw $5, 64($sp) + sw $3, 60($sp) + addiu $2, $4, 31 + addiu $7, $4, 28 sw $1, 48($sp) - addiu $1, $4, 3 - addiu $2, $zero, -4 - sw $3, 56($sp) - sw $9, 60($sp) - sw $8, 44($sp) - sw $7, 40($sp) - sw $6, 36($sp) - and $3, $1, $2 - subu $2, $3, $4 - sltiu $1, $2, 33 - beqz $1, $BB2_6 - sw $5, 32($sp) - beqz $2, $BB2_7 - nop - addiu $5, $sp, 32 - subu $7, $4, $3 - addiu $6, $4, 32 - move $8, $4 -$BB2_3: - lbu $1, 0($5) - addiu $5, $5, 1 - sb $1, 0($8) - addiu $1, $7, 1 - addiu $8, $8, 1 - sltu $9, $1, $7 - beqz $9, $BB2_3 - move $7, $1 - subu $3, $6, $3 - sltiu $1, $3, 4 - bnez $1, $BB2_10 - nop - b $BB2_8 - nop -$BB2_6: - lbu $1, 32($sp) - sb $1, 0($4) - lbu $1, 33($sp) - sb $1, 1($4) - lbu $1, 34($sp) - sb $1, 2($4) - lbu $1, 35($sp) - sb $1, 3($4) - lbu $1, 36($sp) - sb $1, 4($4) - lbu $1, 37($sp) - sb $1, 5($4) - lbu $1, 38($sp) - sb $1, 6($4) - lbu $1, 39($sp) - sb $1, 7($4) - lbu $1, 40($sp) - sb $1, 8($4) - lbu $1, 41($sp) - sb $1, 9($4) - lbu $1, 42($sp) - sb $1, 10($4) - lbu $1, 43($sp) - sb $1, 11($4) - lbu $1, 44($sp) - sb $1, 12($4) - lbu $1, 45($sp) - sb $1, 13($4) - lbu $1, 46($sp) - sb $1, 14($4) - lbu $1, 47($sp) - sb $1, 15($4) - lbu $1, 48($sp) - sb $1, 16($4) - lbu $1, 49($sp) - sb $1, 17($4) - lbu $1, 50($sp) - sb $1, 18($4) - lbu $1, 51($sp) - sb $1, 19($4) - lbu $1, 52($sp) - sb $1, 20($4) - lbu $1, 53($sp) - sb $1, 21($4) - lbu $1, 54($sp) - sb $1, 22($4) - lbu $1, 55($sp) - sb $1, 23($4) - lbu $1, 56($sp) - sb $1, 24($4) - lbu $1, 57($sp) - sb $1, 25($4) - lbu $1, 58($sp) - sb $1, 26($4) - lbu $1, 59($sp) - sb $1, 27($4) - lbu $1, 60($sp) - sb $1, 28($4) - lbu $1, 61($sp) - sb $1, 
29($4) - lbu $1, 62($sp) - sb $1, 30($4) - lbu $1, 63($sp) - b $BB2_13 - sb $1, 31($4) -$BB2_7: - addiu $3, $zero, 32 -$BB2_8: - addiu $5, $sp, 32 -$BB2_9: - addu $1, $5, $2 - addiu $3, $3, -4 - lwl $6, 3($1) - lwr $6, 0($1) - addu $1, $4, $2 - sw $6, 0($1) - sltiu $1, $3, 4 - beqz $1, $BB2_9 - addiu $2, $2, 4 -$BB2_10: - beqz $3, $BB2_13 - nop - addiu $1, $sp, 32 - addu $5, $1, $2 - addu $2, $4, $2 -$BB2_12: - lbu $1, 0($5) - addiu $5, $5, 1 - addiu $3, $3, -1 - sb $1, 0($2) - bnez $3, $BB2_12 - addiu $2, $2, 1 -$BB2_13: + addiu $1, $sp, 48 + addiu $3, $1, 31 + #APP + lbu $5, 0($3) + sb $5, 0($2) + #NO_APP + sw $ra, 4($sp) + addiu $ra, $1, 16 + addiu $2, $4, 29 + addiu $10, $1, 30 + addiu $8, $1, 29 + addiu $15, $1, 28 + addiu $6, $1, 27 + addiu $13, $1, 25 + addiu $11, $1, 24 + addiu $gp, $1, 23 + addiu $24, $1, 22 + addiu $19, $1, 21 + addiu $17, $1, 20 + addiu $23, $1, 19 + addiu $21, $1, 18 + sw $ra, 0($sp) + addiu $5, $4, 27 + addiu $3, $1, 26 + #APP + lbu $ra, 0($10) + sb $ra, 0($9) + #NO_APP + addiu $9, $4, 17 + addiu $10, $1, 17 + #APP + lbu $ra, 0($8) + sb $ra, 0($2) + #NO_APP + addiu $2, $4, 1 + addiu $8, $4, 18 + sw $2, 12($sp) + ori $2, $1, 1 + #APP + lbu $ra, 0($15) + sb $ra, 0($7) + #NO_APP + sw $2, 8($sp) + addiu $15, $4, 2 + #APP + lbu $ra, 0($6) + sb $ra, 0($5) + #NO_APP + addiu $6, $4, 3 + ori $5, $1, 2 + #APP + lbu $ra, 0($3) + sb $ra, 0($14) + #NO_APP + addiu $14, $4, 4 + ori $3, $1, 3 + #APP + lbu $ra, 0($13) + sb $ra, 0($12) + #NO_APP + addiu $13, $4, 5 + ori $12, $1, 4 + #APP + lbu $ra, 0($11) + sb $ra, 0($16) + #NO_APP + addiu $16, $4, 6 + ori $11, $1, 5 + #APP + lbu $ra, 0($gp) + sb $ra, 0($25) + #NO_APP + addiu $gp, $4, 7 + ori $25, $1, 6 + #APP + lbu $ra, 0($24) + sb $ra, 0($20) + #NO_APP + addiu $20, $4, 8 + ori $24, $1, 7 + #APP + lbu $ra, 0($19) + sb $ra, 0($18) + #NO_APP + addiu $19, $4, 9 + addiu $18, $1, 8 + #APP + lbu $ra, 0($17) + sb $ra, 0($fp) + #NO_APP + addiu $fp, $4, 10 + addiu $17, $1, 9 + #APP + lbu $ra, 0($23) + sb $ra, 
0($22) + #NO_APP + addiu $23, $4, 11 + addiu $22, $1, 10 + #APP + lbu $ra, 0($21) + sb $ra, 0($8) + #NO_APP + addiu $21, $4, 12 + addiu $8, $1, 11 + #APP + lbu $ra, 0($10) + sb $ra, 0($9) + #NO_APP + addiu $10, $4, 13 + addiu $9, $1, 12 + lw $2, 4($sp) + lw $7, 0($sp) + #APP + lbu $ra, 0($7) + sb $ra, 0($2) + #NO_APP + addiu $ra, $4, 15 + addiu $2, $1, 15 + #APP + lbu $7, 0($2) + sb $7, 0($ra) + #NO_APP + addiu $2, $4, 14 + addiu $7, $1, 14 + #APP + lbu $ra, 0($7) + sb $ra, 0($2) + #NO_APP + addiu $2, $1, 13 + #APP + lbu $7, 0($2) + sb $7, 0($10) + #NO_APP + #APP + lbu $2, 0($9) + sb $2, 0($21) + #NO_APP + #APP + lbu $2, 0($8) + sb $2, 0($23) + #NO_APP + #APP + lbu $2, 0($22) + sb $2, 0($fp) + #NO_APP + #APP + lbu $2, 0($17) + sb $2, 0($19) + #NO_APP + #APP + lbu $2, 0($18) + sb $2, 0($20) + #NO_APP + #APP + lbu $2, 0($24) + sb $2, 0($gp) + #NO_APP + #APP + lbu $2, 0($25) + sb $2, 0($16) + #NO_APP + #APP + lbu $2, 0($11) + sb $2, 0($13) + #NO_APP + #APP + lbu $2, 0($12) + sb $2, 0($14) + #NO_APP + #APP + lbu $2, 0($3) + sb $2, 0($6) + #NO_APP + #APP + lbu $2, 0($5) + sb $2, 0($15) + #NO_APP + lw $3, 12($sp) + lw $5, 8($sp) + #APP + lbu $2, 0($5) + sb $2, 0($3) + #NO_APP + #APP + lbu $2, 0($1) + sb $2, 0($4) + #NO_APP + lw $16, 80($sp) + lw $17, 84($sp) + lw $18, 88($sp) + lw $19, 92($sp) + lw $20, 96($sp) + lw $21, 100($sp) + lw $22, 104($sp) + lw $23, 108($sp) + lw $fp, 112($sp) + lw $ra, 116($sp) jr $ra - addiu $sp, $sp, 64 + addiu $sp, $sp, 120 $func_end2: asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: addiu $sp, $sp, -32 diff --git a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align16 b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align16 index 38218d3..04cdf4b 100644 --- a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align16 @@ -1,22 +1,95 @@ asm_test::atomic_memcpy_store_align16::release: + addiu $sp, 
$sp, -80 + sw $ra, 76($sp) + sw $fp, 72($sp) + move $fp, $sp + addiu $1, $zero, -16 + and $sp, $sp, $1 + lw $1, 12($5) + lw $2, 16($5) + lw $3, 20($5) + lw $6, 24($5) + lw $7, 28($5) + addiu $11, $4, 20 + addiu $9, $4, 16 + sw $1, 12($sp) + lw $1, 8($5) + sw $7, 28($sp) + sw $6, 24($sp) + sw $3, 20($sp) + sw $2, 16($sp) + sw $1, 8($sp) + lw $1, 4($5) + sw $1, 4($sp) lw $1, 0($5) - lw $2, 4($5) - lw $3, 8($5) - lw $6, 12($5) - lw $7, 16($5) - lw $8, 20($5) - lw $9, 24($5) - lw $5, 28($5) + sw $1, 0($sp) sync - sw $5, 28($4) - sw $9, 24($4) - sw $8, 20($4) - sw $7, 16($4) - sw $6, 12($4) - sw $3, 8($4) - sw $2, 4($4) - jr $ra + lw $1, 4($sp) + lw $2, 8($sp) + lw $3, 12($sp) + lw $5, 16($sp) + lw $6, 20($sp) + lw $7, 24($sp) + lw $8, 28($sp) + sw $1, 36($sp) + lw $1, 0($sp) + sw $7, 56($sp) + sw $2, 40($sp) + addiu $2, $sp, 32 + sw $8, 60($sp) + sw $6, 52($sp) + sw $3, 44($sp) + sw $5, 48($sp) + addiu $7, $4, 12 + addiu $3, $2, 28 + addiu $12, $2, 20 + addiu $10, $2, 16 + ori $8, $2, 12 + ori $6, $2, 8 + sw $1, 32($sp) + addiu $1, $4, 28 + #APP + lw $5, 0($3) + sw $5, 0($1) + #NO_APP + addiu $1, $4, 24 + addiu $3, $2, 24 + #APP + lw $5, 0($3) + sw $5, 0($1) + #NO_APP + addiu $1, $4, 4 + ori $3, $2, 4 + #APP + lw $13, 0($12) + sw $13, 0($11) + #NO_APP + addiu $5, $4, 8 + #APP + lw $11, 0($10) + sw $11, 0($9) + #NO_APP + #APP + lw $9, 0($8) + sw $9, 0($7) + #NO_APP + #APP + lw $7, 0($6) + sw $7, 0($5) + #NO_APP + #APP + lw $5, 0($3) + sw $5, 0($1) + #NO_APP + #APP + lw $1, 0($2) sw $1, 0($4) + #NO_APP + move $sp, $fp + lw $fp, 72($sp) + lw $ra, 76($sp) + jr $ra + addiu $sp, $sp, 80 $func_end18: asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: addiu $sp, $sp, -48 diff --git a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align2 b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align2 index e2b0e40..95862f4 100644 --- a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align2 +++ 
b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align2 @@ -1,137 +1,167 @@ asm_test::atomic_memcpy_store_align2::release: - addiu $sp, $sp, -64 + addiu $sp, $sp, -112 + sw $ra, 108($sp) + sw $fp, 104($sp) + sw $23, 100($sp) + sw $22, 96($sp) + sw $21, 92($sp) + sw $20, 88($sp) + sw $19, 84($sp) + sw $18, 80($sp) + sw $17, 76($sp) + sw $16, 72($sp) lwl $1, 23($5) lwl $2, 19($5) lwl $3, 15($5) lwl $6, 11($5) lwl $7, 27($5) lwl $8, 31($5) + addiu $ra, $4, 28 + addiu $fp, $4, 26 + addiu $22, $4, 24 + addiu $20, $4, 22 + addiu $18, $4, 20 + addiu $16, $4, 18 + addiu $25, $4, 16 + addiu $15, $4, 14 + addiu $13, $4, 12 + addiu $11, $4, 10 + addiu $9, $4, 8 lwr $1, 20($5) lwr $6, 8($5) lwr $3, 12($5) lwr $2, 16($5) lwr $7, 24($5) lwr $8, 28($5) - sw $1, 20($sp) + sw $1, 28($sp) lwl $1, 7($5) - sw $8, 28($sp) - sw $7, 24($sp) - sw $2, 16($sp) - sw $3, 12($sp) - sw $6, 8($sp) + sw $8, 36($sp) + sw $7, 32($sp) + sw $2, 24($sp) + sw $3, 20($sp) + sw $6, 16($sp) lwr $1, 4($5) - sw $1, 4($sp) + sw $1, 12($sp) lwl $1, 3($5) lwr $1, 0($5) - sw $1, 0($sp) + sw $1, 8($sp) sync - lw $1, 16($sp) - lw $2, 20($sp) - lw $3, 24($sp) + lw $1, 12($sp) + lw $2, 16($sp) + lw $3, 20($sp) + lw $5, 24($sp) + lw $6, 28($sp) + lw $7, 32($sp) + lw $8, 36($sp) + sw $1, 44($sp) + lw $1, 8($sp) + sw $7, 64($sp) + sw $2, 48($sp) + addiu $2, $sp, 40 + sw $8, 68($sp) + sw $6, 60($sp) + sw $3, 52($sp) + sw $5, 56($sp) + addiu $7, $4, 6 + addiu $3, $2, 30 + addiu $23, $2, 24 + addiu $21, $2, 22 + addiu $19, $2, 20 + addiu $17, $2, 18 + addiu $gp, $2, 16 + addiu $24, $2, 14 + addiu $14, $2, 12 + addiu $12, $2, 10 + addiu $10, $2, 8 + ori $8, $2, 6 + ori $6, $2, 4 + sw $1, 40($sp) + addiu $1, $4, 30 + #APP + lhu $5, 0($3) + sh $5, 0($1) + #NO_APP + addiu $1, $4, 2 + sw $1, 4($sp) + ori $1, $2, 2 + addiu $5, $4, 4 + sw $1, 0($sp) + addiu $1, $2, 28 + #APP + lhu $3, 0($1) + sh $3, 0($ra) + #NO_APP + addiu $1, $2, 26 + #APP + lhu $3, 0($1) + sh $3, 0($fp) + #NO_APP + #APP + lhu $1, 0($23) + sh 
$1, 0($22) + #NO_APP + #APP + lhu $1, 0($21) + sh $1, 0($20) + #NO_APP + #APP + lhu $1, 0($19) + sh $1, 0($18) + #NO_APP + #APP + lhu $1, 0($17) + sh $1, 0($16) + #NO_APP + #APP + lhu $1, 0($gp) + sh $1, 0($25) + #NO_APP + #APP + lhu $1, 0($24) + sh $1, 0($15) + #NO_APP + #APP + lhu $1, 0($14) + sh $1, 0($13) + #NO_APP + #APP + lhu $1, 0($12) + sh $1, 0($11) + #NO_APP + #APP + lhu $1, 0($10) + sh $1, 0($9) + #NO_APP + #APP + lhu $1, 0($8) + sh $1, 0($7) + #NO_APP + #APP + lhu $1, 0($6) + sh $1, 0($5) + #NO_APP + lw $3, 4($sp) lw $5, 0($sp) - lw $6, 4($sp) - lw $7, 8($sp) - lw $8, 12($sp) - lw $9, 28($sp) - sw $2, 52($sp) - sw $1, 48($sp) - addiu $1, $4, 3 - addiu $2, $zero, -4 - sw $3, 56($sp) - sw $9, 60($sp) - sw $8, 44($sp) - sw $7, 40($sp) - sw $6, 36($sp) - and $3, $1, $2 - subu $2, $3, $4 - sltiu $1, $2, 33 - beqz $1, $BB6_6 - sw $5, 32($sp) - beqz $2, $BB6_7 - nop - addiu $5, $sp, 32 - subu $7, $4, $3 - addiu $6, $4, 32 - move $8, $4 -$BB6_3: - lbu $1, 0($5) - addiu $5, $5, 1 - sb $1, 0($8) - addiu $1, $7, 1 - addiu $8, $8, 1 - sltu $9, $1, $7 - beqz $9, $BB6_3 - move $7, $1 - subu $3, $6, $3 - sltiu $1, $3, 4 - bnez $1, $BB6_10 - nop - b $BB6_8 - nop -$BB6_6: - lhu $1, 62($sp) - sh $1, 30($4) - lhu $1, 60($sp) - sh $1, 28($4) - lhu $1, 58($sp) - sh $1, 26($4) - lhu $1, 56($sp) - sh $1, 24($4) - lhu $1, 54($sp) - sh $1, 22($4) - lhu $1, 52($sp) - sh $1, 20($4) - lhu $1, 50($sp) - sh $1, 18($4) - lhu $1, 48($sp) - sh $1, 16($4) - lhu $1, 46($sp) - sh $1, 14($4) - lhu $1, 44($sp) - sh $1, 12($4) - lhu $1, 42($sp) - sh $1, 10($4) - lhu $1, 40($sp) - sh $1, 8($4) - lhu $1, 38($sp) - sh $1, 6($4) - lhu $1, 36($sp) - sh $1, 4($4) - lhu $1, 34($sp) - sh $1, 2($4) - lhu $1, 32($sp) - b $BB6_13 + #APP + lhu $1, 0($5) + sh $1, 0($3) + #NO_APP + #APP + lhu $1, 0($2) sh $1, 0($4) -$BB6_7: - addiu $3, $zero, 32 -$BB6_8: - addiu $5, $sp, 32 -$BB6_9: - addu $1, $5, $2 - addiu $3, $3, -4 - lwl $6, 3($1) - lwr $6, 0($1) - addu $1, $4, $2 - sw $6, 0($1) - sltiu $1, $3, 4 - 
beqz $1, $BB6_9 - addiu $2, $2, 4 -$BB6_10: - beqz $3, $BB6_13 - nop - addiu $1, $sp, 32 - addu $5, $1, $2 - addu $2, $4, $2 -$BB6_12: - lbu $1, 0($5) - addiu $5, $5, 1 - addiu $3, $3, -1 - sb $1, 0($2) - bnez $3, $BB6_12 - addiu $2, $2, 1 -$BB6_13: + #NO_APP + lw $16, 72($sp) + lw $17, 76($sp) + lw $18, 80($sp) + lw $19, 84($sp) + lw $20, 88($sp) + lw $21, 92($sp) + lw $22, 96($sp) + lw $23, 100($sp) + lw $fp, 104($sp) + lw $ra, 108($sp) jr $ra - addiu $sp, $sp, 64 + addiu $sp, $sp, 112 $func_end6: asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: addiu $sp, $sp, -32 diff --git a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align4 b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align4 index 5ddfa86..a7c0169 100644 --- a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align4 @@ -1,22 +1,87 @@ asm_test::atomic_memcpy_store_align4::release: + addiu $sp, $sp, -64 + lw $1, 12($5) + lw $2, 16($5) + lw $3, 20($5) + lw $6, 24($5) + lw $7, 28($5) + addiu $11, $4, 20 + addiu $9, $4, 16 + sw $1, 12($sp) + lw $1, 8($5) + sw $7, 28($sp) + sw $6, 24($sp) + sw $3, 20($sp) + sw $2, 16($sp) + sw $1, 8($sp) + lw $1, 4($5) + sw $1, 4($sp) lw $1, 0($5) - lw $2, 4($5) - lw $3, 8($5) - lw $6, 12($5) - lw $7, 16($5) - lw $8, 20($5) - lw $9, 24($5) - lw $5, 28($5) + sw $1, 0($sp) sync - sw $5, 28($4) - sw $9, 24($4) - sw $8, 20($4) - sw $7, 16($4) - sw $6, 12($4) - sw $3, 8($4) - sw $2, 4($4) - jr $ra + lw $1, 4($sp) + lw $2, 8($sp) + lw $3, 12($sp) + lw $5, 16($sp) + lw $6, 20($sp) + lw $7, 24($sp) + lw $8, 28($sp) + sw $1, 36($sp) + lw $1, 0($sp) + sw $7, 56($sp) + sw $2, 40($sp) + addiu $2, $sp, 32 + sw $8, 60($sp) + sw $6, 52($sp) + sw $3, 44($sp) + sw $5, 48($sp) + addiu $7, $4, 12 + addiu $3, $2, 28 + addiu $12, $2, 20 + addiu $10, $2, 16 + addiu $8, $2, 12 + addiu $6, $2, 8 + sw $1, 32($sp) + addiu $1, $4, 28 + #APP + lw $5, 0($3) + sw 
$5, 0($1) + #NO_APP + addiu $1, $4, 24 + addiu $3, $2, 24 + #APP + lw $5, 0($3) + sw $5, 0($1) + #NO_APP + addiu $1, $4, 4 + ori $3, $2, 4 + #APP + lw $13, 0($12) + sw $13, 0($11) + #NO_APP + addiu $5, $4, 8 + #APP + lw $11, 0($10) + sw $11, 0($9) + #NO_APP + #APP + lw $9, 0($8) + sw $9, 0($7) + #NO_APP + #APP + lw $7, 0($6) + sw $7, 0($5) + #NO_APP + #APP + lw $5, 0($3) + sw $5, 0($1) + #NO_APP + #APP + lw $1, 0($2) sw $1, 0($4) + #NO_APP + jr $ra + addiu $sp, $sp, 64 $func_end10: asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: addiu $sp, $sp, -32 diff --git a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align8 b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align8 index 6e81970..5686679 100644 --- a/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/mipsel-unknown-linux-gnu/atomic_memcpy_store_align8 @@ -1,22 +1,87 @@ asm_test::atomic_memcpy_store_align8::release: + addiu $sp, $sp, -64 + lw $1, 12($5) + lw $2, 16($5) + lw $3, 20($5) + lw $6, 24($5) + lw $7, 28($5) + addiu $11, $4, 20 + addiu $9, $4, 16 + sw $1, 12($sp) + lw $1, 8($5) + sw $7, 28($sp) + sw $6, 24($sp) + sw $3, 20($sp) + sw $2, 16($sp) + sw $1, 8($sp) + lw $1, 4($5) + sw $1, 4($sp) lw $1, 0($5) - lw $2, 4($5) - lw $3, 8($5) - lw $6, 12($5) - lw $7, 16($5) - lw $8, 20($5) - lw $9, 24($5) - lw $5, 28($5) + sw $1, 0($sp) sync - sw $5, 28($4) - sw $9, 24($4) - sw $8, 20($4) - sw $7, 16($4) - sw $6, 12($4) - sw $3, 8($4) - sw $2, 4($4) - jr $ra + lw $1, 4($sp) + lw $2, 8($sp) + lw $3, 12($sp) + lw $5, 16($sp) + lw $6, 20($sp) + lw $7, 24($sp) + lw $8, 28($sp) + sw $1, 36($sp) + lw $1, 0($sp) + sw $7, 56($sp) + sw $2, 40($sp) + addiu $2, $sp, 32 + sw $8, 60($sp) + sw $6, 52($sp) + sw $3, 44($sp) + sw $5, 48($sp) + addiu $7, $4, 12 + addiu $3, $2, 28 + addiu $12, $2, 20 + addiu $10, $2, 16 + addiu $8, $2, 12 + addiu $6, $2, 8 + sw $1, 32($sp) + addiu $1, $4, 28 + #APP + lw $5, 0($3) + sw $5, 0($1) + 
#NO_APP + addiu $1, $4, 24 + addiu $3, $2, 24 + #APP + lw $5, 0($3) + sw $5, 0($1) + #NO_APP + addiu $1, $4, 4 + ori $3, $2, 4 + #APP + lw $13, 0($12) + sw $13, 0($11) + #NO_APP + addiu $5, $4, 8 + #APP + lw $11, 0($10) + sw $11, 0($9) + #NO_APP + #APP + lw $9, 0($8) + sw $9, 0($7) + #NO_APP + #APP + lw $7, 0($6) + sw $7, 0($5) + #NO_APP + #APP + lw $5, 0($3) + sw $5, 0($1) + #NO_APP + #APP + lw $1, 0($2) sw $1, 0($4) + #NO_APP + jr $ra + addiu $sp, $sp, 64 $func_end14: asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: addiu $sp, $sp, -32 diff --git a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align1 b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align1 index a9545fe..9e76056 100644 --- a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align1 @@ -1,167 +1,260 @@ asm_test::atomic_memcpy_load_align1::acquire: - stwu 1, -48(1) - addi 5, 4, 3 - rlwinm 8, 5, 0, 0, 29 - sub 5, 8, 4 - cmplwi 5, 33 - bge 0, .LBB0_5 - cmplwi 5, 0 - addi 6, 1, 16 - beq 0, .LBB0_6 - addi 7, 4, 32 - addi 9, 4, -1 - addi 10, 1, 15 - mtctr 5 -.LBB0_3: - lbz 11, 1(9) - addi 9, 9, 1 - stbu 11, 1(10) - bdnz .LBB0_3 - sub 7, 7, 8 - cmplwi 7, 4 - bge 0, .LBB0_7 - b .LBB0_11 -.LBB0_5: - lbz 5, 0(4) - stb 5, 16(1) - lbz 5, 1(4) - stb 5, 17(1) - lbz 5, 2(4) - stb 5, 18(1) - lbz 5, 3(4) - stb 5, 19(1) - lbz 5, 4(4) - stb 5, 20(1) - lbz 5, 5(4) - stb 5, 21(1) - lbz 5, 6(4) - stb 5, 22(1) - lbz 5, 7(4) - stb 5, 23(1) - lbz 5, 8(4) - stb 5, 24(1) - lbz 5, 9(4) - stb 5, 25(1) - lbz 5, 10(4) - stb 5, 26(1) - lbz 5, 11(4) - stb 5, 27(1) - lbz 5, 12(4) - stb 5, 28(1) - lbz 5, 13(4) - stb 5, 29(1) - lbz 5, 14(4) - stb 5, 30(1) - lbz 5, 15(4) - stb 5, 31(1) - lbz 5, 16(4) - stb 5, 32(1) - lbz 5, 17(4) - stb 5, 33(1) - lbz 5, 18(4) - stb 5, 34(1) - lbz 5, 19(4) - stb 5, 35(1) - lbz 5, 20(4) - stb 5, 36(1) - lbz 5, 21(4) - stb 5, 37(1) - lbz 5, 22(4) - stb 5, 38(1) - 
lbz 5, 23(4) - stb 5, 39(1) - lbz 5, 24(4) - stb 5, 40(1) - lbz 5, 25(4) - stb 5, 41(1) - lbz 5, 26(4) - stb 5, 42(1) - lbz 5, 27(4) - stb 5, 43(1) - lbz 5, 28(4) - stb 5, 44(1) - lbz 5, 29(4) - stb 5, 45(1) - lbz 5, 30(4) - stb 5, 46(1) - lbz 4, 31(4) - lwz 5, 16(1) - stb 4, 47(1) - lwz 4, 20(1) - stw 5, 0(3) - lwz 5, 24(1) - stw 4, 4(3) - lwz 4, 28(1) - stw 5, 8(3) - lwz 5, 32(1) - stw 4, 12(3) - lwz 4, 36(1) - stw 5, 16(3) - lwz 5, 40(1) - stw 4, 20(3) - lwz 4, 44(1) - stw 5, 24(3) - stw 4, 28(3) - lwsync - addi 1, 1, 48 - blr -.LBB0_6: - li 7, 32 -.LBB0_7: - addi 10, 7, -4 - li 11, 3 - cmplwi 10, 3 - bc 12, 0, .LBB0_9 - ori 10, 11, 0 - b .LBB0_9 -.LBB0_9: - sub 8, 8, 4 - not 10, 10 - add 10, 10, 7 - addi 9, 8, -4 - srwi 10, 10, 2 - add 8, 4, 9 - add 9, 6, 9 - addi 10, 10, 1 - mtctr 10 -.LBB0_10: - lwz 10, 4(8) - addi 8, 8, 4 - addi 7, 7, -4 - addi 5, 5, 4 - stwu 10, 4(9) - bdnz .LBB0_10 -.LBB0_11: - cmplwi 7, 0 - beq 0, .LBB0_14 - addi 5, 5, -1 - add 4, 4, 5 - add 5, 6, 5 - mtctr 7 -.LBB0_13: - lbz 6, 1(4) - addi 4, 4, 1 - stbu 6, 1(5) - bdnz .LBB0_13 -.LBB0_14: + stwu 1, -128(1) + addi 10, 1, 29 + stw 10, 20(1) + addi 10, 1, 32 + stw 10, 16(1) + addi 10, 1, 33 + stw 14, 56(1) + addi 5, 1, 24 + stw 15, 60(1) + li 6, 1 + stw 16, 64(1) + li 7, 2 + stw 17, 68(1) + li 8, 3 + stw 18, 72(1) + li 9, 4 + stw 19, 76(1) + li 11, 6 + stw 20, 80(1) + li 12, 7 + stw 21, 84(1) + rlwimi 6, 5, 0, 0, 30 + stw 22, 88(1) + rlwimi 7, 5, 0, 31, 29 + stw 23, 92(1) + rlwimi 8, 5, 0, 0, 29 + stw 24, 96(1) + rlwimi 9, 5, 0, 30, 28 + stw 25, 100(1) + rlwimi 11, 5, 0, 31, 28 + stw 26, 104(1) + rlwimi 12, 5, 0, 0, 28 + stw 27, 108(1) + addi 27, 1, 34 + stw 28, 112(1) + addi 26, 1, 35 + stw 29, 116(1) + addi 29, 4, 1 + stw 31, 124(1) + addi 25, 1, 36 + stw 10, 12(1) + #APP + lbz 28, 0(4) + stb 28, 0(5) + #NO_APP + addi 24, 1, 37 + #APP + lbz 29, 0(29) + stb 29, 0(6) + #NO_APP + addi 23, 1, 38 + addi 6, 4, 2 + #APP + lbz 6, 0(6) + stb 6, 0(7) + #NO_APP + addi 22, 1, 39 + addi 6, 4, 3 + #APP 
+ lbz 6, 0(6) + stb 6, 0(8) + #NO_APP + addi 21, 1, 40 + addi 6, 4, 4 + #APP + lbz 6, 0(6) + stb 6, 0(9) + #NO_APP + addi 20, 1, 41 + addi 6, 4, 5 + lwz 28, 20(1) + #APP + lbz 6, 0(6) + stb 6, 0(28) + #NO_APP + addi 19, 1, 42 + addi 6, 4, 6 + #APP + lbz 6, 0(6) + stb 6, 0(11) + #NO_APP + addi 28, 4, 7 + #APP + lbz 11, 0(28) + stb 11, 0(12) + #NO_APP + addi 6, 4, 8 + lwz 12, 16(1) + #APP + lbz 6, 0(6) + stb 6, 0(12) + #NO_APP + addi 11, 4, 9 + lwz 12, 12(1) + addi 6, 4, 10 + #APP + lbz 11, 0(11) + stb 11, 0(12) + #NO_APP + addi 18, 1, 43 + addi 11, 4, 11 + #APP + lbz 6, 0(6) + stb 6, 0(27) + #NO_APP + addi 17, 1, 44 + addi 6, 4, 12 + #APP + lbz 11, 0(11) + stb 11, 0(26) + #NO_APP + addi 16, 1, 45 + addi 11, 4, 13 + #APP + lbz 6, 0(6) + stb 6, 0(25) + #NO_APP + addi 15, 1, 46 + addi 6, 4, 14 + #APP + lbz 11, 0(11) + stb 11, 0(24) + #NO_APP + addi 14, 1, 47 + addi 11, 4, 15 + #APP + lbz 6, 0(6) + stb 6, 0(23) + #NO_APP + addi 31, 1, 48 + addi 6, 4, 16 + #APP + lbz 11, 0(11) + stb 11, 0(22) + #NO_APP + addi 10, 1, 49 + addi 11, 4, 17 + #APP + lbz 6, 0(6) + stb 6, 0(21) + #NO_APP + addi 5, 1, 50 + addi 6, 4, 18 + #APP + lbz 11, 0(11) + stb 11, 0(20) + #NO_APP + addi 29, 1, 51 + addi 11, 4, 19 + #APP + lbz 6, 0(6) + stb 6, 0(19) + #NO_APP + addi 7, 1, 52 + addi 6, 4, 20 + #APP + lbz 11, 0(11) + stb 11, 0(18) + #NO_APP + addi 8, 1, 53 + addi 11, 4, 21 + #APP + lbz 6, 0(6) + stb 6, 0(17) + #NO_APP + addi 9, 1, 54 + addi 6, 4, 22 + #APP + lbz 11, 0(11) + stb 11, 0(16) + #NO_APP + addi 11, 4, 23 + #APP + lbz 6, 0(6) + stb 6, 0(15) + #NO_APP + addi 6, 4, 24 + #APP + lbz 11, 0(11) + stb 11, 0(14) + #NO_APP + addi 11, 4, 25 + #APP + lbz 6, 0(6) + stb 6, 0(31) + #NO_APP + addi 6, 4, 26 + #APP + lbz 11, 0(11) + stb 11, 0(10) + #NO_APP + #APP + lbz 6, 0(6) + stb 6, 0(5) + #NO_APP + addi 10, 4, 27 + addi 5, 4, 28 + #APP + lbz 6, 0(10) + stb 6, 0(29) + #NO_APP + addi 6, 4, 29 + #APP + lbz 5, 0(5) + stb 5, 0(7) + #NO_APP + #APP + lbz 6, 0(6) + stb 6, 0(8) + #NO_APP + addi 5, 1, 55 + 
addi 6, 4, 30 + addi 4, 4, 31 + #APP + lbz 6, 0(6) + stb 6, 0(9) + #NO_APP + #APP + lbz 4, 0(4) + stb 4, 0(5) + #NO_APP + lwz 4, 24(1) + lwz 5, 28(1) + lwz 6, 32(1) + lwz 7, 36(1) + lwz 8, 40(1) + stw 4, 0(3) lwz 4, 44(1) - lwz 5, 40(1) - stw 4, 28(3) - lwz 4, 36(1) - stw 5, 24(3) - lwz 5, 32(1) + stw 5, 4(3) + lwz 5, 48(1) + stw 6, 8(3) + lwz 6, 52(1) + stw 7, 12(3) + stw 8, 16(3) stw 4, 20(3) - lwz 4, 28(1) - stw 5, 16(3) - lwz 5, 24(1) - stw 4, 12(3) - lwz 4, 20(1) - stw 5, 8(3) - lwz 5, 16(1) - stw 4, 4(3) - stw 5, 0(3) + stw 5, 24(3) + stw 6, 28(3) lwsync - addi 1, 1, 48 + lwz 31, 124(1) + lwz 29, 116(1) + lwz 28, 112(1) + lwz 27, 108(1) + lwz 26, 104(1) + lwz 25, 100(1) + lwz 24, 96(1) + lwz 23, 92(1) + lwz 22, 88(1) + lwz 21, 84(1) + lwz 20, 80(1) + lwz 19, 76(1) + lwz 18, 72(1) + lwz 17, 68(1) + lwz 16, 64(1) + lwz 15, 60(1) + lwz 14, 56(1) + addi 1, 1, 128 blr asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: stwu 1, -112(1) diff --git a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align16 b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align16 index 9b18f9c..8bba2cb 100644 --- a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align16 @@ -1,21 +1,75 @@ asm_test::atomic_memcpy_load_align16::acquire: - lwz 5, 0(4) - lwz 6, 4(4) - lwz 7, 8(4) - lwz 8, 12(4) - lwz 9, 16(4) - lwz 10, 20(4) - lwz 11, 24(4) - lwz 4, 28(4) - stw 5, 0(3) - stw 6, 4(3) - stw 7, 8(3) - stw 8, 12(3) - stw 9, 16(3) - stw 10, 20(3) - stw 11, 24(3) - stw 4, 28(3) + stwu 1, -64(1) + stw 29, 52(1) + addi 5, 1, 16 + li 6, 4 + li 7, 8 + li 8, 12 + rlwimi 6, 5, 0, 30, 28 + rlwimi 7, 5, 0, 29, 27 + rlwimi 8, 5, 0, 30, 27 + addi 12, 4, 4 + #APP + lwz 29, 0(4) + stw 29, 0(5) + #NO_APP + addi 9, 1, 32 + addi 5, 4, 8 + #APP + lwz 12, 0(12) + stw 12, 0(6) + #NO_APP + addi 10, 1, 36 + addi 6, 4, 12 + #APP + lwz 5, 0(5) + stw 5, 0(7) + #NO_APP + addi 11, 
1, 40 + addi 5, 4, 16 + #APP + lwz 6, 0(6) + stw 6, 0(8) + #NO_APP + addi 6, 4, 20 + #APP + lwz 5, 0(5) + stw 5, 0(9) + #NO_APP + #APP + lwz 6, 0(6) + stw 6, 0(10) + #NO_APP + addi 5, 1, 44 + addi 6, 4, 24 + addi 4, 4, 28 + #APP + lwz 6, 0(6) + stw 6, 0(11) + #NO_APP + #APP + lwz 4, 0(4) + stw 4, 0(5) + #NO_APP + lwz 4, 16(1) + lwz 5, 20(1) + lwz 6, 24(1) + lwz 7, 28(1) + lwz 8, 32(1) + stw 4, 0(3) + lwz 4, 36(1) + stw 5, 4(3) + lwz 5, 40(1) + stw 6, 8(3) + lwz 6, 44(1) + stw 7, 12(3) + stw 8, 16(3) + stw 4, 20(3) + stw 5, 24(3) + stw 6, 28(3) lwsync + lwz 29, 52(1) + addi 1, 1, 64 blr asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: lwz 5, 24(4) diff --git a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align2 b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align2 index bbf4db0..c5c8a2c 100644 --- a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align2 @@ -1,119 +1,139 @@ asm_test::atomic_memcpy_load_align2::acquire: - stwu 1, -48(1) - addi 5, 4, 3 - rlwinm 8, 5, 0, 0, 29 - sub 5, 8, 4 - cmplwi 5, 32 - bgt 0, .LBB4_5 - cmplwi 5, 0 - addi 6, 1, 16 - beq 0, .LBB4_6 - addi 7, 4, 32 - addi 9, 4, -1 - addi 10, 6, -1 - mtctr 5 -.LBB4_3: - lbz 11, 1(9) - addi 9, 9, 1 - stbu 11, 1(10) - bdnz .LBB4_3 - sub 7, 7, 8 - cmplwi 7, 4 - bge 0, .LBB4_7 - b .LBB4_11 -.LBB4_5: - lhz 5, 0(4) - sth 5, 16(1) - lhz 5, 2(4) - sth 5, 18(1) - lhz 5, 4(4) - sth 5, 20(1) - lhz 5, 6(4) - sth 5, 22(1) - lhz 5, 8(4) - sth 5, 24(1) - lhz 5, 10(4) - sth 5, 26(1) - lhz 5, 12(4) - sth 5, 28(1) - lhz 5, 14(4) - sth 5, 30(1) - lhz 5, 16(4) - sth 5, 32(1) - lhz 5, 18(4) - sth 5, 34(1) - lhz 5, 20(4) - sth 5, 36(1) - lhz 5, 22(4) - sth 5, 38(1) - lhz 5, 24(4) - sth 5, 40(1) - lhz 5, 26(4) - sth 5, 42(1) - lhz 5, 28(4) - sth 5, 44(1) - lhz 4, 30(4) - sth 4, 46(1) - b .LBB4_14 -.LBB4_6: - li 7, 32 -.LBB4_7: - addi 10, 7, -4 - li 11, 3 - cmplwi 10, 3 - bc 12, 
0, .LBB4_9 - ori 10, 11, 0 - b .LBB4_9 -.LBB4_9: - sub 9, 8, 4 - not 10, 10 - add 8, 8, 6 - add 10, 10, 7 - add 9, 9, 4 - sub 12, 8, 4 - srwi 10, 10, 2 - addi 8, 9, -4 - addi 9, 12, -4 - addi 10, 10, 1 - mtctr 10 -.LBB4_10: - lwz 10, 4(8) - addi 8, 8, 4 - addi 7, 7, -4 - addi 5, 5, 4 - stwu 10, 4(9) - bdnz .LBB4_10 -.LBB4_11: - cmplwi 7, 0 - beq 0, .LBB4_14 - addi 5, 5, -1 - add 4, 4, 5 - add 5, 6, 5 - mtctr 7 -.LBB4_13: - lbz 6, 1(4) - addi 4, 4, 1 - stbu 6, 1(5) - bdnz .LBB4_13 -.LBB4_14: - lwz 4, 44(1) - lwz 5, 40(1) - stw 4, 28(3) + stwu 1, -96(1) + stw 21, 52(1) + addi 5, 1, 16 + stw 22, 56(1) + li 6, 2 + stw 23, 60(1) + li 7, 4 + stw 24, 64(1) + li 8, 6 + stw 25, 68(1) + rlwimi 6, 5, 0, 31, 29 + stw 26, 72(1) + rlwimi 7, 5, 0, 30, 28 + stw 27, 76(1) + rlwimi 8, 5, 0, 31, 28 + stw 28, 80(1) + addi 22, 4, 2 + stw 29, 84(1) + #APP + lhz 21, 0(4) + sth 21, 0(5) + #NO_APP + addi 9, 1, 24 + addi 5, 4, 4 + #APP + lhz 22, 0(22) + sth 22, 0(6) + #NO_APP + addi 10, 1, 26 + addi 6, 4, 6 + #APP + lhz 5, 0(5) + sth 5, 0(7) + #NO_APP + addi 11, 1, 28 + addi 5, 4, 8 + #APP + lhz 6, 0(6) + sth 6, 0(8) + #NO_APP + addi 12, 1, 30 + addi 6, 4, 10 + #APP + lhz 5, 0(5) + sth 5, 0(9) + #NO_APP + addi 29, 1, 32 + addi 5, 4, 12 + #APP + lhz 6, 0(6) + sth 6, 0(10) + #NO_APP + addi 28, 1, 34 + addi 6, 4, 14 + #APP + lhz 5, 0(5) + sth 5, 0(11) + #NO_APP + addi 27, 1, 36 + addi 5, 4, 16 + #APP + lhz 6, 0(6) + sth 6, 0(12) + #NO_APP + addi 26, 1, 38 + addi 6, 4, 18 + #APP + lhz 5, 0(5) + sth 5, 0(29) + #NO_APP + addi 25, 1, 40 + addi 5, 4, 20 + #APP + lhz 6, 0(6) + sth 6, 0(28) + #NO_APP + addi 24, 1, 42 + addi 6, 4, 22 + #APP + lhz 5, 0(5) + sth 5, 0(27) + #NO_APP + addi 23, 1, 44 + addi 5, 4, 24 + #APP + lhz 6, 0(6) + sth 6, 0(26) + #NO_APP + addi 6, 4, 26 + #APP + lhz 5, 0(5) + sth 5, 0(25) + #NO_APP + #APP + lhz 6, 0(6) + sth 6, 0(24) + #NO_APP + addi 5, 1, 46 + addi 6, 4, 28 + addi 4, 4, 30 + #APP + lhz 6, 0(6) + sth 6, 0(23) + #NO_APP + #APP + lhz 4, 0(4) + sth 4, 0(5) + #NO_APP + 
lwz 4, 16(1) + lwz 5, 20(1) + lwz 6, 24(1) + lwz 7, 28(1) + lwz 8, 32(1) + stw 4, 0(3) lwz 4, 36(1) - stw 5, 24(3) - lwz 5, 32(1) + stw 5, 4(3) + lwz 5, 40(1) + stw 6, 8(3) + lwz 6, 44(1) + stw 7, 12(3) + stw 8, 16(3) stw 4, 20(3) - lwz 4, 28(1) - stw 5, 16(3) - lwz 5, 24(1) - stw 4, 12(3) - lwz 4, 20(1) - stw 5, 8(3) - lwz 5, 16(1) - stw 4, 4(3) - stw 5, 0(3) + stw 5, 24(3) + stw 6, 28(3) lwsync - addi 1, 1, 48 + lwz 29, 84(1) + lwz 28, 80(1) + lwz 27, 76(1) + lwz 26, 72(1) + lwz 25, 68(1) + lwz 24, 64(1) + lwz 23, 60(1) + lwz 22, 56(1) + lwz 21, 52(1) + addi 1, 1, 96 blr asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: stwu 1, -48(1) diff --git a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align4 b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align4 index 56b73b1..8943ea1 100644 --- a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align4 @@ -1,21 +1,73 @@ asm_test::atomic_memcpy_load_align4::acquire: - lwz 5, 0(4) - lwz 6, 4(4) - lwz 7, 8(4) - lwz 8, 12(4) - lwz 9, 16(4) - lwz 10, 20(4) - lwz 11, 24(4) - lwz 4, 28(4) - stw 5, 0(3) - stw 6, 4(3) - stw 7, 8(3) - stw 8, 12(3) - stw 9, 16(3) - stw 10, 20(3) - stw 11, 24(3) - stw 4, 28(3) + stwu 1, -64(1) + stw 29, 52(1) + addi 5, 1, 16 + li 6, 4 + rlwimi 6, 5, 0, 30, 28 + addi 12, 4, 4 + #APP + lwz 29, 0(4) + stw 29, 0(5) + #NO_APP + addi 7, 1, 24 + addi 5, 4, 8 + #APP + lwz 12, 0(12) + stw 12, 0(6) + #NO_APP + addi 8, 1, 28 + addi 6, 4, 12 + #APP + lwz 5, 0(5) + stw 5, 0(7) + #NO_APP + addi 9, 1, 32 + addi 5, 4, 16 + #APP + lwz 6, 0(6) + stw 6, 0(8) + #NO_APP + addi 10, 1, 36 + addi 6, 4, 20 + #APP + lwz 5, 0(5) + stw 5, 0(9) + #NO_APP + addi 11, 1, 40 + #APP + lwz 6, 0(6) + stw 6, 0(10) + #NO_APP + addi 5, 1, 44 + addi 6, 4, 24 + addi 4, 4, 28 + #APP + lwz 6, 0(6) + stw 6, 0(11) + #NO_APP + #APP + lwz 4, 0(4) + stw 4, 0(5) + #NO_APP + lwz 4, 16(1) + lwz 5, 20(1) + lwz 
6, 24(1) + lwz 7, 28(1) + lwz 8, 32(1) + stw 4, 0(3) + lwz 4, 36(1) + stw 5, 4(3) + lwz 5, 40(1) + stw 6, 8(3) + lwz 6, 44(1) + stw 7, 12(3) + stw 8, 16(3) + stw 4, 20(3) + stw 5, 24(3) + stw 6, 28(3) lwsync + lwz 29, 52(1) + addi 1, 1, 64 blr asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: lwz 5, 28(4) diff --git a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align8 b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align8 index 03db782..78ef0a0 100644 --- a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_load_align8 @@ -1,21 +1,73 @@ asm_test::atomic_memcpy_load_align8::acquire: - lwz 5, 0(4) - lwz 6, 4(4) - lwz 7, 8(4) - lwz 8, 12(4) - lwz 9, 16(4) - lwz 10, 20(4) - lwz 11, 24(4) - lwz 4, 28(4) - stw 5, 0(3) - stw 6, 4(3) - stw 7, 8(3) - stw 8, 12(3) - stw 9, 16(3) - stw 10, 20(3) - stw 11, 24(3) - stw 4, 28(3) + stwu 1, -64(1) + stw 29, 52(1) + addi 5, 1, 16 + li 6, 4 + rlwimi 6, 5, 0, 30, 28 + addi 12, 4, 4 + #APP + lwz 29, 0(4) + stw 29, 0(5) + #NO_APP + addi 7, 1, 24 + addi 5, 4, 8 + #APP + lwz 12, 0(12) + stw 12, 0(6) + #NO_APP + addi 8, 1, 28 + addi 6, 4, 12 + #APP + lwz 5, 0(5) + stw 5, 0(7) + #NO_APP + addi 9, 1, 32 + addi 5, 4, 16 + #APP + lwz 6, 0(6) + stw 6, 0(8) + #NO_APP + addi 10, 1, 36 + addi 6, 4, 20 + #APP + lwz 5, 0(5) + stw 5, 0(9) + #NO_APP + addi 11, 1, 40 + #APP + lwz 6, 0(6) + stw 6, 0(10) + #NO_APP + addi 5, 1, 44 + addi 6, 4, 24 + addi 4, 4, 28 + #APP + lwz 6, 0(6) + stw 6, 0(11) + #NO_APP + #APP + lwz 4, 0(4) + stw 4, 0(5) + #NO_APP + lwz 4, 16(1) + lwz 5, 20(1) + lwz 6, 24(1) + lwz 7, 28(1) + lwz 8, 32(1) + stw 4, 0(3) + lwz 4, 36(1) + stw 5, 4(3) + lwz 5, 40(1) + stw 6, 8(3) + lwz 6, 44(1) + stw 7, 12(3) + stw 8, 16(3) + stw 4, 20(3) + stw 5, 24(3) + stw 6, 28(3) lwsync + lwz 29, 52(1) + addi 1, 1, 64 blr asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: lwz 5, 24(4) diff --git 
a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align1 b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align1 index 9c57baa..175aaab 100644 --- a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align1 @@ -1,167 +1,283 @@ asm_test::atomic_memcpy_store_align1::release: - stwu 1, -80(1) - lwz 5, 28(4) + stwu 1, -176(1) lwz 6, 24(4) - stw 5, 44(1) - lwz 5, 20(4) - stw 6, 40(1) - lwz 6, 16(4) - stw 5, 36(1) - lwz 5, 12(4) + addi 5, 1, 72 + stw 29, 164(1) + li 7, 1 + stw 6, 64(1) + addi 6, 3, 3 + stw 6, 36(1) + addi 6, 3, 4 + lwz 10, 28(4) + rlwimi 7, 5, 0, 0, 30 + lwz 8, 20(4) + lwz 9, 16(4) + lwz 29, 12(4) + lwz 11, 8(4) + lwz 12, 4(4) + lwz 0, 0(4) + addi 4, 3, 1 + stw 6, 28(1) + addi 6, 3, 5 stw 6, 32(1) - lwz 6, 8(4) - stw 5, 28(1) - lwz 5, 4(4) - lwz 4, 0(4) + addi 6, 3, 6 stw 6, 24(1) - stw 5, 20(1) - stw 4, 16(1) + addi 6, 3, 7 + stw 6, 20(1) + addi 6, 3, 8 + stw 14, 104(1) + stw 15, 108(1) + stw 16, 112(1) + stw 17, 116(1) + stw 18, 120(1) + stw 19, 124(1) + addi 19, 3, 18 + stw 20, 128(1) + addi 20, 3, 17 + stw 21, 132(1) + addi 21, 3, 16 + stw 22, 136(1) + addi 22, 3, 15 + stw 23, 140(1) + addi 23, 3, 14 + stw 24, 144(1) + addi 24, 3, 13 + stw 25, 148(1) + addi 25, 3, 12 + stw 26, 152(1) + addi 26, 3, 11 + stw 27, 156(1) + addi 27, 3, 10 + stw 28, 160(1) + addi 28, 3, 9 + stw 31, 172(1) + stw 10, 68(1) + addi 10, 3, 2 + stw 8, 60(1) + stw 9, 56(1) + stw 29, 52(1) + stw 11, 48(1) + stw 12, 44(1) + addi 12, 3, 19 + stw 0, 40(1) + stw 6, 16(1) lwsync - lwz 4, 44(1) - lwz 5, 40(1) - lwz 6, 36(1) - stw 4, 76(1) - lwz 4, 32(1) - stw 5, 72(1) - lwz 5, 28(1) - stw 6, 68(1) - lwz 6, 24(1) - stw 4, 64(1) - lwz 4, 20(1) - stw 5, 60(1) - addi 5, 3, 3 - stw 6, 56(1) - rlwinm 7, 5, 0, 0, 29 - lwz 6, 16(1) - stw 4, 52(1) - sub 4, 7, 3 - cmplwi 4, 33 - stw 6, 48(1) - bge 0, .LBB2_5 - cmplwi 4, 0 - addi 5, 1, 48 - beq 0, .LBB2_6 - addi 6, 3, 32 - 
addi 8, 1, 47 - addi 9, 3, -1 - mtctr 4 -.LBB2_3: - lbzu 10, 1(8) - addi 11, 9, 1 - stb 10, 1(9) - mr 9, 11 - bdnz .LBB2_3 - sub 6, 6, 7 - cmplwi 6, 4 - bge 0, .LBB2_7 - b .LBB2_11 -.LBB2_5: - lbz 4, 48(1) + lwz 0, 68(1) + lwz 18, 64(1) + lwz 17, 60(1) + lwz 16, 56(1) + lwz 15, 52(1) + lwz 14, 48(1) + lwz 31, 44(1) + lwz 6, 40(1) + stw 0, 100(1) + stw 18, 96(1) + addi 18, 3, 20 + stw 17, 92(1) + addi 17, 3, 21 + stw 16, 88(1) + addi 16, 3, 22 + stw 15, 84(1) + addi 15, 3, 23 + stw 14, 80(1) + addi 14, 3, 24 + stw 31, 76(1) + addi 31, 3, 25 + stw 6, 72(1) + mr 6, 3 + #APP + lbz 8, 0(5) + stb 8, 0(6) + #NO_APP + #APP + lbz 7, 0(7) + stb 7, 0(4) + #NO_APP + addi 6, 3, 26 + li 7, 2 + rlwimi 7, 5, 0, 31, 29 + #APP + lbz 7, 0(7) + stb 7, 0(10) + #NO_APP + addi 8, 3, 27 + li 7, 3 + rlwimi 7, 5, 0, 0, 29 + lwz 9, 36(1) + #APP + lbz 7, 0(7) + stb 7, 0(9) + #NO_APP + addi 4, 3, 28 + li 7, 4 + rlwimi 7, 5, 0, 30, 28 + lwz 10, 28(1) + #APP + lbz 7, 0(7) + stb 7, 0(10) + #NO_APP + addi 9, 3, 29 + addi 7, 1, 77 + lwz 11, 32(1) + #APP + lbz 7, 0(7) + stb 7, 0(11) + #NO_APP + addi 10, 3, 30 + li 7, 6 + li 11, 7 + lwz 29, 24(1) + rlwimi 7, 5, 0, 31, 28 + rlwimi 11, 5, 0, 0, 28 + #APP + lbz 5, 0(7) + stb 5, 0(29) + #NO_APP + addi 3, 3, 31 + lwz 29, 20(1) + #APP + lbz 7, 0(11) + stb 7, 0(29) + #NO_APP + addi 5, 1, 80 + lwz 11, 16(1) + addi 7, 1, 81 + #APP + lbz 5, 0(5) + stb 5, 0(11) + #NO_APP + addi 5, 1, 82 + #APP + lbz 7, 0(7) + stb 7, 0(28) + #NO_APP + addi 7, 1, 83 + #APP + lbz 5, 0(5) + stb 5, 0(27) + #NO_APP + addi 5, 1, 84 + #APP + lbz 7, 0(7) + stb 7, 0(26) + #NO_APP + addi 7, 1, 85 + #APP + lbz 5, 0(5) + stb 5, 0(25) + #NO_APP + addi 5, 1, 86 + #APP + lbz 7, 0(7) + stb 7, 0(24) + #NO_APP + addi 7, 1, 87 + #APP + lbz 5, 0(5) + stb 5, 0(23) + #NO_APP + addi 5, 1, 88 + #APP + lbz 7, 0(7) + stb 7, 0(22) + #NO_APP + addi 7, 1, 89 + #APP + lbz 5, 0(5) + stb 5, 0(21) + #NO_APP + addi 5, 1, 90 + #APP + lbz 7, 0(7) + stb 7, 0(20) + #NO_APP + addi 7, 1, 91 + #APP + lbz 5, 0(5) + stb 
5, 0(19) + #NO_APP + addi 5, 1, 92 + #APP + lbz 7, 0(7) + stb 7, 0(12) + #NO_APP + addi 7, 1, 93 + #APP + lbz 5, 0(5) + stb 5, 0(18) + #NO_APP + addi 5, 1, 94 + #APP + lbz 7, 0(7) + stb 7, 0(17) + #NO_APP + addi 7, 1, 95 + #APP + lbz 5, 0(5) + stb 5, 0(16) + #NO_APP + addi 5, 1, 96 + #APP + lbz 7, 0(7) + stb 7, 0(15) + #NO_APP + addi 7, 1, 97 + #APP + lbz 5, 0(5) + stb 5, 0(14) + #NO_APP + addi 5, 1, 98 + #APP + lbz 7, 0(7) + stb 7, 0(31) + #NO_APP + #APP + lbz 5, 0(5) + stb 5, 0(6) + #NO_APP + addi 7, 1, 99 + addi 5, 1, 100 + #APP + lbz 6, 0(7) + stb 6, 0(8) + #NO_APP + #APP + lbz 5, 0(5) + stb 5, 0(4) + #NO_APP + addi 6, 1, 101 + addi 4, 1, 102 + #APP + lbz 5, 0(6) + stb 5, 0(9) + #NO_APP + #APP + lbz 4, 0(4) + stb 4, 0(10) + #NO_APP + addi 4, 1, 103 + #APP + lbz 4, 0(4) stb 4, 0(3) - lbz 4, 49(1) - stb 4, 1(3) - lbz 4, 50(1) - stb 4, 2(3) - lbz 4, 51(1) - stb 4, 3(3) - lbz 4, 52(1) - stb 4, 4(3) - lbz 4, 53(1) - stb 4, 5(3) - lbz 4, 54(1) - stb 4, 6(3) - lbz 4, 55(1) - stb 4, 7(3) - lbz 4, 56(1) - stb 4, 8(3) - lbz 4, 57(1) - stb 4, 9(3) - lbz 4, 58(1) - stb 4, 10(3) - lbz 4, 59(1) - stb 4, 11(3) - lbz 4, 60(1) - stb 4, 12(3) - lbz 4, 61(1) - stb 4, 13(3) - lbz 4, 62(1) - stb 4, 14(3) - lbz 4, 63(1) - stb 4, 15(3) - lbz 4, 64(1) - stb 4, 16(3) - lbz 4, 65(1) - stb 4, 17(3) - lbz 4, 66(1) - stb 4, 18(3) - lbz 4, 67(1) - stb 4, 19(3) - lbz 4, 68(1) - stb 4, 20(3) - lbz 4, 69(1) - stb 4, 21(3) - lbz 4, 70(1) - stb 4, 22(3) - lbz 4, 71(1) - stb 4, 23(3) - lbz 4, 72(1) - stb 4, 24(3) - lbz 4, 73(1) - stb 4, 25(3) - lbz 4, 74(1) - stb 4, 26(3) - lbz 4, 75(1) - stb 4, 27(3) - lbz 4, 76(1) - stb 4, 28(3) - lbz 4, 77(1) - stb 4, 29(3) - lbz 4, 78(1) - stb 4, 30(3) - lbz 4, 79(1) - stb 4, 31(3) - b .LBB2_14 -.LBB2_6: - li 6, 32 -.LBB2_7: - addi 9, 6, -4 - li 10, 3 - cmplwi 9, 3 - bc 12, 0, .LBB2_9 - ori 9, 10, 0 - b .LBB2_9 -.LBB2_9: - sub 7, 7, 3 - not 9, 9 - add 9, 9, 6 - addi 8, 7, -4 - srwi 9, 9, 2 - add 7, 5, 8 - add 8, 3, 8 - addi 9, 9, 1 - mtctr 9 -.LBB2_10: - lwzu 
9, 4(7) - addi 6, 6, -4 - addi 4, 4, 4 - stw 9, 4(8) - addi 8, 8, 4 - bdnz .LBB2_10 -.LBB2_11: - cmplwi 6, 0 - beq 0, .LBB2_14 - addi 7, 4, -1 - add 4, 5, 7 - add 3, 3, 7 - mtctr 6 -.LBB2_13: - lbzu 5, 1(4) - addi 6, 3, 1 - stb 5, 1(3) - mr 3, 6 - bdnz .LBB2_13 -.LBB2_14: - addi 1, 1, 80 + #NO_APP + lwz 31, 172(1) + lwz 29, 164(1) + lwz 28, 160(1) + lwz 27, 156(1) + lwz 26, 152(1) + lwz 25, 148(1) + lwz 24, 144(1) + lwz 23, 140(1) + lwz 22, 136(1) + lwz 21, 132(1) + lwz 20, 128(1) + lwz 19, 124(1) + lwz 18, 120(1) + lwz 17, 116(1) + lwz 16, 112(1) + lwz 15, 108(1) + lwz 14, 104(1) + addi 1, 1, 176 blr asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: stwu 1, -48(1) diff --git a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align16 b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align16 index 03bd98d..f08ac4d 100644 --- a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align16 @@ -1,21 +1,95 @@ asm_test::atomic_memcpy_store_align16::release: - lwz 5, 0(4) - lwz 6, 4(4) - lwz 7, 8(4) - lwz 8, 12(4) + stwu 1, -112(1) + lwz 6, 28(4) + li 5, 12 + lwz 7, 24(4) + lwz 8, 20(4) lwz 9, 16(4) - lwz 10, 20(4) - lwz 11, 24(4) - lwz 4, 28(4) + lwz 10, 12(4) + lwz 11, 8(4) + lwz 12, 4(4) + lwz 4, 0(4) + stw 27, 92(1) + stw 28, 96(1) + addi 28, 3, 28 + stw 29, 100(1) + li 29, 8 + stw 6, 44(1) + stw 7, 40(1) + stw 8, 36(1) + stw 9, 32(1) + stw 10, 28(1) + stw 11, 24(1) + stw 12, 20(1) + addi 12, 1, 48 + stw 4, 16(1) + rlwimi 29, 12, 0, 29, 27 lwsync - stw 5, 0(3) - stw 6, 4(3) - stw 7, 8(3) - stw 8, 12(3) - stw 9, 16(3) - stw 10, 20(3) - stw 11, 24(3) - stw 4, 28(3) + rlwimi 5, 12, 0, 30, 27 + lwz 4, 44(1) + lwz 6, 40(1) + lwz 7, 36(1) + lwz 8, 32(1) + lwz 9, 28(1) + lwz 10, 24(1) + lwz 11, 20(1) + lwz 0, 16(1) + stw 4, 76(1) + li 4, 4 + stw 6, 72(1) + addi 6, 3, 4 + stw 7, 68(1) + addi 7, 3, 8 + stw 8, 64(1) + addi 8, 3, 12 + stw 
9, 60(1) + addi 9, 3, 16 + stw 10, 56(1) + addi 10, 3, 20 + stw 11, 52(1) + addi 11, 3, 24 + stw 0, 48(1) + #APP + lwz 27, 0(12) + stw 27, 0(3) + #NO_APP + rlwimi 4, 12, 0, 30, 28 + #APP + lwz 3, 0(4) + stw 3, 0(6) + #NO_APP + #APP + lwz 3, 0(29) + stw 3, 0(7) + #NO_APP + addi 3, 1, 64 + #APP + lwz 4, 0(5) + stw 4, 0(8) + #NO_APP + addi 4, 1, 68 + #APP + lwz 3, 0(3) + stw 3, 0(9) + #NO_APP + addi 3, 1, 72 + #APP + lwz 4, 0(4) + stw 4, 0(10) + #NO_APP + #APP + lwz 3, 0(3) + stw 3, 0(11) + #NO_APP + addi 4, 1, 76 + #APP + lwz 3, 0(4) + stw 3, 0(28) + #NO_APP + lwz 29, 100(1) + lwz 28, 96(1) + lwz 27, 92(1) + addi 1, 1, 112 blr asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: stwu 1, -48(1) diff --git a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align2 b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align2 index d58a88b..b24c030 100644 --- a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align2 @@ -1,135 +1,154 @@ asm_test::atomic_memcpy_store_align2::release: - stwu 1, -80(1) - lwz 5, 28(4) - lwz 6, 24(4) - stw 5, 44(1) - lwz 5, 20(4) - stw 6, 40(1) - lwz 6, 16(4) - stw 5, 36(1) - lwz 5, 12(4) - stw 6, 32(1) - lwz 6, 8(4) - stw 5, 28(1) - lwz 5, 4(4) + stwu 1, -112(1) + lwz 6, 28(4) + addi 5, 1, 40 + lwz 7, 24(4) + lwz 8, 20(4) + lwz 9, 16(4) + lwz 10, 12(4) + lwz 11, 8(4) + lwz 12, 4(4) lwz 4, 0(4) - stw 6, 24(1) - stw 5, 20(1) - stw 4, 16(1) + stw 22, 72(1) + mr 22, 3 + stw 23, 76(1) + addi 23, 3, 24 + stw 24, 80(1) + addi 24, 3, 12 + stw 25, 84(1) + stw 26, 88(1) + addi 26, 3, 10 + stw 27, 92(1) + stw 28, 96(1) + addi 28, 3, 8 + stw 29, 100(1) + addi 29, 3, 6 + stw 6, 36(1) + stw 7, 32(1) + li 7, 2 + stw 8, 28(1) + rlwimi 7, 5, 0, 31, 29 + stw 9, 24(1) + addi 9, 3, 2 + stw 10, 20(1) + stw 11, 16(1) + addi 11, 3, 4 + stw 12, 12(1) + stw 4, 8(1) lwsync - lwz 4, 44(1) - lwz 5, 40(1) - lwz 6, 36(1) - stw 4, 76(1) 
- lwz 4, 32(1) - stw 5, 72(1) - lwz 5, 28(1) - stw 6, 68(1) - lwz 6, 24(1) - stw 4, 64(1) - lwz 4, 20(1) - stw 5, 60(1) - addi 5, 3, 3 - stw 6, 56(1) - rlwinm 7, 5, 0, 0, 29 - lwz 6, 16(1) - stw 4, 52(1) - sub 4, 7, 3 - cmplwi 4, 32 - stw 6, 48(1) - bgt 0, .LBB6_5 - cmplwi 4, 0 + lwz 4, 36(1) + lwz 6, 32(1) + lwz 8, 28(1) + lwz 10, 24(1) + lwz 12, 20(1) + lwz 0, 16(1) + lwz 27, 12(1) + lwz 25, 8(1) + stw 4, 68(1) + addi 4, 3, 14 + stw 6, 64(1) + addi 6, 3, 16 + stw 8, 60(1) + addi 8, 3, 18 + stw 10, 56(1) + addi 10, 3, 20 + stw 12, 52(1) + addi 12, 3, 22 + stw 0, 48(1) + stw 27, 44(1) + addi 27, 3, 26 + stw 25, 40(1) + #APP + lhz 25, 0(5) + sth 25, 0(22) + #NO_APP + #APP + lhz 7, 0(7) + sth 7, 0(9) + #NO_APP + addi 25, 3, 28 + li 7, 4 + li 9, 6 + rlwimi 7, 5, 0, 30, 28 + rlwimi 9, 5, 0, 31, 28 + #APP + lhz 5, 0(7) + sth 5, 0(11) + #NO_APP + addi 3, 3, 30 addi 5, 1, 48 - beq 0, .LBB6_6 - addi 6, 3, 32 - addi 8, 1, 47 - addi 9, 3, -1 - mtctr 4 -.LBB6_3: - lbzu 10, 1(8) - addi 11, 9, 1 - stb 10, 1(9) - mr 9, 11 - bdnz .LBB6_3 - sub 6, 6, 7 - cmplwi 6, 4 - bge 0, .LBB6_7 - b .LBB6_11 -.LBB6_5: - lhz 4, 48(1) + #APP + lhz 7, 0(9) + sth 7, 0(29) + #NO_APP + addi 7, 1, 50 + #APP + lhz 5, 0(5) + sth 5, 0(28) + #NO_APP + addi 5, 1, 52 + #APP + lhz 7, 0(7) + sth 7, 0(26) + #NO_APP + addi 7, 1, 54 + #APP + lhz 5, 0(5) + sth 5, 0(24) + #NO_APP + addi 5, 1, 56 + #APP + lhz 7, 0(7) + sth 7, 0(4) + #NO_APP + addi 4, 1, 58 + #APP + lhz 5, 0(5) + sth 5, 0(6) + #NO_APP + addi 5, 1, 60 + #APP + lhz 4, 0(4) + sth 4, 0(8) + #NO_APP + addi 4, 1, 62 + #APP + lhz 5, 0(5) + sth 5, 0(10) + #NO_APP + addi 5, 1, 64 + #APP + lhz 4, 0(4) + sth 4, 0(12) + #NO_APP + addi 4, 1, 66 + #APP + lhz 5, 0(5) + sth 5, 0(23) + #NO_APP + #APP + lhz 4, 0(4) + sth 4, 0(27) + #NO_APP + addi 5, 1, 68 + #APP + lhz 4, 0(5) + sth 4, 0(25) + #NO_APP + addi 4, 1, 70 + #APP + lhz 4, 0(4) sth 4, 0(3) - lhz 4, 50(1) - sth 4, 2(3) - lhz 4, 52(1) - sth 4, 4(3) - lhz 4, 54(1) - sth 4, 6(3) - lhz 4, 56(1) - sth 4, 8(3) - 
lhz 4, 58(1) - sth 4, 10(3) - lhz 4, 60(1) - sth 4, 12(3) - lhz 4, 62(1) - sth 4, 14(3) - lhz 4, 64(1) - sth 4, 16(3) - lhz 4, 66(1) - sth 4, 18(3) - lhz 4, 68(1) - sth 4, 20(3) - lhz 4, 70(1) - sth 4, 22(3) - lhz 4, 72(1) - sth 4, 24(3) - lhz 4, 74(1) - sth 4, 26(3) - lhz 4, 76(1) - sth 4, 28(3) - lhz 4, 78(1) - sth 4, 30(3) - b .LBB6_14 -.LBB6_6: - li 6, 32 -.LBB6_7: - addi 9, 6, -4 - li 10, 3 - cmplwi 9, 3 - bc 12, 0, .LBB6_9 - ori 9, 10, 0 - b .LBB6_9 -.LBB6_9: - sub 7, 7, 3 - not 9, 9 - add 9, 9, 6 - addi 8, 7, -4 - srwi 9, 9, 2 - add 7, 5, 8 - add 8, 3, 8 - addi 9, 9, 1 - mtctr 9 -.LBB6_10: - lwzu 9, 4(7) - addi 6, 6, -4 - addi 4, 4, 4 - stw 9, 4(8) - addi 8, 8, 4 - bdnz .LBB6_10 -.LBB6_11: - cmplwi 6, 0 - beq 0, .LBB6_14 - addi 7, 4, -1 - add 4, 5, 7 - add 3, 3, 7 - mtctr 6 -.LBB6_13: - lbzu 5, 1(4) - addi 6, 3, 1 - stb 5, 1(3) - mr 3, 6 - bdnz .LBB6_13 -.LBB6_14: - addi 1, 1, 80 + #NO_APP + lwz 29, 100(1) + lwz 28, 96(1) + lwz 27, 92(1) + lwz 26, 88(1) + lwz 25, 84(1) + lwz 24, 80(1) + lwz 23, 76(1) + lwz 22, 72(1) + addi 1, 1, 112 blr asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: stwu 1, -48(1) diff --git a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align4 b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align4 index 285fdcb..0757d57 100644 --- a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align4 @@ -1,21 +1,87 @@ asm_test::atomic_memcpy_store_align4::release: - lwz 5, 0(4) - lwz 6, 4(4) - lwz 7, 8(4) - lwz 8, 12(4) + stwu 1, -80(1) + lwz 6, 28(4) + li 5, 4 + lwz 7, 24(4) + lwz 8, 20(4) lwz 9, 16(4) - lwz 10, 20(4) - lwz 11, 24(4) - lwz 4, 28(4) + lwz 10, 12(4) + lwz 11, 8(4) + lwz 12, 4(4) + lwz 4, 0(4) + stw 6, 44(1) + stw 7, 40(1) + stw 8, 36(1) + stw 9, 32(1) + stw 10, 28(1) + stw 11, 24(1) + stw 12, 20(1) + stw 4, 16(1) lwsync - stw 5, 0(3) - stw 6, 4(3) - stw 7, 8(3) - stw 8, 12(3) - stw 9, 
16(3) - stw 10, 20(3) - stw 11, 24(3) - stw 4, 28(3) + lwz 4, 44(1) + lwz 6, 40(1) + lwz 7, 36(1) + lwz 8, 32(1) + lwz 9, 28(1) + lwz 10, 24(1) + lwz 11, 20(1) + lwz 12, 16(1) + stw 4, 76(1) + addi 4, 1, 48 + stw 6, 72(1) + addi 6, 3, 4 + stw 7, 68(1) + addi 7, 3, 8 + stw 8, 64(1) + addi 8, 3, 12 + stw 9, 60(1) + addi 9, 3, 16 + stw 10, 56(1) + addi 10, 3, 20 + stw 11, 52(1) + addi 11, 3, 24 + stw 12, 48(1) + addi 12, 3, 28 + rlwimi 5, 4, 0, 30, 28 + #APP + lwz 4, 0(4) + stw 4, 0(3) + #NO_APP + addi 3, 1, 56 + #APP + lwz 4, 0(5) + stw 4, 0(6) + #NO_APP + addi 4, 1, 60 + #APP + lwz 3, 0(3) + stw 3, 0(7) + #NO_APP + addi 3, 1, 64 + #APP + lwz 4, 0(4) + stw 4, 0(8) + #NO_APP + addi 4, 1, 68 + #APP + lwz 3, 0(3) + stw 3, 0(9) + #NO_APP + addi 3, 1, 72 + #APP + lwz 4, 0(4) + stw 4, 0(10) + #NO_APP + #APP + lwz 3, 0(3) + stw 3, 0(11) + #NO_APP + addi 4, 1, 76 + #APP + lwz 3, 0(4) + stw 3, 0(12) + #NO_APP + addi 1, 1, 80 blr asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: stwu 1, -48(1) diff --git a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align8 b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align8 index fa96d3d..9ce44dc 100644 --- a/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/powerpc-unknown-linux-gnu/atomic_memcpy_store_align8 @@ -1,21 +1,87 @@ asm_test::atomic_memcpy_store_align8::release: - lwz 5, 0(4) - lwz 6, 4(4) - lwz 7, 8(4) - lwz 8, 12(4) + stwu 1, -80(1) + lwz 6, 28(4) + li 5, 4 + lwz 7, 24(4) + lwz 8, 20(4) lwz 9, 16(4) - lwz 10, 20(4) - lwz 11, 24(4) - lwz 4, 28(4) + lwz 10, 12(4) + lwz 11, 8(4) + lwz 12, 4(4) + lwz 4, 0(4) + stw 6, 44(1) + stw 7, 40(1) + stw 8, 36(1) + stw 9, 32(1) + stw 10, 28(1) + stw 11, 24(1) + stw 12, 20(1) + stw 4, 16(1) lwsync - stw 5, 0(3) - stw 6, 4(3) - stw 7, 8(3) - stw 8, 12(3) - stw 9, 16(3) - stw 10, 20(3) - stw 11, 24(3) - stw 4, 28(3) + lwz 4, 44(1) + lwz 6, 40(1) + lwz 7, 36(1) + lwz 8, 32(1) + lwz 9, 28(1) + 
lwz 10, 24(1) + lwz 11, 20(1) + lwz 12, 16(1) + stw 4, 76(1) + addi 4, 1, 48 + stw 6, 72(1) + addi 6, 3, 4 + stw 7, 68(1) + addi 7, 3, 8 + stw 8, 64(1) + addi 8, 3, 12 + stw 9, 60(1) + addi 9, 3, 16 + stw 10, 56(1) + addi 10, 3, 20 + stw 11, 52(1) + addi 11, 3, 24 + stw 12, 48(1) + addi 12, 3, 28 + rlwimi 5, 4, 0, 30, 28 + #APP + lwz 4, 0(4) + stw 4, 0(3) + #NO_APP + addi 3, 1, 56 + #APP + lwz 4, 0(5) + stw 4, 0(6) + #NO_APP + addi 4, 1, 60 + #APP + lwz 3, 0(3) + stw 3, 0(7) + #NO_APP + addi 3, 1, 64 + #APP + lwz 4, 0(4) + stw 4, 0(8) + #NO_APP + addi 4, 1, 68 + #APP + lwz 3, 0(3) + stw 3, 0(9) + #NO_APP + addi 3, 1, 72 + #APP + lwz 4, 0(4) + stw 4, 0(10) + #NO_APP + #APP + lwz 3, 0(3) + stw 3, 0(11) + #NO_APP + addi 4, 1, 76 + #APP + lwz 3, 0(4) + stw 3, 0(12) + #NO_APP + addi 1, 1, 80 blr asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: stwu 1, -48(1) diff --git a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align1 b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align1 index 8a54514..5574a66 100644 --- a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align1 @@ -1,231 +1,521 @@ asm_test::atomic_memcpy_load_align1::acquire: .Lfunc_begin0: - addi 5, 4, 7 - rldicr 8, 5, 0, 60 - sub 5, 8, 4 - cmpldi 5, 65 - bge 0, .LBB0_5 - cmpldi 5, 0 - addi 6, 1, -64 - beq 0, .LBB0_6 - addi 7, 4, 64 - addi 9, 4, -1 - addi 10, 1, -65 - mtctr 5 -.LBB0_3: - lbz 11, 1(9) - addi 9, 9, 1 - stbu 11, 1(10) - bdnz .LBB0_3 - sub 7, 7, 8 - cmpldi 7, 8 - bge 0, .LBB0_7 - b .LBB0_11 -.LBB0_5: - lbz 5, 0(4) - stb 5, -64(1) - lbz 5, 1(4) - stb 5, -63(1) - lbz 5, 2(4) - stb 5, -62(1) - lbz 5, 3(4) - stb 5, -61(1) - lbz 5, 4(4) - stb 5, -60(1) - lbz 5, 5(4) - stb 5, -59(1) - lbz 5, 6(4) - stb 5, -58(1) - lbz 5, 7(4) - stb 5, -57(1) - lbz 5, 8(4) - stb 5, -56(1) - lbz 5, 9(4) - stb 5, -55(1) - lbz 5, 10(4) - stb 5, -54(1) - lbz 5, 11(4) - stb 5, 
-53(1) - lbz 5, 12(4) - stb 5, -52(1) - lbz 5, 13(4) - stb 5, -51(1) - lbz 5, 14(4) - stb 5, -50(1) - lbz 5, 15(4) - stb 5, -49(1) - lbz 5, 16(4) - stb 5, -48(1) - lbz 5, 17(4) - stb 5, -47(1) - lbz 5, 18(4) - stb 5, -46(1) - lbz 5, 19(4) - stb 5, -45(1) - lbz 5, 20(4) - stb 5, -44(1) - lbz 5, 21(4) - stb 5, -43(1) - lbz 5, 22(4) - stb 5, -42(1) - lbz 5, 23(4) - stb 5, -41(1) - lbz 5, 24(4) - stb 5, -40(1) - lbz 5, 25(4) - stb 5, -39(1) - lbz 5, 26(4) - stb 5, -38(1) - lbz 5, 27(4) - stb 5, -37(1) - lbz 5, 28(4) - stb 5, -36(1) - lbz 5, 29(4) - stb 5, -35(1) - lbz 5, 30(4) - stb 5, -34(1) - lbz 5, 31(4) - stb 5, -33(1) - lbz 5, 32(4) - stb 5, -32(1) - lbz 5, 33(4) - stb 5, -31(1) - lbz 5, 34(4) - stb 5, -30(1) - lbz 5, 35(4) - stb 5, -29(1) - lbz 5, 36(4) - stb 5, -28(1) - lbz 5, 37(4) - stb 5, -27(1) - lbz 5, 38(4) - stb 5, -26(1) - lbz 5, 39(4) - stb 5, -25(1) - lbz 5, 40(4) - stb 5, -24(1) - lbz 5, 41(4) - stb 5, -23(1) - lbz 5, 42(4) - stb 5, -22(1) - lbz 5, 43(4) - stb 5, -21(1) - lbz 5, 44(4) - stb 5, -20(1) - lbz 5, 45(4) - stb 5, -19(1) - lbz 5, 46(4) - stb 5, -18(1) - lbz 5, 47(4) - stb 5, -17(1) - lbz 5, 48(4) - stb 5, -16(1) - lbz 5, 49(4) - stb 5, -15(1) - lbz 5, 50(4) - stb 5, -14(1) - lbz 5, 51(4) - stb 5, -13(1) - lbz 5, 52(4) - stb 5, -12(1) - lbz 5, 53(4) - stb 5, -11(1) - lbz 5, 54(4) - stb 5, -10(1) - lbz 5, 55(4) - stb 5, -9(1) - lbz 5, 56(4) - stb 5, -8(1) - lbz 5, 57(4) - stb 5, -7(1) - lbz 5, 58(4) - stb 5, -6(1) - lbz 5, 59(4) - stb 5, -5(1) - lbz 5, 60(4) - stb 5, -4(1) - lbz 5, 61(4) - stb 5, -3(1) - lbz 5, 62(4) - stb 5, -2(1) - lbz 4, 63(4) - ld 5, -64(1) - stb 4, -1(1) - ld 4, -56(1) - std 5, 0(3) - ld 5, -48(1) - std 4, 8(3) - ld 4, -40(1) - std 5, 16(3) - ld 5, -32(1) - std 4, 24(3) - ld 4, -24(1) - std 5, 32(3) - ld 5, -16(1) - std 4, 40(3) - ld 4, -8(1) - std 5, 48(3) - std 4, 56(3) - lwsync - blr -.LBB0_6: - li 7, 64 -.LBB0_7: - addi 9, 7, -8 - li 10, 7 - cmpldi 9, 7 - sub 8, 8, 4 - bc 12, 0, .LBB0_9 - ori 9, 10, 0 - b .LBB0_9 
-.LBB0_9: - not 9, 9 - add 9, 9, 7 - addi 10, 8, -8 - rldicl 9, 9, 61, 3 - add 8, 4, 10 - addi 11, 9, 1 - add 9, 6, 10 - mtctr 11 -.LBB0_10: - ld 10, 8(8) - addi 11, 9, 8 - addi 8, 8, 8 - addi 7, 7, -8 - addi 5, 5, 8 - std 10, 8(9) - mr 9, 11 - bdnz .LBB0_10 -.LBB0_11: - cmpldi 7, 0 - beq 0, .LBB0_14 - addi 5, 5, -1 - add 4, 4, 5 - add 5, 6, 5 - mtctr 7 -.LBB0_13: - lbz 6, 1(4) - addi 4, 4, 1 - stbu 6, 1(5) - bdnz .LBB0_13 -.LBB0_14: - ld 4, -8(1) - std 4, 56(3) - ld 4, -16(1) - std 4, 48(3) - ld 4, -24(1) - std 4, 40(3) - ld 4, -32(1) - std 4, 32(3) - ld 4, -40(1) - std 4, 24(3) - ld 4, -48(1) - std 4, 16(3) - ld 4, -56(1) - std 4, 8(3) - ld 4, -64(1) + stdu 1, -576(1) + addi 5, 1, 369 + std 14, 432(1) + addi 14, 1, 418 + addi 6, 1, 421 + addi 7, 1, 422 + std 5, 360(1) + addi 5, 1, 370 + addi 8, 1, 423 + std 5, 352(1) + addi 5, 1, 371 + addi 9, 1, 424 + addi 10, 1, 425 + addi 11, 1, 426 + std 5, 344(1) + addi 5, 1, 372 + addi 12, 1, 427 + std 5, 336(1) + addi 5, 1, 373 + std 5, 328(1) + addi 5, 1, 374 + std 5, 320(1) + addi 5, 1, 375 + std 5, 312(1) + addi 5, 1, 376 + std 5, 304(1) + addi 5, 1, 377 + std 5, 296(1) + addi 5, 1, 378 + std 5, 288(1) + addi 5, 1, 379 + std 5, 280(1) + addi 5, 1, 380 + std 5, 272(1) + addi 5, 1, 381 + std 5, 264(1) + addi 5, 1, 382 + std 5, 256(1) + addi 5, 1, 383 + std 5, 248(1) + addi 5, 1, 384 + std 5, 240(1) + addi 5, 1, 385 + std 5, 232(1) + addi 5, 1, 386 + std 5, 224(1) + addi 5, 1, 387 + std 5, 216(1) + addi 5, 1, 388 + std 5, 208(1) + addi 5, 1, 389 + std 5, 200(1) + addi 5, 1, 390 + std 5, 192(1) + addi 5, 1, 391 + std 5, 184(1) + addi 5, 1, 392 + std 5, 176(1) + addi 5, 1, 393 + std 5, 168(1) + addi 5, 1, 394 + std 5, 160(1) + addi 5, 1, 395 + std 5, 152(1) + addi 5, 1, 396 + std 5, 144(1) + addi 5, 1, 397 + std 5, 136(1) + addi 5, 1, 398 + std 5, 128(1) + addi 5, 1, 399 + std 5, 120(1) + addi 5, 1, 400 + std 5, 112(1) + addi 5, 1, 401 + std 5, 104(1) + addi 5, 1, 402 + std 5, 96(1) + addi 5, 1, 403 + std 5, 88(1) + addi 5, 
1, 404 + std 5, 80(1) + addi 5, 1, 405 + std 5, 72(1) + addi 5, 1, 406 + std 5, 64(1) + addi 5, 1, 407 + std 15, 440(1) + addi 15, 1, 417 + std 16, 448(1) + addi 16, 1, 416 + std 17, 456(1) + addi 17, 1, 415 + std 18, 464(1) + addi 18, 1, 414 + std 19, 472(1) + addi 19, 1, 413 + std 20, 480(1) + addi 20, 1, 412 + std 21, 488(1) + addi 21, 1, 411 + std 22, 496(1) + addi 22, 1, 410 + std 23, 504(1) + addi 23, 1, 409 + std 24, 512(1) + addi 24, 1, 408 + std 25, 520(1) + std 26, 528(1) + addi 26, 1, 368 + std 27, 536(1) + std 28, 544(1) + addi 28, 1, 430 + std 29, 552(1) + addi 29, 1, 429 + std 30, 560(1) + addi 30, 1, 428 + std 31, 568(1) + addi 31, 1, 419 + std 5, 56(1) + addi 5, 1, 420 + #APP + lbz 27, 0(4) + stb 27, 0(26) + #NO_APP + addi 27, 4, 1 + addi 26, 4, 2 + ld 25, 360(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 352(1) + #APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 4 + addi 27, 4, 3 + ld 25, 344(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 336(1) + #APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 6 + addi 27, 4, 5 + ld 25, 328(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 320(1) + #APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 8 + addi 27, 4, 7 + ld 25, 312(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 304(1) + #APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 10 + addi 27, 4, 9 + ld 25, 296(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 288(1) + #APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 12 + addi 27, 4, 11 + ld 25, 280(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 272(1) + #APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 14 + addi 27, 4, 13 + ld 25, 264(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 256(1) + #APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 16 + addi 27, 4, 15 + ld 25, 248(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 240(1) + 
#APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 18 + addi 27, 4, 17 + ld 25, 232(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 224(1) + #APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 20 + addi 27, 4, 19 + ld 25, 216(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 208(1) + #APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 22 + addi 27, 4, 21 + ld 25, 200(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 192(1) + #APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 24 + addi 27, 4, 23 + ld 25, 184(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 176(1) + #APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 26 + addi 27, 4, 25 + ld 25, 168(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 160(1) + #APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 28 + addi 27, 4, 27 + ld 25, 152(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 144(1) + #APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 30 + addi 27, 4, 29 + ld 25, 136(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 128(1) + #APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 32 + addi 27, 4, 31 + ld 25, 120(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 112(1) + #APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 34 + addi 27, 4, 33 + ld 25, 104(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 96(1) + #APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 36 + addi 27, 4, 35 + ld 25, 88(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + ld 25, 80(1) + #APP + lbz 27, 0(26) + stb 27, 0(25) + #NO_APP + addi 26, 4, 38 + addi 27, 4, 37 + ld 25, 72(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + addi 27, 4, 39 + ld 25, 64(1) + #APP + lbz 26, 0(26) + stb 26, 0(25) + #NO_APP + addi 26, 4, 40 + ld 25, 56(1) + #APP + lbz 27, 0(27) + stb 27, 0(25) + #NO_APP + addi 27, 4, 41 + #APP + lbz 26, 0(26) + stb 26, 0(24) 
+ #NO_APP + addi 26, 4, 42 + #APP + lbz 27, 0(27) + stb 27, 0(23) + #NO_APP + addi 27, 4, 43 + #APP + lbz 26, 0(26) + stb 26, 0(22) + #NO_APP + addi 26, 4, 44 + #APP + lbz 27, 0(27) + stb 27, 0(21) + #NO_APP + addi 27, 4, 45 + #APP + lbz 26, 0(26) + stb 26, 0(20) + #NO_APP + addi 26, 4, 46 + #APP + lbz 27, 0(27) + stb 27, 0(19) + #NO_APP + addi 27, 4, 47 + #APP + lbz 26, 0(26) + stb 26, 0(18) + #NO_APP + addi 26, 4, 48 + #APP + lbz 27, 0(27) + stb 27, 0(17) + #NO_APP + addi 27, 4, 49 + #APP + lbz 26, 0(26) + stb 26, 0(16) + #NO_APP + addi 26, 4, 50 + #APP + lbz 27, 0(27) + stb 27, 0(15) + #NO_APP + addi 27, 4, 51 + #APP + lbz 26, 0(26) + stb 26, 0(14) + #NO_APP + addi 26, 4, 52 + #APP + lbz 27, 0(27) + stb 27, 0(31) + #NO_APP + addi 27, 4, 53 + #APP + lbz 26, 0(26) + stb 26, 0(5) + #NO_APP + addi 5, 4, 54 + #APP + lbz 27, 0(27) + stb 27, 0(6) + #NO_APP + addi 6, 4, 55 + #APP + lbz 5, 0(5) + stb 5, 0(7) + #NO_APP + addi 5, 4, 56 + #APP + lbz 6, 0(6) + stb 6, 0(8) + #NO_APP + addi 6, 4, 57 + #APP + lbz 5, 0(5) + stb 5, 0(9) + #NO_APP + addi 5, 4, 58 + #APP + lbz 6, 0(6) + stb 6, 0(10) + #NO_APP + addi 6, 4, 59 + #APP + lbz 5, 0(5) + stb 5, 0(11) + #NO_APP + addi 5, 4, 60 + #APP + lbz 6, 0(6) + stb 6, 0(12) + #NO_APP + addi 6, 4, 61 + #APP + lbz 5, 0(5) + stb 5, 0(30) + #NO_APP + addi 5, 4, 62 + #APP + lbz 6, 0(6) + stb 6, 0(29) + #NO_APP + addi 6, 1, 431 + #APP + lbz 5, 0(5) + stb 5, 0(28) + #NO_APP + addi 4, 4, 63 + #APP + lbz 4, 0(4) + stb 4, 0(6) + #NO_APP + ld 4, 368(1) + ld 5, 376(1) + ld 6, 384(1) + ld 7, 392(1) std 4, 0(3) + ld 4, 400(1) + std 5, 8(3) + ld 5, 408(1) + std 6, 16(3) + ld 6, 416(1) + std 7, 24(3) + ld 7, 424(1) + std 4, 32(3) + std 5, 40(3) + std 6, 48(3) + std 7, 56(3) lwsync + ld 31, 568(1) + ld 30, 560(1) + ld 29, 552(1) + ld 28, 544(1) + ld 27, 536(1) + ld 26, 528(1) + ld 25, 520(1) + ld 24, 512(1) + ld 23, 504(1) + ld 22, 496(1) + ld 21, 488(1) + ld 20, 480(1) + ld 19, 472(1) + ld 18, 464(1) + ld 17, 456(1) + ld 16, 448(1) + ld 15, 440(1) + 
ld 14, 432(1) + addi 1, 1, 576 blr asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: .Lfunc_begin1: diff --git a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align16 b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align16 index 6953e9f..1d67e48 100644 --- a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align16 @@ -1,22 +1,71 @@ asm_test::atomic_memcpy_load_align16::acquire: .Lfunc_begin16: - ld 5, 0(4) - ld 6, 8(4) - ld 7, 16(4) - ld 8, 24(4) - ld 9, 32(4) - ld 10, 40(4) - ld 11, 48(4) - ld 4, 56(4) - std 5, 0(3) - std 6, 8(3) - std 7, 16(3) - std 8, 24(3) - std 9, 32(3) - std 10, 40(3) - std 11, 48(3) - std 4, 56(3) + addi 5, 1, -96 + std 28, -32(1) + addi 6, 1, -88 + addi 7, 1, -80 + addi 8, 1, -72 + std 29, -24(1) + addi 29, 4, 8 + addi 9, 1, -64 + std 30, -16(1) + mr 30, 5 + addi 10, 1, -56 + #APP + ld 28, 0(4) + std 28, 0(30) + #NO_APP + #APP + ld 30, 0(29) + std 30, 0(6) + #NO_APP + addi 11, 1, -48 + addi 12, 1, -40 + addi 6, 4, 16 + #APP + ld 6, 0(6) + std 6, 0(7) + #NO_APP + addi 6, 4, 24 + #APP + ld 6, 0(6) + std 6, 0(8) + #NO_APP + addi 6, 4, 32 + #APP + ld 6, 0(6) + std 6, 0(9) + #NO_APP + addi 6, 4, 40 + #APP + ld 6, 0(6) + std 6, 0(10) + #NO_APP + addi 6, 4, 48 + addi 4, 4, 56 + #APP + ld 6, 0(6) + std 6, 0(11) + #NO_APP + li 6, 16 + #APP + ld 4, 0(4) + std 4, 0(12) + #NO_APP + lvx 2, 5, 6 + li 4, 32 + stvx 2, 3, 6 + lvx 2, 5, 4 + stvx 2, 3, 4 + li 4, 48 + lvx 2, 5, 4 + stvx 2, 3, 4 + lvx 2, 0, 5 + stvx 2, 0, 3 lwsync + ld 30, -16(1) + ld 29, -24(1) + ld 28, -32(1) blr asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: .Lfunc_begin17: diff --git a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align2 b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align2 index a17f61a..0258270 100644 --- 
a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align2 @@ -1,152 +1,263 @@ asm_test::atomic_memcpy_load_align2::acquire: .Lfunc_begin4: - addi 5, 4, 7 - rldicr 8, 5, 0, 60 - sub 5, 8, 4 - cmpldi 5, 64 - bgt 0, .LBB4_5 - cmpldi 5, 0 - addi 6, 1, -64 - beq 0, .LBB4_6 - addi 7, 4, 64 - addi 9, 4, -1 - addi 10, 6, -1 - mtctr 5 -.LBB4_3: - lbz 11, 1(9) - addi 9, 9, 1 - stbu 11, 1(10) - bdnz .LBB4_3 - sub 7, 7, 8 - cmpldi 7, 8 - bge 0, .LBB4_7 - b .LBB4_11 -.LBB4_5: - lhz 5, 0(4) - sth 5, -64(1) - lhz 5, 2(4) - sth 5, -62(1) - lhz 5, 4(4) - sth 5, -60(1) - lhz 5, 6(4) - sth 5, -58(1) - lhz 5, 8(4) - sth 5, -56(1) - lhz 5, 10(4) - sth 5, -54(1) - lhz 5, 12(4) - sth 5, -52(1) - lhz 5, 14(4) - sth 5, -50(1) - lhz 5, 16(4) - sth 5, -48(1) - lhz 5, 18(4) - sth 5, -46(1) - lhz 5, 20(4) - sth 5, -44(1) - lhz 5, 22(4) - sth 5, -42(1) - lhz 5, 24(4) - sth 5, -40(1) - lhz 5, 26(4) - sth 5, -38(1) - lhz 5, 28(4) - sth 5, -36(1) - lhz 5, 30(4) - sth 5, -34(1) - lhz 5, 32(4) - sth 5, -32(1) - lhz 5, 34(4) - sth 5, -30(1) - lhz 5, 36(4) - sth 5, -28(1) - lhz 5, 38(4) - sth 5, -26(1) - lhz 5, 40(4) - sth 5, -24(1) - lhz 5, 42(4) - sth 5, -22(1) - lhz 5, 44(4) - sth 5, -20(1) - lhz 5, 46(4) - sth 5, -18(1) - lhz 5, 48(4) - sth 5, -16(1) - lhz 5, 50(4) - sth 5, -14(1) - lhz 5, 52(4) - sth 5, -12(1) - lhz 5, 54(4) - sth 5, -10(1) - lhz 5, 56(4) - sth 5, -8(1) - lhz 5, 58(4) - sth 5, -6(1) - lhz 5, 60(4) - sth 5, -4(1) - lhz 4, 62(4) - sth 4, -2(1) - b .LBB4_14 -.LBB4_6: - li 7, 64 -.LBB4_7: - addi 9, 7, -8 - li 11, 7 - cmpldi 9, 7 - sub 10, 8, 4 - bc 12, 0, .LBB4_9 - ori 9, 11, 0 - b .LBB4_9 -.LBB4_9: - not 9, 9 - add 8, 8, 6 - add 9, 9, 7 - add 10, 10, 4 - sub 11, 8, 4 - rldicl 9, 9, 61, 3 - addi 8, 10, -8 - addi 10, 9, 1 - addi 9, 11, -8 - mtctr 10 -.LBB4_10: - ld 10, 8(8) - addi 11, 9, 8 - addi 8, 8, 8 - addi 7, 7, -8 - addi 5, 5, 8 - std 10, 8(9) - mr 9, 11 - bdnz .LBB4_10 -.LBB4_11: - 
cmpldi 7, 0 - beq 0, .LBB4_14 - addi 5, 5, -1 - add 4, 4, 5 - add 5, 6, 5 - mtctr 7 -.LBB4_13: - lbz 6, 1(4) - addi 4, 4, 1 - stbu 6, 1(5) - bdnz .LBB4_13 -.LBB4_14: - ld 4, -8(1) - std 4, 56(3) - ld 4, -16(1) - std 4, 48(3) - ld 4, -24(1) - std 4, 40(3) - ld 4, -32(1) - std 4, 32(3) - ld 4, -40(1) - std 4, 24(3) - ld 4, -48(1) - std 4, 16(3) - ld 4, -56(1) - std 4, 8(3) - ld 4, -64(1) + addi 5, 1, -206 + std 14, -144(1) + addi 11, 1, -208 + addi 14, 1, -160 + addi 6, 1, -154 + std 5, -216(1) + addi 5, 1, -204 + addi 7, 1, -152 + std 5, -224(1) + addi 5, 1, -202 + addi 8, 1, -150 + addi 9, 1, -148 + std 5, -232(1) + addi 5, 1, -200 + std 5, -240(1) + addi 5, 1, -198 + std 5, -248(1) + addi 5, 1, -196 + std 5, -256(1) + addi 5, 1, -194 + std 15, -136(1) + addi 15, 1, -162 + std 16, -128(1) + addi 16, 1, -164 + std 17, -120(1) + addi 17, 1, -166 + std 18, -112(1) + addi 18, 1, -168 + std 19, -104(1) + addi 19, 1, -170 + std 20, -96(1) + addi 20, 1, -172 + std 21, -88(1) + addi 21, 1, -174 + std 22, -80(1) + addi 22, 1, -176 + std 23, -72(1) + addi 23, 1, -178 + std 24, -64(1) + addi 24, 1, -180 + std 25, -56(1) + addi 25, 1, -182 + std 26, -48(1) + addi 26, 1, -184 + std 27, -40(1) + addi 27, 1, -186 + std 28, -32(1) + addi 28, 1, -188 + std 29, -24(1) + addi 29, 1, -190 + std 30, -16(1) + addi 30, 1, -192 + std 31, -8(1) + addi 31, 1, -158 + std 5, -264(1) + addi 5, 1, -156 + #APP + lhz 10, 0(4) + sth 10, 0(11) + #NO_APP + addi 10, 4, 2 + addi 11, 4, 4 + ld 12, -216(1) + #APP + lhz 10, 0(10) + sth 10, 0(12) + #NO_APP + ld 12, -224(1) + #APP + lhz 10, 0(11) + sth 10, 0(12) + #NO_APP + addi 11, 4, 8 + addi 10, 4, 6 + ld 12, -232(1) + #APP + lhz 10, 0(10) + sth 10, 0(12) + #NO_APP + ld 12, -240(1) + #APP + lhz 10, 0(11) + sth 10, 0(12) + #NO_APP + addi 11, 4, 12 + addi 10, 4, 10 + ld 12, -248(1) + #APP + lhz 10, 0(10) + sth 10, 0(12) + #NO_APP + addi 10, 4, 14 + ld 12, -256(1) + #APP + lhz 11, 0(11) + sth 11, 0(12) + #NO_APP + addi 11, 4, 16 + ld 12, -264(1) + #APP + 
lhz 10, 0(10) + sth 10, 0(12) + #NO_APP + addi 10, 4, 18 + #APP + lhz 11, 0(11) + sth 11, 0(30) + #NO_APP + addi 11, 4, 20 + #APP + lhz 10, 0(10) + sth 10, 0(29) + #NO_APP + addi 10, 4, 22 + #APP + lhz 11, 0(11) + sth 11, 0(28) + #NO_APP + addi 11, 4, 24 + #APP + lhz 10, 0(10) + sth 10, 0(27) + #NO_APP + addi 10, 4, 26 + #APP + lhz 11, 0(11) + sth 11, 0(26) + #NO_APP + addi 11, 4, 28 + #APP + lhz 10, 0(10) + sth 10, 0(25) + #NO_APP + addi 10, 4, 30 + #APP + lhz 11, 0(11) + sth 11, 0(24) + #NO_APP + addi 11, 4, 32 + #APP + lhz 10, 0(10) + sth 10, 0(23) + #NO_APP + addi 10, 4, 34 + #APP + lhz 11, 0(11) + sth 11, 0(22) + #NO_APP + addi 11, 4, 36 + #APP + lhz 10, 0(10) + sth 10, 0(21) + #NO_APP + addi 10, 4, 38 + #APP + lhz 11, 0(11) + sth 11, 0(20) + #NO_APP + addi 11, 4, 40 + #APP + lhz 10, 0(10) + sth 10, 0(19) + #NO_APP + addi 10, 4, 42 + #APP + lhz 11, 0(11) + sth 11, 0(18) + #NO_APP + addi 11, 4, 44 + #APP + lhz 10, 0(10) + sth 10, 0(17) + #NO_APP + addi 10, 4, 46 + #APP + lhz 11, 0(11) + sth 11, 0(16) + #NO_APP + addi 11, 4, 48 + #APP + lhz 10, 0(10) + sth 10, 0(15) + #NO_APP + addi 10, 4, 50 + #APP + lhz 11, 0(11) + sth 11, 0(14) + #NO_APP + addi 11, 4, 52 + #APP + lhz 10, 0(10) + sth 10, 0(31) + #NO_APP + addi 10, 4, 54 + #APP + lhz 11, 0(11) + sth 11, 0(5) + #NO_APP + addi 5, 4, 56 + #APP + lhz 10, 0(10) + sth 10, 0(6) + #NO_APP + addi 6, 4, 58 + #APP + lhz 5, 0(5) + sth 5, 0(7) + #NO_APP + addi 5, 4, 60 + #APP + lhz 6, 0(6) + sth 6, 0(8) + #NO_APP + addi 6, 1, -146 + #APP + lhz 5, 0(5) + sth 5, 0(9) + #NO_APP + addi 4, 4, 62 + #APP + lhz 4, 0(4) + sth 4, 0(6) + #NO_APP + ld 4, -208(1) + ld 5, -200(1) + ld 6, -192(1) + ld 7, -184(1) std 4, 0(3) + ld 4, -176(1) + std 5, 8(3) + ld 5, -168(1) + std 6, 16(3) + ld 6, -160(1) + std 7, 24(3) + ld 7, -152(1) + std 4, 32(3) + std 5, 40(3) + std 6, 48(3) + std 7, 56(3) lwsync + ld 31, -8(1) + ld 30, -16(1) + ld 29, -24(1) + ld 28, -32(1) + ld 27, -40(1) + ld 26, -48(1) + ld 25, -56(1) + ld 24, -64(1) + ld 23, -72(1) + 
ld 22, -80(1) + ld 21, -88(1) + ld 20, -96(1) + ld 19, -104(1) + ld 18, -112(1) + ld 17, -120(1) + ld 16, -128(1) + ld 15, -136(1) + ld 14, -144(1) blr asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: .Lfunc_begin5: diff --git a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align4 b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align4 index 7d7cc3d..fab9325 100644 --- a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align4 @@ -1,120 +1,133 @@ asm_test::atomic_memcpy_load_align4::acquire: .Lfunc_begin8: - addi 5, 4, 7 - rldicr 8, 5, 0, 60 - sub 5, 8, 4 - cmpldi 5, 64 - bgt 0, .LBB8_5 - cmpldi 5, 0 - addi 6, 1, -64 - beq 0, .LBB8_6 - addi 7, 4, 64 - addi 9, 4, -1 - addi 10, 6, -1 - mtctr 5 -.LBB8_3: - lbz 11, 1(9) - addi 9, 9, 1 - stbu 11, 1(10) - bdnz .LBB8_3 - sub 7, 7, 8 - cmpldi 7, 8 - bge 0, .LBB8_7 - b .LBB8_11 -.LBB8_5: - lwz 5, 0(4) - stw 5, -64(1) - lwz 5, 4(4) - stw 5, -60(1) - lwz 5, 8(4) - stw 5, -56(1) - lwz 5, 12(4) - stw 5, -52(1) - lwz 5, 16(4) - stw 5, -48(1) - lwz 5, 20(4) - stw 5, -44(1) - lwz 5, 24(4) - stw 5, -40(1) - lwz 5, 28(4) - stw 5, -36(1) - lwz 5, 32(4) - stw 5, -32(1) - lwz 5, 36(4) - stw 5, -28(1) - lwz 5, 40(4) - stw 5, -24(1) - lwz 5, 44(4) - stw 5, -20(1) - lwz 5, 48(4) - stw 5, -16(1) - lwz 5, 52(4) - stw 5, -12(1) - lwz 5, 56(4) - stw 5, -8(1) - lwz 4, 60(4) - stw 4, -4(1) - b .LBB8_14 -.LBB8_6: - li 7, 64 -.LBB8_7: - addi 9, 7, -8 - li 11, 7 - cmpldi 9, 7 - sub 10, 8, 4 - bc 12, 0, .LBB8_9 - ori 9, 11, 0 - b .LBB8_9 -.LBB8_9: - not 9, 9 - add 8, 8, 6 - add 9, 9, 7 - add 10, 10, 4 - sub 11, 8, 4 - rldicl 9, 9, 61, 3 - addi 8, 10, -8 - addi 10, 9, 1 - addi 9, 11, -8 - mtctr 10 -.LBB8_10: - ld 10, 8(8) - addi 11, 9, 8 - addi 8, 8, 8 - addi 7, 7, -8 - addi 5, 5, 8 - std 10, 8(9) - mr 9, 11 - bdnz .LBB8_10 -.LBB8_11: - cmpldi 7, 0 - beq 0, .LBB8_14 - addi 5, 5, -1 - add 4, 4, 5 - add 
5, 6, 5 - mtctr 7 -.LBB8_13: - lbz 6, 1(4) - addi 4, 4, 1 - stbu 6, 1(5) - bdnz .LBB8_13 -.LBB8_14: - ld 4, -8(1) - std 4, 56(3) - ld 4, -16(1) - std 4, 48(3) - ld 4, -24(1) - std 4, 40(3) - ld 4, -32(1) - std 4, 32(3) - ld 4, -40(1) - std 4, 24(3) - ld 4, -48(1) - std 4, 16(3) - ld 4, -56(1) - std 4, 8(3) - ld 4, -64(1) + std 23, -72(1) + addi 5, 1, -136 + addi 6, 1, -132 + addi 7, 1, -128 + addi 8, 1, -124 + std 24, -64(1) + addi 9, 1, -120 + addi 10, 1, -116 + std 25, -56(1) + addi 11, 1, -112 + addi 12, 1, -108 + addi 25, 1, -84 + addi 24, 1, -80 + std 26, -48(1) + addi 26, 1, -88 + std 27, -40(1) + addi 27, 1, -92 + std 28, -32(1) + addi 28, 1, -96 + std 29, -24(1) + addi 29, 1, -100 + std 30, -16(1) + addi 30, 1, -104 + #APP + lwz 23, 0(4) + stw 23, 0(5) + #NO_APP + addi 5, 4, 4 + addi 23, 4, 8 + #APP + lwz 5, 0(5) + stw 5, 0(6) + #NO_APP + addi 5, 4, 12 + #APP + lwz 6, 0(23) + stw 6, 0(7) + #NO_APP + addi 6, 4, 16 + #APP + lwz 5, 0(5) + stw 5, 0(8) + #NO_APP + addi 5, 4, 20 + #APP + lwz 6, 0(6) + stw 6, 0(9) + #NO_APP + addi 6, 4, 24 + #APP + lwz 5, 0(5) + stw 5, 0(10) + #NO_APP + addi 5, 4, 28 + #APP + lwz 6, 0(6) + stw 6, 0(11) + #NO_APP + addi 6, 4, 32 + #APP + lwz 5, 0(5) + stw 5, 0(12) + #NO_APP + addi 5, 4, 36 + #APP + lwz 6, 0(6) + stw 6, 0(30) + #NO_APP + addi 6, 4, 40 + #APP + lwz 5, 0(5) + stw 5, 0(29) + #NO_APP + addi 5, 4, 44 + #APP + lwz 6, 0(6) + stw 6, 0(28) + #NO_APP + addi 6, 4, 48 + #APP + lwz 5, 0(5) + stw 5, 0(27) + #NO_APP + addi 5, 4, 52 + #APP + lwz 6, 0(6) + stw 6, 0(26) + #NO_APP + addi 6, 4, 56 + #APP + lwz 5, 0(5) + stw 5, 0(25) + #NO_APP + addi 5, 1, -76 + #APP + lwz 6, 0(6) + stw 6, 0(24) + #NO_APP + addi 4, 4, 60 + #APP + lwz 4, 0(4) + stw 4, 0(5) + #NO_APP + ld 4, -136(1) + ld 5, -128(1) + ld 6, -120(1) + ld 7, -112(1) std 4, 0(3) + ld 4, -104(1) + std 5, 8(3) + ld 5, -96(1) + std 6, 16(3) + ld 6, -88(1) + std 7, 24(3) + ld 7, -80(1) + std 4, 32(3) + std 5, 40(3) + std 6, 48(3) + std 7, 56(3) lwsync + ld 30, -16(1) + ld 29, 
-24(1) + ld 28, -32(1) + ld 27, -40(1) + ld 26, -48(1) + ld 25, -56(1) + ld 24, -64(1) + ld 23, -72(1) blr asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: .Lfunc_begin9: diff --git a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align8 b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align8 index dc9a02c..0969cca 100644 --- a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_load_align8 @@ -1,21 +1,68 @@ asm_test::atomic_memcpy_load_align8::acquire: .Lfunc_begin12: - ld 5, 0(4) - ld 6, 8(4) - ld 7, 16(4) - ld 8, 24(4) - ld 9, 32(4) - ld 10, 40(4) - ld 11, 48(4) - ld 4, 56(4) - std 5, 0(3) - std 6, 8(3) - std 7, 16(3) - std 8, 24(3) - std 9, 32(3) - std 10, 40(3) - std 11, 48(3) - std 4, 56(3) + addi 5, 1, -64 + addi 6, 1, -56 + #APP + ld 12, 0(4) + std 12, 0(5) + #NO_APP + addi 5, 4, 8 + addi 7, 1, -48 + #APP + ld 5, 0(5) + std 5, 0(6) + #NO_APP + addi 8, 1, -40 + addi 12, 4, 16 + #APP + ld 6, 0(12) + std 6, 0(7) + #NO_APP + addi 5, 4, 24 + addi 9, 1, -32 + #APP + ld 5, 0(5) + std 5, 0(8) + #NO_APP + addi 6, 4, 32 + addi 10, 1, -24 + #APP + ld 6, 0(6) + std 6, 0(9) + #NO_APP + addi 5, 4, 40 + addi 11, 1, -16 + #APP + ld 5, 0(5) + std 5, 0(10) + #NO_APP + addi 6, 4, 48 + addi 5, 1, -8 + #APP + ld 6, 0(6) + std 6, 0(11) + #NO_APP + addi 4, 4, 56 + #APP + ld 4, 0(4) + std 4, 0(5) + #NO_APP + ld 4, -64(1) + ld 5, -56(1) + ld 6, -48(1) + ld 7, -40(1) + std 4, 0(3) + ld 4, -32(1) + std 5, 8(3) + ld 5, -24(1) + std 6, 16(3) + ld 6, -16(1) + std 7, 24(3) + ld 7, -8(1) + std 4, 32(3) + std 5, 40(3) + std 6, 48(3) + std 7, 56(3) lwsync blr asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: diff --git a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align1 b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align1 index ecebed8..613d702 100644 --- 
a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align1 @@ -1,231 +1,537 @@ asm_test::atomic_memcpy_store_align1::release: .Lfunc_begin2: - ld 5, 56(4) + stdu 1, -624(1) + mr 5, 3 + ld 3, 56(4) + addi 12, 5, 62 ld 6, 48(4) - std 5, -72(1) - ld 5, 40(4) - std 6, -80(1) - ld 6, 32(4) - std 5, -88(1) - ld 5, 24(4) - std 6, -96(1) - ld 6, 16(4) - std 5, -104(1) - ld 5, 8(4) + ld 7, 40(4) + ld 8, 32(4) + ld 9, 24(4) + ld 10, 16(4) + ld 11, 8(4) ld 4, 0(4) - std 6, -112(1) - std 5, -120(1) - std 4, -128(1) + std 14, 480(1) + addi 14, 5, 52 + std 15, 488(1) + addi 15, 5, 51 + std 16, 496(1) + addi 16, 5, 50 + std 17, 504(1) + addi 17, 5, 49 + std 18, 512(1) + addi 18, 5, 48 + std 19, 520(1) + addi 19, 5, 47 + std 20, 528(1) + addi 20, 5, 46 + std 21, 536(1) + addi 21, 5, 45 + std 22, 544(1) + addi 22, 5, 44 + std 23, 552(1) + addi 23, 5, 43 + std 24, 560(1) + addi 24, 5, 42 + std 25, 568(1) + addi 25, 5, 41 + std 26, 576(1) + addi 26, 5, 40 + std 27, 584(1) + addi 27, 5, 39 + std 28, 592(1) + std 29, 600(1) + addi 29, 1, 416 + std 30, 608(1) + addi 30, 5, 54 + std 31, 616(1) + addi 31, 5, 53 + std 3, 408(1) + addi 3, 5, 1 + std 6, 400(1) + addi 6, 5, 21 + std 7, 392(1) + std 8, 384(1) + addi 8, 5, 58 + std 9, 376(1) + addi 9, 5, 59 + std 10, 368(1) + addi 10, 5, 60 + std 11, 360(1) + addi 11, 5, 61 + std 4, 352(1) lwsync - ld 4, -72(1) - ld 6, -88(1) - ld 5, -80(1) - std 4, -8(1) - ld 4, -96(1) - std 6, -24(1) - ld 6, -112(1) - std 5, -16(1) - ld 5, -104(1) - std 4, -32(1) - ld 4, -120(1) - std 6, -48(1) - addi 6, 3, 7 - rldicr 7, 6, 0, 60 - std 5, -40(1) - ld 5, -128(1) - std 4, -56(1) - sub 4, 7, 3 - cmpldi 4, 65 - std 5, -64(1) - bge 0, .LBB2_5 - cmpldi 4, 0 - addi 5, 1, -64 - beq 0, .LBB2_6 - addi 6, 3, 64 - addi 8, 1, -65 - addi 9, 3, -1 - mtctr 4 -.LBB2_3: - lbzu 10, 1(8) - addi 11, 9, 1 - stb 10, 1(9) - mr 9, 11 - bdnz .LBB2_3 - sub 6, 6, 7 - cmpldi 6, 8 - bge 0, 
.LBB2_7 - b .LBB2_11 -.LBB2_5: - lbz 4, -64(1) + std 3, 344(1) + addi 3, 5, 2 + std 3, 336(1) + addi 3, 5, 3 + std 3, 328(1) + addi 3, 5, 4 + std 3, 320(1) + addi 3, 5, 5 + std 3, 312(1) + addi 3, 5, 6 + std 3, 304(1) + addi 3, 5, 7 + std 3, 296(1) + addi 3, 5, 8 + std 3, 288(1) + addi 3, 5, 9 + std 3, 280(1) + addi 3, 5, 10 + std 3, 272(1) + addi 3, 5, 11 + std 3, 264(1) + addi 3, 5, 12 + std 3, 256(1) + addi 3, 5, 13 + std 3, 248(1) + addi 3, 5, 14 + std 3, 240(1) + addi 3, 5, 15 + std 3, 232(1) + addi 3, 5, 16 + std 3, 224(1) + addi 3, 5, 17 + std 3, 216(1) + addi 3, 5, 18 + std 3, 208(1) + addi 3, 5, 19 + ld 4, 408(1) + std 3, 200(1) + addi 3, 5, 20 + std 3, 192(1) + ld 3, 400(1) + std 4, 472(1) + addi 4, 5, 25 + std 4, 152(1) + addi 4, 5, 26 + std 4, 144(1) + std 3, 464(1) + addi 3, 5, 27 + ld 4, 392(1) + std 3, 136(1) + addi 3, 5, 28 + std 3, 128(1) + addi 3, 5, 29 + std 3, 120(1) + addi 3, 5, 30 + std 4, 456(1) + ld 4, 384(1) + std 3, 112(1) + addi 3, 5, 31 + std 3, 104(1) + addi 3, 5, 32 + std 3, 96(1) + addi 3, 5, 33 + std 4, 448(1) + ld 4, 376(1) + std 3, 88(1) + addi 3, 5, 34 + std 3, 80(1) + addi 3, 5, 35 + std 6, 184(1) + addi 6, 5, 22 + std 3, 72(1) + addi 3, 5, 36 + std 4, 440(1) + ld 4, 368(1) + ld 7, 360(1) + ld 0, 352(1) + std 6, 176(1) + addi 6, 5, 23 + std 3, 64(1) + addi 3, 5, 37 + std 6, 168(1) + addi 6, 5, 24 + std 3, 56(1) + addi 3, 5, 38 + std 6, 160(1) + addi 6, 5, 56 + std 3, 48(1) + mr 3, 5 + std 4, 432(1) + addi 4, 5, 55 + std 7, 424(1) + addi 7, 5, 57 + std 0, 416(1) + #APP + lbz 29, 0(29) + stb 29, 0(3) + #NO_APP + addi 3, 1, 417 + addi 29, 1, 418 + ld 28, 344(1) + #APP + lbz 3, 0(3) + stb 3, 0(28) + #NO_APP + ld 28, 336(1) + #APP + lbz 3, 0(29) + stb 3, 0(28) + #NO_APP + addi 29, 1, 420 + addi 3, 1, 419 + ld 28, 328(1) + #APP + lbz 3, 0(3) + stb 3, 0(28) + #NO_APP + ld 28, 320(1) + #APP + lbz 3, 0(29) + stb 3, 0(28) + #NO_APP + addi 29, 1, 422 + addi 3, 1, 421 + ld 28, 312(1) + #APP + lbz 3, 0(3) + stb 3, 0(28) + #NO_APP + ld 28, 
304(1) + #APP + lbz 3, 0(29) + stb 3, 0(28) + #NO_APP + addi 29, 1, 423 + addi 3, 1, 424 + ld 28, 296(1) + #APP + lbz 29, 0(29) + stb 29, 0(28) + #NO_APP + ld 29, 288(1) + #APP + lbz 3, 0(3) + stb 3, 0(29) + #NO_APP + addi 3, 1, 425 + addi 29, 1, 426 + ld 28, 280(1) + #APP + lbz 3, 0(3) + stb 3, 0(28) + #NO_APP + ld 28, 272(1) + #APP + lbz 3, 0(29) + stb 3, 0(28) + #NO_APP + addi 29, 1, 428 + addi 3, 1, 427 + ld 28, 264(1) + #APP + lbz 3, 0(3) + stb 3, 0(28) + #NO_APP + ld 28, 256(1) + #APP + lbz 3, 0(29) + stb 3, 0(28) + #NO_APP + addi 29, 1, 430 + addi 3, 1, 429 + ld 28, 248(1) + #APP + lbz 3, 0(3) + stb 3, 0(28) + #NO_APP + ld 28, 240(1) + #APP + lbz 3, 0(29) + stb 3, 0(28) + #NO_APP + addi 29, 1, 431 + addi 3, 1, 432 + ld 28, 232(1) + #APP + lbz 29, 0(29) + stb 29, 0(28) + #NO_APP + ld 29, 224(1) + #APP + lbz 3, 0(3) + stb 3, 0(29) + #NO_APP + addi 3, 1, 433 + addi 29, 1, 434 + ld 28, 216(1) + #APP + lbz 3, 0(3) + stb 3, 0(28) + #NO_APP + ld 28, 208(1) + #APP + lbz 3, 0(29) + stb 3, 0(28) + #NO_APP + addi 29, 1, 436 + addi 3, 1, 435 + ld 28, 200(1) + #APP + lbz 3, 0(3) + stb 3, 0(28) + #NO_APP + ld 28, 192(1) + #APP + lbz 3, 0(29) + stb 3, 0(28) + #NO_APP + addi 29, 1, 438 + addi 3, 1, 437 + ld 28, 184(1) + #APP + lbz 3, 0(3) + stb 3, 0(28) + #NO_APP + ld 28, 176(1) + #APP + lbz 3, 0(29) + stb 3, 0(28) + #NO_APP + addi 29, 1, 439 + addi 3, 1, 440 + ld 28, 168(1) + #APP + lbz 29, 0(29) + stb 29, 0(28) + #NO_APP + ld 29, 160(1) + #APP + lbz 3, 0(3) + stb 3, 0(29) + #NO_APP + addi 3, 1, 441 + addi 29, 1, 442 + ld 28, 152(1) + #APP + lbz 3, 0(3) + stb 3, 0(28) + #NO_APP + ld 28, 144(1) + #APP + lbz 3, 0(29) + stb 3, 0(28) + #NO_APP + addi 29, 1, 444 + addi 3, 1, 443 + ld 28, 136(1) + #APP + lbz 3, 0(3) + stb 3, 0(28) + #NO_APP + ld 28, 128(1) + #APP + lbz 3, 0(29) + stb 3, 0(28) + #NO_APP + addi 29, 1, 446 + addi 3, 1, 445 + ld 28, 120(1) + #APP + lbz 3, 0(3) + stb 3, 0(28) + #NO_APP + ld 28, 112(1) + #APP + lbz 3, 0(29) + stb 3, 0(28) + #NO_APP + addi 29, 1, 447 + 
addi 3, 1, 448 + ld 28, 104(1) + #APP + lbz 29, 0(29) + stb 29, 0(28) + #NO_APP + ld 29, 96(1) + #APP + lbz 3, 0(3) + stb 3, 0(29) + #NO_APP + addi 3, 1, 449 + addi 29, 1, 450 + ld 28, 88(1) + #APP + lbz 3, 0(3) + stb 3, 0(28) + #NO_APP + ld 28, 80(1) + #APP + lbz 3, 0(29) + stb 3, 0(28) + #NO_APP + addi 29, 1, 452 + addi 3, 1, 451 + ld 28, 72(1) + #APP + lbz 3, 0(3) + stb 3, 0(28) + #NO_APP + ld 28, 64(1) + #APP + lbz 3, 0(29) + stb 3, 0(28) + #NO_APP + addi 29, 1, 454 + addi 3, 1, 453 + ld 28, 56(1) + #APP + lbz 3, 0(3) + stb 3, 0(28) + #NO_APP + addi 3, 1, 456 + ld 28, 48(1) + #APP + lbz 29, 0(29) + stb 29, 0(28) + #NO_APP + addi 29, 1, 455 + #APP + lbz 29, 0(29) + stb 29, 0(27) + #NO_APP + #APP + lbz 3, 0(3) + stb 3, 0(26) + #NO_APP + addi 29, 1, 458 + addi 3, 1, 457 + #APP + lbz 3, 0(3) + stb 3, 0(25) + #NO_APP + #APP + lbz 3, 0(29) + stb 3, 0(24) + #NO_APP + addi 29, 1, 460 + addi 3, 1, 459 + #APP + lbz 3, 0(3) + stb 3, 0(23) + #NO_APP + #APP + lbz 3, 0(29) + stb 3, 0(22) + #NO_APP + addi 29, 1, 462 + addi 3, 1, 461 + #APP + lbz 3, 0(3) + stb 3, 0(21) + #NO_APP + #APP + lbz 29, 0(29) + stb 29, 0(20) + #NO_APP + addi 3, 1, 464 + addi 29, 1, 463 + #APP + lbz 29, 0(29) + stb 29, 0(19) + #NO_APP + #APP + lbz 3, 0(3) + stb 3, 0(18) + #NO_APP + addi 29, 1, 466 + addi 3, 1, 465 + #APP + lbz 3, 0(3) + stb 3, 0(17) + #NO_APP + #APP + lbz 3, 0(29) + stb 3, 0(16) + #NO_APP + addi 29, 1, 468 + addi 3, 1, 467 + #APP + lbz 3, 0(3) + stb 3, 0(15) + #NO_APP + #APP + lbz 3, 0(29) + stb 3, 0(14) + #NO_APP + addi 29, 1, 470 + addi 3, 1, 469 + #APP + lbz 3, 0(3) + stb 3, 0(31) + #NO_APP + #APP + lbz 29, 0(29) + stb 29, 0(30) + #NO_APP + addi 3, 1, 472 + addi 30, 1, 471 + #APP + lbz 30, 0(30) + stb 30, 0(4) + #NO_APP + #APP + lbz 3, 0(3) + stb 3, 0(6) + #NO_APP + addi 4, 1, 474 + addi 3, 1, 473 + #APP + lbz 3, 0(3) + stb 3, 0(7) + #NO_APP + #APP + lbz 3, 0(4) + stb 3, 0(8) + #NO_APP + addi 4, 1, 476 + addi 3, 1, 475 + #APP + lbz 3, 0(3) + stb 3, 0(9) + #NO_APP + #APP + lbz 3, 
0(4) + stb 3, 0(10) + #NO_APP + addi 4, 1, 478 + addi 3, 1, 477 + #APP + lbz 3, 0(3) + stb 3, 0(11) + #NO_APP + #APP + lbz 4, 0(4) + stb 4, 0(12) + #NO_APP + addi 3, 5, 63 + addi 4, 1, 479 + #APP + lbz 4, 0(4) stb 4, 0(3) - lbz 4, -63(1) - stb 4, 1(3) - lbz 4, -62(1) - stb 4, 2(3) - lbz 4, -61(1) - stb 4, 3(3) - lbz 4, -60(1) - stb 4, 4(3) - lbz 4, -59(1) - stb 4, 5(3) - lbz 4, -58(1) - stb 4, 6(3) - lbz 4, -57(1) - stb 4, 7(3) - lbz 4, -56(1) - stb 4, 8(3) - lbz 4, -55(1) - stb 4, 9(3) - lbz 4, -54(1) - stb 4, 10(3) - lbz 4, -53(1) - stb 4, 11(3) - lbz 4, -52(1) - stb 4, 12(3) - lbz 4, -51(1) - stb 4, 13(3) - lbz 4, -50(1) - stb 4, 14(3) - lbz 4, -49(1) - stb 4, 15(3) - lbz 4, -48(1) - stb 4, 16(3) - lbz 4, -47(1) - stb 4, 17(3) - lbz 4, -46(1) - stb 4, 18(3) - lbz 4, -45(1) - stb 4, 19(3) - lbz 4, -44(1) - stb 4, 20(3) - lbz 4, -43(1) - stb 4, 21(3) - lbz 4, -42(1) - stb 4, 22(3) - lbz 4, -41(1) - stb 4, 23(3) - lbz 4, -40(1) - stb 4, 24(3) - lbz 4, -39(1) - stb 4, 25(3) - lbz 4, -38(1) - stb 4, 26(3) - lbz 4, -37(1) - stb 4, 27(3) - lbz 4, -36(1) - stb 4, 28(3) - lbz 4, -35(1) - stb 4, 29(3) - lbz 4, -34(1) - stb 4, 30(3) - lbz 4, -33(1) - stb 4, 31(3) - lbz 4, -32(1) - stb 4, 32(3) - lbz 4, -31(1) - stb 4, 33(3) - lbz 4, -30(1) - stb 4, 34(3) - lbz 4, -29(1) - stb 4, 35(3) - lbz 4, -28(1) - stb 4, 36(3) - lbz 4, -27(1) - stb 4, 37(3) - lbz 4, -26(1) - stb 4, 38(3) - lbz 4, -25(1) - stb 4, 39(3) - lbz 4, -24(1) - stb 4, 40(3) - lbz 4, -23(1) - stb 4, 41(3) - lbz 4, -22(1) - stb 4, 42(3) - lbz 4, -21(1) - stb 4, 43(3) - lbz 4, -20(1) - stb 4, 44(3) - lbz 4, -19(1) - stb 4, 45(3) - lbz 4, -18(1) - stb 4, 46(3) - lbz 4, -17(1) - stb 4, 47(3) - lbz 4, -16(1) - stb 4, 48(3) - lbz 4, -15(1) - stb 4, 49(3) - lbz 4, -14(1) - stb 4, 50(3) - lbz 4, -13(1) - stb 4, 51(3) - lbz 4, -12(1) - stb 4, 52(3) - lbz 4, -11(1) - stb 4, 53(3) - lbz 4, -10(1) - stb 4, 54(3) - lbz 4, -9(1) - stb 4, 55(3) - lbz 4, -8(1) - stb 4, 56(3) - lbz 4, -7(1) - stb 4, 57(3) - lbz 4, -6(1) - stb 
4, 58(3) - lbz 4, -5(1) - stb 4, 59(3) - lbz 4, -4(1) - stb 4, 60(3) - lbz 4, -3(1) - stb 4, 61(3) - lbz 4, -2(1) - stb 4, 62(3) - lbz 4, -1(1) - stb 4, 63(3) - blr -.LBB2_6: - li 6, 64 -.LBB2_7: - addi 8, 6, -8 - li 9, 7 - cmpldi 8, 7 - sub 7, 7, 3 - bc 12, 0, .LBB2_9 - ori 8, 9, 0 - b .LBB2_9 -.LBB2_9: - not 8, 8 - add 8, 8, 6 - addi 9, 7, -8 - rldicl 8, 8, 61, 3 - add 7, 5, 9 - addi 10, 8, 1 - add 8, 3, 9 - mtctr 10 -.LBB2_10: - ld 9, 8(7) - addi 10, 8, 8 - addi 7, 7, 8 - addi 6, 6, -8 - addi 4, 4, 8 - std 9, 8(8) - mr 8, 10 - bdnz .LBB2_10 -.LBB2_11: - cmpldi 6, 0 - beqlr 0 - addi 7, 4, -1 - add 4, 5, 7 - add 3, 3, 7 - mtctr 6 -.LBB2_13: - lbzu 5, 1(4) - addi 6, 3, 1 - stb 5, 1(3) - mr 3, 6 - bdnz .LBB2_13 + #NO_APP + ld 31, 616(1) + ld 30, 608(1) + ld 29, 600(1) + ld 28, 592(1) + ld 27, 584(1) + ld 26, 576(1) + ld 25, 568(1) + ld 24, 560(1) + ld 23, 552(1) + ld 22, 544(1) + ld 21, 536(1) + ld 20, 528(1) + ld 19, 520(1) + ld 18, 512(1) + ld 17, 504(1) + ld 16, 496(1) + ld 15, 488(1) + ld 14, 480(1) + addi 1, 1, 624 blr asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: .Lfunc_begin3: diff --git a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align16 b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align16 index edf0163..55715ad 100644 --- a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align16 @@ -1,22 +1,74 @@ asm_test::atomic_memcpy_store_align16::release: .Lfunc_begin18: - ld 5, 0(4) - ld 6, 8(4) - ld 7, 16(4) - ld 8, 24(4) - ld 9, 32(4) - ld 10, 40(4) - ld 11, 48(4) - ld 4, 56(4) + li 5, 48 + addi 6, 1, -128 + lvx 2, 4, 5 + li 7, 32 + li 8, 16 + stvx 2, 6, 5 + mr 11, 3 + addi 12, 1, -48 + addi 9, 3, 32 + addi 10, 3, 48 + lvx 2, 4, 7 + stvx 2, 6, 7 + lvx 2, 4, 8 + stvx 2, 6, 8 + lvx 2, 0, 4 + addi 4, 1, -64 + stvx 2, 0, 6 lwsync - std 5, 0(3) - std 6, 8(3) - std 7, 16(3) - std 8, 24(3) - std 9, 32(3) 
- std 10, 40(3) - std 11, 48(3) - std 4, 56(3) + lvx 2, 6, 5 + stvx 2, 4, 5 + addi 5, 3, 8 + lvx 2, 6, 7 + stvx 2, 4, 7 + addi 7, 3, 16 + lvx 2, 6, 8 + stvx 2, 4, 8 + addi 8, 3, 24 + lvx 2, 0, 6 + addi 6, 3, 40 + addi 3, 3, 56 + stvx 2, 0, 4 + #APP + ld 4, 0(4) + std 4, 0(11) + #NO_APP + addi 4, 1, -56 + #APP + ld 4, 0(4) + std 4, 0(5) + #NO_APP + #APP + ld 5, 0(12) + std 5, 0(7) + #NO_APP + addi 4, 1, -32 + addi 5, 1, -40 + #APP + ld 5, 0(5) + std 5, 0(8) + #NO_APP + #APP + ld 4, 0(4) + std 4, 0(9) + #NO_APP + addi 5, 1, -16 + addi 4, 1, -24 + #APP + ld 4, 0(4) + std 4, 0(6) + #NO_APP + #APP + ld 4, 0(5) + std 4, 0(10) + #NO_APP + addi 4, 1, -8 + #APP + ld 4, 0(4) + std 4, 0(3) + #NO_APP blr asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: .Lfunc_begin19: diff --git a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align2 b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align2 index 555d034..0bc66d7 100644 --- a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align2 @@ -1,167 +1,283 @@ asm_test::atomic_memcpy_store_align2::release: .Lfunc_begin6: - ld 5, 56(4) - ld 6, 48(4) - std 5, -72(1) - ld 5, 40(4) - std 6, -80(1) - ld 6, 32(4) - std 5, -88(1) - ld 5, 24(4) - std 6, -96(1) - ld 6, 16(4) - std 5, -104(1) - ld 5, 8(4) + stdu 1, -384(1) + ld 6, 56(4) + mr 5, 3 + addi 3, 1, 176 + ld 7, 48(4) + ld 8, 40(4) + ld 9, 32(4) + ld 10, 24(4) + ld 11, 16(4) + ld 12, 8(4) ld 4, 0(4) - std 6, -112(1) - std 5, -120(1) - std 4, -128(1) + std 14, 240(1) + addi 14, 5, 48 + std 15, 248(1) + addi 15, 5, 44 + std 16, 256(1) + addi 16, 5, 42 + std 17, 264(1) + addi 17, 5, 40 + std 18, 272(1) + addi 18, 5, 56 + std 19, 280(1) + addi 19, 5, 36 + std 20, 288(1) + addi 20, 5, 32 + std 21, 296(1) + addi 21, 5, 30 + std 22, 304(1) + std 23, 312(1) + addi 23, 5, 28 + std 24, 320(1) + std 25, 328(1) + std 26, 336(1) + addi 26, 5, 24 + 
std 27, 344(1) + std 28, 352(1) + addi 28, 5, 20 + std 29, 360(1) + addi 29, 5, 18 + std 30, 368(1) + addi 30, 5, 16 + std 31, 376(1) + addi 31, 5, 52 + std 3, 104(1) + addi 3, 5, 2 + std 6, 168(1) + addi 6, 5, 58 + std 7, 160(1) + addi 7, 5, 54 + std 8, 152(1) + addi 8, 5, 38 + std 9, 144(1) + std 10, 136(1) + std 11, 128(1) + std 12, 120(1) + std 4, 112(1) + addi 4, 5, 60 lwsync - ld 4, -72(1) - ld 6, -88(1) - ld 5, -80(1) - std 4, -8(1) - ld 4, -96(1) - std 6, -24(1) - ld 6, -112(1) - std 5, -16(1) - ld 5, -104(1) - std 4, -32(1) - ld 4, -120(1) - std 6, -48(1) - addi 6, 3, 7 - rldicr 7, 6, 0, 60 - std 5, -40(1) - ld 5, -128(1) - std 4, -56(1) - sub 4, 7, 3 - cmpldi 4, 64 - std 5, -64(1) - bgt 0, .LBB6_5 - cmpldi 4, 0 - addi 5, 1, -64 - beq 0, .LBB6_6 - addi 6, 3, 64 - addi 8, 1, -65 - addi 9, 3, -1 - mtctr 4 -.LBB6_3: - lbzu 10, 1(8) - addi 11, 9, 1 - stb 10, 1(9) - mr 9, 11 - bdnz .LBB6_3 - sub 6, 6, 7 - cmpldi 6, 8 - bge 0, .LBB6_7 - b .LBB6_11 -.LBB6_5: - lhz 4, -64(1) + ld 0, 136(1) + std 3, 96(1) + addi 3, 5, 4 + std 3, 88(1) + addi 3, 5, 6 + ld 12, 168(1) + ld 27, 160(1) + ld 25, 152(1) + ld 24, 144(1) + ld 11, 128(1) + std 3, 80(1) + addi 3, 5, 8 + ld 22, 120(1) + std 0, 200(1) + ld 0, 112(1) + std 3, 72(1) + addi 3, 5, 10 + std 3, 64(1) + addi 3, 5, 12 + std 3, 56(1) + mr 3, 5 + std 12, 232(1) + addi 12, 5, 14 + std 27, 224(1) + addi 27, 5, 22 + std 25, 216(1) + addi 25, 5, 26 + std 24, 208(1) + addi 24, 5, 34 + std 11, 192(1) + addi 11, 5, 46 + std 22, 184(1) + addi 22, 5, 50 + std 0, 176(1) + ld 9, 104(1) + #APP + lhz 9, 0(9) + sth 9, 0(3) + #NO_APP + addi 3, 1, 178 + addi 9, 1, 180 + ld 10, 96(1) + #APP + lhz 3, 0(3) + sth 3, 0(10) + #NO_APP + ld 10, 88(1) + #APP + lhz 3, 0(9) + sth 3, 0(10) + #NO_APP + addi 9, 1, 182 + addi 3, 1, 184 + ld 10, 80(1) + #APP + lhz 9, 0(9) + sth 9, 0(10) + #NO_APP + ld 9, 72(1) + #APP + lhz 3, 0(3) + sth 3, 0(9) + #NO_APP + addi 3, 1, 186 + addi 9, 1, 188 + ld 10, 64(1) + #APP + lhz 3, 0(3) + sth 3, 0(10) + #NO_APP + 
addi 3, 1, 192 + ld 10, 56(1) + #APP + lhz 9, 0(9) + sth 9, 0(10) + #NO_APP + addi 9, 1, 190 + #APP + lhz 9, 0(9) + sth 9, 0(12) + #NO_APP + #APP + lhz 3, 0(3) + sth 3, 0(30) + #NO_APP + addi 9, 1, 196 + addi 3, 1, 194 + #APP + lhz 3, 0(3) + sth 3, 0(29) + #NO_APP + #APP + lhz 9, 0(9) + sth 9, 0(28) + #NO_APP + addi 3, 1, 200 + addi 9, 1, 198 + #APP + lhz 9, 0(9) + sth 9, 0(27) + #NO_APP + #APP + lhz 3, 0(3) + sth 3, 0(26) + #NO_APP + addi 9, 1, 204 + addi 3, 1, 202 + #APP + lhz 3, 0(3) + sth 3, 0(25) + #NO_APP + #APP + lhz 9, 0(9) + sth 9, 0(23) + #NO_APP + addi 3, 1, 208 + addi 9, 1, 206 + #APP + lhz 9, 0(9) + sth 9, 0(21) + #NO_APP + #APP + lhz 3, 0(3) + sth 3, 0(20) + #NO_APP + addi 9, 1, 212 + addi 3, 1, 210 + #APP + lhz 3, 0(3) + sth 3, 0(24) + #NO_APP + #APP + lhz 9, 0(9) + sth 9, 0(19) + #NO_APP + addi 3, 1, 216 + addi 9, 1, 214 + #APP + lhz 9, 0(9) + sth 9, 0(8) + #NO_APP + #APP + lhz 3, 0(3) + sth 3, 0(17) + #NO_APP + addi 8, 1, 220 + addi 3, 1, 218 + #APP + lhz 3, 0(3) + sth 3, 0(16) + #NO_APP + #APP + lhz 8, 0(8) + sth 8, 0(15) + #NO_APP + addi 3, 1, 224 + addi 8, 1, 222 + #APP + lhz 8, 0(8) + sth 8, 0(11) + #NO_APP + #APP + lhz 3, 0(3) + sth 3, 0(14) + #NO_APP + addi 8, 1, 228 + addi 3, 1, 226 + #APP + lhz 3, 0(3) + sth 3, 0(22) + #NO_APP + #APP + lhz 8, 0(8) + sth 8, 0(31) + #NO_APP + addi 3, 1, 232 + addi 8, 1, 230 + #APP + lhz 8, 0(8) + sth 8, 0(7) + #NO_APP + #APP + lhz 3, 0(3) + sth 3, 0(18) + #NO_APP + addi 7, 1, 236 + addi 3, 1, 234 + #APP + lhz 3, 0(3) + sth 3, 0(6) + #NO_APP + addi 3, 5, 62 + #APP + lhz 5, 0(7) + sth 5, 0(4) + #NO_APP + addi 4, 1, 238 + #APP + lhz 4, 0(4) sth 4, 0(3) - lhz 4, -62(1) - sth 4, 2(3) - lhz 4, -60(1) - sth 4, 4(3) - lhz 4, -58(1) - sth 4, 6(3) - lhz 4, -56(1) - sth 4, 8(3) - lhz 4, -54(1) - sth 4, 10(3) - lhz 4, -52(1) - sth 4, 12(3) - lhz 4, -50(1) - sth 4, 14(3) - lhz 4, -48(1) - sth 4, 16(3) - lhz 4, -46(1) - sth 4, 18(3) - lhz 4, -44(1) - sth 4, 20(3) - lhz 4, -42(1) - sth 4, 22(3) - lhz 4, -40(1) - sth 4, 
24(3) - lhz 4, -38(1) - sth 4, 26(3) - lhz 4, -36(1) - sth 4, 28(3) - lhz 4, -34(1) - sth 4, 30(3) - lhz 4, -32(1) - sth 4, 32(3) - lhz 4, -30(1) - sth 4, 34(3) - lhz 4, -28(1) - sth 4, 36(3) - lhz 4, -26(1) - sth 4, 38(3) - lhz 4, -24(1) - sth 4, 40(3) - lhz 4, -22(1) - sth 4, 42(3) - lhz 4, -20(1) - sth 4, 44(3) - lhz 4, -18(1) - sth 4, 46(3) - lhz 4, -16(1) - sth 4, 48(3) - lhz 4, -14(1) - sth 4, 50(3) - lhz 4, -12(1) - sth 4, 52(3) - lhz 4, -10(1) - sth 4, 54(3) - lhz 4, -8(1) - sth 4, 56(3) - lhz 4, -6(1) - sth 4, 58(3) - lhz 4, -4(1) - sth 4, 60(3) - lhz 4, -2(1) - sth 4, 62(3) - blr -.LBB6_6: - li 6, 64 -.LBB6_7: - addi 8, 6, -8 - li 9, 7 - cmpldi 8, 7 - sub 7, 7, 3 - bc 12, 0, .LBB6_9 - ori 8, 9, 0 - b .LBB6_9 -.LBB6_9: - not 8, 8 - add 8, 8, 6 - addi 9, 7, -8 - rldicl 8, 8, 61, 3 - add 7, 5, 9 - addi 10, 8, 1 - add 8, 3, 9 - mtctr 10 -.LBB6_10: - ld 9, 8(7) - addi 10, 8, 8 - addi 7, 7, 8 - addi 6, 6, -8 - addi 4, 4, 8 - std 9, 8(8) - mr 8, 10 - bdnz .LBB6_10 -.LBB6_11: - cmpldi 6, 0 - beqlr 0 - addi 7, 4, -1 - add 4, 5, 7 - add 3, 3, 7 - mtctr 6 -.LBB6_13: - lbzu 5, 1(4) - addi 6, 3, 1 - stb 5, 1(3) - mr 3, 6 - bdnz .LBB6_13 + #NO_APP + ld 31, 376(1) + ld 30, 368(1) + ld 29, 360(1) + ld 28, 352(1) + ld 27, 344(1) + ld 26, 336(1) + ld 25, 328(1) + ld 24, 320(1) + ld 23, 312(1) + ld 22, 304(1) + ld 21, 296(1) + ld 20, 288(1) + ld 19, 280(1) + ld 18, 272(1) + ld 17, 264(1) + ld 16, 256(1) + ld 15, 248(1) + ld 14, 240(1) + addi 1, 1, 384 blr asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: .Lfunc_begin7: diff --git a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align4 b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align4 index 91be7e2..87b3a94 100644 --- a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align4 @@ -1,135 +1,150 @@ asm_test::atomic_memcpy_store_align4::release: .Lfunc_begin10: - ld 5, 
56(4) - ld 6, 48(4) - std 5, -72(1) - ld 5, 40(4) - std 6, -80(1) - ld 6, 32(4) - std 5, -88(1) - ld 5, 24(4) - std 6, -96(1) - ld 6, 16(4) - std 5, -104(1) - ld 5, 8(4) + ld 6, 56(4) + addi 5, 1, -136 + ld 7, 48(4) + ld 8, 40(4) + ld 9, 32(4) + ld 10, 24(4) + ld 11, 16(4) + ld 12, 8(4) ld 4, 0(4) - std 6, -112(1) - std 5, -120(1) - std 4, -128(1) + std 23, -72(1) + addi 23, 1, -128 + std 24, -64(1) + addi 24, 3, 56 + std 25, -56(1) + addi 25, 3, 48 + std 26, -48(1) + addi 26, 3, 44 + std 27, -40(1) + addi 27, 3, 40 + std 28, -32(1) + addi 28, 3, 32 + std 29, -24(1) + addi 29, 3, 24 + std 30, -16(1) + addi 30, 3, 20 + std 6, -144(1) + std 7, -152(1) + std 8, -160(1) + std 9, -168(1) + std 10, -176(1) + std 11, -184(1) + std 12, -192(1) + std 4, -200(1) lwsync - ld 4, -72(1) - ld 6, -88(1) - ld 5, -80(1) - std 4, -8(1) - ld 4, -96(1) - std 6, -24(1) - ld 6, -112(1) - std 5, -16(1) - ld 5, -104(1) - std 4, -32(1) - ld 4, -120(1) - std 6, -48(1) - addi 6, 3, 7 - rldicr 7, 6, 0, 60 - std 5, -40(1) - ld 5, -128(1) - std 4, -56(1) - sub 4, 7, 3 - cmpldi 4, 64 - std 5, -64(1) - bgt 0, .LBB10_5 - cmpldi 4, 0 - addi 5, 1, -64 - beq 0, .LBB10_6 - addi 6, 3, 64 - addi 8, 1, -65 - addi 9, 3, -1 - mtctr 4 -.LBB10_3: - lbzu 10, 1(8) - addi 11, 9, 1 - stb 10, 1(9) - mr 9, 11 - bdnz .LBB10_3 - sub 6, 6, 7 - cmpldi 6, 8 - bge 0, .LBB10_7 - b .LBB10_11 -.LBB10_5: - lwz 4, -64(1) + ld 6, -144(1) + ld 7, -152(1) + ld 8, -160(1) + ld 9, -168(1) + ld 10, -176(1) + ld 11, -184(1) + ld 12, -192(1) + ld 4, -200(1) + std 6, -80(1) + addi 6, 3, 4 + std 7, -88(1) + addi 7, 3, 8 + std 8, -96(1) + addi 8, 3, 12 + std 9, -104(1) + addi 9, 3, 16 + std 10, -112(1) + addi 10, 3, 28 + std 11, -120(1) + addi 11, 3, 36 + std 12, -128(1) + addi 12, 3, 52 + std 4, -136(1) + mr 4, 3 + addi 3, 3, 60 + #APP + lwz 5, 0(5) + stw 5, 0(4) + #NO_APP + addi 4, 1, -132 + #APP + lwz 4, 0(4) + stw 4, 0(6) + #NO_APP + #APP + lwz 5, 0(23) + stw 5, 0(7) + #NO_APP + addi 4, 1, -120 + addi 5, 1, -124 + #APP + lwz 5, 
0(5) + stw 5, 0(8) + #NO_APP + #APP + lwz 4, 0(4) + stw 4, 0(9) + #NO_APP + addi 5, 1, -112 + addi 4, 1, -116 + #APP + lwz 4, 0(4) + stw 4, 0(30) + #NO_APP + #APP + lwz 5, 0(5) + stw 5, 0(29) + #NO_APP + addi 4, 1, -104 + addi 5, 1, -108 + #APP + lwz 5, 0(5) + stw 5, 0(10) + #NO_APP + #APP + lwz 4, 0(4) + stw 4, 0(28) + #NO_APP + addi 5, 1, -96 + addi 4, 1, -100 + #APP + lwz 4, 0(4) + stw 4, 0(11) + #NO_APP + #APP + lwz 5, 0(5) + stw 5, 0(27) + #NO_APP + addi 4, 1, -88 + addi 5, 1, -92 + #APP + lwz 5, 0(5) + stw 5, 0(26) + #NO_APP + #APP + lwz 4, 0(4) + stw 4, 0(25) + #NO_APP + addi 5, 1, -80 + addi 4, 1, -84 + #APP + lwz 4, 0(4) + stw 4, 0(12) + #NO_APP + #APP + lwz 4, 0(5) + stw 4, 0(24) + #NO_APP + addi 4, 1, -76 + #APP + lwz 4, 0(4) stw 4, 0(3) - lwz 4, -60(1) - stw 4, 4(3) - lwz 4, -56(1) - stw 4, 8(3) - lwz 4, -52(1) - stw 4, 12(3) - lwz 4, -48(1) - stw 4, 16(3) - lwz 4, -44(1) - stw 4, 20(3) - lwz 4, -40(1) - stw 4, 24(3) - lwz 4, -36(1) - stw 4, 28(3) - lwz 4, -32(1) - stw 4, 32(3) - lwz 4, -28(1) - stw 4, 36(3) - lwz 4, -24(1) - stw 4, 40(3) - lwz 4, -20(1) - stw 4, 44(3) - lwz 4, -16(1) - stw 4, 48(3) - lwz 4, -12(1) - stw 4, 52(3) - lwz 4, -8(1) - stw 4, 56(3) - lwz 4, -4(1) - stw 4, 60(3) - blr -.LBB10_6: - li 6, 64 -.LBB10_7: - addi 8, 6, -8 - li 9, 7 - cmpldi 8, 7 - sub 7, 7, 3 - bc 12, 0, .LBB10_9 - ori 8, 9, 0 - b .LBB10_9 -.LBB10_9: - not 8, 8 - add 8, 8, 6 - addi 9, 7, -8 - rldicl 8, 8, 61, 3 - add 7, 5, 9 - addi 10, 8, 1 - add 8, 3, 9 - mtctr 10 -.LBB10_10: - ld 9, 8(7) - addi 10, 8, 8 - addi 7, 7, 8 - addi 6, 6, -8 - addi 4, 4, 8 - std 9, 8(8) - mr 8, 10 - bdnz .LBB10_10 -.LBB10_11: - cmpldi 6, 0 - beqlr 0 - addi 7, 4, -1 - add 4, 5, 7 - add 3, 3, 7 - mtctr 6 -.LBB10_13: - lbzu 5, 1(4) - addi 6, 3, 1 - stb 5, 1(3) - mr 3, 6 - bdnz .LBB10_13 + #NO_APP + ld 30, -16(1) + ld 29, -24(1) + ld 28, -32(1) + ld 27, -40(1) + ld 26, -48(1) + ld 25, -56(1) + ld 24, -64(1) + ld 23, -72(1) blr 
asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: .Lfunc_begin11: diff --git a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align8 b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align8 index 25c235f..13de1e4 100644 --- a/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/powerpc64-unknown-linux-gnu/atomic_memcpy_store_align8 @@ -1,22 +1,88 @@ asm_test::atomic_memcpy_store_align8::release: .Lfunc_begin14: - ld 5, 0(4) - ld 6, 8(4) - ld 7, 16(4) - ld 8, 24(4) + ld 6, 56(4) + addi 5, 1, -80 + ld 7, 48(4) + ld 8, 40(4) ld 9, 32(4) - ld 10, 40(4) - ld 11, 48(4) - ld 4, 56(4) + ld 10, 24(4) + ld 11, 16(4) + ld 12, 8(4) + ld 4, 0(4) + std 30, -16(1) + addi 30, 1, -72 + std 6, -88(1) + std 7, -96(1) + std 8, -104(1) + std 9, -112(1) + std 10, -120(1) + std 11, -128(1) + std 12, -136(1) + std 4, -144(1) lwsync - std 5, 0(3) - std 6, 8(3) - std 7, 16(3) - std 8, 24(3) - std 9, 32(3) - std 10, 40(3) - std 11, 48(3) - std 4, 56(3) + ld 4, -88(1) + ld 6, -96(1) + ld 7, -104(1) + ld 8, -112(1) + ld 9, -120(1) + ld 10, -128(1) + ld 11, -136(1) + ld 12, -144(1) + std 4, -24(1) + addi 4, 3, 8 + std 6, -32(1) + addi 6, 3, 16 + std 7, -40(1) + addi 7, 3, 24 + std 8, -48(1) + addi 8, 3, 32 + std 9, -56(1) + addi 9, 3, 40 + std 10, -64(1) + addi 10, 3, 48 + std 11, -72(1) + mr 11, 3 + addi 3, 3, 56 + std 12, -80(1) + addi 12, 1, -64 + #APP + ld 5, 0(5) + std 5, 0(11) + #NO_APP + #APP + ld 5, 0(30) + std 5, 0(4) + #NO_APP + addi 5, 1, -56 + #APP + ld 11, 0(12) + std 11, 0(6) + #NO_APP + addi 4, 1, -48 + #APP + ld 5, 0(5) + std 5, 0(7) + #NO_APP + #APP + ld 4, 0(4) + std 4, 0(8) + #NO_APP + addi 5, 1, -32 + addi 6, 1, -40 + #APP + ld 4, 0(6) + std 4, 0(9) + #NO_APP + addi 4, 1, -24 + #APP + ld 5, 0(5) + std 5, 0(10) + #NO_APP + #APP + ld 4, 0(4) + std 4, 0(3) + #NO_APP + ld 30, -16(1) blr asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: .Lfunc_begin15: diff 
--git a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align1 b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align1 index 176e9d8..fdfce1c 100644 --- a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align1 @@ -1,575 +1,196 @@ asm_test::atomic_memcpy_load_align1::acquire: sw ra, 44(sp) - addi a4, a1, 3 - andi a3, a4, -4 - sub a2, a3, a1 - li a5, 33 - bgeu a2, a5, .LBB0_5 - beqz a2, .LBB0_6 - addi a4, a1, 32 - sub a7, a1, a3 - addi a5, sp, 8 - mv a6, a1 -.LBB0_3: - #APP - lb t0, 0(a6) - #NO_APP - mv t1, a7 - sb t0, 0(a5) - addi a5, a5, 1 - addi a7, a7, 1 - addi a6, a6, 1 - bgeu a7, t1, .LBB0_3 - sub a3, a4, a3 - li a4, 4 - bgeu a3, a4, .LBB0_7 - j .LBB0_9 -.LBB0_5: - #APP - lb a2, 0(a1) - #NO_APP - sb a2, 8(sp) - addi a2, a1, 1 + addi a2, a1, 31 + addi a3, sp, 39 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 9(sp) - addi a2, a1, 2 + addi a2, a1, 30 + addi a3, sp, 38 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 10(sp) + addi a2, a1, 29 + addi a3, sp, 37 #APP - lb a2, 0(a4) + lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 11(sp) - addi a2, a1, 4 + addi a2, a1, 28 + addi a3, sp, 36 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 12(sp) - addi a2, a1, 5 + addi a2, a1, 27 + addi a3, sp, 35 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 13(sp) - addi a2, a1, 6 + addi a2, a1, 26 + addi a3, sp, 34 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 14(sp) - addi a2, a1, 7 + addi a2, a1, 25 + addi a3, sp, 33 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 15(sp) - addi a2, a1, 8 + addi a2, a1, 24 + addi a3, sp, 32 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 16(sp) - addi a2, a1, 9 + addi a2, a1, 23 + addi a3, sp, 31 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 17(sp) - addi a2, a1, 10 + addi a2, a1, 22 + addi a3, sp, 30 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 18(sp) - addi a2, a1, 11 + addi a2, a1, 21 + addi a3, sp, 
29 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 19(sp) - addi a2, a1, 12 + addi a2, a1, 20 + addi a3, sp, 28 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 20(sp) - addi a2, a1, 13 + addi a2, a1, 19 + addi a3, sp, 27 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 21(sp) - addi a2, a1, 14 + addi a2, a1, 18 + addi a3, sp, 26 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 22(sp) - addi a2, a1, 15 + addi a2, a1, 17 + addi a3, sp, 25 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 23(sp) addi a2, a1, 16 + addi a3, sp, 24 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 24(sp) - addi a2, a1, 17 + addi a2, a1, 15 + addi a3, sp, 23 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 25(sp) - addi a2, a1, 18 + addi a2, a1, 14 + addi a3, sp, 22 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 26(sp) - addi a2, a1, 19 + addi a2, a1, 13 + addi a3, sp, 21 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 27(sp) - addi a2, a1, 20 + addi a2, a1, 12 + addi a3, sp, 20 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 28(sp) - addi a2, a1, 21 + addi a2, a1, 11 + addi a3, sp, 19 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 29(sp) - addi a2, a1, 22 + addi a2, a1, 10 + addi a3, sp, 18 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 30(sp) - addi a2, a1, 23 + addi a2, a1, 9 + addi a3, sp, 17 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 31(sp) - addi a2, a1, 24 + addi a2, a1, 8 + addi a3, sp, 16 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 32(sp) - addi a2, a1, 25 + addi a2, a1, 7 + addi a3, sp, 15 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 33(sp) - addi a2, a1, 26 + addi a2, a1, 6 + addi a3, sp, 14 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 34(sp) - addi a2, a1, 27 + addi a2, a1, 5 + addi a3, sp, 13 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 35(sp) - addi a2, a1, 28 + addi a2, a1, 4 + addi a3, sp, 12 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 36(sp) - addi a2, a1, 29 + addi a2, a1, 3 + addi a3, sp, 11 #APP lb a2, 0(a2) + sb 
a2, 0(a3) #NO_APP - sb a2, 37(sp) - addi a2, a1, 30 + addi a2, a1, 2 + addi a3, sp, 10 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 38(sp) - addi a1, a1, 31 - #APP - lb a1, 0(a1) - #NO_APP - sb a1, 39(sp) - j .LBB0_12 -.LBB0_6: - li a3, 32 -.LBB0_7: - addi a4, sp, 8 - li a5, 3 -.LBB0_8: - add a6, a1, a2 - #APP - lw a6, 0(a6) - #NO_APP - add a7, a4, a2 - sb a6, 0(a7) - srli t0, a6, 24 - sb t0, 3(a7) - srli t0, a6, 16 - sb t0, 2(a7) - srli a6, a6, 8 - sb a6, 1(a7) - addi a3, a3, -4 - addi a2, a2, 4 - bltu a5, a3, .LBB0_8 -.LBB0_9: - beqz a3, .LBB0_12 - addi a4, sp, 8 - add a4, a4, a2 - add a1, a1, a2 -.LBB0_11: - #APP - lb a2, 0(a1) - #NO_APP - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB0_11 -.LBB0_12: - addi a1, sp, 8 - li a2, 32 - call memcpy@plt - fence r, rw - lw ra, 44(sp) - addi sp, sp, 48 - ret -.LBB2_3: - lbu a7, 0(a4) - mv t0, a6 - #APP - sb a7, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a6, a6, 1 - addi a4, a4, 1 - bgeu a6, t0, .LBB2_3 - sub a2, a3, a2 - li a3, 4 - bgeu a2, a3, .LBB2_7 - j .LBB2_9 -.LBB2_5: - lbu a1, 32(sp) - #APP - sb a1, 0(a0) - #NO_APP - lbu a1, 33(sp) - addi a2, a0, 1 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 34(sp) - addi a2, a0, 2 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 35(sp) - #APP - sb a1, 0(a3) - #NO_APP - lbu a1, 36(sp) - addi a2, a0, 4 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 37(sp) - addi a2, a0, 5 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 38(sp) - addi a2, a0, 6 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 39(sp) - addi a2, a0, 7 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 40(sp) - addi a2, a0, 8 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 41(sp) - addi a2, a0, 9 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 42(sp) - addi a2, a0, 10 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 43(sp) - addi a2, a0, 11 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 44(sp) - addi a2, a0, 12 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 45(sp) - addi a2, a0, 13 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 46(sp) - addi a2, a0, 14 
- #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 47(sp) - addi a2, a0, 15 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 48(sp) - addi a2, a0, 16 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 49(sp) - addi a2, a0, 17 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 50(sp) - addi a2, a0, 18 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 51(sp) - addi a2, a0, 19 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 52(sp) - addi a2, a0, 20 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 53(sp) - addi a2, a0, 21 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 54(sp) - addi a2, a0, 22 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 55(sp) - addi a2, a0, 23 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 56(sp) - addi a2, a0, 24 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 57(sp) - addi a2, a0, 25 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 58(sp) - addi a2, a0, 26 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 59(sp) - addi a2, a0, 27 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 60(sp) - addi a2, a0, 28 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 61(sp) - addi a2, a0, 29 + addi a2, a1, 1 + addi a3, sp, 9 #APP - sb a1, 0(a2) + lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - lbu a1, 62(sp) - addi a2, a0, 30 + addi a2, sp, 8 #APP + lb a1, 0(a1) sb a1, 0(a2) #NO_APP - lbu a1, 63(sp) - addi a0, a0, 31 - #APP - sb a1, 0(a0) - #NO_APP - j .LBB2_12 -.LBB2_6: - li a2, 32 -.LBB2_7: - addi a3, sp, 32 - li a4, 3 -.LBB2_8: - add a5, a3, a1 - lbu a6, 1(a5) - lbu a7, 0(a5) - lbu t0, 3(a5) - lbu a5, 2(a5) - slli a6, a6, 8 - or a6, a6, a7 - slli a7, t0, 8 - or a5, a7, a5 - slli a5, a5, 16 - or a5, a5, a6 - add a6, a0, a1 - #APP - sw a5, 0(a6) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a4, a2, .LBB2_8 -.LBB2_9: - beqz a2, .LBB2_12 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a3, a1 -.LBB2_11: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB2_11 -.LBB2_12: - addi sp, sp, 64 - ret -.LBB4_3: - #APP - lb t0, 0(a6) - #NO_APP - mv t1, a7 - sb t0, 0(a5) - addi a5, a5, 1 - addi a7, a7, 1 - addi a6, a6, 1 - 
bgeu a7, t1, .LBB4_3 - sub a3, a3, a4 - li a4, 4 - bltu a3, a4, .LBB4_7 -.LBB4_5: - addi a4, sp, 8 - li a5, 3 -.LBB4_6: - add a6, a1, a2 - #APP - lw a6, 0(a6) - #NO_APP - add a7, a4, a2 - sb a6, 0(a7) - srli t0, a6, 24 - sb t0, 3(a7) - srli t0, a6, 16 - sb t0, 2(a7) - srli a6, a6, 8 - sb a6, 1(a7) - addi a3, a3, -4 - addi a2, a2, 4 - bltu a5, a3, .LBB4_6 -.LBB4_7: - beqz a3, .LBB4_11 - addi a4, sp, 8 - add a4, a4, a2 - add a1, a1, a2 -.LBB4_9: - #APP - lb a2, 0(a1) - #NO_APP - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB4_9 - j .LBB4_11 -.LBB4_10: - addi a2, a1, 30 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 38(sp) - addi a2, a1, 28 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 36(sp) - addi a2, a1, 26 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 34(sp) - addi a2, a1, 24 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 32(sp) - addi a2, a1, 22 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 30(sp) - addi a2, a1, 20 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 28(sp) - addi a2, a1, 18 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 26(sp) - addi a2, a1, 16 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 24(sp) - addi a2, a1, 14 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 22(sp) - addi a2, a1, 12 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 20(sp) - addi a2, a1, 10 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 18(sp) - addi a2, a1, 8 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 16(sp) - addi a2, a1, 6 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 14(sp) - addi a2, a1, 4 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 12(sp) - addi a2, a1, 2 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 10(sp) - #APP - lh a1, 0(a1) - #NO_APP - sh a1, 8(sp) -.LBB4_11: addi a1, sp, 8 li a2, 32 call memcpy@plt @@ -577,140 +198,6 @@ asm_test::atomic_memcpy_load_align1::acquire: lw ra, 44(sp) addi sp, sp, 48 ret -.LBB6_3: - lbu a7, 0(a4) - mv t0, a6 - #APP - sb a7, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a6, a6, 1 - addi a4, a4, 1 - bgeu a6, t0, .LBB6_3 - sub a2, a2, a3 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a3, sp, 32 
- li a4, 3 -.LBB6_6: - add a5, a3, a1 - lbu a6, 1(a5) - lbu a7, 0(a5) - lbu t0, 3(a5) - lbu a5, 2(a5) - slli a6, a6, 8 - or a6, a6, a7 - slli a7, t0, 8 - or a5, a7, a5 - slli a5, a5, 16 - or a5, a5, a6 - add a6, a0, a1 - #APP - sw a5, 0(a6) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a4, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a3, a1 -.LBB6_9: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lhu a1, 62(sp) - addi a2, a0, 30 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 60(sp) - addi a2, a0, 28 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 58(sp) - addi a2, a0, 26 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 56(sp) - addi a2, a0, 24 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 54(sp) - addi a2, a0, 22 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 52(sp) - addi a2, a0, 20 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 50(sp) - addi a2, a0, 18 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 48(sp) - addi a2, a0, 16 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 46(sp) - addi a2, a0, 14 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 44(sp) - addi a2, a0, 12 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 42(sp) - addi a2, a0, 10 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 40(sp) - addi a2, a0, 8 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 38(sp) - addi a2, a0, 6 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 36(sp) - addi a2, a0, 4 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 34(sp) - addi a2, a0, 2 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 32(sp) - #APP - sh a1, 0(a0) - #NO_APP - addi sp, sp, 64 - ret asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: sw ra, 76(sp) sw s0, 72(sp) @@ -815,487 +302,3 @@ asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: lw s11, 28(sp) addi sp, sp, 80 ret -.LBB2_3: - lbu a7, 0(a4) - mv t0, a6 - #APP - sb a7, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a6, a6, 1 - addi a4, a4, 1 - bgeu a6, t0, 
.LBB2_3 - sub a2, a3, a2 - li a3, 4 - bgeu a2, a3, .LBB2_7 - j .LBB2_9 -.LBB2_5: - lbu a1, 32(sp) - #APP - sb a1, 0(a0) - #NO_APP - lbu a1, 33(sp) - addi a2, a0, 1 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 34(sp) - addi a2, a0, 2 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 35(sp) - #APP - sb a1, 0(a3) - #NO_APP - lbu a1, 36(sp) - addi a2, a0, 4 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 37(sp) - addi a2, a0, 5 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 38(sp) - addi a2, a0, 6 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 39(sp) - addi a2, a0, 7 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 40(sp) - addi a2, a0, 8 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 41(sp) - addi a2, a0, 9 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 42(sp) - addi a2, a0, 10 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 43(sp) - addi a2, a0, 11 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 44(sp) - addi a2, a0, 12 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 45(sp) - addi a2, a0, 13 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 46(sp) - addi a2, a0, 14 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 47(sp) - addi a2, a0, 15 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 48(sp) - addi a2, a0, 16 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 49(sp) - addi a2, a0, 17 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 50(sp) - addi a2, a0, 18 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 51(sp) - addi a2, a0, 19 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 52(sp) - addi a2, a0, 20 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 53(sp) - addi a2, a0, 21 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 54(sp) - addi a2, a0, 22 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 55(sp) - addi a2, a0, 23 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 56(sp) - addi a2, a0, 24 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 57(sp) - addi a2, a0, 25 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 58(sp) - addi a2, a0, 26 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 59(sp) - addi a2, a0, 27 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 60(sp) - addi a2, a0, 28 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 61(sp) - 
addi a2, a0, 29 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 62(sp) - addi a2, a0, 30 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 63(sp) - addi a0, a0, 31 - #APP - sb a1, 0(a0) - #NO_APP - j .LBB2_12 -.LBB2_6: - li a2, 32 -.LBB2_7: - addi a3, sp, 32 - li a4, 3 -.LBB2_8: - add a5, a3, a1 - lbu a6, 1(a5) - lbu a7, 0(a5) - lbu t0, 3(a5) - lbu a5, 2(a5) - slli a6, a6, 8 - or a6, a6, a7 - slli a7, t0, 8 - or a5, a7, a5 - slli a5, a5, 16 - or a5, a5, a6 - add a6, a0, a1 - #APP - sw a5, 0(a6) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a4, a2, .LBB2_8 -.LBB2_9: - beqz a2, .LBB2_12 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a3, a1 -.LBB2_11: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB2_11 -.LBB2_12: - addi sp, sp, 64 - ret -.LBB4_3: - #APP - lb t0, 0(a6) - #NO_APP - mv t1, a7 - sb t0, 0(a5) - addi a5, a5, 1 - addi a7, a7, 1 - addi a6, a6, 1 - bgeu a7, t1, .LBB4_3 - sub a3, a3, a4 - li a4, 4 - bltu a3, a4, .LBB4_7 -.LBB4_5: - addi a4, sp, 8 - li a5, 3 -.LBB4_6: - add a6, a1, a2 - #APP - lw a6, 0(a6) - #NO_APP - add a7, a4, a2 - sb a6, 0(a7) - srli t0, a6, 24 - sb t0, 3(a7) - srli t0, a6, 16 - sb t0, 2(a7) - srli a6, a6, 8 - sb a6, 1(a7) - addi a3, a3, -4 - addi a2, a2, 4 - bltu a5, a3, .LBB4_6 -.LBB4_7: - beqz a3, .LBB4_11 - addi a4, sp, 8 - add a4, a4, a2 - add a1, a1, a2 -.LBB4_9: - #APP - lb a2, 0(a1) - #NO_APP - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB4_9 - j .LBB4_11 -.LBB4_10: - addi a2, a1, 30 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 38(sp) - addi a2, a1, 28 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 36(sp) - addi a2, a1, 26 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 34(sp) - addi a2, a1, 24 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 32(sp) - addi a2, a1, 22 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 30(sp) - addi a2, a1, 20 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 28(sp) - addi a2, a1, 18 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 26(sp) - addi a2, a1, 16 - 
#APP - lh a2, 0(a2) - #NO_APP - sh a2, 24(sp) - addi a2, a1, 14 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 22(sp) - addi a2, a1, 12 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 20(sp) - addi a2, a1, 10 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 18(sp) - addi a2, a1, 8 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 16(sp) - addi a2, a1, 6 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 14(sp) - addi a2, a1, 4 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 12(sp) - addi a2, a1, 2 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 10(sp) - #APP - lh a1, 0(a1) - #NO_APP - sh a1, 8(sp) -.LBB4_11: - addi a1, sp, 8 - li a2, 32 - call memcpy@plt - fence r, rw - lw ra, 44(sp) - addi sp, sp, 48 - ret -.LBB6_3: - lbu a7, 0(a4) - mv t0, a6 - #APP - sb a7, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a6, a6, 1 - addi a4, a4, 1 - bgeu a6, t0, .LBB6_3 - sub a2, a2, a3 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a3, sp, 32 - li a4, 3 -.LBB6_6: - add a5, a3, a1 - lbu a6, 1(a5) - lbu a7, 0(a5) - lbu t0, 3(a5) - lbu a5, 2(a5) - slli a6, a6, 8 - or a6, a6, a7 - slli a7, t0, 8 - or a5, a7, a5 - slli a5, a5, 16 - or a5, a5, a6 - add a6, a0, a1 - #APP - sw a5, 0(a6) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a4, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a3, a1 -.LBB6_9: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lhu a1, 62(sp) - addi a2, a0, 30 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 60(sp) - addi a2, a0, 28 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 58(sp) - addi a2, a0, 26 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 56(sp) - addi a2, a0, 24 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 54(sp) - addi a2, a0, 22 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 52(sp) - addi a2, a0, 20 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 50(sp) - addi a2, a0, 18 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 48(sp) - addi a2, a0, 16 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 
46(sp) - addi a2, a0, 14 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 44(sp) - addi a2, a0, 12 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 42(sp) - addi a2, a0, 10 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 40(sp) - addi a2, a0, 8 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 38(sp) - addi a2, a0, 6 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 36(sp) - addi a2, a0, 4 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 34(sp) - addi a2, a0, 2 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 32(sp) - #APP - sh a1, 0(a0) - #NO_APP - addi sp, sp, 64 - ret diff --git a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align16 b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align16 index 18bc36f..0c3329e 100644 --- a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align16 @@ -1,43 +1,69 @@ asm_test::atomic_memcpy_load_align16::acquire: + addi a2, a1, 28 + addi a3, sp, 28 #APP lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a3, a1, 24 + addi a2, a1, 24 + addi a3, sp, 24 #APP - lw a3, 0(a3) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a4, a1, 20 + addi a2, a1, 20 + addi a3, sp, 20 #APP - lw a4, 0(a4) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a5, a1, 16 + addi a2, a1, 16 + addi a3, sp, 16 #APP - lw a5, 0(a5) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a6, a1, 12 + addi a2, a1, 12 + addi a3, sp, 12 #APP - lw a6, 0(a6) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a7, a1, 8 + addi a2, a1, 8 + addi a3, sp, 8 #APP - lw a7, 0(a7) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi t0, a1, 4 + addi a2, a1, 4 + addi a3, sp, 4 #APP - lw t0, 0(t0) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP + mv a2, sp #APP lw a1, 0(a1) + sw a1, 0(a2) #NO_APP + lw a1, 0(sp) + lw a2, 4(sp) + lw a3, 8(sp) + lw a4, 12(sp) sw a1, 0(a0) - sw t0, 4(a0) - sw a7, 8(a0) - sw a6, 12(a0) - sw a5, 16(a0) - sw a4, 20(a0) + sw a2, 4(a0) + sw a3, 8(a0) + sw a4, 12(a0) + lw a1, 16(sp) + lw a2, 20(sp) + lw a3, 24(sp) + lw a4, 28(sp) + 
sw a1, 16(a0) + sw a2, 20(a0) sw a3, 24(a0) - sw a2, 28(a0) + sw a4, 28(a0) fence r, rw + addi sp, sp, 32 ret asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: lw a3, 24(a1) diff --git a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align2 b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align2 index 06b47fa..a46dd01 100644 --- a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align2 @@ -1,143 +1,100 @@ asm_test::atomic_memcpy_load_align2::acquire: sw ra, 44(sp) - addi a2, a1, 3 - andi a4, a2, -4 - sub a2, a4, a1 - li a3, 32 - bltu a3, a2, .LBB4_10 - beqz a2, .LBB4_5 - addi a3, a1, 32 - sub a7, a1, a4 - addi a5, sp, 8 - mv a6, a1 -.LBB4_3: - #APP - lb t0, 0(a6) - #NO_APP - mv t1, a7 - sb t0, 0(a5) - addi a5, a5, 1 - addi a7, a7, 1 - addi a6, a6, 1 - bgeu a7, t1, .LBB4_3 - sub a3, a3, a4 - li a4, 4 - bltu a3, a4, .LBB4_7 -.LBB4_5: - addi a4, sp, 8 - li a5, 3 -.LBB4_6: - add a6, a1, a2 - #APP - lw a6, 0(a6) - #NO_APP - add a7, a4, a2 - sb a6, 0(a7) - srli t0, a6, 24 - sb t0, 3(a7) - srli t0, a6, 16 - sb t0, 2(a7) - srli a6, a6, 8 - sb a6, 1(a7) - addi a3, a3, -4 - addi a2, a2, 4 - bltu a5, a3, .LBB4_6 -.LBB4_7: - beqz a3, .LBB4_11 - addi a4, sp, 8 - add a4, a4, a2 - add a1, a1, a2 -.LBB4_9: - #APP - lb a2, 0(a1) - #NO_APP - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB4_9 - j .LBB4_11 -.LBB4_10: addi a2, a1, 30 + addi a3, sp, 38 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 38(sp) addi a2, a1, 28 + addi a3, sp, 36 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 36(sp) addi a2, a1, 26 + addi a3, sp, 34 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 34(sp) addi a2, a1, 24 + addi a3, sp, 32 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 32(sp) addi a2, a1, 22 + addi a3, sp, 30 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 30(sp) addi a2, a1, 20 + addi a3, sp, 28 #APP lh a2, 
0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 28(sp) addi a2, a1, 18 + addi a3, sp, 26 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 26(sp) addi a2, a1, 16 + addi a3, sp, 24 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 24(sp) addi a2, a1, 14 + addi a3, sp, 22 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 22(sp) addi a2, a1, 12 + addi a3, sp, 20 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 20(sp) addi a2, a1, 10 + addi a3, sp, 18 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 18(sp) addi a2, a1, 8 + addi a3, sp, 16 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 16(sp) addi a2, a1, 6 + addi a3, sp, 14 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 14(sp) addi a2, a1, 4 + addi a3, sp, 12 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 12(sp) addi a2, a1, 2 + addi a3, sp, 10 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 10(sp) + addi a2, sp, 8 #APP lh a1, 0(a1) + sh a1, 0(a2) #NO_APP - sh a1, 8(sp) -.LBB4_11: addi a1, sp, 8 li a2, 32 call memcpy@plt @@ -145,140 +102,6 @@ asm_test::atomic_memcpy_load_align2::acquire: lw ra, 44(sp) addi sp, sp, 48 ret -.LBB6_3: - lbu a7, 0(a4) - mv t0, a6 - #APP - sb a7, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a6, a6, 1 - addi a4, a4, 1 - bgeu a6, t0, .LBB6_3 - sub a2, a2, a3 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a3, sp, 32 - li a4, 3 -.LBB6_6: - add a5, a3, a1 - lbu a6, 1(a5) - lbu a7, 0(a5) - lbu t0, 3(a5) - lbu a5, 2(a5) - slli a6, a6, 8 - or a6, a6, a7 - slli a7, t0, 8 - or a5, a7, a5 - slli a5, a5, 16 - or a5, a5, a6 - add a6, a0, a1 - #APP - sw a5, 0(a6) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a4, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a3, a1 -.LBB6_9: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lhu a1, 62(sp) - addi a2, a0, 30 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 60(sp) - addi a2, a0, 28 - #APP - sh a1, 0(a2) 
- #NO_APP - lhu a1, 58(sp) - addi a2, a0, 26 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 56(sp) - addi a2, a0, 24 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 54(sp) - addi a2, a0, 22 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 52(sp) - addi a2, a0, 20 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 50(sp) - addi a2, a0, 18 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 48(sp) - addi a2, a0, 16 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 46(sp) - addi a2, a0, 14 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 44(sp) - addi a2, a0, 12 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 42(sp) - addi a2, a0, 10 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 40(sp) - addi a2, a0, 8 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 38(sp) - addi a2, a0, 6 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 36(sp) - addi a2, a0, 4 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 34(sp) - addi a2, a0, 2 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 32(sp) - #APP - sh a1, 0(a0) - #NO_APP - addi sp, sp, 64 - ret asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: sw s0, 12(sp) sw s1, 8(sp) @@ -319,137 +142,3 @@ asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: lw s1, 8(sp) addi sp, sp, 16 ret -.LBB6_3: - lbu a7, 0(a4) - mv t0, a6 - #APP - sb a7, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a6, a6, 1 - addi a4, a4, 1 - bgeu a6, t0, .LBB6_3 - sub a2, a2, a3 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a3, sp, 32 - li a4, 3 -.LBB6_6: - add a5, a3, a1 - lbu a6, 1(a5) - lbu a7, 0(a5) - lbu t0, 3(a5) - lbu a5, 2(a5) - slli a6, a6, 8 - or a6, a6, a7 - slli a7, t0, 8 - or a5, a7, a5 - slli a5, a5, 16 - or a5, a5, a6 - add a6, a0, a1 - #APP - sw a5, 0(a6) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a4, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a3, a1 -.LBB6_9: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lhu a1, 62(sp) - addi a2, a0, 30 - #APP - sh 
a1, 0(a2) - #NO_APP - lhu a1, 60(sp) - addi a2, a0, 28 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 58(sp) - addi a2, a0, 26 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 56(sp) - addi a2, a0, 24 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 54(sp) - addi a2, a0, 22 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 52(sp) - addi a2, a0, 20 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 50(sp) - addi a2, a0, 18 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 48(sp) - addi a2, a0, 16 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 46(sp) - addi a2, a0, 14 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 44(sp) - addi a2, a0, 12 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 42(sp) - addi a2, a0, 10 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 40(sp) - addi a2, a0, 8 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 38(sp) - addi a2, a0, 6 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 36(sp) - addi a2, a0, 4 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 34(sp) - addi a2, a0, 2 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 32(sp) - #APP - sh a1, 0(a0) - #NO_APP - addi sp, sp, 64 - ret diff --git a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align4 b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align4 index f7ec9dc..b7aca51 100644 --- a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align4 @@ -1,43 +1,69 @@ asm_test::atomic_memcpy_load_align4::acquire: + addi a2, a1, 28 + addi a3, sp, 28 #APP lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a3, a1, 24 + addi a2, a1, 24 + addi a3, sp, 24 #APP - lw a3, 0(a3) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a4, a1, 20 + addi a2, a1, 20 + addi a3, sp, 20 #APP - lw a4, 0(a4) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a5, a1, 16 + addi a2, a1, 16 + addi a3, sp, 16 #APP - lw a5, 0(a5) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a6, a1, 12 + addi a2, a1, 12 + addi a3, sp, 12 #APP - lw a6, 0(a6) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a7, a1, 8 + addi a2, a1, 8 + addi 
a3, sp, 8 #APP - lw a7, 0(a7) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi t0, a1, 4 + addi a2, a1, 4 + addi a3, sp, 4 #APP - lw t0, 0(t0) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP + mv a2, sp #APP lw a1, 0(a1) + sw a1, 0(a2) #NO_APP + lw a1, 0(sp) + lw a2, 4(sp) + lw a3, 8(sp) + lw a4, 12(sp) sw a1, 0(a0) - sw t0, 4(a0) - sw a7, 8(a0) - sw a6, 12(a0) - sw a5, 16(a0) - sw a4, 20(a0) + sw a2, 4(a0) + sw a3, 8(a0) + sw a4, 12(a0) + lw a1, 16(sp) + lw a2, 20(sp) + lw a3, 24(sp) + lw a4, 28(sp) + sw a1, 16(a0) + sw a2, 20(a0) sw a3, 24(a0) - sw a2, 28(a0) + sw a4, 28(a0) fence r, rw + addi sp, sp, 32 ret asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: lw a3, 24(a1) diff --git a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align8 b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align8 index 2310280..a6ae3cf 100644 --- a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_load_align8 @@ -1,43 +1,69 @@ asm_test::atomic_memcpy_load_align8::acquire: + addi a2, a1, 28 + addi a3, sp, 28 #APP lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a3, a1, 24 + addi a2, a1, 24 + addi a3, sp, 24 #APP - lw a3, 0(a3) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a4, a1, 20 + addi a2, a1, 20 + addi a3, sp, 20 #APP - lw a4, 0(a4) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a5, a1, 16 + addi a2, a1, 16 + addi a3, sp, 16 #APP - lw a5, 0(a5) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a6, a1, 12 + addi a2, a1, 12 + addi a3, sp, 12 #APP - lw a6, 0(a6) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a7, a1, 8 + addi a2, a1, 8 + addi a3, sp, 8 #APP - lw a7, 0(a7) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi t0, a1, 4 + addi a2, a1, 4 + addi a3, sp, 4 #APP - lw t0, 0(t0) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP + mv a2, sp #APP lw a1, 0(a1) + sw a1, 0(a2) #NO_APP + lw a1, 0(sp) + lw a2, 4(sp) + lw a3, 8(sp) + lw a4, 12(sp) sw a1, 0(a0) - sw t0, 4(a0) - sw a7, 8(a0) - sw a6, 
12(a0) - sw a5, 16(a0) - sw a4, 20(a0) + sw a2, 4(a0) + sw a3, 8(a0) + sw a4, 12(a0) + lw a1, 16(sp) + lw a2, 20(sp) + lw a3, 24(sp) + lw a4, 28(sp) + sw a1, 16(a0) + sw a2, 20(a0) sw a3, 24(a0) - sw a2, 28(a0) + sw a4, 28(a0) fence r, rw + addi sp, sp, 32 ret asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: lw a3, 24(a1) diff --git a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align1 b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align1 index 4749d64..9bae64a 100644 --- a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align1 @@ -91,511 +91,210 @@ asm_test::atomic_memcpy_store_align1::release: lw a1, 28(sp) lw a2, 24(sp) lw a3, 20(sp) + lw a4, 16(sp) sw a1, 60(sp) sw a2, 56(sp) sw a3, 52(sp) - lw a1, 16(sp) - lw a2, 12(sp) - lw a3, 8(sp) - lw a4, 4(sp) - sw a1, 48(sp) - sw a2, 44(sp) - sw a3, 40(sp) - sw a4, 36(sp) + sw a4, 48(sp) + lw a1, 12(sp) + lw a2, 8(sp) + lw a3, 4(sp) lw a4, 0(sp) - addi a3, a0, 3 - andi a2, a3, -4 - sub a1, a2, a0 - li a5, 33 + sw a1, 44(sp) + sw a2, 40(sp) + sw a3, 36(sp) sw a4, 32(sp) - bgeu a1, a5, .LBB2_5 - beqz a1, .LBB2_6 - addi a3, a0, 32 - sub a6, a0, a2 - addi a4, sp, 32 - mv a5, a0 -.LBB2_3: - lbu a7, 0(a4) - mv t0, a6 - #APP - sb a7, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a6, a6, 1 - addi a4, a4, 1 - bgeu a6, t0, .LBB2_3 - sub a2, a3, a2 - li a3, 4 - bgeu a2, a3, .LBB2_7 - j .LBB2_9 -.LBB2_5: - lbu a1, 32(sp) + addi a1, sp, 36 + addi a2, sp, 40 + addi a3, sp, 44 + addi a4, sp, 48 + addi a5, sp, 52 + addi a6, sp, 56 + addi a7, sp, 60 + addi t0, a0, 31 + addi t1, sp, 63 #APP - sb a1, 0(a0) + lb t1, 0(t1) + sb t1, 0(t0) #NO_APP - lbu a1, 33(sp) - addi a2, a0, 1 + addi t0, a0, 30 + addi t1, sp, 62 #APP - sb a1, 0(a2) + lb t1, 0(t1) + sb t1, 0(t0) #NO_APP - lbu a1, 34(sp) - addi a2, a0, 2 + addi t0, a0, 29 + addi t1, sp, 61 #APP - sb a1, 0(a2) + lb t1, 0(t1) + sb t1, 0(t0) #NO_APP 
- lbu a1, 35(sp) + addi t0, a0, 28 #APP - sb a1, 0(a3) + lb a7, 0(a7) + sb a7, 0(t0) #NO_APP - lbu a1, 36(sp) - addi a2, a0, 4 + addi a7, a0, 27 + addi t0, sp, 59 #APP - sb a1, 0(a2) + lb t0, 0(t0) + sb t0, 0(a7) #NO_APP - lbu a1, 37(sp) - addi a2, a0, 5 + addi a7, a0, 26 + addi t0, sp, 58 #APP - sb a1, 0(a2) + lb t0, 0(t0) + sb t0, 0(a7) #NO_APP - lbu a1, 38(sp) - addi a2, a0, 6 + addi a7, a0, 25 + addi t0, sp, 57 #APP - sb a1, 0(a2) + lb t0, 0(t0) + sb t0, 0(a7) #NO_APP - lbu a1, 39(sp) - addi a2, a0, 7 + addi a7, a0, 24 #APP - sb a1, 0(a2) + lb a6, 0(a6) + sb a6, 0(a7) #NO_APP - lbu a1, 40(sp) - addi a2, a0, 8 + addi a6, a0, 23 + addi a7, sp, 55 #APP - sb a1, 0(a2) + lb a7, 0(a7) + sb a7, 0(a6) #NO_APP - lbu a1, 41(sp) - addi a2, a0, 9 + addi a6, a0, 22 + addi a7, sp, 54 #APP - sb a1, 0(a2) + lb a7, 0(a7) + sb a7, 0(a6) #NO_APP - lbu a1, 42(sp) - addi a2, a0, 10 + addi a6, a0, 21 + addi a7, sp, 53 #APP - sb a1, 0(a2) + lb a7, 0(a7) + sb a7, 0(a6) #NO_APP - lbu a1, 43(sp) - addi a2, a0, 11 + addi a6, a0, 20 #APP - sb a1, 0(a2) + lb a5, 0(a5) + sb a5, 0(a6) #NO_APP - lbu a1, 44(sp) - addi a2, a0, 12 + addi a5, a0, 19 + addi a6, sp, 51 #APP - sb a1, 0(a2) + lb a6, 0(a6) + sb a6, 0(a5) #NO_APP - lbu a1, 45(sp) - addi a2, a0, 13 + addi a5, a0, 18 + addi a6, sp, 50 #APP - sb a1, 0(a2) + lb a6, 0(a6) + sb a6, 0(a5) #NO_APP - lbu a1, 46(sp) - addi a2, a0, 14 + addi a5, a0, 17 + addi a6, sp, 49 #APP - sb a1, 0(a2) + lb a6, 0(a6) + sb a6, 0(a5) #NO_APP - lbu a1, 47(sp) - addi a2, a0, 15 + addi a5, a0, 16 #APP - sb a1, 0(a2) + lb a4, 0(a4) + sb a4, 0(a5) #NO_APP - lbu a1, 48(sp) - addi a2, a0, 16 + addi a4, a0, 15 + addi a5, sp, 47 #APP - sb a1, 0(a2) + lb a5, 0(a5) + sb a5, 0(a4) #NO_APP - lbu a1, 49(sp) - addi a2, a0, 17 + addi a4, a0, 14 + addi a5, sp, 46 #APP - sb a1, 0(a2) + lb a5, 0(a5) + sb a5, 0(a4) #NO_APP - lbu a1, 50(sp) - addi a2, a0, 18 + addi a4, a0, 13 + addi a5, sp, 45 #APP - sb a1, 0(a2) + lb a5, 0(a5) + sb a5, 0(a4) #NO_APP - lbu a1, 51(sp) - addi a2, a0, 
19 + addi a4, a0, 12 #APP - sb a1, 0(a2) + lb a3, 0(a3) + sb a3, 0(a4) #NO_APP - lbu a1, 52(sp) - addi a2, a0, 20 + addi a3, a0, 11 + addi a4, sp, 43 #APP - sb a1, 0(a2) + lb a4, 0(a4) + sb a4, 0(a3) #NO_APP - lbu a1, 53(sp) - addi a2, a0, 21 + addi a3, a0, 10 + addi a4, sp, 42 #APP - sb a1, 0(a2) + lb a4, 0(a4) + sb a4, 0(a3) #NO_APP - lbu a1, 54(sp) - addi a2, a0, 22 + addi a3, a0, 9 + addi a4, sp, 41 #APP - sb a1, 0(a2) + lb a4, 0(a4) + sb a4, 0(a3) #NO_APP - lbu a1, 55(sp) - addi a2, a0, 23 + addi a3, a0, 8 #APP - sb a1, 0(a2) + lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - lbu a1, 56(sp) - addi a2, a0, 24 + addi a2, a0, 7 + addi a3, sp, 39 #APP - sb a1, 0(a2) + lb a3, 0(a3) + sb a3, 0(a2) #NO_APP - lbu a1, 57(sp) - addi a2, a0, 25 + addi a2, a0, 6 + addi a3, sp, 38 #APP - sb a1, 0(a2) + lb a3, 0(a3) + sb a3, 0(a2) #NO_APP - lbu a1, 58(sp) - addi a2, a0, 26 + addi a2, a0, 5 + addi a3, sp, 37 #APP - sb a1, 0(a2) + lb a3, 0(a3) + sb a3, 0(a2) #NO_APP - lbu a1, 59(sp) - addi a2, a0, 27 + addi a2, a0, 4 #APP + lb a1, 0(a1) sb a1, 0(a2) #NO_APP - lbu a1, 60(sp) - addi a2, a0, 28 + addi a1, a0, 3 + addi a2, sp, 35 #APP - sb a1, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - lbu a1, 61(sp) - addi a2, a0, 29 + addi a1, a0, 2 + addi a2, sp, 34 #APP - sb a1, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - lbu a1, 62(sp) - addi a2, a0, 30 + addi a1, a0, 1 + addi a2, sp, 33 #APP - sb a1, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - lbu a1, 63(sp) - addi a0, a0, 31 + addi a1, sp, 32 #APP + lb a1, 0(a1) sb a1, 0(a0) #NO_APP - j .LBB2_12 -.LBB2_6: - li a2, 32 -.LBB2_7: - addi a3, sp, 32 - li a4, 3 -.LBB2_8: - add a5, a3, a1 - lbu a6, 1(a5) - lbu a7, 0(a5) - lbu t0, 3(a5) - lbu a5, 2(a5) - slli a6, a6, 8 - or a6, a6, a7 - slli a7, t0, 8 - or a5, a7, a5 - slli a5, a5, 16 - or a5, a5, a6 - add a6, a0, a1 - #APP - sw a5, 0(a6) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a4, a2, .LBB2_8 -.LBB2_9: - beqz a2, .LBB2_12 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a3, a1 -.LBB2_11: - 
lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB2_11 -.LBB2_12: - addi sp, sp, 64 - ret -.LBB4_3: - #APP - lb t0, 0(a6) - #NO_APP - mv t1, a7 - sb t0, 0(a5) - addi a5, a5, 1 - addi a7, a7, 1 - addi a6, a6, 1 - bgeu a7, t1, .LBB4_3 - sub a3, a3, a4 - li a4, 4 - bltu a3, a4, .LBB4_7 -.LBB4_5: - addi a4, sp, 8 - li a5, 3 -.LBB4_6: - add a6, a1, a2 - #APP - lw a6, 0(a6) - #NO_APP - add a7, a4, a2 - sb a6, 0(a7) - srli t0, a6, 24 - sb t0, 3(a7) - srli t0, a6, 16 - sb t0, 2(a7) - srli a6, a6, 8 - sb a6, 1(a7) - addi a3, a3, -4 - addi a2, a2, 4 - bltu a5, a3, .LBB4_6 -.LBB4_7: - beqz a3, .LBB4_11 - addi a4, sp, 8 - add a4, a4, a2 - add a1, a1, a2 -.LBB4_9: - #APP - lb a2, 0(a1) - #NO_APP - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB4_9 - j .LBB4_11 -.LBB4_10: - addi a2, a1, 30 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 38(sp) - addi a2, a1, 28 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 36(sp) - addi a2, a1, 26 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 34(sp) - addi a2, a1, 24 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 32(sp) - addi a2, a1, 22 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 30(sp) - addi a2, a1, 20 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 28(sp) - addi a2, a1, 18 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 26(sp) - addi a2, a1, 16 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 24(sp) - addi a2, a1, 14 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 22(sp) - addi a2, a1, 12 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 20(sp) - addi a2, a1, 10 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 18(sp) - addi a2, a1, 8 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 16(sp) - addi a2, a1, 6 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 14(sp) - addi a2, a1, 4 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 12(sp) - addi a2, a1, 2 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 10(sp) - #APP - lh a1, 0(a1) - #NO_APP - sh a1, 8(sp) -.LBB4_11: - addi a1, sp, 8 - li a2, 32 - call memcpy@plt - fence r, rw - lw ra, 44(sp) - addi sp, 
sp, 48 - ret -.LBB6_3: - lbu a7, 0(a4) - mv t0, a6 - #APP - sb a7, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a6, a6, 1 - addi a4, a4, 1 - bgeu a6, t0, .LBB6_3 - sub a2, a2, a3 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a3, sp, 32 - li a4, 3 -.LBB6_6: - add a5, a3, a1 - lbu a6, 1(a5) - lbu a7, 0(a5) - lbu t0, 3(a5) - lbu a5, 2(a5) - slli a6, a6, 8 - or a6, a6, a7 - slli a7, t0, 8 - or a5, a7, a5 - slli a5, a5, 16 - or a5, a5, a6 - add a6, a0, a1 - #APP - sw a5, 0(a6) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a4, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a3, a1 -.LBB6_9: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lhu a1, 62(sp) - addi a2, a0, 30 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 60(sp) - addi a2, a0, 28 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 58(sp) - addi a2, a0, 26 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 56(sp) - addi a2, a0, 24 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 54(sp) - addi a2, a0, 22 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 52(sp) - addi a2, a0, 20 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 50(sp) - addi a2, a0, 18 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 48(sp) - addi a2, a0, 16 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 46(sp) - addi a2, a0, 14 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 44(sp) - addi a2, a0, 12 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 42(sp) - addi a2, a0, 10 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 40(sp) - addi a2, a0, 8 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 38(sp) - addi a2, a0, 6 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 36(sp) - addi a2, a0, 4 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 34(sp) - addi a2, a0, 2 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 32(sp) - #APP - sh a1, 0(a0) - #NO_APP addi sp, sp, 64 ret asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: @@ -695,272 +394,3 @@ 
asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: lw ra, 44(sp) addi sp, sp, 48 ret -.LBB4_3: - #APP - lb t0, 0(a6) - #NO_APP - mv t1, a7 - sb t0, 0(a5) - addi a5, a5, 1 - addi a7, a7, 1 - addi a6, a6, 1 - bgeu a7, t1, .LBB4_3 - sub a3, a3, a4 - li a4, 4 - bltu a3, a4, .LBB4_7 -.LBB4_5: - addi a4, sp, 8 - li a5, 3 -.LBB4_6: - add a6, a1, a2 - #APP - lw a6, 0(a6) - #NO_APP - add a7, a4, a2 - sb a6, 0(a7) - srli t0, a6, 24 - sb t0, 3(a7) - srli t0, a6, 16 - sb t0, 2(a7) - srli a6, a6, 8 - sb a6, 1(a7) - addi a3, a3, -4 - addi a2, a2, 4 - bltu a5, a3, .LBB4_6 -.LBB4_7: - beqz a3, .LBB4_11 - addi a4, sp, 8 - add a4, a4, a2 - add a1, a1, a2 -.LBB4_9: - #APP - lb a2, 0(a1) - #NO_APP - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB4_9 - j .LBB4_11 -.LBB4_10: - addi a2, a1, 30 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 38(sp) - addi a2, a1, 28 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 36(sp) - addi a2, a1, 26 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 34(sp) - addi a2, a1, 24 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 32(sp) - addi a2, a1, 22 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 30(sp) - addi a2, a1, 20 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 28(sp) - addi a2, a1, 18 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 26(sp) - addi a2, a1, 16 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 24(sp) - addi a2, a1, 14 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 22(sp) - addi a2, a1, 12 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 20(sp) - addi a2, a1, 10 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 18(sp) - addi a2, a1, 8 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 16(sp) - addi a2, a1, 6 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 14(sp) - addi a2, a1, 4 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 12(sp) - addi a2, a1, 2 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 10(sp) - #APP - lh a1, 0(a1) - #NO_APP - sh a1, 8(sp) -.LBB4_11: - addi a1, sp, 8 - li a2, 32 - call memcpy@plt - fence r, rw - lw ra, 44(sp) - addi sp, sp, 48 - ret -.LBB6_3: - lbu a7, 0(a4) - mv t0, a6 - 
#APP - sb a7, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a6, a6, 1 - addi a4, a4, 1 - bgeu a6, t0, .LBB6_3 - sub a2, a2, a3 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a3, sp, 32 - li a4, 3 -.LBB6_6: - add a5, a3, a1 - lbu a6, 1(a5) - lbu a7, 0(a5) - lbu t0, 3(a5) - lbu a5, 2(a5) - slli a6, a6, 8 - or a6, a6, a7 - slli a7, t0, 8 - or a5, a7, a5 - slli a5, a5, 16 - or a5, a5, a6 - add a6, a0, a1 - #APP - sw a5, 0(a6) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a4, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a3, a1 -.LBB6_9: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lhu a1, 62(sp) - addi a2, a0, 30 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 60(sp) - addi a2, a0, 28 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 58(sp) - addi a2, a0, 26 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 56(sp) - addi a2, a0, 24 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 54(sp) - addi a2, a0, 22 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 52(sp) - addi a2, a0, 20 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 50(sp) - addi a2, a0, 18 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 48(sp) - addi a2, a0, 16 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 46(sp) - addi a2, a0, 14 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 44(sp) - addi a2, a0, 12 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 42(sp) - addi a2, a0, 10 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 40(sp) - addi a2, a0, 8 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 38(sp) - addi a2, a0, 6 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 36(sp) - addi a2, a0, 4 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 34(sp) - addi a2, a0, 2 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 32(sp) - #APP - sh a1, 0(a0) - #NO_APP - addi sp, sp, 64 - ret diff --git a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align16 b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align16 index 
f534915..2a69823 100644 --- a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align16 @@ -1,43 +1,85 @@ asm_test::atomic_memcpy_store_align16::release: - lw a3, 4(a1) - lw a4, 8(a1) - lw a5, 12(a1) - lw a6, 16(a1) - lw a7, 20(a1) - lw t0, 28(a1) - lw a1, 24(a1) + lw a2, 28(a1) + lw a3, 24(a1) + lw a4, 20(a1) + lw a5, 16(a1) + sw a2, 28(sp) + sw a3, 24(sp) + sw a4, 20(sp) + sw a5, 16(sp) + lw a2, 12(a1) + lw a3, 8(a1) + lw a4, 4(a1) + lw a1, 0(a1) + sw a2, 12(sp) + sw a3, 8(sp) + sw a4, 4(sp) + sw a1, 0(sp) fence rw, w - addi t1, a0, 28 + lw a1, 28(sp) + lw a2, 24(sp) + lw a3, 20(sp) + lw a4, 16(sp) + sw a1, 60(sp) + sw a2, 56(sp) + sw a3, 52(sp) + sw a4, 48(sp) + lw a1, 12(sp) + lw a2, 8(sp) + lw a3, 4(sp) + lw a4, 0(sp) + sw a1, 44(sp) + sw a2, 40(sp) + sw a3, 36(sp) + sw a4, 32(sp) + addi a1, sp, 36 + addi a2, sp, 40 + addi a3, sp, 44 + addi a4, sp, 48 + addi a5, sp, 52 + addi a6, sp, 56 + addi a7, sp, 60 + addi t0, a0, 28 #APP - sw t0, 0(t1) + lw a7, 0(a7) + sw a7, 0(t0) #NO_APP - addi t0, a0, 24 + addi a7, a0, 24 #APP - sw a1, 0(t0) + lw a6, 0(a6) + sw a6, 0(a7) #NO_APP - addi a1, a0, 20 + addi a6, a0, 20 #APP - sw a7, 0(a1) + lw a5, 0(a5) + sw a5, 0(a6) #NO_APP - addi a1, a0, 16 + addi a5, a0, 16 #APP - sw a6, 0(a1) + lw a4, 0(a4) + sw a4, 0(a5) #NO_APP - addi a1, a0, 12 + addi a4, a0, 12 #APP - sw a5, 0(a1) + lw a3, 0(a3) + sw a3, 0(a4) #NO_APP - addi a1, a0, 8 + addi a3, a0, 8 #APP - sw a4, 0(a1) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a1, a0, 4 + addi a2, a0, 4 #APP - sw a3, 0(a1) + lw a1, 0(a1) + sw a1, 0(a2) #NO_APP + addi a1, sp, 32 #APP - sw a2, 0(a0) + lw a1, 0(a1) + sw a1, 0(a0) #NO_APP + addi sp, sp, 64 ret asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: fence rw, w diff --git a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align2 b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align2 index 
392de3e..f5c27ed 100644 --- a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align2 @@ -43,159 +43,112 @@ asm_test::atomic_memcpy_store_align2::release: lw a1, 28(sp) lw a2, 24(sp) lw a3, 20(sp) + lw a4, 16(sp) sw a1, 60(sp) sw a2, 56(sp) sw a3, 52(sp) - lw a1, 16(sp) - lw a2, 12(sp) - lw a3, 8(sp) - lw a4, 4(sp) - sw a1, 48(sp) - sw a2, 44(sp) - sw a3, 40(sp) - sw a4, 36(sp) + sw a4, 48(sp) + lw a1, 12(sp) + lw a2, 8(sp) + lw a3, 4(sp) lw a4, 0(sp) - addi a1, a0, 3 - andi a3, a1, -4 - sub a1, a3, a0 - li a2, 32 + sw a1, 44(sp) + sw a2, 40(sp) + sw a3, 36(sp) sw a4, 32(sp) - bltu a2, a1, .LBB6_11 - beqz a1, .LBB6_5 - addi a2, a0, 32 - sub a6, a0, a3 - addi a4, sp, 32 - mv a5, a0 -.LBB6_3: - lbu a7, 0(a4) - mv t0, a6 - #APP - sb a7, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a6, a6, 1 - addi a4, a4, 1 - bgeu a6, t0, .LBB6_3 - sub a2, a2, a3 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a3, sp, 32 - li a4, 3 -.LBB6_6: - add a5, a3, a1 - lbu a6, 1(a5) - lbu a7, 0(a5) - lbu t0, 3(a5) - lbu a5, 2(a5) - slli a6, a6, 8 - or a6, a6, a7 - slli a7, t0, 8 - or a5, a7, a5 - slli a5, a5, 16 - or a5, a5, a6 - add a6, a0, a1 - #APP - sw a5, 0(a6) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a4, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a3, a1 -.LBB6_9: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lhu a1, 62(sp) - addi a2, a0, 30 + addi a1, sp, 36 + addi a2, sp, 40 + addi a3, sp, 44 + addi a4, sp, 48 + addi a5, sp, 52 + addi a6, sp, 56 + addi a7, sp, 60 + addi t0, a0, 30 + addi t1, sp, 62 #APP - sh a1, 0(a2) + lh t1, 0(t1) + sh t1, 0(t0) #NO_APP - lhu a1, 60(sp) - addi a2, a0, 28 + addi t0, a0, 28 #APP - sh a1, 0(a2) + lh a7, 0(a7) + sh a7, 0(t0) #NO_APP - lhu a1, 58(sp) - addi a2, a0, 26 + addi a7, a0, 26 + addi 
t0, sp, 58 #APP - sh a1, 0(a2) + lh t0, 0(t0) + sh t0, 0(a7) #NO_APP - lhu a1, 56(sp) - addi a2, a0, 24 + addi a7, a0, 24 #APP - sh a1, 0(a2) + lh a6, 0(a6) + sh a6, 0(a7) #NO_APP - lhu a1, 54(sp) - addi a2, a0, 22 + addi a6, a0, 22 + addi a7, sp, 54 #APP - sh a1, 0(a2) + lh a7, 0(a7) + sh a7, 0(a6) #NO_APP - lhu a1, 52(sp) - addi a2, a0, 20 + addi a6, a0, 20 #APP - sh a1, 0(a2) + lh a5, 0(a5) + sh a5, 0(a6) #NO_APP - lhu a1, 50(sp) - addi a2, a0, 18 + addi a5, a0, 18 + addi a6, sp, 50 #APP - sh a1, 0(a2) + lh a6, 0(a6) + sh a6, 0(a5) #NO_APP - lhu a1, 48(sp) - addi a2, a0, 16 + addi a5, a0, 16 #APP - sh a1, 0(a2) + lh a4, 0(a4) + sh a4, 0(a5) #NO_APP - lhu a1, 46(sp) - addi a2, a0, 14 + addi a4, a0, 14 + addi a5, sp, 46 #APP - sh a1, 0(a2) + lh a5, 0(a5) + sh a5, 0(a4) #NO_APP - lhu a1, 44(sp) - addi a2, a0, 12 + addi a4, a0, 12 #APP - sh a1, 0(a2) + lh a3, 0(a3) + sh a3, 0(a4) #NO_APP - lhu a1, 42(sp) - addi a2, a0, 10 + addi a3, a0, 10 + addi a4, sp, 42 #APP - sh a1, 0(a2) + lh a4, 0(a4) + sh a4, 0(a3) #NO_APP - lhu a1, 40(sp) - addi a2, a0, 8 + addi a3, a0, 8 #APP - sh a1, 0(a2) + lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - lhu a1, 38(sp) addi a2, a0, 6 + addi a3, sp, 38 #APP - sh a1, 0(a2) + lh a3, 0(a3) + sh a3, 0(a2) #NO_APP - lhu a1, 36(sp) addi a2, a0, 4 #APP + lh a1, 0(a1) sh a1, 0(a2) #NO_APP - lhu a1, 34(sp) - addi a2, a0, 2 + addi a1, a0, 2 + addi a2, sp, 34 #APP - sh a1, 0(a2) + lh a2, 0(a2) + sh a2, 0(a1) #NO_APP - lhu a1, 32(sp) + addi a1, sp, 32 #APP + lh a1, 0(a1) sh a1, 0(a0) #NO_APP addi sp, sp, 64 diff --git a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align4 b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align4 index c6260a7..7bab144 100644 --- a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align4 @@ -1,43 +1,85 @@ asm_test::atomic_memcpy_store_align4::release: - lw a3, 4(a1) - lw a4, 8(a1) - lw a5, 12(a1) - lw 
a6, 16(a1) - lw a7, 20(a1) - lw t0, 28(a1) - lw a1, 24(a1) + lw a2, 28(a1) + lw a3, 24(a1) + lw a4, 20(a1) + lw a5, 16(a1) + sw a2, 28(sp) + sw a3, 24(sp) + sw a4, 20(sp) + sw a5, 16(sp) + lw a2, 12(a1) + lw a3, 8(a1) + lw a4, 4(a1) + lw a1, 0(a1) + sw a2, 12(sp) + sw a3, 8(sp) + sw a4, 4(sp) + sw a1, 0(sp) fence rw, w - addi t1, a0, 28 + lw a1, 28(sp) + lw a2, 24(sp) + lw a3, 20(sp) + lw a4, 16(sp) + sw a1, 60(sp) + sw a2, 56(sp) + sw a3, 52(sp) + sw a4, 48(sp) + lw a1, 12(sp) + lw a2, 8(sp) + lw a3, 4(sp) + lw a4, 0(sp) + sw a1, 44(sp) + sw a2, 40(sp) + sw a3, 36(sp) + sw a4, 32(sp) + addi a1, sp, 36 + addi a2, sp, 40 + addi a3, sp, 44 + addi a4, sp, 48 + addi a5, sp, 52 + addi a6, sp, 56 + addi a7, sp, 60 + addi t0, a0, 28 #APP - sw t0, 0(t1) + lw a7, 0(a7) + sw a7, 0(t0) #NO_APP - addi t0, a0, 24 + addi a7, a0, 24 #APP - sw a1, 0(t0) + lw a6, 0(a6) + sw a6, 0(a7) #NO_APP - addi a1, a0, 20 + addi a6, a0, 20 #APP - sw a7, 0(a1) + lw a5, 0(a5) + sw a5, 0(a6) #NO_APP - addi a1, a0, 16 + addi a5, a0, 16 #APP - sw a6, 0(a1) + lw a4, 0(a4) + sw a4, 0(a5) #NO_APP - addi a1, a0, 12 + addi a4, a0, 12 #APP - sw a5, 0(a1) + lw a3, 0(a3) + sw a3, 0(a4) #NO_APP - addi a1, a0, 8 + addi a3, a0, 8 #APP - sw a4, 0(a1) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a1, a0, 4 + addi a2, a0, 4 #APP - sw a3, 0(a1) + lw a1, 0(a1) + sw a1, 0(a2) #NO_APP + addi a1, sp, 32 #APP - sw a2, 0(a0) + lw a1, 0(a1) + sw a1, 0(a0) #NO_APP + addi sp, sp, 64 ret asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: fence rw, w diff --git a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align8 b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align8 index 0fe0144..d03abea 100644 --- a/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/riscv32i-unknown-none-elf/atomic_memcpy_store_align8 @@ -1,43 +1,85 @@ asm_test::atomic_memcpy_store_align8::release: - lw a3, 4(a1) - lw a4, 8(a1) - lw a5, 12(a1) - lw a6, 
16(a1) - lw a7, 20(a1) - lw t0, 28(a1) - lw a1, 24(a1) + lw a2, 28(a1) + lw a3, 24(a1) + lw a4, 20(a1) + lw a5, 16(a1) + sw a2, 28(sp) + sw a3, 24(sp) + sw a4, 20(sp) + sw a5, 16(sp) + lw a2, 12(a1) + lw a3, 8(a1) + lw a4, 4(a1) + lw a1, 0(a1) + sw a2, 12(sp) + sw a3, 8(sp) + sw a4, 4(sp) + sw a1, 0(sp) fence rw, w - addi t1, a0, 28 + lw a1, 28(sp) + lw a2, 24(sp) + lw a3, 20(sp) + lw a4, 16(sp) + sw a1, 60(sp) + sw a2, 56(sp) + sw a3, 52(sp) + sw a4, 48(sp) + lw a1, 12(sp) + lw a2, 8(sp) + lw a3, 4(sp) + lw a4, 0(sp) + sw a1, 44(sp) + sw a2, 40(sp) + sw a3, 36(sp) + sw a4, 32(sp) + addi a1, sp, 36 + addi a2, sp, 40 + addi a3, sp, 44 + addi a4, sp, 48 + addi a5, sp, 52 + addi a6, sp, 56 + addi a7, sp, 60 + addi t0, a0, 28 #APP - sw t0, 0(t1) + lw a7, 0(a7) + sw a7, 0(t0) #NO_APP - addi t0, a0, 24 + addi a7, a0, 24 #APP - sw a1, 0(t0) + lw a6, 0(a6) + sw a6, 0(a7) #NO_APP - addi a1, a0, 20 + addi a6, a0, 20 #APP - sw a7, 0(a1) + lw a5, 0(a5) + sw a5, 0(a6) #NO_APP - addi a1, a0, 16 + addi a5, a0, 16 #APP - sw a6, 0(a1) + lw a4, 0(a4) + sw a4, 0(a5) #NO_APP - addi a1, a0, 12 + addi a4, a0, 12 #APP - sw a5, 0(a1) + lw a3, 0(a3) + sw a3, 0(a4) #NO_APP - addi a1, a0, 8 + addi a3, a0, 8 #APP - sw a4, 0(a1) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a1, a0, 4 + addi a2, a0, 4 #APP - sw a3, 0(a1) + lw a1, 0(a1) + sw a1, 0(a2) #NO_APP + addi a1, sp, 32 #APP - sw a2, 0(a0) + lw a1, 0(a1) + sw a1, 0(a0) #NO_APP + addi sp, sp, 64 ret asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: fence rw, w diff --git a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align1 b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align1 index 6bb0960..5872a70 100644 --- a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align1 @@ -1,322 +1,196 @@ asm_test::atomic_memcpy_load_align1::acquire: sw ra, 44(sp) - addi a2, a1, 3 - andi a7, a2, -4 - 
sub t1, a7, a1 - li a3, 33 - bgeu t1, a3, .LBB0_5 - beqz t1, .LBB0_6 - addi a6, a1, 32 - sub a3, a1, a7 - addi a5, sp, 8 - mv a4, a1 -.LBB0_3: - lb t0, 0(a4) - mv a2, a3 - sb t0, 0(a5) - addi a5, a5, 1 - addi a3, a3, 1 - addi a4, a4, 1 - bgeu a3, a2, .LBB0_3 - sub a3, a6, a7 - li a4, 4 - bgeu a3, a4, .LBB0_7 - j .LBB0_9 -.LBB0_5: - lb a2, 0(a1) - sb a2, 8(sp) - lb a2, 1(a1) - sb a2, 9(sp) - lb a2, 2(a1) - sb a2, 10(sp) - lb a2, 3(a1) - sb a2, 11(sp) - lb a2, 4(a1) - sb a2, 12(sp) - lb a2, 5(a1) - sb a2, 13(sp) - lb a2, 6(a1) - sb a2, 14(sp) - lb a2, 7(a1) - sb a2, 15(sp) - lb a2, 8(a1) - sb a2, 16(sp) - lb a2, 9(a1) - sb a2, 17(sp) - lb a2, 10(a1) - sb a2, 18(sp) - lb a2, 11(a1) - sb a2, 19(sp) - lb a2, 12(a1) - sb a2, 20(sp) - lb a2, 13(a1) - sb a2, 21(sp) - lb a2, 14(a1) - sb a2, 22(sp) - lb a2, 15(a1) - sb a2, 23(sp) - lb a2, 16(a1) - sb a2, 24(sp) - lb a2, 17(a1) - sb a2, 25(sp) - lb a2, 18(a1) - sb a2, 26(sp) - lb a2, 19(a1) - sb a2, 27(sp) - lb a2, 20(a1) - sb a2, 28(sp) - lb a2, 21(a1) - sb a2, 29(sp) - lb a2, 22(a1) - sb a2, 30(sp) - lb a2, 23(a1) - sb a2, 31(sp) - lb a2, 24(a1) - sb a2, 32(sp) - lb a2, 25(a1) - sb a2, 33(sp) - lb a2, 26(a1) - sb a2, 34(sp) - lb a2, 27(a1) - sb a2, 35(sp) - lb a2, 28(a1) - sb a2, 36(sp) - lb a2, 29(a1) - sb a2, 37(sp) - lb a2, 30(a1) - sb a2, 38(sp) - lb a1, 31(a1) - sb a1, 39(sp) - j .LBB0_12 -.LBB0_6: - li a3, 32 -.LBB0_7: - addi a6, sp, 8 - li a7, 3 -.LBB0_8: - add a2, a1, t1 - lw a2, 0(a2) - add a4, a6, t1 - sb a2, 0(a4) - srli a5, a2, 24 - sb a5, 3(a4) - srli a5, a2, 16 - sb a5, 2(a4) - srli a2, a2, 8 - sb a2, 1(a4) - addi a3, a3, -4 - addi t1, t1, 4 - bltu a7, a3, .LBB0_8 -.LBB0_9: - beqz a3, .LBB0_12 - addi a2, sp, 8 - add a4, a2, t1 - add a1, a1, t1 -.LBB0_11: - lb a2, 0(a1) - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB0_11 -.LBB0_12: - addi a1, sp, 8 - li a2, 32 - call memcpy@plt - fence r, rw - lw ra, 44(sp) - addi sp, sp, 48 - ret -.LBB2_3: - lb t0, 0(a4) - mv a2, a5 - sb t0, 
0(a3) - addi a3, a3, 1 - addi a5, a5, 1 - addi a4, a4, 1 - bgeu a5, a2, .LBB2_3 - sub a2, a6, a7 - li a3, 4 - bgeu a2, a3, .LBB2_7 - j .LBB2_9 -.LBB2_5: - lb a1, 32(sp) - sb a1, 0(a0) - lb a1, 33(sp) - sb a1, 1(a0) - lb a1, 34(sp) - sb a1, 2(a0) - lb a1, 35(sp) - sb a1, 3(a0) - lb a1, 36(sp) - sb a1, 4(a0) - lb a1, 37(sp) - sb a1, 5(a0) - lb a1, 38(sp) - sb a1, 6(a0) - lb a1, 39(sp) - sb a1, 7(a0) - lb a1, 40(sp) - sb a1, 8(a0) - lb a1, 41(sp) - sb a1, 9(a0) - lb a1, 42(sp) - sb a1, 10(a0) - lb a1, 43(sp) - sb a1, 11(a0) - lb a1, 44(sp) - sb a1, 12(a0) - lb a1, 45(sp) - sb a1, 13(a0) - lb a1, 46(sp) - sb a1, 14(a0) - lb a1, 47(sp) - sb a1, 15(a0) - lb a1, 48(sp) - sb a1, 16(a0) - lb a1, 49(sp) - sb a1, 17(a0) - lb a1, 50(sp) - sb a1, 18(a0) - lb a1, 51(sp) - sb a1, 19(a0) - lb a1, 52(sp) - sb a1, 20(a0) - lb a1, 53(sp) - sb a1, 21(a0) - lb a1, 54(sp) - sb a1, 22(a0) - lb a1, 55(sp) - sb a1, 23(a0) - lb a1, 56(sp) - sb a1, 24(a0) - lb a1, 57(sp) - sb a1, 25(a0) - lb a1, 58(sp) - sb a1, 26(a0) - lb a1, 59(sp) - sb a1, 27(a0) - lb a1, 60(sp) - sb a1, 28(a0) - lb a1, 61(sp) - sb a1, 29(a0) - lb a1, 62(sp) - sb a1, 30(a0) - lb a1, 63(sp) - sb a1, 31(a0) - j .LBB2_12 -.LBB2_6: - li a2, 32 -.LBB2_7: - addi a6, sp, 32 - li a7, 3 -.LBB2_8: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - sw a3, 0(a4) - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB2_8 -.LBB2_9: - beqz a2, .LBB2_12 - add a0, a0, a1 + addi a2, a1, 31 + addi a3, sp, 39 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 30 + addi a3, sp, 38 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 29 + addi a3, sp, 37 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 28 + addi a3, sp, 36 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 27 + addi a3, sp, 35 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 
26 + addi a3, sp, 34 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 25 + addi a3, sp, 33 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 24 addi a3, sp, 32 - add a1, a1, a3 -.LBB2_11: - lb a3, 0(a1) - sb a3, 0(a0) - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB2_11 -.LBB2_12: - addi sp, sp, 64 - ret -.LBB4_3: - lb t0, 0(a3) - mv a2, a4 - sb t0, 0(a5) - addi a5, a5, 1 - addi a4, a4, 1 - addi a3, a3, 1 - bgeu a4, a2, .LBB4_3 - sub a3, a6, a7 - li a4, 4 - bltu a3, a4, .LBB4_7 -.LBB4_5: - addi a6, sp, 8 - li a7, 3 -.LBB4_6: - add a2, a1, t1 - lw a2, 0(a2) - add a4, a6, t1 - sb a2, 0(a4) - srli a5, a2, 24 - sb a5, 3(a4) - srli a5, a2, 16 - sb a5, 2(a4) - srli a2, a2, 8 - sb a2, 1(a4) - addi a3, a3, -4 - addi t1, t1, 4 - bltu a7, a3, .LBB4_6 -.LBB4_7: - beqz a3, .LBB4_11 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 23 + addi a3, sp, 31 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 22 + addi a3, sp, 30 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 21 + addi a3, sp, 29 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 20 + addi a3, sp, 28 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 19 + addi a3, sp, 27 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 18 + addi a3, sp, 26 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 17 + addi a3, sp, 25 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 16 + addi a3, sp, 24 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 15 + addi a3, sp, 23 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 14 + addi a3, sp, 22 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 13 + addi a3, sp, 21 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 12 + addi a3, sp, 20 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 11 + addi a3, sp, 19 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 10 + addi a3, sp, 18 + #APP + lb 
a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 9 + addi a3, sp, 17 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 8 + addi a3, sp, 16 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 7 + addi a3, sp, 15 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 6 + addi a3, sp, 14 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 5 + addi a3, sp, 13 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 4 + addi a3, sp, 12 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 3 + addi a3, sp, 11 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 2 + addi a3, sp, 10 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a1, 1 + addi a3, sp, 9 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP addi a2, sp, 8 - add a4, a2, t1 - add a1, a1, t1 -.LBB4_9: - lb a2, 0(a1) - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB4_9 - j .LBB4_11 -.LBB4_10: - lh a2, 30(a1) - sh a2, 38(sp) - lh a2, 28(a1) - sh a2, 36(sp) - lh a2, 26(a1) - sh a2, 34(sp) - lh a2, 24(a1) - sh a2, 32(sp) - lh a2, 22(a1) - sh a2, 30(sp) - lh a2, 20(a1) - sh a2, 28(sp) - lh a2, 18(a1) - sh a2, 26(sp) - lh a2, 16(a1) - sh a2, 24(sp) - lh a2, 14(a1) - sh a2, 22(sp) - lh a2, 12(a1) - sh a2, 20(sp) - lh a2, 10(a1) - sh a2, 18(sp) - lh a2, 8(a1) - sh a2, 16(sp) - lh a2, 6(a1) - sh a2, 14(sp) - lh a2, 4(a1) - sh a2, 12(sp) - lh a2, 2(a1) - sh a2, 10(sp) - lh a1, 0(a1) - sh a1, 8(sp) -.LBB4_11: + #APP + lb a1, 0(a1) + sb a1, 0(a2) + #NO_APP addi a1, sp, 8 li a2, 32 call memcpy@plt @@ -324,87 +198,6 @@ asm_test::atomic_memcpy_load_align1::acquire: lw ra, 44(sp) addi sp, sp, 48 ret -.LBB6_3: - lb t0, 0(a4) - mv a3, a5 - sb t0, 0(a2) - addi a2, a2, 1 - addi a5, a5, 1 - addi a4, a4, 1 - bgeu a5, a3, .LBB6_3 - sub a2, a6, a7 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a6, sp, 32 - li a7, 3 -.LBB6_6: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, 
t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - sw a3, 0(a4) - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB6_9: - lb a3, 0(a1) - sb a3, 0(a0) - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lh a1, 62(sp) - sh a1, 30(a0) - lh a1, 60(sp) - sh a1, 28(a0) - lh a1, 58(sp) - sh a1, 26(a0) - lh a1, 56(sp) - sh a1, 24(a0) - lh a1, 54(sp) - sh a1, 22(a0) - lh a1, 52(sp) - sh a1, 20(a0) - lh a1, 50(sp) - sh a1, 18(a0) - lh a1, 48(sp) - sh a1, 16(a0) - lh a1, 46(sp) - sh a1, 14(a0) - lh a1, 44(sp) - sh a1, 12(a0) - lh a1, 42(sp) - sh a1, 10(a0) - lh a1, 40(sp) - sh a1, 8(a0) - lh a1, 38(sp) - sh a1, 6(a0) - lh a1, 36(sp) - sh a1, 4(a0) - lh a1, 34(sp) - sh a1, 2(a0) - lh a1, 32(sp) - sh a1, 0(a0) - addi sp, sp, 64 - ret asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: sw ra, 76(sp) sw s0, 72(sp) @@ -509,281 +302,3 @@ asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: lw s11, 28(sp) addi sp, sp, 80 ret -.LBB2_3: - lb t0, 0(a4) - mv a2, a5 - sb t0, 0(a3) - addi a3, a3, 1 - addi a5, a5, 1 - addi a4, a4, 1 - bgeu a5, a2, .LBB2_3 - sub a2, a6, a7 - li a3, 4 - bgeu a2, a3, .LBB2_7 - j .LBB2_9 -.LBB2_5: - lb a1, 32(sp) - sb a1, 0(a0) - lb a1, 33(sp) - sb a1, 1(a0) - lb a1, 34(sp) - sb a1, 2(a0) - lb a1, 35(sp) - sb a1, 3(a0) - lb a1, 36(sp) - sb a1, 4(a0) - lb a1, 37(sp) - sb a1, 5(a0) - lb a1, 38(sp) - sb a1, 6(a0) - lb a1, 39(sp) - sb a1, 7(a0) - lb a1, 40(sp) - sb a1, 8(a0) - lb a1, 41(sp) - sb a1, 9(a0) - lb a1, 42(sp) - sb a1, 10(a0) - lb a1, 43(sp) - sb a1, 11(a0) - lb a1, 44(sp) - sb a1, 12(a0) - lb a1, 45(sp) - sb a1, 13(a0) - lb a1, 46(sp) - sb a1, 14(a0) - lb a1, 47(sp) - sb a1, 15(a0) - lb a1, 48(sp) - sb a1, 16(a0) - lb a1, 49(sp) - sb a1, 17(a0) - lb a1, 50(sp) - sb a1, 18(a0) - lb a1, 51(sp) - sb a1, 19(a0) - lb a1, 52(sp) - sb 
a1, 20(a0) - lb a1, 53(sp) - sb a1, 21(a0) - lb a1, 54(sp) - sb a1, 22(a0) - lb a1, 55(sp) - sb a1, 23(a0) - lb a1, 56(sp) - sb a1, 24(a0) - lb a1, 57(sp) - sb a1, 25(a0) - lb a1, 58(sp) - sb a1, 26(a0) - lb a1, 59(sp) - sb a1, 27(a0) - lb a1, 60(sp) - sb a1, 28(a0) - lb a1, 61(sp) - sb a1, 29(a0) - lb a1, 62(sp) - sb a1, 30(a0) - lb a1, 63(sp) - sb a1, 31(a0) - j .LBB2_12 -.LBB2_6: - li a2, 32 -.LBB2_7: - addi a6, sp, 32 - li a7, 3 -.LBB2_8: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - sw a3, 0(a4) - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB2_8 -.LBB2_9: - beqz a2, .LBB2_12 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB2_11: - lb a3, 0(a1) - sb a3, 0(a0) - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB2_11 -.LBB2_12: - addi sp, sp, 64 - ret -.LBB4_3: - lb t0, 0(a3) - mv a2, a4 - sb t0, 0(a5) - addi a5, a5, 1 - addi a4, a4, 1 - addi a3, a3, 1 - bgeu a4, a2, .LBB4_3 - sub a3, a6, a7 - li a4, 4 - bltu a3, a4, .LBB4_7 -.LBB4_5: - addi a6, sp, 8 - li a7, 3 -.LBB4_6: - add a2, a1, t1 - lw a2, 0(a2) - add a4, a6, t1 - sb a2, 0(a4) - srli a5, a2, 24 - sb a5, 3(a4) - srli a5, a2, 16 - sb a5, 2(a4) - srli a2, a2, 8 - sb a2, 1(a4) - addi a3, a3, -4 - addi t1, t1, 4 - bltu a7, a3, .LBB4_6 -.LBB4_7: - beqz a3, .LBB4_11 - addi a2, sp, 8 - add a4, a2, t1 - add a1, a1, t1 -.LBB4_9: - lb a2, 0(a1) - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB4_9 - j .LBB4_11 -.LBB4_10: - lh a2, 30(a1) - sh a2, 38(sp) - lh a2, 28(a1) - sh a2, 36(sp) - lh a2, 26(a1) - sh a2, 34(sp) - lh a2, 24(a1) - sh a2, 32(sp) - lh a2, 22(a1) - sh a2, 30(sp) - lh a2, 20(a1) - sh a2, 28(sp) - lh a2, 18(a1) - sh a2, 26(sp) - lh a2, 16(a1) - sh a2, 24(sp) - lh a2, 14(a1) - sh a2, 22(sp) - lh a2, 12(a1) - sh a2, 20(sp) - lh a2, 10(a1) - sh a2, 18(sp) - lh a2, 8(a1) - sh a2, 16(sp) - lh 
a2, 6(a1) - sh a2, 14(sp) - lh a2, 4(a1) - sh a2, 12(sp) - lh a2, 2(a1) - sh a2, 10(sp) - lh a1, 0(a1) - sh a1, 8(sp) -.LBB4_11: - addi a1, sp, 8 - li a2, 32 - call memcpy@plt - fence r, rw - lw ra, 44(sp) - addi sp, sp, 48 - ret -.LBB6_3: - lb t0, 0(a4) - mv a3, a5 - sb t0, 0(a2) - addi a2, a2, 1 - addi a5, a5, 1 - addi a4, a4, 1 - bgeu a5, a3, .LBB6_3 - sub a2, a6, a7 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a6, sp, 32 - li a7, 3 -.LBB6_6: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - sw a3, 0(a4) - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB6_9: - lb a3, 0(a1) - sb a3, 0(a0) - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lh a1, 62(sp) - sh a1, 30(a0) - lh a1, 60(sp) - sh a1, 28(a0) - lh a1, 58(sp) - sh a1, 26(a0) - lh a1, 56(sp) - sh a1, 24(a0) - lh a1, 54(sp) - sh a1, 22(a0) - lh a1, 52(sp) - sh a1, 20(a0) - lh a1, 50(sp) - sh a1, 18(a0) - lh a1, 48(sp) - sh a1, 16(a0) - lh a1, 46(sp) - sh a1, 14(a0) - lh a1, 44(sp) - sh a1, 12(a0) - lh a1, 42(sp) - sh a1, 10(a0) - lh a1, 40(sp) - sh a1, 8(a0) - lh a1, 38(sp) - sh a1, 6(a0) - lh a1, 36(sp) - sh a1, 4(a0) - lh a1, 34(sp) - sh a1, 2(a0) - lh a1, 32(sp) - sh a1, 0(a0) - addi sp, sp, 64 - ret diff --git a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align16 b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align16 index 875034b..dc16215 100644 --- a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align16 @@ -1,20 +1,69 @@ asm_test::atomic_memcpy_load_align16::acquire: - lw a7, 24(a1) - lw t0, 20(a1) - lw a5, 16(a1) - lw a2, 12(a1) - lw a3, 8(a1) - 
lw a4, 4(a1) + addi a2, a1, 28 + addi a3, sp, 28 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 24 + addi a3, sp, 24 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 20 + addi a3, sp, 20 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 16 + addi a3, sp, 16 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 12 + addi a3, sp, 12 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 8 + addi a3, sp, 8 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 4 + addi a3, sp, 4 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + mv a2, sp + #APP lw a1, 0(a1) + sw a1, 0(a2) + #NO_APP + lw a1, 0(sp) + lw a2, 4(sp) + lw a3, 8(sp) + lw a4, 12(sp) sw a1, 0(a0) - sw a4, 4(a0) + sw a2, 4(a0) sw a3, 8(a0) - sw a2, 12(a0) - sw a5, 16(a0) - sw t0, 20(a0) - sw a7, 24(a0) - sw a6, 28(a0) + sw a4, 12(a0) + lw a1, 16(sp) + lw a2, 20(sp) + lw a3, 24(sp) + lw a4, 28(sp) + sw a1, 16(a0) + sw a2, 20(a0) + sw a3, 24(a0) + sw a4, 28(a0) fence r, rw + addi sp, sp, 32 ret asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: lw a7, 24(a1) diff --git a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align2 b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align2 index b39db3b..41bdd8e 100644 --- a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align2 @@ -1,90 +1,100 @@ asm_test::atomic_memcpy_load_align2::acquire: sw ra, 44(sp) - addi a2, a1, 3 - andi a7, a2, -4 - sub t1, a7, a1 - li a3, 32 - bltu a3, t1, .LBB4_10 - beqz t1, .LBB4_5 - addi a6, a1, 32 - sub a4, a1, a7 - addi a5, sp, 8 - mv a3, a1 -.LBB4_3: - lb t0, 0(a3) - mv a2, a4 - sb t0, 0(a5) - addi a5, a5, 1 - addi a4, a4, 1 - addi a3, a3, 1 - bgeu a4, a2, .LBB4_3 - sub a3, a6, a7 - li a4, 4 - bltu a3, a4, .LBB4_7 -.LBB4_5: - addi a6, sp, 8 - li a7, 3 -.LBB4_6: - add a2, a1, t1 - lw a2, 0(a2) - add a4, a6, t1 - sb a2, 0(a4) - 
srli a5, a2, 24 - sb a5, 3(a4) - srli a5, a2, 16 - sb a5, 2(a4) - srli a2, a2, 8 - sb a2, 1(a4) - addi a3, a3, -4 - addi t1, t1, 4 - bltu a7, a3, .LBB4_6 -.LBB4_7: - beqz a3, .LBB4_11 + addi a2, a1, 30 + addi a3, sp, 38 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 28 + addi a3, sp, 36 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 26 + addi a3, sp, 34 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 24 + addi a3, sp, 32 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 22 + addi a3, sp, 30 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 20 + addi a3, sp, 28 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 18 + addi a3, sp, 26 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 16 + addi a3, sp, 24 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 14 + addi a3, sp, 22 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 12 + addi a3, sp, 20 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 10 + addi a3, sp, 18 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 8 + addi a3, sp, 16 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 6 + addi a3, sp, 14 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 4 + addi a3, sp, 12 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 2 + addi a3, sp, 10 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP addi a2, sp, 8 - add a4, a2, t1 - add a1, a1, t1 -.LBB4_9: - lb a2, 0(a1) - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB4_9 - j .LBB4_11 -.LBB4_10: - lh a2, 30(a1) - sh a2, 38(sp) - lh a2, 28(a1) - sh a2, 36(sp) - lh a2, 26(a1) - sh a2, 34(sp) - lh a2, 24(a1) - sh a2, 32(sp) - lh a2, 22(a1) - sh a2, 30(sp) - lh a2, 20(a1) - sh a2, 28(sp) - lh a2, 18(a1) - sh a2, 26(sp) - lh a2, 16(a1) - sh a2, 24(sp) - lh a2, 14(a1) - sh a2, 22(sp) - lh a2, 12(a1) - sh a2, 20(sp) - lh a2, 10(a1) - sh a2, 18(sp) - lh a2, 8(a1) - sh a2, 16(sp) - lh 
a2, 6(a1) - sh a2, 14(sp) - lh a2, 4(a1) - sh a2, 12(sp) - lh a2, 2(a1) - sh a2, 10(sp) + #APP lh a1, 0(a1) - sh a1, 8(sp) -.LBB4_11: + sh a1, 0(a2) + #NO_APP addi a1, sp, 8 li a2, 32 call memcpy@plt @@ -92,87 +102,6 @@ asm_test::atomic_memcpy_load_align2::acquire: lw ra, 44(sp) addi sp, sp, 48 ret -.LBB6_3: - lb t0, 0(a4) - mv a3, a5 - sb t0, 0(a2) - addi a2, a2, 1 - addi a5, a5, 1 - addi a4, a4, 1 - bgeu a5, a3, .LBB6_3 - sub a2, a6, a7 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a6, sp, 32 - li a7, 3 -.LBB6_6: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - sw a3, 0(a4) - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB6_9: - lb a3, 0(a1) - sb a3, 0(a0) - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lh a1, 62(sp) - sh a1, 30(a0) - lh a1, 60(sp) - sh a1, 28(a0) - lh a1, 58(sp) - sh a1, 26(a0) - lh a1, 56(sp) - sh a1, 24(a0) - lh a1, 54(sp) - sh a1, 22(a0) - lh a1, 52(sp) - sh a1, 20(a0) - lh a1, 50(sp) - sh a1, 18(a0) - lh a1, 48(sp) - sh a1, 16(a0) - lh a1, 46(sp) - sh a1, 14(a0) - lh a1, 44(sp) - sh a1, 12(a0) - lh a1, 42(sp) - sh a1, 10(a0) - lh a1, 40(sp) - sh a1, 8(a0) - lh a1, 38(sp) - sh a1, 6(a0) - lh a1, 36(sp) - sh a1, 4(a0) - lh a1, 34(sp) - sh a1, 2(a0) - lh a1, 32(sp) - sh a1, 0(a0) - addi sp, sp, 64 - ret asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: sw s0, 12(sp) sw s1, 8(sp) @@ -213,84 +142,3 @@ asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: lw s1, 8(sp) addi sp, sp, 16 ret -.LBB6_3: - lb t0, 0(a4) - mv a3, a5 - sb t0, 0(a2) - addi a2, a2, 1 - addi a5, a5, 1 - addi a4, a4, 1 - bgeu a5, a3, .LBB6_3 - sub a2, a6, a7 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a6, sp, 32 - li a7, 3 
-.LBB6_6: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - sw a3, 0(a4) - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB6_9: - lb a3, 0(a1) - sb a3, 0(a0) - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lh a1, 62(sp) - sh a1, 30(a0) - lh a1, 60(sp) - sh a1, 28(a0) - lh a1, 58(sp) - sh a1, 26(a0) - lh a1, 56(sp) - sh a1, 24(a0) - lh a1, 54(sp) - sh a1, 22(a0) - lh a1, 52(sp) - sh a1, 20(a0) - lh a1, 50(sp) - sh a1, 18(a0) - lh a1, 48(sp) - sh a1, 16(a0) - lh a1, 46(sp) - sh a1, 14(a0) - lh a1, 44(sp) - sh a1, 12(a0) - lh a1, 42(sp) - sh a1, 10(a0) - lh a1, 40(sp) - sh a1, 8(a0) - lh a1, 38(sp) - sh a1, 6(a0) - lh a1, 36(sp) - sh a1, 4(a0) - lh a1, 34(sp) - sh a1, 2(a0) - lh a1, 32(sp) - sh a1, 0(a0) - addi sp, sp, 64 - ret diff --git a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align4 b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align4 index 5fdb6b1..8ff9af2 100644 --- a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align4 @@ -1,20 +1,69 @@ asm_test::atomic_memcpy_load_align4::acquire: - lw a7, 24(a1) - lw t0, 20(a1) - lw a5, 16(a1) - lw a2, 12(a1) - lw a3, 8(a1) - lw a4, 4(a1) + addi a2, a1, 28 + addi a3, sp, 28 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 24 + addi a3, sp, 24 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 20 + addi a3, sp, 20 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 16 + addi a3, sp, 16 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 12 + addi a3, sp, 12 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 8 + 
addi a3, sp, 8 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 4 + addi a3, sp, 4 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + mv a2, sp + #APP lw a1, 0(a1) + sw a1, 0(a2) + #NO_APP + lw a1, 0(sp) + lw a2, 4(sp) + lw a3, 8(sp) + lw a4, 12(sp) sw a1, 0(a0) - sw a4, 4(a0) + sw a2, 4(a0) sw a3, 8(a0) - sw a2, 12(a0) - sw a5, 16(a0) - sw t0, 20(a0) - sw a7, 24(a0) - sw a6, 28(a0) + sw a4, 12(a0) + lw a1, 16(sp) + lw a2, 20(sp) + lw a3, 24(sp) + lw a4, 28(sp) + sw a1, 16(a0) + sw a2, 20(a0) + sw a3, 24(a0) + sw a4, 28(a0) fence r, rw + addi sp, sp, 32 ret asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: lw a7, 24(a1) diff --git a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align8 b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align8 index f5525e0..a1c06af 100644 --- a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_load_align8 @@ -1,20 +1,69 @@ asm_test::atomic_memcpy_load_align8::acquire: - lw a7, 24(a1) - lw t0, 20(a1) - lw a5, 16(a1) - lw a2, 12(a1) - lw a3, 8(a1) - lw a4, 4(a1) + addi a2, a1, 28 + addi a3, sp, 28 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 24 + addi a3, sp, 24 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 20 + addi a3, sp, 20 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 16 + addi a3, sp, 16 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 12 + addi a3, sp, 12 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 8 + addi a3, sp, 8 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 4 + addi a3, sp, 4 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + mv a2, sp + #APP lw a1, 0(a1) + sw a1, 0(a2) + #NO_APP + lw a1, 0(sp) + lw a2, 4(sp) + lw a3, 8(sp) + lw a4, 12(sp) sw a1, 0(a0) - sw a4, 4(a0) + sw a2, 4(a0) sw a3, 8(a0) - sw a2, 12(a0) - sw a5, 16(a0) - sw t0, 20(a0) - sw a7, 24(a0) - sw a6, 28(a0) + 
sw a4, 12(a0) + lw a1, 16(sp) + lw a2, 20(sp) + lw a3, 24(sp) + lw a4, 28(sp) + sw a1, 16(a0) + sw a2, 20(a0) + sw a3, 24(a0) + sw a4, 28(a0) fence r, rw + addi sp, sp, 32 ret asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: lw a7, 24(a1) diff --git a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align1 b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align1 index cc06381..47c9a47 100644 --- a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align1 @@ -91,305 +91,210 @@ asm_test::atomic_memcpy_store_align1::release: lw a1, 28(sp) lw a2, 24(sp) lw a3, 20(sp) + lw a4, 16(sp) sw a1, 60(sp) sw a2, 56(sp) sw a3, 52(sp) - lw a1, 16(sp) - lw a2, 12(sp) - lw a3, 8(sp) - lw a4, 4(sp) - sw a1, 48(sp) - sw a2, 44(sp) - sw a3, 40(sp) - sw a4, 36(sp) - lw a3, 0(sp) + sw a4, 48(sp) + lw a1, 12(sp) + lw a2, 8(sp) + lw a3, 4(sp) + lw a4, 0(sp) + sw a1, 44(sp) + sw a2, 40(sp) + sw a3, 36(sp) + sw a4, 32(sp) + addi a6, sp, 36 + addi a7, sp, 40 + addi t0, sp, 44 + addi t1, sp, 48 + addi a5, sp, 52 + addi a1, sp, 56 + addi a2, sp, 60 + addi a3, a0, 31 + addi a4, sp, 63 + #APP + lb a4, 0(a4) + sb a4, 0(a3) + #NO_APP + addi a3, a0, 30 + addi a4, sp, 62 + #APP + lb a4, 0(a4) + sb a4, 0(a3) + #NO_APP + addi a3, a0, 29 + addi a4, sp, 61 + #APP + lb a4, 0(a4) + sb a4, 0(a3) + #NO_APP + addi a3, a0, 28 + #APP + lb a2, 0(a2) + sb a2, 0(a3) + #NO_APP + addi a2, a0, 27 + addi a3, sp, 59 + #APP + lb a3, 0(a3) + sb a3, 0(a2) + #NO_APP + addi a2, a0, 26 + addi a3, sp, 58 + #APP + lb a3, 0(a3) + sb a3, 0(a2) + #NO_APP + addi a2, a0, 25 + addi a3, sp, 57 + #APP + lb a3, 0(a3) + sb a3, 0(a2) + #NO_APP + addi a2, a0, 24 + #APP + lb a1, 0(a1) + sb a1, 0(a2) + #NO_APP + addi a1, a0, 23 + addi a2, sp, 55 + #APP + lb a2, 0(a2) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 22 + addi a2, sp, 54 + #APP + lb a2, 0(a2) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 
21 + addi a2, sp, 53 + #APP + lb a2, 0(a2) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 20 + #APP + lb a2, 0(a5) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 19 + addi a2, sp, 51 + #APP + lb a2, 0(a2) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 18 + addi a2, sp, 50 + #APP + lb a2, 0(a2) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 17 + addi a2, sp, 49 + #APP + lb a2, 0(a2) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 16 + #APP + lb a2, 0(t1) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 15 + addi a2, sp, 47 + #APP + lb a2, 0(a2) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 14 + addi a2, sp, 46 + #APP + lb a2, 0(a2) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 13 + addi a2, sp, 45 + #APP + lb a2, 0(a2) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 12 + #APP + lb a2, 0(t0) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 11 + addi a2, sp, 43 + #APP + lb a2, 0(a2) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 10 + addi a2, sp, 42 + #APP + lb a2, 0(a2) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 9 + addi a2, sp, 41 + #APP + lb a2, 0(a2) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 8 + #APP + lb a2, 0(a7) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 7 + addi a2, sp, 39 + #APP + lb a2, 0(a2) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 6 + addi a2, sp, 38 + #APP + lb a2, 0(a2) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 5 + addi a2, sp, 37 + #APP + lb a2, 0(a2) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 4 + #APP + lb a2, 0(a6) + sb a2, 0(a1) + #NO_APP addi a1, a0, 3 - andi a7, a1, -4 - sub a1, a7, a0 - li a4, 33 - sw a3, 32(sp) - bgeu a1, a4, .LBB2_5 - beqz a1, .LBB2_6 - addi a6, a0, 32 - sub a5, a0, a7 - addi a4, sp, 32 - mv a3, a0 -.LBB2_3: - lb t0, 0(a4) - mv a2, a5 - sb t0, 0(a3) - addi a3, a3, 1 - addi a5, a5, 1 - addi a4, a4, 1 - bgeu a5, a2, .LBB2_3 - sub a2, a6, a7 - li a3, 4 - bgeu a2, a3, .LBB2_7 - j .LBB2_9 -.LBB2_5: - lb a1, 32(sp) + addi a2, sp, 35 + #APP + lb a2, 0(a2) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 2 + addi a2, sp, 34 + #APP + lb a2, 0(a2) + sb a2, 0(a1) + #NO_APP + addi a1, a0, 1 + addi a2, sp, 33 + #APP + lb a2, 0(a2) + sb a2, 
0(a1) + #NO_APP + addi a1, sp, 32 + #APP + lb a1, 0(a1) sb a1, 0(a0) - lb a1, 33(sp) - sb a1, 1(a0) - lb a1, 34(sp) - sb a1, 2(a0) - lb a1, 35(sp) - sb a1, 3(a0) - lb a1, 36(sp) - sb a1, 4(a0) - lb a1, 37(sp) - sb a1, 5(a0) - lb a1, 38(sp) - sb a1, 6(a0) - lb a1, 39(sp) - sb a1, 7(a0) - lb a1, 40(sp) - sb a1, 8(a0) - lb a1, 41(sp) - sb a1, 9(a0) - lb a1, 42(sp) - sb a1, 10(a0) - lb a1, 43(sp) - sb a1, 11(a0) - lb a1, 44(sp) - sb a1, 12(a0) - lb a1, 45(sp) - sb a1, 13(a0) - lb a1, 46(sp) - sb a1, 14(a0) - lb a1, 47(sp) - sb a1, 15(a0) - lb a1, 48(sp) - sb a1, 16(a0) - lb a1, 49(sp) - sb a1, 17(a0) - lb a1, 50(sp) - sb a1, 18(a0) - lb a1, 51(sp) - sb a1, 19(a0) - lb a1, 52(sp) - sb a1, 20(a0) - lb a1, 53(sp) - sb a1, 21(a0) - lb a1, 54(sp) - sb a1, 22(a0) - lb a1, 55(sp) - sb a1, 23(a0) - lb a1, 56(sp) - sb a1, 24(a0) - lb a1, 57(sp) - sb a1, 25(a0) - lb a1, 58(sp) - sb a1, 26(a0) - lb a1, 59(sp) - sb a1, 27(a0) - lb a1, 60(sp) - sb a1, 28(a0) - lb a1, 61(sp) - sb a1, 29(a0) - lb a1, 62(sp) - sb a1, 30(a0) - lb a1, 63(sp) - sb a1, 31(a0) - j .LBB2_12 -.LBB2_6: - li a2, 32 -.LBB2_7: - addi a6, sp, 32 - li a7, 3 -.LBB2_8: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - sw a3, 0(a4) - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB2_8 -.LBB2_9: - beqz a2, .LBB2_12 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB2_11: - lb a3, 0(a1) - sb a3, 0(a0) - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB2_11 -.LBB2_12: - addi sp, sp, 64 - ret -.LBB4_3: - lb t0, 0(a3) - mv a2, a4 - sb t0, 0(a5) - addi a5, a5, 1 - addi a4, a4, 1 - addi a3, a3, 1 - bgeu a4, a2, .LBB4_3 - sub a3, a6, a7 - li a4, 4 - bltu a3, a4, .LBB4_7 -.LBB4_5: - addi a6, sp, 8 - li a7, 3 -.LBB4_6: - add a2, a1, t1 - lw a2, 0(a2) - add a4, a6, t1 - sb a2, 0(a4) - srli a5, a2, 24 - sb a5, 3(a4) - srli a5, a2, 16 - sb a5, 2(a4) - srli 
a2, a2, 8 - sb a2, 1(a4) - addi a3, a3, -4 - addi t1, t1, 4 - bltu a7, a3, .LBB4_6 -.LBB4_7: - beqz a3, .LBB4_11 - addi a2, sp, 8 - add a4, a2, t1 - add a1, a1, t1 -.LBB4_9: - lb a2, 0(a1) - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB4_9 - j .LBB4_11 -.LBB4_10: - lh a2, 30(a1) - sh a2, 38(sp) - lh a2, 28(a1) - sh a2, 36(sp) - lh a2, 26(a1) - sh a2, 34(sp) - lh a2, 24(a1) - sh a2, 32(sp) - lh a2, 22(a1) - sh a2, 30(sp) - lh a2, 20(a1) - sh a2, 28(sp) - lh a2, 18(a1) - sh a2, 26(sp) - lh a2, 16(a1) - sh a2, 24(sp) - lh a2, 14(a1) - sh a2, 22(sp) - lh a2, 12(a1) - sh a2, 20(sp) - lh a2, 10(a1) - sh a2, 18(sp) - lh a2, 8(a1) - sh a2, 16(sp) - lh a2, 6(a1) - sh a2, 14(sp) - lh a2, 4(a1) - sh a2, 12(sp) - lh a2, 2(a1) - sh a2, 10(sp) - lh a1, 0(a1) - sh a1, 8(sp) -.LBB4_11: - addi a1, sp, 8 - li a2, 32 - call memcpy@plt - fence r, rw - lw ra, 44(sp) - addi sp, sp, 48 - ret -.LBB6_3: - lb t0, 0(a4) - mv a3, a5 - sb t0, 0(a2) - addi a2, a2, 1 - addi a5, a5, 1 - addi a4, a4, 1 - bgeu a5, a3, .LBB6_3 - sub a2, a6, a7 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a6, sp, 32 - li a7, 3 -.LBB6_6: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - sw a3, 0(a4) - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB6_9: - lb a3, 0(a1) - sb a3, 0(a0) - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lh a1, 62(sp) - sh a1, 30(a0) - lh a1, 60(sp) - sh a1, 28(a0) - lh a1, 58(sp) - sh a1, 26(a0) - lh a1, 56(sp) - sh a1, 24(a0) - lh a1, 54(sp) - sh a1, 22(a0) - lh a1, 52(sp) - sh a1, 20(a0) - lh a1, 50(sp) - sh a1, 18(a0) - lh a1, 48(sp) - sh a1, 16(a0) - lh a1, 46(sp) - sh a1, 14(a0) - lh a1, 44(sp) - sh a1, 12(a0) - lh a1, 42(sp) - sh a1, 
10(a0) - lh a1, 40(sp) - sh a1, 8(a0) - lh a1, 38(sp) - sh a1, 6(a0) - lh a1, 36(sp) - sh a1, 4(a0) - lh a1, 34(sp) - sh a1, 2(a0) - lh a1, 32(sp) - sh a1, 0(a0) + #NO_APP addi sp, sp, 64 ret asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: @@ -489,166 +394,3 @@ asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: lw ra, 44(sp) addi sp, sp, 48 ret -.LBB4_3: - lb t0, 0(a3) - mv a2, a4 - sb t0, 0(a5) - addi a5, a5, 1 - addi a4, a4, 1 - addi a3, a3, 1 - bgeu a4, a2, .LBB4_3 - sub a3, a6, a7 - li a4, 4 - bltu a3, a4, .LBB4_7 -.LBB4_5: - addi a6, sp, 8 - li a7, 3 -.LBB4_6: - add a2, a1, t1 - lw a2, 0(a2) - add a4, a6, t1 - sb a2, 0(a4) - srli a5, a2, 24 - sb a5, 3(a4) - srli a5, a2, 16 - sb a5, 2(a4) - srli a2, a2, 8 - sb a2, 1(a4) - addi a3, a3, -4 - addi t1, t1, 4 - bltu a7, a3, .LBB4_6 -.LBB4_7: - beqz a3, .LBB4_11 - addi a2, sp, 8 - add a4, a2, t1 - add a1, a1, t1 -.LBB4_9: - lb a2, 0(a1) - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB4_9 - j .LBB4_11 -.LBB4_10: - lh a2, 30(a1) - sh a2, 38(sp) - lh a2, 28(a1) - sh a2, 36(sp) - lh a2, 26(a1) - sh a2, 34(sp) - lh a2, 24(a1) - sh a2, 32(sp) - lh a2, 22(a1) - sh a2, 30(sp) - lh a2, 20(a1) - sh a2, 28(sp) - lh a2, 18(a1) - sh a2, 26(sp) - lh a2, 16(a1) - sh a2, 24(sp) - lh a2, 14(a1) - sh a2, 22(sp) - lh a2, 12(a1) - sh a2, 20(sp) - lh a2, 10(a1) - sh a2, 18(sp) - lh a2, 8(a1) - sh a2, 16(sp) - lh a2, 6(a1) - sh a2, 14(sp) - lh a2, 4(a1) - sh a2, 12(sp) - lh a2, 2(a1) - sh a2, 10(sp) - lh a1, 0(a1) - sh a1, 8(sp) -.LBB4_11: - addi a1, sp, 8 - li a2, 32 - call memcpy@plt - fence r, rw - lw ra, 44(sp) - addi sp, sp, 48 - ret -.LBB6_3: - lb t0, 0(a4) - mv a3, a5 - sb t0, 0(a2) - addi a2, a2, 1 - addi a5, a5, 1 - addi a4, a4, 1 - bgeu a5, a3, .LBB6_3 - sub a2, a6, a7 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a6, sp, 32 - li a7, 3 -.LBB6_6: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, 
t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - sw a3, 0(a4) - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB6_9: - lb a3, 0(a1) - sb a3, 0(a0) - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lh a1, 62(sp) - sh a1, 30(a0) - lh a1, 60(sp) - sh a1, 28(a0) - lh a1, 58(sp) - sh a1, 26(a0) - lh a1, 56(sp) - sh a1, 24(a0) - lh a1, 54(sp) - sh a1, 22(a0) - lh a1, 52(sp) - sh a1, 20(a0) - lh a1, 50(sp) - sh a1, 18(a0) - lh a1, 48(sp) - sh a1, 16(a0) - lh a1, 46(sp) - sh a1, 14(a0) - lh a1, 44(sp) - sh a1, 12(a0) - lh a1, 42(sp) - sh a1, 10(a0) - lh a1, 40(sp) - sh a1, 8(a0) - lh a1, 38(sp) - sh a1, 6(a0) - lh a1, 36(sp) - sh a1, 4(a0) - lh a1, 34(sp) - sh a1, 2(a0) - lh a1, 32(sp) - sh a1, 0(a0) - addi sp, sp, 64 - ret diff --git a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align16 b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align16 index 1e96a6a..312a7c5 100644 --- a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align16 @@ -1,20 +1,85 @@ asm_test::atomic_memcpy_store_align16::release: - lw a7, 4(a1) - lw t0, 8(a1) - lw a5, 12(a1) - lw a2, 16(a1) - lw a3, 28(a1) - lw a4, 24(a1) - lw a1, 20(a1) + lw a2, 28(a1) + lw a3, 24(a1) + lw a4, 20(a1) + lw a5, 16(a1) + sw a2, 28(sp) + sw a3, 24(sp) + sw a4, 20(sp) + sw a5, 16(sp) + lw a2, 12(a1) + lw a3, 8(a1) + lw a4, 4(a1) + lw a1, 0(a1) + sw a2, 12(sp) + sw a3, 8(sp) + sw a4, 4(sp) + sw a1, 0(sp) fence rw, w - sw a3, 28(a0) - sw a4, 24(a0) - sw a1, 20(a0) - sw a2, 16(a0) - sw a5, 12(a0) - sw t0, 8(a0) - sw a7, 4(a0) - sw a6, 0(a0) + lw a1, 28(sp) + lw a2, 24(sp) + lw a3, 20(sp) + lw a4, 16(sp) + sw a1, 60(sp) + sw a2, 56(sp) + sw a3, 52(sp) + sw a4, 48(sp) + lw a1, 12(sp) + 
lw a2, 8(sp) + lw a3, 4(sp) + lw a4, 0(sp) + sw a1, 44(sp) + sw a2, 40(sp) + sw a3, 36(sp) + sw a4, 32(sp) + addi a6, sp, 36 + addi a7, sp, 40 + addi t0, sp, 44 + addi a4, sp, 48 + addi a5, sp, 52 + addi a1, sp, 56 + addi a2, sp, 60 + addi a3, a0, 28 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a0, 24 + #APP + lw a1, 0(a1) + sw a1, 0(a2) + #NO_APP + addi a1, a0, 20 + #APP + lw a2, 0(a5) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 16 + #APP + lw a2, 0(a4) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 12 + #APP + lw a2, 0(t0) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 8 + #APP + lw a2, 0(a7) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 4 + #APP + lw a2, 0(a6) + sw a2, 0(a1) + #NO_APP + addi a1, sp, 32 + #APP + lw a1, 0(a1) + sw a1, 0(a0) + #NO_APP + addi sp, sp, 64 ret asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: fence rw, w diff --git a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align2 b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align2 index 4481af9..de653a7 100644 --- a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align2 @@ -43,108 +43,114 @@ asm_test::atomic_memcpy_store_align2::release: lw a1, 28(sp) lw a2, 24(sp) lw a3, 20(sp) + lw a4, 16(sp) sw a1, 60(sp) sw a2, 56(sp) sw a3, 52(sp) - lw a1, 16(sp) - lw a2, 12(sp) - lw a3, 8(sp) - lw a4, 4(sp) - sw a1, 48(sp) - sw a2, 44(sp) - sw a3, 40(sp) - sw a4, 36(sp) + sw a4, 48(sp) + lw a1, 12(sp) + lw a2, 8(sp) + lw a3, 4(sp) lw a4, 0(sp) - addi a1, a0, 3 - andi a7, a1, -4 - sub a1, a7, a0 - li a2, 32 + sw a1, 44(sp) + sw a2, 40(sp) + sw a3, 36(sp) sw a4, 32(sp) - bltu a2, a1, .LBB6_11 - beqz a1, .LBB6_5 - addi a6, a0, 32 - sub a5, a0, a7 - addi a4, sp, 32 - mv a2, a0 -.LBB6_3: - lb t0, 0(a4) - mv a3, a5 - sb t0, 0(a2) - addi a2, a2, 1 - addi a5, a5, 1 - addi a4, a4, 1 - bgeu a5, a3, .LBB6_3 - sub a2, a6, a7 - li a3, 4 - bltu a2, a3, .LBB6_7 
-.LBB6_5: - addi a6, sp, 32 - li a7, 3 -.LBB6_6: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - sw a3, 0(a4) - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB6_9: - lb a3, 0(a1) - sb a3, 0(a0) - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lh a1, 62(sp) - sh a1, 30(a0) - lh a1, 60(sp) - sh a1, 28(a0) - lh a1, 58(sp) - sh a1, 26(a0) - lh a1, 56(sp) - sh a1, 24(a0) - lh a1, 54(sp) - sh a1, 22(a0) - lh a1, 52(sp) - sh a1, 20(a0) - lh a1, 50(sp) - sh a1, 18(a0) - lh a1, 48(sp) - sh a1, 16(a0) - lh a1, 46(sp) - sh a1, 14(a0) - lh a1, 44(sp) - sh a1, 12(a0) - lh a1, 42(sp) - sh a1, 10(a0) - lh a1, 40(sp) - sh a1, 8(a0) - lh a1, 38(sp) - sh a1, 6(a0) - lh a1, 36(sp) - sh a1, 4(a0) - lh a1, 34(sp) - sh a1, 2(a0) - lh a1, 32(sp) + addi a6, sp, 36 + addi a7, sp, 40 + addi t0, sp, 44 + addi t1, sp, 48 + addi a5, sp, 52 + addi a1, sp, 56 + addi a2, sp, 60 + addi a3, a0, 30 + addi a4, sp, 62 + #APP + lh a4, 0(a4) + sh a4, 0(a3) + #NO_APP + addi a3, a0, 28 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a0, 26 + addi a3, sp, 58 + #APP + lh a3, 0(a3) + sh a3, 0(a2) + #NO_APP + addi a2, a0, 24 + #APP + lh a1, 0(a1) + sh a1, 0(a2) + #NO_APP + addi a1, a0, 22 + addi a2, sp, 54 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 20 + #APP + lh a2, 0(a5) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 18 + addi a2, sp, 50 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 16 + #APP + lh a2, 0(t1) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 14 + addi a2, sp, 46 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 12 + #APP + lh a2, 0(t0) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 10 + addi a2, sp, 42 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP 
+ addi a1, a0, 8 + #APP + lh a2, 0(a7) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 6 + addi a2, sp, 38 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 4 + #APP + lh a2, 0(a6) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 2 + addi a2, sp, 34 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, sp, 32 + #APP + lh a1, 0(a1) sh a1, 0(a0) + #NO_APP addi sp, sp, 64 ret asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: diff --git a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align4 b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align4 index df42957..c61939a 100644 --- a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align4 @@ -1,20 +1,85 @@ asm_test::atomic_memcpy_store_align4::release: - lw a7, 4(a1) - lw t0, 8(a1) - lw a5, 12(a1) - lw a2, 16(a1) - lw a3, 28(a1) - lw a4, 24(a1) - lw a1, 20(a1) + lw a2, 28(a1) + lw a3, 24(a1) + lw a4, 20(a1) + lw a5, 16(a1) + sw a2, 28(sp) + sw a3, 24(sp) + sw a4, 20(sp) + sw a5, 16(sp) + lw a2, 12(a1) + lw a3, 8(a1) + lw a4, 4(a1) + lw a1, 0(a1) + sw a2, 12(sp) + sw a3, 8(sp) + sw a4, 4(sp) + sw a1, 0(sp) fence rw, w - sw a3, 28(a0) - sw a4, 24(a0) - sw a1, 20(a0) - sw a2, 16(a0) - sw a5, 12(a0) - sw t0, 8(a0) - sw a7, 4(a0) - sw a6, 0(a0) + lw a1, 28(sp) + lw a2, 24(sp) + lw a3, 20(sp) + lw a4, 16(sp) + sw a1, 60(sp) + sw a2, 56(sp) + sw a3, 52(sp) + sw a4, 48(sp) + lw a1, 12(sp) + lw a2, 8(sp) + lw a3, 4(sp) + lw a4, 0(sp) + sw a1, 44(sp) + sw a2, 40(sp) + sw a3, 36(sp) + sw a4, 32(sp) + addi a6, sp, 36 + addi a7, sp, 40 + addi t0, sp, 44 + addi a4, sp, 48 + addi a5, sp, 52 + addi a1, sp, 56 + addi a2, sp, 60 + addi a3, a0, 28 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a0, 24 + #APP + lw a1, 0(a1) + sw a1, 0(a2) + #NO_APP + addi a1, a0, 20 + #APP + lw a2, 0(a5) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 16 + #APP + lw a2, 0(a4) + sw a2, 0(a1) + 
#NO_APP + addi a1, a0, 12 + #APP + lw a2, 0(t0) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 8 + #APP + lw a2, 0(a7) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 4 + #APP + lw a2, 0(a6) + sw a2, 0(a1) + #NO_APP + addi a1, sp, 32 + #APP + lw a1, 0(a1) + sw a1, 0(a0) + #NO_APP + addi sp, sp, 64 ret asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: fence rw, w diff --git a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align8 b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align8 index c71fa1e..79b6bca 100644 --- a/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/riscv32imac-unknown-none-elf/atomic_memcpy_store_align8 @@ -1,20 +1,85 @@ asm_test::atomic_memcpy_store_align8::release: - lw a7, 4(a1) - lw t0, 8(a1) - lw a5, 12(a1) - lw a2, 16(a1) - lw a3, 28(a1) - lw a4, 24(a1) - lw a1, 20(a1) + lw a2, 28(a1) + lw a3, 24(a1) + lw a4, 20(a1) + lw a5, 16(a1) + sw a2, 28(sp) + sw a3, 24(sp) + sw a4, 20(sp) + sw a5, 16(sp) + lw a2, 12(a1) + lw a3, 8(a1) + lw a4, 4(a1) + lw a1, 0(a1) + sw a2, 12(sp) + sw a3, 8(sp) + sw a4, 4(sp) + sw a1, 0(sp) fence rw, w - sw a3, 28(a0) - sw a4, 24(a0) - sw a1, 20(a0) - sw a2, 16(a0) - sw a5, 12(a0) - sw t0, 8(a0) - sw a7, 4(a0) - sw a6, 0(a0) + lw a1, 28(sp) + lw a2, 24(sp) + lw a3, 20(sp) + lw a4, 16(sp) + sw a1, 60(sp) + sw a2, 56(sp) + sw a3, 52(sp) + sw a4, 48(sp) + lw a1, 12(sp) + lw a2, 8(sp) + lw a3, 4(sp) + lw a4, 0(sp) + sw a1, 44(sp) + sw a2, 40(sp) + sw a3, 36(sp) + sw a4, 32(sp) + addi a6, sp, 36 + addi a7, sp, 40 + addi t0, sp, 44 + addi a4, sp, 48 + addi a5, sp, 52 + addi a1, sp, 56 + addi a2, sp, 60 + addi a3, a0, 28 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a0, 24 + #APP + lw a1, 0(a1) + sw a1, 0(a2) + #NO_APP + addi a1, a0, 20 + #APP + lw a2, 0(a5) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 16 + #APP + lw a2, 0(a4) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 12 + #APP + lw a2, 0(t0) + sw a2, 0(a1) + #NO_APP + addi 
a1, a0, 8 + #APP + lw a2, 0(a7) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 4 + #APP + lw a2, 0(a6) + sw a2, 0(a1) + #NO_APP + addi a1, sp, 32 + #APP + lw a1, 0(a1) + sw a1, 0(a0) + #NO_APP + addi sp, sp, 64 ret asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: fence rw, w diff --git a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align1 b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align1 index f2ac3f8..5872a70 100644 --- a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align1 @@ -1,575 +1,196 @@ asm_test::atomic_memcpy_load_align1::acquire: sw ra, 44(sp) - addi a3, a1, 3 - andi a7, a3, -4 - sub t1, a7, a1 - li a4, 33 - bgeu t1, a4, .LBB0_5 - beqz t1, .LBB0_6 - addi a6, a1, 32 - sub a3, a1, a7 - addi a5, sp, 8 - mv a4, a1 -.LBB0_3: - #APP - lb t0, 0(a4) - #NO_APP - mv a2, a3 - sb t0, 0(a5) - addi a5, a5, 1 - addi a3, a3, 1 - addi a4, a4, 1 - bgeu a3, a2, .LBB0_3 - sub a3, a6, a7 - li a4, 4 - bgeu a3, a4, .LBB0_7 - j .LBB0_9 -.LBB0_5: - #APP - lb a2, 0(a1) - #NO_APP - sb a2, 8(sp) - addi a2, a1, 1 + addi a2, a1, 31 + addi a3, sp, 39 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 9(sp) - addi a2, a1, 2 + addi a2, a1, 30 + addi a3, sp, 38 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 10(sp) + addi a2, a1, 29 + addi a3, sp, 37 #APP - lb a2, 0(a3) + lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 11(sp) - addi a2, a1, 4 + addi a2, a1, 28 + addi a3, sp, 36 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 12(sp) - addi a2, a1, 5 + addi a2, a1, 27 + addi a3, sp, 35 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 13(sp) - addi a2, a1, 6 + addi a2, a1, 26 + addi a3, sp, 34 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 14(sp) - addi a2, a1, 7 + addi a2, a1, 25 + addi a3, sp, 33 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 15(sp) - addi a2, a1, 8 + addi a2, a1, 24 + addi a3, sp, 32 #APP lb a2, 0(a2) + sb a2, 
0(a3) #NO_APP - sb a2, 16(sp) - addi a2, a1, 9 + addi a2, a1, 23 + addi a3, sp, 31 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 17(sp) - addi a2, a1, 10 + addi a2, a1, 22 + addi a3, sp, 30 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 18(sp) - addi a2, a1, 11 + addi a2, a1, 21 + addi a3, sp, 29 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 19(sp) - addi a2, a1, 12 + addi a2, a1, 20 + addi a3, sp, 28 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 20(sp) - addi a2, a1, 13 + addi a2, a1, 19 + addi a3, sp, 27 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 21(sp) - addi a2, a1, 14 + addi a2, a1, 18 + addi a3, sp, 26 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 22(sp) - addi a2, a1, 15 + addi a2, a1, 17 + addi a3, sp, 25 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 23(sp) addi a2, a1, 16 + addi a3, sp, 24 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 24(sp) - addi a2, a1, 17 + addi a2, a1, 15 + addi a3, sp, 23 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 25(sp) - addi a2, a1, 18 + addi a2, a1, 14 + addi a3, sp, 22 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 26(sp) - addi a2, a1, 19 + addi a2, a1, 13 + addi a3, sp, 21 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 27(sp) - addi a2, a1, 20 + addi a2, a1, 12 + addi a3, sp, 20 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 28(sp) - addi a2, a1, 21 + addi a2, a1, 11 + addi a3, sp, 19 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 29(sp) - addi a2, a1, 22 + addi a2, a1, 10 + addi a3, sp, 18 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 30(sp) - addi a2, a1, 23 + addi a2, a1, 9 + addi a3, sp, 17 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 31(sp) - addi a2, a1, 24 + addi a2, a1, 8 + addi a3, sp, 16 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 32(sp) - addi a2, a1, 25 + addi a2, a1, 7 + addi a3, sp, 15 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 33(sp) - addi a2, a1, 26 + addi a2, a1, 6 + addi a3, sp, 14 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 
34(sp) - addi a2, a1, 27 + addi a2, a1, 5 + addi a3, sp, 13 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 35(sp) - addi a2, a1, 28 + addi a2, a1, 4 + addi a3, sp, 12 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 36(sp) - addi a2, a1, 29 + addi a2, a1, 3 + addi a3, sp, 11 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 37(sp) - addi a2, a1, 30 + addi a2, a1, 2 + addi a3, sp, 10 #APP lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - sb a2, 38(sp) - addi a1, a1, 31 - #APP - lb a1, 0(a1) - #NO_APP - sb a1, 39(sp) - j .LBB0_12 -.LBB0_6: - li a3, 32 -.LBB0_7: - addi a6, sp, 8 - li a7, 3 -.LBB0_8: - add a2, a1, t1 + addi a2, a1, 1 + addi a3, sp, 9 #APP - lw a2, 0(a2) + lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - add a4, a6, t1 - sb a2, 0(a4) - srli a5, a2, 24 - sb a5, 3(a4) - srli a5, a2, 16 - sb a5, 2(a4) - srli a2, a2, 8 - sb a2, 1(a4) - addi a3, a3, -4 - addi t1, t1, 4 - bltu a7, a3, .LBB0_8 -.LBB0_9: - beqz a3, .LBB0_12 addi a2, sp, 8 - add a4, a2, t1 - add a1, a1, t1 -.LBB0_11: - #APP - lb a2, 0(a1) - #NO_APP - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB0_11 -.LBB0_12: - addi a1, sp, 8 - li a2, 32 - call memcpy@plt - fence r, rw - lw ra, 44(sp) - addi sp, sp, 48 - ret -.LBB2_3: - lbu t0, 0(a4) - mv a2, a3 - #APP - sb t0, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a3, a3, 1 - addi a4, a4, 1 - bgeu a3, a2, .LBB2_3 - sub a2, a6, a7 - li a3, 4 - bgeu a2, a3, .LBB2_7 - j .LBB2_9 -.LBB2_5: - lbu a1, 32(sp) - #APP - sb a1, 0(a0) - #NO_APP - lbu a1, 33(sp) - addi a2, a0, 1 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 34(sp) - addi a2, a0, 2 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 35(sp) - #APP - sb a1, 0(a3) - #NO_APP - lbu a1, 36(sp) - addi a2, a0, 4 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 37(sp) - addi a2, a0, 5 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 38(sp) - addi a2, a0, 6 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 39(sp) - addi a2, a0, 7 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 40(sp) - addi a2, a0, 8 - #APP - sb a1, 0(a2) - #NO_APP - lbu 
a1, 41(sp) - addi a2, a0, 9 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 42(sp) - addi a2, a0, 10 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 43(sp) - addi a2, a0, 11 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 44(sp) - addi a2, a0, 12 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 45(sp) - addi a2, a0, 13 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 46(sp) - addi a2, a0, 14 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 47(sp) - addi a2, a0, 15 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 48(sp) - addi a2, a0, 16 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 49(sp) - addi a2, a0, 17 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 50(sp) - addi a2, a0, 18 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 51(sp) - addi a2, a0, 19 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 52(sp) - addi a2, a0, 20 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 53(sp) - addi a2, a0, 21 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 54(sp) - addi a2, a0, 22 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 55(sp) - addi a2, a0, 23 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 56(sp) - addi a2, a0, 24 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 57(sp) - addi a2, a0, 25 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 58(sp) - addi a2, a0, 26 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 59(sp) - addi a2, a0, 27 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 60(sp) - addi a2, a0, 28 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 61(sp) - addi a2, a0, 29 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 62(sp) - addi a2, a0, 30 #APP + lb a1, 0(a1) sb a1, 0(a2) #NO_APP - lbu a1, 63(sp) - addi a0, a0, 31 - #APP - sb a1, 0(a0) - #NO_APP - j .LBB2_12 -.LBB2_6: - li a2, 32 -.LBB2_7: - addi a6, sp, 32 - li a7, 3 -.LBB2_8: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - #APP - sw a3, 0(a4) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB2_8 -.LBB2_9: - beqz a2, .LBB2_12 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 
-.LBB2_11: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB2_11 -.LBB2_12: - addi sp, sp, 64 - ret -.LBB4_3: - #APP - lb t0, 0(a3) - #NO_APP - mv a2, a4 - sb t0, 0(a5) - addi a5, a5, 1 - addi a4, a4, 1 - addi a3, a3, 1 - bgeu a4, a2, .LBB4_3 - sub a3, a6, a7 - li a4, 4 - bltu a3, a4, .LBB4_7 -.LBB4_5: - addi a6, sp, 8 - li a7, 3 -.LBB4_6: - add a2, a1, t1 - #APP - lw a2, 0(a2) - #NO_APP - add a4, a6, t1 - sb a2, 0(a4) - srli a5, a2, 24 - sb a5, 3(a4) - srli a5, a2, 16 - sb a5, 2(a4) - srli a2, a2, 8 - sb a2, 1(a4) - addi a3, a3, -4 - addi t1, t1, 4 - bltu a7, a3, .LBB4_6 -.LBB4_7: - beqz a3, .LBB4_11 - addi a2, sp, 8 - add a4, a2, t1 - add a1, a1, t1 -.LBB4_9: - #APP - lb a2, 0(a1) - #NO_APP - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB4_9 - j .LBB4_11 -.LBB4_10: - addi a2, a1, 30 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 38(sp) - addi a2, a1, 28 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 36(sp) - addi a2, a1, 26 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 34(sp) - addi a2, a1, 24 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 32(sp) - addi a2, a1, 22 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 30(sp) - addi a2, a1, 20 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 28(sp) - addi a2, a1, 18 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 26(sp) - addi a2, a1, 16 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 24(sp) - addi a2, a1, 14 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 22(sp) - addi a2, a1, 12 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 20(sp) - addi a2, a1, 10 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 18(sp) - addi a2, a1, 8 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 16(sp) - addi a2, a1, 6 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 14(sp) - addi a2, a1, 4 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 12(sp) - addi a2, a1, 2 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 10(sp) - #APP - lh a1, 0(a1) - #NO_APP - sh a1, 8(sp) -.LBB4_11: addi a1, sp, 8 li a2, 32 call memcpy@plt @@ -577,140 +198,6 @@ 
asm_test::atomic_memcpy_load_align1::acquire: lw ra, 44(sp) addi sp, sp, 48 ret -.LBB6_3: - lbu t0, 0(a4) - mv a3, a2 - #APP - sb t0, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a2, a2, 1 - addi a4, a4, 1 - bgeu a2, a3, .LBB6_3 - sub a2, a6, a7 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a6, sp, 32 - li a7, 3 -.LBB6_6: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - #APP - sw a3, 0(a4) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB6_9: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lhu a1, 62(sp) - addi a2, a0, 30 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 60(sp) - addi a2, a0, 28 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 58(sp) - addi a2, a0, 26 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 56(sp) - addi a2, a0, 24 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 54(sp) - addi a2, a0, 22 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 52(sp) - addi a2, a0, 20 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 50(sp) - addi a2, a0, 18 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 48(sp) - addi a2, a0, 16 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 46(sp) - addi a2, a0, 14 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 44(sp) - addi a2, a0, 12 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 42(sp) - addi a2, a0, 10 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 40(sp) - addi a2, a0, 8 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 38(sp) - addi a2, a0, 6 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 36(sp) - addi a2, a0, 4 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 34(sp) - addi a2, a0, 2 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 32(sp) - #APP - sh a1, 0(a0) - #NO_APP - addi sp, sp, 64 - ret 
asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: sw ra, 76(sp) sw s0, 72(sp) @@ -815,487 +302,3 @@ asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: lw s11, 28(sp) addi sp, sp, 80 ret -.LBB2_3: - lbu t0, 0(a4) - mv a2, a3 - #APP - sb t0, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a3, a3, 1 - addi a4, a4, 1 - bgeu a3, a2, .LBB2_3 - sub a2, a6, a7 - li a3, 4 - bgeu a2, a3, .LBB2_7 - j .LBB2_9 -.LBB2_5: - lbu a1, 32(sp) - #APP - sb a1, 0(a0) - #NO_APP - lbu a1, 33(sp) - addi a2, a0, 1 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 34(sp) - addi a2, a0, 2 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 35(sp) - #APP - sb a1, 0(a3) - #NO_APP - lbu a1, 36(sp) - addi a2, a0, 4 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 37(sp) - addi a2, a0, 5 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 38(sp) - addi a2, a0, 6 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 39(sp) - addi a2, a0, 7 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 40(sp) - addi a2, a0, 8 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 41(sp) - addi a2, a0, 9 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 42(sp) - addi a2, a0, 10 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 43(sp) - addi a2, a0, 11 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 44(sp) - addi a2, a0, 12 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 45(sp) - addi a2, a0, 13 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 46(sp) - addi a2, a0, 14 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 47(sp) - addi a2, a0, 15 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 48(sp) - addi a2, a0, 16 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 49(sp) - addi a2, a0, 17 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 50(sp) - addi a2, a0, 18 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 51(sp) - addi a2, a0, 19 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 52(sp) - addi a2, a0, 20 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 53(sp) - addi a2, a0, 21 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 54(sp) - addi a2, a0, 22 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 55(sp) - addi a2, a0, 23 - #APP - sb a1, 0(a2) - #NO_APP - 
lbu a1, 56(sp) - addi a2, a0, 24 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 57(sp) - addi a2, a0, 25 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 58(sp) - addi a2, a0, 26 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 59(sp) - addi a2, a0, 27 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 60(sp) - addi a2, a0, 28 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 61(sp) - addi a2, a0, 29 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 62(sp) - addi a2, a0, 30 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 63(sp) - addi a0, a0, 31 - #APP - sb a1, 0(a0) - #NO_APP - j .LBB2_12 -.LBB2_6: - li a2, 32 -.LBB2_7: - addi a6, sp, 32 - li a7, 3 -.LBB2_8: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - #APP - sw a3, 0(a4) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB2_8 -.LBB2_9: - beqz a2, .LBB2_12 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB2_11: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB2_11 -.LBB2_12: - addi sp, sp, 64 - ret -.LBB4_3: - #APP - lb t0, 0(a3) - #NO_APP - mv a2, a4 - sb t0, 0(a5) - addi a5, a5, 1 - addi a4, a4, 1 - addi a3, a3, 1 - bgeu a4, a2, .LBB4_3 - sub a3, a6, a7 - li a4, 4 - bltu a3, a4, .LBB4_7 -.LBB4_5: - addi a6, sp, 8 - li a7, 3 -.LBB4_6: - add a2, a1, t1 - #APP - lw a2, 0(a2) - #NO_APP - add a4, a6, t1 - sb a2, 0(a4) - srli a5, a2, 24 - sb a5, 3(a4) - srli a5, a2, 16 - sb a5, 2(a4) - srli a2, a2, 8 - sb a2, 1(a4) - addi a3, a3, -4 - addi t1, t1, 4 - bltu a7, a3, .LBB4_6 -.LBB4_7: - beqz a3, .LBB4_11 - addi a2, sp, 8 - add a4, a2, t1 - add a1, a1, t1 -.LBB4_9: - #APP - lb a2, 0(a1) - #NO_APP - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB4_9 - j .LBB4_11 -.LBB4_10: - addi a2, a1, 30 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 38(sp) - addi a2, a1, 28 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 36(sp) - 
addi a2, a1, 26 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 34(sp) - addi a2, a1, 24 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 32(sp) - addi a2, a1, 22 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 30(sp) - addi a2, a1, 20 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 28(sp) - addi a2, a1, 18 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 26(sp) - addi a2, a1, 16 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 24(sp) - addi a2, a1, 14 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 22(sp) - addi a2, a1, 12 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 20(sp) - addi a2, a1, 10 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 18(sp) - addi a2, a1, 8 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 16(sp) - addi a2, a1, 6 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 14(sp) - addi a2, a1, 4 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 12(sp) - addi a2, a1, 2 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 10(sp) - #APP - lh a1, 0(a1) - #NO_APP - sh a1, 8(sp) -.LBB4_11: - addi a1, sp, 8 - li a2, 32 - call memcpy@plt - fence r, rw - lw ra, 44(sp) - addi sp, sp, 48 - ret -.LBB6_3: - lbu t0, 0(a4) - mv a3, a2 - #APP - sb t0, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a2, a2, 1 - addi a4, a4, 1 - bgeu a2, a3, .LBB6_3 - sub a2, a6, a7 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a6, sp, 32 - li a7, 3 -.LBB6_6: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - #APP - sw a3, 0(a4) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB6_9: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lhu a1, 62(sp) - addi a2, a0, 30 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 60(sp) - addi a2, a0, 28 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 58(sp) - addi a2, a0, 26 - #APP - sh a1, 0(a2) - 
#NO_APP - lhu a1, 56(sp) - addi a2, a0, 24 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 54(sp) - addi a2, a0, 22 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 52(sp) - addi a2, a0, 20 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 50(sp) - addi a2, a0, 18 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 48(sp) - addi a2, a0, 16 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 46(sp) - addi a2, a0, 14 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 44(sp) - addi a2, a0, 12 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 42(sp) - addi a2, a0, 10 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 40(sp) - addi a2, a0, 8 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 38(sp) - addi a2, a0, 6 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 36(sp) - addi a2, a0, 4 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 34(sp) - addi a2, a0, 2 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 32(sp) - #APP - sh a1, 0(a0) - #NO_APP - addi sp, sp, 64 - ret diff --git a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align16 b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align16 index 902a286..dc16215 100644 --- a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align16 @@ -1,43 +1,69 @@ asm_test::atomic_memcpy_load_align16::acquire: + addi a2, a1, 28 + addi a3, sp, 28 #APP - lw a6, 0(a2) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a3, a1, 24 + addi a2, a1, 24 + addi a3, sp, 24 #APP - lw a7, 0(a3) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a4, a1, 20 + addi a2, a1, 20 + addi a3, sp, 20 #APP - lw t0, 0(a4) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a5, a1, 16 + addi a2, a1, 16 + addi a3, sp, 16 #APP - lw a5, 0(a5) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP addi a2, a1, 12 + addi a3, sp, 12 #APP lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a3, a1, 8 + addi a2, a1, 8 + addi a3, sp, 8 #APP - lw a3, 0(a3) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a4, a1, 4 + addi a2, a1, 4 + addi a3, sp, 4 #APP - lw a4, 0(a4) + lw a2, 0(a2) 
+ sw a2, 0(a3) #NO_APP + mv a2, sp #APP lw a1, 0(a1) + sw a1, 0(a2) #NO_APP + lw a1, 0(sp) + lw a2, 4(sp) + lw a3, 8(sp) + lw a4, 12(sp) sw a1, 0(a0) - sw a4, 4(a0) + sw a2, 4(a0) sw a3, 8(a0) - sw a2, 12(a0) - sw a5, 16(a0) - sw t0, 20(a0) - sw a7, 24(a0) - sw a6, 28(a0) + sw a4, 12(a0) + lw a1, 16(sp) + lw a2, 20(sp) + lw a3, 24(sp) + lw a4, 28(sp) + sw a1, 16(a0) + sw a2, 20(a0) + sw a3, 24(a0) + sw a4, 28(a0) fence r, rw + addi sp, sp, 32 ret asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: lw a7, 24(a1) diff --git a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align2 b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align2 index d543ae9..41bdd8e 100644 --- a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align2 @@ -1,143 +1,100 @@ asm_test::atomic_memcpy_load_align2::acquire: sw ra, 44(sp) - addi a2, a1, 3 - andi a7, a2, -4 - sub t1, a7, a1 - li a3, 32 - bltu a3, t1, .LBB4_10 - beqz t1, .LBB4_5 - addi a6, a1, 32 - sub a4, a1, a7 - addi a5, sp, 8 - mv a3, a1 -.LBB4_3: - #APP - lb t0, 0(a3) - #NO_APP - mv a2, a4 - sb t0, 0(a5) - addi a5, a5, 1 - addi a4, a4, 1 - addi a3, a3, 1 - bgeu a4, a2, .LBB4_3 - sub a3, a6, a7 - li a4, 4 - bltu a3, a4, .LBB4_7 -.LBB4_5: - addi a6, sp, 8 - li a7, 3 -.LBB4_6: - add a2, a1, t1 - #APP - lw a2, 0(a2) - #NO_APP - add a4, a6, t1 - sb a2, 0(a4) - srli a5, a2, 24 - sb a5, 3(a4) - srli a5, a2, 16 - sb a5, 2(a4) - srli a2, a2, 8 - sb a2, 1(a4) - addi a3, a3, -4 - addi t1, t1, 4 - bltu a7, a3, .LBB4_6 -.LBB4_7: - beqz a3, .LBB4_11 - addi a2, sp, 8 - add a4, a2, t1 - add a1, a1, t1 -.LBB4_9: - #APP - lb a2, 0(a1) - #NO_APP - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB4_9 - j .LBB4_11 -.LBB4_10: addi a2, a1, 30 + addi a3, sp, 38 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 38(sp) addi a2, a1, 28 + addi a3, sp, 36 #APP lh a2, 0(a2) + sh a2, 
0(a3) #NO_APP - sh a2, 36(sp) addi a2, a1, 26 + addi a3, sp, 34 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 34(sp) addi a2, a1, 24 + addi a3, sp, 32 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 32(sp) addi a2, a1, 22 + addi a3, sp, 30 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 30(sp) addi a2, a1, 20 + addi a3, sp, 28 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 28(sp) addi a2, a1, 18 + addi a3, sp, 26 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 26(sp) addi a2, a1, 16 + addi a3, sp, 24 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 24(sp) addi a2, a1, 14 + addi a3, sp, 22 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 22(sp) addi a2, a1, 12 + addi a3, sp, 20 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 20(sp) addi a2, a1, 10 + addi a3, sp, 18 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 18(sp) addi a2, a1, 8 + addi a3, sp, 16 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 16(sp) addi a2, a1, 6 + addi a3, sp, 14 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 14(sp) addi a2, a1, 4 + addi a3, sp, 12 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 12(sp) addi a2, a1, 2 + addi a3, sp, 10 #APP lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - sh a2, 10(sp) + addi a2, sp, 8 #APP lh a1, 0(a1) + sh a1, 0(a2) #NO_APP - sh a1, 8(sp) -.LBB4_11: addi a1, sp, 8 li a2, 32 call memcpy@plt @@ -145,140 +102,6 @@ asm_test::atomic_memcpy_load_align2::acquire: lw ra, 44(sp) addi sp, sp, 48 ret -.LBB6_3: - lbu t0, 0(a4) - mv a3, a2 - #APP - sb t0, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a2, a2, 1 - addi a4, a4, 1 - bgeu a2, a3, .LBB6_3 - sub a2, a6, a7 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a6, sp, 32 - li a7, 3 -.LBB6_6: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - #APP - sw a3, 0(a4) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - 
add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB6_9: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lhu a1, 62(sp) - addi a2, a0, 30 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 60(sp) - addi a2, a0, 28 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 58(sp) - addi a2, a0, 26 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 56(sp) - addi a2, a0, 24 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 54(sp) - addi a2, a0, 22 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 52(sp) - addi a2, a0, 20 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 50(sp) - addi a2, a0, 18 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 48(sp) - addi a2, a0, 16 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 46(sp) - addi a2, a0, 14 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 44(sp) - addi a2, a0, 12 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 42(sp) - addi a2, a0, 10 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 40(sp) - addi a2, a0, 8 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 38(sp) - addi a2, a0, 6 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 36(sp) - addi a2, a0, 4 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 34(sp) - addi a2, a0, 2 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 32(sp) - #APP - sh a1, 0(a0) - #NO_APP - addi sp, sp, 64 - ret asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: sw s0, 12(sp) sw s1, 8(sp) @@ -319,137 +142,3 @@ asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: lw s1, 8(sp) addi sp, sp, 16 ret -.LBB6_3: - lbu t0, 0(a4) - mv a3, a2 - #APP - sb t0, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a2, a2, 1 - addi a4, a4, 1 - bgeu a2, a3, .LBB6_3 - sub a2, a6, a7 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a6, sp, 32 - li a7, 3 -.LBB6_6: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - #APP - sw a3, 0(a4) - #NO_APP - addi a2, 
a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB6_9: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lhu a1, 62(sp) - addi a2, a0, 30 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 60(sp) - addi a2, a0, 28 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 58(sp) - addi a2, a0, 26 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 56(sp) - addi a2, a0, 24 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 54(sp) - addi a2, a0, 22 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 52(sp) - addi a2, a0, 20 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 50(sp) - addi a2, a0, 18 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 48(sp) - addi a2, a0, 16 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 46(sp) - addi a2, a0, 14 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 44(sp) - addi a2, a0, 12 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 42(sp) - addi a2, a0, 10 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 40(sp) - addi a2, a0, 8 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 38(sp) - addi a2, a0, 6 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 36(sp) - addi a2, a0, 4 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 34(sp) - addi a2, a0, 2 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 32(sp) - #APP - sh a1, 0(a0) - #NO_APP - addi sp, sp, 64 - ret diff --git a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align4 b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align4 index 766cf43..8ff9af2 100644 --- a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align4 @@ -1,43 +1,69 @@ asm_test::atomic_memcpy_load_align4::acquire: + addi a2, a1, 28 + addi a3, sp, 28 #APP - lw a6, 0(a2) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a3, a1, 24 + addi a2, a1, 24 + addi a3, sp, 24 #APP - lw a7, 0(a3) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - 
addi a4, a1, 20 + addi a2, a1, 20 + addi a3, sp, 20 #APP - lw t0, 0(a4) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a5, a1, 16 + addi a2, a1, 16 + addi a3, sp, 16 #APP - lw a5, 0(a5) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP addi a2, a1, 12 + addi a3, sp, 12 #APP lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a3, a1, 8 + addi a2, a1, 8 + addi a3, sp, 8 #APP - lw a3, 0(a3) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a4, a1, 4 + addi a2, a1, 4 + addi a3, sp, 4 #APP - lw a4, 0(a4) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP + mv a2, sp #APP lw a1, 0(a1) + sw a1, 0(a2) #NO_APP + lw a1, 0(sp) + lw a2, 4(sp) + lw a3, 8(sp) + lw a4, 12(sp) sw a1, 0(a0) - sw a4, 4(a0) + sw a2, 4(a0) sw a3, 8(a0) - sw a2, 12(a0) - sw a5, 16(a0) - sw t0, 20(a0) - sw a7, 24(a0) - sw a6, 28(a0) + sw a4, 12(a0) + lw a1, 16(sp) + lw a2, 20(sp) + lw a3, 24(sp) + lw a4, 28(sp) + sw a1, 16(a0) + sw a2, 20(a0) + sw a3, 24(a0) + sw a4, 28(a0) fence r, rw + addi sp, sp, 32 ret asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: lw a7, 24(a1) diff --git a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align8 b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align8 index 4171c51..a1c06af 100644 --- a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_load_align8 @@ -1,43 +1,69 @@ asm_test::atomic_memcpy_load_align8::acquire: + addi a2, a1, 28 + addi a3, sp, 28 #APP - lw a6, 0(a2) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a3, a1, 24 + addi a2, a1, 24 + addi a3, sp, 24 #APP - lw a7, 0(a3) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a4, a1, 20 + addi a2, a1, 20 + addi a3, sp, 20 #APP - lw t0, 0(a4) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a5, a1, 16 + addi a2, a1, 16 + addi a3, sp, 16 #APP - lw a5, 0(a5) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP addi a2, a1, 12 + addi a3, sp, 12 #APP lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a3, a1, 8 + addi a2, a1, 8 + addi a3, sp, 8 #APP - 
lw a3, 0(a3) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a4, a1, 4 + addi a2, a1, 4 + addi a3, sp, 4 #APP - lw a4, 0(a4) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP + mv a2, sp #APP lw a1, 0(a1) + sw a1, 0(a2) #NO_APP + lw a1, 0(sp) + lw a2, 4(sp) + lw a3, 8(sp) + lw a4, 12(sp) sw a1, 0(a0) - sw a4, 4(a0) + sw a2, 4(a0) sw a3, 8(a0) - sw a2, 12(a0) - sw a5, 16(a0) - sw t0, 20(a0) - sw a7, 24(a0) - sw a6, 28(a0) + sw a4, 12(a0) + lw a1, 16(sp) + lw a2, 20(sp) + lw a3, 24(sp) + lw a4, 28(sp) + sw a1, 16(a0) + sw a2, 20(a0) + sw a3, 24(a0) + sw a4, 28(a0) fence r, rw + addi sp, sp, 32 ret asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: lw a7, 24(a1) diff --git a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align1 b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align1 index f25c03e..47c9a47 100644 --- a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align1 @@ -91,510 +91,209 @@ asm_test::atomic_memcpy_store_align1::release: lw a1, 28(sp) lw a2, 24(sp) lw a3, 20(sp) + lw a4, 16(sp) sw a1, 60(sp) sw a2, 56(sp) sw a3, 52(sp) - lw a1, 16(sp) - lw a2, 12(sp) - lw a3, 8(sp) - lw a4, 4(sp) - sw a1, 48(sp) - sw a2, 44(sp) - sw a3, 40(sp) - sw a4, 36(sp) + sw a4, 48(sp) + lw a1, 12(sp) + lw a2, 8(sp) + lw a3, 4(sp) lw a4, 0(sp) - addi a3, a0, 3 - andi a7, a3, -4 - sub a1, a7, a0 - li a5, 33 + sw a1, 44(sp) + sw a2, 40(sp) + sw a3, 36(sp) sw a4, 32(sp) - bgeu a1, a5, .LBB2_5 - beqz a1, .LBB2_6 - addi a6, a0, 32 - sub a3, a0, a7 - addi a4, sp, 32 - mv a5, a0 -.LBB2_3: - lbu t0, 0(a4) - mv a2, a3 - #APP - sb t0, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a3, a3, 1 - addi a4, a4, 1 - bgeu a3, a2, .LBB2_3 - sub a2, a6, a7 - li a3, 4 - bgeu a2, a3, .LBB2_7 - j .LBB2_9 -.LBB2_5: - lbu a1, 32(sp) - #APP - sb a1, 0(a0) - #NO_APP - lbu a1, 33(sp) - addi a2, a0, 1 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 34(sp) - addi a2, a0, 2 - #APP - 
sb a1, 0(a2) - #NO_APP - lbu a1, 35(sp) - #APP - sb a1, 0(a3) - #NO_APP - lbu a1, 36(sp) - addi a2, a0, 4 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 37(sp) - addi a2, a0, 5 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 38(sp) - addi a2, a0, 6 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 39(sp) - addi a2, a0, 7 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 40(sp) - addi a2, a0, 8 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 41(sp) - addi a2, a0, 9 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 42(sp) - addi a2, a0, 10 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 43(sp) - addi a2, a0, 11 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 44(sp) - addi a2, a0, 12 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 45(sp) - addi a2, a0, 13 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 46(sp) - addi a2, a0, 14 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 47(sp) - addi a2, a0, 15 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 48(sp) - addi a2, a0, 16 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 49(sp) - addi a2, a0, 17 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 50(sp) - addi a2, a0, 18 + addi a6, sp, 36 + addi a7, sp, 40 + addi t0, sp, 44 + addi t1, sp, 48 + addi a5, sp, 52 + addi a1, sp, 56 + addi a2, sp, 60 + addi a3, a0, 31 + addi a4, sp, 63 #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 51(sp) - addi a2, a0, 19 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 52(sp) - addi a2, a0, 20 - #APP - sb a1, 0(a2) + lb a4, 0(a4) + sb a4, 0(a3) #NO_APP - lbu a1, 53(sp) - addi a2, a0, 21 + addi a3, a0, 30 + addi a4, sp, 62 #APP - sb a1, 0(a2) + lb a4, 0(a4) + sb a4, 0(a3) #NO_APP - lbu a1, 54(sp) - addi a2, a0, 22 + addi a3, a0, 29 + addi a4, sp, 61 #APP - sb a1, 0(a2) + lb a4, 0(a4) + sb a4, 0(a3) #NO_APP - lbu a1, 55(sp) - addi a2, a0, 23 + addi a3, a0, 28 #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 56(sp) - addi a2, a0, 24 - #APP - sb a1, 0(a2) + lb a2, 0(a2) + sb a2, 0(a3) #NO_APP - lbu a1, 57(sp) - addi a2, a0, 25 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 58(sp) - addi a2, a0, 26 - #APP - sb a1, 0(a2) - #NO_APP - lbu a1, 59(sp) addi a2, a0, 27 + 
addi a3, sp, 59 #APP - sb a1, 0(a2) + lb a3, 0(a3) + sb a3, 0(a2) #NO_APP - lbu a1, 60(sp) - addi a2, a0, 28 + addi a2, a0, 26 + addi a3, sp, 58 #APP - sb a1, 0(a2) + lb a3, 0(a3) + sb a3, 0(a2) #NO_APP - lbu a1, 61(sp) - addi a2, a0, 29 + addi a2, a0, 25 + addi a3, sp, 57 #APP - sb a1, 0(a2) + lb a3, 0(a3) + sb a3, 0(a2) #NO_APP - lbu a1, 62(sp) - addi a2, a0, 30 + addi a2, a0, 24 #APP + lb a1, 0(a1) sb a1, 0(a2) #NO_APP - lbu a1, 63(sp) - addi a0, a0, 31 - #APP - sb a1, 0(a0) - #NO_APP - j .LBB2_12 -.LBB2_6: - li a2, 32 -.LBB2_7: - addi a6, sp, 32 - li a7, 3 -.LBB2_8: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - #APP - sw a3, 0(a4) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB2_8 -.LBB2_9: - beqz a2, .LBB2_12 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB2_11: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB2_11 -.LBB2_12: - addi sp, sp, 64 - ret -.LBB4_3: - #APP - lb t0, 0(a3) - #NO_APP - mv a2, a4 - sb t0, 0(a5) - addi a5, a5, 1 - addi a4, a4, 1 - addi a3, a3, 1 - bgeu a4, a2, .LBB4_3 - sub a3, a6, a7 - li a4, 4 - bltu a3, a4, .LBB4_7 -.LBB4_5: - addi a6, sp, 8 - li a7, 3 -.LBB4_6: - add a2, a1, t1 - #APP - lw a2, 0(a2) - #NO_APP - add a4, a6, t1 - sb a2, 0(a4) - srli a5, a2, 24 - sb a5, 3(a4) - srli a5, a2, 16 - sb a5, 2(a4) - srli a2, a2, 8 - sb a2, 1(a4) - addi a3, a3, -4 - addi t1, t1, 4 - bltu a7, a3, .LBB4_6 -.LBB4_7: - beqz a3, .LBB4_11 - addi a2, sp, 8 - add a4, a2, t1 - add a1, a1, t1 -.LBB4_9: - #APP - lb a2, 0(a1) - #NO_APP - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB4_9 - j .LBB4_11 -.LBB4_10: - addi a2, a1, 30 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 38(sp) - addi a2, a1, 28 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 36(sp) - addi a2, a1, 26 - #APP - lh a2, 0(a2) - 
#NO_APP - sh a2, 34(sp) - addi a2, a1, 24 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 32(sp) - addi a2, a1, 22 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 30(sp) - addi a2, a1, 20 + addi a1, a0, 23 + addi a2, sp, 55 #APP - lh a2, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - sh a2, 28(sp) - addi a2, a1, 18 + addi a1, a0, 22 + addi a2, sp, 54 #APP - lh a2, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - sh a2, 26(sp) - addi a2, a1, 16 + addi a1, a0, 21 + addi a2, sp, 53 #APP - lh a2, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - sh a2, 24(sp) - addi a2, a1, 14 + addi a1, a0, 20 #APP - lh a2, 0(a2) + lb a2, 0(a5) + sb a2, 0(a1) #NO_APP - sh a2, 22(sp) - addi a2, a1, 12 + addi a1, a0, 19 + addi a2, sp, 51 #APP - lh a2, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - sh a2, 20(sp) - addi a2, a1, 10 + addi a1, a0, 18 + addi a2, sp, 50 #APP - lh a2, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - sh a2, 18(sp) - addi a2, a1, 8 + addi a1, a0, 17 + addi a2, sp, 49 #APP - lh a2, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - sh a2, 16(sp) - addi a2, a1, 6 + addi a1, a0, 16 #APP - lh a2, 0(a2) + lb a2, 0(t1) + sb a2, 0(a1) #NO_APP - sh a2, 14(sp) - addi a2, a1, 4 + addi a1, a0, 15 + addi a2, sp, 47 #APP - lh a2, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - sh a2, 12(sp) - addi a2, a1, 2 + addi a1, a0, 14 + addi a2, sp, 46 #APP - lh a2, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - sh a2, 10(sp) + addi a1, a0, 13 + addi a2, sp, 45 #APP - lh a1, 0(a1) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - sh a1, 8(sp) -.LBB4_11: - addi a1, sp, 8 - li a2, 32 - call memcpy@plt - fence r, rw - lw ra, 44(sp) - addi sp, sp, 48 - ret -.LBB6_3: - lbu t0, 0(a4) - mv a3, a2 - #APP - sb t0, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a2, a2, 1 - addi a4, a4, 1 - bgeu a2, a3, .LBB6_3 - sub a2, a6, a7 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a6, sp, 32 - li a7, 3 -.LBB6_6: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or 
a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - #APP - sw a3, 0(a4) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB6_9: - lbu a3, 0(a1) + addi a1, a0, 12 #APP - sb a3, 0(a0) + lb a2, 0(t0) + sb a2, 0(a1) #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lhu a1, 62(sp) - addi a2, a0, 30 + addi a1, a0, 11 + addi a2, sp, 43 #APP - sh a1, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - lhu a1, 60(sp) - addi a2, a0, 28 + addi a1, a0, 10 + addi a2, sp, 42 #APP - sh a1, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - lhu a1, 58(sp) - addi a2, a0, 26 + addi a1, a0, 9 + addi a2, sp, 41 #APP - sh a1, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - lhu a1, 56(sp) - addi a2, a0, 24 + addi a1, a0, 8 #APP - sh a1, 0(a2) + lb a2, 0(a7) + sb a2, 0(a1) #NO_APP - lhu a1, 54(sp) - addi a2, a0, 22 + addi a1, a0, 7 + addi a2, sp, 39 #APP - sh a1, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - lhu a1, 52(sp) - addi a2, a0, 20 + addi a1, a0, 6 + addi a2, sp, 38 #APP - sh a1, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - lhu a1, 50(sp) - addi a2, a0, 18 + addi a1, a0, 5 + addi a2, sp, 37 #APP - sh a1, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - lhu a1, 48(sp) - addi a2, a0, 16 + addi a1, a0, 4 #APP - sh a1, 0(a2) + lb a2, 0(a6) + sb a2, 0(a1) #NO_APP - lhu a1, 46(sp) - addi a2, a0, 14 + addi a1, a0, 3 + addi a2, sp, 35 #APP - sh a1, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - lhu a1, 44(sp) - addi a2, a0, 12 + addi a1, a0, 2 + addi a2, sp, 34 #APP - sh a1, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - lhu a1, 42(sp) - addi a2, a0, 10 + addi a1, a0, 1 + addi a2, sp, 33 #APP - sh a1, 0(a2) + lb a2, 0(a2) + sb a2, 0(a1) #NO_APP - lhu a1, 40(sp) - addi a2, a0, 8 + addi a1, sp, 32 #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 38(sp) - addi a2, a0, 6 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 36(sp) - addi 
a2, a0, 4 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 34(sp) - addi a2, a0, 2 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 32(sp) - #APP - sh a1, 0(a0) + lb a1, 0(a1) + sb a1, 0(a0) #NO_APP addi sp, sp, 64 ret @@ -695,272 +394,3 @@ asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: lw ra, 44(sp) addi sp, sp, 48 ret -.LBB4_3: - #APP - lb t0, 0(a3) - #NO_APP - mv a2, a4 - sb t0, 0(a5) - addi a5, a5, 1 - addi a4, a4, 1 - addi a3, a3, 1 - bgeu a4, a2, .LBB4_3 - sub a3, a6, a7 - li a4, 4 - bltu a3, a4, .LBB4_7 -.LBB4_5: - addi a6, sp, 8 - li a7, 3 -.LBB4_6: - add a2, a1, t1 - #APP - lw a2, 0(a2) - #NO_APP - add a4, a6, t1 - sb a2, 0(a4) - srli a5, a2, 24 - sb a5, 3(a4) - srli a5, a2, 16 - sb a5, 2(a4) - srli a2, a2, 8 - sb a2, 1(a4) - addi a3, a3, -4 - addi t1, t1, 4 - bltu a7, a3, .LBB4_6 -.LBB4_7: - beqz a3, .LBB4_11 - addi a2, sp, 8 - add a4, a2, t1 - add a1, a1, t1 -.LBB4_9: - #APP - lb a2, 0(a1) - #NO_APP - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB4_9 - j .LBB4_11 -.LBB4_10: - addi a2, a1, 30 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 38(sp) - addi a2, a1, 28 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 36(sp) - addi a2, a1, 26 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 34(sp) - addi a2, a1, 24 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 32(sp) - addi a2, a1, 22 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 30(sp) - addi a2, a1, 20 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 28(sp) - addi a2, a1, 18 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 26(sp) - addi a2, a1, 16 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 24(sp) - addi a2, a1, 14 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 22(sp) - addi a2, a1, 12 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 20(sp) - addi a2, a1, 10 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 18(sp) - addi a2, a1, 8 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 16(sp) - addi a2, a1, 6 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 14(sp) - addi a2, a1, 4 - #APP - lh a2, 0(a2) - #NO_APP - sh a2, 12(sp) - addi a2, a1, 2 - #APP - lh a2, 
0(a2) - #NO_APP - sh a2, 10(sp) - #APP - lh a1, 0(a1) - #NO_APP - sh a1, 8(sp) -.LBB4_11: - addi a1, sp, 8 - li a2, 32 - call memcpy@plt - fence r, rw - lw ra, 44(sp) - addi sp, sp, 48 - ret -.LBB6_3: - lbu t0, 0(a4) - mv a3, a2 - #APP - sb t0, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a2, a2, 1 - addi a4, a4, 1 - bgeu a2, a3, .LBB6_3 - sub a2, a6, a7 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a6, sp, 32 - li a7, 3 -.LBB6_6: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - #APP - sw a3, 0(a4) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB6_9: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lhu a1, 62(sp) - addi a2, a0, 30 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 60(sp) - addi a2, a0, 28 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 58(sp) - addi a2, a0, 26 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 56(sp) - addi a2, a0, 24 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 54(sp) - addi a2, a0, 22 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 52(sp) - addi a2, a0, 20 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 50(sp) - addi a2, a0, 18 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 48(sp) - addi a2, a0, 16 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 46(sp) - addi a2, a0, 14 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 44(sp) - addi a2, a0, 12 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 42(sp) - addi a2, a0, 10 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 40(sp) - addi a2, a0, 8 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 38(sp) - addi a2, a0, 6 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 36(sp) - addi a2, a0, 4 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 34(sp) - addi a2, a0, 2 - #APP - sh a1, 0(a2) - #NO_APP - lhu a1, 32(sp) - 
#APP - sh a1, 0(a0) - #NO_APP - addi sp, sp, 64 - ret diff --git a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align16 b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align16 index 09106dc..312a7c5 100644 --- a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align16 @@ -1,43 +1,85 @@ asm_test::atomic_memcpy_store_align16::release: - lw a7, 4(a1) - lw t0, 8(a1) - lw t1, 12(a1) - lw a2, 16(a1) - lw a3, 20(a1) - lw a4, 28(a1) - lw a1, 24(a1) + lw a2, 28(a1) + lw a3, 24(a1) + lw a4, 20(a1) + lw a5, 16(a1) + sw a2, 28(sp) + sw a3, 24(sp) + sw a4, 20(sp) + sw a5, 16(sp) + lw a2, 12(a1) + lw a3, 8(a1) + lw a4, 4(a1) + lw a1, 0(a1) + sw a2, 12(sp) + sw a3, 8(sp) + sw a4, 4(sp) + sw a1, 0(sp) fence rw, w - addi a5, a0, 28 + lw a1, 28(sp) + lw a2, 24(sp) + lw a3, 20(sp) + lw a4, 16(sp) + sw a1, 60(sp) + sw a2, 56(sp) + sw a3, 52(sp) + sw a4, 48(sp) + lw a1, 12(sp) + lw a2, 8(sp) + lw a3, 4(sp) + lw a4, 0(sp) + sw a1, 44(sp) + sw a2, 40(sp) + sw a3, 36(sp) + sw a4, 32(sp) + addi a6, sp, 36 + addi a7, sp, 40 + addi t0, sp, 44 + addi a4, sp, 48 + addi a5, sp, 52 + addi a1, sp, 56 + addi a2, sp, 60 + addi a3, a0, 28 #APP - sw a4, 0(a5) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a4, a0, 24 + addi a2, a0, 24 #APP - sw a1, 0(a4) + lw a1, 0(a1) + sw a1, 0(a2) #NO_APP addi a1, a0, 20 #APP - sw a3, 0(a1) + lw a2, 0(a5) + sw a2, 0(a1) #NO_APP addi a1, a0, 16 #APP + lw a2, 0(a4) sw a2, 0(a1) #NO_APP addi a1, a0, 12 #APP - sw t1, 0(a1) + lw a2, 0(t0) + sw a2, 0(a1) #NO_APP addi a1, a0, 8 #APP - sw t0, 0(a1) + lw a2, 0(a7) + sw a2, 0(a1) #NO_APP addi a1, a0, 4 #APP - sw a7, 0(a1) + lw a2, 0(a6) + sw a2, 0(a1) #NO_APP + addi a1, sp, 32 #APP - sw a6, 0(a0) + lw a1, 0(a1) + sw a1, 0(a0) #NO_APP + addi sp, sp, 64 ret asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: fence rw, w diff --git 
a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align2 b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align2 index 3795b7e..de653a7 100644 --- a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align2 @@ -43,159 +43,112 @@ asm_test::atomic_memcpy_store_align2::release: lw a1, 28(sp) lw a2, 24(sp) lw a3, 20(sp) + lw a4, 16(sp) sw a1, 60(sp) sw a2, 56(sp) sw a3, 52(sp) - lw a1, 16(sp) - lw a2, 12(sp) - lw a3, 8(sp) - lw a4, 4(sp) - sw a1, 48(sp) - sw a2, 44(sp) - sw a3, 40(sp) - sw a4, 36(sp) + sw a4, 48(sp) + lw a1, 12(sp) + lw a2, 8(sp) + lw a3, 4(sp) lw a4, 0(sp) - addi a1, a0, 3 - andi a7, a1, -4 - sub a1, a7, a0 - li a2, 32 + sw a1, 44(sp) + sw a2, 40(sp) + sw a3, 36(sp) sw a4, 32(sp) - bltu a2, a1, .LBB6_11 - beqz a1, .LBB6_5 - addi a6, a0, 32 - sub a2, a0, a7 - addi a4, sp, 32 - mv a5, a0 -.LBB6_3: - lbu t0, 0(a4) - mv a3, a2 - #APP - sb t0, 0(a5) - #NO_APP - addi a5, a5, 1 - addi a2, a2, 1 - addi a4, a4, 1 - bgeu a2, a3, .LBB6_3 - sub a2, a6, a7 - li a3, 4 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a6, sp, 32 - li a7, 3 -.LBB6_6: - add a5, a6, a1 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a5, 2(a5) - slli a4, t0, 8 - or a4, a4, t1 - slli a3, a3, 8 - or a3, a3, a5 - slli a3, a3, 16 - or a3, a3, a4 - add a4, a0, a1 - #APP - sw a3, 0(a4) - #NO_APP - addi a2, a2, -4 - addi a1, a1, 4 - bltu a7, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, a1 - addi a3, sp, 32 - add a1, a1, a3 -.LBB6_9: - lbu a3, 0(a1) - #APP - sb a3, 0(a0) - #NO_APP - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 64 - ret -.LBB6_11: - lhu a1, 62(sp) - addi a2, a0, 30 + addi a6, sp, 36 + addi a7, sp, 40 + addi t0, sp, 44 + addi t1, sp, 48 + addi a5, sp, 52 + addi a1, sp, 56 + addi a2, sp, 60 + addi a3, a0, 30 + addi a4, sp, 62 #APP - sh a1, 0(a2) + lh a4, 0(a4) + sh a4, 0(a3) #NO_APP - lhu a1, 
60(sp) - addi a2, a0, 28 + addi a3, a0, 28 #APP - sh a1, 0(a2) + lh a2, 0(a2) + sh a2, 0(a3) #NO_APP - lhu a1, 58(sp) addi a2, a0, 26 + addi a3, sp, 58 #APP - sh a1, 0(a2) + lh a3, 0(a3) + sh a3, 0(a2) #NO_APP - lhu a1, 56(sp) addi a2, a0, 24 #APP + lh a1, 0(a1) sh a1, 0(a2) #NO_APP - lhu a1, 54(sp) - addi a2, a0, 22 + addi a1, a0, 22 + addi a2, sp, 54 #APP - sh a1, 0(a2) + lh a2, 0(a2) + sh a2, 0(a1) #NO_APP - lhu a1, 52(sp) - addi a2, a0, 20 + addi a1, a0, 20 #APP - sh a1, 0(a2) + lh a2, 0(a5) + sh a2, 0(a1) #NO_APP - lhu a1, 50(sp) - addi a2, a0, 18 + addi a1, a0, 18 + addi a2, sp, 50 #APP - sh a1, 0(a2) + lh a2, 0(a2) + sh a2, 0(a1) #NO_APP - lhu a1, 48(sp) - addi a2, a0, 16 + addi a1, a0, 16 #APP - sh a1, 0(a2) + lh a2, 0(t1) + sh a2, 0(a1) #NO_APP - lhu a1, 46(sp) - addi a2, a0, 14 + addi a1, a0, 14 + addi a2, sp, 46 #APP - sh a1, 0(a2) + lh a2, 0(a2) + sh a2, 0(a1) #NO_APP - lhu a1, 44(sp) - addi a2, a0, 12 + addi a1, a0, 12 #APP - sh a1, 0(a2) + lh a2, 0(t0) + sh a2, 0(a1) #NO_APP - lhu a1, 42(sp) - addi a2, a0, 10 + addi a1, a0, 10 + addi a2, sp, 42 #APP - sh a1, 0(a2) + lh a2, 0(a2) + sh a2, 0(a1) #NO_APP - lhu a1, 40(sp) - addi a2, a0, 8 + addi a1, a0, 8 #APP - sh a1, 0(a2) + lh a2, 0(a7) + sh a2, 0(a1) #NO_APP - lhu a1, 38(sp) - addi a2, a0, 6 + addi a1, a0, 6 + addi a2, sp, 38 #APP - sh a1, 0(a2) + lh a2, 0(a2) + sh a2, 0(a1) #NO_APP - lhu a1, 36(sp) - addi a2, a0, 4 + addi a1, a0, 4 #APP - sh a1, 0(a2) + lh a2, 0(a6) + sh a2, 0(a1) #NO_APP - lhu a1, 34(sp) - addi a2, a0, 2 + addi a1, a0, 2 + addi a2, sp, 34 #APP - sh a1, 0(a2) + lh a2, 0(a2) + sh a2, 0(a1) #NO_APP - lhu a1, 32(sp) + addi a1, sp, 32 #APP + lh a1, 0(a1) sh a1, 0(a0) #NO_APP addi sp, sp, 64 diff --git a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align4 b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align4 index 60a831e..c61939a 100644 --- a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align4 +++ 
b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align4 @@ -1,43 +1,85 @@ asm_test::atomic_memcpy_store_align4::release: - lw a7, 4(a1) - lw t0, 8(a1) - lw t1, 12(a1) - lw a2, 16(a1) - lw a3, 20(a1) - lw a4, 28(a1) - lw a1, 24(a1) + lw a2, 28(a1) + lw a3, 24(a1) + lw a4, 20(a1) + lw a5, 16(a1) + sw a2, 28(sp) + sw a3, 24(sp) + sw a4, 20(sp) + sw a5, 16(sp) + lw a2, 12(a1) + lw a3, 8(a1) + lw a4, 4(a1) + lw a1, 0(a1) + sw a2, 12(sp) + sw a3, 8(sp) + sw a4, 4(sp) + sw a1, 0(sp) fence rw, w - addi a5, a0, 28 + lw a1, 28(sp) + lw a2, 24(sp) + lw a3, 20(sp) + lw a4, 16(sp) + sw a1, 60(sp) + sw a2, 56(sp) + sw a3, 52(sp) + sw a4, 48(sp) + lw a1, 12(sp) + lw a2, 8(sp) + lw a3, 4(sp) + lw a4, 0(sp) + sw a1, 44(sp) + sw a2, 40(sp) + sw a3, 36(sp) + sw a4, 32(sp) + addi a6, sp, 36 + addi a7, sp, 40 + addi t0, sp, 44 + addi a4, sp, 48 + addi a5, sp, 52 + addi a1, sp, 56 + addi a2, sp, 60 + addi a3, a0, 28 #APP - sw a4, 0(a5) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a4, a0, 24 + addi a2, a0, 24 #APP - sw a1, 0(a4) + lw a1, 0(a1) + sw a1, 0(a2) #NO_APP addi a1, a0, 20 #APP - sw a3, 0(a1) + lw a2, 0(a5) + sw a2, 0(a1) #NO_APP addi a1, a0, 16 #APP + lw a2, 0(a4) sw a2, 0(a1) #NO_APP addi a1, a0, 12 #APP - sw t1, 0(a1) + lw a2, 0(t0) + sw a2, 0(a1) #NO_APP addi a1, a0, 8 #APP - sw t0, 0(a1) + lw a2, 0(a7) + sw a2, 0(a1) #NO_APP addi a1, a0, 4 #APP - sw a7, 0(a1) + lw a2, 0(a6) + sw a2, 0(a1) #NO_APP + addi a1, sp, 32 #APP - sw a6, 0(a0) + lw a1, 0(a1) + sw a1, 0(a0) #NO_APP + addi sp, sp, 64 ret asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: fence rw, w diff --git a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align8 b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align8 index c267b20..79b6bca 100644 --- a/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/riscv32imc-unknown-none-elf/atomic_memcpy_store_align8 @@ -1,43 +1,85 @@ 
asm_test::atomic_memcpy_store_align8::release: - lw a7, 4(a1) - lw t0, 8(a1) - lw t1, 12(a1) - lw a2, 16(a1) - lw a3, 20(a1) - lw a4, 28(a1) - lw a1, 24(a1) + lw a2, 28(a1) + lw a3, 24(a1) + lw a4, 20(a1) + lw a5, 16(a1) + sw a2, 28(sp) + sw a3, 24(sp) + sw a4, 20(sp) + sw a5, 16(sp) + lw a2, 12(a1) + lw a3, 8(a1) + lw a4, 4(a1) + lw a1, 0(a1) + sw a2, 12(sp) + sw a3, 8(sp) + sw a4, 4(sp) + sw a1, 0(sp) fence rw, w - addi a5, a0, 28 + lw a1, 28(sp) + lw a2, 24(sp) + lw a3, 20(sp) + lw a4, 16(sp) + sw a1, 60(sp) + sw a2, 56(sp) + sw a3, 52(sp) + sw a4, 48(sp) + lw a1, 12(sp) + lw a2, 8(sp) + lw a3, 4(sp) + lw a4, 0(sp) + sw a1, 44(sp) + sw a2, 40(sp) + sw a3, 36(sp) + sw a4, 32(sp) + addi a6, sp, 36 + addi a7, sp, 40 + addi t0, sp, 44 + addi a4, sp, 48 + addi a5, sp, 52 + addi a1, sp, 56 + addi a2, sp, 60 + addi a3, a0, 28 #APP - sw a4, 0(a5) + lw a2, 0(a2) + sw a2, 0(a3) #NO_APP - addi a4, a0, 24 + addi a2, a0, 24 #APP - sw a1, 0(a4) + lw a1, 0(a1) + sw a1, 0(a2) #NO_APP addi a1, a0, 20 #APP - sw a3, 0(a1) + lw a2, 0(a5) + sw a2, 0(a1) #NO_APP addi a1, a0, 16 #APP + lw a2, 0(a4) sw a2, 0(a1) #NO_APP addi a1, a0, 12 #APP - sw t1, 0(a1) + lw a2, 0(t0) + sw a2, 0(a1) #NO_APP addi a1, a0, 8 #APP - sw t0, 0(a1) + lw a2, 0(a7) + sw a2, 0(a1) #NO_APP addi a1, a0, 4 #APP - sw a7, 0(a1) + lw a2, 0(a6) + sw a2, 0(a1) #NO_APP + addi a1, sp, 32 #APP - sw a6, 0(a0) + lw a1, 0(a1) + sw a1, 0(a0) #NO_APP + addi sp, sp, 64 ret asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: fence rw, w diff --git a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align1 b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align1 index fee597f..b604237 100644 --- a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align1 @@ -1,81 +1,18 @@ asm_test::atomic_memcpy_load_align1::acquire: addi sp, sp, -80 sd ra, 72(sp) - addi a2, a1, 7 - andi a7, a2, -8 - sub 
t1, a7, a1 - li a3, 65 - bgeu t1, a3, .LBB0_5 - beqz t1, .LBB0_7 - addi a6, a1, 64 - sub a3, a1, a7 - addi a5, sp, 8 - mv a4, a1 -.LBB0_3: - lb t0, 0(a4) - mv a2, a3 - sb t0, 0(a5) - addi a5, a5, 1 - addi a3, a3, 1 - addi a4, a4, 1 - bgeu a3, a2, .LBB0_3 - sub a3, a6, a7 - li a4, 8 - bgeu a3, a4, .LBB0_8 - j .LBB0_10 -.LBB0_5: - li a4, 0 + li a2, 63 addi a6, sp, 8 - li a3, 64 -.LBB0_6: - add a5, a1, a4 + li a4, -1 +.LBB0_1: + add a5, a1, a2 + add a3, a6, a2 + #APP lb a5, 0(a5) - addi a2, a4, 1 - add a4, a4, a6 - sb a5, 0(a4) - mv a4, a2 - bne a2, a3, .LBB0_6 - j .LBB0_13 -.LBB0_7: - li a3, 64 -.LBB0_8: - addi a6, sp, 8 - li a7, 7 -.LBB0_9: - add a2, a1, t1 - ld a2, 0(a2) - add a4, a6, t1 - sb a2, 0(a4) - srli a5, a2, 56 - sb a5, 7(a4) - srli a5, a2, 48 - sb a5, 6(a4) - srli a5, a2, 40 - sb a5, 5(a4) - srli a5, a2, 32 - sb a5, 4(a4) - srli a5, a2, 24 - sb a5, 3(a4) - srli a5, a2, 16 - sb a5, 2(a4) - srli a2, a2, 8 - sb a2, 1(a4) - addi a3, a3, -8 - addi t1, t1, 8 - bltu a7, a3, .LBB0_9 -.LBB0_10: - beqz a3, .LBB0_13 - addi a2, sp, 8 - add a4, a2, t1 - add a1, a1, t1 -.LBB0_12: - lb a2, 0(a1) - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB0_12 -.LBB0_13: + sb a5, 0(a3) + #NO_APP + addi a2, a2, -1 + bne a2, a4, .LBB0_1 addi a1, sp, 8 li a2, 64 call memcpy@plt diff --git a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align16 b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align16 index 6012e7e..94507f9 100644 --- a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align16 @@ -1,21 +1,70 @@ asm_test::atomic_memcpy_load_align16::acquire: - ld a6, 56(a1) - ld a7, 48(a1) - ld t0, 40(a1) - ld a5, 32(a1) - ld a2, 24(a1) - ld a3, 16(a1) - ld a4, 8(a1) + addi sp, sp, -64 + addi a2, a1, 56 + addi a3, sp, 56 + #APP + ld a2, 0(a2) + sd a2, 0(a3) + #NO_APP + addi a2, a1, 48 + addi a3, sp, 48 + #APP + ld a2, 
0(a2) + sd a2, 0(a3) + #NO_APP + addi a2, a1, 40 + addi a3, sp, 40 + #APP + ld a2, 0(a2) + sd a2, 0(a3) + #NO_APP + addi a2, a1, 32 + addi a3, sp, 32 + #APP + ld a2, 0(a2) + sd a2, 0(a3) + #NO_APP + addi a2, a1, 24 + addi a3, sp, 24 + #APP + ld a2, 0(a2) + sd a2, 0(a3) + #NO_APP + addi a2, a1, 16 + addi a3, sp, 16 + #APP + ld a2, 0(a2) + sd a2, 0(a3) + #NO_APP + addi a2, a1, 8 + addi a3, sp, 8 + #APP + ld a2, 0(a2) + sd a2, 0(a3) + #NO_APP + mv a2, sp + #APP ld a1, 0(a1) + sd a1, 0(a2) + #NO_APP + ld a1, 0(sp) + ld a2, 8(sp) + ld a3, 16(sp) + ld a4, 24(sp) sd a1, 0(a0) - sd a4, 8(a0) + sd a2, 8(a0) sd a3, 16(a0) - sd a2, 24(a0) - sd a5, 32(a0) - sd t0, 40(a0) - sd a7, 48(a0) - sd a6, 56(a0) + sd a4, 24(a0) + ld a1, 32(sp) + ld a2, 40(sp) + ld a3, 48(sp) + ld a4, 56(sp) + sd a1, 32(a0) + sd a2, 40(a0) + sd a3, 48(a0) + sd a4, 56(a0) fence r, rw + addi sp, sp, 64 ret asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: ld a6, 56(a1) diff --git a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align2 b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align2 index d7be9e2..8bc72eb 100644 --- a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align2 @@ -1,131 +1,197 @@ asm_test::atomic_memcpy_load_align2::acquire: addi sp, sp, -80 sd ra, 72(sp) - addi a2, a1, 7 - andi a7, a2, -8 - sub t1, a7, a1 - li a3, 64 - bltu a3, t1, .LBB4_10 - beqz t1, .LBB4_5 - addi a6, a1, 64 - sub a4, a1, a7 - addi a5, sp, 8 - mv a3, a1 -.LBB4_3: - lb t0, 0(a3) - mv a2, a4 - sb t0, 0(a5) - addi a5, a5, 1 - addi a4, a4, 1 - addi a3, a3, 1 - bgeu a4, a2, .LBB4_3 - sub a3, a6, a7 - li a4, 8 - bltu a3, a4, .LBB4_7 -.LBB4_5: - addi a6, sp, 8 - li a7, 7 -.LBB4_6: - add a2, a1, t1 - ld a2, 0(a2) - add a4, a6, t1 - sb a2, 0(a4) - srli a5, a2, 56 - sb a5, 7(a4) - srli a5, a2, 48 - sb a5, 6(a4) - srli a5, a2, 40 - sb a5, 5(a4) - srli a5, a2, 32 - sb a5, 4(a4) - 
srli a5, a2, 24 - sb a5, 3(a4) - srli a5, a2, 16 - sb a5, 2(a4) - srli a2, a2, 8 - sb a2, 1(a4) - addi a3, a3, -8 - addi t1, t1, 8 - bltu a7, a3, .LBB4_6 -.LBB4_7: - beqz a3, .LBB4_11 + addi a2, a1, 62 + addi a3, sp, 70 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 60 + addi a3, sp, 68 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 58 + addi a3, sp, 66 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 56 + addi a3, sp, 64 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 54 + addi a3, sp, 62 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 52 + addi a3, sp, 60 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 50 + addi a3, sp, 58 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 48 + addi a3, sp, 56 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 46 + addi a3, sp, 54 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 44 + addi a3, sp, 52 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 42 + addi a3, sp, 50 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 40 + addi a3, sp, 48 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 38 + addi a3, sp, 46 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 36 + addi a3, sp, 44 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 34 + addi a3, sp, 42 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 32 + addi a3, sp, 40 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 30 + addi a3, sp, 38 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 28 + addi a3, sp, 36 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 26 + addi a3, sp, 34 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 24 + addi a3, sp, 32 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 22 + addi a3, sp, 30 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 20 + addi a3, sp, 28 + #APP + lh a2, 0(a2) + sh a2, 0(a3) 
+ #NO_APP + addi a2, a1, 18 + addi a3, sp, 26 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 16 + addi a3, sp, 24 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 14 + addi a3, sp, 22 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 12 + addi a3, sp, 20 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 10 + addi a3, sp, 18 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 8 + addi a3, sp, 16 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 6 + addi a3, sp, 14 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 4 + addi a3, sp, 12 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a1, 2 + addi a3, sp, 10 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP addi a2, sp, 8 - add a4, a2, t1 - add a1, a1, t1 -.LBB4_9: - lb a2, 0(a1) - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB4_9 - j .LBB4_11 -.LBB4_10: - lh a2, 62(a1) - sh a2, 70(sp) - lh a2, 60(a1) - sh a2, 68(sp) - lh a2, 58(a1) - sh a2, 66(sp) - lh a2, 56(a1) - sh a2, 64(sp) - lh a2, 54(a1) - sh a2, 62(sp) - lh a2, 52(a1) - sh a2, 60(sp) - lh a2, 50(a1) - sh a2, 58(sp) - lh a2, 48(a1) - sh a2, 56(sp) - lh a2, 46(a1) - sh a2, 54(sp) - lh a2, 44(a1) - sh a2, 52(sp) - lh a2, 42(a1) - sh a2, 50(sp) - lh a2, 40(a1) - sh a2, 48(sp) - lh a2, 38(a1) - sh a2, 46(sp) - lh a2, 36(a1) - sh a2, 44(sp) - lh a2, 34(a1) - sh a2, 42(sp) - lh a2, 32(a1) - sh a2, 40(sp) - lh a2, 30(a1) - sh a2, 38(sp) - lh a2, 28(a1) - sh a2, 36(sp) - lh a2, 26(a1) - sh a2, 34(sp) - lh a2, 24(a1) - sh a2, 32(sp) - lh a2, 22(a1) - sh a2, 30(sp) - lh a2, 20(a1) - sh a2, 28(sp) - lh a2, 18(a1) - sh a2, 26(sp) - lh a2, 16(a1) - sh a2, 24(sp) - lh a2, 14(a1) - sh a2, 22(sp) - lh a2, 12(a1) - sh a2, 20(sp) - lh a2, 10(a1) - sh a2, 18(sp) - lh a2, 8(a1) - sh a2, 16(sp) - lh a2, 6(a1) - sh a2, 14(sp) - lh a2, 4(a1) - sh a2, 12(sp) - lh a2, 2(a1) - sh a2, 10(sp) + #APP lh a1, 0(a1) - sh a1, 8(sp) -.LBB4_11: + sh a1, 0(a2) + #NO_APP 
addi a1, sp, 8 li a2, 64 call memcpy@plt diff --git a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align4 b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align4 index 385c0f3..85cce8e 100644 --- a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align4 @@ -1,99 +1,101 @@ asm_test::atomic_memcpy_load_align4::acquire: addi sp, sp, -80 sd ra, 72(sp) - addi a2, a1, 7 - andi a7, a2, -8 - sub t1, a7, a1 - li a3, 64 - bltu a3, t1, .LBB8_10 - beqz t1, .LBB8_5 - addi a6, a1, 64 - sub a4, a1, a7 - addi a5, sp, 8 - mv a3, a1 -.LBB8_3: - lb t0, 0(a3) - mv a2, a4 - sb t0, 0(a5) - addi a5, a5, 1 - addi a4, a4, 1 - addi a3, a3, 1 - bgeu a4, a2, .LBB8_3 - sub a3, a6, a7 - li a4, 8 - bltu a3, a4, .LBB8_7 -.LBB8_5: - addi a6, sp, 8 - li a7, 7 -.LBB8_6: - add a2, a1, t1 - ld a2, 0(a2) - add a4, a6, t1 - sb a2, 0(a4) - srli a5, a2, 56 - sb a5, 7(a4) - srli a5, a2, 48 - sb a5, 6(a4) - srli a5, a2, 40 - sb a5, 5(a4) - srli a5, a2, 32 - sb a5, 4(a4) - srli a5, a2, 24 - sb a5, 3(a4) - srli a5, a2, 16 - sb a5, 2(a4) - srli a2, a2, 8 - sb a2, 1(a4) - addi a3, a3, -8 - addi t1, t1, 8 - bltu a7, a3, .LBB8_6 -.LBB8_7: - beqz a3, .LBB8_11 + addi a2, a1, 60 + addi a3, sp, 68 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 56 + addi a3, sp, 64 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 52 + addi a3, sp, 60 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 48 + addi a3, sp, 56 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 44 + addi a3, sp, 52 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 40 + addi a3, sp, 48 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 36 + addi a3, sp, 44 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 32 + addi a3, sp, 40 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 28 + addi a3, sp, 36 + #APP + lw a2, 0(a2) + sw a2, 
0(a3) + #NO_APP + addi a2, a1, 24 + addi a3, sp, 32 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 20 + addi a3, sp, 28 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 16 + addi a3, sp, 24 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 12 + addi a3, sp, 20 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 8 + addi a3, sp, 16 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a1, 4 + addi a3, sp, 12 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP addi a2, sp, 8 - add a4, a2, t1 - add a1, a1, t1 -.LBB8_9: - lb a2, 0(a1) - sb a2, 0(a4) - addi a3, a3, -1 - addi a4, a4, 1 - addi a1, a1, 1 - bnez a3, .LBB8_9 - j .LBB8_11 -.LBB8_10: - lw a2, 60(a1) - sw a2, 68(sp) - lw a2, 56(a1) - sw a2, 64(sp) - lw a2, 52(a1) - sw a2, 60(sp) - lw a2, 48(a1) - sw a2, 56(sp) - lw a2, 44(a1) - sw a2, 52(sp) - lw a2, 40(a1) - sw a2, 48(sp) - lw a2, 36(a1) - sw a2, 44(sp) - lw a2, 32(a1) - sw a2, 40(sp) - lw a2, 28(a1) - sw a2, 36(sp) - lw a2, 24(a1) - sw a2, 32(sp) - lw a2, 20(a1) - sw a2, 28(sp) - lw a2, 16(a1) - sw a2, 24(sp) - lw a2, 12(a1) - sw a2, 20(sp) - lw a2, 8(a1) - sw a2, 16(sp) - lw a2, 4(a1) - sw a2, 12(sp) + #APP lw a1, 0(a1) - sw a1, 8(sp) -.LBB8_11: + sw a1, 0(a2) + #NO_APP addi a1, sp, 8 li a2, 64 call memcpy@plt diff --git a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align8 b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align8 index 7908a6d..6fe637c 100644 --- a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_load_align8 @@ -1,21 +1,70 @@ asm_test::atomic_memcpy_load_align8::acquire: - ld a6, 56(a1) - ld a7, 48(a1) - ld t0, 40(a1) - ld a5, 32(a1) - ld a2, 24(a1) - ld a3, 16(a1) - ld a4, 8(a1) + addi sp, sp, -64 + addi a2, a1, 56 + addi a3, sp, 56 + #APP + ld a2, 0(a2) + sd a2, 0(a3) + #NO_APP + addi a2, a1, 48 + addi a3, sp, 48 + #APP + ld a2, 0(a2) + sd a2, 0(a3) + #NO_APP + 
addi a2, a1, 40 + addi a3, sp, 40 + #APP + ld a2, 0(a2) + sd a2, 0(a3) + #NO_APP + addi a2, a1, 32 + addi a3, sp, 32 + #APP + ld a2, 0(a2) + sd a2, 0(a3) + #NO_APP + addi a2, a1, 24 + addi a3, sp, 24 + #APP + ld a2, 0(a2) + sd a2, 0(a3) + #NO_APP + addi a2, a1, 16 + addi a3, sp, 16 + #APP + ld a2, 0(a2) + sd a2, 0(a3) + #NO_APP + addi a2, a1, 8 + addi a3, sp, 8 + #APP + ld a2, 0(a2) + sd a2, 0(a3) + #NO_APP + mv a2, sp + #APP ld a1, 0(a1) + sd a1, 0(a2) + #NO_APP + ld a1, 0(sp) + ld a2, 8(sp) + ld a3, 16(sp) + ld a4, 24(sp) sd a1, 0(a0) - sd a4, 8(a0) + sd a2, 8(a0) sd a3, 16(a0) - sd a2, 24(a0) - sd a5, 32(a0) - sd t0, 40(a0) - sd a7, 48(a0) - sd a6, 56(a0) + sd a4, 24(a0) + ld a1, 32(sp) + ld a2, 40(sp) + ld a3, 48(sp) + ld a4, 56(sp) + sd a1, 32(a0) + sd a2, 40(a0) + sd a3, 48(a0) + sd a4, 56(a0) fence r, rw + addi sp, sp, 64 ret asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: ld a6, 56(a1) diff --git a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align1 b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align1 index ad17e15..3227c32 100644 --- a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align1 @@ -188,101 +188,31 @@ asm_test::atomic_memcpy_store_align1::release: ld a1, 56(sp) ld a2, 48(sp) ld a3, 40(sp) + ld a4, 32(sp) sd a1, 120(sp) sd a2, 112(sp) sd a3, 104(sp) - ld a1, 32(sp) - ld a2, 24(sp) - ld a3, 16(sp) - ld a4, 8(sp) - sd a1, 96(sp) - sd a2, 88(sp) - sd a3, 80(sp) - sd a4, 72(sp) - ld a3, 0(sp) - addi a1, a0, 7 - andi a7, a1, -8 - sub t0, a7, a0 - li a4, 65 - sd a3, 64(sp) - bgeu t0, a4, .LBB2_5 - beqz t0, .LBB2_7 - addi a6, a0, 64 - sub a5, a0, a7 - addi a4, sp, 64 - mv a3, a0 -.LBB2_3: - lb a2, 0(a4) - mv a1, a5 - sb a2, 0(a3) - addi a3, a3, 1 - addi a5, a5, 1 - addi a4, a4, 1 - bgeu a5, a1, .LBB2_3 - sub a2, a6, a7 - li a3, 8 - bgeu a2, a3, .LBB2_8 - j .LBB2_10 -.LBB2_5: - li a3, 0 - addi 
a1, sp, 64 - li a2, 64 -.LBB2_6: - add a4, a1, a3 - lb a4, 0(a4) - addi a5, a3, 1 - add a3, a3, a0 - sb a4, 0(a3) - mv a3, a5 - bne a5, a2, .LBB2_6 - j .LBB2_13 -.LBB2_7: - li a2, 64 -.LBB2_8: - addi a6, sp, 64 - li a7, 7 -.LBB2_9: - add a1, a6, t0 - lbu a5, 1(a1) - lbu t1, 0(a1) - lbu a4, 3(a1) - lbu a3, 2(a1) - slli a5, a5, 8 - or a5, a5, t1 - slli a4, a4, 8 - or a3, a3, a4 - slli a3, a3, 16 - or t1, a3, a5 - lbu a4, 5(a1) - lbu a5, 4(a1) - lbu a3, 7(a1) - lbu a1, 6(a1) - slli a4, a4, 8 - or a4, a4, a5 - slli a3, a3, 8 - or a1, a1, a3 - slli a1, a1, 16 - or a1, a1, a4 - slli a1, a1, 32 - or a1, a1, t1 - add a3, a0, t0 - sd a1, 0(a3) - addi a2, a2, -8 - addi t0, t0, 8 - bltu a7, a2, .LBB2_9 -.LBB2_10: - beqz a2, .LBB2_13 - add a0, a0, t0 - addi a1, sp, 64 - add a1, a1, t0 -.LBB2_12: - lb a3, 0(a1) - sb a3, 0(a0) - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB2_12 -.LBB2_13: + sd a4, 96(sp) + ld a1, 24(sp) + ld a2, 16(sp) + ld a3, 8(sp) + ld a4, 0(sp) + sd a1, 88(sp) + sd a2, 80(sp) + sd a3, 72(sp) + sd a4, 64(sp) + li a1, 63 + addi a2, sp, 64 + li a3, -1 +.LBB2_1: + add a4, a0, a1 + add a5, a2, a1 + #APP + lb a5, 0(a5) + sb a5, 0(a4) + #NO_APP + addi a1, a1, -1 + bne a1, a3, .LBB2_1 addi sp, sp, 128 ret asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: diff --git a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align16 b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align16 index 89f5f30..d5fe3c0 100644 --- a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align16 @@ -1,21 +1,86 @@ asm_test::atomic_memcpy_store_align16::release: - ld a6, 0(a1) - ld a7, 8(a1) - ld t0, 16(a1) - ld a5, 24(a1) - ld a2, 32(a1) - ld a3, 56(a1) - ld a4, 48(a1) - ld a1, 40(a1) + addi sp, sp, -128 + ld a2, 56(a1) + ld a3, 48(a1) + ld a4, 40(a1) + ld a5, 32(a1) + sd a2, 56(sp) + sd a3, 48(sp) + sd a4, 40(sp) + sd 
a5, 32(sp) + ld a2, 24(a1) + ld a3, 16(a1) + ld a4, 8(a1) + ld a1, 0(a1) + sd a2, 24(sp) + sd a3, 16(sp) + sd a4, 8(sp) + sd a1, 0(sp) fence rw, w - sd a3, 56(a0) - sd a4, 48(a0) - sd a1, 40(a0) - sd a2, 32(a0) - sd a5, 24(a0) - sd t0, 16(a0) - sd a7, 8(a0) - sd a6, 0(a0) + ld a1, 56(sp) + ld a2, 48(sp) + ld a3, 40(sp) + ld a4, 32(sp) + sd a1, 120(sp) + sd a2, 112(sp) + sd a3, 104(sp) + sd a4, 96(sp) + ld a1, 24(sp) + ld a2, 16(sp) + ld a3, 8(sp) + ld a4, 0(sp) + sd a1, 88(sp) + sd a2, 80(sp) + sd a3, 72(sp) + sd a4, 64(sp) + addi a6, sp, 72 + addi a7, sp, 80 + addi t0, sp, 88 + addi a4, sp, 96 + addi a5, sp, 104 + addi a1, sp, 112 + addi a2, sp, 120 + addi a3, a0, 56 + #APP + ld a2, 0(a2) + sd a2, 0(a3) + #NO_APP + addi a2, a0, 48 + #APP + ld a1, 0(a1) + sd a1, 0(a2) + #NO_APP + addi a1, a0, 40 + #APP + ld a2, 0(a5) + sd a2, 0(a1) + #NO_APP + addi a1, a0, 32 + #APP + ld a2, 0(a4) + sd a2, 0(a1) + #NO_APP + addi a1, a0, 24 + #APP + ld a2, 0(t0) + sd a2, 0(a1) + #NO_APP + addi a1, a0, 16 + #APP + ld a2, 0(a7) + sd a2, 0(a1) + #NO_APP + addi a1, a0, 8 + #APP + ld a2, 0(a6) + sd a2, 0(a1) + #NO_APP + addi a1, sp, 64 + #APP + ld a1, 0(a1) + sd a1, 0(a0) + #NO_APP + addi sp, sp, 128 ret asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: addi sp, sp, -64 diff --git a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align2 b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align2 index 55b4925..c886b91 100644 --- a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align2 @@ -92,152 +92,210 @@ asm_test::atomic_memcpy_store_align2::release: ld a1, 56(sp) ld a2, 48(sp) ld a3, 40(sp) + ld a4, 32(sp) sd a1, 120(sp) sd a2, 112(sp) sd a3, 104(sp) - ld a1, 32(sp) - ld a2, 24(sp) - ld a3, 16(sp) - ld a4, 8(sp) - sd a1, 96(sp) - sd a2, 88(sp) - sd a3, 80(sp) - sd a4, 72(sp) + sd a4, 96(sp) + ld a1, 24(sp) + ld a2, 16(sp) + ld a3, 
8(sp) ld a4, 0(sp) - addi a1, a0, 7 - andi a7, a1, -8 - sub t2, a7, a0 - li a2, 64 + sd a1, 88(sp) + sd a2, 80(sp) + sd a3, 72(sp) sd a4, 64(sp) - bltu a2, t2, .LBB6_11 - beqz t2, .LBB6_5 - addi a6, a0, 64 - sub a5, a0, a7 - addi a4, sp, 64 - mv a2, a0 -.LBB6_3: - lb t0, 0(a4) - mv a3, a5 - sb t0, 0(a2) - addi a2, a2, 1 - addi a5, a5, 1 - addi a4, a4, 1 - bgeu a5, a3, .LBB6_3 - sub a2, a6, a7 - li a3, 8 - bltu a2, a3, .LBB6_7 -.LBB6_5: - addi a6, sp, 64 - li a7, 7 -.LBB6_6: - add a5, a6, t2 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a4, 2(a5) - slli a1, t0, 8 - or a1, a1, t1 - slli a3, a3, 8 - or a3, a3, a4 - slli a3, a3, 16 - or t0, a3, a1 - lbu a3, 5(a5) - lbu a4, 4(a5) - lbu a1, 7(a5) - lbu a5, 6(a5) - slli a3, a3, 8 - or a3, a3, a4 - slli a1, a1, 8 - or a1, a1, a5 - slli a1, a1, 16 - or a1, a1, a3 - slli a1, a1, 32 - or a1, a1, t0 - add a3, a0, t2 - sd a1, 0(a3) - addi a2, a2, -8 - addi t2, t2, 8 - bltu a7, a2, .LBB6_6 -.LBB6_7: - beqz a2, .LBB6_10 - add a0, a0, t2 - addi a3, sp, 64 - add a1, a3, t2 -.LBB6_9: - lb a3, 0(a1) - sb a3, 0(a0) - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB6_9 -.LBB6_10: - addi sp, sp, 128 - ret -.LBB6_11: - lh a1, 126(sp) - sh a1, 62(a0) - lh a1, 124(sp) - sh a1, 60(a0) - lh a1, 122(sp) - sh a1, 58(a0) - lh a1, 120(sp) - sh a1, 56(a0) - lh a1, 118(sp) - sh a1, 54(a0) - lh a1, 116(sp) - sh a1, 52(a0) - lh a1, 114(sp) - sh a1, 50(a0) - lh a1, 112(sp) - sh a1, 48(a0) - lh a1, 110(sp) - sh a1, 46(a0) - lh a1, 108(sp) - sh a1, 44(a0) - lh a1, 106(sp) - sh a1, 42(a0) - lh a1, 104(sp) - sh a1, 40(a0) - lh a1, 102(sp) - sh a1, 38(a0) - lh a1, 100(sp) - sh a1, 36(a0) - lh a1, 98(sp) - sh a1, 34(a0) - lh a1, 96(sp) - sh a1, 32(a0) - lh a1, 94(sp) - sh a1, 30(a0) - lh a1, 92(sp) - sh a1, 28(a0) - lh a1, 90(sp) - sh a1, 26(a0) - lh a1, 88(sp) - sh a1, 24(a0) - lh a1, 86(sp) - sh a1, 22(a0) - lh a1, 84(sp) - sh a1, 20(a0) - lh a1, 82(sp) - sh a1, 18(a0) - lh a1, 80(sp) - sh a1, 16(a0) - lh a1, 78(sp) - sh a1, 
14(a0) - lh a1, 76(sp) - sh a1, 12(a0) - lh a1, 74(sp) - sh a1, 10(a0) - lh a1, 72(sp) - sh a1, 8(a0) - lh a1, 70(sp) - sh a1, 6(a0) - lh a1, 68(sp) - sh a1, 4(a0) - lh a1, 66(sp) - sh a1, 2(a0) - lh a1, 64(sp) + addi a6, sp, 72 + addi a7, sp, 80 + addi t0, sp, 88 + addi t1, sp, 96 + addi a5, sp, 104 + addi a1, sp, 112 + addi a2, sp, 120 + addi a3, a0, 62 + addi a4, sp, 126 + #APP + lh a4, 0(a4) + sh a4, 0(a3) + #NO_APP + addi a3, a0, 60 + addi a4, sp, 124 + #APP + lh a4, 0(a4) + sh a4, 0(a3) + #NO_APP + addi a3, a0, 58 + addi a4, sp, 122 + #APP + lh a4, 0(a4) + sh a4, 0(a3) + #NO_APP + addi a3, a0, 56 + #APP + lh a2, 0(a2) + sh a2, 0(a3) + #NO_APP + addi a2, a0, 54 + addi a3, sp, 118 + #APP + lh a3, 0(a3) + sh a3, 0(a2) + #NO_APP + addi a2, a0, 52 + addi a3, sp, 116 + #APP + lh a3, 0(a3) + sh a3, 0(a2) + #NO_APP + addi a2, a0, 50 + addi a3, sp, 114 + #APP + lh a3, 0(a3) + sh a3, 0(a2) + #NO_APP + addi a2, a0, 48 + #APP + lh a1, 0(a1) + sh a1, 0(a2) + #NO_APP + addi a1, a0, 46 + addi a2, sp, 110 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 44 + addi a2, sp, 108 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 42 + addi a2, sp, 106 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 40 + #APP + lh a2, 0(a5) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 38 + addi a2, sp, 102 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 36 + addi a2, sp, 100 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 34 + addi a2, sp, 98 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 32 + #APP + lh a2, 0(t1) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 30 + addi a2, sp, 94 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 28 + addi a2, sp, 92 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 26 + addi a2, sp, 90 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 24 + #APP + lh a2, 0(t0) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 22 + addi a2, sp, 86 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + 
#NO_APP + addi a1, a0, 20 + addi a2, sp, 84 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 18 + addi a2, sp, 82 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 16 + #APP + lh a2, 0(a7) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 14 + addi a2, sp, 78 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 12 + addi a2, sp, 76 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 10 + addi a2, sp, 74 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 8 + #APP + lh a2, 0(a6) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 6 + addi a2, sp, 70 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 4 + addi a2, sp, 68 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, a0, 2 + addi a2, sp, 66 + #APP + lh a2, 0(a2) + sh a2, 0(a1) + #NO_APP + addi a1, sp, 64 + #APP + lh a1, 0(a1) sh a1, 0(a0) + #NO_APP addi sp, sp, 128 ret asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: diff --git a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align4 b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align4 index 331d007..9c8d8bf 100644 --- a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align4 @@ -44,120 +44,114 @@ asm_test::atomic_memcpy_store_align4::release: ld a1, 56(sp) ld a2, 48(sp) ld a3, 40(sp) + ld a4, 32(sp) sd a1, 120(sp) sd a2, 112(sp) sd a3, 104(sp) - ld a1, 32(sp) - ld a2, 24(sp) - ld a3, 16(sp) - ld a4, 8(sp) - sd a1, 96(sp) - sd a2, 88(sp) - sd a3, 80(sp) - sd a4, 72(sp) + sd a4, 96(sp) + ld a1, 24(sp) + ld a2, 16(sp) + ld a3, 8(sp) ld a4, 0(sp) - addi a1, a0, 7 - andi a7, a1, -8 - sub t2, a7, a0 - li a2, 64 + sd a1, 88(sp) + sd a2, 80(sp) + sd a3, 72(sp) sd a4, 64(sp) - bltu a2, t2, .LBB10_11 - beqz t2, .LBB10_5 - addi a6, a0, 64 - sub a5, a0, a7 - addi a4, sp, 64 - mv a2, a0 -.LBB10_3: - lb t0, 0(a4) - mv a3, a5 - sb t0, 0(a2) - addi a2, a2, 1 - addi a5, a5, 
1 - addi a4, a4, 1 - bgeu a5, a3, .LBB10_3 - sub a2, a6, a7 - li a3, 8 - bltu a2, a3, .LBB10_7 -.LBB10_5: - addi a6, sp, 64 - li a7, 7 -.LBB10_6: - add a5, a6, t2 - lbu t0, 1(a5) - lbu t1, 0(a5) - lbu a3, 3(a5) - lbu a4, 2(a5) - slli a1, t0, 8 - or a1, a1, t1 - slli a3, a3, 8 - or a3, a3, a4 - slli a3, a3, 16 - or t0, a3, a1 - lbu a3, 5(a5) - lbu a4, 4(a5) - lbu a1, 7(a5) - lbu a5, 6(a5) - slli a3, a3, 8 - or a3, a3, a4 - slli a1, a1, 8 - or a1, a1, a5 - slli a1, a1, 16 - or a1, a1, a3 - slli a1, a1, 32 - or a1, a1, t0 - add a3, a0, t2 - sd a1, 0(a3) - addi a2, a2, -8 - addi t2, t2, 8 - bltu a7, a2, .LBB10_6 -.LBB10_7: - beqz a2, .LBB10_10 - add a0, a0, t2 - addi a3, sp, 64 - add a1, a3, t2 -.LBB10_9: - lb a3, 0(a1) - sb a3, 0(a0) - addi a2, a2, -1 - addi a0, a0, 1 - addi a1, a1, 1 - bnez a2, .LBB10_9 -.LBB10_10: - addi sp, sp, 128 - ret -.LBB10_11: - lw a1, 124(sp) - sw a1, 60(a0) - lw a1, 120(sp) - sw a1, 56(a0) - lw a1, 116(sp) - sw a1, 52(a0) - lw a1, 112(sp) - sw a1, 48(a0) - lw a1, 108(sp) - sw a1, 44(a0) - lw a1, 104(sp) - sw a1, 40(a0) - lw a1, 100(sp) - sw a1, 36(a0) - lw a1, 96(sp) - sw a1, 32(a0) - lw a1, 92(sp) - sw a1, 28(a0) - lw a1, 88(sp) - sw a1, 24(a0) - lw a1, 84(sp) - sw a1, 20(a0) - lw a1, 80(sp) - sw a1, 16(a0) - lw a1, 76(sp) - sw a1, 12(a0) - lw a1, 72(sp) - sw a1, 8(a0) - lw a1, 68(sp) - sw a1, 4(a0) - lw a1, 64(sp) + addi a6, sp, 72 + addi a7, sp, 80 + addi t0, sp, 88 + addi t1, sp, 96 + addi a5, sp, 104 + addi a1, sp, 112 + addi a2, sp, 120 + addi a3, a0, 60 + addi a4, sp, 124 + #APP + lw a4, 0(a4) + sw a4, 0(a3) + #NO_APP + addi a3, a0, 56 + #APP + lw a2, 0(a2) + sw a2, 0(a3) + #NO_APP + addi a2, a0, 52 + addi a3, sp, 116 + #APP + lw a3, 0(a3) + sw a3, 0(a2) + #NO_APP + addi a2, a0, 48 + #APP + lw a1, 0(a1) + sw a1, 0(a2) + #NO_APP + addi a1, a0, 44 + addi a2, sp, 108 + #APP + lw a2, 0(a2) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 40 + #APP + lw a2, 0(a5) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 36 + addi a2, sp, 100 + #APP + lw a2, 0(a2) + 
sw a2, 0(a1) + #NO_APP + addi a1, a0, 32 + #APP + lw a2, 0(t1) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 28 + addi a2, sp, 92 + #APP + lw a2, 0(a2) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 24 + #APP + lw a2, 0(t0) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 20 + addi a2, sp, 84 + #APP + lw a2, 0(a2) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 16 + #APP + lw a2, 0(a7) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 12 + addi a2, sp, 76 + #APP + lw a2, 0(a2) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 8 + #APP + lw a2, 0(a6) + sw a2, 0(a1) + #NO_APP + addi a1, a0, 4 + addi a2, sp, 68 + #APP + lw a2, 0(a2) + sw a2, 0(a1) + #NO_APP + addi a1, sp, 64 + #APP + lw a1, 0(a1) sw a1, 0(a0) + #NO_APP addi sp, sp, 128 ret asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: diff --git a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align8 b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align8 index 8841bba..92031e9 100644 --- a/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/riscv64gc-unknown-linux-gnu/atomic_memcpy_store_align8 @@ -1,21 +1,86 @@ asm_test::atomic_memcpy_store_align8::release: - ld a6, 0(a1) - ld a7, 8(a1) - ld t0, 16(a1) - ld a5, 24(a1) - ld a2, 32(a1) - ld a3, 56(a1) - ld a4, 48(a1) - ld a1, 40(a1) + addi sp, sp, -128 + ld a2, 56(a1) + ld a3, 48(a1) + ld a4, 40(a1) + ld a5, 32(a1) + sd a2, 56(sp) + sd a3, 48(sp) + sd a4, 40(sp) + sd a5, 32(sp) + ld a2, 24(a1) + ld a3, 16(a1) + ld a4, 8(a1) + ld a1, 0(a1) + sd a2, 24(sp) + sd a3, 16(sp) + sd a4, 8(sp) + sd a1, 0(sp) fence rw, w - sd a3, 56(a0) - sd a4, 48(a0) - sd a1, 40(a0) - sd a2, 32(a0) - sd a5, 24(a0) - sd t0, 16(a0) - sd a7, 8(a0) - sd a6, 0(a0) + ld a1, 56(sp) + ld a2, 48(sp) + ld a3, 40(sp) + ld a4, 32(sp) + sd a1, 120(sp) + sd a2, 112(sp) + sd a3, 104(sp) + sd a4, 96(sp) + ld a1, 24(sp) + ld a2, 16(sp) + ld a3, 8(sp) + ld a4, 0(sp) + sd a1, 88(sp) + sd a2, 80(sp) + sd a3, 72(sp) + sd a4, 64(sp) + addi a6, sp, 72 + 
addi a7, sp, 80 + addi t0, sp, 88 + addi a4, sp, 96 + addi a5, sp, 104 + addi a1, sp, 112 + addi a2, sp, 120 + addi a3, a0, 56 + #APP + ld a2, 0(a2) + sd a2, 0(a3) + #NO_APP + addi a2, a0, 48 + #APP + ld a1, 0(a1) + sd a1, 0(a2) + #NO_APP + addi a1, a0, 40 + #APP + ld a2, 0(a5) + sd a2, 0(a1) + #NO_APP + addi a1, a0, 32 + #APP + ld a2, 0(a4) + sd a2, 0(a1) + #NO_APP + addi a1, a0, 24 + #APP + ld a2, 0(t0) + sd a2, 0(a1) + #NO_APP + addi a1, a0, 16 + #APP + ld a2, 0(a7) + sd a2, 0(a1) + #NO_APP + addi a1, a0, 8 + #APP + ld a2, 0(a6) + sd a2, 0(a1) + #NO_APP + addi a1, sp, 64 + #APP + ld a1, 0(a1) + sd a1, 0(a0) + #NO_APP + addi sp, sp, 128 ret asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: addi sp, sp, -64 diff --git a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align1 b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align1 index aa90dd5..bb1b4df 100644 --- a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align1 @@ -1,254 +1,391 @@ asm_test::atomic_memcpy_load_align1::acquire: - stmg %r11, %r15, 88(%r15) aghi %r15, -224 - la %r0, 7(%r3) - nill %r0, 65528 - lgr %r1, %r0 - sgr %r1, %r3 - clgijhe %r1, 65, .LBB0_10 - cgije %r1, 0, .LBB0_11 - lgr %r4, %r3 - xihf %r4, 4294967295 - xilf %r4, 4294967295 - agr %r4, %r0 - risbg %r5, %r1, 62, 191, 0 - lghi %r14, 0 - clgijl %r4, 3, .LBB0_6 - risbg %r4, %r1, 0, 189, 0 - lcgr %r14, %r4 - lghi %r4, 0 - la %r13, 160(%r15) - lgr %r12, %r3 -.LBB0_4: - lb %r11, 0(%r12) - stc %r11, 0(%r13) - lb %r11, 1(%r12) - stc %r11, 1(%r13) - lb %r11, 2(%r12) - stc %r11, 2(%r13) - lb %r11, 3(%r12) - stc %r11, 3(%r13) - aghi %r4, -4 - la %r13, 4(%r13) - la %r12, 4(%r12) - cgrjlh %r14, %r4, .LBB0_4 - lcgr %r14, %r4 -.LBB0_6: - la %r4, 64(%r3) - cgije %r5, 0, .LBB0_9 - la %r13, 160(%r14, %r15) - agr %r14, %r3 -.LBB0_8: - lb %r12, 0(%r14) - stc %r12, 0(%r13) - la %r13, 1(%r13) - la %r14, 1(%r14) - brctg %r5, 
.LBB0_8 -.LBB0_9: - sgr %r4, %r0 - clgijhe %r4, 8, .LBB0_12 - j .LBB0_19 -.LBB0_10: - lb %r0, 0(%r3) - stc %r0, 160(%r15) - lb %r0, 1(%r3) - stc %r0, 161(%r15) - lb %r0, 2(%r3) - stc %r0, 162(%r15) - lb %r0, 3(%r3) - stc %r0, 163(%r15) - lb %r0, 4(%r3) - stc %r0, 164(%r15) - lb %r0, 5(%r3) - stc %r0, 165(%r15) - lb %r0, 6(%r3) - stc %r0, 166(%r15) - lb %r0, 7(%r3) - stc %r0, 167(%r15) - lb %r0, 8(%r3) - stc %r0, 168(%r15) - lb %r0, 9(%r3) - stc %r0, 169(%r15) - lb %r0, 10(%r3) - stc %r0, 170(%r15) - lb %r0, 11(%r3) - stc %r0, 171(%r15) - lb %r0, 12(%r3) - stc %r0, 172(%r15) - lb %r0, 13(%r3) - stc %r0, 173(%r15) - lb %r0, 14(%r3) - stc %r0, 174(%r15) - lb %r0, 15(%r3) - stc %r0, 175(%r15) - lb %r0, 16(%r3) - stc %r0, 176(%r15) - lb %r0, 17(%r3) - stc %r0, 177(%r15) - lb %r0, 18(%r3) - stc %r0, 178(%r15) - lb %r0, 19(%r3) - stc %r0, 179(%r15) - lb %r0, 20(%r3) - stc %r0, 180(%r15) - lb %r0, 21(%r3) - stc %r0, 181(%r15) - lb %r0, 22(%r3) - stc %r0, 182(%r15) - lb %r0, 23(%r3) - stc %r0, 183(%r15) - lb %r0, 24(%r3) - stc %r0, 184(%r15) - lb %r0, 25(%r3) - stc %r0, 185(%r15) - lb %r0, 26(%r3) - stc %r0, 186(%r15) - lb %r0, 27(%r3) - stc %r0, 187(%r15) - lb %r0, 28(%r3) - stc %r0, 188(%r15) - lb %r0, 29(%r3) - stc %r0, 189(%r15) - lb %r0, 30(%r3) - stc %r0, 190(%r15) - lb %r0, 31(%r3) - stc %r0, 191(%r15) - lb %r0, 32(%r3) - stc %r0, 192(%r15) - lb %r0, 33(%r3) - stc %r0, 193(%r15) - lb %r0, 34(%r3) - stc %r0, 194(%r15) - lb %r0, 35(%r3) - stc %r0, 195(%r15) - lb %r0, 36(%r3) - stc %r0, 196(%r15) - lb %r0, 37(%r3) - stc %r0, 197(%r15) - lb %r0, 38(%r3) - stc %r0, 198(%r15) - lb %r0, 39(%r3) - stc %r0, 199(%r15) - lb %r0, 40(%r3) - stc %r0, 200(%r15) - lb %r0, 41(%r3) - stc %r0, 201(%r15) - lb %r0, 42(%r3) - stc %r0, 202(%r15) - lb %r0, 43(%r3) - stc %r0, 203(%r15) - lb %r0, 44(%r3) - stc %r0, 204(%r15) - lb %r0, 45(%r3) - stc %r0, 205(%r15) - lb %r0, 46(%r3) - stc %r0, 206(%r15) - lb %r0, 47(%r3) - stc %r0, 207(%r15) - lb %r0, 48(%r3) - stc %r0, 208(%r15) - lb %r0, 
49(%r3) - stc %r0, 209(%r15) - lb %r0, 50(%r3) - stc %r0, 210(%r15) - lb %r0, 51(%r3) - stc %r0, 211(%r15) - lb %r0, 52(%r3) - stc %r0, 212(%r15) - lb %r0, 53(%r3) - stc %r0, 213(%r15) - lb %r0, 54(%r3) - stc %r0, 214(%r15) - lb %r0, 55(%r3) - stc %r0, 215(%r15) - lb %r0, 56(%r3) - stc %r0, 216(%r15) - lb %r0, 57(%r3) - stc %r0, 217(%r15) - lb %r0, 58(%r3) - stc %r0, 218(%r15) - lb %r0, 59(%r3) - stc %r0, 219(%r15) - lb %r0, 60(%r3) - stc %r0, 220(%r15) - lb %r0, 61(%r3) - stc %r0, 221(%r15) - lb %r0, 62(%r3) - stc %r0, 222(%r15) - lb %r0, 63(%r3) - stc %r0, 223(%r15) - j .LBB0_26 -.LBB0_11: - lghi %r4, 64 -.LBB0_12: - lghi %r0, 7 - sgr %r0, %r4 - lghi %r5, -8 - clgrjh %r0, %r5, .LBB0_14 - lghi %r0, -8 -.LBB0_14: - agr %r0, %r4 - srlg %r5, %r0, 3 - la %r5, 1(%r5) - tmll %r5, 3 - je .LBB0_17 - risbg %r5, %r5, 62, 191, 0 -.LBB0_16: - lg %r14, 0(%r1, %r3) - stg %r14, 160(%r1, %r15) - aghi %r4, -8 - la %r1, 8(%r1) - brctg %r5, .LBB0_16 -.LBB0_17: - clgijl %r0, 24, .LBB0_19 -.LBB0_18: - lg %r0, 0(%r1, %r3) - stg %r0, 160(%r1, %r15) - lg %r0, 8(%r1, %r3) - stg %r0, 168(%r1, %r15) - lg %r0, 16(%r1, %r3) - stg %r0, 176(%r1, %r15) - lg %r0, 24(%r1, %r3) - stg %r0, 184(%r1, %r15) - aghi %r4, -32 - la %r1, 32(%r1) - clgijh %r4, 7, .LBB0_18 -.LBB0_19: - cgije %r4, 0, .LBB0_26 - tmll %r4, 3 - lay %r0, -1(%r4) - lgr %r5, %r1 - je .LBB0_23 - risbg %r14, %r4, 62, 191, 0 - lgr %r5, %r1 -.LBB0_22: - lb %r13, 0(%r5, %r3) - stc %r13, 160(%r5, %r15) - la %r5, 1(%r5) - brctg %r14, .LBB0_22 -.LBB0_23: - clgijl %r0, 3, .LBB0_26 - agr %r4, %r1 - sgr %r4, %r5 - la %r1, 160(%r5, %r15) - agr %r3, %r5 -.LBB0_25: + la %r1, 160(%r15) + #APP lb %r0, 0(%r3) stc %r0, 0(%r1) - lb %r0, 1(%r3) - stc %r0, 1(%r1) - lb %r0, 2(%r3) - stc %r0, 2(%r1) - lb %r0, 3(%r3) - stc %r0, 3(%r1) - aghi %r4, -4 - la %r1, 4(%r1) - la %r3, 4(%r3) - jne .LBB0_25 -.LBB0_26: + #NO_APP + la %r1, 1(%r3) + la %r4, 161(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 2(%r3) + la %r4, 162(%r15) + #APP + lb 
%r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 3(%r3) + la %r4, 163(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 4(%r3) + la %r4, 164(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 5(%r3) + la %r4, 165(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 6(%r3) + la %r4, 166(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 7(%r3) + la %r4, 167(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 8(%r3) + la %r4, 168(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 9(%r3) + la %r4, 169(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 10(%r3) + la %r4, 170(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 11(%r3) + la %r4, 171(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 12(%r3) + la %r4, 172(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 13(%r3) + la %r4, 173(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 14(%r3) + la %r4, 174(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 15(%r3) + la %r4, 175(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 16(%r3) + la %r4, 176(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 17(%r3) + la %r4, 177(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 18(%r3) + la %r4, 178(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 19(%r3) + la %r4, 179(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 20(%r3) + la %r4, 180(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 21(%r3) + la %r4, 181(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 22(%r3) + la %r4, 182(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 23(%r3) + la %r4, 183(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 24(%r3) + la %r4, 184(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 
0(%r4) + #NO_APP + la %r1, 25(%r3) + la %r4, 185(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 26(%r3) + la %r4, 186(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 27(%r3) + la %r4, 187(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 28(%r3) + la %r4, 188(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 29(%r3) + la %r4, 189(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 30(%r3) + la %r4, 190(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 31(%r3) + la %r4, 191(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 32(%r3) + la %r4, 192(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 33(%r3) + la %r4, 193(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 34(%r3) + la %r4, 194(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 35(%r3) + la %r4, 195(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 36(%r3) + la %r4, 196(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 37(%r3) + la %r4, 197(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 38(%r3) + la %r4, 198(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 39(%r3) + la %r4, 199(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 40(%r3) + la %r4, 200(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 41(%r3) + la %r4, 201(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 42(%r3) + la %r4, 202(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 43(%r3) + la %r4, 203(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 44(%r3) + la %r4, 204(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 45(%r3) + la %r4, 205(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 46(%r3) + la %r4, 206(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + 
la %r1, 47(%r3) + la %r4, 207(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 48(%r3) + la %r4, 208(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 49(%r3) + la %r4, 209(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 50(%r3) + la %r4, 210(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 51(%r3) + la %r4, 211(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 52(%r3) + la %r4, 212(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 53(%r3) + la %r4, 213(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 54(%r3) + la %r4, 214(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 55(%r3) + la %r4, 215(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 56(%r3) + la %r4, 216(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 57(%r3) + la %r4, 217(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 58(%r3) + la %r4, 218(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 59(%r3) + la %r4, 219(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 60(%r3) + la %r4, 220(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 61(%r3) + la %r4, 221(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 62(%r3) + la %r4, 222(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r4) + #NO_APP + la %r1, 63(%r3) + la %r3, 223(%r15) + #APP + lb %r0, 0(%r1) + stc %r0, 0(%r3) + #NO_APP mvc 0(64, %r2), 160(%r15) #MEMBARRIER - lmg %r11, %r15, 312(%r15) + aghi %r15, 224 br %r14 asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: stmg %r6, %r15, 48(%r15) diff --git a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align16 b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align16 index 8427729..b2cff39 100644 --- a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align16 +++ 
b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align16 @@ -1,23 +1,61 @@ asm_test::atomic_memcpy_load_align16::acquire: - stmg %r12, %r15, 96(%r15) + stmg %r11, %r15, 88(%r15) + aghi %r15, -160 + lgr %r11, %r15 + lgr %r4, %r15 + aghi %r4, -72 + la %r1, 168(%r4) + nill %r1, 65520 + lgr %r15, %r4 + #APP + lg %r0, 0(%r3) + stg %r0, 0(%r1) + #NO_APP + la %r4, 8(%r3) + la %r5, 8(%r1) + #APP + lg %r0, 0(%r4) + stg %r0, 0(%r5) + #NO_APP + la %r4, 16(%r3) + la %r5, 16(%r1) + #APP + lg %r0, 0(%r4) + stg %r0, 0(%r5) + #NO_APP + la %r4, 24(%r3) + la %r5, 24(%r1) + #APP + lg %r0, 0(%r4) + stg %r0, 0(%r5) + #NO_APP + la %r4, 32(%r3) + la %r5, 32(%r1) + #APP + lg %r0, 0(%r4) + stg %r0, 0(%r5) + #NO_APP + la %r4, 40(%r3) + la %r5, 40(%r1) + #APP + lg %r0, 0(%r4) + stg %r0, 0(%r5) + #NO_APP + la %r4, 48(%r3) + la %r5, 48(%r1) + #APP + lg %r0, 0(%r4) + stg %r0, 0(%r5) + #NO_APP + la %r3, 56(%r3) + la %r4, 56(%r1) + #APP lg %r0, 0(%r3) - lg %r1, 8(%r3) - lg %r4, 16(%r3) - lg %r5, 24(%r3) - lg %r14, 32(%r3) - lg %r13, 40(%r3) - lg %r12, 48(%r3) - lg %r3, 56(%r3) - stg %r0, 0(%r2) - stg %r1, 8(%r2) - stg %r4, 16(%r2) - stg %r5, 24(%r2) - stg %r14, 32(%r2) - stg %r13, 40(%r2) - stg %r12, 48(%r2) - stg %r3, 56(%r2) + stg %r0, 0(%r4) + #NO_APP + mvc 0(64, %r2), 0(%r1) #MEMBARRIER - lmg %r12, %r15, 96(%r15) + lmg %r11, %r15, 248(%r11) br %r14 asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: stmg %r12, %r15, 96(%r15) diff --git a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align2 b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align2 index 6372937..854ab60 100644 --- a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align2 @@ -1,190 +1,199 @@ asm_test::atomic_memcpy_load_align2::acquire: - stmg %r11, %r15, 88(%r15) aghi %r15, -224 - la %r0, 7(%r3) - nill %r0, 65528 - lgr %r1, %r0 - sgr %r1, %r3 - clgijh %r1, 64, .LBB4_10 - cgije %r1, 
0, .LBB4_11 - lgr %r4, %r3 - xihf %r4, 4294967295 - xilf %r4, 4294967295 - agr %r4, %r0 - risbg %r5, %r1, 62, 191, 0 - lghi %r14, 0 - clgijl %r4, 3, .LBB4_6 - risbg %r4, %r1, 0, 189, 0 - lcgr %r14, %r4 - lghi %r4, 0 - la %r13, 160(%r15) - lgr %r12, %r3 -.LBB4_4: - lb %r11, 0(%r12) - stc %r11, 0(%r13) - lb %r11, 1(%r12) - stc %r11, 1(%r13) - lb %r11, 2(%r12) - stc %r11, 2(%r13) - lb %r11, 3(%r12) - stc %r11, 3(%r13) - aghi %r4, -4 - la %r13, 4(%r13) - la %r12, 4(%r12) - cgrjlh %r14, %r4, .LBB4_4 - lcgr %r14, %r4 -.LBB4_6: - la %r4, 64(%r3) - cgije %r5, 0, .LBB4_9 - la %r13, 160(%r14, %r15) - agr %r14, %r3 -.LBB4_8: - lb %r12, 0(%r14) - stc %r12, 0(%r13) - la %r13, 1(%r13) - la %r14, 1(%r14) - brctg %r5, .LBB4_8 -.LBB4_9: - sgr %r4, %r0 - clgijhe %r4, 8, .LBB4_12 - j .LBB4_19 -.LBB4_10: + la %r1, 160(%r15) + #APP lh %r0, 0(%r3) - sth %r0, 160(%r15) - lh %r0, 2(%r3) - sth %r0, 162(%r15) - lh %r0, 4(%r3) - sth %r0, 164(%r15) - lh %r0, 6(%r3) - sth %r0, 166(%r15) - lh %r0, 8(%r3) - sth %r0, 168(%r15) - lh %r0, 10(%r3) - sth %r0, 170(%r15) - lh %r0, 12(%r3) - sth %r0, 172(%r15) - lh %r0, 14(%r3) - sth %r0, 174(%r15) - lh %r0, 16(%r3) - sth %r0, 176(%r15) - lh %r0, 18(%r3) - sth %r0, 178(%r15) - lh %r0, 20(%r3) - sth %r0, 180(%r15) - lh %r0, 22(%r3) - sth %r0, 182(%r15) - lh %r0, 24(%r3) - sth %r0, 184(%r15) - lh %r0, 26(%r3) - sth %r0, 186(%r15) - lh %r0, 28(%r3) - sth %r0, 188(%r15) - lh %r0, 30(%r3) - sth %r0, 190(%r15) - lh %r0, 32(%r3) - sth %r0, 192(%r15) - lh %r0, 34(%r3) - sth %r0, 194(%r15) - lh %r0, 36(%r3) - sth %r0, 196(%r15) - lh %r0, 38(%r3) - sth %r0, 198(%r15) - lh %r0, 40(%r3) - sth %r0, 200(%r15) - lh %r0, 42(%r3) - sth %r0, 202(%r15) - lh %r0, 44(%r3) - sth %r0, 204(%r15) - lh %r0, 46(%r3) - sth %r0, 206(%r15) - lh %r0, 48(%r3) - sth %r0, 208(%r15) - lh %r0, 50(%r3) - sth %r0, 210(%r15) - lh %r0, 52(%r3) - sth %r0, 212(%r15) - lh %r0, 54(%r3) - sth %r0, 214(%r15) - lh %r0, 56(%r3) - sth %r0, 216(%r15) - lh %r0, 58(%r3) - sth %r0, 218(%r15) - lh %r0, 
60(%r3) - sth %r0, 220(%r15) - lh %r0, 62(%r3) - sth %r0, 222(%r15) - j .LBB4_26 -.LBB4_11: - lghi %r4, 64 -.LBB4_12: - lghi %r0, 7 - sgr %r0, %r4 - lghi %r5, -8 - clgrjh %r0, %r5, .LBB4_14 - lghi %r0, -8 -.LBB4_14: - agr %r0, %r4 - srlg %r5, %r0, 3 - la %r5, 1(%r5) - tmll %r5, 3 - je .LBB4_17 - risbg %r5, %r5, 62, 191, 0 -.LBB4_16: - lg %r14, 0(%r1, %r3) - stg %r14, 160(%r1, %r15) - aghi %r4, -8 - la %r1, 8(%r1) - brctg %r5, .LBB4_16 -.LBB4_17: - clgijl %r0, 24, .LBB4_19 -.LBB4_18: - lg %r0, 0(%r1, %r3) - stg %r0, 160(%r1, %r15) - lg %r0, 8(%r1, %r3) - stg %r0, 168(%r1, %r15) - lg %r0, 16(%r1, %r3) - stg %r0, 176(%r1, %r15) - lg %r0, 24(%r1, %r3) - stg %r0, 184(%r1, %r15) - aghi %r4, -32 - la %r1, 32(%r1) - clgijh %r4, 7, .LBB4_18 -.LBB4_19: - cgije %r4, 0, .LBB4_26 - tmll %r4, 3 - lay %r0, -1(%r4) - lgr %r5, %r1 - je .LBB4_23 - risbg %r14, %r4, 62, 191, 0 - lgr %r5, %r1 -.LBB4_22: - lb %r13, 0(%r5, %r3) - stc %r13, 160(%r5, %r15) - la %r5, 1(%r5) - brctg %r14, .LBB4_22 -.LBB4_23: - clgijl %r0, 3, .LBB4_26 - agr %r4, %r1 - sgr %r4, %r5 - la %r1, 160(%r5, %r15) - agr %r3, %r5 -.LBB4_25: - lb %r0, 0(%r3) - stc %r0, 0(%r1) - lb %r0, 1(%r3) - stc %r0, 1(%r1) - lb %r0, 2(%r3) - stc %r0, 2(%r1) - lb %r0, 3(%r3) - stc %r0, 3(%r1) - aghi %r4, -4 - la %r1, 4(%r1) - la %r3, 4(%r3) - jne .LBB4_25 -.LBB4_26: + sth %r0, 0(%r1) + #NO_APP + la %r1, 2(%r3) + la %r4, 162(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 4(%r3) + la %r4, 164(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 6(%r3) + la %r4, 166(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 8(%r3) + la %r4, 168(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 10(%r3) + la %r4, 170(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 12(%r3) + la %r4, 172(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 14(%r3) + la %r4, 174(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 16(%r3) + la 
%r4, 176(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 18(%r3) + la %r4, 178(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 20(%r3) + la %r4, 180(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 22(%r3) + la %r4, 182(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 24(%r3) + la %r4, 184(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 26(%r3) + la %r4, 186(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 28(%r3) + la %r4, 188(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 30(%r3) + la %r4, 190(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 32(%r3) + la %r4, 192(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 34(%r3) + la %r4, 194(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 36(%r3) + la %r4, 196(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 38(%r3) + la %r4, 198(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 40(%r3) + la %r4, 200(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 42(%r3) + la %r4, 202(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 44(%r3) + la %r4, 204(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 46(%r3) + la %r4, 206(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 48(%r3) + la %r4, 208(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 50(%r3) + la %r4, 210(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 52(%r3) + la %r4, 212(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 54(%r3) + la %r4, 214(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 56(%r3) + la %r4, 216(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 58(%r3) + la %r4, 218(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 60(%r3) + la %r4, 220(%r15) + 
#APP + lh %r0, 0(%r1) + sth %r0, 0(%r4) + #NO_APP + la %r1, 62(%r3) + la %r3, 222(%r15) + #APP + lh %r0, 0(%r1) + sth %r0, 0(%r3) + #NO_APP mvc 0(64, %r2), 160(%r15) #MEMBARRIER - lmg %r11, %r15, 312(%r15) + aghi %r15, 224 br %r14 asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: stmg %r6, %r15, 48(%r15) diff --git a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align4 b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align4 index 06add15..ae94de3 100644 --- a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align4 @@ -1,158 +1,103 @@ asm_test::atomic_memcpy_load_align4::acquire: - stmg %r11, %r15, 88(%r15) aghi %r15, -224 - la %r0, 7(%r3) - nill %r0, 65528 - lgr %r1, %r0 - sgr %r1, %r3 - clgijh %r1, 64, .LBB8_10 - cgije %r1, 0, .LBB8_11 - lgr %r4, %r3 - xihf %r4, 4294967295 - xilf %r4, 4294967295 - agr %r4, %r0 - risbg %r5, %r1, 62, 191, 0 - lghi %r14, 0 - clgijl %r4, 3, .LBB8_6 - risbg %r4, %r1, 0, 189, 0 - lcgr %r14, %r4 - lghi %r4, 0 - la %r13, 160(%r15) - lgr %r12, %r3 -.LBB8_4: - lb %r11, 0(%r12) - stc %r11, 0(%r13) - lb %r11, 1(%r12) - stc %r11, 1(%r13) - lb %r11, 2(%r12) - stc %r11, 2(%r13) - lb %r11, 3(%r12) - stc %r11, 3(%r13) - aghi %r4, -4 - la %r13, 4(%r13) - la %r12, 4(%r12) - cgrjlh %r14, %r4, .LBB8_4 - lcgr %r14, %r4 -.LBB8_6: - la %r4, 64(%r3) - cgije %r5, 0, .LBB8_9 - la %r13, 160(%r14, %r15) - agr %r14, %r3 -.LBB8_8: - lb %r12, 0(%r14) - stc %r12, 0(%r13) - la %r13, 1(%r13) - la %r14, 1(%r14) - brctg %r5, .LBB8_8 -.LBB8_9: - sgr %r4, %r0 - clgijhe %r4, 8, .LBB8_12 - j .LBB8_19 -.LBB8_10: + la %r1, 160(%r15) + #APP l %r0, 0(%r3) - st %r0, 160(%r15) - l %r0, 4(%r3) - st %r0, 164(%r15) - l %r0, 8(%r3) - st %r0, 168(%r15) - l %r0, 12(%r3) - st %r0, 172(%r15) - l %r0, 16(%r3) - st %r0, 176(%r15) - l %r0, 20(%r3) - st %r0, 180(%r15) - l %r0, 24(%r3) - st %r0, 184(%r15) - l %r0, 28(%r3) - st %r0, 188(%r15) - l %r0, 
32(%r3) - st %r0, 192(%r15) - l %r0, 36(%r3) - st %r0, 196(%r15) - l %r0, 40(%r3) - st %r0, 200(%r15) - l %r0, 44(%r3) - st %r0, 204(%r15) - l %r0, 48(%r3) - st %r0, 208(%r15) - l %r0, 52(%r3) - st %r0, 212(%r15) - l %r0, 56(%r3) - st %r0, 216(%r15) - l %r0, 60(%r3) - st %r0, 220(%r15) - j .LBB8_26 -.LBB8_11: - lghi %r4, 64 -.LBB8_12: - lghi %r0, 7 - sgr %r0, %r4 - lghi %r5, -8 - clgrjh %r0, %r5, .LBB8_14 - lghi %r0, -8 -.LBB8_14: - agr %r0, %r4 - srlg %r5, %r0, 3 - la %r5, 1(%r5) - tmll %r5, 3 - je .LBB8_17 - risbg %r5, %r5, 62, 191, 0 -.LBB8_16: - lg %r14, 0(%r1, %r3) - stg %r14, 160(%r1, %r15) - aghi %r4, -8 - la %r1, 8(%r1) - brctg %r5, .LBB8_16 -.LBB8_17: - clgijl %r0, 24, .LBB8_19 -.LBB8_18: - lg %r0, 0(%r1, %r3) - stg %r0, 160(%r1, %r15) - lg %r0, 8(%r1, %r3) - stg %r0, 168(%r1, %r15) - lg %r0, 16(%r1, %r3) - stg %r0, 176(%r1, %r15) - lg %r0, 24(%r1, %r3) - stg %r0, 184(%r1, %r15) - aghi %r4, -32 - la %r1, 32(%r1) - clgijh %r4, 7, .LBB8_18 -.LBB8_19: - cgije %r4, 0, .LBB8_26 - tmll %r4, 3 - lay %r0, -1(%r4) - lgr %r5, %r1 - je .LBB8_23 - risbg %r14, %r4, 62, 191, 0 - lgr %r5, %r1 -.LBB8_22: - lb %r13, 0(%r5, %r3) - stc %r13, 160(%r5, %r15) - la %r5, 1(%r5) - brctg %r14, .LBB8_22 -.LBB8_23: - clgijl %r0, 3, .LBB8_26 - agr %r4, %r1 - sgr %r4, %r5 - la %r1, 160(%r5, %r15) - agr %r3, %r5 -.LBB8_25: - lb %r0, 0(%r3) - stc %r0, 0(%r1) - lb %r0, 1(%r3) - stc %r0, 1(%r1) - lb %r0, 2(%r3) - stc %r0, 2(%r1) - lb %r0, 3(%r3) - stc %r0, 3(%r1) - aghi %r4, -4 - la %r1, 4(%r1) - la %r3, 4(%r3) - jne .LBB8_25 -.LBB8_26: + st %r0, 0(%r1) + #NO_APP + la %r1, 4(%r3) + la %r4, 164(%r15) + #APP + l %r0, 0(%r1) + st %r0, 0(%r4) + #NO_APP + la %r1, 8(%r3) + la %r4, 168(%r15) + #APP + l %r0, 0(%r1) + st %r0, 0(%r4) + #NO_APP + la %r1, 12(%r3) + la %r4, 172(%r15) + #APP + l %r0, 0(%r1) + st %r0, 0(%r4) + #NO_APP + la %r1, 16(%r3) + la %r4, 176(%r15) + #APP + l %r0, 0(%r1) + st %r0, 0(%r4) + #NO_APP + la %r1, 20(%r3) + la %r4, 180(%r15) + #APP + l %r0, 0(%r1) + st %r0, 0(%r4) + 
#NO_APP + la %r1, 24(%r3) + la %r4, 184(%r15) + #APP + l %r0, 0(%r1) + st %r0, 0(%r4) + #NO_APP + la %r1, 28(%r3) + la %r4, 188(%r15) + #APP + l %r0, 0(%r1) + st %r0, 0(%r4) + #NO_APP + la %r1, 32(%r3) + la %r4, 192(%r15) + #APP + l %r0, 0(%r1) + st %r0, 0(%r4) + #NO_APP + la %r1, 36(%r3) + la %r4, 196(%r15) + #APP + l %r0, 0(%r1) + st %r0, 0(%r4) + #NO_APP + la %r1, 40(%r3) + la %r4, 200(%r15) + #APP + l %r0, 0(%r1) + st %r0, 0(%r4) + #NO_APP + la %r1, 44(%r3) + la %r4, 204(%r15) + #APP + l %r0, 0(%r1) + st %r0, 0(%r4) + #NO_APP + la %r1, 48(%r3) + la %r4, 208(%r15) + #APP + l %r0, 0(%r1) + st %r0, 0(%r4) + #NO_APP + la %r1, 52(%r3) + la %r4, 212(%r15) + #APP + l %r0, 0(%r1) + st %r0, 0(%r4) + #NO_APP + la %r1, 56(%r3) + la %r4, 216(%r15) + #APP + l %r0, 0(%r1) + st %r0, 0(%r4) + #NO_APP + la %r1, 60(%r3) + la %r3, 220(%r15) + #APP + l %r0, 0(%r1) + st %r0, 0(%r3) + #NO_APP mvc 0(64, %r2), 160(%r15) #MEMBARRIER - lmg %r11, %r15, 312(%r15) + aghi %r15, 224 br %r14 asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: stmg %r6, %r15, 48(%r15) diff --git a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align8 b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align8 index 89d7ee8..8ffe203 100644 --- a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_load_align8 @@ -1,23 +1,55 @@ asm_test::atomic_memcpy_load_align8::acquire: - stmg %r12, %r15, 96(%r15) + aghi %r15, -224 + la %r1, 160(%r15) + #APP lg %r0, 0(%r3) - lg %r1, 8(%r3) - lg %r4, 16(%r3) - lg %r5, 24(%r3) - lg %r14, 32(%r3) - lg %r13, 40(%r3) - lg %r12, 48(%r3) - lg %r3, 56(%r3) - stg %r0, 0(%r2) - stg %r1, 8(%r2) - stg %r4, 16(%r2) - stg %r5, 24(%r2) - stg %r14, 32(%r2) - stg %r13, 40(%r2) - stg %r12, 48(%r2) - stg %r3, 56(%r2) + stg %r0, 0(%r1) + #NO_APP + la %r1, 8(%r3) + la %r4, 168(%r15) + #APP + lg %r0, 0(%r1) + stg %r0, 0(%r4) + #NO_APP + la %r1, 16(%r3) + la %r4, 176(%r15) + #APP + 
lg %r0, 0(%r1) + stg %r0, 0(%r4) + #NO_APP + la %r1, 24(%r3) + la %r4, 184(%r15) + #APP + lg %r0, 0(%r1) + stg %r0, 0(%r4) + #NO_APP + la %r1, 32(%r3) + la %r4, 192(%r15) + #APP + lg %r0, 0(%r1) + stg %r0, 0(%r4) + #NO_APP + la %r1, 40(%r3) + la %r4, 200(%r15) + #APP + lg %r0, 0(%r1) + stg %r0, 0(%r4) + #NO_APP + la %r1, 48(%r3) + la %r4, 208(%r15) + #APP + lg %r0, 0(%r1) + stg %r0, 0(%r4) + #NO_APP + la %r1, 56(%r3) + la %r3, 216(%r15) + #APP + lg %r0, 0(%r1) + stg %r0, 0(%r3) + #NO_APP + mvc 0(64, %r2), 160(%r15) #MEMBARRIER - lmg %r12, %r15, 96(%r15) + aghi %r15, 224 br %r14 asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: stmg %r12, %r15, 96(%r15) diff --git a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align1 b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align1 index 10a89fa..d651f72 100644 --- a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align1 @@ -1,256 +1,392 @@ asm_test::atomic_memcpy_store_align1::release: - stmg %r12, %r15, 96(%r15) aghi %r15, -288 mvc 160(64, %r15), 0(%r3) #MEMBARRIER - la %r0, 7(%r2) - nill %r0, 65528 mvc 224(64, %r15), 160(%r15) - lgr %r1, %r0 - sgr %r1, %r2 - clgijhe %r1, 65, .LBB2_10 - cgije %r1, 0, .LBB2_11 - lgr %r3, %r2 - xihf %r3, 4294967295 - xilf %r3, 4294967295 - agr %r3, %r0 - risbg %r4, %r1, 62, 191, 0 - lghi %r14, 0 - clgijl %r3, 3, .LBB2_6 - risbg %r3, %r1, 0, 189, 0 - lcgr %r5, %r3 - lghi %r3, 0 - la %r14, 224(%r15) - lgr %r13, %r2 -.LBB2_4: - lb %r12, 0(%r14) - stc %r12, 0(%r13) - lb %r12, 1(%r14) - stc %r12, 1(%r13) - lb %r12, 2(%r14) - stc %r12, 2(%r13) - lb %r12, 3(%r14) - stc %r12, 3(%r13) - aghi %r3, -4 - la %r13, 4(%r13) - la %r14, 4(%r14) - cgrjlh %r5, %r3, .LBB2_4 - lcgr %r14, %r3 -.LBB2_6: - la %r3, 64(%r2) - cgije %r4, 0, .LBB2_9 - lgr %r5, %r2 - agr %r5, %r14 - la %r14, 224(%r14, %r15) -.LBB2_8: - lb %r13, 0(%r14) - stc %r13, 0(%r5) - la %r5, 1(%r5) - la %r14, 
1(%r14) - brctg %r4, .LBB2_8 -.LBB2_9: - sgr %r3, %r0 - clgijhe %r3, 8, .LBB2_12 - j .LBB2_19 -.LBB2_10: - lb %r0, 224(%r15) - stc %r0, 0(%r2) - lb %r0, 225(%r15) - stc %r0, 1(%r2) - lb %r0, 226(%r15) - stc %r0, 2(%r2) - lb %r0, 227(%r15) - stc %r0, 3(%r2) - lb %r0, 228(%r15) - stc %r0, 4(%r2) - lb %r0, 229(%r15) - stc %r0, 5(%r2) - lb %r0, 230(%r15) - stc %r0, 6(%r2) - lb %r0, 231(%r15) - stc %r0, 7(%r2) - lb %r0, 232(%r15) - stc %r0, 8(%r2) - lb %r0, 233(%r15) - stc %r0, 9(%r2) - lb %r0, 234(%r15) - stc %r0, 10(%r2) - lb %r0, 235(%r15) - stc %r0, 11(%r2) - lb %r0, 236(%r15) - stc %r0, 12(%r2) - lb %r0, 237(%r15) - stc %r0, 13(%r2) - lb %r0, 238(%r15) - stc %r0, 14(%r2) - lb %r0, 239(%r15) - stc %r0, 15(%r2) - lb %r0, 240(%r15) - stc %r0, 16(%r2) - lb %r0, 241(%r15) - stc %r0, 17(%r2) - lb %r0, 242(%r15) - stc %r0, 18(%r2) - lb %r0, 243(%r15) - stc %r0, 19(%r2) - lb %r0, 244(%r15) - stc %r0, 20(%r2) - lb %r0, 245(%r15) - stc %r0, 21(%r2) - lb %r0, 246(%r15) - stc %r0, 22(%r2) - lb %r0, 247(%r15) - stc %r0, 23(%r2) - lb %r0, 248(%r15) - stc %r0, 24(%r2) - lb %r0, 249(%r15) - stc %r0, 25(%r2) - lb %r0, 250(%r15) - stc %r0, 26(%r2) - lb %r0, 251(%r15) - stc %r0, 27(%r2) - lb %r0, 252(%r15) - stc %r0, 28(%r2) - lb %r0, 253(%r15) - stc %r0, 29(%r2) - lb %r0, 254(%r15) - stc %r0, 30(%r2) - lb %r0, 255(%r15) - stc %r0, 31(%r2) - lb %r0, 256(%r15) - stc %r0, 32(%r2) - lb %r0, 257(%r15) - stc %r0, 33(%r2) - lb %r0, 258(%r15) - stc %r0, 34(%r2) - lb %r0, 259(%r15) - stc %r0, 35(%r2) - lb %r0, 260(%r15) - stc %r0, 36(%r2) - lb %r0, 261(%r15) - stc %r0, 37(%r2) - lb %r0, 262(%r15) - stc %r0, 38(%r2) - lb %r0, 263(%r15) - stc %r0, 39(%r2) - lb %r0, 264(%r15) - stc %r0, 40(%r2) - lb %r0, 265(%r15) - stc %r0, 41(%r2) - lb %r0, 266(%r15) - stc %r0, 42(%r2) - lb %r0, 267(%r15) - stc %r0, 43(%r2) - lb %r0, 268(%r15) - stc %r0, 44(%r2) - lb %r0, 269(%r15) - stc %r0, 45(%r2) - lb %r0, 270(%r15) - stc %r0, 46(%r2) - lb %r0, 271(%r15) - stc %r0, 47(%r2) - lb %r0, 272(%r15) - stc %r0, 
48(%r2) - lb %r0, 273(%r15) - stc %r0, 49(%r2) - lb %r0, 274(%r15) - stc %r0, 50(%r2) - lb %r0, 275(%r15) - stc %r0, 51(%r2) - lb %r0, 276(%r15) - stc %r0, 52(%r2) - lb %r0, 277(%r15) - stc %r0, 53(%r2) - lb %r0, 278(%r15) - stc %r0, 54(%r2) - lb %r0, 279(%r15) - stc %r0, 55(%r2) - lb %r0, 280(%r15) - stc %r0, 56(%r2) - lb %r0, 281(%r15) - stc %r0, 57(%r2) - lb %r0, 282(%r15) - stc %r0, 58(%r2) - lb %r0, 283(%r15) - stc %r0, 59(%r2) - lb %r0, 284(%r15) - stc %r0, 60(%r2) - lb %r0, 285(%r15) - stc %r0, 61(%r2) - lb %r0, 286(%r15) - stc %r0, 62(%r2) - lb %r0, 287(%r15) - stc %r0, 63(%r2) - j .LBB2_26 -.LBB2_11: - lghi %r3, 64 -.LBB2_12: - lghi %r0, 7 - sgr %r0, %r3 - lghi %r4, -8 - clgrjh %r0, %r4, .LBB2_14 - lghi %r0, -8 -.LBB2_14: - agr %r0, %r3 - srlg %r4, %r0, 3 - la %r4, 1(%r4) - tmll %r4, 3 - je .LBB2_17 - risbg %r4, %r4, 62, 191, 0 -.LBB2_16: - lg %r5, 224(%r1, %r15) - stg %r5, 0(%r1, %r2) - aghi %r3, -8 - la %r1, 8(%r1) - brctg %r4, .LBB2_16 -.LBB2_17: - clgijl %r0, 24, .LBB2_19 -.LBB2_18: - lg %r0, 224(%r1, %r15) - stg %r0, 0(%r1, %r2) - lg %r0, 232(%r1, %r15) - stg %r0, 8(%r1, %r2) - lg %r0, 240(%r1, %r15) - stg %r0, 16(%r1, %r2) - lg %r0, 248(%r1, %r15) - stg %r0, 24(%r1, %r2) - aghi %r3, -32 - la %r1, 32(%r1) - clgijh %r3, 7, .LBB2_18 -.LBB2_19: - cgije %r3, 0, .LBB2_26 - tmll %r3, 3 - lay %r0, -1(%r3) - lgr %r4, %r1 - je .LBB2_23 - risbg %r5, %r3, 62, 191, 0 - lgr %r4, %r1 -.LBB2_22: - lb %r14, 224(%r4, %r15) - stc %r14, 0(%r4, %r2) - la %r4, 1(%r4) - brctg %r5, .LBB2_22 -.LBB2_23: - clgijl %r0, 3, .LBB2_26 - agr %r3, %r1 - sgr %r3, %r4 - agr %r2, %r4 - la %r1, 224(%r4, %r15) -.LBB2_25: + la %r1, 224(%r15) + #APP lb %r0, 0(%r1) stc %r0, 0(%r2) - lb %r0, 1(%r1) - stc %r0, 1(%r2) - lb %r0, 2(%r1) - stc %r0, 2(%r2) - lb %r0, 3(%r1) - stc %r0, 3(%r2) - aghi %r3, -4 - la %r2, 4(%r2) - la %r1, 4(%r1) - jne .LBB2_25 -.LBB2_26: - lmg %r12, %r15, 384(%r15) + #NO_APP + la %r1, 1(%r2) + la %r3, 225(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 
2(%r2) + la %r3, 226(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 3(%r2) + la %r3, 227(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 4(%r2) + la %r3, 228(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 5(%r2) + la %r3, 229(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 6(%r2) + la %r3, 230(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 7(%r2) + la %r3, 231(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 8(%r2) + la %r3, 232(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 9(%r2) + la %r3, 233(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 10(%r2) + la %r3, 234(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 11(%r2) + la %r3, 235(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 12(%r2) + la %r3, 236(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 13(%r2) + la %r3, 237(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 14(%r2) + la %r3, 238(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 15(%r2) + la %r3, 239(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 16(%r2) + la %r3, 240(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 17(%r2) + la %r3, 241(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 18(%r2) + la %r3, 242(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 19(%r2) + la %r3, 243(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 20(%r2) + la %r3, 244(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 21(%r2) + la %r3, 245(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 22(%r2) + la %r3, 246(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 23(%r2) + la %r3, 247(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 24(%r2) + la %r3, 248(%r15) 
+ #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 25(%r2) + la %r3, 249(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 26(%r2) + la %r3, 250(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 27(%r2) + la %r3, 251(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 28(%r2) + la %r3, 252(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 29(%r2) + la %r3, 253(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 30(%r2) + la %r3, 254(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 31(%r2) + la %r3, 255(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 32(%r2) + la %r3, 256(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 33(%r2) + la %r3, 257(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 34(%r2) + la %r3, 258(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 35(%r2) + la %r3, 259(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 36(%r2) + la %r3, 260(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 37(%r2) + la %r3, 261(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 38(%r2) + la %r3, 262(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 39(%r2) + la %r3, 263(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 40(%r2) + la %r3, 264(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 41(%r2) + la %r3, 265(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 42(%r2) + la %r3, 266(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 43(%r2) + la %r3, 267(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 44(%r2) + la %r3, 268(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 45(%r2) + la %r3, 269(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 46(%r2) + la %r3, 270(%r15) + #APP + lb %r0, 
0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 47(%r2) + la %r3, 271(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 48(%r2) + la %r3, 272(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 49(%r2) + la %r3, 273(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 50(%r2) + la %r3, 274(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 51(%r2) + la %r3, 275(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 52(%r2) + la %r3, 276(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 53(%r2) + la %r3, 277(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 54(%r2) + la %r3, 278(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 55(%r2) + la %r3, 279(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 56(%r2) + la %r3, 280(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 57(%r2) + la %r3, 281(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 58(%r2) + la %r3, 282(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 59(%r2) + la %r3, 283(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 60(%r2) + la %r3, 284(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 61(%r2) + la %r3, 285(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 62(%r2) + la %r3, 286(%r15) + #APP + lb %r0, 0(%r3) + stc %r0, 0(%r1) + #NO_APP + la %r1, 63(%r2) + la %r2, 287(%r15) + #APP + lb %r0, 0(%r2) + stc %r0, 0(%r1) + #NO_APP + aghi %r15, 288 br %r14 asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: stmg %r14, %r15, 112(%r15) diff --git a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align16 b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align16 index 862a04c..843d484 100644 --- a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align16 +++ 
b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align16 @@ -1,23 +1,67 @@ asm_test::atomic_memcpy_store_align16::release: - stmg %r12, %r15, 96(%r15) - lg %r0, 0(%r3) - lg %r1, 8(%r3) - lg %r4, 16(%r3) - lg %r5, 24(%r3) - lg %r14, 32(%r3) - lg %r13, 40(%r3) - lg %r12, 48(%r3) - lg %r3, 56(%r3) + stmg %r11, %r15, 88(%r15) + aghi %r15, -160 + lgr %r11, %r15 + lgr %r4, %r15 + aghi %r4, -72 + la %r1, 168(%r4) + nill %r1, 65520 + lgr %r15, %r4 + lgr %r4, %r15 + aghi %r4, -72 + la %r5, 168(%r4) + nill %r5, 65520 + lgr %r15, %r4 + mvc 0(64, %r5), 0(%r3) #MEMBARRIER + mvc 0(64, %r1), 0(%r5) + #APP + lg %r0, 0(%r1) + stg %r0, 0(%r2) + #NO_APP + la %r3, 8(%r2) + la %r4, 8(%r1) + #APP + lg %r0, 0(%r4) + stg %r0, 0(%r3) + #NO_APP + la %r3, 16(%r2) + la %r4, 16(%r1) + #APP + lg %r0, 0(%r4) + stg %r0, 0(%r3) + #NO_APP + la %r3, 24(%r2) + la %r4, 24(%r1) + #APP + lg %r0, 0(%r4) + stg %r0, 0(%r3) + #NO_APP + la %r3, 32(%r2) + la %r4, 32(%r1) + #APP + lg %r0, 0(%r4) + stg %r0, 0(%r3) + #NO_APP + la %r3, 40(%r2) + la %r4, 40(%r1) + #APP + lg %r0, 0(%r4) + stg %r0, 0(%r3) + #NO_APP + la %r3, 48(%r2) + la %r4, 48(%r1) + #APP + lg %r0, 0(%r4) + stg %r0, 0(%r3) + #NO_APP + la %r2, 56(%r2) + la %r1, 56(%r1) + #APP + lg %r0, 0(%r1) stg %r0, 0(%r2) - stg %r1, 8(%r2) - stg %r4, 16(%r2) - stg %r5, 24(%r2) - stg %r14, 32(%r2) - stg %r13, 40(%r2) - stg %r12, 48(%r2) - stg %r3, 56(%r2) - lmg %r12, %r15, 96(%r15) + #NO_APP + lmg %r11, %r15, 248(%r11) br %r14 asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: stmg %r11, %r15, 88(%r15) diff --git a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align2 b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align2 index 28e1b1c..395c7cb 100644 --- a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align2 @@ -1,193 +1,200 @@ asm_test::atomic_memcpy_store_align2::release: - stmg %r12, %r15, 96(%r15) 
aghi %r15, -288 mvc 160(64, %r15), 0(%r3) #MEMBARRIER - la %r0, 7(%r2) - nill %r0, 65528 mvc 224(64, %r15), 160(%r15) - lgr %r1, %r0 - sgr %r1, %r2 - clgijh %r1, 64, .LBB6_10 - cgije %r1, 0, .LBB6_11 - lgr %r3, %r2 - xihf %r3, 4294967295 - xilf %r3, 4294967295 - agr %r3, %r0 - risbg %r4, %r1, 62, 191, 0 - lghi %r14, 0 - clgijl %r3, 3, .LBB6_6 - risbg %r3, %r1, 0, 189, 0 - lcgr %r5, %r3 - lghi %r3, 0 - la %r14, 224(%r15) - lgr %r13, %r2 -.LBB6_4: - lb %r12, 0(%r14) - stc %r12, 0(%r13) - lb %r12, 1(%r14) - stc %r12, 1(%r13) - lb %r12, 2(%r14) - stc %r12, 2(%r13) - lb %r12, 3(%r14) - stc %r12, 3(%r13) - aghi %r3, -4 - la %r13, 4(%r13) - la %r14, 4(%r14) - cgrjlh %r5, %r3, .LBB6_4 - lcgr %r14, %r3 -.LBB6_6: - la %r3, 64(%r2) - cgije %r4, 0, .LBB6_9 - lgr %r5, %r2 - agr %r5, %r14 - la %r14, 224(%r14, %r15) -.LBB6_8: - lb %r13, 0(%r14) - stc %r13, 0(%r5) - la %r5, 1(%r5) - la %r14, 1(%r14) - brctg %r4, .LBB6_8 -.LBB6_9: - sgr %r3, %r0 - clgijhe %r3, 8, .LBB6_12 - j .LBB6_19 -.LBB6_10: - lh %r0, 224(%r15) + la %r1, 224(%r15) + #APP + lh %r0, 0(%r1) sth %r0, 0(%r2) - lh %r0, 226(%r15) - sth %r0, 2(%r2) - lh %r0, 228(%r15) - sth %r0, 4(%r2) - lh %r0, 230(%r15) - sth %r0, 6(%r2) - lh %r0, 232(%r15) - sth %r0, 8(%r2) - lh %r0, 234(%r15) - sth %r0, 10(%r2) - lh %r0, 236(%r15) - sth %r0, 12(%r2) - lh %r0, 238(%r15) - sth %r0, 14(%r2) - lh %r0, 240(%r15) - sth %r0, 16(%r2) - lh %r0, 242(%r15) - sth %r0, 18(%r2) - lh %r0, 244(%r15) - sth %r0, 20(%r2) - lh %r0, 246(%r15) - sth %r0, 22(%r2) - lh %r0, 248(%r15) - sth %r0, 24(%r2) - lh %r0, 250(%r15) - sth %r0, 26(%r2) - lh %r0, 252(%r15) - sth %r0, 28(%r2) - lh %r0, 254(%r15) - sth %r0, 30(%r2) - lh %r0, 256(%r15) - sth %r0, 32(%r2) - lh %r0, 258(%r15) - sth %r0, 34(%r2) - lh %r0, 260(%r15) - sth %r0, 36(%r2) - lh %r0, 262(%r15) - sth %r0, 38(%r2) - lh %r0, 264(%r15) - sth %r0, 40(%r2) - lh %r0, 266(%r15) - sth %r0, 42(%r2) - lh %r0, 268(%r15) - sth %r0, 44(%r2) - lh %r0, 270(%r15) - sth %r0, 46(%r2) - lh %r0, 272(%r15) - sth %r0, 
48(%r2) - lh %r0, 274(%r15) - sth %r0, 50(%r2) - lh %r0, 276(%r15) - sth %r0, 52(%r2) - lh %r0, 278(%r15) - sth %r0, 54(%r2) - lh %r0, 280(%r15) - sth %r0, 56(%r2) - lh %r0, 282(%r15) - sth %r0, 58(%r2) - lh %r0, 284(%r15) - sth %r0, 60(%r2) - lh %r0, 286(%r15) - sth %r0, 62(%r2) - lmg %r12, %r15, 384(%r15) - br %r14 -.LBB6_11: - lghi %r3, 64 -.LBB6_12: - lghi %r0, 7 - sgr %r0, %r3 - lghi %r4, -8 - clgrjh %r0, %r4, .LBB6_14 - lghi %r0, -8 -.LBB6_14: - agr %r0, %r3 - srlg %r4, %r0, 3 - la %r4, 1(%r4) - tmll %r4, 3 - je .LBB6_17 - risbg %r4, %r4, 62, 191, 0 -.LBB6_16: - lg %r5, 224(%r1, %r15) - stg %r5, 0(%r1, %r2) - aghi %r3, -8 - la %r1, 8(%r1) - brctg %r4, .LBB6_16 -.LBB6_17: - clgijl %r0, 24, .LBB6_19 -.LBB6_18: - lg %r0, 224(%r1, %r15) - stg %r0, 0(%r1, %r2) - lg %r0, 232(%r1, %r15) - stg %r0, 8(%r1, %r2) - lg %r0, 240(%r1, %r15) - stg %r0, 16(%r1, %r2) - lg %r0, 248(%r1, %r15) - stg %r0, 24(%r1, %r2) - aghi %r3, -32 - la %r1, 32(%r1) - clgijh %r3, 7, .LBB6_18 -.LBB6_19: - cgije %r3, 0, .LBB6_26 - tmll %r3, 3 - lay %r0, -1(%r3) - lgr %r4, %r1 - je .LBB6_23 - risbg %r5, %r3, 62, 191, 0 - lgr %r4, %r1 -.LBB6_22: - lb %r14, 224(%r4, %r15) - stc %r14, 0(%r4, %r2) - la %r4, 1(%r4) - brctg %r5, .LBB6_22 -.LBB6_23: - clgijl %r0, 3, .LBB6_26 - agr %r3, %r1 - sgr %r3, %r4 - agr %r2, %r4 - la %r1, 224(%r4, %r15) -.LBB6_25: - lb %r0, 0(%r1) - stc %r0, 0(%r2) - lb %r0, 1(%r1) - stc %r0, 1(%r2) - lb %r0, 2(%r1) - stc %r0, 2(%r2) - lb %r0, 3(%r1) - stc %r0, 3(%r2) - aghi %r3, -4 - la %r2, 4(%r2) - la %r1, 4(%r1) - jne .LBB6_25 -.LBB6_26: - lmg %r12, %r15, 384(%r15) + #NO_APP + la %r1, 2(%r2) + la %r3, 226(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 4(%r2) + la %r3, 228(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 6(%r2) + la %r3, 230(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 8(%r2) + la %r3, 232(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 10(%r2) + la %r3, 234(%r15) + #APP + lh 
%r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 12(%r2) + la %r3, 236(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 14(%r2) + la %r3, 238(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 16(%r2) + la %r3, 240(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 18(%r2) + la %r3, 242(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 20(%r2) + la %r3, 244(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 22(%r2) + la %r3, 246(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 24(%r2) + la %r3, 248(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 26(%r2) + la %r3, 250(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 28(%r2) + la %r3, 252(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 30(%r2) + la %r3, 254(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 32(%r2) + la %r3, 256(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 34(%r2) + la %r3, 258(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 36(%r2) + la %r3, 260(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 38(%r2) + la %r3, 262(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 40(%r2) + la %r3, 264(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 42(%r2) + la %r3, 266(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 44(%r2) + la %r3, 268(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 46(%r2) + la %r3, 270(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 48(%r2) + la %r3, 272(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 50(%r2) + la %r3, 274(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 52(%r2) + la %r3, 276(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 54(%r2) + la %r3, 278(%r15) + #APP + lh %r0, 0(%r3) + sth 
%r0, 0(%r1) + #NO_APP + la %r1, 56(%r2) + la %r3, 280(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 58(%r2) + la %r3, 282(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 60(%r2) + la %r3, 284(%r15) + #APP + lh %r0, 0(%r3) + sth %r0, 0(%r1) + #NO_APP + la %r1, 62(%r2) + la %r2, 286(%r15) + #APP + lh %r0, 0(%r2) + sth %r0, 0(%r1) + #NO_APP + aghi %r15, 288 br %r14 asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: stmg %r14, %r15, 112(%r15) diff --git a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align4 b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align4 index f207815..a4a5da5 100644 --- a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align4 @@ -1,161 +1,104 @@ asm_test::atomic_memcpy_store_align4::release: - stmg %r12, %r15, 96(%r15) aghi %r15, -288 mvc 160(64, %r15), 0(%r3) #MEMBARRIER - la %r0, 7(%r2) - nill %r0, 65528 mvc 224(64, %r15), 160(%r15) - lgr %r1, %r0 - sgr %r1, %r2 - clgijh %r1, 64, .LBB10_10 - cgije %r1, 0, .LBB10_11 - lgr %r3, %r2 - xihf %r3, 4294967295 - xilf %r3, 4294967295 - agr %r3, %r0 - risbg %r4, %r1, 62, 191, 0 - lghi %r14, 0 - clgijl %r3, 3, .LBB10_6 - risbg %r3, %r1, 0, 189, 0 - lcgr %r5, %r3 - lghi %r3, 0 - la %r14, 224(%r15) - lgr %r13, %r2 -.LBB10_4: - lb %r12, 0(%r14) - stc %r12, 0(%r13) - lb %r12, 1(%r14) - stc %r12, 1(%r13) - lb %r12, 2(%r14) - stc %r12, 2(%r13) - lb %r12, 3(%r14) - stc %r12, 3(%r13) - aghi %r3, -4 - la %r13, 4(%r13) - la %r14, 4(%r14) - cgrjlh %r5, %r3, .LBB10_4 - lcgr %r14, %r3 -.LBB10_6: - la %r3, 64(%r2) - cgije %r4, 0, .LBB10_9 - lgr %r5, %r2 - agr %r5, %r14 - la %r14, 224(%r14, %r15) -.LBB10_8: - lb %r13, 0(%r14) - stc %r13, 0(%r5) - la %r5, 1(%r5) - la %r14, 1(%r14) - brctg %r4, .LBB10_8 -.LBB10_9: - sgr %r3, %r0 - clgijhe %r3, 8, .LBB10_12 - j .LBB10_19 -.LBB10_10: - l %r0, 224(%r15) + la %r1, 224(%r15) + #APP + l %r0, 
0(%r1) st %r0, 0(%r2) - l %r0, 228(%r15) - st %r0, 4(%r2) - l %r0, 232(%r15) - st %r0, 8(%r2) - l %r0, 236(%r15) - st %r0, 12(%r2) - l %r0, 240(%r15) - st %r0, 16(%r2) - l %r0, 244(%r15) - st %r0, 20(%r2) - l %r0, 248(%r15) - st %r0, 24(%r2) - l %r0, 252(%r15) - st %r0, 28(%r2) - l %r0, 256(%r15) - st %r0, 32(%r2) - l %r0, 260(%r15) - st %r0, 36(%r2) - l %r0, 264(%r15) - st %r0, 40(%r2) - l %r0, 268(%r15) - st %r0, 44(%r2) - l %r0, 272(%r15) - st %r0, 48(%r2) - l %r0, 276(%r15) - st %r0, 52(%r2) - l %r0, 280(%r15) - st %r0, 56(%r2) - l %r0, 284(%r15) - st %r0, 60(%r2) - lmg %r12, %r15, 384(%r15) - br %r14 -.LBB10_11: - lghi %r3, 64 -.LBB10_12: - lghi %r0, 7 - sgr %r0, %r3 - lghi %r4, -8 - clgrjh %r0, %r4, .LBB10_14 - lghi %r0, -8 -.LBB10_14: - agr %r0, %r3 - srlg %r4, %r0, 3 - la %r4, 1(%r4) - tmll %r4, 3 - je .LBB10_17 - risbg %r4, %r4, 62, 191, 0 -.LBB10_16: - lg %r5, 224(%r1, %r15) - stg %r5, 0(%r1, %r2) - aghi %r3, -8 - la %r1, 8(%r1) - brctg %r4, .LBB10_16 -.LBB10_17: - clgijl %r0, 24, .LBB10_19 -.LBB10_18: - lg %r0, 224(%r1, %r15) - stg %r0, 0(%r1, %r2) - lg %r0, 232(%r1, %r15) - stg %r0, 8(%r1, %r2) - lg %r0, 240(%r1, %r15) - stg %r0, 16(%r1, %r2) - lg %r0, 248(%r1, %r15) - stg %r0, 24(%r1, %r2) - aghi %r3, -32 - la %r1, 32(%r1) - clgijh %r3, 7, .LBB10_18 -.LBB10_19: - cgije %r3, 0, .LBB10_26 - tmll %r3, 3 - lay %r0, -1(%r3) - lgr %r4, %r1 - je .LBB10_23 - risbg %r5, %r3, 62, 191, 0 - lgr %r4, %r1 -.LBB10_22: - lb %r14, 224(%r4, %r15) - stc %r14, 0(%r4, %r2) - la %r4, 1(%r4) - brctg %r5, .LBB10_22 -.LBB10_23: - clgijl %r0, 3, .LBB10_26 - agr %r3, %r1 - sgr %r3, %r4 - agr %r2, %r4 - la %r1, 224(%r4, %r15) -.LBB10_25: - lb %r0, 0(%r1) - stc %r0, 0(%r2) - lb %r0, 1(%r1) - stc %r0, 1(%r2) - lb %r0, 2(%r1) - stc %r0, 2(%r2) - lb %r0, 3(%r1) - stc %r0, 3(%r2) - aghi %r3, -4 - la %r2, 4(%r2) - la %r1, 4(%r1) - jne .LBB10_25 -.LBB10_26: - lmg %r12, %r15, 384(%r15) + #NO_APP + la %r1, 4(%r2) + la %r3, 228(%r15) + #APP + l %r0, 0(%r3) + st %r0, 0(%r1) + #NO_APP + la 
%r1, 8(%r2) + la %r3, 232(%r15) + #APP + l %r0, 0(%r3) + st %r0, 0(%r1) + #NO_APP + la %r1, 12(%r2) + la %r3, 236(%r15) + #APP + l %r0, 0(%r3) + st %r0, 0(%r1) + #NO_APP + la %r1, 16(%r2) + la %r3, 240(%r15) + #APP + l %r0, 0(%r3) + st %r0, 0(%r1) + #NO_APP + la %r1, 20(%r2) + la %r3, 244(%r15) + #APP + l %r0, 0(%r3) + st %r0, 0(%r1) + #NO_APP + la %r1, 24(%r2) + la %r3, 248(%r15) + #APP + l %r0, 0(%r3) + st %r0, 0(%r1) + #NO_APP + la %r1, 28(%r2) + la %r3, 252(%r15) + #APP + l %r0, 0(%r3) + st %r0, 0(%r1) + #NO_APP + la %r1, 32(%r2) + la %r3, 256(%r15) + #APP + l %r0, 0(%r3) + st %r0, 0(%r1) + #NO_APP + la %r1, 36(%r2) + la %r3, 260(%r15) + #APP + l %r0, 0(%r3) + st %r0, 0(%r1) + #NO_APP + la %r1, 40(%r2) + la %r3, 264(%r15) + #APP + l %r0, 0(%r3) + st %r0, 0(%r1) + #NO_APP + la %r1, 44(%r2) + la %r3, 268(%r15) + #APP + l %r0, 0(%r3) + st %r0, 0(%r1) + #NO_APP + la %r1, 48(%r2) + la %r3, 272(%r15) + #APP + l %r0, 0(%r3) + st %r0, 0(%r1) + #NO_APP + la %r1, 52(%r2) + la %r3, 276(%r15) + #APP + l %r0, 0(%r3) + st %r0, 0(%r1) + #NO_APP + la %r1, 56(%r2) + la %r3, 280(%r15) + #APP + l %r0, 0(%r3) + st %r0, 0(%r1) + #NO_APP + la %r1, 60(%r2) + la %r2, 284(%r15) + #APP + l %r0, 0(%r2) + st %r0, 0(%r1) + #NO_APP + aghi %r15, 288 br %r14 asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: stmg %r14, %r15, 112(%r15) diff --git a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align8 b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align8 index c71a3b0..51a5c91 100644 --- a/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/s390x-unknown-linux-gnu/atomic_memcpy_store_align8 @@ -1,23 +1,56 @@ asm_test::atomic_memcpy_store_align8::release: - stmg %r12, %r15, 96(%r15) - lg %r0, 0(%r3) - lg %r1, 8(%r3) - lg %r4, 16(%r3) - lg %r5, 24(%r3) - lg %r14, 32(%r3) - lg %r13, 40(%r3) - lg %r12, 48(%r3) - lg %r3, 56(%r3) + aghi %r15, -288 + mvc 160(64, %r15), 0(%r3) #MEMBARRIER + mvc 224(64, %r15), 
160(%r15) + la %r1, 224(%r15) + #APP + lg %r0, 0(%r1) stg %r0, 0(%r2) - stg %r1, 8(%r2) - stg %r4, 16(%r2) - stg %r5, 24(%r2) - stg %r14, 32(%r2) - stg %r13, 40(%r2) - stg %r12, 48(%r2) - stg %r3, 56(%r2) - lmg %r12, %r15, 96(%r15) + #NO_APP + la %r1, 8(%r2) + la %r3, 232(%r15) + #APP + lg %r0, 0(%r3) + stg %r0, 0(%r1) + #NO_APP + la %r1, 16(%r2) + la %r3, 240(%r15) + #APP + lg %r0, 0(%r3) + stg %r0, 0(%r1) + #NO_APP + la %r1, 24(%r2) + la %r3, 248(%r15) + #APP + lg %r0, 0(%r3) + stg %r0, 0(%r1) + #NO_APP + la %r1, 32(%r2) + la %r3, 256(%r15) + #APP + lg %r0, 0(%r3) + stg %r0, 0(%r1) + #NO_APP + la %r1, 40(%r2) + la %r3, 264(%r15) + #APP + lg %r0, 0(%r3) + stg %r0, 0(%r1) + #NO_APP + la %r1, 48(%r2) + la %r3, 272(%r15) + #APP + lg %r0, 0(%r3) + stg %r0, 0(%r1) + #NO_APP + la %r1, 56(%r2) + la %r2, 280(%r15) + #APP + lg %r0, 0(%r2) + stg %r0, 0(%r1) + #NO_APP + aghi %r15, 288 br %r14 asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: stmg %r14, %r15, 112(%r15) diff --git a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align1 b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align1 index 6edac9f..a20a120 100644 --- a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align1 @@ -1,361 +1,206 @@ asm_test::atomic_memcpy_load_align1::acquire: - push {r4, r5, r6, lr} + push {r7, lr} sub sp, #32 + mov lr, sp + add.w r3, lr, #31 + add.w r12, r1, #31 + @APP + ldrb.w r2, [r12] + strb r2, [r3] + @NO_APP + add.w r2, r1, #30 + add.w r3, lr, #30 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r12, lr, #16 + add.w r2, r1, #29 + add.w r3, lr, #29 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #28 + add.w r3, lr, #28 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #27 + add.w r3, lr, #27 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #26 + 
add.w r3, lr, #26 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #25 + add.w r3, lr, #25 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #24 + add.w r3, lr, #24 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #23 + add.w r3, lr, #23 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #22 + add.w r3, lr, #22 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #21 + add.w r3, lr, #21 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #20 + add.w r3, lr, #20 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #19 + add.w r3, lr, #19 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #18 + add.w r3, lr, #18 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #17 + add.w r3, lr, #17 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #16 + mov r3, r12 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #15 + add.w r3, lr, #15 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #14 + add.w r3, lr, #14 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #13 + add.w r3, lr, #13 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #12 + add.w r3, lr, #12 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #11 + add.w r3, lr, #11 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #10 + add.w r3, lr, #10 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #9 + add.w r3, lr, #9 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r1, #8 + add.w r3, lr, #8 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + adds r2, r1, #7 + add.w r3, lr, #7 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + adds r2, r1, #6 + add.w r3, lr, #6 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + adds r2, r1, #5 + add.w r3, lr, #5 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + adds r2, r1, #4 + add.w r3, lr, #4 + @APP + ldrb r2, [r2] + strb r2, [r3] + 
@NO_APP adds r2, r1, #3 - bic lr, r2, #3 - sub.w r2, lr, r1 - cmp r2, #33 - bhs .LBB0_5 - cmp r2, #0 - beq .LBB0_6 - sub.w r3, r1, lr - add.w r12, r1, #32 - mov r4, sp - mov r5, r1 -.LBB0_3: - ldrb r6, [r5], #1 - adds r3, #1 - strb r6, [r4], #1 - blo .LBB0_3 - sub.w r3, r12, lr - cmp r3, #4 - bhs .LBB0_7 - b .LBB0_9 -.LBB0_5: - ldrb r2, [r1] - mov r3, sp - strb.w r2, [sp] - add.w r5, r3, #16 - ldrb r2, [r1, #1] - strb.w r2, [sp, #1] - ldrb r2, [r1, #2] - strb.w r2, [sp, #2] - ldrb r2, [r1, #3] - strb.w r2, [sp, #3] - ldrb r2, [r1, #4] - strb.w r2, [sp, #4] - ldrb r2, [r1, #5] - strb.w r2, [sp, #5] - ldrb r2, [r1, #6] - strb.w r2, [sp, #6] - ldrb r2, [r1, #7] - strb.w r2, [sp, #7] - ldrb r2, [r1, #8] - strb.w r2, [sp, #8] - ldrb r2, [r1, #9] - strb.w r2, [sp, #9] - ldrb r2, [r1, #10] - strb.w r2, [sp, #10] - ldrb r2, [r1, #11] - strb.w r2, [sp, #11] - ldrb r2, [r1, #12] - strb.w r2, [sp, #12] - ldrb r2, [r1, #13] - strb.w r2, [sp, #13] - ldrb r2, [r1, #14] - strb.w r2, [sp, #14] - ldrb r2, [r1, #15] - strb.w r2, [sp, #15] - ldrb r2, [r1, #16] - strb.w r2, [sp, #16] - ldrb r2, [r1, #17] - strb.w r2, [sp, #17] - ldrb r2, [r1, #18] - strb.w r2, [sp, #18] - ldrb r2, [r1, #19] - strb.w r2, [sp, #19] - ldrb r2, [r1, #20] - strb.w r2, [sp, #20] - ldrb r2, [r1, #21] - strb.w r2, [sp, #21] - ldrb r2, [r1, #22] - strb.w r2, [sp, #22] - ldrb r2, [r1, #23] - strb.w r2, [sp, #23] - ldrb r2, [r1, #24] - strb.w r2, [sp, #24] - ldrb r2, [r1, #25] - strb.w r2, [sp, #25] - ldrb r2, [r1, #26] - strb.w r2, [sp, #26] - ldrb r2, [r1, #27] - strb.w r2, [sp, #27] - ldrb r2, [r1, #28] - strb.w r2, [sp, #28] - ldrb r2, [r1, #29] - strb.w r2, [sp, #29] - ldrb r2, [r1, #30] - strb.w r2, [sp, #30] - movs r2, #31 - ldrb r1, [r1, #31] - vld1.8 {d16, d17}, [r3], r2 - strb r1, [r3] - vld1.64 {d18, d19}, [r5] - b .LBB0_13 -.LBB0_6: - movs r3, #32 -.LBB0_7: - mov r4, sp -.LBB0_8: - ldr r6, [r1, r2] - subs r3, #4 - cmp r3, #3 - str r6, [r4, r2] - add.w r2, r2, #4 - bhi .LBB0_8 -.LBB0_9: - cbz r3, 
.LBB0_12 - add r1, r2 - mov r6, sp - add r2, r6 -.LBB0_11: - ldrb r6, [r1], #1 - subs r3, #1 - strb r6, [r2], #1 - bne .LBB0_11 -.LBB0_12: - mov r1, sp - vld1.8 {d16, d17}, [r1]! - vld1.64 {d18, d19}, [r1] -.LBB0_13: + add.w r3, lr, #3 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + adds r2, r1, #2 + add.w r3, lr, #2 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + adds r2, r1, #1 + add.w r3, lr, #1 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + mov r2, lr + @APP + ldrb r1, [r1] + strb r1, [r2] + @NO_APP + vld1.64 {d16, d17}, [lr] + vld1.64 {d18, d19}, [r12] vst1.8 {d16, d17}, [r0]! vst1.8 {d18, d19}, [r0] dmb ish add sp, #32 - pop {r4, r5, r6, pc} -.LBB2_3: - ldrb r6, [r4], #1 - adds r3, #1 - strb r6, [r5], #1 - blo .LBB2_3 - sub.w r3, r12, lr - cmp r3, #4 - bhs .LBB2_7 - b .LBB2_8 -.LBB2_5: - ldrb.w r1, [sp, #32] - strb r1, [r0] - ldrb.w r1, [sp, #33] - strb r1, [r0, #1] - ldrb.w r1, [sp, #34] - strb r1, [r0, #2] - ldrb.w r1, [sp, #35] - strb r1, [r0, #3] - ldrb.w r1, [sp, #36] - strb r1, [r0, #4] - ldrb.w r1, [sp, #37] - strb r1, [r0, #5] - ldrb.w r1, [sp, #38] - strb r1, [r0, #6] - ldrb.w r1, [sp, #39] - strb r1, [r0, #7] - ldrb.w r1, [sp, #40] - strb r1, [r0, #8] - ldrb.w r1, [sp, #41] - strb r1, [r0, #9] - ldrb.w r1, [sp, #42] - strb r1, [r0, #10] - ldrb.w r1, [sp, #43] - strb r1, [r0, #11] - ldrb.w r1, [sp, #44] - strb r1, [r0, #12] - ldrb.w r1, [sp, #45] - strb r1, [r0, #13] - ldrb.w r1, [sp, #46] - strb r1, [r0, #14] - ldrb.w r1, [sp, #47] - strb r1, [r0, #15] - ldrb.w r1, [sp, #48] - strb r1, [r0, #16] - ldrb.w r1, [sp, #49] - strb r1, [r0, #17] - ldrb.w r1, [sp, #50] - strb r1, [r0, #18] - ldrb.w r1, [sp, #51] - strb r1, [r0, #19] - ldrb.w r1, [sp, #52] - strb r1, [r0, #20] - ldrb.w r1, [sp, #53] - strb r1, [r0, #21] - ldrb.w r1, [sp, #54] - strb r1, [r0, #22] - ldrb.w r1, [sp, #55] - strb r1, [r0, #23] - ldrb.w r1, [sp, #56] - strb r1, [r0, #24] - ldrb.w r1, [sp, #57] - strb r1, [r0, #25] - ldrb.w r1, [sp, #58] - strb r1, [r0, #26] - ldrb.w r1, 
[sp, #59] - strb r1, [r0, #27] - ldrb.w r1, [sp, #60] - strb r1, [r0, #28] - ldrb.w r1, [sp, #61] - strb r1, [r0, #29] - ldrb.w r1, [sp, #62] - strb r1, [r0, #30] - ldrb.w r1, [sp, #63] - strb r1, [r0, #31] - b .LBB2_11 -.LBB2_6: - movs r3, #32 -.LBB2_7: - ldr r6, [r1, r2] - subs r3, #4 - cmp r3, #3 - str r6, [r0, r2] - add.w r2, r2, #4 - bhi .LBB2_7 -.LBB2_8: - cbz r3, .LBB2_11 - add r1, r2 - add r0, r2 -.LBB2_10: - ldrb r2, [r1], #1 - subs r3, #1 - strb r2, [r0], #1 - bne .LBB2_10 -.LBB2_11: - add sp, #64 - pop {r4, r5, r6, pc} -.LBB4_3: - ldrb r6, [r5], #1 - adds r3, #1 - strb r6, [r4], #1 - blo .LBB4_3 - sub.w r3, r12, lr - cmp r3, #4 - bhs .LBB4_7 - b .LBB4_9 -.LBB4_5: - ldrh r2, [r1, #30] - strh.w r2, [sp, #30] - ldrh r2, [r1, #28] - strh.w r2, [sp, #28] - ldrh r2, [r1, #26] - strh.w r2, [sp, #26] - ldrh r2, [r1, #24] - strh.w r2, [sp, #24] - ldrh r2, [r1, #22] - strh.w r2, [sp, #22] - ldrh r2, [r1, #20] - strh.w r2, [sp, #20] - ldrh r2, [r1, #18] - strh.w r2, [sp, #18] - ldrh r2, [r1, #16] - strh.w r2, [sp, #16] - ldrh r2, [r1, #14] - strh.w r2, [sp, #14] - ldrh r2, [r1, #12] - strh.w r2, [sp, #12] - ldrh r2, [r1, #10] - strh.w r2, [sp, #10] - ldrh r2, [r1, #8] - strh.w r2, [sp, #8] - ldrh r2, [r1, #6] - strh.w r2, [sp, #6] - ldrh r2, [r1, #4] - strh.w r2, [sp, #4] - ldrh r2, [r1, #2] - strh.w r2, [sp, #2] - ldrh r1, [r1] - strh.w r1, [sp] - b .LBB4_12 -.LBB4_6: - movs r3, #32 -.LBB4_7: - mov r4, sp -.LBB4_8: - ldr r6, [r1, r2] - subs r3, #4 - cmp r3, #3 - str r6, [r4, r2] - add.w r2, r2, #4 - bhi .LBB4_8 -.LBB4_9: - cbz r3, .LBB4_12 - add r1, r2 - mov r6, sp - add r2, r6 -.LBB4_11: - ldrb r6, [r1], #1 - subs r3, #1 - strb r6, [r2], #1 - bne .LBB4_11 -.LBB4_12: - mov r1, sp - vld1.16 {d16, d17}, [r1]! - vld1.64 {d18, d19}, [r1] - vst1.16 {d16, d17}, [r0]! 
- vst1.16 {d18, d19}, [r0] - dmb ish - add sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r6, [r4], #1 - adds r3, #1 - strb r6, [r5], #1 - blo .LBB6_3 - sub.w r3, r12, lr - cmp r3, #4 - bhs .LBB6_7 - b .LBB6_8 -.LBB6_5: - ldrh.w r1, [sp, #62] - strh r1, [r0, #30] - ldrh.w r1, [sp, #60] - strh r1, [r0, #28] - ldrh.w r1, [sp, #58] - strh r1, [r0, #26] - ldrh.w r1, [sp, #56] - strh r1, [r0, #24] - ldrh.w r1, [sp, #54] - strh r1, [r0, #22] - ldrh.w r1, [sp, #52] - strh r1, [r0, #20] - ldrh.w r1, [sp, #50] - strh r1, [r0, #18] - ldrh.w r1, [sp, #48] - strh r1, [r0, #16] - ldrh.w r1, [sp, #46] - strh r1, [r0, #14] - ldrh.w r1, [sp, #44] - strh r1, [r0, #12] - ldrh.w r1, [sp, #42] - strh r1, [r0, #10] - ldrh.w r1, [sp, #40] - strh r1, [r0, #8] - ldrh.w r1, [sp, #38] - strh r1, [r0, #6] - ldrh.w r1, [sp, #36] - strh r1, [r0, #4] - ldrh.w r1, [sp, #34] - strh r1, [r0, #2] - ldrh.w r1, [sp, #32] - strh r1, [r0] - b .LBB6_11 -.LBB6_6: - movs r3, #32 -.LBB6_7: - ldr r6, [r1, r2] - subs r3, #4 - cmp r3, #3 - str r6, [r0, r2] - add.w r2, r2, #4 - bhi .LBB6_7 -.LBB6_8: - cbz r3, .LBB6_11 - add r1, r2 - add r0, r2 -.LBB6_10: - ldrb r2, [r1], #1 - subs r3, #1 - strb r2, [r0], #1 - bne .LBB6_10 -.LBB6_11: - add sp, #64 - pop {r4, r5, r6, pc} + pop {r7, pc} asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} sub sp, #76 @@ -464,236 +309,3 @@ asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: dmb ish add sp, #76 pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} -.LBB2_3: - ldrb r6, [r4], #1 - adds r3, #1 - strb r6, [r5], #1 - blo .LBB2_3 - sub.w r3, r12, lr - cmp r3, #4 - bhs .LBB2_7 - b .LBB2_8 -.LBB2_5: - ldrb.w r1, [sp, #32] - strb r1, [r0] - ldrb.w r1, [sp, #33] - strb r1, [r0, #1] - ldrb.w r1, [sp, #34] - strb r1, [r0, #2] - ldrb.w r1, [sp, #35] - strb r1, [r0, #3] - ldrb.w r1, [sp, #36] - strb r1, [r0, #4] - ldrb.w r1, [sp, #37] - strb r1, [r0, #5] - ldrb.w r1, [sp, #38] - strb r1, [r0, #6] - ldrb.w r1, [sp, 
#39] - strb r1, [r0, #7] - ldrb.w r1, [sp, #40] - strb r1, [r0, #8] - ldrb.w r1, [sp, #41] - strb r1, [r0, #9] - ldrb.w r1, [sp, #42] - strb r1, [r0, #10] - ldrb.w r1, [sp, #43] - strb r1, [r0, #11] - ldrb.w r1, [sp, #44] - strb r1, [r0, #12] - ldrb.w r1, [sp, #45] - strb r1, [r0, #13] - ldrb.w r1, [sp, #46] - strb r1, [r0, #14] - ldrb.w r1, [sp, #47] - strb r1, [r0, #15] - ldrb.w r1, [sp, #48] - strb r1, [r0, #16] - ldrb.w r1, [sp, #49] - strb r1, [r0, #17] - ldrb.w r1, [sp, #50] - strb r1, [r0, #18] - ldrb.w r1, [sp, #51] - strb r1, [r0, #19] - ldrb.w r1, [sp, #52] - strb r1, [r0, #20] - ldrb.w r1, [sp, #53] - strb r1, [r0, #21] - ldrb.w r1, [sp, #54] - strb r1, [r0, #22] - ldrb.w r1, [sp, #55] - strb r1, [r0, #23] - ldrb.w r1, [sp, #56] - strb r1, [r0, #24] - ldrb.w r1, [sp, #57] - strb r1, [r0, #25] - ldrb.w r1, [sp, #58] - strb r1, [r0, #26] - ldrb.w r1, [sp, #59] - strb r1, [r0, #27] - ldrb.w r1, [sp, #60] - strb r1, [r0, #28] - ldrb.w r1, [sp, #61] - strb r1, [r0, #29] - ldrb.w r1, [sp, #62] - strb r1, [r0, #30] - ldrb.w r1, [sp, #63] - strb r1, [r0, #31] - b .LBB2_11 -.LBB2_6: - movs r3, #32 -.LBB2_7: - ldr r6, [r1, r2] - subs r3, #4 - cmp r3, #3 - str r6, [r0, r2] - add.w r2, r2, #4 - bhi .LBB2_7 -.LBB2_8: - cbz r3, .LBB2_11 - add r1, r2 - add r0, r2 -.LBB2_10: - ldrb r2, [r1], #1 - subs r3, #1 - strb r2, [r0], #1 - bne .LBB2_10 -.LBB2_11: - add sp, #64 - pop {r4, r5, r6, pc} -.LBB4_3: - ldrb r6, [r5], #1 - adds r3, #1 - strb r6, [r4], #1 - blo .LBB4_3 - sub.w r3, r12, lr - cmp r3, #4 - bhs .LBB4_7 - b .LBB4_9 -.LBB4_5: - ldrh r2, [r1, #30] - strh.w r2, [sp, #30] - ldrh r2, [r1, #28] - strh.w r2, [sp, #28] - ldrh r2, [r1, #26] - strh.w r2, [sp, #26] - ldrh r2, [r1, #24] - strh.w r2, [sp, #24] - ldrh r2, [r1, #22] - strh.w r2, [sp, #22] - ldrh r2, [r1, #20] - strh.w r2, [sp, #20] - ldrh r2, [r1, #18] - strh.w r2, [sp, #18] - ldrh r2, [r1, #16] - strh.w r2, [sp, #16] - ldrh r2, [r1, #14] - strh.w r2, [sp, #14] - ldrh r2, [r1, #12] - strh.w r2, [sp, #12] - 
ldrh r2, [r1, #10] - strh.w r2, [sp, #10] - ldrh r2, [r1, #8] - strh.w r2, [sp, #8] - ldrh r2, [r1, #6] - strh.w r2, [sp, #6] - ldrh r2, [r1, #4] - strh.w r2, [sp, #4] - ldrh r2, [r1, #2] - strh.w r2, [sp, #2] - ldrh r1, [r1] - strh.w r1, [sp] - b .LBB4_12 -.LBB4_6: - movs r3, #32 -.LBB4_7: - mov r4, sp -.LBB4_8: - ldr r6, [r1, r2] - subs r3, #4 - cmp r3, #3 - str r6, [r4, r2] - add.w r2, r2, #4 - bhi .LBB4_8 -.LBB4_9: - cbz r3, .LBB4_12 - add r1, r2 - mov r6, sp - add r2, r6 -.LBB4_11: - ldrb r6, [r1], #1 - subs r3, #1 - strb r6, [r2], #1 - bne .LBB4_11 -.LBB4_12: - mov r1, sp - vld1.16 {d16, d17}, [r1]! - vld1.64 {d18, d19}, [r1] - vst1.16 {d16, d17}, [r0]! - vst1.16 {d18, d19}, [r0] - dmb ish - add sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r6, [r4], #1 - adds r3, #1 - strb r6, [r5], #1 - blo .LBB6_3 - sub.w r3, r12, lr - cmp r3, #4 - bhs .LBB6_7 - b .LBB6_8 -.LBB6_5: - ldrh.w r1, [sp, #62] - strh r1, [r0, #30] - ldrh.w r1, [sp, #60] - strh r1, [r0, #28] - ldrh.w r1, [sp, #58] - strh r1, [r0, #26] - ldrh.w r1, [sp, #56] - strh r1, [r0, #24] - ldrh.w r1, [sp, #54] - strh r1, [r0, #22] - ldrh.w r1, [sp, #52] - strh r1, [r0, #20] - ldrh.w r1, [sp, #50] - strh r1, [r0, #18] - ldrh.w r1, [sp, #48] - strh r1, [r0, #16] - ldrh.w r1, [sp, #46] - strh r1, [r0, #14] - ldrh.w r1, [sp, #44] - strh r1, [r0, #12] - ldrh.w r1, [sp, #42] - strh r1, [r0, #10] - ldrh.w r1, [sp, #40] - strh r1, [r0, #8] - ldrh.w r1, [sp, #38] - strh r1, [r0, #6] - ldrh.w r1, [sp, #36] - strh r1, [r0, #4] - ldrh.w r1, [sp, #34] - strh r1, [r0, #2] - ldrh.w r1, [sp, #32] - strh r1, [r0] - b .LBB6_11 -.LBB6_6: - movs r3, #32 -.LBB6_7: - ldr r6, [r1, r2] - subs r3, #4 - cmp r3, #3 - str r6, [r0, r2] - add.w r2, r2, #4 - bhi .LBB6_7 -.LBB6_8: - cbz r3, .LBB6_11 - add r1, r2 - add r0, r2 -.LBB6_10: - ldrb r2, [r1], #1 - subs r3, #1 - strb r2, [r0], #1 - bne .LBB6_10 -.LBB6_11: - add sp, #64 - pop {r4, r5, r6, pc} diff --git 
a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align16 b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align16 index e2ece39..f506ece 100644 --- a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align16 @@ -1,19 +1,67 @@ asm_test::atomic_memcpy_load_align16::acquire: - push {r4, r5, r6, lr} - ldr.w r12, [r1, #28] - ldr.w lr, [r1, #24] - ldr r2, [r1, #20] - ldr r3, [r1, #16] - ldr r4, [r1, #12] - ldr r5, [r1, #8] - ldr r6, [r1, #4] + push {r4, r6, r7, lr} + add r7, sp, #8 + sub sp, #32 + mov r4, sp + bfc r4, #0, #4 + mov sp, r4 + mov lr, sp + add.w r3, lr, #28 + add.w r12, r1, #28 + @APP + ldr.w r2, [r12] + str r2, [r3] + @NO_APP + sub.w r4, r7, #8 + add.w r2, r1, #24 + add.w r3, lr, #24 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add.w r12, lr, #16 + add.w r2, r1, #20 + add.w r3, lr, #20 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add.w r2, r1, #16 + mov r3, r12 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add.w r2, r1, #12 + add.w r3, lr, #12 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add.w r2, r1, #8 + add.w r3, lr, #8 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + adds r2, r1, #4 + add.w r3, lr, #4 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + mov r2, lr + @APP ldr r1, [r1] - strd r5, r4, [r0, #8] - strd r1, r6, [r0] - strd r3, r2, [r0, #16] - strd lr, r12, [r0, #24] + str r1, [r2] + @NO_APP + vld1.64 {d16, d17}, [lr:128] + vld1.64 {d18, d19}, [r12:128] + vst1.64 {d16, d17}, [r0:128]! 
+ vst1.64 {d18, d19}, [r0:128] dmb ish - pop {r4, r5, r6, pc} + mov sp, r4 + pop {r4, r6, r7, pc} asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: push {r4, r5, r6, lr} ldrd lr, r12, [r1, #8] diff --git a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align2 b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align2 index 313e10e..762406b 100644 --- a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align2 @@ -1,154 +1,110 @@ asm_test::atomic_memcpy_load_align2::acquire: - push {r4, r5, r6, lr} + push {r7, lr} sub sp, #32 - adds r2, r1, #3 - bic lr, r2, #3 - sub.w r2, lr, r1 - cmp r2, #32 - bhi .LBB4_5 - cmp r2, #0 - beq .LBB4_6 - sub.w r3, r1, lr - add.w r12, r1, #32 - mov r4, sp - mov r5, r1 -.LBB4_3: - ldrb r6, [r5], #1 - adds r3, #1 - strb r6, [r4], #1 - blo .LBB4_3 - sub.w r3, r12, lr - cmp r3, #4 - bhs .LBB4_7 - b .LBB4_9 -.LBB4_5: - ldrh r2, [r1, #30] - strh.w r2, [sp, #30] - ldrh r2, [r1, #28] - strh.w r2, [sp, #28] - ldrh r2, [r1, #26] - strh.w r2, [sp, #26] - ldrh r2, [r1, #24] - strh.w r2, [sp, #24] - ldrh r2, [r1, #22] - strh.w r2, [sp, #22] - ldrh r2, [r1, #20] - strh.w r2, [sp, #20] - ldrh r2, [r1, #18] - strh.w r2, [sp, #18] - ldrh r2, [r1, #16] - strh.w r2, [sp, #16] - ldrh r2, [r1, #14] - strh.w r2, [sp, #14] - ldrh r2, [r1, #12] - strh.w r2, [sp, #12] - ldrh r2, [r1, #10] - strh.w r2, [sp, #10] - ldrh r2, [r1, #8] - strh.w r2, [sp, #8] - ldrh r2, [r1, #6] - strh.w r2, [sp, #6] - ldrh r2, [r1, #4] - strh.w r2, [sp, #4] - ldrh r2, [r1, #2] - strh.w r2, [sp, #2] + mov lr, sp + add.w r3, lr, #30 + add.w r12, r1, #30 + @APP + ldrh.w r2, [r12] + strh r2, [r3] + @NO_APP + add.w r2, r1, #28 + add.w r3, lr, #28 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add.w r12, lr, #16 + add.w r2, r1, #26 + add.w r3, lr, #26 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add.w r2, 
r1, #24 + add.w r3, lr, #24 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add.w r2, r1, #22 + add.w r3, lr, #22 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add.w r2, r1, #20 + add.w r3, lr, #20 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add.w r2, r1, #18 + add.w r3, lr, #18 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add.w r2, r1, #16 + mov r3, r12 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add.w r2, r1, #14 + add.w r3, lr, #14 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add.w r2, r1, #12 + add.w r3, lr, #12 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add.w r2, r1, #10 + add.w r3, lr, #10 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + add.w r2, r1, #8 + add.w r3, lr, #8 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + adds r2, r1, #6 + add.w r3, lr, #6 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + adds r2, r1, #4 + add.w r3, lr, #4 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + adds r2, r1, #2 + add.w r3, lr, #2 + @APP + ldrh r2, [r2] + strh r2, [r3] + @NO_APP + mov r2, lr + @APP ldrh r1, [r1] - strh.w r1, [sp] - b .LBB4_12 -.LBB4_6: - movs r3, #32 -.LBB4_7: - mov r4, sp -.LBB4_8: - ldr r6, [r1, r2] - subs r3, #4 - cmp r3, #3 - str r6, [r4, r2] - add.w r2, r2, #4 - bhi .LBB4_8 -.LBB4_9: - cbz r3, .LBB4_12 - add r1, r2 - mov r6, sp - add r2, r6 -.LBB4_11: - ldrb r6, [r1], #1 - subs r3, #1 - strb r6, [r2], #1 - bne .LBB4_11 -.LBB4_12: - mov r1, sp - vld1.16 {d16, d17}, [r1]! - vld1.64 {d18, d19}, [r1] + strh r1, [r2] + @NO_APP + vld1.64 {d16, d17}, [lr] + vld1.64 {d18, d19}, [r12] vst1.16 {d16, d17}, [r0]! 
vst1.16 {d18, d19}, [r0] dmb ish add sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r6, [r4], #1 - adds r3, #1 - strb r6, [r5], #1 - blo .LBB6_3 - sub.w r3, r12, lr - cmp r3, #4 - bhs .LBB6_7 - b .LBB6_8 -.LBB6_5: - ldrh.w r1, [sp, #62] - strh r1, [r0, #30] - ldrh.w r1, [sp, #60] - strh r1, [r0, #28] - ldrh.w r1, [sp, #58] - strh r1, [r0, #26] - ldrh.w r1, [sp, #56] - strh r1, [r0, #24] - ldrh.w r1, [sp, #54] - strh r1, [r0, #22] - ldrh.w r1, [sp, #52] - strh r1, [r0, #20] - ldrh.w r1, [sp, #50] - strh r1, [r0, #18] - ldrh.w r1, [sp, #48] - strh r1, [r0, #16] - ldrh.w r1, [sp, #46] - strh r1, [r0, #14] - ldrh.w r1, [sp, #44] - strh r1, [r0, #12] - ldrh.w r1, [sp, #42] - strh r1, [r0, #10] - ldrh.w r1, [sp, #40] - strh r1, [r0, #8] - ldrh.w r1, [sp, #38] - strh r1, [r0, #6] - ldrh.w r1, [sp, #36] - strh r1, [r0, #4] - ldrh.w r1, [sp, #34] - strh r1, [r0, #2] - ldrh.w r1, [sp, #32] - strh r1, [r0] - b .LBB6_11 -.LBB6_6: - movs r3, #32 -.LBB6_7: - ldr r6, [r1, r2] - subs r3, #4 - cmp r3, #3 - str r6, [r0, r2] - add.w r2, r2, #4 - bhi .LBB6_7 -.LBB6_8: - cbz r3, .LBB6_11 - add r1, r2 - add r0, r2 -.LBB6_10: - ldrb r2, [r1], #1 - subs r3, #1 - strb r2, [r0], #1 - bne .LBB6_10 -.LBB6_11: - add sp, #64 - pop {r4, r5, r6, pc} + pop {r7, pc} asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: push {r4, r5, r7, lr} ldrh r2, [r1, #28] @@ -185,67 +141,3 @@ asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: strh r3, [r0] dmb ish pop {r4, r5, r7, pc} -.LBB6_3: - ldrb r6, [r4], #1 - adds r3, #1 - strb r6, [r5], #1 - blo .LBB6_3 - sub.w r3, r12, lr - cmp r3, #4 - bhs .LBB6_7 - b .LBB6_8 -.LBB6_5: - ldrh.w r1, [sp, #62] - strh r1, [r0, #30] - ldrh.w r1, [sp, #60] - strh r1, [r0, #28] - ldrh.w r1, [sp, #58] - strh r1, [r0, #26] - ldrh.w r1, [sp, #56] - strh r1, [r0, #24] - ldrh.w r1, [sp, #54] - strh r1, [r0, #22] - ldrh.w r1, [sp, #52] - strh r1, [r0, #20] - ldrh.w r1, [sp, #50] - strh r1, [r0, #18] - ldrh.w r1, [sp, #48] - strh r1, [r0, #16] - ldrh.w 
r1, [sp, #46] - strh r1, [r0, #14] - ldrh.w r1, [sp, #44] - strh r1, [r0, #12] - ldrh.w r1, [sp, #42] - strh r1, [r0, #10] - ldrh.w r1, [sp, #40] - strh r1, [r0, #8] - ldrh.w r1, [sp, #38] - strh r1, [r0, #6] - ldrh.w r1, [sp, #36] - strh r1, [r0, #4] - ldrh.w r1, [sp, #34] - strh r1, [r0, #2] - ldrh.w r1, [sp, #32] - strh r1, [r0] - b .LBB6_11 -.LBB6_6: - movs r3, #32 -.LBB6_7: - ldr r6, [r1, r2] - subs r3, #4 - cmp r3, #3 - str r6, [r0, r2] - add.w r2, r2, #4 - bhi .LBB6_7 -.LBB6_8: - cbz r3, .LBB6_11 - add r1, r2 - add r0, r2 -.LBB6_10: - ldrb r2, [r1], #1 - subs r3, #1 - strb r2, [r0], #1 - bne .LBB6_10 -.LBB6_11: - add sp, #64 - pop {r4, r5, r6, pc} diff --git a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align4 b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align4 index 9f58ea8..b48f234 100644 --- a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align4 @@ -1,19 +1,62 @@ asm_test::atomic_memcpy_load_align4::acquire: - push {r4, r5, r6, lr} - ldr.w r12, [r1, #28] - ldr.w lr, [r1, #24] - ldr r2, [r1, #20] - ldr r3, [r1, #16] - ldr r4, [r1, #12] - ldr r5, [r1, #8] - ldr r6, [r1, #4] + push {r7, lr} + sub sp, #32 + mov lr, sp + add.w r3, lr, #28 + add.w r12, r1, #28 + @APP + ldr.w r2, [r12] + str r2, [r3] + @NO_APP + add.w r2, r1, #24 + add.w r3, lr, #24 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add.w r12, lr, #16 + add.w r2, r1, #20 + add.w r3, lr, #20 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add.w r2, r1, #16 + mov r3, r12 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add.w r2, r1, #12 + add.w r3, lr, #12 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add.w r2, r1, #8 + add.w r3, lr, #8 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + adds r2, r1, #4 + add.w r3, lr, #4 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + mov r2, lr + @APP ldr r1, [r1] - strd r5, r4, [r0, #8] - 
strd r1, r6, [r0] - strd r3, r2, [r0, #16] - strd lr, r12, [r0, #24] + str r1, [r2] + @NO_APP + vld1.64 {d16, d17}, [lr] + vld1.64 {d18, d19}, [r12] + vst1.32 {d16, d17}, [r0]! + vst1.32 {d18, d19}, [r0] dmb ish - pop {r4, r5, r6, pc} + add sp, #32 + pop {r7, pc} asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: push {r4, r5, r6, lr} ldr.w lr, [r1, #20] diff --git a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align8 b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align8 index 6c8568a..0330738 100644 --- a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_load_align8 @@ -1,19 +1,62 @@ asm_test::atomic_memcpy_load_align8::acquire: - push {r4, r5, r6, lr} - ldr.w r12, [r1, #28] - ldr.w lr, [r1, #24] - ldr r2, [r1, #20] - ldr r3, [r1, #16] - ldr r4, [r1, #12] - ldr r5, [r1, #8] - ldr r6, [r1, #4] + push {r7, lr} + sub sp, #32 + mov lr, sp + add.w r3, lr, #28 + add.w r12, r1, #28 + @APP + ldr.w r2, [r12] + str r2, [r3] + @NO_APP + add.w r2, r1, #24 + add.w r3, lr, #24 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add.w r12, lr, #16 + add.w r2, r1, #20 + add.w r3, lr, #20 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add.w r2, r1, #16 + mov r3, r12 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add.w r2, r1, #12 + add.w r3, lr, #12 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + add.w r2, r1, #8 + add.w r3, lr, #8 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + adds r2, r1, #4 + add.w r3, lr, #4 + @APP + ldr r2, [r2] + str r2, [r3] + @NO_APP + mov r2, lr + @APP ldr r1, [r1] - strd r5, r4, [r0, #8] - strd r1, r6, [r0] - strd r3, r2, [r0, #16] - strd lr, r12, [r0, #24] + str r1, [r2] + @NO_APP + vld1.64 {d16, d17}, [lr] + vld1.64 {d18, d19}, [r12] + vst1.64 {d16, d17}, [r0]! 
+ vst1.64 {d18, d19}, [r0] dmb ish - pop {r4, r5, r6, pc} + add sp, #32 + pop {r7, pc} asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: push {r4, r5, r6, lr} ldrd lr, r12, [r1, #8] diff --git a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align1 b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align1 index 2eb4b09..1372b46 100644 --- a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align1 @@ -1,7 +1,8 @@ asm_test::atomic_memcpy_store_align1::release: - push {r4, r5, r6, lr} + push {r7, lr} sub sp, #64 vld1.8 {d16, d17}, [r1]! + movs r3, #1 vld1.8 {d18, d19}, [r1] mov r1, sp mov r2, r1 @@ -10,254 +11,201 @@ asm_test::atomic_memcpy_store_align1::release: dmb ish vld1.64 {d16, d17}, [r1] add r1, sp, #32 + mov lr, r1 vld1.64 {d18, d19}, [r2] - mov r2, r1 - vst1.64 {d16, d17}, [r2]! - vst1.64 {d18, d19}, [r2] + add.w r2, r1, #31 + vst1.64 {d16, d17}, [lr]! 
+ mov r12, lr + vst1.64 {d18, d19}, [r12], r3 + add.w r3, r0, #31 + @APP + ldrb r2, [r2] + strb r2, [r3] + @NO_APP + add.w r2, r0, #30 + add.w r3, r1, #30 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #29 + add.w r3, r1, #29 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #28 + add.w r3, r1, #28 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #27 + add.w r3, r1, #27 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #26 + add.w r3, r1, #26 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #25 + add.w r3, r1, #25 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #24 + add.w r3, r1, #24 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #23 + add.w r3, r1, #23 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #22 + add.w r3, r1, #22 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #21 + add.w r3, r1, #21 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #20 + add.w r3, r1, #20 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #19 + add.w r3, r1, #19 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #18 + add.w r3, r1, #18 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #17 + @APP + ldrb.w r3, [r12] + strb r3, [r2] + @NO_APP + add.w r2, r0, #16 + @APP + ldrb.w r3, [lr] + strb r3, [r2] + @NO_APP + add.w r2, r0, #15 + add.w r3, r1, #15 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #14 + add.w r3, r1, #14 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #13 + add.w r3, r1, #13 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #12 + add.w r3, r1, #12 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #11 + add.w r3, r1, #11 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #10 + add.w r3, r1, #10 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #9 + add.w r3, r1, #9 + @APP + 
ldrb r3, [r3] + strb r3, [r2] + @NO_APP + add.w r2, r0, #8 + add.w r3, r1, #8 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + adds r2, r0, #7 + adds r3, r1, #7 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + adds r2, r0, #6 + adds r3, r1, #6 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + adds r2, r0, #5 + adds r3, r1, #5 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + adds r2, r0, #4 + adds r3, r1, #4 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP adds r2, r0, #3 - bic lr, r2, #3 - sub.w r2, lr, r0 - cmp r2, #33 - bhs .LBB2_5 - cmp r2, #0 - beq .LBB2_6 - sub.w r3, r0, lr - add.w r12, r0, #32 - add r4, sp, #32 - mov r5, r0 -.LBB2_3: - ldrb r6, [r4], #1 - adds r3, #1 - strb r6, [r5], #1 - blo .LBB2_3 - sub.w r3, r12, lr - cmp r3, #4 - bhs .LBB2_7 - b .LBB2_8 -.LBB2_5: - ldrb.w r1, [sp, #32] + adds r3, r1, #3 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + adds r2, r0, #2 + adds r3, r1, #2 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + adds r2, r0, #1 + adds r3, r1, #1 + @APP + ldrb r3, [r3] + strb r3, [r2] + @NO_APP + @APP + ldrb r1, [r1] strb r1, [r0] - ldrb.w r1, [sp, #33] - strb r1, [r0, #1] - ldrb.w r1, [sp, #34] - strb r1, [r0, #2] - ldrb.w r1, [sp, #35] - strb r1, [r0, #3] - ldrb.w r1, [sp, #36] - strb r1, [r0, #4] - ldrb.w r1, [sp, #37] - strb r1, [r0, #5] - ldrb.w r1, [sp, #38] - strb r1, [r0, #6] - ldrb.w r1, [sp, #39] - strb r1, [r0, #7] - ldrb.w r1, [sp, #40] - strb r1, [r0, #8] - ldrb.w r1, [sp, #41] - strb r1, [r0, #9] - ldrb.w r1, [sp, #42] - strb r1, [r0, #10] - ldrb.w r1, [sp, #43] - strb r1, [r0, #11] - ldrb.w r1, [sp, #44] - strb r1, [r0, #12] - ldrb.w r1, [sp, #45] - strb r1, [r0, #13] - ldrb.w r1, [sp, #46] - strb r1, [r0, #14] - ldrb.w r1, [sp, #47] - strb r1, [r0, #15] - ldrb.w r1, [sp, #48] - strb r1, [r0, #16] - ldrb.w r1, [sp, #49] - strb r1, [r0, #17] - ldrb.w r1, [sp, #50] - strb r1, [r0, #18] - ldrb.w r1, [sp, #51] - strb r1, [r0, #19] - ldrb.w r1, [sp, #52] - strb r1, [r0, #20] - ldrb.w r1, [sp, #53] - strb r1, [r0, 
#21] - ldrb.w r1, [sp, #54] - strb r1, [r0, #22] - ldrb.w r1, [sp, #55] - strb r1, [r0, #23] - ldrb.w r1, [sp, #56] - strb r1, [r0, #24] - ldrb.w r1, [sp, #57] - strb r1, [r0, #25] - ldrb.w r1, [sp, #58] - strb r1, [r0, #26] - ldrb.w r1, [sp, #59] - strb r1, [r0, #27] - ldrb.w r1, [sp, #60] - strb r1, [r0, #28] - ldrb.w r1, [sp, #61] - strb r1, [r0, #29] - ldrb.w r1, [sp, #62] - strb r1, [r0, #30] - ldrb.w r1, [sp, #63] - strb r1, [r0, #31] - b .LBB2_11 -.LBB2_6: - movs r3, #32 -.LBB2_7: - ldr r6, [r1, r2] - subs r3, #4 - cmp r3, #3 - str r6, [r0, r2] - add.w r2, r2, #4 - bhi .LBB2_7 -.LBB2_8: - cbz r3, .LBB2_11 - add r1, r2 - add r0, r2 -.LBB2_10: - ldrb r2, [r1], #1 - subs r3, #1 - strb r2, [r0], #1 - bne .LBB2_10 -.LBB2_11: + @NO_APP add sp, #64 - pop {r4, r5, r6, pc} -.LBB4_3: - ldrb r6, [r5], #1 - adds r3, #1 - strb r6, [r4], #1 - blo .LBB4_3 - sub.w r3, r12, lr - cmp r3, #4 - bhs .LBB4_7 - b .LBB4_9 -.LBB4_5: - ldrh r2, [r1, #30] - strh.w r2, [sp, #30] - ldrh r2, [r1, #28] - strh.w r2, [sp, #28] - ldrh r2, [r1, #26] - strh.w r2, [sp, #26] - ldrh r2, [r1, #24] - strh.w r2, [sp, #24] - ldrh r2, [r1, #22] - strh.w r2, [sp, #22] - ldrh r2, [r1, #20] - strh.w r2, [sp, #20] - ldrh r2, [r1, #18] - strh.w r2, [sp, #18] - ldrh r2, [r1, #16] - strh.w r2, [sp, #16] - ldrh r2, [r1, #14] - strh.w r2, [sp, #14] - ldrh r2, [r1, #12] - strh.w r2, [sp, #12] - ldrh r2, [r1, #10] - strh.w r2, [sp, #10] - ldrh r2, [r1, #8] - strh.w r2, [sp, #8] - ldrh r2, [r1, #6] - strh.w r2, [sp, #6] - ldrh r2, [r1, #4] - strh.w r2, [sp, #4] - ldrh r2, [r1, #2] - strh.w r2, [sp, #2] - ldrh r1, [r1] - strh.w r1, [sp] - b .LBB4_12 -.LBB4_6: - movs r3, #32 -.LBB4_7: - mov r4, sp -.LBB4_8: - ldr r6, [r1, r2] - subs r3, #4 - cmp r3, #3 - str r6, [r4, r2] - add.w r2, r2, #4 - bhi .LBB4_8 -.LBB4_9: - cbz r3, .LBB4_12 - add r1, r2 - mov r6, sp - add r2, r6 -.LBB4_11: - ldrb r6, [r1], #1 - subs r3, #1 - strb r6, [r2], #1 - bne .LBB4_11 -.LBB4_12: - mov r1, sp - vld1.16 {d16, d17}, [r1]! 
- vld1.64 {d18, d19}, [r1] - vst1.16 {d16, d17}, [r0]! - vst1.16 {d18, d19}, [r0] - dmb ish - add sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r6, [r4], #1 - adds r3, #1 - strb r6, [r5], #1 - blo .LBB6_3 - sub.w r3, r12, lr - cmp r3, #4 - bhs .LBB6_7 - b .LBB6_8 -.LBB6_5: - ldrh.w r1, [sp, #62] - strh r1, [r0, #30] - ldrh.w r1, [sp, #60] - strh r1, [r0, #28] - ldrh.w r1, [sp, #58] - strh r1, [r0, #26] - ldrh.w r1, [sp, #56] - strh r1, [r0, #24] - ldrh.w r1, [sp, #54] - strh r1, [r0, #22] - ldrh.w r1, [sp, #52] - strh r1, [r0, #20] - ldrh.w r1, [sp, #50] - strh r1, [r0, #18] - ldrh.w r1, [sp, #48] - strh r1, [r0, #16] - ldrh.w r1, [sp, #46] - strh r1, [r0, #14] - ldrh.w r1, [sp, #44] - strh r1, [r0, #12] - ldrh.w r1, [sp, #42] - strh r1, [r0, #10] - ldrh.w r1, [sp, #40] - strh r1, [r0, #8] - ldrh.w r1, [sp, #38] - strh r1, [r0, #6] - ldrh.w r1, [sp, #36] - strh r1, [r0, #4] - ldrh.w r1, [sp, #34] - strh r1, [r0, #2] - ldrh.w r1, [sp, #32] - strh r1, [r0] - b .LBB6_11 -.LBB6_6: - movs r3, #32 -.LBB6_7: - ldr r6, [r1, r2] - subs r3, #4 - cmp r3, #3 - str r6, [r0, r2] - add.w r2, r2, #4 - bhi .LBB6_7 -.LBB6_8: - cbz r3, .LBB6_11 - add r1, r2 - add r0, r2 -.LBB6_10: - ldrb r2, [r1], #1 - subs r3, #1 - strb r2, [r0], #1 - bne .LBB6_10 -.LBB6_11: - add sp, #64 - pop {r4, r5, r6, pc} + pop {r7, pc} asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: sub sp, #32 dmb ish @@ -273,140 +221,3 @@ asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: vst1.8 {d16, d17}, [r0] add sp, #32 bx lr -.LBB4_3: - ldrb r6, [r5], #1 - adds r3, #1 - strb r6, [r4], #1 - blo .LBB4_3 - sub.w r3, r12, lr - cmp r3, #4 - bhs .LBB4_7 - b .LBB4_9 -.LBB4_5: - ldrh r2, [r1, #30] - strh.w r2, [sp, #30] - ldrh r2, [r1, #28] - strh.w r2, [sp, #28] - ldrh r2, [r1, #26] - strh.w r2, [sp, #26] - ldrh r2, [r1, #24] - strh.w r2, [sp, #24] - ldrh r2, [r1, #22] - strh.w r2, [sp, #22] - ldrh r2, [r1, #20] - strh.w r2, [sp, #20] - ldrh r2, [r1, #18] - strh.w r2, [sp, #18] - ldrh 
r2, [r1, #16] - strh.w r2, [sp, #16] - ldrh r2, [r1, #14] - strh.w r2, [sp, #14] - ldrh r2, [r1, #12] - strh.w r2, [sp, #12] - ldrh r2, [r1, #10] - strh.w r2, [sp, #10] - ldrh r2, [r1, #8] - strh.w r2, [sp, #8] - ldrh r2, [r1, #6] - strh.w r2, [sp, #6] - ldrh r2, [r1, #4] - strh.w r2, [sp, #4] - ldrh r2, [r1, #2] - strh.w r2, [sp, #2] - ldrh r1, [r1] - strh.w r1, [sp] - b .LBB4_12 -.LBB4_6: - movs r3, #32 -.LBB4_7: - mov r4, sp -.LBB4_8: - ldr r6, [r1, r2] - subs r3, #4 - cmp r3, #3 - str r6, [r4, r2] - add.w r2, r2, #4 - bhi .LBB4_8 -.LBB4_9: - cbz r3, .LBB4_12 - add r1, r2 - mov r6, sp - add r2, r6 -.LBB4_11: - ldrb r6, [r1], #1 - subs r3, #1 - strb r6, [r2], #1 - bne .LBB4_11 -.LBB4_12: - mov r1, sp - vld1.16 {d16, d17}, [r1]! - vld1.64 {d18, d19}, [r1] - vst1.16 {d16, d17}, [r0]! - vst1.16 {d18, d19}, [r0] - dmb ish - add sp, #32 - pop {r4, r5, r6, pc} -.LBB6_3: - ldrb r6, [r4], #1 - adds r3, #1 - strb r6, [r5], #1 - blo .LBB6_3 - sub.w r3, r12, lr - cmp r3, #4 - bhs .LBB6_7 - b .LBB6_8 -.LBB6_5: - ldrh.w r1, [sp, #62] - strh r1, [r0, #30] - ldrh.w r1, [sp, #60] - strh r1, [r0, #28] - ldrh.w r1, [sp, #58] - strh r1, [r0, #26] - ldrh.w r1, [sp, #56] - strh r1, [r0, #24] - ldrh.w r1, [sp, #54] - strh r1, [r0, #22] - ldrh.w r1, [sp, #52] - strh r1, [r0, #20] - ldrh.w r1, [sp, #50] - strh r1, [r0, #18] - ldrh.w r1, [sp, #48] - strh r1, [r0, #16] - ldrh.w r1, [sp, #46] - strh r1, [r0, #14] - ldrh.w r1, [sp, #44] - strh r1, [r0, #12] - ldrh.w r1, [sp, #42] - strh r1, [r0, #10] - ldrh.w r1, [sp, #40] - strh r1, [r0, #8] - ldrh.w r1, [sp, #38] - strh r1, [r0, #6] - ldrh.w r1, [sp, #36] - strh r1, [r0, #4] - ldrh.w r1, [sp, #34] - strh r1, [r0, #2] - ldrh.w r1, [sp, #32] - strh r1, [r0] - b .LBB6_11 -.LBB6_6: - movs r3, #32 -.LBB6_7: - ldr r6, [r1, r2] - subs r3, #4 - cmp r3, #3 - str r6, [r0, r2] - add.w r2, r2, #4 - bhi .LBB6_7 -.LBB6_8: - cbz r3, .LBB6_11 - add r1, r2 - add r0, r2 -.LBB6_10: - ldrb r2, [r1], #1 - subs r3, #1 - strb r2, [r0], #1 - bne .LBB6_10 
-.LBB6_11: - add sp, #64 - pop {r4, r5, r6, pc} diff --git a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align16 b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align16 index 1ccf448..408e131 100644 --- a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align16 @@ -1,19 +1,72 @@ asm_test::atomic_memcpy_store_align16::release: - push {r4, r5, r6, lr} - add.w r6, r1, #8 - ldrd r12, lr, [r1] - ldr r1, [r1, #28] - ldm r6, {r2, r3, r4, r5, r6} + push {r4, r6, r7, lr} + add r7, sp, #8 + sub sp, #64 + mov r4, sp + bfc r4, #0, #4 + mov sp, r4 + vld1.64 {d16, d17}, [r1:128]! + add.w lr, sp, #32 + mov.w r12, #4 + add.w r3, lr, #28 + vld1.64 {d18, d19}, [r1:128] + mov r1, sp + mov r2, r1 + vst1.64 {d16, d17}, [r2:128]! + vst1.64 {d18, d19}, [r2:128] dmb ish - str r1, [r0, #28] - str r6, [r0, #24] - str r5, [r0, #20] - str r4, [r0, #16] - str r3, [r0, #12] - str r2, [r0, #8] - str.w lr, [r0, #4] - str.w r12, [r0] - pop {r4, r5, r6, pc} + vld1.64 {d16, d17}, [r1:128] + add.w r1, r0, #28 + vld1.64 {d18, d19}, [r2:128] + mov r2, lr + vst1.64 {d16, d17}, [r2:128]! 
+ mov r4, r2 + vst1.64 {d18, d19}, [r4:128], r12 + @APP + ldr r3, [r3] + str r3, [r1] + @NO_APP + add.w r1, r0, #24 + add.w r3, lr, #24 + @APP + ldr r3, [r3] + str r3, [r1] + @NO_APP + add.w r1, r0, #20 + @APP + ldr r3, [r4] + str r3, [r1] + @NO_APP + sub.w r4, r7, #8 + add.w r1, r0, #16 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add.w r1, r0, #12 + add.w r2, lr, #12 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add.w r1, r0, #8 + add.w r2, lr, #8 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + adds r1, r0, #4 + add.w r2, lr, #4 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + @APP + ldr.w r1, [lr] + str r1, [r0] + @NO_APP + mov sp, r4 + pop {r4, r6, r7, pc} asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: push {r4, r6, r7, lr} add r7, sp, #8 diff --git a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align2 b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align2 index bdba847..e28b997 100644 --- a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align2 @@ -1,7 +1,10 @@ asm_test::atomic_memcpy_store_align2::release: - push {r4, r5, r6, lr} + push {r4, lr} sub sp, #64 vld1.16 {d16, d17}, [r1]! + add r4, sp, #32 + mov.w r12, #2 + add.w r3, r0, #30 vld1.16 {d18, d19}, [r1] mov r1, sp mov r2, r1 @@ -9,86 +12,104 @@ asm_test::atomic_memcpy_store_align2::release: vst1.64 {d18, d19}, [r2] dmb ish vld1.64 {d16, d17}, [r1] - add r1, sp, #32 + add.w r1, r4, #30 vld1.64 {d18, d19}, [r2] - mov r2, r1 + mov r2, r4 vst1.64 {d16, d17}, [r2]! 
- vst1.64 {d18, d19}, [r2] - adds r2, r0, #3 - bic lr, r2, #3 - sub.w r2, lr, r0 - cmp r2, #32 - bhi .LBB6_5 - cmp r2, #0 - beq .LBB6_6 - sub.w r3, r0, lr - add.w r12, r0, #32 - add r4, sp, #32 - mov r5, r0 -.LBB6_3: - ldrb r6, [r4], #1 - adds r3, #1 - strb r6, [r5], #1 - blo .LBB6_3 - sub.w r3, r12, lr - cmp r3, #4 - bhs .LBB6_7 - b .LBB6_8 -.LBB6_5: - ldrh.w r1, [sp, #62] - strh r1, [r0, #30] - ldrh.w r1, [sp, #60] - strh r1, [r0, #28] - ldrh.w r1, [sp, #58] - strh r1, [r0, #26] - ldrh.w r1, [sp, #56] - strh r1, [r0, #24] - ldrh.w r1, [sp, #54] - strh r1, [r0, #22] - ldrh.w r1, [sp, #52] - strh r1, [r0, #20] - ldrh.w r1, [sp, #50] - strh r1, [r0, #18] - ldrh.w r1, [sp, #48] - strh r1, [r0, #16] - ldrh.w r1, [sp, #46] - strh r1, [r0, #14] - ldrh.w r1, [sp, #44] - strh r1, [r0, #12] - ldrh.w r1, [sp, #42] - strh r1, [r0, #10] - ldrh.w r1, [sp, #40] - strh r1, [r0, #8] - ldrh.w r1, [sp, #38] - strh r1, [r0, #6] - ldrh.w r1, [sp, #36] - strh r1, [r0, #4] - ldrh.w r1, [sp, #34] - strh r1, [r0, #2] - ldrh.w r1, [sp, #32] + mov lr, r2 + vst1.64 {d18, d19}, [lr], r12 + @APP + ldrh r1, [r1] + strh r1, [r3] + @NO_APP + add.w r1, r0, #28 + add.w r3, r4, #28 + @APP + ldrh r3, [r3] + strh r3, [r1] + @NO_APP + add.w r1, r0, #26 + add.w r3, r4, #26 + @APP + ldrh r3, [r3] + strh r3, [r1] + @NO_APP + add.w r1, r0, #24 + add.w r3, r4, #24 + @APP + ldrh r3, [r3] + strh r3, [r1] + @NO_APP + add.w r1, r0, #22 + add.w r3, r4, #22 + @APP + ldrh r3, [r3] + strh r3, [r1] + @NO_APP + add.w r1, r0, #20 + add.w r3, r4, #20 + @APP + ldrh r3, [r3] + strh r3, [r1] + @NO_APP + add.w r1, r0, #18 + @APP + ldrh.w r3, [lr] + strh r3, [r1] + @NO_APP + add.w r1, r0, #16 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add.w r1, r0, #14 + add.w r2, r4, #14 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add.w r1, r0, #12 + add.w r2, r4, #12 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add.w r1, r0, #10 + add.w r2, r4, #10 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + add.w r1, r0, #8 + 
add.w r2, r4, #8 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + adds r1, r0, #6 + adds r2, r4, #6 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + adds r1, r0, #4 + adds r2, r4, #4 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + adds r1, r0, #2 + adds r2, r4, #2 + @APP + ldrh r2, [r2] + strh r2, [r1] + @NO_APP + @APP + ldrh r1, [r4] strh r1, [r0] - b .LBB6_11 -.LBB6_6: - movs r3, #32 -.LBB6_7: - ldr r6, [r1, r2] - subs r3, #4 - cmp r3, #3 - str r6, [r0, r2] - add.w r2, r2, #4 - bhi .LBB6_7 -.LBB6_8: - cbz r3, .LBB6_11 - add r1, r2 - add r0, r2 -.LBB6_10: - ldrb r2, [r1], #1 - subs r3, #1 - strb r2, [r0], #1 - bne .LBB6_10 -.LBB6_11: + @NO_APP add sp, #64 - pop {r4, r5, r6, pc} + pop {r4, pc} asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: sub sp, #32 dmb ish diff --git a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align4 b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align4 index 0658db3..0f99399 100644 --- a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align4 @@ -1,19 +1,67 @@ asm_test::atomic_memcpy_store_align4::release: - push {r4, r5, r6, lr} - add.w r6, r1, #8 - ldrd r12, lr, [r1] - ldr r1, [r1, #28] - ldm r6, {r2, r3, r4, r5, r6} + push {r4, lr} + sub sp, #64 + vld1.32 {d16, d17}, [r1]! + add.w lr, sp, #32 + mov.w r12, #4 + add.w r3, lr, #28 + vld1.32 {d18, d19}, [r1] + mov r1, sp + mov r2, r1 + vst1.64 {d16, d17}, [r2]! + vst1.64 {d18, d19}, [r2] dmb ish - str r1, [r0, #28] - str r6, [r0, #24] - str r5, [r0, #20] - str r4, [r0, #16] - str r3, [r0, #12] - str r2, [r0, #8] - str.w lr, [r0, #4] - str.w r12, [r0] - pop {r4, r5, r6, pc} + vld1.64 {d16, d17}, [r1] + add.w r1, r0, #28 + vld1.64 {d18, d19}, [r2] + mov r2, lr + vst1.64 {d16, d17}, [r2]! 
+ mov r4, r2 + vst1.64 {d18, d19}, [r4], r12 + @APP + ldr r3, [r3] + str r3, [r1] + @NO_APP + add.w r1, r0, #24 + add.w r3, lr, #24 + @APP + ldr r3, [r3] + str r3, [r1] + @NO_APP + add.w r1, r0, #20 + @APP + ldr r3, [r4] + str r3, [r1] + @NO_APP + add.w r1, r0, #16 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add.w r1, r0, #12 + add.w r2, lr, #12 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add.w r1, r0, #8 + add.w r2, lr, #8 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + adds r1, r0, #4 + add.w r2, lr, #4 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + @APP + ldr.w r1, [lr] + str r1, [r0] + @NO_APP + add sp, #64 + pop {r4, pc} asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: sub sp, #32 dmb ish diff --git a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align8 b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align8 index 5927f80..8cf69cb 100644 --- a/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/thumbv7neon-unknown-linux-gnueabihf/atomic_memcpy_store_align8 @@ -1,19 +1,67 @@ asm_test::atomic_memcpy_store_align8::release: - push {r4, r5, r6, lr} - add.w r6, r1, #8 - ldrd r12, lr, [r1] - ldr r1, [r1, #28] - ldm r6, {r2, r3, r4, r5, r6} + push {r4, lr} + sub sp, #64 + vld1.64 {d16, d17}, [r1]! + add.w lr, sp, #32 + mov.w r12, #4 + add.w r3, lr, #28 + vld1.64 {d18, d19}, [r1] + mov r1, sp + mov r2, r1 + vst1.64 {d16, d17}, [r2]! + vst1.64 {d18, d19}, [r2] dmb ish - str r1, [r0, #28] - str r6, [r0, #24] - str r5, [r0, #20] - str r4, [r0, #16] - str r3, [r0, #12] - str r2, [r0, #8] - str.w lr, [r0, #4] - str.w r12, [r0] - pop {r4, r5, r6, pc} + vld1.64 {d16, d17}, [r1] + add.w r1, r0, #28 + vld1.64 {d18, d19}, [r2] + mov r2, lr + vst1.64 {d16, d17}, [r2]! 
+ mov r4, r2 + vst1.64 {d18, d19}, [r4], r12 + @APP + ldr r3, [r3] + str r3, [r1] + @NO_APP + add.w r1, r0, #24 + add.w r3, lr, #24 + @APP + ldr r3, [r3] + str r3, [r1] + @NO_APP + add.w r1, r0, #20 + @APP + ldr r3, [r4] + str r3, [r1] + @NO_APP + add.w r1, r0, #16 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add.w r1, r0, #12 + add.w r2, lr, #12 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + add.w r1, r0, #8 + add.w r2, lr, #8 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + adds r1, r0, #4 + add.w r2, lr, #4 + @APP + ldr r2, [r2] + str r2, [r1] + @NO_APP + @APP + ldr.w r1, [lr] + str r1, [r0] + @NO_APP + add sp, #64 + pop {r4, pc} asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: sub sp, #32 dmb ish diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align1 b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align1 index c94c17b..1aa7576 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align1 @@ -1,267 +1,397 @@ asm_test::atomic_memcpy_load_align1::acquire: sub rsp, 64 mov rax, rdi + lea rcx, [rsi, +, 63] + lea rdx, [rsp, +, 63] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 62] + lea rdx, [rsp, +, 62] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 61] + lea rdx, [rsp, +, 61] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 60] + lea rdx, [rsp, +, 60] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 59] + lea rdx, [rsp, +, 59] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 58] + lea rdx, [rsp, +, 58] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 57] + lea rdx, [rsp, +, 57] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + 
lea rcx, [rsi, +, 56] + lea rdx, [rsp, +, 56] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 55] + lea rdx, [rsp, +, 55] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 54] + lea rdx, [rsp, +, 54] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 53] + lea rdx, [rsp, +, 53] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 52] + lea rdx, [rsp, +, 52] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 51] + lea rdx, [rsp, +, 51] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 50] + lea rdx, [rsp, +, 50] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 49] + lea rdx, [rsp, +, 49] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 48] + lea rdx, [rsp, +, 48] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 47] + lea rdx, [rsp, +, 47] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 46] + lea rdx, [rsp, +, 46] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 45] + lea rdx, [rsp, +, 45] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 44] + lea rdx, [rsp, +, 44] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 43] + lea rdx, [rsp, +, 43] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 42] + lea rdx, [rsp, +, 42] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 41] + lea rdx, [rsp, +, 41] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 40] + lea rdx, [rsp, +, 40] + #APP + mov cl, byte, ptr, [rcx] + mov byte, 
ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 39] + lea rdx, [rsp, +, 39] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 38] + lea rdx, [rsp, +, 38] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 37] + lea rdx, [rsp, +, 37] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 36] + lea rdx, [rsp, +, 36] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 35] + lea rdx, [rsp, +, 35] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 34] + lea rdx, [rsp, +, 34] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 33] + lea rdx, [rsp, +, 33] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 32] + lea rdx, [rsp, +, 32] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 31] + lea rdx, [rsp, +, 31] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 30] + lea rdx, [rsp, +, 30] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 29] + lea rdx, [rsp, +, 29] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 28] + lea rdx, [rsp, +, 28] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 27] + lea rdx, [rsp, +, 27] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 26] + lea rdx, [rsp, +, 26] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 25] + lea rdx, [rsp, +, 25] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 24] + lea rdx, [rsp, +, 24] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 23] + lea rdx, [rsp, +, 23] + #APP + mov cl, byte, 
ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 22] + lea rdx, [rsp, +, 22] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 21] + lea rdx, [rsp, +, 21] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 20] + lea rdx, [rsp, +, 20] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 19] + lea rdx, [rsp, +, 19] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 18] + lea rdx, [rsp, +, 18] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 17] + lea rdx, [rsp, +, 17] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 16] + lea rdx, [rsp, +, 16] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 15] + lea rdx, [rsp, +, 15] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 14] + lea rdx, [rsp, +, 14] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 13] + lea rdx, [rsp, +, 13] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 12] + lea rdx, [rsp, +, 12] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 11] + lea rdx, [rsp, +, 11] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 10] + lea rdx, [rsp, +, 10] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 9] + lea rdx, [rsp, +, 9] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 8] + lea rdx, [rsp, +, 8] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP lea rcx, [rsi, +, 7] - and rcx, -8 - mov r9, rcx - sub r9, rsi - cmp r9, 65 - jae .LBB0_1 - test r9, r9 - je .LBB0_4 - mov rdx, rsi - not rdx - add rcx, rdx - mov r8d, 
r9d - and r8d, 3 - cmp rcx, 3 - jae .LBB0_22 - mov r11d, 64 - xor edi, edi - test r8, r8 - jne .LBB0_10 - jmp .LBB0_13 -.LBB0_1: - mov cl, byte, ptr, [rsi] - mov byte, ptr, [rsp], cl - mov cl, byte, ptr, [rsi, +, 1] - mov byte, ptr, [rsp, +, 1], cl - mov cl, byte, ptr, [rsi, +, 2] - mov byte, ptr, [rsp, +, 2], cl - mov cl, byte, ptr, [rsi, +, 3] - mov byte, ptr, [rsp, +, 3], cl - mov cl, byte, ptr, [rsi, +, 4] - mov byte, ptr, [rsp, +, 4], cl - mov cl, byte, ptr, [rsi, +, 5] - mov byte, ptr, [rsp, +, 5], cl - mov cl, byte, ptr, [rsi, +, 6] - mov byte, ptr, [rsp, +, 6], cl - mov cl, byte, ptr, [rsi, +, 7] - mov byte, ptr, [rsp, +, 7], cl - mov cl, byte, ptr, [rsi, +, 8] - mov byte, ptr, [rsp, +, 8], cl - mov cl, byte, ptr, [rsi, +, 9] - mov byte, ptr, [rsp, +, 9], cl - mov cl, byte, ptr, [rsi, +, 10] - mov byte, ptr, [rsp, +, 10], cl - mov cl, byte, ptr, [rsi, +, 11] - mov byte, ptr, [rsp, +, 11], cl - mov cl, byte, ptr, [rsi, +, 12] - mov byte, ptr, [rsp, +, 12], cl - mov cl, byte, ptr, [rsi, +, 13] - mov byte, ptr, [rsp, +, 13], cl - mov cl, byte, ptr, [rsi, +, 14] - mov byte, ptr, [rsp, +, 14], cl - mov cl, byte, ptr, [rsi, +, 15] - mov byte, ptr, [rsp, +, 15], cl - mov cl, byte, ptr, [rsi, +, 16] - mov byte, ptr, [rsp, +, 16], cl - mov cl, byte, ptr, [rsi, +, 17] - mov byte, ptr, [rsp, +, 17], cl - mov cl, byte, ptr, [rsi, +, 18] - mov byte, ptr, [rsp, +, 18], cl - mov cl, byte, ptr, [rsi, +, 19] - mov byte, ptr, [rsp, +, 19], cl - mov cl, byte, ptr, [rsi, +, 20] - mov byte, ptr, [rsp, +, 20], cl - mov cl, byte, ptr, [rsi, +, 21] - mov byte, ptr, [rsp, +, 21], cl - mov cl, byte, ptr, [rsi, +, 22] - mov byte, ptr, [rsp, +, 22], cl - mov cl, byte, ptr, [rsi, +, 23] - mov byte, ptr, [rsp, +, 23], cl - mov cl, byte, ptr, [rsi, +, 24] - mov byte, ptr, [rsp, +, 24], cl - mov cl, byte, ptr, [rsi, +, 25] - mov byte, ptr, [rsp, +, 25], cl - mov cl, byte, ptr, [rsi, +, 26] - mov byte, ptr, [rsp, +, 26], cl - mov cl, byte, ptr, [rsi, +, 27] - mov byte, ptr, [rsp, +, 27], 
cl - mov cl, byte, ptr, [rsi, +, 28] - mov byte, ptr, [rsp, +, 28], cl - mov cl, byte, ptr, [rsi, +, 29] - mov byte, ptr, [rsp, +, 29], cl - mov cl, byte, ptr, [rsi, +, 30] - mov byte, ptr, [rsp, +, 30], cl - mov cl, byte, ptr, [rsi, +, 31] - mov byte, ptr, [rsp, +, 31], cl - mov cl, byte, ptr, [rsi, +, 32] - mov byte, ptr, [rsp, +, 32], cl - mov cl, byte, ptr, [rsi, +, 33] - mov byte, ptr, [rsp, +, 33], cl - mov cl, byte, ptr, [rsi, +, 34] - mov byte, ptr, [rsp, +, 34], cl - mov cl, byte, ptr, [rsi, +, 35] - mov byte, ptr, [rsp, +, 35], cl - mov cl, byte, ptr, [rsi, +, 36] - mov byte, ptr, [rsp, +, 36], cl - mov cl, byte, ptr, [rsi, +, 37] - mov byte, ptr, [rsp, +, 37], cl - mov cl, byte, ptr, [rsi, +, 38] - mov byte, ptr, [rsp, +, 38], cl - mov cl, byte, ptr, [rsi, +, 39] - mov byte, ptr, [rsp, +, 39], cl - mov cl, byte, ptr, [rsi, +, 40] - mov byte, ptr, [rsp, +, 40], cl - mov cl, byte, ptr, [rsi, +, 41] - mov byte, ptr, [rsp, +, 41], cl - mov cl, byte, ptr, [rsi, +, 42] - mov byte, ptr, [rsp, +, 42], cl - mov cl, byte, ptr, [rsi, +, 43] - mov byte, ptr, [rsp, +, 43], cl - mov cl, byte, ptr, [rsi, +, 44] - mov byte, ptr, [rsp, +, 44], cl - mov cl, byte, ptr, [rsi, +, 45] - mov byte, ptr, [rsp, +, 45], cl - mov cl, byte, ptr, [rsi, +, 46] - mov byte, ptr, [rsp, +, 46], cl - mov cl, byte, ptr, [rsi, +, 47] - mov byte, ptr, [rsp, +, 47], cl - mov cl, byte, ptr, [rsi, +, 48] - mov byte, ptr, [rsp, +, 48], cl - mov cl, byte, ptr, [rsi, +, 49] - mov byte, ptr, [rsp, +, 49], cl - mov cl, byte, ptr, [rsi, +, 50] - mov byte, ptr, [rsp, +, 50], cl - mov cl, byte, ptr, [rsi, +, 51] - mov byte, ptr, [rsp, +, 51], cl - mov cl, byte, ptr, [rsi, +, 52] - mov byte, ptr, [rsp, +, 52], cl - mov cl, byte, ptr, [rsi, +, 53] - mov byte, ptr, [rsp, +, 53], cl - mov cl, byte, ptr, [rsi, +, 54] - mov byte, ptr, [rsp, +, 54], cl - mov cl, byte, ptr, [rsi, +, 55] - mov byte, ptr, [rsp, +, 55], cl - mov cl, byte, ptr, [rsi, +, 56] - mov byte, ptr, [rsp, +, 56], cl - mov cl, byte, ptr, 
[rsi, +, 57] - mov byte, ptr, [rsp, +, 57], cl - mov cl, byte, ptr, [rsi, +, 58] - mov byte, ptr, [rsp, +, 58], cl - mov cl, byte, ptr, [rsi, +, 59] - mov byte, ptr, [rsp, +, 59], cl - mov cl, byte, ptr, [rsi, +, 60] - mov byte, ptr, [rsp, +, 60], cl - mov cl, byte, ptr, [rsi, +, 61] - mov byte, ptr, [rsp, +, 61], cl - mov cl, byte, ptr, [rsi, +, 62] - mov byte, ptr, [rsp, +, 62], cl - mov cl, byte, ptr, [rsi, +, 63] - mov byte, ptr, [rsp, +, 63], cl - movups xmm0, xmmword, ptr, [rsp] - movups xmm1, xmmword, ptr, [rsp, +, 16] - movups xmm2, xmmword, ptr, [rsp, +, 32] - movups xmmword, ptr, [rax, +, 32], xmm2 - movups xmmword, ptr, [rax, +, 16], xmm1 - movups xmmword, ptr, [rax], xmm0 - mov rcx, qword, ptr, [rsp, +, 48] - mov qword, ptr, [rax, +, 48], rcx - mov cl, byte, ptr, [rsp, +, 56] - mov byte, ptr, [rax, +, 56], cl - mov cl, byte, ptr, [rsp, +, 57] - mov byte, ptr, [rax, +, 57], cl - mov cl, byte, ptr, [rsp, +, 58] - mov byte, ptr, [rax, +, 58], cl - mov cl, byte, ptr, [rsp, +, 59] - mov byte, ptr, [rax, +, 59], cl - mov cl, byte, ptr, [rsp, +, 60] - mov byte, ptr, [rax, +, 60], cl - mov cl, byte, ptr, [rsp, +, 61] - mov byte, ptr, [rax, +, 61], cl - mov cl, byte, ptr, [rsp, +, 62] - mov byte, ptr, [rax, +, 62], cl - mov cl, byte, ptr, [rsp, +, 63] - mov byte, ptr, [rax, +, 63], cl - #MEMBARRIER - add rsp, 64 - ret -.LBB0_4: - mov r11d, 64 - xor r9d, r9d - jmp .LBB0_5 -.LBB0_22: - mov rcx, r9 - and rcx, -4 - xor edi, edi -.LBB0_23: - movzx edx, byte, ptr, [rsi, +, rdi] - mov byte, ptr, [rsp, +, rdi], dl - movzx edx, byte, ptr, [rsi, +, rdi, +, 1] - mov byte, ptr, [rsp, +, rdi, +, 1], dl - movzx edx, byte, ptr, [rsi, +, rdi, +, 2] - mov byte, ptr, [rsp, +, rdi, +, 2], dl - movzx edx, byte, ptr, [rsi, +, rdi, +, 3] - mov byte, ptr, [rsp, +, rdi, +, 3], dl - add rdi, 4 - cmp rcx, rdi - jne .LBB0_23 - mov r11d, 64 - sub r11, rdi - test r8, r8 - je .LBB0_13 -.LBB0_10: - lea r10, [rsp, +, rdi] - add rdi, rsi - xor edx, edx -.LBB0_11: - movzx ecx, byte, ptr, [rdi, 
+, rdx] - mov byte, ptr, [r10, +, rdx], cl - add rdx, 1 - cmp r8, rdx - jne .LBB0_11 - sub r11, rdx -.LBB0_13: - cmp r11, 8 - jb .LBB0_14 -.LBB0_5: - mov rcx, qword, ptr, [rsi, +, r9] - mov qword, ptr, [rsp, +, r9], rcx - add r11, -8 - add r9, 8 - cmp r11, 7 - ja .LBB0_5 -.LBB0_14: - test r11, r11 - je .LBB0_21 - lea r8, [r11, -, 1] - mov rdx, r11 - mov rdi, r9 - and rdx, 3 - je .LBB0_18 - mov rdi, r9 -.LBB0_17: - movzx ecx, byte, ptr, [rsi, +, rdi] - mov byte, ptr, [rsp, +, rdi], cl - add rdi, 1 - add rdx, -1 - jne .LBB0_17 -.LBB0_18: - cmp r8, 3 - jb .LBB0_21 - add r11, r9 -.LBB0_20: - movzx ecx, byte, ptr, [rsi, +, rdi] - mov byte, ptr, [rsp, +, rdi], cl - movzx ecx, byte, ptr, [rsi, +, rdi, +, 1] - mov byte, ptr, [rsp, +, rdi, +, 1], cl - movzx ecx, byte, ptr, [rsi, +, rdi, +, 2] - mov byte, ptr, [rsp, +, rdi, +, 2], cl - movzx ecx, byte, ptr, [rsi, +, rdi, +, 3] - mov byte, ptr, [rsp, +, rdi, +, 3], cl - add rdi, 4 - cmp r11, rdi - jne .LBB0_20 -.LBB0_21: + lea rdx, [rsp, +, 7] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 6] + lea rdx, [rsp, +, 6] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 5] + lea rdx, [rsp, +, 5] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 4] + lea rdx, [rsp, +, 4] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 3] + lea rdx, [rsp, +, 3] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 2] + lea rdx, [rsp, +, 2] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + lea rcx, [rsi, +, 1] + lea rdx, [rsp, +, 1] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rdx], cl + #NO_APP + mov rcx, rsp + #APP + mov dl, byte, ptr, [rsi] + mov byte, ptr, [rcx], dl + #NO_APP movups xmm0, xmmword, ptr, [rsp] movups xmm1, xmmword, ptr, [rsp, +, 16] movups xmm2, xmmword, ptr, [rsp, +, 32] movups xmm3, xmmword, ptr, 
[rsp, +, 48] - movups xmmword, ptr, [rax, +, 48], xmm3 - movups xmmword, ptr, [rax, +, 32], xmm2 - movups xmmword, ptr, [rax, +, 16], xmm1 - movups xmmword, ptr, [rax], xmm0 + movups xmmword, ptr, [rdi], xmm0 + movups xmmword, ptr, [rdi, +, 16], xmm1 + movups xmmword, ptr, [rdi, +, 32], xmm2 + movups xmmword, ptr, [rdi, +, 48], xmm3 #MEMBARRIER add rsp, 64 ret diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align16 b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align16 index d2e73b5..346486f 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align16 @@ -1,22 +1,63 @@ asm_test::atomic_memcpy_load_align16::acquire: + sub rsp, 72 mov rax, rdi - mov r8, qword, ptr, [rsi, +, 56] - mov r9, qword, ptr, [rsi, +, 48] - mov r10, qword, ptr, [rsi, +, 40] - mov r11, qword, ptr, [rsi, +, 32] - mov rdx, qword, ptr, [rsi, +, 24] - mov rdi, qword, ptr, [rsi, +, 16] - mov rcx, qword, ptr, [rsi, +, 8] - mov rsi, qword, ptr, [rsi] - mov qword, ptr, [rax], rsi - mov qword, ptr, [rax, +, 8], rcx - mov qword, ptr, [rax, +, 16], rdi - mov qword, ptr, [rax, +, 24], rdx - mov qword, ptr, [rax, +, 32], r11 - mov qword, ptr, [rax, +, 40], r10 - mov qword, ptr, [rax, +, 48], r9 - mov qword, ptr, [rax, +, 56], r8 + lea rcx, [rsi, +, 56] + lea rdx, [rsp, +, 56] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rdx], rcx + #NO_APP + lea rcx, [rsi, +, 48] + lea rdx, [rsp, +, 48] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rdx], rcx + #NO_APP + lea rcx, [rsi, +, 40] + lea rdx, [rsp, +, 40] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rdx], rcx + #NO_APP + lea rcx, [rsi, +, 32] + lea rdx, [rsp, +, 32] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rdx], rcx + #NO_APP + lea rcx, [rsi, +, 24] + lea rdx, [rsp, +, 24] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rdx], rcx + #NO_APP + lea rcx, [rsi, +, 16] + lea 
rdx, [rsp, +, 16] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rdx], rcx + #NO_APP + lea rcx, [rsi, +, 8] + lea rdx, [rsp, +, 8] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rdx], rcx + #NO_APP + mov rcx, rsp + #APP + mov rdx, qword, ptr, [rsi] + mov qword, ptr, [rcx], rdx + #NO_APP + movaps xmm0, xmmword, ptr, [rsp] + movaps xmm1, xmmword, ptr, [rsp, +, 16] + movaps xmm2, xmmword, ptr, [rsp, +, 32] + movaps xmm3, xmmword, ptr, [rsp, +, 48] + movaps xmmword, ptr, [rdi], xmm0 + movaps xmmword, ptr, [rdi, +, 16], xmm1 + movaps xmmword, ptr, [rdi, +, 32], xmm2 + movaps xmmword, ptr, [rdi, +, 48], xmm3 #MEMBARRIER + add rsp, 72 ret asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: mov rax, rdi diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align2 b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align2 index ca97094..9255c49 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align2 @@ -1,191 +1,205 @@ asm_test::atomic_memcpy_load_align2::acquire: sub rsp, 64 mov rax, rdi - lea rcx, [rsi, +, 7] - and rcx, -8 - mov r9, rcx - sub r9, rsi - cmp r9, 64 - ja .LBB4_6 - test r9, r9 - je .LBB4_2 - mov rdx, rsi - not rdx - add rcx, rdx - mov r8d, r9d - and r8d, 3 - cmp rcx, 3 - jae .LBB4_21 - mov r11d, 64 - xor edi, edi - test r8, r8 - jne .LBB4_10 - jmp .LBB4_13 -.LBB4_6: - movzx ecx, word, ptr, [rsi, +, 62] - mov word, ptr, [rsp, +, 62], cx - movzx ecx, word, ptr, [rsi, +, 60] - mov word, ptr, [rsp, +, 60], cx - movzx ecx, word, ptr, [rsi, +, 58] - mov word, ptr, [rsp, +, 58], cx - movzx ecx, word, ptr, [rsi, +, 56] - mov word, ptr, [rsp, +, 56], cx - movzx ecx, word, ptr, [rsi, +, 54] - mov word, ptr, [rsp, +, 54], cx - movzx ecx, word, ptr, [rsi, +, 52] - mov word, ptr, [rsp, +, 52], cx - movzx ecx, word, ptr, [rsi, +, 50] - mov word, ptr, [rsp, +, 50], cx - movzx ecx, word, ptr, [rsi, +, 48] - 
mov word, ptr, [rsp, +, 48], cx - movzx ecx, word, ptr, [rsi, +, 46] - mov word, ptr, [rsp, +, 46], cx - movzx ecx, word, ptr, [rsi, +, 44] - mov word, ptr, [rsp, +, 44], cx - movzx ecx, word, ptr, [rsi, +, 42] - mov word, ptr, [rsp, +, 42], cx - movzx ecx, word, ptr, [rsi, +, 40] - mov word, ptr, [rsp, +, 40], cx - movzx ecx, word, ptr, [rsi, +, 38] - mov word, ptr, [rsp, +, 38], cx - movzx ecx, word, ptr, [rsi, +, 36] - mov word, ptr, [rsp, +, 36], cx - movzx ecx, word, ptr, [rsi, +, 34] - mov word, ptr, [rsp, +, 34], cx - movzx ecx, word, ptr, [rsi, +, 32] - mov word, ptr, [rsp, +, 32], cx - movzx ecx, word, ptr, [rsi, +, 30] - mov word, ptr, [rsp, +, 30], cx - movzx ecx, word, ptr, [rsi, +, 28] - mov word, ptr, [rsp, +, 28], cx - movzx ecx, word, ptr, [rsi, +, 26] - mov word, ptr, [rsp, +, 26], cx - movzx ecx, word, ptr, [rsi, +, 24] - mov word, ptr, [rsp, +, 24], cx - movzx ecx, word, ptr, [rsi, +, 22] - mov word, ptr, [rsp, +, 22], cx - movzx ecx, word, ptr, [rsi, +, 20] - mov word, ptr, [rsp, +, 20], cx - movzx ecx, word, ptr, [rsi, +, 18] - mov word, ptr, [rsp, +, 18], cx - movzx ecx, word, ptr, [rsi, +, 16] - mov word, ptr, [rsp, +, 16], cx - movzx ecx, word, ptr, [rsi, +, 14] - mov word, ptr, [rsp, +, 14], cx - movzx ecx, word, ptr, [rsi, +, 12] - mov word, ptr, [rsp, +, 12], cx - movzx ecx, word, ptr, [rsi, +, 10] - mov word, ptr, [rsp, +, 10], cx - movzx ecx, word, ptr, [rsi, +, 8] - mov word, ptr, [rsp, +, 8], cx - movzx ecx, word, ptr, [rsi, +, 6] - mov word, ptr, [rsp, +, 6], cx - movzx ecx, word, ptr, [rsi, +, 4] - mov word, ptr, [rsp, +, 4], cx - movzx ecx, word, ptr, [rsi, +, 2] - mov word, ptr, [rsp, +, 2], cx - movzx ecx, word, ptr, [rsi] - mov word, ptr, [rsp], cx - jmp .LBB4_7 -.LBB4_2: - mov r11d, 64 - xor r9d, r9d - jmp .LBB4_3 -.LBB4_21: - mov rcx, r9 - and rcx, -4 - xor edi, edi -.LBB4_22: - movzx edx, byte, ptr, [rsi, +, rdi] - mov byte, ptr, [rsp, +, rdi], dl - movzx edx, byte, ptr, [rsi, +, rdi, +, 1] - mov byte, ptr, [rsp, +, rdi, +, 
1], dl - movzx edx, byte, ptr, [rsi, +, rdi, +, 2] - mov byte, ptr, [rsp, +, rdi, +, 2], dl - movzx edx, byte, ptr, [rsi, +, rdi, +, 3] - mov byte, ptr, [rsp, +, rdi, +, 3], dl - add rdi, 4 - cmp rcx, rdi - jne .LBB4_22 - mov r11d, 64 - sub r11, rdi - test r8, r8 - je .LBB4_13 -.LBB4_10: - lea r10, [rsp, +, rdi] - add rdi, rsi - xor edx, edx -.LBB4_11: - movzx ecx, byte, ptr, [rdi, +, rdx] - mov byte, ptr, [r10, +, rdx], cl - add rdx, 1 - cmp r8, rdx - jne .LBB4_11 - sub r11, rdx -.LBB4_13: - cmp r11, 8 - jb .LBB4_14 -.LBB4_3: - mov rcx, qword, ptr, [rsi, +, r9] - mov qword, ptr, [rsp, +, r9], rcx - add r11, -8 - add r9, 8 - cmp r11, 7 - ja .LBB4_3 -.LBB4_14: - test r11, r11 - je .LBB4_7 - lea r8, [r11, -, 1] - mov rdx, r11 - mov rdi, r9 - and rdx, 3 - je .LBB4_18 - mov rdi, r9 -.LBB4_17: - movzx ecx, byte, ptr, [rsi, +, rdi] - mov byte, ptr, [rsp, +, rdi], cl - add rdi, 1 - add rdx, -1 - jne .LBB4_17 -.LBB4_18: - cmp r8, 3 - jb .LBB4_7 - add r11, r9 -.LBB4_20: - movzx ecx, byte, ptr, [rsi, +, rdi] - mov byte, ptr, [rsp, +, rdi], cl - movzx ecx, byte, ptr, [rsi, +, rdi, +, 1] - mov byte, ptr, [rsp, +, rdi, +, 1], cl - movzx ecx, byte, ptr, [rsi, +, rdi, +, 2] - mov byte, ptr, [rsp, +, rdi, +, 2], cl - movzx ecx, byte, ptr, [rsi, +, rdi, +, 3] - mov byte, ptr, [rsp, +, rdi, +, 3], cl - add rdi, 4 - cmp r11, rdi - jne .LBB4_20 -.LBB4_7: - movzx ecx, word, ptr, [rsp] - movzx edx, word, ptr, [rsp, +, 2] - movzx esi, word, ptr, [rsp, +, 4] - movzx edi, word, ptr, [rsp, +, 6] - movzx r8d, word, ptr, [rsp, +, 8] - movzx r9d, word, ptr, [rsp, +, 10] - movzx r10d, word, ptr, [rsp, +, 12] - movzx r11d, word, ptr, [rsp, +, 14] - movups xmm0, xmmword, ptr, [rsp, +, 16] - movups xmm1, xmmword, ptr, [rsp, +, 32] - movups xmm2, xmmword, ptr, [rsp, +, 48] - movups xmmword, ptr, [rax, +, 48], xmm2 - movups xmmword, ptr, [rax, +, 32], xmm1 - movups xmmword, ptr, [rax, +, 16], xmm0 - mov word, ptr, [rax], cx - mov word, ptr, [rax, +, 2], dx - mov word, ptr, [rax, +, 4], si - mov 
word, ptr, [rax, +, 6], di - mov word, ptr, [rax, +, 8], r8w - mov word, ptr, [rax, +, 10], r9w - mov word, ptr, [rax, +, 12], r10w - mov word, ptr, [rax, +, 14], r11w + lea rcx, [rsi, +, 62] + lea rdx, [rsp, +, 62] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 60] + lea rdx, [rsp, +, 60] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 58] + lea rdx, [rsp, +, 58] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 56] + lea rdx, [rsp, +, 56] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 54] + lea rdx, [rsp, +, 54] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 52] + lea rdx, [rsp, +, 52] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 50] + lea rdx, [rsp, +, 50] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 48] + lea rdx, [rsp, +, 48] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 46] + lea rdx, [rsp, +, 46] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 44] + lea rdx, [rsp, +, 44] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 42] + lea rdx, [rsp, +, 42] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 40] + lea rdx, [rsp, +, 40] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 38] + lea rdx, [rsp, +, 38] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 36] + lea rdx, [rsp, +, 36] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 34] + lea rdx, [rsp, +, 34] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 32] + lea rdx, [rsp, +, 
32] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 30] + lea rdx, [rsp, +, 30] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 28] + lea rdx, [rsp, +, 28] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 26] + lea rdx, [rsp, +, 26] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 24] + lea rdx, [rsp, +, 24] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 22] + lea rdx, [rsp, +, 22] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 20] + lea rdx, [rsp, +, 20] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 18] + lea rdx, [rsp, +, 18] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 16] + lea rdx, [rsp, +, 16] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 14] + lea rdx, [rsp, +, 14] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 12] + lea rdx, [rsp, +, 12] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 10] + lea rdx, [rsp, +, 10] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 8] + lea rdx, [rsp, +, 8] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 6] + lea rdx, [rsp, +, 6] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 4] + lea rdx, [rsp, +, 4] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + lea rcx, [rsi, +, 2] + lea rdx, [rsp, +, 2] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rdx], cx + #NO_APP + mov rcx, rsp + #APP + mov dx, word, ptr, [rsi] + mov word, ptr, [rcx], dx + #NO_APP + movups xmm0, xmmword, ptr, [rsp] + movups xmm1, xmmword, 
ptr, [rsp, +, 16] + movups xmm2, xmmword, ptr, [rsp, +, 32] + movups xmm3, xmmword, ptr, [rsp, +, 48] + movups xmmword, ptr, [rdi], xmm0 + movups xmmword, ptr, [rdi, +, 16], xmm1 + movups xmmword, ptr, [rdi, +, 32], xmm2 + movups xmmword, ptr, [rdi, +, 48], xmm3 #MEMBARRIER add rsp, 64 ret diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align4 b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align4 index 2ca5a04..2bb7441 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align4 @@ -1,157 +1,109 @@ asm_test::atomic_memcpy_load_align4::acquire: sub rsp, 64 mov rax, rdi - lea rcx, [rsi, +, 7] - and rcx, -8 - mov r9, rcx - sub r9, rsi - cmp r9, 64 - ja .LBB8_6 - test r9, r9 - je .LBB8_2 - mov rdx, rsi - not rdx - add rcx, rdx - mov r8d, r9d - and r8d, 3 - cmp rcx, 3 - jae .LBB8_21 - mov r11d, 64 - xor edi, edi - test r8, r8 - jne .LBB8_10 - jmp .LBB8_13 -.LBB8_6: - mov ecx, dword, ptr, [rsi, +, 60] - mov dword, ptr, [rsp, +, 60], ecx - mov ecx, dword, ptr, [rsi, +, 56] - mov dword, ptr, [rsp, +, 56], ecx - mov ecx, dword, ptr, [rsi, +, 52] - mov dword, ptr, [rsp, +, 52], ecx - mov ecx, dword, ptr, [rsi, +, 48] - mov dword, ptr, [rsp, +, 48], ecx - mov ecx, dword, ptr, [rsi, +, 44] - mov dword, ptr, [rsp, +, 44], ecx - mov ecx, dword, ptr, [rsi, +, 40] - mov dword, ptr, [rsp, +, 40], ecx - mov ecx, dword, ptr, [rsi, +, 36] - mov dword, ptr, [rsp, +, 36], ecx - mov ecx, dword, ptr, [rsi, +, 32] - mov dword, ptr, [rsp, +, 32], ecx - mov ecx, dword, ptr, [rsi, +, 28] - mov dword, ptr, [rsp, +, 28], ecx - mov ecx, dword, ptr, [rsi, +, 24] - mov dword, ptr, [rsp, +, 24], ecx - mov ecx, dword, ptr, [rsi, +, 20] - mov dword, ptr, [rsp, +, 20], ecx - mov ecx, dword, ptr, [rsi, +, 16] - mov dword, ptr, [rsp, +, 16], ecx - mov ecx, dword, ptr, [rsi, +, 12] - mov dword, ptr, [rsp, +, 12], ecx - mov ecx, dword, ptr, [rsi, +, 8] - mov 
dword, ptr, [rsp, +, 8], ecx - mov ecx, dword, ptr, [rsi, +, 4] - mov dword, ptr, [rsp, +, 4], ecx - mov ecx, dword, ptr, [rsi] - mov dword, ptr, [rsp], ecx - jmp .LBB8_7 -.LBB8_2: - mov r11d, 64 - xor r9d, r9d - jmp .LBB8_3 -.LBB8_21: - mov rcx, r9 - and rcx, -4 - xor edi, edi -.LBB8_22: - movzx edx, byte, ptr, [rsi, +, rdi] - mov byte, ptr, [rsp, +, rdi], dl - movzx edx, byte, ptr, [rsi, +, rdi, +, 1] - mov byte, ptr, [rsp, +, rdi, +, 1], dl - movzx edx, byte, ptr, [rsi, +, rdi, +, 2] - mov byte, ptr, [rsp, +, rdi, +, 2], dl - movzx edx, byte, ptr, [rsi, +, rdi, +, 3] - mov byte, ptr, [rsp, +, rdi, +, 3], dl - add rdi, 4 - cmp rcx, rdi - jne .LBB8_22 - mov r11d, 64 - sub r11, rdi - test r8, r8 - je .LBB8_13 -.LBB8_10: - lea r10, [rsp, +, rdi] - add rdi, rsi - xor edx, edx -.LBB8_11: - movzx ecx, byte, ptr, [rdi, +, rdx] - mov byte, ptr, [r10, +, rdx], cl - add rdx, 1 - cmp r8, rdx - jne .LBB8_11 - sub r11, rdx -.LBB8_13: - cmp r11, 8 - jb .LBB8_14 -.LBB8_3: - mov rcx, qword, ptr, [rsi, +, r9] - mov qword, ptr, [rsp, +, r9], rcx - add r11, -8 - add r9, 8 - cmp r11, 7 - ja .LBB8_3 -.LBB8_14: - test r11, r11 - je .LBB8_7 - lea r8, [r11, -, 1] - mov rdx, r11 - mov rdi, r9 - and rdx, 3 - je .LBB8_18 - mov rdi, r9 -.LBB8_17: - movzx ecx, byte, ptr, [rsi, +, rdi] - mov byte, ptr, [rsp, +, rdi], cl - add rdi, 1 - add rdx, -1 - jne .LBB8_17 -.LBB8_18: - cmp r8, 3 - jb .LBB8_7 - add r11, r9 -.LBB8_20: - movzx ecx, byte, ptr, [rsi, +, rdi] - mov byte, ptr, [rsp, +, rdi], cl - movzx ecx, byte, ptr, [rsi, +, rdi, +, 1] - mov byte, ptr, [rsp, +, rdi, +, 1], cl - movzx ecx, byte, ptr, [rsi, +, rdi, +, 2] - mov byte, ptr, [rsp, +, rdi, +, 2], cl - movzx ecx, byte, ptr, [rsi, +, rdi, +, 3] - mov byte, ptr, [rsp, +, rdi, +, 3], cl - add rdi, 4 - cmp r11, rdi - jne .LBB8_20 -.LBB8_7: - mov r8d, dword, ptr, [rsp] - mov r9d, dword, ptr, [rsp, +, 4] - mov r10d, dword, ptr, [rsp, +, 8] - mov r11d, dword, ptr, [rsp, +, 12] - mov ecx, dword, ptr, [rsp, +, 16] - mov edx, dword, ptr, [rsp, 
+, 20] - mov esi, dword, ptr, [rsp, +, 24] - mov edi, dword, ptr, [rsp, +, 28] - movups xmm0, xmmword, ptr, [rsp, +, 32] - movups xmm1, xmmword, ptr, [rsp, +, 48] - movups xmmword, ptr, [rax, +, 48], xmm1 - movups xmmword, ptr, [rax, +, 32], xmm0 - mov dword, ptr, [rax, +, 16], ecx - mov dword, ptr, [rax, +, 20], edx - mov dword, ptr, [rax, +, 24], esi - mov dword, ptr, [rax, +, 28], edi - mov dword, ptr, [rax], r8d - mov dword, ptr, [rax, +, 4], r9d - mov dword, ptr, [rax, +, 8], r10d - mov dword, ptr, [rax, +, 12], r11d + lea rcx, [rsi, +, 60] + lea rdx, [rsp, +, 60] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rdx], ecx + #NO_APP + lea rcx, [rsi, +, 56] + lea rdx, [rsp, +, 56] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rdx], ecx + #NO_APP + lea rcx, [rsi, +, 52] + lea rdx, [rsp, +, 52] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rdx], ecx + #NO_APP + lea rcx, [rsi, +, 48] + lea rdx, [rsp, +, 48] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rdx], ecx + #NO_APP + lea rcx, [rsi, +, 44] + lea rdx, [rsp, +, 44] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rdx], ecx + #NO_APP + lea rcx, [rsi, +, 40] + lea rdx, [rsp, +, 40] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rdx], ecx + #NO_APP + lea rcx, [rsi, +, 36] + lea rdx, [rsp, +, 36] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rdx], ecx + #NO_APP + lea rcx, [rsi, +, 32] + lea rdx, [rsp, +, 32] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rdx], ecx + #NO_APP + lea rcx, [rsi, +, 28] + lea rdx, [rsp, +, 28] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rdx], ecx + #NO_APP + lea rcx, [rsi, +, 24] + lea rdx, [rsp, +, 24] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rdx], ecx + #NO_APP + lea rcx, [rsi, +, 20] + lea rdx, [rsp, +, 20] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rdx], ecx + #NO_APP + lea rcx, [rsi, +, 16] + lea rdx, [rsp, +, 16] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rdx], ecx + 
#NO_APP + lea rcx, [rsi, +, 12] + lea rdx, [rsp, +, 12] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rdx], ecx + #NO_APP + lea rcx, [rsi, +, 8] + lea rdx, [rsp, +, 8] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rdx], ecx + #NO_APP + lea rcx, [rsi, +, 4] + lea rdx, [rsp, +, 4] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rdx], ecx + #NO_APP + mov rcx, rsp + #APP + mov edx, dword, ptr, [rsi] + mov dword, ptr, [rcx], edx + #NO_APP + movups xmm0, xmmword, ptr, [rsp] + movups xmm1, xmmword, ptr, [rsp, +, 16] + movups xmm2, xmmword, ptr, [rsp, +, 32] + movups xmm3, xmmword, ptr, [rsp, +, 48] + movups xmmword, ptr, [rdi], xmm0 + movups xmmword, ptr, [rdi, +, 16], xmm1 + movups xmmword, ptr, [rdi, +, 32], xmm2 + movups xmmword, ptr, [rdi, +, 48], xmm3 #MEMBARRIER add rsp, 64 ret diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align8 b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align8 index 04cbab5..633f0bf 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_load_align8 @@ -1,22 +1,63 @@ asm_test::atomic_memcpy_load_align8::acquire: + sub rsp, 64 mov rax, rdi - mov r8, qword, ptr, [rsi, +, 56] - mov r9, qword, ptr, [rsi, +, 48] - mov r10, qword, ptr, [rsi, +, 40] - mov r11, qword, ptr, [rsi, +, 32] - mov rdx, qword, ptr, [rsi, +, 24] - mov rdi, qword, ptr, [rsi, +, 16] - mov rcx, qword, ptr, [rsi, +, 8] - mov rsi, qword, ptr, [rsi] - mov qword, ptr, [rax], rsi - mov qword, ptr, [rax, +, 8], rcx - mov qword, ptr, [rax, +, 16], rdi - mov qword, ptr, [rax, +, 24], rdx - mov qword, ptr, [rax, +, 32], r11 - mov qword, ptr, [rax, +, 40], r10 - mov qword, ptr, [rax, +, 48], r9 - mov qword, ptr, [rax, +, 56], r8 + lea rcx, [rsi, +, 56] + lea rdx, [rsp, +, 56] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rdx], rcx + #NO_APP + lea rcx, [rsi, +, 48] + lea rdx, [rsp, +, 48] + #APP + mov rcx, qword, ptr, 
[rcx] + mov qword, ptr, [rdx], rcx + #NO_APP + lea rcx, [rsi, +, 40] + lea rdx, [rsp, +, 40] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rdx], rcx + #NO_APP + lea rcx, [rsi, +, 32] + lea rdx, [rsp, +, 32] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rdx], rcx + #NO_APP + lea rcx, [rsi, +, 24] + lea rdx, [rsp, +, 24] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rdx], rcx + #NO_APP + lea rcx, [rsi, +, 16] + lea rdx, [rsp, +, 16] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rdx], rcx + #NO_APP + lea rcx, [rsi, +, 8] + lea rdx, [rsp, +, 8] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rdx], rcx + #NO_APP + mov rcx, rsp + #APP + mov rdx, qword, ptr, [rsi] + mov qword, ptr, [rcx], rdx + #NO_APP + movups xmm0, xmmword, ptr, [rsp] + movups xmm1, xmmword, ptr, [rsp, +, 16] + movups xmm2, xmmword, ptr, [rsp, +, 32] + movups xmm3, xmmword, ptr, [rsp, +, 48] + movups xmmword, ptr, [rdi], xmm0 + movups xmmword, ptr, [rdi, +, 16], xmm1 + movups xmmword, ptr, [rdi, +, 32], xmm2 + movups xmmword, ptr, [rdi, +, 48], xmm3 #MEMBARRIER + add rsp, 64 ret asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: mov rax, rdi diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align1 b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align1 index 7638746..b6cf823 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align1 @@ -17,233 +17,389 @@ asm_test::atomic_memcpy_store_align1::release: movaps xmmword, ptr, [rsp, +, 32], xmm2 movaps xmmword, ptr, [rsp, +, 16], xmm1 movaps xmmword, ptr, [rsp], xmm0 - lea rcx, [rdi, +, 7] - and rcx, -8 - mov r11, rcx - sub r11, rdi - cmp r11, 65 - jae .LBB2_1 - test r11, r11 - je .LBB2_4 - mov rdx, rdi - not rdx - add rcx, rdx - mov r8d, r11d - and r8d, 3 - cmp rcx, 3 - jae .LBB2_21 - mov esi, 64 - xor ecx, ecx - test r8, r8 - jne .LBB2_10 - jmp .LBB2_13 
-.LBB2_1: - mov al, byte, ptr, [rsp] + lea r8, [rsp, +, 16] + lea r9, [rsp, +, 32] + lea rdx, [rsp, +, 48] + lea rsi, [rdi, +, 63] + lea rax, [rsp, +, 63] + #APP + mov al, byte, ptr, [rax] + mov byte, ptr, [rsi], al + #NO_APP + lea rax, [rdi, +, 62] + lea rsi, [rsp, +, 62] + #APP + mov cl, byte, ptr, [rsi] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 61] + lea rcx, [rsp, +, 61] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 60] + lea rcx, [rsp, +, 60] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 59] + lea rcx, [rsp, +, 59] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 58] + lea rcx, [rsp, +, 58] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 57] + lea rcx, [rsp, +, 57] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 56] + lea rcx, [rsp, +, 56] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 55] + lea rcx, [rsp, +, 55] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 54] + lea rcx, [rsp, +, 54] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 53] + lea rcx, [rsp, +, 53] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 52] + lea rcx, [rsp, +, 52] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 51] + lea rcx, [rsp, +, 51] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 50] + lea rcx, [rsp, +, 50] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 49] + lea rcx, [rsp, +, 49] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 48] + #APP + mov cl, byte, ptr, [rdx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, 
[rdi, +, 47] + lea rcx, [rsp, +, 47] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 46] + lea rcx, [rsp, +, 46] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 45] + lea rcx, [rsp, +, 45] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 44] + lea rcx, [rsp, +, 44] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 43] + lea rcx, [rsp, +, 43] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 42] + lea rcx, [rsp, +, 42] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 41] + lea rcx, [rsp, +, 41] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 40] + lea rcx, [rsp, +, 40] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 39] + lea rcx, [rsp, +, 39] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 38] + lea rcx, [rsp, +, 38] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 37] + lea rcx, [rsp, +, 37] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 36] + lea rcx, [rsp, +, 36] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 35] + lea rcx, [rsp, +, 35] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 34] + lea rcx, [rsp, +, 34] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 33] + lea rcx, [rsp, +, 33] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 32] + #APP + mov cl, byte, ptr, [r9] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 31] + lea rcx, [rsp, +, 31] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, 
[rdi, +, 30] + lea rcx, [rsp, +, 30] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 29] + lea rcx, [rsp, +, 29] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 28] + lea rcx, [rsp, +, 28] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 27] + lea rcx, [rsp, +, 27] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 26] + lea rcx, [rsp, +, 26] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 25] + lea rcx, [rsp, +, 25] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 24] + lea rcx, [rsp, +, 24] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 23] + lea rcx, [rsp, +, 23] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 22] + lea rcx, [rsp, +, 22] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 21] + lea rcx, [rsp, +, 21] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 20] + lea rcx, [rsp, +, 20] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 19] + lea rcx, [rsp, +, 19] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 18] + lea rcx, [rsp, +, 18] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 17] + lea rcx, [rsp, +, 17] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 16] + #APP + mov cl, byte, ptr, [r8] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 15] + lea rcx, [rsp, +, 15] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 14] + lea rcx, [rsp, +, 14] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, 
[rdi, +, 13] + lea rcx, [rsp, +, 13] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 12] + lea rcx, [rsp, +, 12] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 11] + lea rcx, [rsp, +, 11] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 10] + lea rcx, [rsp, +, 10] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 9] + lea rcx, [rsp, +, 9] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 8] + lea rcx, [rsp, +, 8] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 7] + lea rcx, [rsp, +, 7] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 6] + lea rcx, [rsp, +, 6] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 5] + lea rcx, [rsp, +, 5] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 4] + lea rcx, [rsp, +, 4] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 3] + lea rcx, [rsp, +, 3] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 2] + lea rcx, [rsp, +, 2] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + lea rax, [rdi, +, 1] + lea rcx, [rsp, +, 1] + #APP + mov cl, byte, ptr, [rcx] + mov byte, ptr, [rax], cl + #NO_APP + mov rax, rsp + #APP + mov al, byte, ptr, [rax] mov byte, ptr, [rdi], al - mov al, byte, ptr, [rsp, +, 1] - mov byte, ptr, [rdi, +, 1], al - mov al, byte, ptr, [rsp, +, 2] - mov byte, ptr, [rdi, +, 2], al - mov al, byte, ptr, [rsp, +, 3] - mov byte, ptr, [rdi, +, 3], al - mov al, byte, ptr, [rsp, +, 4] - mov byte, ptr, [rdi, +, 4], al - mov al, byte, ptr, [rsp, +, 5] - mov byte, ptr, [rdi, +, 5], al - mov al, byte, ptr, [rsp, +, 6] - mov byte, ptr, [rdi, +, 6], al - mov al, 
byte, ptr, [rsp, +, 7] - mov byte, ptr, [rdi, +, 7], al - mov al, byte, ptr, [rsp, +, 8] - mov byte, ptr, [rdi, +, 8], al - mov al, byte, ptr, [rsp, +, 9] - mov byte, ptr, [rdi, +, 9], al - mov al, byte, ptr, [rsp, +, 10] - mov byte, ptr, [rdi, +, 10], al - mov al, byte, ptr, [rsp, +, 11] - mov byte, ptr, [rdi, +, 11], al - mov al, byte, ptr, [rsp, +, 12] - mov byte, ptr, [rdi, +, 12], al - mov al, byte, ptr, [rsp, +, 13] - mov byte, ptr, [rdi, +, 13], al - mov al, byte, ptr, [rsp, +, 14] - mov byte, ptr, [rdi, +, 14], al - mov al, byte, ptr, [rsp, +, 15] - mov byte, ptr, [rdi, +, 15], al - mov al, byte, ptr, [rsp, +, 16] - mov byte, ptr, [rdi, +, 16], al - mov al, byte, ptr, [rsp, +, 17] - mov byte, ptr, [rdi, +, 17], al - mov al, byte, ptr, [rsp, +, 18] - mov byte, ptr, [rdi, +, 18], al - mov al, byte, ptr, [rsp, +, 19] - mov byte, ptr, [rdi, +, 19], al - mov al, byte, ptr, [rsp, +, 20] - mov byte, ptr, [rdi, +, 20], al - mov al, byte, ptr, [rsp, +, 21] - mov byte, ptr, [rdi, +, 21], al - mov al, byte, ptr, [rsp, +, 22] - mov byte, ptr, [rdi, +, 22], al - mov al, byte, ptr, [rsp, +, 23] - mov byte, ptr, [rdi, +, 23], al - mov al, byte, ptr, [rsp, +, 24] - mov byte, ptr, [rdi, +, 24], al - mov al, byte, ptr, [rsp, +, 25] - mov byte, ptr, [rdi, +, 25], al - mov al, byte, ptr, [rsp, +, 26] - mov byte, ptr, [rdi, +, 26], al - mov al, byte, ptr, [rsp, +, 27] - mov byte, ptr, [rdi, +, 27], al - mov al, byte, ptr, [rsp, +, 28] - mov byte, ptr, [rdi, +, 28], al - mov al, byte, ptr, [rsp, +, 29] - mov byte, ptr, [rdi, +, 29], al - mov al, byte, ptr, [rsp, +, 30] - mov byte, ptr, [rdi, +, 30], al - mov al, byte, ptr, [rsp, +, 31] - mov byte, ptr, [rdi, +, 31], al - mov al, byte, ptr, [rsp, +, 32] - mov byte, ptr, [rdi, +, 32], al - mov al, byte, ptr, [rsp, +, 33] - mov byte, ptr, [rdi, +, 33], al - mov al, byte, ptr, [rsp, +, 34] - mov byte, ptr, [rdi, +, 34], al - mov al, byte, ptr, [rsp, +, 35] - mov byte, ptr, [rdi, +, 35], al - mov al, byte, ptr, [rsp, +, 36] - mov 
byte, ptr, [rdi, +, 36], al - mov al, byte, ptr, [rsp, +, 37] - mov byte, ptr, [rdi, +, 37], al - mov al, byte, ptr, [rsp, +, 38] - mov byte, ptr, [rdi, +, 38], al - mov al, byte, ptr, [rsp, +, 39] - mov byte, ptr, [rdi, +, 39], al - mov al, byte, ptr, [rsp, +, 40] - mov byte, ptr, [rdi, +, 40], al - mov al, byte, ptr, [rsp, +, 41] - mov byte, ptr, [rdi, +, 41], al - mov al, byte, ptr, [rsp, +, 42] - mov byte, ptr, [rdi, +, 42], al - mov al, byte, ptr, [rsp, +, 43] - mov byte, ptr, [rdi, +, 43], al - mov al, byte, ptr, [rsp, +, 44] - mov byte, ptr, [rdi, +, 44], al - mov al, byte, ptr, [rsp, +, 45] - mov byte, ptr, [rdi, +, 45], al - mov al, byte, ptr, [rsp, +, 46] - mov byte, ptr, [rdi, +, 46], al - mov al, byte, ptr, [rsp, +, 47] - mov byte, ptr, [rdi, +, 47], al - mov al, byte, ptr, [rsp, +, 48] - mov byte, ptr, [rdi, +, 48], al - mov al, byte, ptr, [rsp, +, 49] - mov byte, ptr, [rdi, +, 49], al - mov al, byte, ptr, [rsp, +, 50] - mov byte, ptr, [rdi, +, 50], al - mov al, byte, ptr, [rsp, +, 51] - mov byte, ptr, [rdi, +, 51], al - mov al, byte, ptr, [rsp, +, 52] - mov byte, ptr, [rdi, +, 52], al - mov al, byte, ptr, [rsp, +, 53] - mov byte, ptr, [rdi, +, 53], al - mov al, byte, ptr, [rsp, +, 54] - mov byte, ptr, [rdi, +, 54], al - mov al, byte, ptr, [rsp, +, 55] - mov byte, ptr, [rdi, +, 55], al - mov al, byte, ptr, [rsp, +, 56] - mov byte, ptr, [rdi, +, 56], al - mov al, byte, ptr, [rsp, +, 57] - mov byte, ptr, [rdi, +, 57], al - mov al, byte, ptr, [rsp, +, 58] - mov byte, ptr, [rdi, +, 58], al - mov al, byte, ptr, [rsp, +, 59] - mov byte, ptr, [rdi, +, 59], al - mov al, byte, ptr, [rsp, +, 60] - mov byte, ptr, [rdi, +, 60], al - mov al, byte, ptr, [rsp, +, 61] - mov byte, ptr, [rdi, +, 61], al - mov al, byte, ptr, [rsp, +, 62] - mov byte, ptr, [rdi, +, 62], al - mov al, byte, ptr, [rsp, +, 63] - mov byte, ptr, [rdi, +, 63], al - jmp .LBB2_2 -.LBB2_4: - mov esi, 64 - xor r11d, r11d - jmp .LBB2_5 -.LBB2_21: - mov rsi, r11 - and rsi, -4 - xor ecx, ecx -.LBB2_22: 
- movzx edx, byte, ptr, [rsp, +, rcx] - mov byte, ptr, [rdi, +, rcx], dl - movzx edx, byte, ptr, [rsp, +, rcx, +, 1] - mov byte, ptr, [rdi, +, rcx, +, 1], dl - movzx edx, byte, ptr, [rsp, +, rcx, +, 2] - mov byte, ptr, [rdi, +, rcx, +, 2], dl - movzx edx, byte, ptr, [rsp, +, rcx, +, 3] - mov byte, ptr, [rdi, +, rcx, +, 3], dl - add rcx, 4 - cmp rsi, rcx - jne .LBB2_22 - mov esi, 64 - sub rsi, rcx - test r8, r8 - je .LBB2_13 -.LBB2_10: - lea r9, [rdi, +, rcx] - lea r10, [rsp, +, rcx] - xor ecx, ecx -.LBB2_11: - movzx edx, byte, ptr, [r10, +, rcx] - mov byte, ptr, [r9, +, rcx], dl - add rcx, 1 - cmp r8, rcx - jne .LBB2_11 - sub rsi, rcx -.LBB2_13: - cmp rsi, 8 - jb .LBB2_14 -.LBB2_5: - mov rax, qword, ptr, [rsp, +, r11] - mov qword, ptr, [rdi, +, r11], rax - add rsi, -8 - add r11, 8 - cmp rsi, 7 - ja .LBB2_5 -.LBB2_14: - test rsi, rsi - je .LBB2_2 - lea r8, [rsi, -, 1] - mov rcx, rsi - mov rdx, r11 - and rcx, 3 - je .LBB2_18 - mov rdx, r11 -.LBB2_17: - movzx eax, byte, ptr, [rsp, +, rdx] - mov byte, ptr, [rdi, +, rdx], al - add rdx, 1 - add rcx, -1 - jne .LBB2_17 -.LBB2_18: - cmp r8, 3 - jb .LBB2_2 - add rsi, r11 -.LBB2_20: - movzx eax, byte, ptr, [rsp, +, rdx] - mov byte, ptr, [rdi, +, rdx], al - movzx eax, byte, ptr, [rsp, +, rdx, +, 1] - mov byte, ptr, [rdi, +, rdx, +, 1], al - movzx eax, byte, ptr, [rsp, +, rdx, +, 2] - mov byte, ptr, [rdi, +, rdx, +, 2], al - movzx eax, byte, ptr, [rsp, +, rdx, +, 3] - mov byte, ptr, [rdi, +, rdx, +, 3], al - add rdx, 4 - cmp rsi, rdx - jne .LBB2_20 -.LBB2_2: + #NO_APP add rsp, 136 ret asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align16 b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align16 index a2eddbe..7234931 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align16 @@ -1,21 +1,70 @@ 
asm_test::atomic_memcpy_store_align16::release: - mov r8, qword, ptr, [rsi] - mov r9, qword, ptr, [rsi, +, 8] - mov r10, qword, ptr, [rsi, +, 16] - mov r11, qword, ptr, [rsi, +, 24] - mov rcx, qword, ptr, [rsi, +, 32] - mov rdx, qword, ptr, [rsi, +, 40] - mov rax, qword, ptr, [rsi, +, 48] - mov rsi, qword, ptr, [rsi, +, 56] + sub rsp, 136 + movaps xmm0, xmmword, ptr, [rsi] + movaps xmm1, xmmword, ptr, [rsi, +, 16] + movaps xmm2, xmmword, ptr, [rsi, +, 32] + movaps xmm3, xmmword, ptr, [rsi, +, 48] + movaps xmmword, ptr, [rsp, +, 112], xmm3 + movaps xmmword, ptr, [rsp, +, 96], xmm2 + movaps xmmword, ptr, [rsp, +, 80], xmm1 + movaps xmmword, ptr, [rsp, +, 64], xmm0 #MEMBARRIER - mov qword, ptr, [rdi, +, 56], rsi - mov qword, ptr, [rdi, +, 48], rax - mov qword, ptr, [rdi, +, 40], rdx - mov qword, ptr, [rdi, +, 32], rcx - mov qword, ptr, [rdi, +, 24], r11 - mov qword, ptr, [rdi, +, 16], r10 - mov qword, ptr, [rdi, +, 8], r9 - mov qword, ptr, [rdi], r8 + movaps xmm0, xmmword, ptr, [rsp, +, 64] + movaps xmm1, xmmword, ptr, [rsp, +, 80] + movaps xmm2, xmmword, ptr, [rsp, +, 96] + movaps xmm3, xmmword, ptr, [rsp, +, 112] + movaps xmmword, ptr, [rsp, +, 48], xmm3 + movaps xmmword, ptr, [rsp, +, 32], xmm2 + movaps xmmword, ptr, [rsp, +, 16], xmm1 + movaps xmmword, ptr, [rsp], xmm0 + lea r8, [rsp, +, 16] + lea rcx, [rsp, +, 32] + lea rdx, [rsp, +, 48] + lea rsi, [rdi, +, 56] + lea rax, [rsp, +, 56] + #APP + mov rax, qword, ptr, [rax] + mov qword, ptr, [rsi], rax + #NO_APP + lea rax, [rdi, +, 48] + #APP + mov rdx, qword, ptr, [rdx] + mov qword, ptr, [rax], rdx + #NO_APP + lea rax, [rdi, +, 40] + lea rdx, [rsp, +, 40] + #APP + mov rdx, qword, ptr, [rdx] + mov qword, ptr, [rax], rdx + #NO_APP + lea rax, [rdi, +, 32] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rax], rcx + #NO_APP + lea rax, [rdi, +, 24] + lea rcx, [rsp, +, 24] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rax], rcx + #NO_APP + lea rax, [rdi, +, 16] + #APP + mov rcx, qword, ptr, [r8] + mov 
qword, ptr, [rax], rcx + #NO_APP + lea rax, [rdi, +, 8] + lea rcx, [rsp, +, 8] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rax], rcx + #NO_APP + mov rax, rsp + #APP + mov rax, qword, ptr, [rax] + mov qword, ptr, [rdi], rax + #NO_APP + add rsp, 136 ret asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: sub rsp, 72 diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align2 b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align2 index 0066477..af3da38 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align2 @@ -17,169 +17,197 @@ asm_test::atomic_memcpy_store_align2::release: movaps xmmword, ptr, [rsp, +, 32], xmm2 movaps xmmword, ptr, [rsp, +, 16], xmm1 movaps xmmword, ptr, [rsp], xmm0 - lea rcx, [rdi, +, 7] - and rcx, -8 - mov r11, rcx - sub r11, rdi - cmp r11, 64 - ja .LBB6_21 - test r11, r11 - je .LBB6_2 - mov rdx, rdi - not rdx - add rcx, rdx - mov r8d, r11d - and r8d, 3 - cmp rcx, 3 - jae .LBB6_19 - mov esi, 64 - xor ecx, ecx - test r8, r8 - jne .LBB6_8 - jmp .LBB6_11 -.LBB6_21: - movzx eax, word, ptr, [rsp, +, 62] - mov word, ptr, [rdi, +, 62], ax - movzx eax, word, ptr, [rsp, +, 60] - mov word, ptr, [rdi, +, 60], ax - movzx eax, word, ptr, [rsp, +, 58] - mov word, ptr, [rdi, +, 58], ax - movzx eax, word, ptr, [rsp, +, 56] - mov word, ptr, [rdi, +, 56], ax - movzx eax, word, ptr, [rsp, +, 54] - mov word, ptr, [rdi, +, 54], ax - movzx eax, word, ptr, [rsp, +, 52] - mov word, ptr, [rdi, +, 52], ax - movzx eax, word, ptr, [rsp, +, 50] - mov word, ptr, [rdi, +, 50], ax - movzx eax, word, ptr, [rsp, +, 48] - mov word, ptr, [rdi, +, 48], ax - movzx eax, word, ptr, [rsp, +, 46] - mov word, ptr, [rdi, +, 46], ax - movzx eax, word, ptr, [rsp, +, 44] - mov word, ptr, [rdi, +, 44], ax - movzx eax, word, ptr, [rsp, +, 42] - mov word, ptr, [rdi, +, 42], ax - movzx eax, word, ptr, [rsp, +, 40] - mov 
word, ptr, [rdi, +, 40], ax - movzx eax, word, ptr, [rsp, +, 38] - mov word, ptr, [rdi, +, 38], ax - movzx eax, word, ptr, [rsp, +, 36] - mov word, ptr, [rdi, +, 36], ax - movzx eax, word, ptr, [rsp, +, 34] - mov word, ptr, [rdi, +, 34], ax - movzx eax, word, ptr, [rsp, +, 32] - mov word, ptr, [rdi, +, 32], ax - movzx eax, word, ptr, [rsp, +, 30] - mov word, ptr, [rdi, +, 30], ax - movzx eax, word, ptr, [rsp, +, 28] - mov word, ptr, [rdi, +, 28], ax - movzx eax, word, ptr, [rsp, +, 26] - mov word, ptr, [rdi, +, 26], ax - movzx eax, word, ptr, [rsp, +, 24] - mov word, ptr, [rdi, +, 24], ax - movzx eax, word, ptr, [rsp, +, 22] - mov word, ptr, [rdi, +, 22], ax - movzx eax, word, ptr, [rsp, +, 20] - mov word, ptr, [rdi, +, 20], ax - movzx eax, word, ptr, [rsp, +, 18] - mov word, ptr, [rdi, +, 18], ax - movzx eax, word, ptr, [rsp, +, 16] - mov word, ptr, [rdi, +, 16], ax - movzx eax, word, ptr, [rsp, +, 14] - mov word, ptr, [rdi, +, 14], ax - movzx eax, word, ptr, [rsp, +, 12] - mov word, ptr, [rdi, +, 12], ax - movzx eax, word, ptr, [rsp, +, 10] - mov word, ptr, [rdi, +, 10], ax - movzx eax, word, ptr, [rsp, +, 8] - mov word, ptr, [rdi, +, 8], ax - movzx eax, word, ptr, [rsp, +, 6] - mov word, ptr, [rdi, +, 6], ax - movzx eax, word, ptr, [rsp, +, 4] - mov word, ptr, [rdi, +, 4], ax - movzx eax, word, ptr, [rsp, +, 2] - mov word, ptr, [rdi, +, 2], ax - movzx eax, word, ptr, [rsp] + lea r8, [rsp, +, 16] + lea rcx, [rsp, +, 32] + lea rdx, [rsp, +, 48] + lea rsi, [rdi, +, 62] + lea rax, [rsp, +, 62] + #APP + mov ax, word, ptr, [rax] + mov word, ptr, [rsi], ax + #NO_APP + lea rax, [rdi, +, 60] + lea rsi, [rsp, +, 60] + #APP + mov si, word, ptr, [rsi] + mov word, ptr, [rax], si + #NO_APP + lea rax, [rdi, +, 58] + lea rsi, [rsp, +, 58] + #APP + mov si, word, ptr, [rsi] + mov word, ptr, [rax], si + #NO_APP + lea rax, [rdi, +, 56] + lea rsi, [rsp, +, 56] + #APP + mov si, word, ptr, [rsi] + mov word, ptr, [rax], si + #NO_APP + lea rax, [rdi, +, 54] + lea rsi, [rsp, +, 54] + 
#APP + mov si, word, ptr, [rsi] + mov word, ptr, [rax], si + #NO_APP + lea rax, [rdi, +, 52] + lea rsi, [rsp, +, 52] + #APP + mov si, word, ptr, [rsi] + mov word, ptr, [rax], si + #NO_APP + lea rax, [rdi, +, 50] + lea rsi, [rsp, +, 50] + #APP + mov si, word, ptr, [rsi] + mov word, ptr, [rax], si + #NO_APP + lea rax, [rdi, +, 48] + #APP + mov dx, word, ptr, [rdx] + mov word, ptr, [rax], dx + #NO_APP + lea rax, [rdi, +, 46] + lea rdx, [rsp, +, 46] + #APP + mov dx, word, ptr, [rdx] + mov word, ptr, [rax], dx + #NO_APP + lea rax, [rdi, +, 44] + lea rdx, [rsp, +, 44] + #APP + mov dx, word, ptr, [rdx] + mov word, ptr, [rax], dx + #NO_APP + lea rax, [rdi, +, 42] + lea rdx, [rsp, +, 42] + #APP + mov dx, word, ptr, [rdx] + mov word, ptr, [rax], dx + #NO_APP + lea rax, [rdi, +, 40] + lea rdx, [rsp, +, 40] + #APP + mov dx, word, ptr, [rdx] + mov word, ptr, [rax], dx + #NO_APP + lea rax, [rdi, +, 38] + lea rdx, [rsp, +, 38] + #APP + mov dx, word, ptr, [rdx] + mov word, ptr, [rax], dx + #NO_APP + lea rax, [rdi, +, 36] + lea rdx, [rsp, +, 36] + #APP + mov dx, word, ptr, [rdx] + mov word, ptr, [rax], dx + #NO_APP + lea rax, [rdi, +, 34] + lea rdx, [rsp, +, 34] + #APP + mov dx, word, ptr, [rdx] + mov word, ptr, [rax], dx + #NO_APP + lea rax, [rdi, +, 32] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rax], cx + #NO_APP + lea rax, [rdi, +, 30] + lea rcx, [rsp, +, 30] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rax], cx + #NO_APP + lea rax, [rdi, +, 28] + lea rcx, [rsp, +, 28] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rax], cx + #NO_APP + lea rax, [rdi, +, 26] + lea rcx, [rsp, +, 26] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rax], cx + #NO_APP + lea rax, [rdi, +, 24] + lea rcx, [rsp, +, 24] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rax], cx + #NO_APP + lea rax, [rdi, +, 22] + lea rcx, [rsp, +, 22] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rax], cx + #NO_APP + lea rax, [rdi, +, 20] + lea rcx, [rsp, +, 20] + #APP + mov cx, word, 
ptr, [rcx] + mov word, ptr, [rax], cx + #NO_APP + lea rax, [rdi, +, 18] + lea rcx, [rsp, +, 18] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rax], cx + #NO_APP + lea rax, [rdi, +, 16] + #APP + mov cx, word, ptr, [r8] + mov word, ptr, [rax], cx + #NO_APP + lea rax, [rdi, +, 14] + lea rcx, [rsp, +, 14] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rax], cx + #NO_APP + lea rax, [rdi, +, 12] + lea rcx, [rsp, +, 12] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rax], cx + #NO_APP + lea rax, [rdi, +, 10] + lea rcx, [rsp, +, 10] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rax], cx + #NO_APP + lea rax, [rdi, +, 8] + lea rcx, [rsp, +, 8] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rax], cx + #NO_APP + lea rax, [rdi, +, 6] + lea rcx, [rsp, +, 6] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rax], cx + #NO_APP + lea rax, [rdi, +, 4] + lea rcx, [rsp, +, 4] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rax], cx + #NO_APP + lea rax, [rdi, +, 2] + lea rcx, [rsp, +, 2] + #APP + mov cx, word, ptr, [rcx] + mov word, ptr, [rax], cx + #NO_APP + mov rax, rsp + #APP + mov ax, word, ptr, [rax] mov word, ptr, [rdi], ax - jmp .LBB6_22 -.LBB6_2: - mov esi, 64 - xor r11d, r11d - jmp .LBB6_3 -.LBB6_19: - mov rsi, r11 - and rsi, -4 - xor ecx, ecx -.LBB6_20: - movzx edx, byte, ptr, [rsp, +, rcx] - mov byte, ptr, [rdi, +, rcx], dl - movzx edx, byte, ptr, [rsp, +, rcx, +, 1] - mov byte, ptr, [rdi, +, rcx, +, 1], dl - movzx edx, byte, ptr, [rsp, +, rcx, +, 2] - mov byte, ptr, [rdi, +, rcx, +, 2], dl - movzx edx, byte, ptr, [rsp, +, rcx, +, 3] - mov byte, ptr, [rdi, +, rcx, +, 3], dl - add rcx, 4 - cmp rsi, rcx - jne .LBB6_20 - mov esi, 64 - sub rsi, rcx - test r8, r8 - je .LBB6_11 -.LBB6_8: - lea r9, [rdi, +, rcx] - lea r10, [rsp, +, rcx] - xor ecx, ecx -.LBB6_9: - movzx edx, byte, ptr, [r10, +, rcx] - mov byte, ptr, [r9, +, rcx], dl - add rcx, 1 - cmp r8, rcx - jne .LBB6_9 - sub rsi, rcx -.LBB6_11: - cmp rsi, 8 - jb .LBB6_12 -.LBB6_3: - mov rax, 
qword, ptr, [rsp, +, r11] - mov qword, ptr, [rdi, +, r11], rax - add rsi, -8 - add r11, 8 - cmp rsi, 7 - ja .LBB6_3 -.LBB6_12: - test rsi, rsi - je .LBB6_22 - lea r8, [rsi, -, 1] - mov rcx, rsi - mov rdx, r11 - and rcx, 3 - je .LBB6_16 - mov rdx, r11 -.LBB6_15: - movzx eax, byte, ptr, [rsp, +, rdx] - mov byte, ptr, [rdi, +, rdx], al - add rdx, 1 - add rcx, -1 - jne .LBB6_15 -.LBB6_16: - cmp r8, 3 - jb .LBB6_22 - add rsi, r11 -.LBB6_18: - movzx eax, byte, ptr, [rsp, +, rdx] - mov byte, ptr, [rdi, +, rdx], al - movzx eax, byte, ptr, [rsp, +, rdx, +, 1] - mov byte, ptr, [rdi, +, rdx, +, 1], al - movzx eax, byte, ptr, [rsp, +, rdx, +, 2] - mov byte, ptr, [rdi, +, rdx, +, 2], al - movzx eax, byte, ptr, [rsp, +, rdx, +, 3] - mov byte, ptr, [rdi, +, rdx, +, 3], al - add rdx, 4 - cmp rsi, rdx - jne .LBB6_18 -.LBB6_22: + #NO_APP add rsp, 136 ret asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align4 b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align4 index bea735c..4ddc18f 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align4 @@ -17,137 +17,101 @@ asm_test::atomic_memcpy_store_align4::release: movaps xmmword, ptr, [rsp, +, 32], xmm2 movaps xmmword, ptr, [rsp, +, 16], xmm1 movaps xmmword, ptr, [rsp], xmm0 - lea rcx, [rdi, +, 7] - and rcx, -8 - mov r11, rcx - sub r11, rdi - cmp r11, 64 - ja .LBB10_21 - test r11, r11 - je .LBB10_2 - mov rdx, rdi - not rdx - add rcx, rdx - mov r8d, r11d - and r8d, 3 - cmp rcx, 3 - jae .LBB10_19 - mov esi, 64 - xor ecx, ecx - test r8, r8 - jne .LBB10_8 - jmp .LBB10_11 -.LBB10_21: - mov eax, dword, ptr, [rsp, +, 60] - mov dword, ptr, [rdi, +, 60], eax - mov eax, dword, ptr, [rsp, +, 56] - mov dword, ptr, [rdi, +, 56], eax - mov eax, dword, ptr, [rsp, +, 52] - mov dword, ptr, [rdi, +, 52], eax - mov eax, dword, ptr, [rsp, +, 
48] - mov dword, ptr, [rdi, +, 48], eax - mov eax, dword, ptr, [rsp, +, 44] - mov dword, ptr, [rdi, +, 44], eax - mov eax, dword, ptr, [rsp, +, 40] - mov dword, ptr, [rdi, +, 40], eax - mov eax, dword, ptr, [rsp, +, 36] - mov dword, ptr, [rdi, +, 36], eax - mov eax, dword, ptr, [rsp, +, 32] - mov dword, ptr, [rdi, +, 32], eax - mov eax, dword, ptr, [rsp, +, 28] - mov dword, ptr, [rdi, +, 28], eax - mov eax, dword, ptr, [rsp, +, 24] - mov dword, ptr, [rdi, +, 24], eax - mov eax, dword, ptr, [rsp, +, 20] - mov dword, ptr, [rdi, +, 20], eax - mov eax, dword, ptr, [rsp, +, 16] - mov dword, ptr, [rdi, +, 16], eax - mov eax, dword, ptr, [rsp, +, 12] - mov dword, ptr, [rdi, +, 12], eax - mov eax, dword, ptr, [rsp, +, 8] - mov dword, ptr, [rdi, +, 8], eax - mov eax, dword, ptr, [rsp, +, 4] - mov dword, ptr, [rdi, +, 4], eax - mov eax, dword, ptr, [rsp] + lea r8, [rsp, +, 16] + lea rcx, [rsp, +, 32] + lea rdx, [rsp, +, 48] + lea rsi, [rdi, +, 60] + lea rax, [rsp, +, 60] + #APP + mov eax, dword, ptr, [rax] + mov dword, ptr, [rsi], eax + #NO_APP + lea rax, [rdi, +, 56] + lea rsi, [rsp, +, 56] + #APP + mov esi, dword, ptr, [rsi] + mov dword, ptr, [rax], esi + #NO_APP + lea rax, [rdi, +, 52] + lea rsi, [rsp, +, 52] + #APP + mov esi, dword, ptr, [rsi] + mov dword, ptr, [rax], esi + #NO_APP + lea rax, [rdi, +, 48] + #APP + mov edx, dword, ptr, [rdx] + mov dword, ptr, [rax], edx + #NO_APP + lea rax, [rdi, +, 44] + lea rdx, [rsp, +, 44] + #APP + mov edx, dword, ptr, [rdx] + mov dword, ptr, [rax], edx + #NO_APP + lea rax, [rdi, +, 40] + lea rdx, [rsp, +, 40] + #APP + mov edx, dword, ptr, [rdx] + mov dword, ptr, [rax], edx + #NO_APP + lea rax, [rdi, +, 36] + lea rdx, [rsp, +, 36] + #APP + mov edx, dword, ptr, [rdx] + mov dword, ptr, [rax], edx + #NO_APP + lea rax, [rdi, +, 32] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rax], ecx + #NO_APP + lea rax, [rdi, +, 28] + lea rcx, [rsp, +, 28] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rax], ecx + #NO_APP + lea rax, 
[rdi, +, 24] + lea rcx, [rsp, +, 24] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rax], ecx + #NO_APP + lea rax, [rdi, +, 20] + lea rcx, [rsp, +, 20] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rax], ecx + #NO_APP + lea rax, [rdi, +, 16] + #APP + mov ecx, dword, ptr, [r8] + mov dword, ptr, [rax], ecx + #NO_APP + lea rax, [rdi, +, 12] + lea rcx, [rsp, +, 12] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rax], ecx + #NO_APP + lea rax, [rdi, +, 8] + lea rcx, [rsp, +, 8] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rax], ecx + #NO_APP + lea rax, [rdi, +, 4] + lea rcx, [rsp, +, 4] + #APP + mov ecx, dword, ptr, [rcx] + mov dword, ptr, [rax], ecx + #NO_APP + mov rax, rsp + #APP + mov eax, dword, ptr, [rax] mov dword, ptr, [rdi], eax - jmp .LBB10_22 -.LBB10_2: - mov esi, 64 - xor r11d, r11d - jmp .LBB10_3 -.LBB10_19: - mov rsi, r11 - and rsi, -4 - xor ecx, ecx -.LBB10_20: - movzx edx, byte, ptr, [rsp, +, rcx] - mov byte, ptr, [rdi, +, rcx], dl - movzx edx, byte, ptr, [rsp, +, rcx, +, 1] - mov byte, ptr, [rdi, +, rcx, +, 1], dl - movzx edx, byte, ptr, [rsp, +, rcx, +, 2] - mov byte, ptr, [rdi, +, rcx, +, 2], dl - movzx edx, byte, ptr, [rsp, +, rcx, +, 3] - mov byte, ptr, [rdi, +, rcx, +, 3], dl - add rcx, 4 - cmp rsi, rcx - jne .LBB10_20 - mov esi, 64 - sub rsi, rcx - test r8, r8 - je .LBB10_11 -.LBB10_8: - lea r9, [rdi, +, rcx] - lea r10, [rsp, +, rcx] - xor ecx, ecx -.LBB10_9: - movzx edx, byte, ptr, [r10, +, rcx] - mov byte, ptr, [r9, +, rcx], dl - add rcx, 1 - cmp r8, rcx - jne .LBB10_9 - sub rsi, rcx -.LBB10_11: - cmp rsi, 8 - jb .LBB10_12 -.LBB10_3: - mov rax, qword, ptr, [rsp, +, r11] - mov qword, ptr, [rdi, +, r11], rax - add rsi, -8 - add r11, 8 - cmp rsi, 7 - ja .LBB10_3 -.LBB10_12: - test rsi, rsi - je .LBB10_22 - lea r8, [rsi, -, 1] - mov rcx, rsi - mov rdx, r11 - and rcx, 3 - je .LBB10_16 - mov rdx, r11 -.LBB10_15: - movzx eax, byte, ptr, [rsp, +, rdx] - mov byte, ptr, [rdi, +, rdx], al - add rdx, 1 - add rcx, -1 - jne 
.LBB10_15 -.LBB10_16: - cmp r8, 3 - jb .LBB10_22 - add rsi, r11 -.LBB10_18: - movzx eax, byte, ptr, [rsp, +, rdx] - mov byte, ptr, [rdi, +, rdx], al - movzx eax, byte, ptr, [rsp, +, rdx, +, 1] - mov byte, ptr, [rdi, +, rdx, +, 1], al - movzx eax, byte, ptr, [rsp, +, rdx, +, 2] - mov byte, ptr, [rdi, +, rdx, +, 2], al - movzx eax, byte, ptr, [rsp, +, rdx, +, 3] - mov byte, ptr, [rdi, +, rdx, +, 3], al - add rdx, 4 - cmp rsi, rdx - jne .LBB10_18 -.LBB10_22: + #NO_APP add rsp, 136 ret asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align8 b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align8 index 25e74e1..8ae8dd8 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnu/atomic_memcpy_store_align8 @@ -1,21 +1,70 @@ asm_test::atomic_memcpy_store_align8::release: - mov r8, qword, ptr, [rsi] - mov r9, qword, ptr, [rsi, +, 8] - mov r10, qword, ptr, [rsi, +, 16] - mov r11, qword, ptr, [rsi, +, 24] - mov rcx, qword, ptr, [rsi, +, 32] - mov rdx, qword, ptr, [rsi, +, 40] - mov rax, qword, ptr, [rsi, +, 48] - mov rsi, qword, ptr, [rsi, +, 56] + sub rsp, 136 + movups xmm0, xmmword, ptr, [rsi] + movups xmm1, xmmword, ptr, [rsi, +, 16] + movups xmm2, xmmword, ptr, [rsi, +, 32] + movups xmm3, xmmword, ptr, [rsi, +, 48] + movaps xmmword, ptr, [rsp, +, 112], xmm3 + movaps xmmword, ptr, [rsp, +, 96], xmm2 + movaps xmmword, ptr, [rsp, +, 80], xmm1 + movaps xmmword, ptr, [rsp, +, 64], xmm0 #MEMBARRIER - mov qword, ptr, [rdi, +, 56], rsi - mov qword, ptr, [rdi, +, 48], rax - mov qword, ptr, [rdi, +, 40], rdx - mov qword, ptr, [rdi, +, 32], rcx - mov qword, ptr, [rdi, +, 24], r11 - mov qword, ptr, [rdi, +, 16], r10 - mov qword, ptr, [rdi, +, 8], r9 - mov qword, ptr, [rdi], r8 + movaps xmm0, xmmword, ptr, [rsp, +, 64] + movaps xmm1, xmmword, ptr, [rsp, +, 80] + movaps xmm2, xmmword, ptr, [rsp, +, 96] 
+ movaps xmm3, xmmword, ptr, [rsp, +, 112] + movaps xmmword, ptr, [rsp, +, 48], xmm3 + movaps xmmword, ptr, [rsp, +, 32], xmm2 + movaps xmmword, ptr, [rsp, +, 16], xmm1 + movaps xmmword, ptr, [rsp], xmm0 + lea r8, [rsp, +, 16] + lea rcx, [rsp, +, 32] + lea rdx, [rsp, +, 48] + lea rsi, [rdi, +, 56] + lea rax, [rsp, +, 56] + #APP + mov rax, qword, ptr, [rax] + mov qword, ptr, [rsi], rax + #NO_APP + lea rax, [rdi, +, 48] + #APP + mov rdx, qword, ptr, [rdx] + mov qword, ptr, [rax], rdx + #NO_APP + lea rax, [rdi, +, 40] + lea rdx, [rsp, +, 40] + #APP + mov rdx, qword, ptr, [rdx] + mov qword, ptr, [rax], rdx + #NO_APP + lea rax, [rdi, +, 32] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rax], rcx + #NO_APP + lea rax, [rdi, +, 24] + lea rcx, [rsp, +, 24] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rax], rcx + #NO_APP + lea rax, [rdi, +, 16] + #APP + mov rcx, qword, ptr, [r8] + mov qword, ptr, [rax], rcx + #NO_APP + lea rax, [rdi, +, 8] + lea rcx, [rsp, +, 8] + #APP + mov rcx, qword, ptr, [rcx] + mov qword, ptr, [rax], rcx + #NO_APP + mov rax, rsp + #APP + mov rax, qword, ptr, [rax] + mov qword, ptr, [rdi], rax + #NO_APP + add rsp, 136 ret asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: sub rsp, 72 diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align1 b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align1 index 7e93b10..8de8dae 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align1 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align1 @@ -1,200 +1,203 @@ asm_test::atomic_memcpy_load_align1::acquire: - push rbx sub esp, 32 mov rax, rdi - lea r8d, [rsi, +, 3] - and r8d, -4 - mov r10d, r8d - sub r10d, esi - cmp r10d, 33 - jae .LBB0_9 - test r10d, r10d - je .LBB0_10 - mov ecx, esi - not ecx - add ecx, r8d - mov r9d, r10d - and r9d, 3 - xor edi, edi - cmp ecx, 3 - jb .LBB0_5 - mov edx, r10d - and edx, -4 - xor ecx, ecx -.LBB0_4: - 
movzx ebx, byte, ptr, [esi, +, ecx] - mov byte, ptr, [esp, +, ecx], bl - movzx ebx, byte, ptr, [esi, +, ecx, +, 1] - mov byte, ptr, [esp, +, ecx, +, 1], bl - movzx ebx, byte, ptr, [esi, +, ecx, +, 2] - mov byte, ptr, [esp, +, ecx, +, 2], bl - lea edi, [rcx, +, 4] - movzx ebx, byte, ptr, [esi, +, ecx, +, 3] - mov byte, ptr, [esp, +, ecx, +, 3], bl - mov ecx, edi - cmp edx, edi - jne .LBB0_4 -.LBB0_5: - lea edx, [rsi, +, 32] - test r9d, r9d - je .LBB0_8 - lea r11d, [rsp, +, rdi] - add edi, esi - xor ecx, ecx -.LBB0_7: - movzx ebx, byte, ptr, [edi, +, ecx] - mov byte, ptr, [r11d, +, ecx], bl - add ecx, 1 - cmp r9d, ecx - jne .LBB0_7 -.LBB0_8: - sub edx, r8d - cmp edx, 4 - jae .LBB0_11 - jmp .LBB0_12 -.LBB0_9: - mov cl, byte, ptr, [esi] - mov byte, ptr, [esp], cl - mov cl, byte, ptr, [esi, +, 1] - mov byte, ptr, [esp, +, 1], cl - mov cl, byte, ptr, [esi, +, 2] - mov byte, ptr, [esp, +, 2], cl - mov cl, byte, ptr, [esi, +, 3] - mov byte, ptr, [esp, +, 3], cl - mov cl, byte, ptr, [esi, +, 4] - mov byte, ptr, [esp, +, 4], cl - mov cl, byte, ptr, [esi, +, 5] - mov byte, ptr, [esp, +, 5], cl - mov cl, byte, ptr, [esi, +, 6] - mov byte, ptr, [esp, +, 6], cl - mov cl, byte, ptr, [esi, +, 7] - mov byte, ptr, [esp, +, 7], cl - mov cl, byte, ptr, [esi, +, 8] - mov byte, ptr, [esp, +, 8], cl - mov cl, byte, ptr, [esi, +, 9] - mov byte, ptr, [esp, +, 9], cl - mov cl, byte, ptr, [esi, +, 10] - mov byte, ptr, [esp, +, 10], cl - mov cl, byte, ptr, [esi, +, 11] - mov byte, ptr, [esp, +, 11], cl - mov cl, byte, ptr, [esi, +, 12] - mov byte, ptr, [esp, +, 12], cl - mov cl, byte, ptr, [esi, +, 13] - mov byte, ptr, [esp, +, 13], cl - mov cl, byte, ptr, [esi, +, 14] - mov byte, ptr, [esp, +, 14], cl - mov cl, byte, ptr, [esi, +, 15] - mov byte, ptr, [esp, +, 15], cl - mov cl, byte, ptr, [esi, +, 16] - mov byte, ptr, [esp, +, 16], cl - mov cl, byte, ptr, [esi, +, 17] - mov byte, ptr, [esp, +, 17], cl - mov cl, byte, ptr, [esi, +, 18] - mov byte, ptr, [esp, +, 18], cl - mov cl, byte, ptr, 
[esi, +, 19] - mov byte, ptr, [esp, +, 19], cl - mov cl, byte, ptr, [esi, +, 20] - mov byte, ptr, [esp, +, 20], cl - mov cl, byte, ptr, [esi, +, 21] - mov byte, ptr, [esp, +, 21], cl - mov cl, byte, ptr, [esi, +, 22] - mov byte, ptr, [esp, +, 22], cl - mov cl, byte, ptr, [esi, +, 23] - mov byte, ptr, [esp, +, 23], cl - mov cl, byte, ptr, [esi, +, 24] - mov byte, ptr, [esp, +, 24], cl - mov cl, byte, ptr, [esi, +, 25] - mov byte, ptr, [esp, +, 25], cl - mov cl, byte, ptr, [esi, +, 26] - mov byte, ptr, [esp, +, 26], cl - mov cl, byte, ptr, [esi, +, 27] - mov byte, ptr, [esp, +, 27], cl - mov cl, byte, ptr, [esi, +, 28] - mov byte, ptr, [esp, +, 28], cl - mov cl, byte, ptr, [esi, +, 29] - mov byte, ptr, [esp, +, 29], cl - mov cl, byte, ptr, [esi, +, 30] - mov byte, ptr, [esp, +, 30], cl - mov cl, byte, ptr, [esi, +, 31] - mov byte, ptr, [esp, +, 31], cl - movups xmm0, xmmword, ptr, [esp] - movups xmmword, ptr, [eax], xmm0 - mov ecx, dword, ptr, [esp, +, 16] - mov dword, ptr, [eax, +, 16], ecx - movzx ecx, word, ptr, [esp, +, 20] - mov word, ptr, [eax, +, 20], cx - mov cl, byte, ptr, [esp, +, 22] - mov byte, ptr, [eax, +, 22], cl - mov cl, byte, ptr, [esp, +, 23] - mov byte, ptr, [eax, +, 23], cl - mov cl, byte, ptr, [esp, +, 24] - mov byte, ptr, [eax, +, 24], cl - mov cl, byte, ptr, [esp, +, 25] - mov byte, ptr, [eax, +, 25], cl - mov cl, byte, ptr, [esp, +, 26] - mov byte, ptr, [eax, +, 26], cl - mov cl, byte, ptr, [esp, +, 27] - mov byte, ptr, [eax, +, 27], cl - mov cl, byte, ptr, [esp, +, 28] - mov byte, ptr, [eax, +, 28], cl - mov cl, byte, ptr, [esp, +, 29] - mov byte, ptr, [eax, +, 29], cl - mov cl, byte, ptr, [esp, +, 30] - mov byte, ptr, [eax, +, 30], cl - mov cl, byte, ptr, [esp, +, 31] - mov byte, ptr, [eax, +, 31], cl - #MEMBARRIER - add esp, 32 - pop rbx - ret -.LBB0_10: - mov edx, 32 -.LBB0_11: - mov ecx, dword, ptr, [esi, +, r10d] - mov dword, ptr, [esp, +, r10d], ecx - add edx, -4 - add r10d, 4 - cmp edx, 3 - ja .LBB0_11 -.LBB0_12: - test edx, edx - je 
.LBB0_19 - lea r8d, [rdx, -, 1] - mov ecx, edx - mov edi, r10d - and ecx, 3 - je .LBB0_16 - mov edi, r10d -.LBB0_15: - movzx ebx, byte, ptr, [esi, +, edi] - mov byte, ptr, [esp, +, edi], bl - add edi, 1 - add ecx, -1 - jne .LBB0_15 -.LBB0_16: - cmp r8d, 3 - jb .LBB0_19 - add edx, r10d -.LBB0_18: - movzx ecx, byte, ptr, [esi, +, edi] - mov byte, ptr, [esp, +, edi], cl - movzx ecx, byte, ptr, [esi, +, edi, +, 1] - mov byte, ptr, [esp, +, edi, +, 1], cl - movzx ecx, byte, ptr, [esi, +, edi, +, 2] - mov byte, ptr, [esp, +, edi, +, 2], cl - movzx ecx, byte, ptr, [esi, +, edi, +, 3] - mov byte, ptr, [esp, +, edi, +, 3], cl - add edi, 4 - cmp edx, edi - jne .LBB0_18 -.LBB0_19: + lea ecx, [rsi, +, 31] + lea edx, [rsp, +, 31] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 30] + lea edx, [rsp, +, 30] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 29] + lea edx, [rsp, +, 29] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 28] + lea edx, [rsp, +, 28] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 27] + lea edx, [rsp, +, 27] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 26] + lea edx, [rsp, +, 26] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 25] + lea edx, [rsp, +, 25] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 24] + lea edx, [rsp, +, 24] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 23] + lea edx, [rsp, +, 23] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 22] + lea edx, [rsp, +, 22] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 21] + lea edx, [rsp, +, 21] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, 
[rsi, +, 20] + lea edx, [rsp, +, 20] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 19] + lea edx, [rsp, +, 19] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 18] + lea edx, [rsp, +, 18] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 17] + lea edx, [rsp, +, 17] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 16] + lea edx, [rsp, +, 16] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 15] + lea edx, [rsp, +, 15] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 14] + lea edx, [rsp, +, 14] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 13] + lea edx, [rsp, +, 13] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 12] + lea edx, [rsp, +, 12] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 11] + lea edx, [rsp, +, 11] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 10] + lea edx, [rsp, +, 10] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 9] + lea edx, [rsp, +, 9] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 8] + lea edx, [rsp, +, 8] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 7] + lea edx, [rsp, +, 7] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 6] + lea edx, [rsp, +, 6] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 5] + lea edx, [rsp, +, 5] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 4] + lea edx, [rsp, +, 4] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP 
+ lea ecx, [rsi, +, 3] + lea edx, [rsp, +, 3] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 2] + lea edx, [rsp, +, 2] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + lea ecx, [rsi, +, 1] + lea edx, [rsp, +, 1] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [edx], cl + #NO_APP + mov ecx, esp + #APP + mov dl, byte, ptr, [esi] + mov byte, ptr, [ecx], dl + #NO_APP movups xmm0, xmmword, ptr, [esp] movups xmm1, xmmword, ptr, [esp, +, 16] - movups xmmword, ptr, [eax, +, 16], xmm1 movups xmmword, ptr, [eax], xmm0 + movups xmmword, ptr, [eax, +, 16], xmm1 #MEMBARRIER add esp, 32 - pop rbx ret asm_test::atomic_memcpy_load_align1::read_volatile_acquire_fence: push rbp diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align16 b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align16 index 948e5c4..3323273 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align16 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align16 @@ -1,22 +1,59 @@ asm_test::atomic_memcpy_load_align16::acquire: + sub esp, 40 mov rax, rdi - mov r8d, dword, ptr, [esi, +, 28] - mov r9d, dword, ptr, [esi, +, 24] - mov r10d, dword, ptr, [esi, +, 20] - mov r11d, dword, ptr, [esi, +, 16] - mov edx, dword, ptr, [esi, +, 12] - mov edi, dword, ptr, [esi, +, 8] - mov ecx, dword, ptr, [esi, +, 4] - mov esi, dword, ptr, [esi] - mov dword, ptr, [eax], esi - mov dword, ptr, [eax, +, 4], ecx - mov dword, ptr, [eax, +, 8], edi - mov dword, ptr, [eax, +, 12], edx - mov dword, ptr, [eax, +, 16], r11d - mov dword, ptr, [eax, +, 20], r10d - mov dword, ptr, [eax, +, 24], r9d - mov dword, ptr, [eax, +, 28], r8d + lea ecx, [rsi, +, 28] + lea edx, [rsp, +, 28] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [rsi, +, 24] + lea edx, [rsp, +, 24] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea 
ecx, [rsi, +, 20] + lea edx, [rsp, +, 20] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [rsi, +, 16] + lea edx, [rsp, +, 16] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [rsi, +, 12] + lea edx, [rsp, +, 12] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [rsi, +, 8] + lea edx, [rsp, +, 8] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [rsi, +, 4] + lea edx, [rsp, +, 4] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + mov ecx, esp + #APP + mov edx, dword, ptr, [esi] + mov dword, ptr, [ecx], edx + #NO_APP + movaps xmm0, xmmword, ptr, [esp] + movaps xmm1, xmmword, ptr, [esp, +, 16] + movaps xmmword, ptr, [eax], xmm0 + movaps xmmword, ptr, [eax, +, 16], xmm1 #MEMBARRIER + add esp, 40 ret asm_test::atomic_memcpy_load_align16::read_volatile_acquire_fence: mov rax, rdi diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align2 b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align2 index 13debcf..35273ff 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align2 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align2 @@ -1,153 +1,107 @@ asm_test::atomic_memcpy_load_align2::acquire: - push rbx sub esp, 32 mov rax, rdi - lea r8d, [rsi, +, 3] - and r8d, -4 - mov r10d, r8d - sub r10d, esi - cmp r10d, 32 - ja .LBB4_19 - test r10d, r10d - je .LBB4_2 - mov ecx, esi - not ecx - add ecx, r8d - mov r9d, r10d - and r9d, 3 - xor edi, edi - cmp ecx, 3 - jb .LBB4_7 - mov edx, r10d - and edx, -4 - xor ecx, ecx -.LBB4_6: - movzx ebx, byte, ptr, [esi, +, ecx] - mov byte, ptr, [esp, +, ecx], bl - movzx ebx, byte, ptr, [esi, +, ecx, +, 1] - mov byte, ptr, [esp, +, ecx, +, 1], bl - movzx ebx, byte, ptr, [esi, +, ecx, +, 2] - mov byte, ptr, [esp, +, ecx, +, 2], bl - lea edi, [rcx, +, 4] - movzx ebx, byte, ptr, [esi, 
+, ecx, +, 3] - mov byte, ptr, [esp, +, ecx, +, 3], bl - mov ecx, edi - cmp edx, edi - jne .LBB4_6 -.LBB4_7: - lea edx, [rsi, +, 32] - test r9d, r9d - je .LBB4_10 - lea r11d, [rsp, +, rdi] - add edi, esi - xor ecx, ecx -.LBB4_9: - movzx ebx, byte, ptr, [edi, +, ecx] - mov byte, ptr, [r11d, +, ecx], bl - add ecx, 1 - cmp r9d, ecx - jne .LBB4_9 -.LBB4_10: - sub edx, r8d - cmp edx, 4 - jae .LBB4_3 - jmp .LBB4_11 -.LBB4_19: - movzx ecx, word, ptr, [esi, +, 30] - mov word, ptr, [esp, +, 30], cx - movzx ecx, word, ptr, [esi, +, 28] - mov word, ptr, [esp, +, 28], cx - movzx ecx, word, ptr, [esi, +, 26] - mov word, ptr, [esp, +, 26], cx - movzx ecx, word, ptr, [esi, +, 24] - mov word, ptr, [esp, +, 24], cx - movzx ecx, word, ptr, [esi, +, 22] - mov word, ptr, [esp, +, 22], cx - movzx ecx, word, ptr, [esi, +, 20] - mov word, ptr, [esp, +, 20], cx - movzx ecx, word, ptr, [esi, +, 18] - mov word, ptr, [esp, +, 18], cx - movzx ecx, word, ptr, [esi, +, 16] - mov word, ptr, [esp, +, 16], cx - movzx ecx, word, ptr, [esi, +, 14] - mov word, ptr, [esp, +, 14], cx - movzx ecx, word, ptr, [esi, +, 12] - mov word, ptr, [esp, +, 12], cx - movzx ecx, word, ptr, [esi, +, 10] - mov word, ptr, [esp, +, 10], cx - movzx ecx, word, ptr, [esi, +, 8] - mov word, ptr, [esp, +, 8], cx - movzx ecx, word, ptr, [esi, +, 6] - mov word, ptr, [esp, +, 6], cx - movzx ecx, word, ptr, [esi, +, 4] - mov word, ptr, [esp, +, 4], cx - movzx ecx, word, ptr, [esi, +, 2] - mov word, ptr, [esp, +, 2], cx - movzx ecx, word, ptr, [esi] - mov word, ptr, [esp], cx - jmp .LBB4_18 -.LBB4_2: - mov edx, 32 -.LBB4_3: - mov ecx, dword, ptr, [esi, +, r10d] - mov dword, ptr, [esp, +, r10d], ecx - add edx, -4 - add r10d, 4 - cmp edx, 3 - ja .LBB4_3 -.LBB4_11: - test edx, edx - je .LBB4_18 - lea r8d, [rdx, -, 1] - mov ecx, edx - mov edi, r10d - and ecx, 3 - je .LBB4_15 - mov edi, r10d -.LBB4_14: - movzx ebx, byte, ptr, [esi, +, edi] - mov byte, ptr, [esp, +, edi], bl - add edi, 1 - add ecx, -1 - jne .LBB4_14 -.LBB4_15: - cmp 
r8d, 3 - jb .LBB4_18 - add edx, r10d -.LBB4_17: - movzx ecx, byte, ptr, [esi, +, edi] - mov byte, ptr, [esp, +, edi], cl - movzx ecx, byte, ptr, [esi, +, edi, +, 1] - mov byte, ptr, [esp, +, edi, +, 1], cl - movzx ecx, byte, ptr, [esi, +, edi, +, 2] - mov byte, ptr, [esp, +, edi, +, 2], cl - movzx ecx, byte, ptr, [esi, +, edi, +, 3] - mov byte, ptr, [esp, +, edi, +, 3], cl - add edi, 4 - cmp edx, edi - jne .LBB4_17 -.LBB4_18: - movzx ecx, word, ptr, [esp] - movzx edx, word, ptr, [esp, +, 2] - movzx esi, word, ptr, [esp, +, 4] - movzx edi, word, ptr, [esp, +, 6] - movzx ebx, word, ptr, [esp, +, 8] - movzx r8d, word, ptr, [esp, +, 10] - movzx r9d, word, ptr, [esp, +, 12] - movzx r10d, word, ptr, [esp, +, 14] - movups xmm0, xmmword, ptr, [esp, +, 16] - movups xmmword, ptr, [eax, +, 16], xmm0 - mov word, ptr, [eax], cx - mov word, ptr, [eax, +, 2], dx - mov word, ptr, [eax, +, 4], si - mov word, ptr, [eax, +, 6], di - mov word, ptr, [eax, +, 8], bx - mov word, ptr, [eax, +, 10], r8w - mov word, ptr, [eax, +, 12], r9w - mov word, ptr, [eax, +, 14], r10w + lea ecx, [rsi, +, 30] + lea edx, [rsp, +, 30] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [rsi, +, 28] + lea edx, [rsp, +, 28] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [rsi, +, 26] + lea edx, [rsp, +, 26] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [rsi, +, 24] + lea edx, [rsp, +, 24] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [rsi, +, 22] + lea edx, [rsp, +, 22] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [rsi, +, 20] + lea edx, [rsp, +, 20] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [rsi, +, 18] + lea edx, [rsp, +, 18] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [rsi, +, 16] + lea edx, [rsp, +, 16] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, 
[edx], cx + #NO_APP + lea ecx, [rsi, +, 14] + lea edx, [rsp, +, 14] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [rsi, +, 12] + lea edx, [rsp, +, 12] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [rsi, +, 10] + lea edx, [rsp, +, 10] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [rsi, +, 8] + lea edx, [rsp, +, 8] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [rsi, +, 6] + lea edx, [rsp, +, 6] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [rsi, +, 4] + lea edx, [rsp, +, 4] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + lea ecx, [rsi, +, 2] + lea edx, [rsp, +, 2] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [edx], cx + #NO_APP + mov ecx, esp + #APP + mov dx, word, ptr, [esi] + mov word, ptr, [ecx], dx + #NO_APP + movups xmm0, xmmword, ptr, [esp] + movups xmm1, xmmword, ptr, [esp, +, 16] + movups xmmword, ptr, [eax], xmm0 + movups xmmword, ptr, [eax, +, 16], xmm1 #MEMBARRIER add esp, 32 - pop rbx ret asm_test::atomic_memcpy_load_align2::read_volatile_acquire_fence: push rbp diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align4 b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align4 index a03931e..c2eccdf 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align4 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align4 @@ -1,22 +1,59 @@ asm_test::atomic_memcpy_load_align4::acquire: + sub esp, 32 mov rax, rdi - mov r8d, dword, ptr, [esi, +, 28] - mov r9d, dword, ptr, [esi, +, 24] - mov r10d, dword, ptr, [esi, +, 20] - mov r11d, dword, ptr, [esi, +, 16] - mov edx, dword, ptr, [esi, +, 12] - mov edi, dword, ptr, [esi, +, 8] - mov ecx, dword, ptr, [esi, +, 4] - mov esi, dword, ptr, [esi] - mov dword, ptr, [eax], esi - mov dword, ptr, [eax, +, 4], ecx - mov dword, 
ptr, [eax, +, 8], edi - mov dword, ptr, [eax, +, 12], edx - mov dword, ptr, [eax, +, 16], r11d - mov dword, ptr, [eax, +, 20], r10d - mov dword, ptr, [eax, +, 24], r9d - mov dword, ptr, [eax, +, 28], r8d + lea ecx, [rsi, +, 28] + lea edx, [rsp, +, 28] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [rsi, +, 24] + lea edx, [rsp, +, 24] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [rsi, +, 20] + lea edx, [rsp, +, 20] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [rsi, +, 16] + lea edx, [rsp, +, 16] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [rsi, +, 12] + lea edx, [rsp, +, 12] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [rsi, +, 8] + lea edx, [rsp, +, 8] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [rsi, +, 4] + lea edx, [rsp, +, 4] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + mov ecx, esp + #APP + mov edx, dword, ptr, [esi] + mov dword, ptr, [ecx], edx + #NO_APP + movups xmm0, xmmword, ptr, [esp] + movups xmm1, xmmword, ptr, [esp, +, 16] + movups xmmword, ptr, [eax], xmm0 + movups xmmword, ptr, [eax, +, 16], xmm1 #MEMBARRIER + add esp, 32 ret asm_test::atomic_memcpy_load_align4::read_volatile_acquire_fence: mov rax, rdi diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align8 b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align8 index 6cd37a0..cce1c87 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align8 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_load_align8 @@ -1,22 +1,59 @@ asm_test::atomic_memcpy_load_align8::acquire: + sub esp, 32 mov rax, rdi - mov r8d, dword, ptr, [esi, +, 28] - mov r9d, dword, ptr, [esi, +, 24] - mov r10d, dword, ptr, [esi, +, 20] - mov r11d, dword, ptr, [esi, +, 16] 
- mov edx, dword, ptr, [esi, +, 12] - mov edi, dword, ptr, [esi, +, 8] - mov ecx, dword, ptr, [esi, +, 4] - mov esi, dword, ptr, [esi] - mov dword, ptr, [eax], esi - mov dword, ptr, [eax, +, 4], ecx - mov dword, ptr, [eax, +, 8], edi - mov dword, ptr, [eax, +, 12], edx - mov dword, ptr, [eax, +, 16], r11d - mov dword, ptr, [eax, +, 20], r10d - mov dword, ptr, [eax, +, 24], r9d - mov dword, ptr, [eax, +, 28], r8d + lea ecx, [rsi, +, 28] + lea edx, [rsp, +, 28] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [rsi, +, 24] + lea edx, [rsp, +, 24] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [rsi, +, 20] + lea edx, [rsp, +, 20] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [rsi, +, 16] + lea edx, [rsp, +, 16] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [rsi, +, 12] + lea edx, [rsp, +, 12] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [rsi, +, 8] + lea edx, [rsp, +, 8] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + lea ecx, [rsi, +, 4] + lea edx, [rsp, +, 4] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [edx], ecx + #NO_APP + mov ecx, esp + #APP + mov edx, dword, ptr, [esi] + mov dword, ptr, [ecx], edx + #NO_APP + movups xmm0, xmmword, ptr, [esp] + movups xmm1, xmmword, ptr, [esp, +, 16] + movups xmmword, ptr, [eax], xmm0 + movups xmmword, ptr, [eax, +, 16], xmm1 #MEMBARRIER + add esp, 32 ret asm_test::atomic_memcpy_load_align8::read_volatile_acquire_fence: mov rax, rdi diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align1 b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align1 index dd554f1..a4b5037 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align1 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align1 @@ -9,164 +9,197 @@ 
asm_test::atomic_memcpy_store_align1::release: movaps xmm1, xmmword, ptr, [esp, +, 48] movaps xmmword, ptr, [esp, +, 16], xmm1 movaps xmmword, ptr, [esp], xmm0 - lea r8d, [rdi, +, 3] - and r8d, -4 - mov r11d, r8d - sub r11d, edi - cmp r11d, 33 - jae .LBB2_1 - test r11d, r11d - je .LBB2_3 - mov ecx, edi - not ecx - add ecx, r8d - mov r9d, r11d - and r9d, 3 - xor esi, esi - cmp ecx, 3 - jb .LBB2_8 - mov ecx, r11d - and ecx, -4 - xor edx, edx -.LBB2_7: - movzx eax, byte, ptr, [esp, +, edx] - mov byte, ptr, [edi, +, edx], al - movzx eax, byte, ptr, [esp, +, edx, +, 1] - mov byte, ptr, [edi, +, edx, +, 1], al - movzx eax, byte, ptr, [esp, +, edx, +, 2] - mov byte, ptr, [edi, +, edx, +, 2], al - lea esi, [rdx, +, 4] - movzx eax, byte, ptr, [esp, +, edx, +, 3] - mov byte, ptr, [edi, +, edx, +, 3], al - mov edx, esi - cmp ecx, esi - jne .LBB2_7 -.LBB2_8: - lea ecx, [rdi, +, 32] - test r9d, r9d - je .LBB2_11 - lea r10d, [rdi, +, rsi] - add esi, esp - xor edx, edx -.LBB2_10: - movzx eax, byte, ptr, [esi, +, edx] - mov byte, ptr, [r10d, +, edx], al - add edx, 1 - cmp r9d, edx - jne .LBB2_10 -.LBB2_11: - sub ecx, r8d - cmp ecx, 4 - jae .LBB2_4 - jmp .LBB2_12 -.LBB2_1: - mov al, byte, ptr, [esp] + lea eax, [rsp, +, 16] + lea ecx, [rdi, +, 31] + lea edx, [rsp, +, 31] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [rdi, +, 30] + lea edx, [rsp, +, 30] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [rdi, +, 29] + lea edx, [rsp, +, 29] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [rdi, +, 28] + lea edx, [rsp, +, 28] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [rdi, +, 27] + lea edx, [rsp, +, 27] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [rdi, +, 26] + lea edx, [rsp, +, 26] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [rdi, +, 25] + lea edx, [rsp, +, 25] + #APP + 
mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [rdi, +, 24] + lea edx, [rsp, +, 24] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [rdi, +, 23] + lea edx, [rsp, +, 23] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [rdi, +, 22] + lea edx, [rsp, +, 22] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [rdi, +, 21] + lea edx, [rsp, +, 21] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [rdi, +, 20] + lea edx, [rsp, +, 20] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [rdi, +, 19] + lea edx, [rsp, +, 19] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [rdi, +, 18] + lea edx, [rsp, +, 18] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [rdi, +, 17] + lea edx, [rsp, +, 17] + #APP + mov dl, byte, ptr, [edx] + mov byte, ptr, [ecx], dl + #NO_APP + lea ecx, [rdi, +, 16] + #APP + mov al, byte, ptr, [eax] + mov byte, ptr, [ecx], al + #NO_APP + lea eax, [rdi, +, 15] + lea ecx, [rsp, +, 15] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [eax], cl + #NO_APP + lea eax, [rdi, +, 14] + lea ecx, [rsp, +, 14] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [eax], cl + #NO_APP + lea eax, [rdi, +, 13] + lea ecx, [rsp, +, 13] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [eax], cl + #NO_APP + lea eax, [rdi, +, 12] + lea ecx, [rsp, +, 12] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [eax], cl + #NO_APP + lea eax, [rdi, +, 11] + lea ecx, [rsp, +, 11] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [eax], cl + #NO_APP + lea eax, [rdi, +, 10] + lea ecx, [rsp, +, 10] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [eax], cl + #NO_APP + lea eax, [rdi, +, 9] + lea ecx, [rsp, +, 9] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [eax], cl + #NO_APP + lea eax, [rdi, +, 8] + lea ecx, [rsp, +, 8] + #APP + mov 
cl, byte, ptr, [ecx] + mov byte, ptr, [eax], cl + #NO_APP + lea eax, [rdi, +, 7] + lea ecx, [rsp, +, 7] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [eax], cl + #NO_APP + lea eax, [rdi, +, 6] + lea ecx, [rsp, +, 6] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [eax], cl + #NO_APP + lea eax, [rdi, +, 5] + lea ecx, [rsp, +, 5] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [eax], cl + #NO_APP + lea eax, [rdi, +, 4] + lea ecx, [rsp, +, 4] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [eax], cl + #NO_APP + lea eax, [rdi, +, 3] + lea ecx, [rsp, +, 3] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [eax], cl + #NO_APP + lea eax, [rdi, +, 2] + lea ecx, [rsp, +, 2] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [eax], cl + #NO_APP + lea eax, [rdi, +, 1] + lea ecx, [rsp, +, 1] + #APP + mov cl, byte, ptr, [ecx] + mov byte, ptr, [eax], cl + #NO_APP + mov eax, esp + #APP + mov al, byte, ptr, [eax] mov byte, ptr, [edi], al - mov al, byte, ptr, [esp, +, 1] - mov byte, ptr, [edi, +, 1], al - mov al, byte, ptr, [esp, +, 2] - mov byte, ptr, [edi, +, 2], al - mov al, byte, ptr, [esp, +, 3] - mov byte, ptr, [edi, +, 3], al - mov al, byte, ptr, [esp, +, 4] - mov byte, ptr, [edi, +, 4], al - mov al, byte, ptr, [esp, +, 5] - mov byte, ptr, [edi, +, 5], al - mov al, byte, ptr, [esp, +, 6] - mov byte, ptr, [edi, +, 6], al - mov al, byte, ptr, [esp, +, 7] - mov byte, ptr, [edi, +, 7], al - mov al, byte, ptr, [esp, +, 8] - mov byte, ptr, [edi, +, 8], al - mov al, byte, ptr, [esp, +, 9] - mov byte, ptr, [edi, +, 9], al - mov al, byte, ptr, [esp, +, 10] - mov byte, ptr, [edi, +, 10], al - mov al, byte, ptr, [esp, +, 11] - mov byte, ptr, [edi, +, 11], al - mov al, byte, ptr, [esp, +, 12] - mov byte, ptr, [edi, +, 12], al - mov al, byte, ptr, [esp, +, 13] - mov byte, ptr, [edi, +, 13], al - mov al, byte, ptr, [esp, +, 14] - mov byte, ptr, [edi, +, 14], al - mov al, byte, ptr, [esp, +, 15] - mov byte, ptr, [edi, +, 15], al - mov al, byte, ptr, [esp, +, 16] - mov byte, 
ptr, [edi, +, 16], al - mov al, byte, ptr, [esp, +, 17] - mov byte, ptr, [edi, +, 17], al - mov al, byte, ptr, [esp, +, 18] - mov byte, ptr, [edi, +, 18], al - mov al, byte, ptr, [esp, +, 19] - mov byte, ptr, [edi, +, 19], al - mov al, byte, ptr, [esp, +, 20] - mov byte, ptr, [edi, +, 20], al - mov al, byte, ptr, [esp, +, 21] - mov byte, ptr, [edi, +, 21], al - mov al, byte, ptr, [esp, +, 22] - mov byte, ptr, [edi, +, 22], al - mov al, byte, ptr, [esp, +, 23] - mov byte, ptr, [edi, +, 23], al - mov al, byte, ptr, [esp, +, 24] - mov byte, ptr, [edi, +, 24], al - mov al, byte, ptr, [esp, +, 25] - mov byte, ptr, [edi, +, 25], al - mov al, byte, ptr, [esp, +, 26] - mov byte, ptr, [edi, +, 26], al - mov al, byte, ptr, [esp, +, 27] - mov byte, ptr, [edi, +, 27], al - mov al, byte, ptr, [esp, +, 28] - mov byte, ptr, [edi, +, 28], al - mov al, byte, ptr, [esp, +, 29] - mov byte, ptr, [edi, +, 29], al - mov al, byte, ptr, [esp, +, 30] - mov byte, ptr, [edi, +, 30], al - mov al, byte, ptr, [esp, +, 31] - mov byte, ptr, [edi, +, 31], al - add esp, 72 - ret -.LBB2_3: - mov ecx, 32 -.LBB2_4: - mov eax, dword, ptr, [esp, +, r11d] - mov dword, ptr, [edi, +, r11d], eax - add ecx, -4 - add r11d, 4 - cmp ecx, 3 - ja .LBB2_4 -.LBB2_12: - test ecx, ecx - je .LBB2_19 - lea r8d, [rcx, -, 1] - mov esi, ecx - mov edx, r11d - and esi, 3 - je .LBB2_16 - mov edx, r11d -.LBB2_15: - movzx eax, byte, ptr, [esp, +, edx] - mov byte, ptr, [edi, +, edx], al - add edx, 1 - add esi, -1 - jne .LBB2_15 -.LBB2_16: - cmp r8d, 3 - jb .LBB2_19 - add ecx, r11d -.LBB2_18: - movzx eax, byte, ptr, [esp, +, edx] - mov byte, ptr, [edi, +, edx], al - movzx eax, byte, ptr, [esp, +, edx, +, 1] - mov byte, ptr, [edi, +, edx, +, 1], al - movzx eax, byte, ptr, [esp, +, edx, +, 2] - mov byte, ptr, [edi, +, edx, +, 2], al - movzx eax, byte, ptr, [esp, +, edx, +, 3] - mov byte, ptr, [edi, +, edx, +, 3], al - add edx, 4 - cmp ecx, edx - jne .LBB2_18 -.LBB2_19: + #NO_APP add esp, 72 ret 
asm_test::atomic_memcpy_store_align1::write_volatile_release_fence: diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align16 b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align16 index 9410d30..9dc5c66 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align16 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align16 @@ -1,21 +1,62 @@ asm_test::atomic_memcpy_store_align16::release: - mov r8d, dword, ptr, [esi] - mov r9d, dword, ptr, [esi, +, 4] - mov r10d, dword, ptr, [esi, +, 8] - mov r11d, dword, ptr, [esi, +, 12] - mov ecx, dword, ptr, [esi, +, 16] - mov edx, dword, ptr, [esi, +, 20] - mov eax, dword, ptr, [esi, +, 24] - mov esi, dword, ptr, [esi, +, 28] + sub esp, 72 + movaps xmm0, xmmword, ptr, [esi] + movaps xmm1, xmmword, ptr, [esi, +, 16] + movaps xmmword, ptr, [esp, +, 48], xmm1 + movaps xmmword, ptr, [esp, +, 32], xmm0 #MEMBARRIER - mov dword, ptr, [edi, +, 28], esi - mov dword, ptr, [edi, +, 24], eax - mov dword, ptr, [edi, +, 20], edx - mov dword, ptr, [edi, +, 16], ecx - mov dword, ptr, [edi, +, 12], r11d - mov dword, ptr, [edi, +, 8], r10d - mov dword, ptr, [edi, +, 4], r9d - mov dword, ptr, [edi], r8d + movaps xmm0, xmmword, ptr, [esp, +, 32] + movaps xmm1, xmmword, ptr, [esp, +, 48] + movaps xmmword, ptr, [esp, +, 16], xmm1 + movaps xmmword, ptr, [esp], xmm0 + lea eax, [rsp, +, 16] + lea ecx, [rdi, +, 28] + lea edx, [rsp, +, 28] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [ecx], edx + #NO_APP + lea ecx, [rdi, +, 24] + lea edx, [rsp, +, 24] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [ecx], edx + #NO_APP + lea ecx, [rdi, +, 20] + lea edx, [rsp, +, 20] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [ecx], edx + #NO_APP + lea ecx, [rdi, +, 16] + #APP + mov eax, dword, ptr, [eax] + mov dword, ptr, [ecx], eax + #NO_APP + lea eax, [rdi, +, 12] + lea ecx, [rsp, +, 12] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, 
[eax], ecx + #NO_APP + lea eax, [rdi, +, 8] + lea ecx, [rsp, +, 8] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [eax], ecx + #NO_APP + lea eax, [rdi, +, 4] + lea ecx, [rsp, +, 4] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [eax], ecx + #NO_APP + mov eax, esp + #APP + mov eax, dword, ptr, [eax] + mov dword, ptr, [edi], eax + #NO_APP + add esp, 72 ret asm_test::atomic_memcpy_store_align16::write_volatile_release_fence: sub esp, 40 diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align2 b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align2 index 7f09d98..d9e161a 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align2 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align2 @@ -9,132 +9,101 @@ asm_test::atomic_memcpy_store_align2::release: movaps xmm1, xmmword, ptr, [esp, +, 48] movaps xmmword, ptr, [esp, +, 16], xmm1 movaps xmmword, ptr, [esp], xmm0 - lea r8d, [rdi, +, 3] - and r8d, -4 - mov r11d, r8d - sub r11d, edi - cmp r11d, 32 - ja .LBB6_19 - test r11d, r11d - je .LBB6_2 - mov ecx, edi - not ecx - add ecx, r8d - mov r9d, r11d - and r9d, 3 - xor esi, esi - cmp ecx, 3 - jb .LBB6_7 - mov ecx, r11d - and ecx, -4 - xor edx, edx -.LBB6_6: - movzx eax, byte, ptr, [esp, +, edx] - mov byte, ptr, [edi, +, edx], al - movzx eax, byte, ptr, [esp, +, edx, +, 1] - mov byte, ptr, [edi, +, edx, +, 1], al - movzx eax, byte, ptr, [esp, +, edx, +, 2] - mov byte, ptr, [edi, +, edx, +, 2], al - lea esi, [rdx, +, 4] - movzx eax, byte, ptr, [esp, +, edx, +, 3] - mov byte, ptr, [edi, +, edx, +, 3], al - mov edx, esi - cmp ecx, esi - jne .LBB6_6 -.LBB6_7: - lea ecx, [rdi, +, 32] - test r9d, r9d - je .LBB6_10 - lea r10d, [rdi, +, rsi] - add esi, esp - xor edx, edx -.LBB6_9: - movzx eax, byte, ptr, [esi, +, edx] - mov byte, ptr, [r10d, +, edx], al - add edx, 1 - cmp r9d, edx - jne .LBB6_9 -.LBB6_10: - sub ecx, r8d - cmp ecx, 4 - jae .LBB6_3 - jmp .LBB6_11 -.LBB6_19: 
- movzx eax, word, ptr, [esp, +, 30] - mov word, ptr, [edi, +, 30], ax - movzx eax, word, ptr, [esp, +, 28] - mov word, ptr, [edi, +, 28], ax - movzx eax, word, ptr, [esp, +, 26] - mov word, ptr, [edi, +, 26], ax - movzx eax, word, ptr, [esp, +, 24] - mov word, ptr, [edi, +, 24], ax - movzx eax, word, ptr, [esp, +, 22] - mov word, ptr, [edi, +, 22], ax - movzx eax, word, ptr, [esp, +, 20] - mov word, ptr, [edi, +, 20], ax - movzx eax, word, ptr, [esp, +, 18] - mov word, ptr, [edi, +, 18], ax - movzx eax, word, ptr, [esp, +, 16] - mov word, ptr, [edi, +, 16], ax - movzx eax, word, ptr, [esp, +, 14] - mov word, ptr, [edi, +, 14], ax - movzx eax, word, ptr, [esp, +, 12] - mov word, ptr, [edi, +, 12], ax - movzx eax, word, ptr, [esp, +, 10] - mov word, ptr, [edi, +, 10], ax - movzx eax, word, ptr, [esp, +, 8] - mov word, ptr, [edi, +, 8], ax - movzx eax, word, ptr, [esp, +, 6] - mov word, ptr, [edi, +, 6], ax - movzx eax, word, ptr, [esp, +, 4] - mov word, ptr, [edi, +, 4], ax - movzx eax, word, ptr, [esp, +, 2] - mov word, ptr, [edi, +, 2], ax - movzx eax, word, ptr, [esp] + lea eax, [rsp, +, 16] + lea ecx, [rdi, +, 30] + lea edx, [rsp, +, 30] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + lea ecx, [rdi, +, 28] + lea edx, [rsp, +, 28] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + lea ecx, [rdi, +, 26] + lea edx, [rsp, +, 26] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + lea ecx, [rdi, +, 24] + lea edx, [rsp, +, 24] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + lea ecx, [rdi, +, 22] + lea edx, [rsp, +, 22] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + lea ecx, [rdi, +, 20] + lea edx, [rsp, +, 20] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + lea ecx, [rdi, +, 18] + lea edx, [rsp, +, 18] + #APP + mov dx, word, ptr, [edx] + mov word, ptr, [ecx], dx + #NO_APP + lea ecx, [rdi, +, 16] + #APP + mov ax, word, ptr, 
[eax] + mov word, ptr, [ecx], ax + #NO_APP + lea eax, [rdi, +, 14] + lea ecx, [rsp, +, 14] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [eax], cx + #NO_APP + lea eax, [rdi, +, 12] + lea ecx, [rsp, +, 12] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [eax], cx + #NO_APP + lea eax, [rdi, +, 10] + lea ecx, [rsp, +, 10] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [eax], cx + #NO_APP + lea eax, [rdi, +, 8] + lea ecx, [rsp, +, 8] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [eax], cx + #NO_APP + lea eax, [rdi, +, 6] + lea ecx, [rsp, +, 6] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [eax], cx + #NO_APP + lea eax, [rdi, +, 4] + lea ecx, [rsp, +, 4] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [eax], cx + #NO_APP + lea eax, [rdi, +, 2] + lea ecx, [rsp, +, 2] + #APP + mov cx, word, ptr, [ecx] + mov word, ptr, [eax], cx + #NO_APP + mov eax, esp + #APP + mov ax, word, ptr, [eax] mov word, ptr, [edi], ax - add esp, 72 - ret -.LBB6_2: - mov ecx, 32 -.LBB6_3: - mov eax, dword, ptr, [esp, +, r11d] - mov dword, ptr, [edi, +, r11d], eax - add ecx, -4 - add r11d, 4 - cmp ecx, 3 - ja .LBB6_3 -.LBB6_11: - test ecx, ecx - je .LBB6_18 - lea r8d, [rcx, -, 1] - mov esi, ecx - mov edx, r11d - and esi, 3 - je .LBB6_15 - mov edx, r11d -.LBB6_14: - movzx eax, byte, ptr, [esp, +, edx] - mov byte, ptr, [edi, +, edx], al - add edx, 1 - add esi, -1 - jne .LBB6_14 -.LBB6_15: - cmp r8d, 3 - jb .LBB6_18 - add ecx, r11d -.LBB6_17: - movzx eax, byte, ptr, [esp, +, edx] - mov byte, ptr, [edi, +, edx], al - movzx eax, byte, ptr, [esp, +, edx, +, 1] - mov byte, ptr, [edi, +, edx, +, 1], al - movzx eax, byte, ptr, [esp, +, edx, +, 2] - mov byte, ptr, [edi, +, edx, +, 2], al - movzx eax, byte, ptr, [esp, +, edx, +, 3] - mov byte, ptr, [edi, +, edx, +, 3], al - add edx, 4 - cmp ecx, edx - jne .LBB6_17 -.LBB6_18: + #NO_APP add esp, 72 ret asm_test::atomic_memcpy_store_align2::write_volatile_release_fence: diff --git 
a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align4 b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align4 index b746c2f..3df7ed0 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align4 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align4 @@ -1,21 +1,62 @@ asm_test::atomic_memcpy_store_align4::release: - mov r8d, dword, ptr, [esi] - mov r9d, dword, ptr, [esi, +, 4] - mov r10d, dword, ptr, [esi, +, 8] - mov r11d, dword, ptr, [esi, +, 12] - mov ecx, dword, ptr, [esi, +, 16] - mov edx, dword, ptr, [esi, +, 20] - mov eax, dword, ptr, [esi, +, 24] - mov esi, dword, ptr, [esi, +, 28] + sub esp, 72 + movups xmm0, xmmword, ptr, [esi] + movups xmm1, xmmword, ptr, [esi, +, 16] + movaps xmmword, ptr, [esp, +, 48], xmm1 + movaps xmmword, ptr, [esp, +, 32], xmm0 #MEMBARRIER - mov dword, ptr, [edi, +, 28], esi - mov dword, ptr, [edi, +, 24], eax - mov dword, ptr, [edi, +, 20], edx - mov dword, ptr, [edi, +, 16], ecx - mov dword, ptr, [edi, +, 12], r11d - mov dword, ptr, [edi, +, 8], r10d - mov dword, ptr, [edi, +, 4], r9d - mov dword, ptr, [edi], r8d + movaps xmm0, xmmword, ptr, [esp, +, 32] + movaps xmm1, xmmword, ptr, [esp, +, 48] + movaps xmmword, ptr, [esp, +, 16], xmm1 + movaps xmmword, ptr, [esp], xmm0 + lea eax, [rsp, +, 16] + lea ecx, [rdi, +, 28] + lea edx, [rsp, +, 28] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [ecx], edx + #NO_APP + lea ecx, [rdi, +, 24] + lea edx, [rsp, +, 24] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [ecx], edx + #NO_APP + lea ecx, [rdi, +, 20] + lea edx, [rsp, +, 20] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [ecx], edx + #NO_APP + lea ecx, [rdi, +, 16] + #APP + mov eax, dword, ptr, [eax] + mov dword, ptr, [ecx], eax + #NO_APP + lea eax, [rdi, +, 12] + lea ecx, [rsp, +, 12] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [eax], ecx + #NO_APP + lea eax, [rdi, +, 8] + lea ecx, [rsp, +, 8] + #APP + mov ecx, 
dword, ptr, [ecx] + mov dword, ptr, [eax], ecx + #NO_APP + lea eax, [rdi, +, 4] + lea ecx, [rsp, +, 4] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [eax], ecx + #NO_APP + mov eax, esp + #APP + mov eax, dword, ptr, [eax] + mov dword, ptr, [edi], eax + #NO_APP + add esp, 72 ret asm_test::atomic_memcpy_store_align4::write_volatile_release_fence: sub esp, 40 diff --git a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align8 b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align8 index 95b54e8..6acff44 100644 --- a/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align8 +++ b/tests/asm-test/asm/x86_64-unknown-linux-gnux32/atomic_memcpy_store_align8 @@ -1,21 +1,62 @@ asm_test::atomic_memcpy_store_align8::release: - mov r8d, dword, ptr, [esi] - mov r9d, dword, ptr, [esi, +, 4] - mov r10d, dword, ptr, [esi, +, 8] - mov r11d, dword, ptr, [esi, +, 12] - mov ecx, dword, ptr, [esi, +, 16] - mov edx, dword, ptr, [esi, +, 20] - mov eax, dword, ptr, [esi, +, 24] - mov esi, dword, ptr, [esi, +, 28] + sub esp, 72 + movups xmm0, xmmword, ptr, [esi] + movups xmm1, xmmword, ptr, [esi, +, 16] + movaps xmmword, ptr, [esp, +, 48], xmm1 + movaps xmmword, ptr, [esp, +, 32], xmm0 #MEMBARRIER - mov dword, ptr, [edi, +, 28], esi - mov dword, ptr, [edi, +, 24], eax - mov dword, ptr, [edi, +, 20], edx - mov dword, ptr, [edi, +, 16], ecx - mov dword, ptr, [edi, +, 12], r11d - mov dword, ptr, [edi, +, 8], r10d - mov dword, ptr, [edi, +, 4], r9d - mov dword, ptr, [edi], r8d + movaps xmm0, xmmword, ptr, [esp, +, 32] + movaps xmm1, xmmword, ptr, [esp, +, 48] + movaps xmmword, ptr, [esp, +, 16], xmm1 + movaps xmmword, ptr, [esp], xmm0 + lea eax, [rsp, +, 16] + lea ecx, [rdi, +, 28] + lea edx, [rsp, +, 28] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [ecx], edx + #NO_APP + lea ecx, [rdi, +, 24] + lea edx, [rsp, +, 24] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [ecx], edx + #NO_APP + lea ecx, [rdi, +, 20] + lea edx, 
[rsp, +, 20] + #APP + mov edx, dword, ptr, [edx] + mov dword, ptr, [ecx], edx + #NO_APP + lea ecx, [rdi, +, 16] + #APP + mov eax, dword, ptr, [eax] + mov dword, ptr, [ecx], eax + #NO_APP + lea eax, [rdi, +, 12] + lea ecx, [rsp, +, 12] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [eax], ecx + #NO_APP + lea eax, [rdi, +, 8] + lea ecx, [rsp, +, 8] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [eax], ecx + #NO_APP + lea eax, [rdi, +, 4] + lea ecx, [rsp, +, 4] + #APP + mov ecx, dword, ptr, [ecx] + mov dword, ptr, [eax], ecx + #NO_APP + mov eax, esp + #APP + mov eax, dword, ptr, [eax] + mov dword, ptr, [edi], eax + #NO_APP + add esp, 72 ret asm_test::atomic_memcpy_store_align8::write_volatile_release_fence: sub esp, 40 diff --git a/tests/asm-test/src/main.rs b/tests/asm-test/src/main.rs index a343aa1..f643b1d 100644 --- a/tests/asm-test/src/main.rs +++ b/tests/asm-test/src/main.rs @@ -8,6 +8,38 @@ use fs_err as fs; use indexmap::{IndexMap, IndexSet}; use lexopt::prelude::*; +// https://github.com/taiki-e/atomic-maybe-uninit#platform-support +#[cfg(feature = "atomic-maybe-uninit")] +const DEFAULT_TARGETS: &[&str] = &[ + // All tier 1 or tier 2 linux (GNU) target + // rustup target list | grep -e '-linux-gnu' | sed 's/ .*$//g' | sed 's/^/"/g' | sed 's/$/",/g' + "aarch64-unknown-linux-gnu", + "arm-unknown-linux-gnueabi", + "arm-unknown-linux-gnueabihf", + // "armv5te-unknown-linux-gnueabi", + "armv7-unknown-linux-gnueabi", + "armv7-unknown-linux-gnueabihf", + "i586-unknown-linux-gnu", + "i686-unknown-linux-gnu", + "mips-unknown-linux-gnu", + "mips64-unknown-linux-gnuabi64", + "mips64el-unknown-linux-gnuabi64", + "mipsel-unknown-linux-gnu", + "powerpc-unknown-linux-gnu", + "powerpc64-unknown-linux-gnu", + // "powerpc64le-unknown-linux-gnu", // TODO: cargo-asm panic "thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: ParseIntError { kind: InvalidDigit }', src/asm/ast.rs:172:44" + "riscv64gc-unknown-linux-gnu", + "s390x-unknown-linux-gnu", 
+ // "sparc64-unknown-linux-gnu", + "thumbv7neon-unknown-linux-gnueabihf", + "x86_64-unknown-linux-gnu", + "x86_64-unknown-linux-gnux32", + // Other targets + "riscv32i-unknown-none-elf", + "riscv32imac-unknown-none-elf", + "riscv32imc-unknown-none-elf", +]; +#[cfg(not(feature = "atomic-maybe-uninit"))] const DEFAULT_TARGETS: &[&str] = &[ // All tier 1 or tier 2 linux (GNU) target // rustup target list | grep -e '-linux-gnu' | sed 's/ .*$//g' | sed 's/^/"/g' | sed 's/$/",/g' @@ -120,12 +152,26 @@ fn main() -> Result<()> { println!(" {}", m); let mut out = String::new(); for func in functions { + #[cfg(not(feature = "atomic-maybe-uninit"))] + let mut cmd = cmd!( + "cargo", + "asm", + "--no-color", + "--lib", + "--no-default-features", + "--target", + target, + func + ); + #[cfg(feature = "atomic-maybe-uninit")] let mut cmd = cmd!( "cargo", "asm", "--no-color", "--lib", "--no-default-features", + "--features", + "atomic-maybe-uninit", "--target", target, func diff --git a/tools/build.sh b/tools/build.sh index 8bb346e..5c27851 100755 --- a/tools/build.sh +++ b/tools/build.sh @@ -46,7 +46,7 @@ if [[ "${rustc_version}" == *"nightly"* ]] || [[ "${rustc_version}" == *"dev"* ] # -Z check-cfg requires 1.63.0-nightly 1.[0-5]* | 1.6[0-2].*) ;; *) - check_cfg="-Z unstable-options --check-cfg=names(atomic_memcpy_unsafe_volatile)" + check_cfg="-Z unstable-options --check-cfg=names(docsrs,atomic_memcpy_unsafe_volatile)" rustup ${pre_args[@]+"${pre_args[@]}"} component add clippy &>/dev/null base_args=(${pre_args[@]+"${pre_args[@]}"} hack clippy -Z check-cfg="names,values,output,features") ;; @@ -92,6 +92,12 @@ build() { RUSTFLAGS="${target_rustflags}" \ x cargo "${args[@]}" --release --manifest-path tests/no-std/Cargo.toml + case "${target}" in + # https://github.com/taiki-e/atomic-maybe-uninit#platform-support + x86_64* | i*86* | aarch64* | arm* | thumb* | riscv* | mips* | powerpc* | s390x*) ;; + *) args+=(--exclude-features "atomic-maybe-uninit") ;; + esac + 
RUSTFLAGS="${target_rustflags}" \ x cargo "${args[@]}" --feature-powerset --optional-deps --no-dev-deps --manifest-path Cargo.toml RUSTFLAGS="${target_rustflags}" \