Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 80 additions & 34 deletions builtins-test/tests/lse.rs
Original file line number Diff line number Diff line change
@@ -1,30 +1,70 @@
#![feature(decl_macro)] // so we can use pub(super)
#![feature(macro_metavar_expr_concat)]
#![cfg(all(target_arch = "aarch64", target_os = "linux"))]
#![cfg(target_arch = "aarch64")]

use std::sync::Mutex;

use compiler_builtins::aarch64_outline_atomics::{get_have_lse_atomics, set_have_lse_atomics};
use compiler_builtins::int::{Int, MinInt};
use compiler_builtins::{foreach_bytes, foreach_ordering};

/// Runs `f` with the global "have LSE atomics" flag forced to `use_lse`, then
/// puts the flag back to the value it had on entry.
///
/// A process-wide lock serializes callers so that tests running in parallel
/// never observe each other's setting. The previous flag value is restored
/// even if `f` panics, and a lock poisoned by an earlier panicking caller is
/// recovered instead of propagated, so one failing test cannot cascade into
/// unrelated failures in the other tests.
///
/// # Panics
///
/// Panics if LSE would be in effect (because `use_lse` is `true`, or because
/// the flag was already enabled and will be restored) while the CPU does not
/// report the `lse` feature. Any panic raised by `f` is propagated.
#[track_caller]
fn with_maybe_lse_atomics(use_lse: bool, f: impl FnOnce()) {
    // Puts the saved flag value back when dropped, including during unwinding.
    struct RestoreFlag(bool);

    impl Drop for RestoreFlag {
        fn drop(&mut self) {
            // SAFETY: the saved value can only be `true` if the feature-detection
            // assertion below passed before this guard was constructed.
            unsafe { set_have_lse_atomics(self.0) };
        }
    }

    // Ensure tests run in parallel don't interleave global settings.
    static LOCK: Mutex<()> = Mutex::new(());
    // The mutex protects no data of its own (`()`), and the flag is always
    // restored by `RestoreFlag`, so a poisoned lock carries no broken state.
    let _lock = LOCK
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);

    let old = get_have_lse_atomics();
    // As the caller of the unsafe fn `set_have_lse_atomics`, we have to ensure
    // the CPU supports LSE whenever the flag ends up enabled, either now or
    // when the old value is restored. This is why we make this assertion.
    if use_lse || old {
        assert!(std::arch::is_aarch64_feature_detected!("lse"));
    }

    // Declared after `_lock`, so it is dropped first: the flag is restored
    // while the lock is still held.
    let _restore = RestoreFlag(old);
    // SAFETY: if `use_lse` is `true`, LSE support was asserted above.
    unsafe { set_have_lse_atomics(use_lse) };
    f();
}

/// Fuzzes the two-argument function `f` for `n` iterations against each
/// outline-atomics code path available on the current machine.
///
/// The run with LSE disabled always happens. A second run with LSE enabled
/// happens only when the CPU reports the `lse` feature.
pub fn run_fuzz_tests_with_lse_variants<I: Int, F: Fn(I, I) + Copy>(n: u32, f: F)
where
    <I as MinInt>::Unsigned: Int,
{
    // `fuzz_2` is the driver to use because the function under test takes two inputs.
    let fuzz_once = || {
        builtins_test::fuzz_2(n, f);
    };

    // The run without LSE is unconditional.
    with_maybe_lse_atomics(false, fuzz_once);

    // Without hardware support there is nothing further to exercise.
    if !std::arch::is_aarch64_feature_detected!("lse") {
        return;
    }
    with_maybe_lse_atomics(true, fuzz_once);
}

/// Translate a byte size to a Rust type.
macro int_ty {
(1) => { i8 },
(2) => { i16 },
(4) => { i32 },
(8) => { i64 },
(16) => { i128 }
(1) => { u8 },
(2) => { u16 },
(4) => { u32 },
(8) => { u64 },
(16) => { u128 }
}

mod cas {
pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) {
#[test]
fn $name() {
builtins_test::fuzz_2(10000, |expected: super::int_ty!($bytes), new| {
crate::run_fuzz_tests_with_lse_variants(10000, |expected: super::int_ty!($bytes), new| {
let mut target = expected.wrapping_add(10);
let ret: super::int_ty!($bytes) = unsafe {
compiler_builtins::aarch64_outline_atomics::$name::$name(
expected,
new,
&mut target,
)
};
assert_eq!(
unsafe {
compiler_builtins::aarch64_outline_atomics::$name::$name(
expected,
new,
&mut target,
)
},
ret,
expected.wrapping_add(10),
"return value should always be the previous value",
);
Expand All @@ -35,15 +75,17 @@ mod cas {
);

target = expected;
let ret: super::int_ty!($bytes) = unsafe {
compiler_builtins::aarch64_outline_atomics::$name::$name(
expected,
new,
&mut target,
)
};
assert_eq!(
unsafe {
compiler_builtins::aarch64_outline_atomics::$name::$name(
expected,
new,
&mut target,
)
},
expected
ret,
expected,
"the new return value should always be the previous value (i.e. the first parameter passed to the function)",
);
assert_eq!(target, new, "should have updated target");
});
Expand All @@ -59,16 +101,21 @@ mod swap {
pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) {
#[test]
fn $name() {
builtins_test::fuzz_2(10000, |left: super::int_ty!($bytes), mut right| {
let orig_right = right;
assert_eq!(
unsafe {
compiler_builtins::aarch64_outline_atomics::$name::$name(left, &mut right)
},
orig_right
);
assert_eq!(left, right);
});
crate::run_fuzz_tests_with_lse_variants(
10000,
|left: super::int_ty!($bytes), mut right| {
let orig_right = right;
assert_eq!(
unsafe {
compiler_builtins::aarch64_outline_atomics::$name::$name(
left, &mut right,
)
},
orig_right
);
assert_eq!(left, right);
},
);
}
}
}
Expand All @@ -80,7 +127,7 @@ macro_rules! test_op {
($_ordering:ident, $bytes:tt, $name:ident) => {
#[test]
fn $name() {
builtins_test::fuzz_2(10000, |old, val| {
crate::run_fuzz_tests_with_lse_variants(10000, |old, val| {
let mut target = old;
let op: fn(super::int_ty!($bytes), super::int_ty!($bytes)) -> _ = $($op)*;
let expected = op(old, val);
Expand All @@ -98,7 +145,6 @@ test_op!(add, |left, right| left.wrapping_add(right));
test_op!(clr, |left, right| left & !right);
test_op!(xor, std::ops::BitXor::bitxor);
test_op!(or, std::ops::BitOr::bitor);
use compiler_builtins::{foreach_bytes, foreach_ordering};
compiler_builtins::foreach_cas!(cas::test);
compiler_builtins::foreach_cas16!(test_cas16);
compiler_builtins::foreach_swp!(swap::test);
Expand Down
102 changes: 85 additions & 17 deletions compiler-builtins/src/aarch64_outline_atomics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,27 @@ intrinsics! {
}
}

/// Overrides the flag recording whether LSE atomics may be used.
/// To be used only for testing purposes.
///
/// # Safety
///
/// When `has_lse` is `true`, the caller must ensure that the CPU actually
/// supports LSE.
#[cfg(feature = "mangled-names")]
pub unsafe fn set_have_lse_atomics(has_lse: bool) {
    // `true` is stored as 1 and `false` as 0.
    HAVE_LSE_ATOMICS.store(has_lse.into(), Ordering::Relaxed);
}

/// Reports whether the LSE flag is currently set (i.e. stored as non-zero).
/// To be used only for testing purposes.
#[cfg(feature = "mangled-names")]
pub fn get_have_lse_atomics() -> bool {
    let flag = HAVE_LSE_ATOMICS.load(Ordering::Relaxed);
    flag != 0
}

/// Translate a byte size to a Rust type.
#[rustfmt::skip]
macro_rules! int_ty {
(1) => { i8 };
(2) => { i16 };
(4) => { i32 };
(8) => { i64 };
(16) => { i128 };
(1) => { u8 };
(2) => { u16 };
(4) => { u32 };
(8) => { u64 };
(16) => { u128 };
}

/// Given a byte size and a register number, return a register of the appropriate size.
Expand Down Expand Up @@ -135,18 +148,73 @@ macro_rules! stxp {
};
}

// The AArch64 assembly syntax for relocation specifiers
// when accessing symbols changes depending on the target executable format.
// In ELF (used on Linux), we have a prefix notation surrounded by colons (:specifier:sym),
// while in Mach-O object files (used on macOS), a postfix notation is used (sym@specifier).

/// Symbol operand for the page-address half of AArch64 ELF
/// position-independent addressing:
///
/// ```text
/// adrp xN, symbol
/// add  xN, xN, :lo12:symbol
/// ```
///
/// On ELF the `adrp` operand is the bare symbol, so this expands to its
/// argument unchanged. The `:lo12:` modifier used by the second instruction
/// selects the low 12 bits of the symbol address and emits an ELF relocation
/// such as `R_AARCH64_ADD_ABS_LO12_NC`.
///
/// Defined by the AArch64 ELF psABI.
/// See: <https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst#static-miscellaneous-relocations>.
#[cfg(not(target_vendor = "apple"))]
macro_rules! sym {
    ($name:literal) => {
        $name
    };
}

/// Low-12-bit offset operand for a symbol in AArch64 ELF assembly syntax:
/// the symbol prefixed with the `:lo12:` relocation specifier.
#[cfg(not(target_vendor = "apple"))]
macro_rules! sym_off {
    ($name:literal) => {
        concat!(":lo12:", $name)
    };
}

/// Symbol operand for the page-address half of an `adrp` + `add` sequence on
/// Apple platforms, which split an address into `@PAGE` and `@PAGEOFF` parts:
///
/// ```text
/// adrp xN, symbol@PAGE         -> ARM64_RELOC_PAGE21
/// add  xN, xN, symbol@PAGEOFF  -> ARM64_RELOC_PAGEOFF12
/// ```
///
/// This expands to the symbol followed by `@PAGE`.
///
/// Relocation types defined by Apple in XNU: <mach-o/arm64/reloc.h>.
/// See: <https://github.com/apple-oss-distributions/xnu/blob/f6217f891ac0bb64f3d375211650a4c1ff8ca1ea/EXTERNAL_HEADERS/mach-o/arm64/reloc.h>.
#[cfg(target_vendor = "apple")]
macro_rules! sym {
    ($name:literal) => {
        concat!($name, "@PAGE")
    };
}

/// Page-offset operand for a symbol in Mach-O assembly syntax: the symbol
/// followed by the `@PAGEOFF` specifier.
#[cfg(target_vendor = "apple")]
macro_rules! sym_off {
    ($name:literal) => {
        concat!($name, "@PAGEOFF")
    };
}
Comment on lines +180 to +204
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This matches my understanding, but @madsmtm mind taking a second look here?


// If supported, perform the requested LSE op and return, or fallthrough.
macro_rules! try_lse_op {
($op: literal, $ordering:ident, $bytes:tt, $($reg:literal,)* [ $mem:ident ] ) => {
concat!(
".arch_extension lse; ",
"adrp x16, {have_lse}; ",
"ldrb w16, [x16, :lo12:{have_lse}]; ",
"cbz w16, 8f; ",
".arch_extension lse\n",
concat!("adrp x16, ", sym!("{have_lse}"), "\n"),
concat!("ldrb w16, [x16, ", sym_off!("{have_lse}"), "]\n"),
"cbz w16, 8f\n",
// LSE_OP s(reg),* [$mem]
concat!(lse!($op, $ordering, $bytes), $( " ", reg!($bytes, $reg), ", " ,)* "[", stringify!($mem), "]; ",),
"ret; ",
"8:"
concat!(lse!($op, $ordering, $bytes), $( " ", reg!($bytes, $reg), ", " ,)* "[", stringify!($mem), "]\n",),
"ret
8:"
)
};
}
Expand Down Expand Up @@ -203,15 +271,15 @@ macro_rules! compare_and_swap {
};
}

// i128 uses a completely different impl, so it has its own macro.
macro_rules! compare_and_swap_i128 {
// u128 uses a completely different impl, so it has its own macro.
macro_rules! compare_and_swap_u128 {
($ordering:ident, $name:ident) => {
intrinsics! {
#[maybe_use_optimized_c_shim]
#[unsafe(naked)]
pub unsafe extern "C" fn $name (
expected: i128, desired: i128, ptr: *mut i128
) -> i128 {
expected: u128, desired: u128, ptr: *mut u128
) -> u128 {
core::arch::naked_asm! {
// CASP x0, x1, x2, x3, [x4]; if LSE supported.
try_lse_op!("cas", $ordering, 16, 0, 1, 2, 3, [x4]),
Expand Down Expand Up @@ -391,7 +459,7 @@ macro_rules! foreach_ldset {
}

foreach_cas!(compare_and_swap);
foreach_cas16!(compare_and_swap_i128);
foreach_cas16!(compare_and_swap_u128);
foreach_swp!(swap);
foreach_ldadd!(add);
foreach_ldclr!(and);
Expand Down
7 changes: 6 additions & 1 deletion compiler-builtins/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,12 @@ pub mod arm;
#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
pub mod aarch64;

#[cfg(all(target_arch = "aarch64", target_feature = "outline-atomics"))]
// Note that we enable the module on "mangled-names" because that is the default feature
// in the builtins-test tests. So this is a way of enabling the module during testing.
#[cfg(all(
target_arch = "aarch64",
any(target_feature = "outline-atomics", feature = "mangled-names")
))]
pub mod aarch64_outline_atomics;

#[cfg(all(
Expand Down
Loading