From e8dca796b7d39bec1cbc6b92a251621afb2d56ec Mon Sep 17 00:00:00 2001 From: Aman Date: Mon, 22 Sep 2025 12:12:19 +0100 Subject: [PATCH 1/3] generic-sandbox: Fix signal handling on macOS ucontext_t is different on macOS and Linux. On macOS, SIGSEGV is not sent for page faults, instead SIGBUS is sent. This commit updates the signal handler to check for SIGBUS on macOS. Finally, we also fix the page fault address extraction on macOS. Signed-off-by: Aman --- crates/polkavm/src/sandbox/generic.rs | 93 +++++++++++++++++++++------ 1 file changed, 74 insertions(+), 19 deletions(-) diff --git a/crates/polkavm/src/sandbox/generic.rs b/crates/polkavm/src/sandbox/generic.rs index 58348fe1..542276f6 100644 --- a/crates/polkavm/src/sandbox/generic.rs +++ b/crates/polkavm/src/sandbox/generic.rs @@ -325,6 +325,61 @@ unsafe extern "C" fn signal_handler(signal: c_int, info: &sys::siginfo_t, contex let vmctx = THREAD_VMCTX.with(|thread_ctx| *thread_ctx.get()); if !vmctx.is_null() { + #[cfg(target_os = "macos")] + macro_rules! macos_reg_field { + (rax) => { + (*context.uc_mcontext).__ss.__rax + }; + (rbx) => { + (*context.uc_mcontext).__ss.__rbx + }; + (rcx) => { + (*context.uc_mcontext).__ss.__rcx + }; + (rdx) => { + (*context.uc_mcontext).__ss.__rdx + }; + (rdi) => { + (*context.uc_mcontext).__ss.__rdi + }; + (rsi) => { + (*context.uc_mcontext).__ss.__rsi + }; + (rbp) => { + (*context.uc_mcontext).__ss.__rbp + }; + (rsp) => { + (*context.uc_mcontext).__ss.__rsp + }; + (r8) => { + (*context.uc_mcontext).__ss.__r8 + }; + (r9) => { + (*context.uc_mcontext).__ss.__r9 + }; + (r10) => { + (*context.uc_mcontext).__ss.__r10 + }; + (r11) => { + (*context.uc_mcontext).__ss.__r11 + }; + (r12) => { + (*context.uc_mcontext).__ss.__r12 + }; + (r13) => { + (*context.uc_mcontext).__ss.__r13 + }; + (r14) => { + (*context.uc_mcontext).__ss.__r14 + }; + (r15) => { + (*context.uc_mcontext).__ss.__r15 + }; + (rip) => { + (*context.uc_mcontext).__ss.__rip + }; + } + macro_rules! 
fetch_reg { ($reg:ident) => {{ #[cfg(target_os = "linux")] @@ -333,7 +388,7 @@ unsafe extern "C" fn signal_handler(signal: c_int, info: &sys::siginfo_t, contex } #[cfg(target_os = "macos")] { - (*context.uc_mcontext).__ss.$reg as u64 + macos_reg_field!($reg) as u64 } #[cfg(target_os = "freebsd")] { @@ -343,19 +398,19 @@ unsafe extern "C" fn signal_handler(signal: c_int, info: &sys::siginfo_t, contex } const X86_TRAP_PF: u64 = 14; - let is_page_fault = signal == sys::SIGSEGV && { + let is_page_fault = { #[cfg(target_os = "linux")] { - context.uc_mcontext.trapno == X86_TRAP_PF + signal == sys::SIGSEGV && context.uc_mcontext.trapno == X86_TRAP_PF + } + #[cfg(target_os = "macos")] + { + signal == sys::SIGBUS && (*context.uc_mcontext).__es.__trapno as u64 == X86_TRAP_PF + } + #[cfg(target_os = "freebsd")] + { + signal == sys::SIGBUS && context.uc_mcontext.mc_trapno == X86_TRAP_PF } - // #[cfg(target_os = "macos")] - // { - // (*context.uc_mcontext).__ss.trapno == X86_TRAP_PF - // } - // #[cfg(target_os = "freebsd")] - // { - // context.uc_mcontext.mc_trapno == X86_TRAP_PF - // } }; let rip = fetch_reg!(rip); @@ -401,14 +456,14 @@ unsafe extern "C" fn signal_handler(signal: c_int, info: &sys::siginfo_t, contex { info.__bindgen_anon_1.__bindgen_anon_1._sifields._sigfault._addr as u64 } - // #[cfg(target_os = "macos")] - // { - // info.si_addr as u64 - // } - // #[cfg(target_os = "freebsd")] - // { - // info.si_addr as u64 - // } + #[cfg(target_os = "macos")] + { + info.si_addr as u64 + } + #[cfg(target_os = "freebsd")] + { + info.si_addr as u64 + } }; log::trace!("Page fault at 0x{fault_address:x} (rip: 0x{rip:x})"); From 79c54446d8f6ade8eb3b9e541b99f59cbefe7aba Mon Sep 17 00:00:00 2001 From: Aman Date: Mon, 22 Sep 2025 12:20:09 +0100 Subject: [PATCH 2/3] generic-sandbox: Fix invalid jump detection on macOS / Rosetta 2 On Rosetta 2, indirect jump to a non-canonical address doesn't trigger a page fault (SIGBUS) right away. 
Instead, it jumps to the address, and then triggers a page fault (SIGBUS) when trying to fetch the instruction. This means that the previous program counter is now lost, and now we have no way of detecting if the guest program triggered an invalid jump or not. We fix this problem by updating the vmctx before making the jump and in the signal handler we check if the current program counter is non-canonical. Signed-off-by: Aman --- crates/polkavm/src/compiler/amd64.rs | 16 ++++++++++++++++ crates/polkavm/src/sandbox/generic.rs | 15 +++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/crates/polkavm/src/compiler/amd64.rs b/crates/polkavm/src/compiler/amd64.rs index 7479ac9a..22febd47 100644 --- a/crates/polkavm/src/compiler/amd64.rs +++ b/crates/polkavm/src/compiler/amd64.rs @@ -920,6 +920,22 @@ where asm.assert_reserved_exactly_as_needed(); } SandboxKind::Generic => { + // On Rosetta 2, indirect jump to a non-canonical address doesn't trigger a SIGBUS right away. + // Instead, it jumps to the address, and then triggers a SIGBUS when trying to fetch the instruction. + // This means that the previous program counter is now lost. There are other ways we can design it (by adding a + // dedicated jump table subroutine) but that would be less efficient. + // Therefore, let's store the executing address to the program counter before jumping. + #[cfg(target_os = "macos")] + { + let label_start = self.asm.create_label(); + self.asm.push(lea_rip_label(TMP_REG, label_start)); + self.push(store( + RegSize::R64, + Self::vmctx_field(S::offset_table().next_native_program_counter), + TMP_REG, + )); + } + // TODO: This also could be more efficient. 
self.push(lea_rip_label(TMP_REG, self.jump_table_label)); self.push(push(conv_reg(base))); diff --git a/crates/polkavm/src/sandbox/generic.rs b/crates/polkavm/src/sandbox/generic.rs index 542276f6..54953438 100644 --- a/crates/polkavm/src/sandbox/generic.rs +++ b/crates/polkavm/src/sandbox/generic.rs @@ -415,6 +415,21 @@ unsafe extern "C" fn signal_handler(signal: c_int, info: &sys::siginfo_t, contex let rip = fetch_reg!(rip); let vmctx = &mut *vmctx; + + // On Rosetta 2, the JMP emulation logic doesn't work the same as on x64. + // Instead of triggering a GPF immediately, it jumps to that address and then triggers a PF. + // Therefore the original program counter is lost. + // We fix this problem by storing the program counter in vmctx before jumping. + // See jump_indirect_impl for more details. + #[cfg(target_os = "macos")] + { + let is_invalid_rip = (rip >> 48) != 0; + if is_invalid_rip { + log::trace!("Jump table invalid address hit, returning to host"); + trigger_exit(vmctx, ExitReason::Signal); + } + } + if vmctx.program_range.contains(&rip) { use polkavm_common::regmap::NativeReg; for reg in polkavm_common::program::Reg::ALL { From 17f8804ccaef00d31c021471f4601dc4c71cb379 Mon Sep 17 00:00:00 2001 From: Aman Date: Mon, 22 Sep 2025 12:28:13 +0100 Subject: [PATCH 3/3] ci: Run generic-sandbox tests in CI on aarch64 MacOS using Rosetta 2 Signed-off-by: Aman --- ci/jobs/build-and-test-linux.sh | 6 ++++++ ci/jobs/build-and-test-macos.sh | 6 ++++++ ci/jobs/build-and-test.sh | 6 ------ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/ci/jobs/build-and-test-linux.sh b/ci/jobs/build-and-test-linux.sh index aac454c8..fa28d130 100755 --- a/ci/jobs/build-and-test-linux.sh +++ b/ci/jobs/build-and-test-linux.sh @@ -24,6 +24,12 @@ POLKAVM_TRACE_EXECUTION=1 POLKAVM_ALLOW_INSECURE=1 POLKAVM_BACKEND=compiler POLK # echo ">> cargo run (examples, compiler, generic, x86_64-unknown-linux-gnu)" # POLKAVM_TRACE_EXECUTION=1 POLKAVM_ALLOW_INSECURE=1 POLKAVM_BACKEND=compiler 
POLKAVM_SANDBOX=generic cargo run --target=x86_64-unknown-linux-gnu -p hello-world-host +echo ">> cargo test (generic-sandbox)" +cargo test --features generic-sandbox -p polkavm -- \ + tests::compiler_generic_ \ + --skip tests::compiler_generic_memset_basic \ + --skip tests::compiler_generic_memset_with_dynamic_paging + echo ">> cargo check (polkatool, i686-unknown-linux-musl)" cargo check --target=i686-unknown-linux-musl -p polkatool diff --git a/ci/jobs/build-and-test-macos.sh b/ci/jobs/build-and-test-macos.sh index c6c13481..111a4829 100755 --- a/ci/jobs/build-and-test-macos.sh +++ b/ci/jobs/build-and-test-macos.sh @@ -14,3 +14,9 @@ POLKAVM_TRACE_EXECUTION=1 POLKAVM_ALLOW_INSECURE=1 POLKAVM_BACKEND=interpreter c echo ">> cargo run (examples, interpreter, aarch64-apple-darwin)" POLKAVM_TRACE_EXECUTION=1 POLKAVM_ALLOW_INSECURE=1 POLKAVM_BACKEND=interpreter cargo run --target=aarch64-apple-darwin -p hello-world-host + +echo ">> cargo test (generic-sandbox)" +cargo test --features generic-sandbox -p polkavm -- \ + tests::compiler_generic_ \ + --skip tests::compiler_generic_memset_basic \ + --skip tests::compiler_generic_memset_with_dynamic_paging diff --git a/ci/jobs/build-and-test.sh b/ci/jobs/build-and-test.sh index a631f958..7457aabf 100755 --- a/ci/jobs/build-and-test.sh +++ b/ci/jobs/build-and-test.sh @@ -30,12 +30,6 @@ do cd tools/benchtool && cargo test --profile $PROFILE && cd ../.. done -echo ">> cargo test (generic-sandbox)" -cargo test --features generic-sandbox -p polkavm -- \ - tests::compiler_generic_ \ - --skip tests::compiler_generic_memset_basic \ - --skip tests::compiler_generic_memset_with_dynamic_paging - echo ">> cargo run (examples)" POLKAVM_TRACE_EXECUTION=1 POLKAVM_ALLOW_INSECURE=1 cargo run -p hello-world-host