diff --git a/cranelift/codegen/meta/src/shared/settings.rs b/cranelift/codegen/meta/src/shared/settings.rs index f632280145c8..b51b957a38d4 100644 --- a/cranelift/codegen/meta/src/shared/settings.rs +++ b/cranelift/codegen/meta/src/shared/settings.rs @@ -118,6 +118,24 @@ pub(crate) fn define() -> SettingGroup { false, ); + settings.add_bool( + "ensure_precise_store_traps", + "Enable transform to ensure precise store traps in the presence of store-tearing hardware.", + r#" + This inserts a load before every store so that hardware that "tears" stores when + part of the stored address range would fault due to a not-present page does not do so. + This can be useful to ensure precise post-trap memory state: for example, a Wasm guest + that traps on an unaligned partially-out-of-bounds store might otherwise produce + incorrect final memory state on hardware that performs the in-bounds part of the store + before trapping. + + See also: + * https://github.com/WebAssembly/design/issues/1490 + * https://github.com/bytecodealliance/wasmtime/issues/7237 + "#, + false, + ); + settings.add_bool( "enable_pinned_reg", "Enable the use of the pinned register.", diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index c5efafaee1f1..f212f74be911 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -20,6 +20,7 @@ use crate::legalizer::simple_legalize; use crate::loop_analysis::LoopAnalysis; use crate::machinst::{CompiledCode, CompiledCodeStencil}; use crate::nan_canonicalization::do_nan_canonicalization; +use crate::precise_store_traps::do_precise_store_traps; use crate::remove_constant_phis::do_remove_constant_phis; use crate::result::{CodegenResult, CompileResult}; use crate::settings::{FlagsOrIsa, OptLevel}; @@ -176,6 +177,9 @@ impl Context { if isa.flags().enable_nan_canonicalization() { self.canonicalize_nans(isa)?; } + if isa.flags().ensure_precise_store_traps() { + self.ensure_precise_store_traps(isa)?; + } self.legalize(isa)?; @@ -286,6 +290,13 @@ impl Context { self.verify_if(isa) } + /// Translate all stores to load-then-store pairs to ensure + /// precise traps on store-tearing hardware. + pub fn ensure_precise_store_traps(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { + do_precise_store_traps(&mut self.func); + self.verify_if(isa) + } + /// Run the legalizer for `isa` on the function. pub fn legalize(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { // Legalization invalidates the domtree and loop_analysis by mutating the CFG. diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index 7b8ea499bfc2..6071b4b46c4f 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -77,6 +77,7 @@ mod iterators; mod legalizer; mod nan_canonicalization; mod opts; +mod precise_store_traps; mod remove_constant_phis; mod result; mod scoped_hash_map; diff --git a/cranelift/codegen/src/precise_store_traps.rs b/cranelift/codegen/src/precise_store_traps.rs new file mode 100644 index 000000000000..add385915223 --- /dev/null +++ b/cranelift/codegen/src/precise_store_traps.rs @@ -0,0 +1,109 @@ +//! Precise-store-traps pass. +//! +//! On some instruction-set architectures, a store that crosses a page +//! boundary such that one of the pages would fault on a write can +//! sometimes still perform part of its memory update on the other +//! page. This becomes relevant, and problematic, when page +//! protections are load-bearing for Wasm VM semantics: see [this +//! issue] where a partially-out-of-bounds store in Wasm is currently +//! defined to perform no side-effect, but with a common lowering on +//! several ISAs and on some microarchitectures does actually perform +//! a "torn write". +//! +//! [this issue]: https://github.com/WebAssembly/design/issues/1490 +//! +//! This pass performs a transform on CLIF that should avoid "torn +//! partially-faulting stores" by performing a throwaway *load* before +//! every store, of the same size and to the same address. This +//! throwaway load will fault if the store would have faulted due to +//! not-present pages (this still does nothing for +//! readonly-page-faults). Because the load happens before the store +//! in program order, if it faults, any ISA that guarantees precise +//! exceptions (all ISAs that we support) will ensure that the store +//! has no side-effects. (Microarchitecturally, once the faulting +//! instruction retires, the later not-yet-retired entries in the +//! store buffer will be flushed.) +//! +//! This is not on by default and remains an "experimental" option +//! while the Wasm spec resolves this issue, and serves for now to +//! allow collecting data on overheads and experimenting on affected +//! machines. + +use crate::cursor::{Cursor, FuncCursor}; +use crate::ir::types::*; +use crate::ir::*; + +fn covering_type_for_value(func: &Function, value: Value) -> Type { + match func.dfg.value_type(value).bits() { + 8 => I8, + 16 => I16, + 32 => I32, + 64 => I64, + 128 => I8X16, + _ => unreachable!(), + } +} + +/// Perform the precise-store-traps transform on a function body. +pub fn do_precise_store_traps(func: &mut Function) { + let mut pos = FuncCursor::new(func); + while let Some(_block) = pos.next_block() { + while let Some(inst) = pos.next_inst() { + match &pos.func.dfg.insts[inst] { + &InstructionData::StackStore { + opcode: _, + arg: data, + stack_slot, + offset, + } => { + let ty = covering_type_for_value(&pos.func, data); + let _ = pos.ins().stack_load(ty, stack_slot, offset); + } + &InstructionData::DynamicStackStore { + opcode: _, + arg: data, + dynamic_stack_slot, + } => { + let ty = covering_type_for_value(&pos.func, data); + let _ = pos.ins().dynamic_stack_load(ty, dynamic_stack_slot); + } + &InstructionData::Store { + opcode, + args, + flags, + offset, + } => { + let (data, addr) = (args[0], args[1]); + let ty = match opcode { + Opcode::Store => covering_type_for_value(&pos.func, data), + Opcode::Istore8 => I8, + Opcode::Istore16 => I16, + Opcode::Istore32 => I32, + _ => unreachable!(), + }; + let _ = pos.ins().load(ty, flags, addr, offset); + } + &InstructionData::StoreNoOffset { + opcode: Opcode::AtomicStore, + args, + flags, + } => { + let (data, addr) = (args[0], args[1]); + let ty = covering_type_for_value(&pos.func, data); + let _ = pos.ins().atomic_load(ty, flags, addr); + } + &InstructionData::AtomicCas { .. } | &InstructionData::AtomicRmw { .. } => { + // Nothing: already does a read before the write. + } + &InstructionData::NullAry { + opcode: Opcode::Debugtrap, + } => { + // Marked as `can_store`, but no concerns here. + } + inst => { + assert!(!inst.opcode().can_store()); + } + } + } + } +} diff --git a/cranelift/filetests/filetests/egraph/precise-store-traps.clif b/cranelift/filetests/filetests/egraph/precise-store-traps.clif new file mode 100644 index 000000000000..9881651121f3 --- /dev/null +++ b/cranelift/filetests/filetests/egraph/precise-store-traps.clif @@ -0,0 +1,41 @@ +test optimize +set opt_level=speed +set ensure_precise_store_traps=true +target x86_64 + +function %f0(i64) { +block0(v0: i64): + v1 = iconst.i64 0 + store.i64 v1, v0 + return +} + +; check: load.i64 v0 +; check: store v1, v0 + +function %f1(i64, i8x16) { +block0(v0: i64, v1: i8x16): + store.i64 v1, v0 + return +} + +; check: load.i8x16 v0 +; check: store v1, v0 + +function %f2(i64, i64) { +block0(v0: i64, v1: i64): + istore8 v1, v0 + return +} + +; check: load.i8 v0 +; check: istore8 v1, v0 + +function %f3(i64, i64) { +block0(v0: i64, v1: i64): + atomic_store.i64 v1, v0 + return +} + +; check: atomic_load.i64 v0 +; check: atomic_store v1, v0 diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index 0e6f16cf2ab8..6e2f2e189687 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -314,6 +314,7 @@ impl Engine { "enable_heap_access_spectre_mitigation" | "enable_table_access_spectre_mitigation" | "enable_nan_canonicalization" + | "ensure_precise_store_traps" | "enable_jump_tables" | "enable_float" | "enable_verifier"