Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions crates/cranelift/src/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,8 @@ impl wasmtime_environ::Compiler for Compiler {
}
}
let FunctionBodyData { validator, body } = input;
// Set the function body offset for branch hint lookup
func_env.func_body_offset = body.get_binary_reader().original_position();
let mut validator =
validator.into_validator(mem::take(&mut compiler.cx.validator_allocations));
compiler.cx.func_translator.translate_body(
Expand Down
34 changes: 34 additions & 0 deletions crates/cranelift/src/func_environ.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,10 @@ pub struct FuncEnvironment<'module_environment> {
/// The stack-slot used for exposing Wasm state via debug
/// instrumentation, if any, and the builder containing its metadata.
pub(crate) state_slot: Option<(ir::StackSlot, FrameStateSlotBuilder)>,

/// The byte offset of the current function body in the wasm module.
/// Used to convert absolute srcloc offsets to relative offsets for branch hint lookup.
pub(crate) func_body_offset: usize,
}

impl<'module_environment> FuncEnvironment<'module_environment> {
Expand Down Expand Up @@ -277,6 +281,8 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
stack_switching_values_buffer: None,

state_slot: None,

func_body_offset: 0,
}
}

Expand Down Expand Up @@ -1203,6 +1209,34 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
self.needs_gc_heap
}

/// Looks up the branch hint, if any, recorded for the instruction at the given
/// absolute byte offset in the function currently being translated.
///
/// The offset is converted to an offset relative to the function body start
/// (`self.func_body_offset`) before it is compared against the entries stored
/// in `self.translation.branch_hints` for this function.
///
/// Returns `Some(true)` if the branch is hinted as likely taken, `Some(false)`
/// if it is hinted as unlikely to be taken, or `None` if no hint exists for
/// this offset. `None` is also returned when the current function's key is not
/// a `FuncKey::DefinedWasmFunction`, or when `absolute_offset` is smaller than
/// `self.func_body_offset`.
pub fn get_branch_hint(&self, absolute_offset: usize) -> Option<bool> {
// Only defined Wasm functions are looked up; any other kind of key yields
// no hint.
let def_func_index = match self.key {
FuncKey::DefinedWasmFunction(_, def_func_index) => def_func_index,
_ => return None,
};
// Convert the absolute offset to one relative to the function body start.
// `checked_sub` makes an offset that precedes the body start yield `None`
// instead of underflowing.
let relative_offset = absolute_offset.checked_sub(self.func_body_offset)?;
// The hints map is keyed by the raw `u32` of the module-wide function index,
// so translate the defined-function index first.
let func_index = self.module.func_index(def_func_index);
self.translation
.branch_hints
.get(&func_index.as_u32())
.and_then(|hints| {
// Linear scan; the first entry whose offset matches wins.
// NOTE(review): every lookup rescans this function's hint list from the
// start. If hints are guaranteed to be sorted by offset, a cursor or a
// binary search would avoid that — confirm the ordering guarantee first.
hints
.iter()
.find(|(o, _)| *o as usize == relative_offset)
.map(|(_, taken)| *taken)
})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this loop would be good to optimize, since translation will only look up branch hints (I think?) in increasing order of offsets. That means that currently this code is an $O(n^2)$ loop as-implemented. That could be optimized by using a binary search here, but I think this could go one step further and, ideally, store a reference to the raw buffer of input wasm data (in theory) here. For example this could be a .peekable() iterator over the raw wasm itself. Looking up via get_branch_hint would advance the iterator if it's at the matching position.

Regardless I think it'll be needed to remove the quadratic behavior here, and I think ideally it'd be via an iterator-like approach to avoid the logarithmic nature of a binary search.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, you are right.

How about bc62088 for O(n) complexity?

Copy link
Member

@cfallin cfallin Feb 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What Alex is getting at, I think, is that the spec states that the hints are given in PC order. So why not traverse hints in order, taking them when they match as we visit increasing PC offsets? We should be able to do that in O(n) time with O(1) space overhead, i.e., not building a HashMap at all.

}

/// Get the number of Wasm parameters for the given function.
pub(crate) fn num_params_for_func(&self, function_index: FuncIndex) -> usize {
let ty = self.module.functions[function_index]
Expand Down
14 changes: 14 additions & 0 deletions crates/cranelift/src/translate/code_translator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3965,9 +3965,23 @@ fn translate_br_if(
builder: &mut FunctionBuilder,
env: &mut FuncEnvironment<'_>,
) {
// Check for branch hints before borrowing env mutably.
// The srcloc contains the byte offset of the current instruction.
let offset = builder.srcloc().bits() as usize;
let branch_hint = env.get_branch_hint(offset);

let val = env.stacks.pop1();
let (br_destination, inputs) = translate_br_if_args(relative_depth, env);
let next_block = builder.create_block();

if let Some(likely) = branch_hint {
if likely {
builder.set_cold_block(next_block);
} else {
builder.set_cold_block(br_destination);
}
}

canonicalise_brif(builder, val, br_destination, inputs, next_block, &[]);

builder.seal_block(next_block); // The only predecessor is the current block.
Expand Down
19 changes: 19 additions & 0 deletions crates/environ/src/compile/module_environ.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,13 @@ pub struct ModuleTranslation<'data> {
/// The type information of the current module made available at the end of the
/// validation process.
types: Option<Types>,

/// Branch hints parsed from the `metadata.code.branch_hint` custom section.
///
/// Maps function index to a list of (func_offset, taken) pairs where
/// func_offset is the byte offset within the function body and taken
/// indicates whether the branch is likely to be taken.
pub branch_hints: HashMap<u32, Vec<(u32, bool)>>,
}

impl<'data> ModuleTranslation<'data> {
Expand All @@ -130,6 +137,7 @@ impl<'data> ModuleTranslation<'data> {
total_passive_data: 0,
code_index: 0,
types: None,
branch_hints: HashMap::default(),
}
}

Expand Down Expand Up @@ -740,6 +748,17 @@ and for re-adding support for interface types you can see this issue:
log::warn!("failed to parse name section {e:?}");
}
}
KnownCustom::BranchHints(reader) => {
for func_hints in reader.into_iter().flatten() {
let mut hints = Vec::new();
for hint in func_hints.hints.into_iter().flatten() {
hints.push((hint.func_offset, hint.taken));
}
if !hints.is_empty() {
self.result.branch_hints.insert(func_hints.func, hints);
}
}
}
_ => {
let name = section.name().trim_end_matches(".dwo");
if name.starts_with(".debug_") {
Expand Down
5 changes: 4 additions & 1 deletion docs/stability-wasm-proposals.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,17 +84,20 @@ The emoji legend is:

| Proposal | Phase 4 | Tests | Finished | Fuzzed | API | C API |
|-----------------------------|---------|-------|----------|--------|-----|-------|
| [`branch-hinting`] [^12] | ❌ | ❌ | ✅ | ❌ | ✅ | ✅ |
| [`stack-switching`] [^11] | ❌ | 🚧 | 🚧 | ❌ | ❌ | ❌ |

[^11]: The stack-switching proposal is a work-in-progress being tracked
at [#9465](https://github.com/bytecodealliance/wasmtime/issues/9465).
Currently the implementation is only for x86\_64 Linux.
[^12]: Branch hinting is implemented by parsing the `metadata.code.branch_hint`
custom section and marking cold blocks in Cranelift for optimization.
No configuration is required; hints are automatically used when present.

## Unimplemented proposals

| Proposal | Tracking Issue |
|-------------------------------|----------------|
| [`branch-hinting`] | [#9463](https://github.com/bytecodealliance/wasmtime/issues/9463) |
| [`flexible-vectors`] | [#9464](https://github.com/bytecodealliance/wasmtime/issues/9464) |
| [`memory-control`] | [#9467](https://github.com/bytecodealliance/wasmtime/issues/9467) |
| [`shared-everything-threads`] | [#9466](https://github.com/bytecodealliance/wasmtime/issues/9466) |
Expand Down
76 changes: 76 additions & 0 deletions tests/disas/branch-hints.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
;;! target = "x86_64"
;;! test = "optimize"

;; Test that branch hints from the `metadata.code.branch_hint` custom section
;; are used to mark cold blocks in the generated code.

(module
;; Test br_if with hint that branch is unlikely (not taken).
;; The branch target block should be marked cold.
(func $unlikely_branch (param i32) (result i32)
(block $target (result i32)
i32.const 0 ;; value to return if branch taken
local.get 0 ;; condition
(@metadata.code.branch_hint "\00")
br_if $target
;; Fallthrough path (likely)
drop
i32.const 42
)
)

;; Test br_if with hint that branch is likely (taken).
;; The fallthrough block should be marked cold.
(func $likely_branch (param i32) (result i32)
(block $target (result i32)
i32.const 0 ;; value to return if branch taken
local.get 0 ;; condition
(@metadata.code.branch_hint "\01")
br_if $target
;; Fallthrough path (unlikely, should be cold)
drop
i32.const 42
)
)
)
;; function u0:0(i64 vmctx, i64, i32) -> i32 tail {
;; gv0 = vmctx
;; gv1 = load.i64 notrap aligned readonly gv0+8
;; gv2 = load.i64 notrap aligned gv1+16
;; stack_limit = gv2
;;
;; block0(v0: i64, v1: i64, v2: i32):
;; @0043 v5 = iconst.i32 0
;; @0047 brif v2, block2(v5), block3 ; v5 = 0
;;
;; block3:
;; @004a v6 = iconst.i32 42
;; @004c jump block2(v6) ; v6 = 42
;;
;; block2(v4: i32) cold:
;; @004d jump block1(v4)
;;
;; block1(v3: i32):
;; @004d return v3
;; }
;;
;; function u0:1(i64 vmctx, i64, i32) -> i32 tail {
;; gv0 = vmctx
;; gv1 = load.i64 notrap aligned readonly gv0+8
;; gv2 = load.i64 notrap aligned gv1+16
;; stack_limit = gv2
;;
;; block0(v0: i64, v1: i64, v2: i32):
;; @0052 v5 = iconst.i32 0
;; @0056 brif v2, block2(v5), block3 ; v5 = 0
;;
;; block3 cold:
;; @0059 v6 = iconst.i32 42
;; @005b jump block2(v6) ; v6 = 42
;;
;; block2(v4: i32):
;; @005c jump block1(v4)
;;
;; block1(v3: i32):
;; @005c return v3
;; }