Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Global Store #128

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Global Store
george-cosma committed Feb 17, 2025
commit 8b71675ae7a7704605210607a9e008c6dc29e691
37 changes: 24 additions & 13 deletions src/execution/execution_info.rs
Original file line number Diff line number Diff line change
@@ -12,18 +12,29 @@ pub struct ExecutionInfo<'r> {
pub name: String,
pub wasm_bytecode: &'r [u8],
pub wasm_reader: WasmReader<'r>,
pub fn_types: Vec<FuncType>,
pub store: Store,
}

impl<'r> ExecutionInfo<'r> {
pub fn new(name: &str, wasm_bytecode: &'r [u8], fn_types: Vec<FuncType>, store: Store) -> Self {
ExecutionInfo {
name: name.to_string(),
wasm_bytecode,
wasm_reader: WasmReader::new(wasm_bytecode),
fn_types,
store,
}
}
pub functions: Vec<usize>,
pub functions_offset: usize,
pub imported_functions_len: usize,

pub memories: Vec<usize>,
pub memories_offset: usize,
pub imported_memories_len: usize,

pub globals: Vec<usize>,
pub globals_offset: usize,
pub imported_globals_len: usize,

pub tables: Vec<usize>,
pub tables_offset: usize,
pub imported_tables_len: usize,

pub data: Vec<usize>,
pub data_offset: usize,

pub elements: Vec<usize>,
pub elements_offset: usize,

pub passive_element_indexes: Vec<usize>,
// pub exports: Vec<usize>,
}
2 changes: 1 addition & 1 deletion src/execution/mod.rs
Original file line number Diff line number Diff line change
@@ -735,7 +735,7 @@ where
let exports = validation_info.exports.clone();
Ok(Store {
funcs: function_instances,
mems: memory_instances,
memories: memory_instances,
globals: global_instances,
data: data_sections,
tables,
345 changes: 339 additions & 6 deletions src/execution/store.rs
Original file line number Diff line number Diff line change
@@ -3,29 +3,362 @@ use alloc::vec;
use alloc::vec::Vec;
use core::iter;

use crate::core::error::{Proposal, Result, StoreInstantiationError};
use crate::core::indices::TypeIdx;
use crate::core::reader::span::Span;
use crate::core::reader::types::export::Export;
use crate::core::reader::types::element::{ElemItems, ElemMode};
use crate::core::reader::types::global::Global;
use crate::core::reader::types::import::ImportDesc;
use crate::core::reader::types::{MemType, TableType, ValType};
use crate::core::reader::WasmReader;
use crate::core::sidetable::Sidetable;
use crate::execution::value::{Ref, Value};
use crate::RefType;
use crate::execution::{get_address_offset, run_const, run_const_span, Stack};
use crate::value::{ExternAddr, FuncAddr};
use crate::{Error, RefType, ValidationInfo};

use super::execution_info::ExecutionInfo;
use super::UnwrapValidatedExt;

/// The store represents all global state that can be manipulated by WebAssembly programs. It
/// consists of the runtime representation of all instances of functions, tables, memories, and
/// globals, element segments, and data segments that have been allocated during the life time of
/// the abstract machine.
/// <https://webassembly.github.io/spec/core/exec/runtime.html#store>
pub struct Store {
// NOTE(review): `funcs`/`mems` sit next to `functions`/`memories`, and `exports` appears both
// live and commented out. This looks like the removed and the added lines of a diff shown
// together — confirm which set is current before relying on this definition.
pub funcs: Vec<FuncInst>,
pub mems: Vec<MemInst>,
/// Function instances; `add_module` appends each module's function instances here.
pub functions: Vec<FuncInst>,
/// Memory instances; `add_module` appends each module's memories here.
pub memories: Vec<MemInst>,
/// Global instances; `add_module` appends each module's globals here.
pub globals: Vec<GlobalInst>,
/// Data segment instances; `add_module` appends each module's data segments here.
pub data: Vec<DataInst>,
/// Table instances; `add_module` appends each module's tables here.
pub tables: Vec<TableInst>,
/// Element segment instances; `add_module` appends each module's element segments here.
pub elements: Vec<ElemInst>,
// NOTE(review): `add_module` does not write to this field; it hands the passive indexes to the
// returned `ExecutionInfo` instead — presumably a leftover, confirm whether it is still needed.
pub passive_elem_indexes: Vec<usize>,
pub exports: Vec<Export>,
// pub exports: Vec<Export>,
}

impl<'b> Store {
    /// Instantiates every section of `module` and appends the resulting instances to this store.
    ///
    /// All `instantiate_*` steps run before the store is modified, so an error returned by any of
    /// them leaves the store untouched. For each instance kind the returned [`ExecutionInfo`]
    /// records the store indices that now belong to the module, together with the index at which
    /// that run of instances starts.
    ///
    /// # Errors
    /// Propagates the first error returned by one of the `instantiate_*` steps.
    pub fn add_module(
        &mut self,
        name: String,
        module: ValidationInfo<'b>,
    ) -> Result<ExecutionInfo<'b>> {
        // TODO: we can do validation at linktime such that if another module expects module `name` to export something,
        // and it doesn't, we can reject it here instead of accepting it and failing later.

        // Instantiate first, mutate later. Active element/data segments write into the freshly
        // created tables/memories, which is why those two are taken mutably.
        let new_functions = module.instantiate_functions()?;
        let mut new_tables = module.instantiate_tables()?;
        let (new_elements, passive_element_indexes) =
            module.instantiate_elements(&mut new_tables)?;
        let mut new_memories = module.instantiate_memories()?;
        let new_data = module.instantiate_data(&mut new_memories)?;
        let new_globals = module.instantiate_globals()?;

        let imported_functions_len = new_functions
            .iter()
            .filter(|func| matches!(func, FuncInst::Imported(_)))
            .count();
        let imported_memories_len = 0; // TODO: not yet supported
        let imported_globals_len = 0; // TODO: not yet supported
        let imported_tables_len = 0; // TODO: not yet supported

        // For every kind: remember where the module's instances start inside the store, list the
        // store indices they will occupy, then move them into the store.
        let functions_offset = self.functions.len();
        let functions: Vec<usize> =
            (functions_offset..(functions_offset + new_functions.len())).collect();
        self.functions.extend(new_functions);

        let memories_offset = self.memories.len();
        let memories: Vec<usize> =
            (memories_offset..(memories_offset + new_memories.len())).collect();
        self.memories.extend(new_memories);

        let globals_offset = self.globals.len();
        let globals: Vec<usize> = (globals_offset..(globals_offset + new_globals.len())).collect();
        self.globals.extend(new_globals);

        let data_offset = self.data.len();
        let data: Vec<usize> = (data_offset..(data_offset + new_data.len())).collect();
        self.data.extend(new_data);

        let tables_offset = self.tables.len();
        let tables: Vec<usize> = (tables_offset..(tables_offset + new_tables.len())).collect();
        self.tables.extend(new_tables);

        let elements_offset = self.elements.len();
        let elements: Vec<usize> =
            (elements_offset..(elements_offset + new_elements.len())).collect();
        self.elements.extend(new_elements);

        Ok(ExecutionInfo {
            name,
            wasm_bytecode: module.wasm,
            wasm_reader: WasmReader::new(module.wasm),

            functions,
            functions_offset,
            imported_functions_len,

            memories,
            memories_offset,
            imported_memories_len,

            globals,
            globals_offset,
            imported_globals_len,

            tables,
            tables_offset,
            imported_tables_len,

            data,
            data_offset,

            elements,
            elements_offset,

            passive_element_indexes,
        })
    }
}

impl<'b> ValidationInfo<'b> {
/// Builds the function instances of this module: imported functions first, followed by the
/// functions defined in the module itself.
///
/// Always returns `Ok` in its current form; failures surface as panics instead (see below).
///
/// # Panics
/// Panics if a function span cannot be positioned inside the bytecode, or if the span for the
/// remaining code cannot be created (both `expect` calls below).
// NOTE(review): `unwrap_validated` presumably also panics when its invariant is violated — confirm.
pub fn instantiate_functions(&self) -> Result<Vec<FuncInst>> {
let mut wasm_reader = WasmReader::new(self.wasm);

let functions = self.functions.iter();
let func_blocks = self.func_blocks.iter();

// Pairs each entry of `self.functions` with the entry of `self.func_blocks` at the same
// position. The iterator is lazy: nothing is read until `collect` at the end drives it.
let local_function_inst = functions.zip(func_blocks).map(|(ty, (func, sidetable))| {
// Position the reader at the start of this function's span.
wasm_reader
.move_start_to(*func)
.expect("function index to be in the bounds of the WASM binary");

// Read the declared locals and measure how many bytes that consumed.
let (locals, bytes_read) = wasm_reader
.measure_num_read_bytes(crate::code::read_declared_locals)
.unwrap_validated();

// Whatever is left of the function's span after the locals is its code.
let code_expr = wasm_reader
.make_span(func.len() - bytes_read)
.expect("TODO remove this expect");

FuncInst::Local(LocalFuncInst {
ty: *ty,
locals,
code_expr,
// TODO figure out where we want our sidetables
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sidetables would ideally be a field of modules, and would be per-module with along with their source code. In our current implementation we still have them per function, they can stay this way for now

sidetable: sidetable.clone(),
})
});

// Only function imports produce an instance; every other import kind is skipped here.
let imported_function_inst = self.imports.iter().filter_map(|import| match &import.desc {
ImportDesc::Func(type_idx) => Some(FuncInst::Imported(ImportedFuncInst {
ty: *type_idx,
module_name: import.module_name.clone(),
function_name: import.name.clone(),
})),
_ => None,
});

// Imported functions come before local ones in the resulting vector.
Ok(imported_function_inst.chain(local_function_inst).collect())
}

/// Creates one [`TableInst`] via `TableInst::new` for every entry of `self.tables`, in order.
///
/// Always returns `Ok` in its current form.
pub fn instantiate_tables(&self) -> Result<Vec<TableInst>> {
    let table_instances = self
        .tables
        .iter()
        .map(|table_ty| TableInst::new(*table_ty))
        .collect();

    Ok(table_instances)
}

/// Builds the element instances of this module and writes active segments into `tables`.
///
/// Returns `(instances, passive_indexes)`:
/// - `instances` holds one [`ElemInst`] per passive or active segment, in declaration order.
///   Declarative segments produce no instance.
/// - `passive_indexes` holds, for every passive segment, its position within `self.elements`
///   (counted over all segments, declarative ones included).
///
/// Always returns `Ok` in its current form.
///
/// # Panics
/// Panics if an active segment names a table index outside `tables`, or if the segment does not
/// fit into the target table at the computed offset.
pub fn instantiate_elements(
    &self,
    tables: &mut [TableInst],
) -> Result<(Vec<ElemInst>, Vec<usize>)> {
    let mut passive_elem_indexes: Vec<usize> = Vec::new();
    let mut instances: Vec<ElemInst> = Vec::new();

    // https://webassembly.github.io/spec/core/syntax/modules.html#element-segments
    for (segment_idx, elem) in self.elements.iter().enumerate() {
        trace!("Instantiating element {:#?}", elem);

        let offsets: Vec<Option<u32>> = match &elem.init {
            // Each item is a constant expression that has to be evaluated first.
            ElemItems::Exprs(_ref_type, init_exprs) => init_exprs
                .iter()
                .map(|expr| {
                    let value = run_const_span(self.wasm, expr, ()).unwrap_validated();
                    get_address_offset(value)
                })
                .collect(),
            // The items are direct function references (i32 values), so the indices are
            // used as they are.
            ElemItems::RefFuncs(indicies) => indicies.iter().copied().map(Some).collect(),
        };

        let references: Vec<Ref> = offsets
            .into_iter()
            .map(|maybe_offset| {
                let addr = maybe_offset.map(|raw| raw as usize);
                match elem.ty() {
                    RefType::FuncRef => Ref::Func(FuncAddr::new(addr)),
                    RefType::ExternRef => Ref::Extern(ExternAddr::new(addr)),
                }
            })
            .collect();

        let instance = ElemInst {
            ty: elem.ty(),
            references,
        };

        match &elem.mode {
            // As per https://webassembly.github.io/spec/core/syntax/modules.html#element-segments
            // a declarative element segment is not available at runtime but merely serves to
            // forward-declare references that are formed in code with instructions like
            // `ref.func`.
            //
            // Also, the answer given by Andreas Rossberg (the editor of the WASM Spec - Release 2.0)
            // per https://stackoverflow.com/questions/78672934/what-is-the-purpose-of-a-wasm-declarative-element-segment
            // "[...] The reason Wasm requires this (admittedly ugly) forward declaration is to support streaming compilation [...]"
            ElemMode::Declarative => {}
            ElemMode::Passive => {
                passive_elem_indexes.push(segment_idx);
                instances.push(instance);
            }
            ElemMode::Active(active_elem) => {
                let table_idx = active_elem.table_idx as usize;

                let evaluated =
                    run_const_span(self.wasm, &active_elem.init_expr, ()).unwrap_validated();
                let offset = match evaluated {
                    Value::I32(offset) => offset as usize,
                    // Validation already asserted that an I32 is on top of the stack.
                    _ => unreachable!(),
                };

                let table = &mut tables[table_idx];
                // This can't be verified at validation time: validation only tracks the types
                // of values, not the values themselves, so the concrete i32 offset is unknown
                // until now.
                assert!(table.len() >= (offset + instance.len()));

                let end = offset + instance.references.len();
                table.elem[offset..end].copy_from_slice(&instance.references);

                instances.push(instance);
            }
        }
    }

    Ok((instances, passive_elem_indexes))
}

pub fn instantiate_memories(&self) -> Result<Vec<MemInst>> {
let memories: Vec<MemInst> = self.memories.iter().map(|ty| MemInst::new(*ty)).collect();

let import_memory_instances_len = self
.imports
.iter()
.filter(|import| matches!(import.desc, ImportDesc::Mem(_)))
.count();

match memories.len().checked_add(import_memory_instances_len) {
None => {
return Err(Error::StoreInstantiationError(
StoreInstantiationError::TooManyMemories(usize::MAX),
))
}
Some(mem_instances) => {
if mem_instances > 1 {
return Err(Error::UnsupportedProposal(Proposal::MultipleMemories));
}
}
};

Ok(memories)
}

/// Builds the data instances of this module and copies active segments into memory.
///
/// Every data segment yields a [`DataInst`] holding a copy of its bytes. For an active segment
/// the offset expression is evaluated and the bytes are additionally written into the target
/// memory in `memory_instances` at that offset.
///
/// # Errors
/// Returns `StoreInstantiationError::ActiveDataWriteOutOfBounds` if an active segment does not
/// fit inside its memory. This includes the case where `offset + length` does not fit in a
/// `usize`, which previously could wrap around and end in a panic.
///
/// # Panics
/// - `todo!` if an active segment targets a memory index other than 0, or if its offset
///   expression does not evaluate to an `i32`.
/// - Panics if an active segment exists but `memory_instances` is empty.
pub fn instantiate_data(&self, memory_instances: &mut [MemInst]) -> Result<Vec<DataInst>> {
    use crate::core::reader::types::data::DataMode;
    use crate::NumType;

    self.data
        .iter()
        .map(|d| {
            // Borrow the mode instead of cloning it; only `Copy` fields are read from it.
            if let DataMode::Active(active_data) = &d.mode {
                let mem_idx = active_data.memory_idx;
                if mem_idx != 0 {
                    todo!("Active data has memory_idx different than 0");
                }
                assert!(
                    memory_instances.len() > mem_idx,
                    "Multiple memories not yet supported"
                );

                // Evaluate the constant offset expression of the segment.
                let boxed_value = {
                    let mut wasm = WasmReader::new(self.wasm);
                    wasm.move_start_to(active_data.offset).unwrap_validated();
                    let mut stack = Stack::new();
                    run_const(wasm, &mut stack, ());
                    stack.pop_value(ValType::NumType(NumType::I32))
                };

                // TODO: this shouldn't be a simple value, should it? I mean it can't be, but it can also be any type of ValType
                // TODO: also, do we need to forcefully make it i32?
                // TODO: implement all value types (e.g. i64 offsets, rejecting values above `u32::MAX`)
                let offset: u32 = match boxed_value {
                    Value::I32(val) => val,
                    _ => todo!(),
                };

                // In bounds: guaranteed by the assert above.
                let mem_inst = &mut memory_instances[mem_idx];

                // `checked_add` rejects an end position that does not fit in `usize`, and
                // `get_mut` rejects one that lies past the end of the memory, so neither case
                // can wrap around or panic.
                let start = offset as usize;
                let destination = match start.checked_add(d.init.len()) {
                    Some(end) => mem_inst.data.get_mut(start..end),
                    None => None,
                };

                match destination {
                    Some(destination) => destination.copy_from_slice(&d.init),
                    None => {
                        return Err(Error::StoreInstantiationError(
                            StoreInstantiationError::ActiveDataWriteOutOfBounds,
                        ))
                    }
                }
            }

            Ok(DataInst {
                data: d.init.clone(),
            })
        })
        .collect::<Result<Vec<DataInst>>>()
}

/// Evaluates the initializer expression of every global in `self.globals` and pairs the
/// resulting value with its declaration in a [`GlobalInst`], preserving declaration order.
///
/// Always returns `Ok` in its current form.
pub fn instantiate_globals(&self) -> Result<Vec<GlobalInst>> {
    // A single stack is reused for all initializers. It shouldn't need clearing: if validation
    // is correct, it is empty again after each initializer has been evaluated and popped.
    let mut stack = Stack::new();
    let mut instances = Vec::with_capacity(self.globals.len());

    for global in self.globals.iter() {
        let mut reader = WasmReader::new(self.wasm);
        // The place we are moving the start to should, by all means, be inside the wasm bytecode.
        reader.move_start_to(global.init_expr).unwrap_validated();

        // TODO: imported globals
        run_const(reader, &mut stack, ());
        let value = stack.pop_value(global.ty.ty);

        instances.push(GlobalInst {
            global: *global,
            value,
        });
    }

    Ok(instances)
}
}

#[derive(Debug)]