diff --git a/Cargo.lock b/Cargo.lock index 3ea6e49d0..31f8b304a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1182,8 +1182,10 @@ dependencies = [ name = "hash-vm" version = "0.1.0" dependencies = [ + "bitflags", "hash-abi", "hash-reporting", + "hash-repr", "hash-source", "hash-storage", "hash-utils", diff --git a/compiler/hash-abi/src/lib.rs b/compiler/hash-abi/src/lib.rs index b1b34b754..27fa6ab05 100644 --- a/compiler/hash-abi/src/lib.rs +++ b/compiler/hash-abi/src/lib.rs @@ -3,6 +3,7 @@ //! and to be able to call functions from other languages, but to also provide //! information to code generation backends about how values are represented. +use hash_ir::ty::ReprTyId; use hash_repr::{LayoutId, TyInfo}; use hash_storage::{new_store_key, store::statics::StoreId}; use hash_target::{ @@ -51,6 +52,9 @@ new_store_key!(pub FnAbiId, derives = Debug); /// them)? #[derive(Debug, Clone)] pub struct FnAbi { + /// The ID of the function ABI if any. + pub ty: ReprTyId, + /// All the types of the arguments in order, and how they should /// be passed to the function (as per convention). pub args: Box<[ArgAbi]>, diff --git a/compiler/hash-codegen-llvm/src/ctx.rs b/compiler/hash-codegen-llvm/src/ctx.rs index adafed419..272ab7ad7 100644 --- a/compiler/hash-codegen-llvm/src/ctx.rs +++ b/compiler/hash-codegen-llvm/src/ctx.rs @@ -10,10 +10,7 @@ use hash_codegen::{ target::{HasTarget, Target}, traits::{BackendTypes, HasCtxMethods}, }; -use hash_ir::{ - IrCtx, - ty::{InstanceId, ReprTyId, VariantIdx}, -}; +use hash_ir::{IrCtx, ty::ReprTyId}; use hash_pipeline::settings::CompilerSettings; use hash_source::constant::AllocId; use hash_utils::fxhash::FxHashMap; @@ -24,8 +21,6 @@ use llvm::{ values::FunctionValue, }; -use crate::translation::ty::TyMemoryRemap; - /// The [CodeGenCtx] is used a context for converting Hash IR into LLVM IR. 
It /// stores references to all of the required information about the IR, as well /// as several stores in order to reduce the amount of work that is required to @@ -59,15 +54,9 @@ pub struct CodeGenCtx<'b, 'm> { /// of pointers and pointer offsets. pub size_ty: llvm::types::IntType<'m>, - /// A mapping between [InstanceId]s to [FunctionValue]s in order + /// A mapping between [ReprTyId]s to [FunctionValue]s in order /// to avoid re-generating declaring instance references. - pub(crate) instances: RefCell>>, - - /// A collection of [TyMemoryRemap]s that have occurred for - /// all of the types that have been translated. Additionally, this is used - /// as a cache to avoid re-lowering [ReprTyId]s into the equivalent - /// LLVM types. - pub(crate) ty_remaps: RefCell), TyMemoryRemap<'m>>>, + pub(crate) instances: RefCell>>, /// A map which stores the created [AnyValueEnum]s for the constant /// strings [InternedStr] that have been created. @@ -111,7 +100,6 @@ impl<'b, 'm> CodeGenCtx<'b, 'm> { symbol_counter: Cell::new(0), size_ty, instances: RefCell::new(FxHashMap::default()), - ty_remaps: RefCell::new(FxHashMap::default()), str_consts: RefCell::new(FxHashMap::default()), global_consts: RefCell::new(FxHashMap::default()), intrinsics: RefCell::new(FxHashMap::default()), diff --git a/compiler/hash-codegen-llvm/src/lib.rs b/compiler/hash-codegen-llvm/src/lib.rs index d3caeca0f..3698c1a6f 100644 --- a/compiler/hash-codegen-llvm/src/lib.rs +++ b/compiler/hash-codegen-llvm/src/lib.rs @@ -223,7 +223,8 @@ impl<'b, 'm> LLVMBackend<'b> { } // Get the instance of the function. 
- let instance = body.metadata().ty().borrow().as_instance(); + let ty = body.metadata().ty(); + let instance = ty.borrow().as_instance(); // So, we create the mangled symbol name, and then call `predefine()` which // should create the function ABI from the instance, with the correct @@ -231,7 +232,7 @@ impl<'b, 'm> LLVMBackend<'b> { let symbol_name = compute_symbol_name(instance); let abis = self.codegen_storage.abis(); - let abi = abis.create_fn_abi_from_instance(ctx, instance); + let abi = abis.create_fn_abi_from_ty(ctx, ty); abis.map_fast(abi, |abi| { ctx.predefine_fn(instance, symbol_name.as_str(), abi); @@ -252,19 +253,19 @@ impl<'b, 'm> LLVMBackend<'b> { continue; } - // Get the instance of the function. - let instance = body.metadata().ty().borrow().as_instance(); - // @@ErrorHandling: we should be able to handle the error here - codegen_body::(instance, body, ctx).unwrap(); + codegen_body::(body, ctx).unwrap(); // Check if we should dump the generated LLVM IR + let ty = body.metadata().ty(); + let instance = ty.borrow().as_instance(); + if instance.borrow().has_attr(attrs::DUMP_LLVM_IR) { - // @@Messaging + // @@Messaging log::info!( "LLVM IR for function {}\n{}", body.meta.name(), - ctx.get_fn(instance).print_to_string().to_string() + ctx.get_fn(ty).print_to_string().to_string() ); } } diff --git a/compiler/hash-codegen-llvm/src/translation/layouts.rs b/compiler/hash-codegen-llvm/src/translation/layouts.rs index 5ce677fb8..2ee37c060 100644 --- a/compiler/hash-codegen-llvm/src/translation/layouts.rs +++ b/compiler/hash-codegen-llvm/src/translation/layouts.rs @@ -1,21 +1,16 @@ //! Implements all of the required methods for computing the layouts of types. 
use hash_codegen::{ - repr::{Layout, LayoutShape, TyInfo, Variants}, + repr::{Layout, TyInfo}, target::abi::AbiRepresentation, traits::layout::LayoutMethods, }; -use hash_ir::ty::ReprTyId; use hash_storage::store::statics::StoreId; -use super::{LLVMBuilder, ty::TyMemoryRemap}; +use super::LLVMBuilder; use crate::ctx::CodeGenCtx; impl<'b> LayoutMethods<'b> for CodeGenCtx<'b, '_> { - fn backend_field_index(&self, info: TyInfo, index: usize) -> u64 { - info.layout.map(|layout| layout.llvm_field_index(self, info.ty, index)) - } - fn is_backend_immediate(&self, info: TyInfo) -> bool { info.layout.map(|layout| layout.is_llvm_immediate()) } @@ -26,10 +21,6 @@ impl<'b> LayoutMethods<'b> for CodeGenCtx<'b, '_> { } impl<'b> LayoutMethods<'b> for LLVMBuilder<'_, 'b, '_> { - fn backend_field_index(&self, info: TyInfo, index: usize) -> u64 { - self.ctx.backend_field_index(info, index) - } - fn is_backend_immediate(&self, ty: TyInfo) -> bool { self.ctx.is_backend_immediate(ty) } @@ -40,14 +31,11 @@ impl<'b> LayoutMethods<'b> for LLVMBuilder<'_, 'b, '_> { } pub trait ExtendedLayoutMethods<'m> { - /// Compute the field index from the backend specific type. - fn llvm_field_index(&self, cx: &CodeGenCtx<'_, 'm>, ty: ReprTyId, index: usize) -> u64; - /// Check if this is type is represented as an immediate value. fn is_llvm_immediate(&self) -> bool; /// Returns true if this [Layout] ABI is represented as is a - /// [`AbiRepresentation::Pair(..)`] + /// [`AbiRepresentation::Pair`] fn is_llvm_scalar_pair(&self) -> bool; } @@ -63,40 +51,4 @@ impl<'m> ExtendedLayoutMethods<'m> for &Layout { fn is_llvm_scalar_pair(&self) -> bool { matches!(self.abi, AbiRepresentation::Pair(..)) } - - fn llvm_field_index(&self, ctx: &CodeGenCtx<'_, 'm>, ty: ReprTyId, index: usize) -> u64 { - // Field index of scalar and scalar pairs is not applicable since - // it is handled else where. - match self.abi { - AbiRepresentation::Scalar(_) | AbiRepresentation::Pair(..) 
=> { - panic!("cannot get field index of scalar or scalar pair") - } - _ => {} - }; - - match self.shape { - LayoutShape::Primitive | LayoutShape::Union { .. } => { - panic!("cannot get field index of primitive or union") - } - LayoutShape::Array { .. } => index as u64, - - // Here, we have to rely on the re-mapped version of the layout since - // we had to adjust it to account for all of the padding that was added - // to the struct/aggregate. - LayoutShape::Aggregate { .. } => { - let variant_index = match self.variants { - Variants::Single { index } => Some(index), - Variants::Multiple { .. } => None, - }; - - match ctx.ty_remaps.borrow().get(&(ty, variant_index)) { - Some(TyMemoryRemap { remap: Some(remap), .. }) => remap[index] as u64, - Some(TyMemoryRemap { remap: None, .. }) => { - self.shape.memory_index(index) as u64 - } - None => panic!("cannot find remapped layout for `{}`", ty), - } - } - } - } } diff --git a/compiler/hash-codegen-llvm/src/translation/misc.rs b/compiler/hash-codegen-llvm/src/translation/misc.rs index 96495dc37..3d1824658 100644 --- a/compiler/hash-codegen-llvm/src/translation/misc.rs +++ b/compiler/hash-codegen-llvm/src/translation/misc.rs @@ -7,7 +7,7 @@ use hash_codegen::{ target::HasTarget, traits::{HasCtxMethods, misc::MiscBuilderMethods, ty::TypeBuilderMethods}, }; -use hash_ir::ty::{InstanceHelpers, InstanceId}; +use hash_ir::ty::{InstanceHelpers, InstanceId, ReprTyId}; use hash_storage::store::{Store, statics::StoreId}; use inkwell::{ GlobalVisibility, @@ -20,19 +20,20 @@ use crate::ctx::CodeGenCtx; impl<'m> CodeGenCtx<'_, 'm> { /// Generate code for a reference to a function or method item. The - /// [Instance] specifies the function reference to generate, and any + /// [InstanceId] specifies the function reference to generate, and any /// attributes that need to be applied to the function. If the function /// has already been generated, a reference will be returned from the /// cache. 
- pub fn get_fn_or_create_ref(&self, instance: InstanceId) -> FunctionValue<'m> { + pub fn get_fn_or_create_ref(&self, ty: ReprTyId) -> FunctionValue<'m> { // First check if we have already created the function reference... - if let Some(fn_val) = self.instances.borrow().get(&instance) { + if let Some(fn_val) = self.instances.borrow().get(&ty) { return *fn_val; } + let instance = ty.borrow().as_instance(); let name = compute_symbol_name(instance); let abis = self.cg_ctx().abis(); - let fn_abi = abis.create_fn_abi_from_instance(self, instance); + let fn_abi = abis.create_fn_abi_from_ty(self, ty); // See if this item has already been declared in the module let func = if let Some(func) = self.module.get_function(name.as_str()) { @@ -55,25 +56,25 @@ impl<'m> CodeGenCtx<'_, 'm> { // We insert the function into the cache so that we can // reference it later on... - self.instances.borrow_mut().insert(instance, func); + self.instances.borrow_mut().insert(ty, func); func } } impl<'b> MiscBuilderMethods<'b> for CodeGenCtx<'b, '_> { - fn get_fn(&self, instance: InstanceId) -> Self::Function { - self.get_fn_or_create_ref(instance) + fn get_fn(&self, ty: ReprTyId) -> Self::Function { + self.get_fn_or_create_ref(ty) } - fn get_fn_ptr(&self, instance: InstanceId) -> Self::Value { - self.get_fn_or_create_ref(instance).as_any_value_enum() + fn get_fn_ptr(&self, ty: ReprTyId) -> Self::Value { + self.get_fn_or_create_ref(ty).as_any_value_enum() } - fn get_fn_addr(&self, instance: InstanceId) -> Self::Value { + fn get_fn_addr(&self, ty: ReprTyId) -> Self::Value { // @@Inkwell: PointerValue(..).as_any_value_enum() is bugged AnyValueEnum::PointerValue( - self.get_fn_or_create_ref(instance).as_global_value().as_pointer_value(), + self.get_fn_or_create_ref(ty).as_global_value().as_pointer_value(), ) } @@ -117,6 +118,6 @@ impl<'b> MiscBuilderMethods<'b> for CodeGenCtx<'b, '_> { // We insert the function into the cache so that we can // reference it later on... 
- self.instances.borrow_mut().insert(instance, decl); + self.instances.borrow_mut().insert(fn_abi.ty, decl); } } diff --git a/compiler/hash-codegen-llvm/src/translation/mod.rs b/compiler/hash-codegen-llvm/src/translation/mod.rs index 3419b823a..8801b7baa 100644 --- a/compiler/hash-codegen-llvm/src/translation/mod.rs +++ b/compiler/hash-codegen-llvm/src/translation/mod.rs @@ -37,7 +37,7 @@ pub(crate) const EMPTY_NAME: *const c_char = EMPTY_C_STR.as_ptr(); /// A [Builder] is defined as being a context that is used to implement /// all of the specified builder methods. pub struct LLVMBuilder<'a, 'b, 'm> { - /// The actual InkWell builder + /// The actual Inkwell builder. pub(crate) builder: inkwell::builder::Builder<'m>, /// The context for the builder. diff --git a/compiler/hash-codegen-llvm/src/translation/ty.rs b/compiler/hash-codegen-llvm/src/translation/ty.rs index a6e5c8217..c4f934a7d 100644 --- a/compiler/hash-codegen-llvm/src/translation/ty.rs +++ b/compiler/hash-codegen-llvm/src/translation/ty.rs @@ -251,23 +251,6 @@ impl<'b> TypeBuilderMethods<'b> for CodeGenCtx<'b, '_> { } } -/// A [TyMemoryRemap] is a type that is used to represent the occurred -/// memory field re-mapping that occurs when lowering a type to LLVM. -/// This re-mapping originates from the fact that "padding" within types -/// now becomes a concrete type, and thus the memory layout of the type -/// changes if padding slots are inserted. If the type had any re-maps, -/// then the [TyMemoryRemap] will contain a `remap` field with the -/// new memory to source field mapping. -pub(crate) struct TyMemoryRemap<'m> { - /// The lowered type. - pub ty: AnyTypeEnum<'m>, - - /// If the type was re-mapped, this is a reference - /// to the new memory map which should be used over the - /// one that is stored in the [LayoutShape] of a [Layout]. 
- pub remap: Option>, -} - /// Define a trait that provides additional methods on the [CodeGenCtx] /// for computing types as LLVM types, and various other related LLVM /// specific type utilities. @@ -296,20 +279,7 @@ pub(crate) trait ExtendedTyBuilderMethods<'m> { impl<'m> ExtendedTyBuilderMethods<'m> for TyInfo { fn llvm_ty(&self, ctx: &CodeGenCtx<'_, 'm>) -> llvm::types::AnyTypeEnum<'m> { - let (abi, variant_index) = self.layout.map(|layout| { - let variant_index = match &layout.variants { - Variants::Single { index } => Some(*index), - _ => None, - }; - - (layout.abi, variant_index) - }); - - // Check the cache if we have already computed the lowered type - // for this ir-type. - if let Some(ty_remap) = ctx.ty_remaps.borrow().get(&(self.ty, variant_index)) { - return ty_remap.ty; - } + let abi = self.layout.map(|layout| layout.abi); match abi { AbiRepresentation::Scalar(scalar) => self.scalar_llvm_type_at(ctx, scalar), @@ -386,9 +356,9 @@ impl<'m> ExtendedTyBuilderMethods<'m> for TyInfo { ctx.type_array(field_ty, elements) } LayoutShape::Aggregate { .. 
} => { - let (ty, field_remapping) = match name { + match name { Some(ref name) => { - let (fields, packed, new_field_remapping) = + let (fields, packed, _) = create_and_pad_struct_fields_from_layout( ctx, *self, layout, ); @@ -404,24 +374,17 @@ impl<'m> ExtendedTyBuilderMethods<'m> for TyInfo { .collect::>(); ty.set_body(&fields, packed); - (ty.into(), new_field_remapping) + ty.into() } None => { - let (fields, packed, new_field_remapping) = + let (fields, packed, _) = create_and_pad_struct_fields_from_layout( ctx, *self, layout, ); - (ctx.type_struct(&fields, packed), new_field_remapping) + ctx.type_struct(&fields, packed) } - }; - - ctx.ty_remaps.borrow_mut().insert( - (self.ty, variant_index), - TyMemoryRemap { ty, remap: field_remapping }, - ); - - ty + } } } }) diff --git a/compiler/hash-codegen-vm/src/ctx.rs b/compiler/hash-codegen-vm/src/ctx.rs index 8c7747289..5cbafedd6 100644 --- a/compiler/hash-codegen-vm/src/ctx.rs +++ b/compiler/hash-codegen-vm/src/ctx.rs @@ -6,8 +6,13 @@ use hash_codegen::{ target::{HasTarget, Target}, traits::{BackendTypes, HasCtxMethods}, }; -use hash_ir::IrCtx; +use hash_ir::{ + IrCtx, + ir::Const, + ty::{InstanceId, ReprTyId}, +}; use hash_pipeline::settings::CompilerSettings; +use hash_vm::{builder, bytecode::LabelOffset}; /// The [Ctx] is used a context for converting Hash IR into bytecode. It /// stores references to all of the required information about the IR, as well @@ -26,6 +31,9 @@ pub struct Ctx<'a> { /// Store for all of the information about type [Layout]s. pub layouts: &'a LayoutStorage, + + /// The bytecode builder that is being used to build the bytecode. + pub builder: builder::BytecodeBuilder, } impl Ctx<'_> { @@ -36,16 +44,16 @@ impl Ctx<'_> { codegen_ctx: &'a CodeGenStorage, layouts: &'a LayoutStorage, ) -> Ctx<'a> { - Ctx { settings, ir_ctx, codegen_ctx, layouts } + Ctx { settings, ir_ctx, codegen_ctx, layouts, builder: builder::BytecodeBuilder::new() } } } /// Specification for `BackedTypes` for the [Ctx]. 
impl<'m> BackendTypes for Ctx<'m> { - type Value = (); - type Function = (); - type Type = (); - type BasicBlock = (); + type Value = Const; + type Function = InstanceId; + type Type = ReprTyId; + type BasicBlock = LabelOffset; type DebugInfoScope = (); type DebugInfoLocation = (); type DebugInfoVariable = (); diff --git a/compiler/hash-codegen-vm/src/lib.rs b/compiler/hash-codegen-vm/src/lib.rs index 73eadbd57..4e4f17747 100644 --- a/compiler/hash-codegen-vm/src/lib.rs +++ b/compiler/hash-codegen-vm/src/lib.rs @@ -16,6 +16,7 @@ use hash_pipeline::{interface::CompilerResult, settings::CompilerSettings, works use hash_source::SourceMapUtils; use hash_storage::store::statics::StoreId; use hash_utils::profiling::{HasMutMetrics, StageMetrics}; +use hash_vm::builder::FunctionBuilder; use crate::{ctx::Ctx, translation::VMBuilder}; @@ -73,11 +74,15 @@ impl<'b> VMBackend<'b> { continue; } - // Get the instance of the function. - let instance = body.metadata().ty().borrow().as_instance(); + // Create a new `FunctionBuilder` and register it to the given + // instance. + let ty = body.metadata().ty(); + let instance = ty.borrow().as_instance(); + let fn_builder = FunctionBuilder::new(instance); + ctx.builder.new_function(fn_builder); // @@ErrorHandling: we should be able to handle the error here - codegen_body::(instance, body, ctx).unwrap(); + codegen_body::(body, ctx).unwrap(); if instance.borrow().has_attr(attrs::DUMP_BYTECODE) { // @@Messaging diff --git a/compiler/hash-codegen-vm/src/translation/abi.rs b/compiler/hash-codegen-vm/src/translation/abi.rs index af2a04674..4368fbf57 100644 --- a/compiler/hash-codegen-vm/src/translation/abi.rs +++ b/compiler/hash-codegen-vm/src/translation/abi.rs @@ -1,33 +1,65 @@ //! This implements all of the ABI specified methods for the [Builder]. 
use hash_codegen::{abi::ArgAbi, lower::place::PlaceRef, traits::abi::AbiBuilderMethods}; +use hash_ir::ir::Const; use crate::translation::VMBuilder; -impl<'b> AbiBuilderMethods<'b> for VMBuilder<'b> { +impl<'b> AbiBuilderMethods<'b> for VMBuilder<'_, 'b> { fn get_param(&mut self, _index: usize) -> Self::Value { unimplemented!() } fn store_fn_call_arg( &mut self, - _arg_abi: &ArgAbi, - _value: Self::Value, - _destination: PlaceRef, + arg_abi: &ArgAbi, + value: Self::Value, + destination: PlaceRef, ) { - unimplemented!() + arg_abi.store(self, value, destination) } fn store_fn_arg( &mut self, - _arg_abi: &ArgAbi, - _index: &mut usize, - _destination: PlaceRef, + arg_abi: &ArgAbi, + index: &mut usize, + destination: PlaceRef, ) { - unimplemented!() + arg_abi.store_fn_arg(self, index, destination) } - fn arg_ty(&mut self, _arg_abi: &ArgAbi) -> Self::Type { - unimplemented!() + fn arg_ty(&mut self, arg_abi: &ArgAbi) -> Self::Type { + arg_abi.info.ty + } +} + +pub trait ExtendedArgAbiMethods<'b, 'm> { + fn store(&self, builder: &mut VMBuilder<'_, 'm>, value: Const, destination: PlaceRef); + + fn store_fn_arg( + &self, + builder: &mut VMBuilder<'b, 'm>, + index: &mut usize, + destination: PlaceRef, + ); +} + +impl<'b, 'm> ExtendedArgAbiMethods<'b, 'm> for ArgAbi { + fn store( + &self, + _builder: &mut VMBuilder<'_, 'm>, + _value: Const, + _destination: PlaceRef, + ) { + // @@TODO: do not emit anything for now. + } + + fn store_fn_arg( + &self, + _builder: &mut VMBuilder<'b, 'm>, + _index: &mut usize, + _destination: PlaceRef, + ) { + // @@TODO: do not emit anything for now. 
} } diff --git a/compiler/hash-codegen-vm/src/translation/builder.rs b/compiler/hash-codegen-vm/src/translation/builder.rs index 63abd8f88..044ed42c4 100644 --- a/compiler/hash-codegen-vm/src/translation/builder.rs +++ b/compiler/hash-codegen-vm/src/translation/builder.rs @@ -1,22 +1,24 @@ -use hash_codegen::traits::builder::BlockBuilderMethods; +use hash_codegen::traits::{builder::BlockBuilderMethods, layout::LayoutMethods}; +use hash_ir::{ + ir::{Const, Scalar}, + ty::COMMON_REPR_TYS, +}; +use hash_vm::inst; use crate::translation::VMBuilder; -impl<'a, 'b> BlockBuilderMethods<'a, 'b> for VMBuilder<'b> { +impl<'a, 'b> BlockBuilderMethods<'a, 'b> for VMBuilder<'a, 'b> { fn ctx(&self) -> &Self::CodegenCtx { - todo!() + self.ctx } - fn build(_ctx: &'a Self::CodegenCtx, _block: Self::BasicBlock) -> Self { - todo!() + fn build(ctx: &'a Self::CodegenCtx, _: Self::BasicBlock) -> Self { + Self { ctx } } - fn append_block( - _ctx: &'a Self::CodegenCtx, - _func: Self::Function, - _name: &str, - ) -> Self::BasicBlock { - todo!() + fn append_block(ctx: &'a Self::CodegenCtx, func: Self::Function, _: &str) -> Self::BasicBlock { + // @@Todo: maybe support labels for debugging purposes. 
+ ctx.builder.with_fn_builder_mut(func, |f| f.reserve_block()) } fn append_sibling_block(&mut self, _name: &str) -> Self::BasicBlock { @@ -24,11 +26,11 @@ impl<'a, 'b> BlockBuilderMethods<'a, 'b> for VMBuilder<'b> { } fn basic_block(&self) -> Self::BasicBlock { - todo!() + self.ctx.builder.with_current_mut(|fb| fb.reserve_block()) } - fn switch_to_block(&mut self, _block: Self::BasicBlock) { - todo!() + fn switch_to_block(&mut self, block: Self::BasicBlock) { + self.ctx.builder.with_current_mut(|fb| fb.switch_to_block(block)); } fn return_value(&mut self, _value: Self::Value) { @@ -313,8 +315,8 @@ impl<'a, 'b> BlockBuilderMethods<'a, 'b> for VMBuilder<'b> { todo!() } - fn value_from_immediate(&mut self, _v: Self::Value) -> Self::Value { - todo!() + fn value_from_immediate(&mut self, v: Self::Value) -> Self::Value { + v } fn to_immediate_scalar( @@ -327,10 +329,22 @@ impl<'a, 'b> BlockBuilderMethods<'a, 'b> for VMBuilder<'b> { fn alloca( &mut self, - _ty: Self::Type, - _alignment: hash_codegen::target::alignment::Alignment, + ty: Self::Type, + _: hash_codegen::target::alignment::Alignment, ) -> Self::Value { - todo!() + // @@todo: do we need to handle alignment here? + let size = self.ctx().layout_of(ty).size().bytes(); + + self.builder.with_current_mut(|fb| { + fb.append(inst! { + add64 SP, r[size]; + }); + }); + + // we need to know the address before the addition + // somehow... 
+ let scalar = Scalar::from_uint(0u32, self.layouts.data_layout.pointer_size); + Const::scalar(scalar, COMMON_REPR_TYS.raw_ptr) } fn byte_array_alloca( @@ -391,12 +405,39 @@ impl<'a, 'b> BlockBuilderMethods<'a, 'b> for VMBuilder<'b> { fn store_with_flags( &mut self, - _value: Self::Value, - _ptr: Self::Value, + value: Self::Value, + ptr: Self::Value, _alignment: hash_codegen::target::alignment::Alignment, _flags: hash_codegen::common::MemFlags, ) -> Self::Value { - todo!() + let scalar = value.as_scalar(); + let size = scalar.size(); + let bits = scalar.assert_bits(size); + + // get raw address to write to. + let dest = ptr.as_scalar().to_target_usize(self.ctx) as usize; + + // Based on the size, we use the right store instruction. + self.builder.with_current_mut(|f| { + f.append(match size.bytes() { + 1 => inst! { + write8 #[dest], #[bits as u8]; + }, + 2 => inst! { + write16 #[dest], #[bits as u16]; + }, + 4 => inst! { + write32 #[dest], #[bits as u32]; + }, + 8 => inst! { + write64 #[dest], #[bits as u64]; + }, + _ => panic!("Unsupported store size: {}", size.bytes()), + }); + }); + + // @@Todo: do we really need to return anything here? 
+ value } fn atomic_store( diff --git a/compiler/hash-codegen-vm/src/translation/constants.rs b/compiler/hash-codegen-vm/src/translation/constants.rs index 6e6c92089..15a76edfc 100644 --- a/compiler/hash-codegen-vm/src/translation/constants.rs +++ b/compiler/hash-codegen-vm/src/translation/constants.rs @@ -1,4 +1,5 @@ use hash_codegen::traits::constants::ConstValueBuilderMethods; +use hash_ir::ir::Const; use crate::ctx::Ctx; @@ -85,11 +86,11 @@ impl<'b> ConstValueBuilderMethods<'b> for Ctx<'b> { fn const_scalar_value( &self, - _scalar: hash_ir::ir::Scalar, - _abi: hash_codegen::target::abi::Scalar, - _ty: Self::Type, + scalar: hash_ir::ir::Scalar, + _: hash_codegen::target::abi::Scalar, + ty: Self::Type, ) -> Self::Value { - todo!() + Const::scalar(scalar, ty) } fn const_data_from_alloc(&self, _alloc: hash_ir::ir::AllocId) -> Self::Value { diff --git a/compiler/hash-codegen-vm/src/translation/debug_info.rs b/compiler/hash-codegen-vm/src/translation/debug_info.rs index b1fa96b1c..605b944b9 100644 --- a/compiler/hash-codegen-vm/src/translation/debug_info.rs +++ b/compiler/hash-codegen-vm/src/translation/debug_info.rs @@ -9,7 +9,7 @@ use hash_source::{identifier::Identifier, location::Span}; use super::VMBuilder; -impl DebugInfoBuilderMethods for VMBuilder<'_> { +impl DebugInfoBuilderMethods for VMBuilder<'_, '_> { fn create_debug_info_scope_for_fn( &self, _fn_abi: &FnAbi, diff --git a/compiler/hash-codegen-vm/src/translation/intrinsics.rs b/compiler/hash-codegen-vm/src/translation/intrinsics.rs index c50066306..98dfbef90 100644 --- a/compiler/hash-codegen-vm/src/translation/intrinsics.rs +++ b/compiler/hash-codegen-vm/src/translation/intrinsics.rs @@ -5,7 +5,7 @@ use hash_ir::ty::ReprTyId; use super::VMBuilder; -impl<'b> IntrinsicBuilderMethods<'b> for VMBuilder<'b> { +impl<'b> IntrinsicBuilderMethods<'b> for VMBuilder<'_, 'b> { fn codegen_intrinsic_call( &mut self, _ty: ReprTyId, diff --git a/compiler/hash-codegen-vm/src/translation/layouts.rs 
b/compiler/hash-codegen-vm/src/translation/layouts.rs index cd5b88599..2195ad339 100644 --- a/compiler/hash-codegen-vm/src/translation/layouts.rs +++ b/compiler/hash-codegen-vm/src/translation/layouts.rs @@ -1,28 +1,21 @@ //! Implements all of the required methods for computing the layouts of types. -use hash_codegen::{repr::TyInfo, traits::layout::LayoutMethods}; +use hash_codegen::{repr::TyInfo, target::abi::AbiRepresentation, traits::layout::LayoutMethods}; +use hash_storage::store::statics::StoreId; use crate::{ctx::Ctx, translation::VMBuilder}; impl<'b> LayoutMethods<'b> for Ctx<'b> { - fn backend_field_index(&self, _info: TyInfo, _index: usize) -> u64 { - todo!() - } - - fn is_backend_immediate(&self, _info: TyInfo) -> bool { - todo!() + fn is_backend_immediate(&self, info: TyInfo) -> bool { + info.layout.map(|layout| layout.is_vm_immediate()) } - fn is_backend_scalar_pair(&self, _info: TyInfo) -> bool { - todo!() + fn is_backend_scalar_pair(&self, info: TyInfo) -> bool { + info.layout.map(|layout| layout.is_vm_scalar_pair()) } } -impl<'b> LayoutMethods<'b> for VMBuilder<'b> { - fn backend_field_index(&self, info: TyInfo, index: usize) -> u64 { - self.ctx.backend_field_index(info, index) - } - +impl<'b> LayoutMethods<'b> for VMBuilder<'_, 'b> { fn is_backend_immediate(&self, info: TyInfo) -> bool { self.ctx.is_backend_immediate(info) } @@ -31,3 +24,26 @@ impl<'b> LayoutMethods<'b> for VMBuilder<'b> { self.ctx.is_backend_scalar_pair(info) } } + +pub trait ExtendedLayoutMethods<'m> { + fn is_vm_immediate(&self) -> bool; + + /// Returns true if this [Layout] ABI is represented as is a + /// [`AbiRepresentation::Pair`] + fn is_vm_scalar_pair(&self) -> bool; +} + +impl<'m> ExtendedLayoutMethods<'m> for &hash_codegen::repr::Layout { + fn is_vm_immediate(&self) -> bool { + match self.abi { + AbiRepresentation::Scalar(_) | AbiRepresentation::Vector { .. } => true, + AbiRepresentation::Pair(..) 
+ | AbiRepresentation::Aggregate + | AbiRepresentation::Uninhabited => false, + } + } + + fn is_vm_scalar_pair(&self) -> bool { + matches!(self.abi, hash_codegen::target::abi::AbiRepresentation::Pair(..)) + } +} diff --git a/compiler/hash-codegen-vm/src/translation/misc.rs b/compiler/hash-codegen-vm/src/translation/misc.rs index ecb0fd5c1..5078086be 100644 --- a/compiler/hash-codegen-vm/src/translation/misc.rs +++ b/compiler/hash-codegen-vm/src/translation/misc.rs @@ -1,22 +1,25 @@ -use hash_codegen::traits::misc::MiscBuilderMethods; +use hash_codegen::{repr::ty, traits::misc::MiscBuilderMethods}; +use hash_storage::store::statics::StoreId; use crate::ctx::Ctx; impl<'b> MiscBuilderMethods<'b> for Ctx<'b> { - fn get_fn(&self, _instance: hash_ir::ty::InstanceId) -> Self::Function { - todo!() + fn get_fn(&self, instance: ty::ReprTyId) -> Self::Function { + instance.borrow().as_instance() } - fn get_fn_ptr(&self, _instance: hash_ir::ty::InstanceId) -> Self::Value { - todo!() + fn get_fn_ptr(&self, ty: ty::ReprTyId) -> Self::Value { + self.get_fn_addr(ty) } - fn get_fn_addr(&self, _instance: hash_ir::ty::InstanceId) -> Self::Value { + fn get_fn_addr(&self, _ty: ty::ReprTyId) -> Self::Value { todo!() } - fn declare_entry_point(&self, _fn_ty: Self::Type) -> Option { - todo!() + fn declare_entry_point(&self, fn_ty: Self::Type) -> Option { + let instance = fn_ty.borrow().as_instance(); + + Some(instance) } fn predefine_fn( diff --git a/compiler/hash-codegen-vm/src/translation/mod.rs b/compiler/hash-codegen-vm/src/translation/mod.rs index 2acffb11a..64b45a44d 100644 --- a/compiler/hash-codegen-vm/src/translation/mod.rs +++ b/compiler/hash-codegen-vm/src/translation/mod.rs @@ -18,23 +18,27 @@ use hash_codegen::{ }; use hash_ir::IrCtx; use hash_pipeline::settings::CompilerSettings; -use hash_vm::builder::BytecodeBuilder; use crate::ctx::Ctx; -pub struct VMBuilder<'b> { - /// The actual VM bytecode builder - pub(crate) _builder: BytecodeBuilder, - +/// The [VMBuilder] is the 
main builder type for generating Hash VM bytecode. +/// +/// It provides access to the code generation context, target information, +/// and various utilities needed during the translation process. +/// +/// The [VMBuilder] implements all of the traits that are required for code +/// generation to occur. This includes traits for handling ABI methods, type +/// layouts, and other codegen-related functionality. +pub struct VMBuilder<'a, 'b> { /// The context for the builder. - pub(crate) ctx: &'b Ctx<'b>, + pub(crate) ctx: &'a Ctx<'b>, } -impl<'b> Codegen<'b> for VMBuilder<'b> { +impl<'b> Codegen<'b> for VMBuilder<'_, 'b> { type CodegenCtx = Ctx<'b>; } -impl<'b> BackendTypes for VMBuilder<'b> { +impl<'b> BackendTypes for VMBuilder<'_, 'b> { type Value = as BackendTypes>::Value; type Function = as BackendTypes>::Function; type Type = as BackendTypes>::Type; @@ -44,7 +48,7 @@ impl<'b> BackendTypes for VMBuilder<'b> { type DebugInfoVariable = as BackendTypes>::DebugInfoVariable; } -impl<'b> std::ops::Deref for VMBuilder<'b> { +impl<'a, 'b> std::ops::Deref for VMBuilder<'a, 'b> { type Target = Ctx<'b>; fn deref(&self) -> &Self::Target { @@ -52,13 +56,13 @@ impl<'b> std::ops::Deref for VMBuilder<'b> { } } -impl HasTarget for VMBuilder<'_> { +impl HasTarget for VMBuilder<'_, '_> { fn target(&self) -> &Target { self.ctx.target() } } -impl<'b> HasCtxMethods<'b> for VMBuilder<'b> { +impl<'b> HasCtxMethods<'b> for VMBuilder<'_, 'b> { fn settings(&self) -> &CompilerSettings { self.ctx.settings() } diff --git a/compiler/hash-codegen-vm/src/translation/ty.rs b/compiler/hash-codegen-vm/src/translation/ty.rs index 8d3f6b346..4d20bb065 100644 --- a/compiler/hash-codegen-vm/src/translation/ty.rs +++ b/compiler/hash-codegen-vm/src/translation/ty.rs @@ -1,110 +1,174 @@ -use hash_codegen::traits::ty::TypeBuilderMethods; +use hash_codegen::{ + abi, common, + repr::TyInfo, + target::{HasTarget, abi::AddressSpace}, + traits::{BackendTypes, layout::LayoutMethods, ty::TypeBuilderMethods}, 
+}; +use hash_ir::ty::{COMMON_REPR_TYS, FnTy, ReprTy, ReprTyListId}; +use hash_source::FloatTy; +use hash_storage::store::statics::{SingleStoreValue, StoreId}; use crate::ctx::Ctx; impl<'b> TypeBuilderMethods<'b> for Ctx<'b> { fn type_i1(&self) -> Self::Type { - todo!() + COMMON_REPR_TYS.bool } fn type_i8(&self) -> Self::Type { - todo!() + COMMON_REPR_TYS.i8 } fn type_i16(&self) -> Self::Type { - todo!() + COMMON_REPR_TYS.i16 } fn type_i32(&self) -> Self::Type { - todo!() + COMMON_REPR_TYS.i32 } fn type_i64(&self) -> Self::Type { - todo!() + COMMON_REPR_TYS.i64 } fn type_i128(&self) -> Self::Type { - todo!() + COMMON_REPR_TYS.i128 } fn type_isize(&self) -> Self::Type { - todo!() + COMMON_REPR_TYS.isize } - fn type_ix(&self, _bits: u64) -> Self::Type { - todo!() + fn type_ix(&self, bits: u64) -> Self::Type { + match bits { + 8 => COMMON_REPR_TYS.i8, + 16 => COMMON_REPR_TYS.i16, + 32 => COMMON_REPR_TYS.i32, + 64 => COMMON_REPR_TYS.i64, + 128 => COMMON_REPR_TYS.i128, + // @@BigInts: support big integers? + _ => panic!("unsupported integer width: {bits}"), + } } fn type_f32(&self) -> Self::Type { - todo!() + COMMON_REPR_TYS.f32 } fn type_f64(&self) -> Self::Type { - todo!() + COMMON_REPR_TYS.f64 } - fn type_array(&self, _ty: Self::Type, _len: u64) -> Self::Type { - todo!() + fn type_array(&self, ty: Self::Type, length: u64) -> Self::Type { + ReprTy::create(ReprTy::Array { ty, length: length as usize }) } - fn type_function(&self, _args: &[Self::Type], _ret: Self::Type) -> Self::Type { - todo!() + fn type_function(&self, args: &[Self::Type], ret: Self::Type) -> Self::Type { + ReprTy::create(ReprTy::Fn(FnTy { + params: ReprTyListId::seq(args.iter().copied()), + return_ty: ret, + })) } - fn type_struct(&self, _els: &[Self::Type], _packed: bool) -> Self::Type { - todo!() + fn type_struct(&self, els: &[Self::Type], _packed: bool) -> Self::Type { + // we don't need to do anything for `_packed` in the VM. 
+ ReprTy::create(ReprTy::tuple(els)) } fn type_ptr(&self) -> Self::Type { - todo!() + COMMON_REPR_TYS.ptr } - fn type_ptr_ext(&self, _address_space: hash_codegen::target::abi::AddressSpace) -> Self::Type { - todo!() + fn type_ptr_ext(&self, _address_space: AddressSpace) -> Self::Type { + COMMON_REPR_TYS.ptr } - fn element_type(&self, _ty: Self::Type) -> Self::Type { - todo!() + fn element_type(&self, ty: Self::Type) -> Self::Type { + ty.borrow().element_ty().unwrap() } - fn vector_length(&self, _ty: Self::Type) -> usize { - todo!() + fn vector_length(&self, ty: Self::Type) -> usize { + // Although: array types like [X; N] are supported which are effectively + // vectors? + unimplemented!("vector types are not supported yet: {ty:#?}") } - fn float_width(&self, _ty: Self::Type) -> usize { - todo!() + fn float_width(&self, ty: Self::Type) -> usize { + ty.borrow().as_float().size().bits() as usize } - fn int_width(&self, _ty: Self::Type) -> u64 { - todo!() + fn int_width(&self, ty: Self::Type) -> u64 { + ty.borrow().as_int().size(self.target().ptr_size()).bits() } - fn ty_of_value(&self, _value: Self::Value) -> Self::Type { - todo!() + fn ty_of_value(&self, value: Self::Value) -> Self::Type { + value.ty() } - fn ty_kind(&self, _ty: Self::Type) -> hash_codegen::common::TypeKind { - todo!() + /// This method maps a backend type to a [TypeKind]. + /// + /// I don't think this is really needed by the VM backend, but we implement + /// it anyway for completeness. + fn ty_kind(&self, ty: Self::Type) -> common::TypeKind { + let info = self.layout_of(ty); + + // 1. Check that if this is a ZST, this is basically a `void` type. + if info.is_zst() { + return common::TypeKind::Void; + } + + // The rest are a trivial mapping from ReprTy to TypeKind. + ty.map(|ty| match ty { + ReprTy::Bool | ReprTy::Char | ReprTy::UInt(..) | ReprTy::Int(..) 
=> { + common::TypeKind::Integer + } + ReprTy::Float(FloatTy::F32) => common::TypeKind::Float, + ReprTy::Float(FloatTy::F64) => common::TypeKind::Double, + ReprTy::Ref { .. } => common::TypeKind::Pointer, + ReprTy::Array { .. } => common::TypeKind::Array, + ReprTy::FnDef { .. } | ReprTy::Fn { .. } => common::TypeKind::Function, + ReprTy::Slice(..) | ReprTy::Str | ReprTy::Adt { .. } => common::TypeKind::Struct, + ReprTy::Never => common::TypeKind::Void, + }) } - fn immediate_backend_ty(&self, _info: hash_codegen::repr::TyInfo) -> Self::Type { - todo!() + fn immediate_backend_ty(&self, info: TyInfo) -> Self::Type { + info.ty } fn scalar_pair_element_backend_ty( &self, - _info: hash_codegen::repr::TyInfo, - _index: usize, - _immediate: bool, + info: TyInfo, + index: usize, + immediate: bool, ) -> Self::Type { - todo!() + info.scalar_pair_element_ty(index, immediate) } - fn backend_ty_from_info(&self, _info: hash_codegen::repr::TyInfo) -> Self::Type { - todo!() + fn backend_ty_from_info(&self, info: TyInfo) -> Self::Type { + info.ty } - fn backend_ty_from_abi(&self, _abi: &hash_codegen::abi::FnAbi) -> Self::Type { - todo!() + fn backend_ty_from_abi(&self, abi: &abi::FnAbi) -> Self::Type { + abi.ty + } +} + +trait ExtendedTyBuilderMethods<'m> { + fn scalar_pair_element_ty( + &self, + index: usize, + immediate: bool, + ) -> as BackendTypes>::Type; +} + +impl<'m> ExtendedTyBuilderMethods<'m> for TyInfo { + fn scalar_pair_element_ty(&self, index: usize, _: bool) -> as BackendTypes>::Type { + self.ty.map(|ty| { + let adt = ty.as_adt().borrow(); + let variant = adt.univariant(); + + variant.fields[index].ty + }) } } diff --git a/compiler/hash-codegen/src/lower/abi.rs b/compiler/hash-codegen/src/lower/abi.rs index b83975cc9..5e5a4c60e 100644 --- a/compiler/hash-codegen/src/lower/abi.rs +++ b/compiler/hash-codegen/src/lower/abi.rs @@ -77,6 +77,7 @@ pub enum FnAbiError { /// Compute an [FnAbi] from a provided [Instance]. 
pub fn compute_fn_abi_from_instance<'b, Ctx: HasCtxMethods<'b> + LayoutMethods<'b>>( ctx: &Ctx, + ty: ReprTyId, instance: InstanceId, ) -> Result { let Instance { params, return_ty, abi, .. } = instance.value(); @@ -85,13 +86,14 @@ pub fn compute_fn_abi_from_instance<'b, Ctx: HasCtxMethods<'b> + LayoutMethods<' // to the target. let calling_convention = CallingConvention::make_from_abi_and_target(abi, ctx.target()); - compute_fn_abi(ctx, params, return_ty, calling_convention) + compute_fn_abi(ctx, ty, params, return_ty, calling_convention) } /// Compute an [FnAbi] from a provided function parameter and return type, with /// a given calling convention. pub fn compute_fn_abi<'b, Ctx: HasCtxMethods<'b> + LayoutMethods<'b>>( ctx: &Ctx, + ty: ReprTyId, params: ReprTyListId, ret_ty: ReprTyId, calling_convention: CallingConvention, @@ -117,6 +119,7 @@ pub fn compute_fn_abi<'b, Ctx: HasCtxMethods<'b> + LayoutMethods<'b>>( }; let fn_abi = FnAbi { + ty, args: params .borrow() .iter() diff --git a/compiler/hash-codegen/src/lower/intrinsics.rs b/compiler/hash-codegen/src/lower/intrinsics.rs index f7a6c4aa9..f96138215 100644 --- a/compiler/hash-codegen/src/lower/intrinsics.rs +++ b/compiler/hash-codegen/src/lower/intrinsics.rs @@ -2,7 +2,7 @@ //! code and resolving references to intrinsic function calls. 
use hash_abi::ArgAbi; -use hash_ir::{intrinsics::Intrinsic, lang_items::LangItem, ty::InstanceId}; +use hash_ir::{intrinsics::Intrinsic, lang_items::LangItem, ty::ReprTyId}; use hash_repr::TyInfo; use hash_target::abi; @@ -21,9 +21,9 @@ impl<'a, 'b, Builder: BlockBuilderMethods<'a, 'b>> FnBuilder<'a, 'b, Builder> { &mut self, builder: &Builder, item: LangItem, - ) -> (InstanceId, Builder::Value) { - let instance = self.ctx.ir_ctx().lang_items().get(item).unwrap(); - (instance, builder.get_fn_ptr(instance)) + ) -> (ReprTyId, Builder::Value) { + let item = self.ctx.ir_ctx().lang_items().get(item).unwrap(); + (item.ty, builder.get_fn_ptr(item.ty)) } /// Function that handles generating code for the defined language diff --git a/compiler/hash-codegen/src/lower/mod.rs b/compiler/hash-codegen/src/lower/mod.rs index 412d8a06c..01369747f 100644 --- a/compiler/hash-codegen/src/lower/mod.rs +++ b/compiler/hash-codegen/src/lower/mod.rs @@ -10,7 +10,6 @@ use hash_abi::{FnAbiId, PassMode}; use hash_ir::{ ir::{self, Local}, traversal, - ty::InstanceId, }; use hash_storage::store::Store; use hash_utils::index_vec::IndexVec; @@ -136,17 +135,15 @@ impl<'a, 'b, Builder: BlockBuilderMethods<'a, 'b>> FnBuilder<'a, 'b, Builder> { /// 3. Traverse the control flow graph in post-order and generate each block in /// the function. 
pub fn codegen_body<'a, 'b, Builder: BlockBuilderMethods<'a, 'b>>( - instance: InstanceId, body: &'b ir::Body, ctx: &'a Builder::CodegenCtx, ) -> Result<(), FnAbiError> { // @@Todo: compute debug info about each local - let func = ctx.get_fn(instance); - + let ty = body.metadata().ty(); + let func = ctx.get_fn(ty); let abis = ctx.cg_ctx().abis(); - - let fn_abi = abis.create_fn_abi_from_instance(ctx, instance); + let fn_abi = abis.create_fn_abi_from_ty(ctx, ty); let is_return_indirect = abis.map_fast(fn_abi, |abi| abi.ret_abi.is_indirect()); // create the starting block, this is needed since we always specify diff --git a/compiler/hash-codegen/src/lower/operands.rs b/compiler/hash-codegen/src/lower/operands.rs index 3bddf2973..e64404d99 100644 --- a/compiler/hash-codegen/src/lower/operands.rs +++ b/compiler/hash-codegen/src/lower/operands.rs @@ -429,8 +429,7 @@ impl<'a, 'b, Builder: BlockBuilderMethods<'a, 'b>> FnBuilder<'a, 'b, Builder> { // resolving! ir::Operand::Const(constant) if constant.is_zero() && constant.ty().is_fn_def() => { let ty = constant.ty(); - let instance = ty.borrow().as_instance(); - let value = OperandValue::Immediate(builder.get_fn_addr(instance)); + let value = OperandValue::Immediate(builder.get_fn_addr(ty)); let info = builder.layout_of(ty); OperandRef { value, info } } diff --git a/compiler/hash-codegen/src/lower/place.rs b/compiler/hash-codegen/src/lower/place.rs index 63e50ced1..36a8987a1 100644 --- a/compiler/hash-codegen/src/lower/place.rs +++ b/compiler/hash-codegen/src/lower/place.rs @@ -251,10 +251,10 @@ impl<'a, 'b, V: CodeGenObject> PlaceRef { // This must be a struct.. 
_ => { let ty = builder.backend_ty_from_info(self.info); - builder.structural_get_element_pointer( + builder.bounded_get_element_pointer( ty, self.value, - builder.backend_field_index(self.info, field), + &[builder.const_usize(field_offset.bytes())], ) } }; diff --git a/compiler/hash-codegen/src/lower/terminator.rs b/compiler/hash-codegen/src/lower/terminator.rs index ca328583b..5f2c48a25 100644 --- a/compiler/hash-codegen/src/lower/terminator.rs +++ b/compiler/hash-codegen/src/lower/terminator.rs @@ -204,10 +204,7 @@ impl<'a, 'b, Builder: BlockBuilderMethods<'a, 'b>> FnBuilder<'a, 'b, Builder> { // compute the function pointer value and the ABI let abis = self.ctx.cg_ctx().abis(); - let fn_abi = match instance { - Some(instance) => abis.create_fn_abi_from_instance(builder, instance), - None => abis.create_fn_abi_from_ty(builder, ty.borrow().as_fn()), - }; + let fn_abi = abis.create_fn_abi_from_ty(builder, ty); let ret_abi = abis.map_fast(fn_abi, |abi| abi.ret_abi); // If the return ABI pass mode is "indirect", then this means that @@ -283,11 +280,7 @@ impl<'a, 'b, Builder: BlockBuilderMethods<'a, 'b>> FnBuilder<'a, 'b, Builder> { }; } - let fn_ptr = match (instance, func) { - (Some(instance), None) => builder.get_fn_ptr(instance), - (_, Some(func)) => func, - _ => unreachable!(), - }; + let fn_ptr = if let Some(fn_ptr) = func { fn_ptr } else { builder.get_fn_ptr(ty) }; // Finally, generate the code for the function call and // cleanup @@ -639,8 +632,8 @@ impl<'a, 'b, Builder: BlockBuilderMethods<'a, 'b>> FnBuilder<'a, 'b, Builder> { let args: [Builder::Value; 2] = (bytes, len).into(); // Get the `panic` lang item. - let (instance, fn_ptr) = self.resolve_lang_item(builder, LangItem::Panic); - let abi = self.ctx.cg_ctx().abis().create_fn_abi_from_instance(builder, instance); + let (ty, fn_ptr) = self.resolve_lang_item(builder, LangItem::Panic); + let abi = self.ctx.cg_ctx().abis().create_fn_abi_from_ty(builder, ty); // Finally we emit this as a call to panic... 
self.codegen_fn_call(builder, abi, fn_ptr, &args, &[], None, false) diff --git a/compiler/hash-codegen/src/traits/abi.rs b/compiler/hash-codegen/src/traits/abi.rs index 511b9a647..22e47d23a 100644 --- a/compiler/hash-codegen/src/traits/abi.rs +++ b/compiler/hash-codegen/src/traits/abi.rs @@ -4,8 +4,8 @@ use std::cell::RefCell; use hash_abi::{ArgAbi, CallingConvention, FnAbi, FnAbiId}; -use hash_ir::ty::{FnTy, InstanceId}; -use hash_storage::store::{DefaultStore, Store, StoreInternalData}; +use hash_ir::ty::{FnTy, InstanceId, ReprTy, ReprTyId}; +use hash_storage::store::{DefaultStore, Store, StoreInternalData, statics::StoreId}; use hash_utils::fxhash::FxHashMap; use super::{BackendTypes, HasCtxMethods, layout::LayoutMethods}; @@ -72,7 +72,12 @@ impl FnAbiStore { /// Create (or re-use) a [FnAbi] of the [InstanceId]. This function returns /// the [FnAbiId] of the [FnAbi] that was created. - pub fn create_fn_abi_from_instance<'b, Ctx>(&self, ctx: &Ctx, instance: InstanceId) -> FnAbiId + pub fn create_fn_abi_from_instance<'b, Ctx>( + &self, + ctx: &Ctx, + ty: ReprTyId, + instance: InstanceId, + ) -> FnAbiId where Ctx: HasCtxMethods<'b> + LayoutMethods<'b>, { @@ -83,7 +88,7 @@ impl FnAbiStore { // Create the ABI if it does not exist. let abi = self.store.create( // @@Todo: Emit a fatal error if the function ABI cannot be computed. - compute_fn_abi_from_instance(ctx, instance).unwrap(), + compute_fn_abi_from_instance(ctx, ty, instance).unwrap(), ); // Add a mapping from the instance to the ABI. @@ -93,18 +98,25 @@ impl FnAbiStore { /// Compute the [FnAbi] of a given [FnTy] assuming that it is the standard /// calling convention of the target. 
- pub fn create_fn_abi_from_ty<'b, Ctx>(&self, ctx: &Ctx, func_ty: FnTy) -> FnAbiId + pub fn create_fn_abi_from_ty<'b, Ctx>(&self, ctx: &Ctx, ty: ReprTyId) -> FnAbiId where Ctx: HasCtxMethods<'b> + LayoutMethods<'b>, { - let FnTy { params, return_ty } = func_ty; + let ty_info = ty.value(); + let FnTy { params, return_ty } = match ty_info { + ReprTy::FnDef { instance } => { + return self.create_fn_abi_from_instance(ctx, ty, instance); + } + ReprTy::Fn(func) => func, + _ => unreachable!(), + }; // @@Todo: do we need to configure it based on any func attrs? let calling_convention = CallingConvention::C; self.store.create( // @@Todo: Emit a fatal error if the function ABI cannot be computed. - compute_fn_abi(ctx, params, return_ty, calling_convention).unwrap(), + compute_fn_abi(ctx, ty, params, return_ty, calling_convention).unwrap(), ) } @@ -119,15 +131,4 @@ impl FnAbiStore { pub fn get_fn_abi(&self, instance: InstanceId) -> FnAbiId { self.try_get_fn_abi(instance).unwrap() } - - /// Get or create the ABI of the [InstanceId], and then map over the ABI. - pub fn with_fn_abi<'b, F, R, Ctx>(&self, ctx: &mut Ctx, instance: InstanceId, f: F) -> R - where - F: FnOnce(&FnAbi, &mut Ctx) -> R, - Ctx: HasCtxMethods<'b> + LayoutMethods<'b>, - { - // Get or create the ABI, and then map over it. - let abi = self.create_fn_abi_from_instance(ctx, instance); - self.store.map_fast(abi, |abi| f(abi, ctx)) - } } diff --git a/compiler/hash-codegen/src/traits/builder.rs b/compiler/hash-codegen/src/traits/builder.rs index ffcf84188..7f1f7adf5 100644 --- a/compiler/hash-codegen/src/traits/builder.rs +++ b/compiler/hash-codegen/src/traits/builder.rs @@ -33,10 +33,10 @@ pub trait BlockBuilderMethods<'a, 'b>: + IntrinsicBuilderMethods<'b> + DebugInfoBuilderMethods { - /// Get the current context + /// Get the current context. fn ctx(&self) -> &Self::CodegenCtx; - /// Function to build the given `BasicBlock` into the backend equivalent. 
+ /// Build the given `BasicBlock` into the backend equivalent. fn build(ctx: &'a Self::CodegenCtx, block: Self::BasicBlock) -> Self; /// Add a block to the current function. @@ -46,6 +46,7 @@ pub trait BlockBuilderMethods<'a, 'b>: name: &str, ) -> Self::BasicBlock; + /// Append a sibling block to the current function. fn append_sibling_block(&mut self, name: &str) -> Self::BasicBlock; /// Create a new basic block within the current function. diff --git a/compiler/hash-codegen/src/traits/layout.rs b/compiler/hash-codegen/src/traits/layout.rs index 9937e407b..262bd8a42 100644 --- a/compiler/hash-codegen/src/traits/layout.rs +++ b/compiler/hash-codegen/src/traits/layout.rs @@ -16,9 +16,6 @@ pub trait LayoutMethods<'b>: BackendTypes + HasCtxMethods<'b> { TyInfo { ty, layout } } - /// Compute the field index from the backend specific type. - fn backend_field_index(&self, info: TyInfo, index: usize) -> u64; - /// Check whether the [TyInfo] layout can be represented as an /// immediate value. fn is_backend_immediate(&self, ty: TyInfo) -> bool; diff --git a/compiler/hash-codegen/src/traits/misc.rs b/compiler/hash-codegen/src/traits/misc.rs index a801d1efa..916bf70b0 100644 --- a/compiler/hash-codegen/src/traits/misc.rs +++ b/compiler/hash-codegen/src/traits/misc.rs @@ -9,22 +9,28 @@ use hash_ir::ty::{self, InstanceId}; use super::BackendTypes; pub trait MiscBuilderMethods<'b>: BackendTypes { - /// Get a function reference from an [ty::InstanceId]. - fn get_fn(&self, instance: ty::InstanceId) -> Self::Function; + /// Get a function reference from an [ty::ReprTyId]. + /// + /// ##Note: It is assumed that the passed type is a [ty::ReprTy::FnDef]. + fn get_fn(&self, ty: ty::ReprTyId) -> Self::Function; - /// Get a function pointer from a [ty::InstanceId] whilst also + /// Get a function pointer from a [ty::ReprTyId] whilst also /// applying all of the specified attributes that can appear /// on a function definition. 
- fn get_fn_ptr(&self, instance: ty::InstanceId) -> Self::Value; + /// + /// ##Note: It is assumed that the passed type is a [ty::ReprTy::FnDef]. + fn get_fn_ptr(&self, ty: ty::ReprTyId) -> Self::Value; - /// Get a function pointer from a [ty::InstanceId] whilst also + /// Get a function pointer from a [ty::ReprTyId] whilst also /// applying all of the specified attributes that can appear /// on a function definition. - fn get_fn_addr(&self, instance: ty::InstanceId) -> Self::Value; + /// + /// ##Note: It is assumed that the passed type is a [ty::ReprTy::FnDef]. + fn get_fn_addr(&self, ty: ty::ReprTyId) -> Self::Value; - /// Declare the program entry point + /// Declare the program entry point. fn declare_entry_point(&self, ty: Self::Type) -> Option; - /// Pre-define a function based on the instance. + /// Pre-define a function based on the [InstanceId]. fn predefine_fn(&self, instance: InstanceId, symbol_name: &str, fn_abi: &FnAbi); } diff --git a/compiler/hash-ir/src/lang_items.rs b/compiler/hash-ir/src/lang_items.rs index 9d3e1c0aa..a5eec232d 100644 --- a/compiler/hash-ir/src/lang_items.rs +++ b/compiler/hash-ir/src/lang_items.rs @@ -34,15 +34,15 @@ impl LangItem { #[derive(Debug, Clone, Copy)] pub struct LangItemData { /// The defined instance that corresponds to the intrinsic. - instance: InstanceId, + pub instance: InstanceId, /// The type of the lang item. - ty: ReprTyId, + pub ty: ReprTyId, } /// This struct is used to map the [Intrinsic] enum to the /// associated type that is used to represent the intrinsic. -#[derive(Default)] +#[derive(Debug, Default, Clone, Copy)] pub struct LangItems { /// The intrinsic map. items: [Option; std::mem::variant_count::()], @@ -62,12 +62,7 @@ impl LangItems { } /// Get the [InstanceId] for the specified intrinsic. - pub fn get(&self, item: LangItem) -> Option { - self.items[item as usize].map(|item| item.instance) - } - - /// Get the [ReprTyId] for the specified intrinsic. 
- pub fn get_ty(&self, item: LangItem) -> Option { - self.items[item as usize].map(|item| item.ty) + pub fn get(&self, item: LangItem) -> Option { + self.items[item as usize] } } diff --git a/compiler/hash-ir/src/lib.rs b/compiler/hash-ir/src/lib.rs index 788265efa..ae956b64b 100644 --- a/compiler/hash-ir/src/lib.rs +++ b/compiler/hash-ir/src/lib.rs @@ -16,12 +16,13 @@ use std::{ sync::OnceLock, }; +use hash_repr::ty::ReprTyId; use hash_source::entry_point::EntryPointState; use hash_storage::stores; use intrinsics::Intrinsics; use ir::Body; use lang_items::LangItems; -use ty::{AdtStore, InstanceId, InstanceStore, ReprTyListStore, ReprTyStore}; +use ty::{AdtStore, InstanceStore, ReprTyListStore, ReprTyStore}; /// Storage that is used by the lowering stage. This stores all of the /// generated [Body]s and all of the accompanying data for the bodies. @@ -37,7 +38,7 @@ pub struct IrStorage { pub ctx: IrCtx, /// Holds information about the program entry point. - pub entry_point: EntryPointState, + pub entry_point: EntryPointState, } impl Default for IrStorage { diff --git a/compiler/hash-lower/src/build/rvalue.rs b/compiler/hash-lower/src/build/rvalue.rs index cb2e826a1..e47345f9c 100644 --- a/compiler/hash-lower/src/build/rvalue.rs +++ b/compiler/hash-lower/src/build/rvalue.rs @@ -402,7 +402,7 @@ impl BodyBuilder<'_> { rhs: Operand, span: AstNodeId, ) -> BlockAnd { - let str_eq = self.get_lang_item(LangItem::StrEq); + let str_eq = self.get_lang_item_ty(LangItem::StrEq); let eq_result = self.temp_place(COMMON_REPR_TYS.bool); let eq_block = self.control_flow_graph.start_new_block(); self.control_flow_graph.terminate( diff --git a/compiler/hash-lower/src/build/utils.rs b/compiler/hash-lower/src/build/utils.rs index b4fc412cc..d58de64e3 100644 --- a/compiler/hash-lower/src/build/utils.rs +++ b/compiler/hash-lower/src/build/utils.rs @@ -87,8 +87,9 @@ impl BodyBuilder<'_> { } } - pub(crate) fn get_lang_item(&self, name: LangItem) -> ReprTyId { - 
self.ctx.lcx.lang_items().get_ty(name).expect("lang item not found or not defined") + pub(crate) fn get_lang_item_ty(&self, name: LangItem) -> ReprTyId { + let item = self.ctx.lcx.lang_items().get(name).expect("lang item not found or not defined"); + item.ty } /// Create a new [RValue] that represents a pointer with metadata, this uses diff --git a/compiler/hash-lower/src/lib.rs b/compiler/hash-lower/src/lib.rs index 880c04b34..56eb6a8ea 100644 --- a/compiler/hash-lower/src/lib.rs +++ b/compiler/hash-lower/src/lib.rs @@ -127,8 +127,7 @@ impl CompilerStage for IrGen { if let Some(def) = entry_point.def() && def == func { - let instance = body.meta.ty().borrow().as_instance(); - data.icx.entry_point.set(instance, entry_point.kind().unwrap()); + data.icx.entry_point.set(body.meta.ty(), entry_point.kind().unwrap()); } // add the body to the lowered bodies diff --git a/compiler/hash-repr/src/constant.rs b/compiler/hash-repr/src/constant.rs index 4b27f9720..7853e24fd 100644 --- a/compiler/hash-repr/src/constant.rs +++ b/compiler/hash-repr/src/constant.rs @@ -74,6 +74,11 @@ impl Const { matches!(self.kind, ConstKind::Zero) } + /// Check if the [Const] is an allocation. + pub fn is_alloc(&self) -> bool { + matches!(self.kind, ConstKind::Alloc { .. }) + } + /// Get the type of the constant. pub fn ty(&self) -> ReprTyId { self.ty diff --git a/compiler/hash-repr/src/ty.rs b/compiler/hash-repr/src/ty.rs index 1f12890d4..2e6fa7780 100644 --- a/compiler/hash-repr/src/ty.rs +++ b/compiler/hash-repr/src/ty.rs @@ -316,6 +316,14 @@ impl ReprTy { matches!(self, Self::Float(_)) } + /// Assert that the type is a floating point one. + pub fn as_float(&self) -> FloatTy { + match self { + Self::Float(ty) => *ty, + _ => unreachable!(), // @@Todo: handle big floats? + } + } + /// Check if the [ReprTy] is a signed integral type. 
pub fn is_signed(&self) -> bool { matches!(self, Self::Int(_)) diff --git a/compiler/hash-utils/src/range_map.rs b/compiler/hash-utils/src/range_map.rs index 05e94c117..b13fce61a 100644 --- a/compiler/hash-utils/src/range_map.rs +++ b/compiler/hash-utils/src/range_map.rs @@ -68,13 +68,19 @@ impl From> for Range { } } -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug)] pub struct RangeMap { /// The store that stores the [Range] which maps a range of keys /// to a value. store: Vec<(Range, V)>, } +impl Default for RangeMap { + fn default() -> Self { + Self { store: Vec::new() } + } +} + impl fmt::Display for RangeMap { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for (key, value) in self.store.iter() { @@ -85,7 +91,10 @@ impl fmt::Display for RangeMap { } } -impl RangeMap { +impl RangeMap +where + I: PrimInt + Clone + Copy + std::fmt::Debug, +{ /// Create a new empty [RangeMap]. pub fn new() -> Self { Self { store: vec![] } @@ -99,7 +108,7 @@ impl RangeMap { /// Create a new [RangeMap] with specified ranges that are assumed /// to be in order. - pub fn populated(items: Vec<(RangeInclusive, V)>) -> Self { + pub fn populated>>(items: Vec<(R, V)>) -> Self { let map = Self { store: items .into_iter() @@ -145,7 +154,9 @@ impl RangeMap { for (index, (item, _)) in self.store.iter().enumerate() { if overlaps(item, &key) { - panic!("keys are not allowed to overlap in a range map") + panic!( + "keys are not allowed to overlap in a range map. 
Key `{key:?}` overlaps with existing key `{item:?}`" + ); } if key.start > item.end { @@ -224,6 +235,15 @@ impl RangeMap { } } +impl std::iter::IntoIterator for RangeMap { + type Item = (Range, V); + type IntoIter = std::vec::IntoIter<(Range, V)>; + + fn into_iter(self) -> Self::IntoIter { + self.store.into_iter() + } +} + #[cfg(test)] mod test_super { use super::*; diff --git a/compiler/hash-vm/Cargo.toml b/compiler/hash-vm/Cargo.toml index 9003e7705..d737da800 100644 --- a/compiler/hash-vm/Cargo.toml +++ b/compiler/hash-vm/Cargo.toml @@ -8,8 +8,12 @@ edition = { workspace = true } doctest = false [dependencies] +hash-abi = { workspace = true } hash-reporting = { workspace = true } +hash-repr = { workspace = true } hash-source = { workspace = true } -hash-utils = { workspace = true } -hash-abi = { workspace = true } hash-storage = { workspace = true } +hash-utils = { workspace = true } + + +bitflags = { workspace = true } diff --git a/compiler/hash-vm/src/builder/func.rs b/compiler/hash-vm/src/builder/func.rs index aeb07410a..d47c91e0c 100644 --- a/compiler/hash-vm/src/builder/func.rs +++ b/compiler/hash-vm/src/builder/func.rs @@ -1,61 +1,204 @@ //! Function builder related logic for the Hash VM. -use hash_abi::FnAbiId; +use hash_repr::ty::InstanceId; use hash_utils::index_vec::IndexVec; use crate::bytecode::{Instruction, op::LabelOffset, pretty::FunctionBody}; -// Import FunctionBuilder if it's defined in another module +/// Represents a single basic block with its instructions. +#[derive(Debug, Clone)] +struct Block { + /// The instructions within this block. + instructions: Vec, +} + +impl Block { + /// Create a new empty block. + fn new() -> Self { + Self { instructions: Vec::new() } + } + + /// Append an instruction to this block. + fn emit(&mut self, instruction: Instruction) { + self.instructions.push(instruction); + } + + /// Append multiple instructions to this block. 
+ fn append(&mut self, instructions: Vec) { + self.instructions.extend(instructions); + } +} + +/// The [FunctionBuilder] allows building functions with [Block]s that can be +/// constructed out of order. Instructions can be appended to any block at any +/// time. Once building is complete, call [FunctionBuilder::consolidate] to +/// merge all blocks into the final instruction stream with resolved label +/// offsets. #[derive(Debug)] pub struct FunctionBuilder { /// The ABI of the function, this is used to generate /// the correct instructions for the function, to read the /// arguments and return values correctly. - pub abi: FnAbiId, + pub instance: InstanceId, - /// The body of the function. All instructions that make up the function - /// are stored within the body. However, labels are stored separately to - /// allow for easier management of jumps and branches. - pub body: IndexVec, + /// The basic blocks of the function. Each block has its own instruction + /// buffer, allowing out-of-order construction where instructions can be + /// appended to any block at any time. + blocks: IndexVec, - /// The labels within the function body, these are used to - /// manage jumps and branches. The labels store the literal index - /// within the function body where the label is located. This is essentially - /// a mapping from instruction labels to their offsets: + /// The current active block. Instructions emitted via `emit()` or + /// `append()` without specifying a block will go to this block. + current_block: Option, + + /// Whether the function has been consolidated. Once consolidated, the + /// function is immutable and blocks can no longer be modified. + consolidated: bool, + + /// The consolidated body of the function after `consolidate()` is called. + /// This contains all instructions from all blocks in order. + body: IndexVec, + + /// The final label positions after consolidation. 
This maps each block's + /// label to its actual instruction offset in the consolidated body: /// - /// 0 -=-> LabelOffset(0) - /// | - /// \ Instruction 0 - /// Instruction 1 - /// ... - /// 1---> LabelOffset(5): - /// | - /// \ Instruction 5 - /// ... - pub labels: IndexVec, + /// Block 0 (LabelOffset(0)) -> Instruction offset 0 + /// Block 1 (LabelOffset(1)) -> Instruction offset 5 + /// Block 2 (LabelOffset(2)) -> Instruction offset 12 + /// ... + labels: IndexVec, } impl FunctionBuilder { - /// Create a new [FunctionBuilder] with the given ABI. - pub fn new(abi: FnAbiId) -> Self { - Self { abi, body: IndexVec::new(), labels: IndexVec::new() } + /// Create a new [FunctionBuilder] with the given instance. + pub fn new(instance: InstanceId) -> Self { + Self { + instance, + blocks: IndexVec::new(), + current_block: None, + consolidated: false, + body: IndexVec::new(), + labels: IndexVec::new(), + } + } + + /// Reserve a new basic block and return its label. + /// The block starts empty and instructions can be added to it later. + pub fn reserve_block(&mut self) -> LabelOffset { + assert!(!self.consolidated, "cannot reserve blocks after consolidation"); + let label = self.blocks.push(Block::new()); + + // If this is the first block, make it current + if self.current_block.is_none() { + self.current_block = Some(label); + } + + label + } + + /// Switch the current active block to the specified block. + /// Subsequent calls to `emit()` or `append()` will add instructions to this + /// block. + pub fn switch_to_block(&mut self, block: LabelOffset) { + assert!(!self.consolidated, "cannot switch blocks after consolidation"); + assert!(self.blocks.get(block).is_some(), "block {:?} does not exist", block); + self.current_block = Some(block); + } + + /// Get the currently active block, if any. + pub fn current_block(&self) -> Option { + self.current_block } - /// Add an instruction to the function body. + /// Emit a single instruction to the current active block. 
+ /// + /// # Panics + /// Panics if no block is currently active or if the function has been + /// consolidated. pub fn emit(&mut self, instruction: Instruction) { - self.body.push(instruction); + assert!(!self.consolidated, "cannot emit instructions after consolidation"); + let block = self.current_block.expect("no active block"); + self.blocks[block].emit(instruction); } - /// Append multiple instructions to the function body. + /// Append multiple instructions to the current active block. + /// + /// # Panics + /// Panics if no block is currently active or if the function has been + /// consolidated. pub fn append(&mut self, instructions: Vec) { - self.body.extend(instructions); + assert!(!self.consolidated, "cannot append instructions after consolidation"); + let block = self.current_block.expect("no active block"); + self.blocks[block].append(instructions); + } + + /// Emit a single instruction to a specific block. + pub fn emit_to_block(&mut self, block: LabelOffset, instruction: Instruction) { + assert!(!self.consolidated, "cannot emit instructions after consolidation"); + assert!(self.blocks.get(block).is_some(), "block {:?} does not exist", block); + self.blocks[block].emit(instruction); + } + + /// Append multiple instructions to a specific block. + pub fn append_to_block(&mut self, block: LabelOffset, instructions: Vec) { + assert!(!self.consolidated, "cannot append instructions after consolidation"); + assert!(self.blocks.get(block).is_some(), "block {:?} does not exist", block); + self.blocks[block].append(instructions); + } + + /// Consolidate all blocks into the final instruction stream. + /// This resolves all label offsets to their actual positions in the + /// instruction stream. + /// + /// After consolidation, the function becomes immutable and no more + /// instructions can be added. + /// + /// # Panics + /// Panics if the function has already been consolidated. 
+ pub fn consolidate(&mut self) { + assert!(!self.consolidated, "function has already been consolidated"); + + self.body = IndexVec::new(); + self.labels = IndexVec::new(); + + // Iterate through all blocks in order and merge their instructions + for (_block_label, block) in self.blocks.iter_enumerated() { + // Record the current offset as the label position for this block + let offset = LabelOffset::new(self.body.len()); + self.labels.push(offset); + + // Append all instructions from this block + self.body.extend(block.instructions.iter().copied()); + } + + self.consolidated = true; } - /// Append a new block with its own label to the function body. - pub fn append_block(&mut self, instructions: Vec) { - let label = LabelOffset::new(self.body.len()); - self.body.extend(instructions); - self.labels.push(label); + /// Check if the function has been consolidated. + pub fn is_consolidated(&self) -> bool { + self.consolidated + } + + /// Get the number of blocks in the function. + pub fn block_count(&self) -> usize { + self.blocks.len() + } + + /// Get a reference to the consolidated instruction body. + /// + /// # Panics + /// Panics if the function has not been consolidated yet. + pub fn body(&self) -> &IndexVec { + assert!(self.consolidated, "function must be consolidated before accessing body"); + &self.body + } + + /// Get a reference to the label offset mappings. + /// + /// # Panics + /// Panics if the function has not been consolidated yet. 
+ pub fn label_offsets(&self) -> &IndexVec { + assert!(self.consolidated, "function must be consolidated before accessing labels"); + &self.labels } } @@ -68,3 +211,85 @@ impl FunctionBody for FunctionBuilder { &self.body } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::bytecode::{Instruction, register::Register}; + + // Helper to create a dummy InstanceId for testing + fn dummy_instance() -> InstanceId { + unsafe { std::mem::zeroed() } + } + + #[test] + fn test_out_of_order_block_building() { + let instance = dummy_instance(); + let mut builder = FunctionBuilder::new(instance); + + // Create three blocks + let block0 = builder.reserve_block(); + let block1 = builder.reserve_block(); + let block2 = builder.reserve_block(); + + // Add instructions out of order + builder.switch_to_block(block1); + builder.emit(Instruction::Add32 { l1: Register::new(1), l2: Register::new(2) }); + + builder.switch_to_block(block0); + builder.emit(Instruction::Add64 { l1: Register::new(0), l2: Register::new(1) }); + + builder.switch_to_block(block2); + builder.emit(Instruction::Sub32 { l1: Register::new(3), l2: Register::new(4) }); + + // Add more instructions to block0 after working on other blocks + builder.switch_to_block(block0); + builder.emit(Instruction::Mul32 { l1: Register::new(5), l2: Register::new(6) }); + + // Consolidate + builder.consolidate(); + + // Verify the final instruction order + let body = builder.body(); + assert_eq!(body.len(), 4); + + // Block 0 should have 2 instructions at offset 0 + assert_eq!(builder.label_offsets()[block0].get(), 0); + + // Block 1 should have 1 instruction at offset 2 + assert_eq!(builder.label_offsets()[block1].get(), 2); + + // Block 2 should have 1 instruction at offset 3 + assert_eq!(builder.label_offsets()[block2].get(), 3); + } + + #[test] + fn test_append_to_specific_block() { + let instance = dummy_instance(); + let mut builder = FunctionBuilder::new(instance); + + let block0 = builder.reserve_block(); + let block1 = 
builder.reserve_block(); + + // Use append_to_block instead of switching + builder.append_to_block( + block1, + vec![ + Instruction::Add32 { l1: Register::new(1), l2: Register::new(2) }, + Instruction::Sub32 { l1: Register::new(3), l2: Register::new(4) }, + ], + ); + + builder.append_to_block( + block0, + vec![Instruction::Add64 { l1: Register::new(0), l2: Register::new(1) }], + ); + + builder.consolidate(); + + let body = builder.body(); + assert_eq!(body.len(), 3); + assert_eq!(builder.label_offsets()[block0].get(), 0); + assert_eq!(builder.label_offsets()[block1].get(), 1); + } +} diff --git a/compiler/hash-vm/src/builder/instruction.rs b/compiler/hash-vm/src/builder/instruction.rs index adb2c98ca..62c8f7b90 100644 --- a/compiler/hash-vm/src/builder/instruction.rs +++ b/compiler/hash-vm/src/builder/instruction.rs @@ -56,6 +56,11 @@ macro_rules! __parse_operand { ([$lit:literal]) => { $crate::bytecode::register::Register::new($lit) }; + + // Parse the Stack Pointer register + (SP) => { + $crate::bytecode::register::Register::STACK_POINTER + }; } /// Helper macro to parse operand values (for Operand enum). @@ -222,6 +227,13 @@ macro_rules! __inst_impl { }); $crate::__inst_impl!($vec; $($rest)*); }; + ($vec:ident; add64 $r1:tt, r[$r2:expr]; $($rest:tt)*) => { + $vec.push($crate::bytecode::Instruction::Add64 { + l1: $crate::__parse_operand!($r1), + l2: $crate::__parse_operand!(r [$r2]) + }); + $crate::__inst_impl!($vec; $($rest)*); + }; ($vec:ident; add64 $r1:tt, $r2:tt; $($rest:tt)*) => { $vec.push($crate::bytecode::Instruction::Add64 { l1: $crate::__parse_operand!($r1), @@ -677,28 +689,58 @@ macro_rules! 
__inst_impl { // Write operations with immediate values ($vec:ident; write8 $r1:tt, # [$val:expr]; $($rest:tt)*) => { $vec.push($crate::bytecode::Instruction::Write8 { - reg: $crate::__parse_operand!($r1), + op: $crate::__parse_operand_value!($r1), value: $val as u8 }); $crate::__inst_impl!($vec; $($rest)*); }; ($vec:ident; write16 $r1:tt, # [$val:expr]; $($rest:tt)*) => { $vec.push($crate::bytecode::Instruction::Write16 { - reg: $crate::__parse_operand!($r1), + op: $crate::__parse_operand_value!($r1), value: $val as u16 }); $crate::__inst_impl!($vec; $($rest)*); }; ($vec:ident; write32 $r1:tt, # [$val:expr]; $($rest:tt)*) => { $vec.push($crate::bytecode::Instruction::Write32 { - reg: $crate::__parse_operand!($r1), + op: $crate::__parse_operand_value!($r1), value: $val as u32 }); $crate::__inst_impl!($vec; $($rest)*); }; ($vec:ident; write64 $r1:tt, # [$val:expr]; $($rest:tt)*) => { $vec.push($crate::bytecode::Instruction::Write64 { - reg: $crate::__parse_operand!($r1), + op: $crate::__parse_operand_value!($r1), + value: $val as u64 + }); + $crate::__inst_impl!($vec; $($rest)*); + }; + + // Write operations with immediate address and value + ($vec:ident; write8 # [$addr:expr], # [$val:expr]; $($rest:tt)*) => { + $vec.push($crate::bytecode::Instruction::Write8 { + op: $crate::__parse_operand_value!(# [$addr]), + value: $val as u8 + }); + $crate::__inst_impl!($vec; $($rest)*); + }; + ($vec:ident; write16 # [$addr:expr], # [$val:expr]; $($rest:tt)*) => { + $vec.push($crate::bytecode::Instruction::Write16 { + op: $crate::__parse_operand_value!(# [$addr]), + value: $val as u16 + }); + $crate::__inst_impl!($vec; $($rest)*); + }; + ($vec:ident; write32 # [$addr:expr], # [$val:expr]; $($rest:tt)*) => { + $vec.push($crate::bytecode::Instruction::Write32 { + op: $crate::__parse_operand_value!(# [$addr]), + value: $val as u32 + }); + $crate::__inst_impl!($vec; $($rest)*); + }; + ($vec:ident; write64 # [$addr:expr], # [$val:expr]; $($rest:tt)*) => { + 
$vec.push($crate::bytecode::Instruction::Write64 { + op: $crate::__parse_operand_value!(# [$addr]), value: $val as u64 }); $crate::__inst_impl!($vec; $($rest)*); @@ -706,11 +748,11 @@ macro_rules! __inst_impl { // Control flow operations ($vec:ident; call r [$r1:expr]; $($rest:tt)*) => { - $vec.push($crate::bytecode::Instruction::Call { func: $crate::__parse_operand!(r [$r1]) }); + $vec.push($crate::bytecode::Instruction::Call { func: $crate::__parse_operand_value!(r [$r1]) }); $crate::__inst_impl!($vec; $($rest)*); }; ($vec:ident; call [$r1:literal]; $($rest:tt)*) => { - $vec.push($crate::bytecode::Instruction::Call { func: $crate::__parse_operand!([$r1]) }); + $vec.push($crate::bytecode::Instruction::Call { func: $crate::__parse_operand_value!([$r1]) }); $crate::__inst_impl!($vec; $($rest)*); }; @@ -1070,15 +1112,15 @@ mod tests { assert_eq!(instructions.len(), 4); - if let Instruction::Write64 { reg, value } = instructions[0] { - assert_eq!(reg, Register::new(50)); + if let Instruction::Write64 { op, value } = instructions[0] { + assert_eq!(op, Operand::Register(Register::new(50))); assert_eq!(value, 1234); } else { panic!("Expected Write64 instruction"); } - if let Instruction::Write32 { reg, value } = instructions[1] { - assert_eq!(reg, Register::new(51)); + if let Instruction::Write32 { op, value } = instructions[1] { + assert_eq!(op, Operand::Register(Register::new(51))); assert_eq!(value, 42); } else { panic!("Expected Write32 instruction"); diff --git a/compiler/hash-vm/src/builder/mod.rs b/compiler/hash-vm/src/builder/mod.rs index ea83738a3..3052f9a5b 100644 --- a/compiler/hash-vm/src/builder/mod.rs +++ b/compiler/hash-vm/src/builder/mod.rs @@ -5,15 +5,18 @@ mod func; mod instruction; +mod resolution; -use std::collections::HashMap; +use std::{ + cell::{Cell, RefCell}, + collections::HashMap, +}; use hash_abi::FnAbiId; +use hash_repr::ty::InstanceId; -use crate::{ - builder::func::FunctionBuilder, - bytecode::{Instruction, op::Operand}, -}; +pub use 
self::func::FunctionBuilder; +use crate::bytecode::Instruction; #[derive(Debug)] pub struct FunctionCtx { @@ -32,56 +35,37 @@ pub struct BytecodeBuilder { /// functions and their instructions. pub instructions: Vec, + /// Current function that is being built. + /// + /// N.B. We would need to store this somewhere differently if we wanted to + /// make this thread-safe. + current_function: Cell>, + /// The function context store, this is used to store the function contexts. - function_ctxs: HashMap, + function_ctxs: RefCell>, } impl BytecodeBuilder { + /// Create a new [BytecodeBuilder]. pub fn new() -> Self { - Self { instructions: Vec::new(), function_ctxs: HashMap::new() } - } - - pub fn absorb(&mut self, func: &FunctionBuilder) -> usize { - let FunctionBuilder { body, labels, .. } = func; - let offset = self.instructions.len(); - - // Reserve space for the function body instructions. - self.instructions.reserve(body.len()); - - // We need to resolve all of the labels within the function body, i.e. they - // should now use the "global" offsets within the entire bytecode - // program, rather than the relative offsets within the function body. - for mut instruction in body.into_iter().copied() { - match &mut instruction { - Instruction::Jmp { location, .. } - | Instruction::JmpPos { location, .. } - | Instruction::JmpNeg { location, .. } - | Instruction::JmpZero { location, .. } => { - if let Operand::Label(label) = *location { - // Resolve the label offset to the global instruction offset - let function_label = labels[label].get(); - let global_offset = function_label + offset; - *location = Operand::Immediate(global_offset); - } - } - _ => {} - } - - self.instructions.push(instruction); + Self { + instructions: Vec::new(), + function_ctxs: RefCell::new(HashMap::new()), + current_function: Cell::new(None), } - - offset } - pub fn add_function(&mut self, fn_builder: FunctionBuilder) { - // Absorb all of the function instructions into the bytecode builder. 
- let start = self.absorb(&fn_builder); - - let FunctionBuilder { abi, .. } = fn_builder; - let ctx = FunctionCtx { abi, offset: start }; - self.function_ctxs.insert(abi, ctx); + /// Add a new function to the bytecode builder. + /// + /// This will also configure the builder to use the newly added function + /// as the current function. + pub fn new_function(&self, fn_builder: FunctionBuilder) { + let FunctionBuilder { instance, .. } = fn_builder; + self.current_function.set(Some(instance)); + self.function_ctxs.borrow_mut().insert(instance, fn_builder); } + /// Add a single instruction to the bytecode builder. pub fn add_instruction(&mut self, instruction: Instruction) { self.instructions.push(instruction); } @@ -106,7 +90,38 @@ impl BytecodeBuilder { self.instructions.extend(instructions); } - pub fn build(self) -> Vec { - self.instructions + /// Get a function builder by its ABI. + pub fn with_fn_builder(&self, instance: InstanceId, f: F) + where + F: FnOnce(&FunctionBuilder), + { + let ctx = self.function_ctxs.borrow(); + let fn_builder = ctx.get(&instance).unwrap(); + f(fn_builder) + } + + pub fn with_fn_builder_mut(&self, instance: InstanceId, f: F) -> T + where + F: FnOnce(&mut FunctionBuilder) -> T, + { + let mut function_ctxs = self.function_ctxs.borrow_mut(); + let fn_builder = function_ctxs.get_mut(&instance).unwrap(); + f(fn_builder) + } + + /// Call a closure with the current function builder. + /// + /// This is useful for modifying the current function builder + /// without having to pass around the instance ID. + /// + /// ##Note: This assumes that there is a current function set. 
+ pub fn with_current_mut(&self, f: F) -> T + where + F: FnOnce(&mut FunctionBuilder) -> T, + { + let instance = self.current_function.get().expect("there must be a current function"); + let mut function_ctxs = self.function_ctxs.borrow_mut(); + let fn_builder = function_ctxs.get_mut(&instance).unwrap(); + f(fn_builder) } } diff --git a/compiler/hash-vm/src/builder/resolution.rs b/compiler/hash-vm/src/builder/resolution.rs new file mode 100644 index 000000000..eb96d8213 --- /dev/null +++ b/compiler/hash-vm/src/builder/resolution.rs @@ -0,0 +1,43 @@ +use crate::{ + builder::{BytecodeBuilder, FunctionBuilder}, + bytecode::{Instruction, Operand}, +}; + +impl BytecodeBuilder { + pub fn absorb(&mut self, func: &FunctionBuilder) -> usize { + let body = func.body(); + let labels = func.label_offsets(); + let offset = self.instructions.len(); + + // Reserve space for the function body instructions. + self.instructions.reserve(body.len()); + + // We need to resolve all of the labels within the function body, i.e. they + // should now use the "global" offsets within the entire bytecode + // program, rather than the relative offsets within the function body. + for mut instruction in body.into_iter().copied() { + match &mut instruction { + Instruction::Jmp { location, .. } + | Instruction::JmpPos { location, .. } + | Instruction::JmpNeg { location, .. } + | Instruction::JmpZero { location, .. 
} => { + if let Operand::Label(label) = *location { + // Resolve the label offset to the global instruction offset + let function_label = labels[label].get(); + let global_offset = function_label + offset; + *location = Operand::Immediate(global_offset); + } + } + _ => {} + } + + self.instructions.push(instruction); + } + + offset + } + + pub fn build(self) -> Vec { + self.instructions + } +} diff --git a/compiler/hash-vm/src/bytecode/instruction.rs b/compiler/hash-vm/src/bytecode/instruction.rs index 63dc8eea9..0dd2925fa 100644 --- a/compiler/hash-vm/src/bytecode/instruction.rs +++ b/compiler/hash-vm/src/bytecode/instruction.rs @@ -357,27 +357,27 @@ pub enum Instruction { }, /// Write an 8bit literal value to a memory address. Write8 { - reg: Register, + op: Operand, value: u8, }, /// Write a 16bit literal value to a memory address. Write16 { - reg: Register, + op: Operand, value: u16, }, /// Write a 32bit literal value to a memory address. Write32 { - reg: Register, + op: Operand, value: u32, }, /// Write a 64bit literal value to a memory address. Write64 { - reg: Register, + op: Operand, value: u64, }, /// Call a function at a given address Call { - func: Register, + func: Operand, }, /// Copy a value from source register to destination register. 
Mov { @@ -495,10 +495,10 @@ impl fmt::Display for Instruction { Instruction::Shr16 { l1, l2 } => write!(f, "shr16 {}, {}", l1, l2), Instruction::Shr32 { l1, l2 } => write!(f, "shr32 {}, {}", l1, l2), Instruction::Shr64 { l1, l2 } => write!(f, "shr64 {}, {}", l1, l2), - Instruction::Write8 { reg, value } => write!(f, "write8 {}, {}", reg, value), - Instruction::Write16 { reg, value } => write!(f, "write16 {}, {}", reg, value), - Instruction::Write32 { reg, value } => write!(f, "write32 {}, {}", reg, value), - Instruction::Write64 { reg, value } => write!(f, "write64 {}, {}", reg, value), + Instruction::Write8 { op, value } => write!(f, "write8 {}, {}", op, value), + Instruction::Write16 { op, value } => write!(f, "write16 {}, {}", op, value), + Instruction::Write32 { op, value } => write!(f, "write32 {}, {}", op, value), + Instruction::Write64 { op, value } => write!(f, "write64 {}, {}", op, value), Instruction::Call { func } => write!(f, "call {}", func), Instruction::Mov { src, dest } => write!(f, "mov {}, {}", src, dest), Instruction::Syscall { id } => write!(f, "syscall {}", id), diff --git a/compiler/hash-vm/src/bytecode/op.rs b/compiler/hash-vm/src/bytecode/op.rs index 8149b6a64..984cdbc8b 100644 --- a/compiler/hash-vm/src/bytecode/op.rs +++ b/compiler/hash-vm/src/bytecode/op.rs @@ -87,6 +87,15 @@ impl Operand { matches!(self, Operand::Register(_)) } + /// Get the register if the operand is a register. + pub fn as_register(&self) -> Register { + let Operand::Register(reg) = self else { + panic!("Operand is not a register"); + }; + + *reg + } + /// Check if the operand is a label. 
pub fn is_label(&self) -> bool { matches!(self, Operand::Label(_)) diff --git a/compiler/hash-vm/src/bytecode/pretty.rs b/compiler/hash-vm/src/bytecode/pretty.rs index bbc22adf6..0ff016a92 100644 --- a/compiler/hash-vm/src/bytecode/pretty.rs +++ b/compiler/hash-vm/src/bytecode/pretty.rs @@ -120,16 +120,16 @@ mod tests { labels: index_vec![LabelOffset::new(0), LabelOffset::new(4), LabelOffset::new(6),], // Instructions matching the test case instructions: index_vec![ - Instruction::Write32 { reg: Register::new(0), value: 10 }, - Instruction::Write32 { reg: Register::new(1), value: 20 }, + Instruction::Write32 { op: Operand::Register(Register::new(0)), value: 10 }, + Instruction::Write32 { op: Operand::Register(Register::new(1)), value: 20 }, Instruction::Add32 { l1: Register::new(0), l2: Register::new(1) }, Instruction::JmpPos { l1: Register::new(0), location: Operand::Label(LabelOffset::new(2)) }, - Instruction::Write32 { reg: Register::new(0), value: 0 }, + Instruction::Write32 { op: Operand::Register(Register::new(0)), value: 0 }, Instruction::Return, - Instruction::Write32 { reg: Register::new(0), value: 1 }, + Instruction::Write32 { op: Operand::Register(Register::new(0)), value: 1 }, Instruction::Return, ], }; diff --git a/compiler/hash-vm/src/error.rs b/compiler/hash-vm/src/error.rs index 58a5526e7..e216f4ab6 100644 --- a/compiler/hash-vm/src/error.rs +++ b/compiler/hash-vm/src/error.rs @@ -20,7 +20,18 @@ impl fmt::Display for StackAccessKind { #[derive(Debug)] pub enum RuntimeError { - StackViolationAccess { kind: StackAccessKind, size: u8, total: usize }, + StackViolationAccess { + kind: StackAccessKind, + size: u8, + total: usize, + }, + + /// A memory access violation occurred. 
+ MemoryAccessViolation { + addr: usize, + size: usize, + reason: String, + }, } pub type RuntimeResult = Result; @@ -36,6 +47,14 @@ impl From for Report { error_code: None, contents: vec![], }, + RuntimeError::MemoryAccessViolation { addr, size, reason } => Report { + kind: ReportKind::Error, + title: format!( + "Memory access violation occurred: tried to access memory at address {addr} with size {size}bytes. Reason: {reason}" + ), + error_code: None, + contents: vec![], + }, } } } diff --git a/compiler/hash-vm/src/heap.rs b/compiler/hash-vm/src/heap.rs deleted file mode 100644 index 4437b403d..000000000 --- a/compiler/hash-vm/src/heap.rs +++ /dev/null @@ -1,29 +0,0 @@ -//! Hash Compiler VM heap definitions. -#![allow(dead_code)] -use std::iter; - -/// The Heap of the VM. -#[derive(Debug)] -pub struct Heap { - values: Vec, -} - -pub struct Pointer(pub u64); - -impl Heap { - pub fn new() -> Self { - Heap { values: vec![] } - } - - pub fn allocate(&mut self, size: u64) -> Pointer { - let offset = self.values.len(); - - self.values.extend(iter::repeat_n(0, size.try_into().unwrap())); - - Pointer(offset.try_into().unwrap()) - } - - pub fn free(&self, _ptr: Pointer) { - todo!() - } -} diff --git a/compiler/hash-vm/src/lib.rs b/compiler/hash-vm/src/lib.rs index 40229cea2..904022efa 100644 --- a/compiler/hash-vm/src/lib.rs +++ b/compiler/hash-vm/src/lib.rs @@ -1,10 +1,8 @@ //! Hash Compiler VM crate. #![feature(if_let_guard)] -mod heap; -mod stack; - pub mod builder; pub mod bytecode; pub mod error; +pub mod memory; pub mod vm; diff --git a/compiler/hash-vm/src/memory.rs b/compiler/hash-vm/src/memory.rs new file mode 100644 index 000000000..8dc859ff9 --- /dev/null +++ b/compiler/hash-vm/src/memory.rs @@ -0,0 +1,267 @@ +//! A module that contains the implementation to the +//! virtual machine memory model. +//! +//! Essentially, this is a wrapper on top a conceptual +//! memory space that the VM can use to read and write +//! data to/from. +//! +//! 
This "memory" space is a global view of the entire machine +//! state, i.e. it hosts the stack, heap, and static data segments. +//! +//! That way, we can have a unified address space for the VM to +//! read and write data to/from. + +use std::iter::IntoIterator; + +use hash_utils::{ + index_vec::{IndexVec, define_index_type, index_vec}, + itertools::Itertools, + range_map::RangeMap, +}; + +use crate::error::RuntimeError; + +define_index_type! { + /// A unique identifier for a memory region. + pub struct RegionId = u32; + MAX_INDEX = i32::MAX as usize; + DISABLE_MAX_INDEX_CHECK = cfg!(not(debug_assertions)); +} + +#[derive(Debug, Clone)] +pub struct Region { + /// The unique identifier of the region. + pub id: RegionId, + + /// The name of the region, usually pre-determined + /// by the VM memory model. + pub name: String, + + /// The start address of the memory region. + pub start: usize, + + /// The size of the memory region. + pub size: usize, + + /// Flags that should be applied to the memory region. + pub flags: RegionFlags, +} + +bitflags::bitflags! { + /// Flags that can be applied to a memory region. + /// + /// Each flag implies that they have that particular + /// and all lower permissions. For example, if a region + /// has the `EXECUTE` flag, it also implies that it has + /// the `READ` and `WRITE` flags. + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] + pub struct RegionFlags: u8 { + /// Readable region. + const READ = 1 << 0; + + /// Writable region. + const WRITE = 1 << 1 | Self::READ.bits(); + + /// Executable region. + const EXECUTE = 1 << 2 | Self::WRITE.bits(); + } +} + +/// The [MemoryBuilder] structure is used to abstract the +/// construction of a [Memory] instance. Overall, the idea +/// of the builder is that it is used to control and manage +/// individual memory regions, their sizes and properties, +/// and finally build the memory space that the VM will use. 
+#[derive(Debug, Default)] +pub struct MemoryBuilder { + /// The region of the map being built. + /// + /// This will be the final region map once + /// everything else has been built and finalised. + region_map: RangeMap, + + /// A list of the current regions that are being + /// constructed. + regions: IndexVec, + + /// The current offset of where to begin the next + /// region if we needed to re-size the memory. + offset: usize, +} + +impl MemoryBuilder { + /// Create a new region builder. + pub fn new() -> Self { + Self { region_map: RangeMap::new(), regions: index_vec![], offset: 0 } + } + + /// Add a new region to the region map. + pub fn add_region(&mut self, name: String, size: usize, flags: RegionFlags) -> &mut Self { + let offset = self.offset; + let region = + Region { id: RegionId::new(self.regions.len()), name, start: offset, size, flags }; + let start = offset + region.start; + let end = offset + region.start + region.size.saturating_sub(1); + + self.region_map.insert(start..=end, region.id); + self.offset = end + 1; + self.regions.push(region); + self + } + + /// Finalize the region map. + pub fn build(self) -> Memory { + Memory { + memory: vec![0; self.offset], + region_map: RangeMap::populated( + self.region_map + .into_iter() + .map(|pair| (pair.0, self.regions[pair.1].clone())) + .collect_vec(), + ), + } + } +} + +/// The default size of the stack region. +const DEFAULT_STACK_SIZE: usize = 100 * 1024; + +/// The default size of the VM memory space. +/// +/// 2MB stack by default. +const DEFAULT_SIZE: usize = 2 * 1024 * 1024; + +/// A representation of the VM memory space. +/// +/// This contains the actual memory bytes as well as +/// a region map that tracks the different memory regions +/// and their properties. +#[derive(Debug)] +pub struct Memory { + /// The memory space of the VM. + pub memory: Vec, + + /// A map of the region space that is being + /// used within the VM. 
+ pub region_map: RangeMap, +} + +impl Memory { + pub fn check_access( + &self, + addr: usize, + size: usize, + flags: RegionFlags, + ) -> Result<(), RuntimeError> { + let entry = self.region_map.find(addr).ok_or(RuntimeError::MemoryAccessViolation { + addr, + size, + reason: "No memory region found for the given address".to_string(), + })?; + + // Check if the memory can be written into this region + if size > entry.size { + return Err(RuntimeError::MemoryAccessViolation { + addr, + size, + reason: format!( + "Access size {} exceeds region size {} for region {}", + size, entry.size, entry.name + ), + }); + } + + // Check if the region has the required flags. This accounts + // for that if `flags` have + let missing = entry.flags - flags; + if missing > flags { + return Err(RuntimeError::MemoryAccessViolation { + addr, + size, + reason: format!( + "Region {} is missing required access flags: {:?}", + entry.name, missing + ), + }); + } + + Ok(()) + } +} + +impl Default for Memory { + fn default() -> Self { + let mut builder = MemoryBuilder::new(); + builder + .add_region( + "read_only".to_string(), + 0, // We don't allocate any read-only data by default. + RegionFlags::READ, + ) + .add_region("stack".to_string(), DEFAULT_STACK_SIZE, RegionFlags::WRITE) + .add_region("heap".to_string(), DEFAULT_SIZE, RegionFlags::WRITE); + builder.build() + } +} + +pub trait HasMemoryAccess { + /// Read and write methods for the VM memory. 
+ fn read8(&self, addr: usize) -> Result<&[u8; 1], RuntimeError>; + fn read16(&self, addr: usize) -> Result<&[u8; 2], RuntimeError>; + fn read32(&self, addr: usize) -> Result<&[u8; 4], RuntimeError>; + fn read64(&self, addr: usize) -> Result<&[u8; 8], RuntimeError>; + fn write8(&mut self, addr: usize, value: &[u8; 1]) -> Result<(), RuntimeError>; + fn write16(&mut self, addr: usize, value: &[u8; 2]) -> Result<(), RuntimeError>; + fn write32(&mut self, addr: usize, value: &[u8; 4]) -> Result<(), RuntimeError>; + fn write64(&mut self, addr: usize, value: &[u8; 8]) -> Result<(), RuntimeError>; +} + +impl HasMemoryAccess for Memory { + fn read8(&self, addr: usize) -> Result<&[u8; 1], RuntimeError> { + self.check_access(addr, 8, RegionFlags::READ)?; + let value = self.memory[addr..addr + 1].try_into().unwrap(); + Ok(value) + } + + fn read16(&self, addr: usize) -> Result<&[u8; 2], RuntimeError> { + self.check_access(addr, 16, RegionFlags::READ)?; + let value = self.memory[addr..addr + 2].try_into().unwrap(); + Ok(value) + } + + fn read32(&self, addr: usize) -> Result<&[u8; 4], RuntimeError> { + self.check_access(addr, 32, RegionFlags::READ)?; + let value = self.memory[addr..addr + 4].try_into().unwrap(); + Ok(value) + } + + fn read64(&self, addr: usize) -> Result<&[u8; 8], RuntimeError> { + self.check_access(addr, 64, RegionFlags::READ)?; + let value = self.memory[addr..addr + 8].try_into().unwrap(); + Ok(value) + } + + fn write8(&mut self, addr: usize, value: &[u8; 1]) -> Result<(), RuntimeError> { + self.check_access(addr, 8, RegionFlags::WRITE)?; + self.memory[addr..addr + 1].copy_from_slice(value); + Ok(()) + } + + fn write16(&mut self, addr: usize, value: &[u8; 2]) -> Result<(), RuntimeError> { + self.check_access(addr, 16, RegionFlags::WRITE)?; + self.memory[addr..addr + 2].copy_from_slice(value); + Ok(()) + } + + fn write32(&mut self, addr: usize, value: &[u8; 4]) -> Result<(), RuntimeError> { + self.check_access(addr, 32, RegionFlags::WRITE)?; + 
self.memory[addr..addr + 4].copy_from_slice(value); + Ok(()) + } + + fn write64(&mut self, addr: usize, value: &[u8; 8]) -> Result<(), RuntimeError> { + self.check_access(addr, 64, RegionFlags::WRITE)?; + self.memory[addr..addr + 8].copy_from_slice(value); + Ok(()) + } +} diff --git a/compiler/hash-vm/src/stack.rs b/compiler/hash-vm/src/stack.rs deleted file mode 100644 index 4f7494a93..000000000 --- a/compiler/hash-vm/src/stack.rs +++ /dev/null @@ -1,112 +0,0 @@ -//! Hash Compiler VM stack implementation. -use crate::error::{RuntimeError, RuntimeResult, StackAccessKind}; - -/// The [Stack] represents temporary storage for a current function scope -/// to ensure that functions can store data closely to the actual running -/// program. -#[derive(Debug)] -pub struct Stack { - /// The actual internal data of the stack. Once created, the stack size - /// cannot be modified. - data: Vec, - /// The internal representation of where the stack offset is located at. - stack_pointer: usize, -} - -impl Stack { - /// Create a new stack - pub fn new(size: usize) -> Self { - Stack { data: vec![0; size], stack_pointer: 0 } - } - - /// Method that verifies that a particular call to modify the stack storage - /// is sane and safe. 
- pub fn verify_access(&self, access_kind: StackAccessKind, size: u8) -> RuntimeResult<()> { - match access_kind { - StackAccessKind::Pop if self.stack_pointer > (size as usize) => Ok(()), - StackAccessKind::Push if self.data.len() - self.stack_pointer > (size as usize) => { - Ok(()) - } - _ => Err(RuntimeError::StackViolationAccess { - kind: access_kind, - size, - total: self.data.len(), - }), - } - } - - /// Pop the last byte of the stack - pub fn pop8(&mut self) -> RuntimeResult<&[u8; 1]> { - self.verify_access(StackAccessKind::Pop, 1)?; - - let value = self.data[(self.stack_pointer - 1)..self.stack_pointer].try_into().unwrap(); - self.stack_pointer -= 1; - - Ok(value) - } - - /// Pop the last two bytes of the stack - pub fn pop16(&mut self) -> RuntimeResult<&[u8; 2]> { - self.verify_access(StackAccessKind::Pop, 2)?; - - let value = self.data[(self.stack_pointer - 2)..self.stack_pointer].try_into().unwrap(); - self.stack_pointer -= 2; - - Ok(value) - } - - pub fn pop32(&mut self) -> RuntimeResult<&[u8; 4]> { - self.verify_access(StackAccessKind::Pop, 4)?; - - let value = self.data[(self.stack_pointer - 4)..self.stack_pointer].try_into().unwrap(); - self.stack_pointer -= 4; - - Ok(value) - } - - /// Pop the last eight bytes of the stack - pub fn pop64(&mut self) -> RuntimeResult<&[u8; 8]> { - self.verify_access(StackAccessKind::Pop, 8)?; - - let value = self.data[(self.stack_pointer - 8)..self.stack_pointer].try_into().unwrap(); - self.stack_pointer -= 8; - - Ok(value) - } - - /// Push the a byte onto the stack - pub fn push8(&mut self, value: &[u8; 1]) -> RuntimeResult<()> { - self.verify_access(StackAccessKind::Push, 1)?; - - self.data.splice(self.stack_pointer..(self.stack_pointer + 1), value.iter().copied()); - self.stack_pointer += 1; - Ok(()) - } - - /// Push the two bytes onto the stack - pub fn push16(&mut self, value: &[u8; 2]) -> RuntimeResult<()> { - self.verify_access(StackAccessKind::Push, 2)?; - - 
self.data.splice(self.stack_pointer..(self.stack_pointer + 2), value.iter().copied()); - self.stack_pointer += 2; - Ok(()) - } - - /// Push the four bytes onto the stack - pub fn push32(&mut self, value: &[u8; 4]) -> RuntimeResult<()> { - self.verify_access(StackAccessKind::Push, 4)?; - - self.data.splice(self.stack_pointer..(self.stack_pointer + 4), value.iter().copied()); - self.stack_pointer += 4; - Ok(()) - } - - /// Push the eight bytes onto the stack - pub fn push64(&mut self, value: &[u8; 8]) -> RuntimeResult<()> { - self.verify_access(StackAccessKind::Push, 8)?; - - self.data.splice(self.stack_pointer..(self.stack_pointer + 8), value.iter().copied()); - self.stack_pointer += 8; - Ok(()) - } -} diff --git a/compiler/hash-vm/src/vm.rs b/compiler/hash-vm/src/vm.rs index 189efb5a8..75ba62674 100644 --- a/compiler/hash-vm/src/vm.rs +++ b/compiler/hash-vm/src/vm.rs @@ -4,15 +4,13 @@ use std::cell::Cell; use crate::{ bytecode::{ - Instruction, + Instruction, Operand, register::{Register, RegisterSet}, }, error::RuntimeError, - stack::Stack, + memory::{HasMemoryAccess, Memory}, }; -const DEFAULT_STACK_SIZE: usize = 10_000; - /// Interpreter flags represent generated context from the current /// execution. This flags store information about the last executed /// instruction (if relevant). @@ -30,32 +28,25 @@ pub struct InterpreterFlags { /// registers, etc. #[derive(Debug)] pub struct Interpreter { - /// The Interpreter stack holds the current execution context of the - /// function. This is very similar to the way that the x86 architecture - /// handles the flag. - stack: Stack, - /// Interpreter flags represent the result of some operation that has - /// occurred - flags: InterpreterFlags, - /// A vector of [Instruction]s representing the program that it will run + /// The memory space of the VM. + memory: Memory, + + /// A vector of [Instruction]s representing the program that it will run. 
instructions: Vec, - /// We have 256 [Register]s available to the interpreter at any time + + /// The [Register]s available to the interpreter at any time. registers: RegisterSet, - // /// The interpreter [Heap] containing heap allocated values that are not contained on the - // stack heap: Heap, -} -impl Default for Interpreter { - fn default() -> Self { - Self::new() - } + /// Interpreter flags represent the result of some operation that has + /// occurred + flags: InterpreterFlags, } impl Interpreter { #[must_use] - pub fn new() -> Self { + pub fn new(memory: Memory) -> Self { Self { - stack: Stack::new(DEFAULT_STACK_SIZE), + memory, instructions: Vec::new(), registers: RegisterSet::default(), flags: InterpreterFlags::default(), @@ -69,6 +60,7 @@ impl Interpreter { fn run_next_instruction(&mut self) -> Result<(), RuntimeError> { let ip = self.get_instruction_pointer(); + let sp = self.get_stack_pointer(); let instruction = self.instructions.get(ip).unwrap(); match *instruction { @@ -759,48 +751,62 @@ impl Interpreter { } Instruction::Pop8 { l1 } => { // Pop the top byte on top of the stack and put it into the register - let value = self.stack.pop8()?; + let value = self.memory.read8(sp)?; self.registers.set_register_b(l1, value); + self.set_stack_pointer(sp - 1); } Instruction::Pop16 { l1 } => { // Pop the top two bytes on top of the stack and put it into the register - let value = self.stack.pop16()?; + let value = self.memory.read16(sp)?; self.registers.set_register_2b(l1, value); + self.set_stack_pointer(sp - 2); } Instruction::Pop32 { l1 } => { // Pop the top four bytes on top of the stack and put it into the register - let value = self.stack.pop32()?; + let value = self.memory.read32(sp)?; self.registers.set_register_4b(l1, value); + self.set_stack_pointer(sp - 4); } Instruction::Pop64 { l1 } => { // Pop the top four bytes on top of the stack and put it into the register - let value = self.stack.pop64()?; + let value = self.memory.read64(sp)?; 
self.registers.set_register_8b(l1, value); + self.set_stack_pointer(sp - 8); } Instruction::Push8 { l1 } => { let value = self.registers.get_register_b(l1); - self.stack.push8(value)?; + self.memory.write8(sp, value)?; + self.set_stack_pointer(sp + 1); } Instruction::Push16 { l1 } => { let value = self.registers.get_register_2b(l1); - self.stack.push16(value)?; + self.memory.write16(sp, value)?; + self.set_stack_pointer(sp + 2); } Instruction::Push32 { l1 } => { let value = self.registers.get_register_4b(l1); - self.stack.push32(value)?; + self.memory.write32(sp, value)?; + self.set_stack_pointer(sp + 4); } Instruction::Push64 { l1 } => { let value = self.registers.get_register_8b(l1); - self.stack.push64(value)?; + self.memory.write64(sp, value)?; + self.set_stack_pointer(sp + 8); } Instruction::Call { func } => { // Save the ip onto the stack - self.stack.push64( + self.memory.write64( + sp, &self.registers.get_register64(Register::INSTRUCTION_POINTER).to_be_bytes(), )?; + self.set_stack_pointer(self.get_stack_pointer() + 8); + // Save the bp onto the stack - self.stack - .push64(&self.registers.get_register64(Register::BASE_POINTER).to_be_bytes())?; + self.memory.write64( + sp, + &self.registers.get_register64(Register::BASE_POINTER).to_be_bytes(), + )?; + self.set_stack_pointer(sp + 8); // Set the new bp as the stack pointer self.registers.set_register64( @@ -811,7 +817,7 @@ impl Interpreter { // Jump to the function self.registers.set_register64( Register::INSTRUCTION_POINTER, - self.registers.get_register64(func), + self.registers.get_register64(func.as_register()), ); } Instruction::Return => { @@ -824,26 +830,64 @@ impl Interpreter { // Get the BP from stack and set it self.registers.set_register64( Register::BASE_POINTER, - u64::from_be_bytes(*self.stack.pop64()?), + u64::from_be_bytes(*self.memory.read64(sp)?), ); + self.set_stack_pointer(sp - 8); // Get the IP from stack and set it self.registers.set_register64( Register::INSTRUCTION_POINTER, - 
u64::from_be_bytes(*self.stack.pop64()?), + u64::from_be_bytes(*self.memory.read64(sp)?), ); + self.set_stack_pointer(sp - 16); // 8 for BP + 8 for IP } - Instruction::Write8 { reg, value } => { - self.registers.set_register8(reg, value); + Instruction::Write8 { op, value } => { + match op { + Operand::Register(reg) => { + self.registers.set_register8(reg, value); + } + Operand::Immediate(addr) => { + // write to the memory address in the stack. + self.memory.write8(addr, &value.to_be_bytes())?; + } + _ => unreachable!(), + } } - Instruction::Write16 { reg, value } => { - self.registers.set_register16(reg, value); + Instruction::Write16 { op, value } => { + match op { + Operand::Register(reg) => { + self.registers.set_register16(reg, value); + } + Operand::Immediate(addr) => { + // write to the memory address in the stack. + self.memory.write16(addr, &value.to_be_bytes())?; + } + _ => unreachable!(), + } } - Instruction::Write32 { reg, value } => { - self.registers.set_register32(reg, value); + Instruction::Write32 { op, value } => { + match op { + Operand::Register(reg) => { + self.registers.set_register32(reg, value); + } + Operand::Immediate(addr) => { + // write to the memory address in the stack. + self.memory.write32(addr, &value.to_be_bytes())?; + } + _ => unreachable!(), + } } - Instruction::Write64 { reg, value } => { - self.registers.set_register64(reg, value); + Instruction::Write64 { op, value } => { + match op { + Operand::Register(reg) => { + self.registers.set_register64(reg, value); + } + Operand::Immediate(addr) => { + // write to the memory address in the stack. + self.memory.write64(addr, &value.to_be_bytes())?; + } + _ => unreachable!(), + } } Instruction::Syscall { .. } => todo!(), }; @@ -851,6 +895,16 @@ impl Interpreter { Ok(()) } + /// Get the current stack pointer of the VM. 
+ pub fn get_stack_pointer(&self) -> usize { + self.registers.get_register64(Register::STACK_POINTER).try_into().unwrap() + } + + /// Sets the current stack pointer of the VM. + pub fn set_stack_pointer(&mut self, value: usize) { + self.registers.set_register64(Register::STACK_POINTER, value.try_into().unwrap()); + } + /// Gets the current instruction pointer of the VM. pub fn get_instruction_pointer(&self) -> usize { self.registers.get_register64(Register::INSTRUCTION_POINTER).try_into().unwrap() diff --git a/compiler/hash-vm/tests/vm.rs b/compiler/hash-vm/tests/vm.rs index 3b140afc1..1be947ac2 100644 --- a/compiler/hash-vm/tests/vm.rs +++ b/compiler/hash-vm/tests/vm.rs @@ -1,9 +1,12 @@ //! Hash Compiler VM tests. -use hash_vm::{builder::BytecodeBuilder, bytecode::register::Register, inst, r, vm::Interpreter}; +use hash_vm::{ + builder::BytecodeBuilder, bytecode::register::Register, inst, memory::Memory, r, + vm::Interpreter, +}; #[test] fn push_two_and_add() { - let mut builder = BytecodeBuilder::default(); + let mut builder = BytecodeBuilder::new(); let r0 = r!(0); builder.append(inst! { @@ -12,7 +15,7 @@ fn push_two_and_add() { add16 [0], [1]; }); - let mut vm = Interpreter::new(); + let mut vm = Interpreter::new(Memory::default()); // @@Todo: this is definitely not correct, as we'd // still need to ensure that we've got all of the right